Friday 8 May 2015

K-Means Clustering Code

We have written code for K-Means clustering of the MFCCs we have obtained. We have not tested the code yet; that will be our next task in the coming week.

#train1.txt - contains all mfccs of all the categories' samples (FIRST frame coefficients only)

def dv(p1, p2):  # takes two points as input
    """Return the Euclidean distance between two 2-D points.

    Args:
        p1: Sequence whose first two items are the x and y coordinates.
        p2: Sequence whose first two items are the x and y coordinates.

    Returns:
        The straight-line distance between p1 and p2 as a float.
    """
    # Imported locally because the script has no top-of-file import block.
    import math

    # Square the coordinate *differences*. Subtracting the squares instead
    # (x1^2 - x2^2) is not a distance and can go negative, which makes
    # math.sqrt raise ValueError.
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return math.sqrt(dx * dx + dy * dy)

def mindv(p, cents=None):
    """Return the index of the centroid closest to point p.

    Args:
        p: Sequence whose first two items are the x and y coordinates.
        cents: Optional list of centroid points to search. When omitted,
            the module-level ``centroids`` list is used.

    Returns:
        Index of the nearest centroid. Ties go to the lowest index, and 0 is
        returned when the centroid list is empty.
    """
    if cents is None:
        cents = centroids

    best_index = 0
    best_dist = None  # no candidate seen yet
    for i, c in enumerate(cents):
        # Squared Euclidean distance: the square root is monotonic, so it is
        # not needed just to decide which centroid is nearest.
        dx = p[0] - c[0]
        dy = p[1] - c[1]
        dist = dx * dx + dy * dy
        if best_dist is None or dist < best_dist:
            best_dist = dist
            best_index = i
    return best_index

def meanPair(lst):
    """Return the mean point [mean_x, mean_y] of a list of 2-D points.

    Args:
        lst: Non-empty list of points; only items 0 and 1 of each are used.

    Returns:
        A two-element list of floats: the average x and the average y.

    Raises:
        ValueError: If lst is empty, since the mean of no points is undefined.
    """
    if not lst:
        raise ValueError("meanPair() needs at least one point")

    count = float(len(lst))  # float so the division is never integer division
    mean_x = sum(point[0] for point in lst) / count
    mean_y = sum(point[1] for point in lst) / count
    return [mean_x, mean_y]

def costFunc(cen, cls, m):
    """Return the k-means cost: mean squared distance of points to centroids.

    Args:
        cen: List of centroids; cen[i] is the [x, y] centre of cluster i.
        cls: List of clusters; cls[i] is the list of [x, y] points assigned
            to centroid i.
        m: Number to divide the summed squared distance by (the caller passes
            the total number of points). Must be non-zero.

    Returns:
        The summed squared point-to-centroid distance divided by m, as a
        float.
    """
    total = 0.0
    for idx, cluster in enumerate(cls):
        for point in cluster:
            # Look the centroid up only when the cluster has points, so an
            # empty trailing cluster never indexes into cen.
            centre = cen[idx]
            total += (point[0] - centre[0]) ** 2
            total += (point[1] - centre[1]) ** 2
    return float(total) / m

def _read_rows(path):
    """Parse a text file of comma-separated numbers into a list of float rows.

    Blank lines (including the empty string left by a trailing newline) are
    skipped; previously they reached float('') and raised ValueError.
    """
    rows = []
    # 'with' guarantees the handle is closed even if parsing fails.
    with open(path, "r") as fd:
        for line in fd:
            line = line.strip()
            if line:
                rows.append([float(value) for value in line.split(",")])
    return rows


L1 = _read_rows("train1.txt")
# NOTE(review): presumably train2.txt holds the matching coefficients of
# another frame for the same samples, row for row - confirm.
L2 = _read_rows("train2.txt")

# Rows are paired by position, so differing row counts mean the two files do
# not describe the same samples.
if len(L1) != len(L2):
    raise ValueError(
        "train1.txt has %d rows but train2.txt has %d" % (len(L1), len(L2))
    )

# Build one 2-D point per (row, coefficient): x comes from train1, y from
# train2. Only the first 13 values of each row are used (presumably the
# number of MFCCs per frame - confirm).
pairs = []
for row1, row2 in zip(L1, L2):
    for j in range(13):
        pairs.append([row1[j], row2[j]])

# Imported here because the script has no top-of-file import block.
import random

k = 4  # number of clusters

# Seed the centroids with k distinct entries of `pairs`. random.sample raises
# ValueError when there are fewer than k points.
centroids = random.sample(pairs, k)

# Cost of the previous iteration; None until one iteration has been scored.
oldJ = None

# Loop Here
while True:
    # Classification: C[i] collects the points currently nearest centroid i.
    C = [[] for _ in range(k)]
    for p in pairs:
        C[mindv(p)].append(p)

    newJ = costFunc(centroids, C, len(pairs))

    # Stop once the relative improvement over the previous iteration is 5% or
    # less. A previous cost of exactly 0 cannot improve (and would divide by
    # zero), so that also stops the loop.
    if oldJ is not None:
        if oldJ == 0 or (float(oldJ) - float(newJ)) / float(oldJ) <= 0.05:
            break
    # Remember this cost; without this the test above would always compare
    # against the initial value.
    oldJ = newJ

    # Move each centroid to the mean of its cluster. A cluster that received
    # no points keeps its previous centroid, since it has no mean.
    centroids = [
        meanPair(C[i]) if C[i] else centroids[i]
        for i in range(k)
    ]

# Formatted so the output is the same under Python 2 and Python 3.
print("%s %s" % (centroids, C))