from numpy import random,argsort,argmax,bincount,int_,array,vstack,round from pylab import scatter,show def knn_classifier(x, D, labels, K): """ Classify the vector x D - data matrix (each row is a pattern). labels - class of each pattern. K - number of neighbour to use. Returns the class label and the neighbors indexes. """ neig_idx = knn_search(x,D,K) counts = bincount(labels[neig_idx]) # voting return argmax(counts),neig_idxLet's test the classifier on some random data:
# generating a random dataset with random labels data = random.rand(2,150) # random points labels = int_(round(random.rand(150)*1)) # random labels 0 or 1 x = random.rand(2,1) # random test point # label assignment using k=5 result,neig_idx = knn_classifier(x,data,labels,5) print 'Label assignment:', result # plotting the data and the input pattern # class 1, red points, class 0 blue points scatter(data[0,:],data[1,:], c=labels,alpha=0.8) scatter(x[0],x[1],marker='o',c='g',s=40) # highlighting the neighbours plot(data[0,neig_idx],data[1,neig_idx],'o', markerfacecolor='None',markersize=15,markeredgewidth=1) show()The script will show the following graph:
The query vector is represented with a green point and we can see that the 3 out of 5 nearest neighbors are red points (label 1) while the remaining 2 are blue (label 2).
The result of the classification will be printed on the console:
Label assignment: 1As we expected, the green point have been assigned to the class with red markers.
"""Thanks a lot :D I made a small change so that the user can classify several points with one call of knn_classifier(x, D, labels, K)"""
ReplyDeletefrom numpy import random,argsort,argmax,bincount,int_,array,vstack,round,sqrt,ones
from pylab import scatter,show,plot
def knn_search(x, D, K):
""" find K nearest neighbours of data among D """
ndata = D.shape[0]
# num of query points
queries=x.shape
K = K if K < ndata else ndata
# euclidean distances from the other points
diff=array(D*ones(queries,int)).T - x[:,:ndata].T
sqd=sqrt(((diff.T)**2).sum(axis=2))
# sorting
idx=argsort(sqd)
# return the indexes of K nearest neighbours
return idx[:,:K]
def knn_classifier(x, D, labels, K):
neig_idx = knn_search(x,D,K)
counts=list()
for i in range(len(neig_idx)):
counts.append(bincount(labels[neig_idx[i]])) # voting
return argmax(array(counts),axis=-1),neig_idx
#Let's test the classifier on some random data:
# generating a random dataset with random labels
data = random.rand(150,2)
labels = int_(round(random.rand(150)*1)) # random labels 0 or 1
x = array([[[0.4,0.4]],[[0.6,0.8]],[[0.9,0.2]],[[0.2,0.9]]])
# label assignment using k=5
result,neig_idx = knn_classifier(x,data,labels,5)
print 'Label assignment:', result
# plotting the data and the input pattern
# class 1, red points, class 0 blue points
scatter(data[:,0],data[:,1], c=labels,alpha=0.8)
scatter(x.T[0],x.T[1],marker='o',c='g',s=40)
# highlighting the neighbours
plot(data[neig_idx,0],data[neig_idx,1],'o', markerfacecolor='None',markersize=15,markeredgewidth=1)
show()