天天看點

python資料分類knn_使用KNN對iris資料集進行分類——python

# Load the iris CSV into a numeric feature matrix plus a parallel list of
# labels, then hold out the leading rows as a test set.
from numpy import zeros

# Raw string so the backslashes of the Windows path stay literal.
filename = r'g:\data\iris.csv'

# Read every line up front; the context manager closes the file afterwards.
with open(filename) as fr:
    lines = fr.readlines()

# One row per input line, four feature columns. Blank lines leave unused
# rows at the bottom, which are trimmed after the loop.
Mat = zeros((len(lines), 4))
irisLabels = []
index = 0
for line in lines:
    line = line.strip()
    if line:
        listFromline = line.split(',')
        # The last field is the class label; the first four are features.
        irisLabels.append(listFromline[-1])
        Mat[index, :] = [float(value) for value in listFromline[0:4]]
        index = index + 1

# Keep only the rows that were actually filled, so Mat and irisLabels
# always have the same length regardless of the size of the file.
Mat = Mat[:index, :]
rowCount = Mat.shape[0]

# Fraction of the rows reserved for testing.
hoRatio = 0.2
testNum = int(hoRatio * rowCount)

# Rows [0, testNum) are the test set; everything after is training data.
# NOTE(review): the split is positional, not shuffled -- if the file is
# ordered by class, the test rows will not cover every class. Confirm this
# is intended.
train = Mat[testNum:, :].copy()
trainLabel = irisLabels[testNum:]

def classify1(inX, train, labels, k):
    """Classify one sample by majority vote of its k nearest training rows.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``train``.

    Args:
        inX: Feature vector of the sample to classify; it is broadcast
            against each row of ``train``.
        train: 2-D array-like with one training sample per row.
        labels: Sequence of class labels, parallel to the rows of ``train``.
        k: Number of nearest neighbours that vote. If it exceeds the number
            of training rows, every row votes.

    Returns:
        The label with the most votes. On a tie, the tied label whose first
        vote came from the closest neighbour wins.

    Raises:
        IndexError: If no votes can be cast (``k`` < 1 or ``train`` is empty).
    """
    # Imported locally so the function is self-contained.
    from collections import Counter

    import numpy as np

    # Broadcasting subtracts inX from every training row in one step.
    diffMat = np.asarray(train) - np.asarray(inX)
    distances = (diffMat ** 2).sum(axis=1) ** 0.5

    # Row indices ordered from nearest to farthest.
    sortedDistIndices = distances.argsort()

    # Slicing clamps k to the number of rows; max() keeps a negative k from
    # being read as "all but the last |k| rows".
    nearest = sortedDistIndices[:max(k, 0)]
    classCount = Counter(labels[rowIndex] for rowIndex in nearest)

    # most_common(1) returns the first-inserted label among equal counts.
    return classCount.most_common(1)[0][0]

# Classify every held-out row (the first testNum rows of Mat) with k=3 and
# count how many predictions disagree with the recorded label.
errorCount = 0
for i in range(testNum):
    classifyResult = classify1(Mat[i, :], train, trainLabel, 3)
    if irisLabels[i] != classifyResult:
        errorCount += 1

# Function-call form of print works on both Python 2 and Python 3.
print(errorCount)

原文:http://www.cnblogs.com/MarsMercury/p/5638928.html