天天看點

Python k-均值聚類算法二維實例

k-均值聚類算法二維實例,不多解釋,解釋就是掩飾,複製貼上即可執行。

import time
import numpy as np
import random
import matplotlib.pyplot as plt
import operator

def func01():    # Generate random two-dimensional points.

    # Builds one flat list of [x, y] pairs: a first batch is created with a
    # list comprehension and five more batches are appended with extend(),
    # each coordinate drawn with random.randint.
    # NOTE(review): every numeric literal in this block (the randint bounds
    # and the range arguments) is missing from the text as shown, so the
    # lines below do not parse; restore the numbers before running.
    # Presumably each batch is one cluster with its own coordinate window
    # (the caller's comment mentions 6 groups) — confirm against the original.
    #random.seed(1)
    kjz1=[[random.randint(,),random.randint(,)] for j in range(,)]
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    # The combined list of points is returned to the caller.
    return kjz1

def func02(kjz2w): # Plot the grouped points.

    # kjz2w is iterated as a list of groups, each group a list of points;
    # nothing is drawn when it equals the empty list.
    # NOTE(review): the numeric literals (counter start/step, the point
    # indices passed to plt.plot and the plt.pause interval) are missing from
    # the text as shown; presumably s starts at 0, steps by 1 and the indices
    # are 0 and 1 — the pause interval cannot be inferred, confirm it.
    if kjz2w!=[]:
        # s counts groups; the colour cycles through `colors` via s % len(colors).
        colors=['b','g','r','c','m','y','k'];s=;
        for j in kjz2w:
            for i in j:
                # One plot call per point, drawn as a '.' marker.
                plt.plot(i[], i[], color=colors[s%len(colors)], marker='.')
            s=s+;
        # Interactive mode: show the figure, pause, then close it.
        plt.ion();plt.show();plt.pause();plt.close();

def func03(kjz1,k):
    """Pick the initial centres and return the initial grouping.

    The first two centres are the minimum and maximum corners of the points'
    bounding box. The remaining ones are placed on the diagonal between them,
    at steps of ``(max - min) / k`` from the minimum corner. Each point is then
    assigned to its nearest centre by squared Euclidean distance; ties go to
    the centre with the lowest index.

    NOTE: the numeric literals of this function were missing from the text it
    was recovered from. Indices 0/1, ``axis=0`` and the exponent 2 follow from
    the two-dimensional data. The intermediate-centre range is reconstructed
    as ``range(1, k - 1)`` so that ``k`` centres are produced in total.

    Args:
        kjz1: Non-empty sequence of ``[x, y]`` points.
        k: Requested number of groups. The two corner centres are always
            created, so at least two groups are returned.

    Returns:
        A list holding one list of points per centre, in centre order. A group
        is empty when no point is closest to its centre.
    """
    low = np.min(kjz1, axis=0).tolist()
    high = np.max(kjz1, axis=0).tolist()
    step_x = (high[0] - low[0]) / k
    step_y = (high[1] - low[1]) / k

    centres = [[low[0], low[1]], [high[0], high[1]]]
    centres.extend(
        [low[0] + step_x * n, low[1] + step_y * n] for n in range(1, k - 1)
    )

    def nearest_centre(point):
        # min() returns the first minimum, so ties go to the lowest index,
        # the same result as a stable ascending sort on distance.
        return min(
            range(len(centres)),
            key=lambda c: (point[0] - centres[c][0]) ** 2
            + (point[1] - centres[c][1]) ** 2,
        )

    groups = [[] for _ in centres]
    for point in kjz1:
        groups[nearest_centre(point)].append(point)
    return groups

def func05(lb2):
    """Remove every empty group from *lb2* in place and return it.

    The list is filtered through slice assignment, so the caller's list object
    is the one that is modified and returned. An empty outer list is returned
    unchanged instead of raising ``IndexError``.

    Args:
        lb2: List of groups (each a sized container).

    Returns:
        The same list object, now holding only the non-empty groups in their
        original order.
    """
    lb2[:] = [group for group in lb2 if len(group) > 0]
    return lb2

def func06(kjz2wxy):
    """Run one k-means step: compute group means, then reassign every point.

    Empty input groups are ignored when the means are computed, because the
    mean of an empty group is NaN and cannot be used as a centre. Each point
    goes to the centre with the smallest squared Euclidean distance; ties go
    to the lowest centre index. Groups left empty after reassignment are
    dropped from the result.

    NOTE: the numeric literals of this function were missing from the text it
    was recovered from; ``axis=0``, indices 0/1 and the exponent 2 are restored
    from the two-dimensional data layout.

    Args:
        kjz2wxy: List of groups, each a list of ``[x, y]`` points.

    Returns:
        A tuple ``(groups, means)``: the regrouped points without empty
        groups, and the list of ``[x, y]`` means computed from the non-empty
        input groups.
    """
    populated = [group for group in kjz2wxy if len(group) > 0]
    meanxy = [np.mean(group, axis=0).tolist() for group in populated]

    def nearest_centre(point):
        # min() returns the first minimum, so ties go to the lowest index,
        # the same result as a stable ascending sort on distance.
        return min(
            range(len(meanxy)),
            key=lambda c: (point[0] - meanxy[c][0]) ** 2
            + (point[1] - meanxy[c][1]) ** 2,
        )

    regrouped = [[] for _ in meanxy]
    for group in populated:
        for point in group:
            regrouped[nearest_centre(point)].append(point)

    # Drop groups that attracted no point.
    return [group for group in regrouped if len(group) > 0], meanxy

def func07(kjz2w,fz):
    """Cluster the points with k-means, report the iteration count and plot.

    The initial grouping comes from ``func03``. ``func06`` is then applied
    repeatedly until it returns the same means twice in a row. The printed
    count is the number of ``func06`` passes made before the confirming pass.

    NOTE: the numeric literals of this function were missing from the text it
    was recovered from. The counter is restored as starting at 0 and stepping
    by 1. The first-pass guard (originally ``j > 0``) is expressed with a
    ``None`` sentinel, so the previous means are never read before they exist.

    Args:
        kjz2w: List of ``[x, y]`` points.
        fz: Number of groups requested from ``func03``.
    """
    kjz2wxy = func03(kjz2w, fz)  # initial grouping around the seed centres
    previous_means = None
    iterations = 0
    while True:
        kjz2wxy, meanxy = func06(kjz2wxy)
        # Converged once a pass leaves every mean unchanged.
        if previous_means is not None and meanxy == previous_means:
            break
        previous_means = meanxy
        iterations += 1
    print('疊代%d次' % (iterations))
    func02(kjz2wxy)  # plot the final groups

if __name__=='__main__':

    # Script entry point: repeatedly generate a point set with func01 and
    # cluster/plot it with func07, then print the elapsed time.
    # NOTE(review): the numeric literals are missing from the text as shown —
    # the range() bounds, the group count passed to func07 (the trailing
    # comment says 6) and the constants in the elapsed-time expression
    # (presumably a seconds-to-minutes conversion with rounding, given the
    # '分鐘' label) — restore and confirm them before running.
    start=time.time();
    for j in range(,):
        kjz2w=func01()
        func07(kjz2w,) # split into 6 groups

    print('Time used:',int((time.time()-start)/*)/,'分鐘')
           
Python k-均值聚類算法二維執行個體

上圖是初始均值選取比較好的時候的情況,還有些情況是下面這樣的。

Python k-均值聚類算法二維執行個體

今天還寫了一種畫蛇添足的算法,從一維算法過渡來的,就像下面這樣的。

import time
import numpy as np
import random
import matplotlib.pyplot as plt
import operator


def func01():    # Generate random two-dimensional points and show them.

    # Seeds the random module, builds one flat list of [x, y] pairs from six
    # batches of random.randint draws, plots every point in blue, and returns
    # the list.
    # NOTE(review): every numeric literal in this block (seed value, randint
    # bounds, range arguments, point indices, pause interval) is missing from
    # the text as shown, so the lines below do not parse; restore the numbers
    # before running. As written, random.seed() with no argument would seed
    # from system entropy — presumably a fixed seed was intended; confirm.
    random.seed()
    kjz1=[[random.randint(,),random.randint(,)] for j in range(,)]
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    kjz1.extend([[random.randint(,),random.randint(,)] for j in range(,)])
    # Plot the raw points
    plt.xlabel('x-axis')
    plt.ylabel('y-axis')  
    for j in kjz1:
        # One plot call per point; presumably indices 0 and 1 (x and y).
        plt.plot(j[], j[], color='b', marker='.', label='y1 data')
    # Interactive mode: show the figure, pause, then close it.
    plt.ion();plt.show();plt.pause();plt.close();

    return kjz1

def func02(kjz1,k,axis):    # Split the points into k parts along one axis using k means.

    # Visible flow: sort kjz1 in place, cut the sorted copy into k initial
    # groups, derive thresholds with means(), then repeatedly re-split the
    # points at those thresholds until two consecutive threshold lists are
    # equal. Returns the final list of groups, or [] when a group is empty.
    # NOTE(review): most numeric literals in this block are missing from the
    # text as shown (counter starts/steps, range bounds, index offsets, the
    # values of the `bj` flag), so it does not parse; the comments below
    # describe only what the remaining tokens establish.
    # NOTE(review): `itemgetter(-axis)` is probably `itemgetter(1-axis)` with
    # the digit lost (sort by the other axis first, then by `axis`) — confirm.
    # Both sorts mutate the caller's list.
    kjz1.sort(reverse=False,key=operator.itemgetter(-axis))  # ascending
    kjz1.sort(reverse=False,key=operator.itemgetter(axis))  # ascending
    wb2=kjz1.copy();
    # Initial even grouping
    # a is the rounded group size, b the remainder of len(wb2) divided by k;
    # xlb collects multiples of a, with b added to one entry when j == k.
    xlb=[];a=round(len(wb2)/k);b=len(wb2)%k;
    for j in range(,k+):
        xlb.append(j*a)
        if j==k:
            xlb[j-]=xlb[j-]+b;
    # wb1 receives one empty list per iteration of the range(…, k) loop.
    j=;wb1=[];
    for j in range(,k):
        wb1.append([])
    # Walk the sorted points, appending to group j and advancing j when the
    # index i reaches the boundary derived from xlb[j].
    i=;j=;
    while(i<=len(wb2)-):
        wb1[j].append(wb2[i]);
        if i>=xlb[j]-:
            j=j+;
        i=i+;
    kj1=means(wb1,axis);# thresholds from the initial grouping

    # bj is a flag distinguishing the first pass from later ones (its literal
    # values are missing).
    bj=;
    while(True):
        wb2=kjz1.copy();
        if bj!=:
            # Later passes start from the thresholds of the previous pass.
            kj1=kj2.copy();
        wb3=[];
        for j in range(,k-):
            wb3.append([])
        for j in range(,k-):
            i=;
            while(True):
                # Move every remaining point whose `axis` coordinate is at or
                # below threshold j into group j; other points stay in wb2.
                # NOTE(review): wb2[i] is read before the length check, so an
                # already-empty wb2 would raise IndexError here — confirm.
                if wb2[i][axis]<=kj1[j]:
                    wb3[j].append(wb2.pop(i));
                else:
                    i=i+;
                if i>=len(wb2):
                    break
        # Whatever is left above the last threshold forms the final group.
        wb3.append(wb2)
        for j in wb3:
            # Abort with an empty result when any group has no points.
            if len(j)<=:
                print('分組出現空組,傳回[]');return []
        kj2=means(wb3,axis);# thresholds from the current grouping
        if bj==:
            # Stop once the thresholds no longer change.
            if kj1==kj2:
                break
        bj=;
    return wb3

def means(lb1,axis):
    """Return the split thresholds between consecutive groups along *axis*.

    The mean point of each group is computed first. Each threshold is then
    the midpoint, on the chosen axis, between the means of two neighbouring
    groups, so ``len(lb1)`` groups yield ``len(lb1) - 1`` thresholds.

    NOTE: the numeric literals of this function were missing from the text it
    was recovered from; ``axis=0`` for the per-group mean and the ``j-1`` / ``j``
    neighbour pairing are restored. The unused ``std1`` local was removed.

    Args:
        lb1: List of non-empty groups, each a list of ``[x, y]`` points.
        axis: Coordinate index to take from each group mean.

    Returns:
        List of midpoints (NumPy floats), one per pair of neighbouring groups.
    """
    centroids = [np.mean(group, axis=0).tolist() for group in lb1]
    return [
        np.mean([left[axis], right[axis]])
        for left, right in zip(centroids, centroids[1:])
    ]

def func03(kjz2w): # Plot the grouped points.

    # kjz2w is iterated as a list of groups, each group a list of points;
    # nothing is drawn when it equals the empty list.
    # NOTE(review): the numeric literals (counter start/step, the point
    # indices passed to plt.plot and the plt.pause interval) are missing from
    # the text as shown; presumably s starts at 0, steps by 1 and the indices
    # are 0 and 1 — the pause interval cannot be inferred, confirm it.
    if kjz2w!=[]:
        # s counts groups; the colour cycles through `colors` via s % len(colors).
        colors=['b','g','r','c','m','y','k'];s=;
        for j in kjz2w:
            for i in j:
                # One plot call per point, drawn as a '.' marker.
                plt.plot(i[], i[], color=colors[s%len(colors)], marker='.', label='y1 data')
            s=s+;
        # Interactive mode: show the figure, pause, then close it.
        plt.ion();plt.show();plt.pause();plt.close();

def func04(kjz2wx,kjz2wy):
    """Combine an x-axis grouping and a y-axis grouping into grid cells.

    For every pair (x group, y group), in x-major order, the cell holds the
    points of the x group that also occur in the y group. Empty cells are
    dropped. Empty inputs give an empty result instead of raising
    ``IndexError``.

    NOTE: the numeric literals of this function were missing from the text it
    was recovered from; the cell counter is restored as starting at 0 and
    stepping by 1, which the direct construction below makes unnecessary.

    Args:
        kjz2wx: Groups produced by splitting along the x axis.
        kjz2wy: Groups produced by splitting along the y axis.

    Returns:
        List of non-empty cells, each a list of points.
    """
    cells = []
    for x_group in kjz2wx:
        for y_group in kjz2wy:
            cell = [point for point in x_group if point in y_group]
            if cell:
                cells.append(cell)
    return cells


def func05(lb2):
    """Remove every empty group from *lb2* in place and return it.

    The list is filtered through slice assignment, so the caller's list object
    is the one that is modified and returned. An empty outer list is returned
    unchanged instead of raising ``IndexError``.

    Args:
        lb2: List of groups (each a sized container).

    Returns:
        The same list object, now holding only the non-empty groups in their
        original order.
    """
    lb2[:] = [group for group in lb2 if len(group) > 0]
    return lb2


def func06(kjz2wxy):
    """Run one k-means step: compute group means, then reassign every point.

    Empty input groups are ignored when the means are computed, because the
    mean of an empty group is NaN and cannot be used as a centre. Each point
    goes to the centre with the smallest squared Euclidean distance; ties go
    to the lowest centre index. Groups left empty after reassignment are
    dropped from the result.

    NOTE: the numeric literals of this function were missing from the text it
    was recovered from; ``axis=0``, indices 0/1 and the exponent 2 are restored
    from the two-dimensional data layout.

    Args:
        kjz2wxy: List of groups, each a list of ``[x, y]`` points.

    Returns:
        A tuple ``(groups, means)``: the regrouped points without empty
        groups, and the list of ``[x, y]`` means computed from the non-empty
        input groups.
    """
    populated = [group for group in kjz2wxy if len(group) > 0]
    # Group centres (means)
    meanxy = [np.mean(group, axis=0).tolist() for group in populated]

    def nearest_centre(point):
        # min() returns the first minimum, so ties go to the lowest index,
        # the same result as a stable ascending sort on distance.
        return min(
            range(len(meanxy)),
            key=lambda c: (point[0] - meanxy[c][0]) ** 2
            + (point[1] - meanxy[c][1]) ** 2,
        )

    regrouped = [[] for _ in meanxy]
    for group in populated:
        for point in group:
            regrouped[nearest_centre(point)].append(point)

    # Drop groups that attracted no point.
    return [group for group in regrouped if len(group) > 0], meanxy

def func07(kjz2w):

    # Pipeline: split the points with func02 along one axis and plot, split
    # again along the other axis and plot, intersect both splits with func04
    # and plot, then refine the combined groups with func06 until the means
    # stop changing and plot the final result.
    # NOTE(review): the numeric literals are missing from the text as shown —
    # the group counts and axis arguments of both func02 calls (the trailing
    # comment says axis 0 is x and 1 is y) and the counter start/step/guard of
    # the loop (presumably 0, +1 and `j>0`) — restore and confirm them.
    kjz2wx=func02(kjz2w,,) # arguments: list, number of groups, axis (x = 0, y = 1)
    func03(kjz2wx) # plot

    kjz2wy=func02(kjz2w,,)
    func03(kjz2wy) # plot

    kjz2wxy=func04(kjz2wx,kjz2wy)
    func03(kjz2wxy) # plot

    #kjz2wxy=func06(kjz2wxy)
    j=;
    while(True):
        kjz2wxy,meanxy=func06(kjz2wxy)
        # Stop when this pass returns the same means as the previous pass;
        # the j guard keeps meanxy2 from being read before it is assigned.
        if j> and meanxy==meanxy2:
            break
        meanxy2=meanxy.copy();
        j=j+;
    print('疊代%d次' % (j))
    func03(kjz2wxy) # plot

if __name__=='__main__':

    # Script entry point: generate one point set with func01, run the
    # clustering pipeline in func07, then print the elapsed time.
    # NOTE(review): the constants in the elapsed-time expression are missing
    # from the text as shown (presumably a seconds-to-minutes conversion with
    # rounding, given the '分鐘' label) — restore and confirm them.
    start=time.time();
    kjz2w=func01()

    func07(kjz2w)

    print('Time used:',int((time.time()-start)/*)/,'分鐘')