天天看點

Python 基本 K-means 算法實作

# coding=utf-8
import sys
import math


class Item(object):
    """A named record whose numeric fields are compared and measured.

    Equality and distance look only at the attributes listed in ``field``;
    ``name`` is used for display only.
    """

    # Attribute names taking part in equality and distance computations;
    # their values must support subtraction.
    field = ["age", "h"]

    def __init__(self, name="", age=0, h=0):
        self.name = name
        self.age = age
        self.h = h

    def __eq__(self, other):
        """Return True when ``other`` has the same class and equal fields."""
        if self.__class__ != other.__class__:
            return False
        for field in self.field:
            if getattr(self, field, 0) != getattr(other, field, 0):
                return False
        return True

    def __ne__(self, other):
        """Inverse of ``__eq__`` (Python 2 does not derive it automatically)."""
        return not self.__eq__(other)

    def sub(self, other):
        """Return the distance between ``self`` and ``other``.

        The distance is the Euclidean distance over ``field``, truncated to a
        whole number and returned as a float. Objects of a different class
        are treated as maximally distant.
        """
        if self.__class__ != other.__class__:
            # sys.maxint only exists on Python 2; fall back to sys.maxsize.
            return getattr(sys, "maxint", sys.maxsize)
        dob = 0
        for field in self.field:
            val1 = getattr(self, field, 0)
            val2 = getattr(other, field, 0)
            dob += math.pow(val1 - val2, 2)
        # The fractional part of the distance is deliberately discarded.
        return float(int(math.sqrt(dob)))

    # The distance is symmetric, so ``a - b`` and the reflected form agree.
    __sub__ = __rsub__ = lambda x, y: x.sub(y)

    def __str__(self):
        pairs = ["%s:%s" % (x, getattr(self, x)) for x in self.field]
        return (self.name or "None") + ":" + ",".join(pairs)


class Kmeans(object):
    """Basic k-means clustering.

    The clustered objects must expose a ``field`` list naming their numeric
    attributes, be constructible without arguments, implement ``a - b`` as a
    distance and ``==`` as a comparison; those are the operations this class
    performs on them.
    """

    def __init__(self, objects, k):
        self.objects = objects
        self.k = k
        # Initial centers: the first k objects. The slice is a new list, so
        # replacing a center later does not modify ``objects``.
        self.init_objects = objects[:k]

    def com_put(self):
        """Cluster ``self.objects`` and return a list of ``k`` lists.

        Repeats assignment and center update until no center changes.
        ``results[i]`` holds the objects assigned to center ``i``.
        """
        results = []
        center_change = True
        while center_change:
            center_change = False
            results = [[] for _ in range(self.k)]
            for obj in self.objects:
                # Distance from every current center to this object.
                dists = {}
                for i, i_obj in enumerate(self.init_objects):
                    dists[i] = i_obj - obj
                dist_index = self.comput_order(dists)
                results[dist_index].append(obj)
            for index in range(self.k):
                new_item = self.find_new_center(results[index])
                old_item = self.init_objects[index]
                if not new_item == old_item:
                    center_change = True
                    self.init_objects[index] = new_item
        return results

    def find_new_center(self, dists):
        """Return a new object whose fields are the mean of ``dists``' fields.

        ``dists`` is the list of objects in one cluster. For an empty (or
        ``None``) cluster a default-constructed object is returned.
        """
        new_item = self.objects[0].__class__()
        if not dists:
            return new_item

        count = len(dists)
        for field in new_item.field:
            total = sum(getattr(item, field, 0) for item in dists)
            # Plain ``/``: on Python 2 this truncates when all values are ints.
            setattr(new_item, field, total / count)
        return new_item

    def comput_order(self, dists):
        """Return the index of the smallest distance in ``dists``.

        ``dists`` maps the indices ``0 .. len(dists) - 1`` to distances. Ties
        go to the lowest index; an empty mapping yields 0.
        """
        if not dists:
            return 0
        return min(range(len(dists)), key=lambda i: dists[i])


# Records to cluster.
# NOTE(review): the numeric arguments of every Item(...) call and the value of
# k were missing from this text, which made the script a syntax error. The
# numbers below are illustrative placeholders only -- replace them with the
# real data before relying on the output.
l = [Item("p1", 10, 150), Item("p1", 40, 170), Item("p1", 12, 152),
     Item("p1", 42, 172), Item("p1", 11, 151), Item("p1", 41, 171),
     Item("p1", 13, 153)]
# Number of clusters (placeholder value, see the note above).
k = 2
results = Kmeans(l, k).com_put()

# Print each cluster index followed by its members. A single parenthesised
# argument prints the same text on Python 2 and Python 3.
for i, x in enumerate(results):
    print("#####category(%s)#####" % i)
    for item in x:
        print(item)