# coding=utf-8
import sys
import math
class Item(object):
    """A named data point whose numeric attributes take part in clustering.

    Equality and distance only look at the attributes listed in ``field``;
    ``name`` is a label and is ignored by both.
    """

    # Attribute names used for comparison and distance; the values stored
    # under these names must support subtraction.
    field = ["age", "h"]

    def __init__(self, name="", age=0, h=0):
        """Create an item.

        All arguments are optional so that an "empty" item can be built
        with ``Item()``.
        """
        self.name = name
        self.age = age
        self.h = h

    def __eq__(self, other):
        """Return True when *other* is the same class and every attribute
        named in ``field`` has an equal value."""
        if self.__class__ != other.__class__:
            return False
        return not any(
            getattr(self, attr, 0) != getattr(other, attr, 0)
            for attr in self.field
        )

    def __ne__(self, other):
        """Inverse of ``__eq__`` (Python 2 does not derive it automatically)."""
        return not self.__eq__(other)

    def sub(self, other):
        """Return the distance between this item and *other*.

        The distance is the Euclidean distance over the attributes named in
        ``field``, truncated toward zero to a whole number and returned as a
        float.  When *other* is not the same class the two cannot be
        compared and a very large integer (``sys.maxsize``) is returned.
        """
        if self.__class__ != other.__class__:
            # sys.maxint was removed in Python 3; sys.maxsize exists on
            # both Python 2.6+ and Python 3.
            return sys.maxsize
        squared = 0
        for attr in self.field:
            diff = getattr(self, attr, 0) - getattr(other, attr, 0)
            squared += math.pow(diff, 2)
        # NOTE(review): the int() truncation is kept from the original code;
        # it makes distances such as 1.4 and 1.9 compare equal.
        return float(int(math.sqrt(squared)))

    def __sub__(self, other):
        """``a - b`` is the distance between ``a`` and ``b``."""
        return self.sub(other)

    def __rsub__(self, other):
        """Reflected subtraction; computed the same way as ``__sub__``."""
        return self.sub(other)

    def __str__(self):
        """Return ``name:attr:value,attr:value`` ("None" when name is empty)."""
        pairs = ["%s:%s" % (attr, getattr(self, attr)) for attr in self.field]
        return (self.name or "None") + ":" + ",".join(pairs)
class Kmeans(object):
    """Group objects into ``k`` clusters by repeatedly re-assigning them to
    the nearest centre and recomputing the centres.

    The objects must provide:
      * a ``field`` list naming their numeric attributes,
      * ``a - b`` returning the distance between two objects,
      * ``==`` comparing two objects,
      * a constructor callable without arguments.
    """

    def __init__(self, objects, k):
        """Store the objects and use the first ``k`` of them as the
        initial centres."""
        self.objects = objects
        self.k = k
        # Initial centres; the slice is a new list, so replacing centres
        # later does not modify ``objects``.
        self.init_objects = objects[:k]

    def com_put(self):
        """Run the clustering until no centre changes.

        Returns a list of ``k`` lists; the list at position ``i`` holds the
        objects assigned to centre ``i``.
        """
        results = []
        center_change = True
        while center_change:
            center_change = False
            results = [[] for _ in range(self.k)]
            # Assignment step: put every object into the cluster of its
            # nearest centre.
            for obj in self.objects:
                dists = dict(
                    (i, center - obj)
                    for i, center in enumerate(self.init_objects)
                )
                results[self.comput_order(dists)].append(obj)
            # Update step: recompute each centre and note whether any moved.
            for index in range(self.k):
                new_item = self.find_new_center(results[index])
                # ``not ==`` rather than ``!=`` so that only ``__eq__`` is
                # required of the objects.
                if not new_item == self.init_objects[index]:
                    center_change = True
                    self.init_objects[index] = new_item
        return results

    def find_new_center(self, dists):
        """Return a new object whose ``field`` attributes are the means of
        those attributes over the objects in *dists*.

        *dists* is the list of objects in one cluster.  The new object is
        built by calling the class of ``self.objects[0]`` with no
        arguments; when *dists* is ``None`` or empty that default-built
        object is returned unchanged.
        """
        new_item = self.objects[0].__class__()
        if not dists:
            return new_item
        # Float divisor so the mean is not truncated by Python 2 integer
        # division.
        count = float(len(dists))
        for attr in new_item.field:
            total = sum(getattr(item, attr, 0) for item in dists)
            setattr(new_item, attr, total / count)
        return new_item

    def comput_order(self, dists):
        """Return the key of the smallest distance in *dists*.

        *dists* maps a centre index to a distance.  On ties the lowest
        index wins; an empty mapping yields 0.
        """
        if not dists:
            return 0
        # Sorting the keys makes the tie-break independent of dict order.
        return min(sorted(dists), key=dists.__getitem__)
# Records (data points) to cluster.
# NOTE(review): the numeric arguments after the name are missing from every
# Item(...) call below, so these lines do not parse as written -- restore the
# original values before running.
l = [Item("p1", , ), Item("p1", , ), Item("p1", , ),
Item("p1", , ), Item("p1", , ), Item("p1", , ),
Item("p1", , )]
# Number of clusters.
# NOTE(review): the value assigned to k is missing as well -- restore it.
k =
results = Kmeans(l, k).com_put()
# Print each cluster index followed by the items assigned to that cluster.
for i, x in enumerate(results):
print "#####category(%s)#####" % i
for item in x:
print item