一. 歐氏距離
距離越小,相似度越高
def EuclideanDistances(A, B):
    """Return the pairwise Euclidean distances between the rows of A and B.

    The whole distance table is obtained from one matrix product by using
    the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 * a.b``.

    Args:
        A: 2-D array-like of shape (m, d), one sample per row.
        B: 2-D array-like of shape (n, d), one sample per row.

    Returns:
        An ``np.matrix`` of shape (m, n) whose entry (i, j) is the distance
        between ``A[i]`` and ``B[j]``.  The matrix type is kept because that
        is what this function has always returned.
    """
    # Work on plain float64 ndarrays: on an np.matrix the ``**`` operator is
    # a matrix power rather than an element-wise square, and small integer
    # dtypes (e.g. int8) overflow when squared.
    rows_a = np.asarray(A, dtype=float)
    rows_b = np.asarray(B, dtype=float)

    cross = np.dot(rows_a, rows_b.T)
    # Column of |a|^2 and row of |b|^2; broadcasting expands both to (m, n).
    sq_norm_a = np.sum(rows_a ** 2, axis=1)[:, np.newaxis]
    sq_norm_b = np.sum(rows_b ** 2, axis=1)[np.newaxis, :]

    sq_dist = sq_norm_b + sq_norm_a - 2.0 * cross
    # Floating-point rounding can leave tiny negative values where the true
    # distance is zero; clamp them so the square root is well defined.
    np.maximum(sq_dist, 0.0, out=sq_dist)
    return np.asmatrix(np.sqrt(sq_dist))
# numpy is used here but was never imported before this point, so import it
# before the first call to keep the snippet runnable on its own.
import numpy as np

# Demo: distances among three 5-dimensional row vectors.  Rows 0 and 2 are
# identical, so the distance between them is 0.
A = np.array(
    [
        [0, 1, 0, 0, 1],
        [0, 0, 1, 1, 1],
        [0, 1, 0, 0, 1],
    ]
)
Euclidean_dis = EuclideanDistances(A, A)
print(Euclidean_dis)

# 歐幾里得距離
# numpy is required by this snippet but was not imported; import it here so
# the example runs on its own.
import numpy as np
from scipy.spatial.distance import cdist

# Pairwise Euclidean distances between the rows of A, computed by SciPy.
A = np.array(
    [
        [0, 1, 0, 0, 1],
        [0, 0, 1, 1, 1],
        [0, 1, 0, 0, 1],
    ]
)
dis = cdist(A, A, metric="euclidean")
print(dis)
二. 餘弦相似度:
# 餘弦相似度
def cosine_distance(matrix1, matrix2):
    """Return the pairwise cosine similarity between rows of two matrices.

    Despite the name (kept so existing callers keep working), the result is
    a similarity, not a distance: 1 means the two rows point in the same
    direction and 0 means they are orthogonal.

    Args:
        matrix1: 2-D array-like of shape (m, d), one vector per row.
        matrix2: 2-D array-like of shape (n, d), one vector per row.

    Returns:
        ndarray of shape (m, n); entry (i, j) is the cosine of the angle
        between ``matrix1[i]`` and ``matrix2[j]``.  A row that is entirely
        zero has no direction; its similarity to every row is reported as 0
        instead of NaN.
    """
    # Plain float ndarrays keep the arithmetic element-wise even when the
    # caller passes np.matrix objects or integer data.
    rows1 = np.asarray(matrix1, dtype=float)
    rows2 = np.asarray(matrix2, dtype=float)

    dot_products = np.dot(rows1, rows2.T)
    norms1 = np.sqrt(np.sum(rows1 * rows1, axis=1))[:, np.newaxis]
    norms2 = np.sqrt(np.sum(rows2 * rows2, axis=1))[np.newaxis, :]
    # Broadcasting (m, 1) * (1, n) gives the outer product of the norms.
    norm_products = norms1 * norms2

    # Divide only where the denominator is non-zero; the remaining entries
    # keep the 0 they were initialised with.
    similarity = np.zeros_like(dot_products)
    np.divide(dot_products, norm_products, out=similarity,
              where=norm_products != 0)
    return similarity
# numpy is required by this snippet but was not imported; import it here so
# the example runs on its own.
import numpy as np

# Demo: cosine similarity among three 5-dimensional row vectors.  Rows 0 and
# 2 are identical, so their similarity is 1.
A = np.array(
    [
        [0, 1, 0, 0, 1],
        [0, 0, 1, 1, 1],
        [0, 1, 0, 0, 1],
    ]
)
cosine_dis = cosine_distance(A, A)
print(cosine_dis)
# 餘弦相似度
# numpy is required by this snippet but was not imported; import it here so
# the example runs on its own.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of A, computed by scikit-learn.
A = np.array(
    [
        [0, 1, 0, 0, 1],
        [0, 0, 1, 1, 1],
        [0, 1, 0, 0, 1],
    ]
)
cosine_dis2 = cosine_similarity(A, A)
print('cosine_dis2:\n', cosine_dis2)
三. 皮爾遜相似度
# 皮爾遜相似度 參考https://cloud.tencent.com/developer/ask/181044
import numpy as np
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine
# Pearson similarity between every pair of rows of A.
# The Pearson coefficient is the cosine similarity of the rows after each
# row's own mean has been subtracted; plain `1 - cosine distance` skips that
# centering step and therefore yields cosine similarity, not Pearson.
A = np.array(
    [
        [0, 1, 0, 0, 1],
        [0, 0, 1, 1, 1],
        [0, 1, 0, 0, 1],
    ]
)
# np.corrcoef treats each row as one variable by default.
dist_out = np.corrcoef(A)
# A bare expression only displays a value in an interactive session; print
# it so the result is also shown when the snippet runs as a script.
print(dist_out)
參考:
https://www.jianshu.com/p/3eaa970bd45c
https://cloud.tencent.com/developer/ask/181044