# 基于用户的协同过滤电影推荐代码实现
# -*- coding: UTF-8 -*-
'''
基于用户的推荐算法
'''
from math import sqrt,pow
import csv
import operator
def read_file(path=r"F:\download\数据集\电影\ml-latest-small\matings.csv"):
    """Load per-user movie ratings from a ratings CSV file.

    The first row is treated as a header and skipped. Every following row
    must carry the user id, the movie id and the rating in its first three
    columns; any further columns are ignored. Blank rows are skipped.

    Args:
        path: Location of the ratings CSV. The default is the location the
            script has always read from.
            NOTE(review): the file name ``matings.csv`` looks like a typo of
            ``ratings.csv`` -- confirm against the dataset on disk.

    Returns:
        A dict mapping each user id (as a string) to a dict of
        ``{movie_id: rating}``. Movie ids stay strings, ratings are floats.
        Users are emitted in ascending numeric id order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a user id or a rating is not numeric.
        IndexError: If a non-blank row has fewer than three columns.
    """
    ratings_by_user = {}
    # newline="" lets the csv module do its own line-ending handling.
    with open(path, "r", encoding="utf-8", newline="") as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row
        for row in reader:
            if not row:
                continue  # tolerate blank lines such as a trailing newline
            user_id = int(row[0])
            # A later rating for the same (user, movie) pair overwrites the
            # earlier one.
            ratings_by_user.setdefault(user_id, {})[row[1]] = float(row[2])
    # Ascending id order keeps the iteration order of the result independent
    # of the row order in the file, so similarity ties break the same way.
    return {
        str(user_id): ratings_by_user[user_id]
        for user_id in sorted(ratings_by_user)
    }
def read_filemove(finshList, path=r"F:\download\数据集\电影\ml-latest-small\movies.csv", limit=5):
    """Resolve recommended movie ids to titles using a movies CSV file.

    Args:
        finshList: Sequence of ``(movie_id, score)`` pairs. ``movie_id`` is
            compared as-is against the first CSV column, so it must be a
            string. If an id is listed more than once, the first score wins.
        path: Location of the movies CSV (first column id, second column
            title). The default is the location the script has always used.
        limit: Maximum number of matches to return; ``None`` removes the
            cap. Defaults to 5, the value that used to be hard-coded.

    Returns:
        A list of ``(title, score)`` tuples, in the order the movies appear
        in the CSV file (not in the order of ``finshList``).

    Raises:
        OSError: If the file cannot be opened.
    """
    if limit is not None and limit <= 0:
        return []

    # Index the wanted ids once instead of scanning the list for every row.
    score_by_id = {}
    for entry in finshList:
        score_by_id.setdefault(entry[0], entry[1])

    matches = []
    with open(path, "r", encoding="utf-8", newline="") as csv_file:
        for row in csv.reader(csv_file):
            if not row or row[0] not in score_by_id:
                continue
            matches.append((row[1], score_by_id[row[0]]))
            if limit is not None and len(matches) >= limit:
                break
    return matches
class UserCf():
    """User-based collaborative filtering over ``{user: {movie: rating}}`` data."""

    def __init__(self, data):
        """Store the rating data.

        Args:
            data: Dict mapping each user name to a dict of
                ``{movie: rating}``.
        """
        self.data = data

    def getItems(self, username1, username2):
        """Return the rating dicts of two users (debugging helper).

        Raises:
            KeyError: If either user is not in the data.
        """
        return self.data[username1], self.data[username2]

    def pearson(self, user1, user2):
        """Compute the Pearson correlation of two users over co-rated movies.

        Args:
            user1: ``{movie: rating}`` dict of the first user, e.g.
                ``{'Snakes on a Plane': 4.5, 'Superman Returns': 4.0}``.
            user2: ``{movie: rating}`` dict of the second user.

        Returns:
            The correlation coefficient; ``0`` when the users share no
            movie or when the shared ratings of either user have no
            variance; ``None`` (after printing the error) when the ratings
            cannot be processed numerically.
        """
        n = 0
        sum_x = sum_y = 0.0
        sum_xy = 0.0
        sum_x2 = sum_y2 = 0.0
        try:
            for movie, score1 in user1.items():
                if movie not in user2:
                    continue  # only movies rated by both users count
                score2 = user2[movie]
                n += 1
                sum_xy += score1 * score2
                sum_x += score1
                sum_y += score2
                sum_x2 += pow(score1, 2)
                sum_y2 += pow(score2, 2)
            if n == 0:
                return 0
            numerator = sum_xy - (sum_x * sum_y) / n
            # Float rounding can push a true zero variance slightly below
            # zero, which would make sqrt() raise; clamp both terms at zero.
            spread_x = max(sum_x2 - pow(sum_x, 2) / n, 0.0)
            spread_y = max(sum_y2 - pow(sum_y, 2) / n, 0.0)
            denominator = sqrt(spread_x * spread_y)
            if denominator == 0:
                return 0
            return numerator / denominator
        except (TypeError, ValueError, ArithmeticError) as e:
            # Python 3 exceptions have no ``.message`` attribute; print the
            # exception itself.
            print("异常信息:", e)
            return None

    def nearstUser(self, username, n=1):
        """Return the ``n`` users most similar to ``username``.

        Args:
            username: Key of the reference user in the data.
            n: Number of neighbours to return.

        Returns:
            A list of ``(user, similarity)`` tuples sorted by descending
            similarity. Users whose similarity could not be computed are
            left out.

        Raises:
            KeyError: If ``username`` is not in the data.
        """
        own_ratings = self.data[username]
        distances = {}  # user -> similarity
        for other_user, items in self.data.items():
            # Compare by equality: a substring test would also drop user
            # '1' when the reference user is '10'.
            if other_user == username:
                continue
            similarity = self.pearson(own_ratings, items)
            if similarity is None:
                continue  # already reported by pearson(); None cannot be ranked
            distances[other_user] = similarity
        ranked = sorted(distances.items(), key=operator.itemgetter(1), reverse=True)
        return ranked[:n]

    def recomand(self, username, n=1):
        """Recommend movies rated by the ``n`` nearest users.

        Each neighbour is printed as it is processed. A movie is recommended
        when ``username`` has not rated it; its score is the rating given by
        the first (most similar) neighbour that rated it.

        Args:
            username: Key of the user to recommend for.
            n: Number of nearest users to draw recommendations from.

        Returns:
            A list of ``(movie, score)`` tuples sorted by descending score.

        Raises:
            KeyError: If ``username`` is not in the data.
        """
        already_rated = self.data[username]
        recommand = {}  # movie -> score of the first neighbour that rated it
        for user, score in self.nearstUser(username, n):
            print("推荐的用户:", (user, score))
            for movie, rating in self.data[user].items():
                if movie not in already_rated:
                    recommand.setdefault(movie, rating)
        return sorted(recommand.items(), key=operator.itemgetter(1), reverse=True)
if __name__ == '__main__':
    # Script entry point: load the ratings, ask for a user id, and print the
    # titles of the top recommendations for that user.
    users = read_file()
    userCf = UserCf(data=users)
    # Strip stray whitespace so that " 12 " still matches the key "12".
    num = input("请输入要推荐的用户的编号:(1-610)").strip()
    if num not in users:
        # Fail with a readable message instead of a raw KeyError traceback.
        raise SystemExit("用户编号不存在:%s" % num)
    recommandList = userCf.recomand(num, 5)
    # Slicing copes with fewer than five recommendations; indexing 0..4
    # would raise IndexError in that case.
    finshList = recommandList[:5]
    name_file = read_filemove(finshList)
    print("最终推荐:%s" % name_file)