天天看点

基于用户的协同过滤电影推荐

基于用户的协同过滤电影推荐代码实现:

# -*- coding: UTF-8 -*-
'''
基于用户的推荐算法
'''
from math import sqrt,pow
import csv
import operator
def read_file(path=r"F:\download\数据集\电影\ml-latest-small\matings.csv"):
    """Load a ratings CSV into a nested ``{user: {movie: rating}}`` dict.

    The first row is treated as a header and skipped.  Every other non-empty
    row must start with the columns ``userId, movieId, rating``; any further
    columns are ignored.

    Args:
        path: Location of the ratings CSV.  Defaults to the path that used to
            be hard-coded, so existing ``read_file()`` calls behave as before.
            NOTE(review): the MovieLens file is normally called
            ``ratings.csv``; confirm ``matings.csv`` is the intended name.

    Returns:
        A dict keyed by the user id rendered as a decimal string ("1", "2",
        ...), ordered by ascending numeric user id.  Each value maps a movie
        id (the string read from the file) to its rating as a float.  When a
        (user, movie) pair occurs more than once, the last row wins.  Only
        users that have at least one rating row are present.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a user id or rating is not numeric.
        IndexError: If a non-empty data row has fewer than three columns.
    """
    ratings_by_user = {}
    # newline="" is what the csv module asks for so quoted fields containing
    # line breaks are parsed correctly.
    with open(path, "r", encoding="utf-8", newline="") as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row
        for row in reader:
            if not row:
                # Blank lines carry no rating; skip instead of crashing.
                continue
            user_id = int(row[0])
            ratings_by_user.setdefault(user_id, {})[row[1]] = float(row[2])

    # Emit users in numeric order so iteration order does not depend on how
    # the file happens to be sorted.
    return {str(uid): ratings_by_user[uid] for uid in sorted(ratings_by_user)}

def read_filemove(
    finshList,
    path=r"F:\download\数据集\电影\ml-latest-small\movies.csv",
    limit=5,
):
    """Look up movie titles for a list of ``(movie_id, score)`` pairs.

    The movies CSV is scanned top to bottom; each row whose first column
    equals one of the requested movie ids contributes ``(title, score)`` to
    the result, where the title is the row's second column.

    Args:
        finshList: Sequence of ``(movie_id, score)`` pairs.  ``movie_id`` is
            compared against the first CSV column, so it must be the same
            string form as in the file.  If an id appears more than once,
            the score of its first occurrence is used.
        path: Location of the movies CSV.  Defaults to the path that used to
            be hard-coded.
        limit: Maximum number of matches to return (default 5, the value that
            used to be hard-coded).  ``None`` disables the limit.

    Returns:
        A list of ``(title, score)`` tuples in the order the movies appear in
        the CSV file (not in the order of ``finshList``).

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a matching row has fewer than two columns.
    """
    # Build the lookup once instead of rescanning finshList for every row.
    score_by_movie = {}
    for entry in finshList:
        score_by_movie.setdefault(entry[0], entry[1])

    matched = []
    with open(path, "r", encoding="utf-8", newline="") as csv_file:
        for row in csv.reader(csv_file):
            if limit is not None and len(matched) >= limit:
                break
            if not row:
                continue  # blank line
            movie_id = row[0]
            if movie_id in score_by_movie:
                matched.append((row[1], score_by_movie[movie_id]))
    return matched
       


class UserCf():
    """User-based collaborative filtering over a ``{user: {movie: score}}`` dict."""

    def __init__(self, data):
        """Keep a reference to the rating data.

        Args:
            data: Mapping of user key -> mapping of movie key -> numeric score.
        """
        self.data = data

    def getItems(self, username1, username2):
        """Return the rating dicts of two users as a tuple (debugging aid)."""
        return self.data[username1], self.data[username2]

    def pearson(self, user1, user2):
        """Compute the Pearson correlation of two users over shared movies.

        Args:
            user1: Mapping of movie -> score for the first user, e.g.
                ``{'Snakes on a Plane': 4.5, 'Superman Returns': 4.0}``.
            user2: Mapping of movie -> score for the second user.

        Returns:
            The correlation coefficient as a float.  ``0.0`` is returned when
            the users share no movie or when either user's scores on the
            shared movies have no variance (the coefficient is undefined).
        """
        n = 0
        sum_x = sum_y = 0.0
        sum_xy = sum_x2 = sum_y2 = 0.0
        for movie, score1 in user1.items():
            if movie not in user2:
                continue  # only movies rated by both users count
            score2 = user2[movie]
            n += 1
            sum_x += score1
            sum_y += score2
            sum_xy += score1 * score2
            sum_x2 += score1 ** 2
            sum_y2 += score2 ** 2

        if n == 0:
            return 0.0

        var_x = sum_x2 - sum_x ** 2 / n
        var_y = sum_y2 - sum_y ** 2 / n
        # Floating-point rounding can push a zero variance slightly below
        # zero; treating that as "no variance" avoids a math domain error
        # from sqrt().
        if var_x <= 0 or var_y <= 0:
            return 0.0

        denominator = sqrt(var_x * var_y)
        if denominator == 0:
            return 0.0
        return (sum_xy - sum_x * sum_y / n) / denominator

    def nearstUser(self, username, n=1):
        """Return the ``n`` users most similar to ``username``.

        Args:
            username: Key of the target user in ``self.data``.
            n: Number of neighbours to return (default 1).

        Returns:
            A list of ``(user, similarity)`` tuples sorted by descending
            Pearson similarity, truncated to ``n`` entries.

        Raises:
            KeyError: If ``username`` is not present in the data.
        """
        target_ratings = self.data[username]
        distances = {}
        for other_user, ratings in self.data.items():
            # Compare keys for equality; a substring test would wrongly drop
            # user "1" when the target is user "10".
            if other_user != username:
                distances[other_user] = self.pearson(target_ratings, ratings)
        ranked = sorted(distances.items(), key=operator.itemgetter(1), reverse=True)
        return ranked[:n]

    def recomand(self, username, n=1):
        """Recommend movies the user has not rated, taken from the nearest users.

        Each of the ``n`` nearest users is printed to stdout as it is
        processed.  A movie keeps the score given by the first neighbour (in
        similarity order) who rated it.

        Args:
            username: Key of the target user in ``self.data``.
            n: Number of nearest users to draw recommendations from.

        Returns:
            A list of ``(movie, score)`` tuples sorted by descending score.
        """
        seen = self.data[username]
        recommand = {}
        for user, score in self.nearstUser(username, n):
            print("推荐的用户:",(user,score))
            for movie, movie_score in self.data[user].items():
                if movie not in seen and movie not in recommand:
                    recommand[movie] = movie_score
        return sorted(recommand.items(), key=operator.itemgetter(1), reverse=True)

if __name__=='__main__':
    # Script entry point: load the ratings, ask for a user id, and print up
    # to five recommended movies together with their titles.
    users = read_file()
    userCf = UserCf(data=users)

    # Strip stray whitespace so an input such as "12 " still matches a key.
    num = input("请输入要推荐的用户的编号:(1-610)").strip()
    if num not in users:
        # Fail with a readable message instead of a KeyError traceback.
        raise SystemExit("未找到用户编号:%s" % num)

    recommandList = userCf.recomand(num, 5)
    # Slicing tolerates fewer than five recommendations; indexing 0..4
    # raised IndexError in that case.
    finshList = recommandList[:5]
    name_file = read_filemove(finshList)
    print("最终推荐:%s"%name_file)