[Python] 纯文本查看 复制代码
# Sample data set: maps each critic's name to a dict of
# {movie title: numeric rating}. Not every critic has rated every movie.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}
欧几里德距离评价算法是一种非常简单的计算相似度评价值的方法。它以人们共同评价过的物品为坐标轴，将参与评价的人绘制到图上，并考查他们彼此之间的距离远近。如图：
[Python] 纯文本查看 复制代码
def sim_distance(prefs, person1, person2):
    """Return a Euclidean-distance-based similarity score for two people.

    Only items rated by both ``person1`` and ``person2`` contribute.

    Args:
        prefs: Mapping of person -> {item: numeric rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        0 when the two people share no rated item; otherwise
        ``1 / (1 + d)`` where ``d`` is the Euclidean distance between
        their ratings on the shared items (so identical ratings give 1.0).
    """
    own_ratings = prefs[person1]
    shared_items = [item for item in own_ratings if item in prefs[person2]]

    # Nothing in common: report no similarity.
    if not shared_items:
        return 0

    other_ratings = prefs[person2]
    squared_distance = 0
    for item in shared_items:
        squared_distance += pow(own_ratings[item] - other_ratings[item], 2)

    # The +1 keeps the denominator non-zero when the distance is 0.
    return 1 / (sqrt(squared_distance) + 1)
[Python] 纯文本查看 复制代码
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient between two people's ratings.

    Only items rated by both ``p1`` and ``p2`` are taken into account.

    Args:
        prefs: Mapping of person -> {item: numeric rating}.
        p1: Key of the first person in ``prefs``.
        p2: Key of the second person in ``prefs``.

    Returns:
        0 when the two people share no rated item, or when the ratings of
        either person over the shared items have no variance (the
        coefficient is undefined there); otherwise the correlation
        coefficient, nominally between -1 and 1.
    """
    ratings1 = prefs[p1]
    shared_items = [item for item in ratings1 if item in prefs[p2]]
    n = len(shared_items)
    if n == 0:
        return 0

    ratings2 = prefs[p2]
    xs = [ratings1[item] for item in shared_items]
    ys = [ratings2[item] for item in shared_items]

    sum1 = sum(xs)
    sum2 = sum(ys)
    sum1_pow = sum(x ** 2 for x in xs)
    sum2_pow = sum(y ** 2 for y in ys)
    p_sum = sum(x * y for x, y in zip(xs, ys))

    num = p_sum - (sum1 * sum2) / n
    variance_product = (sum1_pow - sum1 ** 2 / n) * (sum2_pow - sum2 ** 2 / n)

    # Each factor is mathematically >= 0, but floating-point cancellation can
    # make the product come out slightly negative; taking sqrt() of that would
    # raise ValueError. Treat a non-positive product as "no correlation",
    # which also covers the exact zero-variance case.
    if variance_product <= 0:
        return 0
    return num / sqrt(variance_product)
皮尔逊相关系数是一个介于 -1 和 1 之间的值，其中，1 表示变量完全正相关，0 表示无关，-1 表示完全负相关。
[Python] 纯文本查看 复制代码
def top_matches(prefs, person, n=5, sim_def=sim_pearson):
    """Return the people in ``prefs`` most similar to ``person``.

    Args:
        prefs: Mapping of person -> {item: numeric rating}.
        person: Key of the person to compare everyone else against.
        n: Maximum number of matches to return.
        sim_def: Similarity function called as ``sim_def(prefs, a, b)``.

    Returns:
        A list of ``(score, other_person)`` tuples, highest first,
        truncated to ``n`` entries. ``person`` is never included.
    """
    ascending = sorted(
        (sim_def(prefs, person, candidate), candidate)
        for candidate in prefs
        if candidate != person
    )
    # Flip to best-first order, then keep the leading n entries.
    return ascending[::-1][0:n]