def get_similarity(word_list1, word_list2):
    """Return a word-overlap similarity score for two word lists.

    The score is the number of distinct words that occur in both lists,
    divided by ``log(len(word_list1)) + log(len(word_list2))`` (natural
    logarithms).

    Keyword arguments:
    word_list1, word_list2 -- sequences of hashable words; repeated words
                              count once towards the overlap but still
                              contribute to the length terms

    Returns 0.0 when the lists share no word (which covers either list
    being empty) or when the denominator is numerically zero, i.e. both
    lists contain exactly one word.
    """
    # A word co-occurs when it is present in both lists, so the number of
    # co-occurring words is simply the size of the set intersection.
    co_occur_num = len(set(word_list1) & set(word_list2))
    if co_occur_num == 0:
        return 0.

    # Both lists are non-empty at this point (they share at least one word),
    # so neither log() call can receive zero.
    denominator = math.log(float(len(word_list1))) + math.log(float(len(word_list2)))
    if abs(denominator) < 1e-12:
        return 0.
    return co_occur_num / denominator
# (Web page residue, not code: "Comment list")
# (Web page residue, not code: "Article contents")