def calculate_similarity(text1, text2):
    """Return the cosine similarity of two texts based on token counts.

    Each text is tokenized with ``jieba.cut`` and turned into a
    bag-of-words vector of token frequencies; the cosine of the angle
    between the two vectors is returned.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        ``0.0`` when either text is empty or produces no tokens, otherwise
        ``dot(v1, v2) / (|v1| * |v2|)`` as a float. Token counts are
        positive, so the value lies between 0 and 1 up to floating-point
        rounding.
    """
    # Nothing to compare: skip tokenization entirely. The comparison is
    # against the empty string explicitly so that any non-str argument is
    # still handed to jieba unchanged, exactly as before.
    if text1 == "" or text2 == "":
        return 0.0

    counts1 = Counter(jieba.cut(text1))
    counts2 = Counter(jieba.cut(text2))

    # A zero vector has no direction; define the similarity as 0 instead of
    # dividing by zero.
    if not counts1 or not counts2:
        return 0.0

    # Only tokens present in both texts contribute to the dot product.
    shared_tokens = counts1.keys() & counts2.keys()
    dot_product = sum(counts1[token] * counts2[token] for token in shared_tokens)

    # Squared Euclidean norms of the two frequency vectors.
    squared_norm1 = sum(count * count for count in counts1.values())
    squared_norm2 = sum(count * count for count in counts2.values())

    return dot_product / math.sqrt(squared_norm1 * squared_norm2)
# 评论列表 (comment list)
# 文章目录 (table of contents)