run.py 文件源码

python
阅读 29 收藏 0 点赞 0 评论 0

项目:TiebaTool 作者: ZRStea 项目源码 文件源码
def calculate_similarity(text1,text2):
    raw1 = jieba.cut(text1)
    raw2 = jieba.cut(text2)
    raw1 = Counter(raw1)
    raw2 = Counter(raw2)
    same_words = set(raw1) & set(raw2)
    if (math.sqrt(len(raw1)) * math.sqrt(len(raw2))) != 0:
        dot_product = 0
        mod1 = 0
        mod2 = 0
        for word in same_words:
            dot_product += raw1[word] * raw2[word]
        for word in raw1:
            mod1 += math.pow(raw1[word],2)
        for word in raw2:
            mod2 += math.pow(raw2[word],2)
        cos = dot_product/math.sqrt(mod1*mod2)
    else:
        cos = 0
    return cos
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号