def jaccard_pinyin(pv1, pv2):
    """Score the similarity of two pinyin vectors with a Jaccard-style ratio.

    Every element of ``pv1`` is compared with every element of ``pv2`` via
    ``match_pinyin``, producing a score table with one row per ``pv1``
    element and one column per ``pv2`` element. That table is wrapped with
    ``mat`` and handed to ``sum_cosine`` (with 0.7 as its second argument),
    whose result supplies the three values combined as::

        total / (total + num_not_match * (1 - total_dif))

    Per the original documentation, the similarity score for two pinyin
    sentences lies in the interval [0, 1].

    Args:
        pv1: Iterable of pinyin items for the first sentence.
        pv2: Iterable of pinyin items for the second sentence; it is
            traversed once for every element of ``pv1``.

    Returns:
        The similarity ratio computed from the ``sum_cosine`` result.
    """
    # One inner list per pv1 element, holding its score against each pv2 item.
    pairwise_scores = [
        [match_pinyin(left, right) for right in pv2]
        for left in pv1
    ]
    stats = sum_cosine(mat(pairwise_scores), 0.7)
    matched_total = stats["total"]
    residual = stats["total_dif"]
    unmatched_count = stats["num_not_match"]
    # Unmatched items enlarge the denominator, each weighted by
    # (1 - total_dif).
    return matched_total / (matched_total + unmatched_count * (1 - residual))
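# Residue from the source web page, not part of the code:
# "评论列表" (comment list) and "文章目录" (article table of contents).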