def token_similarity(self, words, rwords):
    """Compare two token collections through their averaged word vectors.

    Each collection is de-duplicated, the vectors of its tokens found in
    ``self.word_vec`` are averaged, and the two averages are handed to
    ``cosine``.

    Args:
        words: Iterable of hashable tokens. Tokens listed in
            ``self.stopwords`` are ignored on this side.
        rwords: Iterable of hashable tokens. Stop words are NOT filtered
            on this side.
            NOTE(review): the asymmetry is kept from the original code;
            confirm it is intentional.

    Returns:
        Whatever ``cosine`` returns for the two mean vectors, or ``1`` when
        either side contributes no usable token.
        NOTE(review): ``cosine`` is defined outside this block; the fallback
        of 1 suggests it is a distance (as scipy.spatial.distance.cosine
        is) rather than a similarity -- confirm against the file's imports.
    """

    def mean_vector(tokens, skip):
        # Sum the vectors of the distinct, known, non-skipped tokens and
        # report how many contributed so the caller can detect "no match".
        total = np.zeros(self.word_dim)
        count = 0
        for token in set(tokens):
            # `in` replaces dict.has_key(), which no longer exists in
            # Python 3; it behaves the same on Python 2.
            if token in self.word_vec and token not in skip:
                total += self.word_vec[token]
                count += 1
        if count:
            total = total / count
        return total, count

    word_vec, word_count = mean_vector(words, self.stopwords)
    rword_vec, rword_count = mean_vector(rwords, ())

    # Without at least one known token on each side there is nothing to
    # compare; keep the original fallback value.
    if not word_count or not rword_count:
        return 1
    return cosine(word_vec, rword_vec)
评论列表
文章目录