def boc_term_vectors(word_list):
    """Build bag-of-characters count vectors for a list of words.

    Every word is lower-cased, the sorted set of distinct characters over
    all words becomes the shared vocabulary, and each word is encoded as a
    vector holding how often each vocabulary character occurs in it.

    Args:
        word_list: Iterable of strings.

    Returns:
        A tuple ``(unique_chars, boc_vectors)``. ``unique_chars`` is the
        sorted list of distinct characters. ``boc_vectors`` is a list with
        one integer ``np.ndarray`` per input word, aligned position by
        position with ``unique_chars``. An empty ``word_list`` yields
        ``([], [])``.
    """
    # Function-scope import keeps the block self-contained. Counter replaces
    # scipy.stats.itemfreq, which recent SciPy releases no longer provide.
    from collections import Counter

    words = [word.lower() for word in word_list]
    all_chars = [char for word in words for char in word]
    # np.unique returns the distinct characters in sorted order; skip the
    # call when there are no characters at all (empty list / empty words).
    unique_chars = list(np.unique(all_chars)) if all_chars else []
    per_word_counts = [Counter(word) for word in words]
    boc_vectors = [
        # Counter returns 0 for characters absent from the word; dtype=int
        # keeps the vector integer-typed even when the vocabulary is empty.
        np.array([counts[char] for char in unique_chars], dtype=int)
        for counts in per_word_counts
    ]
    return unique_chars, boc_vectors
term_similarity.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录