def synonym_cut(sentence, pattern="wf"):
    """Cut the sentence into a synonym vector tag.

    Trailing characters contained in ``tone_words`` are stripped from the
    sentence first; the remaining text is then segmented according to
    ``pattern``.

    If a word in this sentence was not found in the synonym dictionary,
    it will be marked with the default value of the word segmentation tool.

    Args:
        sentence: Text to segment.
        pattern: Selects the form of the result:
            'w'  - words from ``jieba.cut`` with punctuation removed;
            'k'  - ``analyse.extract_tags`` limited to the top 1 keyword;
            't'  - ``analyse.extract_tags`` limited to the top 10 keywords;
            'wf' - ``(word, flag)`` tuples from ``posseg.cut`` with
                   punctuation removed (default);
            'tf' - ``(word, flag)`` tuples from ``posseg.cut`` restricted
                   to words found among the top 10 keywords.

    Returns:
        A list for 'w', 'wf' and 'tf'; whatever ``analyse.extract_tags``
        returns for 'k' and 't'; an empty list for any other pattern.
    """
    sentence = sentence.rstrip(tone_words)

    if pattern == "w":
        return [word for word in jieba.cut(sentence)
                if word not in punctuation_all]

    if pattern == "k":
        return analyse.extract_tags(sentence, topK=1)

    if pattern == "t":
        return analyse.extract_tags(sentence, topK=10)

    if pattern == "wf":
        tagged = []
        for token in posseg.cut(sentence):
            if token.word in punctuation_all:
                continue
            if len(token.flag) < 4:
                # Short flags are replaced by the flag the word receives
                # when it is tagged on its own.
                # NOTE(review): presumably synonym-dictionary tags are at
                # least four characters long, so only words carrying the
                # tool's default part-of-speech flag are re-tagged - confirm.
                token.flag = list(posseg.cut(token.word))[0].flag
            tagged.append((token.word, token.flag))
        return tagged

    if pattern == "tf":
        tokens = posseg.cut(sentence)
        keywords = analyse.extract_tags(sentence, topK=10)
        return [(token.word, token.flag) for token in tokens
                if token.word in keywords]

    # Unrecognised pattern: nothing is segmented.
    return []
评论列表
文章目录