def get_word(word):
    """Decide whether an underscore-delimited title is short enough to keep.

    The title may contain a parenthesised qualifier introduced by ``_(``,
    for example ``Mercury_(planet)``. Three cases are distinguished:

    * No ``_(qualifier)`` part made of letters, digits and underscores is
      found: the title is kept when it has fewer than five
      underscore-separated tokens.
    * The qualifier is exactly ``disambiguation``: the title is rejected.
    * Any other qualifier: the parenthesised part is stripped and the
      remaining title is kept when it has fewer than five
      underscore-separated tokens.

    Args:
        word: The candidate title string.

    Returns:
        A ``(keep, word)`` tuple where ``keep`` is a bool and ``word`` is
        the unmodified input.
    """
    qualifier = re.search(r"_\(([A-Za-z0-9_]+)\)", word)
    if qualifier is None:
        title = word
    elif qualifier.group(1) == "disambiguation":
        return False, word
    else:
        # Drop the qualifier so it does not count towards the length limit.
        title = re.sub(r'_\(.+\)', '', word)

    # Tokens are separated by underscores in both cases. The previous code
    # split the stripped title on spaces, which always produced one token
    # and so never enforced the limit for qualified titles.
    return len(title.split("_")) < 5, word
# Load the trained doc2vec and word2vec models.
get_indices.py 文件源码
python
阅读 17
收藏 0
点赞 0
评论 0
评论列表
文章目录