def getPOSLinks(text):
    """Extract lemmatized word spans ("links") from *text* based on POS tags.

    The text is tokenized and tagged with nltk. A span opens on a token whose
    tag starts with "DT" or equals "WP", "VB" or "IN"; every token from that
    point on is lemmatized and collected. The span closes (inclusive) on a
    token tagged "PRP", "." or any tag starting with "NN", at which point the
    collected lemmas are joined with single spaces and stored.

    Tokens seen while no span is open are ignored. A span that is still open
    when the tokens run out is discarded.

    Returns:
        A list of space-joined lemma strings, one per completed span, in
        order of appearance.
    """
    lemmatizer = WordNetLemmatizer()
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))

    spans = []
    current = []
    collecting = False
    for token, tag in tagged_tokens:
        if not collecting:
            # Only an opener tag can start a new span; skip everything else.
            if not (tag.startswith("DT") or tag in ("WP", "VB", "IN")):
                continue
            collecting = True

        current.append(lemmatizer.lemmatize(token))

        # extract main body: a pronoun, noun-like tag or sentence end closes
        # the span, with the closing token itself included.
        if tag in ("PRP", ".") or tag.startswith("NN"):
            spans.append(" ".join(current))
            current = []
            collecting = False
    return spans
generate_ngram_pos_link.py 文件源码
python
阅读 37
收藏 0
点赞 0
评论 0
评论列表
文章目录