def get_continuous_chunks(self, text):
chunked = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(text)))
prev = None
continuous_chunk = []
current_chunk = []
for i in chunked:
if type(i) == nltk.Tree:
current_chunk.append(" ".join([token for token, pos in i.leaves()]))
elif current_chunk:
named_entity = " ".join(current_chunk)
if named_entity not in continuous_chunk:
continuous_chunk.append(named_entity)
current_chunk = []
else:
continue
return continuous_chunk
评论列表
文章目录