def find_chunk(sent, chunk_rule=None):
    """Return the text of the first chunk in *sent* that matches *chunk_rule*.

    The rule is handed to ``nltk.RegexpParser`` and *sent* is parsed with
    it. The resulting tree is then searched for the first subtree whose
    label equals the part of the rule before its first ``:``.

    Args:
        sent: The sentence passed to ``RegexpParser.parse``. Presumably a
            POS-tagged sentence, i.e. a list of ``(word, tag)`` pairs --
            TODO confirm against callers.
        chunk_rule: Chunk grammar string of the form ``'LABEL: pattern'``.
            When falsy (``None`` or ``''``) a built-in ``HCHUNK`` rule is
            used instead.

    Returns:
        The first item of every child of the matching subtree, joined by
        single spaces, or ``None`` when no subtree carries the label.

    Side effects:
        Prints the matched text to stdout before returning it.
    """
    grammar = chunk_rule or 'HCHUNK: <W.*><.*>*?{<N.*>+}'

    # The chunk label is whatever precedes the first ':' in the grammar.
    wanted_label = grammar.split(':')[0].strip()

    parsed = nltk.RegexpParser(grammar).parse(sent)

    for node in parsed.subtrees():
        if node.label() != wanted_label:
            continue
        # Keep only the first item of each child of the chunk.
        phrase = ' '.join(child[0] for child in node)
        print(phrase)
        return phrase

    return None
##this is required only once
# Comment list
# Article table of contents