def find_chunk(sent, chunk_rule=None):
    """Return the text of the first chunk in ``sent`` matched by ``chunk_rule``.

    Args:
        sent: The token sequence handed unchanged to
            ``nltk.RegexpParser.parse``. Presumably a POS-tagged sentence,
            i.e. a list of ``(word, tag)`` pairs -- element 0 of every leaf
            is taken as the word. TODO confirm against callers.
        chunk_rule: Chunk grammar of the form ``"LABEL: <pattern>"``. When
            omitted or empty, a default grammar defining a single ``QWORD``
            chunk is used.

    Returns:
        The words of the first subtree whose label equals ``LABEL`` (the
        text before the first ``:`` in ``chunk_rule``), joined with single
        spaces, or ``None`` when the parse contains no such subtree.
    """
    if not chunk_rule:
        chunk_rule = 'QWORD: <W.*><V.*><DT>*{<.*>*?<N.*>+}'
    logger.debug(chunk_rule)

    # The chunk label is whatever precedes the first colon of the grammar.
    label = chunk_rule.split(':')[0].strip()

    parser = nltk.RegexpParser(chunk_rule)
    tree = parser.parse(sent)

    for subtree in tree.subtrees():
        if subtree.label() == label:
            # leaves() flattens any nested subtrees, so every item here is a
            # token rather than a Tree; for a flat chunk it is identical to
            # iterating the subtree's direct children.
            return ' '.join(leaf[0] for leaf in subtree.leaves())

    # No chunk carrying the requested label was produced.
    return None
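# NOTE: web-page navigation residue captured with this snippet, not code:
# "Comment list" / "Article table of contents".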