def binarize(line, lan = "en"):
    """Binarize wide NP/VP nodes of one parse tree and return it on one line.

    The tree is built with ``nltk.Tree(line)`` and walked with an explicit
    stack.  Every visited node that has more than two children is handed to
    one of the binarization helpers, chosen by the node's ``.node`` label and
    by ``lan``:

    * ``'NP'``: ``rightBinarize``
    * ``'VP'`` with ``lan == 'en'``: ``vvBinarize``
    * ``'VP'`` with ``lan == 'ch'``: ``leftBinarize`` if the first child's
      label is in ``vvTags``, ``rightBinarize`` if the last child's label is
      in ``vvTags``, otherwise ``vvBinarize``

    The helpers' return values are ignored and ``root`` is printed afterwards,
    so they presumably rewrite the node in place (their definitions are not
    visible here -- confirm).

    Args:
        line: Passed unchanged to ``nltk.Tree``; presumably a bracketed parse
            string such as one line of a treebank file -- TODO confirm.
        lan: Language switch, ``'en'`` or ``'ch'``.  Only affects how VP
            nodes are handled.

    Returns:
        ``root.pprint()`` with every run of whitespace collapsed to a single
        space, followed by a newline.  This relies on ``pprint()`` returning
        a string rather than printing -- confirm for the NLTK version in use.

    Raises:
        AssertionError: if ``lan`` is neither ``'en'`` nor ``'ch'`` (the check
            disappears when Python runs with ``-O``).
    """
    assert lan in ['en', 'ch'], "illegal language (en or ch): %s" % lan
    root = nltk.Tree(line)
    # Depth-first traversal with an explicit stack instead of recursion.
    stack = [root]
    while stack:
        curNode = stack.pop()
        # Nodes with at most two children are already binary.
        if len(curNode) > 2:
            if curNode.node == 'NP':
                rightBinarize(curNode)
            elif curNode.node == 'VP':
                if lan == 'en':
                    vvBinarize(curNode)
                elif lan == 'ch':
                    # Pick the direction from where the verb-tagged child sits.
                    if curNode[0].node in vvTags:
                        leftBinarize(curNode)
                    elif curNode[-1].node in vvTags:
                        rightBinarize(curNode)
                    # NOTE(review): this function was recovered from a copy
                    # whose indentation was lost.  This `else` is read as the
                    # fallback of the Chinese VP chain (no verb tag at either
                    # edge).  It could instead belong to the NP/VP chain, in
                    # which case every other wide node would get vvBinarize.
                    # Confirm against the original file.
                    else:
                        vvBinarize(curNode)
        # Visit children after any rewrite of curNode, so nodes created or
        # moved by the helpers are traversed too (assuming in-place edits).
        for child in curNode:
            #print >> sys.stderr, child
            # Only subtrees of height > 2 are pushed; lower ones are skipped.
            # NOTE(review): assumes every child reached here has a height()
            # method.  A plain-string leaf (e.g. if the root itself is a
            # preterminal) would not -- confirm such input never occurs.
            if child.height() > 2:
                stack.append(child)
                # No-op: nothing follows it in the loop body.
                continue
    return ' '.join(root.pprint().split()) + '\n'
评论列表
文章目录