def tree2semi_rel(tree):
"""
Group a chunk structure into a list of 'semi-relations' of the form (list(str), ``Tree``).
In order to facilitate the construction of (``Tree``, string, ``Tree``) triples, this
identifies pairs whose first member is a list (possibly empty) of terminal
strings, and whose second member is a ``Tree`` of the form (NE_label, terminals).
:param tree: a chunk tree
:return: a list of pairs (list(str), ``Tree``)
:rtype: list of tuple
"""
from nltk.tree import Tree
semi_rels = []
semi_rel = [[], None]
for dtr in tree:
if not isinstance(dtr, Tree):
semi_rel[0].append(dtr)
else:
# dtr is a Tree
semi_rel[1] = dtr
semi_rels.append(semi_rel)
semi_rel = [[], None]
return semi_rels
python类Tree()的实例源码
def conlltags2tree(sentence, chunk_types=('NP','PP','VP'),
root_label='S', strict=False):
"""
Convert the CoNLL IOB format to a tree.
"""
tree = Tree(root_label, [])
for (word, postag, chunktag) in sentence:
if chunktag is None:
if strict:
raise ValueError("Bad conll tag sequence")
else:
# Treat as O
tree.append((word,postag))
elif chunktag.startswith('B-'):
tree.append(Tree(chunktag[2:], [(word,postag)]))
elif chunktag.startswith('I-'):
if (len(tree)==0 or not isinstance(tree[-1], Tree) or
tree[-1].label() != chunktag[2:]):
if strict:
raise ValueError("Bad conll tag sequence")
else:
# Treat as B-*
tree.append(Tree(chunktag[2:], [(word,postag)]))
else:
tree[-1].append((word,postag))
elif chunktag == 'O':
tree.append((word,postag))
else:
raise ValueError("Bad conll tag %r" % chunktag)
return tree
def _untag(self, tree):
for i, child in enumerate(tree):
if isinstance(child, Tree):
self._untag(child)
elif isinstance(child, tuple):
tree[i] = child[0]
else:
raise ValueError('expected child to be Tree or tuple')
return tree