def _process_coref_span_annotations_for_word(label: str,
word_index: int,
clusters: DefaultDict[int, List[Tuple[int, int]]],
coref_stacks: DefaultDict[int, List[int]]) -> None:
"""
For a given coref label, add it to a currently open span(s), complete a span(s) or
ignore it, if it is outside of all spans. This method mutates the clusters and coref_stacks
dictionaries.
Parameters
----------
label : ``str``
The coref label for this word.
word_index : ``int``
The word index into the sentence.
clusters : ``DefaultDict[int, List[Tuple[int, int]]]``
A dictionary mapping cluster ids to lists of inclusive spans into the
sentence.
coref_stacks: ``DefaultDict[int, List[int]]``
Stacks for each cluster id to hold the start indices of active spans (spans
which we are inside of when processing a given word). Spans with the same id
can be nested, which is why we collect these opening spans on a stack, e.g:
[Greg, the baker who referred to [himself]_ID1 as 'the bread man']_ID1
"""
if label != "-":
for segment in label.split("|"):
# The conll representation of coref spans allows spans to
# overlap. If spans end or begin at the same word, they are
# separated by a "|".
if segment[0] == "(":
# The span begins at this word.
if segment[-1] == ")":
# The span begins and ends at this word (single word span).
cluster_id = int(segment[1:-1])
clusters[cluster_id].append((word_index, word_index))
else:
# The span is starting, so we record the index of the word.
cluster_id = int(segment[1:])
coref_stacks[cluster_id].append(word_index)
else:
# The span for this id is ending, but didn't start at this word.
# Retrieve the start index from the document state and
# add the span to the clusters for this id.
cluster_id = int(segment[:-1])
start = coref_stacks[cluster_id].pop()
clusters[cluster_id].append((start, word_index))
# Scraped web-page residue, not code: "评论列表" (comment list), "文章目录" (article table of contents).