def find_parts(self) -> ty.ValuesView:
    """ Find all files generated by inputs.

    Returns:
        Lists of file paths, one list per 40-character object id taken
        from the file names.
    """
    files = collections.defaultdict(list)  # type: ty.DefaultDict[str, ty.List[str]]
    for fname in sorted(os.listdir(self._working_dir)):
        fpath = os.path.join(self._working_dir, fname)
        if not os.path.isfile(fpath):
            continue
        oid = fname.split('.', 1)[0]
        if len(oid) == 40:
            files[oid].append(fpath)
    return files.values()
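A minimal standalone sketch of the same grouping idea, with the directory walking dropped; the helper name and the sample file names below are made up for illustration:

import collections
import typing as ty


def group_by_oid(fnames: ty.Iterable[str]) -> ty.DefaultDict[str, ty.List[str]]:
    """Group file names by their 40-character object-id prefix."""
    groups = collections.defaultdict(list)  # type: ty.DefaultDict[str, ty.List[str]]
    for fname in sorted(fnames):
        oid = fname.split('.', 1)[0]
        if len(oid) == 40:  # only names that look like a SHA-1 hex digest
            groups[oid].append(fname)
    return groups


print(dict(group_by_oid(['a' * 40 + '.json', 'a' * 40 + '.log', 'notes.txt'])))
# {'aaaa...a': ['aaaa...a.json', 'aaaa...a.log']}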
def assign_data(centroids: Sequence[Centroid], data: Iterable[Point]) -> Dict[Centroid, Sequence[Point]]:
    'Assign each data point to its closest centroid'
    d: DefaultDict[Centroid, List[Point]] = defaultdict(list)
    for point in data:
        centroid: Centroid = min(centroids, key=partial(dist, point))
        d[centroid].append(point)
    return dict(d)
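A hedged usage sketch for the snippet above. The concrete Point/Centroid aliases and the Euclidean dist are assumptions for illustration, not taken from the original code, and assign_data is assumed to be in scope as defined above:

import math
from typing import Tuple

Point = Tuple[float, float]      # assumed concrete point type
Centroid = Point                 # centroids are just points here
dist = math.dist                 # plain Euclidean distance

centroids = [(0.0, 0.0), (10.0, 10.0)]
points = [(1.0, 2.0), (9.0, 11.0), (0.5, 0.5)]
print(assign_data(centroids, points))
# {(0.0, 0.0): [(1.0, 2.0), (0.5, 0.5)], (10.0, 10.0): [(9.0, 11.0)]}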
def __init__(self) -> None:
    self.sentence_buffer: List[str] = []
    self.sentences: List[List[str]] = []
    self.num_total_words = 0
    # Cluster id -> List of (start_index, end_index) spans.
    self.clusters: DefaultDict[int, List[Tuple[int, int]]] = collections.defaultdict(list)
    # Cluster id -> List of start_indices which are open for this id.
    self.coref_stacks: DefaultDict[int, List[int]] = collections.defaultdict(list)
def __init__(self, name, gatewayIDs: List[ModuleID]):
    # initialise the base Thread class
    super().__init__()
    self.name = 'Dispatcher ' + name
    self._gatewayIDs = gatewayIDs
    self._routeMap = self._initRouteMap(gatewayIDs)  # type: DefaultDict[ModuleID, ModuleID]
    self._groupMap = {}  # type: DefaultDict[GroupID, List[ModuleID]]
    self._msgCount = 0
def _initRouteMap(self, gatewayIDs: List[ModuleID]) -> DefaultDict[ModuleID, ModuleID]:
    """
    Initially fill the route map with the gateways (each gateway routes to itself).
    """
    routeMap = {}
    for gatewayID in gatewayIDs:
        routeMap[gatewayID] = gatewayID
    return routeMap
def _convertToMap(sourceParts: List[SourcePart]) -> DefaultDict[ModuleID, SourcePart]:
    sourcesByID = {}
    for sourcePart in sourceParts:
        sourcesByID[sourcePart.sourceID] = sourcePart
    return sourcesByID
def _pretty_print(self, frequencies: DefaultDict[Any, DefaultDict[Any, int]]) -> str:
    lines = []
    for category in frequencies:
        line = '{}: '.format(category)
        category_frequency = frequencies[category]
        ordered = OrderedDict(sorted(category_frequency.items(), key=lambda t: t[1], reverse=True))
        for word, frequency in ordered.items():
            if frequency > 1:
                line += '({}, {}) '.format(word, frequency)
        lines.append(line)
    lines.sort()
    return '\n'.join(lines)
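A hedged sketch of building the nested frequency table that _pretty_print consumes; the categories and words below are made up:

from collections import defaultdict
from typing import Any, DefaultDict

# category -> word -> count; the inner defaultdict(int) lets "+= 1" work for
# unseen words, the outer one for unseen categories.
frequencies: DefaultDict[Any, DefaultDict[Any, int]] = defaultdict(lambda: defaultdict(int))
for category, word in [('fruit', 'apple'), ('fruit', 'apple'), ('fruit', 'pear'),
                       ('tool', 'hammer'), ('tool', 'hammer')]:
    frequencies[category][word] += 1

# _pretty_print(frequencies) then lists, per category, the words seen more than
# once, ordered by descending count, e.g. "fruit: (apple, 2)".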
def __init__(self) -> None:
    self.counters = DefaultDict[str, int](int)
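Calling the subscripted alias like this returns an ordinary collections.defaultdict(int), so missing keys start at 0. A tiny hedged demo with invented counter names:

from typing import DefaultDict

counters = DefaultDict[str, int](int)   # behaves like collections.defaultdict(int)
counters['requests'] += 1               # missing keys default to 0
counters['requests'] += 1
counters['errors'] += 1
print(dict(counters))                   # {'requests': 2, 'errors': 1}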
def _process_coref_span_annotations_for_word(label: str,
                                             word_index: int,
                                             clusters: DefaultDict[int, List[Tuple[int, int]]],
                                             coref_stacks: DefaultDict[int, List[int]]) -> None:
    """
    For a given coref label, add it to the currently open span(s), complete span(s),
    or ignore it if it is outside of all spans. This method mutates the clusters and
    coref_stacks dictionaries.

    Parameters
    ----------
    label : ``str``
        The coref label for this word.
    word_index : ``int``
        The word index into the sentence.
    clusters : ``DefaultDict[int, List[Tuple[int, int]]]``
        A dictionary mapping cluster ids to lists of inclusive spans into the
        sentence.
    coref_stacks : ``DefaultDict[int, List[int]]``
        Stacks for each cluster id to hold the start indices of active spans (spans
        which we are inside of when processing a given word). Spans with the same id
        can be nested, which is why we collect these opening spans on a stack, e.g:
        [Greg, the baker who referred to [himself]_ID1 as 'the bread man']_ID1
    """
    if label != "-":
        for segment in label.split("|"):
            # The conll representation of coref spans allows spans to
            # overlap. If spans end or begin at the same word, they are
            # separated by a "|".
            if segment[0] == "(":
                # The span begins at this word.
                if segment[-1] == ")":
                    # The span begins and ends at this word (single word span).
                    cluster_id = int(segment[1:-1])
                    clusters[cluster_id].append((word_index, word_index))
                else:
                    # The span is starting, so we record the index of the word.
                    cluster_id = int(segment[1:])
                    coref_stacks[cluster_id].append(word_index)
            else:
                # The span for this id is ending, but didn't start at this word.
                # Retrieve the start index from the document state and
                # add the span to the clusters for this id.
                cluster_id = int(segment[:-1])
                start = coref_stacks[cluster_id].pop()
                clusters[cluster_id].append((start, word_index))
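A hedged walk-through of the function above on an invented label sequence; the labels and word indices are made up, and the function is assumed to be in scope as defined above:

import collections
from typing import DefaultDict, List, Tuple

clusters: DefaultDict[int, List[Tuple[int, int]]] = collections.defaultdict(list)
coref_stacks: DefaultDict[int, List[int]] = collections.defaultdict(list)

# "(0" opens cluster 0 at word 0, "-" is outside every span, "(1)" is a
# single-word span for cluster 1, and "0)" closes cluster 0 at word 3.
for word_index, label in enumerate(["(0", "-", "(1)", "0)"]):
    _process_coref_span_annotations_for_word(label, word_index, clusters, coref_stacks)

print(dict(clusters))   # {1: [(2, 2)], 0: [(0, 3)]}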