def segment(self, *args):
"""Segment one or more datasets with this subword field.
Arguments:
Positional arguments: Dataset objects or other indexable
mutable sequences to segment. If a Dataset object is provided,
all columns corresponding to this field are used; individual
columns can also be provided directly.
"""
sources = []
for arg in args:
if isinstance(arg, Dataset):
sources += [getattr(arg, name) for name, field in
arg.fields.items() if field is self]
else:
sources.append(arg)
for data in sources:
for x in tqdm(data, 'segmenting'):
x[:] = self.vocab.segment(x)
评论列表
文章目录