def __iter__(self) -> Iterator[Tuple[str, str]]:
    """Yield aligned ``(source, target)`` line pairs from 'europarl-v7.tgz'.

    The archive is requested through ``EuroparlCache`` and the two member
    files named by ``self._files()`` are read in lockstep. Trailing
    whitespace is stripped from every line, and only pairs accepted by
    ``self._length_checker`` are emitted.

    Iteration ends when either member file is exhausted (``zip``
    semantics) or once ``self._max_observations`` pairs have been
    yielded. A cap of ``None`` means no limit; a cap of zero or less
    yields nothing.

    Yields:
        Tuples of ``(source, target)`` with trailing whitespace removed.
    """
    with EuroparlCache() as europarl_cache:
        europarl_cache.download(name='europarl-v7.tgz', url=_v7_url)

        # Peek into the tarball: resolve the archive path and the two
        # member paths that should be read from it.
        filepath = europarl_cache.filepath('europarl-v7.tgz')
        source_filepath, target_filepath = self._files()

        # The cap is otherwise only tested *after* a pair has been
        # yielded, so a non-positive cap has to be handled up front or
        # one pair would leak out.
        limit = self._max_observations
        if limit is not None and limit <= 0:
            return

        with tar_extract_file(filepath, source_filepath) as source_file, \
                tar_extract_file(filepath, target_filepath) as target_file:
            observations = 0
            for source, target in zip(source_file, target_file):
                source, target = source.rstrip(), target.rstrip()

                # Rejected pairs do not count towards the cap.
                if not self._length_checker(source, target):
                    continue

                yield (source, target)

                observations += 1
                if limit is not None and observations >= limit:
                    break
评论列表
文章目录