def _list_files(repos, languages, remove):
    """List, per language, the source files found inside zipped repositories.

    Args:
        repos: sized collection of zip archive paths; ``len()`` is taken
            of it to report progress.
        languages: mapping of language name to an iterable of file
            extensions. Extensions are matched against the member's
            suffix with its leading dot stripped.
        remove: when true, an archive that raises ``BadZipFile`` is
            deleted from disk.

    Returns:
        A dict mapping every language to a list of ``'archive::member'``
        strings. Each archive contributes at most ``MAX_FILES`` entries
        per language; archives that raise ``BadZipFile`` contribute none.
    """
    collected = {language: [] for language in languages}

    # Reverse lookup: extension -> language. If an extension is listed
    # under several languages, the last one iterated wins.
    ext_to_lang = {}
    for language, extensions in languages.items():
        for extension in extensions:
            ext_to_lang[extension] = language

    for index, archive in enumerate(repos, 1):
        # Matches are staged per archive so that a malformed archive
        # contributes nothing to the final result.
        staged = {language: [] for language in languages}

        total = len(repos)
        if index % STEP == 0 or index == total:
            LOGGER.debug("%.2f%%", 100 * index / total)

        try:
            with ZipFile(archive) as opened:
                for member in opened.namelist():
                    extension = Path(member).suffix.lstrip('.')
                    language = ext_to_lang.get(extension)
                    # Keep the member only for a known language that has
                    # not yet reached its per-archive quota.
                    if language and len(staged[language]) < MAX_FILES:
                        staged[language].append(
                            '{}::{}'.format(archive, member))
        except BadZipFile as error:
            LOGGER.warning("Malformed file %s, error: %s", archive, error)
            if remove:
                Path(archive).unlink()
                LOGGER.debug("%s removed", archive)
        else:
            # Only archives that were read without error are merged.
            for language, members in staged.items():
                collected[language].extend(members)

    return collected
评论列表
文章目录