def identify_packages_from_files(self, files):
    """Identify the packages that provide a given collection of files.

    Every path in `files` is looked up in the mapping returned by
    ``self._get_packagefields_for_files``.  Paths that map to package
    fields are grouped under a package object built by
    ``self._create_package``; all other paths are reported as unknown.

    Parameters
    ----------
    files : iterable
      Container (e.g. list or set) of file paths.  It is first handed to
      ``_get_packagefields_for_files`` and then iterated here, so a
      one-shot iterator may already be exhausted by the second pass.

    Returns
    -------
    (found_packages, unknown_files)
      - found_packages is a list of the package objects returned by
        ``self._create_package``, one per distinct set of package
        fields.  Paths attributed to a package are appended to its
        ``files`` list
      - unknown_files is a list of files that were not found in
        a package (no entry in the mapping, a ``None`` entry, or a
        falsy result from ``_create_package``)
    """
    unknown_files = set()
    # Maps a hashable snapshot of the package fields to the package object
    # created from them.
    found_packages = {}
    nb_pkg_files = 0

    # TODO: probably that _get_packagefields should create packagespecs
    # internally and just return them.  But we should make them hashable
    file_to_package_dict = self._get_packagefields_for_files(files)
    for f in files:
        # A missing entry and an explicit None both mean "no package".
        pkgfields = file_to_package_dict.get(f)
        if pkgfields is None:
            unknown_files.add(f)
            continue

        # TODO: pkgname should become pkgid
        # where for packages from distributions would be name,
        # for VCS -- their path
        # A frozenset (rather than a tuple) of the items keeps the key
        # independent of the order in which the fields were inserted, so
        # equal field dicts always resolve to the same package.
        pkg_key = frozenset(pkgfields.items())

        # Only truthy packages are ever stored, so None means "not seen".
        pkg = found_packages.get(pkg_key)
        if pkg is not None:
            pkg.files.append(f)
            nb_pkg_files += 1
            continue

        pkg = self._create_package(**pkgfields)
        if not pkg:
            unknown_files.add(f)
            continue

        found_packages[pkg_key] = pkg
        # we store only non-directories within 'files'
        # NOTE(review): this check is applied only to the first path seen
        # for a package; later paths are appended above without it --
        # confirm whether that asymmetry is intended.
        if not self._session.isdir(f):
            pkg.files.append(f)
        # The path counts as attributed to a package even if it was a
        # directory and therefore not stored.
        nb_pkg_files += 1

    lgr.info("%s: %d packages with %d files, and %d other files",
             self.__class__.__name__,
             len(found_packages),
             nb_pkg_files,
             len(unknown_files))

    return list(viewvalues(found_packages)), list(unknown_files)
# [web-page residue, not part of the module] Comment list / Article table of contents