def filter_visited(curr_dir, subdirs, already_visited, follow_dirlinks, on_error):
    """Filter out subdirectories that have already been visited.

    This is used to avoid loops in the search performed by os.walk() in
    index_files_by_size.

    Args:
        curr_dir: path of the current directory, as returned by os.walk().
        subdirs: list of subdirectory names for the current directory, as
            returned by os.walk().
        already_visited: set of (st_dev, st_ino) tuples identifying the
            directories visited so far. This set is not modified.
        follow_dirlinks: if true, symbolic links to directories are
            resolved with os.stat() and treated like the directory they
            point to; if false, entries are inspected with os.lstat() and
            symbolic links are silently dropped from the result.
        on_error: function f(OSError) -> None, called for every entry that
            cannot be stat'ed and for every entry that is skipped because
            its directory was already visited.

    Returns:
        A tuple (filtered, visited): the (possibly shortened) list of
        subdirectory names, in their original order, and a new set holding
        already_visited plus the current directory and every subdirectory
        that was kept.
    """
    # lstat() describes a symlink itself, stat() describes its target.
    stat_func = os.stat if follow_dirlinks else os.lstat

    # Work on a private copy so the caller's set is left untouched.
    visited = set(already_visited)

    try:
        # Mark the current directory as visited, so we catch symlinks to it
        # immediately instead of after one iteration of the directory loop.
        file_info = stat_func(curr_dir)
    except OSError as e:
        on_error(e)
    else:
        visited.add((file_info.st_dev, file_info.st_ino))

    filtered = []
    for subdir in subdirs:
        full_path = os.path.join(curr_dir, subdir)
        try:
            file_info = stat_func(full_path)
        except OSError as e:
            on_error(e)
            continue

        if not follow_dirlinks and stat.S_ISLNK(file_info.st_mode):
            # Following links to dirs is disabled, ignore this one.
            continue

        dev_inode = (file_info.st_dev, file_info.st_ino)
        if dev_inode in visited:
            on_error(OSError(errno.ELOOP, "directory loop detected", full_path))
            continue

        filtered.append(subdir)
        # Record the directory right away: a later sibling that resolves to
        # the same directory (e.g. a symlink alias) must not be kept too,
        # otherwise that directory would be walked twice.
        visited.add(dev_inode)

    return filtered, visited
评论列表
文章目录