finddups.py 文件源码

python
阅读 29 收藏 0 点赞 0 评论 0

项目:capidup 作者: israel-lugo 项目源码 文件源码
def filter_visited(curr_dir, subdirs, already_visited, follow_dirlinks, on_error):
    """Drop subdirectories whose (device, inode) pair was already seen.

    This is used to avoid loops in the search performed by os.walk() in
    index_files_by_size.

    curr_dir is the path of the current directory, as returned by os.walk().

    subdirs is the list of subdirectory names for the current directory, as
    returned by os.walk(). Each name is joined to curr_dir before being
    examined.

    already_visited is a set of tuples (st_dev, st_ino) of already visited
    directories. It is copied up front, and the caller's set is never
    modified.

    follow_dirlinks selects how paths are examined: if true, os.stat() is
    used, so a symbolic link is identified by the directory it points to;
    if false, os.lstat() is used and any subdirectory that is itself a
    symbolic link is silently left out of the result.

    on_error is a function f(OSError) -> None. It is called when a path
    cannot be examined, and with an ELOOP OSError for every subdirectory
    that turns out to have been visited before.

    Returns a tuple: the list of subdirectories that are still to be
    visited (in their original order), and a new set holding the previously
    visited directories plus the current directory and the kept
    subdirectories.

    """
    # os.stat() resolves symlinks, os.lstat() describes the link itself
    examine = os.stat if follow_dirlinks else os.lstat

    seen = already_visited.copy()

    # Register the current directory first, so that a symlink pointing back
    # to it is caught right away rather than one level further down.
    try:
        here = examine(curr_dir)
    except OSError as err:
        on_error(err)
    else:
        seen.add((here.st_dev, here.st_ino))

    kept = []
    pending = set()

    for name in subdirs:
        path = os.path.join(curr_dir, name)

        try:
            info = examine(path)
        except OSError as err:
            on_error(err)
            continue

        if not follow_dirlinks and stat.S_ISLNK(info.st_mode):
            # links to directories are not being followed: skip quietly
            continue

        key = (info.st_dev, info.st_ino)
        if key in seen:
            on_error(OSError(errno.ELOOP, "directory loop detected", path))
            continue

        # Newly found directories go into a separate set: only directories
        # known before this call (plus curr_dir) count as loops here.
        kept.append(name)
        pending.add(key)

    return kept, seen | pending
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号