def __init__(self, seeds=None, login_credentials=None, profile=None):
    """Create the link extractors and load optional seed/credential files.

    Args:
        seeds: Optional path to a UTF-8 text file with one start URL per
            line. Surrounding whitespace is stripped; blank lines and
            lines beginning with ``#`` are skipped. When falsy,
            ``start_urls`` is not assigned here.
        login_credentials: Optional path to a UTF-8 JSON file. Its parsed
            content is stored in ``self.login_credentials``; when falsy,
            that attribute is set to ``None``.
        profile: Optional value passed to ``setup_profiling``; profiling
            is only set up when it is truthy.
    """
    super().__init__()

    # Three extractors with different filters. NOTE(review): assuming this
    # is Scrapy's LinkExtractor, ``deny_extensions=[]`` replaces the default
    # ignored-extension list so file/image links are kept -- confirm.
    self.le = LinkExtractor(canonicalize=False)
    self.files_le = LinkExtractor(deny_extensions=[], canonicalize=False)
    self.images_le = LinkExtractor(
        tags=['img'], attrs=['src'], deny_extensions=[], canonicalize=False)

    if seeds:
        with Path(seeds).open('rt', encoding='utf8') as f:
            stripped_lines = (line.strip() for line in f)
            # Skip blank lines as well as '#' comments: an empty string
            # is not a usable start URL.
            self.start_urls = [
                url for url in stripped_lines
                if url and not url.startswith('#')]

    if login_credentials:
        with Path(login_credentials).open('rt', encoding='utf8') as f:
            self.login_credentials = json.load(f)
    else:
        self.login_credentials = None

    if profile:
        setup_profiling(profile)
评论列表
文章目录