def main():
args = parse_args()
logging.basicConfig(level=(logging.WARN if args.quiet else logging.INFO))
# Don't allow more than 10 concurrent requests to the wayback machine
concurrency = min(args.concurrency, 10)
# Scrape results are stored in a temporary folder if no folder specified
target_folder = args.target_folder if args.target_folder else tempfile.gettempdir()
logger.info('Writing scrape results in the folder {target_folder}'.format(target_folder=target_folder))
# Parse the period entered by the user (throws an exception if the dates are not correctly formatted)
from_date = datetime.strptime(args.from_date, CLI_DATE_FORMAT)
to_date = datetime.strptime(args.to_date, CLI_DATE_FORMAT)
# The scraper downloads the elements matching the given xpath expression in the target folder
scraper = Scraper(target_folder, args.xpath)
# Launch the scraping using the scraper previously instantiated
scrape_archives(args.website_url, scraper.scrape, from_date, to_date, args.user_agent, timedelta(days=args.delta),
concurrency)
评论列表
文章目录