cli.py 文件源码

python
阅读 24 收藏 0 点赞 0 评论 0

项目:waybackscraper 作者: abrenaut 项目源码 文件源码
def main():
    """Run the command line interface: parse the arguments and launch the scraping.

    The log level is WARN when the ``quiet`` argument is set and INFO otherwise.
    Concurrency is capped at 10 requests, and results are written to the
    system temporary folder when no target folder is given.

    Raises:
        ValueError: if ``from_date`` or ``to_date`` does not match ``CLI_DATE_FORMAT``.
    """
    cli_args = parse_args()

    log_level = logging.WARN if cli_args.quiet else logging.INFO
    logging.basicConfig(level=log_level)

    # The wayback machine must never receive more than 10 concurrent requests
    max_concurrency = 10
    worker_count = min(cli_args.concurrency, max_concurrency)

    # Fall back to the system temporary folder when the user did not pick one
    output_folder = cli_args.target_folder or tempfile.gettempdir()
    logger.info('Writing scrape results in the folder {target_folder}'.format(target_folder=output_folder))

    # Convert the requested period boundaries into datetime objects
    # (strptime raises ValueError on a badly formatted date)
    period_start = datetime.strptime(cli_args.from_date, CLI_DATE_FORMAT)
    period_end = datetime.strptime(cli_args.to_date, CLI_DATE_FORMAT)

    # The scraper is built from the output folder and the user's xpath expression
    archive_scraper = Scraper(output_folder, cli_args.xpath)

    # Hand everything over to the archive scraping routine
    step = timedelta(days=cli_args.delta)
    scrape_archives(
        cli_args.website_url,
        archive_scraper.scrape,
        period_start,
        period_end,
        cli_args.user_agent,
        step,
        worker_count,
    )
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号