generic_newspaper.py 文件源码

python
阅读 13 收藏 0 点赞 0 评论 0

项目:RedditDownloader 作者: shadowmoose 项目源码 文件源码
def handle(url, data):
    """Save the lead image of the article at ``url`` to disk.

    The article is fetched and parsed with newspaper's ``Article``; if it
    exposes a ``top_image``, that image is downloaded and written to
    ``data['single_file'] % ext``, where ``ext`` is derived from the
    response's Content-Type header.

    Args:
        url: Address of the article page to inspect.
        data: Mapping that must provide:
            'user_agent'  - User-Agent string sent with every request.
            'single_file' - %-format template taking the file extension.
            'parent_dir'  - directory created if it does not exist yet.

    Returns:
        The path of the image file (already existing or newly written) on
        success, otherwise ``False``. This function never raises: any
        failure is printed and reported as ``False``.
    """
    try:
        config = Config()
        config.browser_user_agent = data['user_agent']
        article = Article(url, config)
        article.download()
        article.parse()

        image_url = article.top_image
        if not image_url:
            return False
        print('\t\tNewspaper located image: %s' % image_url)

        r = requests.get(image_url, headers={'User-Agent': data['user_agent']}, stream=True)
        try:
            if r.status_code != 200:
                # Report the image URL: that is the resource that answered
                # with the bad status, not the article page.
                print('\t\tError Reading Image: %s responded with code %i!' % (image_url, r.status_code))
                return False

            # Content-Type may carry parameters ("image/jpeg; charset=...")
            # and arbitrary casing; mimetypes only knows the bare type.
            content_type = r.headers.get('content-type', '').split(';', 1)[0].strip().lower()
            ext = mimetypes.guess_extension(content_type)
            if not ext:
                print('\t\tNewsPaper Error locating file MIME Type: %s' % url)
                return False
            if '.jp' in ext:
                # Normalise the JPEG variants (.jpe, .jpeg, ...) to one suffix.
                ext = '.jpg'

            path = data['single_file'] % ext
            if not os.path.isfile(path):
                if not os.path.isdir(data['parent_dir']):
                    print("\t\t+Building dir: %s" % data['parent_dir'])
                    # Parent dir for the full filepath is supplied already.
                    os.makedirs(data['parent_dir'])
                try:
                    with open(path, 'wb') as f:
                        # Let urllib3 undo gzip/deflate transfer encoding.
                        r.raw.decode_content = True
                        shutil.copyfileobj(r.raw, f)
                except Exception:
                    # Do not leave a truncated file behind: a later call
                    # would see it via isfile() and report it as a success.
                    try:
                        os.remove(path)
                    except OSError:
                        pass
                    raise
            return path
        finally:
            # stream=True keeps the connection checked out until closed.
            r.close()
    except Exception as e:
        print('\t\t"Newspaper" Generic handler failed. '+(str(e).strip()) )
    return False
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号