def handle(url, data):
    """Download the top image of the article at *url* via Newspaper.

    The article is fetched and parsed with ``Article``; if it exposes a
    ``top_image``, that image is requested and written to disk.

    Args:
        url: Address of the article page to inspect.
        data: Mapping read by this handler with the keys:
            ``'user_agent'``  - User-Agent string used for both the article
                                and the image request.
            ``'single_file'`` - %-format template that receives the file
                                extension (e.g. ``'.jpg'``) and yields the
                                destination path.
            ``'parent_dir'``  - Directory that is created if it is missing
                                before the file is written.

    Returns:
        The destination path when the image was saved or a file already
        exists at that path; ``False`` on any failure (no top image, non-200
        response, unknown MIME type, or any raised exception).
    """
    try:
        config = Config()
        config.browser_user_agent = data['user_agent']
        article = Article(url, config)
        article.download()
        article.parse()
        if not article.top_image:
            return False
        print('\t\tNewspaper located image: %s' % article.top_image)

        r = requests.get(article.top_image, headers={'User-Agent': data['user_agent']}, stream=True)
        try:
            if r.status_code != 200:
                print('\t\tError Reading Image: %s responded with code %i!' % (url, r.status_code))
                return False

            # Content-Type may carry parameters ("image/jpeg; charset=binary")
            # that guess_extension() does not understand; keep the bare type.
            # A missing header is treated as an unknown MIME type.
            content_type = r.headers.get('content-type', '').split(';')[0].strip()
            ext = mimetypes.guess_extension(content_type)
            if not ext:
                print('\t\tNewsPaper Error locating file MIME Type: %s' % url)
                return False
            # Normalise the JPEG variants (.jpe, .jpeg, ...) to a single suffix.
            if '.jp' in ext:
                ext = '.jpg'

            path = data['single_file'] % ext
            if not os.path.isfile(path):
                if not os.path.isdir(data['parent_dir']):
                    print("\t\t+Building dir: %s" % data['parent_dir'])
                    # Parent dir for the full filepath is supplied already.
                    os.makedirs(data['parent_dir'])
                with open(path, 'wb') as f:
                    # Have urllib3 undo gzip/deflate transfer encoding while streaming.
                    r.raw.decode_content = True
                    shutil.copyfileobj(r.raw, f)
            return path
        finally:
            # With stream=True the connection is held until the response is
            # closed; release it on every exit path.
            r.close()
    except Exception as e:
        # Top-level boundary for this handler: report the failure and fall
        # through to the False return instead of propagating.
        print('\t\t"Newspaper" Generic handler failed. ' + (str(e).strip()))
    return False
评论列表
文章目录