def _convert(self, item, spider):
    """Merge the item's downloaded images into a single PDF.

    Runs the ``convert`` command from ImageMagick with every image listed
    in ``item['images']`` as input and ``<FILES_STORE>/<spider>/<id>.pdf``
    as output. The PDF file name is stored in ``item['pdf_file']``.

    Args:
        item: mapping providing ``'images'`` (an iterable of mappings with
            a ``'path'`` key), ``'id'`` and ``'spider'``; ``'pdf_file'``
            is written by this method.
        spider: object whose ``crawler.settings['FILES_STORE']`` is the
            root directory of the downloaded files.

    Returns:
        The same ``item``, with ``item['pdf_file']`` set.

    Raises:
        DropItem: if ``convert`` exits with a non-zero status.
    """
    # Local import: the file's top-level import block is not visible from
    # this block, so the dependency is brought into scope here.
    import os

    datapath = spider.crawler.settings['FILES_STORE']
    # Join with a path separator so the result is correct whether or not
    # FILES_STORE ends with a slash. The leading slash is stripped from the
    # image path so os.path.join does not discard the root.
    # NOTE(review): assumes im['path'] is relative to FILES_STORE - confirm.
    image_files = [
        os.path.join(datapath, im['path'].lstrip('/'))
        for im in item['images']
    ]
    item['pdf_file'] = '%s.pdf' % item['id']
    dest = '{root}/{spider}/{file}'.format(
        root=datapath,
        spider=item['spider'],
        file=item['pdf_file'],
    )
    print("file:" + dest)

    # Use convert command from ImageMagick.
    cmd = ['convert'] + image_files + [dest]
    # communicate() drains both pipes, so the child cannot block on a full
    # pipe buffer (which an unread stdout=PIPE allows), and stderr becomes
    # available for reporting.
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    _, errors = proc.communicate()
    if proc.returncode != 0:
        # Same summary line that check_call's exception used to print.
        print(subprocess.CalledProcessError(proc.returncode, cmd))
        if errors:
            print(errors)
        raise DropItem("failed to generate PDF")
    return item
评论列表
文章目录