pipelines.py 文件源码-python代码片段

pipelines.py 文件源码

python

阅读 20 收藏 0 点赞 0 评论 0

项目：pydata_webscraping 作者: jmortega 项目源码文件源码

def _convert(self, item, spider):
        """Merge the images downloaded for ``item`` into a single PDF file.

        The image locations are read from the ``path`` key of every entry in
        ``item['images']`` and resolved against the ``FILES_STORE`` setting of
        the running crawler. The PDF is written to
        ``<FILES_STORE>/<item['spider']>/<item['id']>.pdf`` by the ``convert``
        command from ImageMagick, and its file name is stored in
        ``item['pdf_file']``.

        Args:
            item: mapping with the keys ``images``, ``id`` and ``spider``.
            spider: running spider; only ``spider.crawler.settings`` is read.

        Returns:
            The same ``item``, with ``pdf_file`` filled in.

        Raises:
            DropItem: if ``convert`` cannot be started or exits with a
                non-zero status.
        """
        # Imported locally so the block does not depend on the module's
        # import section.
        import os

        datapath = spider.crawler.settings['FILES_STORE']

        # Join with os.path.join so FILES_STORE works with or without a
        # trailing separator. A leading '/' on the stored path is stripped so
        # that join() cannot discard the root directory.
        # NOTE(review): assumes the stored paths are relative to FILES_STORE
        # -- confirm against the images pipeline that fills item['images'].
        image_files = [
            os.path.join(datapath, im['path'].lstrip('/'))
            for im in item['images']
        ]

        item['pdf_file'] = '%s.pdf' % item['id']
        dest = os.path.join(datapath, item['spider'], item['pdf_file'])
        print("file:" + dest)

        # Use convert command from ImageMagick.
        cmd = ['convert'] + image_files + [dest]
        try:
            # check_output drains the pipe while the command runs, so the
            # child cannot block on a full pipe buffer; stderr is merged in
            # so the diagnostics are available when the command fails.
            subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as detail:
            print(detail)
            if detail.output:
                print(detail.output)
            raise DropItem("failed to generate PDF")
        except OSError as detail:
            # Raised when the convert executable cannot be started at all
            # (for example, ImageMagick is not installed).
            print(detail)
            raise DropItem("failed to generate PDF")

        return item