harvest_public_document.py 文件源码-python代码片段

harvest_public_document.py 文件源码

python

阅读 22 收藏 0 点赞 0 评论 0

def download_files(self, url, folder_domain):
        filename = url.split('/')[-1]
        full_filename = 'belatiFiles/{}/{}'.format(folder_domain, filename)
        full_filename_location = '{}/belatiFiles/{}/{}'.format(util.get_current_work_dir(), folder_domain, filename)
        meta = MetaExifExtractor()

        if not os.path.exists(os.path.dirname(full_filename)):
            try:
                os.makedirs(os.path.dirname(full_filename))
            except OSError as exc: # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

        with tqdm(unit='B', unit_scale=True, miniters=1,desc=filename) as t:
            try:
                urllib.urlretrieve(url, filename=full_filename,reporthook=self.my_hook(t), data=None)
            except:
                pass

        meta_exif_json = meta.extract_json(full_filename_location)
        self.db.insert_public_doc(self.project_id, str(os.path.splitext(filename)[1]), str(url), str(full_filename), str(full_filename_location), str(meta_exif_json))