CVPR2013.py 文件源码-python代码片段

CVPR2013.py 文件源码

python

阅读 39 收藏 0 点赞 0 评论 0

项目：PaperCrawler 作者: JustJokerX 项目源码文件源码

def get_pdf(html):
    """Download every PDF linked from *html* into the ``CVPR2013`` directory.

    Links are recognised by the pattern ``href="<path>.pdf">pdf``; each
    captured path is appended to the cv-foundation open-access base URL.
    The local file name is the part of the path that follows ``papers/``.
    Files already present locally are not downloaded again.

    Args:
        html: Text of the index page to scan for PDF links.

    Returns:
        None. The function works through side effects: it creates the
        ``CVPR2013`` directory if it is missing, writes the downloaded
        files into it, and reports progress through ``prgbar.ProgressBar``.
    """
    base_url = 'http://www.cv-foundation.org/openaccess/'
    dir_name = 'CVPR2013'

    # Both patterns are loop-invariant, so compile them exactly once.
    link_re = re.compile(r'href="(.+?\.pdf)">pdf')
    name_re = re.compile(r'papers/(.+?\.pdf)')

    pdflist = link_re.findall(html)
    pbar = prgbar.ProgressBar(total=len(pdflist))

    if not os.path.exists(dir_name):
        os.mkdir(dir_name)

    for idx, pdfurl in enumerate(pdflist, 1):
        url = base_url + pdfurl
        pbar.log(url)

        match = name_re.search(pdfurl)
        if match is None:
            # A link without a 'papers/' segment gives no file name; skip it
            # rather than letting an IndexError abort the whole crawl.
            pbar.log('Skip')
        else:
            filename = dir_name + '/' + match.group(1)
            if os.path.exists(filename):
                pbar.log('Exist')
            else:
                # Fetch into a temporary name and rename only on success, so
                # an interrupted transfer never leaves a truncated file that
                # a later run would mistake for a finished download.
                # NOTE: urllib.urlretrieve is the Python 2 location of this
                # function; Python 3 moved it to urllib.request.urlretrieve.
                partial = filename + '.part'
                urllib.urlretrieve(url, partial)
                os.rename(partial, filename)

        pbar.update(index=idx)

    pbar.finish()