cninfospider.py 文件源码-python代码片段

cninfospider.py 文件源码

python

阅读 39 收藏 0 点赞 0 评论 0

项目：cninfospider 作者: CloudDataX 项目源码文件源码

def downloadPDF(self, companyFolder, reportName, downloadURL, downloadTime):
        """Download one report file into ``companyFolder`` with bounded retries.

        The file is fetched from ``self.homePage + "/" + downloadURL`` and
        stored as ``<companyFolder>/<reportName><ext>``, where ``<ext>`` is the
        lower-cased extension of ``downloadURL``.

        Args:
            companyFolder: directory the file is written into.
            reportName: file name (without extension) to save under.
            downloadURL: URL path relative to ``self.homePage``.
            downloadTime: retry budget. As in the original implementation the
                budget is decremented before the first try, so at most
                ``downloadTime - 1`` download attempts are made.

        Returns:
            The path of the file on disk (either freshly downloaded or already
            present), or ``False`` when the retry budget is exhausted.
        """
        # Keep the historical "decrement first" budget, but use <= so that a
        # zero/negative budget stops immediately instead of retrying forever.
        attemptsLeft = downloadTime - 1
        if attemptsLeft <= 0:
            return False

        # splitext takes the extension after the LAST dot of the final path
        # component; searching for the first '.' breaks on dotted hosts,
        # directories or file stems, and on URLs with no dot at all.
        suffix = os.path.splitext(downloadURL)[1].lower()
        print("downloadPDF suffix %s" % suffix)

        pdfPath = os.path.join(companyFolder, reportName + '.pdf')
        filePath = os.path.join(companyFolder, reportName + suffix)

        # A .pdf left under the same report name does not belong to a
        # non-PDF download, so drop it.
        if ".pdf" != suffix and os.path.exists(pdfPath):
            os.remove(pdfPath)

        realURL = self.homePage + "/" + downloadURL
        print("Download pdfPath: %s  realURL: %s" % (filePath, realURL))

        if os.path.exists(filePath):
            print("WRN:  %s is already exists" % filePath)
            return filePath

        while attemptsLeft > 0:
            attemptsLeft -= 1
            try:
                urllib.urlretrieve(realURL, filePath)
            except Exception as e:
                # Deliberately broad: any failure just consumes one retry.
                print("ERR: download failed (%r), retries left: %d"
                      % (e, attemptsLeft))
                # Remove a partially written file so that neither the next
                # attempt nor a later call mistakes it for a finished download.
                if os.path.exists(filePath):
                    try:
                        os.remove(filePath)
                    except OSError as removeError:
                        print("WRN: cannot remove partial file %s (%r)"
                              % (filePath, removeError))
            else:
                gc.collect()
                # Return the file that was actually written; for non-PDF
                # downloads pdfPath does not exist.
                return filePath
            finally:
                urllib.urlcleanup()

        return False