bitcointalk_parse.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:bitcointalk-sentiment 作者: DolphinBlockchainIntelligence 项目源码 文件源码
def _sleepWithJitter(base, spread):
    """Sleep for ``base`` seconds plus an integer jitter drawn from [-spread, spread).

    The total is clamped at zero so an unlucky jitter can never hand a
    negative value to ``time.sleep``; a non-positive ``spread`` disables the
    jitter instead of making ``random.randrange`` raise on an empty range.
    """
    jitter = random.randrange(-spread, spread, 1) if spread > 0 else 0
    time.sleep(max(0, base + jitter))


def _dumpErrorPage(body):
    """Write ``body`` to ``error_page_500.dmp`` for later inspection (best effort)."""
    import io  # io.open gives explicit-encoding text files on Python 2 and 3

    try:
        # Explicit UTF-8: the page text may contain non-ASCII characters.
        with io.open("error_page_500.dmp", "w", encoding="utf-8") as dumpFile:
            dumpFile.write(body)
    except (IOError, OSError) as err:
        # A failed debug dump must not be mistaken for a proxy failure.
        print("could not write error_page_500.dmp: %s" % (err,))


def requestURL(callPoint, url):
    """Fetch ``url`` through the current proxy, retrying until it succeeds.

    The request uses the module-level ``headers``, ``proxy`` and
    ``PROXY_TIMEOUT``. A response is accepted only when it has status 200 and
    its text contains none of the known error-page markers; otherwise the
    function sleeps, calls ``rotateProxy`` and tries again. There is no retry
    limit, so the call blocks until a request succeeds.

    Args:
        callPoint: Label identifying the caller. Currently unused; kept so
            existing call sites keep working.
        url: Address to request.

    Returns:
        The text of the first successful response.

    Note:
        ``rotateProxy()`` is called without arguments when the proxy is blamed
        and with ``failed=False`` when the forum itself is at fault or the
        request succeeded -- presumably the default marks the proxy as failed;
        confirm against ``rotateProxy``.
    """
    # Throttle every request before it is sent.
    _sleepWithJitter(PARSING_SLEEP, PARSING_SLEEP_RAND_RANGE)
    while True:
        try:
            r = requests.get(url, headers=headers, proxies=proxy, timeout=PROXY_TIMEOUT)
            body = r.text  # read once instead of once per marker check

            if ('Busy, try again (504)' in body
                    or '<h1>Busy, try again (502)</h1>' in body):
                proxyAtFault, pause = True, TIMEOUT_RETRY
            elif '<head><title>500 Internal Server Error</title></head>' in body:
                _dumpErrorPage(body)
                proxyAtFault, pause = False, TIMEOUT_RETRY
            elif 'Sorry, SMF was unable to connect to the database' in body:
                # Database outage on the forum side: back off ten times longer.
                proxyAtFault, pause = False, TIMEOUT_RETRY * 10
            elif r.status_code != 200:
                proxyAtFault, pause = True, TIMEOUT_RETRY
            else:
                break

            if proxyAtFault:
                print("proxy failed:   %s" % (proxy['https'],))
                _sleepWithJitter(pause, TIMEOUT_RAND_RANGE)
                rotateProxy()
            else:
                print("Forum failed, need to take a timeout")
                _sleepWithJitter(pause, TIMEOUT_RAND_RANGE)
                rotateProxy(failed=False)
        except Exception:
            # Any ordinary error (timeout, connection reset, ...) is treated as
            # a proxy failure. KeyboardInterrupt and SystemExit are deliberately
            # not caught so the endless retry loop can still be interrupted.
            print("proxy failed:   %s" % (proxy['https'],))
            _sleepWithJitter(TIMEOUT_RETRY, TIMEOUT_RAND_RANGE)
            rotateProxy()

    rotateProxy(failed=False)
    print("URL request success")
    return r.text
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号