def getBytesFromURL(url, handle=None, max_attempts=1, connect_timeout=None, timeout=None, gzip=True):
    """Download *url* with pycurl and return the response body in a BytesIO.

    Args:
        url: Address to fetch. It is percent-quoted with ``safe=':/?='``
            before being handed to curl.
            NOTE(review): characters such as ``&`` and ``%`` are therefore
            escaped too -- confirm that callers never pass pre-encoded URLs
            or multi-parameter query strings.
        handle: Optional existing ``pycurl.Curl`` object to reuse. When it is
            falsy a new handle is created for this call and closed before
            returning; a caller-supplied handle is left open.
        max_attempts: Maximum number of transfers to try.
        connect_timeout: Value for ``CONNECTTIMEOUT``; skipped when falsy.
        timeout: Value for ``TIMEOUT``; skipped when falsy.
        gzip: When true, ask the server for ``gzip, deflate`` encoding.

    Returns:
        The ``BytesIO`` holding the body of the first response whose HTTP
        status is 200, or ``None`` when no attempt produced a 200.

    Raises:
        pycurl.error: If the final attempt fails at the transport level.
            Transport failures on earlier attempts are logged and retried.
    """
    # Remember whether the handle is ours so that we only close what we made.
    owns_handle = not handle
    if owns_handle:
        handle = pycurl.Curl()
    try:
        url = quote(url, safe=':/?=')
        b = BytesIO()
        handle.setopt(handle.URL, url)
        if connect_timeout:
            handle.setopt(handle.CONNECTTIMEOUT, connect_timeout)
        if timeout:
            handle.setopt(handle.TIMEOUT, timeout)
        handle.setopt(pycurl.USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36")
        if gzip:
            handle.setopt(handle.ENCODING, 'gzip, deflate')
        handle.setopt(handle.WRITEFUNCTION, b.write)

        attempts = 0
        while attempts < max_attempts:
            if attempts > 0:
                # Fixed pause between retries.
                time.sleep(2)
            # Drop whatever a previous failed attempt wrote (e.g. an error
            # page); otherwise a later success would be appended after it.
            b.seek(0)
            b.truncate()
            attempts += 1
            try:
                handle.perform()
            except pycurl.error as exc:
                # Timeouts / connection failures surface as exceptions rather
                # than HTTP codes. Retry them like any other failed attempt,
                # but let the last one propagate so callers still see it.
                if attempts >= max_attempts:
                    raise
                msgr.send_tmsg("Transfer error: {} while trying to retrieve URL: {}"
                               .format(exc, url), logging.WARN)
                continue
            code = handle.getinfo(handle.RESPONSE_CODE)
            if code == 200:
                return b
            msgr.send_tmsg("HTTP Code: {} while trying to retrieve URL: {}"
                           .format(code, url), logging.WARN)
        return None
    finally:
        if owns_handle:
            handle.close()
评论列表
文章目录