def test_post(self):
    curl = CurlStub(b"result")
    result = fetch("http://example.com", post=True, curl=curl)
    self.assertEqual(result, b"result")
    self.assertEqual(curl.options,
                     {pycurl.URL: b"http://example.com",
                      pycurl.FOLLOWLOCATION: 1,
                      pycurl.MAXREDIRS: 5,
                      pycurl.CONNECTTIMEOUT: 30,
                      pycurl.LOW_SPEED_LIMIT: 1,
                      pycurl.LOW_SPEED_TIME: 600,
                      pycurl.NOSIGNAL: 1,
                      pycurl.WRITEFUNCTION: Any(),
                      pycurl.POST: True,
                      pycurl.DNS_CACHE_TIMEOUT: 0,
                      pycurl.ENCODING: b"gzip,deflate"})
def test_post_data(self):
    curl = CurlStub(b"result")
    result = fetch("http://example.com", post=True, data="data", curl=curl)
    self.assertEqual(result, b"result")
    self.assertEqual(curl.options[pycurl.READFUNCTION](), b"data")
    self.assertEqual(curl.options,
                     {pycurl.URL: b"http://example.com",
                      pycurl.FOLLOWLOCATION: 1,
                      pycurl.MAXREDIRS: 5,
                      pycurl.CONNECTTIMEOUT: 30,
                      pycurl.LOW_SPEED_LIMIT: 1,
                      pycurl.LOW_SPEED_TIME: 600,
                      pycurl.NOSIGNAL: 1,
                      pycurl.WRITEFUNCTION: Any(),
                      pycurl.POST: True,
                      pycurl.POSTFIELDSIZE: 4,
                      pycurl.READFUNCTION: Any(),
                      pycurl.DNS_CACHE_TIMEOUT: 0,
                      pycurl.ENCODING: b"gzip,deflate"})
def test_cainfo(self):
    curl = CurlStub(b"result")
    result = fetch("https://example.com", cainfo="cainfo", curl=curl)
    self.assertEqual(result, b"result")
    self.assertEqual(curl.options,
                     {pycurl.URL: b"https://example.com",
                      pycurl.FOLLOWLOCATION: 1,
                      pycurl.MAXREDIRS: 5,
                      pycurl.CONNECTTIMEOUT: 30,
                      pycurl.LOW_SPEED_LIMIT: 1,
                      pycurl.LOW_SPEED_TIME: 600,
                      pycurl.NOSIGNAL: 1,
                      pycurl.WRITEFUNCTION: Any(),
                      pycurl.CAINFO: b"cainfo",
                      pycurl.DNS_CACHE_TIMEOUT: 0,
                      pycurl.ENCODING: b"gzip,deflate"})
def test_headers(self):
    curl = CurlStub(b"result")
    result = fetch("http://example.com",
                   headers={"a": "1", "b": "2"}, curl=curl)
    self.assertEqual(result, b"result")
    self.assertEqual(curl.options,
                     {pycurl.URL: b"http://example.com",
                      pycurl.FOLLOWLOCATION: 1,
                      pycurl.MAXREDIRS: 5,
                      pycurl.CONNECTTIMEOUT: 30,
                      pycurl.LOW_SPEED_LIMIT: 1,
                      pycurl.LOW_SPEED_TIME: 600,
                      pycurl.NOSIGNAL: 1,
                      pycurl.WRITEFUNCTION: Any(),
                      pycurl.HTTPHEADER: ["a: 1", "b: 2"],
                      pycurl.DNS_CACHE_TIMEOUT: 0,
                      pycurl.ENCODING: b"gzip,deflate"})
def test_pycurl_insecure(self):
    curl = CurlStub(b"result")
    result = fetch("http://example.com/get-ca-cert", curl=curl,
                   insecure=True)
    self.assertEqual(result, b"result")
    self.assertEqual(curl.options,
                     {pycurl.URL: b"http://example.com/get-ca-cert",
                      pycurl.FOLLOWLOCATION: 1,
                      pycurl.MAXREDIRS: 5,
                      pycurl.CONNECTTIMEOUT: 30,
                      pycurl.LOW_SPEED_LIMIT: 1,
                      pycurl.LOW_SPEED_TIME: 600,
                      pycurl.NOSIGNAL: 1,
                      pycurl.WRITEFUNCTION: Any(),
                      pycurl.SSL_VERIFYPEER: False,
                      pycurl.DNS_CACHE_TIMEOUT: 0,
                      pycurl.ENCODING: b"gzip,deflate"})
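# The fetch() tests above rely on two helpers that are not part of this
# listing: a CurlStub that records every setopt() call and an Any() matcher
# that compares equal to anything. The versions below are a minimal sketch of
# what such test doubles could look like; the real project's implementations
# may differ.
class CurlStub(object):
    """Hypothetical pycurl.Curl replacement that records setopt() calls."""

    def __init__(self, result=None):
        self.result = result
        self.options = {}
        self.performed = False

    def setopt(self, option, value):
        if self.performed:
            raise AssertionError("setopt() called after perform()")
        self.options[option] = value

    def perform(self):
        # Hand the canned payload to whatever WRITEFUNCTION was registered.
        self.options[pycurl.WRITEFUNCTION](self.result)
        self.performed = True


class Any(object):
    """Matcher that compares equal to any value (used for callbacks)."""

    def __eq__(self, other):
        return True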
def initHandle(self):
    """ sets common options to curl handle """
    self.c.setopt(pycurl.FOLLOWLOCATION, 1)
    self.c.setopt(pycurl.MAXREDIRS, 5)
    self.c.setopt(pycurl.CONNECTTIMEOUT, 30)
    self.c.setopt(pycurl.NOSIGNAL, 1)
    self.c.setopt(pycurl.NOPROGRESS, 1)
    if hasattr(pycurl, "AUTOREFERER"):
        self.c.setopt(pycurl.AUTOREFERER, 1)
    self.c.setopt(pycurl.SSL_VERIFYPEER, 0)
    self.c.setopt(pycurl.LOW_SPEED_TIME, 30)
    self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5)
    # self.c.setopt(pycurl.VERBOSE, 1)
    self.c.setopt(pycurl.USERAGENT,
                  "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
    if pycurl.version_info()[7]:
        self.c.setopt(pycurl.ENCODING, "gzip, deflate")
    self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
                                      "Accept-Language: en-US,en",
                                      "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
                                      "Connection: keep-alive",
                                      "Keep-Alive: 300",
                                      "Expect:"])
def download_preparing(self, pyfile):
    if not isinstance(pyfile.plugin.req, HTTPRequest):
        return
    connecttimeout = self.config.get('connecttimeout')
    maxredirs = self.config.get('maxredirs')
    useragent = self.config.get('useragent')
    if connecttimeout:
        pyfile.plugin.req.http.c.setopt(
            pycurl.CONNECTTIMEOUT, connecttimeout)
    if maxredirs:
        pyfile.plugin.req.http.c.setopt(pycurl.MAXREDIRS, maxredirs)
    if useragent:
        self.log_debug("Use custom user-agent string `%s`" % useragent)
        pyfile.plugin.req.http.c.setopt(
            pycurl.USERAGENT, encode(useragent))
def get_page_data(url, head=None, curl=None):
    stream_buffer = StringIO()
    if not curl:
        curl = pycurl.Curl()
    curl.setopt(pycurl.URL, url)  # curl doesn't support unicode
    if head:
        curl.setopt(pycurl.HTTPHEADER, head)  # must be a list, not a dict
    curl.setopt(pycurl.WRITEFUNCTION, stream_buffer.write)
    curl.setopt(pycurl.CUSTOMREQUEST, "GET")
    curl.setopt(pycurl.CONNECTTIMEOUT, 30)
    curl.setopt(pycurl.TIMEOUT, 30)
    curl.setopt(pycurl.SSL_VERIFYPEER, 0)
    curl.setopt(pycurl.SSL_VERIFYHOST, 0)
    curl.perform()
    page_data = stream_buffer.getvalue()
    stream_buffer.close()
    return page_data
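# A minimal usage sketch for get_page_data(); the URL and header values are
# placeholders, not from the original project:
html = get_page_data("http://example.com",
                     head=["Accept: text/html", "Connection: keep-alive"])
print(len(html))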
def post_page_data(url, data=None, head=None, curl=None):
    stream_buffer = StringIO()
    if not curl:
        curl = pycurl.Curl()
    curl.setopt(pycurl.URL, url)  # curl doesn't support unicode
    if head:
        curl.setopt(pycurl.HTTPHEADER, head)  # must be a list, not a dict
    curl.setopt(pycurl.POSTFIELDS, data)
    curl.setopt(pycurl.WRITEFUNCTION, stream_buffer.write)
    curl.setopt(pycurl.CUSTOMREQUEST, "POST")
    # curl.setopt(pycurl.CONNECTTIMEOUT, 30)
    # curl.setopt(pycurl.TIMEOUT, 30)
    curl.perform()
    page_data = stream_buffer.getvalue()
    stream_buffer.close()
    return page_data
def Curl(url, headers):
    while 1:
        try:
            c = pycurl.Curl()
            c.setopt(pycurl.REFERER, 'http://weixin.sogou.com/')
            c.setopt(pycurl.FOLLOWLOCATION, True)
            c.setopt(pycurl.MAXREDIRS, 5)
            c.setopt(pycurl.CONNECTTIMEOUT, 60)
            c.setopt(pycurl.TIMEOUT, 120)
            c.setopt(pycurl.ENCODING, 'gzip,deflate')
            c.fp = StringIO.StringIO()
            c.setopt(pycurl.URL, url)
            c.setopt(pycurl.HTTPHEADER, headers)
            c.setopt(c.WRITEFUNCTION, c.fp.write)
            c.perform()
            html = c.fp.getvalue()
            if '??????' in html:  # anti-crawler marker page; the original marker string was lost to encoding
                print u'Blocked by anti-crawler, sleeping 10 minutes'
                time.sleep(600)
            else:
                return html
        except Exception, e:
            print url, 'curl(url)', e
            continue
# fetch a page with a POST request
def getHtml(url, headers):
    c = pycurl.Curl()                            # create a pycurl handle
    c.setopt(pycurl.URL, url)                    # URL to fetch
    c.setopt(pycurl.FOLLOWLOCATION, True)        # follow redirects
    c.setopt(pycurl.MAXREDIRS, 5)                # maximum number of redirects
    c.setopt(pycurl.CONNECTTIMEOUT, 60)          # connection timeout (seconds)
    c.setopt(pycurl.TIMEOUT, 120)                # total transfer timeout (seconds)
    c.setopt(pycurl.ENCODING, 'gzip,deflate')    # accept compressed responses; libcurl decodes them
    c.fp = StringIO.StringIO()                   # buffer for the response body
    c.setopt(pycurl.HTTPHEADER, headers)         # request headers
    c.setopt(pycurl.POST, 1)                     # use POST instead of GET
    c.setopt(pycurl.POSTFIELDS, data)            # POST body; 'data' is expected from the enclosing module
    c.setopt(c.WRITEFUNCTION, c.fp.write)        # write the response into the buffer
    c.perform()                                  # perform the request
    html = c.fp.getvalue()                       # read the response
    return html
def curl_read(url):
    try:
        c = pycurl.Curl()
        c.setopt(c.URL, url)
        resp = StringIO()
        headers = StringIO()
        c.setopt(c.WRITEFUNCTION, resp.write)
        c.setopt(c.HEADERFUNCTION, headers.write)
        c.setopt(pycurl.CONNECTTIMEOUT, 20)
        c.setopt(pycurl.TIMEOUT, 20)
        c.perform()
        if c.getinfo(c.RESPONSE_CODE) == 200:
            c.close()
            is_hit = handle_response(resp, headers)
            size = len(resp.getvalue())
            return True, is_hit, size
        return False, False, 0
    except:
        return False, False, 0
def get(url, user_agent=UA, referrer=None):
    """Make a GET request of the url using pycurl and return the data
    (which is None if unsuccessful)"""
    data = None
    databuffer = StringIO()
    curl = pycurl.Curl()
    curl.setopt(pycurl.URL, url)
    curl.setopt(pycurl.FOLLOWLOCATION, 1)
    curl.setopt(pycurl.CONNECTTIMEOUT, 5)
    curl.setopt(pycurl.TIMEOUT, 8)
    curl.setopt(pycurl.WRITEFUNCTION, databuffer.write)
    curl.setopt(pycurl.COOKIEFILE, '')
    if user_agent:
        curl.setopt(pycurl.USERAGENT, user_agent)
    if referrer is not None:
        curl.setopt(pycurl.REFERER, referrer)
    try:
        curl.perform()
        data = databuffer.getvalue()
    except Exception:
        pass
    curl.close()
    return data
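# Usage sketch for get(); the URLs are placeholders and UA is assumed to be a
# module-level default user-agent string defined elsewhere in the original
# project:
page = get("http://example.com", referrer="http://example.org/")
if page is None:
    print("request failed")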
def request(self, url, method, body, headers):
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    if 'proxy_host' in self.proxy:
        c.setopt(pycurl.PROXY, self.proxy['proxy_host'])
    if 'proxy_port' in self.proxy:
        c.setopt(pycurl.PROXYPORT, self.proxy['proxy_port'])
    if 'proxy_user' in self.proxy:
        c.setopt(pycurl.PROXYUSERPWD, "%(proxy_user)s:%(proxy_pass)s" % self.proxy)
    self.buf = StringIO()
    c.setopt(pycurl.WRITEFUNCTION, self.buf.write)
    # c.setopt(pycurl.READFUNCTION, self.read)
    # self.body = StringIO(body)
    # c.setopt(pycurl.HEADERFUNCTION, self.header)
    if self.cacert:
        c.setopt(c.CAINFO, self.cacert)
    c.setopt(pycurl.SSL_VERIFYPEER, self.cacert and 1 or 0)
    c.setopt(pycurl.SSL_VERIFYHOST, self.cacert and 2 or 0)
    c.setopt(pycurl.CONNECTTIMEOUT, self.timeout)
    c.setopt(pycurl.TIMEOUT, self.timeout)
    if method == 'POST':
        c.setopt(pycurl.POST, 1)
        c.setopt(pycurl.POSTFIELDS, body)
    if headers:
        hdrs = ['%s: %s' % (k, v) for k, v in headers.items()]
        log.debug(hdrs)
        c.setopt(pycurl.HTTPHEADER, hdrs)
    c.perform()
    c.close()
    return {}, self.buf.getvalue()
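# request() above is written as a method of a transport object that supplies
# self.proxy, self.cacert and self.timeout. A minimal, hypothetical holder for
# those attributes (class and names are illustrative, not from the original
# project):
class CurlTransport(object):
    """Hypothetical container for the attributes request() expects."""

    def __init__(self, timeout=60, cacert=None, proxy=None):
        self.timeout = timeout    # seconds, used for CONNECTTIMEOUT and TIMEOUT
        self.cacert = cacert      # path to a CA bundle, or None to disable TLS verification
        self.proxy = proxy or {}  # optional proxy_host / proxy_port / proxy_user / proxy_pass


# Calling the function above against the holder (assumes the enclosing module
# also provides StringIO and a 'log' logger, as in the original snippet):
# _, body = request(CurlTransport(timeout=30), "https://example.com", "GET", None, {})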
def request(self, url, method, body, headers):
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    if 'proxy_host' in self.proxy:
        c.setopt(pycurl.PROXY, self.proxy['proxy_host'])
    if 'proxy_port' in self.proxy:
        c.setopt(pycurl.PROXYPORT, self.proxy['proxy_port'])
    if 'proxy_user' in self.proxy:
        c.setopt(pycurl.PROXYUSERPWD, "%(proxy_user)s:%(proxy_pass)s" % self.proxy)
    self.buf = StringIO()
    c.setopt(pycurl.WRITEFUNCTION, self.buf.write)
    # c.setopt(pycurl.READFUNCTION, self.read)
    # self.body = StringIO(body)
    # c.setopt(pycurl.HEADERFUNCTION, self.header)
    if self.cacert:
        c.setopt(c.CAINFO, self.cacert)
    c.setopt(pycurl.SSL_VERIFYPEER, self.cacert and 1 or 0)
    c.setopt(pycurl.SSL_VERIFYHOST, self.cacert and 2 or 0)
    c.setopt(pycurl.CONNECTTIMEOUT, self.timeout / 6)
    c.setopt(pycurl.TIMEOUT, self.timeout)
    if method == 'POST':
        c.setopt(pycurl.POST, 1)
        c.setopt(pycurl.POSTFIELDS, body)
    if headers:
        hdrs = ['%s: %s' % (k, v) for k, v in headers.items()]
        log.debug(hdrs)
        c.setopt(pycurl.HTTPHEADER, hdrs)
    c.perform()
    c.close()
    return {}, self.buf.getvalue()
def test_basic(self):
    curl = CurlStub(b"result")
    result = fetch("http://example.com", curl=curl)
    self.assertEqual(result, b"result")
    self.assertEqual(curl.options,
                     {pycurl.URL: b"http://example.com",
                      pycurl.FOLLOWLOCATION: 1,
                      pycurl.MAXREDIRS: 5,
                      pycurl.CONNECTTIMEOUT: 30,
                      pycurl.LOW_SPEED_LIMIT: 1,
                      pycurl.LOW_SPEED_TIME: 600,
                      pycurl.NOSIGNAL: 1,
                      pycurl.WRITEFUNCTION: Any(),
                      pycurl.DNS_CACHE_TIMEOUT: 0,
                      pycurl.ENCODING: b"gzip,deflate"})
def test_create_curl(self):
    curls = []

    def pycurl_Curl():
        curl = CurlStub(b"result")
        curls.append(curl)
        return curl

    Curl = pycurl.Curl
    try:
        pycurl.Curl = pycurl_Curl
        result = fetch("http://example.com")
        curl = curls[0]
        self.assertEqual(result, b"result")
        self.assertEqual(curl.options,
                         {pycurl.URL: b"http://example.com",
                          pycurl.FOLLOWLOCATION: 1,
                          pycurl.MAXREDIRS: 5,
                          pycurl.CONNECTTIMEOUT: 30,
                          pycurl.LOW_SPEED_LIMIT: 1,
                          pycurl.LOW_SPEED_TIME: 600,
                          pycurl.NOSIGNAL: 1,
                          pycurl.WRITEFUNCTION: Any(),
                          pycurl.DNS_CACHE_TIMEOUT: 0,
                          pycurl.ENCODING: b"gzip,deflate"})
    finally:
        pycurl.Curl = Curl
def handle_request(self):
    curl_handle = pycurl.Curl()
    # set default options.
    curl_handle.setopt(pycurl.URL, self.request_url)
    curl_handle.setopt(pycurl.REFERER, self.request_url)
    curl_handle.setopt(pycurl.USERAGENT, self.useragent)
    curl_handle.setopt(pycurl.TIMEOUT, self.curlopts['TIMEOUT'])
    curl_handle.setopt(pycurl.CONNECTTIMEOUT, self.curlopts['CONNECTTIMEOUT'])
    curl_handle.setopt(pycurl.HEADER, True)
    # curl_handle.setopt(pycurl.VERBOSE, 1)
    curl_handle.setopt(pycurl.FOLLOWLOCATION, 1)
    curl_handle.setopt(pycurl.MAXREDIRS, 5)
    if self.request_headers and len(self.request_headers) > 0:
        tmplist = list()
        for (key, value) in self.request_headers.items():
            tmplist.append(key + ':' + value)
        curl_handle.setopt(pycurl.HTTPHEADER, tmplist)
    # send the body as a POST, tunnelling through any configured proxy
    curl_handle.setopt(pycurl.HTTPPROXYTUNNEL, 1)
    curl_handle.setopt(pycurl.POSTFIELDS, self.request_body)
    response = StringIO.StringIO()
    curl_handle.setopt(pycurl.WRITEFUNCTION, response.write)
    try:
        curl_handle.perform()
    except pycurl.error as error:
        raise ChannelException(error, 5)
    self.response_code = curl_handle.getinfo(curl_handle.HTTP_CODE)
    header_size = curl_handle.getinfo(curl_handle.HEADER_SIZE)
    resp_str = response.getvalue()
    self.response_headers = resp_str[0:header_size]
    self.response_body = resp_str[header_size:]
    response.close()
    curl_handle.close()
def getKeyword(i):  # fetch the keyword JSON for keyword_list[i]
    try:
        time.sleep(1)
        headers = [
            'Host:fengchao.baidu.com',
            'User-Agent: %s' % getUA(),
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Accept-Encoding: gzip, deflate',
            'Referer: http://fengchao.baidu.com/nirvana/main.html?userid=8048066',
            'Connection: keep-alive',
            'COOKIE:%s' % COOKIE,
        ]
        post = urllib.urlencode({
            'params': '{"entry":"kr_station","query":"%s","querytype":1,"pageNo":1,"pageSize":300}' % keyword_list[i],
            'path': 'jupiter/GET/kr/word',
            'token': TOKEN,
            'userid': USERID,
        })
        url = 'http://fengchao.baidu.com/nirvana/request.ajax?path=jupiter/GET/kr/word'
        c = pycurl.Curl()
        # c.setopt(pycurl.PROXY, getRandomAlbIp())
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.FOLLOWLOCATION, True)
        c.setopt(pycurl.MAXREDIRS, 5)
        c.setopt(pycurl.CONNECTTIMEOUT, 20)
        c.setopt(pycurl.TIMEOUT, 20)
        c.setopt(pycurl.ENCODING, 'gzip,deflate')
        c.fp = StringIO.StringIO()
        c.setopt(pycurl.HTTPHEADER, headers)
        c.setopt(pycurl.POST, 1)
        c.setopt(pycurl.POSTFIELDS, post)
        c.setopt(c.WRITEFUNCTION, c.fp.write)
        c.perform()
        # mutex.acquire()  # lock while touching shared state
        jsonData = c.fp.getvalue()
        analyseJsonData(i, jsonData)
        # mutex.release()  # unlock
    except Exception, e:
        print e
        pass
def curlRequest(self, url, headers=False, post=False, returnHeaders=True):
    ch = pycurl.Curl()
    ch.setopt(pycurl.URL, url)
    hdrs = [
        "Host: poloniex.com",
        "Connection: close",
        "User-Agent: Mozilla/5.0 (CLI; Linux x86_64) polproxy",
        "accept: application/json"
    ]
    if post != False:
        ch.setopt(pycurl.POSTFIELDS, post)
        hdrs = hdrs + ["content-type: application/x-www-form-urlencoded",
                       "content-length: " + str(len(post))]
    if headers != False:
        hdrs = hdrs + headers
    ch.setopt(pycurl.HTTPHEADER, hdrs)
    ch.setopt(pycurl.SSL_VERIFYHOST, 0)
    ch.setopt(pycurl.FOLLOWLOCATION, True)
    ch.setopt(pycurl.CONNECTTIMEOUT, 5)
    ch.setopt(pycurl.TIMEOUT, 5)
    ret = BytesIO()
    if returnHeaders:
        ch.setopt(pycurl.HEADERFUNCTION, ret.write)
    ch.setopt(pycurl.WRITEFUNCTION, ret.write)
    try:
        ch.perform()
    except:
        return ""
    ch.close()
    return ret.getvalue().decode("ISO-8859-1")
def init_handle(self):
    """
    Sets common options to curl handle.
    """
    self.setopt(pycurl.FOLLOWLOCATION, 1)
    self.setopt(pycurl.MAXREDIRS, 5)
    self.setopt(pycurl.CONNECTTIMEOUT, 30)
    self.setopt(pycurl.NOSIGNAL, 1)
    self.setopt(pycurl.NOPROGRESS, 1)
    if hasattr(pycurl, "AUTOREFERER"):
        self.setopt(pycurl.AUTOREFERER, 1)
    self.setopt(pycurl.SSL_VERIFYPEER, 0)
    # Interval for low speed, detects connection loss, but can abort dl if
    # hoster stalls the download
    self.setopt(pycurl.LOW_SPEED_TIME, 45)
    self.setopt(pycurl.LOW_SPEED_LIMIT, 5)
    # do not save the cookies
    self.setopt(pycurl.COOKIEFILE, '')
    self.setopt(pycurl.COOKIEJAR, '')
    # self.setopt(pycurl.VERBOSE, 1)
    self.setopt(
        pycurl.USERAGENT,
        'Mozilla/5.0 (Windows NT 10.0; Win64; rv:53.0) '
        'Gecko/20100101 Firefox/53.0')
    if pycurl.version_info()[7]:
        self.setopt(pycurl.ENCODING, 'gzip,deflate')
    self.headers.update(
        {'Accept': "*/*",
         'Accept-Language': "en-US,en",
         'Accept-Charset': "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
         'Connection': "keep-alive",
         'Keep-Alive': "300",
         'Expect': ""})
def _create_handle(self):
    c = pycurl.Curl()
    # c.setopt(pycurl.VERBOSE, 1)
    c.setopt(pycurl.CONNECTTIMEOUT, CONNECT_TIMEOUT)
    c.setopt(pycurl.TIMEOUT, TIMEOUT)
    c.setopt(pycurl.ENCODING, 'gzip, deflate')
    return c
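# _create_handle() relies on module-level CONNECT_TIMEOUT and TIMEOUT
# constants. A hypothetical way to exercise the handle it returns; the
# values and URL are placeholders, 'self' is unused so None is passed, and
# BytesIO is assumed to be imported as in the surrounding snippets:
CONNECT_TIMEOUT = 10   # seconds allowed to establish the connection
TIMEOUT = 30           # seconds allowed for the whole transfer

buf = BytesIO()
handle = _create_handle(None)
handle.setopt(pycurl.URL, "http://example.com")
handle.setopt(pycurl.WRITEFUNCTION, buf.write)
handle.perform()
handle.close()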
def _set_def_curl_opts(curl):
    curl.setopt(pycurl.CONNECTTIMEOUT, 8)
    curl.setopt(pycurl.CAINFO, certifi.where())
def pideURL(url, cookie=False, cookie_name='cookie.txt', contador_curl=0):
    time.sleep(2)
    print("\n" + url + "\n")
    c = pycurl.Curl()
    if cookie:
        c.setopt(pycurl.COOKIEJAR, 'cookies/' + cookie_name)
        c.setopt(pycurl.COOKIEFILE, 'cookies/' + cookie_name)
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.CONNECTTIMEOUT, 15)
    c.setopt(pycurl.TIMEOUT, 25)
    c.setopt(pycurl.HTTPHEADER, ['Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                                 'Accept-Language: en-US,en;q=0.5',
                                 'Connection: keep-alive',
                                 'Content-Type: application/x-www-form-urlencoded',
                                 'Host: services6.arcgis.com',
                                 'Origin: https://sig-ruv.maps.arcgis.com',
                                 'Referer: https://sig-ruv.maps.arcgis.com/apps/webappviewer/index.html?id=1e3873d1c01749929457c7a7b9315cda'])
    # c.setopt(pycurl.VERBOSE, 1)
    b = BytesIO()
    c.setopt(pycurl.WRITEFUNCTION, b.write)
    try:
        c.perform()
        response = b.getvalue()
        # print(response)
        b.close()
        return response
    except Exception as e:
        # log('Reason:', e)
        if contador_curl <= 10:
            time.sleep(5)
            return pideURL(url, cookie=cookie, cookie_name=cookie_name,
                           contador_curl=contador_curl + 1)
        else:
            print('Error: ', url)
            print('Error log: ', e)
def pideURL(self, url, compressed=False, cookie=False, contador_curl=0):
    time.sleep(3)
    Scrape.contador += 1
    print("\n" + url)
    print("\n\t.l." + str(Scrape.contador))
    c = pycurl.Curl()
    if cookie:
        c.setopt(pycurl.COOKIEJAR, 'cookie.txt')
        c.setopt(pycurl.COOKIEFILE, 'cookie.txt')
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.CONNECTTIMEOUT, 15)
    c.setopt(pycurl.TIMEOUT, 25)
    c.setopt(pycurl.HTTPHEADER, self.headers)
    # SOCKS5 proxy on localhost:9050 (the default Tor port)
    c.setopt(pycurl.PROXY, '127.0.0.1')
    c.setopt(pycurl.PROXYPORT, 9050)
    c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5_HOSTNAME)
    b = BytesIO()
    c.setopt(pycurl.WRITEFUNCTION, b.write)
    self.url = url
    try:
        c.perform()
        self.response_string = b.getvalue()
        # print(self.response_string)
        b.close()
    except Exception as e:
        # self.log('Reason:', e)
        self.response_string = None
        if contador_curl <= 10:
            time.sleep(5)
            self.pideURL(url, compressed=compressed, cookie=cookie,
                         contador_curl=contador_curl + 1)
        else:
            print('Error: ', url)
            print('Error log: ', e)