def pidePOST(self, url, data, compressed=False, cookie=False, contador_curl=0, debug=False):
    """POST ``data`` to ``url`` through the SOCKS5 proxy at 127.0.0.1:9050.

    Sleeps three seconds, increments ``Scrape.contador`` and prints the URL
    and the counter before sending the request.

    Args:
        url: Target URL; also stored on ``self.url`` just before the transfer.
        data: Request body handed to ``POSTFIELDS``.
        compressed: When true, ask for gzip/deflate encoded responses.
        cookie: When true, read and write cookies from ``cookie.txt``.
        contador_curl: Accepted for compatibility; not used by this method.
        debug: When true, enable libcurl verbose output.

    Side effects:
        ``self.response_string`` is set to the raw response bytes, or to
        ``None`` when the transfer fails with ``pycurl.error``.
    """
    time.sleep(3)
    Scrape.contador += 1
    print("\n" + url)
    print("\n\t.l." + str(Scrape.contador))

    c = pycurl.Curl()
    b = BytesIO()
    try:
        if cookie:
            c.setopt(pycurl.COOKIEJAR, 'cookie.txt')
            c.setopt(pycurl.COOKIEFILE, 'cookie.txt')
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.CONNECTTIMEOUT, 15)
        c.setopt(pycurl.TIMEOUT, 25)
        c.setopt(pycurl.HTTPHEADER, self.headers)
        if compressed:
            c.setopt(pycurl.ENCODING, 'gzip,deflate')
        c.setopt(c.POSTFIELDS, data)
        if debug:
            c.setopt(c.VERBOSE, True)
        # SOCKS5 with proxy-side hostname resolution.
        c.setopt(pycurl.PROXY, '127.0.0.1')
        c.setopt(pycurl.PROXYPORT, 9050)
        c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5_HOSTNAME)
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        self.url = url
        try:
            c.perform()
            self.response_string = b.getvalue()
        except pycurl.error:
            # Best effort: a failed transfer is reported as "no response".
            self.response_string = None
    finally:
        # Release the buffer and the curl handle on every path.
        b.close()
        c.close()
# Example source code using pycurl's CONNECTTIMEOUT option (python类CONNECTTIMEOUT的实例源码)
def request(self, url, method, body, headers):
    """Send one HTTP request with pycurl and return ``({}, response_body)``.

    Args:
        url: Target URL.
        method: HTTP verb; only ``'POST'`` gets special handling (the body
            is sent via ``POSTFIELDS``), anything else uses curl's default.
        body: Request payload, used only for ``'POST'``.
        headers: Mapping of header names to values, or a falsy value to
            send no extra headers.

    Returns:
        A 2-tuple of an empty dict (response headers are not collected) and
        the contents of ``self.buf`` after the transfer.

    Notes:
        Proxy settings come from ``self.proxy`` (keys ``proxy_host``,
        ``proxy_port``, ``proxy_user``; ``proxy_pass`` is required whenever
        ``proxy_user`` is present). Peer and host verification are enabled
        only when ``self.cacert`` is set.
    """
    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, url)
        if 'proxy_host' in self.proxy:
            c.setopt(pycurl.PROXY, self.proxy['proxy_host'])
        if 'proxy_port' in self.proxy:
            c.setopt(pycurl.PROXYPORT, self.proxy['proxy_port'])
        if 'proxy_user' in self.proxy:
            c.setopt(pycurl.PROXYUSERPWD, "%(proxy_user)s:%(proxy_pass)s" % self.proxy)

        self.buf = StringIO()
        c.setopt(pycurl.WRITEFUNCTION, self.buf.write)

        if self.cacert:
            c.setopt(c.CAINFO, self.cacert)
        c.setopt(pycurl.SSL_VERIFYPEER, 1 if self.cacert else 0)
        c.setopt(pycurl.SSL_VERIFYHOST, 2 if self.cacert else 0)

        # Floor division keeps the value an integer on Python 3 as well;
        # true division would hand pycurl a float.
        c.setopt(pycurl.CONNECTTIMEOUT, int(self.timeout // 6))
        c.setopt(pycurl.TIMEOUT, self.timeout)

        if method == 'POST':
            c.setopt(pycurl.POST, 1)
            c.setopt(pycurl.POSTFIELDS, body)
        if headers:
            hdrs = ['%s: %s' % (k, v) for k, v in headers.items()]
            log.debug(hdrs)
            c.setopt(pycurl.HTTPHEADER, hdrs)

        c.perform()
    finally:
        # Free the handle even when a setopt or the transfer raises.
        c.close()
    return {}, self.buf.getvalue()
def perform(self):
    """Execute this request with a fresh curl handle and store the result.

    Resets the private head/body accumulators, configures the handle from
    the request's URL, authentication, timeouts, proxy, headers and POST
    data, runs the transfer, and finally builds a ``Response`` from the
    accumulated head and body and stores it on ``self.response``.

    The accumulators are presumably filled by ``self.header_callback`` and
    ``self.body_callback`` during the transfer -- verify against those
    methods.

    Raises:
        pycurl.error: Propagated unchanged when the transfer fails.
    """
    self.__performHead = ""
    self.__performBody = ""

    conn = pycurl.Curl()
    try:
        conn.setopt(pycurl.SSL_VERIFYPEER, False)
        # NOTE(review): libcurl documents 0 and 2 as the meaningful values
        # for SSL_VERIFYHOST; 1 is kept as in the original -- confirm intent.
        conn.setopt(pycurl.SSL_VERIFYHOST, 1)
        conn.setopt(pycurl.URL, self.completeUrl)

        if self.__method or self.__userpass:
            if self.__method == "basic":
                conn.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC)
            elif self.__method == "ntlm":
                conn.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_NTLM)
            elif self.__method == "digest":
                conn.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_DIGEST)
            conn.setopt(pycurl.USERPWD, self.__userpass)

        if self.__timeout:
            conn.setopt(pycurl.CONNECTTIMEOUT, self.__timeout)
            conn.setopt(pycurl.NOSIGNAL, 1)

        if self.__totaltimeout:
            conn.setopt(pycurl.TIMEOUT, self.__totaltimeout)
            conn.setopt(pycurl.NOSIGNAL, 1)

        conn.setopt(pycurl.WRITEFUNCTION, self.body_callback)
        conn.setopt(pycurl.HEADERFUNCTION, self.header_callback)

        if self.__proxy is not None:
            conn.setopt(pycurl.PROXY, self.__proxy)
            # `in` works on Python 2 and 3; dict.has_key() is Python-2-only.
            if "Proxy-Connection" in self.__headers:
                del self.__headers["Proxy-Connection"]

        conn.setopt(pycurl.HTTPHEADER, self.__getHeaders())
        if self.method == "POST":
            conn.setopt(pycurl.POSTFIELDS, self.__postdata)
        conn.perform()
    finally:
        # Release the handle whether or not the transfer succeeded.
        conn.close()

    rp = Response()
    rp.parseResponse(self.__performHead)
    rp.addContent(self.__performBody)

    self.response = rp
######### THIS set of functions is not needed for normal use of the class
def to_pycurl_object(c, req):
    """Configure the curl handle ``c`` from the request object ``req``.

    Sets redirect limit, body/header callbacks, disabled TLS verification,
    URL, optional timeouts, authentication, proxy, headers and the HTTP
    method.

    Args:
        c: A ``pycurl.Curl`` handle (anything exposing ``setopt``).
        req: Request object providing ``completeUrl``, ``method``,
            ``postdata``, ``followLocation``, ``proxytype``, the callbacks
            and the ``getConnTimeout`` / ``getTotalTimeout`` / ``getAuth`` /
            ``getHeaders`` / ``getProxy`` / ``delHeader`` methods.

    Returns:
        The same handle ``c``, for chaining.
    """
    c.setopt(pycurl.MAXREDIRS, 5)
    c.setopt(pycurl.WRITEFUNCTION, req.body_callback)
    c.setopt(pycurl.HEADERFUNCTION, req.header_callback)
    c.setopt(pycurl.NOSIGNAL, 1)
    c.setopt(pycurl.SSL_VERIFYPEER, False)
    c.setopt(pycurl.SSL_VERIFYHOST, 0)
    c.setopt(pycurl.URL, req.completeUrl)

    conn_timeout = req.getConnTimeout()
    if conn_timeout:
        c.setopt(pycurl.CONNECTTIMEOUT, conn_timeout)
    total_timeout = req.getTotalTimeout()
    if total_timeout:
        c.setopt(pycurl.TIMEOUT, total_timeout)

    authMethod, userpass = req.getAuth()
    if authMethod or userpass:
        if authMethod == "basic":
            c.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC)
        elif authMethod == "ntlm":
            c.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_NTLM)
        elif authMethod == "digest":
            c.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_DIGEST)
        c.setopt(pycurl.USERPWD, userpass)

    # Proxy handling must run before the headers are handed to curl;
    # otherwise removing "Proxy-Connection" has no effect on this request.
    proxy = req.getProxy()
    if proxy is not None:
        c.setopt(pycurl.PROXY, proxy)
        if req.proxytype == "SOCKS5":
            c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
        elif req.proxytype == "SOCKS4":
            c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
        req.delHeader("Proxy-Connection")

    c.setopt(pycurl.HTTPHEADER, req.getHeaders())

    if req.method == "POST":
        c.setopt(pycurl.POSTFIELDS, req.postdata)
    if req.method != "GET" and req.method != "POST":
        c.setopt(pycurl.CUSTOMREQUEST, req.method)
    if req.method == "HEAD":
        c.setopt(pycurl.NOBODY, True)

    if req.followLocation:
        c.setopt(pycurl.FOLLOWLOCATION, 1)

    return c
def request(self, method, url, headers, post_data=None):
    """Perform an HTTP request with pycurl.

    Args:
        method: Lower-case verb. ``'get'`` and ``'post'`` use curl's native
            modes; any other value is upper-cased and sent as a custom
            request.
        url: Target URL; passed through ``util.utf8`` before use.
        headers: Mapping of header names to values.
        post_data: Request body, used only when ``method`` is ``'post'``.

    Returns:
        A 3-tuple ``(body, status_code, parsed_headers)`` where the headers
        are the result of ``self.parse_headers`` on the raw header text.

    Notes:
        A ``pycurl.error`` from the transfer is passed to
        ``self._handle_request_error`` (which presumably raises -- confirm).
    """
    s = util.StringIO.StringIO()
    rheaders = util.StringIO.StringIO()
    curl = pycurl.Curl()
    try:
        proxy = self._get_proxy(url)
        if proxy:
            if proxy.hostname:
                curl.setopt(pycurl.PROXY, proxy.hostname)
            if proxy.port:
                curl.setopt(pycurl.PROXYPORT, proxy.port)
            if proxy.username or proxy.password:
                curl.setopt(
                    pycurl.PROXYUSERPWD,
                    "%s:%s" % (proxy.username, proxy.password))

        if method == 'get':
            curl.setopt(pycurl.HTTPGET, 1)
        elif method == 'post':
            curl.setopt(pycurl.POST, 1)
            curl.setopt(pycurl.POSTFIELDS, post_data)
        else:
            curl.setopt(pycurl.CUSTOMREQUEST, method.upper())

        # pycurl doesn't like unicode URLs
        curl.setopt(pycurl.URL, util.utf8(url))

        curl.setopt(pycurl.WRITEFUNCTION, s.write)
        curl.setopt(pycurl.HEADERFUNCTION, rheaders.write)
        curl.setopt(pycurl.NOSIGNAL, 1)
        curl.setopt(pycurl.CONNECTTIMEOUT, 30)
        curl.setopt(pycurl.TIMEOUT, 80)
        curl.setopt(pycurl.HTTPHEADER,
                    ['%s: %s' % (k, v) for k, v in headers.items()])
        if self._verify_ssl_certs:
            curl.setopt(pycurl.CAINFO, os.path.join(
                os.path.dirname(__file__), 'data/ca-certificates.crt'))
        else:
            curl.setopt(pycurl.SSL_VERIFYHOST, False)

        try:
            curl.perform()
        except pycurl.error as e:
            self._handle_request_error(e)
        rbody = s.getvalue()
        # Must be read before the handle is closed.
        rcode = curl.getinfo(pycurl.RESPONSE_CODE)
    finally:
        # Release the handle on success and on every error path.
        curl.close()
    return rbody, rcode, self.parse_headers(rheaders.getvalue())
def to_pycurl_object(c, req):
    """Apply the settings of request ``req`` to the curl handle ``c``.

    Covers redirect limit, response callbacks, TLS verification (disabled),
    URL, timeouts, authentication, proxy, request headers and HTTP method.

    Args:
        c: Curl handle to configure through ``setopt``.
        req: Request object supplying the URL, method, POST data, callbacks,
            proxy type and the getter methods used below.

    Returns:
        ``c`` itself, after configuration.
    """
    c.setopt(pycurl.MAXREDIRS, 5)
    c.setopt(pycurl.WRITEFUNCTION, req.body_callback)
    c.setopt(pycurl.HEADERFUNCTION, req.header_callback)
    c.setopt(pycurl.NOSIGNAL, 1)
    c.setopt(pycurl.SSL_VERIFYPEER, False)
    c.setopt(pycurl.SSL_VERIFYHOST, 0)
    c.setopt(pycurl.URL, req.completeUrl)

    # Timeouts are applied only when the request defines a truthy value.
    connect_limit = req.getConnTimeout()
    if connect_limit:
        c.setopt(pycurl.CONNECTTIMEOUT, connect_limit)
    total_limit = req.getTotalTimeout()
    if total_limit:
        c.setopt(pycurl.TIMEOUT, total_limit)

    authMethod, userpass = req.getAuth()
    if authMethod or userpass:
        auth_flags = {
            "basic": pycurl.HTTPAUTH_BASIC,
            "ntlm": pycurl.HTTPAUTH_NTLM,
            "digest": pycurl.HTTPAUTH_DIGEST,
        }
        # Unknown methods leave HTTPAUTH untouched but still send USERPWD.
        if authMethod in auth_flags:
            c.setopt(pycurl.HTTPAUTH, auth_flags[authMethod])
        c.setopt(pycurl.USERPWD, userpass)

    # Handle the proxy first: the "Proxy-Connection" header has to be
    # dropped before the header list is read and passed to curl.
    proxy = req.getProxy()
    if proxy is not None:
        c.setopt(pycurl.PROXY, proxy)
        if req.proxytype == "SOCKS5":
            c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
        elif req.proxytype == "SOCKS4":
            c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
        req.delHeader("Proxy-Connection")

    c.setopt(pycurl.HTTPHEADER, req.getHeaders())

    verb = req.method
    if verb == "POST":
        c.setopt(pycurl.POSTFIELDS, req.postdata)
    if verb not in ("GET", "POST"):
        c.setopt(pycurl.CUSTOMREQUEST, verb)
    if verb == "HEAD":
        c.setopt(pycurl.NOBODY, True)

    if req.followLocation:
        c.setopt(pycurl.FOLLOWLOCATION, 1)

    return c
def work_driver(arg_tup):
    """Issue one GET per entry of a request array and count the outcomes.

    Args:
        arg_tup: 4-tuple ``(nreqs, host_str, req_arr_ix, send_cost)``.
            ``host_str`` is a %-format URL template, ``req_arr_ix`` selects
            the rows of the module-level ``REQ_ARRS`` / ``COST_ARRS``, and
            ``send_cost`` chooses whether the per-item cost is formatted
            into the URL next to the item. ``nreqs`` is not used here.

    Returns:
        ``(success, miss)``: the number of HTTP 200 responses, and how many
        of those ``handle_response`` reported as not being a hit.

    Side effects:
        ``PROGRESS_CTR_T[req_arr_ix]`` is set to the index of the request
        just processed after every iteration.
    """
    _nreqs, host_str, req_arr_ix, send_cost = arg_tup
    req_arr = REQ_ARRS[req_arr_ix]
    cost_arr = COST_ARRS[req_arr_ix]
    success = 0
    miss = 0

    # Pool of handles used round-robin.
    NCURL = 128
    curl_hands = [pycurl.Curl() for _ in range(NCURL)]
    try:
        for ix, item in enumerate(req_arr):
            c = curl_hands[ix % NCURL]
            try:
                if send_cost:
                    url = host_str % (item, cost_arr[ix])
                else:
                    url = host_str % item
                c.setopt(c.URL, url)
                resp = StringIO()
                headers = StringIO()
                c.setopt(c.WRITEFUNCTION, resp.write)
                c.setopt(c.HEADERFUNCTION, headers.write)
                c.setopt(pycurl.CONNECTTIMEOUT, 20)
                c.setopt(pycurl.TIMEOUT, 20)
                c.perform()
                if c.getinfo(c.RESPONSE_CODE) == 200:
                    success += 1
                    is_hit = handle_response(resp, headers)
                    if not is_hit:
                        miss += 1
            except Exception:
                # Deliberately best-effort: a failed request is simply not
                # counted and the run carries on with the next item.
                pass
            PROGRESS_CTR_T[req_arr_ix] = ix
    finally:
        # Release every handle in the pool, even if the loop aborts.
        for c in curl_hands:
            c.close()
    return (success, miss)
def work_driver(arg_tup):
# Variant of the request driver that also sends a per-item class and sums
# the cost reported by handle_response.
#
# arg_tup is a 3-tuple (nreqs, host_str, req_arr_ix); host_str is a %-format
# template taking (item, cost, class). Returns (success, miss, costs).
#
# NOTE(review): the indentation of this block has been lost, so the nesting
# below cannot be read off the text. In particular it is unclear whether
# `costs += cost` runs for every 200 response or only when `not is_hit` --
# confirm against the original source before restructuring.
global PROGRESS_CTR_T, MEASURING
global REQ_ARRS, COST_ARRS, CLASS_ARRS
nreqs, host_str, req_arr_ix = arg_tup
# Per-worker rows of the module-level request / cost / class tables.
req_arr = REQ_ARRS[req_arr_ix]
cost_arr = COST_ARRS[req_arr_ix]
class_arr = CLASS_ARRS[req_arr_ix]
success = 0
miss = 0
costs = 0
# Pool of 128 curl handles, used round-robin via `ix % NCURL` below.
NCURL = 128
curl_hands = []
for i in range(NCURL):
c = pycurl.Curl()
curl_hands.append(c)
n = 0
# my_output = open(TMP_FILE_NAME % req_arr_ix, 'w')
for ix, item in enumerate(req_arr):
c = curl_hands[ix % NCURL]
try:
url = host_str % (item, cost_arr[ix], class_arr[ix])
# t_s / t_end are only consumed by the commented-out timing output below.
t_s = time.time()
c.setopt(c.URL, url)
# Fresh buffers per request for the response body and headers.
resp = StringIO()
headers = StringIO()
c.setopt(c.WRITEFUNCTION, resp.write)
c.setopt(c.HEADERFUNCTION, headers.write)
c.setopt(pycurl.CONNECTTIMEOUT, 20)
c.setopt(pycurl.TIMEOUT, 20)
c.perform()
t_end = time.time()
# Only HTTP 200 responses are counted and passed to handle_response.
if c.getinfo(c.RESPONSE_CODE) == 200:
success += 1
# handle_response returns an (is_hit, cost) pair here.
is_hit, cost = handle_response(resp, headers)
if not is_hit:
miss += 1
costs += cost
# my_output.write("%f, %f\n" % (t_end, (t_end - t_s)))
# my_output.flush()
except Exception as e:
# NOTE(review): Python 2 print statement; this line is a syntax error on
# Python 3.
print e
pass
# Progress is published as the running success count for this worker.
PROGRESS_CTR_T[req_arr_ix] = success
# my_output.close()
return (success, miss, costs)
def post_file_curl(path, key, token):
    """Upload a file to ``http://upload.qiniu.com/`` as a multipart form.

    The file is temporarily renamed to a random numeric name in the same
    directory for the duration of the upload and is renamed back afterwards
    on every path, including transfer errors.

    Args:
        path: File path wrapped in one extra character on each side; the
            first and last characters are always stripped (presumably
            surrounding double quotes -- confirm with callers).
        key: Value sent as both the ``key`` and ``x:md5`` form fields.
        token: Value sent as the ``token`` form field.

    Returns:
        ``True`` when the server answers with HTTP 200; ``False`` when the
        file does not exist, the transfer fails, or another status is
        returned.
    """
    path = path[1:-1]
    if not os.path.exists(path):
        # Nothing to upload; no curl handle is created in this case.
        return False

    c = pycurl.Curl()
    newpath = None
    renamed = False
    try:
        c.setopt(c.POST, 1)

        # Temporary workaround: upload the file under a random numeric name.
        # os.path.join keeps a bare filename in the current directory
        # instead of sending it to the filesystem root.
        directory = os.path.split(path)[0]
        suffix = os.path.splitext(path)[1]
        while True:
            number = random.randint(10, 100000)
            candidate = os.path.join(directory, str(number) + suffix)
            if not os.path.exists(candidate):
                newpath = candidate
                break
        os.rename(path, newpath)
        renamed = True
        print("rename" + newpath)
        print(path)

        fields = [('file', (c.FORM_FILE, newpath.encode('gbk'))),
                  ('token', token),
                  ('key', key),
                  ('x:md5', key)]
        c.setopt(c.VERBOSE, 1)
        c.setopt(c.URL, "http://upload.qiniu.com/")
        c.setopt(c.HTTPPOST, fields)
        c.setopt(c.NOPROGRESS, 0)
        c.setopt(c.PROGRESSFUNCTION, progress)
        c.setopt(pycurl.CONNECTTIMEOUT, 60)
        c.setopt(pycurl.TIMEOUT, 600)
        try:
            info = c.perform()
            print(info)
            print(fields)
            return c.getinfo(c.HTTP_CODE) == 200
        except pycurl.error as e:
            print(e)
            sys.stdout.write("File no Found!")
            return False
    finally:
        # Close the handle first so it no longer refers to the file, then
        # restore the original file name.
        c.close()
        if renamed and os.path.exists(newpath):
            os.rename(newpath, path)
            print("rename" + path)