def request(self, endpoint, post=None):
    """Perform an HTTP request against ``Constants.API_URL + endpoint``.

    A GET is issued unless ``post`` is given, in which case ``post`` is sent
    as the POST body. Cookies are read from and written back to
    ``<IGDataPath>/<username>/<username>-cookies.dat``. When ``self.proxy``
    is set the request goes through ``self.proxyHost`` (optionally with
    ``self.proxyAuth`` credentials).

    Returns:
        A two-item list ``[header, decoded_body]`` where ``header`` is the
        raw response header block and ``decoded_body`` is the result of
        ``json_decode`` applied to the raw body.

    Raises:
        pycurl.error: if the transfer fails. The curl handle is released
        before the exception propagates.
    """
    cookie_path = os.path.join(
        self.IGDataPath, self.username, self.username + "-cookies.dat")
    buffer = BytesIO()
    ch = pycurl.Curl()
    try:
        ch.setopt(pycurl.URL, Constants.API_URL + endpoint)
        ch.setopt(pycurl.USERAGENT, self.userAgent)
        ch.setopt(pycurl.WRITEFUNCTION, buffer.write)
        ch.setopt(pycurl.FOLLOWLOCATION, True)
        # Headers are written into the same buffer as the body and split
        # apart below using HEADER_SIZE.
        ch.setopt(pycurl.HEADER, True)
        ch.setopt(pycurl.VERBOSE, False)
        ch.setopt(pycurl.COOKIEFILE, cookie_path)
        ch.setopt(pycurl.COOKIEJAR, cookie_path)
        if post is not None:
            ch.setopt(pycurl.POST, True)
            ch.setopt(pycurl.POSTFIELDS, post)
        if self.proxy:
            ch.setopt(pycurl.PROXY, self.proxyHost)
            if self.proxyAuth:
                ch.setopt(pycurl.PROXYUSERPWD, self.proxyAuth)
        ch.perform()
        resp = buffer.getvalue()
        header_len = ch.getinfo(pycurl.HEADER_SIZE)
    finally:
        # Always release the handle, even when the transfer raised.
        ch.close()

    header = resp[:header_len]
    body = resp[header_len:]

    if self.debug:
        print("REQUEST: " + endpoint)
        if post is not None and not isinstance(post, list):
            print("DATA: " + str(post))
        # The buffer yields bytes; decode only when that is not already the
        # native ``str`` type so the concatenation cannot raise TypeError.
        shown = body if isinstance(body, str) else body.decode("utf-8", "replace")
        print("RESPONSE: " + shown)

    return [header, json_decode(body)]
# Example source code using pycurl's COOKIEJAR option (Python)
def CurlPOST(url, data, cookie):
    """POST ``data`` to ``url`` with a JSON content type and return the body.

    Args:
        url: target URL.
        data: request body, passed to curl as POSTFIELDS.
        cookie: path of the cookie file, used both to load cookies
            (COOKIEFILE) and to store them (COOKIEJAR).

    Returns:
        The raw response body collected from curl's write callback.

    Raises:
        pycurl.error: if the transfer fails. No timeout is configured, so a
        stalled server can block the call.
    """
    c = pycurl.Curl()
    b = StringIO.StringIO()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.POST, 1)
        c.setopt(pycurl.HTTPHEADER, ['Content-Type: application/json'])
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        c.setopt(pycurl.COOKIEFILE, cookie)
        c.setopt(pycurl.COOKIEJAR, cookie)
        c.setopt(pycurl.POSTFIELDS, data)
        c.perform()
        return b.getvalue()
    finally:
        # Release the buffer and the curl handle on success and on failure.
        b.close()
        c.close()
def get_user_init(self):
    '''
    Description:
        (Re)initialize the nonce of the current session and fetch the
        session ID by requesting ``self.url + "user/init"``. Cookies
        received during the request are stored in ``cookie.txt``.
    Return:
        The JSON response decoded with ``json.loads``, e.g.
        {
            "success": true,
            "jsessionid": "3185591CD191F18D1551440AE1BEF86A-n1.frontend3",
            "nonce": "GTPSLZUcDyjEBqeL"
        }
    Raises:
        pycurl.error if the transfer fails.
    '''
    api_url = self.url + "user/init"
    output_init = BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, api_url)
        # Create the cookie file
        c.setopt(pycurl.COOKIEJAR, 'cookie.txt')
        c.setopt(c.WRITEFUNCTION, output_init.write)
        c.perform()
    finally:
        # Close explicitly: libcurl flushes the cookie jar when the handle
        # is cleaned up, so this should not be left to garbage collection.
        c.close()
    return json.loads(output_init.getvalue())
def request(self, endpoint, headers=None, post=None, first=True):
    """Perform an HTTP request against ``endpoint``.

    Args:
        endpoint: full URL to request.
        headers: optional list of raw header lines passed to HTTPHEADER.
        post: optional form fields; when truthy they are URL-encoded and
            sent as the POST body.
        first: accepted for interface compatibility; not used here.

    Returns:
        A two-item list ``[header, decoded_body]`` where ``header`` is the
        raw response header block and ``decoded_body`` is the result of
        ``json_decode`` applied to the raw body.

    Raises:
        pycurl.error: if the transfer fails. The curl handle is released
        before the exception propagates.
    """
    # Imported locally (as the original did) and in a form that works on
    # both Python 2 and Python 3.
    try:
        from urllib import urlencode, unquote_plus
    except ImportError:
        from urllib.parse import urlencode, unquote_plus

    cookie_path = self.settingsPath + self.username + '-cookies.dat'
    buffer = BytesIO()
    ch = pycurl.Curl()
    try:
        ch.setopt(pycurl.URL, endpoint)
        ch.setopt(pycurl.USERAGENT, self.userAgent)
        ch.setopt(pycurl.WRITEFUNCTION, buffer.write)
        ch.setopt(pycurl.FOLLOWLOCATION, True)
        # Headers land in the same buffer as the body; split via HEADER_SIZE.
        ch.setopt(pycurl.HEADER, True)
        if headers:
            ch.setopt(pycurl.HTTPHEADER, headers)
        ch.setopt(pycurl.VERBOSE, self.debug)
        # NOTE(review): TLS certificate and host verification are disabled,
        # which allows man-in-the-middle interception. Kept as-is because
        # callers may rely on it -- confirm whether this is still required.
        ch.setopt(pycurl.SSL_VERIFYPEER, False)
        ch.setopt(pycurl.SSL_VERIFYHOST, False)
        ch.setopt(pycurl.COOKIEFILE, cookie_path)
        ch.setopt(pycurl.COOKIEJAR, cookie_path)
        if post:
            # POST is an on/off flag, not a length.
            ch.setopt(pycurl.POST, True)
            ch.setopt(pycurl.POSTFIELDS, urlencode(post))
        ch.perform()
        resp = buffer.getvalue()
        header_len = ch.getinfo(pycurl.HEADER_SIZE)
    finally:
        # Always release the handle, even when the transfer raised.
        ch.close()

    header = resp[:header_len]
    body = resp[header_len:]

    if self.debug:
        print("REQUEST: " + endpoint)
        if post is not None and not isinstance(post, list):
            print('DATA: ' + unquote_plus(json.dumps(post)))
        # Decode only when the body is not already the native ``str`` type
        # so the concatenation cannot raise TypeError.
        shown = body if isinstance(body, str) else body.decode("utf-8", "replace")
        print("RESPONSE: " + shown + "\n")

    return [header, json_decode(body)]
def CurlGET(url, cookie):
    """GET ``url`` and return the raw response body.

    Args:
        url: target URL.
        cookie: path of the cookie file, used both to load cookies
            (COOKIEFILE) and to store them (COOKIEJAR).

    Returns:
        The raw response body collected from curl's write callback.

    Raises:
        pycurl.error: if the transfer fails. No timeout is configured, so a
        stalled server can block the call.
    """
    c = pycurl.Curl()
    b = StringIO.StringIO()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        c.setopt(pycurl.COOKIEFILE, cookie)
        c.setopt(pycurl.COOKIEJAR, cookie)
        c.perform()
        return b.getvalue()
    finally:
        # Release the buffer and the curl handle on success and on failure.
        b.close()
        c.close()
def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
    """Configure the curl handle ``self.c`` for the next request.

    The URL is quoted with ``myquote`` and, when ``get`` is truthy, extended
    with the URL-encoded query string. ``post`` selects between no POST,
    a urlencoded/raw POST body, or (with ``multipart``) a multipart form.
    A referer is only set when requested and ``self.lastURL`` is truthy;
    ``cookies`` enables the cookie options and calls ``self.getCookies()``.
    """
    target = myquote(url)
    if get:
        target = "%s?%s" % (target, urlencode(get))

    curl = self.c
    curl.setopt(pycurl.URL, target)
    curl.lastUrl = target

    if not post:
        curl.setopt(pycurl.POST, 0)
    elif multipart:
        curl.setopt(pycurl.POST, 1)
        # Multipart form: unicode values are encoded to UTF-8 byte strings.
        fields = [
            (key, value.encode('utf8') if type(value) == unicode else value)
            for key, value in post.iteritems()
        ]
        curl.setopt(pycurl.HTTPPOST, fields)
    else:
        curl.setopt(pycurl.POST, 1)
        kind = type(post)
        if kind == unicode:
            # unicode is not allowed as POSTFIELDS
            payload = str(post)
        elif kind == str:
            payload = post
        else:
            payload = myurlencode(post)
        curl.setopt(pycurl.POSTFIELDS, payload)

    if referer and self.lastURL:
        curl.setopt(pycurl.REFERER, str(self.lastURL))

    if cookies:
        curl.setopt(pycurl.COOKIEFILE, "")
        curl.setopt(pycurl.COOKIEJAR, "")
        self.getCookies()
def curl_get(self, url, refUrl=None):
    """GET ``url`` and return the body, or ``''`` unless the status is 200.

    Args:
        url: target URL.
        refUrl: optional value for the Referer header.

    Returns:
        The response body when the HTTP response code is 200, otherwise an
        empty string.

    Raises:
        pycurl.error: if the transfer fails. The curl handle is released
        before the exception propagates. No timeout is configured.
    """
    buf = cStringIO.StringIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(curl.URL, url)
        curl.setopt(curl.WRITEFUNCTION, buf.write)
        # NOTE(review): peer certificate verification is disabled, which
        # allows man-in-the-middle interception -- confirm this is needed.
        curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        curl.setopt(pycurl.VERBOSE, 0)
        curl.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0')
        # Cookies are persisted in a fixed file shared by every call.
        curl.setopt(pycurl.COOKIEJAR, '/htdocs/logs/py_cookie.txt')
        curl.setopt(pycurl.COOKIEFILE, '/htdocs/logs/py_cookie.txt')
        if refUrl:
            curl.setopt(pycurl.REFERER, refUrl)
        curl.perform()
        if curl.getinfo(pycurl.RESPONSE_CODE) == 200:
            return buf.getvalue()
        return ''
    finally:
        # Always release the handle, even when the transfer raised.
        curl.close()
def init_handle(self):
    """
    Apply the common curl options and default request headers.

    Options are set through ``self.setopt``; the default headers are merged
    into ``self.headers``.
    """
    leading_options = (
        (pycurl.FOLLOWLOCATION, 1),
        (pycurl.MAXREDIRS, 5),
        (pycurl.CONNECTTIMEOUT, 30),
        (pycurl.NOSIGNAL, 1),
        (pycurl.NOPROGRESS, 1),
    )
    for option, value in leading_options:
        self.setopt(option, value)

    # AUTOREFERER is only set when this pycurl build exposes it.
    if hasattr(pycurl, "AUTOREFERER"):
        self.setopt(pycurl.AUTOREFERER, 1)

    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; rv:53.0) '
        'Gecko/20100101 Firefox/53.0'
    )
    trailing_options = (
        (pycurl.SSL_VERIFYPEER, 0),
        # Low-speed interval: detects connection loss, but can abort a
        # download if the hoster stalls it.
        (pycurl.LOW_SPEED_TIME, 45),
        (pycurl.LOW_SPEED_LIMIT, 5),
        # Empty paths: do not save the cookies.
        (pycurl.COOKIEFILE, ''),
        (pycurl.COOKIEJAR, ''),
        (pycurl.USERAGENT, user_agent),
    )
    for option, value in trailing_options:
        self.setopt(option, value)

    # Entry 7 of version_info() is checked before requesting compressed
    # transfers (presumably the libz version -- verify against pycurl docs).
    if pycurl.version_info()[7]:
        self.setopt(pycurl.ENCODING, 'gzip,deflate')

    default_headers = {
        'Accept': "*/*",
        'Accept-Language': "en-US,en",
        'Accept-Charset': "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
        'Connection': "keep-alive",
        'Keep-Alive': "300",
        'Expect': "",
    }
    self.headers.update(default_headers)
def __init__(self, data_file, verbose=False, **kwargs):
    """Set up a URL-backed data file and the curl handle used to fetch it.

    ``data_file`` is a URL. One trailing slash is removed, and if the
    lower-cased URL ends in ``html``, ``raw`` or ``wiff`` everything from
    the last ``.`` onwards is dropped. A temporary file is created to hold
    cookies, and response bodies are collected in ``self.output``.
    """
    self.file_type = 'mzurl'

    location = data_file
    # Strip off the final slash, if it exists.
    if location[-1] == '/':
        location = location[:-1]
    # Likewise for html and the other recognised endings.
    if location.lower().endswith(('html', 'raw', 'wiff')):
        pieces = location.split(".")
        location = ".".join(pieces[:-1])

    self.data_file = location  # actually a URL to a file
    self.verbose = verbose
    self._scans = None  # cache of scan_info results for the whole file

    # Name and path of a platform-appropriate temp file for cookies; only
    # the path is kept, so the descriptor is closed straight away.
    descriptor, cookie_path = tempfile.mkstemp(text=True)
    self.cookie_file_name = cookie_path
    os.close(descriptor)

    # Handle to the libcurl object, with some general options.
    handle = pycurl.Curl()
    self.crl = handle
    handle.setopt(pycurl.COOKIEFILE, cookie_path)
    handle.setopt(pycurl.COOKIEJAR, cookie_path)
    handle.setopt(pycurl.FOLLOWLOCATION, True)
    handle.setopt(pycurl.VERBOSE, verbose)

    sink = cStringIO.StringIO()
    self.output = sink
    handle.setopt(pycurl.WRITEFUNCTION, sink.write)

    # NOTE: loading a cached '.mzi' info file is not implemented for URLs
    # (open question from the original author: how would it be stored?).
def pideURL(url,cookie=False,cookie_name='cookie.txt', contador_curl = 0):
    """Fetch ``url`` with a fixed set of request headers, retrying on failure.

    Sleeps 2 seconds before each attempt. When the transfer fails, waits
    5 seconds and retries as long as ``contador_curl`` is at most 10.

    Args:
        url: target URL.
        cookie: when truthy, cookies are loaded from and saved to
            ``cookies/<cookie_name>``.
        cookie_name: file name of the cookie store inside ``cookies/``.
        contador_curl: number of retries already performed (internal).

    Returns:
        The raw response body as bytes, or ``None`` once the retries are
        exhausted.
    """
    time.sleep(2)
    print ("\n"+url+"\n")
    c = pycurl.Curl()
    b = BytesIO()
    try:
        if cookie:
            c.setopt(pycurl.COOKIEJAR, 'cookies/'+cookie_name)
            c.setopt(pycurl.COOKIEFILE, 'cookies/'+cookie_name)
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.CONNECTTIMEOUT, 15)
        c.setopt(pycurl.TIMEOUT, 25)
        c.setopt(pycurl.HTTPHEADER, ['Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' ,'Accept-Language: en-US,en;q=0.5' ,'Connection: keep-alive' ,'Content-Type: application/x-www-form-urlencoded' ,'Host: services6.arcgis.com' ,'Origin: https://sig-ruv.maps.arcgis.com' ,'Referer: https://sig-ruv.maps.arcgis.com/apps/webappviewer/index.html?id=1e3873d1c01749929457c7a7b9315cda'])
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        try:
            c.perform()
            return b.getvalue()
        except pycurl.error as e:
            failure = e
    finally:
        # Release the handle and buffer on every path.
        c.close()
        b.close()

    if contador_curl<=10:
        time.sleep(5)
        # Pass every argument by keyword so the retry counter really
        # advances and the cookie settings are kept; return the retry's
        # result to the caller.
        return pideURL(url, cookie=cookie, cookie_name=cookie_name,
                       contador_curl=contador_curl+1)
    print ('Error: ',url)
    print ('Error log: ',failure)
    return None
def pideURL(self,url,compressed = False, cookie=False, contador_curl = 0):
    """Fetch ``url`` through the SOCKS5 proxy at 127.0.0.1:9050 with retries.

    Sleeps 3 seconds and increments ``Scrape.contador`` before each attempt.
    The body is stored in ``self.response_string`` (``None`` on failure) and
    ``self.url`` is set to the requested URL. When the transfer fails, waits
    5 seconds and retries as long as ``contador_curl`` is at most 10.

    Args:
        url: target URL.
        compressed: accepted for interface compatibility; not used here.
        cookie: when truthy, cookies are loaded from and saved to
            ``cookie.txt``.
        contador_curl: number of retries already performed (internal).

    Returns:
        None. The result is delivered through ``self.response_string``.
    """
    time.sleep(3)
    Scrape.contador+=1
    print ("\n"+url)
    print ("\n\t.l."+str(Scrape.contador))
    c = pycurl.Curl()
    b = BytesIO()
    failure = None
    try:
        if cookie:
            c.setopt(pycurl.COOKIEJAR, 'cookie.txt')
            c.setopt(pycurl.COOKIEFILE, 'cookie.txt')
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.CONNECTTIMEOUT, 15)
        c.setopt(pycurl.TIMEOUT, 25)
        c.setopt(pycurl.HTTPHEADER, self.headers)
        c.setopt( pycurl.PROXY, '127.0.0.1' )
        c.setopt( pycurl.PROXYPORT, 9050 )
        c.setopt( pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5_HOSTNAME )
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        self.url = url
        try:
            c.perform()
            self.response_string = b.getvalue()
        except pycurl.error as e:
            failure = e
            self.response_string = None
    finally:
        # Release the handle and buffer on every path.
        c.close()
        b.close()

    if failure is None:
        return
    if contador_curl<=10:
        time.sleep(5)
        # Pass every argument by keyword so the retry counter really
        # advances instead of landing in ``compressed``.
        self.pideURL(url, compressed=compressed, cookie=cookie,
                     contador_curl=contador_curl+1)
    else:
        print ('Error: ',url)
        print ('Error log: ',failure)
def pidePOST(self,url,data,compressed = False,cookie=False, contador_curl = 0, debug=False):
time.sleep(3)
Scrape.contador+=1
print ("\n"+url)
print ("\n\t.l."+str(Scrape.contador))
c = pycurl.Curl()
if cookie:
c.setopt(pycurl.COOKIEJAR, 'cookie.txt')
c.setopt(pycurl.COOKIEFILE, 'cookie.txt')
c.setopt(pycurl.URL, url)
c.setopt(pycurl.CONNECTTIMEOUT, 15)
c.setopt(pycurl.TIMEOUT, 25)
c.setopt(pycurl.HTTPHEADER, self.headers)
if compressed:
c.setopt(pycurl.ENCODING, 'gzip,deflate')
c.setopt(c.POSTFIELDS, data)
if debug:
c.setopt(c.VERBOSE, True)
c.setopt( pycurl.PROXY, '127.0.0.1' )
c.setopt( pycurl.PROXYPORT, 9050 )
c.setopt( pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5_HOSTNAME )
b = BytesIO()
BytesIO
c.setopt(pycurl.WRITEFUNCTION, b.write)
self.url = url
try:
c.perform()
self.response_string = b.getvalue()
#print (self.response_string)
b.close()
except Exception as e:
#print ('Razon:',e)
self.response_string = None