def get_cookies():
    """Fetch the Tieba home page and persist the cookies it sets.

    A ``requests`` session whose cookie store is an ``LWPCookieJar`` bound
    to ``cookies.txt`` issues a GET for ``http://tieba.baidu.com/``.  The
    jar is then written to disk with ``ignore_expires`` and
    ``ignore_discard`` enabled and handed back to the caller.

    Return value: the ``LWPCookieJar`` used for the request.
    """
    # The two fragments are joined without a separating space, exactly as
    # the original adjacent string literals were.
    user_agent = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
        "(KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36"
    )
    cookie_jar = cookielib.LWPCookieJar('cookies.txt')

    session = requests.session()
    session.headers = {"User-Agent": user_agent}
    session.cookies = cookie_jar
    session.get('http://tieba.baidu.com/')

    cookie_jar.save(ignore_discard=True, ignore_expires=True)
    return cookie_jar
# Example source code for the Python class LWPCookieJar()
def get_cookies_in_cookiejar(host):
    """Export cookies and put them in a cookiejar.

    Rows of the ``moz_cookies`` table whose ``host`` column contains *host*
    are converted to ``Cookie`` objects and stored in a new jar.

    Return value: a cookiejar filled with cookies.
    """
    # based on http://www.guyrutenberg.com/2010/11/27/building-cookiejar-out-of-firefoxs-cookies-sqlite/
    cj = LWPCookieJar()  # This is a subclass of FileCookieJar that has useful load and save methods
    cookie_db = get_cookie_db_path(str(FIREFOX_DIR))
    conn = db.connect(cookie_db)
    try:
        cursor = conn.cursor()
        # The column list comes from the module constant CONTENTS; the
        # caller-supplied host is bound as a parameter rather than being
        # formatted into the SQL text.
        # NOTE(review): the "?" placeholder assumes `db` is a sqlite3-style
        # (qmark paramstyle) DB-API module -- confirm against the import.
        sql = "SELECT {c} FROM moz_cookies WHERE host LIKE ?".format(c=CONTENTS)
        cursor.execute(sql, ('%{h}%'.format(h=host),))
        for item in cursor.fetchall():
            # Column positions follow the original code: item[0] is used as
            # the domain, item[1] path, item[2] secure flag, item[3] expiry,
            # item[4] name and item[5] value.
            domain = item[0]
            has_initial_dot = domain.startswith('.')
            expires = item[3]
            c = Cookie(0, item[4], item[5],
                       None, False,
                       domain, has_initial_dot, has_initial_dot,
                       item[1], False,
                       item[2],
                       expires, expires == "",
                       None, None, {})
            cj.set_cookie(c)
    finally:
        # Release the database handle even if the query or conversion fails.
        conn.close()
    return cj
def parse(self, response):
    """Yield one POST request per topic category found in *response*.

    The names and ``data-id`` values of every ``li.zm-topic-cat-item``
    element are extracted; for each id a ``FormRequest`` to the
    ``TopicsPlazzaListV2`` endpoint is yielded with ``topic_parse`` as the
    callback and the matching topic name in ``meta``.
    """
    item_xpath = '//li[@class="zm-topic-cat-item"]'
    topic_names = response.selector.xpath(item_xpath + '/a/text()').extract()
    topic_ids = response.selector.xpath(item_xpath + '/@data-id').extract()
    print("?30???")
    for index, raw_id in enumerate(topic_ids):
        params = {
            "topic_id": int(raw_id),
            "offset": 0,
            "hash_id": "d17ff3d503b2ebce086d2f3e98944d54",
        }
        yield FormRequest(
            url='https://www.zhihu.com/node/TopicsPlazzaListV2',
            method='POST',
            headers=self.set_headers('https://www.zhihu.com/topics'),
            cookies=cookielib.LWPCookieJar(filename='cookies'),
            # The dict repr is turned into compact JSON-like text:
            # single quotes become double quotes and all spaces are removed.
            formdata={
                'method': 'next',
                'params': str(params).replace("'", '"').replace(" ", ""),
            },
            callback=self.topic_parse,
            meta={'topic_name': topic_names[index]},
        )
def login(self, username, pwd, cookie_file):
    """Log in using saved cookies when possible, credentials otherwise.

    (1) If *cookie_file* exists, try to load cookies from it and install
        them as the global urllib2 opener;
    (2) if the file is missing or cannot be loaded, fall back to
        ``self.do_login``.

    Returns 1 when cookies were loaded, otherwise whatever
    ``self.do_login(username, pwd, cookie_file)`` returns.
    """
    # No cookie file on disk: perform a full login.
    if not os.path.exists(cookie_file):
        return self.do_login(username, pwd, cookie_file)

    try:
        jar = cookielib.LWPCookieJar(cookie_file)
        jar.load(ignore_discard=True, ignore_expires=True)
    except cookielib.LoadError:
        LOG.info('Loading cookies error')
        return self.do_login(username, pwd, cookie_file)

    # Install the loaded cookies for every subsequent urllib2 request.
    handler = urllib2.HTTPCookieProcessor(jar)
    urllib2.install_opener(
        urllib2.build_opener(handler, urllib2.HTTPHandler))
    LOG.info('Loading cookies success')
    return 1
def save_cookie(self, text, cookie_file=CONF.cookie_file):
    """Follow the login redirect contained in *text* and save the cookies.

    A fresh ``LWPCookieJar`` is installed as the global urllib2 opener, the
    URL inside ``location.replace('...')`` is extracted from *text* and
    fetched, and the JSON argument of ``feedBackUrlCallBack(...)`` in the
    reply is inspected.

    :param text: login response body; decoded from GBK when running on
        Python 3.
    :param cookie_file: path the cookie jar is written to on success.
    :return: 1 if the feedback's ``result`` field is truthy (cookies are
        then saved to *cookie_file*), 0 otherwise or on any error.
    """
    cookie_jar2 = cookielib.LWPCookieJar()
    cookie_support2 = urllib2.HTTPCookieProcessor(cookie_jar2)
    opener2 = urllib2.build_opener(cookie_support2, urllib2.HTTPHandler)
    urllib2.install_opener(opener2)
    if six.PY3:
        text = text.decode('gbk')
    # Raw strings keep the regex escapes intact without relying on Python
    # passing unknown escape sequences through unchanged.
    redirect_re = re.compile(r"location\.replace\('(.*?)'\)")
    feedback_re = re.compile(r'feedBackUrlCallBack\((.*)\)', re.MULTILINE)
    try:
        # Search login redirection URL
        login_url = redirect_re.search(text).group(1)
        # NOTE(review): on Python 3 read() presumably returns bytes, which a
        # str pattern cannot search -- confirm how urllib2 is aliased here.
        data = urllib2.urlopen(login_url).read()
        # Verify login feedback, check whether result is TRUE
        feedback = feedback_re.search(data).group(1)
        feedback_json = json.loads(feedback)
        if not feedback_json['result']:
            return 0
        cookie_jar2.save(cookie_file,
                         ignore_discard=True,
                         ignore_expires=True)
        return 1
    except Exception:
        # Any failure (no match, network error, bad JSON) counts as a
        # failed login; KeyboardInterrupt/SystemExit are no longer swallowed.
        return 0
def login(self, username, pwd, cookie_file):
    """Log in using saved cookies when possible, credentials otherwise.

    (1) If *cookie_file* exists, try to load cookies from it and install
        them as the global urllib2 opener;
    (2) if the file is missing or cannot be loaded, fall back to
        ``self.do_login``.

    Returns 1 when cookies were loaded, otherwise whatever
    ``self.do_login(username, pwd, cookie_file)`` returns.
    """
    # No cookie file on disk: perform a full login.
    if not os.path.exists(cookie_file):
        return self.do_login(username, pwd, cookie_file)

    try:
        jar = cookielib.LWPCookieJar(cookie_file)
        jar.load(ignore_discard=True, ignore_expires=True)
    except cookielib.LoadError:
        print('Loading cookies error')
        return self.do_login(username, pwd, cookie_file)

    # Install the loaded cookies for every subsequent urllib2 request.
    handler = urllib2.HTTPCookieProcessor(jar)
    urllib2.install_opener(urllib2.build_opener(handler, urllib2.HTTPHandler))
    print('Loading cookies success')
    return 1
def __init__(self):
    """Set up request headers, the HTTP session and the stored cookie jar.

    The session's cookies are an ``LWPCookieJar`` bound to
    ``self.storage.cookie_path``.  After loading it, the first
    ``YYYY-MM-DD`` date found in the cookie file is compared with today's
    date; if it is earlier, the stored user record is blanked and the
    cookie file is removed.  On ``IOError`` the error is logged and the
    jar is saved.
    """
    self.header = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com/search/',
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36'  # NOQA
    }
    self.cookies = {'appver': '1.5.2'}
    self.playlist_class_dict = {}
    self.session = requests.Session()
    self.storage = Storage()
    self.session.cookies = LWPCookieJar(self.storage.cookie_path)
    try:
        self.session.cookies.load()
        cookie = ''
        if os.path.isfile(self.storage.cookie_path):
            # `with` closes the handle even if read() raises.  The handle is
            # still bound to self.file, as before, in case other code reads
            # that attribute.
            with open(self.storage.cookie_path, 'r') as self.file:
                cookie = self.file.read()
        expire_time = re.compile(r'\d{4}-\d{2}-\d{2}').findall(cookie)
        # ISO-formatted dates compare correctly as plain strings.
        if expire_time and expire_time[0] < time.strftime('%Y-%m-%d'):
            self.storage.database['user'] = {
                'username': '',
                'password': '',
                'user_id': '',
                'nickname': '',
            }
            self.storage.save()
            os.remove(self.storage.cookie_path)
    except IOError as e:
        log.error(e)
        self.session.cookies.save()
def __init__(self):
    """Create the HTTP session and try to restore cookies from disk.

    ``self.isLogin`` ends up True when ``cookies_yi.txt`` was loaded into
    the session's ``LWPCookieJar`` and False when loading failed.
    """
    print("init YiClient")
    self.isLogin = False
    self.session = requests.session()
    # Back the session with a file-based jar so cookies can be persisted.
    self.session.cookies = cookiejar.LWPCookieJar(filename='cookies_yi.txt')
    try:
        self.session.cookies.load(ignore_discard=True)
    except Exception:
        # Best effort: a missing or unreadable cookie file just means the
        # client starts logged out.  Unlike a bare ``except``, this lets
        # KeyboardInterrupt/SystemExit propagate.
        print("???cookies??")
        self.isLogin = False
    else:
        self.isLogin = True
# ?????
def start_requests(self):
    """Ensure a login has happened, then yield a request per start URL.

    ``isLogin`` and ``login`` are called as bare names defined outside
    this method.  Every URL in ``self.start_urls`` is requested with the
    headers from ``self.set_headers`` and a new ``LWPCookieJar`` named
    ``cookies``.
    """
    if not isLogin():
        # NOTE(review): the account and password are hard-coded here;
        # consider moving them to configuration or prompting for them.
        account = '15728689495'
        secret = 'q12345'
        login(secret, account)
    else:
        print('?????')

    for start_url in self.start_urls:
        yield Request(
            url=start_url,
            headers=self.set_headers('https://www.zhihu.com'),
            cookies=cookielib.LWPCookieJar(filename='cookies'),
        )
def topic_parse(self, response):
    """Parse the sub-topic listing and request each sub-topic's answers.

    Responses with status 400, 403 or 302 only flag the originating
    request with ``meta["change_proxy"] = True`` and print its Referer.
    Otherwise the JSON body's ``msg`` fragments are joined into one HTML
    string, sub-topic fields are extracted with regular expressions, and a
    request for ``<sub-topic url>/top-answers?page=3`` is yielded per
    sub-topic name with ``top_answers_parse`` as the callback.
    """
    if response.status in [400, 403, 302]:
        response.request.meta["change_proxy"] = True
        print ("?????????{url}".format(url=response.request.headers["Referer"]))
        return

    payload = json.loads(response.body_as_unicode())
    html = ''.join(payload['msg'])

    subtopic_names = re.findall('<strong>(.*?)</strong>', html)
    # Descriptions and pictures are extracted but not used further in this
    # method.
    subtopic_descriptions = re.findall('<p>(.*?)</p>', html)
    subtopic_urls = re.findall('<a target="_blank" href="([^"]*)".*?>', html)
    subtopic_pics = re.findall('<img src="(.*?)" alt=', html)
    print("subtopic: %s"%len(subtopic_names))

    # Indexing by the number of names (not URLs) mirrors the original loop.
    for index in range(len(subtopic_names)):
        base_url = "https://www.zhihu.com" + subtopic_urls[index]
        yield Request(
            url=base_url + "/top-answers?page=3",
            headers=self.set_headers(base_url + "/hot"),
            cookies=cookielib.LWPCookieJar(filename='cookies'),
            callback=self.top_answers_parse,
        )
# ????????????????
def __init__(self):
    """Set up request headers, the HTTP session and the stored cookie jar.

    The session's cookies are an ``LWPCookieJar`` bound to
    ``self.storage.cookie_path``.  After loading it, the first
    ``YYYY-MM-DD`` date found in the cookie file is compared with today's
    date; if it is earlier, the stored user record is blanked and the
    cookie file is removed.  On ``IOError`` the error is logged and the
    jar is saved.
    """
    self.header = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com/search/',
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36'  # NOQA
    }
    self.cookies = {'appver': '1.5.2'}
    self.playlist_class_dict = {}
    self.session = requests.Session()
    self.storage = Storage()
    self.session.cookies = LWPCookieJar(self.storage.cookie_path)
    try:
        self.session.cookies.load()
        cookie = ''
        if os.path.isfile(self.storage.cookie_path):
            # `with` closes the handle even if read() raises.  The handle is
            # still bound to self.file, as before, in case other code reads
            # that attribute.
            with open(self.storage.cookie_path, 'r') as self.file:
                cookie = self.file.read()
        expire_time = re.compile(r'\d{4}-\d{2}-\d{2}').findall(cookie)
        # ISO-formatted dates compare correctly as plain strings.
        if expire_time and expire_time[0] < time.strftime('%Y-%m-%d'):
            self.storage.database['user'] = {
                'username': '',
                'password': '',
                'user_id': '',
                'nickname': '',
            }
            self.storage.save()
            os.remove(self.storage.cookie_path)
    except IOError as e:
        log.error(e)
        self.session.cookies.save()
def __init__(self):
    """Set up request headers, the HTTP session and the stored cookie jar.

    The session's cookies are an ``LWPCookieJar`` bound to
    ``self.storage.cookie_path``.  After loading it, the cookie file is
    read and the first ``YYYY-MM-DD`` date in it is compared with today's
    date; if it is earlier, the stored user record is blanked and the
    cookie file is removed.  On ``IOError`` the error is logged and the
    jar is saved.
    """
    self.header = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com/search/',
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36'  # NOQA
    }
    self.cookies = {'appver': '1.5.2'}
    self.playlist_class_dict = {}
    self.session = requests.Session()
    self.storage = Storage()
    self.session.cookies = LWPCookieJar(self.storage.cookie_path)
    try:
        self.session.cookies.load()
        # `with` closes the handle even if read() raises.  The handle is
        # still bound to self.file, as before, in case other code reads
        # that attribute.
        with open(self.storage.cookie_path, 'r') as self.file:
            cookie = self.file.read()
        # Named `expire_dates` so the builtin `str` is no longer shadowed.
        expire_dates = re.compile(r'\d{4}-\d{2}-\d{2}').findall(cookie)
        # ISO-formatted dates compare correctly as plain strings.
        if expire_dates and expire_dates[0] < time.strftime('%Y-%m-%d'):
            self.storage.database['user'] = {
                'username': '',
                'password': '',
                'user_id': '',
                'nickname': '',
            }
            self.storage.save()
            os.remove(self.storage.cookie_path)
    except IOError as e:
        log.error(e)
        self.session.cookies.save()
def __init__(self):
    """Initialise the base class and attach a file-backed cookie jar.

    The jar is bound to ``settings.COOKIES_FILE`` and loaded from it; a
    missing file is silently ignored.  ``verify`` is set to False and
    ``headers`` to ``settings.HEADERS``.
    """
    super(Model, self).__init__()

    jar = cookiejar.LWPCookieJar(filename=settings.COOKIES_FILE)
    self.cookies = jar
    try:
        jar.load(ignore_discard=True)
    except FileNotFoundError:
        # No saved cookies yet: start with an empty jar.
        pass

    self.verify = False
    self.headers = settings.HEADERS
def __init__(self):
    """Store the GitHub URLs and headers and create a cookie-backed session.

    The session's cookie store is an ``LWPCookieJar`` bound to the file
    name ``github_cookie``; nothing is loaded from disk here.
    """
    session = requests.session()
    session.cookies = cookielib.LWPCookieJar(filename='github_cookie')
    self.session = session

    self.login_url = 'https://github.com/login'
    self.post_url = 'https://github.com/session'
    self.logined_url = 'https://github.com/settings/profile'

    self.headers = dict([
        ('Referer', 'https://github.com/'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'),
        ('Host', 'github.com'),
    ])
def load_cookie():
    """Attach a file-backed cookie jar to the shared session and load it.

    The module-level ``session`` gets an ``LWPCookieJar`` bound to the
    file ``cookies``.  If loading fails a message is printed and the jar
    is left empty.
    """
    session.cookies = cookielib.LWPCookieJar(filename='cookies')
    try:
        session.cookies.load(ignore_discard=True)
    except Exception:
        # Best effort: a missing or corrupt cookie file is not fatal.
        # Unlike a bare ``except``, KeyboardInterrupt/SystemExit propagate.
        print("Cookie ????")
# ???????
def load_cookies(self, cookie_storage, keep_old=False):
    """Load cookies from a cookielib CookieJar or a Set-Cookie3 text file.

    :param cookie_storage: file location string on disk or CookieJar
        instance.
    :param keep_old: Don't reset, keep cookies not overridden.
    :raises ValueError: if *cookie_storage* is neither a ``str`` nor an
        object whose class name ends with ``CookieJar``.
    """
    def toQtCookie(PyCookie):
        # Build the Qt cookie field by field from the Python cookie.
        qt_cookie = QNetworkCookie(PyCookie.name, PyCookie.value)
        qt_cookie.setSecure(PyCookie.secure)
        if PyCookie.path_specified:
            qt_cookie.setPath(PyCookie.path)
        if PyCookie.domain != "":
            qt_cookie.setDomain(PyCookie.domain)
        # A missing (None) or zero expiry leaves the Qt expiration unset.
        if PyCookie.expires:
            expiry = QDateTime()
            expiry.setTime_t(PyCookie.expires)
            qt_cookie.setExpirationDate(expiry)
        # not yet handled (maybe less useful):
        #   py cookie.rest / QNetworkCookie.setHttpOnly()
        return qt_cookie

    def toQtCookieJar(PyCookieJar, QtCookieJar):
        # Start from the existing Qt cookies only when asked to keep them.
        if keep_old:
            merged = QtCookieJar.allCookies()
        else:
            merged = []
        for py_cookie in PyCookieJar:
            merged.append(toQtCookie(py_cookie))
        QtCookieJar.setAllCookies(merged)

    # Dispatch on the class name, exactly as the original did.
    storage_type = cookie_storage.__class__.__name__
    if storage_type == 'str':
        file_jar = LWPCookieJar(cookie_storage)
        file_jar.load()
        toQtCookieJar(file_jar, self.cookie_jar)
    elif storage_type.endswith('CookieJar'):
        toQtCookieJar(cookie_storage, self.cookie_jar)
    else:
        raise ValueError('unsupported cookie_storage type.')
def __init__(self, args):
    """Store *args* and build a cookie-aware URL opener.

    If ``args.cookiefile`` is set, cookies are loaded from it into a new
    ``LWPCookieJar``; ``has_cookies`` records whether that load succeeded
    (an ``IOError`` counts as failure).  ``nextfile`` starts out as
    ``args.file``.
    """
    self.modulus = None
    self.exponent = None
    self.args = args

    jar = LWPCookieJar()
    self.jar = jar
    self.has_cookies = False
    if self.args.cookiefile:
        self.has_cookies = True
        try:
            jar.load(self.args.cookiefile, ignore_discard=True)
        except IOError:
            # Missing or unreadable cookie file: carry on without cookies.
            self.has_cookies = False

    self.opener = build_opener(HTTPCookieProcessor(jar))
    self.nextfile = args.file
# end def __init__
def login(self, username, password):
    """Log in to zhenai.com, asking the user to solve a captcha if shown.

    Saved cookies are loaded from ``zhanaicookies`` (best effort), the
    login page is fetched, and if it contains a ``codeImg`` captcha image
    that image is downloaded to ``zhenai.jpg``, displayed, and the user is
    prompted for the code.  The login form is then posted, the session
    cookies are saved, and the user's registration info page is fetched
    and printed.

    :param username: value sent as the ``loginInfo`` form field.
    :param password: value sent as the ``password`` form field.
    """
    self.session.cookies = cookielib.LWPCookieJar(filename='zhanaicookies')
    try:
        self.session.cookies.load(ignore_discard=True)
    except Exception:
        # Best effort: continue with an empty jar if nothing can be loaded.
        print("Cookie ????")

    loginUrl = 'http://profile.zhenai.com/login/login.jsp?fromurl=http://profile.zhenai.com/v2/personal/home.do'
    pageContent = self.session.get(loginUrl, headers=self.headers)
    codePattern = re.compile('<img id="codeImg" src="(.*?)">', re.S)
    result = re.findall(codePattern, pageContent.text)

    # Default to an empty code so the form can still be built when the
    # page shows no captcha (previously `code` was undefined in that case).
    # NOTE(review): assumes the form post was meant to happen whether or
    # not a captcha is present -- confirm against the original layout.
    code = ''
    if result:
        captcha_url = 'http://profile.zhenai.com' + result[0]
        r = self.session.get(captcha_url)
        with open('zhenai.jpg', 'wb') as f:
            f.write(r.content)
        try:
            im = Image.open('zhenai.jpg')
            im.show()
            im.close()
        except Exception:
            # Showing the image is optional; tell the user where it is.
            print(u'?? %s ????zhenai.jpg ????' % os.path.abspath('zhenai.jpg'))
        code = input("please input verify code:")

    data = {
        'fromurl': 'http://profile.zhenai.com/v2/personal/home.do',
        'loginZAT': '0',
        'formHuntWedding': '',
        'whichTV': '',
        'fid': '',
        'mid': '',
        'redirectUrl': '',
        'isTpRedirect': '',
        'loginmode': '2',
        'whereLogin': 'login_page',
        'rememberpassword': '1',
        'loginInfo': username,
        'password': password,
        'imgCode': code,
    }
    postUrl = 'http://profile.zhenai.com/login/loginactionindex.jsps'
    loginContent = self.session.post(postUrl, data=data, headers=self.headers)
    print(loginContent.text)
    self.session.cookies.save()

    userUrl = 'http://profile.zhenai.com/v2/userdata/showRegInfo.do'
    userContent = self.session.get(userUrl, headers=self.headers)
    print(userContent.text)
def top_answers_parse(self, response):
    """Emit a topic item and a request for each answer on the page.

    If the body equals one of the known "banned"/"Bad Request" markers,
    the original request is re-yielded with ``meta["change_proxy"]`` set.
    Otherwise the topic id is taken from the URL (characters from index 28
    up to the last ``/``), the topic name and description are read from
    the page, a ``TopicItem`` is yielded, and one ``Request`` per answer
    link is yielded with ``answer_parse`` as the callback.
    """
    blocked_bodies = [
        "banned",
        b"{'reason': b'Bad Request', 'status': 400}",
        "{'reason': b'Bad Request', 'status': 400}",
    ]
    if response.body in blocked_bodies:
        retry = response.request
        retry.meta["change_proxy"] = True
        yield retry
        return

    # Example URL: https://www.zhihu.com/topic/19551137/top-answers?page=2
    # Index 28 is the length of "https://www.zhihu.com/topic/".
    last_slash = response.url.rfind("/")
    topic_id = int(response.url[28:last_slash])

    topic_name = response.selector.xpath(
        '//h1[@class="zm-editable-content"]/text()').extract_first()
    topic_description = response.selector.xpath(
        '//div[@id="zh-topic-desc"]/div[@class="zm-editable-content"]/text()'
    ).extract_first()

    topicItem = TopicItem()
    topicItem['type'] = 'topic'
    topicItem['topic_id'] = topic_id
    topicItem['topic_name'] = topic_name
    topicItem['topic_description'] = topic_description
    yield topicItem

    answer_paths = response.selector.xpath(
        '//div[@class="feed-item feed-item-hook folding"]/link/@href'
    ).extract()
    # The extracted hrefs are site-relative; prefix them with the host.
    for answer_path in answer_paths:
        yield Request(
            url="https://www.zhihu.com" + answer_path,
            headers=self.set_headers(None),
            cookies=cookielib.LWPCookieJar(filename='cookies'),
            callback=self.answer_parse,
            meta={'topic_id': topic_id},
        )
def save_cookies(self, cookie_storage):
    """Save to cookielib's CookieJar or Set-Cookie3 format text file.

    Every cookie in ``self.cookie_jar`` (a Qt cookie jar) is converted to a
    Python ``Cookie`` and stored in the target jar.

    :param cookie_storage: file location string or CookieJar instance.
    :raises ValueError: if *cookie_storage* is neither a ``str`` nor an
        object whose class name ends with ``CookieJar``.
    """
    # Largest expiry kept: the signed 32-bit boundary, to avoid ValueError
    # on 32-bit platforms.
    max_expires = 2147483647

    def toPyCookieJar(QtCookieJar, PyCookieJar):
        for c in QtCookieJar.allCookies():
            PyCookieJar.set_cookie(toPyCookie(c))

    def toPyCookie(QtCookie):
        # NOTE(review): str() is applied to whatever the Qt accessors
        # return; confirm these are text rather than byte arrays on the
        # Qt binding in use.
        name = str(QtCookie.name())
        value = str(QtCookie.value())

        path_text = str(QtCookie.path())
        path_specified = path_text != ""
        path = path_text if path_specified else None

        domain = str(QtCookie.domain())
        domain_specified = domain != ""
        if domain_specified:
            domain_initial_dot = domain.startswith('.')
        else:
            domain_initial_dot = None

        # int() replaces the Python-2-only long(); on Python 2 int()
        # promotes to long automatically, so the value is unchanged.
        expires = min(int(QtCookie.expirationDate().toTime_t()), max_expires)

        return Cookie(
            0,                  # version
            name,
            value,
            None,               # port
            False,              # port_specified
            domain,
            domain_specified,
            domain_initial_dot,
            path,
            path_specified,
            QtCookie.isSecure(),
            expires,
            False,              # discard
            None,               # comment
            None,               # comment_url
            {},                 # rest
        )

    if cookie_storage.__class__.__name__ == 'str':
        cj = LWPCookieJar(cookie_storage)
        toPyCookieJar(self.cookie_jar, cj)
        cj.save()
    elif cookie_storage.__class__.__name__.endswith('CookieJar'):
        toPyCookieJar(self.cookie_jar, cookie_storage)
    else:
        raise ValueError('unsupported cookie_storage type.')