def getWeiboContent(self):
    """Fetch ``self.URL`` and return the matching ``<script>`` element as text.

    The page is requested through ``self.session`` with ``self.myheader`` as
    the request headers, parsed with BeautifulSoup (``lxml`` parser), and the
    ``<script>`` elements are scanned in document order for the marker
    ``"pid":"pl_weibo_direct"``.

    Returns:
        * ``str(tag)`` of the first ``<script>`` element containing the marker;
        * ``""`` when the page has ``<script>`` elements but none contains it;
        * ``False`` when the request raises, the status code is not 200, the
          body cannot be read completely, or the page has no ``<script>``
          element at all.

    Progress and failure messages are printed to stdout.
    """
    marker = r"\"pid\":\"pl_weibo_direct\""

    try:
        req = self.session.get(self.URL, headers=self.myheader)
        if req.status_code != 200:
            print('This session not work with code 200.')
            return False
        print('This session work.')
        # getPublicIp() is deliberately kept inside this try: if the IP
        # lookup raises, the session is reported as not working, exactly
        # as before.
        print('The current Ip is ' + self.getPublicIp())
    except Exception:
        # Not a bare ``except:`` -- KeyboardInterrupt / SystemExit must
        # propagate so the crawl can actually be interrupted.
        print('This session not work.')
        return False

    try:
        page = req.content
    except httplib.IncompleteRead:
        print('Incompleted!')
        return False

    # try to use phantomjs
    # cmd = 'phantomjs' + ' request.js ' + self.URL + ' '+ str(self.myheader)
    # str_body = str(os.popen(cmd).read())
    # page = str_body.split('\nbegin\nStatus: success\n')[1]
    soupPage = BeautifulSoup(page, 'lxml')
    scripts = soupPage.find_all('script')
    if not scripts:
        print('you may need to input an access code')
        return False

    for script in scripts:
        # Serialise each tag once and reuse the text for both the search
        # and the return value.
        text = str(script)
        if re.search(marker, text) is not None:
            return text

    # <script> elements exist, but none carries the marker.
    return ""
评论列表
文章目录