def download(self, link, name, url):
    """Load ``link`` in a throw-away PhantomJS browser and return its HTML.

    A new PhantomJS driver is created for every call, using a user agent
    picked at random from ``self.agents`` and a cookie picked at random from
    ``self.cookie``. Image loading is disabled and the page-load timeout is
    20 seconds. The driver is always quit before returning.

    If the loaded page contains an element with class ``page_verify``, the
    page is treated as a verification challenge: ``self.ocr4wechat(link)`` is
    called, the method sleeps for 5 seconds, and ``name`` is appended to
    ``list_error.txt``.

    :param link: address that is opened in the browser.
    :param name: identifier appended (UTF-8 encoded, one per line) to
        ``list_error.txt`` whenever the download does not succeed.
    :param url: only printed in the diagnostics when an exception occurs.
    :return: ``(link, html)`` on success; ``None`` when the driver could not
        be started, the page required verification, or any exception was
        raised while loading the page.
    """

    def record_failure():
        # Binary append mode on purpose: ``name.encode('utf-8')`` is ``bytes``
        # on Python 3, and writing bytes to a text-mode file raises TypeError,
        # which used to make the error log itself crash.
        with open(r'list_error.txt', 'ab') as f:
            f.write(name.encode('utf-8') + b'\n')

    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = random.choice(self.agents)
    dcap["takesScreenshot"] = False
    dcap["phantomjs.page.customHeaders.Cookie"] = random.choice(self.cookie)

    try:
        driver1 = webdriver.PhantomJS(
            desired_capabilities=dcap,
            service_args=['--load-images=no'],
        )
    except Exception as e:
        # Driver start-up failed: there is nothing to quit, just log and bail.
        record_failure()
        print(datetime.datetime.now())
        print(url)
        print(e)
        return None

    try:
        driver1.set_page_load_timeout(20)
        driver1.get(link)

        # Any failure to locate the element counts as "no verification page".
        # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit still
        # propagate.
        try:
            driver1.find_element_by_class_name('page_verify')
        except Exception:
            needs_verify = False
        else:
            needs_verify = True

        if not needs_verify:
            return link, driver1.page_source

        print('page needs verify, stop the program')
        print('the last weixinNUM is %s\n' % name)
        self.ocr4wechat(link)
        time.sleep(5)
        record_failure()
    except Exception as e:
        record_failure()
        print(url)
        print(datetime.datetime.now())
        print(e)
    finally:
        # Runs on every path above, including the successful early return.
        driver1.quit()
    return None
# [page navigation residue] Comment list
# [page navigation residue] Table of contents