def getSoup(start, stop,
            phantomjs_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe'):
    """Render bilibili space pages for a range of user ids and hand each to getInfo.

    For every ``number`` in ``start..stop`` (both inclusive) the page
    ``http://space.bilibili.com/<number>/#!/`` is loaded in a headless
    PhantomJS browser, the rendered HTML is parsed with BeautifulSoup
    (``lxml`` parser) and the resulting soup is passed to ``getInfo``.

    A failure on one id (browser start-up, page load, parsing, or ``getInfo``
    itself) is logged with its traceback and the loop moves on to the next
    id; the function itself never raises for such failures.

    Args:
        start: First user id to fetch.
        stop: Last user id to fetch (inclusive). If ``stop < start`` nothing
            is fetched.
        phantomjs_path: Location of the PhantomJS executable. Defaults to the
            path that was previously hard-coded here.

    Returns:
        None.
    """
    import logging

    logger = logging.getLogger(__name__)

    for number in range(start, stop + 1):
        # e.g. "http://space.bilibili.com/1643718/#!/"
        url = 'http://space.bilibili.com/' + str(number) + '/#!/'
        try:
            content = _fetch_page_source(url, phantomjs_path)
            soup = BeautifulSoup(content, 'lxml')
            getInfo(soup)
        except Exception:
            # Best-effort scraping: record the failure and keep going so one
            # bad profile does not silently abort the rest of the range.
            logger.exception("Failed to process %s", url)


def _fetch_page_source(url, phantomjs_path):
    """Load ``url`` in a fresh PhantomJS instance and return the rendered HTML."""
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    # Images are not needed for scraping; skip loading them.
    dcap["phantomjs.page.settings.loadImages"] = False
    driver = webdriver.PhantomJS(executable_path=phantomjs_path,
                                 desired_capabilities=dcap)
    try:
        driver.get(url)
        return driver.page_source
    finally:
        # Always shut the browser process down, even when the load fails.
        driver.quit()
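# NOTE: the lines below are navigation text from the web page this snippet
# was copied from ("comment list", "table of contents"); they are not code.
# 评论列表
# 文章目录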