def _fetchPageSource(url):
    """Render ``url`` in a fresh PhantomJS session and return the page source.

    The driver is always shut down with ``quit()`` -- also when loading the
    page raises -- so no PhantomJS process is left behind.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    # Skip image downloads; only the markup is parsed afterwards.
    dcap["phantomjs.page.settings.loadImages"] = False
    # No executable_path is passed, so selenium's default phantomjs lookup
    # is used; pass executable_path=... here if the binary lives elsewhere.
    driver = webdriver.PhantomJS(desired_capabilities=dcap)
    try:
        driver.get(url)
        return driver.page_source
    finally:
        # quit() (not close()) ends the whole session, not just the window.
        driver.quit()


def getSoup(start, stop):
    """Crawl the bilibili space page of every uid in ``start..stop`` inclusive.

    For each uid the page is rendered with PhantomJS and parsed with
    BeautifulSoup, the value returned by ``getInfo`` is used as the username,
    the follower data comes from ``GetFansUid.GetFansUid(uid).get_uids()``,
    and the result is printed and handed to ``saveData``.

    A failure on one uid prints ``"get page error"`` and the crawl moves on
    to the next uid. The upper bound stays fixed at ``stop``: the previous
    recursive retry widened it by one on every failure, which never
    terminated when every request failed.

    Args:
        start: First uid to crawl.
        stop: Last uid to crawl (inclusive).

    Returns:
        None.
    """
    for number in range(start, stop + 1):
        try:
            url = 'http://space.bilibili.com/' + str(number) + '/#!/'
            content = _fetchPageSource(url)
            soup = BeautifulSoup(content, 'lxml')
            username = getInfo(soup)
            # The loop counter doubles as the user's uid.
            get_fans_uid = GetFansUid.GetFansUid(number)
            # get_uids() yields a pair, unpacked as (follower uids, follower count).
            fansuid, fansnumber = get_fans_uid.get_uids()
            print(number, username, fansnumber)
            saveData(number, username, fansnumber, fansuid)
        except Exception:
            # Best-effort crawl: report the failure and continue with the
            # next uid instead of aborting the whole range.
            print("get page error")
评论列表
文章目录