def get_all_page(self, response):
    """Yield one listing request per page announced by the embedded pager.

    The response body is searched for a ``PAGE.pager = {...};`` assignment,
    and the quoted ``count`` value inside that object is taken as the total
    number of pages. For every page number from 1 up to that count, a URL is
    built with ``apiconstants.get_douyu_list_url`` and a ``Request`` whose
    callback is ``self.parse`` is yielded.

    Nothing is yielded when the pager assignment, or a ``count`` field inside
    it, cannot be found.

    Args:
        response: Object whose ``body`` attribute holds the page content,
            either as bytes or as text.

    Yields:
        Request: one request per page, in ascending page order.
    """
    raw_body = response.body
    if isinstance(raw_body, bytes):
        # Decode rather than str(bytes): str() returns the "b'...'" repr, in
        # which newlines show up as a literal backslash-n and non-ASCII
        # characters are turned into escape sequences.
        text = raw_body.decode("utf-8", errors="replace")
    else:
        text = str(raw_body)

    all_page = 0  # total number of pages; stays 0 when no pager is found
    # DOTALL lets the pager object literal span several lines.
    pager = re.search(r"PAGE\.pager = (\{.*?\});", text, re.DOTALL)
    if pager:
        # Drop every whitespace character so 'count: "12"' and
        # 'count:"12"' are treated the same way.
        pager_data = "".join(pager.group(1).split())
        # The greedy prefix keeps the historical choice of the last
        # 'count:"N"' occurrence inside the pager object.
        count = re.match(r'.*count:"(\d+)"', pager_data, re.DOTALL)
        if count:
            all_page = int(count.group(1))
            print("all_page :" + str(all_page))

    # Queue one request for each listing page.
    for current_page in range(1, all_page + 1):
        url = apiconstants.get_douyu_list_url(current_page)
        print(url)
        yield Request(url=url, callback=self.parse)
    print("????")
评论列表
文章目录