def parse_search_page(self, response):
    """Yield the tweets of a search page, then a request for the next one.

    Every item produced by ``self.parse_tweets_block(response.body)`` is
    yielded first.  If ``self.reScrollCursor`` finds a match in the body,
    the search term is read back from the ``q`` parameter of the request
    URL and a single ``http.Request`` for the timeline endpoint is
    yielded, with ``self.parse_more_page`` as its callback.
    """
    # Items found on the page that was just downloaded.
    for tweet in self.parse_tweets_block(response.body):
        yield tweet

    # Follow the scroll cursor when the page carries one.
    cursor_match = self.reScrollCursor.search(response.body)
    if cursor_match is not None:
        # Recover the search term from the URL that produced this response.
        request_url = urlparse.urlparse(response.request.url)
        url_params = urlparse.parse_qs(request_url.query)
        search_term = url_params['q'][0]
        next_position = cursor_match.group(1)
        next_url = (
            'https://twitter.com/i/search/timeline?q=%s&'
            'include_available_features=1&include_entities=1&max_position=%s'
        ) % (urllib.quote_plus(search_term), next_position)
        yield http.Request(next_url, callback=self.parse_more_page)

    # TODO: # get refresh page
    # tmp = self.reRefreshCursor.search(response.body)
    # if tmp:
    # query = urlparse.parse_qs(urlparse.urlparse(response.request.url).query)['q'][0]
    # refresh_cursor=tmp.group(1)
# (Web-page navigation residue, not part of the code: "Comment list" / "Table of contents".)