def generate_articlelist(self, response):
    """Parse a JSON article-list page and schedule the follow-up requests.

    When the current page is page 1, or ``self.check_rep_time`` returns a
    truthy value for the raw body, this yields a request for the next list
    page (the same URL with its ``index=N`` component incremented) and then
    one request per entry of the payload's ``list`` array.

    Args:
        response: Response whose ``body`` is the JSON payload, whose ``url``
            carries an ``index=<number>`` component, and whose
            ``meta['page_key']`` holds the current page number (anything
            ``int()`` accepts).

    Yields:
        ``scrapy.Request`` for the next list page (callback: this method,
        with ``meta['page_key']`` set to the next page number as a string),
        followed by one ``scrapy.Request`` per ``ArtUrl`` in the payload
        (callback: ``self.generate_article_detail``).
    """
    # Cheap pre-filter before parsing. A bytes needle is required because the
    # body is bytes on Python 3; ``b"list"`` is also a plain str on Python 2.
    if b"list" not in response.body:
        return

    articlelist = json.loads(response.body)
    page_key = int(response.meta['page_key'])

    # presumably check_rep_time is the incremental-crawl freshness check;
    # its semantics are not visible here -- confirm against its definition.
    if page_key == 1 or self.check_rep_time(response.body):
        next_page = page_key + 1

        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        index_match = re.search(r'index=\d+', response.url)
        if index_match is not None:
            yield scrapy.Request(
                response.url.replace(
                    index_match.group(0), 'index=' + str(next_page)
                ),
                callback=self.generate_articlelist,
                meta={"page_key": str(next_page)},
            )
        # A URL without an ``index=N`` component cannot be paginated; skip
        # the next-page request instead of failing so that the articles on
        # this page are still scheduled below.

        # NOTE(review): the original indentation was lost; this loop is
        # assumed to belong inside the guard above -- confirm.
        # Schedule every article listed on this page.
        for art_entry in articlelist['list']:
            yield scrapy.Request(
                art_entry['ArtUrl'],
                callback=self.generate_article_detail,
            )
评论列表
文章目录