def generate_forum_url(self, response):
    """Yield one request per thread link found on a forum listing page.

    Every ``href`` matched under the ``Forumhome_listbox`` container
    (``dd`` -> ``h1`` -> ``a``) is scheduled with
    ``self.generate_forum_content`` as its callback, using a proxy obtained
    from ``MongoClient.get_random_proxy()`` for each request.

    Args:
        response: The downloaded listing page (a Scrapy response object).

    Yields:
        scrapy.Request: One request per thread link on the page.
    """
    # NOTE: following the next listing page is currently disabled. The
    # earlier (commented-out) logic took ``response.meta['page_key'] + 1``,
    # read the reply times via
    #   //div[@class="Forumhome_listbox"]//dl//dd//p/text()
    # and, when ``self.check_rep_date(...)`` accepted them, re-entered this
    # callback for
    #   http://club.lenovo.com.cn/forum-all-reply_time-0-<page_key>
    # passing ``page_key`` and a random proxy in ``meta``.
    thread_links = response.xpath(
        '//div[@class="Forumhome_listbox"]//dd//h1//a//@href'
    ).extract()

    for href in thread_links:
        # Resolve against the page URL first: scrapy.Request raises
        # ValueError for scheme-less (relative) URLs, while absolute links
        # pass through urljoin unchanged.
        yield scrapy.Request(
            response.urljoin(href),
            meta={"proxy": MongoClient.get_random_proxy()},
            callback=self.generate_forum_content,
        )
# parse forum content and store
评论列表
文章目录