def generate_forum_url(self, response):
    """Process one forum listing page and schedule follow-up requests.

    Reads ``page_key`` and ``forum_key`` from ``response.meta``. Yields
    ``scrapy.Request`` objects: one for the following listing page (handled
    by this same callback) and one per thread link found on the page
    (handled by ``self.generate_forum_content``).

    Nothing is yielded when the page exposes no last-reply timestamps, or
    when ``self.check_rep_date`` rejects the first timestamp and the
    incoming ``page_key`` is not 1.
    """
    thread_links = response.xpath(
        '//div[@class="threadlist"]//div[@class="threadlist_title"]//a[@onclick="atarget(this)"]/@href'
    ).extract()
    reply_times = response.xpath(
        '//div[@class="threadlist_info"]//div[@class="lastreply"]//span/@title'
    ).extract()
    next_page = int(response.meta['page_key']) + 1

    # No last-reply timestamps on the page: nothing to follow.
    if not reply_times:
        return

    # Continue only while the first last-reply timestamp passes the date
    # check; next_page == 2 (incoming page_key of 1) is always followed.
    # NOTE(review): assumes the thread requests below are gated by this same
    # check, like the next-page request — confirm against the spider.
    if not (self.check_rep_date(reply_times[0]) or next_page == 2):
        return

    # Request the next listing page of the same forum.
    forum_key = response.meta['forum_key']
    next_page_url = "http://bbs.lenovomobile.cn/" + forum_key + "/" + str(next_page) + "/"
    yield scrapy.Request(
        next_page_url,
        meta={"page_key": next_page, "forum_key": forum_key},
        callback=self.generate_forum_url
    )

    logging.error(len(thread_links))

    # Request page 1 of every thread listed on this page.
    for link in thread_links:
        # link looks like /zui/t778232/
        thread_url = "http://bbs.lenovomobile.cn" + link + '1/'
        yield scrapy.Request(
            thread_url,
            callback=self.generate_forum_content
        )
评论列表
文章目录