lenovo_forum.py 文件源码-python代码片段

lenovo_forum.py 文件源码

python

阅读 25 收藏 0 点赞 0 评论 0

def generate_forum_url(self, response):
        """Yield one request per thread link found on a forum listing page.

        Args:
            response: The listing-page response. Thread links are taken from
                the ``href`` attributes matched under
                ``div[@class="Forumhome_listbox"]``.

        Yields:
            scrapy.Request: One request per extracted link, carrying a proxy
            from ``MongoClient.get_random_proxy()`` in ``meta`` and using
            ``self.generate_forum_content`` as its callback.
        """
        # TODO: pagination is currently disabled. The intended logic was to
        # read the reply times from
        #   //div[@class="Forumhome_listbox"]//dl//dd//p/text()
        # and, if self.check_rep_date(...) accepts them, request the next
        # listing page
        #   'http://club.lenovo.com.cn/forum-all-reply_time-0-' + str(page_key)
        # with page_key = int(response.meta['page_key']) + 1, the same
        # "page_key"/"proxy" meta entries, and this method as the callback.

        thread_hrefs = response.xpath(
            '//div[@class="Forumhome_listbox"]//dd//h1//a//@href'
        ).extract()
        for href in thread_hrefs:
            yield scrapy.Request(
                # Resolve against the page URL first: scrapy.Request rejects
                # scheme-less (relative) URLs with ValueError, while absolute
                # links pass through urljoin unchanged.
                response.urljoin(href),
                # A fresh proxy is fetched for every request, as before.
                meta={"proxy": MongoClient.get_random_proxy()},
                callback=self.generate_forum_content,
            )

    # parse forum content and store