zhiyoo.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:lichking 作者: melonrun 项目源码 文件源码
def generate_forum(self, response):
        """Parse a forum page and schedule the follow-up crawl requests.

        Yields ``scrapy.Request`` objects for:

        * every sub-forum link found under ``td.fl_g`` (handled by this same
          callback, tagged ``page_key=1``);
        * the next thread-list page, when the pager has a ``nxt`` link
          (handled by this same callback, tagged ``page_key=-1``);
        * every thread link (``a.xst``) on the page (handled by
          ``self.generate_forum_thread``).

        Pagination and thread requests are only emitted when the page is a
        first page (``page_key == 1``) or ``self.check_rep_date`` accepts the
        reply-time cells of the page.
        NOTE(review): ``check_rep_date`` is defined outside this block;
        presumably it tells whether the replies are recent enough to keep
        crawling -- confirm.

        :param response: page response; ``response.meta['page_key']`` must be
            set by the request that produced it.
        """
        # Sub-forum links: each one is crawled as a first page.
        sub_forums = response.xpath('//td[@class="fl_g"]//dl//dt//a/@href').extract()
        for forum_url in sub_forums:
            # Links that do not mention the host are treated as site-relative.
            if 'bbs.zhiyoo.com' not in forum_url:
                forum_url = 'http://bbs.zhiyoo.com/' + forum_url
            yield scrapy.Request(
                forum_url,
                meta={"page_key": 1, "proxy": MongoClient.get_random_proxy()},
                callback=self.generate_forum
            )

        page_key = int(response.meta['page_key'])
        next_links = response.xpath('//div[@class="pg"]//a[@class="nxt"]/@href').extract()
        thread_list = response.xpath('//a[@class="xst"]/@href').extract()

        # Nothing to paginate and nothing to scrape (e.g. a pure index page).
        if not next_links and not thread_list:
            return

        # A first page is always processed; later pages must pass the date check.
        rep_time_list = response.xpath('//tr/td[@class="by"]/em/a').extract()
        if page_key != 1 and not self.check_rep_date(rep_time_list):
            return

        # Follow the pager when there is a next page. urljoin() leaves absolute
        # URLs untouched and resolves relative ones, which Request would reject.
        if next_links:
            yield scrapy.Request(
                response.urljoin(next_links[0]),
                meta={"page_key": -1, "proxy": MongoClient.get_random_proxy()},
                callback=self.generate_forum
            )

        # Schedule every thread on this page. This is deliberately independent
        # of the pager so the last (or only) page of a forum is scraped too.
        for thread_url in thread_list:
            yield scrapy.Request(
                response.urljoin(thread_url),
                meta={"proxy": MongoClient.get_random_proxy()},
                callback=self.generate_forum_thread
            )
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号