toutiao.py 文件源码-python代码片段

toutiao.py 文件源码
python
阅读 46 收藏 0 点赞 0 评论 0
def generate_article_url(self, response):
        as_id = ''.join(random.sample(string.ascii_letters + string.digits, 15))
        cp_id = ''.join(random.sample(string.ascii_letters + string.digits, 15))
        yield scrapy.Request(
            "http://www.toutiao.com/api/pc/feed/?category=news_tech&utm_source=toutiao&widen=1&max_behot_time=0" +
            "max_behot_time_tmp=" + str(int(time.time())) +
            "tadrequire=true&as=" + as_id + "&cp=" + cp_id + "&t=" + str(time.time()),
            callback=self.generate_article_url
        )
        article_list = json.loads(response.body)
        if article_list.get("message") != "success":
            return
        for article_detail in article_list.get('data'):
            # wenda gallery ad ?
            # news_tech and news_finance
            tag_url = article_detail.get('tag_url')
            if article_detail.get('article_genre') == 'article'\
                    and (tag_url == 'news_tech' or tag_url == 'news_finance'):
                yield scrapy.Request(
                    self.toutiao_url_pre + article_detail.get('source_url'),
                    callback=self.generate_article_content
                )