wshang_spider.py 文件源码-python代码片段

wshang_spider.py 文件源码

python

阅读 21 收藏 0 点赞 0 评论 0

项目：NewsScrapy 作者: yinzishao 项目源码文件源码

def parse(self, response):
        """Request the first page of every topic linked from the response.

        Each ``<a class="ui-more">`` anchor found in the response body is
        treated as a topic link. For every anchor that has an ``href`` a
        form-encoded POST (``Content-Type: application/x-www-form-urlencoded``)
        is issued with these fields:

            inslider -- always "0"
            page     -- page number as a string ("1" here)
            pagesize -- always "10"

        :param response: response whose body contains the topic links.
        :return: generator of ``scrapy.FormRequest`` objects; each one calls
            back into ``self.parse_topic`` and carries ``page`` and
            ``topic_name`` in its ``meta``.
        """
        # NOTE(review): no parser is named, so bs4 picks the best one that is
        # installed; pin one explicitly if results must match across machines.
        soup = BeautifulSoup(response.body)
        first_page = "1"

        for topic in soup.find_all("a", class_="ui-more"):
            href = topic.get("href")
            if not href:
                # An anchor without a target cannot be requested. Passing None
                # on to FormRequest would raise and drop every remaining topic.
                continue

            # Resolve relative links against the page URL; absolute links are
            # returned unchanged.
            topic_url = response.urljoin(href)

            # NOTE(review): the u"??" literal looks mis-encoded in this copy
            # (presumably a non-ASCII label suffix being stripped from the
            # link text) -- confirm against the upstream source.
            topic_name = topic.text.replace(u"??", "")

            # Seed the per-topic entry in self.flag with 0 unless one already
            # exists (how the value is used is not visible in this method).
            self.flag.setdefault(topic_url, 0)

            yield scrapy.FormRequest(
                url=topic_url,
                formdata={
                    "inslider": "0",
                    "page": first_page,
                    "pagesize": "10"
                },
                callback=self.parse_topic,
                meta={"page": first_page, "topic_name": topic_name}
            )