blsh.py 文件源码-python代码片段

blsh.py 文件源码

python

阅读 17 收藏 0 点赞 0 评论 0

项目：picScrapy 作者: JunWangCode 项目源码文件源码

def parse(self, response):
        """Yield one product-list form request per category link on the page.

        Every ``<dl>`` under ``div.tit_sort`` is treated as one category block:
        the text of its ``dt/a`` node is the category name, and each ``em//a``
        href inside it is searched for a ``list-<c1>-<c2>-<c3>`` fragment.
        For every href that matches, a POST to the ``GetLuceneProduct``
        endpoint is issued for page ``"1"``, with ``self.parse_data`` as the
        callback.

        :param response: object exposing Scrapy-style ``xpath(...)`` selectors.
        :returns: generator of ``FormRequest`` objects. The ``meta`` of each
            request carries ``cat``, ``c1``, ``c2``, ``c3`` and ``page``.

        Blocks without a category name and hrefs that do not contain the
        ``list-a-b-c`` fragment are skipped. Previously either case raised
        (``IndexError`` / ``AttributeError``) and aborted the generator,
        which dropped every category that followed.
        """
        # Raw string: "\d" in a normal string literal is an invalid escape.
        list_pattern = re.compile(r"list-(\d+)-(\d+)-(\d+)")
        # All requests go to the same endpoint; only the form data varies.
        product_api = "http://www.benlai.com/NewCategory/GetLuceneProduct"

        for category in response.xpath('//div[@class="tit_sort"]//dl'):
            names = category.xpath('./dt/a/text()').extract()
            if not names:
                # No dt/a text in this <dl>: nothing to label the requests with.
                continue
            category_name = names[0]

            for href in category.xpath('.//em//a/@href').extract():
                match = list_pattern.search(href)
                if match is None:
                    # Link without the three numeric ids (not a list page).
                    continue
                # NOTE(review): presumably the three category-level ids the
                # endpoint expects -- confirm against the site's API.
                c1, c2, c3 = match.groups()
                yield FormRequest(
                    product_api,
                    formdata={"c1": c1, "c2": c2, "c3": c3, "page": "1"},
                    callback=self.parse_data,
                    meta={"cat": category_name, "c1": c1, "c2": c2, "c3": c3, "page": "1"},
                )

    # ?????????