def parse(self, response):
    """Yield a first-page product-listing request for every category link.

    Walks each ``<dl>`` under the ``tit_sort`` block, reads the category
    name from ``./dt/a`` and the three numeric ids embedded in every
    ``list-<c1>-<c2>-<c3>`` href found under ``<em>``, and POSTs those ids
    to the ``GetLuceneProduct`` endpoint with ``page`` set to ``"1"``.

    Args:
        response: Object exposing ``xpath()`` for the category index page
            (presumably the Scrapy response; confirm against the spider).

    Yields:
        FormRequest: One request per matching link, handled by
        ``self.parse_data``. Its ``meta`` carries the category name under
        ``"cat"`` plus the same ``c1``/``c2``/``c3``/``page`` values that
        were sent as form data.
    """
    product_api = "http://www.benlai.com/NewCategory/GetLuceneProduct"
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    id_pattern = re.compile(r"list-(\d+)-(\d+)-(\d+)")

    for category in response.xpath('//div[@class="tit_sort"]//dl'):
        names = category.xpath('./dt/a/text()').extract()
        if not names:
            # A <dl> without a title link cannot be labelled; skip it
            # instead of failing the whole page with an IndexError.
            continue
        category_name = names[0]

        for href in category.xpath('.//em//a/@href').extract():
            match = id_pattern.search(href)
            if match is None:
                # Links that are not "list-a-b-c" category pages carry no
                # ids to post; ignore them rather than crash on .group().
                continue
            c1, c2, c3 = match.groups()
            yield FormRequest(
                product_api,
                formdata={"c1": c1, "c2": c2, "c3": c3, "page": "1"},
                callback=self.parse_data,
                meta={"cat": category_name, "c1": c1, "c2": c2, "c3": c3, "page": "1"},
            )
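# NOTE: the original comment on this line was lost to a character-encoding error.
# (web-page residue, not code) Comment list
# (web-page residue, not code) Article table of contents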