def parse_items(self, response):
    # These imports would normally live at module level; kept here to match the snippet.
    import json
    from urllib.parse import quote
    from scrapy.selector import Selector

    print("------------")
    print(response.url)
    print("----------")

    category = response.meta['category']['category']
    sub_category = response.meta['category']['sub_category']

    # The endpoint returns JSON with the product grid embedded as an HTML fragment.
    response_json = json.loads(response.body)
    required_text = response_json["result"]["html"]
    selector = Selector(text=required_text)

    all_items = selector.xpath('//div[contains(@class, "grid_item")]')
    for each_item in all_items:
        name = each_item.xpath('.//div[@class="title"]/a/text()').extract_first()
        price = each_item.xpath('.//span[@class="price"]/text()').extract_first()
        image_urls = [each_item.xpath(".//img/@src").extract_first()]
        affiliate_link = each_item.xpath(".//a/@href").extract_first()
        website = "polyvore.com"

        # Keep only items whose name contains a known brand.
        brand = [i for i in ALL_BRANDS if i.lower() in name.lower()]
        if brand:
            brand = brand[0]
            print("brand", brand)
        else:
            print(name, brand, "skipped: no known brand")
            continue

        item = ProductItem(
            name=name.strip(),
            price=price.strip(),
            image_urls=image_urls,
            brand=brand.strip(),
            affiliate_link=affiliate_link,
            category=category,
            sub_category=sub_category,
            website=website,
        )
        yield item

    # Follow pagination while the API reports more pages.
    if response_json["result"]["more_pages"] == "1":
        next_page = int(response_json["result"]["page"]) + 1
    else:
        return
    next_link = url_to_use.format(str(next_page), quote(sub_category))
    my_request = scrapy.Request(next_link, callback=self.parse_items)
    my_request.meta['category'] = {
        "sub_category": sub_category,
        "category": category,
    }
    yield my_request
polyvore_spider.py source code
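The method above refers to several names that are not defined in the snippet (ALL_BRANDS, url_to_use, ProductItem, and a module-level import of scrapy). A minimal sketch of that module-level context follows; the placeholder values and the item fields are assumptions inferred from the calls in parse_items, not taken from the original project.

import scrapy

# Assumed brand whitelist used for the substring match in parse_items.
ALL_BRANDS = ["Gucci", "Prada", "Nike"]

# Assumed paginated endpoint; the two "{}" placeholders match the .format() call
# in parse_items. The real URL template is not shown in the snippet.
url_to_use = "https://www.polyvore.com/cgi/shop?page={}&query={}"

class ProductItem(scrapy.Item):
    # Fields inferred from the keyword arguments passed in parse_items.
    name = scrapy.Field()
    price = scrapy.Field()
    image_urls = scrapy.Field()
    brand = scrapy.Field()
    affiliate_link = scrapy.Field()
    category = scrapy.Field()
    sub_category = scrapy.Field()
    website = scrapy.Field()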