def parse(self, response):
    """Yield one album request per usable entry on an album-list page.

    Every node matched by ``self.config["xpathAlbumList"]`` is queried for a
    URL (``"xpathAlbumURL"``) and a title (``"xpathAlbumTitle"``); the first
    match of each is used. When the config contains ``"specificAlbums"``,
    only URLs contained in it are followed. That check is made against the
    URL exactly as extracted, before ``"baseAddress"`` is prepended.

    Args:
        response: The downloaded album-list page. Only its ``xpath`` method
            is used here.

    Yields:
        ``scrapy.Request`` objects whose callback is ``self.parse_album``.
        The album title is handed over in the request's ``cookies`` dict
        under the key ``"title"``.
    """
    config = self.config
    for box in response.xpath(config["xpathAlbumList"]):
        urls = box.xpath(config["xpathAlbumURL"]).extract()
        titles = box.xpath(config["xpathAlbumTitle"]).extract()
        if not urls or not titles:
            # Indexing [0] on an empty result used to raise IndexError, which
            # ended the generator and dropped all remaining albums. Skip just
            # the incomplete entry instead.
            continue
        url, title = urls[0], titles[0]

        # ``in`` replaces ``has_key``, which no longer exists in Python 3;
        # ``in`` behaves the same on Python 2 and 3.
        if "specificAlbums" in config and url not in config["specificAlbums"]:
            continue

        # Relative links are completed by plain string concatenation with the
        # configured base address, when one is configured.
        if not url.startswith("http") and "baseAddress" in config:
            url = config["baseAddress"] + url

        yield scrapy.Request(
            url,
            headers=self.headers,
            callback=self.parse_album,
            cookies={"title": title},
        )
    # TODO: the original note here lost its text to an encoding error; only
    # the name ``parse_album_list`` survived. Presumably further list pages
    # were meant to be followed -- confirm with the author.
#?????????
评论列表
文章目录