imgspider.py 文件源码-python代码片段

imgspider.py 文件源码

python

阅读 24 收藏 0 点赞 0 评论 0

def parse(self, response):
        """Yield one album request per album box found on a listing page.

        Every node matched by ``self.config["xpathAlbumList"]`` is queried
        for its URL (``xpathAlbumURL``) and title (``xpathAlbumTitle``); the
        first match of each is used.

        Optional config keys:
            specificAlbums: when present, only URLs contained in it are
                followed.
            baseAddress: when present, it is prepended to any URL that does
                not start with ``"http"``.

        Boxes lacking either a URL or a title are skipped, so one malformed
        entry does not abort processing of the remaining albums on the page.

        Yields:
            scrapy.Request objects whose callback is ``self.parse_album``.
        """
        config = self.config
        for box in response.xpath(config["xpathAlbumList"]):
            urls = box.xpath(config["xpathAlbumURL"]).extract()
            titles = box.xpath(config["xpathAlbumTitle"]).extract()
            if not urls or not titles:
                # Indexing [0] on an empty result would raise IndexError and
                # stop the whole callback; skip the incomplete box instead.
                continue
            url, title = urls[0], titles[0]

            # ``in`` replaces dict.has_key(), which no longer exists on
            # Python 3 and behaves identically on Python 2.
            if "specificAlbums" in config and url not in config["specificAlbums"]:
                continue

            if not url.startswith("http") and "baseAddress" in config:
                url = config["baseAddress"] + url

            # The title travels to parse_album through the request cookies.
            # NOTE(review): Request.meta is the usual channel for this; kept
            # as-is because parse_album is not visible here and may read the
            # value from the cookies.
            yield scrapy.Request(url, headers=self.headers,
                                 callback=self.parse_album,
                                 cookies={'title': title})

        # TODO: the original note here was garbled by encoding; it mentions
        # parse_album_list -- presumably following further album-list pages.
        # Confirm the intent before implementing.

    #?????????