com5442.py 文件源码-python代码片段

com5442.py 文件源码

python

阅读 16 收藏 0 点赞 0 评论 0

def parse_first_page(self, response):
        """Handle the first page of a multi-page listing.

        Yields one request per follow-up page (pages 2..count, handled by
        ``self.parse_item``) and then the item loaded from this first page.

        The page count is the first run of digits found in the text of the
        first link of the ``#aplist`` list.  If that link is absent or holds
        no digits, the listing is treated as a single page so that this
        page's item is still emitted instead of the callback failing with
        ``IndexError``.

        The title is read from the cookies of the request that produced
        ``response`` and is forwarded the same way to every follow-up
        request.

        :param response: response for the first page; its request must carry
            a ``title`` cookie (``KeyError`` otherwise).
        :return: generator of follow-up requests followed by one loaded item.
        """
        pager_text = response.xpath('//div[@id="aplist"]/ul/li[1]/a/text()')
        digits = pager_text[0].re(r'.*?(\d+).*?') if pager_text else []
        count = int(digits[0]) if digits else 1

        title = response.request.cookies['title']
        albumURL = response.url.replace(".html", '')

        # Page 1 is the current response, so follow-ups start at page 2 and
        # use the "<base>_<n>.html" naming.  ``range`` (rather than
        # ``xrange``) keeps this working on both Python 2 and Python 3.
        for page in range(2, count + 1):
            yield scrapy.Request(
                "%s_%d.html" % (albumURL, page),
                callback=self.parse_item,
                cookies={'title': title},
            )

        loader = ItemLoader(item=PageItem(), response=response)
        loader.add_value('title', title)
        loader.add_value('name', self.name)
        loader.add_value('url', response.url)
        loader.add_xpath('image_urls', '//p[@id="contents"]/a/img/@src')
        yield loader.load_item()