tuba77.py 文件源码-python代码片段

tuba77.py 文件源码

python

阅读 17 收藏 0 点赞 0 评论 0

def parse_first_page(self, response):
        """Handle the first page of a multi-page album.

        Reads the total page count from the first text node under
        ``ul[@class="image"]``, schedules a request for every following page
        (``<base>_2.shtml`` .. ``<base>_<count>.shtml``, where ``<base>`` is
        the response URL with ``.shtml`` removed) and finally emits the item
        for the current page itself.

        Args:
            response: Response for the first page. The request that produced
                it must carry a ``title`` cookie; that value is stored on the
                item and forwarded to every follow-up request.

        Yields:
            One ``scrapy.Request`` per additional page (handled by
            ``self.parse_item``), then the ``PageItem`` loaded from this
            response with ``title``, ``name``, ``url`` and ``image_urls``.

        Raises:
            KeyError: While iterating, if the request cookies hold no
                ``title`` entry (assuming they are stored as a dict).
        """
        # Function-scope import: the module's import block is not visible here.
        import logging

        # Default to a single page so this page's item is still emitted when
        # the page counter is missing or contains no number, instead of the
        # whole callback dying with an IndexError.
        count = 1
        counter_nodes = response.xpath('//ul[@class="image"]/text()')
        found = counter_nodes[0].re(r'.*?(\d+).*?') if counter_nodes else []
        if found:
            count = int(found[0])
        else:
            logging.getLogger(__name__).warning(
                "No page count found on %s; treating it as a single page",
                response.url)

        title = response.request.cookies['title']
        album_url = response.url.replace(".shtml", '')

        # Page 1 is this very response, so follow-ups start at page 2.
        # ``range`` (rather than ``xrange``) works on both Python 2 and 3.
        for page in range(2, count + 1):
            yield scrapy.Request(
                album_url + "_" + str(page) + ".shtml",
                callback=self.parse_item,
                cookies={'title': title})

        loader = ItemLoader(item=PageItem(), response=response)
        loader.add_value('title', title)
        loader.add_value('name', self.name)
        loader.add_value('url', response.url)
        loader.add_xpath('image_urls', '//td[@valign="top"]/img/@src')
        yield loader.load_item()