def get_torrent(self, response):
    """Extract thread metadata and request the linked torrent page.

    Pulls several fields out of ``response`` and yields one ``Request``
    for the ``rmdown.com`` address found in the raw body. The extracted
    fields travel with the request in ``meta`` and the resulting response
    is handled by ``self.parse_item``.

    Pages that lack the ``rmdown.com`` link or the second ``tipad`` text
    node are skipped with a logged warning, instead of failing inside the
    callback with ``AttributeError`` / ``IndexError``.

    Args:
        response: The downloaded page to scrape.

    Yields:
        At most one ``Request`` pointing at the torrent page, carrying
        ``cl_title``, ``cl_bankuai``, ``cl_url`` and ``posted`` in ``meta``.
    """
    import logging

    log = logging.getLogger(__name__)

    sel = Selector(response)
    # Second text node of the <td class="h"> cell.
    cl_title = sel.xpath('//td[@class="h"]/text()[2]').extract_first()
    # Text of the second link in the div.t3 header table
    # (presumably the board/section name -- verify against the site markup).
    cl_bankuai = sel.xpath(
        '//div[@class="t3"]/table/tr/td/b/a[2]/text()').extract_first()
    cl_url = response.url

    # Raw string: '\.' is not a valid escape in a plain string literal.
    # NOTE(review): Scrapy's response.body is bytes; under Python 3 a str
    # pattern against bytes raises TypeError -- confirm the target
    # interpreter before porting.
    torrent = re.search(r'rmdown\.com(.+?)</a>', response.body)
    if torrent is None:
        log.warning('No rmdown.com link found in %s; skipping', cl_url)
        return
    # The match ends with the 4-character '</a>' terminator; drop it.
    torrent_url = 'http://www.' + torrent.group()[:-4]

    tipad_texts = sel.xpath('//div[@class="tipad"]/text()').extract()
    if len(tipad_texts) < 2:
        log.warning('No post-date text found in %s; skipping', cl_url)
        return
    # Trim 9 leading and 7 trailing bytes of the UTF-8 encoded text
    # (presumably a label prefix and trailing padding -- TODO confirm).
    posted = tipad_texts[1].encode('utf-8')[9:-7]

    yield Request(
        url=torrent_url,
        meta={
            'cl_title': cl_title,
            'cl_bankuai': cl_bankuai,
            'cl_url': cl_url,
            'posted': posted,
        },
        callback=self.parse_item,
        # Passed through unchanged from the original call.
        dont_filter=True)
评论列表
文章目录