def parse_item(self, response):
    """Build a ``Cl1024Item`` from a torrent download page response.

    The following fields are copied unchanged from ``response.meta``:
    ``cl_title``, ``cl_url``, ``cl_bankuai`` and ``posted``.  The remaining
    fields are taken from the page itself:

    * ``torrent_downloaded`` -- the third text node matched by
      ``//td/table/tr/td/text()`` with its first 17 characters removed.
    * ``torrent_url`` -- the URL of the response.
    * ``torrent_download_urls`` -- the rmdown download URL built from the
      values of the ``ref`` and ``reff`` input elements, UTF-8 encoded.

    Pages that lack the expected table cell or either input element are
    skipped with a warning instead of raising ``IndexError`` or producing a
    download URL that contains the literal text ``None``.

    Args:
        response: The response for the torrent page; its ``meta`` must
            contain the four keys listed above (``KeyError`` otherwise).

    Yields:
        The populated ``Cl1024Item``, or nothing if the page is malformed.
    """
    import logging

    log = logging.getLogger(__name__)

    item = Cl1024Item()
    # Carry over the values attached to the request by an earlier callback.
    for key in ('cl_title', 'cl_url', 'cl_bankuai', 'posted'):
        item[key] = response.meta[key]

    sel = Selector(response)

    cell_texts = sel.xpath('//td/table/tr/td/text()').extract()
    if len(cell_texts) < 3:
        log.warning('Download-count cell not found on %s; skipping page',
                    response.url)
        return
    # Presumably the first 17 characters are a fixed label preceding the
    # download count -- confirm against the live page layout.
    item['torrent_downloaded'] = cell_texts[2][17:]
    item['torrent_url'] = response.url

    ref = sel.xpath('//input[@name="ref"]/@value').extract_first()
    reff = sel.xpath('//input[@name="reff"]/@value').extract_first()
    if ref is None or reff is None:
        # extract_first() returns None when nothing matches; formatting that
        # into the URL would silently yield "ref=None&&reff=None".
        log.warning('Missing ref/reff form inputs on %s; skipping page',
                    response.url)
        return

    # NOTE(review): the doubled '&' and the UTF-8 encoding are kept exactly
    # as before so downstream consumers see the same value; confirm whether
    # either is still required.
    dl = ('http://www.rmdown.com/download.php?ref=%s&&reff=%s&submit=download' % (ref, reff)).encode('utf-8')
    item['torrent_download_urls'] = dl
    yield item
评论列表
文章目录