spider.py 文件源码-python代码片段

spider.py 文件源码

python

阅读 32 收藏 0 点赞 0 评论 0

项目：cl1024 作者: wuchujiecode 项目源码文件源码

def get_torrent(self, response):
        sel = Selector(response)
        cl_title = sel.xpath('//td[@class="h"]/text()[2]').extract_first()
        cl_bankuai = sel.xpath('//div[@class="t3"]/table/tr/td/b/a[2]/text()').extract_first()
        cl_url = response.url
        torrent = re.search('rmdown\.com(.+?)</a>', response.body)
        torrent_url = 'http://www.' + torrent.group()[:-4]
        posted = sel.xpath('//div[@class="tipad"]/text()').extract()[1]
        posted = posted.encode('utf-8')[9:-7]
        yield Request(
            url=torrent_url,
            meta={
                'cl_title': cl_title,
                'cl_bankuai': cl_bankuai,
                'cl_url': cl_url,
                'posted': posted,
            },
            callback=self.parse_item,
            dont_filter=True)