python类shell()的实例源码

kdl_spider.py 文件源码 项目:scrapy_projects 作者: morefreeze 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def parse(self, response):
        """Fan out one request per listing page.

        Builds each page URL by appending the page number to the URL of
        the current request, and hands every page to ``parse_ip``.

        :response: response for the index page
        :returns: None (yields ``scrapy.Request`` objects)
        """
        # Debug helper (uncomment to drop into a scrapy shell):
        # from scrapy.shell import inspect_response
        # inspect_response(response, self)
        base_url = response.request.url
        for page_no in range(1, self.page + 1):
            yield scrapy.Request(
                '%s%s' % (base_url, page_no),
                self.parse_ip,
                dont_filter=True,
            )
example.py 文件源码 项目:scrapy_redis_spider 作者: lymlhhj123 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _extract_item(self, response):
        """Collect the movie fields from a detail page and return them as a dict."""
        # Debug helpers (uncomment when needed):
        # inspect_response(response, self)   # open a scrapy shell on this response
        # open_in_browser(response)          # render the received response in a browser

        # Gather all fields with an ItemLoader.
        loader = ItemLoader(response=response, item=MyspiderItem(), type='html')
        loader.add_xpath('movie_name', '//h1/span[@property="v:itemreviewed"]/text()')
        loader.add_xpath('movie_year', '//span[@property="v:initialReleaseDate"]/text()')
        loader.add_xpath('movie_type', '//span[@property="v:genre"]/text()')
        loader.add_xpath('movie_rate', '//strong[@class="ll rating_num"]/text()')
        loader.add_value('url', response.url)
        # load_item() yields a scrapy.Item; convert to a plain dict so the item
        # serializes cleanly to JSON when scrapy-redis pushes it into redis.
        return dict(loader.load_item())
Movie1801.py 文件源码 项目:python 作者: panxus 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def parse(self, response):
        """Scrape one Douban Top-250 listing page and follow the next page."""
        # Debug helper (uncomment to drop into a scrapy shell):
        # from scrapy.shell import inspect_response
        # inspect_response(response, self)
        # shell >> view(response)

        for entry in response.xpath('//ol[@class="grid_view"]/li'):
            item = Dou1801Item()
            item['ranks'] = entry.xpath('div/div[1]/em/text()').extract()[0]
            item['titles'] = entry.xpath('div/div[2]/div[1]/a/span[1]/text()').extract()[0]
            item['score'] = entry.xpath('div/div[2]/div[2]/div/span[2]/text()').extract()[0]
            item['nums'] = entry.xpath('div/div[2]/div[2]/div/span[4]/text()').extract()[0]
            # The blurb is optional, so only set it when present.
            description = entry.xpath('div/div[2]/div[2]/p[2]/span/text()').extract()
            if description:
                item['des'] = description[0]
            item['links'] = entry.xpath('div/div[2]/div[1]/a/@href').extract()[0]
            yield item

        # Follow the "next page" link, if one exists on this page.
        next_page = response.xpath('//*[@id="content"]/div/div[1]/div[2]/span[3]/a/@href').extract()
        if next_page:
            yield scrapy.Request(url='https://movie.douban.com/top250' + next_page[0], headers=self.headers)
ip84_spider.py 文件源码 项目:scrapy_projects 作者: morefreeze 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def parse(self, response):
        """Schedule the paginated listing pages for ``parse_ip``.

        :response: response for the index page
        :returns: None (yields ``scrapy.Request`` objects)
        """
        # Debug helper (uncomment to drop into a scrapy shell):
        # from scrapy.shell import inspect_response
        # inspect_response(response, self)
        for page_no in range(1, 2):  # currently only the first page
            page_url = '%s/%s' % (response.request.url, page_no)
            yield scrapy.Request(page_url, callback=self.parse_ip)
gatherproxy_spider.py 文件源码 项目:scrapy-proxy-spiders 作者: MartiONE 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def parse_page(self, response):
        """Extract one proxy item per table row from a gatherproxy listing page.

        The site obfuscates the IP inside a ``document.write(...)`` call and
        encodes the port as a hexadecimal string; both are decoded here.

        :response: the listing page response
        :returns: None (yields ``ProxyfetcherItem`` objects)
        """
        # Debug helper (uncomment to drop into a scrapy shell):
        # from scrapy.shell import inspect_response
        # inspect_response(response, self)
        for row in response.xpath("//table/tr")[2:]:
            # Item creation and deployment
            item = ProxyfetcherItem()
            # FIX: raw strings so "\(" is a regex escape, not an invalid
            # string escape (SyntaxWarning since Python 3.12).
            item["ip"] = row.xpath("td")[1].re(r"document.write\('(.+?)'")[0].strip()
            # The port is "encoded" as hexadecimal
            item["port"] = str(int(row.xpath("td")[2].re(r"gp.dep\('(.+?)'")[0], 16))
            item["country"] = row.xpath("td[5]/text()").extract()[0]
            item["con_type"] = 'http'
            item["full_address"] = "{}:{}".format(item["ip"], item["port"])
            yield item.status_check(item)


问题


面经


文章

微信
公众号

扫码关注公众号