room.py 文件源码

python
阅读 28 收藏 0 点赞 0 评论 0

项目:Spider 作者: poluo 项目源码 文件源码
def parse_room_first(self, response):
        """Build the follow-up ``personalization.json`` request for a room page.

        Reads the room id from the page URL and the listing name and host
        details from the page markup, then requests the page URL with
        ``personalization.json`` appended to its path, forwarding the scraped
        values to ``parse_room_second`` through ``meta``.

        Args:
            response: The room page response; its ``url`` and ``css()`` are used.

        Returns:
            A ``scrapy.FormRequest`` for the derived URL, or ``None`` when the
            page URL contains no run of 3-10 digits to use as the room id.
        """
        import logging

        # The first run of 3-10 digits in the URL is taken as the room id.
        # Skip the page with a warning instead of failing with a bare IndexError.
        id_match = re.search(r'\d{3,10}', response.url)
        if id_match is None:
            logging.getLogger(__name__).warning(
                'No room id found in URL %s; skipping page', response.url)
            return None
        room_id = id_match.group()

        name = response.css('#listing_name::text').extract_first()

        # Selectors for fields that are currently not collected:
        # equipment = response.css(
        #     'div.row.row-condensed.text-muted.text-center.hide-sm > div > div.col-sm-3.icon--small-margin > span.text-small::text').extract()
        # img = response.css('.cover-img::attr(style)').extract_first().replace('ackground-image:url', '')[1:-1]
        # description = response.css('div.simple-format-container > p > span::text').extract()
        # comment_num = response.css('div.col-md-8.review-header > div > h4 > span > span::text').extract_first()

        host_link = 'div.host-info.pull-left > div > span > a.link-reset'
        # extract_first() yields None when the host link is missing; keep None
        # rather than crashing, as already happens for `name` and `owner_id`.
        owner_href = response.css(host_link + '::attr(href)').extract_first()
        owner = owner_href.split('?')[-1] if owner_href is not None else None
        # NOTE(review): `owner` carries the part of the href after the last '?'
        # while `owner_id` carries the link text - the names look swapped;
        # confirm against parse_room_second before renaming.
        owner_id = response.css(host_link + ' > span::text').extract_first()

        f = furl(response.url)
        f.path.add('personalization.json')
        # Drop the `location` query argument if the page URL carries one.
        try:
            del f.args['location']
        except KeyError:
            pass
        # NOTE(review): the same fixed review ids are sent for every room;
        # presumably sample values - confirm whether they should be per-room.
        f.args.addlist('review_ids[]',
                       ['144474925', '141633062', '140450604', '139913674', '138701100', '138102086', '137690239'])

        return scrapy.FormRequest(url=f.url, callback=self.parse_room_second,
                                  meta={'room_id': room_id, 'name': name, 'owner': owner, 'owner_id': owner_id,
                                        'parse': True})
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号