def parse_room_first(self, response):
    """Parse a room listing page and request its ``personalization.json``.

    Reads the room id from the response URL and the listing name and host
    link from the page, then builds a follow-up request against the same
    URL with ``personalization.json`` appended to the path.

    Args:
        response: The listing page response; it must expose ``url`` and
            ``css()``.

    Returns:
        A ``scrapy.FormRequest`` whose callback is
        ``self.parse_room_second`` and whose ``meta`` carries ``room_id``,
        ``name``, ``owner``, ``owner_id`` and ``parse``. ``name``,
        ``owner`` and ``owner_id`` are ``None`` when the matching node is
        absent from the page.

    Raises:
        IndexError: If the URL contains no run of three or more digits.
    """
    # First run of 3-10 digits in the URL is taken as the room id.
    room_id = re.findall(r'\d{3,10}', response.url)[0]
    name = response.css('#listing_name::text').extract_first()

    # NOTE: selectors for equipment, cover image, description and comment
    # count used to live here but were disabled; only name and host info
    # are scraped from this page.

    host_link = 'div.host-info.pull-left > div > span > a.link-reset'
    owner_href = response.css(host_link + '::attr(href)').extract_first()
    # extract_first() yields None when the host link is missing; keep that
    # as None instead of crashing on .split(), like the other fields do.
    owner = owner_href.split('?')[-1] if owner_href is not None else None
    owner_id = response.css(host_link + ' > span::text').extract_first()
    # NOTE(review): 'owner' is the href part after the last '?', while
    # 'owner_id' is the link's text -- the key names look swapped, but are
    # kept because the callback presumably reads them; confirm.

    f = furl(response.url)
    f.path.add('personalization.json')
    try:
        del f.args['location']
    except KeyError:
        # No 'location' query parameter on this URL; nothing to strip.
        pass
    f.args.addlist(
        'review_ids[]',
        ['144474925', '141633062', '140450604', '139913674',
         '138701100', '138102086', '137690239'],
    )

    return scrapy.FormRequest(
        url=f.url,
        callback=self.parse_room_second,
        meta={
            'room_id': room_id,
            'name': name,
            'owner': owner,
            'owner_id': owner_id,
            'parse': True,
        },
    )
评论列表
文章目录