def parse_movie_details(self, response):
    """Yield a dict of fields scraped from a movie detail page.

    The text of the first ``<span>`` inside the ``<h1>`` under
    ``div#content`` is stored under the ``'??'`` key.  The ``div#info``
    block is then flattened to plain text and every ``label: value`` line
    in it becomes one more entry of the dict.

    Args:
        response: Object exposing ``content`` (the HTML to parse) and
            ``base_url`` (passed to lxml as the document base URL).

    Yields:
        A single dict mapping labels to values.  Nothing is yielded when
        the title or the ``div#info`` block cannot be located.
    """
    html_root = html.fromstring(response.content,
                                base_url=response.base_url)

    title = self.xpath_first(html_root,
                             '//div[@id="content"]/h1/span[1]/text()')
    # NOTE(review): presumably xpath_first returns None when nothing
    # matches (the TypeError handling below relies on the same thing).
    # Treat a missing title like a missing info block instead of failing
    # on None.strip().
    if title is None:
        return

    # NOTE(review): the '??' key looks like an encoding-damaged label; it
    # is kept unchanged because consumers may look it up by this name.
    movie_info = {'??': title.strip()}

    info_node = self.xpath_first(html_root, '//div[@id="info"]')
    try:
        # Serialise the node and let BeautifulSoup reduce it to pure text.
        soup = BeautifulSoup(html.tostring(info_node), 'html')
    except TypeError:
        # html.tostring() rejects anything that is not an element, which
        # is what happens when the info block is absent from the page.
        return

    for line in soup.get_text().splitlines():
        # Split on the FIRST colon only, so values that contain colons
        # themselves (URLs, times, ...) are kept intact.
        label, _, value = line.partition(':')
        label = label.strip()
        value = value.strip()
        # Lines without a colon, or with an empty side, carry no field.
        if label and value:
            movie_info[label] = value

    yield movie_info
评论列表
文章目录