def parse(movie):
    """Fetch a movie's detail page and fill in its score, director and actor.

    Downloads ``PAGE_URL % movie.id``, reads the rating from the
    ``<strong class="rating_num">`` element, then walks the direct children
    of ``<div id="info">`` looking for entries shaped like
    ``<label><text separator><value>``.

    Args:
        movie: Object exposing ``id``. ``score`` is assigned from the rating
            element; ``director`` and ``actor`` are assigned only when an
            entry with the matching label is found.

    Returns:
        None. ``movie`` is updated in place and then printed.

    Raises:
        AttributeError: If the page has no rating element or no info block
            (``soup.find`` returns ``None`` in that case).
    """
    # Label text -> attribute to set on ``movie``. Written as \u escapes so
    # this source stays pure ASCII whatever the file's coding declaration is.
    # NOTE(review): both literals were garbled to u'??' in the original, which
    # made the second branch unreachable. They are restored here as the
    # Chinese labels for "director" and "starring" -- confirm against the
    # live page markup.
    label_to_attr = {
        u'\u5bfc\u6f14': 'director',
        u'\u4e3b\u6f14': 'actor',
    }

    url = PAGE_URL % movie.id
    response = requests.get(url)
    # response.text is already decoded; pass it straight to the parser rather
    # than re-encoding it and forcing another round of charset detection.
    soup = BeautifulSoup(response.text, 'lxml')

    movie.score = soup.find('strong', 'rating_num').text

    info = soup.find('div', {'id': 'info'})
    # Remove <br> tags from the info block before walking its children.
    for linebreak in info.find_all('br'):
        linebreak.extract()

    for entry in info.contents:
        if isinstance(entry, NavigableString):
            continue
        children = entry.contents
        # An entry needs label, separator and value; indexing a shorter one
        # would raise IndexError.
        if len(children) < 3:
            continue
        attr = label_to_attr.get(children[0].string)
        # Only accept the value when a bare text node separates it from the
        # label, as the original check required.
        if attr is not None and isinstance(children[1], NavigableString):
            setattr(movie, attr, children[2].text)

    # Parenthesised form prints the same under Python 2 and is valid Python 3.
    print(movie)
评论列表
文章目录