def parse_movie(self, response):
    """Build a ``Movie`` item from a movie detail page.

    Reads the ``div.contentinfo`` section of *response* and fills in the
    item's ``title``, ``cover``, ``detail``, ``thumbnails`` and
    ``download_links`` fields.

    Args:
        response: The page response; only its ``css()`` selector API is used.

    Returns:
        The populated ``Movie`` item. ``download_links`` is a list of plain
        dicts, each with a ``title`` and a ``link`` key.
    """
    # Local import so the block does not depend on a module-level `re` import.
    import re

    content_info = response.css('div.contentinfo')
    movie = Movie()

    # NOTE(review): both delimiters are a literal '?', which looks like the
    # residue of mis-encoded non-ASCII characters (possibly a pair of
    # bracket/quote marks around the title) -- confirm and restore the
    # intended characters. They are escaped here because an unescaped
    # leading '?' is an invalid regex and makes re_first() raise re.error.
    symbol1 = u'?'
    symbol2 = u'?'
    rex = r'%s(.*)%s' % (re.escape(symbol1), re.escape(symbol2))
    movie['title'] = content_info.css('h1 a::text').re_first(rex)

    text = content_info.css('div#text')
    t_msg_font = text.css('div.t_msgfont')
    if t_msg_font:
        # Layout with a div.t_msgfont wrapper: take cover and detail from it.
        movie['cover'] = t_msg_font.css(' img::attr(src)').extract_first()
        movie['detail'] = self.parse_detail(t_msg_font.css('::text'))
    else:
        # Layout without the wrapper: fall back to the <p> elements.
        movie['cover'] = text.css(' p img::attr(src)').extract_first()
        movie['detail'] = self.parse_detail(text.css(' p::text'))

    # NOTE(review): the original indentation was lost; this section is
    # assumed to run for both layouts -- confirm.
    thumbnails = text.css(' p img::attr(src)').extract()
    # The cover may also appear among the <p> images; drop its first
    # occurrence so it is not reported as a thumbnail too.
    if movie['cover'] in thumbnails:
        thumbnails.remove(movie['cover'])
    movie['thumbnails'] = thumbnails

    download_links = text.css(' table tbody tr td a')
    download_links_array = []
    for link_item in download_links:
        download_link = DownloadLink()
        download_link['title'] = link_item.css('::text').extract_first()
        download_link['link'] = link_item.css('::attr(href)').extract_first()
        # Store plain dicts rather than DownloadLink items.
        download_links_array.append(dict(download_link))
    movie['download_links'] = download_links_array

    return movie
评论列表
文章目录