movie_info.py 文件源码-python代码片段

movie_info.py 文件源码

python

阅读 33 收藏 0 点赞 0 评论 0

项目：xcrawler 作者: 0xE8551CCB 项目源码文件源码

def parse_movie_details(self, response):
        """Extract the title and the ``label: value`` facts of a movie page.

        The response body is parsed into an element tree, the title is read
        from the first ``<span>`` of the ``<h1>`` under ``div#content``, and
        the ``div#info`` block is flattened to plain text whose lines are
        split into label/value pairs.

        Args:
            response: Object exposing ``content`` (the page markup) and
                ``base_url``; both are handed to ``html.fromstring``.

        Yields:
            A single dict holding the title under the ``'??'`` key plus one
            entry per ``label: value`` line of the info block whose label and
            value are both non-empty. Nothing is yielded when the title or
            the info block cannot be located.
        """
        html_root = html.fromstring(response.content,
                                    base_url=response.base_url)

        title = self.xpath_first(html_root,
                                 '//div[@id="content"]'
                                 '/h1/span[1]/text()')
        if title is None:
            # Presumably xpath_first returns None on a miss (verify against
            # its definition); calling .strip() on that would raise
            # AttributeError, so skip the page like a missing info block.
            return

        movie_info = dict()
        # NOTE(review): this key looks like a mis-encoded non-ASCII label.
        # It is left untouched because consumers may already rely on it.
        movie_info['??'] = title.strip()

        try:
            # to pure text
            soup = BeautifulSoup(html.tostring(
                self.xpath_first(html_root,
                                 '//div[@id="info"]')), 'html')
        except TypeError:
            # Raised when the info node could not be serialized, e.g. when
            # the lookup found nothing: there are no details to report.
            return

        for line in soup.get_text().splitlines():
            # Split on the FIRST colon only, so values that contain colons
            # themselves (URLs, durations such as "1:30") stay intact.
            label, _, value = line.partition(':')
            label = label.strip()
            value = value.strip()

            if label and value:
                movie_info[label] = value

        yield movie_info