imdb.py 文件源码-python代码片段

imdb.py 文件源码

python

阅读 20 收藏 0 点赞 0 评论 0

项目：script.module.metadatautils 作者: marcelveldt 项目源码文件源码

def get_top250_db(self):
        '''
            Scrape the IMDb top-250 charts for both movies and tvshows.

            Fetches http://www.imdb.com/chart/top and
            http://www.imdb.com/chart/toptv, collects the title links found in
            the "titleColumn" cells of the "chart full-width" table, passes the
            combined result to self.write_kodidb() and returns it.

            Returns a dict with the imdb id (third "/"-separated segment of the
            link's href) as key and try_parse_int() of the rank text as value.
            Links whose href does not have the expected shape are skipped.

            Errors raised by requests.get() (timeout, connection failure) are
            not caught here and propagate to the caller.

            NOTE(review): the original docstring said this "uses 7 day cache to
            prevent overloading the server". No caching is visible in this
            method - presumably it is applied by a decorator or by the
            caller; confirm.
        '''
        results = {}
        # (chart name used in the url, marker that precedes the rank in the href)
        charts = (("top", "chttp_tt_"), ("toptv", "chttvtp_tt_"))
        for chart_name, rank_marker in charts:
            html = requests.get(
                "http://www.imdb.com/chart/%s" % chart_name,
                headers={'User-agent': 'Mozilla/5.0'},
                timeout=20)
            soup = BeautifulSoup.BeautifulSoup(html.text)
            for table in soup.findAll('table'):
                # NOTE(review): comparing against a plain string assumes the parser
                # returns the class attribute as one string - confirm for the
                # BeautifulSoup version in use.
                if table.get("class") != "chart full-width":
                    continue
                for td_def in table.findAll('td'):
                    if td_def.get("class") != "titleColumn":
                        continue
                    a_link = td_def.find("a")
                    if not a_link:
                        continue
                    url = a_link.get("href")
                    if not url:
                        # anchor without a href: nothing to parse
                        continue
                    path_parts = url.split("/")
                    rank_parts = url.split(rank_marker)
                    # Presumably the href looks like "/title/<imdbid>/...<marker><rank>".
                    # Skip anything else rather than raising IndexError, so one
                    # unexpected link does not abort the whole scrape.
                    if len(path_parts) < 3 or len(rank_parts) < 2:
                        continue
                    results[path_parts[2]] = try_parse_int(rank_parts[1])
        self.write_kodidb(results)
        return results