def get_top250_db(self):
    '''
    Scrape the IMDb Top 250 charts for both movies and TV shows.

    Requests http://www.imdb.com/chart/top and http://www.imdb.com/chart/toptv
    and reads the title links found in every "chart full-width" table.

    Returns a dict keyed by imdb id (the third "/"-separated segment of the
    link's href) whose value is the chart rank: the text following the
    chart's ref marker in the href, passed through try_parse_int.
    The combined dict is also handed to self.write_kodidb() before returning.

    Links without an href, or whose href lacks the id segment or the rank
    marker, are skipped so one malformed entry cannot abort the whole scrape.

    NOTE(review): the previous docstring mentioned a 7 day cache to prevent
    overloading the server; no caching is visible inside this method, so it
    is presumably applied outside of it (e.g. by a decorator) - confirm.
    '''
    # (chart path on imdb.com, marker that precedes the rank in each title link)
    listings = [("top", "chttp_tt_"), ("toptv", "chttvtp_tt_")]
    results = {}
    for chart, rank_marker in listings:
        response = requests.get(
            "http://www.imdb.com/chart/%s" % chart,
            headers={'User-agent': 'Mozilla/5.0'},
            timeout=20)
        soup = BeautifulSoup.BeautifulSoup(response.text)
        for table in soup.findAll('table'):
            if table.get("class") != "chart full-width":
                continue
            for td_def in table.findAll('td'):
                if td_def.get("class") != "titleColumn":
                    continue
                a_link = td_def.find("a")
                if not a_link:
                    continue
                # .get() instead of ["href"]: an anchor without href is skipped
                url = a_link.get("href")
                if not url:
                    continue
                path_parts = url.split("/")
                rank_parts = url.split(rank_marker)
                # need .../<x>/<imdb_id>/... and something after the rank marker
                if len(path_parts) < 3 or len(rank_parts) < 2:
                    continue
                results[path_parts[2]] = try_parse_int(rank_parts[1])
    # persist the merged movie + tv ranking once both charts are processed
    self.write_kodidb(results)
    return results
评论列表
文章目录