metacritic_crawl.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:holcrawl 作者: shaypal5 项目源码 文件源码
def _get_movie_url_by_name(movie_name, year=None):
    """Return the Metacritic URL of the search result matching a movie name.

    The search page built from ``SEARCH_URL`` is fetched and every
    ``<li class="result">`` element is inspected. A result matches when its
    ``<h3 class="product_title">`` text equals ``movie_name``, ignoring case
    and surrounding whitespace. If several results match, the last one on
    the page is used.

    Args:
        movie_name: Title of the movie to look up.
        year: Optional release year. When given, ``str(year)`` must also
            appear somewhere in the result's markup for it to match.

    Returns:
        ``METACRITIC_URL`` concatenated with the ``href`` of the first link
        found inside the matching result.

    Raises:
        AttributeError: If no search result matches the name (and year).
        IndexError: If a result has no ``product_title`` heading, or the
            matching result contains no link.
        urllib.error.URLError: If the search page cannot be fetched.
    """
    query = SEARCH_URL.format(movie_name=_parse_name_for_search(movie_name))
    request = urllib.request.Request(query, headers=_HEADERS)
    # Close the HTTP response as soon as it has been parsed instead of
    # leaving the connection open until garbage collection.
    with urllib.request.urlopen(request) as response:
        search_res = bs(response, "html.parser")
    results = search_res.find_all("li", {"class": "result"})
    wanted_title = movie_name.strip().lower()
    correct_result = None
    for result in results:
        title = result.find_all(
            "h3", {"class": "product_title"})[0].contents[0].contents[0]
        if title.strip().lower() != wanted_title:
            continue
        # No early exit: a later matching result overrides an earlier one.
        if year is None or str(year) in str(result):
            correct_result = result
    movie_url_suffix = correct_result.find_all("a")[0]['href']
    return METACRITIC_URL + movie_url_suffix


# === critics reviews page ===
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号