imdb_crawl.py 文件源码

python
阅读 31 收藏 0 点赞 0 评论 0

项目:holcrawl 作者: shaypal5 项目源码 文件源码
def _get_business_props(movie_code):
    """Scrape screen-count statistics from a movie's business page.

    The page at ``_BUSINESS_URL`` (formatted with ``movie_code``) is fetched
    and parsed; the first span matched by ``_WEEKEND_CONTENT_REGEX`` is then
    searched with ``_US_OPEN_WEEKEND_REGEX`` and every match is converted to
    an ``int`` after stripping thousands separators (``','``).

    Args:
        movie_code: Value substituted for ``code`` in ``_BUSINESS_URL``.

    Returns:
        A dict with the following keys:
            ``screens_by_weekend``: the parsed counts, in reverse order of
                their appearance on the page (presumably the page lists the
                most recent weekend first -- verify against the site).
            ``opening_weekend_screens``: first item of ``screens_by_weekend``.
            ``max_screens``: largest parsed count.
            ``total_screens``: sum of all parsed counts.
            ``avg_screens``: ``total_screens`` divided by ``num_weekends``.
            ``num_weekends``: number of parsed counts.

    Raises:
        IndexError: if ``_WEEKEND_CONTENT_REGEX`` matches nothing in the
            page, or if no screen counts are found inside the matched span.
    """
    cur_business_url = _BUSINESS_URL.format(code=movie_code)
    # Parse inside the ``with`` block so the HTTP response is always closed,
    # even when parsing fails, instead of being left for the GC to reclaim.
    with request.urlopen(cur_business_url) as response:
        busi_page = bs(response, "html.parser")
    busi_str = str(busi_page)
    weekend_contents = re.findall(_WEEKEND_CONTENT_REGEX, busi_str)[0]
    num_screens_list = [
        int(match.replace(',', ''))
        for match in re.findall(_US_OPEN_WEEKEND_REGEX, weekend_contents)]
    screens_by_weekend = num_screens_list[::-1]
    total_screens = sum(num_screens_list)
    num_weekends = len(num_screens_list)
    # Values are evaluated in key order, so an empty list still fails on the
    # ``[0]`` lookup (IndexError) before ``max`` or the division is reached.
    return {
        'screens_by_weekend': screens_by_weekend,
        'opening_weekend_screens': screens_by_weekend[0],
        'max_screens': max(num_screens_list),
        'total_screens': total_screens,
        'avg_screens': total_screens / num_weekends,
        'num_weekends': num_weekends,
    }


# ==== crawling the release page ====
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号