tripadvisor.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:tripadvisor 作者: rgmining 项目源码 文件源码
def load(graph):
    """Populate *graph* with the Trip Advisor review dataset.

    The graph object must implement the
    :ref:`graph interface <dataset-io:graph-interface>`.

    The archive ``TripAdvisorJson.tar.bz2`` is looked up in the current
    directory first, then under ``<sys.prefix>/rgmining/data``, then under
    ``<sys.prefix>/local/rgmining/data``; if none of those exist the path
    under the user base directory (``site.getuserbase()``) is used without
    further checking.

    For each JSON document yielded by ``_files``, one product is created
    from ``HotelInfo.HotelID`` and one review is added per entry in
    ``Reviews``. The overall rating is divided by 5, and the review date is
    re-formatted as ``YYYYMMDD`` (``None`` when it cannot be parsed with
    ``_DATE_FORMAT``).

    Args:
      graph: an instance of bipartite graph.

    Returns:
      The graph instance *graph*.
    """
    base = "TripAdvisorJson.tar.bz2"

    # Fallback locations are built lazily so that later ones (notably the
    # user base lookup) are only evaluated when the earlier ones are missing.
    fallbacks = (
        lambda: join(sys.prefix, "rgmining", "data", base),
        lambda: join(sys.prefix, "local", "rgmining", "data", base),
        lambda: join(site.getuserbase(), "rgmining", "data", base),
    )
    archive = join(".", base)
    for build_path in fallbacks:
        if exists(archive):
            break
        archive = build_path()

    reviewers = {}  # Reviewer objects created so far, keyed by "ReviewID".
    with tarfile.open(archive) as tar:
        for member in _files(tar):
            with closing(tar.extractfile(member)) as stream:
                hotel = json.load(stream)
                hotel_id = hotel["HotelInfo"]["HotelID"]
                product = graph.new_product(name=hotel_id)

                for review in hotel["Reviews"]:
                    key = review["ReviewID"]
                    score = float(review["Ratings"]["Overall"]) / 5.

                    # Unparsable dates are recorded as None, not rejected.
                    try:
                        parsed = datetime.datetime.strptime(
                            review["Date"], _DATE_FORMAT)
                        date = parsed.strftime("%Y%m%d")
                    except ValueError:
                        date = None

                    if key in reviewers:
                        reviewer = reviewers[key]
                    else:
                        reviewer = graph.new_reviewer(name=key)
                        reviewers[key] = reviewer
                    graph.add_review(reviewer, product, score, date)

    return graph
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号