def load(graph):
    """Load the Trip Advisor dataset into a given graph object.

    The graph object must implement the
    :ref:`graph interface <dataset-io:graph-interface>`.

    The archive ``TripAdvisorJson.tar.bz2`` is looked up in the current
    directory first, then under ``<sys.prefix>/rgmining/data``,
    ``<sys.prefix>/local/rgmining/data`` and finally under the
    ``rgmining/data`` directory of the base returned by
    ``site.getuserbase()``.

    Every archive member yielded by ``_files`` is parsed as JSON. Its
    ``HotelInfo.HotelID`` is registered as a product, and each entry of
    ``Reviews`` is added as a review whose score is the ``Overall`` rating
    divided by 5 and whose date is reformatted as ``YYYYMMDD`` (or ``None``
    when the date cannot be parsed with ``_DATE_FORMAT``).

    Args:
      graph: an instance of bipartite graph.

    Returns:
      The graph instance *graph*.
    """
    archive_name = "TripAdvisorJson.tar.bz2"
    # Path components shared by every installed-location candidate.
    data_suffix = ("rgmining", "data", archive_name)

    # Walk the candidate locations in priority order; the last candidate is
    # used as-is, so a missing archive surfaces when the tarball is opened.
    archive = join(".", archive_name)
    if not exists(archive):
        archive = join(sys.prefix, *data_suffix)
    if not exists(archive):
        archive = join(sys.prefix, "local", *data_suffix)
    if not exists(archive):
        archive = join(site.getuserbase(), *data_suffix)

    # Reviewer objects created so far, keyed by the "ReviewID" field.
    reviewers = {}
    with tarfile.open(archive) as tar:
        for member in _files(tar):
            with closing(tar.extractfile(member)) as stream:
                hotel = json.load(stream)

            hotel_id = hotel["HotelInfo"]["HotelID"]
            product = graph.new_product(name=hotel_id)

            for review in hotel["Reviews"]:
                review_id = review["ReviewID"]
                # Scale the overall rating by the factor 5.
                score = float(review["Ratings"]["Overall"]) / 5.0

                try:
                    parsed = datetime.datetime.strptime(
                        review["Date"], _DATE_FORMAT)
                    date = parsed.strftime("%Y%m%d")
                except ValueError:
                    # Unparseable dates are recorded as missing.
                    date = None

                # Create the reviewer only the first time its key is seen.
                if review_id not in reviewers:
                    reviewers[review_id] = graph.new_reviewer(name=review_id)
                graph.add_review(reviewers[review_id], product, score, date)
    return graph
评论列表
文章目录