def benchmark_spark(ratings, factors, iterations=5):
    """Time Spark ML's implicit-feedback ALS for each requested factor count.

    Starts a local Spark session, converts ``ratings`` with
    ``convert_sparse_to_dataframe`` and fits one ALS model per entry of
    ``factors``. The session is stopped before returning, even when the
    conversion or a fit raises.

    Args:
        ratings: Input accepted by ``convert_sparse_to_dataframe``
            (presumably a scipy sparse matrix -- confirm against that
            helper). The converted DataFrame must provide the "row",
            "col" and "data" columns that ALS is configured to read.
        factors: Iterable of ALS ranks (number of latent factors) to time.
        iterations: Passed to ALS as ``maxIter``; the measured fit time is
            divided by this value, so it must be non-zero.

    Returns:
        dict mapping each rank to the fit time in seconds divided by
        ``iterations``.
    """
    conf = (
        SparkConf()
        .setAppName("implicit_benchmark")
        .setMaster("local[*]")
        .set("spark.driver.memory", "16G")
    )
    context = SparkContext(conf=conf)
    spark = SparkSession(context)

    times = {}
    try:
        # Bind the DataFrame to its own name instead of overwriting the
        # caller-supplied argument.
        ratings_df = convert_sparse_to_dataframe(spark, context, ratings)

        for rank in factors:
            als = ALS(
                rank=rank,
                maxIter=iterations,
                alpha=1,
                implicitPrefs=True,
                userCol="row",
                itemCol="col",
                ratingCol="data",
            )

            # perf_counter() is monotonic and high-resolution, so the
            # measurement cannot be skewed by system clock adjustments
            # the way time.time() can.
            start = time.perf_counter()
            als.fit(ratings_df)
            elapsed = time.perf_counter() - start

            per_iteration = elapsed / iterations
            times[rank] = per_iteration
            print("spark. factors=%i took %.3f" % (rank, per_iteration))
    finally:
        # Always release the Spark session, whether or not a fit failed.
        spark.stop()

    return times
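# NOTE: web-page residue captured with the code, not part of the program:
# "评论列表" (comment list), "文章目录" (article table of contents).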