def df_train(spark_context, hive_context):
    """Build a small training DataFrame for tests.

    Returns a DataFrame with columns (wikiid, norm_query_id, query,
    label, features), assembled from one ``_make_q`` batch per query
    string. ``hive_context`` is unused directly — presumably a fixture
    dependency needed so ``toDF`` has an active SQL context; verify
    against the fixture setup.
    """
    # TODO: Use some fixture dataset representing real-ish data? But
    # it needs to be pretty small
    columns = ['wikiid', 'norm_query_id', 'query', 'label', 'features']
    rows = []
    for query in ('abc', 'def', 'ghi', 'jkl', 'mno', 'pqr', 'stu'):
        rows.extend(_make_q(query))
    return spark_context.parallelize(rows).toDF(columns)