def test_esk609(self):
    """Test Esk-609: Map data-frame groups.

    Runs the ``esk609_map_df_groups.py`` macro, then verifies that the data
    store holds the ``map_rdd`` and ``flat_map_rdd`` entries, that both are
    Spark RDDs, and that their sorted contents equal the expected rows.
    """
    # execute the macro and fetch the data store it filled
    self.run_eskapade('esk609_map_df_groups.py')
    data_store = ProcessManager().service(DataStore)

    # both results must be present in the data store and must be RDDs
    rdd_keys = ('map_rdd', 'flat_map_rdd')
    for rdd_key in rdd_keys:
        self.assertIn(rdd_key, data_store, 'no data found with key "{}"'.format(rdd_key))
        self.assertIsInstance(
            data_store[rdd_key],
            pyspark.RDD,
            'object "{0:s}" is not an RDD (type "{1:s}")'.format(rdd_key, str(type(data_store[rdd_key]))),
        )

    # expected (group, sum of "bar") pairs: ten groups, starting at 27.5 and
    # increasing by 50 per group (27.5, 77.5, ..., 477.5)
    expected_sums = [(group, 27.5 + 50. * group) for group in range(10)]

    # expected flat-mapped rows: one row per index 0..99, each carrying the
    # "bar" sum of the block of ten consecutive indices it belongs to
    expected_rows = []
    for index in range(100):
        _, group_sum = expected_sums[index // 10]
        expected_rows.append((index, 'foo{:d}'.format(index), (index + 1) / 2., group_sum))

    # compare the collected (and sorted) RDD contents with the expectations
    mapped = sorted(data_store['map_rdd'].collect())
    self.assertListEqual(mapped, expected_sums, 'unexpected values in "map_rdd"')
    flat_mapped = sorted(data_store['flat_map_rdd'].collect())
    self.assertListEqual(flat_mapped, expected_rows, 'unexpected values in "flat_map_rdd"')
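# NOTE: web-page residue, not part of the test code: "评论列表" (comment list)
# NOTE: web-page residue, not part of the test code: "文章目录" (table of contents)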