def test_groupby_categorical_two_columns(self):
    """Check groupby aggregation and get_group with a categorical key.

    Covers three scenarios:

    * grouping on a single categorical column that declares a category
      ("c") which never occurs in the data,
    * grouping on that categorical column together with an integer column,
      including ``get_group`` lookups with tuple keys (GH 10132),
    * grouping on the binned output of ``pd.cut`` together with a column.

    In each case the expected frame contains a row for every category
    (or every category/key combination), filled with NaN where the data
    has no matching rows.
    """
    # https://github.com/pydata/pandas/issues/8138
    d = {'cat':
         pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"],
                        ordered=True),
         'ints': [1, 1, 2, 2],
         'val': [10, 20, 30, 40]}
    test = pd.DataFrame(d)

    # Grouping on a single column
    groups_single_key = test.groupby("cat")
    res = groups_single_key.agg('mean')
    # Category "c" has no rows, so its aggregated values are NaN.
    exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]},
                    index=pd.CategoricalIndex(["a", "b", "c"], name="cat"))
    tm.assert_frame_equal(res, exp)

    # Grouping on two columns
    groups_double_key = test.groupby(["cat", "ints"])
    res = groups_double_key.agg('mean')
    # Every (cat, ints) combination is listed; the "c" rows hold NaN.
    exp = DataFrame({"val": [10, 30, 20, 40, np.nan, np.nan],
                     "cat": ["a", "a", "b", "b", "c", "c"],
                     "ints": [1, 2, 1, 2, 1, 2]}).set_index(["cat", "ints"])
    tm.assert_frame_equal(res, exp)

    # GH 10132
    # get_group with a tuple key must return exactly the rows that match
    # both key components.
    for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]:
        c, i = key
        result = groups_double_key.get_group(key)
        expected = test[(test.cat == c) & (test.ints == i)]
        # Use the same ``tm`` helper as the other comparisons in this test.
        tm.assert_frame_equal(result, expected)

    # Same two-key grouping, but with the categorical key produced by
    # binning a column with pd.cut.
    d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]}
    test = pd.DataFrame(d)
    values = pd.cut(test['C1'], [1, 2, 3, 6])
    values.name = "cat"
    groups_double_key = test.groupby([values, 'C2'])

    res = groups_double_key.agg('mean')
    nan = np.nan
    # Expected index: every bin label crossed with every C2 value.
    idx = MultiIndex.from_product([["(1, 2]", "(2, 3]", "(3, 6]"],
                                   [1, 2, 3, 4]],
                                  names=["cat", "C2"])
    exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3,
                            nan, nan, nan, nan, 4, 5],
                     "C3": [nan, nan, nan, nan, 10, 100,
                            nan, nan, nan, nan, 200, 34]}, index=idx)
    tm.assert_frame_equal(res, exp)
test_groupby.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录