def test_quantiles_unmasked(self, seed):
permute = partial(permute_rows, seed)
shape = (6, 6)
# Shuffle the input rows to verify that we don't depend on the order.
# Take the log to ensure that we don't depend on linear scaling or
# integrality of inputs
factor_data = permute(log1p(arange(36, dtype=float).reshape(shape)))
f = self.f
# Apply the same shuffle we applied to the input rows to our
# expectations. Doing it this way makes it obvious that our
# expectation corresponds to our input, while still testing against
# a range of input orderings.
permuted_array = compose(permute, partial(array, dtype=int64_dtype))
self.check_terms(
terms={
'2': f.quantiles(bins=2),
'3': f.quantiles(bins=3),
'6': f.quantiles(bins=6),
},
initial_workspace={
f: factor_data,
},
expected={
# The values in the input are all increasing, so the first half
# of each row should be in the bottom bucket, and the second
# half should be in the top bucket.
'2': permuted_array([[0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 1, 1]]),
# Similar for three buckets.
'3': permuted_array([[0, 0, 1, 1, 2, 2],
[0, 0, 1, 1, 2, 2],
[0, 0, 1, 1, 2, 2],
[0, 0, 1, 1, 2, 2],
[0, 0, 1, 1, 2, 2],
[0, 0, 1, 1, 2, 2]]),
# In the limiting case, we just have every column different.
'6': permuted_array([[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5],
[0, 1, 2, 3, 4, 5]]),
},
mask=self.build_mask(self.ones_mask(shape=shape)),
)
评论列表
文章目录