def __init__(self, metric, dimensions, ascending=True, name=None):
    """Initializes distribution estimator.

    Builds a calculation closure that reports, for every distinct
    combination of the dimension values, the cumulative share of the
    weighted sum of `metric`, and hands it to the base-class initializer.

    Args:
      metric: Thing to calculate; used as the column key into the data
        and in the default name.
      dimensions: Dimensions to distribute things over; a list of column
        keys. Each distinct combination of their values becomes one row
        of the result, and the list is used as the index level names.
      ascending: Bool or list of bools passed to pandas sort_index that
        say to sort each dimension ascending or descending before the
        cumulative sum is taken.
      name: A string for the column name of results. Defaults to
        "<metric> Cumulative Distribution".
    """

    def _calculate(data, weights):
        """Calculates cumulative distribution metric.

        Args:
          data: Table indexable by column key; must contain `metric` and
            every entry of `dimensions`.
          weights: Per-row weights forwarded to _weighted_sum.
            NOTE(review): assumed to be array-like so it can be multiplied
            by a boolean mask -- confirm against callers.

        Returns:
          A single-column DataFrame (column label "") indexed by a
          MultiIndex over `dimensions`, sorted per `ascending`, holding
          the running sum of each group's share of the weighted total.
        """
        values = data[metric].values
        total = 1.0 * _weighted_sum(values, weights)

        # One tuple of dimension values per row, held in a 1-D object
        # array so that factorize treats each tuple as a single key.
        # Built with public calls only: the private pd.lib helper used
        # previously is not exposed by newer pandas releases. tolist()
        # converts to native Python scalars.
        columns = [data[dim].values.tolist() for dim in dimensions]
        row_keys = list(zip(*columns))
        dimension_tuples = np.empty(len(row_keys), dtype=object)
        # Element-wise assignment keeps NumPy from trying to unpack the
        # tuples into a second array axis.
        for row, key in enumerate(row_keys):
            dimension_tuples[row] = key

        factors, keys = pd.factorize(dimension_tuples, sort=True)

        # Share of the weighted total per key: rows that do not belong
        # to the key have their weight zeroed by the boolean mask.
        results = np.zeros(len(keys))
        for code in range(len(keys)):
            group_weights = weights * (factors == code)
            results[code] = _weighted_sum(values, group_weights) / total

        output = pd.DataFrame(
            results,
            index=pd.MultiIndex.from_tuples(keys, names=dimensions),
            columns=[""])
        # Sort first so that the cumulative sum follows the requested
        # ordering of the dimensions.
        output = output.sort_index(ascending=ascending).cumsum()
        return output

    if name is None:
        name = "{} Cumulative Distribution".format(metric)
    super(CumulativeDistribution, self).__init__(name, _calculate, "dataframe")
# Comment list
# Table of contents