def construct_empty_hist(self, columns):
    """Create an (empty) histogram of the right type for the given columns.

    The histogram is built inside-out: starting from a plain ``hg.Count``,
    the columns are visited in reverse order and each column wraps the
    histogram built so far as its ``value``. The first column therefore ends
    up as the outermost histogram.

    Columns whose dtype (looked up in ``self.var_dtype``) is a numpy number
    or datetime64 become an ``hg.SparselyBin``; the bin width and offset come
    from ``self.bin_specs``, falling back to ``self._unit_bin_specs`` for
    numbers and ``self._unit_timestamp_specs`` for timestamps. All other
    columns become an ``hg.Categorize``.

    The returned histogram gets two extra instance attributes: ``datatype``
    (the single dtype for one column, otherwise the list of dtypes) and
    ``n_dim`` (the number of columns). In addition the histogram's class
    gets a read-only ``n_bins`` property, unless it already has an attribute
    of that name. The property returns ``num`` if the histogram has it, else
    ``size``, and raises ``RuntimeError`` if neither is present.

    :param list columns: histogram columns
    :returns: created histogram
    :rtype: hg.Count (no columns), hg.SparselyBin or hg.Categorize
    """
    single_column = len(columns) == 1
    hist = hg.Count()

    # create a multi-dim histogram by iterating through the columns in reverse order
    # and passing the histogram built so far as input to the next column
    for col in reversed(columns):
        # histogram type depends on the data type
        dt = np.dtype(self.var_dtype[col])

        # processing function, e.g. only accept booleans during filling.
        # The default is looked up only when no column-specific function is
        # configured, so a dtype missing from hf.QUANTITY is not an error then.
        f = self.quantity[col] if col in self.quantity else hf.QUANTITY[dt.type]
        if single_column:
            # df[col] is a pd.Series: values are passed in directly
            quant = lambda x, fnc=f: fnc(x)
        else:
            # df[columns] is a pd.DataFrame: select this column first.
            # fnc/clm are bound as defaults to avoid late-binding closures.
            quant = lambda x, fnc=f, clm=col: fnc(x[clm])

        # compare scalar types directly; no need to instantiate a scalar
        is_number = issubclass(dt.type, np.number)
        is_timestamp = issubclass(dt.type, np.datetime64)

        if is_number or is_timestamp:
            # numbers and timestamps are put in a sparse binned histogram
            default_specs = self._unit_bin_specs if is_number else self._unit_timestamp_specs
            bs = self.bin_specs.get(col, default_specs)
            hist = hg.SparselyBin(
                binWidth=bs['bin_width'],
                origin=bs['bin_offset'],
                quantity=quant,
                value=hist,
            )
        else:
            # strings and booleans are treated as categories
            hist = hg.Categorize(quantity=quant, value=hist)

    # FIXME stick data types and number of dimension to histogram
    dta = [self.var_dtype[col] for col in columns]
    hist.datatype = dta[0] if single_column else dta
    hist.n_dim = len(columns)

    def n_bins(self):
        """Return the number of bins of this histogram."""
        if hasattr(self, 'num'):
            return self.num
        elif hasattr(self, 'size'):
            return self.size
        else:
            raise RuntimeError('Cannot retrieve number of bins from hgr hist')

    # A property only works as a descriptor when it lives on the class;
    # assigning it to the instance would make ``hist.n_bins`` evaluate to the
    # property object itself. Install it on the class instead, and never
    # overwrite an ``n_bins`` the class already provides.
    hist_cls = type(hist)
    if not hasattr(hist_cls, 'n_bins'):
        hist_cls.n_bins = property(n_bins)

    return hist
评论列表
文章目录