def fill_histogram(self, idf, columns):
    """Fill the value-count histogram for the given column(s) of a dataframe.

    The occurrences found in ``idf`` are counted and merged into the counter
    stored in ``self._counts`` under the histogram name, which is the column
    names joined with ``':'``. A counter created here is a
    ``collections.Counter``, so repeated calls accumulate counts.

    :param idf: input data frame used for filling histogram
    :param columns: histogram column(s); a list of column names, or a single
        column name given as a plain string
    """
    # A bare string would otherwise be iterated character by character by
    # ``join`` and ``columns[0]``; treat it as a one-column histogram.
    if isinstance(columns, str):
        columns = [columns]
    name = ':'.join(columns)

    # value_counts() is faster than groupby().size(), but only works for
    # series (1d); use groupby() for multi-dimensional histograms.
    if len(columns) > 1:
        g = idf.groupby(by=columns).size()
    else:
        g = idf[columns[0]].value_counts()
    counts = Counter(g.to_dict())

    # Register the histogram only after the dataframe lookup has succeeded, so
    # a failing lookup (e.g. an unknown column) leaves no empty entry behind.
    if name not in self._counts:
        # create an (empty) value counts dict
        self._counts[name] = Counter()

    # remove specific keys from histogram before merging, if so requested
    counts = self.drop_requested_keys(name, counts)
    self._counts[name].update(counts)
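# Web-page navigation residue from the original paste ("Comment list" / "Article table of contents"); not part of the code.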