def maximize_likelihood(self, data, responsibilities, weights, cmask=None):
    """M-step: re-estimate model parameters from weighted responsibilities.

    Parameters
    ----------
    data : object
        Training data container; ``data.frequencies`` is used as the
        observation matrix (samples x features).
    responsibilities : ndarray
        Per-sample, per-class responsibility matrix (samples x classes).
    weights : ndarray
        Per-sample weights, multiplied element-wise into the
        responsibilities before the parameter update.
    cmask : ndarray of bool, optional
        Boolean mask over classes; classes with a False entry are dropped
        ("cluster reduction") before the update. ``None``, a scalar mask,
        or an all-True mask means no reduction.

    Returns
    -------
    (dimchange, ll)
        ``dimchange`` as reported by ``self.update()`` and the per-sample
        log-likelihood array under the updated parameters.

    Side effects: mutates ``self.variables``, ``self.names`` (on cluster
    reduction), ``self.stdev``; writes a progress line to stderr.
    """
    if not (cmask is None or cmask.shape == () or np.all(cmask)):  # cluster reduction
        responsibilities = responsibilities[:, cmask]
        self.names = list(compress(self.names, cmask))  # TODO: make self.names a numpy array?

    weights_combined = responsibilities * weights

    # Accumulate weighted feature counts per class, then normalize columns.
    self.variables = np.dot(weights_combined.T, data.frequencies)
    with np.errstate(invalid='ignore'):  # if no training data is available for any class
        np.divide(self.variables, weights_combined.sum(axis=0, keepdims=True, dtype=types.large_float_type).T, out=self.variables)  # normalize before update, self.variables is types.prob_type

    dimchange = self.update()  # create cache for likelihood calculations

    # TODO: refactor this block
    ll = self.log_likelihood(data)
    std_per_class = common.weighted_std(ll, weights_combined)
    weight_per_class = weights_combined.sum(axis=0, dtype=types.large_float_type)
    weight_per_class /= weight_per_class.sum()

    # Classes with no usable data yield NaN std; mask them out of the mean.
    std_per_class_mask = np.isnan(std_per_class)
    skipped_classes = std_per_class_mask.sum()
    self.stdev = np.ma.dot(np.ma.MaskedArray(std_per_class, mask=std_per_class_mask), weight_per_class)
    # NOTE(review): the second %i prints the number of NON-skipped classes
    # (num_components - skipped), so the message reads "omitted X/Y" with
    # Y = remaining classes, not the total — confirm this is intended.
    stderr.write("LOG %s: mean class likelihood standard deviation is %.2f (omitted %i/%i classes due to invalid or insufficient data)\n" % (self._short_name, self.stdev, skipped_classes, self.num_components - skipped_classes))
    return dimchange, ll