def log_likelihood(self, data):
    """Return the Gaussian log-likelihood of ``data`` under the current labels.

    Every cluster ``k`` is treated as a Gaussian centred on
    ``self.cluster_centers_[k]`` with mixing weight ``nk / n``.  Its
    contribution to the total is::

        nk * (log(nk / n) - d/2 * log(2*pi) - 1/2 * log|det(cov_k)|)
            - 1/2 * sum_i (x_i - mu_k)^T inv(cov_k) (x_i - mu_k)

    As a side effect the inverse covariance matrices are stored on
    ``self._inv_covar_matrices``.

    Args:
        data: 2-D array of shape ``(n, d)``; row ``i`` is the point whose
            cluster index is ``self.labels_[i]``.

    Returns:
        The sum of the per-cluster contributions.

    Raises:
        Exception: if the running total becomes NaN or positive infinity
            after any cluster has been added.
    """
    # Number of points assigned to each cluster.
    nks = np.bincount(self.labels_, minlength=self.n_clusters)
    n, d = data.shape

    # NOTE(review): presumably one (d, d) covariance matrix per cluster,
    # stacked along the first axis -- confirm against self.covariances.
    covar_matrices = self.covariances(
        self.labels_, cluster_centers=self.cluster_centers_, data=data
    )
    covar_dets = np.linalg.det(covar_matrices)
    self._inv_covar_matrices = self._matrix_inverses(covar_matrices)

    # Normalisation constant shared by every cluster; hoisted out of the loop.
    gaussian_const = 0.5 * d * np.log(2 * np.pi)

    total = 0
    for k, nk in enumerate(nks):
        if self.verbose == 1:
            print('log_likelihood: covar_matrix_det = {}'.format(covar_dets[k]))

        term_1 = nk * (
            np.log(float(nk) / n)
            - gaussian_const
            - 0.5 * np.log(abs(covar_dets[k]))
        )

        distances = cdist(
            data[self.labels_ == k],
            np.array([self.cluster_centers_[k]]),
            metric='mahalanobis',
            VI=self._inv_covar_matrices[k],
        )
        # cdist yields NaN for some points (e.g. when the quadratic form is
        # negative); those points are dropped from the sum.
        distances = distances[~np.isnan(distances)]

        # cdist returns the square-rooted Mahalanobis distance, whereas the
        # Gaussian exponent needs the squared form, hence the explicit square.
        term_2 = -0.5 * np.sum(distances ** 2)

        total += term_1 + term_2
        if np.isnan(total) or total == float('inf'):
            raise Exception('ll is nan or inf')
    return total
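# NOTE(review): leftover web-page navigation text, not code -- "Comment list"
# NOTE(review): leftover web-page navigation text, not code -- "Table of contents"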