import numpy as np
import scipy.special as spec


def _EM_VMM_expectation_step_soft(mu_list, kappa_list, instance_array, pi_hat, c_arr=None, s_arr=None):
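    """Soft E-step for an EM fit of a mixture of independent multivariate von Mises distributions.

    Assumed shapes (inferred from the code below, not stated in the original):
      mu_list, kappa_list : per-cluster arrays of length n_dimensions
      instance_array      : (n_datapoints, n_dimensions) array of angles
      pi_hat              : per-cluster mixing proportions
      c_arr, s_arr        : optional precomputed cos/sin of instance_array

    Returns the responsibility matrix zij and the log-likelihood L.
    """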
    n_clusters = len(mu_list)
    n_datapoints, n_dimensions = instance_array.shape
    probs = np.ones((n_datapoints, n_clusters), dtype=float)
    # c_arr and s_arr are used to speed up cos(instance_array - mu) via
    # the trig identity cos(a - b) = cos(a)*cos(b) + sin(a)*sin(b),
    # which removes the need to recalculate cos(a) and sin(a) on every call.
    if c_arr is None or s_arr is None:
        c_arr = np.cos(instance_array)
        s_arr = np.sin(instance_array)
    for cluster_ident, (mu_tmp, kappa_tmp, p_hat) in enumerate(zip(mu_list, kappa_list, pi_hat)):
        # Log of the normalising constant of a product of independent von Mises
        # densities: -n_dimensions*log(2*pi) - sum_i log(I0(kappa_i))
        norm_fac_exp = n_dimensions * np.log(1. / (2. * np.pi)) - np.sum(np.log(spec.iv(0, kappa_tmp)))
        pt1 = kappa_tmp * (c_arr * np.cos(mu_tmp) + s_arr * np.sin(mu_tmp))
        probs[:, cluster_ident] = p_hat * np.exp(np.sum(pt1, axis=1) + norm_fac_exp)
        # Old way without the trig identity speed-up:
        # probs[:, cluster_ident] = p_hat * np.exp(np.sum(kappa_tmp * np.cos(instance_array - mu_tmp), axis=1) + norm_fac_exp)
        # Older way, keeping everything in the exponent and not taking the log of the constant:
        # probs[:, cluster_ident] = p_hat * np.product(np.exp(kappa_tmp * np.cos(instance_array - mu_tmp)) / (2. * np.pi * spec.iv(0, kappa_tmp)), axis=1)
    prob_sum = np.sum(probs, axis=1)[:, np.newaxis]
    zij = probs / prob_sum
    # Before using np.newaxis this was: zij = (probs.T / prob_sum).T
    # Calculate the log-likelihood (strictly, the expected complete-data log-likelihood
    # sum_jk z_jk*log(pi_k*f_k(x_j))). Note this is quite an expensive computation and not
    # really necessary unless comparing different techniques and/or checking for convergence.
    # Entries below 1e-20 are masked out so the log never sees a zero.
    valid_items = probs > 1.e-20
    L = np.sum(zij[valid_items] * np.log(probs[valid_items]))
    # Alternative: L = np.sum(zij * np.log(np.clip(probs, 1.e-10, 1)))
    return zij, L
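
# A minimal usage sketch (not part of the original source): two 1-D angular clusters.
# The data, initial means, concentrations, and mixing weights below are illustrative
# assumptions chosen only to demonstrate the call signature.
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    data = np.concatenate([rng.vonmises(0.0, 5.0, size=(100, 1)),
                           rng.vonmises(np.pi, 5.0, size=(100, 1))])
    mu_list = [np.array([0.1]), np.array([3.0])]     # assumed initial cluster means
    kappa_list = [np.array([4.0]), np.array([4.0])]  # assumed initial concentrations
    pi_hat = np.array([0.5, 0.5])                    # assumed initial mixing proportions
    zij, L = _EM_VMM_expectation_step_soft(mu_list, kappa_list, data, pi_hat)
    print('responsibilities shape:', zij.shape, 'log-likelihood:', L)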