# Imports assumed by the snippets below (the original listing omits them).
# mixture.GMM is the pre-0.20 scikit-learn API (deprecated in 0.18);
# make_grid_subplots, k_means_clustering and FeatureNormalizer are local
# helpers from the source projects and are not defined here.
import sys
import numpy as np
import pandas as pd
from scipy.stats import norm
from sklearn import metrics, mixture
from sklearn.mixture import GMM
from StringIO import StringIO  # Python 2, matching the print statements below
def test_verbose_second_level():
    # Create sample data (the original test file defined rng at module level)
    rng = np.random.RandomState(0)
    X = rng.randn(30, 5)
X[:10] += 2
g = mixture.GMM(n_components=2, n_init=2, verbose=2)
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
g.fit(X)
finally:
sys.stdout = old_stdout
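# A minimal usage sketch of the same capture pattern (assuming, as above, a
# pre-0.20 scikit-learn whose GMM accepts a `verbose` flag): fit, then check
# that verbose=2 actually printed progress to the redirected stdout.
def example_capture_verbose_output():
    rng = np.random.RandomState(0)
    X = rng.randn(30, 5)
    g = mixture.GMM(n_components=2, n_init=2, verbose=2)
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        g.fit(X)
        captured = sys.stdout.getvalue()
    finally:
        sys.stdout = old_stdout
    assert len(captured) > 0  # second-level verbosity prints per-iteration info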
def fit(self, data):
gmm = GMM(n_components=1, covariance_type=self.cv_type)
gmm.fit(data)
self.mean = gmm.means_[0]
if self.cv_type == 'full':
self.cov = gmm.covars_[0]
elif self.cv_type == 'tied':
self.cov = gmm.covars_
else:
self.cov = np.diag(gmm.covars_[0])
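# Hedged usage sketch for the single-component fit above (old scikit-learn
# API): with covariance_type='full', covars_ has shape
# (1, n_features, n_features), so covars_[0] is the full covariance matrix.
rng = np.random.RandomState(0)
data = rng.randn(500, 3) + np.array([1.0, -2.0, 0.5])
gmm = GMM(n_components=1, covariance_type='full')
gmm.fit(data)
print gmm.means_[0]   # close to [1.0, -2.0, 0.5]
print gmm.covars_[0]  # close to the 3x3 identity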
def fit(self, data):
if self.n is None:
means = []
stds = []
weights = []
score = []
for n in range(self.n_min, self.n_max):
gmm = GMM(n_components=n, covariance_type='full')
gmm.fit(data)
means.append(gmm.means_)
stds.append(gmm.covars_)
weights.append(gmm.weights_)
if self.n_estimator == 'BIC':
score.append(gmm.bic(data))
i_best = self._chosebestformetric(self.n_estimator, score)
self.means = means[i_best]
self.stds = stds[i_best]
self.weights = weights[i_best]
else:
gmm = GMM(n_components=self.n, covariance_type='full')
gmm.fit(data)
self.means = gmm.means_
self.stds = gmm.covars_
self.weights = gmm.weights_
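# Standalone sketch of the BIC selection loop above (assumption:
# _chosebestformetric picks the minimum for 'BIC', since lower BIC
# indicates a better model).
rng = np.random.RandomState(0)
data = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + 4.0])
models, scores = [], []
for n in range(1, 5):
    gmm = GMM(n_components=n, covariance_type='full')
    gmm.fit(data)
    models.append(gmm)
    scores.append(gmm.bic(data))
best = models[int(np.argmin(scores))]
print 'selected n_components:', best.n_components  # expect 2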
def fit(self, data):
if self.n is None:
means = []
covs = []
weights = []
score = []
for n in range(self.n_min, self.n_max):
gmm = GMM(n_components=n, covariance_type=self.cv_type)
gmm.fit(data)
means.append(gmm.means_)
if self.cv_type == 'full':
covs.append(gmm.covars_)
elif self.cv_type == 'tied':
covs.append(np.tile(gmm.covars_, (n, 1, 1)))
else:
covs.append(np.array([np.diag(cv) for cv in gmm.covars_]))
weights.append(gmm.weights_)
if self.n_estimator == 'BIC':
score.append(gmm.bic(data))
i_best = self._chosebestformetric(self.n_estimator, score)
self.means = means[i_best]
self.covs = covs[i_best]
self.weights = weights[i_best]
else:
gmm = GMM(n_components=self.n, covariance_type=self.cv_type)
gmm.fit(data)
self.means = gmm.means_
if self.cv_type == 'full':
self.covs = gmm.covars_
elif self.cv_type == 'tied':
            self.covs = np.tile(gmm.covars_, (self.n, 1, 1))  # the original tiled by `n`, which is undefined in this branch
else:
self.covs = np.array([np.diag(cv) for cv in gmm.covars_])
self.weights = gmm.weights_
def GRsd (GRlog):
em = GMM(n_components=3)
em.fit(GRlog.reshape((-1, 1)))
idxminmeangr = np.argmin(em.means_)
grsd = em.means_[idxminmeangr] - em.covars_[idxminmeangr]**0.5
return grsd
def GRsh (GRlog):
em = GMM(n_components=3)
em.fit(GRlog.reshape((-1, 1)))
idxmaxmeangr = np.argmax(em.means_)
grsh = em.means_[idxmaxmeangr] + em.covars_[idxmaxmeangr]**0.5
return grsh
def SPsh (SPlog):
em = GMM(n_components=3)
em.fit(SPlog.reshape((-1, 1)))
idxmaxmeangr = np.argmax(em.means_)
spsh = em.means_[idxmaxmeangr] + em.covars_[idxmaxmeangr]**0.5
return spsh
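# Hypothetical usage of GRsd/GRsh on a synthetic gamma-ray log with a low
# (sand) and a high (shale) population; the functions return the lowest
# component mean minus one standard deviation and the highest mean plus one.
rng = np.random.RandomState(0)
GRlog = np.concatenate([rng.normal(30, 5, 500),    # sand
                        rng.normal(75, 6, 300),    # intermediate
                        rng.normal(120, 8, 500)])  # shale
print GRsd(GRlog)  # roughly 30 - 5 = 25
print GRsh(GRlog)  # roughly 120 + 8 = 128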
def EM_GMM_clustering(instance_array, n_clusters=9, sin_cos = 0, number_of_starts = 10, show_covariances = 0, clim=None, covariance_type='diag'):
    print 'starting EM-GMM algorithm from scikit-learn, k=%d, retries : %d, sin_cos = %d'%(n_clusters,number_of_starts,sin_cos)
if sin_cos==1:
print ' using sine and cosine of the phases'
sin_cos_instances = np.zeros((instance_array.shape[0],instance_array.shape[1]*2),dtype=float)
sin_cos_instances[:,::2]=np.cos(instance_array)
sin_cos_instances[:,1::2]=np.sin(instance_array)
input_data = sin_cos_instances
else:
print ' using raw phases'
input_data = instance_array
gmm = mixture.GMM(n_components=n_clusters,covariance_type=covariance_type,n_init=number_of_starts)
gmm.fit(input_data)
cluster_assignments = gmm.predict(input_data)
bic_value = gmm.bic(input_data)
LL = np.sum(gmm.score(input_data))
gmm_covars_tmp = np.array(gmm._get_covars())
if show_covariances:
fig, ax = make_grid_subplots(gmm_covars_tmp.shape[0], sharex = True, sharey = True)
im = []
for i in range(gmm_covars_tmp.shape[0]):
im.append(ax[i].imshow(np.abs(gmm_covars_tmp[i,:,:]),aspect='auto'))
print im[-1].get_clim()
            if clim is None:
im[-1].set_clim([0, im[-1].get_clim()[1]*0.5])
else:
im[-1].set_clim(clim)
clims = [np.min(np.abs(gmm_covars_tmp)),np.max(np.abs(gmm_covars_tmp))*0.5]
#for i in im : i.set_clim(clims)
fig.subplots_adjust(hspace=0, wspace=0,left=0.05, bottom=0.05,top=0.95, right=0.95)
fig.canvas.draw();fig.show()
gmm_covars = np.array([np.diagonal(i) for i in gmm._get_covars()])
gmm_means = gmm.means_
if sin_cos:
cluster_details = {'EM_GMM_means_sc':gmm_means, 'EM_GMM_variances_sc':gmm_covars, 'EM_GMM_covariances_sc':gmm_covars_tmp,'BIC':bic_value, 'LL':LL}
else:
cluster_details = {'EM_GMM_means':gmm_means, 'EM_GMM_variances':gmm_covars, 'EM_GMM_covariances':gmm_covars_tmp, 'BIC':bic_value,'LL':LL}
return cluster_assignments, cluster_details
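# Hedged usage sketch for EM_GMM_clustering on synthetic phase data (two
# well-separated clusters of 6-channel phases); sin_cos=1 maps each phase
# onto the unit circle before fitting.
rng = np.random.RandomState(0)
phases = np.vstack([rng.normal(0.5, 0.1, (50, 6)),
                    rng.normal(-1.5, 0.1, (50, 6))])
assignments, details = EM_GMM_clustering(phases, n_clusters=2, sin_cos=1,
                                         number_of_starts=3)
print 'BIC:', details['BIC'], 'LL:', details['LL']
print np.bincount(assignments)  # roughly [50, 50]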
def initialisation(self):
'''This involves generating the mu and kappa arrays
Then initialising based on self.start using k-means, EM-GMM or
giving every instance a random probability of belonging to each cluster
SH: 7June2013
'''
self.mu_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
self.kappa_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
self.LL_list = []
self.zij = np.zeros((self.instance_array.shape[0],self.n_clusters),dtype=float)
if self.start=='k_means':
print 'Initialising clusters using a fast k_means run'
self.cluster_assignments, self.cluster_details = k_means_clustering(self.instance_array, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 3, seed=self.seed)
for i in list(set(self.cluster_assignments)):
self.zij[self.cluster_assignments==i,i] = 1
print 'finished initialising'
elif self.start=='EM_GMM':
self.cluster_assignments, self.cluster_details = EM_GMM_clustering(self.instance_array, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 1)
        for i in list(set(self.cluster_assignments)):
            self.zij[self.cluster_assignments==i,i] = 1
else:
print 'going with random option'
#need to get this to work better.....
self.zij = np.random.random(self.zij.shape)
#and normalise so each row adds up to 1....
self.zij = self.zij / ((np.sum(self.zij,axis=1))[:,np.newaxis])
self._EM_VMM_maximisation_step()
def initialisation(self):
    '''This involves generating the mu and std arrays
Then initialising based on self.start using k-means, EM-GMM or
giving every instance a random probability of belonging to each cluster
SH: 7June2013
'''
self.mu_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
self.std_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
self.LL_list = []
self.zij = np.zeros((self.instance_array.shape[0],self.n_clusters),dtype=float)
if self.start=='k_means':
print 'Initialising clusters using a fast k_means run'
self.cluster_assignments, self.cluster_details = k_means_clustering(self.instance_array, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 3, seed=self.seed)
for i in list(set(self.cluster_assignments)):
self.zij[self.cluster_assignments==i,i] = 1
print 'finished initialising'
elif self.start=='EM_GMM':
self.cluster_assignments, self.cluster_details = EM_GMM_clustering(self.instance_array, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 1)
        for i in list(set(self.cluster_assignments)):
            self.zij[self.cluster_assignments==i,i] = 1
else:
print 'going with random option'
#need to get this to work better.....
self.zij = np.random.random(self.zij.shape)
#and normalise so each row adds up to 1....
self.zij = self.zij / ((np.sum(self.zij,axis=1))[:,np.newaxis])
self._EM_GMM_maximisation_step()
def _initialisation(self):
    '''This involves generating the mean and std arrays
Then initialising based on self.start using k-means, EM-GMM or
giving every instance a random probability of belonging to each cluster
SH: 7June2013
'''
self.mean_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
self.std_list = np.ones((self.n_clusters,self.n_dimensions),dtype=float)
self.LL_list = []
self.zij = np.zeros((self.n_instances, self.n_clusters),dtype=float)
#maybe only the random option is valid here.....
if self.start=='k_means':
print 'Initialising clusters using a fast k_means run'
self.cluster_assignments, self.cluster_details = k_means_clustering(self.input_data, n_clusters=self.n_clusters, sin_cos = 0, number_of_starts = 4, seed=self.seed)
for i in list(set(self.cluster_assignments)):
self.zij[self.cluster_assignments==i,i] = 1
#print 'finished initialising'
elif self.start=='EM_GMM':
self.cluster_assignments, self.cluster_details = EM_GMM_clustering(self.input_data, n_clusters=self.n_clusters, sin_cos = 1, number_of_starts = 1)
for i in list(set(self.cluster_assignments)):
self.zij[self.cluster_assignments==i,i] = 1
else:
print 'going with random option'
#need to get this to work better.....
self.zij = np.random.random(self.zij.shape)
#and normalise so each row adds up to 1....
self.zij = self.zij / ((np.sum(self.zij,axis=1))[:,np.newaxis])
self._EM_VMM_GMM_maximisation_step()
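# Minimal sketch of the 'random' branch shared by the initialisation methods
# above: random responsibilities zij, row-normalised so each instance's
# cluster memberships sum to 1.
rng = np.random.RandomState(0)
zij = rng.random_sample((6, 3))
zij = zij / np.sum(zij, axis=1)[:, np.newaxis]
print np.allclose(np.sum(zij, axis=1), 1.0)  # True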
def EM_GMM_GMM_clustering(instance_array_amps, n_clusters=9, sin_cos = 0, number_of_starts = 10, show_covariances = 0, clim=None, covariance_type='diag', n_iter = 50):
'''
Cluster using a Gaussian for the real and imag part of the ratio of the complex value between adjacent channels
Supposed to be for imaging diagnostics
SRH: 18May2014
'''
    print 'starting EM-GMM-GMM algorithm from scikit-learn, clusters=%d, retries : %d'%(n_clusters,number_of_starts)
#tmp = np.zeros((instance_array_amps.shape[0], instance_array_amps.shape[1]-1),dtype=complex)
#for i in range(1,instance_array_amps.shape[1]):
# tmp[:,i-1] = instance_array_amps[:,i]/instance_array_amps[:,i-1]
#print 'ratio :', np.sum(np.abs(np.imag(instance_array_amps)))/np.sum(np.abs(np.real(instance_array_amps)))
data_complex = instance_array_amps/np.sum(instance_array_amps,axis = 1)[:,np.newaxis]
#data_complex = instance_array_amps/(instance_array_amps[:,2])[:,np.newaxis]
#print 'hello..', instance_array_amps.shape
    input_data = np.hstack((np.real(data_complex), np.imag(data_complex)))  # the original repeated np.real; the re/im hsplit below expects real then imaginary halves
#k_means_cluster_assignments, k_means_cluster_details = k_means_clustering(input_data, n_clusters=n_clusters, sin_cos = 1, number_of_starts = 3,)
#print k_means_cluster_assignments
#input_data = np.hstack((np.abs(data_complex),(np.abs(data_complex))))
n_dim = data_complex.shape[1]
#print n_clusters
gmm = mixture.GMM(n_components = n_clusters, covariance_type = covariance_type, n_init = number_of_starts, n_iter = n_iter,)
gmm.fit(input_data)
cluster_assignments = gmm.predict(input_data)
bic_value = gmm.bic(input_data)
LL = np.sum(gmm.score(input_data))
#Extract the means, variances and covariances
gmm_covars = np.array(gmm._get_covars())
gmm_vars = np.array([np.diagonal(i) for i in gmm._get_covars()])
gmm_vars_re, gmm_vars_im = np.hsplit(gmm_vars,2)
gmm_covars_re = np.array([i[0:n_dim,0:n_dim] for i in gmm._get_covars()])
gmm_covars_im = np.array([i[n_dim:,n_dim:] for i in gmm._get_covars()])
gmm_means = gmm.means_
gmm_means_re, gmm_means_im = np.hsplit(gmm_means, 2)
#Bundle up the answer
cluster_details = {'EM_GMM_means':gmm_means, 'EM_GMM_variances':gmm_vars, 'EM_GMM_covariances':gmm_covars, 'EM_GMM_means_re':gmm_means_re, 'EM_GMM_variances_re':gmm_vars_re, 'EM_GMM_covariances_re':gmm_covars_re,'EM_GMM_means_im':gmm_means_im, 'EM_GMM_variances_im':gmm_vars_im, 'EM_GMM_covariances_im':gmm_covars_im,'BIC':bic_value,'LL':LL}
print 'EM_GMM_GMM Converged: ', gmm.converged_
return cluster_assignments, cluster_details
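# The feature construction used above, in isolation: normalise the complex
# amplitudes per instance, then stack real and imaginary parts so the GMM
# sees 2*n_dim real-valued features.
rng = np.random.RandomState(0)
amps = rng.randn(10, 4) + 1j * rng.randn(10, 4)
data_complex = amps / np.sum(amps, axis=1)[:, np.newaxis]
features = np.hstack((np.real(data_complex), np.imag(data_complex)))
print features.shape  # (10, 8)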
def train(self, datadict, labels, rand_features=True):
'''
    Trains a scikit-learn GMM for each class, then joins them into a super codebook.
    @param datadict: Dictionary keyed by class label.
        Each value is a list of per-window feature matrices [frames x feature]
@param labels: the labels of the datadict in a given order
@param rand_features: Shuffles the samples before running the GMM
'''
self.criterion = []
# Stack the features
allfeatures = np.vstack(list([np.vstack(x) for x in datadict.values()]))
# Determine the normalisation statistics and remember them
self.norm = FeatureNormalizer()
self.norm.setup(allfeatures)
# Get number of classes
ncl = len(labels)
# Compute vocabsize per class
    vocab_size_per_cl = max(1, self.vocab_size // ncl)  # explicit integer division
# Update vocabsize to account for rounding errors
self.vocab_size = vocab_size_per_cl * ncl
#
# Train GMMs for each class
#
self.gmms = {}
self.labels = labels
for label in labels:
# Compute feature representations
feats = np.vstack(datadict[label])
if rand_features:
np.random.shuffle(feats)
if self.normalize:
norm_features = self.norm.normalize(feats)
else:
norm_features = (feats)
print >> sys.stderr, ("Training a GMM for label %s, using scipy and data of shape %s"
% (label, str(np.shape(norm_features))))
# Train the gmm
sub_gmm = GMM(vocab_size_per_cl, covariance_type='diag', n_iter=100)
sub_gmm.fit(norm_features)
# Set GMM for class
self.gmms[label] = sub_gmm
#
# Combine GMMs to super codebook
#
self.compute_super_codebook(allfeatures.shape[1])
return
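# compute_super_codebook is not shown in this listing; a plausible sketch
# (an assumption, not the author's implementation) concatenates the
# per-class component parameters and renormalises the weights:
def combine_gmms_sketch(gmms):
    means = np.vstack([g.means_ for g in gmms.values()])
    covars = np.vstack([g.covars_ for g in gmms.values()])
    weights = np.concatenate([g.weights_ for g in gmms.values()])
    return means, covars, weights / weights.sum()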
def preproc(self):
self.df["time_remaining"] = self.df["minutes_remaining"] * 60 + self.df["seconds_remaining"]
self.df['last_5_sec'] = self.df['time_remaining'] < 5
self.df['latter_half'] = self.df['time_remaining'] < 360
self.df['first_period'] = self.df['period'] == 1
self.df['latter_period'] = self.df['period'] > 2
self.df['last_period'] = self.df['period'] == 4
self.df['last_quarter'] = self.df['time_remaining'] < 180
threshold = 3
anomaly = 14
self.df['last_moment'] = self.df.apply(lambda row: row['time_remaining'] < threshold or row['time_remaining'] == anomaly, axis=1)
self.df['away'] = self.df.matchup.str.contains('@')
self.df['secondsFromStart'] = 60 * (11 - self.df['minutes_remaining']) + (60 - self.df['seconds_remaining'])
self.df['secondsFromGameStart'] = (self.df['period'] <= 4).astype(int) * (self.df['period'] - 1) * 12 * 60 + (self.df['period'] > 4).astype(int) * ((self.df['period'] - 4) * 5 * 60 + 3 * 12 * 60) + self.df['secondsFromStart']
numGaussians = 13
gaussianMixtureModel = mixture.GMM(n_components=numGaussians, covariance_type='full',
params='wmc', init_params='wmc',
random_state=1, n_init=3, verbose=0)
    gaussianMixtureModel.fit(self.df[['loc_x','loc_y']])
    self.df['shotLocationCluster'] = gaussianMixtureModel.predict(self.df[['loc_x','loc_y']])
self.df['homeGame'] = self.df['matchup'].apply(lambda x: 1 if (x.find('@') < 0) else 0)
self.df["game_year"] = pd.Series([int(self.df["game_date"][i][:4]) for i in range(0, len(self.df))])
self.df["game_month"] = pd.Series([int(self.df["game_date"][i][5:7]) for i in range(0, len(self.df))])
self.df["game_day"] = pd.Series([int(self.df["game_date"][i][-2:]) for i in range(0, len(self.df))])
action_type_list = list(set(self.df["action_type"].tolist()))
self.df["action_type_num"] = pd.Series([action_type_list.index(self.df["action_type"][i]) for i in range(0, len(self.df))])
combined_shot_type_list = list(set(self.df["combined_shot_type"].tolist()))
self.df["combined_shot_type_num"] = pd.Series([combined_shot_type_list.index(self.df["combined_shot_type"][i]) for i in range(0, len(self.df))])
opponent_list = list(set(self.df["opponent"].tolist()))
self.df["opponent_num"] = pd.Series([opponent_list.index(self.df["opponent"][i]) for i in range(0, len(self.df))])
game_id_list = list(set(self.df["game_id"].tolist()))
self.df["game_id_num"] = pd.Series([game_id_list.index(self.df["game_id"][i]) for i in range(0, len(self.df))])
season_list = list(set(self.df["season"].tolist()))
season_list.sort()
self.df["season_num"] = pd.Series([season_list.index(self.df["season"][i]) for i in range(0, len(self.df))])
self.df["shot_distance"][self.df["shot_distance"] > 45] = 45
# del self.df["team_id"], self.df["team_name"], self.df["game_event_id"], self.df["lat"], self.df["lon"]
# return self.df
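# The shot-location clustering step above, in isolation (old scikit-learn
# API; the two blobs stand in for the real loc_x/loc_y columns):
rng = np.random.RandomState(1)
locs = np.vstack([rng.randn(200, 2) * 10.0,
                  rng.randn(200, 2) * 10.0 + (80.0, 120.0)])
model = mixture.GMM(n_components=2, covariance_type='full',
                    random_state=1, n_init=3)
model.fit(locs)
print np.bincount(model.predict(locs))  # roughly [200, 200]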
def check_positive_definite_covars(covariance_type):
r"""Test that covariance matrices do not become non positive definite
Due to the accumulation of round-off errors, the computation of the
covariance matrices during the learning phase could lead to non-positive
definite covariance matrices. Namely the use of the formula:
.. math:: C = (\sum_i w_i x_i x_i^T) - \mu \mu^T
instead of:
.. math:: C = \sum_i w_i (x_i - \mu)(x_i - \mu)^T
    while mathematically equivalent, was observed to raise a ``LinAlgError``
    exception when computing a ``GMM`` with full covariance matrices and fixed mean.
This function ensures that some later optimization will not introduce the
problem again.
"""
rng = np.random.RandomState(1)
    # we build a dataset with two 2-d components. The components are unbalanced
# (respective weights 0.9 and 0.1)
X = rng.randn(100, 2)
X[-10:] += (3, 3) # Shift the 10 last points
gmm = mixture.GMM(2, params="wc", covariance_type=covariance_type,
min_covar=1e-3)
# This is a non-regression test for issue #2640. The following call used
# to trigger:
# numpy.linalg.linalg.LinAlgError: 2-th leading minor not positive definite
gmm.fit(X)
if covariance_type == "diag" or covariance_type == "spherical":
assert_greater(gmm.covars_.min(), 0)
else:
if covariance_type == "tied":
covs = [gmm.covars_]
else:
covs = gmm.covars_
for c in covs:
assert_greater(np.linalg.det(c), 0)
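# Numerical illustration of the docstring's point: the two formulas agree in
# exact arithmetic, but the subtraction form can lose positive definiteness
# to catastrophic cancellation when the mean dwarfs the spread.
rng = np.random.RandomState(0)
x = rng.randn(1000, 2) * 1e-3 + 1e6          # tiny spread, huge mean
w = np.ones(len(x)) / len(x)                 # equal weights
mu = np.average(x, axis=0, weights=w)
C_sub = np.einsum('i,ij,ik->jk', w, x, x) - np.outer(mu, mu)  # subtraction form
C_dir = np.einsum('i,ij,ik->jk', w, x - mu, x - mu)           # direct form
print np.linalg.det(C_dir) > 0  # True: the direct form stays positive definite
print np.linalg.det(C_sub)      # may be <= 0 due to round-off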
def expectation_maximization(data, nc, cv_type='full', req_info=None):
gmm = GMM(n_components=nc, covariance_type=cv_type, thresh=1.0E-4, n_init=10)
gmm.fit(data)
labels = gmm.predict(data)
if req_info == 'all':
req_info = ['aic', 'bic', 'converged', 'weights', 'means', 'covars',
'silhouette', 'proba']
elif req_info is None:
req_info = []
info = {}
if 'aic' in req_info:
info['aic'] = gmm.aic(data)
if 'bic' in req_info:
info['bic'] = gmm.bic(data)
if 'converged' in req_info:
info['converged'] = gmm.converged_
if 'weights' in req_info:
info['weights'] = gmm.weights_
if 'means' in req_info:
info['means'] = gmm.means_
if 'covars' in req_info:
if cv_type == 'full':
info['covars'] = gmm.covars_
elif cv_type == 'tied':
cov = np.empty((nc, gmm.covars_.shape[0], gmm.covars_.shape[1]))
for i in range(nc):
cov[i] = gmm.covars_.copy()
info['covars'] = cov
else:
            cov = np.empty((nc, gmm.covars_.shape[1], gmm.covars_.shape[1]))  # (nc, n_features, n_features); for diag/spherical, covars_.shape is (nc, n_features)
for i in range(nc):
cov[i] = np.diag(gmm.covars_[i])
info['covars'] = cov
if 'silhouette' in req_info:
info['silhouette'] = metrics.silhouette_score(data, labels)
if 'proba' in req_info:
info['proba'] = gmm.predict_proba(data).T
return labels, info
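# Hedged usage sketch (note `thresh` above: old scikit-learn releases used
# it for the convergence tolerance before it was renamed `tol`):
rng = np.random.RandomState(0)
data = np.vstack([rng.randn(80, 2), rng.randn(80, 2) + 5.0])
labels, info = expectation_maximization(data, nc=2, cv_type='full',
                                        req_info=['bic', 'means'])
print info['bic']
print info['means']  # two means, near (0, 0) and (5, 5)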
def plot_dimension_histograms_GMM_GMM(self,pub_fig = 0, filename='plot_dim_hist.pdf',specific_dimensions = None, extra_txt_labels = '', label_loc = [-2,1.5], ylim = None):
'''For each dimension in the data set, plot the histogram of the real and imag part of the measurements
    overlay the GMMs - used for the GMM-GMM clustering method
SRH: 18May2014
'''
suptitle = self.settings.__str__().replace("'",'').replace("{",'').replace("}",'')
cluster_mu = self.cluster_details['EM_GMM_means_re'] + 1j*self.cluster_details['EM_GMM_means_im']
cluster_sigma = self.cluster_details['EM_GMM_variances_re'] + 1j*self.cluster_details['EM_GMM_variances_im']
dimensions = cluster_mu.shape[1]
instance_array_amps = self.feature_obj.misc_data_dict['mirnov_data']
tmp = np.zeros((instance_array_amps.shape[0], instance_array_amps.shape[1]-1),dtype=complex)
tmp = instance_array_amps/np.sum(instance_array_amps, axis = 1)[:,np.newaxis]
#for i in range(1,instance_array_amps.shape[1]): tmp[:,i-1] = instance_array_amps[:,i]/instance_array_amps[:,i-1]
    if specific_dimensions is None: specific_dimensions = range(dimensions)
fig_re, ax_re = make_grid_subplots(len(specific_dimensions), sharex = True, sharey = True)
fig_im, ax_im = make_grid_subplots(len(specific_dimensions), sharex = True, sharey = True)
for i,dim in enumerate(specific_dimensions):
ax_re[i].hist(np.real(tmp[:,dim]), bins=180,normed=True,histtype='stepfilled',range=[-np.pi,np.pi])
ax_im[i].hist(np.imag(tmp[:,dim]), bins=180,normed=True,histtype='stepfilled',range=[-np.pi,np.pi])
    if self.cluster_assignments is not None: cluster_list = list(set(self.cluster_assignments))
x = np.linspace(-np.pi, np.pi, 300)
cluster_prob_list = []
for cluster in cluster_list:
cluster_prob_list.append(float(np.sum(self.cluster_assignments==cluster))/float(len(self.cluster_assignments)))
for i, dimension in enumerate(specific_dimensions):
for ax_cur, op in zip([ax_re,ax_im],[np.real, np.imag]):
cluster_sum = x*0
for cluster, cluster_prob in zip(cluster_list, cluster_prob_list):
Z_EM = cluster_prob * norm(loc=op(cluster_mu[cluster][dimension]), scale=np.sqrt(op(cluster_sigma[cluster][dimension]))).pdf(x)
cluster_sum += Z_EM
tmp = ax_cur[i].plot(x,Z_EM,'-',linewidth=0.8)
tmp = ax_cur[i].plot(x,cluster_sum,'-',linewidth=2)
print '{area},'.format(area = np.sum(cluster_sum*(x[1]-x[0]))),
ax_cur[i].text(label_loc[0], label_loc[1],r'$\Delta \psi_%d$ '%(dimension+1,) + extra_txt_labels, fontsize = 8)#,bbox=dict(facecolor='white', alpha=0.5))
ax_cur[i].locator_params(nbins=7)
print ''
for ax_cur, fig_cur in zip([ax_re, ax_im],[fig_re, fig_im]):
ax_cur[-1].set_xlim([-np.pi,np.pi])
ax_cur[-1].set_ylim([0,1.3])
fig_cur.subplots_adjust(hspace=0, wspace=0,left=0.05, bottom=0.05,top=0.95, right=0.95)
fig_cur.suptitle(suptitle.replace('_','\char`_'),fontsize = 8)
fig_cur.canvas.draw(); fig_cur.show()
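# The overlay computation above, reduced to its core: a mixture pdf is the
# probability-weighted sum of per-cluster normal pdfs (scipy.stats.norm).
x = np.linspace(-np.pi, np.pi, 300)
mus, sigmas, probs = [0.0, 1.0], [0.3, 0.2], [0.6, 0.4]
mixture_pdf = sum(p * norm(loc=m, scale=s).pdf(x)
                  for m, s, p in zip(mus, sigmas, probs))
print np.sum(mixture_pdf * (x[1] - x[0]))  # ~1.0 when the support is covered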
def plot_dimension_histograms_VMM_GMM(self,pub_fig = 0, filename='plot_dim_hist.pdf',specific_dimensions = None, extra_txt_labels = '', label_loc = [-2,1.5], ylim = None):
    '''For each dimension in the data set, plot the histogram of the magnitude and phase of the measurements
    overlay the GMMs - used for the VMM-GMM clustering method
SRH: 18May2014
'''
suptitle = self.settings.__str__().replace("'",'').replace("{",'').replace("}",'')
cluster_GMM_mu = self.cluster_details['EM_GMM_means']
cluster_GMM_sigma = self.cluster_details['EM_GMM_std']
dimensions = cluster_GMM_mu.shape[1]
instance_array_amps = self.feature_obj.misc_data_dict['mirnov_data']
tmp = np.zeros((instance_array_amps.shape[0], instance_array_amps.shape[1]-1),dtype=complex)
for i in range(1,instance_array_amps.shape[1]): tmp[:,i-1] = instance_array_amps[:,i]/instance_array_amps[:,i-1]
    if specific_dimensions is None: specific_dimensions = range(dimensions)
fig_ang, ax_ang = make_grid_subplots(len(specific_dimensions), sharex = True, sharey = True)
fig_abs, ax_abs = make_grid_subplots(len(specific_dimensions), sharex = True, sharey = True)
amp_vals = np.abs(tmp)
amp_vals[np.angle(tmp)<0]*= (-1)
for i,dim in enumerate(specific_dimensions):
ax_abs[i].hist(amp_vals[:,dim], bins=180,normed=True,histtype='stepfilled',range=[-np.pi,np.pi])
ax_ang[i].hist(np.angle(tmp[:,dim]), bins=180,normed=True,histtype='stepfilled',range=[-np.pi,np.pi])
    if self.cluster_assignments is not None: cluster_list = list(set(self.cluster_assignments))
x = np.linspace(-np.pi, np.pi, 300)
cluster_prob_list = []
for cluster in cluster_list:
cluster_prob_list.append(float(np.sum(self.cluster_assignments==cluster))/float(len(self.cluster_assignments)))
for i, dimension in enumerate(specific_dimensions):
#for ax_cur, op in zip([ax_re,ax_im],[np.real, np.imag]):
#for ax_cur, op in zip([ax_ang,ax_abs],[np.angle, np.abs]):
for ax_cur, op in zip([ax_abs],[np.abs]):
cluster_sum = x*0
for cluster, cluster_prob in zip(cluster_list, cluster_prob_list):
Z_EM = cluster_prob * norm(loc=cluster_GMM_mu[cluster][dimension], scale=cluster_GMM_sigma[cluster][dimension]).pdf(x)
cluster_sum += Z_EM
tmp = ax_cur[i].plot(x,Z_EM,'-',linewidth=0.8)
tmp = ax_cur[i].plot(x,cluster_sum,'-',linewidth=2)
print '{area},'.format(area = np.sum(cluster_sum*(x[1]-x[0]))),
ax_cur[i].text(label_loc[0], label_loc[1],r'$\Delta \psi_%d$ '%(dimension+1,) + extra_txt_labels, fontsize = 8)#,bbox=dict(facecolor='white', alpha=0.5))
ax_cur[i].locator_params(nbins=7)
print ''
#for ax_cur, fig_cur in zip([ax_re, ax_im],[fig_re, fig_im]):
for ax_cur, fig_cur in zip([ax_ang, ax_abs],[fig_ang, fig_abs]):
ax_cur[-1].set_xlim([-np.pi,np.pi])
ax_cur[-1].set_ylim([0,1.3])
fig_cur.subplots_adjust(hspace=0, wspace=0,left=0.05, bottom=0.05,top=0.95, right=0.95)
fig_cur.suptitle(suptitle.replace('_','\char`_'),fontsize = 8)
fig_cur.canvas.draw(); fig_cur.show()