def train(self, datadict, labels, rand_features=True):
    '''
    Trains a GMM per class and joins them into a super codebook.

    @param datadict: Dictionary mapping class label -> list of feature
        matrices, one per window, each shaped [frames x feature].
    @param labels: The class labels of datadict, in the order the
        per-class GMMs should be concatenated.
    @param rand_features: If True, shuffle each class's frames (in place,
        row-wise) before fitting its GMM.
    '''
    self.criterion = []
    # Stack all frames of all classes into one [total_frames x feature] matrix.
    allfeatures = np.vstack([np.vstack(x) for x in datadict.values()])
    # Determine the normalisation statistics and remember them for test time.
    self.norm = FeatureNormalizer()
    self.norm.setup(allfeatures)
    # Number of classes.
    ncl = len(labels)
    # Vocabulary size per class; floor division (`//`) keeps this an int on
    # Python 3 as well (plain `/` would produce a float and break GMM(...)).
    vocab_size_per_cl = max(1, self.vocab_size // ncl)
    # Update total vocab size to account for rounding down above.
    self.vocab_size = vocab_size_per_cl * ncl
    #
    # Train one GMM per class
    #
    self.gmms = {}
    self.labels = labels
    for label in labels:
        # Stack this class's windows into one frame matrix.
        feats = np.vstack(datadict[label])
        if rand_features:
            # Shuffles rows (frames) in place.
            np.random.shuffle(feats)
        if self.normalize:
            norm_features = self.norm.normalize(feats)
        else:
            norm_features = feats
        # sys.stderr.write works identically on Python 2 and 3
        # (the original `print >>` statement is Python-2-only syntax).
        sys.stderr.write("Training a GMM for label %s, using scipy and data of shape %s\n"
                         % (label, str(np.shape(norm_features))))
        # Fit a diagonal-covariance GMM for this class.
        sub_gmm = GMM(vocab_size_per_cl, covariance_type='diag', n_iter=100)
        sub_gmm.fit(norm_features)
        # Remember the per-class model.
        self.gmms[label] = sub_gmm
    #
    # Combine the per-class GMMs into one super codebook
    #
    self.compute_super_codebook(allfeatures.shape[1])
    return