def train(self, datadict, labels, rand_features=True):
    '''
    Trains a GMM per class and joins them into a super codebook.

    @param datadict: Dictionary mapping class label -> list of feature
        matrices, one per window, each shaped [frames x feature].
    @param labels: The class labels of datadict, in the order the
        per-class GMMs should be concatenated.
    @param rand_features: If True, shuffle each class's frames (in place,
        row-wise) before fitting its GMM.
    '''
    self.criterion = []
    # Stack all frames of all classes into one [total_frames x feature] matrix.
    allfeatures = np.vstack([np.vstack(x) for x in datadict.values()])
    # Determine the normalisation statistics and remember them for test time.
    self.norm = FeatureNormalizer()
    self.norm.setup(allfeatures)
    # Number of classes.
    ncl = len(labels)
    # Vocabulary size per class; floor division (`//`) keeps this an int on
    # Python 3 as well (plain `/` would produce a float and break GMM(...)).
    vocab_size_per_cl = max(1, self.vocab_size // ncl)
    # Update total vocab size to account for rounding down above.
    self.vocab_size = vocab_size_per_cl * ncl
    #
    # Train one GMM per class
    #
    self.gmms = {}
    self.labels = labels
    for label in labels:
        # Stack this class's windows into one frame matrix.
        feats = np.vstack(datadict[label])
        if rand_features:
            # Shuffles rows (frames) in place.
            np.random.shuffle(feats)
        if self.normalize:
            norm_features = self.norm.normalize(feats)
        else:
            norm_features = feats
        # sys.stderr.write works identically on Python 2 and 3
        # (the original `print >>` statement is Python-2-only syntax).
        sys.stderr.write("Training a GMM for label %s, using scipy and data of shape %s\n"
                         % (label, str(np.shape(norm_features))))
        # Fit a diagonal-covariance GMM for this class.
        sub_gmm = GMM(vocab_size_per_cl, covariance_type='diag', n_iter=100)
        sub_gmm.fit(norm_features)
        # Remember the per-class model.
        self.gmms[label] = sub_gmm
    #
    # Combine the per-class GMMs into one super codebook
    #
    self.compute_super_codebook(allfeatures.shape[1])
    return