# bidnn.py source file -- Python code snippet

def load_dataset(self, X=None):
        """Load the training data and split it into its two modalities.

        Each row of the data holds the first modality in its first
        ``self.conf.mod1size`` columns, followed by the second modality in
        the remaining ``self.conf.mod2size`` columns.

        Unless ``self.conf.ignore_zeroes`` is set, a modality whose columns
        are all zero in a given row is treated as missing for that row.

        Args:
            X: optional data matrix. When ``None`` the data is read from the
                svmlight file ``self.conf.fname_in``. Otherwise ``X`` may be
                anything exposing ``todense()`` (e.g. a scipy sparse matrix)
                or a dense array-like; dense input is wrapped as a matrix
                view, not copied.

        Sets:
            self.X_train: dense matrix with all rows.
            self.tl: target labels (only when the data is read from file).
            self.idxMissingFirst: list of row indices where only the first
                modality is missing.
            self.idxMissingSecond: list of row indices where only the second
                modality is missing.
            self.X_first, self.X_second: arrays with the two modalities of
                the rows where both are present, of shape (rows, mod1size)
                and (rows, mod2size). The shapes hold even when no such
                row exists.

        Raises:
            ValueError: if ``mod1size + mod2size`` differs from the number of
                columns in the data.
        """
        verbose = self.conf.verbosity > 1
        if verbose:
            print("Loading dataset...")

        if X is None:
            # we're saving tl (target labels) just in case they exist and the
            # user needs them - since this is unsupervised learning, we
            # completely ignore the labels and don't expect them to exist
            self.X_train, self.tl = load_svmlight_file(
                self.conf.fname_in, dtype=np.float32, multilabel=False)
        else:
            self.X_train = X

        if hasattr(self.X_train, "todense"):
            self.X_train = self.X_train.todense()
        else:
            # Dense input has no todense(); wrap it so X_train has the same
            # (matrix) type as on the sparse path.
            self.X_train = np.asmatrix(self.X_train)

        mod1 = self.conf.mod1size
        mod2 = self.conf.mod2size
        n_features = self.X_train.shape[1]
        if (mod1 + mod2) != n_features:
            raise ValueError(
                "Provided dimensionality of 1st modality (" + str(mod1) + ") "
                "and 2nd modality (" + str(mod2) + ") does not sum to the "
                "dimensionality provided in the input file ("
                + str(n_features) + ")")

        # Plain ndarray view of the matrix: reductions along an axis then
        # yield 1-D masks instead of (n, 1) matrices.
        dense = np.asarray(self.X_train)

        if self.conf.ignore_zeroes:
            # zeroes are not treated as missing modalities: every row counts
            # as complete and the idxMissing* lists stay empty
            has_first = has_second = np.ones(dense.shape[0], dtype=bool)
        else:
            # zero vectors are treated as missing modalities (default)
            has_first = dense[:, :mod1].any(axis=1)
            has_second = dense[:, mod1:].any(axis=1)

        complete = has_first & has_second

        # indices of missing modalities (stored for later)
        self.idxMissingFirst = np.flatnonzero(~has_first & has_second).tolist()
        self.idxMissingSecond = np.flatnonzero(has_first & ~has_second).tolist()
        bothMissing = int(np.count_nonzero(~has_first & ~has_second))
        both = int(np.count_nonzero(complete))

        # training data for modality translation: rows with both modalities
        # (boolean indexing copies, so these do not alias X_train)
        self.X_first = np.ascontiguousarray(dense[complete, :mod1])
        self.X_second = np.ascontiguousarray(dense[complete, mod1:])

        if verbose:
            # Single-string prints give the same output on Python 2 and 3.
            print("Both modalities present: %d \nMissing 1st: %d \nMissing 2nd: %d"
                  % (both, len(self.idxMissingFirst), len(self.idxMissingSecond)))
            print("Missing both modalities: %d \n" % bothMissing)