def load_dataset(self, X=None):
    """Load the two-modality sample matrix and split it by modality presence.

    Each row of the matrix is one sample: the first ``conf.mod1size`` columns
    hold the first modality and the remaining ``conf.mod2size`` columns hold
    the second one.

    Args:
        X: Optional sample matrix (sparse or dense, 2-D). When ``None`` the
            data is read from ``conf.fname_in`` with ``load_svmlight_file``.

    Sets on ``self``:
        X_train: the dense sample matrix (sparse inputs are densified with
            ``todense()``).
        tl: target labels returned by ``load_svmlight_file``; only assigned
            when the data is read from file.
        idxMissingFirst / idxMissingSecond: lists of row indices whose first /
            second modality is an all-zero vector while the other one is not.
            Both stay empty when ``conf.ignore_zeroes`` is set.
        X_first / X_second: 2-D arrays with the first / second modality of
            every row where both modalities are present (every row when
            ``conf.ignore_zeroes`` is set). They have zero rows, but keep
            their column count, when no such sample exists.

    Raises:
        ValueError: if the matrix is not 2-D, or if ``conf.mod1size +
            conf.mod2size`` differs from the number of columns.
    """
    conf = self.conf
    verbose = conf.verbosity > 1
    # NOTE: single-argument print(...) calls produce the same output under
    # the Python 2 print statement and the Python 3 print function.
    if verbose:
        print("Loading dataset...")

    if X is None:
        # tl (target labels) is kept only in case the file has labels and the
        # user needs them - this is unsupervised learning, so the labels are
        # otherwise ignored and not expected to exist.
        self.X_train, self.tl = load_svmlight_file(conf.fname_in, dtype=np.float32, multilabel=False)
    else:
        self.X_train = X

    # Only sparse containers offer todense(); dense inputs are accepted as-is
    # instead of failing with AttributeError.
    if hasattr(self.X_train, "todense"):
        self.X_train = self.X_train.todense()
    else:
        self.X_train = np.asanyarray(self.X_train)

    if self.X_train.ndim != 2:
        raise ValueError("Expected a 2-D sample matrix, got %d dimension(s)" % self.X_train.ndim)

    n_columns = self.X_train.shape[1]
    if (conf.mod1size + conf.mod2size) != n_columns:
        raise ValueError(
            "Provided dimensionality of 1st modality (%s) and 2nd modality (%s) "
            "does not sum to the dimensionality provided in the input file (%s)"
            % (conf.mod1size, conf.mod2size, n_columns)
        )

    # Plain ndarray view of the data: row-wise reductions then give 1-D
    # boolean vectors even when X_train is an np.matrix.
    dense = np.asarray(self.X_train)
    split = conf.mod1size
    first_part = dense[:, :split]
    second_part = dense[:, split:]

    if conf.ignore_zeroes:
        # Zero vectors are NOT treated as missing modalities: every sample
        # counts as complete, so the idxMissing* lists stay empty.
        has_first = has_second = np.ones(dense.shape[0], dtype=bool)
    else:
        # Zero vectors are treated as missing modalities (default).
        has_first = first_part.any(axis=1)
        has_second = second_part.any(axis=1)

    has_both = has_first & has_second

    # Indices of samples with exactly one modality missing (stored for later).
    self.idxMissingFirst = np.flatnonzero(~has_first & has_second).tolist()
    self.idxMissingSecond = np.flatnonzero(has_first & ~has_second).tolist()

    # Training data for modality translation: samples with both modalities.
    # Boolean-mask indexing copies, so these do not alias X_train.
    self.X_first = first_part[has_both]
    self.X_second = second_part[has_both]

    if verbose:
        both = int(has_both.sum())
        bothMissing = int((~has_first & ~has_second).sum())
        print("Both modalities present: %d \nMissing 1st: %d \nMissing 2nd: %d"
              % (both, len(self.idxMissingFirst), len(self.idxMissingSecond)))
        print("Missing both modalities: %d \n" % bothMissing)
评论列表
文章目录