def loadData(self, filename, verbose=True, replace_missing=True):
    ''' Get the data from a text file in one of 3 formats: matrix, sparse, binary_sparse.

    When ``self.use_pickle`` is set, a cached copy named
    ``<basename(filename)>.pickle`` inside ``self.tmp_dir`` is returned if it
    exists; otherwise the freshly parsed data is written there before
    returning.

    Args:
        filename: Path of the data file to read.
        verbose: When true, print progress and timing messages.
        replace_missing: When true and the format is 'dense', pass the data
            through ``data_converter.replace_missing`` if it contains NaN.

    Returns:
        Whatever the format-specific reader in ``data_io`` returns (possibly
        after missing-value replacement), or the unpickled cached object.
    '''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()

    # Only touch self.tmp_dir when pickling is enabled, as the original did.
    pickle_path = None
    if self.use_pickle:
        pickle_path = os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")

    if pickle_path is not None and os.path.exists(pickle_path):
        # Pickles are binary data: the file must be opened in "rb" mode.
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # self.tmp_dir must not be writable by untrusted parties.
        with open(pickle_path, "rb") as pickle_file:
            vprint(verbose, "Loading pickle file : " + pickle_path)
            return pickle.load(pickle_file)

    # Lazily detect the file format and feature count if not yet known.
    if 'format' not in self.info:
        self.getFormatData(filename)
    if 'feat_num' not in self.info:
        self.getNbrFeatures(filename)

    data_func = {
        'dense': data_io.data,
        'sparse': data_io.data_sparse,
        'sparse_binary': data_io.data_binary_sparse,
    }
    data = data_func[self.info['format']](filename, self.info['feat_num'])

    # IMPORTANT: when we replace missing values we double the number of variables
    # The NaN test is vectorised: wrapping a lazy map() in np.any() is always
    # truthy on Python 3, which would trigger the replacement unconditionally.
    # Assumes the dense reader returns a numeric array-like -- TODO confirm.
    if self.info['format'] == 'dense' and replace_missing and np.isnan(data).any():
        vprint(verbose, "Replace missing values by 0 (slow, sorry)")
        data = data_converter.replace_missing(data)

    if pickle_path is not None:
        with open(pickle_path, "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + pickle_path)
            pickler = pickle.Pickler(pickle_file)
            # Fast mode disables the memo, which is quicker for large
            # objects without self-references.
            pickler.fast = True
            pickler.dump(data)

    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return data
评论列表
文章目录