def _build(self, flags, files):
    """Load the selected tables into ``self.data``, caching each as a pickle.

    ``self.names`` and ``self.fnames`` are paired by position. A table is
    selected when ``files == "all"`` or when its name is in ``files``.
    For every selected table, the pickle
    ``<flags.data_path>/<self.name>_<stem of the table name>.pkl`` is read
    if it exists; otherwise ``<self.path>/<fname>`` is parsed as CSV and
    the result is written to that pickle path for the next run.

    Args:
        flags: Settings object. ``data_path`` is read here; ``classes`` is
            read only when the "training_variants" table was loaded. The
            object is stored on ``self.flags``.
        files: The string ``"all"`` or a container of table names to load.

    Side effects:
        Sets ``self.flags`` and ``self.data`` (dict: table name -> loaded
        frame). When "training_variants" was loaded, also sets ``self.y``
        to ``onehot_encode(Class - 1, flags.classes)``.
    """
    Table = namedtuple('Table', 'name fname dtype')
    tables = [
        Table(name, "%s/%s" % (self.path, fname), {})
        for name, fname in zip(self.names, self.fnames)
        if files == "all" or name in files
    ]
    print()
    self.flags = flags
    cache_dir = flags.data_path
    data = {}
    for table in tables:
        name, fname = table.name, table.fname
        # Cache file is keyed by dataset name plus the table name's stem.
        stem = name.split('/')[-1].split('.')[0]
        pname = "%s/%s_%s.pkl" % (cache_dir, self.name, stem)
        if os.path.exists(pname):
            # NOTE(review): unpickling runs code embedded in the file; this
            # presumes the cache directory is trusted -- confirm.
            data[name] = pd.read_pickle(pname)
        else:
            if '_text' in name:
                # Text tables use a literal "||" delimiter. The separator
                # is a regex, so it is written as a raw string (no invalid
                # escape sequences) and the Python engine is requested
                # explicitly: the C engine cannot handle regex separators
                # and pandas would otherwise fall back with a warning.
                data[name] = pd.read_csv(
                    fname,
                    header=None,
                    sep=r"\|\|",
                    skiprows=1,
                    names=['ID', 'Text'],
                    engine="python",
                )
            else:
                data[name] = pd.read_csv(fname)
            # Cache miss: persist the parsed table for subsequent calls.
            data[name].to_pickle(pname)
        print_mem_time(
            "Loaded {} {}".format(fname.split('/')[-1], data[name].shape)
        )
    self.data = data  # no copy, pass the reference
    if "training_variants" in self.data:
        # Shift the 'Class' column down by one before one-hot encoding.
        y = self.data["training_variants"]['Class'] - 1
        from utils.np_utils.encoder import onehot_encode
        self.y = onehot_encode(y, self.flags.classes)
    print()
# Comment list
# Table of contents