def load_dataset(path="./dataset/training.csv", image_size=96):
    """Load the labelled training images from a CSV file.

    The CSV must contain an ``Image`` column in which every cell holds one
    image as a whitespace-separated string of integer pixel values. All other
    columns are treated as numeric labels; missing label values are replaced
    by the mean of their column.

    Args:
        path: Location of the CSV file. Defaults to the path the original
            implementation hard-coded.
        image_size: Side length of the (square) images; each row must hold
            ``image_size * image_size`` pixel values. Defaults to 96.

    Returns:
        A tuple ``(image, label)`` where ``image`` is an ``int32`` array of
        shape ``(n_samples, image_size * image_size)`` and ``label`` is a
        ``float32`` array of shape ``(n_samples, n_label_columns)``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the rows do not all hold ``image_size * image_size``
            pixel values.
    """
    if not os.path.exists(path):
        raise FileNotFoundError("dataset does not exist: {}".format(path))

    frame = pd.read_csv(path)
    n_pixels = image_size * image_size

    # Pixel values arrive as strings. Convert one row at a time so that every
    # row is parsed at its own string width; np.apply_along_axis would size
    # its output from the first row only and silently truncate wider values
    # (e.g. "255" -> "25") in later rows.
    rows = [
        np.array(pixels.split()).astype(np.int32)
        for pixels in frame["Image"]
    ]
    if rows:
        # np.stack raises ValueError when rows differ in length.
        image = np.stack(rows)
        if image.shape[1] != n_pixels:
            raise ValueError(
                "expected {} pixels per image, found {}".format(
                    n_pixels, image.shape[1]
                )
            )
    else:
        image = np.empty((0, n_pixels), dtype=np.int32)

    # Select the label columns by name rather than by position so the result
    # does not depend on where the Image column sits in the file.
    # astype() returns a fresh array, so filling it cannot touch the frame.
    label = frame.drop(columns="Image").values.astype(np.float32)

    if label.size:
        # Replace each NaN with the mean of the valid entries in its column.
        # A column without any valid entry has a NaN mean and stays NaN.
        col_mean = np.nanmean(label, axis=0)
        label = np.where(np.isnan(label), col_mean, label)

    return image, label
评论列表
文章目录