def load_csv(training_fn, features_fn):
    '''
    Transform a tabular data set into NumPy arrays.

    Parameters
    ----------
    training_fn : path or file-like
        Tab-separated file with a header row. It must contain every column
        named in the feature list plus a column called ``label``.
    features_fn : path
        JSON file whose top-level object has a ``"features"`` key holding
        the list of column names to use as model inputs.

    Returns
    -------
    features : list
        The feature column names, exactly as read from ``features_fn``.
    data : numpy.ndarray
        2-D array of shape ``(n_rows, n_features)``; columns follow the
        order of ``features``.
    labels : numpy.ndarray
        1-D array of shape ``(n_rows,)`` taken from the ``label`` column.

    Raises
    ------
    KeyError
        If ``"features"`` is missing from the JSON file, or a requested
        column (including ``label``) is missing from the table.
    '''
    # NOTE: despite the function name, the table is parsed as tab-separated.
    df = pd.read_csv(training_fn, sep='\t')

    # Context manager so the JSON file handle is closed deterministically.
    with open(features_fn) as fh:
        features = json.load(fh)['features']

    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
    # supported replacement.
    data = df[features].to_numpy()
    print('Data:', data.shape)

    # Selecting the Series directly yields a 1-D array, so no reshape needed.
    labels = df['label'].to_numpy()
    print('Labels:', labels.shape)

    return features, data, labels
评论列表
文章目录