def load_abalone_data(proportion=1044./4177):
    """Fetch the abalone regression dataset and split it into train/test sets.

    The dataset is requested from ``sklearn.datasets.fetch_mldata`` under the
    name ``'regression-datasets abalone'``.  The values stored in
    ``abalone.target`` are binarized with ``label_binarize`` (presumably this
    is the categorical sex attribute -- confirm against the dataset) and
    stacked in front of the columns of ``abalone.data``.  The regression
    target is read from ``abalone.int1[0]``.

    Parameters
    ----------
    proportion : float, optional
        Forwarded to ``train_test_split`` as ``test_size``.  Defaults to
        ``1044. / 4177``.

    Returns
    -------
    X_train, y_train, X_test, y_test
        ``float64`` arrays.  Note the order: both training arrays come
        first, followed by both test arrays.
    """
    from sklearn import datasets
    from sklearn import preprocessing
    try:
        # Current home of train_test_split.
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Very old scikit-learn releases only provide it here.
        from sklearn.cross_validation import train_test_split

    # NOTE(review): fetch_mldata was removed from later scikit-learn
    # releases.  It is kept because the code below relies on the
    # mldata-specific ``int1`` field; migrating to another loader needs the
    # column layout to be re-checked.
    abalone = datasets.fetch_mldata('regression-datasets abalone')

    # Convert each target entry to a plain Python value before binarizing.
    X_cate = np.array([entry.tolist() for entry in abalone.target])
    # ``classes`` is passed by keyword: newer scikit-learn releases reject
    # it as a positional argument, while older ones accept both forms.
    X_cate = preprocessing.label_binarize(X_cate, classes=np.unique(X_cate))

    X = np.hstack((X_cate, abalone.data)).astype(np.float64)

    # Add a trailing axis to the regression target.
    y = abalone.int1[0].T.astype(np.float64)
    y = y[:, None]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=proportion)
    return X_train, y_train, X_test, y_test
# [web-page residue] Comment list
# [web-page residue] Table of contents