def generate_test_data():
    """Load ``./test.csv``, label-encode it, and split it on the target column.

    The first CSV row is discarded and the remaining rows are converted to
    a float matrix. The leading id column is kept, so every column index
    below is written as "index without id" + 1. The listed categorical
    columns are replaced by integer label codes. Rows whose encoded target
    column equals ``miss_value_index`` form the test split; all other rows
    form the training split.

    Returns:
        A ``(market_test_data, test_data)`` tuple, where ``market_test_data``
        is ``MarketingData(X_train, y_train, X_test)`` and ``test_data`` is
        the full encoded ``float64`` matrix (id column included).
    """
    id_offset = 1  # the id column is not dropped, so indices shift by one
    categorical_columns = tuple(
        j + id_offset for j in (1, 2, 3, 4, 5, 6, 7, 8, 9, 14)
    )
    target_column = 3 + id_offset
    # NOTE(review): presumably 7 is the label code that the encoder assigns
    # to the "missing" marker of the target column -- confirm against data.
    miss_value_index = 7

    # newline='' is what the csv module expects for files it reads.
    with open('./test.csv', 'r', newline='') as test_file:
        test_csv = csv.reader(test_file, delimiter=',')
        next(test_csv)  # discard the first row (presumably the header)
        raw = numpy.array(list(test_csv))

    # Integer label encoding of the categorical columns. The codes are
    # staged in an object array: writing them back into the fixed-width
    # string array would silently truncate multi-digit codes to the
    # array's string width.
    encoder = preprocessing.LabelEncoder()
    staged = raw.astype(object)
    for column in categorical_columns:
        staged[:, column] = encoder.fit_transform(raw[:, column])

    # numpy.float was removed in NumPy 1.24; float64 is the same dtype.
    test_data = staged.astype(numpy.float64)

    is_missing = test_data[:, target_column] == miss_value_index
    Xy_test = test_data[is_missing]
    Xy_train = test_data[~is_missing]

    X_train = numpy.delete(Xy_train, target_column, 1)
    y_train = Xy_train[:, target_column]
    X_test = numpy.delete(Xy_test, target_column, 1)

    market_test_data = MarketingData(X_train, y_train, X_test)
    return market_test_data, test_data
# Use kNN to impute missing values
评论列表
文章目录