def preprocess_data(train_data_matrix, valid_data_matrix, test_data_matrix):
    """
    Standardize the training, validation, and testing data matrices.

    A single StandardScaler is fitted on the training matrix only, and that
    same fitted scaler is then applied to all three matrices. The validation
    and testing sets are therefore scaled with the statistics learned from
    the training set rather than their own.

    NOTE(review): `prep` is not defined in this block; it is presumably
    `sklearn.preprocessing` imported under that alias elsewhere in the file.

    Input:  train_data_matrix    The data matrix with the training set data
            valid_data_matrix    The data matrix with the validation set data
            test_data_matrix     The data matrix with the testing set data
    Output: a 3-tuple, in this order:
            the standardized training set data matrix
            the standardized validation set data matrix
            the standardized testing set data matrix
    Usage:  train_x, valid_x, test_x = preprocess_data(train_x, valid_x, test_x)
    """
    # Fit on the training split alone so the scaling parameters come only
    # from training data.
    scaler = prep.StandardScaler().fit(train_data_matrix)

    # Apply the fitted scaler to each split, preserving train/valid/test order.
    splits = (train_data_matrix, valid_data_matrix, test_data_matrix)
    return tuple(scaler.transform(split) for split in splits)
# Web-page residue (not code): "Comment list"
# Web-page residue (not code): "Table of contents"