def process_linear_regression(self):
    """Fit the configured model on the listings data and report training error.

    The columns named in ``self.training_columns`` are selected from
    ``self.prediction_data.df_listings`` as the input matrix (rows are
    samples, columns are features) and ``self.target_column`` is used as the
    output.  The model returned by ``self.prediction_utils.generate_model``
    is fitted on that data and then asked to predict the very same rows, so
    the reported errors are measured against the training data itself.

    Note on shapes: the inputs must be two-dimensional.  Selecting with a
    list of names (``df[["mpg"]].values.shape`` -> 400 rows x 1 column) gives
    a matrix; selecting with a single name (``df["mpg"].values.shape`` -> 400
    elements) gives a flat array, which is the wrong shape for the inputs.

    Side effects:
        - Adds or overwrites a ``"predictions"`` column on the listings
          DataFrame.
        - Stores ``model_type`` and ``rmse`` under
          ``self.response["pre-hyperparameter_optimisation"]``.
        - Prints progress and error metrics, and plots when the ``PLOT_*``
          flags on ``self.prediction_config`` are set.

    Returns:
        None.  Returns early, without fitting or touching ``self.response``,
        when the selected inputs have no rows or no columns.
    """
    print("Linear Regression in progress...")
    model = self.prediction_utils.generate_model(self.model_type, None, None, None)
    df = self.prediction_data.df_listings
    inputs = df[self.training_columns]

    # ``len(inputs)`` only counts rows, so it cannot detect an empty list of
    # training columns; ``DataFrame.empty`` is True when either axis is empty.
    if inputs.empty:
        print("No Training Columns to use for Linear Regression. Perhaps they were all bad and removed.")
        return None

    # Check inputs is Numpy matrix not Numpy array
    print("Shape of inputs to Scikit-Learn Fit function: ", inputs.values.shape)
    output = df[self.target_column]
    model.fit(inputs, output)
    predictions = model.predict(inputs)
    df["predictions"] = predictions

    if self.prediction_config.PLOT_LINEAR_RELATIONSHIP_PREDICTION_VS_ACTUAL_FOR_TRAIN_FEATURES_VS_TARGET:
        self.plot_linear_relationships(predictions)

    print("Check predictions accuracy against 'known' Model Training Data:\n %r" % (df[[self.target_column, "predictions"]]))
    print("Predictions using Scikit-Learn Linear Regression: %r" % (predictions) )

    # NOTE(review): this is labelled "MAE" but is computed with
    # median_absolute_error -- confirm whether mean absolute error was meant.
    mae = median_absolute_error(df[self.target_column], predictions)

    # multioutput='raw_values' yields one MSE per output column.  Take the
    # first entry as a scalar before math.sqrt instead of relying on implicit
    # array-to-scalar conversion.
    mse_per_output = mean_squared_error(df[self.target_column], predictions, multioutput='raw_values')
    mse = mse_per_output[0]
    rmse = math.sqrt(mse)

    print("MAE: %r" % (mae) )
    print("MSE: %r" % (mse) )
    print("RMSE: %r" % (rmse) )

    # Guard against dividing by a zero RMSE (a perfect fit).
    if mae and rmse:
        mae_rmse_ratio = mae / rmse
        print("MAE to RMSE Ratio using Linear Regression: %.2f:1" % (mae_rmse_ratio) )

    if self.prediction_config.PLOT_INDIVIDUAL_TRAIN_FEATURES_VS_TARGET:
        for training_model_feature_name in self.training_columns:
            self.prediction_utils.plot(training_model_feature_name, df)

    self.response["pre-hyperparameter_optimisation"] = {
        "model_type": self.model_type,
        "rmse": rmse
    }
    print("Linear Regression Pre-Hyperparameter k Optimisation results: %r" % (self.response))
# prediction_model_linear_external.py source file
# python
# Views: 16
# Favorites: 0
# Likes: 0
# Comments: 0
# Comment list
# Table of contents