# prediction_model_knn_external.py: source file (Python code snippet)

def process_hyperparameter_fixed(self):
        """
        Train a model with the fixed hyperparameter and score it on the testing part.

        Step 1: Build the model through `self.prediction_utils.generate_model`, passing
            `self.model_type`, `HYPERPARAMETER_FIXED` from the prediction config, 'brute' and 2
            (presumably the K-Nearest-Neighbors `k`, search algorithm and `p`, where p=2 is
            Euclidean Distance -- confirm against `generate_model`).
        Step 2: Fit the model on the training part:
            - X: the `self.training_columns` columns (feature columns)
            - y: the `self.target_column` column (target values)

            Warning: DO NOT pass in data containing the following, else Scikit-Learn's `fit` errors:
                - Missing values
                - Non-numerical values

        Step 3: Call `predict` on the same feature columns of the testing part.

        Step 4: Compute and print the error metrics, where the least loss "best" value is 0:
            - "MAE": the value printed under this label comes from `median_absolute_error`
            - MSE: `mean_squared_error` with multioutput='raw_values' (one value per target)
            - RMSE: square root of the first target's MSE

        Step 5: Optionally plot each training feature against the testing part when
            `PLOT_INDIVIDUAL_TRAIN_FEATURES_VS_TARGET` is enabled in the prediction config.

        Returns:
            dict with keys "feature_names" (training column names joined by '__'), "rmse",
            "k_neighbors_qty" (the fixed hyperparameter), "k_folds_qty" (always None here) and
            "k_fold_cross_validation_toggle" (always False here).
        """
        feature_names = self.training_columns
        # One-element tuples keep %-formatting correct even if the value is itself a tuple.
        print("Training features include: %r" % (feature_names,) )

        feature_combo = '__'.join(feature_names)

        model = self.prediction_utils.generate_model(self.model_type, self.prediction_config.HYPERPARAMETER_FIXED, 'brute', 2)

        training_part = self.prediction_data.training_part
        X = training_part[feature_names]
        y = training_part[self.target_column]

        model.fit(X, y)

        testing_part = self.prediction_data.testing_part
        predictions = model.predict(testing_part[feature_names])

        print("Predictions using Scikit-Learn KNN Regression: %r" % (predictions,) )

        actual_values = testing_part[self.target_column]

        # NOTE(review): the "MAE" label is backed by the *median* absolute error, not the mean.
        # Confirm this is the intended metric before comparing the MAE:RMSE ratio with other work.
        mae = median_absolute_error(actual_values, predictions)

        # multioutput='raw_values' yields an array with one MSE per target, so index the first
        # target explicitly instead of relying on implicit array-to-scalar conversion in
        # math.sqrt (deprecated since NumPy 1.25).
        mse = mean_squared_error(actual_values, predictions, multioutput='raw_values')
        rmse = math.sqrt(mse[0])

        print("MAE: %r" % (mae,) )
        print("MSE: %r" % (mse[0],) )
        print("RMSE: %r" % (rmse,) )

        # Skip the ratio when either metric is zero (also avoids dividing by a zero RMSE).
        if mae and rmse:
            mae_rmse_ratio_prefix = mae / rmse
            print("MAE to RMSE Ratio: %.2f:1" % (mae_rmse_ratio_prefix,) )

        # Explicit comparison kept on purpose: the config value's type is not visible from here.
        if self.prediction_config.PLOT_INDIVIDUAL_TRAIN_FEATURES_VS_TARGET == True:
            for feature_name in feature_names:
                self.prediction_utils.plot(feature_name, testing_part)

        return {
            "feature_names": feature_combo,
            "rmse": rmse,
            "k_neighbors_qty": self.prediction_config.HYPERPARAMETER_FIXED,
            "k_folds_qty": None,
            "k_fold_cross_validation_toggle": False
        }