def _construct_pipeline(self, model_name='LogisticRegression', trained_pipeline=None, final_model=None, feature_learning=False, final_model_step_name='final_model'):
    """Assemble the ``Pipeline`` of preprocessing steps followed by the model step(s).

    When ``trained_pipeline`` is given, step objects are reused from it by
    name; otherwise new step objects are created from this instance's settings.

    Args:
        model_name: Name handed to ``utils_models.get_model_from_name`` to
            create the final model. Only used when ``trained_pipeline`` is None.
        trained_pipeline: Optional object exposing ``named_steps`` (lookup by
            step name) and ``steps`` (sequence whose items start with the step
            name). Its steps are reused instead of building new ones.
        final_model: Optional object appended as the last step under
            ``final_model_step_name``. Only used when ``trained_pipeline`` is
            given.
        feature_learning: Forwarded to ``FinalModelATC`` when a new final
            model step is created.
        final_model_step_name: Step name used for ``final_model``.

    Returns:
        A ``Pipeline`` built from the collected ``(name, step)`` tuples.

    Note:
        When ``trained_pipeline`` is given, the 'basic_transform' and 'dv'
        steps (and 'user_func' / 'scaler' when those options are enabled) are
        looked up in ``trained_pipeline.named_steps``; a missing step makes
        that lookup fail.
    """
    pipeline_list = []

    if self.user_input_func is not None:
        if trained_pipeline is not None:
            pipeline_list.append(('user_func', trained_pipeline.named_steps['user_func']))
        elif self.transformation_pipeline is None:
            print('Including the user_input_func in the pipeline! Please remember to return X, and not modify the length or order of X at all.')
            print('Your function will be called as the first step of the pipeline at both training and prediction times.')
            # `pass_y` is intentionally not passed: it was deprecated and then
            # removed from scikit-learn's FunctionTransformer, and False (the
            # value previously passed here) was its default while it existed.
            pipeline_list.append(('user_func', FunctionTransformer(func=self.user_input_func, validate=False)))

    # These parts will be included no matter what.
    if trained_pipeline is not None:
        pipeline_list.append(('basic_transform', trained_pipeline.named_steps['basic_transform']))
    else:
        pipeline_list.append(('basic_transform', utils_data_cleaning.BasicDataCleaning(column_descriptions=self.column_descriptions)))

    # Identity comparison kept as in the original: only the literal True enables scaling.
    if self.perform_feature_scaling is True:
        if trained_pipeline is not None:
            pipeline_list.append(('scaler', trained_pipeline.named_steps['scaler']))
        else:
            pipeline_list.append(('scaler', utils_scaling.CustomSparseScaler(self.column_descriptions)))

    if trained_pipeline is not None:
        pipeline_list.append(('dv', trained_pipeline.named_steps['dv']))
    else:
        pipeline_list.append(('dv', DataFrameVectorizer.DataFrameVectorizer(sparse=True, sort=True, column_descriptions=self.column_descriptions)))

    # Feature selection is only added when building from scratch. For a
    # trained pipeline this is the step being removed, since it has already
    # been combined with dv using dv.restrict.
    # Equality comparison (not identity) kept as in the original.
    if self.perform_feature_selection == True and trained_pipeline is None:
        pipeline_list.append(('feature_selection', utils_feature_selection.FeatureSelectionTransformer(type_of_estimator=self.type_of_estimator, column_descriptions=self.column_descriptions, feature_selection_model='SelectFromModel')))

    if trained_pipeline is not None:
        # Carry over every step whose name ends in '_model' (the suffix is the
        # only thing checked), preserving the order in trained_pipeline.steps.
        for step in trained_pipeline.steps:
            step_name = step[0]
            if step_name.endswith('_model'):
                pipeline_list.append((step_name, trained_pipeline.named_steps[step_name]))

        # Handling the case where we have run gscv on just the final model
        # itself, and we now need to integrate it back into the rest of the
        # pipeline. If final_model is None, nothing further is appended here.
        if final_model is not None:
            pipeline_list.append((final_model_step_name, final_model))
    else:
        # No trained pipeline: build a new model from its name and wrap it
        # in FinalModelATC under the fixed step name 'final_model'.
        final_model = utils_models.get_model_from_name(model_name, training_params=self.training_params)
        pipeline_list.append(('final_model', utils_model_training.FinalModelATC(model=final_model, type_of_estimator=self.type_of_estimator, ml_for_analytics=self.ml_for_analytics, name=self.name, scoring_method=self._scorer, feature_learning=feature_learning)))

    return Pipeline(pipeline_list)
评论列表
文章目录