def __init__(self, *args, **kwargs):
    """Build a FunctionTransformer from the given arguments and keep it on ``self.ft``.

    All positional and keyword arguments are forwarded unchanged to
    ``FunctionTransformer``.
    """
    wrapped = FunctionTransformer(*args, **kwargs)
    self.ft = wrapped
python类FunctionTransformer()的实例源码
custom_transformers.py 文件源码
项目:pandas-pipelines-custom-transformers
作者: jem1031
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def __init__(self, alpha=1.0, threshold=0.1, degree=3, operators=None, dt=1.0, n_jobs=1, derivative=None, feature_names=None, kw=None):
    """Store the configuration on the instance.

    Parameters
    ----------
    alpha, threshold, degree, operators, n_jobs, feature_names
        Stored unchanged as attributes of the same name.
    dt
        Passed as ``kw_args={"dt": dt}`` to the default derivative
        transformer. It is also stored as ``self.dt`` so the value is not
        lost when a custom ``derivative`` is supplied.
    derivative
        Object used to compute derivatives. Any falsy value (including the
        default ``None``) is replaced by
        ``FunctionTransformer(func=_derivative, kw_args={"dt": dt})``.
    kw
        Extra keyword arguments, stored as ``self.kw``. ``None`` (the
        default) means "no extra arguments" and produces a fresh empty dict
        for each instance.
    """
    self.alpha = alpha
    self.threshold = threshold
    self.degree = degree
    self.operators = operators
    self.dt = dt
    self.n_jobs = n_jobs
    if not derivative:
        # Fall back to the module's own derivative helper, parameterized by dt.
        derivative = FunctionTransformer(func=_derivative, kw_args={"dt": dt})
    self.derivative = derivative
    self.feature_names = feature_names
    # A literal ``{}`` default would be one dict shared by every instance
    # created with the default; build a new one per instance instead.
    self.kw = {} if kw is None else kw
def drop_first_component(X, y):
    """
    Fit a two-step pipeline (PCA, then a FunctionTransformer wrapping
    ``all_but_first_column``) on a training split of ``(X, y)``.

    Returns the transformed test split together with the test targets.
    """
    decomposition = PCA()
    # all_but_first_column is defined elsewhere; presumably it drops column 0.
    selector = FunctionTransformer(all_but_first_column)
    model = make_pipeline(decomposition, selector)

    train_X, held_out_X, train_y, held_out_y = train_test_split(X, y)
    model.fit(train_X, train_y)

    transformed = model.transform(held_out_X)
    return transformed, held_out_y
def test_np_log():
    """A FunctionTransformer wrapping np.log1p must match np.log1p itself."""
    data = np.arange(10).reshape((5, 2))

    # Run the transformer first, then compute the reference directly.
    transformed = FunctionTransformer(np.log1p).transform(data)
    expected = np.log1p(data)

    testing.assert_array_equal(transformed, expected)
def test_kw_arg():
    """Keyword arguments given via ``kw_args`` must reach the wrapped function."""
    data = np.linspace(0, 1, num=10).reshape((5, 2))
    rounder = FunctionTransformer(np.around, kw_args=dict(decimals=3))

    # Rounding through the transformer must equal rounding directly.
    transformed = rounder.transform(data)
    expected = np.around(data, decimals=3)
    testing.assert_array_equal(transformed, expected)
def test_kw_arg_update():
    """Changing ``kw_args`` after construction must affect later transforms."""
    data = np.linspace(0, 1, num=10).reshape((5, 2))
    rounder = FunctionTransformer(np.around, kw_args=dict(decimals=3))

    # Overwrite the keyword argument in place before transforming.
    rounder.kw_args['decimals'] = 1

    # The transform must now round to the updated precision.
    transformed = rounder.transform(data)
    expected = np.around(data, decimals=1)
    testing.assert_array_equal(transformed, expected)
def _construct_pipeline(self, model_name='LogisticRegression', trained_pipeline=None, final_model=None, feature_learning=False, final_model_step_name='final_model'):
    """Assemble the ordered (name, step) list and wrap it in a ``Pipeline``.

    When ``trained_pipeline`` is given, each step is taken from its
    ``named_steps`` instead of being created anew. Otherwise fresh step
    objects are built from the settings stored on ``self``.

    Step order: optional ``user_func``, ``basic_transform``, optional
    ``scaler``, ``dv``, optional ``feature_selection`` (fresh pipelines
    only), then the model step(s).
    """
    reuse_trained = trained_pipeline is not None
    steps = []

    # Optional user-supplied function, always placed first.
    if self.user_input_func is not None:
        if reuse_trained:
            steps.append(('user_func', trained_pipeline.named_steps['user_func']))
        elif self.transformation_pipeline is None:
            print('Including the user_input_func in the pipeline! Please remember to return X, and not modify the length or order of X at all.')
            print('Your function will be called as the first step of the pipeline at both training and prediction times.')
            user_step = FunctionTransformer(func=self.user_input_func, pass_y=False, validate=False)
            steps.append(('user_func', user_step))

    # Basic cleaning is included unconditionally.
    if reuse_trained:
        basic_transform = trained_pipeline.named_steps['basic_transform']
    else:
        basic_transform = utils_data_cleaning.BasicDataCleaning(column_descriptions=self.column_descriptions)
    steps.append(('basic_transform', basic_transform))

    if self.perform_feature_scaling is True:
        if reuse_trained:
            scaler = trained_pipeline.named_steps['scaler']
        else:
            scaler = utils_scaling.CustomSparseScaler(self.column_descriptions)
        steps.append(('scaler', scaler))

    if reuse_trained:
        vectorizer = trained_pipeline.named_steps['dv']
    else:
        vectorizer = DataFrameVectorizer.DataFrameVectorizer(sparse=True, sort=True, column_descriptions=self.column_descriptions)
    steps.append(('dv', vectorizer))

    # With a trained pipeline the feature_selection step is deliberately
    # left out: it has already been combined with dv using dv.restrict.
    if self.perform_feature_selection == True and not reuse_trained:
        selector = utils_feature_selection.FeatureSelectionTransformer(type_of_estimator=self.type_of_estimator, column_descriptions=self.column_descriptions, feature_selection_model='SelectFromModel')
        steps.append(('feature_selection', selector))

    if reuse_trained:
        # Carry over every step whose name ends in '_model' (intermediate,
        # feature-learning or ensemble models, and possibly the final model).
        steps.extend(
            (entry[0], trained_pipeline.named_steps[entry[0]])
            for entry in trained_pipeline.steps
            if entry[0][-6:] == '_model'
        )
        # A final model tuned on its own (e.g. via grid search) is added
        # back here. If none was passed in, the loop above has already
        # picked it up by name.
        if final_model is not None:
            steps.append((final_model_step_name, final_model))
    else:
        estimator = utils_models.get_model_from_name(model_name, training_params=self.training_params)
        wrapped_model = utils_model_training.FinalModelATC(model=estimator, type_of_estimator=self.type_of_estimator, ml_for_analytics=self.ml_for_analytics, name=self.name, scoring_method=self._scorer, feature_learning=feature_learning)
        steps.append(('final_model', wrapped_model))

    return Pipeline(steps)
def test_delegate_to_func():
    """Check which arguments FunctionTransformer hands to the wrapped function.

    Without ``pass_y`` the function should see only ``X``. With
    ``pass_y=True`` it should see ``X`` and ``y``. No keyword arguments are
    expected in either case.
    """
    # These stores are meant to collect the positional and keyword arguments
    # the wrapped function receives (recording is done by _make_func).
    seen_args = []
    seen_kwargs = {}
    data = np.arange(10).reshape((5, 2))

    # --- Case 1: default construction, transform(X) only. ---
    plain = FunctionTransformer(_make_func(seen_args, seen_kwargs))
    testing.assert_array_equal(
        plain.transform(data),
        data,
        'transform should have returned X unchanged',
    )

    # Only X should have been passed through.
    positional_msg = 'Incorrect positional arguments passed to func: {args}'.format(
        args=seen_args,
    )
    assert_equal(seen_args, [data], positional_msg)

    keyword_msg = 'Unexpected keyword arguments passed to func: {args}'.format(
        args=seen_kwargs,
    )
    assert_equal(seen_kwargs, {}, keyword_msg)

    # Empty both stores in place so the recorder keeps writing to them.
    del seen_args[:]  # works on python2 as well
    seen_kwargs.clear()

    # --- Case 2: pass_y=True, transform(X, y). ---
    target = object()
    with_y = FunctionTransformer(
        _make_func(seen_args, seen_kwargs),
        pass_y=True,
    )
    testing.assert_array_equal(
        with_y.transform(data, target),
        data,
        'transform should have returned X unchanged',
    )

    # Both X and y should have been passed through.
    positional_msg = 'Incorrect positional arguments passed to func: {args}'.format(
        args=seen_args,
    )
    assert_equal(seen_args, [data, target], positional_msg)

    keyword_msg = 'Unexpected keyword arguments passed to func: {args}'.format(
        args=seen_kwargs,
    )
    assert_equal(seen_kwargs, {}, keyword_msg)