def train_model(self, train_file_path, model_path):
    """Train a RandomForestRegressor on the given file and persist it.

    Parameters
    ----------
    train_file_path : str
        Path passed to self.load_file to obtain (X_train, Y_train).
    model_path : str
        Destination for the pickled model; the fitted scaler is saved
        next to it with '.pkl' replaced by '.scaler.pkl'.

    Returns
    -------
    The fitted RandomForestRegressor.
    """
    print("==> Load the data ...")
    X_train, Y_train = self.load_file(train_file_path)
    # `shape` is presumably a star-import (e.g. from numpy) — TODO confirm.
    print(train_file_path, shape(X_train))
    print("==> Train the model ...")
    min_max_scaler = preprocessing.MaxAbsScaler()
    X_train_minmax = min_max_scaler.fit_transform(X_train)
    clf = RandomForestRegressor(n_estimators=self.n_estimators)
    # .toarray() implies X is a sparse matrix — TODO confirm load_file output.
    clf.fit(X_train_minmax.toarray(), Y_train)
    print("==> Save the model ...")
    # Context managers close the files even if pickling raises; the original
    # passed open() results to pickle.dump and never closed them.
    with open(model_path, 'wb') as model_file:
        pickle.dump(clf, model_file)
    scaler_path = model_path.replace('.pkl', '.scaler.pkl')
    with open(scaler_path, 'wb') as scaler_file:
        pickle.dump(min_max_scaler, scaler_file)
    return clf
# Python examples using the MaxAbsScaler() class (scraped example collection)
def train_model(self, train_file_path, model_path):
    """Train a GradientBoostingRegressor on the given file and persist it.

    Parameters
    ----------
    train_file_path : str
        Path passed to self.load_file to obtain (X_train, Y_train).
    model_path : str
        Destination for the pickled model; the fitted scaler is saved
        next to it with '.pkl' replaced by '.scaler.pkl'.

    Returns
    -------
    The fitted GradientBoostingRegressor.
    """
    print("==> Load the data ...")
    X_train, Y_train = self.load_file(train_file_path)
    # `shape` is presumably a star-import (e.g. from numpy) — TODO confirm.
    print(train_file_path, shape(X_train))
    print("==> Train the model ...")
    min_max_scaler = preprocessing.MaxAbsScaler()
    X_train_minmax = min_max_scaler.fit_transform(X_train)
    clf = GradientBoostingRegressor(n_estimators=self.n_estimators)
    # .toarray() implies X is a sparse matrix — TODO confirm load_file output.
    clf.fit(X_train_minmax.toarray(), Y_train)
    print("==> Save the model ...")
    # Context managers close the files even if pickling raises; the original
    # passed open() results to pickle.dump and never closed them.
    with open(model_path, 'wb') as model_file:
        pickle.dump(clf, model_file)
    scaler_path = model_path.replace('.pkl', '.scaler.pkl')
    with open(scaler_path, 'wb') as scaler_file:
        pickle.dump(min_max_scaler, scaler_file)
    return clf
def scale_features(features, train):
    """Scale features, learning scaling parameters from the training rows only.

    The original docstring claimed parameters were learned from the *test*
    set; the code fits on ``features[train]``, i.e. the training rows, which
    is the correct way to avoid test-set leakage.

    Args:
        features: array-like feature matrix, indexable by `train`.
        train: index array / boolean mask selecting the training rows.

    Returns:
        Scaled copy of features, or `features` unchanged if FLAGS.scaling
        is None.

    Raises:
        ValueError: if FLAGS.scaling is not None, 'max_abs', or 'standard'.
    """
    if FLAGS.scaling is None:
        return features
    logging.info('Scaling features with %s', FLAGS.scaling)
    if FLAGS.scaling == 'max_abs':
        scaler = preprocessing.MaxAbsScaler()
    elif FLAGS.scaling == 'standard':
        scaler = preprocessing.StandardScaler()
    else:
        raise ValueError('Unrecognized scaling %s' % FLAGS.scaling)
    # Fit on the training subset only, then transform every row.
    scaler.fit(features[train])
    return scaler.transform(features)
def test_MaxAbsScaler():
    '''
    Demonstrate MaxAbsScaler: fit on a small matrix, then show the learned
    scale_/max_abs_ attributes and the transformed data.
    :return: None
    '''
    data = [
        [1, 5, 1, 2, 10],
        [2, 6, 3, 2, 7],
        [3, 7, 5, 6, 4],
        [4, 8, 7, 8, 1],
    ]
    X = data
    print("before transform:", X)
    max_abs_scaler = MaxAbsScaler()
    max_abs_scaler.fit(X)
    print("scale_ is :", max_abs_scaler.scale_)
    print("max_abs_ is :", max_abs_scaler.max_abs_)
    print("after transform:", max_abs_scaler.transform(X))
def scale(df, scaling=None):
    """Scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to scale
    scaling : 'maxabs', 'minmax', 'std', or None, optional (default None)
        type of scaling to apply; None returns df unchanged, and any
        unrecognized string falls through to standard normalization

    Returns
    -------
    pandas dataframe
        scaled copy of df (df itself when scaling is None)
    """
    if scaling is None:
        return df
    # Columns containing any NaN cannot be scaled; drop them first.
    df = df.dropna(axis=1, how='any')
    if scaling == 'maxabs':
        # Normalizing -1 to 1
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        # Scaling to [0,1]
        scaler = MinMaxScaler()
    else:
        # Standard normalization
        scaler = StandardScaler()
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # equivalent that works on both old and new pandas versions.
    mat = scaler.fit_transform(df.values)
    df = pd.DataFrame(mat, columns=df.columns)
    return df
def impute_and_scale(df, scaling=None):
    """Impute missing values with the column mean, then optionally scale.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to impute and scale
    scaling : 'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional
        type of scaling to apply; None skips scaling after imputation

    Returns
    -------
    pandas dataframe
        imputed (and possibly scaled) data over the surviving columns
    """
    # Columns that are entirely NaN cannot be mean-imputed; drop them first.
    df = df.dropna(axis=1, how='all')
    # NOTE(review): sklearn's Imputer was removed in 0.22 in favor of
    # SimpleImputer — confirm the pinned scikit-learn version still has it.
    imputer = Imputer(strategy='mean', axis=0)
    mat = imputer.fit_transform(df)
    if scaling is None:
        return pd.DataFrame(mat, columns=df.columns)
    if scaling == 'maxabs':
        scaler = MaxAbsScaler()    # normalize each column to [-1, 1]
    elif scaling == 'minmax':
        scaler = MinMaxScaler()    # scale each column to [0, 1]
    else:
        scaler = StandardScaler()  # standard (z-score) normalization
    scaled = scaler.fit_transform(mat)
    return pd.DataFrame(scaled, columns=df.columns)
def scale(df, scaling=None):
    """Scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to scale
    scaling : 'maxabs', 'minmax', 'std', or None, optional (default None)
        type of scaling to apply; None returns df unchanged, and any
        unrecognized string falls through to standard normalization

    Returns
    -------
    pandas dataframe
        scaled copy of df (df itself when scaling is None)
    """
    if scaling is None:
        return df
    # Columns containing any NaN cannot be scaled; drop them first.
    df = df.dropna(axis=1, how='any')
    if scaling == 'maxabs':
        # Normalizing -1 to 1
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        # Scaling to [0,1]
        scaler = MinMaxScaler()
    else:
        # Standard normalization
        scaler = StandardScaler()
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # equivalent that works on both old and new pandas versions.
    mat = scaler.fit_transform(df.values)
    df = pd.DataFrame(mat, columns=df.columns)
    return df
def impute_and_scale(df, scaling=None):
    """Impute missing values with the column mean, then optionally scale.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to impute and scale
    scaling : 'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional
        type of scaling to apply; None skips scaling after imputation

    Returns
    -------
    pandas dataframe
        imputed (and possibly scaled) data over the surviving columns
    """
    # Columns that are entirely NaN cannot be mean-imputed; drop them first.
    df = df.dropna(axis=1, how='all')
    # NOTE(review): sklearn's Imputer was removed in 0.22 in favor of
    # SimpleImputer — confirm the pinned scikit-learn version still has it.
    imputer = Imputer(strategy='mean', axis=0)
    mat = imputer.fit_transform(df)
    if scaling is None:
        return pd.DataFrame(mat, columns=df.columns)
    if scaling == 'maxabs':
        scaler = MaxAbsScaler()    # normalize each column to [-1, 1]
    elif scaling == 'minmax':
        scaler = MinMaxScaler()    # scale each column to [0, 1]
    else:
        scaler = StandardScaler()  # standard (z-score) normalization
    scaled = scaler.fit_transform(mat)
    return pd.DataFrame(scaled, columns=df.columns)
def scale(df, scaling=None):
    """Scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to scale
    scaling : 'maxabs', 'minmax', 'std', None, or the string 'none'
        (case-insensitive), optional (default None)
        type of scaling to apply; None/'none' returns df unchanged, and
        any unrecognized string falls through to standard normalization

    Returns
    -------
    pandas dataframe
        scaled copy of df (df itself when scaling is None or 'none')
    """
    if scaling is None or scaling.lower() == 'none':
        return df
    # Columns containing any NaN cannot be scaled; drop them first.
    df = df.dropna(axis=1, how='any')
    if scaling == 'maxabs':
        # Normalizing -1 to 1
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        # Scaling to [0,1]
        scaler = MinMaxScaler()
    else:
        # Standard normalization
        scaler = StandardScaler()
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # equivalent that works on both old and new pandas versions.
    mat = scaler.fit_transform(df.values)
    df = pd.DataFrame(mat, columns=df.columns)
    return df
def load_data(shuffle=True, n_cols=None):
    """Load the P1B1 train/test CSVs and scale them jointly to [-1, 1].

    Parameters
    ----------
    shuffle : bool, optional (default True)
        shuffle rows of each split (deterministically, via the module-level
        `seed`) before scaling
    n_cols : int or None, optional
        if given, read only the first n_cols columns of each CSV

    Returns
    -------
    (X_train, X_test) : tuple of numpy arrays
        MaxAbs-scaled feature matrices; the scaler is fit on the
        concatenation of both splits so they share one scale.
    """
    train_path = get_p1_file('http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/P1B1/P1B1.train.csv')
    test_path = get_p1_file('http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/P1B1/P1B1.test.csv')
    usecols = list(range(n_cols)) if n_cols else None
    df_train = pd.read_csv(train_path, engine='c', usecols=usecols)
    df_test = pd.read_csv(test_path, engine='c', usecols=usecols)
    # Keyword axis: positional axis in DataFrame.drop was removed in pandas 2.0.
    df_train = df_train.drop('case_id', axis=1).astype(np.float32)
    df_test = df_test.drop('case_id', axis=1).astype(np.float32)
    if shuffle:
        df_train = df_train.sample(frac=1, random_state=seed)
        df_test = df_test.sample(frac=1, random_state=seed)
    # .values instead of DataFrame.as_matrix(), which was removed in pandas 1.0.
    X_train = df_train.values
    X_test = df_test.values
    # Remember the split point before X_train is overwritten below; the
    # original re-read X_train.shape[0] after reassignment, which only
    # worked by coincidence of the slice preserving the row count.
    n_train = X_train.shape[0]
    scaler = MaxAbsScaler()
    mat = scaler.fit_transform(np.concatenate((X_train, X_test), axis=0))
    X_train = mat[:n_train, :]
    X_test = mat[n_train:, :]
    return X_train, X_test
def scale_by_max_value(X):
    """
    Scale each feature by its abs maximum value.

    Keyword arguments:
    X -- The feature vectors

    Returns the MaxAbs-scaled copy of X (values in [-1, 1]).
    """
    # `verbose` is a module-level flag defined elsewhere — TODO confirm.
    if verbose:
        # print() call syntax: the original Python 2 print statement is a
        # SyntaxError under Python 3, which the rest of this file targets.
        print('\nScaling to the range [-1,1] ...')
    max_abs_scaler = preprocessing.MaxAbsScaler()
    return max_abs_scaler.fit_transform(X)