from sklearn.preprocessing import MinMaxScaler

def test_MinMaxScaler():
    '''
    Test the MinMaxScaler transformer on a small matrix.
    :return: None
    '''
    X = [[1, 5, 1, 2, 10],
         [2, 6, 3, 2, 7],
         [3, 7, 5, 6, 4],
         [4, 8, 7, 8, 1]]
    print("before transform:", X)
    scaler = MinMaxScaler(feature_range=(0, 2))
    scaler.fit(X)
    print("min_ is :", scaler.min_)
    print("scale_ is :", scaler.scale_)
    print("data_max_ is :", scaler.data_max_)
    print("data_min_ is :", scaler.data_min_)
    print("data_range_ is :", scaler.data_range_)
    print("after transform:", scaler.transform(X))
Example source code for the Python class MinMaxScaler()
def test_graph_simple():
    data, labels = make_circles(n_samples=2000, noise=0.03, factor=0.3)
    params = {'coverer__intervals': 10,
              'coverer__overlap': 0.1,
              'clusterer__min_samples': 3,
              'clusterer__eps': 0.5}
    m = Mapper(params=params)
    scaled_data = MinMaxScaler().fit_transform(data)
    m.fit(data, scaled_data)
    categories = {"labels": labels}
    scales = {"y[0]": scaled_data[:, 0],
              "y[1]": scaled_data[:, 1]}
    json_graph_str = json_graph(m, categories, scales)
    # check that the string can be parsed as valid JSON
    json_graph_dict = json.loads(json_graph_str)
    html_graph_str = html_graph(m, categories, scales)  # TODO: validate the HTML output as well
def plot_on_dataset(X, y, ax, name):
    # for each dataset, plot the learning curve for each learning strategy
    print("\nlearning on dataset %s" % name)
    ax.set_title(name)
    X = MinMaxScaler().fit_transform(X)
    mlps = []
    if name == "digits":
        # digits is larger but converges fairly quickly
        max_iter = 15
    else:
        max_iter = 400
    for label, param in zip(labels, params):
        print("training: %s" % label)
        mlp = MLPClassifier(verbose=0, random_state=0,
                            max_iter=max_iter, **param)
        mlp.fit(X, y)
        mlps.append(mlp)
        print("Training set score: %f" % mlp.score(X, y))
        print("Training set loss: %f" % mlp.loss_)
    for mlp, label, args in zip(mlps, labels, plot_args):
        ax.plot(mlp.loss_curve_, label=label, **args)
def normalized_usage_by_package(self, package_usage_frame: pd.DataFrame,
                                drop_package_prefix: str = None):
    scaler = MinMaxScaler()
    df = package_usage_frame.drop('package', axis=1)
    df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
    if drop_package_prefix:
        # strip the prefix from package names that start with it
        df_scaled['package'] = package_usage_frame['package'].apply(
            lambda text: text[text.startswith(drop_package_prefix)
                              and len(drop_package_prefix):])
    else:
        df_scaled['package'] = package_usage_frame['package']
    df_sorted = df_scaled.sort_values('user_count').reset_index(drop=True)
    return df_sorted
def predict_new(self, input):
    model = self.train_model()
    assert len(input) == 5 and type(input) == list
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(self.data)
    inp = scaler.transform([input])
    print(scaler.inverse_transform(model.predict(numpy.array(inp).reshape(1, 1, 5))))

# x = Predict()
# x.predict_new([1243.068, 1298.713, 1336.560, 1299.175, 1288.913])
def v_demo(dir, prefix, pre_prefix, file_name, _dir):
    _val = []
    _coords = []
    file_dir_fix = dir + "\\output_INFLO.csv"
    with open(file_dir_fix, 'r', newline='') as inp:
        rd = csv.reader(inp)
        for row in rd:
            _val.append([row[1], row[2], row[0]])
    _val = np.asarray(_val)
    _val_original = _val
    _val_original = list(map(myFloat, _val_original))
    _val_original = list(map(myInt, _val_original))
    #_val_original = list(map(myTemp, _val_original))
    _val_original = np.asarray(_val_original)
    _val = preprocessing.StandardScaler().fit_transform(_val)
    #_center = preprocessing.MinMaxScaler()
    #_center.fit_transform(_val)
    #_arr = StandardScaler().inverse_transform(_center)
    new_file = prefix + file_name + ".png"
    dbFun(_val, _val_original, new_file)
    return
##############################################################################################
# Getting the clusters and printing them in the most trivial way, as asked by Dr Sheikh Faisal
def next_batch(self, batches, in_memory):
    """
    Returns the next batch in some fixed-length representation.
    Currently we use Panchenko et al.'s cumulative traces.

    @param batches an iterator over all of the batches
        (if in_memory == True: in batch-major form without padding,
         else: a list of paths to the files)
    @param in_memory is a boolean value
    @return if in_memory is False, a tuple of (dict, [paths]) where paths is the list of
        paths for this batch; else just a dict for training
    """
    batch = next(batches)
    data_batch = batch
    if not in_memory:
        data_batch = [helpers.read_cell_file(path) for path in batch]
    data_batch = [self._process_trace(trace, self.layers[0]) for trace in data_batch]
    min_max_scaler = MinMaxScaler()
    data_batch = min_max_scaler.fit_transform(data_batch)
    encoder_inputs_ = data_batch
    decoder_targets_ = data_batch
    train_dict = {
        self.encoder_inputs: encoder_inputs_,
        self.decoder_targets: decoder_targets_,
    }
    if not in_memory:
        return (train_dict, batch)
    return train_dict
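The docstring above mentions Panchenko et al.'s cumulative trace representation. As a rough, hedged sketch of the idea (assuming a trace is a sequence of signed packet sizes; the project's actual `_process_trace` may differ), the representation is a cumulative sum resampled to a fixed length:

import numpy as np

def cumulative_representation(trace, n_points=100):
    # Hypothetical helper, not from this project: cumulative sum of signed
    # packet sizes, interpolated onto a fixed-length grid of n_points samples.
    cumulative = np.cumsum(np.asarray(trace, dtype=float))
    grid = np.linspace(0, len(cumulative) - 1, n_points)
    return np.interp(grid, np.arange(len(cumulative)), cumulative)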
def min_max_scale(X_train, X_test):
    preprocessor = prep.MinMaxScaler().fit(np.concatenate((X_train, X_test), axis=0))
    X_train = preprocessor.transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test
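Note that min_max_scale fits the scaler on the concatenation of the train and test sets, so test-set minima and maxima influence the scaling. A common alternative, sketched below under the assumption that only training statistics should be used, fits on the train split alone:

from sklearn import preprocessing as prep

def min_max_scale_train_only(X_train, X_test):
    # Fit the scaler on the training data only, then reuse its statistics for the test data.
    preprocessor = prep.MinMaxScaler().fit(X_train)
    return preprocessor.transform(X_train), preprocessor.transform(X_test)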
def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0,
             fit_intercept=True, intercept_scaling=1, class_weight=None,
             random_state=None, solver='liblinear', max_iter=100,
             multi_class='ovr', verbose=0, warm_start=False, n_jobs=1):
    self.penalty = penalty
    self.dual = dual
    self.tol = tol
    self.C = C
    self.fit_intercept = fit_intercept
    self.intercept_scaling = intercept_scaling
    self.class_weight = class_weight
    self.random_state = random_state
    self.solver = solver
    self.max_iter = max_iter
    self.multi_class = multi_class
    self.verbose = verbose
    self.warm_start = warm_start
    self.n_jobs = n_jobs

    self.minmax_scaler = MinMaxScaler()
    self.dsapp_cutoff = CutOff()
    self.lr = LogisticRegression(penalty=penalty,
                                 dual=dual,
                                 tol=tol,
                                 C=C,
                                 fit_intercept=fit_intercept,
                                 intercept_scaling=intercept_scaling,
                                 class_weight=class_weight,
                                 random_state=random_state,
                                 solver=solver,
                                 max_iter=max_iter,
                                 multi_class=multi_class,
                                 verbose=verbose,
                                 warm_start=warm_start,
                                 n_jobs=n_jobs)

    self.pipeline = Pipeline([
        ('minmax_scaler', self.minmax_scaler),
        ('dsapp_cutoff', self.dsapp_cutoff),
        ('lr', self.lr)
    ])
def setClf(self):
    clf = KNeighborsClassifier(n_neighbors=33)
    min_max_scaler = preprocessing.MinMaxScaler()
    self.clf = Pipeline([('scaler', min_max_scaler), ('estimator', clf)])
    return
Source: linearregressionmodel.py (project: Supply-demand-forecasting, author: LevinJ)
def setClf(self):
    # self.clf = Ridge(alpha=0.0000001, tol=0.0000001)
    clf = LinearRegression()
    min_max_scaler = preprocessing.MinMaxScaler()
    self.clf = Pipeline([('scaler', min_max_scaler), ('estimator', clf)])
    return
def setClf(self):
    clf = SVR(C=100, epsilon=0.1, gamma=0.0001, cache_size=10240)
    min_max_scaler = preprocessing.MinMaxScaler()
    self.clf = Pipeline([('scaler', min_max_scaler), ('estimator', clf)])
    return
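Putting the MinMaxScaler and the estimator into a single Pipeline, as the setClf methods above do, means the scaler is refit on each training fold during cross-validation rather than on the full dataset. A minimal, self-contained usage sketch (with synthetic data, not this project's features):

from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

X, y = make_regression(n_samples=200, n_features=10, noise=0.1, random_state=0)
pipe = Pipeline([('scaler', MinMaxScaler()),
                 ('estimator', SVR(C=100, epsilon=0.1, gamma=0.0001))])
scores = cross_val_score(pipe, X, y, cv=5)  # the scaler is fit inside each training fold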
def __init__(self, classifier_class):
    '''
    Constructor
    '''
    self.classifiers = []
    self.logger = logging.getLogger("PairwiseClassifier")
    self.classifier_class = classifier_class
    self.normalizer = MinMaxScaler()
def test_large_grid():
    """In this test, we purposely overfit a RandomForest to completely random data
    in order to assert that the test error will far supersede the train error.
    """
    if not SK18:
        custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)
    else:
        custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)

    # define the pipe
    pipe = Pipeline([
        ('scaler', SelectiveScaler()),
        ('pca', SelectivePCA(weight=True)),
        ('rf', RandomForestClassifier(random_state=42))
    ])

    # define hyper parameters
    hp = {
        'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
        'pca__whiten': [True, False],
        'pca__weight': [True, False],
        'pca__n_components': uniform(0.75, 0.15),
        'rf__n_estimators': randint(5, 10),
        'rf__max_depth': randint(5, 15)
    }

    # define the grid
    grid = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy', n_jobs=1, cv=custom_cv, random_state=42)

    # this will fail because we haven't fit yet
    assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)

    # fit the grid
    grid.fit(X_train, y_train)

    # score for coverage -- this might warn...
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        grid.score(X_train, y_train)

    # coverage:
    assert grid._estimator_type == 'classifier'

    # get predictions
    tr_pred, te_pred = grid.predict(X_train), grid.predict(X_test)

    # evaluate score (SHOULD be better than random...)
    accuracy_score(y_train, tr_pred), accuracy_score(y_test, te_pred)

    # grid score reports:
    # assert fails for bad percentile
    assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 0.0})
    assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 1.0})

    # assert fails for bad y_axis
    assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'y_axis': 'bad_axis'})

    # assert passes otherwise
    report_grid_score_detail(grid, charts=True, percentile=0.95)  # just ensure percentile works
def scale(train, test):
    scale_f = MinMaxScaler(feature_range=(-1, 1))
    scale_f = scale_f.fit(train)
    train = train.reshape(train.shape[0], train.shape[1])
    train_s = scale_f.transform(train)
    test = test.reshape(test.shape[0], test.shape[1])
    test_s = scale_f.transform(test)
    return scale_f, train_s, test_s
def get_scaled_user():
    dataset = get_dataset()
    new_df = pd.DataFrame(index=set(dataset.index))
    new_df = new_df.sort_index()
    for user_id in get_user_id_list():
        if not check_empty(user_id):
            new_df[user_id] = dataset[dataset.user_id == user_id].power_consumption
    new_df_log = new_df.apply(np.log)
    new_df_log_scaled = preprocessing.MinMaxScaler().fit_transform(new_df_log.ix[60:, :].dropna())
    return pd.DataFrame(new_df_log_scaled, columns=new_df_log.columns)
def scale_features(data):
    extract_features = theano.function([model.layers[0].input],
                                       model.layers[32].output,
                                       allow_input_downcast=True)
    features = extract_features(data)
    scale = MinMaxScaler()
    scale_feat = scale.fit_transform(features)
    return scale_feat
def get_bootstrapped_trainset(trainSet, y_train, bootstrap_data, es, estimator, th_bs):
    new_train_set = list(trainSet)
    new_y_train = list(y_train)

    trainAndBSData = trainSet + bootstrap_data
    generateDataDrivenFeats(trainSet, trainAndBSData, es)
    featurized = featurize(trainAndBSData)

    train_feats = [featurized[idx] for idx in range(0, len(trainSet), 1)]
    test_feats = [featurized[idx] for idx in range(len(trainSet), len(trainAndBSData), 1)]

    # Do feature selection on the training data
    train_feats = fs.runFeatureSelection(train_feats, y_train, es)
    train_feats, y_train, train_bucket = ss.runSampleSelection(train_feats, y_train, [i for i in range(0, len(trainSet), 1)], es)

    # calculate inter-annotator weighting
    weights_train = getWeights(trainAndBSData, train_bucket, es.weighInterAnnot)

    vectorizer = DictVectorizer()
    x_train = vectorizer.fit_transform(train_feats)
    x_test = vectorizer.transform(test_feats)

    if es.scaleData:
        min_max_scalar = MinMaxScaler()
        x_train = min_max_scalar.fit_transform(x_train.toarray())
        x_test = min_max_scalar.transform(x_test.toarray())

    model = train(estimator, x_train, y_train, weights_train, model=None)
    y_pred_prob = model.predict_proba(x_test)

    # add bootstrap examples whose predicted confidence exceeds the threshold
    for i, cur_y in enumerate(y_pred_prob):
        if np.max(cur_y) > th_bs:
            new_train_set.append(bootstrap_data[i])
            new_y_train.append(np.argmax(cur_y))
    return (new_train_set, new_y_train)  # update none to confidence vector