def keras_mlp1(train2, y, test2, v, z):
    from keras import layers
    from keras import models
    from keras import optimizers
    cname = sys._getframe().f_code.co_name
    num_splits = 9
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(256, kernel_initializer='Orthogonal')(input_)
        #model = layers.BatchNormalization()(model)
        #model = layers.advanced_activations.PReLU()(model)
        model = layers.Activation('selu')(model)
        #model = layers.Dropout(0.7)(model)
        model = layers.Dense(64, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)
        #model = layers.Dropout(0.9)(model)
        model = layers.Dense(16, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)
        model = layers.Dense(1, activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy', optimizer=optimizers.Nadam())
        #print(model.summary(line_length=120))
        return model
    keras_common(train3, y, test3, v, z, num_splits, cname, build_model)
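The keras_common helper is referenced but not shown in this excerpt. A minimal sketch of what it might do, inferred from the call site and from the cross-validation loop in the class-based variant further below (fold count, pandas-style v/z containers, out-of-fold accumulation); the project's real helper is defined elsewhere, and the epoch count here is an assumption:

def keras_common(train3, y, test3, v, z, num_splits, cname, build_model):
    from sklearn import model_selection
    ss = model_selection.StratifiedKFold(n_splits=num_splits, shuffle=True)
    v[cname] = 0
    z[cname] = 0
    for itrain, ival in ss.split(train3, y):
        model = build_model()
        model.fit(train3[itrain], y[itrain], epochs=10, verbose=0)  # epochs assumed
        v.loc[ival, cname] += model.predict(train3[ival]).ravel()   # out-of-fold predictions
        z[cname] += model.predict(test3).ravel()                    # accumulate test predictions
    z[cname] /= num_splits                                          # average over folds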
def keras_mlp2(train2, y, test2, v, z):
    from keras import layers
    from keras import models
    from keras import optimizers
    cname = sys._getframe().f_code.co_name
    num_splits = 9
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(1024, kernel_initializer='Orthogonal')(input_)
        model = layers.Activation('selu')(model)
        model = layers.Dense(128, kernel_initializer='Orthogonal')(model)
        model = layers.Activation('selu')(model)
        model = layers.Dense(16, kernel_initializer='Orthogonal')(model)
        model = layers.Activation('selu')(model)
        model = layers.Dense(1, activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy', optimizer=optimizers.SGD())
        #print(model.summary(line_length=120))
        return model
    keras_common(train3, y, test3, v, z, num_splits, cname, build_model)
def keras_mlp3(train2, y, test2, v, z):
    from keras import layers
    from keras import models
    from keras import optimizers
    cname = sys._getframe().f_code.co_name
    num_splits = 9
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(512, kernel_initializer='Orthogonal')(input_)
        model = layers.Activation('selu')(model)
        model = layers.Dense(256, kernel_initializer='Orthogonal')(model)
        model = layers.Activation('selu')(model)
        model = layers.Dense(32, kernel_initializer='Orthogonal')(model)
        model = layers.Activation('selu')(model)
        model = layers.Dense(1, activation='sigmoid')(model)
        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy', optimizer=optimizers.Adam())
        #print(model.summary(line_length=120))
        return model
    keras_common(train3, y, test3, v, z, num_splits, cname, build_model)
# Source: lazzy_decomposion.py (project: Power-Consumption-Prediction, author: YoungGod)
def choose_best_lag(seq, pre_period, lags=range(1, 30), Kmax=200):
    """
    Choose the best input lag for the lazzy model by cross-validation:
    returns all candidate models plus the best lag and neighbourhood size.
    """
    models = []
    # standardize the sequence
    std_sca = StandardScaler().fit(np.array(seq).reshape(-1, 1))
    # rob_sca = RobustScaler().fit(np.array(seq).reshape(-1,1))
    seq = std_sca.transform(np.array(seq).reshape(-1, 1))
    # hold out a small test split and evaluate each candidate lag on it
    from sklearn.model_selection import train_test_split
    for input_lag in lags:
        # window = input_lag + pre_period
        X, Y = create_dataset(seq.flatten(), input_lag, pre_period)
        # lazzy_models = lazzy_loo(X[-1], X[0:-1], Y[:-1], Kmax)
        # y_pred = lazzy_prediction(X[-1], X[0:-1], Y[:-1], lazzy_models)
        # err = err_evaluation(y_pred.flatten(), Y[-1])
        #
        # lazzy_models.sort()
        # models.append((err, input_lag, lazzy_models[0][1]))
        # do more cv
        # for state in range(0,3):
        err = 0.0
        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.01, random_state=0)
        for x_q, y_q in zip(X_test, y_test):
            lazzy_models = lazzy_loo(x_q, X_train, y_train, Kmax)
            y_pred = lazzy_prediction(x_q, X_train, y_train, lazzy_models)
            err += err_evaluation(y_pred.flatten(), y_q)
        lazzy_models.sort()
        models.append((err / len(X_test), input_lag, lazzy_models[0][1]))
    models.sort()
    best_lag = models[0][1]
    best_k = models[0][2]
    # fig, ax = plt.subplots()
    # ax.plot(y_pred.flatten(), label='prediction')
    # ax.plot(Y[-1], label='real')
    # ax.set_title('best cv lags')
    return models, best_lag, best_k
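The helpers create_dataset, lazzy_loo, lazzy_prediction, and err_evaluation live elsewhere in lazzy_decomposion.py. For orientation, a typical sliding-window implementation matching create_dataset's call signature would look like this (a sketch under that assumption, not the project's actual code):

import numpy as np

def create_dataset(seq, input_lag, pre_period):
    """Build sliding windows: each X row holds `input_lag` past values,
    each Y row the following `pre_period` values."""
    X, Y = [], []
    for i in range(len(seq) - input_lag - pre_period + 1):
        X.append(seq[i:i + input_lag])
        Y.append(seq[i + input_lag:i + input_lag + pre_period])
    return np.array(X), np.array(Y)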
# The custom transformers (FeatureRetainer, FeatureDropper, FunctionMapper,
# OneHotCategoricalEncoder, MulticollinearityFilterer, SelectiveImputer,
# SelectiveScaler, BoxCoxTransformer, NearZeroVarianceFilterer, SelectivePCA),
# report_grid_score_detail, and the X_train/y_train fixtures come from the
# surrounding test module; assuming a modern scikit-learn layout, the standard
# imports it also needs are:
from scipy.stats import randint, uniform
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, StandardScaler

def test_random_grid():
    # build a pipeline
    pipe = Pipeline([
        ('retainer', FeatureRetainer()),  # will retain all
        ('dropper', FeatureDropper()),  # won't drop any
        ('mapper', FunctionMapper()),  # pass through
        ('encoder', OneHotCategoricalEncoder()),  # no object dtypes, so will pass through
        ('collinearity', MulticollinearityFilterer(threshold=0.85)),
        ('imputer', SelectiveImputer()),  # pass through
        ('scaler', SelectiveScaler()),
        ('boxcox', BoxCoxTransformer()),
        ('nzv', NearZeroVarianceFilterer(threshold=1e-4)),
        ('pca', SelectivePCA(n_components=0.9)),
        ('model', RandomForestClassifier(n_jobs=1))
    ])

    # let's define a set of hyper-parameters over which to search
    hp = {
        'collinearity__threshold': uniform(loc=.8, scale=.15),
        'collinearity__method': ['pearson', 'kendall', 'spearman'],
        'scaler__scaler': [StandardScaler(), RobustScaler()],
        'pca__n_components': uniform(loc=.75, scale=.2),
        'pca__whiten': [True, False],
        'model__n_estimators': randint(5, 10),
        'model__max_depth': randint(2, 5),
        'model__min_samples_leaf': randint(1, 5),
        'model__max_features': uniform(loc=.5, scale=.5),
        'model__max_leaf_nodes': randint(10, 15)
    }

    # define the gridsearch
    search = RandomizedSearchCV(pipe, hp,
                                n_iter=2,  # just to test it even works
                                scoring='accuracy',
                                cv=2,
                                random_state=42)

    # fit the search
    search.fit(X_train, y_train)

    # test the report
    report_grid_score_detail(search, charts=False)
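Note how the search space mixes scipy.stats frozen distributions with plain lists: RandomizedSearchCV calls .rvs() on a distribution for each of the n_iter draws and samples uniformly from a list. For example, the collinearity threshold above is drawn from [0.80, 0.95):

from scipy.stats import uniform

# uniform(loc, scale) is the continuous uniform on [loc, loc + scale),
# so this samples collinearity thresholds from [0.80, 0.95)
threshold_dist = uniform(loc=.8, scale=.15)
print(threshold_dist.rvs(size=3, random_state=42))  # three candidate thresholds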
def normalize_padded(padded, means=None, stds=None):
    """Normalize by last dim of padded with means/stds or calculate them.

    .. TODO::
        * consider importing instead ex:
            from sklearn.preprocessing import StandardScaler, RobustScaler
            robust_scaler = RobustScaler()
            x_train = robust_scaler.fit_transform(x_train)
            x_test = robust_scaler.transform(x_test)
          ValueError: Found array with dim 3. RobustScaler expected <= 2.
        * Don't normalize binary features
        * If events are sparse then this may lead to huge values.
    """
    # TODO epsilon choice is arbitrary
    epsilon = 1e-6
    original_dtype = padded.dtype
    is_flat = len(padded.shape) == 2
    if is_flat:
        padded = np.expand_dims(padded, axis=-1)
    n_features = padded.shape[2]
    n_obs = padded.shape[0] * padded.shape[1]
    if means is None:
        means = np.nanmean(np.float128(
            padded.reshape(n_obs, n_features)), axis=0)
    means = means.reshape([1, 1, n_features])
    padded = padded - means
    if stds is None:
        stds = np.nanstd(np.float128(
            padded.reshape(n_obs, n_features)), axis=0)
    stds = stds.reshape([1, 1, n_features])
    if (stds < epsilon).any():
        print('warning. Constant cols: ', np.where((stds < epsilon).flatten()))
        stds[stds < epsilon] = 1.0
        # should be (small number)/1.0 as mean is subtracted.
        # Possible prob depending on machine err
    # 128 float cast otherwise
    padded = (padded / stds).astype(original_dtype)
    if is_flat:
        # Return to flat
        padded = np.squeeze(padded)
    return padded, means, stds
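A quick usage sketch with hypothetical data shapes: fit the statistics on a padded (n_sequences, n_timesteps, n_features) training array, then reuse the returned means/stds on test data so both share the same normalization:

import numpy as np

# Hypothetical shapes: 10 train / 4 test sequences, 50 timesteps, 3 features.
x_train = np.random.randn(10, 50, 3)
x_test = np.random.randn(4, 50, 3)

x_train, means, stds = normalize_padded(x_train)                  # compute stats on train
x_test, _, _ = normalize_padded(x_test, means=means, stds=stds)   # reuse train statistics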
def model(self):
    #cname = sys._getframe().f_code.co_name
    cname = 'keras'
    train, y, test = self.train_, self.y_, self.test_
    np.random.seed(1234)
    train.drop('id', axis=1, inplace=True)
    test.drop('id', axis=1, inplace=True)
    from sklearn import pipeline
    pipe = pipeline.make_pipeline(preprocessing.Imputer(),
                                  preprocessing.RobustScaler())
    train = pipe.fit_transform(train)
    test = pipe.transform(test)
    self.input_dims_ = train.shape[1]
    def build_model():
        return self.build_keras_model()
    batch_size = self.batch_size_
    build_model().summary(line_length=120)
    ss = model_selection.StratifiedKFold(n_splits=self.num_splits_,
                                         random_state=11,
                                         shuffle=True)
    scores = list()
    model_path = self.temp_name('keras_mlp_weights')
    v, z = self.v_, self.z_
    v[cname] = 0
    z[cname] = 0
    for n, (itrain, ival) in enumerate(ss.split(train, y)):
        xtrain, xval = train[itrain], train[ival]
        ytrain, yval = y[itrain], y[ival]
        model = build_model()
        model.fit(
            xtrain, ytrain,
            batch_size=batch_size,
            epochs=10000,
            validation_data=(xval, yval),
            verbose=0,
            callbacks=build_keras_fit_callbacks(model_path),
            shuffle=True
        )
        model.load_weights(model_path)  # restore the best checkpointed weights
        p = model.predict(xval)
        v.loc[ival, cname] += p.ravel()  # out-of-fold predictions
        score = metrics.log_loss(y[ival], p)
        if score != score:  # NaN never equals itself
            raise Exception('NaN score!!!')
        print(cname, 'fold %d: ' % (n + 1), score, self.now())
        scores.append(score)
        z[cname] += model.predict(test).ravel()  # accumulate test predictions
        del model
        for i in range(3):
            gc.collect(i)
    print('scores:', scores, np.mean(scores), np.std(scores))
    self.drop_temp(model_path)
    cv = np.mean(scores)
    z[cname] /= self.num_splits_  # average test predictions over folds
    z['y'] = z[cname]
    return cv, None
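build_keras_fit_callbacks is not shown in this excerpt, but given how it is used (epochs=10000 followed by reloading weights from model_path), it almost certainly pairs early stopping with checkpointing. A plausible minimal version; the patience value is an assumption:

def build_keras_fit_callbacks(model_path):
    from keras import callbacks
    return [
        # stop once validation loss stops improving (patience assumed)
        callbacks.EarlyStopping(monitor='val_loss', patience=20),
        # keep only the best weights seen so far at model_path
        callbacks.ModelCheckpoint(model_path, monitor='val_loss',
                                  save_best_only=True, save_weights_only=True),
    ]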