def try_params( n_iterations, params ):
n_iterations = int( round( n_iterations ))
print "n_iterations:", n_iterations
pprint( params )
if params['scaler']:
scaler = eval( "{}()".format( params['scaler'] ))
x_train_ = scaler.fit_transform( data['x_train'].astype( float ))
x_test_ = scaler.transform( data['x_test'].astype( float ))
local_data = { 'x_train': x_train_, 'y_train': data['y_train'],
'x_test': x_test_, 'y_test': data['y_test'] }
else:
local_data = data
# we need a copy because at the next small round the best params will be re-used
params_ = dict( params )
params_.pop( 'scaler' )
clf = SGD( n_iter = n_iterations, **params_ )
return train_and_eval_sklearn_regressor( clf, local_data )
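A minimal usage sketch for try_params, under stated assumptions: an older scikit-learn whose SGDRegressor still accepts n_iter, a global data dict, and a stub standing in for the original train_and_eval_sklearn_regressor helper. The parameter values are illustrative, not the author's search space.

import numpy as np
from pprint import pprint
from sklearn.linear_model import SGDRegressor as SGD   # old API exposing n_iter
from sklearn.preprocessing import StandardScaler       # resolved by name via eval() above

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = X.dot(rng.randn(5))
data = {'x_train': X[:150], 'y_train': y[:150],
        'x_test': X[150:], 'y_test': y[150:]}

def train_and_eval_sklearn_regressor(clf, d):
    # stand-in for the real helper: fit, then report R^2 on the test split
    clf.fit(d['x_train'], d['y_train'])
    return clf.score(d['x_test'], d['y_test'])

print(try_params(100, {'scaler': 'StandardScaler', 'penalty': 'l2', 'alpha': 1e-4}))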
def psgd_method(args):
"""
SGD method run in parallel using map.
Parameters
----------
args: tuple (sgd, data), where
sgd is SGDRegressor object and
data is a tuple: (X_train, y_train)
Returns
-------
sgd: object returned after executing .fit()
"""
sgd, data = args
X_train, y_train = data
sgd.fit(X_train, y_train)
return sgd
def psgd_method_1(sgd, X_train, y_train):
"""
SGD method run in parallel using map.
Parameters
----------
args: tuple (sgd, data), where
sgd is SGDRegressor object and
data is a tuple: (X_train, y_train)
Returns
-------
sgd: object returned after executing .fit()
"""
sgd.fit(X_train, y_train)
return sgd
def psgd_method_2(sgd, loop_iter, coef, intercept, X_train, y_train):
"""
SGD method run in parallel using map.
Parameters
----------
args: tuple (sgd, data), where
sgd is SGDRegressor object and
data is a tuple: (X_train, y_train)
Returns
-------
sgd: object returned after executing .fit()
"""
for _ in range(loop_iter):
sgd.coef_ = coef
sgd.intercept_ = intercept
sgd.fit(X_train, y_train)
coef = sgd.coef_
intercept = sgd.intercept_
return sgd
def psgd_1(sgd, n_iter_per_job, n_jobs, X_train, y_train):
"""
Parallel SGD implementation using multiprocessing. All workers sync once after running SGD independently for
n_iter_per_job iterations.
Parameters
----------
    sgd: input SGDRegressor() object
n_iter_per_job: number of iterations per worker
n_jobs: number of parallel processes to run
X_train: train input data
y_train: train target data
Returns
-------
sgd: the input SGDRegressor() object with updated coef_ and intercept_
"""
sgds = Parallel(n_jobs=n_jobs)(
delayed(psgd_method_1)(s, X_train, y_train)
for s in [SGDRegressor(n_iter=n_iter_per_job) for _ in range(n_jobs)])
sgd.coef_ = np.array([x.coef_ for x in sgds]).mean(axis=0)
sgd.intercept_ = np.array([x.intercept_ for x in sgds]).mean(axis=0)
return sgd
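A hedged call sketch for psgd_1 on synthetic data; it assumes joblib's Parallel and delayed plus numpy are imported at module level, as the function body implies, and again an older scikit-learn exposing n_iter.

import numpy as np
from sklearn.linear_model import SGDRegressor   # old API exposing n_iter

rng = np.random.RandomState(0)
X = rng.randn(1000, 10)
y = X.dot(rng.randn(10))

sgd = psgd_1(SGDRegressor(), n_iter_per_job=25, n_jobs=4, X_train=X, y_train=y)
print(sgd.coef_)   # coefficients averaged across the 4 independent workers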
def define_model(self):
#if self.modeltype == "AR" :
# return statsmodels.tsa.ar_model.AR(max_order=self.parameters['max_order'])
if self.modeltype == "RandomForest" :
return ensemble.RandomForestRegressor(n_estimators=self.parameters['n_estimators'])
#return ensemble.RandomForestClassifier(
# n_estimators=self.parameters['n_estimators'])
elif self.modeltype == "LinearRegression" :
return linear_model.LinearRegression()
elif self.modeltype == "Lasso" :
return linear_model.Lasso(
alpha=self.parameters['alpha'])
elif self.modeltype == "ElasticNet" :
return linear_model.ElasticNet(
alpha=self.parameters['alpha'],
l1_ratio=self.parameters['l1_ratio'])
elif self.modeltype == "SVR" :
return SVR(
C=self.parameters['C'],
epsilon=self.parameters['epsilon'],
kernel=self.parameters['kernel'])
#elif self.modeltype == 'StaticModel':
# return StaticModel (
# parameters=self.parameters
# )
#elif self.modeltype == 'AdvancedStaticModel':
# return AdvancedStaticModel (
# parameters=self.parameters
# )
# elif self.modeltype == 'SGDRegressor' :
# print(self.parameters)
# return linear_model.SGDRegressor(
# loss=self.parameters['loss'],
# penalty=self.parameters['penalty'],
# l1_ratio=self.parameters['l1_ratio'])
else:
raise ConfigError("Unsupported model {0}".format(self.modeltype))
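For orientation, a hedged reconstruction of the minimal class context define_model needs; the class name, constructor wiring, and ConfigError definition are assumptions, since only the modeltype and parameters attributes are implied by the method body.

from sklearn import ensemble, linear_model
from sklearn.svm import SVR

class ConfigError(Exception):
    # assumed: the original project defines its own ConfigError
    pass

class Forecaster:
    def __init__(self, modeltype, parameters):
        self.modeltype = modeltype
        self.parameters = parameters
    define_model = define_model   # bind the function above as a method

model = Forecaster("ElasticNet", {"alpha": 0.1, "l1_ratio": 0.5}).define_model()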
def test_basic(self, single_chunk_regression):
X, y = single_chunk_regression
a = lm.PartialSGDRegressor(random_state=0,
max_iter=1000, tol=1e-3)
b = lm_.SGDRegressor(random_state=0, max_iter=1000, tol=1e-3)
a.fit(X, y)
b.partial_fit(X, y)
assert_estimator_equal(a, b)
def SGD_regression_test_error(X, y, X_test, y_test, delta, SGD_epochs):
# center training targets
y_mean = np.mean(y)
y_train = y - y_mean
# solve primal problem
clf = linear_model.SGDRegressor(alpha=delta, fit_intercept=False, n_iter=SGD_epochs)
clf.fit(X, y_train)
y_hat_test = y_mean + X_test.dot(clf.coef_)
return 100.0 * np.linalg.norm(y_hat_test - y_test) / np.linalg.norm(y_test)
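A quick synthetic check of SGD_regression_test_error; a sketch in which delta and the epoch count are arbitrary, and n_iter again assumes the older scikit-learn API.

import numpy as np
from sklearn import linear_model   # the function above expects this at module level

rng = np.random.RandomState(0)
w = rng.randn(20)
X, X_test = rng.randn(500, 20), rng.randn(100, 20)
y, y_test = X.dot(w), X_test.dot(w)

# prints the relative test error as a percentage
print(SGD_regression_test_error(X, y, X_test, y_test, delta=1.0, SGD_epochs=20))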
# BINARY SEARCH KERNEL WIDTH
def demo():
import sys
sys.path.append( '../core' )
from tools import make_XOR_dataset
from BR import BR
set_printoptions(precision=3, suppress=True)
X,Y = make_XOR_dataset()
N,L = Y.shape
print("CLASSIFICATION")
h = linear_model.SGDClassifier(n_iter=100)
nn = ELM(8,f=tanh,h=BR(-1,h))
nn.fit(X, Y)
# test it
print(nn.predict(X))
print("vs")
print(Y)
print("REGRESSION")
r = ELM(100,h=linear_model.LinearRegression())
r.fit(X,Y)
print(Y)
print(r.predict(X))
print("REGRESSION OI")
r = ELM_OI(100,h=BR(-1,h=linear_model.SGDRegressor()))
r.fit(X,Y)
print(Y)
print(r.predict(X))
def __init__(self):
# We create a separate model for each action in the environment's
# action space. Alternatively we could somehow encode the action
# into the features, but this way it's easier to code up.
self.actions = []
    for _ in range(env.action_space.n):
act = SGDRegressor(learning_rate="constant")
# We need to call partial_fit once to initialize the model
# or we get a NotFittedError when trying to make a prediction
# This is quite hacky.
act.partial_fit([self.featurize_state(env.reset())], [0])
self.actions.append(act)
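The constructor above implies one value estimate per action; here is a hedged sketch of the companion methods such an estimator usually carries (featurize_state, predict, and update are assumptions based on the surrounding comments, not confirmed by this snippet, and numpy is assumed imported as np).

def predict(self, state):
    # hypothetical: Q-values for every action in the given state
    features = [self.featurize_state(state)]
    return np.array([m.predict(features)[0] for m in self.actions])

def update(self, state, action, td_target):
    # hypothetical: one SGD step toward the TD target for the chosen action
    features = [self.featurize_state(state)]
    self.actions[action].partial_fit(features, [td_target])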
def train_sgd_regressor():
# Picking model
return mp.ModelProperties(regression=True, online=True), linear_model.SGDRegressor()
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveRegressor.html#sklearn.linear_model.PassiveAggressiveRegressor
def adbPredictor(df):
dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)
# clf = linear_model.SGDRegressor()
clf = ensemble.AdaBoostRegressor()
clf.fit(dataTrainX, dataTrainY)
predicted = clf.predict(dataTestX)
fig, ax = plotter.subplots()
    ax.set_ylabel('Predicted AdaBoost Weekly')
ax.scatter(dataTestY, predicted)
ax.set_xlabel('Measured')
predicted = np.reshape(predicted, (predicted.size, 1))
corrCoeff = pearsonr(dataTestY,predicted)
print(corrCoeff[0])
plotter.show()
return predicted
def __init__(self):
OutputThing.__init__(self, ports=['train', 'observe', 'predict'])
self.clf = linear_model.SGDRegressor()
def __init__(self):
Publisher.__init__(self, topics=['train', 'observe', 'predict'])
self.clf = linear_model.SGDRegressor()
def get_classifier(self, X, Y):
""" ????????
:param X: ????
:param Y: ??????
:return: ??
"""
clf = SGDRegressor()
clf.fit(X, Y)
return clf
def reset_args(self):
"""
"""
assert self.max_iter % self.n_iter_per_step == 0
linear_model.SGDRegressor.__init__(self,
alpha=self.alpha,
penalty=self.penalty,
n_iter=self.n_iter_per_step,
**self.kwargs)
def fit(self,X,y):
self.coef_ = None
self.intercept_ = None
self.stages_ = []
for i in range(0,self.max_iter,self.n_iter):
if self.coef_ is not None:
assert(self.intercept_ is not None)
linear_model.SGDRegressor.fit(self,X,y,coef_init=self.coef_,intercept_init=self.intercept_)
else:
linear_model.SGDRegressor.fit(self,X,y)
# record coefs and intercept for later
self.stages_.append((i+self.n_iter,self.coef_.copy(),self.intercept_.copy()))
logging.info('done %d/%d steps' % (i+self.n_iter,self.max_iter))
logging.info('training set auc %f' % self.auc(X,y))
def predict(self,X,coef=None,intercept=None):
"""
a) do the prediction based on given coefs and intercept, if provided.
b) Scale the predictions so that they are in 0..1.
"""
if coef is not None:
assert intercept is not None
self.intercept_ = intercept
self.coef_ = coef
return scale_predictions(linear_model.SGDRegressor.predict(self,X))
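reset_args, fit, and predict above read as methods of a staged-SGD subclass; below is a hedged reconstruction of the enclosing class. The constructor wiring is an assumption, and scale_predictions and auc are stand-ins inferred from the docstrings, not the project's real helpers.

import logging
from sklearn import linear_model
from sklearn.metrics import roc_auc_score

def scale_predictions(p):
    # stand-in: min-max scale predictions into 0..1, per the docstring above
    return (p - p.min()) / (p.max() - p.min())

class StagedSGDRegressor(linear_model.SGDRegressor):
    def __init__(self, alpha=1e-4, penalty='l2', max_iter=100,
                 n_iter_per_step=10, **kwargs):
        self.alpha = alpha
        self.penalty = penalty
        self.max_iter = max_iter
        self.n_iter_per_step = n_iter_per_step
        self.kwargs = kwargs
        self.reset_args()

    # bind the three functions above as methods
    reset_args, fit, predict = reset_args, fit, predict

    def auc(self, X, y):
        # stand-in for the original AUC helper
        return roc_auc_score(y, linear_model.SGDRegressor.predict(self, X))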
def buildNet(self):
net = linear_model.SGDRegressor(
n_iter = 1,
alpha = 0.0001,
shuffle=False
)
return net
def getModels():
result = []
result.append("LinearRegression")
result.append("BayesianRidge")
result.append("ARDRegression")
result.append("ElasticNet")
result.append("HuberRegressor")
result.append("Lasso")
result.append("LassoLars")
result.append("Rigid")
result.append("SGDRegressor")
result.append("SVR")
result.append("MLPClassifier")
result.append("KNeighborsClassifier")
result.append("SVC")
result.append("GaussianProcessClassifier")
result.append("DecisionTreeClassifier")
result.append("RandomForestClassifier")
result.append("AdaBoostClassifier")
result.append("GaussianNB")
result.append("LogisticRegression")
result.append("QuadraticDiscriminantAnalysis")
return result
def __getLearner(self):
return SGDRegressor(loss=self.loss, penalty=self.penalty,
alpha=self.alpha, l1_ratio=self.l1_ratio,
fit_intercept=self.intercept)
# Qfunction_approx.py, from project reinforcement-learning-market-microstructure by jacobkahn
def __init__(self, T, L, backup):
self.backup = backup
self.T = T
self.L = L
self.pre_process = PolynomialFeatures(degree=2, include_bias=False)
if self.backup['name'] == 'sampling':
self.Q = linear_model.SGDRegressor(loss='huber', penalty='l2', learning_rate='invscaling', eta0=0.1, power_t=0.25, warm_start=False)
elif self.backup['name'] == 'doubleQ':
self.Q_1 = linear_model.SGDRegressor(loss='huber', penalty='l2', learning_rate='invscaling', eta0=0.1, power_t=0.25, warm_start=False)
self.Q_2 = linear_model.SGDRegressor(loss='huber', penalty='l2', learning_rate='invscaling', eta0=0.1, power_t=0.25, warm_start=False)
elif self.backup['name'] == 'replay buffer':
self.Q = linear_model.SGDRegressor(loss='huber', penalty='l2', learning_rate='invscaling', eta0=0.1, power_t=0.25, warm_start=False)
self.buff = []
else:
print "Illegal Backup Type"
def parallel_sgd(pool, sgd, n_iter, n_jobs, n_sync, data):
"""
High level parallelization of SGDRegressor.
Parameters
----------
pool: multiprocessor pool to use for this parallelization
sgd: SGDRegressor instance whose coef and intercept need to be updated
n_iter: number of iterations per worker
n_jobs: number of parallel workers
    n_sync: number of synchronization steps, spread evenly throughout the iterations
data: list of (X, y) data for the workers. This list should have n_jobs elements
Returns
-------
sgd: SGDRegressor instance with updated coef and intercept
"""
# eta = sgd.eta0*n_jobs
eta = sgd.eta0
    n_iter_sync = n_iter // n_sync  # Iterations per model between syncs
sgds = [SGDRegressor(warm_start=True, n_iter=n_iter_sync, eta0=eta)
for _ in range(n_jobs)]
for _ in range(n_sync):
args = zip(sgds, data)
sgds = pool.map(psgd_method, args)
coef = np.array([x.coef_ for x in sgds]).mean(axis=0)
intercept = np.array([x.intercept_ for x in sgds]).mean(axis=0)
for s in sgds:
s.coef_ = coef
s.intercept_ = intercept
sgd.coef_ = coef
sgd.intercept_ = intercept
return sgd
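A hedged driver sketch for parallel_sgd, sharding the training data across workers with a multiprocessing pool; the shard layout and hyperparameters are illustrative.

import numpy as np
from multiprocessing import Pool
from sklearn.linear_model import SGDRegressor   # old API exposing n_iter

rng = np.random.RandomState(0)
X = rng.randn(4000, 10)
y = X.dot(rng.randn(10))

n_jobs = 4
shards = list(zip(np.array_split(X, n_jobs), np.array_split(y, n_jobs)))
pool = Pool(n_jobs)
sgd = parallel_sgd(pool, SGDRegressor(eta0=0.01), n_iter=100,
                   n_jobs=n_jobs, n_sync=10, data=shards)
pool.close()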
def psgd_3(sgd, n_iter_per_job, n_jobs, n_syncs, X_train, y_train):
"""
Parallel SGD implementation using multiprocessing. All workers sync n_syncs times while running SGD independently
for n_iter_per_job iterations. Each worker will have an increased learning rate -- multiple of n_jobs.
Parameters
----------
    sgd: input SGDRegressor() object
n_iter_per_job: number of iterations per worker
n_jobs: number of parallel processes to run
n_syncs: number of syncs
X_train: train input data
y_train: train target data
Returns
-------
sgd: the input SGDRegressor() object with updated coef_ and intercept_
"""
    n_iter_sync = n_iter_per_job // n_syncs  # Iterations per model between syncs
eta = sgd.eta0 * n_jobs
sgds = [SGDRegressor(warm_start=True, n_iter=n_iter_sync, eta0=eta)
for _ in range(n_jobs)]
for _ in range(n_syncs):
sgds = Parallel(n_jobs=n_jobs)(
delayed(psgd_method_1)(s, X_train, y_train) for s in sgds)
coef = np.array([x.coef_ for x in sgds]).mean(axis=0)
intercept = np.array([x.intercept_ for x in sgds]).mean(axis=0)
for s in sgds:
s.coef_ = coef
s.intercept_ = intercept
sgd.coef_ = coef
sgd.intercept_ = intercept
return sgd
def psgd_4(sgd, n_iter_per_job, n_jobs, X_train, y_train, coef, intercept):
"""
    Parallel SGD implementation using multithreading. All workers read coef and intercept from shared memory,
process them, and then overwrite them.
Parameters
----------
    sgd: input SGDRegressor() object
n_iter_per_job: number of iterations per worker
n_jobs: number of parallel processes to run
X_train: train input data
y_train: train target data
coef: randomly initialized coefs stored in shared memory
intercept: randomly initialized intercept stored in shared memory
Returns
-------
sgd: the input SGDRegressor() object with updated coef_ and intercept_
"""
sgds = [SGDRegressor(warm_start=True, n_iter=1)
for _ in range(n_jobs)]
sgds = Parallel(n_jobs=n_jobs, backend="threading")(
delayed(psgd_method_2) (s, n_iter_per_job, coef, intercept, X_train, y_train)
for s in sgds)
sgd.coef_ = np.array([x.coef_ for x in sgds]).mean(axis=0)
sgd.intercept_ = np.array([x.intercept_ for x in sgds]).mean(axis=0)
return sgd
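A hedged call sketch for psgd_4: because it uses joblib's threading backend, plain NumPy arrays can serve as the shared coef/intercept state; the initialization here is illustrative.

import numpy as np
from sklearn.linear_model import SGDRegressor   # old API exposing n_iter

rng = np.random.RandomState(0)
X = rng.randn(1000, 10)
y = X.dot(rng.randn(10))

coef = rng.randn(10)        # shared, randomly initialized coefficients
intercept = rng.randn(1)    # shared intercept
sgd = psgd_4(SGDRegressor(), n_iter_per_job=25, n_jobs=4,
             X_train=X, y_train=y, coef=coef, intercept=intercept)
print(sgd.coef_)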
def __init__(self,
mode='sequence',
random_state=1234):
"""Constructor.
Params
------
mode : str
Values: 'sequence', 'rnafold' or 'rnaplfold'.
random_state : int (default : 1234)
Seed for random number generator.
"""
self.mode = mode
self.max_dist = None
self.preprocessor_args = dict()
self.vectorizer_args = dict()
self.regressor_args = dict()
self.smoothing_args = dict()
if mode == 'sequence':
self.preprocessor = seq.sequence_preprocessor
self.vote_aggregator = seq.vote_aggregator
elif mode == 'rnafold' or mode == 'rnaplfold':
if mode == 'rnafold':
self.preprocessor = graph.rnafold_preprocessor
else:
self.preprocessor = graph.rnaplfold_preprocessor
self.vote_aggregator = graph.vote_aggregator
else:
raise Exception("Unrecognized mode: %s" % mode)
exit(1)
self.regressor = SGDRegressor(shuffle=True,
random_state=random_state)
# status variables
self.is_optimized = False
self.is_fitted = False
def getSKLearnModel(modelName):
if modelName == 'LinearRegression':
model = linear_model.LinearRegression()
elif modelName == 'BayesianRidge':
model = linear_model.BayesianRidge()
elif modelName == 'ARDRegression':
model = linear_model.ARDRegression()
elif modelName == 'ElasticNet':
model = linear_model.ElasticNet()
elif modelName == 'HuberRegressor':
model = linear_model.HuberRegressor()
elif modelName == 'Lasso':
model = linear_model.Lasso()
elif modelName == 'LassoLars':
model = linear_model.LassoLars()
    elif modelName == 'Ridge':
model = linear_model.Ridge()
elif modelName == 'SGDRegressor':
model = linear_model.SGDRegressor()
elif modelName == 'SVR':
model = SVR()
elif modelName=='MLPClassifier':
model = MLPClassifier()
elif modelName=='KNeighborsClassifier':
model = KNeighborsClassifier()
elif modelName=='SVC':
model = SVC()
elif modelName=='GaussianProcessClassifier':
model = GaussianProcessClassifier()
elif modelName=='DecisionTreeClassifier':
model = DecisionTreeClassifier()
elif modelName=='RandomForestClassifier':
model = RandomForestClassifier()
elif modelName=='AdaBoostClassifier':
model = AdaBoostClassifier()
elif modelName=='GaussianNB':
model = GaussianNB()
elif modelName=='LogisticRegression':
model = linear_model.LogisticRegression()
elif modelName=='QuadraticDiscriminantAnalysis':
model = QuadraticDiscriminantAnalysis()
    else:
        raise ValueError("Unsupported model: {0}".format(modelName))
    return model
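A small sketch tying getModels and getSKLearnModel together; it assumes both helpers and the sklearn imports they rely on are in scope.

for name in getModels():
    model = getSKLearnModel(name)
    print(name, "->", type(model).__name__)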
def get_model_list(task_name):
model_list, name_list = [], []
model_list.append(linear_model.LinearRegression())
name_list.append('LR')
#
model_list.append(linear_model.SGDRegressor())
name_list.append('LR_SGD')
model_list.append(linear_model.Lasso(alpha = 1.0))
name_list.append('Lasso')
    model_list.append(linear_model.Ridge(alpha = 1.0))
name_list.append('Ridge')
model_list.append(linear_model.LassoLars(alpha=.1))
name_list.append('LassoLars')
model_list.append(linear_model.BayesianRidge())
name_list.append('BayesianRidge')
model_list.append(KernelRidge(alpha=1.0))
name_list.append('KernelRidge')
    model_list.append(gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1))  # pre-0.18 scikit-learn API
name_list.append('GaussianProcess')
model_list.append(KNeighborsRegressor(weights = 'uniform',n_neighbors=3))
name_list.append('KNN_unif')
model_list.append(KNeighborsRegressor(weights = 'distance',n_neighbors=3))
name_list.append('KNN_dist')
model_list.append(SVR(kernel = 'linear', C = 1, gamma = 'auto', coef0 = 0, degree = 2))
name_list.append('SVM_linear')
model_list.append(SVR(kernel = 'poly', C = 1, gamma = 'auto', coef0 = 0, degree = 2))
name_list.append('SVM_poly')
model_list.append(SVR(kernel = 'rbf', C = 1, gamma = 'auto', coef0 = 0, degree = 2))
name_list.append('SVM_rbf')
model_list.append(DecisionTreeRegressor())
name_list.append('DT')
model_list.append(RandomForestRegressor(n_estimators=100, max_depth=None,min_samples_split=2, random_state=0))
name_list.append('RF')
model_list.append(ExtraTreesRegressor(n_estimators=100, max_depth=None, max_features='auto', min_samples_split=2, random_state=0))
name_list.append('ET')
return model_list, name_list
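A possible evaluation loop over the returned models; a sketch that assumes an older scikit-learn (0.18-0.19) where gaussian_process.GaussianProcess still exists alongside model_selection.

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

rng = np.random.RandomState(0)
X = rng.randn(300, 8)
y = X.dot(rng.randn(8))
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

models, names = get_model_list('regression')   # task_name is unused above
for model, name in zip(models, names):
    model.fit(X_tr, y_tr)
    print(name, mean_squared_error(y_te, model.predict(X_te)))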