def create_updates(loss, network, opt, learning_rate, momentum, beta1, beta2):
    params = lasagne.layers.get_all_params(network, trainable=True)
    grads = theano.grad(loss, params)
    # if max_norm:
    #     names = ['crf.U', 'crf.W_h', 'crf.W_c', 'crf.b']
    #     constraints = [grad for param, grad in zip(params, grads) if param.name in names]
    #     assert len(constraints) == 4
    #     scaled_grads = total_norm_constraint(constraints, max_norm=max_norm)
    #     counter = 0
    #     for i in range(len(params)):
    #         param = params[i]
    #         if param.name in names:
    #             grads[i] = scaled_grads[counter]
    #             counter += 1
    #     assert counter == 4
    if opt == 'adam':
        updates = adam(grads, params=params, learning_rate=learning_rate, beta1=beta1, beta2=beta2)
    elif opt == 'momentum':
        updates = nesterov_momentum(grads, params=params, learning_rate=learning_rate, momentum=momentum)
    else:
        raise ValueError('unknown optimization algorithm: %s' % opt)
    return updates
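# A minimal usage sketch for create_updates (hypothetical names: `network`
# is a built Lasagne network, `loss` a scalar Theano expression over the
# symbolic inputs `x` and `y`):
updates = create_updates(loss, network, opt='adam', learning_rate=1e-3,
                         momentum=0.9, beta1=0.9, beta2=0.999)
train_fn = theano.function([x, y], loss, updates=updates)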
def create_optimiser(optimiser):
    """
    Creates a function that returns an optimiser and (optionally) a
    learning rate schedule.
    """
    if optimiser['schedule'] is not None:
        # if we have a learn rate schedule, create a theano shared
        # variable and a corresponding update
        lr = theano.shared(np.float32(optimiser['params']['learning_rate']))
        # create a copy of the optimiser config dict so we do not change it
        from copy import deepcopy
        optimiser = deepcopy(optimiser)
        optimiser['params']['learning_rate'] = lr
        lrs = nn.LearnRateSchedule(learning_rate=lr, **optimiser['schedule'])
    else:
        lrs = None
    return partial(getattr(lnn.updates, optimiser['name']),
                   **optimiser['params']), lrs
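# A hedged example config for create_optimiser: 'name' is looked up in
# lnn.updates, so any Lasagne update rule works; the schedule keys depend
# on nn.LearnRateSchedule and are omitted here. `loss` and `params` are
# hypothetical:
optimiser_cfg = dict(name='adam',
                     params=dict(learning_rate=0.001),
                     schedule=None)
make_updates, lrs = create_optimiser(optimiser_cfg)
updates = make_updates(loss, params)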
def deepmind_rmsprop(loss_or_grads, params, learning_rate=0.00025,
                     rho=0.95, epsilon=0.01):
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        # running average of the gradient
        acc_grad = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                 broadcastable=param.broadcastable)
        acc_grad_new = rho * acc_grad + (1 - rho) * grad
        # running average of the squared gradient
        acc_rms = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                broadcastable=param.broadcastable)
        acc_rms_new = rho * acc_rms + (1 - rho) * grad ** 2
        updates[acc_grad] = acc_grad_new
        updates[acc_rms] = acc_rms_new
        # normalise by an estimate of the gradient's standard deviation:
        # sqrt(E[g^2] - E[g]^2 + epsilon)
        updates[param] = (param - learning_rate *
                          (grad /
                           T.sqrt(acc_rms_new - acc_grad_new ** 2 + epsilon)))
    return updates
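# Note: this is the "centred" RMSProp variant used in DeepMind's DQN work;
# the denominator sqrt(E[g^2] - E[g]^2 + eps) estimates the gradient's
# standard deviation rather than its raw RMS. A minimal usage sketch
# (hypothetical names: `loss`, `network`, `input_var`):
params = lasagne.layers.get_all_params(network, trainable=True)
updates = deepmind_rmsprop(loss, params, learning_rate=0.00025, rho=0.95,
                           epsilon=0.01)
train_fn = theano.function([input_var], loss, updates=updates)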
def __init__(self, params, params_task, X, model, policy):
    self.rng = np.random.RandomState()
    self.model = model
    self.policy = policy
    self.params = params
    self.params_task = params_task
    self.x = T.matrix('x')
    cost = self.control(self.x)
    self.fwpass = theano.function(inputs=[self.x], outputs=cost,
                                  allow_input_downcast=True)
    self.train_func = theano.function(
        inputs=[self.x], outputs=[cost],
        updates=self.adam(cost,
                          lasagne.layers.get_all_params(self.policy, trainable=True),
                          learning_rate=self.params['learning_rate']))
    self.policy_network = theano.function(inputs=[self.x],
                                          outputs=self.predict(self.x))
def define_updates(output_layer, X, Y):
    output_train = lasagne.layers.get_output(output_layer)
    output_test = lasagne.layers.get_output(output_layer, deterministic=True)
    # Set up the loss we aim to minimize. With categorical cross entropy,
    # Y should contain integer class labels, not one-hot vectors.
    loss = lasagne.objectives.categorical_crossentropy(
        T.clip(output_train, 0.000001, 0.999999), Y)
    loss = loss.mean()
    acc = T.mean(T.eq(T.argmax(output_train, axis=1), Y),
                 dtype=theano.config.floatX)
    # If using ResNet, use L2 regularization
    all_layers = lasagne.layers.get_all_layers(output_layer)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        all_layers, lasagne.regularization.l2) * P.L2_LAMBDA
    loss = loss + l2_penalty
    # Set up loss functions for the validation dataset
    test_loss = lasagne.objectives.categorical_crossentropy(
        T.clip(output_test, 0.000001, 0.999999), Y)
    test_loss = test_loss.mean()
    test_loss = test_loss + l2_penalty
    test_acc = T.mean(T.eq(T.argmax(output_test, axis=1), Y),
                      dtype=theano.config.floatX)
    # Get parameters from the network and set up SGD with Nesterov momentum;
    # l_r is a shared variable so the learning rate can be changed during training.
    l_r = theano.shared(np.array(LR_SCHEDULE[0], dtype=theano.config.floatX))
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=l_r, momentum=P.MOMENTUM)
    # updates = adam(loss, params, learning_rate=l_r)
    prediction_binary = T.argmax(output_train, axis=1)
    test_prediction_binary = T.argmax(output_test, axis=1)
    # Set up training and prediction functions
    train_fn = theano.function(
        inputs=[X, Y],
        outputs=[loss, l2_penalty, acc, prediction_binary, output_train[:, 1]],
        updates=updates)
    valid_fn = theano.function(
        inputs=[X, Y],
        outputs=[test_loss, l2_penalty, test_acc, test_prediction_binary,
                 output_test[:, 1]])
    return train_fn, valid_fn, l_r
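# Hypothetical training loop showing how the returned l_r shared variable
# lets the learning rate follow a schedule. LR_SCHEDULE is assumed to be a
# dict mapping epoch -> learning rate (suggested by LR_SCHEDULE[0] above);
# `iterate_minibatches` is a placeholder for whatever batching the project
# uses:
train_fn, valid_fn, l_r = define_updates(output_layer, X, Y)
for epoch in range(num_epochs):
    if epoch in LR_SCHEDULE:
        l_r.set_value(np.float32(LR_SCHEDULE[epoch]))
    for x_batch, y_batch in iterate_minibatches(x_train, y_train):
        loss, l2, acc, preds, probs = train_fn(x_batch, y_batch)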
def __init__(self, isTrain, isNN):
    super(RegressionNN, self).__init__(isTrain, isNN)
    # data preprocessing
    # self.dataPreprocessing()
    self.net1 = NeuralNet(
        layers=[  # three layers: one hidden layer
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            # ('hidden2', layers.DenseLayer),
            # ('hidden3', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, 13),  # input dimension is 13
        hidden_num_units=6,  # number of units in hidden layer
        # hidden2_num_units=8,  # number of units in hidden layer
        # hidden3_num_units=4,  # number of units in hidden layer
        output_nonlinearity=None,  # linear output layer (regression)
        output_num_units=1,  # output dimension is 1
        # objective function
        objective_loss_function=lasagne.objectives.squared_error,
        # optimization method:
        update=lasagne.updates.nesterov_momentum,
        update_learning_rate=0.002,
        update_momentum=0.4,
        # use 20% as validation
        train_split=TrainSplit(eval_size=0.2),
        regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=100,  # we want to train this many epochs
        verbose=0,
    )
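# Hypothetical usage of the wrapped nolearn net (X: float32 array of shape
# (n, 13); y: float32 array of shape (n, 1) -- nolearn expects 2-D targets
# when regression=True):
model = RegressionNN(isTrain=True, isNN=True)
model.net1.fit(X, y)
y_pred = model.net1.predict(X_test)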
def setup(dim, criterion='mmd', biased=True, streaming_est=False, opt_log=True,
          linear_kernel=False, opt_sigma=False, init_log_sigma=0,
          net_version='basic', hotelling_reg=0,
          strat='nesterov_momentum', learning_rate=0.01, **opt_args):
    input_p = T.matrix('input_p')
    input_q = T.matrix('input_q')
    mmd2_pq, obj, rep_p, net_p, net_q, log_sigma = make_network(
        input_p, input_q, dim,
        criterion=criterion, biased=biased, streaming_est=streaming_est,
        opt_log=opt_log, linear_kernel=linear_kernel, log_sigma=init_log_sigma,
        hotelling_reg=hotelling_reg, net_version=net_version)
    params = lasagne.layers.get_all_params([net_p, net_q], trainable=True)
    if opt_sigma:
        params.append(log_sigma)
    # look up the requested update rule (e.g. nesterov_momentum, adam) by name
    fn = getattr(lasagne.updates, strat)
    updates = fn(obj, params, learning_rate=learning_rate, **opt_args)
    print("Compiling...", file=sys.stderr, end='')
    train_fn = theano.function(
        [input_p, input_q], [mmd2_pq, obj], updates=updates)
    val_fn = theano.function([input_p, input_q], [mmd2_pq, obj])
    get_rep = theano.function([input_p], rep_p)
    print("done", file=sys.stderr)
    return params, train_fn, val_fn, get_rep, log_sigma
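# Hypothetical training sketch for the two-sample test above (X_p, X_q are
# assumed float32 numpy arrays of shape (n, dim); `make_network` comes from
# the surrounding project and is not shown here):
params, train_fn, val_fn, get_rep, log_sigma = setup(dim=X_p.shape[1],
                                                     strat='adam',
                                                     learning_rate=1e-3)
for epoch in range(100):
    mmd2, obj = train_fn(X_p, X_q)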
def updater(self):
    return getattr(lasagne.updates, self.learning_algorithm)
def control(self, st):
    srng = T.shared_randomstreams.RandomStreams(self.rng.randint(999999))
    # do n roll-outs for each starting state
    n = self.params['samples']
    st_s = T.tile(st, [n, 1])
    onoise = srng.normal(size=(st_s.shape[0], 1, self.params['T']))
    inoise = T.sqrt(st.shape[1]) * srng.normal(size=(n, st.shape[0], self.params['T']))
    ([_, _, R], updates) = theano.scan(
        fn=self._step,
        outputs_info=[st_s, T.as_tensor_variable(0), None],
        n_steps=self.params['T'],
        non_sequences=[onoise, inoise])
    return R.mean()
def adam(self, cost, params, learning_rate=0.001, beta1=0.9,
         beta2=0.999, epsilon=1e-8):
    all_grads = T.grad(cost=cost, wrt=params)
    # clip the global gradient norm to 10
    all_grads = total_norm_constraint(all_grads, 10)
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), all_grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    t_prev = theano.shared(utils.floatX(0.))
    updates = OrderedDict()
    t = t_prev + 1
    a_t = learning_rate * T.sqrt(1 - beta2**t) / (1 - beta1**t)
    for param, g_t in zip(params, all_grads):
        # if the gradient norm is NaN/Inf, fall back to shrinking the weights
        g_t = T.switch(not_finite, 0.1 * param, g_t)
        value = param.get_value(borrow=True)
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        m_t = beta1 * m_prev + (1 - beta1) * g_t
        v_t = beta2 * v_prev + (1 - beta2) * g_t**2
        step = a_t * m_t / (T.sqrt(v_t) + epsilon)
        updates[m_prev] = m_t
        updates[v_prev] = v_t
        updates[param] = param - step
    updates[t_prev] = t
    return updates
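# Quick numeric check (plain Python) of the bias-corrected step size a_t
# used above, which compensates for the zero-initialised moment estimates
# -- at t=1 with the default hyper-parameters:
import math
lr, beta1, beta2, t = 0.001, 0.9, 0.999, 1
a_t = lr * math.sqrt(1 - beta2**t) / (1 - beta1**t)
print(a_t)  # ~= 3.16e-4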
def omniglot():
    input_var = T.tensor3('input')  # (batch_size, time, input_dim)
    target_var = T.imatrix('target')  # (batch_size, time) label indices
    # Load data
    generator = OmniglotGenerator(data_folder='./data/omniglot', batch_size=16,
                                  nb_samples=5, nb_samples_per_class=10,
                                  max_rotation=0., max_shift=0, max_iter=None)
    output_var, output_var_flatten, params = memory_augmented_neural_network(
        input_var, target_var, batch_size=generator.batch_size,
        nb_class=generator.nb_samples, memory_shape=(128, 40),
        controller_size=200, input_size=20 * 20, nb_reads=4)
    cost = T.mean(T.nnet.categorical_crossentropy(output_var_flatten,
                                                  target_var.flatten()))
    updates = lasagne.updates.adam(cost, params, learning_rate=1e-3)
    accuracies = accuracy_instance(T.argmax(output_var, axis=2), target_var,
                                   batch_size=generator.batch_size)
    print('Compiling the model...')
    train_fn = theano.function([input_var, target_var], cost, updates=updates)
    accuracy_fn = theano.function([input_var, target_var], accuracies)
    print('Done')
    print('Training...')
    t0 = time.time()
    all_scores, scores, accs = [], [], np.zeros(generator.nb_samples_per_class)
    try:
        for i, (example_input, example_output) in generator:
            score = train_fn(example_input, example_output)
            acc = accuracy_fn(example_input, example_output)
            all_scores.append(score)
            scores.append(score)
            accs += acc
            # report the running mean cost and accuracies every 100 episodes
            if i > 0 and not (i % 100):
                print('Episode %05d: %.6f' % (i, np.mean(scores)))
                print(accs / 100.)
                scores, accs = [], np.zeros(generator.nb_samples_per_class)
    except KeyboardInterrupt:
        print(time.time() - t0)
def test_sa(self):
    self.precheck()
    train = nn.updates.sa(
        self.inputs, self.loss, self.params, outputs=[self.loss / 2],
        iters=2014, initial_temperature=2.0e-1, learning_rate=5.0e-1
    )
    ret = train(*self.get_inputs())
    assert len(ret) == 1, 'Optimization function should return output!'
    self.check('Simulated Annealing')
def test_adastep(self):
    self.precheck()
    train = nn.updates.adastep(
        self.inputs, self.loss, self.params, outputs=[self.loss / 2],
        max_iter=8, rho=0.9, initial_learning_rate=1.0e-1, momentum=0.9,
        max_learning_rate=1.0e-1, max_delta=0.1
    )
    for i in range(128):
        ret = train(*self.get_inputs())
        assert len(ret) == 1
    self.check('AdaStep')
def std_opt(self, method, learning_rate=1.0e-3, *args, **kwargs):
    if not callable(method):
        import lasagne.updates as updates
        method = getattr(updates, method)
    self.precheck()
    upd = method(self.loss, self.params, learning_rate=learning_rate, *args, **kwargs)
    train = theano.function(self.inputs, outputs=self.loss, updates=upd)
    # path = []
    for i in range(2048):
        train(*self.get_inputs())
        # path.append(self.params[0].get_value())
    # Optional visualization of the optimization path over the loss surface:
    # path = np.array(path)
    # Xs, Ys = np.meshgrid(np.linspace(-1, 2, num=50), np.linspace(-1, 2, num=50))
    # Zs = np.zeros(shape=(50, 50))
    # for i in range(50):
    #     for j in range(50):
    #         Zs[i, j] = self.get_loss(np.array([Xs[i, j], Ys[i, j]]).astype('float32'),
    #                                  *self.get_inputs())
    # import matplotlib.pyplot as plt
    # plt.figure()
    # plt.contourf(Xs, Ys, Zs)
    # plt.colorbar()
    # plt.scatter(path[:, 0], path[:, 1],
    #             color=[plt.cm.Greys(x) for x in np.linspace(0, 1, num=2048)], s=5)
    # plt.show()
    self.check(method)
def test_pseudograd(self):
    self.std_opt(nn.updates.pseudograd, temperature=1.0e-3, learning_rate=1.0e-2)
def test_adastep(self):
    self.precheck()
    train = nn.updates.adastep(
        self.inputs, self.loss, self.params, outputs=[self.loss / 2],
        max_iter=8, rho=0.9, initial_learning_rate=1.0e-3, momentum=0.9,
        max_learning_rate=1.0e+6, max_delta=1.0e-1, eps=1.0e-6
    )
    for i in range(512):
        ret = train()
        assert len(ret) == 1
    self.check('AdaStep')
# Source file: RegressionUniformBlending.py, project: AirTicketPredicting, author: junlulocky
def __init__(self, isTrain):
    super(RegressionUniformBlending, self).__init__(isTrain)
    # data preprocessing
    # self.dataPreprocessing()
    self.net1 = NeuralNet(
        layers=[  # three layers: one hidden layer
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            # ('hidden2', layers.DenseLayer),
            # ('hidden3', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, 13),  # input dimension is 13
        hidden_num_units=6,  # number of units in hidden layer
        # hidden2_num_units=8,  # number of units in hidden layer
        # hidden3_num_units=4,  # number of units in hidden layer
        output_nonlinearity=None,  # linear output layer (regression)
        output_num_units=1,  # output dimension is 1
        # objective function
        objective_loss_function=lasagne.objectives.squared_error,
        # optimization method:
        update=lasagne.updates.nesterov_momentum,
        update_learning_rate=0.002,
        update_momentum=0.4,
        # use 20% as validation
        train_split=TrainSplit(eval_size=0.2),
        regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=100,  # we want to train this many epochs
        verbose=0,
    )
    # Create linear regression object
    self.linRegr = linear_model.LinearRegression()
    # Create KNN regression object
    self.knn = neighbors.KNeighborsRegressor(86, weights='distance')
    # Create decision tree regression object
    self.decisionTree = DecisionTreeRegressor(max_depth=7, max_features=None)
    # Create AdaBoost regression object
    decisionReg = DecisionTreeRegressor(max_depth=10)
    rng = np.random.RandomState(1)
    self.adaReg = AdaBoostRegressor(decisionReg,
                                    n_estimators=400,
                                    random_state=rng)
    # Create random forest regression object
    self.model = RandomForestRegressor(max_features='sqrt', n_estimators=32,
                                       max_depth=39)
def _compile(self, ddqn):
    a = self.inputs["A"]
    r = self.inputs["R"]
    nonterminal = self.inputs["Nonterminal"]
    q = ls.get_output(self.network, deterministic=True)
    if ddqn:
        # Double DQN: the online network chooses the action,
        # the frozen (target) network evaluates it
        q2 = ls.get_output(self.network, deterministic=True,
                           inputs=self.alternate_input_mappings)
        q2_action_ref = tensor.argmax(q2, axis=1)
        q2_frozen = ls.get_output(self.frozen_network, deterministic=True)
        q2_max = q2_frozen[tensor.arange(q2_action_ref.shape[0]), q2_action_ref]
    else:
        q2_max = tensor.max(ls.get_output(self.frozen_network, deterministic=True), axis=1)
    target_q = r + self.gamma * nonterminal * q2_max
    predicted_q = q[tensor.arange(q.shape[0]), a]
    loss = self.build_loss_expression(predicted_q, target_q).sum()
    params = ls.get_all_params(self.network, trainable=True)
    # updates = lasagne.updates.rmsprop(loss, params, self._learning_rate, rho=0.95)
    updates = deepmind_rmsprop(loss, params, self.learning_rate)
    # TODO: does FAST_RUN speed anything up?
    mode = None  # "FAST_RUN"
    s0_img = self.inputs["S0"]
    s1_img = self.inputs["S1"]
    if self.misc_state_included:
        s0_misc = self.inputs["S0_misc"]
        s1_misc = self.inputs["S1_misc"]
        print("Compiling the training function...")
        self._learn = theano.function(
            [s0_img, s0_misc, s1_img, s1_misc, a, r, nonterminal], loss,
            updates=updates, mode=mode, name="learn_fn")
        print("Compiling the evaluation function...")
        self._evaluate = theano.function([s0_img, s0_misc], q, mode=mode,
                                         name="eval_fn")
    else:
        print("Compiling the training function...")
        self._learn = theano.function([s0_img, s1_img, a, r, nonterminal], loss,
                                      updates=updates, mode=mode, name="learn_fn")
        print("Compiling the evaluation function...")
        self._evaluate = theano.function([s0_img], q, mode=mode, name="eval_fn")
    print("Network compiled.")