import theano
import lasagne
from lasagne.updates import adam, nesterov_momentum


def create_updates(loss, network, opt, learning_rate, momentum, beta1, beta2):
    # Collect trainable parameters and their gradients w.r.t. the loss.
    params = lasagne.layers.get_all_params(network, trainable=True)
    grads = theano.grad(loss, params)
    # if max_norm:
    #     names = ['crf.U', 'crf.W_h', 'crf.W_c', 'crf.b']
    #     constraints = [grad for param, grad in zip(params, grads) if param.name in names]
    #     assert len(constraints) == 4
    #     scaled_grads = total_norm_constraint(constraints, max_norm=max_norm)
    #     counter = 0
    #     for i in xrange(len(params)):
    #         param = params[i]
    #         if param.name in names:
    #             grads[i] = scaled_grads[counter]
    #             counter += 1
    #     assert counter == 4
    if opt == 'adam':
        updates = adam(grads, params=params, learning_rate=learning_rate, beta1=beta1, beta2=beta2)
    elif opt == 'momentum':
        updates = nesterov_momentum(grads, params=params, learning_rate=learning_rate, momentum=momentum)
    else:
        raise ValueError('unknown optimization algorithm: %s' % opt)
    return updates
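A minimal usage sketch for create_updates, assuming a toy Lasagne network and a cross-entropy loss; the input/target variables and the small network below are illustrative assumptions, not part of the original snippet:

import theano.tensor as T

# Illustrative toy setup; only create_updates() above comes from the snippet.
input_var = T.matrix('inputs')
target_var = T.ivector('targets')
l_in = lasagne.layers.InputLayer((None, 100), input_var=input_var)
network = lasagne.layers.DenseLayer(l_in, num_units=10,
                                    nonlinearity=lasagne.nonlinearities.softmax)

prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()

updates = create_updates(loss, network, opt='adam', learning_rate=1e-3,
                         momentum=0.9, beta1=0.9, beta2=0.999)
train_fn = theano.function([input_var, target_var], loss, updates=updates)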
Python adam() usage examples (source code)
import numpy as np
import theano
from lasagne.updates import adam
from nolearn.lasagne import NeuralNet, TrainSplit, BatchIterator


def model_initial(X_train, y_train, max_iter=5):
    # create_network() and SaveTrainHistory are defined elsewhere in the original project.
    global params, val_acc
    params = []
    val_acc = np.zeros(max_iter)
    lr = theano.shared(np.float32(1e-4))
    for iteration in range(max_iter):
        print('Initializing weights (%d/%d) ...' % (iteration + 1, max_iter))
        network_init = create_network()
        net_init = NeuralNet(
            network_init,
            max_epochs=3,
            update=adam,
            update_learning_rate=lr,
            train_split=TrainSplit(eval_size=0.1),
            batch_iterator_train=BatchIterator(batch_size=32),
            batch_iterator_test=BatchIterator(batch_size=64),
            on_training_finished=[SaveTrainHistory(iteration=iteration)],
            verbose=0)
        net_init.initialize()
        net_init.fit(X_train, y_train)
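SaveTrainHistory is referenced above but not included in these snippets. A plausible minimal implementation, consistent with how the global params and val_acc are used later, might look like the following; the valid_accuracy key and get_all_params_values() follow nolearn's conventions, but this is an assumption, not the original class:

class SaveTrainHistory(object):
    # Hypothetical nolearn on_training_finished callback: remember this run's
    # weights and its best validation accuracy in the globals used above.
    def __init__(self, iteration):
        self.iteration = iteration

    def __call__(self, nn, train_history):
        params.append(nn.get_all_params_values())
        val_acc[self.iteration] = max(h['valid_accuracy'] for h in train_history)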
#model training
def model_train(X_train, y_train, learning_rate=1e-4, epochs=50):
    network = create_network()
    lr = theano.shared(np.float32(learning_rate))
    net = NeuralNet(
        network,
        max_epochs=epochs,
        update=adam,
        update_learning_rate=lr,
        train_split=TrainSplit(eval_size=0.1),
        batch_iterator_train=BatchIterator(batch_size=32),
        batch_iterator_test=BatchIterator(batch_size=64),
        #on_training_started=[LoadBestParam(iteration=val_acc.argmax())],
        on_epoch_finished=[EarlyStopping(patience=5)],
        verbose=1)
    print('Loading pre-training weights...')
    net.load_params_from(params[val_acc.argmax()])
    print('Continue to train...')
    net.fit(X_train, y_train)
    print('Model training finished.')
    return net
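EarlyStopping(patience=5) is also referenced without being shown. The widely used nolearn early-stopping recipe it most likely corresponds to is sketched here as an assumption:

class EarlyStopping(object):
    # Hypothetical on_epoch_finished callback: stop training once the validation
    # loss has not improved for `patience` epochs, restoring the best weights.
    def __init__(self, patience=5):
        self.patience = patience
        self.best_valid = float('inf')
        self.best_valid_epoch = 0
        self.best_weights = None

    def __call__(self, nn, train_history):
        current_valid = train_history[-1]['valid_loss']
        current_epoch = train_history[-1]['epoch']
        if current_valid < self.best_valid:
            self.best_valid = current_valid
            self.best_valid_epoch = current_epoch
            self.best_weights = nn.get_all_params_values()
        elif self.best_valid_epoch + self.patience < current_epoch:
            nn.load_params_from(self.best_weights)
            raise StopIteration()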
#model testing
from lasagne.objectives import squared_error


def model_train(X_train, y_train, learning_rate=1e-4, epochs=50):
    network = create_network()
    lr = theano.shared(np.float32(learning_rate))
    net = NeuralNet(
        network,
        max_epochs=epochs,
        update=adam,
        update_learning_rate=lr,
        train_split=TrainSplit(eval_size=0.1),
        batch_iterator_train=BatchIterator(batch_size=32),
        batch_iterator_test=BatchIterator(batch_size=64),
        regression=True,
        objective_loss_function=squared_error,
        #on_training_started=[LoadBestParam(iteration=val_loss.argmin())],
        on_epoch_finished=[EarlyStopping(patience=5)],
        verbose=1)
    print('Loading pre-training weights...')
    net.load_params_from(params[val_loss.argmin()])
    print('Continue to train...')
    net.fit(X_train, y_train)
    print('Training finished.')
    return net
#model testing
def model_initial(X_train, y_train, max_iter=5):
    global params, val_loss
    params = []
    val_loss = np.zeros(max_iter)
    lr = theano.shared(np.float32(1e-4))
    for iteration in range(max_iter):
        print('Initializing weights (%d/%d) ...' % (iteration + 1, max_iter))
        print(iteration)
        network_init = create_network()
        net_init = NeuralNet(
            network_init,
            max_epochs=3,
            update=adam,
            update_learning_rate=lr,
            train_split=TrainSplit(eval_size=0.1),
            batch_iterator_train=BatchIterator(batch_size=32),
            batch_iterator_test=BatchIterator(batch_size=64),
            regression=True,
            objective_loss_function=squared_error,
            on_training_finished=[SaveTrainHistory(iteration=iteration)],
            verbose=0)
        net_init.initialize()
        net_init.fit(X_train, y_train)
#model training
def get_updates(nnet,
                train_obj,
                trainable_params):

    implemented_solvers = ("nesterov", "adagrad", "adadelta", "adam")

    if not hasattr(nnet, "solver") or nnet.solver not in implemented_solvers:
        nnet.sgd_solver = "nesterov"
    else:
        nnet.sgd_solver = nnet.solver

    if nnet.sgd_solver == "nesterov":
        updates = l_updates.nesterov_momentum(train_obj,
                                              trainable_params,
                                              learning_rate=Cfg.learning_rate,
                                              momentum=0.9)
    elif nnet.sgd_solver == "adagrad":
        updates = l_updates.adagrad(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "adadelta":
        updates = l_updates.adadelta(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "adam":
        updates = l_updates.adam(train_obj,
                                 trainable_params,
                                 learning_rate=Cfg.learning_rate)

    return updates
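A sketch of how get_updates would typically be wired up. The Cfg config stub, the nnet container, and the toy network below are assumptions for illustration; only get_updates itself comes from the snippet above:

import theano
import theano.tensor as T
import lasagne
import lasagne.updates as l_updates


class Cfg(object):
    # Assumed global config object; only learning_rate is needed here.
    learning_rate = 1e-4


class NNetStub(object):
    # Assumed container; get_updates() only reads .solver and sets .sgd_solver.
    solver = "adam"


nnet = NNetStub()
input_var = T.matrix('x')
target_var = T.ivector('y')
l_in = lasagne.layers.InputLayer((None, 20), input_var=input_var)
l_out = lasagne.layers.DenseLayer(l_in, num_units=2,
                                  nonlinearity=lasagne.nonlinearities.softmax)
train_obj = lasagne.objectives.categorical_crossentropy(
    lasagne.layers.get_output(l_out), target_var).mean()
trainable_params = lasagne.layers.get_all_params(l_out, trainable=True)

updates = get_updates(nnet, train_obj, trainable_params)
train_fn = theano.function([input_var, target_var], train_obj, updates=updates)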
def prep_train(alpha=0.0002, nz=100):
    E, D = build_net(nz=nz)

    x = T.tensor4('x')

    # Get outputs z=E(x), x_hat=D(z)
    encoding = get_output(E, x)
    decoding = get_output(D, encoding)

    # Get parameters of E and D
    params_e = get_all_params(E, trainable=True)
    params_d = get_all_params(D, trainable=True)
    params = params_e + params_d

    # Calc cost and updates
    cost = T.mean(squared_error(x, decoding))
    grad = T.grad(cost, params)
    updates = adam(grad, params, learning_rate=alpha)

    train = theano.function(inputs=[x], outputs=cost, updates=updates)
    rec = theano.function(inputs=[x], outputs=decoding)
    test = theano.function(inputs=[x], outputs=cost)

    return train, test, rec, E, D
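A minimal training-loop sketch around prep_train; the random batch and its shape are illustrative assumptions, since the real input shape depends on build_net, which is not shown:

import numpy as np

train, test, rec, E, D = prep_train(alpha=0.0002, nz=100)

X = np.random.rand(32, 3, 64, 64).astype('float32')   # assumed (batch, channels, H, W)
for epoch in range(10):
    cost = train(X)          # one gradient step on the reconstruction loss
    print('epoch %d: reconstruction cost %.4f' % (epoch, float(cost)))

X_hat = rec(X)               # reconstructions D(E(X))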
def main():
    ################
    # LOAD DATASET #
    ################
    dataset = './data/ubiquitous_aug.hkl'
    kfd = './data/ubiquitous_kfold.hkl'
    print('Loading dataset {}...'.format(dataset))
    X, y = hkl.load(open(dataset, 'r'))
    X = X.reshape(-1, 4, 1, 400).astype(floatX)
    y = y.astype('int32')
    print('X shape: {}, y shape: {}'.format(X.shape, y.shape))
    kf = hkl.load(open(kfd, 'r'))
    kfold = [(train, test) for train, test in kf]
    (train, test) = kfold[0]
    print('train_set size: {}, test_set size: {}'.format(len(train), len(test)))
    # shuffle +/- labels in minibatch
    print('shuffling train_set and test_set')
    shuffle(train)
    shuffle(test)
    X_train = X[train]
    X_test = X[test]
    y_train = y[train]
    y_test = y[test]
    print('data prepared!')

    layers = [
        (InputLayer, {'shape': (None, 4, 1, 400)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 4)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (DenseLayer, {'num_units': 64}),
        (DropoutLayer, {}),
        (DenseLayer, {'num_units': 64}),
        (DenseLayer, {'num_units': 2, 'nonlinearity': softmax})]

    net = NeuralNet(
        layers=layers,
        max_epochs=100,
        update=adam,
        update_learning_rate=1e-4,
        train_split=TrainSplit(eval_size=0.1),
        on_epoch_finished=[
            AdjustVariable(1e-4, target=0, half_life=20)],
        verbose=2)
    net.fit(X_train, y_train)
    plot_loss(net)
def main(resume=None):
    l = 300
    dataset = './data/ubiquitous_train.hkl'
    print('Loading dataset {}...'.format(dataset))
    X_train, y_train = hkl.load(dataset)
    X_train = X_train.reshape(-1, 4, 1, l).astype(floatX)
    y_train = np.array(y_train, dtype='int32')
    indice = np.arange(X_train.shape[0])
    np.random.shuffle(indice)
    X_train = X_train[indice]
    y_train = y_train[indice]
    print('X_train shape: {}, y_train shape: {}'.format(X_train.shape, y_train.shape))

    layers = [
        (InputLayer, {'shape': (None, 4, 1, l)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 4)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
        (MaxPool2DLayer, {'pool_size': (1, 2)}),
        (DenseLayer, {'num_units': 64}),
        (DropoutLayer, {}),
        (DenseLayer, {'num_units': 64}),
        (DenseLayer, {'num_units': 2, 'nonlinearity': softmax})]

    lr = theano.shared(np.float32(1e-4))
    net = NeuralNet(
        layers=layers,
        max_epochs=100,
        update=adam,
        update_learning_rate=lr,
        train_split=TrainSplit(eval_size=0.1),
        on_epoch_finished=[
            AdjustVariable(lr, target=1e-8, half_life=20)],
        verbose=4)
    if resume is not None:
        net.load_params_from(resume)
    net.fit(X_train, y_train)
    net.save_params_to('./models/net_params.pkl')
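AdjustVariable is used in both main() functions but not defined in these snippets. A plausible sketch is given below, assuming it decays the shared learning rate toward target with the given half-life in epochs; this is a guess at the behaviour implied by the constructor arguments, not the original class, and note that the first main() passes a plain float, which this sketch does not handle:

import numpy as np


class AdjustVariable(object):
    # Hypothetical on_epoch_finished callback: exponentially decay a shared
    # variable toward `target`, halving the remaining gap every `half_life` epochs.
    def __init__(self, variable, target=0, half_life=20):
        self.variable = variable
        self.target = target
        self.half_life = half_life

    def __call__(self, nn, train_history):
        delta = self.variable.get_value() - self.target
        delta *= np.exp(-np.log(2) / self.half_life)
        self.variable.set_value(np.float32(self.target + delta))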