def fit(
self, docs, y, max_epochs, epoch_size=None, val_docs=None, val_y=None, update_params_iter=itertools.repeat([]),
save_best=True
):
has_val = val_docs is not None
with log_time('training...', 'training took {:.0f}s'):
params = get_all_params(self.network)
best_perf, best_params = None, None
epoch_iter = EpochIterator(
self.gen_batches, (docs, y), (epoch_size + self.batch_size - 1) // self.batch_size
if epoch_size else None
)
for i, batches, update_params in zip(range(max_epochs), epoch_iter, update_params_iter):
train_res = [self._train(*batch, *update_params) for batch in batches]
val_res = np.concatenate(
[self._test(*batch[:-1]) for batch in self.gen_batches(val_docs)], axis=0
)[:len(val_y)] if has_val else None
perf = self.perf(i, train_res, val_y, val_res)
if (has_val and save_best) and (best_perf is None or perf >= best_perf):
best_perf = perf
best_params = {param: param.get_value() for param in params}
if has_val and save_best:
for param, value in best_params.items():
param.set_value(value)
Python get_all_params() usage examples
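Before the project-specific examples below, here is a minimal, self-contained sketch of the pattern they all share: build a network, collect its trainable parameters with lasagne.layers.get_all_params(), and hand them to an update rule. The layer sizes, variable names, and learning rate are illustrative only and not taken from any of the listed projects.

import theano
import theano.tensor as T
import lasagne
from lasagne import layers, objectives, updates

x = T.matrix('x')
y = T.ivector('y')

# A small two-layer classifier; the sizes are arbitrary.
l_in = layers.InputLayer((None, 100), input_var=x)
l_hid = layers.DenseLayer(l_in, 50, nonlinearity=lasagne.nonlinearities.rectify)
l_out = layers.DenseLayer(l_hid, 10, nonlinearity=lasagne.nonlinearities.softmax)

prediction = layers.get_output(l_out)
loss = objectives.categorical_crossentropy(prediction, y).mean()

# get_all_params walks the graph below l_out and returns its shared variables;
# trainable=True keeps only the parameters tagged as trainable (weights, biases).
params = layers.get_all_params(l_out, trainable=True)
sgd_updates = updates.sgd(loss, params, learning_rate=0.01)

train_fn = theano.function([x, y], loss, updates=sgd_updates)

The same helper also accepts arbitrary tag keyword arguments (as in the get_params_internal example below, which forwards **tags), filtering the returned parameters by the tags they were registered with.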
Source file: cnn_cascade_lasagne.py
Project: Cascade-CNN-Face-Detection
Author: gogolgrind
def __build_loss_train__fn__(self):
# create loss function
prediction = layers.get_output(self.net)
loss = objectives.categorical_crossentropy(prediction, self.__target_var__)
loss = loss.mean() + 1e-4 * regularization.regularize_network_params(self.net, regularization.l2)
val_acc = T.mean(T.eq(T.argmax(prediction, axis=1), self.__target_var__),dtype=theano.config.floatX)
# create parameter update expressions
params = layers.get_all_params(self.net, trainable=True)
self.eta = theano.shared(sp.array(sp.float32(0.05), dtype=sp.float32))
update_rule = updates.nesterov_momentum(loss, params, learning_rate=self.eta,
momentum=0.9)
# compile training function that updates parameters and returns training loss
self.__train_fn__ = theano.function([self.__input_var__,self.__target_var__], loss, updates=update_rule)
self.__predict_fn__ = theano.function([self.__input_var__], layers.get_output(self.net,deterministic=True))
self.__val_fn__ = theano.function([self.__input_var__,self.__target_var__], [loss,val_acc])
def build_train_func(rank=0, **kwargs):
print("rank: {} Building model".format(rank))
resnet = build_resnet()
print("Building training function")
x = T.ftensor4('x')
y = T.imatrix('y')
prob = L.get_output(resnet['prob'], x, deterministic=False)
loss = T.nnet.categorical_crossentropy(prob, y.flatten()).mean()
params = L.get_all_params(resnet.values(), trainable=True)
sgd_updates = updates.sgd(loss, params, learning_rate=1e-4)
# make a function to compute and store the raw gradient
f_train = theano.function(inputs=[x, y],
outputs=loss, # (assumes this is an avg)
updates=sgd_updates)
return f_train, "original"
def get_params_internal(self, **tags): # this gives ALL the vars (not the params values)
return L.get_all_params( # this lasagne function also returns all var below the passed layers
L.concat(self._output_layers),
**tags
)
def prep_train(alpha=0.0002, nz=100):
E,D=build_net(nz=nz)
x = T.tensor4('x')
#Get outputs z=E(x), x_hat=D(z)
encoding = get_output(E,x)
decoding = get_output(D,encoding)
#Get parameters of E and D
params_e=get_all_params(E, trainable=True)
params_d=get_all_params(D, trainable=True)
params = params_e + params_d
#Calc cost and updates
cost = T.mean(squared_error(x,decoding))
grad=T.grad(cost,params)
updates = adam(grad,params, learning_rate=alpha)
train = theano.function(inputs=[x], outputs=cost, updates=updates)
rec = theano.function(inputs=[x], outputs=decoding)
test = theano.function(inputs=[x], outputs=cost)
    return train, test, rec, E, D
def create_train_func(layers):
Xa, Xb = T.tensor4('Xa'), T.tensor4('Xb')
Xa_batch, Xb_batch = T.tensor4('Xa_batch'), T.tensor4('Xb_batch')
Tp = get_output(
layers['trans'],
inputs={
layers['inputa']: Xa, layers['inputb']: Xb,
}, deterministic=False,
)
# transforms: ground-truth, predicted
Tg = T.fmatrix('Tg')
Tg_batch = T.fmatrix('Tg_batch')
theta_gt = Tg.reshape((-1, 2, 3))
theta_pr = Tp.reshape((-1, 2, 3))
# grids: ground-truth, predicted
Gg = T.dot(theta_gt, _meshgrid(20, 20))
Gp = T.dot(theta_pr, _meshgrid(20, 20))
train_loss = T.mean(T.sqr(Gg - Gp))
params = get_all_params(layers['trans'], trainable=True)
updates = nesterov_momentum(train_loss, params, 1e-3, 0.9)
corr_func = theano.function(
inputs=[theano.In(Xa_batch), theano.In(Xb_batch), theano.In(Tg_batch)],
outputs=[Tp, train_loss],
updates=updates,
givens={
Xa: Xa_batch, Xb: Xb_batch, # Ia, Ib
Tg: Tg_batch, # transform Ia --> Ib
}
)
return corr_func
def __init__(self, K, vocab_size, num_chars, W_init,
nhidden, embed_dim, dropout, train_emb, char_dim, use_feat, gating_fn,
save_attn=False):
self.nhidden = nhidden
self.embed_dim = embed_dim
self.dropout = dropout
self.train_emb = train_emb
self.char_dim = char_dim
self.learning_rate = LEARNING_RATE
self.num_chars = num_chars
self.use_feat = use_feat
self.save_attn = save_attn
self.gating_fn = gating_fn
self.use_chars = self.char_dim!=0
if W_init is None: W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))
doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
T.wtensor3('cand')
docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
T.bmatrix('c_mask')
target_var = T.ivector('ans')
feat_var = T.imatrix('feat')
doc_toks, qry_toks= T.imatrix('dchars'), T.imatrix('qchars')
tok_var, tok_mask = T.imatrix('tok'), T.bmatrix('tok_mask')
cloze_var = T.ivector('cloze')
self.inps = [doc_var, doc_toks, query_var, qry_toks, cand_var, target_var, docmask_var,
qmask_var, tok_var, tok_mask, candmask_var, feat_var, cloze_var]
self.predicted_probs, predicted_probs_val, self.network, W_emb, attentions = (
self.build_network(K, vocab_size, W_init))
self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs, target_var).mean()
self.eval_fn = lasagne.objectives.categorical_accuracy(self.predicted_probs,
target_var).mean()
loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean()
eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val,
target_var).mean()
self.params = L.get_all_params(self.network, trainable=True)
updates = lasagne.updates.adam(self.loss_fn, self.params, learning_rate=self.learning_rate)
self.train_fn = theano.function(self.inps,
[self.loss_fn, self.eval_fn, self.predicted_probs],
updates=updates,
on_unused_input='warn')
self.validate_fn = theano.function(self.inps,
[loss_fn_val, eval_fn_val, predicted_probs_val]+attentions,
on_unused_input='warn')
def __init__(self, conf):
self.conf = conf
if self.conf.act == "linear":
self.conf.act = linear
elif self.conf.act == "sigmoid":
self.conf.act = sigmoid
elif self.conf.act == "relu":
self.conf.act = rectify
elif self.conf.act == "tanh":
self.conf.act = tanh
else:
raise ValueError("Unknown activation function", self.conf.act)
input_var_first = T.matrix('inputs1')
input_var_second = T.matrix('inputs2')
target_var = T.matrix('targets')
# create network
self.autoencoder, encoder_first, encoder_second = self.__create_toplogy__(input_var_first, input_var_second)
self.out = get_output(self.autoencoder)
loss = squared_error(self.out, target_var)
loss = loss.mean()
params = get_all_params(self.autoencoder, trainable=True)
updates = nesterov_momentum(loss, params, learning_rate=self.conf.lr, momentum=self.conf.momentum)
# training function
self.train_fn = theano.function([input_var_first, input_var_second, target_var], loss, updates=updates)
# fuction to reconstruct
test_reconstruction = get_output(self.autoencoder, deterministic=True)
self.reconstruction_fn = theano.function([input_var_first, input_var_second], test_reconstruction)
# encoding function
test_encode = get_output([encoder_first, encoder_second], deterministic=True)
self.encoding_fn = theano.function([input_var_first, input_var_second], test_encode)
# utils
blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0]
self.blas_nrm2 = blas('nrm2', np.array([], dtype=float))
self.blas_scal = blas('scal', np.array([], dtype=float))
# load weights if necessary
if self.conf.load_model is not None:
self.load_model()
def __init__(self, K, vocab_size, num_chars, W_init, regularizer, rlambda,
nhidden, embed_dim, dropout, train_emb, subsample, char_dim, use_feat, feat_cnt,
save_attn=False):
self.nhidden = nhidden
self.embed_dim = embed_dim
self.dropout = dropout
self.train_emb = train_emb
self.subsample = subsample
self.char_dim = char_dim
self.learning_rate = LEARNING_RATE
self.num_chars = num_chars
self.use_feat = use_feat
self.feat_cnt = feat_cnt
self.save_attn = save_attn
norm = lasagne.regularization.l2 if regularizer=='l2' else lasagne.regularization.l1
self.use_chars = self.char_dim!=0
if W_init is None: W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))
doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
T.wtensor3('cand')
docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
T.bmatrix('c_mask')
target_var = T.ivector('ans')
feat_var = T.imatrix('feat')
doc_toks, qry_toks= T.imatrix('dchars'), T.imatrix('qchars')
tok_var, tok_mask = T.imatrix('tok'), T.bmatrix('tok_mask')
cloze_var = T.ivector('cloze')
match_feat_var = T.itensor3('match_feat')
use_char_var = T.tensor3('use_char')
use_char_q_var = T.tensor3('use_char_q')
self.inps = [doc_var, doc_toks, query_var, qry_toks, cand_var, target_var, docmask_var,
qmask_var, tok_var, tok_mask, candmask_var, feat_var, cloze_var, match_feat_var, use_char_var, use_char_q_var]
if rlambda> 0.: W_pert = W_init + lasagne.init.GlorotNormal().sample(W_init.shape)
else: W_pert = W_init
self.predicted_probs, predicted_probs_val, self.network, W_emb, attentions = (
self.build_network(K, vocab_size, W_pert))
self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs, target_var).mean() + \
rlambda*norm(W_emb-W_init)
self.eval_fn = lasagne.objectives.categorical_accuracy(self.predicted_probs,
target_var).mean()
loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean() + \
rlambda*norm(W_emb-W_init)
eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val,
target_var).mean()
self.params = L.get_all_params(self.network, trainable=True)
updates = lasagne.updates.adam(self.loss_fn, self.params, learning_rate=self.learning_rate)
self.train_fn = theano.function(self.inps,
[self.loss_fn, self.eval_fn, self.predicted_probs],
updates=updates,
on_unused_input='ignore')
self.validate_fn = theano.function(self.inps,
[loss_fn_val, eval_fn_val, predicted_probs_val]+attentions,
on_unused_input='ignore')
def _compile(self, ddqn):
a = self.inputs["A"]
r = self.inputs["R"]
nonterminal = self.inputs["Nonterminal"]
q = ls.get_output(self.network, deterministic=True)
if ddqn:
q2 = ls.get_output(self.network, deterministic=True, inputs=self.alternate_input_mappings)
q2_action_ref = tensor.argmax(q2, axis=1)
q2_frozen = ls.get_output(self.frozen_network, deterministic=True)
q2_max = q2_frozen[tensor.arange(q2_action_ref.shape[0]), q2_action_ref]
else:
q2_max = tensor.max(ls.get_output(self.frozen_network, deterministic=True), axis=1)
target_q = r + self.gamma * nonterminal * q2_max
predicted_q = q[tensor.arange(q.shape[0]), a]
loss = self.build_loss_expression(predicted_q, target_q).sum()
params = ls.get_all_params(self.network, trainable=True)
# updates = lasagne.updates.rmsprop(loss, params, self._learning_rate, rho=0.95)
updates = deepmind_rmsprop(loss, params, self.learning_rate)
# TODO does FAST_RUN speed anything up?
mode = None # "FAST_RUN"
s0_img = self.inputs["S0"]
s1_img = self.inputs["S1"]
if self.misc_state_included:
s0_misc = self.inputs["S0_misc"]
s1_misc = self.inputs["S1_misc"]
print "Compiling the training function..."
self._learn = theano.function([s0_img, s0_misc, s1_img, s1_misc, a, r, nonterminal], loss,
updates=updates, mode=mode, name="learn_fn")
print "Compiling the evaluation function..."
self._evaluate = theano.function([s0_img, s0_misc], q, mode=mode,
name="eval_fn")
else:
print "Compiling the training function..."
self._learn = theano.function([s0_img, s1_img, a, r, nonterminal], loss, updates=updates, mode=mode,
name="learn_fn")
print "Compiling the evaluation function..."
self._evaluate = theano.function([s0_img], q, mode=mode, name="eval_fn")
print "Network compiled."
def _init_model(self, in_size, out_size, n_hid=10, learning_rate_sl=0.005, \
learning_rate_rl=0.005, batch_size=32, ment=0.1):
# 2-layer MLP
self.in_size = in_size # x and y coordinate
self.out_size = out_size # up, down, right, left
self.batch_size = batch_size
self.learning_rate = learning_rate_rl
self.n_hid = n_hid
input_var, turn_mask, act_mask, reward_var = T.ftensor3('in'), T.imatrix('tm'), \
T.itensor3('am'), T.fvector('r')
in_var = T.reshape(input_var, (input_var.shape[0]*input_var.shape[1],self.in_size))
l_mask_in = L.InputLayer(shape=(None,None), input_var=turn_mask)
pol_in = T.fmatrix('pol-h')
l_in = L.InputLayer(shape=(None,None,self.in_size), input_var=input_var)
l_pol_rnn = L.GRULayer(l_in, n_hid, hid_init=pol_in, mask_input=l_mask_in) # B x H x D
pol_out = L.get_output(l_pol_rnn)[:,-1,:]
l_den_in = L.ReshapeLayer(l_pol_rnn, (turn_mask.shape[0]*turn_mask.shape[1], n_hid)) # BH x D
l_out = L.DenseLayer(l_den_in, self.out_size, nonlinearity=lasagne.nonlinearities.softmax)
self.network = l_out
self.params = L.get_all_params(self.network)
# rl
probs = L.get_output(self.network) # BH x A
out_probs = T.reshape(probs, (input_var.shape[0],input_var.shape[1],self.out_size)) # B x H x A
log_probs = T.log(out_probs)
act_probs = (log_probs*act_mask).sum(axis=2) # B x H
ep_probs = (act_probs*turn_mask).sum(axis=1) # B
H_probs = -T.sum(T.sum(out_probs*log_probs,axis=2),axis=1) # B
self.loss = 0.-T.mean(ep_probs*reward_var + ment*H_probs)
updates = lasagne.updates.rmsprop(self.loss, self.params, learning_rate=learning_rate_rl, \
epsilon=1e-4)
self.inps = [input_var, turn_mask, act_mask, reward_var, pol_in]
self.train_fn = theano.function(self.inps, self.loss, updates=updates)
self.obj_fn = theano.function(self.inps, self.loss)
self.act_fn = theano.function([input_var, turn_mask, pol_in], [out_probs, pol_out])
# sl
sl_loss = 0.-T.mean(ep_probs)
sl_updates = lasagne.updates.rmsprop(sl_loss, self.params, learning_rate=learning_rate_sl, \
epsilon=1e-4)
self.sl_train_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss, \
updates=sl_updates)
self.sl_obj_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss)
def build_instrument_model(self, n_vars, **kwargs):
targets = TT.vector()
instrument_vars = TT.matrix()
instruments = layers.InputLayer((None, n_vars), instrument_vars)
instruments = layers.DropoutLayer(instruments, p=0.2)
dense_layer = layers.DenseLayer(instruments, kwargs['dense_size'], nonlinearity=nonlinearities.tanh)
dense_layer = layers.DropoutLayer(dense_layer, p=0.2)
    for _ in range(kwargs['n_dense_layers'] - 1):
dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'], nonlinearity=nonlinearities.tanh)
dense_layer = layers.DropoutLayer(dense_layer, p=0.5)
self.instrument_output = layers.DenseLayer(dense_layer, 1, nonlinearity=nonlinearities.linear)
init_params = layers.get_all_param_values(self.instrument_output)
prediction = layers.get_output(self.instrument_output, deterministic=False)
test_prediction = layers.get_output(self.instrument_output, deterministic=True)
# flexible here, endog variable can be categorical, continuous, etc.
l2_cost = regularization.regularize_network_params(self.instrument_output, regularization.l2)
loss = objectives.squared_error(prediction.flatten(), targets.flatten()).mean() + 1e-4 * l2_cost
loss_total = objectives.squared_error(prediction.flatten(), targets.flatten()).mean()
params = layers.get_all_params(self.instrument_output, trainable=True)
param_updates = updates.adadelta(loss, params)
self._instrument_train_fn = theano.function(
[
targets,
instrument_vars,
],
loss,
updates=param_updates
)
self._instrument_loss_fn = theano.function(
[
targets,
instrument_vars,
],
loss_total
)
self._instrument_output_fn = theano.function([instrument_vars], test_prediction)
return init_params
def build_treatment_model(self, n_vars, **kwargs):
input_vars = TT.matrix()
instrument_vars = TT.matrix()
targets = TT.vector()
inputs = layers.InputLayer((None, n_vars), input_vars)
inputs = layers.DropoutLayer(inputs, p=0.2)
dense_layer = layers.DenseLayer(inputs, 2 * kwargs['dense_size'], nonlinearity=nonlinearities.rectify)
dense_layer = layers.batch_norm(dense_layer)
dense_layer= layers.DropoutLayer(dense_layer, p=0.2)
    for _ in range(kwargs['n_dense_layers'] - 1):
dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'], nonlinearity=nonlinearities.rectify)
dense_layer = layers.batch_norm(dense_layer)
self.treatment_output = layers.DenseLayer(dense_layer, 1, nonlinearity=nonlinearities.linear)
init_params = layers.get_all_param_values(self.treatment_output)
prediction = layers.get_output(self.treatment_output, deterministic=False)
test_prediction = layers.get_output(self.treatment_output, deterministic=True)
l2_cost = regularization.regularize_network_params(self.treatment_output, regularization.l2)
loss = gmm_loss(prediction, targets, instrument_vars) + 1e-4 * l2_cost
params = layers.get_all_params(self.treatment_output, trainable=True)
param_updates = updates.adadelta(loss, params)
self._train_fn = theano.function(
[
input_vars,
targets,
instrument_vars,
],
loss,
updates=param_updates
)
self._loss_fn = theano.function(
[
input_vars,
targets,
instrument_vars,
],
loss,
)
self._output_fn = theano.function(
[
input_vars,
],
test_prediction,
)
return init_params
def train_resnet(
batch_size=64, # batch size on each GPU
validFreq=1,
do_valid=False,
learning_rate=1e-3,
update_rule=updates.sgd, # updates.nesterov_momentum,
n_epoch=3,
**update_kwargs):
# Initialize single GPU.
theano.gpuarray.use("cuda")
t_0 = time.time()
print("Loading data (synthetic)")
train, valid, test = load_data()
x_train, y_train = train
x_valid, y_valid = valid
x_test, y_test = test
print("Building model")
resnet = build_resnet()
params = L.get_all_params(resnet.values(), trainable=True)
f_train_minibatch, f_predict = build_training(resnet, params, update_rule,
learning_rate=learning_rate,
**update_kwargs)
t_last = t_1 = time.time()
print("Total setup time: {:,.1f} s".format(t_1 - t_0))
print("Starting training")
for ep in range(n_epoch):
train_loss = 0.
i = 0
for mb_idxs in iter_mb_idxs(batch_size, len(x_train), shuffle=True):
train_loss += f_train_minibatch(x_train[mb_idxs], y_train[mb_idxs])
i += 1
train_loss /= i
print("\nEpoch: ", ep)
print("Training Loss: {:.3f}".format(train_loss))
if do_valid and ep % validFreq == 0:
valid_loss = valid_mc = 0.
i = 0
for mb_idxs in iter_mb_idxs(batch_size, len(x_valid), shuffle=False):
mb_loss, mb_mc = f_predict(x_valid[mb_idxs], y_valid[mb_idxs])
valid_loss += mb_loss
valid_mc += mb_mc
i += 1
valid_loss /= i
valid_mc /= i
print("Validation Loss: {:3f}, Accuracy: {:3f}".format(valid_loss, 1 - valid_mc))
t_2 = time.time()
print("(epoch total time: {:,.1f} s)".format(t_2 - t_last))
t_last = t_2
print("\nTotal training time: {:,.1f} s".format(t_last - t_1))
def train_resnet(
batch_size=64, # batch size on each GPU
validFreq=1,
do_valid=False,
learning_rate=1e-3,
update_rule=updates.sgd, # updates.nesterov_momentum,
n_epoch=3,
n_gpu=None, # later get this from synk.fork
**update_kwargs):
n_gpu = synk.fork(n_gpu) # (n_gpu==None will use all)
t_0 = time.time()
print("Loading data (synthetic)")
train, valid, test = load_data()
x_train, y_train = [synk.data(d) for d in train]
x_valid, y_valid = [synk.data(d) for d in valid]
x_test, y_test = [synk.data(d) for d in test]
full_mb_size = batch_size * n_gpu
learning_rate = learning_rate * n_gpu # (one technique for larger minibatches)
num_valid_slices = len(x_valid) // n_gpu // batch_size
print("Will compute validation using {} slices".format(num_valid_slices))
print("Building model")
resnet = build_resnet()
params = L.get_all_params(resnet.values(), trainable=True)
f_train_minibatch, f_predict = build_training(resnet, params, update_rule,
learning_rate=learning_rate,
**update_kwargs)
synk.distribute()
synk.broadcast(params) # (ensure all GPUs have same values)
t_last = t_1 = time.time()
print("Total setup time: {:,.1f} s".format(t_1 - t_0))
print("Starting training")
for ep in range(n_epoch):
train_loss = 0.
i = 0
for mb_idxs in iter_mb_idxs(full_mb_size, len(x_train), shuffle=True):
train_loss += f_train_minibatch(x_train, y_train, batch=mb_idxs)
i += 1
train_loss /= i
print("\nEpoch: ", ep)
print("Training Loss: {:.3f}".format(train_loss))
if do_valid and ep % validFreq == 0:
valid_loss, valid_mc = f_predict(x_valid, y_valid,
num_slices=num_valid_slices)
print("Validation Loss: {:3f}, Accuracy: {:3f}".format(
float(valid_loss), float(1 - valid_mc)))
t_2 = time.time()
print("(epoch total time: {:,.1f} s)".format(t_2 - t_last))
t_last = t_2
print("\nTotal training time: {:,.1f} s".format(t_last - t_1))