def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = Variable(data, requires_grad=True), Variable(target)
optimizer.zero_grad()
output = model(data)
loss = F.cross_entropy(output, target)
loss.backward()
optimizer.step()
# Display the gradients (forward_grad / backward_grad are assumed to be collected elsewhere, e.g. via hooks)
plt.clf()
plt.subplot(211); plt.hist(forward_grad.ravel()); plt.title("Features magnitude")
plt.subplot(212); plt.hist(backward_grad.ravel()); plt.title("Gradients")
plt.show(block=False)
plt.pause(0.01)
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.data[0]))
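# A minimal modernized sketch of the same loop (an assumption: PyTorch >= 0.4, where
# Variable is merged into Tensor and loss.item() replaces loss.data[0]); model,
# optimizer and train_loader are assumed to be defined as above, the gradient plotting
# is omitted, and the function name is illustrative only.
def train_modern(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)                     # forward pass
        loss = F.cross_entropy(output, target)   # mean cross entropy over the batch
        loss.backward()                          # backward pass
        optimizer.step()                         # parameter update
        print('Train Epoch: {} Loss: {:.6f}'.format(epoch, loss.item()))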
# Python cross_entropy() usage examples (source code)
def forward(self, model, sample):
"""Compute the loss for the given sample.
Returns a tuple with three elements:
1) the loss, as a Variable
2) the sample size, which is used as the denominator for the gradient
3) logging outputs to display while training
"""
net_output = model(**sample['net_input'])
input = net_output.view(-1, net_output.size(-1))
target = sample['target'].view(-1)
loss = F.cross_entropy(input, target, size_average=False, ignore_index=self.padding_idx)
sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
logging_output = {
'loss': loss.data[0],
'sample_size': sample_size,
}
return loss, sample_size, logging_output
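# A hypothetical caller for the criterion above (illustrative only, not the project's
# actual trainer; it assumes the criterion is an nn.Module). Per the docstring, the
# returned sample_size is the denominator used to normalize the summed loss before the
# gradient step.
def criterion_step_sketch(criterion, model, sample, optimizer):
    loss, sample_size, logging_output = criterion(model, sample)
    optimizer.zero_grad()
    (loss / sample_size).backward()    # normalize the summed loss by the sample size
    optimizer.step()
    return logging_output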
def train_epoch(self, epoch):
self.model.train()
total_loss = 0
for batch_idx, batch in enumerate(self.train_loader):
self.optimizer.zero_grad()
output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
loss = F.cross_entropy(output, batch.label, size_average=False)
total_loss += loss.data[0]
loss.backward()
self.optimizer.step()
if batch_idx % self.log_interval == 0:
self.logger.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, min(batch_idx * self.batch_size, len(batch.dataset.examples)),
len(batch.dataset.examples),
100. * batch_idx / (len(self.train_loader)), loss.data[0])
)
average_loss, mean_average_precision, mean_reciprocal_rank = self.evaluate(self.train_evaluator, 'train')
if self.use_tensorboard:
self.writer.add_scalar('{}/train/cross_entropy_loss'.format(self.train_loader.dataset.NAME), average_loss, epoch)
self.writer.add_scalar('{}/train/map'.format(self.train_loader.dataset.NAME), mean_average_precision, epoch)
self.writer.add_scalar('{}/train/mrr'.format(self.train_loader.dataset.NAME), mean_reciprocal_rank, epoch)
return total_loss
def get_scores(self):
self.model.eval()
test_cross_entropy_loss = 0
qids = []
true_labels = []
predictions = []
for batch in self.data_loader:
qids.extend(batch.id.data.cpu().numpy())
output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
test_cross_entropy_loss += F.cross_entropy(output, batch.label, size_average=False).data[0]
true_labels.extend(batch.label.data.cpu().numpy())
predictions.extend(output.data.exp()[:, 1].cpu().numpy())
del output
qids = list(map(lambda n: int(round(n * 10, 0)) / 10, qids))
mean_average_precision, mean_reciprocal_rank = get_map_mrr(qids, predictions, true_labels, self.data_loader.device)
test_cross_entropy_loss /= len(batch.dataset.examples)
return [test_cross_entropy_loss, mean_average_precision, mean_reciprocal_rank], ['cross entropy loss', 'map', 'mrr']
def eval(data_iter, model, args):
model.eval()
corrects, avg_loss = 0, 0
for batch in data_iter:
feature, target = batch.text, batch.label
feature.data.t_(), target.data.sub_(1) # batch first, index align
if args.cuda:
feature, target = feature.cuda(), target.cuda()
logit = model(feature)
loss = F.cross_entropy(logit, target, size_average=False)
avg_loss += loss.data[0]
corrects += (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
size = len(data_iter.dataset)
avg_loss = avg_loss/size
accuracy = 100.0 * corrects/size
model.train()
print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss,
accuracy,
corrects,
size))
# train_ALL_CNN.py, from project cnn-lstm-bilstm-deepcnn-clstm-in-pytorch (author: bamtercelboo)
def eval(data_iter, model, args):
model.eval()
corrects, avg_loss = 0, 0
for batch in data_iter:
feature, target = batch.text, batch.label
feature.data.t_(), target.data.sub_(1) # batch first, index align
if args.cuda:
feature, target = feature.cuda(), target.cuda()
logit = model(feature)
loss = F.cross_entropy(logit, target, size_average=False)
avg_loss += loss.data[0]
corrects += (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
size = len(data_iter.dataset)
avg_loss = avg_loss/size
accuracy = 100.0 * corrects/size
model.train()
print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss,
accuracy,
corrects,
size))
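# size_average is deprecated in newer PyTorch; a minimal sketch of the same evaluation
# with the reduction kwarg (an assumption: PyTorch >= 0.4.1), summing per-example losses
# and dividing by the dataset size once at the end. torch and F are assumed to be
# imported as in the snippets above; the function name is illustrative only.
def eval_modern(data_iter, model):
    model.eval()
    corrects, total_loss = 0, 0.0
    with torch.no_grad():
        for batch in data_iter:
            feature, target = batch.text.t(), batch.label - 1   # batch first, index align
            logit = model(feature)
            total_loss += F.cross_entropy(logit, target, reduction='sum').item()
            corrects += (logit.max(1)[1] == target).sum().item()
    size = len(data_iter.dataset)
    model.train()
    return total_loss / size, 100.0 * corrects / size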
def build_loss(self, rpn_cls_score_reshape, rpn_bbox_pred, rpn_data):
# classification loss
rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2)
rpn_label = rpn_data[0].view(-1)
rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()).cuda()
rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep)
rpn_label = torch.index_select(rpn_label, 0, rpn_keep)
fg_cnt = torch.sum(rpn_label.data.ne(0))
rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)
# box loss
rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights)
rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights)
rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4)
return rpn_cross_entropy, rpn_loss_box
def __init__(self, classes=None, debug=False):
super(FasterRCNN, self).__init__()
if classes is not None:
self.classes = classes
self.n_classes = len(classes)
self.rpn = RPN()
self.roi_pool = RoIPool(7, 7, 1.0/16)
self.fc6 = FC(512 * 7 * 7, 4096)
self.fc7 = FC(4096, 4096)
self.score_fc = FC(4096, self.n_classes, relu=False)
self.bbox_fc = FC(4096, self.n_classes * 4, relu=False)
# loss
self.cross_entropy = None
self.loss_box = None
# for log
self.debug = debug
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)
if self.training:
roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
rois = roi_data[0]
# roi pool
pooled_features = self.roi_pool(features, rois)
x = pooled_features.view(pooled_features.size()[0], -1)
x = self.fc6(x)
x = F.dropout(x, training=self.training)
x = self.fc7(x)
x = F.dropout(x, training=self.training)
cls_score = self.score_fc(x)
cls_prob = F.softmax(cls_score)
bbox_pred = self.bbox_fc(x)
if self.training:
self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)
return cls_prob, bbox_pred, rois
def compute_loss(self, y, t):
arc_logits, label_logits = y
true_arcs, true_labels = t.T
b, l1, l2 = arc_logits.size()
true_arcs = _model_var(
self.model,
pad_sequence(true_arcs, padding=-1, dtype=np.int64))
arc_loss = F.cross_entropy(
arc_logits.view(b * l1, l2), true_arcs.view(b * l1),
ignore_index=-1)
b, l1, d = label_logits.size()
true_labels = _model_var(
self.model,
pad_sequence(true_labels, padding=-1, dtype=np.int64))
label_loss = F.cross_entropy(
label_logits.view(b * l1, d), true_labels.view(b * l1),
ignore_index=-1)
loss = arc_loss + label_loss
return loss
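# Tiny standalone illustration of the ignore_index trick used above: the -1 entries
# produced by pad_sequence(..., padding=-1) are excluded from the loss.
import torch
import torch.nn.functional as F
logits = torch.randn(4, 3)                                  # 4 positions, 3 classes
targets = torch.tensor([0, 2, -1, -1])                      # last two positions are padding
loss = F.cross_entropy(logits, targets, ignore_index=-1)    # averaged over the 2 real targets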
def build_loss_objectiveness(self, region_objectiveness, targets):
loss_objectiveness = F.cross_entropy(region_objectiveness, targets)
maxv, predict = region_objectiveness.data.max(1)
labels = targets.squeeze()
fg_cnt = torch.sum(labels.data.ne(0))
bg_cnt = labels.data.numel() - fg_cnt
if fg_cnt > 0:
self.tp_reg = torch.sum(predict[:fg_cnt].eq(labels.data[:fg_cnt]))
else:
self.tp_reg = 0.
if bg_cnt > 0:
self.tf_reg = torch.sum(predict[fg_cnt:].eq(labels.data[fg_cnt:]))
else:
self.tf_reg = 0.
self.fg_cnt_reg = fg_cnt
self.bg_cnt_reg = bg_cnt
return loss_objectiveness
def compute_loss(self, output_logprobs, y):
"""
Compute loss. We assume that the first element of the output sequence y is
a start token, and that each element of y is left-aligned and right-padded
with self.NULL out to T_out. We want the output_logprobs to predict the
sequence y, shifted by one timestep so that y[0] is fed to the network and
then y[1] is predicted. We also don't want to compute loss for padded
timesteps.
Inputs:
- output_logprobs: Variable of shape (N, T_out, V_out)
- y: LongTensor Variable of shape (N, T_out)
"""
self.multinomial_outputs = None
V_in, V_out, D, H, L, N, T_in, T_out = self.get_dims(y=y)
mask = y.data != self.NULL
y_mask = Variable(torch.Tensor(N, T_out).fill_(0).type_as(mask))
y_mask[:, 1:] = mask[:, 1:]
y_masked = y[y_mask]
out_mask = Variable(torch.Tensor(N, T_out).fill_(0).type_as(mask))
out_mask[:, :-1] = mask[:, 1:]
out_mask = out_mask.view(N, T_out, 1).expand(N, T_out, V_out)
out_masked = output_logprobs[out_mask].view(-1, V_out)
loss = F.cross_entropy(out_masked, y_masked)
return loss
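# A small worked example of the shift-by-one masking above (illustrative; it assumes
# NULL == 0 and a batch of one sequence).
import torch
NULL = 0
y = torch.tensor([[1, 5, 7, NULL, NULL]])    # <start>, two real tokens, padding
mask = y != NULL                             # [[T, T, T, F, F]]
y_mask = torch.zeros_like(mask)
y_mask[:, 1:] = mask[:, 1:]                  # keep y[1:] where not padded
out_mask = torch.zeros_like(mask)
out_mask[:, :-1] = mask[:, 1:]               # keep the timesteps that predict y[1:]
print(y[y_mask])                             # tensor([5, 7]) -> the targets
print(out_mask)                              # timesteps 0 and 1 are selected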
def forward(self, predict, target, weight=None):
"""
Args:
predict:(n, c, h, w)
target:(n, h, w)
weight (Tensor, optional): a manual rescaling weight given to each class.
If given, has to be a Tensor of size "nclasses"
"""
assert not target.requires_grad
assert predict.dim() == 4
assert target.dim() == 3
assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0))
assert predict.size(2) == target.size(1), "{0} vs {1} ".format(predict.size(2), target.size(1))
assert predict.size(3) == target.size(2), "{0} vs {1} ".format(predict.size(3), target.size(2))
n, c, h, w = predict.size()
target_mask = (target >= 0) * (target != self.ignore_label)
target = target[target_mask]
predict = predict.transpose(1, 2).transpose(2, 3).contiguous()
predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c)
loss = F.cross_entropy(predict, target, weight=weight, size_average=self.size_average)
return loss
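# Note: newer PyTorch (an assumption: >= 0.4) accepts (n, c, h, w) logits with
# (n, h, w) integer targets directly, so the manual flattening above can often be
# replaced by ignore_index. A minimal standalone sketch:
import torch
import torch.nn.functional as F
logits = torch.randn(2, 5, 4, 4)               # (n, c, h, w)
labels = torch.randint(0, 5, (2, 4, 4))        # (n, h, w)
labels[0, 0, 0] = 255                          # mark one pixel as "ignore"
loss = F.cross_entropy(logits, labels, ignore_index=255)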
def train(model, generator, batch_num, epoch):
model.train()
for batch_idx in range(batch_num):
data, target = next(generator)
data, target = torch.from_numpy(data), torch.from_numpy(target)
# convert BHWC to BCHW
data = data.permute(0, 3, 1, 2)
data, target = data.float().cuda(), target.long().cuda()
data, target = Variable(data), Variable(target)
optimizer.zero_grad()
output = model(data)
loss = F.cross_entropy(output, target)
loss.backward()
optimizer.step()
print('Train Epoch: {}\tLoss: {:.6f}'.format(epoch, loss.data[0]))
def test(model, generator, batch_num, epoch):
model.eval()
test_loss = 0
correct = 0
for batch_idx in range(batch_num):
data, target = next(generator)
data, target = torch.from_numpy(data), torch.from_numpy(target)
# convert BHWC to BCHW
data = data.permute(0, 3, 1, 2)
data, target = data.float().cuda(), target.long().cuda()
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += F.cross_entropy(output, target).data[0]
pred = output.data.max(1)[1] # get the index of the max log-probability
correct += pred.eq(target.data).cpu().sum()
test_loss /= batch_num  # the loss function already averages over the batch size
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
test_loss, correct, n_test, 100. * correct / n_test))
# ---
# Normal CNN
def forward(self, predict, target, weight=None):
"""
Args:
predict:(n, c, h, w)
target:(n, h, w)
weight (Tensor, optional): a manual rescaling weight given to each class.
If given, has to be a Tensor of size "nclasses"
"""
assert not target.requires_grad
assert predict.dim() == 4
assert target.dim() == 3
assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0))
assert predict.size(2) == target.size(1), "{0} vs {1} ".format(predict.size(2), target.size(1))
assert predict.size(3) == target.size(2), "{0} vs {1} ".format(predict.size(3), target.size(2))
n, c, h, w = predict.size()
target_mask = (target >= 0) * (target != self.ignore_label)
target = target[target_mask]
if not target.data.dim():
return Variable(torch.zeros(1))
predict = predict.transpose(1, 2).transpose(2, 3).contiguous()
predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c)
loss = F.cross_entropy(predict, target, weight=weight, size_average=self.size_average)
return loss
def build_loss(self, rpn_cls_score_reshape, rpn_bbox_pred, rpn_data):
# classification loss
rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2)
rpn_label = rpn_data[0].view(-1)
rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()).cuda()
rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep)
rpn_label = torch.index_select(rpn_label, 0, rpn_keep)
fg_cnt = torch.sum(rpn_label.data.ne(0))
rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)
# box loss
rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights)
rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights)
rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4)
return rpn_cross_entropy, rpn_loss_box
def __init__(self, classes=None, debug=False):
super(FasterRCNN, self).__init__()
if classes is not None:
self.classes = classes
self.n_classes = len(classes)
self.rpn = RPN()
self.roi_pool = RoIPool(7, 7, 1.0/16)
self.fc6 = FC(1024 * 7 * 7, 4096)
self.fc7 = FC(4096, 4096)
self.score_fc = FC(4096, self.n_classes, relu=False)
self.bbox_fc = FC(4096, self.n_classes * 4, relu=False)
# loss
self.cross_entropy = None
self.loss_box = None
# for log
self.debug = debug
def __init__(self, classes=None, debug=False):
super(RFCN, self).__init__()
if classes is not None:
self.classes = classes
self.n_classes = len(classes)
self.rpn = RPN()
# self.psroi_pool = PSRoIPool(7, 7, 1.0/16, 7, 15)  # this variant is only used for testing
self.psroi_pool_cls = PSRoIPool(7,7, 1.0/16, 7, self.n_classes)
self.psroi_pool_loc = PSRoIPool(7,7, 1.0/16, 7, 8)
self.new_conv = Conv2d(512, 1024, 1, same_padding=False)
self.rfcn_score = Conv2d(1024,7*7*8, 1,1, bn=False)
self.rfcn_bbox = Conv2d(1024, 7*7*self.n_classes,1,1,bn=False)
self.bbox_pred = nn.AvgPool2d((7,7),stride=(7,7))
self.cls_score = nn.AvgPool2d((7,7),stride=(7,7))
# loss
self.cross_entropy = None
self.loss_box = None
# for log
self.debug = debug
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)
if self.training:
roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
rois = roi_data[0]
# roi pool
conv_new1 = self.new_conv(features)
r_score_map = self.rfcn_score(conv_new1)
r_bbox_map = self.rfcn_bbox(conv_new1)
psroi_pooled_cls = self.psroi_pool_cls(r_score_map, rois)
psroi_pooled_loc = self.psroi_pool_loc(r_bbox_map, rois)
bbox_pred = self.bbox_pred(psroi_pooled_loc)
bbox_pred = torch.squeeze(bbox_pred)
cls_score = self.cls_score(psroi_pooled_cls)
cls_score = torch.squeeze(cls_score)
cls_prob = F.softmax(cls_score)
if self.training:
self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)
return cls_prob, bbox_pred, rois
def loss(self,examples):
# IMPORTANT: sort the examples by descending length; the recurrent decoder (pack_padded_sequence) requires it
examples.sort(key = lambda e: len(e.tokens), reverse = True)
x = variable(np.array([ e.sequence.draw() for e in examples], dtype = np.float32))
x = x.unsqueeze(1) # insert the channel
imageFeatures = self.encoder(x)
inputs, sizes, T = self.decoder.buildCaptions([ e.tokens for e in examples ])
outputDistributions = self.decoder(imageFeatures, inputs, sizes)
T = pack_padded_sequence(T, sizes, batch_first = True)[0]
return F.cross_entropy(outputDistributions, T)
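# Why the sort above matters: older pack_padded_sequence versions require the batch to
# be ordered by non-increasing length. A minimal standalone sketch (the tensor shapes
# are illustrative only):
import torch
from torch.nn.utils.rnn import pack_padded_sequence
padded = torch.zeros(3, 5, 8)     # (batch, max_len, features)
lengths = [5, 3, 2]               # non-increasing, as required
packed = pack_padded_sequence(padded, lengths, batch_first=True)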
def validater(self, batch_loader):
def validate(batch_size, use_cuda):
input = batch_loader.next_batch(batch_size, 'valid')
input = [Variable(t.from_numpy(var)) for var in input]
input = [var.long() for var in input]
input = [var.cuda() if use_cuda else var for var in input]
[encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target] = input
logits, _, kld = self(0.,
encoder_word_input, encoder_character_input,
decoder_word_input, decoder_character_input,
z=None)
logits = logits.view(-1, self.params.word_vocab_size)
target = target.view(-1)
cross_entropy = F.cross_entropy(logits, target)
return cross_entropy, kld
return validate
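# Hypothetical usage of the returned closure (names are illustrative only):
# validate = model.validater(batch_loader)
# cross_entropy, kld = validate(batch_size=32, use_cuda=t.cuda.is_available())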
def train():
net.train()
loss_avg = 0.0
for batch_idx, (data, target) in enumerate(train_loader):
data, target = torch.autograd.Variable(data.cuda()), torch.autograd.Variable(target.cuda())
# forward
output = net(data)
# backward
optimizer.zero_grad()
loss = F.cross_entropy(output, target)
loss.backward()
optimizer.step()
# exponential moving average
loss_avg = loss_avg * 0.2 + loss.data[0] * 0.8
state['train_loss'] = loss_avg
# test function (forward only)
def test():
net.eval()
loss_avg = 0.0
correct = 0
for batch_idx, (data, target) in enumerate(test_loader):
data, target = torch.autograd.Variable(data.cuda()), torch.autograd.Variable(target.cuda())
# forward
output = net(data)
loss = F.cross_entropy(output, target)
# accuracy
pred = output.data.max(1)[1]
correct += pred.eq(target.data).sum()
# test loss average
loss_avg += loss.data[0]
state['test_loss'] = loss_avg / len(test_loader)
state['test_accuracy'] = correct / len(test_loader.dataset)
# Main loop
def get_attr_loss(output, attributes, flip, params):
"""
Compute attributes loss.
"""
assert type(flip) is bool
k = 0
loss = 0
for (_, n_cat) in params.attr:
# categorical
x = output[:, k:k + n_cat].contiguous()
y = attributes[:, k:k + n_cat].max(1)[1].view(-1)
if flip:
# generate different categories
shift = torch.LongTensor(y.size()).random_(n_cat - 1) + 1
y = (y + Variable(shift.cuda())) % n_cat
loss += F.cross_entropy(x, y)
k += n_cat
return loss
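# Quick standalone check of the "flip" shift above (n_cat = 4 is an arbitrary example):
# adding a random offset in [1, n_cat - 1] modulo n_cat always yields a label different
# from the original one.
import torch
n_cat = 4
y = torch.tensor([0, 1, 2, 3])
shift = torch.LongTensor(y.size()).random_(n_cat - 1) + 1   # values in [1, n_cat - 1]
y_flip = (y + shift) % n_cat
print((y_flip == y).any())                                  # tensor(False)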
def build_loss(self, rpn_cls_score_reshape, rpn_bbox_pred, rpn_data):
# classification loss
rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2)
rpn_label = rpn_data[0].view(-1)
rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()).cuda()
rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep)
rpn_label = torch.index_select(rpn_label, 0, rpn_keep)
fg_cnt = torch.sum(rpn_label.data.ne(0))
rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)
# box loss
rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights)
rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights)
rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4)
return rpn_cross_entropy, rpn_loss_box
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)
if self.training:
roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
rois = roi_data[0]
# roi pool
pooled_features = self.roi_pool(features, rois)
x = pooled_features.view(pooled_features.size()[0], -1)
# x = self.fc6(x)
# x = F.dropout(x, training=self.training)
# x = self.fc7(x)
# x = F.dropout(x, training=self.training)
x = self.fcs(x)
cls_score = self.score_fc(x)
cls_prob = F.softmax(cls_score)
bbox_pred = self.bbox_fc(x)
if self.training:
self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)
return cls_prob, bbox_pred, rois
def train_model(self, train_loader, path, num_batch):
self.train()
fitness = 0
train_len = 0
for batch_idx, (data, target) in enumerate(train_loader):
if self.args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
self.optimizer.zero_grad()
output = self(data, path, -1)
pred = output.data.max(1)[1] # get the index of the max log-probability
fitness += pred.eq(target.data).cpu().sum()
train_len += len(target.data)
loss = F.cross_entropy(output, target)
loss.backward()
self.optimizer.step()
if batch_idx >= num_batch - 1:
break
fitness = fitness / train_len
return fitness
def train_step(self, blobs, train_op):
self.forward(blobs['data'], blobs['im_info'], blobs['gt_boxes'])
rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, loss = self._losses["rpn_cross_entropy"].data[0], \
self._losses['rpn_loss_box'].data[0], \
self._losses['cross_entropy'].data[0], \
self._losses['loss_box'].data[0], \
self._losses['total_loss'].data[0]
#utils.timer.timer.tic('backward')
train_op.zero_grad()
self._losses['total_loss'].backward()
#utils.timer.timer.toc('backward')
train_op.step()
self.delete_intermediate_states()
return rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, loss