def build_loss(self, rpn_cls_score_reshape, rpn_bbox_pred, rpn_data):
    # classification loss
    rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2)
    rpn_label = rpn_data[0].view(-1)
    # keep only anchors whose label is not -1 ("ignore")
    rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()).cuda()
    rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep)
    rpn_label = torch.index_select(rpn_label, 0, rpn_keep)
    fg_cnt = torch.sum(rpn_label.data.ne(0))
    rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)
    # box loss: zero out unweighted entries, then normalize the summed
    # smooth L1 by the number of foreground anchors
    rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
    rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights)
    rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights)
    rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4)
    return rpn_cross_entropy, rpn_loss_box
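
All of the RPN/Fast R-CNN snippets on this page follow the same pattern: mask out ignored anchors, sum the smooth L1 loss (size_average=False), and normalize by the foreground count. For reference, a minimal sketch of what F.smooth_l1_loss computes per element, written against the modern tensor API rather than the Variable-era API used in these snippets:

import torch

def smooth_l1(pred, target):
    # elementwise: 0.5 * d^2 if |d| < 1, else |d| - 0.5
    diff = (pred - target).abs()
    return torch.where(diff < 1, 0.5 * diff ** 2, diff - 0.5)

# summed over all elements, as with size_average=False above
pred = torch.tensor([0.2, 1.8])
target = torch.tensor([0.0, 0.0])
print(smooth_l1(pred, target).sum())  # 0.5*0.04 + (1.8 - 0.5) = 1.32

With size_average=False the elementwise values are summed, which is why the snippets divide by fg_cnt themselves.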
def finish_episode():
    R = 0
    saved_actions = model.saved_actions
    policy_losses = []
    value_losses = []
    rewards = []
    # discounted returns, computed back-to-front
    for r in model.rewards[::-1]:
        R = r + args.gamma * R
        rewards.insert(0, R)
    rewards = torch.Tensor(rewards)
    # normalize returns to zero mean / unit variance
    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
    for (log_prob, value), r in zip(saved_actions, rewards):
        reward = r - value.data[0, 0]  # advantage estimate
        policy_losses.append(-log_prob * reward)
        value_losses.append(F.smooth_l1_loss(value, Variable(torch.Tensor([r]))))
    optimizer.zero_grad()
    loss = torch.cat(policy_losses).sum() + torch.cat(value_losses).sum()
    loss.backward()
    optimizer.step()
    del model.rewards[:]
    del model.saved_actions[:]
def loss(output, target, *args):
    assert isinstance(output, Variable) and isinstance(target, Variable)
    # return torch.mean(torch.sum((output - target).clamp(-1, 1) ** 2, dim=1))
    return F.smooth_l1_loss(output, target, size_average=False)
def __init__(self, model, data_generator, epochs, loss):
    self.epochs = epochs
    self.model = model
    self.data_generator = data_generator
    self.loss = loss
    if loss == "smoothl1":
        self.loss_fn = F.smooth_l1_loss
    elif loss == "l1":
        self.loss_fn = nn.L1Loss()
    elif loss == "l2":
        self.loss_fn = nn.MSELoss()
    else:
        raise ValueError("Unrecognized loss type: {}".format(loss))
def build_loss(self, cls_score, bbox_pred, roi_data):
    # classification loss
    label = roi_data[1].squeeze()
    fg_cnt = torch.sum(label.data.ne(0))
    bg_cnt = label.data.numel() - fg_cnt
    # for log
    if self.debug:
        maxv, predict = cls_score.data.max(1)
        self.tp = torch.sum(predict[:fg_cnt].eq(label.data[:fg_cnt])) if fg_cnt > 0 else 0
        self.tf = torch.sum(predict[fg_cnt:].eq(label.data[fg_cnt:]))
        self.fg_cnt = fg_cnt
        self.bg_cnt = bg_cnt
    # re-weight the background class by the foreground/background ratio
    ce_weights = torch.ones(cls_score.size()[1])
    ce_weights[0] = float(fg_cnt) / bg_cnt
    ce_weights = ce_weights.cuda()
    cross_entropy = F.cross_entropy(cls_score, label, weight=ce_weights)
    # bounding box regression L1 loss
    bbox_targets, bbox_inside_weights, bbox_outside_weights = roi_data[2:]
    bbox_targets = torch.mul(bbox_targets, bbox_inside_weights)
    bbox_pred = torch.mul(bbox_pred, bbox_inside_weights)
    loss_box = F.smooth_l1_loss(bbox_pred, bbox_targets, size_average=False) / (fg_cnt + 1e-4)
    return cross_entropy, loss_box
def build_loss_bbox(self, bbox_pred, roi_data):
    bbox_targets, bbox_inside_weights, bbox_outside_weights = roi_data[2:]
    bbox_targets = torch.mul(bbox_targets, bbox_inside_weights)
    bbox_pred = torch.mul(bbox_pred, bbox_inside_weights)
    fg_cnt = torch.sum(bbox_inside_weights[:, 0].data.ne(0))
    loss_box = F.smooth_l1_loss(bbox_pred, bbox_targets, size_average=False) / (fg_cnt + 1e-5)
    return loss_box
def forward(self, loc_preds, loc_targets, cls_preds, cls_targets):
    '''Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).

    Args:
      loc_preds: (tensor) predicted locations, sized [batch_size, #anchors, 4].
      loc_targets: (tensor) encoded target locations, sized [batch_size, #anchors, 4].
      cls_preds: (tensor) predicted class confidences, sized [batch_size, #anchors, #classes].
      cls_targets: (tensor) encoded target labels, sized [batch_size, #anchors].

    loss:
      (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + FocalLoss(cls_preds, cls_targets).
    '''
    batch_size, num_boxes = cls_targets.size()
    pos = cls_targets > 0  # [N,#anchors]
    num_pos = pos.data.long().sum()

    ################################################################
    # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
    ################################################################
    mask = pos.unsqueeze(2).expand_as(loc_preds)        # [N,#anchors,4]
    masked_loc_preds = loc_preds[mask].view(-1, 4)      # [#pos,4]
    masked_loc_targets = loc_targets[mask].view(-1, 4)  # [#pos,4]
    loc_loss = F.smooth_l1_loss(masked_loc_preds, masked_loc_targets, size_average=False)

    ################################################################
    # cls_loss = FocalLoss(cls_preds, cls_targets)
    ################################################################
    pos_neg = cls_targets > -1  # exclude ignored anchors
    mask = pos_neg.unsqueeze(2).expand_as(cls_preds)
    masked_cls_preds = cls_preds[mask].view(-1, self.num_classes)
    cls_loss = self.focal_loss_alt(masked_cls_preds, cls_targets[pos_neg])

    print('loc_loss: %.3f | cls_loss: %.3f' % (loc_loss.data[0]/num_pos, cls_loss.data[0]/num_pos), end=' | ')
    loss = (loc_loss + cls_loss) / num_pos
    return loss
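
focal_loss_alt above is defined elsewhere in that repository. As a point of reference, a minimal sketch of the standard focal loss FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t) in its one-vs-all (sigmoid) form; this is illustrative and not necessarily the repo's exact variant:

import torch
import torch.nn.functional as F

def focal_loss(logits, targets, num_classes, alpha=0.25, gamma=2.0):
    # logits: [N, num_classes]; targets: int64 class indices in [0, num_classes)
    t = F.one_hot(targets, num_classes).float()
    p = torch.sigmoid(logits)
    pt = torch.where(t > 0, p, 1 - p)  # probability assigned to the true "bit"
    at = torch.where(t > 0, torch.full_like(p, alpha), torch.full_like(p, 1 - alpha))
    # (1 - pt)^gamma down-weights easy, well-classified examples
    return (-at * (1 - pt).pow(gamma) * pt.clamp(min=1e-8).log()).sum()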
def smoothl1loss_no_reduce_test():
    t = Variable(torch.randn(2, 3, 4))
    return dict(
        fullname='SmoothL1Loss_no_reduce',
        constructor=wrap_functional(
            lambda i: F.smooth_l1_loss(i, t.type_as(i), reduce=False)),
        input_fn=lambda: torch.randn(2, 3, 4),
        reference_fn=lambda i, _:
            loss_reference_fns['SmoothL1Loss'](i, t.data.type_as(i), reduce=False),
        pickle=False)
def accumulate_gradient(self, batch_sz, states, actions, rewards,
                        next_states, mask):
    """ Compute the temporal difference error.

        td_error = (r + gamma * max Q(s_,a)) - Q(s,a)
    """
    states = Variable(states)
    actions = Variable(actions)
    rewards = Variable(rewards.squeeze())
    next_states = Variable(next_states, volatile=True)

    # Compute Q(s, a)
    q_values = self.policy(states)
    q_values = q_values.gather(1, actions)

    # Compute Q(s_, a)
    q_target_values = Variable(torch.zeros(batch_sz).type(self.dtype.FT))

    # Bootstrap for non-terminal states
    q_target_values[mask] = self.target_policy(next_states).max(
        1, keepdim=True)[0][mask]

    q_target_values.volatile = False  # So we don't mess up the huber loss
    expected_q_values = (q_target_values * self.gamma) + rewards

    # Compute Huber loss
    loss = F.smooth_l1_loss(q_values, expected_q_values)

    # Accumulate gradients
    loss.backward()
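
A toy check of the TD target computed above, with arbitrary numbers (modern tensor API, illustrative only):

import torch

gamma = 0.99
rewards = torch.tensor([1.0, 0.0])
max_q_next = torch.tensor([2.0, 3.0])
mask = torch.tensor([True, False])  # the second transition is terminal
target = rewards + gamma * max_q_next * mask.float()
print(target)  # tensor([2.9800, 0.0000]): 1 + 0.99*2, and just the reward for the terminal step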
def __init__(self, network, target_network, lr=0.01, learn_start=1000,
             batch_size=32, map_dim=10, gamma=0.95, replay_size=10000,
             instr_len=7, layout_channels=1, object_channels=1):
    self.network = network
    self.target_network = target_network
    self._copy_net()
    self.learn_start = learn_start
    self.batch_size = batch_size
    self.gamma = gamma
    self.replay_size = replay_size
    self.instr_len = instr_len
    self.layout_channels = layout_channels
    self.object_channels = object_channels
    self._refresh_size(map_dim, map_dim)
    self.criterion = F.smooth_l1_loss
    self.optimizer = optim.RMSprop(self.network.parameters(), lr=lr)
def calculate_loss(model, target_model, transitions, configuration):
    ValueTensorType = configuration.VALUE_TENSOR_TYPE

    # Inverse of zip; transpose the batch (http://stackoverflow.com/a/19343/3343043).
    # The * operator unpacks the collection into arguments:
    # (S,A,R,S',T)^n -> (S^n,A^n,R^n,S'^n,T^n)
    batch = Transition(*zip(*transitions))

    states = Variable(torch.cat(batch.state))
    action_indices = Variable(torch.cat(batch.action))
    rewards = Variable(torch.cat(batch.reward))
    non_terminals = Variable(torch.cat(batch.non_terminal))

    non_terminal_successor_states = [state for (state, non_terminal) in zip(
        batch.successor_state, non_terminals.data) if non_terminal]
    if len(non_terminal_successor_states) == 0:
        return 0
    non_terminal_successor_states = Variable(torch.cat(non_terminal_successor_states))

    Q_states = model(states).gather(1, action_indices)
    Q_successors = model(non_terminal_successor_states)
    if configuration.DOUBLE_DQN:
        Q_successors = target_model(non_terminal_successor_states)
    V_successors = Variable(
        torch.zeros(configuration.BATCH_SIZE).type(ValueTensorType))
    V_successors[non_terminals] = Q_successors.detach().max(1)[0]
    Q_expected = rewards + (configuration.DISCOUNT_FACTOR * V_successors)
    return F.smooth_l1_loss(Q_states, Q_expected)
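
Note that the DOUBLE_DQN branch above merely swaps in the target network for the max; textbook Double DQN instead selects the argmax action with the online network and evaluates it with the target network. A minimal sketch of that target, reusing the snippet's variable names:

# Double DQN target: the online net picks the action, the target net scores it
greedy_actions = model(non_terminal_successor_states).detach().max(1)[1].unsqueeze(1)
double_q = target_model(non_terminal_successor_states).detach().gather(1, greedy_actions).squeeze(1)
# then, as above: V_successors[non_terminals] = double_q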
def learn_single(self, value, value_last, last_action, reward):
    expected_value = self.gamma * value + reward  # what value_last should have been if it was perfect
    value_loss = F.smooth_l1_loss(expected_value, value_last)
    print(value_loss.data)
    last_action.reinforce(value_loss.data[0])
    self.optimizer.zero_grad()
    final_nodes = [value_loss, last_action]
    gradients = [maybe_cuda(torch.ones(1)), None]
    autograd.backward(final_nodes, gradients, retain_graph=True)
    self.optimizer.step()
    del last_action
def build_loss_object(self, cls_score, bbox_pred, roi_data):
    # classification loss
    label = roi_data[1].squeeze()
    fg_cnt = torch.sum(label.data.ne(0))
    bg_cnt = label.data.numel() - fg_cnt

    # re-weight the background class by the foreground/background ratio
    ce_weights = np.sqrt(self.object_loss_weight)
    ce_weights[0] = float(fg_cnt) / (bg_cnt + 1e-5)
    ce_weights = ce_weights.cuda()

    # bookkeeping for logging: true positives / true negatives
    maxv, predict = cls_score.data.max(1)
    if fg_cnt > 0:
        self.tp = torch.sum(predict[:fg_cnt].eq(label.data[:fg_cnt]))
    else:
        self.tp = 0.
    if bg_cnt > 0:
        self.tf = torch.sum(predict[fg_cnt:].eq(label.data[fg_cnt:]))
    else:
        self.tf = 0.
    self.fg_cnt = fg_cnt
    self.bg_cnt = bg_cnt
    # debug: print('accuracy: %2.2f%%' % (((self.tp + self.tf) / float(fg_cnt + bg_cnt)) * 100))

    cross_entropy = F.cross_entropy(cls_score, label, weight=ce_weights)

    # bounding box regression L1 loss
    bbox_targets, bbox_inside_weights, bbox_outside_weights = roi_data[2:]
    bbox_targets = torch.mul(bbox_targets, bbox_inside_weights)
    bbox_pred = torch.mul(bbox_pred, bbox_inside_weights)
    loss_box = F.smooth_l1_loss(bbox_pred, bbox_targets, size_average=False) / (fg_cnt + 1e-5)
    return cross_entropy, loss_box
def build_loss(self, rpn_cls_score_reshape, rpn_bbox_pred, rpn_data, is_region=False):
    # classification loss
    rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2)
    rpn_label = rpn_data[0]
    rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()).cuda()
    rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep)
    rpn_label = torch.index_select(rpn_label, 0, rpn_keep)
    fg_cnt = torch.sum(rpn_label.data.ne(0))
    bg_cnt = rpn_label.data.numel() - fg_cnt

    _, predict = torch.max(rpn_cls_score.data, 1)
    error = torch.sum(torch.abs(predict - rpn_label.data))
    if predict.size()[0] < 256:
        print(predict.size())
        print(rpn_label.size())
        print(fg_cnt)
    if is_region:
        self.tp_region = torch.sum(predict[:fg_cnt].eq(rpn_label.data[:fg_cnt]))
        self.tf_region = torch.sum(predict[fg_cnt:].eq(rpn_label.data[fg_cnt:]))
        self.fg_cnt_region = fg_cnt
        self.bg_cnt_region = bg_cnt
        if DEBUG:
            print('accuracy: %2.2f%%' % ((self.tp_region + self.tf_region) / float(fg_cnt + bg_cnt) * 100))
    else:
        self.tp = torch.sum(predict[:fg_cnt].eq(rpn_label.data[:fg_cnt]))
        self.tf = torch.sum(predict[fg_cnt:].eq(rpn_label.data[fg_cnt:]))
        self.fg_cnt = fg_cnt
        self.bg_cnt = bg_cnt
        if DEBUG:
            print('accuracy: %2.2f%%' % ((self.tp + self.tf) / float(fg_cnt + bg_cnt) * 100))
    rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)

    # box loss
    rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
    rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights)
    rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights)
    rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4)
    return rpn_cross_entropy, rpn_loss_box
def train(epoch):
    for batch, (left, right) in enumerate(training_data_loader):
        if args.direction == 'lr':
            input.data.resize_(left.size()).copy_(left)
            target.data.resize_(right.size()).copy_(right)
        else:
            input.data.resize_(right.size()).copy_(right)
            target.data.resize_(left.size()).copy_(left)

        ## Discriminator
        netD.zero_grad()
        # real
        D_real = netD(input, target)
        ones_label.data.resize_(D_real.size()).fill_(1)
        zeros_label.data.resize_(D_real.size()).fill_(0)
        D_loss_real = F.binary_cross_entropy(D_real, ones_label)
        D_x_y = D_real.data.mean()
        # fake
        G_fake = netG(input)
        D_fake = netD(input, G_fake.detach())
        D_loss_fake = F.binary_cross_entropy(D_fake, zeros_label)
        D_x_gx = D_fake.data.mean()
        D_loss = D_loss_real + D_loss_fake
        D_loss.backward()
        D_solver.step()

        ## Generator: adversarial loss plus a heavily weighted smooth L1 reconstruction term
        netG.zero_grad()
        G_fake = netG(input)
        D_fake = netD(input, G_fake)
        D_x_gx_2 = D_fake.data.mean()
        G_loss = F.binary_cross_entropy(D_fake, ones_label) + 100 * F.smooth_l1_loss(G_fake, target)
        G_loss.backward()
        G_solver.step()

        ## debug
        if (batch + 1) % 100 == 0:
            print('[TRAIN] Epoch[{}]({}/{}); D_loss: {:.4f}; G_loss: {:.4f}; D(x): {:.4f} D(G(z)): {:.4f}/{:.4f}'.format(
                epoch, batch + 1, len(training_data_loader), D_loss.data[0], G_loss.data[0], D_x_y, D_x_gx, D_x_gx_2))
def optimize_model():
    global last_sync
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see http://stackoverflow.com/a/19343/3343043 for a
    # detailed explanation).
    batch = Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    non_final_mask = torch.ByteTensor(
        tuple(map(lambda s: s is not None, batch.next_state)))
    if USE_CUDA:
        non_final_mask = non_final_mask.cuda()
    # We don't want to backprop through the expected action values, and volatile
    # will save us from temporarily changing the model parameters'
    # requires_grad to False!
    non_final_next_states = Variable(torch.cat([s for s in batch.next_state
                                                if s is not None]),
                                     volatile=True)
    state_batch = Variable(torch.cat(batch.state))
    action_batch = Variable(torch.cat(batch.action))
    reward_batch = Variable(torch.cat(batch.reward))

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken
    state_action_values = model(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states; keep the buffer on the same
    # device as the model outputs so the masked assignment below works
    next_state_values = Variable(torch.zeros(BATCH_SIZE).cuda() if USE_CUDA
                                 else torch.zeros(BATCH_SIZE))
    next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0]
    # Now, we don't want to mess up the loss with a volatile flag, so let's
    # clear it. After this, we'll just end up with a Variable that has
    # requires_grad=False
    next_state_values.volatile = False
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in model.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
    print("Loss: " + str(loss.data[0]))

######################################################################
#
# Below, you can find the main training loop. At the beginning we reset
# the environment and initialize the ``state`` variable. Then, we sample
# an action, execute it, observe the next screen and the reward (always
# 1), and optimize our model once. When the episode ends (our model
# fails), we restart the loop.
#
# Below, `num_episodes` is set small. You should download
# the notebook and run a lot more episodes.
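
The training loop the comment refers to is not reproduced on this page. A minimal sketch of its shape, assuming the env, get_screen, select_action, and memory objects from the same tutorial (helper signatures are assumptions, not verified against this exact copy):

num_episodes = 10
for i_episode in range(num_episodes):
    env.reset()
    last_screen = get_screen()
    current_screen = get_screen()
    state = current_screen - last_screen  # state = difference of consecutive screens
    done = False
    while not done:
        action = select_action(state)               # epsilon-greedy over model(state)
        _, reward, done, _ = env.step(action[0, 0])
        reward = torch.Tensor([reward])
        last_screen = current_screen
        current_screen = get_screen()
        next_state = current_screen - last_screen if not done else None
        memory.push(state, action, next_state, reward)  # store the transition
        state = next_state
        optimize_model()                            # one optimization step per env step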
def optimize_model():
    global last_sync
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see http://stackoverflow.com/a/19343/3343043 for a
    # detailed explanation).
    batch = Transition(*zip(*transitions))
    # Compute a mask of non-final states and concatenate the batch elements
    non_final_mask = torch.ByteTensor(
        tuple(map(lambda s: s is not None, batch.next_state)))
    if USE_CUDA:
        non_final_mask = non_final_mask.cuda()
    # We don't want to backprop through the expected action values, and volatile
    # will save us from temporarily changing the model parameters'
    # requires_grad to False!
    non_final_next_states = Variable(torch.cat([s for s in batch.next_state
                                                if s is not None]),
                                     volatile=True)
    state_batch = Variable(torch.cat(batch.state)).cuda()
    action_batch = Variable(torch.cat(batch.action)).cuda()
    reward_batch = Variable(torch.cat(batch.reward)).cuda()
    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken
    state_action_values = model(state_batch).gather(1, action_batch)
    # Compute V(s_{t+1}) for all next states; keep the buffer on the same
    # device as the model outputs so the masked assignment below works
    next_state_values = Variable(torch.zeros(BATCH_SIZE).cuda() if USE_CUDA
                                 else torch.zeros(BATCH_SIZE))
    next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0]
    # Now, we don't want to mess up the loss with a volatile flag, so let's
    # clear it. After this, we'll just end up with a Variable that has
    # requires_grad=False
    next_state_values.volatile = False
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch
    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in model.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
def optimize_model():
    global last_sync
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see http://stackoverflow.com/a/19343/3343043 for a
    # detailed explanation).
    batch = Transition(*zip(*transitions))
    # Compute a mask of non-final states and concatenate the batch elements
    non_final_mask = ByteTensor(tuple(map(lambda s: s is not None,
                                          batch.next_state)))
    # We don't want to backprop through the expected action values, and volatile
    # will save us from temporarily changing the model parameters'
    # requires_grad to False!
    non_final_next_states = Variable(torch.cat([s for s in batch.next_state
                                                if s is not None]),
                                     volatile=True)
    state_batch = Variable(torch.cat(batch.state))
    action_batch = Variable(torch.cat(batch.action))
    reward_batch = Variable(torch.cat(batch.reward))
    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken
    state_action_values = model(state_batch).gather(1, action_batch)
    # Compute V(s_{t+1}) for all next states.
    next_state_values = Variable(torch.zeros(BATCH_SIZE).type(Tensor))
    next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0]
    # Now, we don't want to mess up the loss with a volatile flag, so let's
    # clear it. After this, we'll just end up with a Variable that has
    # requires_grad=False
    next_state_values.volatile = False
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch
    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in model.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
def forward(self, loc_preds, loc_targets, conf_preds, conf_targets):
    '''Compute loss between (loc_preds, loc_targets) and (conf_preds, conf_targets).

    Args:
      loc_preds: (tensor) predicted locations, sized [batch_size, 8732, 4].
      loc_targets: (tensor) encoded target locations, sized [batch_size, 8732, 4].
      conf_preds: (tensor) predicted class confidences, sized [batch_size, 8732, num_classes].
      conf_targets: (tensor) encoded target classes, sized [batch_size, 8732].

    loss:
      (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + CrossEntropyLoss(conf_preds, conf_targets).
    '''
    batch_size, num_boxes, _ = loc_preds.size()
    pos = conf_targets > 0  # [N,8732], pos means the box matched a ground-truth object
    num_matched_boxes = pos.data.long().sum()
    if num_matched_boxes == 0:
        return Variable(torch.Tensor([0])), Variable(torch.Tensor([0]))

    ################################################################
    # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
    ################################################################
    pos_mask = pos.unsqueeze(2).expand_as(loc_preds)     # [N,8732,4]
    pos_loc_preds = loc_preds[pos_mask].view(-1, 4)      # [#pos,4]
    pos_loc_targets = loc_targets[pos_mask].view(-1, 4)  # [#pos,4]
    loc_loss = F.smooth_l1_loss(pos_loc_preds, pos_loc_targets, size_average=False)

    ################################################################
    # conf_loss = CrossEntropyLoss(pos_conf_preds, pos_conf_targets)
    #           + CrossEntropyLoss(neg_conf_preds, neg_conf_targets)
    ################################################################
    conf_loss = self.cross_entropy_loss(conf_preds.view(-1, self.num_classes),
                                        conf_targets.view(-1))  # [N*8732,]
    conf_loss = conf_loss.view(-1, 8732)
    neg = self.hard_negative_mining(conf_loss, pos)  # [N,8732]

    pos_mask = pos.unsqueeze(2).expand_as(conf_preds)  # [N,8732,num_classes]
    neg_mask = neg.unsqueeze(2).expand_as(conf_preds)  # [N,8732,num_classes]
    mask = (pos_mask + neg_mask).gt(0)
    pos_and_neg = (pos + neg).gt(0)
    preds = conf_preds[mask].view(-1, self.num_classes)  # [#pos+#neg,num_classes]
    targets = conf_targets[pos_and_neg]                  # [#pos+#neg,]
    conf_loss = F.cross_entropy(preds, targets, size_average=False)

    loc_loss /= num_matched_boxes
    conf_loss /= num_matched_boxes
    return loc_loss, conf_loss
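
hard_negative_mining above is defined elsewhere in that repository. A common sketch of the technique, which keeps only the highest-loss negative anchors at roughly a 3:1 negative-to-positive ratio (illustrative, not necessarily the repo's exact code):

def hard_negative_mining(conf_loss, pos, neg_pos_ratio=3):
    # conf_loss: [N, #anchors] per-anchor classification loss
    # pos: [N, #anchors] boolean mask of matched (positive) anchors
    conf_loss = conf_loss.clone()
    conf_loss[pos] = 0                           # never select positives as negatives
    _, idx = conf_loss.sort(1, descending=True)  # order anchors by loss
    _, rank = idx.sort(1)                        # rank[i][j] = position of anchor j in that order
    num_neg = neg_pos_ratio * pos.long().sum(1, keepdim=True)
    return rank < num_neg.expand_as(rank)        # keep the top-k hardest negatives per image

The double sort is the standard trick: ranking each anchor's loss lets rank < num_neg select the top-k negatives per image without an explicit loop.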