def add_loss_op(self, loss_type='negative_l1_dist'):
    self.loss_type = loss_type
    logits_shape = tf.shape(self.logits)
    logits_flat = tf.reshape(self.logits, [-1])
    # Gaussian location policy: the logits are the means and config.variance is
    # used as the standard deviation (older tf.contrib API: mu / diag_stdev).
    location_dist = tf.contrib.distributions.MultivariateNormalDiag(
        mu=logits_flat,
        diag_stdev=self.config.variance * tf.ones_like(logits_flat))
    # Draw num_samples locations and restore the logits shape, giving a
    # [num_samples, batch, seq_len, dim] tensor of sampled locations.
    location_samples = location_dist.sample([self.config.num_samples])
    new_logits_shape = tf.concat([[self.config.num_samples], logits_shape], axis=0)
    location_samples = tf.reshape(location_samples, new_logits_shape)
    self.location_samples = location_samples

    if self.loss_type == 'negative_l1_dist':
        # Reward = -(mean + max) absolute coordinate error of each sampled location.
        abs_diff = tf.abs(location_samples - tf.cast(self.targets_placeholder, tf.float32))
        rewards = -tf.reduce_mean(abs_diff, axis=3, keep_dims=True) \
                  - tf.reduce_max(abs_diff, axis=3, keep_dims=True)
    elif self.loss_type == 'iou':
        rewards = self.get_iou_loss()
        rewards = tf.expand_dims(rewards, axis=-1)

    # Per-timestep baseline: average reward over the sample dimension.
    timestep_rewards = tf.reduce_mean(rewards, axis=0, keep_dims=True)
    self.timestep_rewards = timestep_rewards

    if self.cumsum:
        # Reverse cumulative sum over time: return-to-go from each timestep.
        tot_cum_rewards = tf.cumsum(rewards, axis=2, reverse=True)
    else:
        # Same total return broadcast to every timestep.
        tot_cum_rewards = tf.tile(tf.reduce_sum(rewards, axis=2, keep_dims=True),
                                  multiples=[1, 1, self.config.seq_len, 1])
    self.tot_cum_rewards = tot_cum_rewards

    # Treat rewards, samples, and baselines as constants so gradients flow only
    # through the log-density term (score-function / REINFORCE-style estimator).
    timestep_rewards_grad_op = tf.stop_gradient(timestep_rewards)
    rewards_grad_op = tf.stop_gradient(rewards)
    location_samples_op = tf.stop_gradient(location_samples)
    tot_cum_rewards_op = tf.stop_gradient(tot_cum_rewards)

    # Log-density of the targets under the Gaussian location model; note that
    # config.variance again plays the role of the standard deviation sigma.
    const1 = 1.0 / (np.sqrt(2.0 * math.pi) * self.config.variance)
    const2 = 2.0 * self.config.variance ** 2
    squared_diff = tf.square(self.targets_placeholder - self.logits)
    density_func = tf.log(const1 * tf.exp(-squared_diff / const2))
    self.density_func = density_func

    # Advantage-weighted log-density, summed over time and averaged over the
    # sample and batch dimensions (a baseline-subtracted policy-gradient surrogate).
    self.loss = tf.reduce_mean(
        tf.reduce_sum(density_func * (tot_cum_rewards_op - timestep_rewards_grad_op), axis=2),
        axis=[1, 0])
    self.total_rewards = tf.reduce_mean(tf.reduce_sum(timestep_rewards, axis=2), axis=1)
    tf.summary.scalar('Total Rewards', self.total_rewards[0][0])
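
For reference, here is a standalone NumPy sketch that mirrors what the loss above computes: a baseline-subtracted, advantage-weighted Gaussian log-density (a REINFORCE-style surrogate). The helper name `surrogate_loss`, the toy shapes, and the values are illustrative assumptions, not part of the original model; `std` corresponds to `config.variance`, which the graph code uses as a standard deviation.

import numpy as np

def surrogate_loss(logits, targets, rewards, std, cumsum=True):
    """logits, targets: [batch, seq_len, dim]; rewards: [num_samples, batch, seq_len, 1]."""
    # Per-timestep baseline: average reward over the sample dimension.
    baseline = rewards.mean(axis=0, keepdims=True)
    if cumsum:
        # Reverse cumulative sum over time: return-to-go from each timestep.
        returns = np.flip(np.cumsum(np.flip(rewards, axis=2), axis=2), axis=2)
    else:
        # Same total return broadcast to every timestep.
        returns = np.repeat(rewards.sum(axis=2, keepdims=True), rewards.shape[2], axis=2)
    # Gaussian log-density of the targets under the predicted means.
    log_density = np.log(1.0 / (np.sqrt(2.0 * np.pi) * std)) - (targets - logits) ** 2 / (2.0 * std ** 2)
    # Advantage-weighted log-density: sum over time, average over samples and batch.
    return np.mean(np.sum(log_density * (returns - baseline), axis=2), axis=(0, 1))

# Toy usage with assumed shapes: batch=2, seq_len=5, dim=4, num_samples=3.
logits = np.random.randn(2, 5, 4)
targets = logits + 0.1 * np.random.randn(2, 5, 4)
rewards = -np.abs(np.random.randn(3, 2, 5, 1))
print(surrogate_loss(logits, targets, rewards, std=0.5))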