def _build_q_head(self, input_state):
    # NAF-style Q head: Q(s, u) = V(s) + A(s, u), where A is quadratic in the action.
    self.w_value, self.b_value, self.value = layers.fc('fc_value', input_state, 1, activation='linear')
    # The L head emits num_actions**2 values, reshaped below into one matrix per sample.
    self.w_L, self.b_L, self.L_full = layers.fc('L_full', input_state, self.num_actions ** 2, activation='linear')
    self.w_mu, self.b_mu, self.mu = layers.fc('mu', input_state, self.num_actions, activation='linear')

    # Elements above the main diagonal in L_full are unused.
    L_full = tf.reshape(self.L_full, [-1, self.num_actions, self.num_actions])
    # Exponentiate the diagonal so L L^T is positive definite; keep the strict lower triangle as-is.
    D = tf.matrix_band_part(tf.exp(L_full) - L_full, 0, 0)
    L = tf.matrix_band_part(L_full, -1, 0) + D

    # A(s, u) = -1/2 * (u - mu)^T L L^T (u - mu) = -1/2 * ||L^T (u - mu)||^2
    LT_u_minus_mu = tf.einsum('ikj,ik->ij', L, self.selected_action_ph - self.mu)
    self.advantage = tf.expand_dims(
        -0.5 * tf.einsum('ij,ij->i', LT_u_minus_mu, LT_u_minus_mu), axis=1)

    q_selected_action = self.value + self.advantage
    diff = tf.subtract(self.target_ph, q_selected_action)
    return self._value_function_loss(diff)
Source: continuous_actions.py (Python)
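For reference, this head implements the Normalized Advantage Functions (NAF) decomposition Q(s, u) = V(s) + A(s, u) with A(s, u) = -1/2 (u - mu)^T L L^T (u - mu), where L is lower triangular with a positive diagonal. Below is a minimal NumPy sketch (names, shapes, and values are hypothetical, not part of the file above) checking two properties of that advantage term: it is zero at u = mu and never positive elsewhere.

import numpy as np

num_actions, batch = 3, 4
rng = np.random.default_rng(0)

# Hypothetical raw network output for L, one num_actions x num_actions block per sample.
L_full = rng.normal(size=(batch, num_actions, num_actions))

# Keep the strict lower triangle, exponentiate the diagonal so L L^T is positive definite.
strict_lower = np.tril(L_full, k=-1)
diag_exp = np.exp(np.diagonal(L_full, axis1=1, axis2=2))        # (batch, num_actions)
L = strict_lower + diag_exp[:, :, None] * np.eye(num_actions)   # (batch, num_actions, num_actions)

mu = rng.normal(size=(batch, num_actions))
u = rng.normal(size=(batch, num_actions))

def advantage(u, mu, L):
    # A(s, u) = -1/2 * ||L^T (u - mu)||^2, computed per batch element.
    lt_diff = np.einsum('ikj,ik->ij', L, u - mu)
    return -0.5 * np.einsum('ij,ij->i', lt_diff, lt_diff)

assert np.allclose(advantage(mu, mu, L), 0.0)  # the maximum, 0, is attained at u = mu
assert np.all(advantage(u, mu, L) <= 0.0)      # the quadratic form is negative semi-definite
print(advantage(u, mu, L))

Because A(s, u) is at most zero and equals zero at u = mu, the maximum of Q(s, u) over actions is V(s) and is attained at u = mu, so the greedy continuous action can be read directly from the mu head without an inner optimization loop.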