def init_policy(self):
    """Build and cache the compiled greedy-policy functions.

    Reads the symbolic output of ``self._output_vec_layer`` and compiles
    three callables, each taking the observation input variable of
    ``self._obs_layer`` as its only input:

    * ``self._f_actions``     -- index of the largest output along axis 1.
    * ``self._f_actions_vec`` -- that index one-hot encoded with depth
      ``self._n``.
    * ``self._f_max_qvals``   -- the largest output value along axis 1.

    NOTE(review): the names suggest the network output holds one Q-value
    per discrete action and that axis 0 is the batch axis -- confirm
    against the layer definitions.
    """
    # deterministic=True is forwarded to the layer library; presumably it
    # switches off stochastic layers (e.g. dropout) -- confirm against L.
    output_vec = L.get_output(self._output_vec_layer, deterministic=True)

    # tf.to_int64 is deprecated; tf.cast(..., tf.int64) is the documented
    # replacement and yields the same int64 tensor.
    action = tf.cast(tf.argmax(output_vec, 1), tf.int64)
    action_vec = tf.one_hot(action, self._n)
    max_qval = tf.reduce_max(output_vec, 1)

    obs_var = self._obs_layer.input_var
    # A fresh input list per call, as in the original, so the compiled
    # functions never share a list object.
    self._f_actions = tensor_utils.compile_function([obs_var], action)
    self._f_actions_vec = tensor_utils.compile_function([obs_var], action_vec)
    self._f_max_qvals = tensor_utils.compile_function([obs_var], max_qval)
deterministic_discrete_mlp_q_function.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录