def choose_next_action(self, state):
    # Forward pass through the local network: value estimate, policy over
    # actions, and the distribution over action-repeat counts.
    network_output_v, network_output_pi, action_repeat_probs = self.session.run(
        [
            self.local_network.output_layer_v,
            self.local_network.output_layer_pi,
            self.local_network.action_repeat_probs,
        ],
        feed_dict={
            self.local_network.input_ph: [state],
        })

    network_output_pi = network_output_pi.reshape(-1)
    # np.asscalar was deprecated (and later removed) in NumPy; .item() is
    # the equivalent way to extract the scalar value estimate.
    network_output_v = network_output_v.item()

    # Sample an action from the policy and encode it as a one-hot vector.
    action_index = self.sample_policy_action(network_output_pi)
    new_action = np.zeros([self.num_actions])
    new_action[action_index] = 1

    # Sample how many times to repeat the chosen action (at least once).
    action_repeat = 1 + self.sample_policy_action(action_repeat_probs[0])

    return new_action, network_output_v, network_output_pi, action_repeat
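
For context, a minimal, self-contained sketch of the sampling pattern this method relies on, assuming sample_policy_action simply draws an index from a probability vector (the class's actual helper may differ in details such as renormalization); the pi and repeat_probs values below are hypothetical:

import numpy as np

def sample_policy_action(probs):
    # Draw one index from the probability vector; renormalizing guards
    # against small floating-point drift in the network's softmax output.
    probs = np.asarray(probs, dtype=np.float64)
    probs = probs / probs.sum()
    return int(np.random.choice(len(probs), p=probs))

# Mirror choose_next_action: one-hot action plus an action-repeat count.
pi = np.array([0.1, 0.6, 0.3])            # hypothetical policy over 3 actions
repeat_probs = np.array([0.5, 0.3, 0.2])  # hypothetical repeat distribution

action_index = sample_policy_action(pi)
new_action = np.zeros(len(pi))
new_action[action_index] = 1
action_repeat = 1 + sample_policy_action(repeat_probs)
print(new_action, action_repeat)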
Source file: sequence_decoder_actor_learner.py (Python)