def action_given(self, state, add_noise=False):
    """Run the action output op for a single, explicitly provided state.

    The state is wrapped in a one-element list before being fed to
    `self.input_state`, and the network is evaluated with
    `base_network.IS_TRAINING` set to False.

    Args:
      state: the state to feed; it is passed to the graph as `[state]`.
      add_noise: when True, a sample from `self.exploration_noise` is added
        to the first row of the result, which is then clipped to [-1, 1].

    Returns:
      The value produced by running `self.output_action` (with noise and
      clipping applied if requested).
      NOTE(review): presumably an array of shape (1, action_dim) -- confirm.
    """
    # feed explicitly provided state
    actions = tf.get_default_session().run(
        self.output_action,
        feed_dict={self.input_state: [state],
                   base_network.IS_TRAINING: False})

    # NOTE: noise is added _outside_ tf graph. we do this simply because the
    # noisy output is never used for any part of computation graph required
    # for online training. it's only used during training after being added
    # to the replay buffer.
    if add_noise:
        if VERBOSE_DEBUG:
            # snapshot as a string now; the += below mutates actions in place.
            pre_noise = str(actions)
        actions[0] += self.exploration_noise.sample()
        # action output is _always_ within [-1, 1].
        # np.clip's signature is (a, a_min, a_max); the array to clip must
        # come first, otherwise only the upper bound ends up being enforced.
        actions = np.clip(actions, -1, 1)
        if VERBOSE_DEBUG:
            print("TRAIN action_given pre_noise %s post_noise %s" % (pre_noise, actions))
    return actions
评论列表
文章目录