def build_graph(self):
    """Build the epsilon-greedy action-selection ops and return the action tensor.

    Side effects:
        Sets ``self.ph_local_step`` (int64 placeholder with shape ``[]``) and
        ``self.ph_q_value`` (float32 placeholder with shape
        ``[None, action_size]``).

    Returns:
        An int32 tensor produced by ``tf.cond``: when a fresh uniform sample
        is below the current epsilon it is a random integer drawn from
        ``[0, action_size)``; otherwise it is the squeezed arg-max of
        ``self.ph_q_value`` along axis 1.
        NOTE(review): the greedy branch is only a scalar when a single row
        of Q-values is fed -- confirm against callers.
    """
    settings = dqn_config.config
    eps_cfg = settings.eps
    n_actions = settings.output.action_size

    self.ph_local_step = tf.placeholder(tf.int64, [])
    self.ph_q_value = tf.placeholder(tf.float32, [None, n_actions])

    # In stochastic mode ``decay_steps`` is unpacked as the arguments of
    # np.random.uniform, and one integer horizon is sampled at build time.
    decay_steps = eps_cfg.decay_steps
    if eps_cfg.stochastic:
        decay_steps = int(np.random.uniform(*decay_steps))

    # Epsilon is scheduled from ``initial`` towards ``end`` as the fed local
    # step advances over ``decay_steps``.
    epsilon = tf.train.polynomial_decay(
        eps_cfg.initial,
        self.ph_local_step,
        decay_steps,
        eps_cfg.end,
    )

    def random_action():
        # Exploration branch: integer action id in [0, n_actions).
        return tf.random_uniform([], 0, n_actions, dtype=tf.int32)

    def greedy_action():
        # Exploitation branch: index of the largest Q-value per row.
        best = tf.argmax(self.ph_q_value, axis=1)
        return tf.cast(tf.squeeze(best), tf.int32)

    should_explore = tf.less(tf.random_uniform([]), epsilon)
    return tf.cond(should_explore, random_action, greedy_action)
# [web-page residue] Comment list
# [web-page residue] Article table of contents