def tf_explore(self, episode, timestep, num_actions):
    """
    Build the exploration epsilon for ``timestep`` under a linear schedule.

    The schedule has three regions:

    * ``timestep < self.start_timestep``: ``self.initial_epsilon``.
    * ``timestep > self.start_timestep + self.timesteps``:
      ``self.final_epsilon``.
    * otherwise: linear interpolation from ``self.initial_epsilon`` to
      ``self.final_epsilon`` according to the fraction of
      ``self.timesteps`` elapsed since ``self.start_timestep``.

    Args:
        episode: Not used by this schedule.
        timestep: Current timestep; compared against the schedule bounds
            and cast to the float dtype for the interpolation.
        num_actions: Not used by this schedule.

    Returns:
        The result of the outer ``tf.cond`` selecting the epsilon value.
    """
    eps_start = self.initial_epsilon
    eps_end = self.final_epsilon
    decay_begin = self.start_timestep
    decay_length = self.timesteps

    def outside_decay_window():
        # Only reached when the outer predicate holds, so "not before the
        # window" here necessarily means "after the window".
        return tf.cond(
            pred=(timestep < decay_begin),
            true_fn=(lambda: eps_start),
            false_fn=(lambda: eps_end)
        )

    def inside_decay_window():
        elapsed = tf.cast(x=timestep, dtype=util.tf_dtype('float')) - decay_begin
        fraction_done = elapsed / decay_length
        return eps_start + fraction_done * (eps_end - eps_start)

    is_before = (timestep < decay_begin)
    is_after = (timestep > decay_begin + decay_length)
    is_outside = tf.logical_or(x=is_before, y=is_after)

    return tf.cond(
        pred=is_outside,
        true_fn=outside_decay_window,
        false_fn=inside_decay_window
    )
评论列表
文章目录