def tf_explore(self, episode=0, timestep=0, num_actions=1):
    """Build the epsilon value for the given timestep.

    The schedule is piecewise:

    * ``timestep < start_timestep``: ``initial_epsilon``.
    * ``timestep > start_timestep + timesteps``: ``final_epsilon``.
    * otherwise: ``final_epsilon`` plus the gap
      ``initial_epsilon - final_epsilon`` scaled by
      ``2 ** -((timestep - start_timestep) / half_life)``.

    Args:
        episode: Not used by this schedule.
        timestep: Current timestep; compared against ``self.start_timestep``
            and cast to the framework float dtype for the decay computation.
            (Presumably a scalar integer tensor -- confirm against callers.)
        num_actions: Not used by this schedule.

    Returns:
        The result of the outer ``tf.cond`` selecting between the constant
        and the decaying epsilon.
    """
    def outside_decay_window():
        # The outer cond routes here only when the timestep is either before
        # the start or past the end of the window, so "not before the start"
        # can only mean "past the end".
        return tf.cond(
            pred=(timestep < self.start_timestep),
            true_fn=(lambda: self.initial_epsilon),
            false_fn=(lambda: self.final_epsilon)
        )

    def inside_decay_window():
        elapsed = tf.cast(x=timestep, dtype=util.tf_dtype('float')) - self.start_timestep
        half_lives_elapsed = elapsed / self.half_life
        # Each elapsed half-life halves the remaining gap to final_epsilon.
        decay_factor = 2 ** (-half_lives_elapsed)
        return self.final_epsilon + decay_factor * (self.initial_epsilon - self.final_epsilon)

    before_start = (timestep < self.start_timestep)
    after_end = (timestep > self.start_timestep + self.timesteps)
    return tf.cond(
        pred=tf.logical_or(x=before_start, y=after_end),
        true_fn=outside_decay_window,
        false_fn=inside_decay_window
    )
评论列表
文章目录