def __init__(self, reward, **kwargs):
    """
    Create an RL agent that is trained against a caller-supplied reward.

    Arguments:
        reward: a callable taking a pair (observation sequence, action
            sequence) and returning a reward in [0, 1]. The agent invokes
            it from time to time to score a partial trajectory.
        **kwargs: passed through unchanged to the parent initializer.

    Objective:
        Maximize the summed reward over every *complete* trajectory
        generated through act(). A trajectory counts as complete when
        act() was never called on the agent state that ends it.

    Mutability:
        Every Agent object has immutable state. Calling methods on an RL
        agent may nevertheless update external parameters, and those
        updates may change how already-existing Agent objects behave.
    """
    # Parent initialization runs first, then the reward function is stored.
    super(IntrinsicRL, self).__init__(**kwargs)
    self.reward = reward
    # Note: nothing is trained here. The act method is responsible for
    # sometimes calling reward() to collect training data, and for
    # updating the agent's parameters.
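# (web page residue, not part of the program) Comment list
# (web page residue, not part of the program) Article table of contents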