def store_effect(self, idx, action, reward, done):
    """Store the effects of the action taken after observing frame `idx`.

    `store_frame` and `store_effect` are kept as two separate functions so
    that one can call `encode_recent_observation` in between.

    Parameters
    ----------
    idx : int
        Index in the buffer of the recently observed frame (returned by
        `store_frame`).
    action : int
        Action that was performed upon observing this frame.
    reward : float
        Reward that was received when the action was performed.
    done : bool
        True if the episode was finished after performing that action.
    """
    # All three records are written to the same slot so that they stay
    # aligned with the frame stored at `idx`.
    self.action[idx] = action
    self.reward[idx] = reward
    self.done[idx] = done
# Web-page navigation residue (not part of the code): "Comment list" / "Article table of contents".