def update_on_policy(self, statevar):
    """Run an on-policy update from the recorded history.

    Unless ``self.disable_online_update`` is set, a return value ``R`` is
    computed and ``self.update`` is called with the ``past_*`` histories
    covering the steps ``self.t_start`` .. ``self.t``. Afterwards
    ``self.init_history_data_for_online_update()`` is called.

    Args:
        statevar: Input fed to ``self.model`` to obtain the value used as
            ``R``. When ``None``, ``R`` is 0 and the model is not
            evaluated (presumably the terminal-state case; confirm
            against the caller).

    Raises:
        AssertionError: If ``self.t_start`` is not smaller than ``self.t``.
    """
    assert self.t_start < self.t

    if not self.disable_online_update:
        if statevar is None:
            R = 0
        else:
            # Only the third model output is needed here. The model is
            # evaluated with backprop disabled and under state_kept().
            with chainer.no_backprop_mode(), state_kept(self.model):
                _, _, v = self.model(statevar)
            R = float(v.data)

        self.update(
            t_start=self.t_start,
            t_stop=self.t,
            R=R,
            states=self.past_states,
            actions=self.past_actions,
            rewards=self.past_rewards,
            values=self.past_values,
            action_values=self.past_action_values,
            action_distribs=self.past_action_distrib,
            action_distribs_mu=None,
            avg_action_distribs=self.past_avg_action_distrib,
        )

    # NOTE(review): the original indentation was lost; this call is placed
    # outside the `disable_online_update` guard so the history is
    # re-initialised on every call. Confirm against the upstream source.
    self.init_history_data_for_online_update()
# (Web-page residue, not part of the code: "Comment list" / "Table of contents")