def __init__(self, env, cmdl):
    """Wrap ``env`` and set up the bookkeeping used for periodic evaluation.

    Args:
        env: The environment to wrap; forwarded unchanged to the parent
            class constructor.
        cmdl: Configuration object. The attributes read here are
            ``eval_frequency``, ``eval_steps``, ``eval_start``,
            ``training_steps``, ``display_plots`` and (only when plotting
            is enabled) ``label``.
    """
    super(EvaluationMonitor, self).__init__(env)

    self.freq = cmdl.eval_frequency  # in steps
    self.eval_steps = cmdl.eval_steps
    self.cmdl = cmdl

    if self.cmdl.display_plots:
        # Imported lazily so visdom is only required when plotting is
        # enabled. The package is `visdom`; `Visdom` is the client class
        # it exports (a bare `import Visdom` does not give a callable).
        from visdom import Visdom

        self.vis = Visdom()
        # Create the line plot with a single (0, 0) point and keep the
        # value returned by `line` for later use.
        self.plot = self.vis.line(
            Y=np.array([0]), X=np.array([0]),
            opts=dict(
                title=cmdl.label,
                caption="Episodic reward per %d steps." % self.eval_steps)
        )

    # Running counters, all starting from their initial values.
    self.eval_cnt = 0
    self.crt_training_step = 0
    self.step_cnt = 0
    self.ep_cnt = 1
    self.total_rw = 0
    # NOTE(review): presumably the best mean reward seen so far, seeded
    # with a low sentinel -- confirm against the code that updates it.
    self.max_mean_rw = -1000

    # Number of multiples of `eval_frequency` that fall inside
    # [eval_start, training_steps]; used as the length of the buffers below.
    no_of_evals = cmdl.training_steps // cmdl.eval_frequency \
        - (cmdl.eval_start - 1) // cmdl.eval_frequency

    # Zero-initialised 1-D buffers with `no_of_evals` entries each.
    self.eval_frame_idx = torch.LongTensor(no_of_evals).fill_(0)
    self.eval_rw_per_episode = torch.FloatTensor(no_of_evals).fill_(0)
    self.eval_rw_per_frame = torch.FloatTensor(no_of_evals).fill_(0)
    self.eval_eps_per_eval = torch.LongTensor(no_of_evals).fill_(0)
评论列表
文章目录