def test(self, nb_episodes=1, maximum_episode_length=5000000):
def evaluate_episode():
reward = 0
observation = self.env.reset()
for _ in range(maximum_episode_length):
action = self.choose_action(self.embedding_network(Variable(Tensor(observation)).unsqueeze(0)), 0)
observation, immediate_reward, finished, info = self.env.step(action)
reward += immediate_reward
if finished:
break
return reward
r = 0
for _ in range(nb_episodes):
r += evaluate_episode()
return r / nb_episodes
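# 评论列表 (comment list) -- navigation label left over from the source web page, not code
# 文章目录 (article table of contents) -- navigation label left over from the source web page, not code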