def main(env, weight_path, epsilon):
    """Play one rendered episode with a DQN loaded from saved weights.

    Each step either picks a uniformly random action (when
    ``random() <= epsilon``) or the action with the highest value
    returned by the network, then prints the step number and reward.

    Args:
        env: Argument forwarded to ``make_atari`` to build the environment
            (presumably an environment id -- confirm against ``make_atari``).
        weight_path: Path handed to ``torch.load``; the loaded object is
            passed to ``DQN.load_state_dict``.
        epsilon: Threshold compared against ``random()``; larger values
            yield more random actions.
    """
    env = make_atari(env)
    try:
        q_function = DQN(env.action_space.n)
        q_function.load_state_dict(torch.load(weight_path))

        done = False
        state = env.reset()
        step = 1
        # Pause before the episode starts.
        sleep(2)
        while not done:
            env.render()
            if random() <= epsilon:
                # Exploration: uniformly random action index.
                action = randrange(0, env.action_space.n)
            else:
                # Exploitation: add a leading batch axis, run the network,
                # flatten its output and take the index of the largest value.
                net_input = variable(to_tensor(state).unsqueeze(0))
                q_values = q_function(net_input).data.view(-1)
                # int() yields a plain Python integer whether .sum() returns
                # a number or a zero-dimensional tensor.
                action = int(q_values.max(dim=0)[1].sum())
            state, reward, done, _info = env.step(action)
            print(f"[step: {step:>5}] [reward: {reward:>5}]")
            step += 1
        # Pause after the episode ends (assumed to sit outside the loop --
        # TODO confirm).
        sleep(2)
    finally:
        # Always release the environment, even if setup or stepping raised.
        env.close()
评论列表
文章目录