demo_a3c_ale.py 文件源码-python代码片段

demo_a3c_ale.py 文件源码

python

阅读 20 收藏 0 点赞 0 评论 0

def eval_performance(rom, model, deterministic=False, use_sdl=False,
                     record_screen_dir=None):
    """Run ``model`` in a fresh ALE environment until it is terminal.

    A new ``ale.ALE`` environment is created for ``rom`` with
    ``treat_life_lost_as_terminal=False``. The model's state is reset once,
    then the model is queried step by step until ``env.is_terminal`` becomes
    true.

    Args:
        rom: ROM argument forwarded as the first argument of ``ale.ALE``.
        model: Object providing ``reset_state()``, ``pi_and_v(state)`` and
            ``unchain_backward()``. The first element returned by
            ``pi_and_v`` must expose ``most_probable_actions`` and
            ``action_indices``.
        deterministic: If true, act with ``most_probable_actions[0]``;
            otherwise act with ``action_indices[0]``.
        use_sdl: Forwarded unchanged to ``ale.ALE``.
        record_screen_dir: Forwarded unchanged to ``ale.ALE``.

    Returns:
        The sum of every value returned by ``env.receive_action`` during the
        run (presumably the per-step rewards -- confirm against ``ale.ALE``).
        It is ``0`` if the environment is already terminal at creation.
    """
    environment = ale.ALE(rom, treat_life_lost_as_terminal=False,
                          use_sdl=use_sdl,
                          record_screen_dir=record_screen_dir)
    model.reset_state()

    total_reward = 0
    while True:
        if environment.is_terminal:
            break

        # Insert a new leading axis so the model receives a batch of one.
        observation = np.expand_dims(dqn_phi(environment.state), 0)
        policy_out = model.pi_and_v(chainer.Variable(observation))[0]

        # Called after every forward pass, before the action is read;
        # presumably this cuts the graph history since no backward pass
        # is done here -- confirm against the model implementation.
        model.unchain_backward()

        action = (policy_out.most_probable_actions[0] if deterministic
                  else policy_out.action_indices[0])
        total_reward += environment.receive_action(action)

    return total_reward