def play(self, n_step=10000, n_episode=1000, test_ep=0.05, render=False):
    """Play evaluation episodes and print the best episode reward so far.

    Each episode starts from ``self.env.new_random_game()``, pushes the
    first screen into the history ``self.history_length`` times, and then
    repeatedly predicts an action, applies it with ``is_training=False``
    and records the resulting screen until the environment reports a
    terminal state or ``n_step`` steps have been taken.  After every
    episode the best total reward seen so far is printed together with the
    index of the episode that achieved it.

    When ``self.display`` is falsy, the environment's monitor is started on
    a new directory under ``/tmp`` before the first episode and closed
    afterwards, even if an episode raises.

    NOTE(review): the indentation of this method was lost in the source it
    was recovered from; the nesting below (report printed once per episode)
    is reconstructed -- confirm against the upstream project.

    Args:
        n_step: Maximum number of steps per episode.
        n_episode: Number of episodes to play.
        test_ep: Value forwarded to ``self.predict`` as its second argument
            (presumably the exploration epsilon -- confirm against
            ``predict``).  ``None`` selects ``self.ep_end``.
        render: Accepted for interface compatibility; not used here.
    """
    if test_ep is None:
        test_ep = self.ep_end

    test_history = History(self.config, self.ob_shape_list)

    # Decide once whether this run is recorded so that the monitor's
    # start/close calls stay paired even if self.display changes meanwhile.
    recording = not self.display
    if recording:
        gym_dir = '/tmp/%s-%s' % (self.env_name, get_time())
        self.env.env.monitor.start(gym_dir)

    try:
        # None (rather than 0) so that a run whose episode totals are all
        # negative still reports the real best episode.
        best_reward, best_idx = None, 0

        for idx in range(n_episode):
            # Only the initial screen is needed from the new game.
            screen, _, _, _ = self.env.new_random_game()
            current_reward = 0

            for _ in range(self.history_length):
                test_history.add(screen)

            for _ in tqdm(range(n_step), ncols=70):
                # 1. predict
                action = self.predict(test_history.get(), test_ep)
                # 2. act
                screen, reward, terminal = self.env.act(action, is_training=False)
                # 3. observe
                test_history.add(screen)

                current_reward += reward
                if terminal:
                    break

            if best_reward is None or current_reward > best_reward:
                best_reward = current_reward
                best_idx = idx

            # Single-argument print(...) emits the same text under both the
            # Python 2 print statement and the Python 3 print function.
            print("=" * 30)
            print(" [%d] Best reward : %d" % (best_idx, best_reward))
            print("=" * 30)
    finally:
        # Always release the monitor; otherwise a failure mid-run would
        # leave the recording open.
        if recording:
            self.env.env.monitor.close()
            # To publish the recording:
            # gym.upload(gym_dir, writeup='https://github.com/devsisters/DQN-tensorflow', api_key='')
评论列表
文章目录