agent.py 文件源码-python代码片段

def play(self, n_step=10000, n_episode=1000, test_ep=0.05, render=False):
    """Run evaluation episodes and print the best episode reward so far.

    For each episode a new random game is started, the test history is
    filled with ``self.history_length`` copies of the first screen, and
    then the predict / act / observe loop runs until the environment
    reports a terminal state or ``n_step`` steps have been taken.

    When ``self.display`` is falsy, the gym monitor is started on a
    directory under ``/tmp`` before the first episode and is closed when
    the method exits, including when an episode raises.

    Args:
        n_step: Maximum number of steps per episode.
        n_episode: Number of episodes to play.
        test_ep: Value passed to ``self.predict`` as its second argument
            (presumably the exploration epsilon -- confirm against
            ``predict``). ``None`` selects ``self.ep_end``.
        render: Accepted for interface compatibility; not used here.

    Returns:
        None. A banner with the best episode index and reward is printed
        after every episode.
    """
    if test_ep is None:
        test_ep = self.ep_end

    test_history = History(self.config, self.ob_shape_list)

    # Evaluate the flag once so that close() is called exactly when
    # start() was.
    recording = not self.display
    if recording:
        gym_dir = '/tmp/%s-%s' % (self.env_name, get_time())
        self.env.env.monitor.start(gym_dir)

    try:
        # None means "no episode finished yet"; seeding with 0 would
        # hide the real best whenever every episode scores below zero.
        best_reward, best_idx = None, 0
        for idx in range(n_episode):
            # Only the first screen is needed; the other returned values
            # are replaced by the first env.act() call below.
            screen, _, _, _ = self.env.new_random_game()
            current_reward = 0

            for _ in range(self.history_length):
                test_history.add(screen)

            for _ in tqdm(range(n_step), ncols=70):
                # 1. predict
                action = self.predict(test_history.get(), test_ep)
                # 2. act
                screen, reward, terminal = self.env.act(
                    action, is_training=False)
                # 3. observe
                test_history.add(screen)

                current_reward += reward
                if terminal:
                    break

            if best_reward is None or current_reward > best_reward:
                best_reward = current_reward
                best_idx = idx

            # Single-argument parenthesised prints produce the same output
            # whether or not print is a statement in this file.
            print("=" * 30)
            print(" [%d] Best reward : %d" % (best_idx, best_reward))
            print("=" * 30)
    finally:
        if recording:
            self.env.env.monitor.close()
            # gym.upload(gym_dir, writeup='https://github.com/devsisters/DQN-tensorflow', api_key='')