# flappy_dqn_gym.py — Python source code snippet

def play(self, mode="random"):
        """Play one game episode, record it to ``gif/demo.gif``, then exit.

        Args:
            mode: ``"random"`` picks each action with ``random.randint(0, 1)``;
                any other value picks the index of the largest entry of
                ``self.main_net.q_values`` for the current 4-frame stack.

        Side effects:
            Runs the global-variable initializer in a fresh TensorFlow
            session, prints the chosen action on every step (plus the raw
            Q-values when not in random mode), writes every frame of the
            episode to ``gif/demo.gif`` and finally terminates the process
            via ``sys.exit()`` -- this method never returns to its caller.

        NOTE(review): no checkpoint restore is visible in this method, so the
        non-random mode appears to evaluate freshly initialised weights --
        confirm whether a ``Saver.restore`` is expected before playing.
        """
        import os  # local import: only needed to create the output folder

        init = tf.global_variables_initializer()

        # get_writer() cannot create missing parent directories itself.
        os.makedirs('gif', exist_ok=True)

        with tf.Session() as sess:

            sess.run(init)

            # The context manager guarantees the GIF is finalised and the file
            # handle released, even when SystemExit or an error propagates.
            with imageio.get_writer('gif/demo.gif', mode='I') as writer:

                game_state = game.GameState()
                total_steps = 0

                # Take one initial step with action index 0 to obtain a first
                # frame to build the input stack from.
                action = np.zeros([2])
                action[0] = 1
                new_state, reward, done = game_state.frame_step(action)

                # Seed the 4-frame stack with the same pre-processed frame.
                img_batch = [self.pre_process(new_state)] * 4

                for _ in range(self.max_steps):

                    if mode == "random":
                        temp_action = random.randint(0, 1)
                    else:
                        # Stack the frames on a new last axis and add a batch
                        # dimension: the network input is fed as (-1, 80, 80, 4).
                        net_input = np.reshape(
                            np.stack(img_batch, axis=2), [-1, 80, 80, 4])
                        temp_weights = sess.run(
                            [self.main_net.q_values],
                            feed_dict={self.main_net.input_state: net_input})
                        temp_action = np.argmax(temp_weights)
                        print(temp_weights)

                    # One-hot encode the chosen action for the game engine.
                    action = np.zeros([2])
                    action[temp_action] = 1

                    new_state, reward, done = game_state.frame_step(action)

                    # Clockwise rot90 followed by a flip along axis 1 swaps the
                    # first two axes of the raw frame; presumably this converts
                    # the game's (x, y) layout to image (row, column) order --
                    # confirm against GameState.frame_step.
                    writer.append_data(
                        np.flip(np.rot90(new_state, k=1, axes=(1, 0)), 1))

                    # Newest frame goes to the front, oldest drops off the end.
                    img_batch.insert(0, self.pre_process(new_state))
                    img_batch.pop()

                    print(temp_action)

                    total_steps += 1

                    if done:
                        break

            print("Total Steps ", str(total_steps))

            # Deliberately ends the whole process once the demo episode is done.
            sys.exit()