def play(self, mode="random"):
    """Play one episode of the game and record it to ``gif/demo.gif``.

    Args:
        mode: ``"random"`` chooses every action with ``random.randint(0, 1)``.
            Any other value chooses the argmax of ``self.main_net.q_values``
            evaluated on the current stack of four preprocessed frames.

    Side effects:
        Prints each chosen action (plus the Q-values when the network is
        used) and the total step count, writes the played frames to
        ``gif/demo.gif``, and finally terminates the process through
        ``sys.exit()`` -- the method never returns to its caller.
    """
    # NOTE(review): variables are freshly initialised here and no checkpoint
    # restore is visible in this block -- confirm trained weights are loaded
    # elsewhere before relying on the non-random mode.
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        # The context manager guarantees the GIF file is finalised and its
        # handle released even if a game step or network call raises.
        with imageio.get_writer('gif/demo.gif', mode='I') as writer:
            game_state = game.GameState()
            total_steps = 0

            # Take one step with action index 0 to obtain the first frame.
            action = np.zeros([2])
            action[0] = 1
            new_state, _, done = game_state.frame_step(action)
            temp_img = self.pre_process(new_state)

            # Seed the frame history with four copies of the first frame.
            img_batch = [temp_img] * 4

            for _ in range(self.max_steps):
                if mode == "random":
                    temp_action = random.randint(0, 1)
                else:
                    # Frames are stacked on axis 2 and reshaped to the
                    # [-1, 80, 80, 4] layout fed to the network input.
                    stacked = np.stack(img_batch, axis=2)
                    temp_weights = sess.run(
                        [self.main_net.q_values],
                        feed_dict={
                            self.main_net.input_state:
                                np.reshape(stacked, [-1, 80, 80, 4]),
                        },
                    )
                    temp_action = np.argmax(temp_weights)
                    print(temp_weights)

                action = np.zeros([2])
                action[temp_action] = 1
                new_state, _, done = game_state.frame_step(action)

                # Rotate + flip the raw frame before recording it.
                # NOTE(review): presumably converts the game's frame layout
                # to upright image orientation -- confirm against GameState.
                temp_new_state = np.flip(
                    np.rot90(new_state, k=1, axes=(1, 0)), 1)
                writer.append_data(temp_new_state)

                # Newest frame goes to the front; the oldest one is dropped.
                temp_img = self.pre_process(new_state)
                img_batch.insert(0, temp_img)
                img_batch.pop()

                print(temp_action)
                total_steps += 1
                if done:
                    break

            print("Total Steps ", str(total_steps))

    # Kept from the original: playing a demo ends the whole program.
    sys.exit()
评论列表
文章目录