def replay_train(self, mainDQN, targetDQN, train_batch):
    """Build Double-DQN training targets from a replay batch and fit mainDQN.

    For every transition the current Q-values of ``state`` are taken from
    ``mainDQN`` and only the entry of the action that was actually taken is
    overwritten with the learning target:

    * terminal transition: ``reward``
    * otherwise: ``reward + self.dis * Q_target(next_state, a*)`` where
      ``a* = argmax(Q_main(next_state))``; the main network selects the
      action and the target network evaluates it.

    Args:
        mainDQN: Network being trained. Must provide ``predict(state)`` and
            ``update(x_stack, y_stack)``.
        targetDQN: Network used to evaluate the selected next action. Must
            provide ``predict(state)``.
        train_batch: Iterable of
            ``(state, action, reward, next_state, done)`` tuples.

    Returns:
        Whatever ``mainDQN.update`` returns for the stacked inputs/targets.
    """
    # NOTE(review): predict() is assumed to return an array shaped
    # (1, self.output_size), as implied by the Q[0, action] indexing below;
    # confirm against the DQN class.
    states = []
    targets = []

    for state, action, reward, next_state, done in train_batch:
        Q = mainDQN.predict(state)

        if done:
            # No future reward after a terminal state.
            Q[0, action] = reward
        else:
            next_q_target = targetDQN.predict(next_state)
            best_next_action = np.argmax(mainDQN.predict(next_state))
            Q[0, action] = reward + self.dis * next_q_target[0, best_next_action]

        states.append(np.reshape(state, [self.input_size]))
        # Copy so a predict() implementation that reuses its output buffer
        # cannot alias rows collected on earlier iterations.
        targets.append(np.array(Q))

    # Stack once instead of growing the arrays inside the loop. Seeding with
    # an empty (0, size) array keeps the result shape and float64 dtype the
    # same as before, including for an empty batch.
    x_stack = np.vstack([np.empty((0, self.input_size))] + states)
    y_stack = np.vstack([np.empty((0, self.output_size))] + targets)

    return mainDQN.update(x_stack, y_stack)
main_async.py 文件源码
python
阅读 29
收藏 0
点赞 0
评论 0
评论列表
文章目录