main_async.py 文件源码-python代码片段

main_async.py 文件源码

python

阅读 29 收藏 0 点赞 0 评论 0

项目：tensorflow_dqn_supermario 作者: JSDanielPark 项目源码文件源码

def replay_train(self, mainDQN, targetDQN, train_batch):
    """Build Q-value training targets from a replay batch and fit ``mainDQN``.

    For every transition the current prediction of ``mainDQN`` is taken as
    the baseline target row, and only the entry of the action actually taken
    is overwritten:

    * terminal transition (``done``): the entry becomes ``reward``;
    * otherwise: ``reward + self.dis * Q_target(next_state)[a*]`` where
      ``a*`` is the arg-max action according to ``mainDQN`` (action selected
      by the main network, evaluated by the target network, i.e. the
      Double-DQN style update).

    Args:
        mainDQN: Network being trained; must expose ``predict(state)`` and
            ``update(x_stack, y_stack)``.
        targetDQN: Network used to evaluate the selected next action; must
            expose ``predict(state)``.
        train_batch: Iterable of
            ``(state, action, reward, next_state, done)`` tuples.

    Returns:
        Whatever ``mainDQN.update(x_stack, y_stack)`` returns, where
        ``x_stack`` has ``self.input_size`` columns and ``y_stack`` has
        ``self.output_size`` columns (both have zero rows for an empty batch).

    Note:
        ``predict`` is indexed as ``Q[0, action]``, so it is assumed to
        return a 2-D array with one row per call -- confirm against the
        DQN class.
    """
    state_rows = []
    target_rows = []

    for state, action, reward, next_state, done in train_batch:
        Q = mainDQN.predict(state)

        if done:
            # No future return after a terminal state.
            Q[0, action] = reward
        else:
            next_q_target = targetDQN.predict(next_state)
            best_next_action = np.argmax(mainDQN.predict(next_state))
            Q[0, action] = reward + self.dis * next_q_target[0, best_next_action]

        state_rows.append(np.reshape(state, [self.input_size]))
        # Copy so the stored row stays independent of whatever buffer
        # ``predict`` handed back (the per-step vstack used to copy it).
        target_rows.append(np.array(Q))

    # Stack once instead of re-copying the growing matrices on every sample.
    # The empty float seed keeps the (0, size) shape for an empty batch and
    # the same dtype promotion as stacking onto ``np.empty(0)`` row by row.
    x_stack = np.vstack([np.empty((0, self.input_size))] + state_rows)
    y_stack = np.vstack([np.empty((0, self.output_size))] + target_rows)

    return mainDQN.update(x_stack, y_stack)