def replay(self):
    """Run one training step on a batch drawn from the replay memory.

    Does nothing (returns ``None``) until ``self.memory`` holds at least
    ``self.batch_size`` entries.  Otherwise a batch is obtained from
    ``self._get_batches()``, a bootstrapped target
    ``reward + gamma * (not done) * max_a Q(next_state, a)`` is computed for
    every sample, and that target is written into the Q-value of the action
    that was actually taken.  The Q-values of all other actions keep the
    values predicted for ``state``.

    Returns:
        Whatever ``self.model.fit`` returns (presumably a Keras ``History``
        object -- confirm against the model type), or ``None`` when the
        memory is still too small.
    """
    if len(self.memory) < self.batch_size:
        return None

    state, action, reward, next_state, done = self._get_batches()

    # NOTE(review): the conventional DQN update bootstraps from the *target*
    # network and takes the baseline Q-values from the *online* network; the
    # roles look swapped here.  Left unchanged -- confirm this is intended.
    best_next_q = np.amax(self.model.predict(next_state), axis=1)

    # Build the targets in a fresh array instead of ``reward += ...``: the
    # in-place form mutates the sampled batch and raises a casting error
    # when the rewards are stored with an integer dtype.
    targets = reward + self.gamma * np.logical_not(done) * best_next_q

    q_target = self.target_model.predict(state)

    # Address the taken action of every sample directly.  A one-hot mask
    # built with ``pd.get_dummies`` only has columns for the actions present
    # in this batch, so its column positions stop matching action indices as
    # soon as one action is missing from the batch.
    action_idx = np.asarray(action).astype(np.intp)
    rows = np.arange(len(action_idx))
    q_target[rows, action_idx] = targets

    return self.model.fit(state, q_target,
                          batch_size=self.batch_size,
                          epochs=1,
                          verbose=False)
评论列表
文章目录