def agent_start(self, observation):
    """Handle the first observation: store it, choose an action, record state.

    The observation is kept as the current state, converted to a float32
    array and passed through ``cuda.to_gpu`` with ``self.gpu_id``, then fed
    to ``self.DQN.e_greedy`` together with ``self.epsilon``.

    Args:
        observation: The initial observation. It must be convertible by
            ``np.asanyarray(..., dtype=np.float32)`` and provide ``.copy()``.

    Returns:
        The action returned by ``self.DQN.e_greedy``.
    """
    # Keep the incoming observation as the current state (no copy is made
    # here, so self.state refers to the same object as `observation`).
    self.state = observation

    # Convert to float32 on the host first, then hand the array to the
    # device selected by self.gpu_id.
    host_state = np.asanyarray(self.state, dtype=np.float32)
    device_state = cuda.to_gpu(host_state, self.gpu_id)

    # Epsilon-greedy selection; e_greedy yields the action and the Q-values.
    chosen_action, q_values = self.DQN.e_greedy(device_state, self.epsilon)

    # Remember what was done so the next step can refer back to it.
    self.lastAction = chosen_action
    self.last_state = self.state.copy()
    self.last_observation = observation.copy()

    # Track the largest Q-value for this step. `.get()` presumably copies
    # the device array back to host memory -- confirm against the array type.
    q_on_host = q_values.get()
    self.max_Q_list.append(np.max(q_on_host))

    return chosen_action
评论列表
文章目录