def agent_start(self, observation):
    """Store the initial observation and choose the first action.

    The observation is converted to a float32 array and handed to
    ``self.DQN.e_greedy`` together with ``self.epsilon``. The chosen action
    and the Q output are cached on the instance for use by later steps.

    Args:
        observation: The initial observation. It must be convertible by
            ``np.asanyarray(..., dtype=np.float32)`` and must provide a
            ``.copy()`` method (e.g. a list or a NumPy array).

    Returns:
        The action returned by ``self.DQN.e_greedy``, unchanged.

    Side effects:
        Sets ``self.state``, ``self.Q_recent``, ``self.lastAction``,
        ``self.last_state`` and ``self.last_observation``, and appends the
        maximum of ``self.Q_recent`` to ``self.max_Q_list``.
    """
    # Initialize state. Note this is the caller's object itself, not a copy.
    self.state = observation
    state_ = np.asanyarray(self.state, dtype=np.float32)

    # Generate an action e-greedy.
    action, Q_now = self.DQN.e_greedy(state_, self.epsilon)
    # Only the first entry of the returned Q output is kept
    # (presumably a batch of size one -- TODO confirm against DQN.e_greedy).
    self.Q_recent = Q_now[0]

    # Update for next step. The stored snapshots come from .copy(), so they
    # are objects distinct from the observation that was passed in.
    self.lastAction = action
    self.last_state = self.state.copy()
    self.last_observation = observation.copy()
    self.max_Q_list.append(np.max(self.Q_recent))
    return action
评论列表
文章目录