def agent_start(self, observation):
# Preprocess
tmp = np.bitwise_and(np.asarray(observation.intArray[128:]).reshape([210, 160]), 0b0001111) # Get Intensity from the observation
obs_array = (spm.imresize(tmp, (110, 84)))[110-84-8:110-8, :] # Scaling
# Initialize State
self.state = np.zeros((4, 84, 84), dtype=np.uint8)
self.state[0] = obs_array
state_ = cuda.to_gpu(np.asanyarray(self.state.reshape(1, 4, 84, 84), dtype=np.float32))
# Generate an Action e-greedy
returnAction = Action()
action, Q_now = self.DDQN.e_greedy(state_, self.epsilon)
returnAction.intArray = [action]
# Update for next step
self.lastAction = copy.deepcopy(returnAction)
self.last_state = self.state.copy()
self.last_observation = obs_array
return returnAction
评论列表
文章目录