def start(self, x=None):
    """Initialise the history state from a first observation and pick an action.

    Args:
        x: Object whose ``content.data`` holds the first observation.
            When ``None``, ``Tensor.context`` is used instead.

    Returns:
        The action selected by ``self.func.e_greedy`` for the initial state.

    Side effects:
        Sets ``self.state``, ``self.lastAction``, ``self.last_state`` and
        ``self.last_observation`` for use by the following step.
    """
    source = Tensor.context if x is None else x
    observation = source.content.data

    history_len = self.func.hist_size
    feature_dim = self.image_feature_dim

    # Fresh zero-filled history; only slot 0 is populated with the observation.
    # NOTE(review): the buffer is uint8, so the observation is stored through
    # a uint8 cast -- confirm the features are meant to be integral.
    self.state = np.zeros((history_len, feature_dim), dtype=np.uint8)
    self.state[0] = observation

    # The policy receives a float32 array with a leading axis of size 1.
    batch = np.asanyarray(
        self.state.reshape(1, history_len, feature_dim), dtype=np.float32
    )
    if Deel.gpu >= 0:
        batch = cuda.to_gpu(batch)

    # e-greedy selection; the second returned value is not needed here.
    chosen, _ = self.func.e_greedy(batch, self.epsilon)

    # Remember what the next step needs.
    self.lastAction = copy.deepcopy(chosen)
    self.last_state = self.state.copy()
    self.last_observation = observation
    return chosen
评论列表
文章目录