def learn(self):
    """Run one epsilon-greedy step from the current state.

    Picks an action among those listed for the current cell in
    ``self.action_list``: with probability ``1 - self.epsilon`` (approximately;
    the test is ``np.random.rand() > self.epsilon``) the action with the
    largest value in ``self.q`` is taken, ties broken uniformly at random;
    otherwise an action is drawn uniformly at random.

    Side effects, in this order:
      1. ``self.update_q(action, move)`` is called.
      2. ``self.q_max_value(move)`` is appended to ``self.q_value_list``.
      3. ``self.state`` is advanced in place by ``move``.

    NOTE(review): assumes ``self.q`` is indexable as ``q[action, y, x]`` and
    that ``self.state`` supports ``+=`` with the values stored in
    ``self.move_list`` (e.g. NumPy arrays) -- confirm against the class.
    """
    y, x = self.state
    # No copy is needed: nothing below mutates the per-cell action container.
    current_actions = self.action_list[y, x]

    if np.random.rand() > self.epsilon:
        # Exploit: Q-values restricted to the actions allowed in this cell.
        q_values = self.q[current_actions, y, x]
        # flatnonzero yields plain integer positions; argwhere would yield
        # 1-element arrays, which cannot be used to index a Python list.
        best_positions = np.flatnonzero(q_values == q_values.max())
        position = int(random.choice(best_positions.tolist()))
        action = current_actions[position]
    else:
        # Explore: uniform random choice among the allowed actions.
        action = random.choice(current_actions)

    move = self.move_list.get(action)
    self.update_q(action, move)
    # Record the value estimate before the state is advanced.
    self.q_value_list.append(self.q_max_value(move))
    self.state += move
评论列表
文章目录