def epsilon_greedy_q_policy(self, state):
    """Choose an action for ``state`` using an epsilon-greedy rule.

    A sample is drawn with ``numpy.random.random()``. When the sample is
    strictly greater than ``self.epsilon`` the action returned by
    ``self.get_max_q_action(state)`` is used (exploit); otherwise an action
    is drawn from ``self.actions`` with ``numpy.random.choice`` (explore).

    Args:
        state (State): state for which an action is selected.

    Returns:
        (str): the selected action.
    """
    if numpy.random.random() > self.epsilon:
        # Exploit: defer to the greedy-Q helper for this state.
        return self.get_max_q_action(state)

    # Explore: pick one of the available actions at random.
    return numpy.random.choice(self.actions)
评论列表
文章目录