def _compute_max_qval_action_pair(self, state, q_func_id=None):
    '''
    Find the action with the largest current Q-value estimate for a state.

    Args:
        state (State): the state whose actions are evaluated.
        q_func_id (str): either "A", "B", or None. Forwarded unchanged to
            self.get_q_value; the original note says None means the average
            of A and B, which is decided inside get_q_value (not here).

    Returns:
        (tuple) --> (float, str): the largest Q-value seen and the action
            that produced it.
    '''
    top_q = float("-inf")

    # Fallback answer: it survives only if no action's Q-value ever compares
    # strictly greater than -inf.
    top_action = random.choice(self.actions)

    # Shuffle a slice copy rather than self.actions itself, so that ties are
    # not always resolved in favour of the same action.
    candidates = self.actions[:]
    random.shuffle(candidates)

    for candidate in candidates:
        candidate_q = self.get_q_value(state, candidate, q_func_id)
        if not candidate_q > top_q:
            # Not a strict improvement: the earlier winner stays.
            continue
        top_q, top_action = candidate_q, candidate

    return top_q, top_action
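# Web page navigation residue (not code): "Comment list"
# Web page navigation residue (not code): "Article table of contents"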