def get_action_distr(self, state, beta=0.2):
    '''
    Compute a softmax distribution over self.actions from their Q-values
    in the given state.

    Args:
        state (State)
        beta (float): Softmax scaling parameter. Each Q-value is multiplied
            by beta before exponentiation (i.e. it acts as an inverse
            temperature): a larger beta yields a more peaked distribution,
            and beta = 0 yields a uniform one.

    Returns:
        (list of floats): The i-th float corresponds to the probability
            mass associated with the i-th action (indexing into
            self.actions). Empty if self.actions is empty.
    '''
    scaled_q_vals = [beta * self.get_q_value(state, action)
                     for action in self.actions]
    if not scaled_q_vals:
        return []

    # Softmax is invariant to adding a constant to every exponent, so shift
    # by the maximum: the largest exponent becomes 0, which keeps
    # numpy.exp from overflowing to inf (and the result from becoming nan).
    max_scaled = max(scaled_q_vals)
    exp_vals = [numpy.exp(sq - max_scaled) for sq in scaled_q_vals]
    total = sum(exp_vals)
    return [ev / total for ev in exp_vals]
# (Web-page residue, not part of the code: "Comment list" / "Table of contents")