def neg_log_likelihood(self, alphabetas):
    """Return the negative log-likelihood of the recorded actions.

    Only rows of ``self.df`` whose ``'cue'`` value is listed in
    ``self.cues`` are used.  Each cue keeps its own array of
    ``self.n_actions`` action values, all starting at zero.  For every row,
    in order, the value of the taken action is moved towards the reward by
    that cue's ``alpha``; then the log of the ``softmax`` entry for the
    taken action, evaluated on the updated values with that cue's ``beta``,
    is added to a running total.

    Args:
        alphabetas: Flat sequence of interleaved parameters
            ``[alpha_0, beta_0, alpha_1, beta_1, ...]``; pair ``i`` is used
            for ``self.cues[i]``.

    Returns:
        The negated running total of log-probabilities (``0`` when no row
        matches any cue).
    """
    alphas = alphabetas[0::2]
    betas = alphabetas[1::2]

    # Resolve each cue's position in the parameter vector once, instead of
    # a linear ``list.index`` scan on every row.  ``setdefault`` keeps the
    # first position when a cue is listed twice, matching ``list.index``.
    cue_index = {}
    for position, cue in enumerate(self.cues):
        cue_index.setdefault(cue, position)

    trials = self.df[self.df['cue'].isin(self.cues)]
    actions = trials['action'].values
    rewards = trials['reward'].values
    cues = trials['cue'].values

    q_values = {cue: np.zeros(self.n_actions) for cue in self.cues}
    log_prob = 0
    for action, reward, cue in zip(actions, rewards, cues):
        i = cue_index[cue]
        q = q_values[cue]
        q[action] += alphas[i] * (reward - q[action])
        # NOTE(review): the action is scored against values that already
        # include this row's reward.  Order kept as in the original; confirm
        # whether scoring before the update was intended.
        # `softmax` is defined outside this block; presumably its second
        # argument is an inverse temperature -- TODO confirm.
        log_prob += np.log(softmax(q, betas[i])[action])
    return -log_prob
评论列表
文章目录