def neg_log_likelihood(self, alphabetas):
    """Negative log-likelihood of the observed actions under a per-cue
    Q-learning model with softmax action selection. alphabetas interleaves
    one (alpha, beta) pair per cue."""
    alphas = alphabetas[0::2]  # learning rates, one per cue
    betas = alphabetas[1::2]   # inverse temperatures, one per cue
    df = self.df[self.df['cue'].isin(self.cues)]
    actions, rewards = df['action'].values, df['reward'].values
    cues = df['cue'].values
    prob_log = 0
    Q = {cue: np.zeros(self.n_actions) for cue in self.cues}
    for action, reward, cue in zip(actions, rewards, cues):
        alpha = alphas[self.cues.index(cue)]
        beta = betas[self.cues.index(cue)]
        Q[cue][action] += alpha * (reward - Q[cue][action])  # delta-rule update
        prob_log += np.log(softmax(Q[cue], beta)[action])
    return -prob_log
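Several snippets on this page call a free-standing softmax(x, beta) helper that is not shown here. A minimal sketch of what it presumably looks like, assuming beta is the inverse-temperature of a Boltzmann distribution (the helper in the original repositories may differ):

import numpy as np

def softmax(x, beta=1.0):
    """Softmax with inverse temperature beta: higher beta -> greedier choice.
    Shifting by the max does not change the result but keeps np.exp from
    overflowing."""
    z = beta * (np.asarray(x) - np.max(x))
    e = np.exp(z)
    return e / e.sum()

For example, softmax([1.0, 2.0], beta=5.0) puts nearly all probability on the second action, while beta close to 0 approaches a uniform choice.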
def run_single_softmax_experiment(beta, alpha):
    """Run a contextual bandit experiment with an agent that uses
    softmax action selection."""
    print('Running a contextual bandit experiment')
    cb = ContextualBandit()
    ca = ContextualAgent(cb, beta=beta, alpha=alpha)
    trials = 360
    for _ in range(trials):
        ca.run()
    df = DataFrame(ca.log, columns=('context', 'action', 'reward', 'Q(c,23)',
                                    'Q(c,14)', 'Q(c,8)', 'Q(c,3)'))
    # fn = 'softmax_experiment.csv'
    # df.to_csv(fn, index=False)
    # print('Sequence written in', fn)
    # globals().update(locals()) #
    return df
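For reference, a hypothetical invocation; the beta and alpha values below are illustrative, not taken from the original experiment:

df = run_single_softmax_experiment(beta=0.5, alpha=0.1)
print(df.head())  # per-trial log: context, chosen action, reward, Q-values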
def __iadd__(self, gradient):
    """
    Add two gradients together.
    """
    # Tensor layer
    if gradient.dV is not None:  # None at the leaves (indeed, leaves only depend on the softmax error, so no tensor gradient is set)
        self.dV += gradient.dV   # Tensor of the RNTN layer
        self.dW += gradient.dW   # Regular term of the RNTN layer
        self.db += gradient.db   # Bias for the regular term of the RNTN layer
    # Softmax (computed in any case)
    self.dWs += gradient.dWs  # Softmax classifier
    self.dbs += gradient.dbs  # Bias of the softmax classifier
    # Words
    self.dL += gradient.dL  # Merge the two lists (backpropagate the dL gradient to the upper nodes)
    return self
def choose_action(self, context):
    # Softmax action probabilities for this context, with inverse temperature beta
    p = softmax(self.Q[context], self.beta)
    actions = range(self.n)
    action = np.random.choice(actions, p=p)
    return action
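Putting choose_action together with the delta-rule update used in neg_log_likelihood above, one simulation step of such an agent might look like the following sketch. The bandit.get_context and bandit.pull names are assumptions for illustration, not an API confirmed by this page:

context = bandit.get_context()           # assumed API
action = agent.choose_action(context)
reward = bandit.pull(context, action)    # assumed API
# Same delta-rule update as in neg_log_likelihood above
agent.Q[context][action] += agent.alpha * (reward - agent.Q[context][action])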
def _predictNode(self, node):
    """
    Return the softmax sentiment prediction for the given word vector.
    WARNING: The node output (after the activation function) has to be
    already computed (by the evaluateSample function).
    """
    z = np.dot(self.Ws, node.output) + self.bs
    return utils.softmax(z)
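Note that utils.softmax here takes a single logit vector, with no temperature argument. A plausible, numerically stable implementation, assuming the original helper behaves like the standard single-argument softmax:

def softmax(z):
    """Stable softmax: subtracting the max leaves the result unchanged
    but prevents overflow in np.exp."""
    e = np.exp(z - np.max(z))
    return e / e.sum()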
def backpropagate(self, sample):
    """
    Compute the derivative at each level of the sample and return their
    sum (stored in a gradient object).
    """
    # Notations:
    #   a: Output at the root node (after activation)
    #   z: Output before softmax (z = Ws*a + bs)
    #   y: Output after softmax, final prediction (y = softmax(z))
    #   E: Cost of the current prediction (E = cost(softmax(Ws*a + bs)) = cost(y))
    #   t: Ground-truth prediction
    # We then have:
    #   x -> a -> x -> a -> ... x -> a (last layer) -> z (projection on dim 5) -> y (softmax prediction) -> E (cost)
    return self._backpropagate(sample.root, None)  # No incoming error for the root node (except the one coming from the softmax)
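The notation in the comments implies the usual softmax/cross-entropy pairing, whose error signal at z collapses to y - t. A worked sketch of that root-level delta, under the assumption that cost(y) is the cross-entropy against a one-hot target t (Ws, bs, a, and t here are free variables standing in for the model's members):

# For E = -sum(t * log(y)) with y = softmax(z), the chain rule gives dE/dz = y - t
y = utils.softmax(np.dot(Ws, a) + bs)  # prediction at this node
delta_z = y - t                        # error to backpropagate downward
dWs = np.outer(delta_z, a)             # gradient for the softmax classifier
dbs = delta_z                          # gradient for its bias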
def __init__(self):
    # Tensor layer
    self.dV = None  # Tensor of the RNTN layer
    self.dW = None  # Regular term of the RNTN layer
    self.db = None  # Bias for the regular term of the RNTN layer
    # Softmax
    self.dWs = None  # Softmax classifier
    self.dbs = None  # Bias of the softmax classifier
    # Words << contained in the vocab variable
    self.dL = []  # List of ModelDl (index, dL_i)
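Combined with __iadd__ above, this initialization lets a training loop sum per-sample gradients into one accumulator. The backpropagate docstring confirms it returns a gradient object; the loop itself is an assumed usage, and in practice the accumulator's dWs/dbs members would need to hold zero arrays (or be copied from the first gradient) before += can apply:

total = Gradient()
for sample in minibatch:
    total += model.backpropagate(sample)  # accumulate dV, dW, db, dWs, dbs, dL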
def minibatch_update(self, x, y, lr, regularization):
    n_sample = x.shape[0]
    info = x
    hidden_cache = []
    # Forward pass: sigmoid hidden layers, softmax output layer
    for i in range(self.n_hidden + 1):
        if i == self.n_hidden:
            probs = softmax(info.dot(self.W[i]) + self.b[i])
        else:
            info = sigmoid(info.dot(self.W[i]) + self.b[i])
            hidden_cache.append(info)
    loss = neg_log_likelihood(probs, y)
    # Gradient of cross-entropy w.r.t. the softmax logits: probs - one_hot(y)
    probs[np.arange(n_sample), y] -= 1.0
    errors = probs
    # Backward pass: propagate errors layer by layer
    for i in range(self.n_hidden, -1, -1):
        if i >= 1:
            hidden_out = hidden_cache[i - 1]
            grad_hidden_out = errors.dot(self.W[i].T)
            # Note: the L2 term is applied unscaled by lr in this implementation
            self.W[i] -= lr * (hidden_out.T).dot(errors) + regularization * self.W[i]
            self.b[i] -= lr * np.sum(errors, axis=0)
            errors = hidden_out * (1 - hidden_out) * grad_hidden_out  # sigmoid derivative
        else:
            hidden_out = x
            self.W[i] -= lr * (hidden_out.T).dot(errors) + regularization * self.W[i]
            self.b[i] -= lr * np.sum(errors, axis=0)
    return loss
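minibatch_update relies on free functions softmax, sigmoid, and neg_log_likelihood (the last distinct from the method of the same name at the top of this page). Minimal sketches consistent with how they are called here; these are assumptions, not the original definitions, and the loss could equally be a sum rather than a mean:

def softmax(z):
    """Row-wise stable softmax for a 2-D array of logits (n_sample x n_class)."""
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def neg_log_likelihood(probs, y):
    """Cross-entropy of the correct classes, given row-wise class
    probabilities probs and integer labels y."""
    n = probs.shape[0]
    return -np.mean(np.log(probs[np.arange(n), y]))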