import numpy as np

def lossFun(inputs, targets, hprev):
    """
    inputs, targets are both lists of integers (character indices).
    hprev is the initial hidden state, shape (hidden_size, 1).
    Returns the loss, gradients on the model parameters, and the last hidden state.
    """
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0
    # forward pass
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size, 1))  # encode in 1-of-k representation
        xs[t][inputs[t]] = 1
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)  # hidden state
        ys[t] = np.dot(Why, hs[t]) + by  # unnormalized log probabilities for next chars
        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))  # probabilities for next chars
        loss += -np.log(ps[t][targets[t], 0])  # softmax (cross-entropy) loss
    # backward pass: compute gradients going backwards
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(hs[0])
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1  # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        dh = np.dot(Why.T, dy) + dhnext  # backprop into h
        dhraw = (1 - hs[t] * hs[t]) * dh  # backprop through tanh nonlinearity
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(Whh.T, dhraw)  # carry the gradient into the previous time step
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)  # clip to mitigate exploding gradients
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs) - 1]
Source file: min_char_rnn.py
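For context, here is a minimal sketch of how lossFun is typically driven in a character-level training loop. It assumes the surrounding min_char_rnn.py globals (data, char_to_ix, vocab_size, hidden_size, seq_length, learning_rate, and the parameters Wxh, Whh, Why, bh, by) are already defined; the Adagrad bookkeeping mirrors the usual setup but is illustrative rather than an exact copy of the file.

# Illustrative training loop (assumes the globals named above are defined).
n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by)  # Adagrad memory
while n < 1000:  # a few demo iterations; the real loop runs indefinitely
    # reset RNN memory at the start of the data, or when we run off the end
    if p + seq_length + 1 >= len(data) or n == 0:
        hprev = np.zeros((hidden_size, 1))
        p = 0
    inputs = [char_to_ix[ch] for ch in data[p:p + seq_length]]
    targets = [char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]]

    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)

    # Adagrad parameter update
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],
                                  [dWxh, dWhh, dWhy, dbh, dby],
                                  [mWxh, mWhh, mWhy, mbh, mby]):
        mem += dparam * dparam
        param += -learning_rate * dparam / np.sqrt(mem + 1e-8)

    p += seq_length  # move data pointer
    n += 1           # iteration counter

Adagrad scales each parameter's step by the running sum of its squared gradients, which keeps the update stable without per-parameter tuning of the learning rate.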
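Because the backward pass is written by hand, it is worth sanity-checking the analytic gradients numerically. The sketch below is a generic finite-difference check, not part of the file above: the helper name grad_check, the number of sampled entries, and the perturbation size are arbitrary choices, and it assumes lossFun plus the parameter globals are in scope.

# Hypothetical finite-difference gradient check (illustrative only).
def grad_check(inputs, targets, hprev, num_checks=5, delta=1e-5):
    _, dWxh, dWhh, dWhy, dbh, dby, _ = lossFun(inputs, targets, hprev)
    for param, dparam, name in zip([Wxh, Whh, Why, bh, by],
                                   [dWxh, dWhh, dWhy, dbh, dby],
                                   ['Wxh', 'Whh', 'Why', 'bh', 'by']):
        for _ in range(num_checks):
            ix = int(np.random.randint(param.size))
            old = param.flat[ix]
            param.flat[ix] = old + delta
            loss_plus, *_ = lossFun(inputs, targets, hprev)
            param.flat[ix] = old - delta
            loss_minus, *_ = lossFun(inputs, targets, hprev)
            param.flat[ix] = old  # restore the original value
            grad_numerical = (loss_plus - loss_minus) / (2 * delta)
            grad_analytic = dparam.flat[ix]
            denom = abs(grad_numerical) + abs(grad_analytic)
            rel_error = abs(grad_analytic - grad_numerical) / denom if denom else 0.0
            print('%s: analytic %e, numerical %e, relative error %e'
                  % (name, grad_analytic, grad_numerical, rel_error))

Note that lossFun clips gradients to [-5, 5], so entries with large gradients may show spurious mismatches; clipping can be temporarily disabled while checking.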