def meanQvalue(Q, samples):
    """Return the mean of Q(s, a) over a batch of samples.

    Args:
        Q: Callable model exposing an ``xp`` array module attribute;
            ``Q(s)`` is passed to ``F.select_item`` together with the
            actions, so it presumably yields one row of action values per
            state -- TODO confirm against the model definition.
        samples: Sequence of records where index 0 holds the state and
            index 1 holds the action. Each state must be assignable into
            a ``(STATE_LENGTH, FRAME_WIDTH, FRAME_HEIGHT)`` float32 slot.

    Returns:
        The ``.data`` payload of ``F.sum(Q(s, a)) / len(samples)``.

    Raises:
        ValueError: If ``samples`` is empty.
    """
    # Size the batch from the input rather than the module-level
    # minibatch_size, so any non-empty number of samples is accepted.
    batch_size = len(samples)
    if batch_size == 0:
        raise ValueError("meanQvalue requires at least one sample")

    xp = Q.xp

    # Every row is overwritten below, so an uninitialised buffer is fine.
    s = np.empty(
        (batch_size, STATE_LENGTH, FRAME_WIDTH, FRAME_HEIGHT),
        dtype=np.float32,
    )
    for i, sample in enumerate(samples):
        s[i] = sample[0]
    a = np.asarray([sample[1] for sample in samples], dtype=np.int32)

    # Convert to the array module used by Q (to GPU if available).
    s = xp.asarray(s)
    a = xp.asarray(a)

    # Prediction: Q(s, a) -- the value of the action taken in each sample.
    y = F.select_item(Q(s), a)
    mean_Q = (F.sum(y) / batch_size).data
    return mean_Q
# Comment list
# Table of contents