def perform(self, node, inputs, outputs):
"""
Implements the "reverse-mode" gradient for the eigensystem of
a square matrix.
"""
x, w, v, W, V = inputs
N = x.shape[0]
outer = numpy.outer
def G(n):
return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
for m in xrange(N) if m != n)
g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
for n in xrange(N))
# Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
# (triu(a)) only. This means that partial derivative of
# eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
# for i < j (i > j). At the same time, non-zero components of
# the gradient must account for the fact that variation of the
# opposite triangle contributes to variation of two elements
# of Hermitian (symmetric) matrix. The following line
# implements the necessary logic.
out = self.tri0(g) + self.tri1(g).T
# The call to self.tri0 in perform upcast from float32 to
# float64 or from int* to int64 in numpy 1.6.1 but not in
# 1.6.2. We do not want version dependent dtype in Theano.
# We think it should be the same as the output.
outputs[0][0] = numpy.asarray(out, dtype=node.outputs[0].dtype)
评论列表
文章目录