def L_op(self, inputs, outputs, output_grads):
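    """Build the gradient graph for the cuDNN RNN op.

    Returns one gradient per input, in order: a disconnected gradient
    for the cuDNN descriptor, then dw, dx, dhx and, when an initial
    cell state cx was given (LSTM mode), dcx.
    """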
    desc, w, x, hx = inputs[:4]
    cx = inputs[4] if len(inputs) == 5 else None
    reserve, y, hy = outputs[:3]
    _, dy, dhy = output_grads[:3]
    dcy = output_grads[3] if len(output_grads) == 4 else None
    # Since the op returns two outputs which contain essentially the
    # same information, the user will most likely only use one of
    # them. This leads to the situation where the other is considered
    # "disconnected" by Theano in the gradient. However, we know that
    # this isn't really the case, so we fix it here.

    # If all the ys are disconnected you get a zero gradient instead
    # of an error, but in that case you shouldn't be calling this
    # method anyway.
    if isinstance(dy.type, DisconnectedType):
        dy = as_gpuarray_variable(y.zeros_like(),
                                  context_name=y.type.context_name)
    if isinstance(dhy.type, DisconnectedType):
        dhy = None
    if dcy is not None and isinstance(dcy.type, DisconnectedType):
        dcy = None
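    # GpuDnnRNNGradInputs computes dx and dhx (and dcx when grad_c is
    # set) from the forward pass's reserve buffer, and also returns an
    # updated reserve that the weight-gradient op below reuses.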
    dinputs = GpuDnnRNNGradInputs(rnn_mode=self.rnn_mode,
                                  grad_h=(dhy is not None),
                                  grad_c=(dcy is not None))(
        desc, x, y, dy, dhy, dcy, w, hx, cx, reserve, return_list=True)
    reserve2, dx, dhx = dinputs[:3]
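    # The weight gradient is a separate cuDNN pass that consumes the
    # reserve space updated by the grad-inputs op above.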
    dw = GpuDnnRNNGradWeights()(
        desc, x, hx, y, reserve2, w)
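    # The descriptor input is not differentiable, so its gradient slot
    # is filled with a DisconnectedType instance.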
    res = [DisconnectedType()(), dw, dx, dhx]
    if cx is not None:
        res.append(dinputs[3])  # dcx
    return res