def rbf_kernel(X0):
    """Build an RBF kernel over the rows of ``X0`` with a median-based bandwidth.

    Returns ``(Kxy, neighbors, h)`` where ``Kxy = exp(-H / h**2 / 2)`` for the
    matrix ``H`` of pairwise squared distances between rows, ``neighbors`` is
    column 1 of the row-wise argsort of ``H``, and ``h`` is the bandwidth
    derived from the median entry of ``H``.
    """
    n_rows = X0.shape[0]

    # Pairwise squared distances: ||a||^2 + ||b||^2 - 2 * a.b
    gram = T.dot(X0, X0.transpose())
    sq_norms = T.reshape(T.sum(T.square(X0), axis=1), (n_rows, 1))
    sq_norms_tiled = T.repeat(sq_norms, n_rows, axis=1)
    H = T.sub(T.add(sq_norms_tiled, sq_norms_tiled.transpose()), 2 * gram)

    # Median of all entries of H (mean of the two middle values when the
    # number of entries is even).
    flat = H.flatten()
    n_entries = flat.shape[0]
    middle = n_entries // 2
    ordered = T.sort(flat)
    median = T.switch(
        T.eq((n_entries % 2), 0),
        T.mean(ordered[(middle - 1):(middle + 1)]),
        ordered[middle],
    )

    h = T.sqrt(0.5 * median / T.log(n_rows.astype('float32') + 1.0)) / 2.
    Kxy = T.exp(-H / h ** 2 / 2.0)
    # Column 0 of the argsort is presumably the row itself (distance 0), so
    # column 1 is taken as the nearest other row -- TODO confirm.
    neighbors = T.argsort(H, axis=1)[:, 1]
    return Kxy, neighbors, h
# Example source code using Python's reshape()
def get_output_for(self, input, deterministic=False, **kwargs):
    """Rearrange channel blocks of ``input`` into larger spatial maps.

    ``input`` is split along axis 1 into ``self.c`` equal groups; each group
    is reshuffled by ``_phase_shift`` with factor ``self.r`` into a
    single-channel map, and the maps are concatenated back along axis 1.
    ``deterministic`` is accepted but not used here.
    """
    def _phase_shift(block, r):
        # Spatial size before upscaling, derived from the layer's output shape.
        batch = block.shape[0]
        rows = self.output_shape[2] // r
        cols = self.output_shape[3] // r

        grid = T.reshape(block, (batch, r, r, rows, cols))
        grid = T.transpose(grid, (0, 3, 4, 1, 2))  # batch, rows, cols, r, r

        # Interleave along the row axis: rows x [batch, cols, r, r]
        row_slices = T.split(x=grid, splits_size=[1] * rows, n_splits=rows, axis=1)
        grid = T.concatenate(
            [T.reshape(piece, (batch, cols, r, r)) for piece in row_slices],
            axis=2)  # batch, cols, rows*r, r

        # Interleave along the column axis: cols x [batch, rows*r, r]
        col_slices = T.split(x=grid, splits_size=[1] * cols, n_splits=cols, axis=1)
        grid = T.concatenate(
            [T.reshape(piece, (batch, rows * r, r)) for piece in col_slices],
            axis=2)  # batch, rows*r, cols*r

        return grid.dimshuffle(0, 'x', 1, 2)

    group_width = input.shape[1] // self.c
    channel_groups = T.split(x=input, splits_size=[group_width] * self.c,
                             n_splits=self.c, axis=1)
    return T.concatenate([_phase_shift(group, self.r) for group in channel_groups],
                         axis=1)
# Multiscale Dilated Convolution Block
# This function (not a layer in and of itself, though you could make it one) returns a set of concatenated conv2d and dilatedconv2d layers.
# Each layer uses the same basic filter W, operating at a different dilation factor (or taken as the mean of W for the 1x1 conv).
# The channel-wise output of each layer is weighted by a set of coefficients, which are initialized to 1 / the total number of dilation scales,
# meaning that we're starting by taking an elementwise mean. These should be learnable parameters.
# NOTES: - I'm considering changing the variable names to be more descriptive, and look less like ridiculous academic code. It's on the to-do list.
# - I keep the bias and nonlinearity out of the default definition for this layer, as I expect it to be batchnormed and nonlinearized in the model config.
def get_output_for(self, inputs, attention_only=False, **kwargs):
    """Attend over ``inputs[1]`` with the scores in ``inputs[2]`` and gate ``inputs[0]``.

    Shapes, as annotated in the original code:
      inputs[0]: B x N x D
      inputs[1]: B x Q x D
      inputs[2]: B x N x Q (or B x Q x N when ``self.transpose`` is set)
      self.mask: B x Q

    ``attention_only`` is accepted but not used in this body.
    """
    if self.transpose:
        M = inputs[2].dimshuffle((0, 2, 1))
    else:
        M = inputs[2]

    n_batch, n_rows, n_cols = M.shape[0], M.shape[1], M.shape[2]

    # Softmax over the last axis, done on a 2-D view.
    alphas = T.nnet.softmax(T.reshape(M, (n_batch * n_rows, n_cols)))

    # Mask, then renormalise over the last axis.  B x N x Q
    alphas_r = T.reshape(alphas, (n_batch, n_rows, n_cols)) * \
        self.mask[:, np.newaxis, :]
    alphas_r = alphas_r / alphas_r.sum(axis=2)[:, :, np.newaxis]

    q_rep = T.batched_dot(alphas_r, inputs[1])  # B x N x D

    # NOTE(review): the gating function is resolved with eval() from the
    # string self.gating_fn; make sure that string never comes from
    # untrusted input.
    gate = eval(self.gating_fn)
    return gate(inputs[0], q_rep)
def rbf_kernel(X0):
    """Build an RBF kernel over the rows of ``X0`` with a median-based bandwidth.

    Returns ``(Kxy, neighbors, h)`` where ``Kxy = exp(-H / h**2 / 2)`` for the
    matrix ``H`` of pairwise squared distances between rows, ``neighbors`` is
    column 1 of the row-wise argsort of ``H``, and ``h`` is the bandwidth
    derived from the median entry of ``H``.
    """
    n_rows = X0.shape[0]

    # Pairwise squared distances: ||a||^2 + ||b||^2 - 2 * a.b
    gram = T.dot(X0, X0.transpose())
    sq_norms = T.reshape(T.sum(T.square(X0), axis=1), (n_rows, 1))
    sq_norms_tiled = T.repeat(sq_norms, n_rows, axis=1)
    H = T.sub(T.add(sq_norms_tiled, sq_norms_tiled.transpose()), 2 * gram)

    # Median of all entries of H (mean of the two middle values when the
    # number of entries is even).
    flat = H.flatten()
    n_entries = flat.shape[0]
    middle = n_entries // 2
    ordered = T.sort(flat)
    median = T.switch(
        T.eq((n_entries % 2), 0),
        T.mean(ordered[(middle - 1):(middle + 1)]),
        ordered[middle],
    )

    h = T.sqrt(0.5 * median / T.log(n_rows.astype('float32') + 1.0)) / 2.
    Kxy = T.exp(-H / h ** 2 / 2.0)
    # Column 0 of the argsort is presumably the row itself (distance 0), so
    # column 1 is taken as the nearest other row -- TODO confirm.
    neighbors = T.argsort(H, axis=1)[:, 1]
    return Kxy, neighbors, h
def svgd_gradient(X0):
    """Combine the negative gradient of the discriminator loss with a kernel term.

    Returns ``(grad, svgd_grad, dxkxy)``: ``grad`` is ``-d(sum(mse))/dX0``,
    ``dxkxy`` is the L-operator of the discriminator features at the
    neighbouring samples applied to the kernel-weighted feature differences,
    and ``svgd_grad = grad + dxkxy / 2``.
    """
    hidden, _, mse = discrim(X0)
    grad = -1.0 * T.grad(mse.sum(), X0)

    # Only the neighbour indices and the bandwidth are used; the kernel
    # matrix itself is ignored here (marked TODO in the original).
    _, neighbors, h = rbf_kernel(hidden)

    nearest_hidden = hidden[neighbors]
    bandwidth_sq = h ** 2
    coff = T.exp(-T.sum((nearest_hidden - hidden) ** 2, axis=1) / bandwidth_sq / 2.0)
    v = coff.dimshuffle(0, 'x') * (-nearest_hidden + hidden) / bandwidth_sq

    # Back-propagate v through the discriminator evaluated at the neighbours.
    X1 = X0[neighbors]
    hidden1, _, _ = discrim(X1)
    dxkxy = T.Lop(hidden1, X1, v)

    # Earlier variant kept for reference:
    # svgd_grad = (T.dot(kxy, T.flatten(grad, 2)).reshape(dxkxy.shape) + dxkxy) / T.sum(kxy, axis=1).dimshuffle(0, 'x', 'x', 'x')
    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy
def svgd_gradient(X0):
    """Combine the negative gradient of the discriminator loss with a kernel term.

    Returns ``(grad, svgd_grad, dxkxy)``: ``grad`` is ``-d(sum(mse))/dX0``,
    ``dxkxy`` is the L-operator of the discriminator features at the
    neighbouring samples applied to the kernel-weighted feature differences,
    and ``svgd_grad = grad + dxkxy / 2``.
    """
    hidden, _, mse = discrim(X0)
    grad = -1.0 * T.grad(mse.sum(), X0)

    # Only the neighbour indices and the bandwidth are used; the kernel
    # matrix itself is ignored here (marked TODO in the original).
    _, neighbors, h = rbf_kernel(hidden)

    nearest_hidden = hidden[neighbors]
    bandwidth_sq = h ** 2
    coff = T.exp(-T.sum((nearest_hidden - hidden) ** 2, axis=1) / bandwidth_sq / 2.0)
    v = coff.dimshuffle(0, 'x') * (-nearest_hidden + hidden) / bandwidth_sq

    # Back-propagate v through the discriminator evaluated at the neighbours.
    X1 = X0[neighbors]
    hidden1, _, _ = discrim(X1)
    dxkxy = T.Lop(hidden1, X1, v)

    # Earlier variant kept for reference:
    # svgd_grad = (T.dot(kxy, T.flatten(grad, 2)).reshape(dxkxy.shape) + dxkxy) / T.sum(kxy, axis=1).dimshuffle(0, 'x', 'x', 'x')
    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy
def get_output_for(self, input, **kwargs):
    """Run ``self.conv1d`` on every time step by folding steps into the batch axis.

    ``input`` is indexed as [batch, n-step, num_input_channels, input_length];
    the result is reshaped to [batch, n-step, num_filters, output_length].
    """
    n_batch, n_steps = input.shape[0], input.shape[1]

    # [batch * n-step, num_input_channels, input_length]
    folded = T.reshape(input, (n_batch * n_steps, input.shape[2], input.shape[3]))

    # [batch * n-step, num_filters, output_length]
    conv_out = self.conv1d.get_output_for(folded, **kwargs)

    # Unfold the leading axis back into [batch, n-step, ...].
    unfolded_shape = (n_batch, n_steps, conv_out.shape[1], conv_out.shape[2])
    return T.reshape(conv_out, unfolded_shape)
def get_output_for(self, input, **kwargs):
    """Apply the parent layer's pooling to every time step.

    ``input`` is indexed as [batch, n-step, num_input_channels, input_length];
    the step axis is folded into the batch axis for the parent call and then
    restored, giving [batch, n-step, num_input_channels, pool_length].
    """
    n_batch, n_steps = input.shape[0], input.shape[1]

    # [batch * n-step, num_input_channels, input_length]
    folded = T.reshape(input, (n_batch * n_steps, input.shape[2], input.shape[3]))

    # [batch * n-step, num_input_channels, pool_length]
    pooled = super(PoolTimeStep1DLayer, self).get_output_for(folded, **kwargs)

    # Unfold the leading axis back into [batch, n-step, ...].
    unfolded_shape = (n_batch, n_steps, pooled.shape[1], pooled.shape[2])
    return T.reshape(pooled, unfolded_shape)
def get_output_for(self, input, **kwargs):
    """Mix a transformed input with the untouched input through a sigmoid gate.

    Computes ``H(x) * T(x) + x * (1 - T(x))`` where ``H`` uses ``W_h``/``b_h``
    with ``self.nonlinearity`` and ``T`` uses ``W_t``/``b_t`` with a sigmoid.
    Inputs with more than two dimensions are flattened to 2-D for the
    computation and reshaped back afterwards.
    """
    needs_flatten = input.ndim > 2
    # Treat anything beyond 2-D as a batch of feature vectors.
    flat_input = input.flatten(2) if needs_flatten else input

    # Candidate activation H(x).
    hidden = T.dot(flat_input, self.W_h)
    if self.b_h is not None:
        hidden = hidden + self.b_h.dimshuffle('x', 0)
    hidden = self.nonlinearity(hidden)

    # Transform gate T(x).
    gate = T.dot(flat_input, self.W_t)
    if self.b_t is not None:
        gate = gate + self.b_t.dimshuffle('x', 0)
    gate = nonlinearities.sigmoid(gate)

    mixed = hidden * gate + flat_input * (1.0 - gate)

    # Restore the original input shape if it was flattened.
    return T.reshape(mixed, input.shape) if needs_flatten else mixed
def multiclass_hinge_loss(self, predictions, targets, delta=1):
    """Mean hinge loss between the target-class score and the best other score.

    ``targets`` may have one dimension fewer than ``predictions`` (converted
    to one-hot with ``predictions.shape[1]`` classes) or the same number of
    dimensions (used as a 0/1 indicator). Any other rank raises ``TypeError``.
    ``delta`` is the margin added before the ReLU.
    """
    num_cls = predictions.shape[1]

    rank_gap = predictions.ndim - targets.ndim
    if rank_gap == 1:
        targets = T.extra_ops.to_one_hot(targets, num_cls)
    elif rank_gap != 0:
        raise TypeError('rank mismatch between targets and predictions')

    # Scores at the positions where targets is non-zero.
    target_scores = predictions[targets.nonzero()]

    # Scores at the remaining positions, one row of num_cls - 1 per sample.
    other_scores = T.reshape(predictions[(1 - targets).nonzero()],
                             (-1, num_cls - 1))
    best_other = T.max(other_scores, axis=1)

    return T.nnet.relu(best_other - target_scores + delta).mean()
def set_output(self):
    """Compute ``self._output`` from the previous layer and the FC layer.

    The previous layer's output is zero-padded on axes 1, 3 and 4 (by
    ``self._padding``), convolved with ``self.Wh`` via ``conv3d2d.conv3d``,
    and summed with the FC layer's output projected through ``self.Wx``
    (reshaped to ``self._output_shape``) and the bias ``self.b``.
    """
    pad = self._padding
    in_shape = self._input_shape

    # Zero tensor large enough to hold the input plus padding on axes 1, 3, 4.
    padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                in_shape[0],
                                in_shape[1] + 2 * pad[1],
                                in_shape[2],
                                in_shape[3] + 2 * pad[3],
                                in_shape[4] + 2 * pad[4])

    # Write the previous layer's output into the interior of the padded tensor.
    interior = padded_input[:,
                            pad[1]:pad[1] + in_shape[1],
                            :,
                            pad[3]:pad[3] + in_shape[3],
                            pad[4]:pad[4] + in_shape[4]]
    padded_input = tensor.set_subtensor(interior, self._prev_layer.output)

    fc_projection = tensor.dot(self._fc_layer.output, self.Wx.val)
    fc_output = tensor.reshape(fc_projection, self._output_shape)

    conv_output = conv3d2d.conv3d(padded_input, self.Wh.val)
    self._output = conv_output + \
        fc_output + self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')
def project3Dto2D(self, Li, idxs):
    """
    Project 3D point to 2D
    :param Li: joints in normalized 3D, reshapeable to (len(idxs), numJoints, 3)
    :param idxs: frames specified by subset; a single index, a sequence of
                 indices or an ndarray
    :return: 2D points, in normalized 2D coordinates, shape (len(idxs), numJoints*2)
    """
    # atleast_1d turns a scalar index into a length-1 array while leaving
    # lists/arrays of indices one-dimensional. (Wrapping a list in another
    # list would produce a 2-D index array and break the reshapes below.)
    idxs = numpy.atleast_1d(numpy.asarray(idxs))
    n_frames = len(idxs)
    n_points = n_frames * self.numJoints

    # Undo the per-frame normalisation: scale, then shift by the 3D offset.
    Li_glob3D = (numpy.reshape(Li, (n_frames, self.numJoints, 3))
                 * self.Di_scale[idxs][:, None, None]
                 + self.Di_off3D[idxs][:, None, :]).reshape((n_points, 3))

    # 3D -> 2D projection in homogeneous coordinates.
    ones = numpy.ones((n_points, 1), dtype='float32')
    Li_glob3D_hom = numpy.concatenate([Li_glob3D, ones], axis=1)
    Li_glob2D_hom = numpy.dot(Li_glob3D_hom, self.cam_proj.T)
    Li_glob2D = (Li_glob2D_hom[:, 0:3] / Li_glob2D_hom[:, 3][:, None]).reshape(
        (n_frames, self.numJoints, 3))

    # Also shift by the per-frame 2D transform to the cropped window.
    Li_img2D_hom = numpy.einsum('ijk,ikl->ijl', Li_glob2D, self.Di_trans2D[idxs])
    Li_img2D = (Li_img2D_hom[:, :, 0:2] / Li_img2D_hom[:, :, 2][:, :, None]).reshape(
        (n_frames, self.numJoints * 2))

    # Normalise around half of Di.shape[3].
    half_size = self.Di.shape[3] / 2.
    Li_img2Dcrop = (Li_img2D - half_size) / half_size
    return Li_img2Dcrop
def evaluateToGT(self, Li, idxs):
    """
    Evaluate the current estimate to a ground truth
    :param Li: current estimates; either already restricted to idxs (first
               dimension equals len(idxs)) or indexable by idxs
    :param idxs: idxs to evaluate; a single index, a sequence or an ndarray
    :return: mean error, max error and MD score; (0., 0., 0.) when no ground
             truth is available
    """
    if self.gt3D is None:
        return 0., 0., 0.

    # atleast_1d so that a scalar index still supports len() below.
    idxs = numpy.atleast_1d(numpy.asarray(idxs))

    gt3D_subset = self.gt3D[idxs]
    Li_subset = Li if Li.shape[0] == len(idxs) else Li[idxs]

    # Per-joint Euclidean error, scaled per frame; computed once and shared
    # by all three metrics. Shape: (frames, joints).
    diff = (gt3D_subset - Li_subset.reshape(gt3D_subset.shape)) * self.Di_scale[idxs, None, None]
    joint_err = numpy.sqrt(numpy.square(diff).sum(axis=2))

    mean_error = numpy.mean(joint_err, axis=1).mean()
    max_error = numpy.max(joint_err)

    # MD score: for each threshold 0..79, the fraction of frames whose worst
    # joint error is within the threshold, averaged over the thresholds.
    frame_max = numpy.nanmax(joint_err, axis=1)
    n_frames = float(gt3D_subset.shape[0])
    vals = [(frame_max <= j).sum() / n_frames for j in range(0, 80)]
    md_score = numpy.asarray(vals).sum() / float(80.)

    return mean_error, max_error, md_score
def get_output_for(self, input, **kwargs):
    """Mix a transformed input with the untouched input through a sigmoid gate.

    Computes ``H(x) * T(x) + x * (1 - T(x))`` where ``H`` uses ``W_h``/``b_h``
    with ``self.nonlinearity`` and ``T`` uses ``W_t``/``b_t`` with a sigmoid.
    Inputs with more than two dimensions are flattened to 2-D for the
    computation and reshaped back afterwards.
    """
    needs_flatten = input.ndim > 2
    # Treat anything beyond 2-D as a batch of feature vectors.
    flat_input = input.flatten(2) if needs_flatten else input

    # Candidate activation H(x).
    hidden = T.dot(flat_input, self.W_h)
    if self.b_h is not None:
        hidden = hidden + self.b_h.dimshuffle('x', 0)
    hidden = self.nonlinearity(hidden)

    # Transform gate T(x).
    gate = T.dot(flat_input, self.W_t)
    if self.b_t is not None:
        gate = gate + self.b_t.dimshuffle('x', 0)
    gate = nonlinearities.sigmoid(gate)

    mixed = hidden * gate + flat_input * (1.0 - gate)

    # Restore the original input shape if it was flattened.
    return T.reshape(mixed, input.shape) if needs_flatten else mixed
def get_output_for(self, inputs, **kwargs):
    """Pick one GRU output per sequence as its candidate fact.

    ``inputs`` is ``[input, input_word, word_dropout]``. The input is embedded
    by ``self.SemMem``, run through ``self.GRU``, and for each sequence the
    output at position ``input_word - 1`` is selected. The result is reshaped
    to ``(-1, input.shape[1], self.hid_state_size)``.
    """
    input, word_dropout = inputs[0], inputs[2]
    input_word = T.flatten(inputs[1])

    # Apply word embedding
    sentence_rep = self.SemMem.get_output_for([input, word_dropout])

    # Apply GRU Layer
    gru_outs = self.GRU.get_output_for([sentence_rep])

    # Extract the candidate fact from the GRU's output using input_word,
    # which resolves inputs that carry additional padding words,
    # e.g. John went to the hallway nil nil nil -> [GRU1, ... ,GRU8] -> GRU5
    row_ids = T.arange(gru_outs.shape[0], dtype='int32')
    selected = gru_outs[row_ids, input_word - 1]

    candidate_facts = T.reshape(
        selected, (-1, input.shape[1], self.hid_state_size))
    return candidate_facts
def visualize_activations(self, x):
    """
    Visualizes the activations in the mdn caused by a given data minibatch.

    First delegates to ``self.net.visualize_activations``, then shows one
    image each for the mixing coefficients, the concatenated means and the
    concatenated (flattened) scale matrices.

    :param x: a minibatch of data
    :return: none
    """

    self.net.visualize_activations(x)

    # One output per plot title. The means and the flattened scale matrices
    # are kept as separate outputs: adding them elementwise would both lose
    # the third plot and require their widths to match.
    means = tt.concatenate(self.ms, axis=1)
    scales = tt.concatenate(
        [tt.reshape(U, [U.shape[0], -1]) for U in self.Us], axis=1)

    forwprop = theano.function(
        inputs=[self.input],
        outputs=[self.a, means, scales]
    )
    activations = forwprop(x.astype(dtype))

    titles = ['mixing coefficients', 'means', 'scale matrices']
    for a, title in izip(activations, titles):
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.imshow(a, cmap='gray', interpolation='none')
        ax.set_title(title)
        ax.set_xlabel('layer units')
        ax.set_ylabel('data points')

    plt.show(block=False)
def construct(self, input_tv):
    """Look up rows of a declared matrix ``T`` by the indices in ``input_tv``.

    Declares the matrix with shape ``(in_dim, out_dim)``, attaches gradient
    clipping / L2 projection settings, multiplies ``self.out_dim`` by the
    window size, and stores the looked-up rows (reshaped to
    ``[n_timesteps, self.out_dim]``, optionally multiplied by a dropout mask)
    in ``self.output_tv``.

    Returns a one-element tuple holding the declared matrix.
    """
    T_ = self._declare_mat('T', self.in_dim, self.out_dim)
    T_.clip_gradient = self.prm('clip_gradient')
    T_.l2_project = self.prm('l2_project')
    T_.l2_projection_axis = 1

    n_timesteps = input_tv.shape[0]
    # Note: this overwrites self.out_dim after the matrix was declared.
    self.out_dim = self.prm('win_size') * self.out_dim

    embedded = T_[input_tv.flatten()].reshape([n_timesteps, self.out_dim])

    if not self.prm('do_dropout'):
        self.output_tv = embedded
        return (T_,)

    T_.dropout_retention_freq = self.prm('dropout_retention_freq')
    mask = dropout_mask_creator(
        self.out_dim, self.prm('dropout_retention_freq'))
    self.output_tv = embedded * mask
    return (T_,)
def needed_key(self):
    """Return the result of ``_needed_key_impl`` for the ``'activation_fn'`` key."""
    required = self._needed_key_impl('activation_fn')
    return required
# class MaxPool(Chip):
# ''' This class_chip collapses the input tensor by max pooling along its last dimension.
# '''
# def construct(self, input_tv):
# pool_size = self.prm('pool_size')
# y = T.reshape(input_tv,
# ([input_tv.shape[i] for i in range(input_tv.ndim - 1)]
# + [T.floor_div(input_tv.shape[input_tv.ndim - 1], pool_size).astype('int32'), pool_size]),
# ndim=input_tv.ndim + 1)
# self.output_tv = T.max(y, axis=y.ndim - 1)
# return tuple()
# def needed_key(self):
# return self._needed_key_impl('pool_size')
def log_cross_entropy_extended(x, x_theta, log_distribution, k_max, eps = 0.0):
    """Negative categorical cross-entropy over clipped counts plus a masked tail term.

    ``x_theta["p_k"]`` is clipped to ``[eps, 1]`` and viewed as rows of
    ``k_max + 1`` class probabilities; ``x`` is clipped to ``[0, k_max]`` and
    used as the class index. The cross-entropy is reshaped back to
    ``(-1, x.shape[1])``.
    """
    n_features = x.shape[1]

    class_probs = T.reshape(T.clip(x_theta["p_k"], eps, 1.0), (-1, k_max + 1))
    class_index = T.reshape(T.clip(x, 0, k_max), (-1, 1))

    y_cross_entropy = T.reshape(
        objectives.categorical_crossentropy(class_probs, class_index),
        (-1, n_features))

    # Tail term, active only where x >= k_max.
    y_log_distribution = T.ge(x, k_max) * log_distribution(x - k_max, x_theta, eps)

    # NOTE(review): the tail term is additionally masked by T.lt(x, 0). With
    # k_max >= 0, x >= k_max and x < 0 cannot both hold, so this product
    # looks like it is always zero. Alternatives left by the author:
    #   y = - T.lt(x, 0) * y_cross_entropy + y_log_distribution
    #   y = - y_cross_entropy + y_log_distribution
    # Confirm which form is intended.
    return - y_cross_entropy + T.lt(x, 0) * y_log_distribution
def log_softmax_poisson(x, p_k, log_lambda, k_max = 10, eps = 0.0):
    """Negative categorical cross-entropy for counts up to ``k_max`` plus a Poisson tail.

    ``p_k`` is clipped to ``[eps, 1 - eps]`` and viewed as rows of
    ``k_max + 1`` class probabilities; ``x`` clipped to ``[0, k_max]`` is the
    class index. Where ``x >= k_max`` the value of
    ``log_poisson(x - k_max, log_lambda, eps)`` is added.
    """
    n_features = x.shape[1]

    class_probs = T.reshape(T.clip(p_k, eps, 1.0 - eps), (-1, k_max + 1))
    class_index = T.reshape(T.clip(x, 0, k_max), (-1, 1))

    cross_entropy = T.reshape(
        objectives.categorical_crossentropy(class_probs, class_index),
        (-1, n_features))

    # Tail term, active only where x >= k_max.
    tail = T.ge(x, k_max) * log_poisson(x - k_max, log_lambda, eps)

    return - cross_entropy + tail
def log_softmax_negative_binomial(x, p_k, p, log_r, k_max = 10, eps = 0.0):
    """Negative categorical cross-entropy for counts up to ``k_max`` plus a negative-binomial tail.

    ``p_k`` is clipped to ``[eps, 1 - eps]`` and viewed as rows of
    ``k_max + 1`` class probabilities; ``x`` clipped to ``[0, k_max]`` is the
    class index. Where ``x >= k_max`` the value of
    ``log_negative_binomial(x - k_max, p, log_r, eps)`` is added.
    """
    n_features = x.shape[1]

    class_probs = T.reshape(T.clip(p_k, eps, 1.0 - eps), (-1, k_max + 1))
    class_index = T.reshape(T.clip(x, 0, k_max), (-1, 1))

    cross_entropy = T.reshape(
        objectives.categorical_crossentropy(class_probs, class_index),
        (-1, n_features))

    # Tail term, active only where x >= k_max.
    tail = T.ge(x, k_max) * log_negative_binomial(x - k_max, p, log_r, eps)

    return - cross_entropy + tail
def log_softmax_poisson(x, p_k, log_lambda, k_max = 10, eps = 0.0):
    """Negative categorical cross-entropy for counts up to ``k_max`` plus a Poisson tail.

    ``p_k`` is clipped to ``[eps, 1 - eps]`` and viewed as rows of
    ``k_max + 1`` class probabilities; ``x`` clipped to ``[0, k_max]`` is the
    class index. Where ``x >= k_max`` the value of
    ``log_poisson(x - k_max, log_lambda, eps)`` is added.
    """
    n_features = x.shape[1]

    class_probs = T.reshape(T.clip(p_k, eps, 1.0 - eps), (-1, k_max + 1))
    class_index = T.reshape(T.clip(x, 0, k_max), (-1, 1))

    cross_entropy = T.reshape(
        objectives.categorical_crossentropy(class_probs, class_index),
        (-1, n_features))

    # Tail term, active only where x >= k_max.
    tail = T.ge(x, k_max) * log_poisson(x - k_max, log_lambda, eps)

    return - cross_entropy + tail
def log_softmax_negative_binomial(x, p_k, p, log_r, k_max = 10, eps = 0.0):
    """Negative categorical cross-entropy for counts up to ``k_max`` plus a negative-binomial tail.

    ``p_k`` is clipped to ``[eps, 1 - eps]`` and viewed as rows of
    ``k_max + 1`` class probabilities; ``x`` clipped to ``[0, k_max]`` is the
    class index. Where ``x >= k_max`` the value of
    ``log_negative_binomial(x - k_max, p, log_r, eps)`` is added.
    """
    n_features = x.shape[1]

    class_probs = T.reshape(T.clip(p_k, eps, 1.0 - eps), (-1, k_max + 1))
    class_index = T.reshape(T.clip(x, 0, k_max), (-1, 1))

    cross_entropy = T.reshape(
        objectives.categorical_crossentropy(class_probs, class_index),
        (-1, n_features))

    # Tail term, active only where x >= k_max.
    tail = T.ge(x, k_max) * log_negative_binomial(x - k_max, p, log_r, eps)

    return - cross_entropy + tail
def get_output(self, input_):
    """
    This function overrides the parents' one.
    Average-pools the input with a window and stride of
    ``self.input_shape[1:]`` and reshapes the result to a 2D matrix.

    Parameters
    ----------
    input_: TensorVariable

    Returns
    -------
    TensorVariable
        Reshaped to (input_.shape[0], input_.shape[1]).
    """
    # Window and stride are the same: self.input_shape without its first entry.
    window = self.input_shape[1:]
    pooled = pool_2d(
        input_,
        ws=window,
        ignore_border=True,
        stride=window,
        pad=self.padding,
        mode='average_exc_pad',
    )  # still a 4D tensor here, (batch size, output channel, 1, 1)

    flat_shape = (input_.shape[0], input_.shape[1])
    return T.reshape(pooled, flat_shape)  # flatten to 2D matrix
def get_output_for(self, inputs, **kwargs):
    """Attend from every position of ``p_gru`` over the positions of ``q_gru``.

    ``inputs`` unpacks to ``(p_gru, q_gru, q_mask, feature)``. Attention
    scores come from ``tanh(p * q)`` projected by ``self.v1``, plus ``q``
    projected by ``self.v2``, plus ``feature``. They are softmaxed over the
    query axis, masked by ``q_mask`` and renormalised. The result is the
    attention-weighted sum of the merged representation, reshaped to
    ``(-1, time_p, self.units)``.
    """
    p_gru, q_gru, q_mask, feature = tuple(inputs)
    time_p, time_q = p_gru.shape[1], q_gru.shape[1]

    # Broadcasted elementwise product of every (p, q) position pair.
    p_expanded = p_gru.dimshuffle(0, 1, 'x', 2)  # (batch, time_p, 1, units)
    q_expanded = q_gru.dimshuffle(0, 'x', 1, 2)  # (batch, 1, time_q, units)
    gru_merge = T.tanh(p_expanded * q_expanded).reshape(
        (-1, time_q, self.units))  # (batch * time_p, time_q, units)

    # Raw scores.
    scores = T.dot(gru_merge, self.v1).reshape((-1, time_p, time_q))  # (batch, time_p, time_q)
    query_scores = T.dot(q_gru, self.v2).squeeze()  # (batch, time_q)
    scores = scores + query_scores.dimshuffle(0, 'x', 1) + feature  # (batch, time_p, time_q)

    # Softmax over the query axis, then mask and renormalise.
    weights = T.nnet.softmax(scores.reshape((-1, time_q)))  # (batch * time_p, time_q)
    weights = weights.reshape((-1, time_p, time_q)) * q_mask.dimshuffle(0, 'x', 1)
    weights = weights / (weights.sum(axis = 2, keepdims = True) + 1e-8)
    weights = weights.reshape((-1, time_q))

    attended = T.batched_dot(weights, gru_merge)  # (batch * time_p, units)
    return attended.reshape((-1, time_p, self.units))
def get_output_for(self, inputs, attention_only=False, **kwargs):
    """Attend over ``inputs[1]`` with the scores in ``inputs[2]`` and gate ``inputs[0]``.

    Shapes, as annotated in the original code:
      inputs[0]: B x N x D
      inputs[1]: B x Q x D
      inputs[2]: B x N x Q (or B x Q x N when ``self.transpose`` is set)
      self.mask: B x Q

    ``attention_only`` is accepted but not used in this body.
    """
    if self.transpose:
        M = inputs[2].dimshuffle((0, 2, 1))
    else:
        M = inputs[2]

    n_batch, n_rows, n_cols = M.shape[0], M.shape[1], M.shape[2]

    # Softmax over the last axis, done on a 2-D view.
    alphas = T.nnet.softmax(T.reshape(M, (n_batch * n_rows, n_cols)))

    # Mask, then renormalise over the last axis.  B x N x Q
    alphas_r = T.reshape(alphas, (n_batch, n_rows, n_cols)) * \
        self.mask[:, np.newaxis, :]
    alphas_r = alphas_r / alphas_r.sum(axis=2)[:, :, np.newaxis]

    q_rep = T.batched_dot(alphas_r, inputs[1])  # B x N x D

    # NOTE(review): the gating function is resolved with eval() from the
    # string self.gating_fn; make sure that string never comes from
    # untrusted input.
    gate = eval(self.gating_fn)
    return gate(inputs[0], q_rep)
def get_output(self, train=False):
    """Multiply two stacked copies of the first layer's output with the second layer's output.

    The second layer's output has its first two axes swapped for the product
    and the result is swapped back. Prints the number of layers as a debug
    aid.
    """
    print(len(self.layers))
    first_out = self.layers[0].get_output(train)
    second_out = self.layers[1].get_output(train)

    # The list of two copies is combined with the tensor by the tensor's
    # reflected multiplication.
    product = [first_out] * 2 * second_out.dimshuffle(1, 0, 2)
    return product.dimshuffle(1, 0, 2)
def get_output(self, train=False):
    """Max over axis 2 of two stacked first-layer outputs times the second layer's output.

    The second layer's output has its first two axes swapped for the product
    and the result is swapped back before the reduction. Prints the number
    of layers as a debug aid.
    """
    print(len(self.layers))
    first_out = self.layers[0].get_output(train)
    second_out = self.layers[1].get_output(train)

    # The list of two copies is combined with the tensor by the tensor's
    # reflected multiplication.
    product = [first_out] * 2 * second_out.dimshuffle(1, 0, 2)
    return T.max(product.dimshuffle(1, 0, 2), 2)
def get_output(self, train=False):
    """Sum over axis 2 of five stacked first-layer outputs times the second layer's output.

    The second layer's output has its first two axes swapped for the product
    and the result is swapped back before the reduction. Prints the number
    of layers as a debug aid.
    """
    print(len(self.layers))
    first_out = self.layers[0].get_output(train)
    second_out = self.layers[1].get_output(train)

    # The list of five copies is combined with the tensor by the tensor's
    # reflected multiplication.
    product = [first_out] * 5 * second_out.dimshuffle(1, 0, 2)
    return T.sum(product.dimshuffle(1, 0, 2), 2)
def get_output(self, train=False):
    """Sum over axis 2 of two stacked first-layer outputs times the second layer's output.

    The second layer's output has its first two axes swapped for the product
    and the result is swapped back before the reduction. Prints the number
    of layers as a debug aid.
    """
    print(len(self.layers))
    first_out = self.layers[0].get_output(train)
    second_out = self.layers[1].get_output(train)

    # The list of two copies is combined with the tensor by the tensor's
    # reflected multiplication.
    product = [first_out] * 2 * second_out.dimshuffle(1, 0, 2)
    return T.sum(product.dimshuffle(1, 0, 2), 2)