def get_output_for(self, inputs, attention_only=False, **kwargs):
# inputs[0]: B x N x D
# inputs[1]: B x Q x D
# inputs[2]: B x N x Q / B x Q x N
# self.mask: B x Q
if self.transpose: M = inputs[2].dimshuffle((0,2,1))
else: M = inputs[2]
alphas = T.nnet.softmax(T.reshape(M, (M.shape[0]*M.shape[1],M.shape[2])))
alphas_r = T.reshape(alphas, (M.shape[0],M.shape[1],M.shape[2]))* \
self.mask[:,np.newaxis,:] # B x N x Q
alphas_r = alphas_r/alphas_r.sum(axis=2)[:,:,np.newaxis] # B x N x Q
q_rep = T.batched_dot(alphas_r, inputs[1]) # B x N x D
return eval(self.gating_fn)(inputs[0],q_rep)
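For reference, a small standalone sketch (not from the original repo; the sizes and the mask are made up) of the mask-and-renormalize step used above: softmax over the query axis, zero out padded query positions, then rescale each row so it sums to 1 again.
import numpy as np
import theano
import theano.tensor as T

M = T.tensor3('M')        # B x N x Q similarity scores
mask = T.matrix('mask')   # B x Q, 1 for real query tokens, 0 for padding

alphas = T.nnet.softmax(M.reshape((M.shape[0] * M.shape[1], M.shape[2])))
alphas = alphas.reshape((M.shape[0], M.shape[1], M.shape[2])) * mask.dimshuffle(0, 'x', 1)
alphas = alphas / alphas.sum(axis=2, keepdims=True)

f = theano.function([M, mask], alphas)
scores = np.random.randn(2, 3, 4).astype(theano.config.floatX)
m = np.array([[1, 1, 1, 0], [1, 1, 0, 0]], dtype=theano.config.floatX)
a = f(scores, m)
assert np.allclose(a.sum(axis=2), 1.0)   # each row renormalized to 1
assert np.allclose(a[0, :, 3], 0.0)      # masked query positions get zero weight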
def get_output_for(self, inputs, **kwargs):
p_gru, q_gru, q_mask, feature = tuple(inputs)
time_p = p_gru.shape[1]
time_q = q_gru.shape[1]
p_gru_re = p_gru.dimshuffle(0, 1, 'x', 2) # (batch, time_p, 1, units)
q_gru_re = q_gru.dimshuffle(0, 'x', 1, 2) # (batch, 1, time_q, units)
gru_merge = T.tanh(p_gru_re * q_gru_re).reshape((-1, time_q, self.units)) # (batch * time_p, time_q, units)
att = T.dot(gru_merge, self.v1).reshape((-1, time_p, time_q)) # (batch, time_p, time_q)
att_q = T.dot(q_gru, self.v2).squeeze() # (batch, time_q)
att = att + att_q.dimshuffle(0, 'x', 1) + feature # (batch, time_p, time_q)
att = T.nnet.softmax(att.reshape((-1, time_q))) # (batch * time_p, time_q)
att = att.reshape((-1, time_p, time_q)) * q_mask.dimshuffle(0, 'x', 1) # (batch, time_p, time_q)
att = att / (att.sum(axis = 2, keepdims = True) + 1e-8) # (batch, time_p, time_q)
att = att.reshape((-1, time_q))
output = T.batched_dot(att, gru_merge) # (batch * time_p, units)
output = output.reshape((-1, time_p, self.units))
return output
def get_output_for(self, inputs, **kwargs):
sequence_length = inputs[0].shape[1] // 2  # first half of the time dimension is used below
input_first = inputs[0][(slice(None),) + (slice(0, sequence_length),)]
input_second = inputs[1]
mask = inputs[self.mask_incoming_index]
if input_second.ndim == 3:
input_second = input_second[(slice(None), -1)]
M = nonlinearities.tanh(T.dot(input_first, self.W_y) + T.dot(input_second.dimshuffle(0, 'x', 1), self.W_h))
# M.shape = N * L * k
alpha = nonlinearities.softmax(T.dot(M, self.w.T).reshape((inputs[0].shape[0], sequence_length)))
# alpha.shape = N * L
alpha = alpha * mask
r = T.batched_dot(alpha, input_first)
# r.shape = N * k
h_star = nonlinearities.tanh(T.dot(r, self.W_p) + T.dot(input_second, self.W_x))
return h_star
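The r = T.batched_dot(alpha, input_first) step contracts the (batch, L) attention weights with the (batch, L, k) sequence states to give a per-sample weighted sum. A minimal sketch with made-up sizes (not from the original layer):
import numpy as np
import theano
import theano.tensor as T

alpha = T.matrix('alpha')        # (N, L) attention weights
seq = T.tensor3('seq')           # (N, L, k) sequence states
r = T.batched_dot(alpha, seq)    # (N, k) attention-weighted sum

f = theano.function([alpha, seq], r)
a = np.random.rand(2, 5).astype(theano.config.floatX)
s = np.random.rand(2, 5, 3).astype(theano.config.floatX)
assert np.allclose(f(a, s), np.einsum('nl,nlk->nk', a, s), atol=1e-5)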
def output_func(self, input):
q = input[0]
all_list = [q]
for i in xrange(self.position):
dot = T.batched_dot(q, T.dot(input[i + 1], self.W[i].T))
all_list.append(dot.dimshuffle(0, 'x'))
all_list.append(input[i + 1])
begin_index = self.position
for i in range(1, self.position):
for j in range(0, i):
dot = T.batched_dot(input[j + 1], T.dot(input[i + 1], self.W[begin_index].T))
all_list.append(dot.dimshuffle(0, 'x'))
#begin_index += 1
out = T.concatenate(all_list, axis=1)
# dot = T.batched_dot(q, T.batched_dot(a, self.W))
#dot = T.batched_dot(q, T.dot(a, self.W.T))
#out = T.concatenate([dot.dimshuffle(0, 'x'), q, a], axis=1)
return out
def score_batch(self, e1, e2, r_index):
"""
:param e1: (batch, entity_dim, )
:param e2: (batch, entity_dim, )
:param r_index: (batch, )
:return:
"""
# (batch, entity_dim) dot (batch, entity_dim, entity_dim, hidden) dot (batch, entity_dim) -> hidden * (batch, )
hidden1_sep, _ = theano.scan(fn=self.step_batch,
sequences=[self.slice_seq],
non_sequences=[e1, e2, self.W[r_index]],
name='batch_scan')
# hidden * (batch, ) -> (batch, hidden)
hidden1 = T.concatenate([hidden1_sep], axis=1).transpose()
if self.keep_normal:
# (batch, 2 * entity_dim) dot (batch, 2 * entity_dim, hidden) -> (batch, hidden, )
hidden2 = T.batched_dot(T.concatenate([e1, e2], axis=1), self.V[r_index])
# (batch, hidden) + (batch, hidden) + (batch, hidden) -> (batch, hidden)
hidden = hidden1 + hidden2 + self.b[r_index]
else:
hidden = hidden1
# (batch, hidden) -> (batch, hidden)
act_hidden = self.act.activate(hidden)
# (batch, hidden) dot (batch, hidden) -> (batch, )
return T.sum(act_hidden * self.U[r_index], axis=1)
def grams(X):
dim_ordering = K.image_dim_ordering()
if dim_ordering == 'tf':
X = K.permute_dimensions(X, (0, 3, 1, 2))
(samples, c, h, w) = get_shape(X)
X_reshaped = K.reshape(X, (-1, c, h * w))
X_T = K.permute_dimensions(X_reshaped, (0, 2, 1))
if K._BACKEND == 'theano':
X_gram = T.batched_dot(X_reshaped, X_T)
else:
X_gram = tf.batch_matmul(X_reshaped, X_T)
X_gram /= c * h * w
return X_gram
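For reference, a pure-Theano sketch of the same Gram-matrix computation, with made-up sizes and without the Keras backend branching above (get_shape and the dim-ordering handling belong to the original repo and are not reproduced here):
import numpy as np
import theano
import theano.tensor as T

X = T.tensor4('X')                                       # (samples, channels, height, width)
c, h, w = X.shape[1], X.shape[2], X.shape[3]
X_flat = X.reshape((X.shape[0], c, h * w))               # (samples, c, h*w)
gram = T.batched_dot(X_flat, X_flat.dimshuffle(0, 2, 1)) # (samples, c, c)
gram = gram / T.cast(c * h * w, theano.config.floatX)

f = theano.function([X], gram)
x = np.random.rand(2, 3, 4, 5).astype(theano.config.floatX)
g = f(x)
assert g.shape == (2, 3, 3)
xr = x.reshape(2, 3, 4 * 5)
ref = np.einsum('bci,bdi->bcd', xr, xr) / (3 * 4 * 5)
assert np.allclose(g, ref, atol=1e-4)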
def op_matmul(s_x_, s_y_, axes_=(-2, -1)):
'''
Limited implementation of np.matmul; does not support broadcasting.
Args:
s_x_: a matrix or a batch of matrices
s_y_: a matrix or a batch of matrices
axes_: tuple of int, the two axes that form the matrix dimensions
'''
assert s_x_.ndim == s_y_.ndim
ndim = s_x_.ndim
assert -ndim <= axes_[0] < ndim
assert -ndim <= axes_[1] < ndim
assert ndim >= 2
axes = axes_[0]%ndim, axes_[1]%ndim
if ndim == 2:
if axes == (0,1):
return T.dot(s_x_, s_y_)
else:
return T.dot(s_y_, s_x_)
s_shp = T.shape(s_x_)
s_size = reduce(T.mul, [s_shp[i] for i in range(s_x_.ndim) if i not in axes])
s_szu = s_shp[axes[0]]
s_szv = s_shp[axes[1]]
s_szw = T.shape(s_y_)[axes[1]]
transpp = list(range(ndim))
transpp[axes[0]], transpp[ndim-2] = transpp[ndim-2], transpp[axes[0]]
transpp[axes[1]], transpp[ndim-1] = transpp[ndim-1], transpp[axes[1]]
s_shp2 = [s_shp[a] for a in transpp]
s_shp2[axes[1]] = s_szw
s_x = s_x_.transpose(*transpp).reshape((s_size, s_szu, s_szv))
s_y = s_y_.transpose(*transpp).reshape((s_size, s_szv, s_szw))
return T.batched_dot(s_x, s_y).reshape(s_shp2).transpose(transpp)
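For the common case where the matrix axes are the trailing two, T.batched_dot already behaves like np.matmul on a stack of matrices; op_matmul above generalizes this to arbitrary axis positions. A small sanity-check sketch with made-up sizes:
import numpy as np
import theano
import theano.tensor as T

x3 = T.tensor3('x')   # (batch, n, k)
y3 = T.tensor3('y')   # (batch, k, m)
f = theano.function([x3, y3], T.batched_dot(x3, y3))

a = np.random.rand(4, 3, 5).astype(theano.config.floatX)
b = np.random.rand(4, 5, 2).astype(theano.config.floatX)
assert np.allclose(f(a, b), np.matmul(a, b), atol=1e-5)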
def get_output_for(self, inputs, **kwargs):
# inputs[0]: B x N x D
# inputs[1]: B x Q x D
# self.mask: B x Q
q_shuf = inputs[1].dimshuffle(0,2,1) # B x D x Q
return T.batched_dot(inputs[0], q_shuf) # B x N x Q
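This is the plain pairwise-similarity use: each document position is dotted with each query position within a batch element. A quick shape check with made-up sizes:
import numpy as np
import theano
import theano.tensor as T

doc = T.tensor3('doc')      # B x N x D
qry = T.tensor3('query')    # B x Q x D
sim = T.batched_dot(doc, qry.dimshuffle(0, 2, 1))   # B x N x Q

f = theano.function([doc, qry], sim)
d = np.random.rand(2, 7, 4).astype(theano.config.floatX)
q = np.random.rand(2, 3, 4).astype(theano.config.floatX)
assert np.allclose(f(d, q), np.einsum('bnd,bqd->bnq', d, q), atol=1e-5)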
def get_output_for(self, inputs, **kwargs):
# inputs[0]: B x N x D
# inputs[1]: B x Q x D
# self.aggregator: B x N x C
# self.pointer: B x 1
# self.mask: B x N
q = inputs[1][T.arange(inputs[1].shape[0]),self.pointer,:] # B x D
p = T.batched_dot(inputs[0],q) # B x N
pm = T.nnet.softmax(p)*self.mask # B x N
pm = pm/pm.sum(axis=1)[:,np.newaxis] # B x N
return T.batched_dot(pm, self.aggregator)
def get_output_for(self, input, **kwargs):
act = T.batched_dot(T.tensordot(input, self.V, axes = [1, 2]), input) + T.dot(input, self.W) + self.b.dimshuffle('x', 0)
return self.nonlinearity(act)
def dot_time_distributed_merge(model, layers, cos_norm=False):
""" Merging two time series layers into one, producing a new time series that
contains a dot-product scalar for each time step.
If cos_norm=True, actually computes cosine similarity. """
def batched_batched_dot(s):
""" from (x,y,z)-shaped pair, produce (x,y)-shaped pair that replaces the z-vector pairs by their dot-products """
import theano
import theano.tensor as T
return theano.scan(fn=lambda xm, ym: T.batched_dot(xm, ym),
outputs_info=None, sequences=s, non_sequences=None)[0]
def batched_cos_sim(s):
""" from (x,y,z)-shaped pair, produce (x,y)-shaped pair that replaces the z-vector pairs by their cosine similarities """
import theano
import theano.tensor as T
return theano.scan(fn=lambda xm, ym: T.batched_dot(xm, ym) / T.sqrt(T.batched_dot(xm, xm) * T.batched_dot(ym, ym)),
outputs_info=None, sequences=s, non_sequences=None)[0]
if cos_norm:
lmb = batched_cos_sim
else:
lmb = batched_batched_dot
return Lambda([model.nodes[l] for l in layers], lmb,
lambda s: (s[1][0], s[1][1]))
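Note that the scan inside batched_batched_dot computes a per-timestep dot product for each pair in the batch. A hedged sketch (made-up shapes, Keras Lambda wiring omitted) showing it agrees with a plain multiply-and-sum:
import numpy as np
import theano
import theano.tensor as T

s0 = T.tensor3('s0')   # (x, y, z)
s1 = T.tensor3('s1')   # (x, y, z)

scan_out, _ = theano.scan(fn=lambda xm, ym: T.batched_dot(xm, ym),
                          sequences=[s0, s1])
direct = (s0 * s1).sum(axis=2)

f = theano.function([s0, s1], [scan_out, direct])
a = np.random.rand(2, 5, 3).astype(theano.config.floatX)
b = np.random.rand(2, 5, 3).astype(theano.config.floatX)
o1, o2 = f(a, b)
assert np.allclose(o1, o2, atol=1e-5)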
def batch_cosine(self, doc_batch_proj, query_batch_proj):
dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)
doc_square = T.sqr(doc_batch_proj)
query_square = T.sqr(query_batch_proj)
doc_norm = (T.sqrt(T.sum(doc_square, axis=1)))
query_norm = T.sqrt(T.sum(query_square, axis=1))
batch_cosine_vec = dot_prod/(doc_norm * query_norm)
return batch_cosine_vec
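A quick sketch checking this per-row cosine against a NumPy reference (random data, made-up sizes). Note that the variant below differs: it sums the per-row norms over the whole batch before dividing, so its denominator is a scalar rather than a per-pair norm product.
import numpy as np
import theano
import theano.tensor as T

d = T.matrix('doc')     # (batch, proj_dim)
q = T.matrix('query')   # (batch, proj_dim)
cos = T.batched_dot(d, q) / (T.sqrt(T.sum(d ** 2, axis=1)) * T.sqrt(T.sum(q ** 2, axis=1)))

f = theano.function([d, q], cos)
a = np.random.rand(4, 6).astype(theano.config.floatX)
b = np.random.rand(4, 6).astype(theano.config.floatX)
ref = (a * b).sum(axis=1) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1))
assert np.allclose(f(a, b), ref, atol=1e-5)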
def batch_cosine(self, doc_batch_proj, query_batch_proj):
dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)
doc_square = doc_batch_proj ** 2
query_square = query_batch_proj ** 2
doc_norm = (T.sqrt(doc_square.sum(axis = 1))).sum()
query_norm = T.sqrt(query_square.sum(axis = 1)).sum()
batch_cosine_vec = dot_prod/(doc_norm * query_norm)
return batch_cosine_vec
The same batch_cosine appears verbatim in DSSM_lstm_last_1neg_hidden300_sharewemblayer.py (project: models, author: Jonbean).
It also appears verbatim in DSSM_blstm_1neg_hidden300_sharewemblayer.py (project: models, author: Jonbean).
It likewise appears verbatim in DSSM_lstm_1neg_hidden300_sharewemblayer.py (project: models, author: Jonbean).
def get_output_for(self, input_, **kwargs):
W = T.tril(self.W, -1)
interactions = T.batched_dot(T.dot(input_, W), input_)
interactions = T.sqrt(T.maximum(interactions, 1e-6))  # clamp at a small positive value before the square root
return self.nonlinearity(input_ + interactions)
def get_output_for(self, inputs, **kwargs):
# inputs[0]: B x N x D, doc
# inputs[1]: B x Q x D, query
# self.aggregator: B x N x C
# self.pointer: B x 1
# self.mask: B x N
q = inputs[1][T.arange(inputs[1].shape[0]),self.pointer,:] # B x D
p = T.batched_dot(inputs[0],q) # B x N
pm = T.nnet.softmax(p)*self.mask # B x N
pm = pm/pm.sum(axis=1)[:,np.newaxis] # B x N
return T.batched_dot(pm, self.aggregator)
def __call__(self, q, a):
return T.batched_dot(T.tensordot(q, self.W, axes=[1, 0]), a)
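This one-liner scores each pair as q_b^T W a_b. A hedged sanity check with made-up dimensions (W is passed in as an input purely for this sketch):
import numpy as np
import theano
import theano.tensor as T

q = T.matrix('q')    # (batch, dq)
a = T.matrix('a')    # (batch, da)
W = T.matrix('W')    # (dq, da) bilinear form

score = T.batched_dot(T.tensordot(q, W, axes=[1, 0]), a)   # (batch,)

f = theano.function([q, a, W], score)
qq = np.random.rand(3, 4).astype(theano.config.floatX)
aa = np.random.rand(3, 5).astype(theano.config.floatX)
ww = np.random.rand(4, 5).astype(theano.config.floatX)
ref = np.einsum('bi,ij,bj->b', qq, ww, aa)
assert np.allclose(f(qq, aa, ww), ref, atol=1e-5)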
def output_func(self, input):
# P(Y|X) = softmax(W.X + b)
q, a = input[0], input[1]
dot = T.batched_dot(q, T.dot(a, self.W))
self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
self.prob = self.p_y_given_x[:,-1]
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
return self.y_pred
def output_func(self, input):
# P(Y|X) = softmax(W.X + b)
q, a, feats = input[0], input[1], input[2]
dot = T.batched_dot(q, T.dot(a, self.W))
feats_dot = T.dot(feats, self.W_feats)
l = self.lamda.dimshuffle('x', 0)
self.p_y_given_x = T.nnet.softmax(l*dot + (1-l) * feats_dot + self.b.dimshuffle('x', 0))
self.prob = self.p_y_given_x[:,-1]
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
return self.y_pred
def output_func(self, input):
# P(Y|X) = softmax(W.X + b)
q, a = input[0], input[1]
# dot = T.batched_dot(q, T.dot(a, self.W.T))
dot = T.batched_dot(q, T.dot(a, self.W))
self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
self.prob = self.p_y_given_x[:,-1]
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
return self.y_pred