def tensor_swirl(image, center=None, strength=1, radius=100, rotation=0, cval=0.0, **kwargs):
    # **kwargs is for unsupported options (ignored)
    cval = tf.fill(K.shape(image)[0:1], cval)
    shape = K.int_shape(image)[1:3]
    if center is None:
        center = np.array(shape) / 2
    ys = np.expand_dims(np.repeat(np.arange(shape[0]), shape[1]), -1)
    xs = np.expand_dims(np.tile(np.arange(shape[1]), shape[0]), -1)
    map_xs, map_ys = swirl_mapping(xs, ys, center, rotation, strength, radius)

    mapping = np.zeros((*shape, *shape))
    for map_x, map_y, x, y in zip(map_xs, map_ys, xs, ys):
        results = tensor_linear_interpolation(image, map_x, map_y, cval)
        for _y, _x, w in results:
            # mapping[int(y), int(x), int(_y), int(_x)] = w
            mapping[int(_y), int(_x), int(y), int(x)] = w

    results = tf.tensordot(image, K.variable(mapping), [[1, 2], [0, 1]])
    # results = K.reshape(results, K.shape(image))
    return results
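The final tf.tensordot in tensor_swirl contracts the two spatial axes of the image batch against the first two axes of the precomputed interpolation tensor, leaving a (batch, height, width) result. A minimal, self-contained shape check of that contraction pattern (the sizes and random values below are purely illustrative, not taken from the original code):

import numpy as np
import tensorflow as tf

batch, h, w = 2, 4, 4
image = tf.constant(np.random.rand(batch, h, w), dtype=tf.float32)
mapping = tf.constant(np.random.rand(h, w, h, w), dtype=tf.float32)

# Contract image axes (1, 2) with mapping axes (0, 1); the surviving axes are
# (batch,) from `image` followed by (h, w) from `mapping`.
warped = tf.tensordot(image, mapping, [[1, 2], [0, 1]])
print(warped.shape)  # (2, 4, 4)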
def apply_attention(self):
    with tf.variable_scope('attention'):
        attention_vector = tf.get_variable(name='attention_vector',
                                           shape=[self.params.ATTENTION_DIM],
                                           dtype=tf.float32)
        mlp_layer_projection = tf.layers.dense(inputs=self.rnn_outputs,
                                               units=self.params.ATTENTION_DIM,
                                               activation=tf.nn.tanh,
                                               kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                               name='fc_attn')
        attended_vector = tf.tensordot(mlp_layer_projection, attention_vector, axes=[[2], [0]])
        attention_weights = tf.expand_dims(tf.nn.softmax(attended_vector), -1)
        weighted_input = tf.matmul(self.rnn_outputs, attention_weights, transpose_a=True)
        self.attention_output = tf.squeeze(weighted_input, axis=2)
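In apply_attention, tf.tensordot with axes=[[2], [0]] contracts the feature axis of the projected RNN outputs against the attention vector, giving one unnormalized score per timestep; the same contraction can also be written as an einsum. A small sketch of that equivalence (shapes and values below are made up for illustration):

import numpy as np
import tensorflow as tf

batch, time, attn_dim = 3, 5, 8
proj = tf.constant(np.random.rand(batch, time, attn_dim), dtype=tf.float32)
attn_vec = tf.constant(np.random.rand(attn_dim), dtype=tf.float32)

scores_td = tf.tensordot(proj, attn_vec, axes=[[2], [0]])  # (batch, time)
scores_es = tf.einsum('bta,a->bt', proj, attn_vec)         # same values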
def _distance_logits(self, x1, x2):
    init = get_keras_initialization(self.init)
    project1 = tf.get_variable("project1", (x1.shape.as_list()[-1], self.project_size), initializer=init)
    x1 = tf.tensordot(x1, project1, [[2], [0]])

    if self.share_project:
        if x2.shape.as_list()[-1] != x1.shape.as_list()[-1]:
            raise ValueError()
        project2 = project1
    else:
        project2 = tf.get_variable("project2", (x2.shape.as_list()[-1], self.project_size), initializer=init)
    x2 = tf.tensordot(x2, project2, [[2], [0]])

    if self.project_bias:
        x1 += tf.get_variable("bias1", (1, 1, self.project_size), initializer=tf.zeros_initializer())
        x2 += tf.get_variable("bias2", (1, 1, self.project_size), initializer=tf.zeros_initializer())

    dots = tf.matmul(x1, x2, transpose_b=True)
    if self.scale:
        dots /= tf.sqrt(tf.cast(self.project_size, tf.float32))
    return dots
def _distance_logits(self, x, keys):
    init = get_keras_initialization(self.init)

    key_w = tf.get_variable("key_w", shape=(keys.shape.as_list()[-1], self.projected_size), initializer=init, dtype=tf.float32)
    key_logits = tf.tensordot(keys, key_w, axes=[[2], [0]])  # (batch, key_len, projected_size)

    if self.shared_project:
        x_w = key_w
    else:
        x_w = tf.get_variable("x_w", shape=(x.shape.as_list()[-1], self.projected_size), initializer=init, dtype=tf.float32)

    x_logits = tf.tensordot(x, x_w, axes=[[2], [0]])  # (batch, x_len, projected_size)

    summed = tf.expand_dims(x_logits, axis=2) + tf.expand_dims(key_logits, axis=1)  # (batch, x_len, key_len, projected_size)
    summed = get_keras_activation(self.activation)(summed)

    combine_w = tf.get_variable("combine_w", shape=self.projected_size, initializer=init, dtype=tf.float32)
    return tf.tensordot(summed, combine_w, axes=[[3], [0]])  # (batch, x_len, key_len)
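This is additive (Bahdanau-style) scoring: each (x position, key position) pair gets a score w · act(W_x x_i + W_k k_j), built by projecting both inputs and letting broadcasting form the full pairwise grid rather than materializing concatenated pairs. A minimal sketch of that broadcasting step (names and sizes below are illustrative stand-ins):

import numpy as np
import tensorflow as tf

batch, x_len, key_len, proj = 2, 3, 4, 6
x_logits = tf.constant(np.random.rand(batch, x_len, proj), dtype=tf.float32)
key_logits = tf.constant(np.random.rand(batch, key_len, proj), dtype=tf.float32)

# (batch, x_len, 1, proj) + (batch, 1, key_len, proj) -> (batch, x_len, key_len, proj)
summed = tf.expand_dims(x_logits, 2) + tf.expand_dims(key_logits, 1)
combine_w = tf.constant(np.random.rand(proj), dtype=tf.float32)
scores = tf.tensordot(summed, combine_w, axes=[[3], [0]])  # (batch, x_len, key_len)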
def _distance_logits(self, x, keys):
    init = get_keras_initialization(self.init)

    key_w = tf.get_variable("key_w", shape=keys.shape.as_list()[-1], initializer=init, dtype=tf.float32)
    key_logits = tf.tensordot(keys, key_w, axes=[[2], [0]])  # (batch, key_len)

    x_w = tf.get_variable("input_w", shape=x.shape.as_list()[-1], initializer=init, dtype=tf.float32)
    x_logits = tf.tensordot(x, x_w, axes=[[2], [0]])  # (batch, x_len)

    dot_w = tf.get_variable("dot_w", shape=x.shape.as_list()[-1], initializer=init, dtype=tf.float32)
    # Compute x * dot_w first, then batch-multiply with the keys
    x_dots = x * tf.expand_dims(tf.expand_dims(dot_w, 0), 0)
    dot_logits = tf.matmul(x_dots, keys, transpose_b=True)

    return dot_logits + tf.expand_dims(key_logits, 1) + tf.expand_dims(x_logits, 2)
def apply(self, is_train, x, c, mask=None, context_mask=None):
    c_w = tf.get_variable("context_weights", shape=(c.shape.as_list()[-1], self.n_out),
                          dtype=tf.float32, initializer=get_keras_initialization(self.init))
    c_projected = tf.matmul(c, c_w)

    x_w = tf.get_variable("input_weights", shape=(x.shape.as_list()[-1], self.n_out),
                          dtype=tf.float32, initializer=get_keras_initialization(self.init))
    x_proj = tf.tensordot(x, x_w, [[2], [0]])

    total = x_proj + tf.expand_dims(c_projected, 1)

    if self.use_bias:
        bias = tf.get_variable("bias", shape=self.n_out, dtype=tf.float32,
                               initializer=tf.zeros_initializer())
        total += tf.expand_dims(tf.expand_dims(bias, 0), 0)

    return get_keras_activation(self.activation)(total)
def apply(self, is_train, x, mask=None):
    if self.key_mapper is not None:
        with tf.variable_scope("map_keys"):
            keys = self.key_mapper.apply(is_train, x, mask)
    else:
        keys = x

    weights = tf.get_variable("weights", keys.shape.as_list()[-1], dtype=tf.float32,
                              initializer=get_keras_initialization(self.init))
    dist = tf.tensordot(keys, weights, axes=[[2], [0]])  # (batch, x_words)
    dist = exp_mask(dist, mask)
    dist = tf.nn.softmax(dist)

    out = tf.einsum("ajk,aj->ak", x, dist)  # (batch, x_dim)

    if self.post_process is not None:
        with tf.variable_scope("post_process"):
            out = self.post_process.apply(is_train, out)
    return out
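The einsum "ajk,aj->ak" above is a weighted sum of the word vectors using the softmax weights. An equivalent formulation expands the weights and reduces over the word axis; a short sketch with made-up shapes:

import numpy as np
import tensorflow as tf

batch, words, dim = 2, 5, 7
x = tf.constant(np.random.rand(batch, words, dim), dtype=tf.float32)
dist = tf.nn.softmax(tf.constant(np.random.rand(batch, words), dtype=tf.float32))

pooled_einsum = tf.einsum('ajk,aj->ak', x, dist)                # (batch, dim)
pooled_manual = tf.reduce_sum(x * tf.expand_dims(dist, -1), 1)  # same values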
def apply(self, is_train, x, mask=None):
    if self.key_mapper is not None:
        with tf.variable_scope("map_keys"):
            keys = self.key_mapper.apply(is_train, x, mask)
    else:
        keys = x

    weights = tf.get_variable("weights", (keys.shape.as_list()[-1], self.n_encodings), dtype=tf.float32,
                              initializer=get_keras_initialization(self.init))
    dist = tf.tensordot(keys, weights, axes=[[2], [0]])  # (batch, x_words, n_encoding)
    if self.bias:
        dist += tf.get_variable("bias", (1, 1, self.n_encodings),
                                dtype=tf.float32, initializer=tf.zeros_initializer())

    if mask is not None:
        bool_mask = tf.expand_dims(tf.cast(tf.sequence_mask(mask, tf.shape(x)[1]), tf.float32), 2)
        # keep logits at real positions, push padded positions to a very negative value before the softmax
        dist = dist * bool_mask + (1 - bool_mask) * VERY_NEGATIVE_NUMBER

    dist = tf.nn.softmax(dist, dim=1)
    out = tf.einsum("ajk,ajn->ank", x, dist)  # (batch, n_encoding, feature)

    if self.post_process is not None:
        with tf.variable_scope("post_process"):
            out = self.post_process.apply(is_train, out)
    return out
def augmented_loss(self, y_true, y_pred):
    _y_pred = Activation("softmax")(y_pred)
    loss = K.categorical_crossentropy(_y_pred, y_true)

    # y is (batch x seq x vocab)
    y_indexes = K.argmax(y_true, axis=2)  # turn one hot to index. (batch x seq)
    y_vectors = self.embedding(y_indexes)  # lookup the vector (batch x seq x vector_length)

    #v_length = self.setting.vector_length
    #y_vectors = K.reshape(y_vectors, (-1, v_length))
    #y_t = K.map_fn(lambda v: K.dot(self.embedding.embeddings, K.reshape(v, (-1, 1))), y_vectors)
    #y_t = K.squeeze(y_t, axis=2)  # unknown but necessary operation
    #y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))

    # vector x embedding dot products (batch x seq x vocab)
    y_t = tf.tensordot(y_vectors, K.transpose(self.embedding.embeddings), 1)
    y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))  # explicitly set shape

    y_t = K.softmax(y_t / self.temperature)
    _y_pred_t = Activation("softmax")(y_pred / self.temperature)
    aug_loss = kullback_leibler_divergence(y_t, _y_pred_t)
    loss += (self.gamma * self.temperature) * aug_loss
    return loss
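The tf.tensordot call with the integer form axes=1 contracts the last axis of the looked-up target embeddings against the first axis of the transposed embedding matrix, so y_t holds the dot product of each target token's embedding with every vocabulary embedding before the temperature softmax. A minimal shape sketch (batch size, sequence length, embedding width, and vocabulary size below are illustrative):

import numpy as np
import tensorflow as tf

batch, seq, vec_len, vocab = 2, 4, 8, 10
y_vectors = tf.constant(np.random.rand(batch, seq, vec_len), dtype=tf.float32)
embeddings = tf.constant(np.random.rand(vocab, vec_len), dtype=tf.float32)

# axes=1 contracts the last axis of y_vectors with the first axis of the
# transposed embedding matrix: (batch, seq, vec_len) x (vec_len, vocab)
y_t = tf.tensordot(y_vectors, tf.transpose(embeddings), 1)  # (batch, seq, vocab)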
Source file: seq2seq_aligner.py (project: almond-nnparser, author: Stanford-Mobisocial-IoT-Lab)
def add_decoder_op(self, enc_final_state, enc_hidden_states, output_embed_matrix, training):
    cell_dec = tf.contrib.rnn.MultiRNNCell([self.make_rnn_cell(i, True) for i in range(self.config.rnn_layers)])

    encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
    decoder_hidden_size = int(cell_dec.output_size)

    # if encoder and decoder have different sizes, add a projection layer
    if encoder_hidden_size != decoder_hidden_size:
        assert False, (encoder_hidden_size, decoder_hidden_size)
        with tf.variable_scope('hidden_projection'):
            kernel = tf.get_variable('kernel', (encoder_hidden_size, decoder_hidden_size), dtype=tf.float32)

            # apply a relu to the projection for good measure
            enc_final_state = nest.map_structure(lambda x: tf.nn.relu(tf.matmul(x, kernel)), enc_final_state)
            enc_hidden_states = tf.nn.relu(tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))
    else:
        # flatten and repack the state
        enc_final_state = nest.pack_sequence_as(cell_dec.state_size, nest.flatten(enc_final_state))

    if self.config.connect_output_decoder:
        cell_dec = ParentFeedingCellWrapper(cell_dec, enc_final_state)
    else:
        cell_dec = InputIgnoringCellWrapper(cell_dec, enc_final_state)

    if self.config.apply_attention:
        attention = LuongAttention(self.config.decoder_hidden_size, enc_hidden_states, self.input_length_placeholder,
                                   probability_fn=tf.nn.softmax)
        cell_dec = AttentionWrapper(cell_dec, attention,
                                    cell_input_fn=lambda inputs, _: inputs,
                                    attention_layer_size=self.config.decoder_hidden_size,
                                    initial_cell_state=enc_final_state)
        enc_final_state = cell_dec.zero_state(self.batch_size, dtype=tf.float32)

    decoder = Seq2SeqDecoder(self.config, self.input_placeholder, self.input_length_placeholder,
                             self.output_placeholder, self.output_length_placeholder, self.batch_number_placeholder)
    return decoder.decode(cell_dec, enc_final_state, self.config.grammar.output_size, output_embed_matrix, training)
def _setup(self, x, axes=None):
    """Setup the linear layer.

    :param x: Input tensor.
    :param axes: Axes over which to contract. If given, the layer performs a tensor dot
        instead of a plain matrix multiplication.
    :return: Output tensor.
    """
    y = tf.matmul(x, self._w) if axes is None else tf.tensordot(x, self._w, axes=axes)
    if self._with_bias:
        y += self._b
    if self._with_batch_norm:
        y = self._batch_norm.setup(y)
    return y
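When axes is supplied, the tensordot acts as a dense layer applied along the chosen axis of a higher-rank input. For the common case of contracting the last feature axis, it matches reshaping to 2-D, a matmul, and reshaping back; a small sketch with illustrative shapes:

import numpy as np
import tensorflow as tf

batch, steps, in_dim, out_dim = 2, 5, 6, 3
x = tf.constant(np.random.rand(batch, steps, in_dim), dtype=tf.float32)
w = tf.constant(np.random.rand(in_dim, out_dim), dtype=tf.float32)

y_tensordot = tf.tensordot(x, w, axes=[[2], [0]])                  # (batch, steps, out_dim)
y_reshaped = tf.reshape(tf.matmul(tf.reshape(x, [-1, in_dim]), w),
                        [batch, steps, out_dim])                   # same values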
def _setup(self, seq, vec, activation=tf.nn.tanh):
    """Setup a soft attention mechanism for the given context sequence and state.
    The result is an attention context for the state.

    :param seq: The sequence tensor.
        Its shape is defined as (seq_length, batch_size, seq_elem_size).
    :param vec: The vector tensor.
        Its shape is defined as (batch_size, vec_size).
    :param activation: The activation function.
        Default is tf.nn.tanh.
    :return: An attention context with shape (batch_size, seq_elem_size).
    """
    #
    # (seq_length, batch_size, seq_elem_size) @ (seq_elem_size, common_size)
    # -> (seq_length, batch_size, common_size)
    a = tf.tensordot(seq, self._w, ((2,), (0,)))
    #
    # (batch_size, vec_size) @ (vec_size, common_size)
    # -> (batch_size, common_size)
    # -> (1, batch_size, common_size)
    b = tf.matmul(vec, self._u)
    b = tf.reshape(b, (1, -1, self._common_size))
    #
    # -> (seq_length, batch_size, common_size)
    # (seq_length, batch_size, common_size) @ (common_size, 1)
    # -> (seq_length, batch_size, 1)
    a = activation(a + b) if activation is not None else a + b
    a = tf.tensordot(a, self._omega, ((2,), (0,)))
    a = tf.nn.softmax(a, dim=0)
    #
    # (seq_length, batch_size, 1) * (seq_length, batch_size, seq_elem_size)
    # -> (seq_length, batch_size, seq_elem_size)
    # -> (batch_size, seq_elem_size)
    att_context = tf.reduce_sum(a * seq, 0)
    return att_context
def q_indep(q, q_mask):
    q_s = q
    for i in range(2):
        q_s = BiLSTM(q_s, q_mask, 'BiLSTM_q_indep_{}'.format(i))
    w_q = tf.Variable(tf.random_normal([1, n_hidden]))
    s = tf.tensordot(FFNN(q_s, q_mask, 'FFNN_q_s'), w_q, axes=[[-1], [-1]])
    a = softmax_with_mask(s, q_mask, dim=1)
    return tf.matmul(a, q_s, transpose_a=True)
def span_score_logits(spans, spans_mask):
    w_a = tf.Variable(tf.random_normal([n_hidden]))
    h_a = FFNN(spans, spans_mask, 'spans')
    s_a = tf.tensordot(h_a, w_a, axes=[[-1], [-1]])
    return s_a * spans_mask[:, :, 0]
def main():
    """
    Train a policy on the CartPole-v0 environment.
    """
    observations = tf.placeholder(tf.float32, shape=[None, 4])
    out_probs = tf.nn.softmax(policy(observations))

    # Selected actions (one-hot vectors) and cumulative
    # episode rewards for those actions.
    actions = tf.placeholder(tf.float32, shape=[None, 2])
    goodnesses = tf.placeholder(tf.float32, shape=[None, 1])

    loss = -tf.tensordot(tf.log(out_probs), actions * goodnesses, axes=2)
    loss /= tf.cast(tf.shape(actions)[0], tf.float32)

    opt = tf.train.AdamOptimizer(learning_rate=1e-2)
    minimize = opt.minimize(loss)

    env = gym.make('CartPole-v0')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        while True:
            obs, acts, rews, mean_rew = rollouts(env, sess, observations,
                                                 out_probs, 2000)
            loss_args = {
                observations: obs,
                actions: acts,
                goodnesses: rews
            }
            print('mean_reward=%f' % (mean_rew,))
            sess.run(minimize, feed_dict=loss_args)
def loss(self, actual_out):
    """
    Compute the cross-entropy loss between the actual
    output and the desired targets.
    """
    cost_sum = None
    for timestep, actual_term in enumerate(actual_out):
        target_term = self.outputs[timestep]
        log_probs = tf.log(tf.nn.softmax(actual_term))
        loss = -tf.tensordot(log_probs, target_term, axes=2)
        if cost_sum is None:
            cost_sum = loss
        else:
            cost_sum += loss
    return cost_sum / (self.batch * self.length)
def surrogate_objective(policy_out):
    """
    Create the surrogate objective for policy gradients.

    Returns actions, rewards, objective.
    """
    actions = tf.placeholder(tf.float32, [None, 2])
    rewards = tf.placeholder(tf.float32, [None, 1])
    objective = tf.tensordot(tf.log(policy_out), actions * rewards, axes=2)
    return actions, rewards, objective
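Both surrogate_objective here and the loss in main() above rely on tf.tensordot's integer form: axes=2 contracts the last two axes of the first argument with the first two axes of the second, which for two matrices of matching shape is just the sum of all elementwise products (a Frobenius inner product). A tiny sketch with made-up numbers:

import numpy as np
import tensorflow as tf

log_probs = tf.constant(np.log([[0.7, 0.3], [0.4, 0.6]]), dtype=tf.float32)
weighted_actions = tf.constant([[1.0, 0.0], [0.0, 2.0]], dtype=tf.float32)

# axes=2 contracts both axes: sum_ij log_probs[i, j] * weighted_actions[i, j]
objective = tf.tensordot(log_probs, weighted_actions, axes=2)  # scalar
same = tf.reduce_sum(log_probs * weighted_actions)             # identical value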
def dOmega_dWrec(self):
    # states in shape timesteps, batch, n_rec
    states = self.states
    dxt_list = tf.gradients(self.error, states)

    #dxt_list[0] = tf.Print(dxt_list[0], [dxt_list[0]], "dxt 0: ")

    test = tf.gradients(states[0], states[-1])

    dxt = tf.stack(dxt_list)
    xt = tf.stack(states)

    num = (1 - self.alpha) * dxt + \
          tf.tensordot(self.alpha * dxt,
                       tf.transpose(tf.matmul(tf.abs(self.W_rec) * self.rec_Connectivity, self.Dale_rec)),
                       axes=1) * \
          tf.where(tf.greater(xt, 0), tf.ones_like(xt), tf.zeros_like(xt))
    denom = dxt

    # sum over hidden units
    num = tf.reduce_sum(tf.square(num), axis=2)
    denom = tf.reduce_sum(tf.square(denom), axis=2)

    bounded = tf.where(tf.greater(denom, 1e-20), tf.div(num, 1.0 * denom), tf.ones_like(num))
    nelems = tf.reduce_mean(tf.where(tf.greater(denom, 1e-20), 1.0 * tf.ones_like(num), 1.0 * tf.zeros_like(num)), axis=1)

    # sum mean over each batch by time steps
    Omega = tf.square(bounded - 1.0)
    Omega = tf.reduce_sum(tf.reduce_mean(Omega, axis=1)) / (1.0 * tf.reduce_sum(nelems))

    out = tf.gradients(Omega, self.W_rec)
    out[0] = tf.Print(out[0], [out[0], self.W_rec, Omega], "omega grads")
    out[0] = tf.verify_tensor_all_finite(out[0], "dead omega grad")

    return out, test
def _distance_logits(self, x, keys):
    init = get_keras_initialization(self.init)

    key_w = tf.get_variable("key_w", shape=keys.shape.as_list()[-1], initializer=init, dtype=tf.float32)
    key_logits = tf.tensordot(keys, key_w, axes=[[2], [0]])  # (batch, key_len)

    x_w = tf.get_variable("x_w", shape=x.shape.as_list()[-1], initializer=init, dtype=tf.float32)
    x_logits = tf.tensordot(x, x_w, axes=[[2], [0]])  # (batch, x_len)

    # Broadcasting will expand the arrays to (batch, x_len, key_len)
    return tf.expand_dims(x_logits, axis=2) + tf.expand_dims(key_logits, axis=1)
def apply(self, is_train, x, mask=None):
    out = self.other.apply(is_train, x, mask)
    w = tf.get_variable("project_w", (x.shape.as_list()[-1], out.shape.as_list()[-1]))
    return out + tf.tensordot(x, w, axes=[[len(x.shape) - 1], [0]])
def apply(self, is_train, tensor1: tf.Tensor, tensor2: tf.Tensor) -> tf.Tensor:
    init = get_keras_initialization(self.init)
    w1 = tf.get_variable("w1", (tensor1.shape.as_list()[-1], tensor2.shape.as_list()[-1]), initializer=init)

    project1 = tf.tensordot(tensor1, w1, [[len(tensor1.shape) - 1], [0]])
    if self.scale:
        project1 /= np.sqrt(tensor1.shape.as_list()[-1])
    project1 *= tensor2

    elements = [tensor1, project1]
    if self.include_unscaled:
        elements.append(tensor2)
    return tf.concat(elements, axis=len(tensor1.shape) - 1)
def apply(self, is_train, x, mask=None):
    s = x.shape.as_list()[1]
    w = tf.get_variable("w", (s,), dtype=tf.float32,
                        initializer=tf.constant_initializer(s / 3.0))
    b = tf.get_variable("b", (), dtype=tf.float32,
                        initializer=tf.zeros_initializer())
    return tf.tensordot(x, w, [[1], [0]]) + b
def apply(self, is_train, x, mask=None):
    _, d1, _, d2 = x.shape.as_list()
    w = tf.get_variable("w", (d1, d2, self.n_out), dtype=tf.float32)
    return tf.tensordot(x, w, [[1, 3], [0, 1]])
def apply(self, is_train, x, c, mask=None, context_mask=None):
    x = dropout(x, self.keep_probs, is_train)
    c = dropout(c, self.context_keep_probs, is_train)
    init = get_keras_initialization(self.init)

    x_w = tf.get_variable("merge_x_weights", (x.shape.as_list()[-1], self.output_size), initializer=init)
    c_w = tf.get_variable("merge_context_weights", (c.shape.as_list()[-1], self.output_size), initializer=init)
    output = tf.tensordot(x, x_w, axes=[[2], [0]]) + tf.expand_dims(tf.matmul(c, c_w), 1)

    if self.use_dots:
        dots = tf.einsum("aij,aj->aij", x, c)
        dot_w = tf.get_variable("dot_weights", (c.shape.as_list()[-1], self.output_size), initializer=init)
        output += tf.tensordot(dots, dot_w, axes=[[2], [0]])

    bias = tf.get_variable("merge_bias", (1, 1, self.output_size))
    output += bias
    return get_keras_activation(self.activation)(output)
def context_shift(x,
                  context,
                  shift=True,
                  scale=True,
                  scope=None,
                  reuse=None):
    B = context._shape_as_list()[-1]
    C = x._shape_as_list()[-1]
    ndim = len(x.shape)
    var_shape = [B] + [1] * (ndim - 2) + [C]
    with tf.variable_scope(scope, 'context_shift', reuse=reuse):
        output = x
        if scale:
            gamma = tf.get_variable('gamma', var_shape, initializer=tf.ones_initializer)
            output *= tf.tensordot(context, gamma, 1)
        if shift:
            beta = tf.get_variable('beta', var_shape, initializer=tf.zeros_initializer)
            output += tf.tensordot(context, beta, 1)
        output.set_shape(x.get_shape())
        return output
def learn_comb_orth(poses, dm_shape, reuse=None, _float_type=tf.float32):
    with tf.variable_scope("learn_comb", reuse=reuse):
        comb_matrix = tf.get_variable(
            "matrix", [dm_shape[0], dm_shape[1]],
            initializer=identity_initializer(0),
            dtype=_float_type, trainable=False
        )
        tf.add_to_collection(COMB_MATRIX_COLLECTION, comb_matrix)

        poses = tf.tensordot(poses, comb_matrix, [[2], [1]])
        poses = tf.transpose(poses, [0, 1, 3, 2])

        # Special update code
        def update_comb_mat(grad, lr):
            A = tf.matmul(tf.transpose(grad), comb_matrix) - \
                tf.matmul(tf.transpose(comb_matrix), grad)
            I = tf.constant(np.eye(dm_shape[0]), dtype=_float_type)
            t1 = I + lr / 2 * A
            t2 = I - lr / 2 * A
            Y = tf.matmul(tf.matmul(tf.matrix_inverse(t1), t2), comb_matrix)
            return tf.assign(comb_matrix, Y)

        # Visualization
        cb_min = tf.reduce_min(comb_matrix)
        cb_max = tf.reduce_max(comb_matrix)
        comb_matrix_image = (comb_matrix - cb_min) / (cb_max - cb_min) * 255.0
        comb_matrix_image = tf.cast(comb_matrix_image, tf.uint8)
        comb_matrix_image = tf.reshape(comb_matrix_image, [1, dm_shape[0], dm_shape[1], 1])

    return poses, comb_matrix_image, update_comb_mat
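The update_comb_mat closure is a Cayley-transform style step: A is skew-symmetric by construction, and multiplying by (I + lr/2 * A)^(-1)(I - lr/2 * A) keeps the matrix orthogonal if it was orthogonal to begin with. A small NumPy check of that property (sizes and values below are arbitrary stand-ins, not taken from the original model):

import numpy as np

n = 4
rng = np.random.RandomState(0)
C = np.linalg.qr(rng.randn(n, n))[0]   # an orthogonal matrix
G = rng.randn(n, n)                    # stand-in for a gradient
A = G.T @ C - C.T @ G                  # skew-symmetric by construction
lr = 0.1
I = np.eye(n)
Y = np.linalg.inv(I + lr / 2 * A) @ (I - lr / 2 * A) @ C

print(np.allclose(Y.T @ Y, I))  # True: the update preserves orthogonality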
def tensordot(x, y, axes):
    return tf.tensordot(x, y, axes=axes)
Source file: logistic_regression_tf.py (project: lecture-ss17-deep-learning, author: bbaeuml)
def dcost_fun(y, t, phi, batch_size):
    """computes the gradient of the cost wrt. the weights

    Args:
        y, t: the predicted probability and target variable tensors of shape (N_examples, K_classes)
        phi: feature tensor of shape (N_examples, dim_phi)

    Returns:
        The gradient tensor of shape (dim_phi, K_classes).
    """
    return tf.tensordot(phi, (y - t), axes=([0], [0])) / batch_size
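For softmax regression with a cross-entropy cost, the gradient with respect to the weight matrix is phi^T (y - t) averaged over the batch, and the tensordot over axes ([0], [0]) performs exactly that contraction across the example axis. A quick NumPy cross-check with toy numbers (purely illustrative):

import numpy as np
import tensorflow as tf

N, dim_phi, K = 5, 3, 4
rng = np.random.RandomState(1)
phi = rng.rand(N, dim_phi).astype(np.float32)
y = rng.rand(N, K).astype(np.float32)
t = rng.rand(N, K).astype(np.float32)

grad_np = phi.T.dot(y - t) / N                              # (dim_phi, K)
grad_tf = tf.tensordot(tf.constant(phi), tf.constant(y - t),
                       axes=([0], [0])) / N                 # same values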
Source file: beam_aligner.py (project: almond-nnparser, author: Stanford-Mobisocial-IoT-Lab)
def add_decoder_op(self, enc_final_state, enc_hidden_states, output_embed_matrix, training):
    cell_dec = tf.contrib.rnn.MultiRNNCell([self.make_rnn_cell(i, for_decoder=True) for i in range(self.config.rnn_layers)])

    encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
    decoder_hidden_size = int(cell_dec.output_size)

    # if encoder and decoder have different sizes, add a projection layer
    if encoder_hidden_size != decoder_hidden_size:
        assert False, (encoder_hidden_size, decoder_hidden_size)
        with tf.variable_scope('hidden_projection'):
            kernel = tf.get_variable('kernel', (encoder_hidden_size, decoder_hidden_size), dtype=tf.float32)

            # apply a relu to the projection for good measure
            enc_final_state = nest.map_structure(lambda x: tf.nn.relu(tf.matmul(x, kernel)), enc_final_state)
            enc_hidden_states = tf.nn.relu(tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))
    else:
        # flatten and repack the state
        enc_final_state = nest.pack_sequence_as(cell_dec.state_size, nest.flatten(enc_final_state))

    beam_width = self.config.training_beam_size if training else self.config.beam_size

    #cell_dec = ParentFeedingCellWrapper(cell_dec, tf.contrib.seq2seq.tile_batch(enc_final_state, beam_width))
    if self.config.apply_attention:
        attention = LuongAttention(decoder_hidden_size,
                                   tf.contrib.seq2seq.tile_batch(enc_hidden_states, beam_width),
                                   tf.contrib.seq2seq.tile_batch(self.input_length_placeholder, beam_width),
                                   probability_fn=tf.nn.softmax)
        cell_dec = AttentionWrapper(cell_dec, attention,
                                    cell_input_fn=lambda inputs, _: inputs,
                                    attention_layer_size=decoder_hidden_size,
                                    initial_cell_state=tf.contrib.seq2seq.tile_batch(enc_final_state, beam_width))
        enc_final_state = cell_dec.zero_state(self.batch_size * beam_width, dtype=tf.float32)
    else:
        enc_final_state = tf.contrib.seq2seq.tile_batch(enc_final_state, beam_width)

    print('enc_final_state', enc_final_state)

    linear_layer = tf_core_layers.Dense(self.config.output_size)
    go_vector = tf.ones((self.batch_size,), dtype=tf.int32) * self.config.grammar.start
    decoder = BeamSearchOptimizationDecoder(training, cell_dec, output_embed_matrix, go_vector, self.config.grammar.end,
                                            enc_final_state,
                                            beam_width=beam_width,
                                            output_layer=linear_layer,
                                            gold_sequence=self.output_placeholder if training else None,
                                            gold_sequence_length=(self.output_length_placeholder + 1) if training else None)

    if self.config.use_grammar_constraints:
        raise NotImplementedError("Grammar constraints are not implemented for the beam search yet")

    final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, output_time_major=True, maximum_iterations=self.config.max_length)
    return final_outputs
def lyr_linear(
        name, s_x, odim,
        axis=-1, bias=True, w_init=None, b_init=None):
    '''
    Like tf.nn.xw_plus_b, but works on arbitrary shape

    Args:
        name: string
        s_x: tensor variable
        odim: integer
        axis: integer
        bias: boolean, whether to use bias
        w_init: initializer for W
        b_init: initializer for B
    '''
    assert isinstance(odim, int)
    x_shape = s_x.get_shape().as_list()
    idim = x_shape[axis]
    ndim = len(x_shape)
    assert -ndim <= axis < ndim
    assert isinstance(idim, int)
    with tf.variable_scope(name):
        v_w = tf.get_variable(
            'W', [idim, odim],
            initializer=w_init,
            dtype=hparams.FLOATX)
        if ndim == 1:
            s_y = tf.matmul(tf.expand_dims(s_x, 0), v_w)
            s_y = tf.squeeze(s_y, 0)
        elif ndim == 2:
            if axis % 2 == 1:
                s_y = tf.matmul(s_x, v_w)
            else:
                s_y = tf.matmul(tf.transpose(s_x), v_w)
                s_y = tf.transpose(s_y)
        elif (axis + 1) % ndim == 0:
            s_batch_shp = tf.shape(s_x)[:-1]
            s_x = tf.reshape(
                s_x,
                [tf.reduce_prod(s_batch_shp, axis=None), x_shape[-1]])
            s_y = tf.matmul(s_x, v_w)
            s_y = tf.reshape(s_y, tf.concat([s_batch_shp, [odim]], axis=0))
        else:
            s_y = tf.tensordot(s_x, v_w, [[axis], [0]])
        if bias:
            if b_init is None:
                b_init = tf.constant_initializer(0., dtype=hparams.FLOATX)
            v_b = tf.get_variable(
                'B', [odim],
                initializer=b_init,
                dtype=hparams.FLOATX)
            s_b = tf.reshape(v_b, [odim] + [1] * (ndim - (axis % ndim) - 1))
            s_y = s_y + s_b
    return s_y
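One behaviour of tf.tensordot worth keeping in mind for the fallback branch above: the output dimension contributed by v_w is appended at the end of the result, so when axis points at a middle dimension the result's axis order differs from the input's (the contracted position is removed rather than replaced in place), which matters when broadcasting a bias against the result. A small sketch of that ordering (shapes are illustrative):

import numpy as np
import tensorflow as tf

a, b, c, odim = 2, 3, 4, 5
s_x = tf.constant(np.random.rand(a, b, c), dtype=tf.float32)
v_w = tf.constant(np.random.rand(b, odim), dtype=tf.float32)

# Contracting the middle axis: the surviving axes of s_x come first (a, c),
# then the surviving axis of v_w (odim), not (a, odim, c).
s_y = tf.tensordot(s_x, v_w, [[1], [0]])
print(s_y.shape)  # (2, 4, 5)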