def testBuildAndTrain(self):
    inputs = tf.random_normal([TIME_STEPS, BATCH_SIZE, INPUT_SIZE])
    output, _ = rnn.dynamic_rnn(
        cell=self.module,
        inputs=inputs,
        initial_state=self.initial_state,
        time_major=True)
    targets = np.random.rand(TIME_STEPS, BATCH_SIZE, NUM_READS, WORD_SIZE)
    loss = tf.reduce_mean(tf.square(output - targets))
    train_op = tf.train.GradientDescentOptimizer(1).minimize(loss)
    init = tf.global_variables_initializer()
    with self.test_session():
        init.run()
        train_op.run()
Python examples of dynamic_rnn()
def compute_states(self, emb):
    def unpack_sequence(tensor):
        return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

    # Uses the pre-TF-1.0 API (tf.unpack/tf.pack, rnn.rnn).
    with tf.variable_scope("Composition",
                           initializer=tf.contrib.layers.xavier_initializer(),
                           regularizer=tf.contrib.layers.l2_regularizer(self.reg)):
        cell = rnn_cell.LSTMCell(self.hidden_dim)
        # tf.cond(tf.less(self.dropout
        # if tf.less(self.dropout, tf.constant(1.0)):
        cell = rnn_cell.DropoutWrapper(
            cell, output_keep_prob=self.dropout, input_keep_prob=self.dropout)
        # output, state = rnn.dynamic_rnn(cell, emb, sequence_length=self.lngths, dtype=tf.float32)
        outputs, _ = rnn.rnn(cell, unpack_sequence(emb),
                             sequence_length=self.lngths, dtype=tf.float32)
        # output = pack_sequence(outputs)
        # Mean-pool the per-step outputs over the true sequence length.
        sum_out = tf.reduce_sum(tf.pack(outputs), [0])
        sent_rep = tf.div(sum_out, tf.expand_dims(tf.to_float(self.lngths), 1))
        final_state = sent_rep
    return final_state
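The snippet above mean-pools per-step LSTM outputs into a fixed-size sentence vector using the pre-1.0 list-based API. A minimal sketch of the same pooling on the TF 1.x dynamic_rnn API (the function and variable names here are illustrative, not from the project above):

import tensorflow as tf

def mean_pooled_sentence_rep(emb, lengths, hidden_dim=128):
    # emb: [batch, max_time, emb_dim]; lengths: [batch] true lengths.
    cell = tf.nn.rnn_cell.LSTMCell(hidden_dim)
    outputs, _ = tf.nn.dynamic_rnn(cell, emb, sequence_length=lengths,
                                   dtype=tf.float32)
    # dynamic_rnn zeroes outputs past each sequence's length, so summing over
    # time and dividing by the true length gives a correct mean-pool.
    summed = tf.reduce_sum(outputs, axis=1)                   # [batch, hidden]
    return summed / tf.expand_dims(tf.to_float(lengths), 1)  # [batch, hidden]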
Source: seq2seq_test.py — project: DeepLearning_VirtualReality_BigData_Project (author: rashmitripathi)
def testDynamicAttentionDecoder1(self):
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            cell = core_rnn_cell_impl.GRUCell(2)
            inp = constant_op.constant(0.5, shape=[2, 2, 2])
            enc_outputs, enc_state = rnn.dynamic_rnn(
                cell, inp, dtype=dtypes.float32)
            attn_states = enc_outputs
            dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
            dec, mem = seq2seq_lib.attention_decoder(
                dec_inp, enc_state, attn_states, cell, output_size=4)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 4), res[0].shape)
            res = sess.run([mem])
            self.assertEqual((2, 2), res[0].shape)
def _add_encoders(self):
    with tf.variable_scope('query_encoder'):
        query_encoder_cell = GRUCell(self.encoder_cell_state_size)
        if self.dropout_enabled and self.mode != 'decode':
            query_encoder_cell = DropoutWrapper(cell=query_encoder_cell,
                                                output_keep_prob=0.8)
        query_embeddings = tf.nn.embedding_lookup(self.embeddings,
                                                  self.queries_placeholder)
        query_encoder_outputs, _ = rnn.dynamic_rnn(
            query_encoder_cell, query_embeddings,
            sequence_length=self.query_lengths_placeholder,
            swap_memory=True, dtype=tf.float32)
        self.query_last = query_encoder_outputs[:, -1, :]

    with tf.variable_scope('encoder'):
        fw_cell = GRUCell(self.encoder_cell_state_size)
        bw_cell = GRUCell(self.encoder_cell_state_size)
        if self.dropout_enabled and self.mode != 'decode':
            fw_cell = DropoutWrapper(cell=fw_cell, output_keep_prob=0.8)
            bw_cell = DropoutWrapper(cell=bw_cell, output_keep_prob=0.8)
        embeddings = tf.nn.embedding_lookup(self.embeddings,
                                            self.documents_placeholder)
        (encoder_outputs_fw, encoder_outputs_bw), _ = rnn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell,
            embeddings,
            sequence_length=self.document_lengths_placeholder,
            swap_memory=True,
            dtype=tf.float32)
        self.encoder_outputs = tf.concat([encoder_outputs_fw, encoder_outputs_bw], 2)
        self.final_encoder_state = self.encoder_outputs[:, -1, :]
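One caveat worth knowing: `outputs[:, -1, :]` reads the last padded time step, and dynamic_rnn zeroes outputs past each true length, so on padded batches `query_last` and `final_encoder_state` can come back as zeros for short examples. A hedged sketch of gathering the genuinely last output per example (the helper name is mine, not from the snippet above):

def last_relevant_output(outputs, lengths):
    # outputs: [batch, max_time, depth]; lengths: [batch] int32 true lengths.
    batch = tf.shape(outputs)[0]
    indices = tf.stack([tf.range(batch), lengths - 1], axis=1)  # [batch, 2]
    return tf.gather_nd(outputs, indices)                       # [batch, depth]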
def fprop(self, inputs):
    with tf.variable_scope('model', values=[inputs]):
        one_hot_inputs = tf.one_hot(inputs, self.n_tokens, axis=-1)
        with tf.variable_scope('rnn', values=[inputs]):
            states, _ = dynamic_rnn(cell=IsanCell(self.hidden_dim),
                                    inputs=one_hot_inputs, dtype=tf.float32)
        Wo = tf.get_variable('Wo', shape=[self.hidden_dim, self.target_dim],
                             initializer=tf.random_normal_initializer(
                                 stddev=1.0 / (self.hidden_dim + self.target_dim) ** 2))
        bo = tf.get_variable('bo', shape=[1, self.target_dim],
                             initializer=tf.zeros_initializer())
        bs, t = inputs.get_shape().as_list()
        # Flatten [batch, time, hidden] to 2-D for a single matmul, then
        # reshape the logits back to [batch, time, target_dim].
        logits = tf.matmul(tf.reshape(states, [t * bs, self.hidden_dim]), Wo) + bo
        logits = tf.reshape(logits, [bs, t, self.target_dim])
    return logits
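The reshape → matmul → reshape pattern above is the usual way to apply one output projection to every time step. On TF ≥ 1.0 the same projection can be written with tf.einsum, which hides the reshapes (a sketch, assuming einsum is available in your TF build):

# states: [batch, time, hidden]; Wo: [hidden, target_dim]; bo broadcasts.
logits = tf.einsum('bth,ho->bto', states, Wo) + bo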
def crf_log_norm(inputs, sequence_lengths, transition_params):
    """Computes the normalization for a CRF.
    Args:
        inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials
            to use as input to the CRF layer.
        sequence_lengths: A [batch_size] vector of true sequence lengths.
        transition_params: A [num_tags, num_tags] transition matrix.
    Returns:
        log_norm: A [batch_size] vector of normalizers for a CRF.
    """
    # Split up the first and rest of the inputs in preparation for the forward
    # algorithm.
    first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
    first_input = array_ops.squeeze(first_input, [1])
    rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])
    # Compute the alpha values in the forward algorithm in order to get the
    # partition function.
    forward_cell = CrfForwardRnnCell(transition_params)
    # tf.nn.rnn creates an unrolled graph for a fixed RNN length: calling it
    # with inputs that have 200 time steps bakes 200 RNN steps into a static
    # graph. Graph creation is slow, and you cannot feed sequences longer than
    # the length you originally specified. tf.nn.dynamic_rnn solves this: it
    # uses a tf.while_loop to construct the recurrence at execution time, so
    # graph creation is fast and batches of variable length can be fed.
    _, alphas = rnn.dynamic_rnn(
        cell=forward_cell,
        inputs=rest_of_input,
        sequence_length=sequence_lengths - 1,
        initial_state=first_input,
        dtype=dtypes.float32)
    log_norm = math_ops.reduce_logsumexp(alphas, [1])
    return log_norm
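The comment above is the whole motivation for dynamic_rnn. A minimal, self-contained sketch (TF 1.x, illustrative shapes) showing that one dynamic_rnn graph accepts batches of different maximum length, which a statically unrolled graph cannot:

import numpy as np
import tensorflow as tf

cell = tf.nn.rnn_cell.GRUCell(8)
x = tf.placeholder(tf.float32, [None, None, 4])   # [batch, time, depth]
lens = tf.placeholder(tf.int32, [None])
outputs, _ = tf.nn.dynamic_rnn(cell, x, sequence_length=lens, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for max_t in (5, 50):  # the same graph handles 5-step and 50-step batches
        batch = np.random.rand(3, max_t, 4).astype(np.float32)
        sess.run(outputs, {x: batch, lens: [max_t] * 3})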
def apply(self, is_train, x, mask=None):
    state = dynamic_rnn(self.cell_spec(is_train), x, mask, dtype=tf.float32)[1]
    if isinstance(self.output, int):
        return state[self.output]
    else:
        if self.output is None:
            if not isinstance(state, tf.Tensor):
                raise ValueError()
            return state
        # Renamed the loop variable from `x` so it no longer shadows the input.
        for i, field in enumerate(state._fields):
            if field == self.output:
                return state[i]
        raise ValueError()
def apply(self, is_train, inputs, mask=None):
    cell = self.cell_spec(is_train)
    batch_size = inputs.shape.as_list()[0]
    if self.learn_initial:
        initial = self.cell_spec.build_initial_state_var(batch_size, cell)
    else:
        initial = None
    return dynamic_rnn(cell, inputs, mask, initial, dtype=tf.float32)[0]
def RNN(inputs, lens, name, reuse):
    print("Building network " + name)
    # Define weights
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]), name=name + "_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name + "_biases")
    # Define a lstm cell with tensorflow
    outputs, states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name,
        time_major=False)
    assert outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    print("Done building network " + name)
    #
    # All these asserts are actually documentation: they can't be out of date
    #
    outputs = tf.expand_dims(outputs, 2)
    assert outputs.get_shape() == (__batch_size, __n_steps, 1, __n_hidden)
    tiled_weights = tf.tile(tf.expand_dims(tf.expand_dims(weights, 0), 0),
                            [__batch_size, __n_steps, 1, 1])
    assert tiled_weights.get_shape() == (__batch_size, __n_steps, __n_hidden, n_output)
    # assert tiled_weights.get_shape() == (1, 1, __n_hidden, n_output)
    # Linear activation, using rnn inner loop output for each char
    # (tf.batch_matmul is the pre-TF-1.0 name; TF >= 1.0 folds it into tf.matmul.)
    finals = tf.batch_matmul(outputs, tiled_weights) + biases
    assert finals.get_shape() == (__batch_size, __n_steps, 1, n_output)
    return tf.squeeze(finals)

# tf Graph input
def RNN(inputs, lens, name, reuse):
    print("Building network " + name)
    # Define weights
    inputs = tf.gather(one_hots, inputs)
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]), name=name + "_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name + "_biases")
    # Define a lstm cell with tensorflow
    outputs, states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name,
        time_major=False)
    assert outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    print("Done building network " + name)
    #
    # All these asserts are actually documentation: they can't be out of date
    #
    outputs = tf.expand_dims(outputs, 2)
    assert outputs.get_shape() == (__batch_size, __n_steps, 1, __n_hidden)
    tiled_weights = tf.tile(tf.expand_dims(tf.expand_dims(weights, 0), 0),
                            [__batch_size, __n_steps, 1, 1])
    assert tiled_weights.get_shape() == (__batch_size, __n_steps, __n_hidden, n_output)
    # assert tiled_weights.get_shape() == (1, 1, __n_hidden, n_output)
    # Linear activation, using rnn inner loop output for each char
    finals = tf.batch_matmul(outputs, tiled_weights) + biases
    assert finals.get_shape() == (__batch_size, __n_steps, 1, n_output)
    return tf.squeeze(finals)

# tf Graph input
# pat_chars = tf.placeholder(tf.float32, [__batch_size, __n_steps, n_input])
def RNN(inputs, lens, name, reuse):
    print("Building network " + name)
    # Define weights
    inputs = tf.gather(one_hots, inputs)
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]), name=name + "_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name + "_biases")
    # Define a lstm cell with tensorflow
    outputs, states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name,
        time_major=False)
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (__batch_size, __n_steps, n_input)
    # Required shape: '__n_steps' tensors list of shape (__batch_size, n_input)
    '''outputs, states = rnn.rnn(
        __cell_kind(__n_hidden),
        tf.unpack(tf.transpose(inputs, [1, 0, 2])),
        sequence_length=lens,
        dtype=tf.float32,
        scope=name)
    outputs = tf.transpose(tf.pack(outputs), [1, 0, 2])'''
    print("Done building network " + name)
    # Asserts are actually documentation: they can't be out of date
    assert outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    # Linear activation, using rnn output for each char
    # Reshaping here for a `batch` matrix multiply
    # It's faster than `batch_matmul` probably because it can guarantee a
    # static shape
    outputs = tf.reshape(outputs, [__batch_size * __n_steps, __n_hidden])
    finals = tf.matmul(outputs, weights)
    return tf.reshape(finals, [__batch_size, __n_steps, n_output]) + biases

# tf Graph input
def standard_lstm(input_data, rnn_size):
    # Flatten the spatial grid of a [b, h, w, c] feature map into a
    # length-(h*w) sequence, run an LSTM over it, and restore the grid.
    b, h, w, c = input_data.get_shape().as_list()
    new_input_data = tf.reshape(input_data, (b, h * w, c))
    rnn_out, _ = dynamic_rnn(tf.contrib.rnn.LSTMCell(rnn_size),
                             inputs=new_input_data,
                             dtype=tf.float32)
    rnn_out = tf.reshape(rnn_out, (b, h, w, rnn_size))
    return rnn_out
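A quick usage sketch for standard_lstm, assuming a fully static feature-map shape (the placeholder and sizes are invented for illustration):

feature_map = tf.placeholder(tf.float32, [8, 16, 16, 64])  # [b, h, w, c]
grid_states = standard_lstm(feature_map, rnn_size=128)     # [8, 16, 16, 128]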
def compute_states(self, emb):
    def unpack_sequence(tensor):
        return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

    with tf.variable_scope("Composition",
                           initializer=tf.contrib.layers.xavier_initializer(),
                           regularizer=tf.contrib.layers.l2_regularizer(self.reg)):
        cell_fw = rnn_cell.LSTMCell(self.hidden_dim)
        cell_bw = rnn_cell.LSTMCell(self.hidden_dim)
        # tf.cond(tf.less(self.dropout
        # if tf.less(self.dropout, tf.constant(1.0)):
        cell_fw = rnn_cell.DropoutWrapper(
            cell_fw, output_keep_prob=self.dropout, input_keep_prob=self.dropout)
        cell_bw = rnn_cell.DropoutWrapper(
            cell_bw, output_keep_prob=self.dropout, input_keep_prob=self.dropout)
        # output, state = rnn.dynamic_rnn(cell, emb, sequence_length=self.lngths, dtype=tf.float32)
        outputs, _, _ = rnn.bidirectional_rnn(
            cell_fw, cell_bw, unpack_sequence(emb),
            sequence_length=self.lngths, dtype=tf.float32)
        # output = pack_sequence(outputs)
        # Mean-pool the concatenated forward/backward outputs over time.
        sum_out = tf.reduce_sum(tf.pack(outputs), [0])
        sent_rep = tf.div(sum_out, tf.expand_dims(tf.to_float(self.lngths), 1))
        final_state = sent_rep
    return final_state
def get_last_hidden_state(self, sentence, init_hidden_state=None):
    assert isinstance(sentence, Sentence)
    with tf.variable_scope(self.scope, reuse=self.used):
        J = sentence.shape[-1]
        Ax = tf.nn.embedding_lookup(self.emb_mat, sentence.x)  # [N, C, J, e]
        F = reduce(mul, sentence.shape[:-1], 1)
        init_hidden_state = init_hidden_state or self.cell.zero_state(F, tf.float32)
        Ax_flat = tf.reshape(Ax, [F, J, self.input_size])
        x_len_flat = tf.reshape(sentence.x_len, [F])
        # Ax_flat_split = [tf.squeeze(x_flat_each, [1]) for x_flat_each in tf.split(1, J, Ax_flat)]
        o_flat, h_flat = rnn.dynamic_rnn(self.cell, Ax_flat, x_len_flat,
                                         initial_state=init_hidden_state)
        self.used = True
        return h_flat
Source: lstm1d.py — project: DeepLearning_VirtualReality_BigData_Project (author: rashmitripathi)
def ndlstm_base_dynamic(inputs, noutput, scope=None, reverse=False):
    """Run an LSTM, either forward or backward.
    This is a 1D LSTM implementation using dynamic_rnn and
    the TensorFlow LSTM op.
    Args:
        inputs: input sequence (length, batch_size, ninput)
        noutput: depth of output
        scope: optional scope name
        reverse: run LSTM in reverse
    Returns:
        Output sequence (length, batch_size, noutput)
    """
    with variable_scope.variable_scope(scope, "SeqLstm", [inputs]):
        # TODO(tmb) make batch size, sequence_length dynamic
        # example: sequence_length = tf.shape(inputs)[0]
        _, batch_size, _ = _shape(inputs)
        lstm_cell = core_rnn_cell_impl.BasicLSTMCell(noutput, state_is_tuple=False)
        state = array_ops.zeros([batch_size, lstm_cell.state_size])
        sequence_length = int(inputs.get_shape()[0])
        sequence_lengths = math_ops.to_int64(
            array_ops.fill([batch_size], sequence_length))
        if reverse:
            inputs = array_ops.reverse_v2(inputs, [0])
        outputs, _ = rnn.dynamic_rnn(
            lstm_cell, inputs, sequence_lengths, state, time_major=True)
        if reverse:
            outputs = array_ops.reverse_v2(outputs, [0])
        return outputs
Source: lstm1d.py — project: DeepLearning_VirtualReality_BigData_Project (author: rashmitripathi)
def ndlstm_base(inputs, noutput, scope=None, reverse=False, dynamic=True):
    """Implements a 1D LSTM, either forward or backward.
    This is a base case for multidimensional LSTM implementations, which
    tend to be used differently from sequence-to-sequence
    implementations. For general 1D sequence to sequence
    transformations, you may want to consider another implementation
    from TF slim.
    Args:
        inputs: input sequence (length, batch_size, ninput)
        noutput: depth of output
        scope: optional scope name
        reverse: run LSTM in reverse
        dynamic: use dynamic_rnn
    Returns:
        Output sequence (length, batch_size, noutput)
    """
    # TODO(tmb) maybe add option for other LSTM implementations, like
    # slim.rnn.basic_lstm_cell
    if dynamic:
        return ndlstm_base_dynamic(inputs, noutput, scope=scope, reverse=reverse)
    else:
        return ndlstm_base_unrolled(inputs, noutput, scope=scope, reverse=reverse)
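A hedged usage sketch: ndlstm_base expects a time-major tensor of static length, so a (length, batch_size, ninput) tensor goes straight in, and reverse=True runs the backward pass (shapes invented for illustration):

seq = tf.placeholder(tf.float32, [20, 4, 32])  # (length, batch_size, ninput)
fwd = ndlstm_base(seq, 64)                     # (20, 4, 64), forward LSTM
bwd = ndlstm_base(seq, 64, reverse=True)       # (20, 4, 64), backward LSTM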
def run_lstm_mnist(lstm_cell=BasicLSTMCell, hidden_size=32, batch_size=256, steps=1000):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    learning_rate = 0.001
    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'training_accuracy'])
    x = tf.placeholder('float32', [batch_size, 784, 2 if lstm_cell == PhasedLSTMCell else 1])
    y_ = tf.placeholder('float32', [batch_size, 10])
    initial_states = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                      tf.random_normal([batch_size, hidden_size], stddev=0.1))
    outputs, _ = dynamic_rnn(lstm_cell(hidden_size), x, initial_state=initial_states,
                             dtype=tf.float32)
    rnn_out = tf.squeeze(outputs[:, -1, :])
    fc0_w = create_weight_variable('fc0_w', [hidden_size, 10])
    fc0_b = create_bias_variable('fc0_b', [10])
    y = tf.matmul(rnn_out, fc0_w) + fc0_b
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
    grad_update = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(tf.global_variables_initializer())

    def transform_x(_x_):
        if lstm_cell == PhasedLSTMCell:
            t = np.reshape(np.tile(np.array(range(784)), (batch_size, 1)), (batch_size, 784))
            return np.squeeze(np.stack([_x_, t], axis=2))
        t_x = np.expand_dims(_x_, axis=2)
        return t_x

    for i in range(steps):
        batch = mnist.train.next_batch(batch_size)
        st = time()
        tr_loss, tr_acc, _ = sess.run([cross_entropy, accuracy, grad_update],
                                      feed_dict={x: transform_x(batch[0]), y_: batch[1]})
        print('Forward-Backward pass took {0:.2f}s to complete.'.format(time() - st))
        file_logger.write([i, tr_loss, tr_acc])
    file_logger.close()
def sentence_embedding_rnn(_encoder_inputs, vocab_size, cell,
                           embedding_size, mask=None, dtype=dtypes.float32,
                           scope=None, reuse_scop=None):
    """Encode each masked-off span of `_encoder_inputs` with an RNN encoder."""
    with variable_scope.variable_scope("embedding_rnn", reuse=reuse_scop):
        # The EmbeddingWrapper turns integer token inputs into embeddings
        # before they reach `cell`; without it `encoder_cell` is undefined,
        # so the commented-out wrapper has been restored.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell, embedding_classes=vocab_size,
            embedding_size=embedding_size)
        # Divide encoder_inputs by the given input mask: each entry of `mask`
        # is the index of the last step of one sentence.
        if mask is not None:
            encoder_inputs = [[] for _ in mask]
            _mask = 0
            for num in range(len(_encoder_inputs)):
                encoder_inputs[_mask].append(_encoder_inputs[num])
                if num == mask[_mask]:
                    _mask += 1
        else:
            encoder_inputs = []
            encoder_inputs.append(_encoder_inputs)
        encoder_state = None
        encoder_states = []
        for encoder_input in encoder_inputs:
            if encoder_state is None:
                _, encoder_state = rnn.dynamic_rnn(encoder_cell, encoder_input, dtype=dtype)
            else:
                # Carry the previous sentence's final state in as the initial
                # state of the next one; the original passed it positionally,
                # which lands on dynamic_rnn's sequence_length argument.
                _, encoder_state = rnn.dynamic_rnn(encoder_cell, encoder_input,
                                                   initial_state=encoder_state, dtype=dtype)
            encoder_states.append(encoder_state)
    return encoder_states
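The mask loop above just partitions a flat list of per-step inputs into per-sentence sublists, with each mask entry giving the index of a sentence's last step. A pure-Python sketch of the same partitioning (values invented for illustration):

steps = ['w0', 'w1', 'w2', 'w3', 'w4']
mask = [1, 4]                  # sentences end at indices 1 and 4
sentences = [[] for _ in mask]
m = 0
for i, step in enumerate(steps):
    sentences[m].append(step)
    if i == mask[m]:
        m += 1
print(sentences)               # [['w0', 'w1'], ['w2', 'w3', 'w4']]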
# def def_feedforward_nn(input_size, l1_size, l2_size):
# with tf.variable_scope("episodic"):
# l1_weights = tf.get_variable("l1_weights", [input_size, l1_size])
# l1_biases = tf.get_variable("l1_biases", [l1_size])
# l2_weights = tf.get_variable("l2_weights", [l1_size, l2_size])
# l2_biases = tf.get_variable("l2_biases", [l2_size])
#def feedforward_nn(l1_input, input_size, l1_size, l2_size):
# with tf.variable_scope("episodic"):
# l1_weights = tf.get_variable("l1_weights", [input_size, l1_size])
# l1_biases = tf.get_variable("l1_biases", [l1_size])
# l2_weights = tf.get_variable("l2_weights", [l1_size, l2_size])
# l2_biases = tf.get_variable("l2_biases", [l2_size])
# l2_input = tf.tanh(tf.matmul(l1_input , l1_weights) + l1_biases)
# gate_prediction = tf.matmul(l2_input , l2_weights) + l2_biases
# return gate_prediction
def __init__(self,
             cell,
             target_column,
             optimizer,
             model_dir=None,
             config=None,
             gradient_clipping_norm=None,
             inputs_key='inputs',
             sequence_length_key='sequence_length',
             initial_state_key='initial_state',
             dtype=None,
             parallel_iterations=None,
             swap_memory=False,
             name=None):
    """Initialize `DynamicRNNEstimator`.
    Args:
        cell: an initialized `RNNCell` to be used in the RNN.
        target_column: an initialized `TargetColumn`, used to calculate loss and
            metrics.
        optimizer: an initialized `tensorflow.Optimizer`.
        model_dir: The directory in which to save and restore the model graph,
            parameters, etc.
        config: A `RunConfig` instance.
        gradient_clipping_norm: parameter used for gradient clipping. If `None`,
            then no clipping is performed.
        inputs_key: the key for input values in the features dict passed to
            `fit()`.
        sequence_length_key: the key for the sequence length tensor in the
            features dict passed to `fit()`.
        initial_state_key: the key for the initial state in the features dict
            passed to `fit()`.
        dtype: Parameter passed to `dynamic_rnn`. The dtype of the state and
            output returned by `RNNCell`.
        parallel_iterations: Parameter passed to `dynamic_rnn`. The number of
            iterations to run in parallel.
        swap_memory: Parameter passed to `dynamic_rnn`. Transparently swap the
            tensors produced in forward inference but needed for back prop from
            GPU to CPU.
        name: Optional name for the `Estimator`.
    """
    super(_DynamicRNNEstimator, self).__init__(
        model_dir=model_dir, config=config)
    self._cell = cell
    self._target_column = target_column
    self._optimizer = optimizer
    self._gradient_clipping_norm = gradient_clipping_norm
    self._inputs_key = inputs_key
    self._sequence_length_key = sequence_length_key
    self._initial_state_key = initial_state_key
    self._dtype = dtype or dtypes.float32
    self._parallel_iterations = parallel_iterations
    self._swap_memory = swap_memory
    self._name = name or 'DynamicRnnEstimator'
def _construct_rnn(self, features):
    """Apply an RNN to `features`.
    The `features` dict must contain `self._inputs_key`, and the corresponding
    input should be a `Tensor` of shape `[batch_size, padded_length, k]`
    where `k` is the dimension of the input for each element of a sequence.
    `activations` has shape `[batch_size, sequence_length, n]` where `n` is
    `self._target_column.num_label_columns`. In the case of a multiclass
    classifier, `n` is the number of classes.
    `final_state` has shape determined by `self._cell` and its dtype must match
    `self._dtype`.
    Args:
        features: a `dict` containing the input for the RNN and (optionally) an
            initial state and information about sequence lengths.
    Returns:
        activations: the output of the RNN, projected to the appropriate number
            of dimensions.
        final_state: the final state output by the RNN.
    Raises:
        KeyError: if `features` does not contain `self._inputs_key`.
    """
    with ops.name_scope('RNN'):
        inputs = features.get(self._inputs_key)
        if inputs is None:
            raise KeyError('features must contain the key {}'.format(
                self._inputs_key))
        if inputs.dtype != self._dtype:
            inputs = math_ops.cast(inputs, self._dtype)
        initial_state = features.get(self._initial_state_key)
        rnn_outputs, final_state = rnn.dynamic_rnn(
            cell=self._cell,
            inputs=inputs,
            initial_state=initial_state,
            dtype=self._dtype,
            parallel_iterations=self._parallel_iterations,
            swap_memory=self._swap_memory,
            time_major=False)
        # The projection must be trainable, or the output layer never learns
        # (the original listing had trainable=False here, almost surely a typo).
        activations = layers.fully_connected(
            inputs=rnn_outputs,
            num_outputs=self._target_column.num_label_columns,
            activation_fn=None,
            trainable=True)
        return activations, final_state
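Per the docstring above, the estimator pulls its RNN input, and optionally lengths and an initial state, out of the features dict by key. A sketch of that dict for the default keys (these placeholders are invented for illustration):

features = {
    'inputs': tf.placeholder(tf.float32, [32, 100, 16]),  # [batch_size, padded_length, k]
    'sequence_length': tf.placeholder(tf.int32, [32]),    # true length per example
    # 'initial_state' may be omitted; dynamic_rnn then starts from zeros.
}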
def construct_rnn(initial_state,
                  sequence_input,
                  cell,
                  num_label_columns,
                  dtype=dtypes.float32,
                  parallel_iterations=32,
                  swap_memory=False):
    """Build an RNN and apply a fully connected layer to get the desired output.
    Args:
        initial_state: The initial state to pass to the RNN. If `None`, the
            default starting state for `self._cell` is used.
        sequence_input: A `Tensor` with shape `[batch_size, padded_length, d]`
            that will be passed as input to the RNN.
        cell: An initialized `RNNCell`.
        num_label_columns: The desired output dimension.
        dtype: dtype of `cell`.
        parallel_iterations: Number of iterations to run in parallel. Values >> 1
            use more memory but take less time, while smaller values use less
            memory but computations take longer.
        swap_memory: Transparently swap the tensors produced in forward inference
            but needed for back prop from GPU to CPU. This allows training RNNs
            which would typically not fit on a single GPU, with very minimal (or
            no) performance penalty.
    Returns:
        activations: The output of the RNN, projected to `num_label_columns`
            dimensions.
        final_state: The final state output by the RNN.
    """
    with ops.name_scope('RNN'):
        rnn_outputs, final_state = rnn.dynamic_rnn(
            cell=cell,
            inputs=sequence_input,
            initial_state=initial_state,
            dtype=dtype,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            time_major=False)
        activations = layers.fully_connected(
            inputs=rnn_outputs,
            num_outputs=num_label_columns,
            activation_fn=None,
            trainable=True)
        return activations, final_state
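A minimal call sketch for construct_rnn with a GRU cell and invented shapes (a GRU's state is [batch_size, num_units], so final_state comes back as [32, 64] here):

cell = tf.nn.rnn_cell.GRUCell(64)
sequence_input = tf.placeholder(tf.float32, [32, 50, 16])  # [batch_size, padded_length, d]
activations, final_state = construct_rnn(
    initial_state=None,  # fall back to the cell's zero state
    sequence_input=sequence_input,
    cell=cell,
    num_label_columns=10)
# activations: [32, 50, 10]; final_state: [32, 64]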
def _apply(self, X, state=None, memory=None):
    # time_major: The shape format of the `inputs` and `outputs` Tensors.
    # If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
    # If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
    # ====== create attention if necessary ====== #
    cell = self.cell
    if self.bidirectional:
        cell_bw = self.cell_bw
    # create attention cell
    if self.attention:
        if not hasattr(self, "_cell_with_attention"):
            self._cell_with_attention = self.__attention_creator(
                cell, X=X, memory=memory)
        cell = self._cell_with_attention
        # bidirectional attention
        if self.bidirectional:
            if not hasattr(self, "_cell_with_attention_bw"):
                self._cell_with_attention_bw = self.__attention_creator(
                    cell_bw, X=X, memory=memory)
            cell_bw = self._cell_with_attention_bw
    # ====== calling rnn_wrapper ====== #
    ## Bidirectional
    if self.bidirectional:
        rnn_func = rnn.bidirectional_dynamic_rnn if self.dynamic \
            else rnn.static_bidirectional_rnn
        state_fw, state_bw = None, None
        if isinstance(state, (tuple, list)):
            state_fw = state[0]
            if len(state) > 1:
                state_bw = state[1]
        else:
            state_fw = state
        outputs = rnn_func(cell_fw=cell, cell_bw=cell_bw, inputs=X,
                           initial_state_fw=state_fw,
                           initial_state_bw=state_bw,
                           dtype=X.dtype.base_dtype)
    ## Unidirectional
    else:
        rnn_func = rnn.dynamic_rnn if self.dynamic else rnn.static_rnn
        outputs = rnn_func(cell, inputs=X, initial_state=state,
                           dtype=X.dtype.base_dtype)
    # ====== initialize cell ====== #
    if not self._is_initialized_variables:
        # Initialize only once; every time you call this, the values of the
        # variables change.
        K.eval(tf.variables_initializer(self.variables))
        self._is_initialized_variables = True
        _infer_variable_role(self.variables)
    # ====== return ====== #
    if self.bidirectional:  # concat outputs
        outputs = (tf.concat(outputs[0], axis=-1), outputs[1])
    if not self.return_states:
        return outputs[0]
    return outputs
def RNN(inputs, lens, name, reuse):
    print("Building network " + name)
    # Define weights
    inputs = tf.gather(one_hots, inputs)
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]), name=name + "_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name + "_biases")
    # Define a lstm cell with tensorflow
    # Encoder and decoder need distinct variable scopes: the original reused
    # `name` for both dynamic_rnn calls, which makes the second call collide
    # with the variables created by the first.
    enc_outputs, enc_states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name + "_enc",
        time_major=False)
    dec_outputs, dec_states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        enc_outputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name + "_dec",
        time_major=False)
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (__batch_size, __n_steps, n_input)
    # Required shape: '__n_steps' tensors list of shape (__batch_size, n_input)
    '''dec_outputs, dec_states = rnn.rnn(
        __cell_kind(__n_hidden),
        tf.unpack(tf.transpose(inputs, [1, 0, 2])),
        sequence_length=lens,
        dtype=tf.float32,
        scope=name)
    outputs = tf.transpose(tf.pack(outputs), [1, 0, 2])'''
    print("Done building network " + name)
    # Asserts are actually documentation: they can't be out of date
    assert dec_outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    # Linear activation, using rnn output for each char
    # Reshaping here for a `batch` matrix multiply
    # It's faster than `batch_matmul` probably because it can guarantee a
    # static shape
    outputs = tf.reshape(dec_outputs, [__batch_size * __n_steps, __n_hidden])
    finals = tf.matmul(outputs, weights)
    finals = tf.reshape(finals, [__batch_size, __n_steps, n_output]) + biases
    return finals[:, :__n_steps-1, :]

# tf Graph input
Source: dynamic_rnn_estimator.py — project: DeepLearning_VirtualReality_BigData_Project (author: rashmitripathi)
def construct_rnn(initial_state,
                  sequence_input,
                  cell,
                  num_label_columns,
                  dtype=dtypes.float32,
                  parallel_iterations=32,
                  swap_memory=True):
    """Build an RNN and apply a fully connected layer to get the desired output.
    Args:
        initial_state: The initial state to pass to the RNN. If `None`, the
            default starting state for `self._cell` is used.
        sequence_input: A `Tensor` with shape `[batch_size, padded_length, d]`
            that will be passed as input to the RNN.
        cell: An initialized `RNNCell`.
        num_label_columns: The desired output dimension.
        dtype: dtype of `cell`.
        parallel_iterations: Number of iterations to run in parallel. Values >> 1
            use more memory but take less time, while smaller values use less
            memory but computations take longer.
        swap_memory: Transparently swap the tensors produced in forward inference
            but needed for back prop from GPU to CPU. This allows training RNNs
            which would typically not fit on a single GPU, with very minimal (or
            no) performance penalty.
    Returns:
        activations: The output of the RNN, projected to `num_label_columns`
            dimensions.
        final_state: A `Tensor` or nested tuple of `Tensor`s representing the
            final state output by the RNN.
    """
    with ops.name_scope('RNN'):
        rnn_outputs, final_state = rnn.dynamic_rnn(
            cell=cell,
            inputs=sequence_input,
            initial_state=initial_state,
            dtype=dtype,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            time_major=False)
        activations = layers.fully_connected(
            inputs=rnn_outputs,
            num_outputs=num_label_columns,
            activation_fn=None,
            trainable=True)
        return activations, final_state
Source: official_tensorflow_phased_lstm.py — project: tensorflow-phased-lstm (author: philipperemy)
def run_lstm_mnist(lstm_cell=BasicLSTMCell, hidden_size=32, batch_size=256, steps=1000,
                   log_file='log.tsv'):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    learning_rate = 0.001
    file_logger = FileLogger(log_file, ['step', 'training_loss', 'training_accuracy'])
    x_ = tf.placeholder(tf.float32, (batch_size, mnist_img_size, 1))
    t_ = tf.placeholder(tf.float32, (batch_size, mnist_img_size, 1))
    y_ = tf.placeholder(tf.float32, (batch_size, num_classes))
    if lstm_cell == PhasedLSTMCell:
        # The PhasedLSTMCell consumes (timestamps, features) pairs.
        inputs = (t_, x_)
    else:
        inputs = x_
    outputs, _ = dynamic_rnn(cell=lstm_cell(hidden_size), inputs=inputs, dtype=tf.float32)
    rnn_out = tf.squeeze(outputs[:, -1, :])
    y = slim.fully_connected(inputs=rnn_out,
                             num_outputs=num_classes,
                             activation_fn=None)
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
    grad_update = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(tf.global_variables_initializer())

    def feed_dict_phased_lstm(batch):
        img = np.expand_dims(batch[0], axis=2)
        t = np.reshape(np.tile(np.array(range(mnist_img_size)), (batch_size, 1)),
                       (batch_size, mnist_img_size, 1))
        return {x_: img, y_: batch[1], t_: t}

    def feed_dict_basic_lstm(batch):
        img = np.expand_dims(batch[0], axis=2)
        return {x_: img, y_: batch[1]}

    for i in range(steps):
        b = mnist.train.next_batch(batch_size)
        st = time()
        if lstm_cell == PhasedLSTMCell:
            feed_dict = feed_dict_phased_lstm(b)
        else:
            feed_dict = feed_dict_basic_lstm(b)
        tr_loss, tr_acc, _ = sess.run([cross_entropy, accuracy, grad_update], feed_dict=feed_dict)
        print('steps = {0} | time {1:.2f} | tr_loss = {2:.3f} | tr_acc = {3:.3f}'.format(
            str(i).zfill(6), time() - st, tr_loss, tr_acc))
        file_logger.write([i, tr_loss, tr_acc])
    file_logger.close()
def run_experiment(init_session=None, placeholder_def_func=get_placeholders):
    batch_size = BATCH_SIZE
    hidden_size = HIDDEN_STATES
    learning_rate = 3e-4
    momentum = 0.9
    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'benchmark_loss'])
    x, y = placeholder_def_func()
    if ADD_TIME_INPUTS:
        lstm = PhasedLSTMCell(hidden_size)
        print('Using PhasedLSTMCell impl.')
    else:
        lstm = BasicLSTMCell(hidden_size)
        print('Using BasicLSTMCell impl.')
    initial_state = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                     tf.random_normal([batch_size, hidden_size], stddev=0.1))
    outputs, state = dynamic_rnn(lstm, x, initial_state=initial_state, dtype=tf.float32)
    rnn_out = tf.squeeze(tf.slice(outputs, begin=[0, tf.shape(outputs)[1] - 1, 0],
                                  size=[-1, -1, -1]))
    # _, final_hidden = state
    fc0_w = create_weight_variable('fc0_w', [hidden_size, 1])
    fc0_b = tf.get_variable('fc0_b', [1])
    out = tf.matmul(rnn_out, fc0_w) + fc0_b
    # tf.sub is the pre-TF-1.0 spelling of tf.subtract.
    loss = tf.reduce_mean(tf.square(tf.sub(out, y)))
    optimizer = create_adam_optimizer(learning_rate, momentum)
    trainable = tf.trainable_variables()
    grad_update = optimizer.minimize(loss, var_list=trainable)
    if init_session is not None:
        sess = init_session
    else:
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)
    # lstm.__call__(x[:, 0, :], initial_state, scope=None)
    d = collections.deque(maxlen=10)
    benchmark_d = collections.deque(maxlen=10)
    for step in range(1, int(1e9)):
        x_s, y_s = next_batch(batch_size)
        loss_value, _, pred_value = sess.run([loss, grad_update, out],
                                             feed_dict={x: x_s, y: y_s})
        # The mean converges to 0.5 for IID U(0,1) random variables. Good benchmark.
        benchmark_d.append(np.mean(np.square(0.5 - y_s)))
        d.append(loss_value)
        mean_loss = np.mean(d)
        benchmark_mean_loss = np.mean(benchmark_d)
        file_logger.write([step, mean_loss, benchmark_mean_loss])
    file_logger.close()