def build_infer_graph(x, batch_size, vocab_size=VOCAB_SIZE, embedding_size=32,
                      rnn_size=128, num_layers=2, p_keep=1.0):
    """
    Assemble the inference graph: embedding, dropout, stacked LSTM, dense
    projection and softmax.

    Args:
        x: tensor handed to ``layers.embed_sequence``
            (presumably integer token ids, [batch_size, seq_len] -- TODO confirm).
        batch_size: default value of the ``batch_size`` placeholder.
        vocab_size: embedding vocabulary size and width of the output layer.
        embedding_size: embedding dimension.
        rnn_size: number of units in each LSTM cell.
        num_layers: number of stacked LSTM cells.
        p_keep: default value of the ``p_keep`` (dropout keep probability)
            placeholder.

    Returns:
        dict with keys "logits", "probs", "input_state", "output_state",
        "p_keep", "batch_size" (the two placeholders) and "infer_args"
        (the plain Python argument values this graph was built with).
    """
    # Snapshot the raw Python arguments before any placeholder is created.
    infer_args = dict(batch_size=batch_size, vocab_size=vocab_size,
                      embedding_size=embedding_size, rnn_size=rnn_size,
                      num_layers=num_layers, p_keep=p_keep)
    logger.debug("building inference graph: %s.", infer_args)

    # Overridable scalars: feed them at run time or fall back to the defaults.
    keep_prob_ph = tf.placeholder_with_default(p_keep, shape=[], name="p_keep")
    batch_size_ph = tf.placeholder_with_default(batch_size, shape=[],
                                                name="batch_size")

    # Embedding followed by dropout.
    # shape (per the original notes): [batch_size, seq_len, embedding_size]
    raw_embedded = layers.embed_sequence(x, vocab_size, embedding_size)
    embedded = tf.nn.dropout(raw_embedded, keep_prob=keep_prob_ph)

    # Stacked LSTM with dropout applied on every layer's output.
    lstm_layers = [rnn.LSTMCell(rnn_size) for _ in range(num_layers)]
    stacked = rnn.MultiRNNCell(
        [rnn.DropoutWrapper(layer, output_keep_prob=keep_prob_ph)
         for layer in lstm_layers])
    # state layout (per the original notes): [num_layers, 2, batch_size, rnn_size]
    zero_state = stacked.zero_state(batch_size_ph, tf.float32)
    # rnn_out shape (per the original notes): [batch_size, seq_len, rnn_size]
    rnn_out, final_state = tf.nn.dynamic_rnn(stacked, embedded,
                                             initial_state=zero_state)

    with tf.name_scope("lstm"):
        tf.summary.histogram("outputs", rnn_out)
        for layer_state in final_state:
            cell_part, hidden_part = layer_state
            tf.summary.histogram("c_state", cell_part)
            tf.summary.histogram("h_state", hidden_part)

    # Linear projection onto the vocabulary (no activation).
    # shape (per the original notes): [batch_size, seq_len, vocab_size]
    logits = layers.fully_connected(rnn_out, vocab_size, activation_fn=None)

    with tf.name_scope("softmax"):
        probs = tf.nn.softmax(logits)

    with tf.name_scope("sequence"):
        # The "embeddings" histogram tracks the post-dropout embeddings.
        tf.summary.histogram("embeddings", embedded)
        tf.summary.histogram("logits", logits)

    return {"logits": logits, "probs": probs,
            "input_state": zero_state, "output_state": final_state,
            "p_keep": keep_prob_ph, "batch_size": batch_size_ph,
            "infer_args": infer_args}
python类MultiRNNCell()的实例源码
bilstm_model.py 文件源码
项目:tensorflow_video_classification_LSTM
作者: frankgu
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def _init_model(self):
    """Build the bidirectional LSTM classifier, its loss, accuracy and optimizer.

    Reads ``self._config`` ('hidden_size', 'num_layers', 'keep_prob',
    'num_classes'), ``self._input.input_data`` (iterated as a sequence of
    per-step tensors), ``self._input.targets`` and ``self._is_training``.

    Sets ``self._outputs``, ``self._logits``, ``self._cost``,
    ``self._correct_pred``, ``self.accuracy``, ``self._lr``,
    ``self._train_op``, ``self._new_lr`` and ``self._lr_update``.
    """
    hidden_size = self._config['hidden_size']
    num_layers = self._config['num_layers']
    num_classes = self._config['num_classes']

    # Create multiple forward lstm cell
    cell_fw = rnn.MultiRNNCell(
        [rnn.BasicLSTMCell(hidden_size) for _ in range(num_layers)])

    # Create multiple backward lstm cell
    cell_bw = rnn.MultiRNNCell(
        [rnn.BasicLSTMCell(hidden_size) for _ in range(num_layers)])

    inputs = self._input.input_data

    # Add dropout layer to the input data.
    # Fix: the result used to be bound to a misspelled name ("intpus"), so
    # the dropped-out tensors were discarded and dropout was never applied.
    if self._is_training and self._config['keep_prob'] < 1:
        inputs = [tf.nn.dropout(single_input, self._config['keep_prob'])
                  for single_input in inputs]

    self._outputs, _, _ = rnn.static_bidirectional_rnn(
        cell_fw, cell_bw, inputs, dtype=tf.float32)

    # Hidden layer weights => 2*hidden_size because of forward + backward cells
    softmax_w = tf.get_variable("softmax_w", [2 * hidden_size, num_classes])
    softmax_b = tf.get_variable("softmax_b", [num_classes])

    # Linear activation, using rnn inner loop last output
    # logit shape (per the original notes): [batch_size, num_classes]
    self._logits = tf.matmul(self._outputs[-1], softmax_w) + softmax_b

    # Define loss
    # Required targets shape (per the original notes):
    # [batch_size, num_classes] (one hot vector)
    self._cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=self._logits,
                                                labels=self._input.targets))

    # Evaluate model
    self._correct_pred = tf.equal(tf.argmax(self._logits, 1),
                                  tf.argmax(self._input.targets, 1))
    self.accuracy = tf.reduce_mean(tf.cast(self._correct_pred, tf.float32))

    # Define optimizer. The learning rate is a non-trainable variable that
    # is changed by running ``_lr_update`` with ``_new_lr`` fed.
    self._lr = tf.Variable(0.0, trainable=False)
    self._train_op = tf.train.AdamOptimizer(
        learning_rate=self._lr).minimize(self._cost)

    self._new_lr = tf.placeholder(
        tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
def __init__(self, config):
    """Build an LSTM text classifier graph with loss, metrics and train op.

    Copies hyper-parameters from ``config`` onto ``self``, creates the
    ``input_x`` / ``input_y`` / ``dropout_keep_prob`` placeholders, and
    builds: embedding lookup (initialised from ``config.w2v``) -> dropout ->
    LSTM (stacked when ``hidden_layer_num > 1``) -> output at the last time
    step -> linear layer -> softmax cross-entropy loss.

    Training uses Adam with global-norm gradient clipping
    (``config.max_grad_norm``) and a staircase exponentially decayed
    learning rate that is floored at ``min_learning_rate``.

    NOTE(review): the source this was recovered from had lost its
    indentation; the extent of each ``with`` scope below is reconstructed
    and only affects op naming / device placement -- confirm against the
    original.
    """
    self.initial_learning_rate = config.initial_learning_rate
    self.min_learning_rate = config.min_learning_rate
    self.decay_step = config.decay_step
    self.decay_rate = config.decay_rate
    self.num_step = config.num_step
    self.num_classes = config.num_classes
    self.hidden_neural_size = config.hidden_neural_size
    self.vocabulary_size = config.vocabulary_size
    self.embedding_dim = config.embedding_dim
    self.hidden_layer_num = config.hidden_layer_num
    self.w2v = config.w2v

    self.input_x = tf.placeholder(tf.int32, [None, self.num_step], name="input_x")
    self.input_y = tf.placeholder(tf.int32, [None, self.num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    with tf.device('/cpu:0'), tf.name_scope("embedding_layer"):
        # Embedding matrix is a variable initialised from config.w2v.
        W = tf.Variable(self.w2v, name="W")
        inputs = tf.nn.embedding_lookup(W, self.input_x)
        inputs = tf.nn.dropout(inputs, self.dropout_keep_prob, name='dropout')

    # self.lstm_cell() is defined elsewhere on the class; it is called once
    # per layer so each layer gets its own cell object.
    if self.hidden_layer_num > 1:
        lstmCells = rnn.MultiRNNCell(
            [self.lstm_cell() for _ in range(self.hidden_layer_num)])
    else:
        lstmCells = self.lstm_cell()

    outputs, states = tf.nn.dynamic_rnn(lstmCells, inputs, dtype=tf.float32)

    # Despite the scope name (kept so op names do not change), this selects
    # the output of the last time step rather than averaging over time.
    with tf.name_scope("mean_pooling_layer"):
        output = outputs[:, self.num_step - 1, :]

    with tf.name_scope("softmax_layer"):
        softmax_w = tf.get_variable(
            'softmax_w', [self.hidden_neural_size, self.num_classes],
            dtype=tf.float32)
        softmax_b = tf.get_variable(
            'softmax_b', [self.num_classes], dtype=tf.float32)
        self.logits = tf.add(tf.matmul(output, softmax_w), softmax_b)

    with tf.name_scope("output"):
        self.cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.logits + 1e-10, labels=self.input_y)
        self.loss = tf.reduce_mean(self.cross_entropy, name="loss")

        # NOTE: the attribute name "predition" (sic) is part of the public
        # interface and is kept as-is for existing callers.
        self.predition = tf.argmax(self.logits, 1, name='prediction')
        correct_prediction = tf.equal(self.predition,
                                      tf.argmax(self.input_y, 1))
        self.correct_num = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                                       name="accuracy")

    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    # Decayed learning rate, never allowed to drop below min_learning_rate.
    self.learning_rate = tf.maximum(
        tf.train.exponential_decay(self.initial_learning_rate,
                                   self.global_step,
                                   self.decay_step,
                                   self.decay_rate,
                                   staircase=True),
        self.min_learning_rate)

    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                      config.max_grad_norm)

    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    # Fix: a second, discarded ``optimizer.apply_gradients(...)`` call used
    # to precede this one and left an unused duplicate update op in the
    # graph. Only the op that also increments global_step is kept.
    self.train_op = optimizer.apply_gradients(zip(grads, tvars),
                                              global_step=self.global_step)
    #self.summary = tf.summary.merge_all()
def attention_decoder(enc, length, state_transfer_helper,
                      voca_size=20, max_length=None,
                      name=None, reuse=None):
    """Decode from ``enc`` with a three-layer GRU stack that attends over it.

    The stack is: GRU(dims) -> Luong-attention-wrapped GRU(dims) ->
    GRU(voca_size), where ``dims`` is the static size of the last axis of
    ``enc``. Decoding starts from the stack's zero state.

    Args:
        enc: tensor used as the attention memory; its last dimension must be
            statically known.
        length: passed to ``seq2seq.LuongAttention`` together with ``enc``
            (presumably the memory sequence lengths -- TODO confirm).
        state_transfer_helper: zero-argument callable returning the helper
            handed to ``seq2seq.BasicDecoder``.
        voca_size: number of units of the last GRU layer.
        max_length: ``maximum_iterations`` for ``dynamic_decode``; when not
            None, both outputs are also passed through ``dynamic_time_pad``
            with this length.
        name: variable-scope name (default name "attention-decoder").
        reuse: variable-scope reuse flag.

    Returns:
        ``(logits, labels)``: the decoder's ``rnn_output`` and ``sample_id``.
    """
    with tf.variable_scope(name, "attention-decoder", values=[enc, length],
                           reuse=reuse) as scope:
        # Prefer the static batch size; fall back to the dynamic one.
        enc_shape = enc.get_shape()
        n_batch = enc_shape.as_list()[0]
        if n_batch is None:
            n_batch = tf.shape(enc)[0]
        depth = int(enc_shape[-1])

        # Attention-wrapped GRU that sits in the middle of the stack.
        inner_gru = rnn.GRUCell(depth, reuse=scope.reuse)
        attention = seq2seq.LuongAttention(depth, enc, length)
        attn_cell = seq2seq.DynamicAttentionWrapper(
            cell=inner_gru,
            attention_mechanism=attention,
            attention_size=depth
        )

        bottom_gru = rnn.GRUCell(depth, reuse=scope.reuse)
        top_gru = rnn.GRUCell(voca_size, reuse=scope.reuse)
        stacked_cell = rnn.MultiRNNCell([bottom_gru, attn_cell, top_gru],
                                        state_is_tuple=True)

        helper = state_transfer_helper()
        start_state = stacked_cell.zero_state(n_batch, tf.float32)
        decoder = seq2seq.BasicDecoder(stacked_cell, helper,
                                       initial_state=start_state)

        decoded, _ = seq2seq.dynamic_decode(
            decoder,
            maximum_iterations=max_length,
            impute_finished=False
        )
        logits, labels = decoded.rnn_output, decoded.sample_id

        # Without a length bound there is nothing to pad to.
        if max_length is None:
            return logits, labels

        padded_logits = dynamic_time_pad(logits, max_length)
        padded_labels = dynamic_time_pad(labels, max_length)
        return padded_logits, padded_labels
def __init__(self, args, infer=False):
    """Build a multi-layer RNN language model graph (training or sampling).

    Args:
        args: hyper-parameter namespace; reads ``model``, ``rnn_size``,
            ``num_layers``, ``batch_size``, ``seq_length``, ``vocab_size``
            and ``grad_clip``.
        infer: when True, ``args.batch_size`` and ``args.seq_length`` are
            overwritten with 1 (on the caller's object) and the decoder
            feeds the embedding of its own arg-max prediction back in as
            the next input.

    Raises:
        Exception: if ``args.model`` is not one of 'rnn', 'gru', 'lstm',
            'gridlstm', 'gridgru'.

    Sets ``self.cell``, ``self.input_data``, ``self.targets``,
    ``self.initial_state``, ``self.logits``, ``self.probs``, ``self.cost``,
    ``self.final_state``, ``self.lr`` and ``self.train_op``.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    additional_cell_args = {}
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    elif args.model == 'gridlstm':
        cell_fn = grid_rnn.Grid2LSTMCell
        additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0})
    elif args.model == 'gridgru':
        cell_fn = grid_rnn.Grid2GRUCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    # Fix: build one cell object per layer. The previous ``[cell] * n``
    # placed the *same* object in every layer, which newer TensorFlow 1.x
    # releases reject (or turn into unintended weight sharing).
    layer_cells = [cell_fn(args.rnn_size, **additional_cell_args)
                   for _ in range(args.num_layers)]
    self.cell = cell = rnn_cell.MultiRNNCell(layer_cells)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            # Split the embedded batch into one tensor per time step.
            inputs = tf.split(axis=1, num_or_size_splits=args.seq_length,
                              value=tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        # Sampling-time feedback: project the previous output to vocabulary
        # scores, take the arg-max symbol and return its embedding.
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope='rnnlm')
    # Concatenate the per-step outputs and flatten to rows of rnn_size.
    output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    # NOTE(review): the fourth positional argument is args.vocab_size;
    # confirm it matches the parameter the installed
    # sequence_loss_by_example expects in that position.
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state

    # Learning rate is a non-trainable variable (initialised to 0.0) so it
    # can be assigned from outside the graph-building code.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, args, reverse_input, infer=False):
    """Build a multi-layer RNN language model graph, optionally reversed.

    Args:
        args: hyper-parameter namespace; reads ``model``, ``rnn_size``,
            ``num_layers``, ``batch_size``, ``seq_length``, ``vocab_size``
            and ``grad_clip``.
        reverse_input: when True, the start and end tokens are swapped
            (``start_token`` becomes ``special_tokens.END_TOKEN`` and vice
            versa).
        infer: when True, ``args.batch_size`` and ``args.seq_length`` are
            overwritten with 1 (on the caller's object) and the decoder
            feeds the embedding of its own arg-max prediction back in as
            the next input.

    Raises:
        Exception: if ``args.model`` is not one of 'rnn', 'gru', 'lstm'.

    Sets ``self.start_token``, ``self.end_token``, ``self.unk_token``,
    ``self.args``, ``self.cell``, ``self.input_data``, ``self.targets``,
    ``self.initial_state``, ``self.logits``, ``self.probs``, ``self.cost``,
    ``self.final_state``, ``self.lr`` and ``self.train_op``.
    """
    if reverse_input:
        self.start_token = special_tokens.END_TOKEN
        self.end_token = special_tokens.START_TOKEN
    else:
        self.start_token = special_tokens.START_TOKEN
        self.end_token = special_tokens.END_TOKEN
    self.unk_token = special_tokens.UNK_TOKEN

    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    # Fix: ``state_is_tuple`` is only passed to the LSTM cell. It used to be
    # passed to every cell type, but BasicRNNCell and GRUCell do not accept
    # that keyword, so the 'rnn' and 'gru' models failed with a TypeError.
    cell_kwargs = {}
    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
        cell_kwargs['state_is_tuple'] = True
    else:
        raise Exception("model type not supported: {}".format(args.model))

    # Fix: build one cell object per layer. The previous ``[cell] * n``
    # placed the *same* object in every layer, which newer TensorFlow 1.x
    # releases reject (or turn into unintended weight sharing).
    layer_cells = [cell_fn(args.rnn_size, **cell_kwargs)
                   for _ in range(args.num_layers)]
    self.cell = cell = rnn.MultiRNNCell(layer_cells, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            # Split the embedded batch into one tensor per time step.
            inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data),
                              args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        # Sampling-time feedback: project the previous output to vocabulary
        # scores, take the arg-max symbol and return its embedding.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope='rnnlm')
    # Concatenate the per-step outputs and flatten to rows of rnn_size.
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    # NOTE(review): the fourth positional argument is args.vocab_size;
    # confirm it matches the parameter the installed
    # sequence_loss_by_example expects in that position.
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state

    # Learning rate is a non-trainable variable (initialised to 0.0) so it
    # can be assigned from outside the graph-building code.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))