def _build_model(self, batch_size, helper_build_fn, decoder_maxiters=None, alignment_history=False):
    # embed input_data into a one-hot representation
    inputs = tf.one_hot(self.input_data, self._input_size, dtype=self._dtype)
    inputs_len = self.input_lengths

    with tf.name_scope('bidir-encoder'):
        fw_cell = rnn.MultiRNNCell([rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)], state_is_tuple=True)
        bw_cell = rnn.MultiRNNCell([rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)], state_is_tuple=True)
        fw_cell_zero = fw_cell.zero_state(batch_size, self._dtype)
        bw_cell_zero = bw_cell.zero_state(batch_size, self._dtype)

        enc_out, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, inputs,
                                                     sequence_length=inputs_len,
                                                     initial_state_fw=fw_cell_zero,
                                                     initial_state_bw=bw_cell_zero)

    with tf.name_scope('attn-decoder'):
        dec_cell_in = rnn.GRUCell(self._dec_rnn_size)
        attn_values = tf.concat(enc_out, 2)
        attn_mech = seq2seq.BahdanauAttention(self._enc_rnn_size * 2, attn_values, inputs_len)

        dec_cell_attn = rnn.GRUCell(self._enc_rnn_size * 2)
        dec_cell_attn = seq2seq.AttentionWrapper(dec_cell_attn,
                                                 attn_mech,
                                                 self._enc_rnn_size * 2,
                                                 alignment_history=alignment_history)
        dec_cell_out = rnn.GRUCell(self._output_size)
        dec_cell = rnn.MultiRNNCell([dec_cell_in, dec_cell_attn, dec_cell_out],
                                    state_is_tuple=True)

        dec = seq2seq.BasicDecoder(dec_cell, helper_build_fn(),
                                   dec_cell.zero_state(batch_size, self._dtype))

        dec_out, dec_state = seq2seq.dynamic_decode(dec, output_time_major=False,
                                                    maximum_iterations=decoder_maxiters,
                                                    impute_finished=True)

    self.outputs = dec_out.rnn_output
    self.output_ids = dec_out.sample_id
    self.final_state = dec_state
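_build_model takes a helper_build_fn callback so the same graph code can serve both training and inference. A minimal sketch of two such callbacks, assuming TF 1.x tf.contrib.seq2seq and hypothetical decoder_inputs, decoder_lengths, vocab_size, GO_ID, and EOS_ID names:

from tensorflow.contrib import seq2seq

def train_helper():
    # feed the ground-truth (already one-hot) decoder inputs at every step
    return seq2seq.TrainingHelper(decoder_inputs, decoder_lengths)

def infer_helper():
    # feed back the greedily decoded symbol, re-encoded as one-hot to match
    # the one-hot representation the model was built on
    start_tokens = tf.fill([batch_size], GO_ID)
    embed_fn = lambda ids: tf.one_hot(ids, vocab_size, dtype=tf.float32)
    return seq2seq.GreedyEmbeddingHelper(embed_fn, start_tokens, EOS_ID)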
def attention_encoder(x, length,
                      num_blocks=3,
                      name=None, reuse=None):
    with tf.variable_scope(name, "attention-encoder", values=[x, length],
                           reuse=reuse) as scope:
        # get shapes
        batch_size = x.get_shape().as_list()[0]
        if batch_size is None:
            batch_size = tf.shape(x)[0]

        dims = int(x.get_shape()[-1])

        # encode data
        fw_cell = rnn.MultiRNNCell([
            rnn.BasicRNNCell(dims, reuse=scope.reuse) for i in range(num_blocks)
        ], state_is_tuple=True)
        bw_cell = rnn.MultiRNNCell([
            rnn.BasicRNNCell(dims, reuse=scope.reuse) for i in range(num_blocks)
        ], state_is_tuple=True)

        enc_out, _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell,
            x,
            sequence_length=length,
            initial_state_fw=fw_cell.zero_state(batch_size, tf.float32),
            initial_state_bw=bw_cell.zero_state(batch_size, tf.float32)
        )
        enc_out = tf.concat(enc_out, 2)

    return enc_out
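A hedged usage sketch for the encoder above, assuming 3-D float inputs of shape [batch, time, features] and per-example lengths (the placeholder names are illustrative):

x = tf.placeholder(tf.float32, [None, None, 128], name='inputs')    # [batch, time, features]
lengths = tf.placeholder(tf.int32, [None], name='input_lengths')    # true length of each sequence

encoded = attention_encoder(x, lengths, num_blocks=3, name='encoder')
# encoded has shape [batch, time, 2 * 128]: forward and backward outputs concatenated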
def cell_create(self, scope_name):
    with tf.variable_scope(scope_name):
        if self.cell_type == 'tanh':
            cells = rnn.MultiRNNCell([rnn.BasicRNNCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
        elif self.cell_type == 'LSTM':
            cells = rnn.MultiRNNCell([rnn.BasicLSTMCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
        elif self.cell_type == 'GRU':
            cells = rnn.MultiRNNCell([rnn.GRUCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
        elif self.cell_type == 'LSTMP':
            cells = rnn.MultiRNNCell([rnn.LSTMCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
        else:
            raise ValueError("unsupported cell_type: {}".format(self.cell_type))
        cells = rnn.DropoutWrapper(cells, input_keep_prob=self.dropout_ph, output_keep_prob=self.dropout_ph)
    return cells
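A hedged usage sketch, assuming the owning class defines cell_type, n_hidden, n_layers, and a keep-probability placeholder dropout_ph as used above (the inputs and seq_lens tensors here are illustrative):

rnn_cells = self.cell_create('recurrent')     # stacked cells with dropout applied
outputs, state = tf.nn.dynamic_rnn(rnn_cells, inputs,
                                   sequence_length=seq_lens,
                                   dtype=tf.float32)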
The next two snippets are from grid_rnn_cell.py in the project DeepLearning_VirtualReality_BigData_Project (author: rashmitripathi).
def __init__(self, num_units):
    super(Grid1BasicRNNCell, self).__init__(
        num_units=num_units, num_dims=1,
        input_dims=0, output_dims=0, priority_dims=0, tied=False,
        cell_fn=lambda n, i: rnn.BasicRNNCell(num_units=n, input_size=i))
def __init__(self, num_units, tied=False, non_recurrent_fn=None):
    super(Grid2BasicRNNCell, self).__init__(
        num_units=num_units, num_dims=2,
        input_dims=0, output_dims=0, priority_dims=0, tied=tied,
        non_recurrent_dims=None if non_recurrent_fn is None else 0,
        cell_fn=lambda n, i: rnn.BasicRNNCell(num_units=n, input_size=i),
        non_recurrent_fn=non_recurrent_fn)
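These constructors simply wire the grid-RNN base class (tf.contrib.grid_rnn.GridRNNCell) to BasicRNNCell; the result follows the usual cell API and can be stacked like the Grid2LSTMCell/Grid2GRUCell used in the char-RNN model further down. A minimal sketch with illustrative sizes:

cell = Grid2BasicRNNCell(num_units=128)
stacked = rnn.MultiRNNCell([Grid2BasicRNNCell(num_units=128) for _ in range(2)])
zero = stacked.zero_state(batch_size=32, dtype=tf.float32)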
def __init__(self, data, model='lstm', infer=False):
    self.rnn_size = 128
    self.n_layers = 2

    if infer:
        self.batch_size = 1
    else:
        self.batch_size = data.batch_size

    if model == 'rnn':
        cell_rnn = rnn.BasicRNNCell
    elif model == 'gru':
        cell_rnn = rnn.GRUCell
    elif model == 'lstm':
        cell_rnn = rnn.BasicLSTMCell
    else:
        raise ValueError("model type not supported: {}".format(model))

    # only the LSTM cell accepts state_is_tuple; BasicRNNCell and GRUCell do not
    if model == 'lstm':
        cell = cell_rnn(self.rnn_size, state_is_tuple=False)
    else:
        cell = cell_rnn(self.rnn_size)
    self.cell = rnn.MultiRNNCell([cell] * self.n_layers, state_is_tuple=False)

    self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
    self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])

    self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [self.rnn_size, data.words_size])
        softmax_b = tf.get_variable("softmax_b", [data.words_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [data.words_size, self.rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, self.x_tf)

    outputs, final_state = tf.nn.dynamic_rnn(
        self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')
    self.output = tf.reshape(outputs, [-1, self.rnn_size])
    self.logits = tf.matmul(self.output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    self.final_state = final_state

    pred = tf.reshape(self.y_tf, [-1])
    # legacy seq2seq per-token cross-entropy, averaged over all predicted tokens
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [pred],
                                            [tf.ones_like(pred, dtype=tf.float32)])
    self.cost = tf.reduce_mean(loss)
    self.learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
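A hedged training-step sketch for the model above; the data.next_batch() iterator, the epoch-decayed learning-rate value, and an active sess are assumptions:

sess.run(tf.global_variables_initializer())
sess.run(tf.assign(model.learning_rate, 0.002 * (0.97 ** epoch)))

x_batch, y_batch = data.next_batch()     # hypothetical batch iterator
train_loss, _ = sess.run([model.cost, model.train_op],
                         feed_dict={model.x_tf: x_batch, model.y_tf: y_batch})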
def __init__(self, n_hidden, cell="GRU"):
    """
    qa_rnn module init.
    :param n_hidden: num of hidden units
    :param cell: GRU | LSTM | BASIC_RNN
    """
    if cell == "GRU":
        self.rnn_cell = rnn.GRUCell(num_units=n_hidden)
    elif cell == "LSTM":
        self.rnn_cell = rnn.LSTMCell(num_units=n_hidden)
    elif cell == "BASIC_RNN":
        self.rnn_cell = rnn.BasicRNNCell(num_units=n_hidden)
    else:
        raise Exception(cell + " not supported.")
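A hedged sketch of driving the selected cell with tf.nn.dynamic_rnn; the class name QaRNN and the embedded-question placeholders are hypothetical:

qa = QaRNN(n_hidden=256, cell="GRU")
question = tf.placeholder(tf.float32, [None, None, 300])   # [batch, time, embed_dim]
q_len = tf.placeholder(tf.int32, [None])

outputs, state = tf.nn.dynamic_rnn(qa.rnn_cell, question,
                                   sequence_length=q_len, dtype=tf.float32)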
def __init__(self, args, infer=False):
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    additional_cell_args = {}
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    elif args.model == 'gridlstm':
        cell_fn = grid_rnn.Grid2LSTMCell
        additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0})
    elif args.model == 'gridgru':
        cell_fn = grid_rnn.Grid2GRUCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size, **additional_cell_args)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(axis=1, num_or_size_splits=args.seq_length,
                              value=tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                              loop_function=loop if infer else None, scope='rnnlm')
    # output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([args.batch_size * args.seq_length])],
                                            args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
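With infer=True the graph above processes one token per run, so sampling code calls the session repeatedly and carries the state forward. A hedged single-step sketch; the vocab/chars lookup tables and an active sess are assumptions:

import numpy as np

state = sess.run(model.cell.zero_state(1, tf.float32))
x = np.array([[vocab['T']]])                     # hypothetical seed character id
probs, state = sess.run([model.probs, model.final_state],
                        feed_dict={model.input_data: x,
                                   model.initial_state: state})
next_char = chars[int(np.argmax(probs[0]))]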
def __init__(self, args, reverse_input, infer=False):
    if reverse_input:
        self.start_token = special_tokens.END_TOKEN
        self.end_token = special_tokens.START_TOKEN
    else:
        self.start_token = special_tokens.START_TOKEN
        self.end_token = special_tokens.END_TOKEN
    self.unk_token = special_tokens.UNK_TOKEN

    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    # state_is_tuple is only a valid argument for the LSTM cell
    if args.model == 'lstm':
        cell = cell_fn(args.rnn_size, state_is_tuple=True)
    else:
        cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                     loop_function=loop if infer else None,
                                                     scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                                                   [tf.reshape(self.targets, [-1])],
                                                   [tf.ones([args.batch_size * args.seq_length])],
                                                   args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))