Example source code using the Python class rnn.GRUCell()

def __init__(self, ob_space, ac_space, size=256, **kwargs):
    self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space))
    for i in range(4):
        x = tf.nn.elu(conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))
    # introduce a "fake" batch dimension of 1 after flatten so that we can do GRU over the time dim
    x = tf.expand_dims(flatten(x), 1)  # [time, 1, features] for time_major=True
    gru = rnn.GRUCell(size)
    h_init = np.zeros((1, size), np.float32)
    self.state_init = [h_init]
    h_in = tf.placeholder(tf.float32, [1, size])
    self.state_in = [h_in]
    step_size = tf.shape(self.x)[:1]  # number of time steps actually in the rollout
    gru_outputs, gru_state = tf.nn.dynamic_rnn(
        gru, x, initial_state=h_in, sequence_length=step_size, time_major=True)
    x = tf.reshape(gru_outputs, [-1, size])
    self.logits = linear(x, ac_space, "action", normalized_columns_initializer(0.01))
    self.vf = tf.reshape(linear(x, 1, "value", normalized_columns_initializer(1.0)), [-1])
    self.state_out = [gru_state[:1]]
    self.sample = categorical_sample(self.logits, ac_space)[0, :]
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
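In an A3C-style rollout loop, the GRU state emitted in state_out is fed back through the state_in placeholder on the next call; a minimal sketch of that usage (the session and policy names are illustrative, not part of the snippet above):

def act(sess, policy, observation, h):
    # One policy step: sample an action, fetch the value estimate,
    # and carry the GRU hidden state over to the next step.
    action, value, h_next = sess.run(
        [policy.sample, policy.vf, policy.state_out[0]],
        feed_dict={policy.x: [observation], policy.state_in[0]: h})
    return action, value, h_next

# At episode start the state comes from the stored zero state:
# h = policy.state_init[0]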
Source: a8_dynamic_memory_network.py (project: text_classification, author: brightmart)
def answer_module(self):
    """Answer module: generate an answer from the final memory vector.
    Input:
        hidden state from the episodic memory module: [batch_size, hidden_size]
        question: [batch_size, embedding_size]
    """
    # decode a sequence of tokens (e.g. "x1 x2 x3 x4 ...") or a single answer
    steps = self.sequence_length if self.decode_with_sequences else 1
    a = self.m_T  # initial hidden state
    # TODO: usually y_pred is initialized with the embedding of a special '<GO>'
    # token, which you can pass in from outside; zeros are a placeholder here.
    y_pred = tf.zeros((self.batch_size, self.hidden_size))
    cell = rnn.GRUCell(self.hidden_size)  # create the cell once, outside the loop
    logits_list = []
    logits_return = None
    for i in range(steps):
        y_previous_q = tf.concat([y_pred, self.query_embedding], axis=1)  # [batch_size, hidden_size*2]
        _, a = cell(y_previous_q, a)
        # share one output projection across decode steps
        logits = tf.layers.dense(a, units=self.num_classes, name="answer_output", reuse=i > 0)  # [batch_size, num_classes]
        logits_list.append(logits)
    if self.decode_with_sequences:  # need the whole sequence
        logits_return = tf.stack(logits_list, axis=1)  # [batch_size, sequence_length, num_classes]
    else:  # only need a single answer, not a sequence
        logits_return = logits_list[0]  # [batch_size, num_classes]
    return logits_return
def _create_rnn_cell(self):
    """
    Creates a single RNN cell according to the architecture of this RNN.

    Returns
    -------
    rnn cell
        A single RNN cell according to the architecture of this RNN
    """
    keep_prob = 1.0 if self.keep_prob is None else self.keep_prob
    if self.cell_type == CellType.GRU:
        return DropoutWrapper(GRUCell(self.num_units), keep_prob, keep_prob)
    elif self.cell_type == CellType.LSTM:
        return DropoutWrapper(LSTMCell(self.num_units), keep_prob, keep_prob)
    else:
        raise ValueError("unknown cell type: {}".format(self.cell_type))
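Callers typically stack several of these cells into a multi-layer RNN; a minimal sketch, assuming a num_layers attribute on the same class (the attribute and method name below are hypothetical):

from tensorflow.contrib.rnn import MultiRNNCell

def _create_multi_rnn_cell(self):
    # Build a fresh cell per layer: reusing one cell object across layers
    # would make all layers share the same weights.
    return MultiRNNCell([self._create_rnn_cell() for _ in range(self.num_layers)])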
def _build_model(self, batch_size, helper_build_fn, decoder_maxiters=None, alignment_history=False):
    # embed input_data into a one-hot representation
    inputs = tf.one_hot(self.input_data, self._input_size, dtype=self._dtype)
    inputs_len = self.input_lengths

    with tf.name_scope('bidir-encoder'):
        fw_cell = rnn.MultiRNNCell([rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)], state_is_tuple=True)
        bw_cell = rnn.MultiRNNCell([rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)], state_is_tuple=True)
        fw_cell_zero = fw_cell.zero_state(batch_size, self._dtype)
        bw_cell_zero = bw_cell.zero_state(batch_size, self._dtype)

        enc_out, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, inputs,
                                                     sequence_length=inputs_len,
                                                     initial_state_fw=fw_cell_zero,
                                                     initial_state_bw=bw_cell_zero)

    with tf.name_scope('attn-decoder'):
        dec_cell_in = rnn.GRUCell(self._dec_rnn_size)
        attn_values = tf.concat(enc_out, 2)
        attn_mech = seq2seq.BahdanauAttention(self._enc_rnn_size * 2, attn_values, inputs_len)
        dec_cell_attn = rnn.GRUCell(self._enc_rnn_size * 2)
        dec_cell_attn = seq2seq.AttentionWrapper(dec_cell_attn,
                                                 attn_mech,
                                                 self._enc_rnn_size * 2,
                                                 alignment_history=alignment_history)
        dec_cell_out = rnn.GRUCell(self._output_size)
        dec_cell = rnn.MultiRNNCell([dec_cell_in, dec_cell_attn, dec_cell_out],
                                    state_is_tuple=True)

        dec = seq2seq.BasicDecoder(dec_cell, helper_build_fn(),
                                   dec_cell.zero_state(batch_size, self._dtype))

        dec_out, dec_state = seq2seq.dynamic_decode(dec, output_time_major=False,
                                                    maximum_iterations=decoder_maxiters,
                                                    impute_finished=True)

    self.outputs = dec_out.rnn_output
    self.output_ids = dec_out.sample_id
    self.final_state = dec_state
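The helper_build_fn argument selects between teacher forcing at train time and free-running greedy decoding at inference; a sketch of the two usual choices, assuming one-hot decoder inputs of width output_size and illustrative start/end token ids (none of these names appear in the snippet above):

from tensorflow.contrib import seq2seq

def make_training_helper(target_ids, target_lengths, output_size):
    # Teacher forcing: feed the ground-truth one-hot token at every step.
    targets = tf.one_hot(target_ids, output_size, dtype=tf.float32)
    return lambda: seq2seq.TrainingHelper(targets, target_lengths)

def make_greedy_helper(batch_size, start_id, end_id, output_size):
    # Free-running decoding: feed back the argmax of the previous step,
    # re-encoded as one-hot because the decoder consumes one-hot inputs.
    embed = lambda ids: tf.one_hot(ids, output_size, dtype=tf.float32)
    start_tokens = tf.fill([batch_size], start_id)
    return lambda: seq2seq.GreedyEmbeddingHelper(embed, start_tokens, end_id)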
def _set_train_model(self):
    """
    Define the train graph.
    :return:
    """
    # Create the internal multi-layer cells for our RNN, building a fresh
    # cell per layer so the layers do not share weights.
    if use_lstm:
        make_enc_cell = lambda: LSTMCell(self.enc_hidden_size)
        make_dec_cell = lambda: LSTMCell(self.dec_hidden_size)
    else:
        make_enc_cell = lambda: GRUCell(self.enc_hidden_size)
        make_dec_cell = lambda: GRUCell(self.dec_hidden_size)
    enc_cell = MultiRNNCell([make_enc_cell() for _ in range(self.enc_num_layers)])
    dec_cell = MultiRNNCell([make_dec_cell() for _ in range(self.dec_num_layers)])

    self.encoder_cell = enc_cell
    self.decoder_cell = dec_cell

    self._make_graph(forward_only)
    self.saver = tf.train.Saver(tf.global_variables())
def BidirectionalGRUEncoder(self, inputs, name):
    '''
    inputs: [batch, max_time, embedding_size]
    output: [batch, max_time, 2*hidden_size]
    '''
    with tf.variable_scope(name):
        fw_gru_cell = rnn.GRUCell(self.hidden_size)
        bw_gru_cell = rnn.GRUCell(self.hidden_size)
        fw_gru_cell = rnn.DropoutWrapper(fw_gru_cell, output_keep_prob=self.dropout_keep_prob)
        bw_gru_cell = rnn.DropoutWrapper(bw_gru_cell, output_keep_prob=self.dropout_keep_prob)
        (fw_outputs, bw_outputs), (fw_outputs_sta, bw_outputs_sta) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=fw_gru_cell,
            cell_bw=bw_gru_cell,
            inputs=inputs,
            sequence_length=getSequenceRealLength(inputs),
            dtype=tf.float32)
        outputs = tf.concat((fw_outputs, bw_outputs), 2)
        return outputs
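The getSequenceRealLength helper is not part of the snippet; a common implementation, assuming zero-padded embedding inputs, recovers each sequence's true length from the timesteps whose embedding vector is non-zero:

def getSequenceRealLength(sequences):
    # sequences: [batch, max_time, embedding_size], zero-padded along time
    used = tf.sign(tf.reduce_max(tf.abs(sequences), axis=2))  # 1.0 where a real token sits
    return tf.cast(tf.reduce_sum(used, axis=1), tf.int32)     # [batch]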
def RNN(_X, _weights, _biases, lens):
    if FLAGS.unit == 'PLSTM':
        cell = PhasedLSTMCell(FLAGS.n_hidden, use_peepholes=True)
    elif FLAGS.unit == 'GRU':
        cell = GRUCell(FLAGS.n_hidden)
    elif FLAGS.unit == 'LSTM':
        cell = LSTMCell(FLAGS.n_hidden, use_peepholes=True)
    else:
        raise ValueError('Unit {} not implemented.'.format(FLAGS.unit))

    outputs, states = tf.nn.dynamic_rnn(cell, _X, dtype=tf.float32, sequence_length=lens)

    # TODO better (?) in lack of smart indexing
    # Select, for every example in the batch, the output at its last real timestep.
    batch_size = tf.shape(outputs)[0]
    max_len = tf.shape(outputs)[1]
    out_size = int(outputs.get_shape()[2])
    index = tf.range(0, batch_size) * max_len + (lens - 1)
    flat = tf.reshape(outputs, [-1, out_size])
    relevant = tf.gather(flat, index)

    return tf.nn.bias_add(tf.matmul(relevant, _weights['out']), _biases['out'])
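The flat-index gather above emulates outputs[i, lens[i] - 1] for every row of the batch; the same selection can be written more directly with tf.gather_nd (an equivalent sketch, not the author's code):

def last_relevant_output(outputs, lens):
    # outputs: [batch, max_time, out_size]; lens: [batch] int32 true lengths
    batch_size = tf.shape(outputs)[0]
    indices = tf.stack([tf.range(batch_size), lens - 1], axis=1)  # [batch, 2]
    return tf.gather_nd(outputs, indices)                         # [batch, out_size]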
Source: a8_dynamic_memory_network.py (project: text_classification, author: brightmart)
def input_module(self):
    """Encode raw texts into a vector representation."""
    story_embedding = tf.nn.embedding_lookup(self.Embedding, self.story)  # [batch_size, story_length, sequence_length, embed_size]
    story_embedding = tf.reshape(story_embedding, (self.batch_size, self.story_length, self.sequence_length * self.embed_size))
    hidden_state = tf.ones((self.batch_size, self.hidden_size), dtype=tf.float32)
    cell = rnn.GRUCell(self.hidden_size)
    self.story_embedding, hidden_state = tf.nn.dynamic_rnn(cell, story_embedding, dtype=tf.float32, scope="input_module")
Source: a8_dynamic_memory_network.py (project: text_classification, author: brightmart)
def question_module(self):
    """
    input: tokens of the query: [batch_size, sequence_length]
    :return: representation of the question: [batch_size, hidden_size]
    """
    query_embedding = tf.nn.embedding_lookup(self.Embedding, self.query)  # [batch_size, sequence_length, embed_size]
    cell = rnn.GRUCell(self.hidden_size)
    _, self.query_embedding = tf.nn.dynamic_rnn(cell, query_embedding, dtype=tf.float32, scope="question_module")  # query_embedding: [batch_size, hidden_size]
def _add_encoders(self):
    with tf.variable_scope('query_encoder'):
        query_encoder_cell = GRUCell(self.encoder_cell_state_size)
        if self.dropout_enabled and self.mode != 'decode':
            query_encoder_cell = DropoutWrapper(cell=query_encoder_cell, output_keep_prob=0.8)

        query_embeddings = tf.nn.embedding_lookup(self.embeddings, self.queries_placeholder)
        query_encoder_outputs, _ = rnn.dynamic_rnn(query_encoder_cell, query_embeddings,
                                                   sequence_length=self.query_lengths_placeholder,
                                                   swap_memory=True, dtype=tf.float32)
        self.query_last = query_encoder_outputs[:, -1, :]

    with tf.variable_scope('encoder'):
        fw_cell = GRUCell(self.encoder_cell_state_size)
        bw_cell = GRUCell(self.encoder_cell_state_size)
        if self.dropout_enabled and self.mode != 'decode':
            fw_cell = DropoutWrapper(cell=fw_cell, output_keep_prob=0.8)
            bw_cell = DropoutWrapper(cell=bw_cell, output_keep_prob=0.8)

        embeddings = tf.nn.embedding_lookup(self.embeddings, self.documents_placeholder)
        (encoder_outputs_fw, encoder_outputs_bw), _ = rnn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell,
            embeddings,
            sequence_length=self.document_lengths_placeholder,
            swap_memory=True,
            dtype=tf.float32)

        self.encoder_outputs = tf.concat([encoder_outputs_fw, encoder_outputs_bw], 2)
        self.final_encoder_state = self.encoder_outputs[:, -1, :]
def __init__(self, n_classes, rnn_size=256, n_chunks=75):
    global gru_cell_units
    self._name = "star_platinum"
    self._hidden_layer_1 = {'weights': tf.Variable(tf.random_uniform([rnn_size, 1024]), name="weight1"),
                            'biases': tf.Variable(tf.random_uniform([1024]), name="biases1")}
    self._hidden_layer_2 = {'weights': tf.Variable(tf.random_uniform([1024, n_chunks * 10]), name="weight2"),
                            'biases': tf.Variable(tf.random_uniform([n_chunks * 10]), name="biases2")}
    self._lstm_cell = rnn.BasicLSTMCell(rnn_size)
    self._gru_cell = rnn.GRUCell(gru_cell_units)
    self._output = {'weights': tf.Variable(tf.random_uniform([gru_cell_units, n_classes]), name="weight3"),
                    'biases': tf.Variable(tf.random_uniform([n_classes]), name="biases3")}
def __init__(self, n_classes, rnn_size=256):
    self._name = "little_gru"
    self._layer_weights = tf.Variable(tf.random_uniform([rnn_size, n_classes]), name="weights")
    self._layer_biases = tf.Variable(tf.random_uniform([n_classes]), name="biases")
    self._gru_cell = rnn.GRUCell(rnn_size)
def BidirectionalGRUEncoder(self, inputs, name):
    # The input `inputs` has shape [batch_size, max_time, voc_size].
    with tf.variable_scope(name):
        GRU_cell_fw = rnn.GRUCell(self.hidden_size)
        GRU_cell_bw = rnn.GRUCell(self.hidden_size)
        # fw_outputs and bw_outputs each have shape [batch_size, max_time, hidden_size].
        ((fw_outputs, bw_outputs), (_, _)) = tf.nn.bidirectional_dynamic_rnn(cell_fw=GRU_cell_fw,
                                                                             cell_bw=GRU_cell_bw,
                                                                             inputs=inputs,
                                                                             sequence_length=length(inputs),
                                                                             dtype=tf.float32)
        # outputs has shape [batch_size, max_time, hidden_size*2].
        outputs = tf.concat((fw_outputs, bw_outputs), 2)
        return outputs
def _get_rnn_unit(self, rnn_unit):
    if rnn_unit == 'lstm':
        fw_cell = rnn.BasicLSTMCell(self._nb_hidden, forget_bias=1., state_is_tuple=True)
        bw_cell = rnn.BasicLSTMCell(self._nb_hidden, forget_bias=1., state_is_tuple=True)
    elif rnn_unit == 'gru':
        fw_cell = rnn.GRUCell(self._nb_hidden)
        bw_cell = rnn.GRUCell(self._nb_hidden)
    else:
        raise ValueError('rnn_unit must be one of (lstm, gru)!')
    return fw_cell, bw_cell
def build_cell(units, cell_type='lstm', num_layers=1):
    if num_layers > 1:
        cell = rnn.MultiRNNCell([
            build_cell(units, cell_type, 1) for _ in range(num_layers)
        ])
    else:
        if cell_type == "lstm":
            cell = rnn.LSTMCell(units)
        elif cell_type == "gru":
            cell = rnn.GRUCell(units)
        else:
            raise ValueError('Do not support %s' % cell_type)
    return cell
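A quick usage sketch with illustrative sizes; note that the recursive branch builds a fresh cell per layer, so stacked layers never share weights:

# Two stacked GRU layers unrolled over a batch of padded sequences.
cell = build_cell(128, cell_type='gru', num_layers=2)
inputs = tf.placeholder(tf.float32, [None, None, 64])  # [batch, time, features]
lengths = tf.placeholder(tf.int32, [None])             # true sequence lengths
outputs, state = tf.nn.dynamic_rnn(cell, inputs, sequence_length=lengths, dtype=tf.float32)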
def RNN(X, num_hidden_layers):
    # reshape to [1, n_input]
    std_dev_He = np.sqrt(2 / np.prod(X.get_shape().as_list()[1:]))
    X = tf.reshape(X, [-1, sequence_length * 8 * 8])

    # Generate a n_input-element sequence of inputs
    # (eg. [had] [a] [general] -> [20] [6] [33])
    X = tf.split(X, sequence_length, 1)

    # 1-layer LSTM with n_hidden units.
    # rnn_cell = rnn.BasicLSTMCell(n_hidden)
    with tf.variable_scope('RNN', initializer=tf.random_normal_initializer(mean=0.0, stddev=std_dev_He)):
        # weights = {
        #     'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
        # }
        # biases = {
        #     'out': tf.Variable(tf.random_normal([num_classes]))
        # }
        weights = tf.get_variable(
            name='weights',
            shape=[num_hidden, num_classes],  # 1 x 64 filter in, 1 class out
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable(
            name='biases',
            shape=[num_classes],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

        # Build one fresh GRUCell per layer; repeating a single cell object
        # would make every layer share the same weights.
        GRU_cell_layers = [rnn.GRUCell(num_hidden) for _ in range(num_hidden_layers)]
        # LSTM_cell_layers = [rnn.BasicLSTMCell(num_hidden, forget_bias=1) for _ in range(num_hidden_layers)]
        rnn_cell = rnn.MultiRNNCell(GRU_cell_layers)

        # generate prediction
        outputs, states = rnn.static_rnn(rnn_cell, X, dtype=tf.float32)

    # there are n_input outputs but we only want the last one
    # return tf.matmul(outputs[-1], weights['out']) + biases['out']
    return tf.matmul(outputs[-1], weights) + biases
def cell_create(self, scope_name):
    with tf.variable_scope(scope_name):
        if self.cell_type == 'tanh':
            cells = rnn.MultiRNNCell([rnn.BasicRNNCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
        elif self.cell_type == 'LSTM':
            cells = rnn.MultiRNNCell([rnn.BasicLSTMCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
        elif self.cell_type == 'GRU':
            cells = rnn.MultiRNNCell([rnn.GRUCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
        elif self.cell_type == 'LSTMP':
            cells = rnn.MultiRNNCell([rnn.LSTMCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
        else:
            raise ValueError('unknown cell_type: {}'.format(self.cell_type))
        cells = rnn.DropoutWrapper(cells, input_keep_prob=self.dropout_ph, output_keep_prob=self.dropout_ph)
    return cells
def __init__(self, m, seq_len, name='gen', reuse=False, n_stack=1,
             logit_range=4.0, **kwargs):
    # Get GRU cell builder
    range_wrapper = partial(OutputRangeWrapper, output_range=logit_range)
    cb = GeneratorRNNCellBuilder(
        rnn.GRUCell, m=m, n_stack=n_stack, wrappers=[range_wrapper]
    )
    # Super constructor
    super(GRUGenerator, self).__init__(
        m, seq_len, name=name, cell_builder=cb, reuse=reuse, **kwargs
    )
def _build_rnn_encoder(self, sentence1, sentence2,
                       sentence1_lengths, sentence2_lengths):
    with tf.variable_scope('word_embedding'):
        sentence1_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence1)
        sentence2_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence2)

    with tf.variable_scope('rnn'):
        def _run_birnn(fw_cell, bw_cell, inputs, lengths):
            (fw_output, bw_output), (fw_final_state, bw_final_state) = \
                tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell,
                    inputs,
                    sequence_length=lengths,
                    time_major=False,
                    dtype=tf.float32
                )
            output = tf.concat([fw_output, bw_output], 2)
            state = tf.concat([fw_final_state, bw_final_state], 1)
            return output, state

        state_size = self.config['rnn']['state_size']
        forward_cell = GRUCell(state_size)
        backward_cell = GRUCell(state_size)

        sentence1_rnned, _ = _run_birnn(forward_cell, backward_cell,
                                        sentence1_embedding, sentence1_lengths)
        sentence2_rnned, _ = _run_birnn(forward_cell, backward_cell,
                                        sentence2_embedding, sentence2_lengths)
    return sentence1_embedding, sentence2_embedding, \
        sentence1_rnned, sentence2_rnned
def _build_rnn_encoder(self, sentence1, sentence2_pos, sentence2_neg,
                       sentence1_lengths, sentence2_pos_lengths, sentence2_neg_lengths):
    with tf.variable_scope('word_embedding'):
        sentence1_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence1)
        sentence2_pos_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence2_pos)
        sentence2_neg_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence2_neg)

    with tf.variable_scope('rnn'):
        def _run_birnn(fw_cell, bw_cell, inputs, lengths):
            (fw_output, bw_output), (fw_final_state, bw_final_state) = \
                tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell,
                    inputs,
                    sequence_length=lengths,
                    time_major=False,
                    dtype=tf.float32
                )
            output = tf.concat([fw_output, bw_output], 2)
            state = tf.concat([fw_final_state, bw_final_state], 1)
            return output, state

        state_size = self.config['rnn']['state_size']
        forward_cell = GRUCell(state_size)
        backward_cell = GRUCell(state_size)

        sentence1_rnned, _ = _run_birnn(forward_cell, backward_cell,
                                        sentence1_embedding, sentence1_lengths)
        sentence2_rnned, _ = _run_birnn(
            forward_cell, backward_cell,
            tf.concat([sentence2_pos_embedding, sentence2_neg_embedding], 0),
            tf.concat([sentence2_pos_lengths, sentence2_neg_lengths], 0))
        sentence2_pos_rnned, sentence2_neg_rnned = \
            tf.split(sentence2_rnned, num_or_size_splits=2, axis=0)
    return sentence1_embedding, sentence2_pos_embedding, sentence2_neg_embedding, \
        sentence1_rnned, sentence2_pos_rnned, sentence2_neg_rnned
Source: grid_rnn_cell.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def __init__(self, num_units, tied=False, non_recurrent_fn=None):
    super(Grid2GRUCell, self).__init__(
        num_units=num_units, num_dims=2,
        input_dims=0, output_dims=0, priority_dims=0, tied=tied,
        non_recurrent_dims=None if non_recurrent_fn is None else 0,
        cell_fn=lambda n, i: rnn.GRUCell(num_units=n, input_size=i),
        non_recurrent_fn=non_recurrent_fn)
def HAN_model_1(session, restore_only=False):
    """Hierarchical Attention Network"""
    import tensorflow as tf
    try:
        from tensorflow.contrib.rnn import GRUCell, MultiRNNCell, DropoutWrapper
    except ImportError:
        MultiRNNCell = tf.nn.rnn_cell.MultiRNNCell
        GRUCell = tf.nn.rnn_cell.GRUCell
        DropoutWrapper = tf.nn.rnn_cell.DropoutWrapper
    from bn_lstm import BNLSTMCell
    from HAN_model import HANClassifierModel

    is_training = tf.placeholder(dtype=tf.bool, name='is_training')

    # five stacked h-h batchnorm LSTM cells, one fresh cell per layer
    # cell = GRUCell(30)
    cell = MultiRNNCell([BNLSTMCell(80, is_training) for _ in range(5)])

    model = HANClassifierModel(
        vocab_size=vocab_size,
        embedding_size=200,
        classes=classes,
        word_cell=cell,
        sentence_cell=cell,
        word_output_size=100,
        sentence_output_size=100,
        device=args.device,
        learning_rate=args.lr,
        max_grad_norm=args.max_grad_norm,
        dropout_keep_proba=0.5,
        is_training=is_training,
    )

    saver = tf.train.Saver(tf.global_variables())
    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    if checkpoint:
        print("Reading model parameters from %s" % checkpoint.model_checkpoint_path)
        saver.restore(session, checkpoint.model_checkpoint_path)
    elif restore_only:
        raise FileNotFoundError("Cannot restore model")
    else:
        print("Created model with fresh parameters")
        session.run(tf.global_variables_initializer())
    # tf.get_default_graph().finalize()
    return model, saver
def __init__(self, data, model='lstm', infer=False):
    self.rnn_size = 128
    self.n_layers = 2

    if infer:
        self.batch_size = 1
    else:
        self.batch_size = data.batch_size

    # Build one fresh cell per layer; only the LSTM cell takes state_is_tuple.
    if model == 'rnn':
        make_cell = lambda: rnn.BasicRNNCell(self.rnn_size)
    elif model == 'gru':
        make_cell = lambda: rnn.GRUCell(self.rnn_size)
    elif model == 'lstm':
        make_cell = lambda: rnn.BasicLSTMCell(self.rnn_size, state_is_tuple=False)
    else:
        raise ValueError("model type not supported: {}".format(model))
    self.cell = rnn.MultiRNNCell([make_cell() for _ in range(self.n_layers)], state_is_tuple=False)

    self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
    self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])

    self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [self.rnn_size, data.words_size])
        softmax_b = tf.get_variable("softmax_b", [data.words_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [data.words_size, self.rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, self.x_tf)

    outputs, final_state = tf.nn.dynamic_rnn(
        self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')
    self.output = tf.reshape(outputs, [-1, self.rnn_size])
    self.logits = tf.matmul(self.output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    self.final_state = final_state
    pred = tf.reshape(self.y_tf, [-1])

    # seq2seq loss over the flattened time dimension
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [pred],
                                            [tf.ones_like(pred, dtype=tf.float32)])
    self.cost = tf.reduce_mean(loss)
    self.learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
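At inference time (infer=True, so the batch size is 1), generation threads final_state back in as the next initial_state; a minimal greedy sampling sketch under that assumption (seed_id and the function name are illustrative):

import numpy as np

def sample_tokens(sess, model, seed_id, n_steps=50):
    # Thread the recurrent state through successive single-step runs.
    state = sess.run(model.initial_state)
    token, out = seed_id, [seed_id]
    for _ in range(n_steps):
        feed = {model.x_tf: np.array([[token]]), model.initial_state: state}
        probs, state = sess.run([model.probs, model.final_state], feed)
        token = int(np.argmax(probs[-1]))  # greedy pick; sampling also works
        out.append(token)
    return out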
def __init__(self,
             encoder_size,
             decoder_size,
             encoder_vocab_size,
             decoder_vocab_size,
             encoder_layer_size,
             decoder_layer_size,
             RNN_type='LSTM',
             encoder_input_keep_prob=1.0,
             encoder_output_keep_prob=1.0,
             decoder_input_keep_prob=1.0,
             decoder_output_keep_prob=1.0,
             learning_rate=0.01,
             hidden_size=128):
    self.encoder_size = encoder_size
    self.decoder_size = decoder_size
    self.encoder_vocab_size = encoder_vocab_size
    self.decoder_vocab_size = decoder_vocab_size
    self.encoder_layer_size = encoder_layer_size
    self.decoder_layer_size = decoder_layer_size
    self.encoder_input_keep_prob = encoder_input_keep_prob
    self.encoder_output_keep_prob = encoder_output_keep_prob
    self.decoder_input_keep_prob = decoder_input_keep_prob
    self.decoder_output_keep_prob = decoder_output_keep_prob
    self.learning_rate = learning_rate
    self.hidden_size = hidden_size

    self.encoder_input = tf.placeholder(tf.float32, shape=(None, self.encoder_size, self.encoder_vocab_size))
    self.decoder_input = tf.placeholder(tf.float32, shape=(None, self.decoder_size, self.decoder_vocab_size))
    self.target_input = tf.placeholder(tf.int32, shape=(None, self.decoder_size))

    self.weight = tf.get_variable(shape=[self.hidden_size, self.decoder_vocab_size],
                                  initializer=tf.contrib.layers.xavier_initializer(),
                                  dtype=tf.float32,
                                  name='weight')
    self.bias = tf.get_variable(shape=[self.decoder_vocab_size],
                                initializer=tf.contrib.layers.xavier_initializer(),
                                dtype=tf.float32,
                                name='bias')

    self.logits = None
    self.cost = None
    self.train_op = None
    self.RNNCell = None
    self.outputs = None
    self.merged = None

    if RNN_type == 'LSTM':
        self.RNNCell = rnn.LSTMCell
    elif RNN_type == 'GRU':
        self.RNNCell = rnn.GRUCell
    else:
        raise Exception('RNN type {} is not supported'.format(RNN_type))

    self.build_model()
    self.saver = tf.train.Saver(tf.global_variables())
def __init__(self, n_hidden, cell="GRU"):
    """
    qa_rnn module init.
    :param n_hidden: num of hidden units
    :param cell: GRU|LSTM|basic_rnn
    """
    if cell == "GRU":
        self.rnn_cell = rnn.GRUCell(num_units=n_hidden)
    elif cell == "LSTM":
        self.rnn_cell = rnn.LSTMCell(num_units=n_hidden)
    elif cell == "basic_rnn":
        self.rnn_cell = rnn.BasicRNNCell(num_units=n_hidden)
    else:
        raise Exception(cell + " not supported.")
def attention_decoder(enc, length, state_transfer_helper,
                      voca_size=20, max_length=None,
                      name=None, reuse=None):
    with tf.variable_scope(name, "attention-decoder", values=[enc, length],
                           reuse=reuse) as scope:
        # get shapes
        batch_size = enc.get_shape().as_list()[0]
        if batch_size is None:
            batch_size = tf.shape(enc)[0]

        dims = int(enc.get_shape()[-1])

        # decoder
        dec_attn = seq2seq.DynamicAttentionWrapper(
            cell=rnn.GRUCell(dims, reuse=scope.reuse),
            attention_mechanism=seq2seq.LuongAttention(dims, enc, length),
            attention_size=dims
        )

        dec_network = rnn.MultiRNNCell([
            rnn.GRUCell(dims, reuse=scope.reuse),
            dec_attn,
            rnn.GRUCell(voca_size, reuse=scope.reuse)
        ], state_is_tuple=True)

        decoder = seq2seq.BasicDecoder(
            dec_network, state_transfer_helper(),
            initial_state=dec_network.zero_state(batch_size, tf.float32)
        )

        dec_outputs, _ = seq2seq.dynamic_decode(
            decoder,
            maximum_iterations=max_length,
            impute_finished=False
        )

        logits = dec_outputs.rnn_output
        labels = dec_outputs.sample_id

        # pad logits and labels
        if max_length is not None:
            logits = dynamic_time_pad(logits, max_length)
            labels = dynamic_time_pad(labels, max_length)

        return logits, labels
def __init__(self, args, infer=False):
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    additional_cell_args = {}
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    elif args.model == 'gridlstm':
        cell_fn = grid_rnn.Grid2LSTMCell
        additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0})
    elif args.model == 'gridgru':
        cell_fn = grid_rnn.Grid2GRUCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    # one fresh cell per layer so that layers do not share weights
    self.cell = cell = rnn_cell.MultiRNNCell(
        [cell_fn(args.rnn_size, **additional_cell_args) for _ in range(args.num_layers)])

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(axis=1, num_or_size_splits=args.seq_length,
                              value=tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                              loop_function=loop if infer else None, scope='rnnlm')
    # output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([args.batch_size * args.seq_length])],
                                            args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def inference(self):
    '''
    1. embedding layer
    2. Bi-LSTM layer
    3. concat Bi-LSTM output
    4. FC (fully connected) layer
    5. softmax layer
    '''
    # embedding layer
    with tf.device('/cpu:0'), tf.name_scope('embedding'):
        self.embedded_words = tf.nn.embedding_lookup(self.Embedding, self.input_x)

    # Bi-LSTM layer
    lstm_fw_cell = rnn.BasicLSTMCell(self.hidden_size)
    lstm_bw_cell = rnn.BasicLSTMCell(self.hidden_size)
    if self.dropout_keep_prob is not None:
        lstm_fw_cell = rnn.DropoutWrapper(lstm_fw_cell, output_keep_prob=self.dropout_keep_prob)
        lstm_bw_cell = rnn.DropoutWrapper(lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)
    outputs, output_states = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, self.embedded_words, dtype=tf.float32)

    # Bi-GRU layer (drop-in alternative)
    '''
    gru_fw_cell = rnn.GRUCell(self.hidden_size)
    gru_bw_cell = rnn.GRUCell(self.hidden_size)
    if self.dropout_keep_prob is not None:
        gru_fw_cell = rnn.DropoutWrapper(gru_fw_cell, output_keep_prob=self.dropout_keep_prob)
        gru_bw_cell = rnn.DropoutWrapper(gru_bw_cell, output_keep_prob=self.dropout_keep_prob)
    outputs, output_states = tf.nn.bidirectional_dynamic_rnn(gru_fw_cell, gru_bw_cell, self.embedded_words, dtype=tf.float32)
    '''

    # concat output: each tensor in outputs is [batch, sequence_length, hidden_size];
    # concatenating forward and backward gives [batch, sequence_length, 2*hidden_size]
    output_rnn = tf.concat(outputs, axis=2)
    output_rnn_last = tf.reduce_mean(output_rnn, axis=1)  # [batch_size, 2*hidden_size]

    # FC layer
    with tf.name_scope('output'):
        self.score = tf.matmul(output_rnn_last, self.W_projection) + self.b_projection
    return self.score
def __init__(self, args, reverse_input, infer=False):
    if reverse_input:
        self.start_token = special_tokens.END_TOKEN
        self.end_token = special_tokens.START_TOKEN
    else:
        self.start_token = special_tokens.START_TOKEN
        self.end_token = special_tokens.END_TOKEN
    self.unk_token = special_tokens.UNK_TOKEN

    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    # Build one fresh cell per layer; only the LSTM cell accepts state_is_tuple.
    if args.model == 'rnn':
        make_cell = lambda: rnn.BasicRNNCell(args.rnn_size)
    elif args.model == 'gru':
        make_cell = lambda: rnn.GRUCell(args.rnn_size)
    elif args.model == 'lstm':
        make_cell = lambda: rnn.BasicLSTMCell(args.rnn_size, state_is_tuple=True)
    else:
        raise Exception("model type not supported: {}".format(args.model))
    self.cell = cell = rnn.MultiRNNCell([make_cell() for _ in range(args.num_layers)], state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                     loop_function=loop if infer else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                                                   [tf.reshape(self.targets, [-1])],
                                                   [tf.ones([args.batch_size * args.seq_length])],
                                                   args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))