def __call__(self, inputs, state, scope=None):
    """Run one step of the most basic RNN.

    Computes ``activation(W * input + U * state + B)`` and returns that
    single tensor twice: once as the output and once as the new state.

    Args:
        inputs: input for this step.
        state: state carried over from the previous step.
        scope: optional variable scope; defaults to the class name.

    Returns:
        An ``(output, new_state)`` pair whose two members are the same object.
    """
    scope_name = scope or type(self).__name__  # "BasicRNNCell"
    with vs.variable_scope(scope_name):
        # The positional True is presumably the bias flag of _linear
        # (matches the "+ B" term above) -- confirm against _linear.
        pre_activation = _linear(
            [inputs, state], self._num_units, True, use_fp16=self.use_fp16)
        new_state = self._activation(pre_activation)
    return new_state, new_state
python类BasicRNNCell()的实例源码
def __init__(self, num_units):
    """Configure a one-dimensional, untied grid cell of BasicRNNCell units.

    Args:
        num_units: number of units forwarded to the parent constructor.
    """
    def _basic_rnn_cell(n, i):
        # Factory handed to the parent class; builds one BasicRNNCell.
        return rnn_cell.BasicRNNCell(num_units=n, input_size=i)

    parent = super(Grid1BasicRNNCell, self)
    parent.__init__(
        num_units=num_units,
        num_dims=1,
        input_dims=0,
        output_dims=0,
        priority_dims=0,
        tied=False,
        cell_fn=_basic_rnn_cell)
def __init__(self, num_units, tied=False, non_recurrent_fn=None):
    """Configure a two-dimensional grid cell of BasicRNNCell units.

    Args:
        num_units: number of units forwarded to the parent constructor.
        tied: forwarded unchanged as the parent's ``tied`` argument.
        non_recurrent_fn: optional callable; when given, dimension 0 is
            declared non-recurrent and the callable is forwarded as well.
    """
    def _basic_rnn_cell(n, i):
        # Factory handed to the parent class; builds one BasicRNNCell.
        return rnn_cell.BasicRNNCell(num_units=n, input_size=i)

    # Dimension 0 is non-recurrent only when a non-recurrent function exists.
    if non_recurrent_fn is None:
        non_recurrent_dims = None
    else:
        non_recurrent_dims = 0

    parent = super(Grid2BasicRNNCell, self)
    parent.__init__(
        num_units=num_units,
        num_dims=2,
        input_dims=0,
        output_dims=0,
        priority_dims=0,
        tied=tied,
        non_recurrent_dims=non_recurrent_dims,
        cell_fn=_basic_rnn_cell,
        non_recurrent_fn=non_recurrent_fn)
def setUp(self):
    """Create the cell, target column, feature columns and input tensors.

    Everything is stored on ``self`` so the individual tests can use it.
    """
    self.rnn_cell = rnn_cell.BasicRNNCell(self.NUM_RNN_CELL_UNITS)
    self.mock_target_column = MockTargetColumn(
        num_label_columns=self.NUM_LABEL_COLUMNS)

    layers = tf.contrib.layers

    # Context feature: a one-hot encoded sparse 'location' column.
    location = layers.sparse_column_with_keys(
        'location', keys=['west_side', 'east_side', 'nyc'])
    self.context_feature_columns = [layers.one_hot_column(location)]

    # Sequence features: an embedded sparse column and a real-valued column.
    wire_cast = layers.sparse_column_with_keys(
        'wire_cast', ['marlo', 'omar', 'stringer'])
    wire_cast_embedded = layers.embedding_column(wire_cast, dimension=8)
    measurements = layers.real_valued_column('measurements', dimension=2)
    self.sequence_feature_columns = [measurements, wire_cast_embedded]

    # Input tensors, created in the same order as the original dict literal.
    location_tensor = tf.SparseTensor(
        indices=[[0, 0], [1, 0], [2, 0]],
        values=['west_side', 'west_side', 'nyc'],
        shape=[3, 1])
    wire_cast_tensor = tf.SparseTensor(
        indices=[[0, 0, 0], [0, 1, 0],
                 [1, 0, 0], [1, 1, 0], [1, 1, 1],
                 [2, 0, 0]],
        values=[b'marlo', b'stringer',
                b'omar', b'stringer', b'marlo',
                b'marlo'],
        shape=[3, 2, 2])
    measurements_tensor = tf.random_uniform([3, 2, 2])
    self.columns_to_tensors = {
        'location': location_tensor,
        'wire_cast': wire_cast_tensor,
        'measurements': measurements_tensor,
    }
def __init__(self, num_units):
    """Set up a 1-D grid cell whose inner cells are BasicRNNCell instances.

    Args:
        num_units: number of units passed through to the parent constructor.
    """
    def _make_cell(n, i):
        # Cell factory used by the parent class.
        return rnn_cell.BasicRNNCell(num_units=n, input_size=i)

    grid_options = dict(
        num_units=num_units,
        num_dims=1,
        input_dims=0,
        output_dims=0,
        priority_dims=0,
        tied=False,
        cell_fn=_make_cell)
    super(Grid1BasicRNNCell, self).__init__(**grid_options)
def __init__(self, num_units, tied=False, non_recurrent_fn=None):
    """Set up a 2-D grid cell whose inner cells are BasicRNNCell instances.

    Args:
        num_units: number of units passed through to the parent constructor.
        tied: passed through as the parent's ``tied`` argument.
        non_recurrent_fn: optional callable; if supplied, dimension 0 is
            marked non-recurrent and the callable is passed through too.
    """
    def _make_cell(n, i):
        # Cell factory used by the parent class.
        return rnn_cell.BasicRNNCell(num_units=n, input_size=i)

    grid_options = dict(
        num_units=num_units,
        num_dims=2,
        input_dims=0,
        output_dims=0,
        priority_dims=0,
        tied=tied,
        non_recurrent_dims=0 if non_recurrent_fn is not None else None,
        cell_fn=_make_cell,
        non_recurrent_fn=non_recurrent_fn)
    super(Grid2BasicRNNCell, self).__init__(**grid_options)
def __init__(self, vocabularySize, config_param):
    """Build the graph of a multi-layer BasicRNNCell language model.

    Args:
        vocabularySize: number of distinct tokens; sizes the embedding table
            and the softmax projection.
        config_param: object providing ``batch_size``, ``sequence_size``,
            ``embeddingSize``, ``hidden_size`` and ``num_layers``.
    """
    self.vocabularySize = vocabularySize
    self.config = config_param
    cfg = self.config

    self._inputX = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputsX")
    self._inputTargetsY = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputTargetsY")

    # Convert the token ids into their embedded form.
    # NOTE(review): the original indentation was lost, so the extent of this
    # device block is reconstructed -- confirm which ops it should cover.
    with tf.device("/cpu:0"):  # pins the ops in this block to the CPU
        embedding = tf.get_variable(
            "embedding", [self.vocabularySize, cfg.embeddingSize])
        embedded_tokens = tf.nn.embedding_lookup(embedding, self._inputX)
    per_step = tf.split(1, cfg.sequence_size, embedded_tokens)
    step_inputs = [tf.squeeze(step, [1]) for step in per_step]

    # Define the stacked RNN.
    base_cell = rnn_cell.BasicRNNCell(cfg.hidden_size)
    self.multilayerRNN = rnn_cell.MultiRNNCell([base_cell] * cfg.num_layers)
    self._initial_state = self.multilayerRNN.zero_state(
        cfg.batch_size, tf.float32)

    # Define the logits.
    step_outputs, last_state = rnn.rnn(
        self.multilayerRNN, step_inputs, initial_state=self._initial_state)
    flat_outputs = tf.reshape(
        tf.concat(1, step_outputs), [-1, cfg.hidden_size])
    softmax_w = tf.get_variable(
        "softmax_w", [cfg.hidden_size, self.vocabularySize])
    softmax_b = tf.get_variable("softmax_b", [self.vocabularySize])
    self._logits = tf.nn.xw_plus_b(flat_outputs, softmax_w, softmax_b)
    self._predictionSoftmax = tf.nn.softmax(self._logits)

    # Define the loss: every position gets a weight of one.
    flat_targets = tf.reshape(self._inputTargetsY, [-1])
    unit_weights = tf.ones([cfg.batch_size * cfg.sequence_size])
    loss = seq2seq.sequence_loss_by_example(
        [self._logits], [flat_targets], [unit_weights], self.vocabularySize)
    self._cost = tf.div(tf.reduce_sum(loss), cfg.batch_size)
    self._final_state = last_state
def __init__(self, args):
    """Create the discriminator's cell, placeholders and variables.

    Args:
        args: options object; this constructor reads ``disc_model``
            ('rnn', 'gru' or 'lstm'), ``vocab_size``, ``rnn_size``,
            ``num_layers``, ``batch_size`` and ``seq_length``.

    Raises:
        Exception: if ``args.disc_model`` is not a supported cell type.
    """
    self.args = args
    if args.disc_model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.disc_model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.disc_model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        # Report the option that was actually inspected (disc_model); the
        # previous message formatted the unrelated ``args.model`` attribute.
        raise Exception(
            "model type not supported: {}".format(args.disc_model))

    self.embedding = tf.Variable(
        tf.random_uniform([self.args.vocab_size, self.args.rnn_size],
                          minval=-.05, maxval=.05, dtype=tf.float32),
        name='embedding')

    # NOTE(review): the original indentation was lost; the extent of this
    # variable scope is reconstructed -- confirm against the upstream file.
    with tf.variable_scope('DISC'):
        cell = cell_fn(args.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        # If the input data is given as word tokens, feed this value.
        self.input_data_text = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length],
            name='input_data_text')
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        # Fully connected layer is applied to the final state to determine
        # the output class.
        self.fc_layer = tf.Variable(
            tf.random_normal([args.rnn_size, 1], stddev=0.35,
                             dtype=tf.float32),
            name='disc_fc_layer')
        self.lr = tf.Variable(0.0, trainable=False, name='learning_rate')
        self.has_init_seq2seq = False
def __init__(self, args):
    """Create the generator's cell, placeholders and variables.

    Args:
        args: options object; this constructor reads ``gen_model``
            ('rnn', 'gru' or 'lstm'), ``rnn_size``, ``num_layers``,
            ``batch_size``, ``seq_length`` and ``latent_size``.

    Raises:
        Exception: if ``args.gen_model`` is not a supported cell type.
    """
    self.args = args
    if args.gen_model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.gen_model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.gen_model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        # Report the option that was actually inspected (gen_model); the
        # previous message formatted the unrelated ``args.model`` attribute.
        raise Exception(
            "model type not supported: {}".format(args.gen_model))

    # NOTE(review): the original indentation was lost; the extent of this
    # variable scope is reconstructed -- confirm against the upstream file.
    with tf.variable_scope('GEN'):
        cell = cell_fn(args.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        # Sequence of word tokens taken as input.
        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length], name='input_data')
        self.latent_state = tf.placeholder(
            tf.float32, [args.batch_size, args.latent_size])

        # Weights to map the latent state into the (usually) bigger initial
        # state. Right now this only works for rnn (other more complex
        # models have more than one initial state which needs to be given a
        # value). Right now we support up to two layers (state1 and state2).
        self.latent_to_initial_state1 = tf.Variable(
            tf.random_normal([args.latent_size, args.rnn_size], stddev=0.35,
                             dtype=tf.float32),
            name='latent_to_intial_state1')
        self.latent_to_initial_state2 = tf.Variable(
            tf.random_normal([args.latent_size, args.rnn_size], stddev=0.35,
                             dtype=tf.float32),
            name='latent_to_intial_state2')
        self.initial_state1 = tf.matmul(
            self.latent_state, self.latent_to_initial_state1)
        self.initial_state2 = tf.matmul(
            self.latent_state, self.latent_to_initial_state2)

        # These are the actual approximate word vectors generated by the
        # model.
        self.outputs = tf.placeholder(
            tf.float32, [args.seq_length, args.batch_size, args.rnn_size])
        self.lr = tf.Variable(0.0, trainable=False, name='learning_rate')
        self.has_init_seq2seq = False
def __init__(self, args, infer=False):
    """Build a multi-layer RNN language model graph and its training op.

    Args:
        args: options object; reads ``model`` ('rnn', 'gru' or 'lstm'),
            ``rnn_size``, ``num_layers``, ``batch_size``, ``seq_length``,
            ``vocab_size`` and ``grad_clip``.
        infer: when true, ``args.batch_size`` and ``args.seq_length`` are
            overwritten with 1 and the decoder feeds its own predictions
            back in as the next input.

    Raises:
        Exception: if ``args.model`` is not a supported cell type.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    # Resolve the cell class by name; the first matching key wins.
    cell_classes = (
        ('rnn', 'BasicRNNCell'),
        ('gru', 'GRUCell'),
        ('lstm', 'BasicLSTMCell'),
    )
    for model_key, class_name in cell_classes:
        if args.model == model_key:
            cell_fn = getattr(rnn_cell, class_name)
            break
    else:
        raise Exception("model type not supported: {}".format(args.model))

    layer = cell_fn(args.rnn_size)
    stacked = rnn_cell.MultiRNNCell([layer] * args.num_layers)
    self.cell = stacked

    self.input_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = stacked.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable(
            "softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [args.vocab_size, args.rnn_size])
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)
            per_step = tf.split(1, args.seq_length, embedded)
            inputs = [tf.squeeze(step, [1]) for step in per_step]

    def feed_previous(prev, _):
        # Project the previous output to vocabulary scores and embed the
        # highest-scoring symbol; no gradient flows through the argmax.
        scores = tf.matmul(prev, softmax_w) + softmax_b
        best_symbol = tf.stop_gradient(tf.argmax(scores, 1))
        return tf.nn.embedding_lookup(embedding, best_symbol)

    loop_function = feed_previous if infer else None
    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, stacked,
        loop_function=loop_function, scope='rnnlm')
    flat_output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    self.logits = tf.matmul(flat_output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)

    # Every target position is weighted equally.
    flat_targets = tf.reshape(self.targets, [-1])
    unit_weights = tf.ones([args.batch_size * args.seq_length])
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [flat_targets], [unit_weights], args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    raw_grads = tf.gradients(self.cost, tvars)
    grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, args, infer=False):
    """Build an RNN language model graph, optionally with attention.

    Args:
        args: options object; reads ``rnncell`` ('rnn', 'gru' or 'lstm'),
            ``rnn_size``, ``num_layers``, ``batch_size``, ``seq_length``,
            ``vocab_size``, ``embedding_size``, ``attention``,
            ``grad_clip`` and ``log_dir``.
        infer: when true, ``args.batch_size`` and ``args.seq_length`` are
            overwritten with 1 and the decoder feeds its own predictions
            back in as the next input.

    Raises:
        Exception: if ``args.rnncell`` is not a supported cell type.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1
    if args.rnncell == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.rnncell == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.rnncell == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception(
            "rnncell type not supported: {}".format(args.rnncell))

    cell = cell_fn(args.rnn_size)
    self.cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
    self.input_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

    # NOTE(review): the original indentation was lost; the extent of this
    # variable scope is reconstructed -- confirm against the upstream file.
    with tf.variable_scope('rnnlm'):
        softmax_w = build_weight(
            [args.rnn_size, args.vocab_size], name='soft_w')
        softmax_b = build_weight([args.vocab_size], name='soft_b')
        word_embedding = build_weight(
            [args.vocab_size, args.embedding_size], name='word_embedding')
        inputs_list = tf.split(
            1, args.seq_length,
            tf.nn.embedding_lookup(word_embedding, self.input_data))
        inputs_list = [tf.squeeze(input_, [1]) for input_ in inputs_list]

    def loop(prev, _):
        # Project the previous output to vocabulary scores and embed the
        # highest-scoring symbol; no gradient flows through the argmax.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        # Look up in word_embedding: the previous code referenced an
        # undefined name ``embedding`` and failed as soon as infer was set.
        return tf.nn.embedding_lookup(word_embedding, prev_symbol)

    loop_function = loop if infer else None
    if not args.attention:
        outputs, last_state = seq2seq.rnn_decoder(
            inputs_list, self.initial_state, self.cell,
            loop_function=loop_function, scope='rnnlm')
    else:
        self.attn_length = 5
        self.attn_size = 32
        self.attention_states = build_weight(
            [args.batch_size, self.attn_length, self.attn_size])
        outputs, last_state = seq2seq.attention_decoder(
            inputs_list, self.initial_state, self.attention_states,
            self.cell, loop_function=loop_function, scope='rnnlm')

    self.final_state = last_state
    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    # Average loss for each word of each timestep.
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length

    self.lr = tf.Variable(0.0, trainable=False)
    self.var_trainable_op = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, self.var_trainable_op), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(
        zip(grads, self.var_trainable_op))
    self.initial_op = tf.initialize_all_variables()
    self.saver = tf.train.Saver(
        tf.all_variables(), max_to_keep=5, keep_checkpoint_every_n_hours=1)

    # Log file name: timestamp with spaces and slashes stripped, appended
    # directly to args.log_dir (no path separator is inserted here).
    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.txt'
    self.logfile = args.log_dir + stamp.replace(' ', '').replace('/', '')
    self.var_op = tf.all_variables()
def __init__(self, args, embedding):
    """Build the 'STAND' language model on top of a supplied embedding.

    Two decoders share the same variables: one is driven by the given
    inputs, the other feeds its own predictions back in through ``loop``.

    Args:
        args: options object; reads ``model`` ('rnn', 'gru' or 'lstm'),
            ``rnn_size``, ``num_layers``, ``batch_size``, ``seq_length``,
            ``vocab_size`` and ``grad_clip``.
        embedding: embedding table used for every lookup in this model.

    Raises:
        Exception: if ``args.model`` is not a supported cell type.
    """
    self.args = args
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
    self.input_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length], name='STAND_input')
    self.targets = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length], name='STAND_targets')
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    self.embedding = embedding

    # NOTE(review): the original indentation was lost; scope extents are
    # reconstructed so that the reuse=True decoder below can find the
    # variables created by the first decoder -- confirm against upstream.
    with tf.variable_scope('STAND'):
        softmax_w = tf.get_variable(
            "softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        inputs = tf.split(
            1, args.seq_length,
            tf.nn.embedding_lookup(self.embedding, self.input_data))
        squeezed = [tf.squeeze(input_, [1]) for input_ in inputs]
        # A real list (not a lazy map object): it is consumed by two
        # decoders below, which a one-shot iterator could not serve.
        inputs = [tf.nn.l2_normalize(step, 1) for step in squeezed]

    def loop(prev, i):
        # Project the previous output to vocabulary scores and return the
        # normalized embedding of the highest-scoring symbol.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.l2_normalize(
            tf.nn.embedding_lookup(embedding, prev_symbol), 1)

    o, _ = seq2seq.rnn_decoder(
        inputs, self.initial_state, cell, loop_function=None, scope='STAND')
    with tf.variable_scope('STAND', reuse=True) as scope:
        sf_o, _ = seq2seq.rnn_decoder(
            inputs, self.initial_state, cell, loop_function=loop,
            scope=scope)

    output = tf.reshape(tf.concat(1, o), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)

    sf_output = tf.reshape(tf.concat(1, sf_o), [-1, args.rnn_size])
    self_feed_logits = tf.matmul(sf_output, softmax_w) + softmax_b
    self.self_feed_probs = tf.nn.softmax(self_feed_logits)

    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    self.loss = tf.reduce_sum(loss) / args.batch_size / args.seq_length

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.loss, tvars), args.grad_clip)
    # Print the name of every trainable variable; the call form of print
    # parses on both Python 2 and Python 3.
    for v in tvars:
        print(v.name)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, args, infer=False):
    """Assemble the RNN language model graph together with its optimizer.

    Args:
        args: options object; reads ``model`` ('rnn', 'gru' or 'lstm'),
            ``rnn_size``, ``num_layers``, ``batch_size``, ``seq_length``,
            ``vocab_size`` and ``grad_clip``.
        infer: when true, ``args.batch_size`` and ``args.seq_length`` are
            forced to 1 and each decoder step consumes the symbol predicted
            by the previous step.

    Raises:
        Exception: if ``args.model`` is not a supported cell type.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    # Pick the cell class by name; fall through to the error when no key
    # matches.
    known_cells = (
        ('rnn', 'BasicRNNCell'),
        ('gru', 'GRUCell'),
        ('lstm', 'BasicLSTMCell'),
    )
    for key, attr_name in known_cells:
        if args.model == key:
            cell_fn = getattr(rnn_cell, attr_name)
            break
    else:
        raise Exception("model type not supported: {}".format(args.model))

    single_layer = cell_fn(args.rnn_size)
    multi_cell = rnn_cell.MultiRNNCell([single_layer] * args.num_layers)
    self.cell = multi_cell

    self.input_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = multi_cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable(
            "softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [args.vocab_size, args.rnn_size])
            looked_up = tf.nn.embedding_lookup(embedding, self.input_data)
            slices = tf.split(1, args.seq_length, looked_up)
            inputs = [tf.squeeze(piece, [1]) for piece in slices]

    def next_input(prev, _):
        # Score the previous output against the vocabulary and embed the
        # arg-max symbol; the gradient is stopped at the symbol choice.
        vocab_scores = tf.matmul(prev, softmax_w) + softmax_b
        chosen = tf.stop_gradient(tf.argmax(vocab_scores, 1))
        return tf.nn.embedding_lookup(embedding, chosen)

    if infer:
        loop_function = next_input
    else:
        loop_function = None
    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, multi_cell,
        loop_function=loop_function, scope='rnnlm')
    stacked_output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    self.logits = tf.matmul(stacked_output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)

    # All target positions carry the same weight.
    targets_1d = tf.reshape(self.targets, [-1])
    weights_1d = tf.ones([args.batch_size * args.seq_length])
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [targets_1d], [weights_1d], args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    unclipped = tf.gradients(self.cost, tvars)
    grads, _ = tf.clip_by_global_norm(unclipped, args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))