def inference(self):
"""main computation graph here: 1. embeddding layer, 2.Bi-LSTM layer, 3.concat, 4.FC layer 5.softmax """
#1.get emebedding of words in the sentence
self.embedded_words = tf.nn.embedding_lookup(self.Embedding,self.input_x) #shape:[None,sentence_length,embed_size]
#2. Bi-lstm layer
# define lstm cells: get lstm cell output
lstm_fw_cell=rnn.BasicLSTMCell(self.hidden_size) #forward direction cell
lstm_bw_cell=rnn.BasicLSTMCell(self.hidden_size) #backward direction cell
if self.dropout_keep_prob is not None:
lstm_fw_cell=rnn.DropoutWrapper(lstm_fw_cell,output_keep_prob=self.dropout_keep_prob)
lstm_bw_cell=rnn.DropoutWrapper(lstm_bw_cell,output_keep_prob=self.dropout_keep_prob)
# bidirectional_dynamic_rnn: input: [batch_size, max_time, input_size]
# output: A tuple (outputs, output_states)
# where:outputs: A tuple (output_fw, output_bw) containing the forward and the backward rnn output `Tensor`.
outputs,_=tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell,self.embedded_words,dtype=tf.float32) #[batch_size,sequence_length,hidden_size] #creates a dynamic bidirectional recurrent neural network
print("outputs:===>",outputs) #outputs:(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose:0' shape=(?, 5, 100) dtype=float32>, <tf.Tensor 'ReverseV2:0' shape=(?, 5, 100) dtype=float32>))
#3. concat output
output_rnn=tf.concat(outputs,axis=2) #[batch_size,sequence_length,hidden_size*2]
self.output_rnn_last=tf.reduce_mean(output_rnn,axis=1) #[batch_size,hidden_size*2] #output_rnn_last=output_rnn[:,-1,:] ##[batch_size,hidden_size*2] #TODO
print("output_rnn_last:", self.output_rnn_last) # <tf.Tensor 'strided_slice:0' shape=(?, 200) dtype=float32>
#4. logits(use linear layer)
with tf.name_scope("output"): #inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
logits = tf.matmul(self.output_rnn_last, self.W_projection) + self.b_projection # [batch_size,num_classes]
return logits
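For reference, a minimal standalone sketch of the same pattern (TensorFlow 1.x with tf.contrib.rnn assumed; the shapes batch=None, sentence_length=5, embed_size=100, hidden_size=100 are illustrative only, not taken from the source):

import tensorflow as tf
from tensorflow.contrib import rnn

embedded_words = tf.placeholder(tf.float32, [None, 5, 100], name="embedded_words")
dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="dropout_keep_prob")

# wrap each direction's cell so dropout is applied to its outputs
fw_cell = rnn.DropoutWrapper(rnn.BasicLSTMCell(100), output_keep_prob=dropout_keep_prob)
bw_cell = rnn.DropoutWrapper(rnn.BasicLSTMCell(100), output_keep_prob=dropout_keep_prob)

# outputs is a (output_fw, output_bw) tuple, each shaped [batch, 5, 100]
outputs, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, embedded_words, dtype=tf.float32)
sentence_repr = tf.reduce_mean(tf.concat(outputs, axis=2), axis=1)  # [batch, 200]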
Python DropoutWrapper() usage examples (source code)
def _create_loss(self):
'''
Risk estimation loss function. The output is the planned position we should hold until the next day. The change rate of
the next day is self.y, so we lose two categories of money: - self.y * self.position is the trading loss, and
cost * self.position is a constant loss from tax and from missed profit such as buying national debt. Therefore,
the loss function is formulated as: 100 * (- self.y * self.position + cost * self.position) = -100 * ((self.y - cost) * self.position)
:return:
'''
#with tf.device("/cpu:0"):
xx = tf.unstack(self.x, self.step, 1)
lstm_cell = rnn.LSTMCell(self.hidden_size, forget_bias=1.0, initializer=orthogonal_initializer())
dropout_cell = DropoutWrapper(lstm_cell, input_keep_prob=self.keep_rate, output_keep_prob=self.keep_rate, state_keep_prob=self.keep_rate)
outputs, states = rnn.static_rnn(dropout_cell, xx, dtype=tf.float32)
signal = tf.matmul(outputs[-1], self.weights['out']) + self.biases['out']
scope = "activation_batch_norm"
norm_signal = self.batch_norm_layer(signal, scope=scope)
# batch_norm(signal, 0.9, center=True, scale=True, epsilon=0.001, activation_fn=tf.nn.relu6,
# is_training=is_training, scope="activation_batch_norm", reuse=False)
self.position = tf.nn.relu6(norm_signal, name="relu_limit") / 6.
self.avg_position = tf.reduce_mean(self.position)
# self.cost = 0.0002
self.loss = -100. * tf.reduce_mean(tf.multiply((self.y - self.cost), self.position, name="estimated_risk"))
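A quick numeric check of the loss formula above (plain Python with hypothetical numbers, not from the source):

y, cost, position = 0.01, 0.0002, 1.0     # next-day change rate, trading cost, held position
loss = -100.0 * ((y - cost) * position)   # matches -100 * ((self.y - cost) * self.position)
assert abs(loss - (-0.98)) < 1e-9         # a profitable day with a full position yields a negative loss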
def _create_rnn_cell(self):
"""
Creates a single RNN cell according to the architecture of this RNN.
Returns
-------
rnn cell
A single RNN cell according to the architecture of this RNN
"""
keep_prob = 1.0 if self.keep_prob is None else self.keep_prob
if self.cell_type == CellType.GRU:
return DropoutWrapper(GRUCell(self.num_units), keep_prob, keep_prob)
elif self.cell_type == CellType.LSTM:
return DropoutWrapper(LSTMCell(self.num_units), keep_prob, keep_prob)
else:
raise ValueError("unknown cell type: {}".format(self.cell_type))
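The positional arguments above land on DropoutWrapper's input_keep_prob and output_keep_prob parameters; written out with keywords (a sketch, assuming TensorFlow 1.x tf.contrib.rnn):

import tensorflow as tf
from tensorflow.contrib.rnn import DropoutWrapper, LSTMCell

keep_prob = 0.8  # hypothetical value
cell = DropoutWrapper(LSTMCell(128),
                      input_keep_prob=keep_prob,
                      output_keep_prob=keep_prob)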
Source: actor.py (project: neural-combinatorial-optimization-rl-tensorflow, author: MichelDeudon)
def build_permutation(self):
with tf.variable_scope("encoder"):
with tf.variable_scope("embedding"):
# Embed input sequence
W_embed =tf.get_variable("weights", [1,self.input_dimension+2, self.input_embed], initializer=self.initializer) # +2 for TW feat. here too
embedded_input = tf.nn.conv1d(self.input_, W_embed, 1, "VALID", name="embedded_input")
# Batch Normalization
embedded_input = tf.layers.batch_normalization(embedded_input, axis=2, training=self.is_training, name='layer_norm', reuse=None)
with tf.variable_scope("dynamic_rnn"):
# Encode input sequence
cell1 = LSTMCell(self.num_neurons, initializer=self.initializer) # BNLSTMCell(self.num_neurons, self.training) or cell1 = DropoutWrapper(cell1, output_keep_prob=0.9)
# Return the output activations [Batch size, Sequence Length, Num_neurons] and last hidden state as tensors.
encoder_output, encoder_state = tf.nn.dynamic_rnn(cell1, embedded_input, dtype=tf.float32)
with tf.variable_scope('decoder'):
# Ptr-net returns permutations (self.positions), with their log-probability for backprop
self.ptr = Pointer_decoder(encoder_output, self.config)
self.positions, self.log_softmax, self.attending, self.pointing = self.ptr.loop_decode(encoder_state)
variable_summaries('log_softmax',self.log_softmax, with_max_min = True)
def build_cells(self):
# encoder cell
with tf.name_scope('encoder_cell') as scope:
encoder_cell = rnn.MultiRNNCell([self.RNNCell(num_units=self.hidden_size)
for _ in range(self.encoder_layer_size)])
encoder_cell = rnn.DropoutWrapper(encoder_cell,
input_keep_prob=self.encoder_input_keep_prob,
output_keep_prob=self.encoder_output_keep_prob)
# decoder cell
with tf.name_scope('decoder_cell') as scope:
decoder_cell = rnn.MultiRNNCell([self.RNNCell(num_units=self.hidden_size)
for _ in range(self.decoder_layer_size)])
decoder_cell = rnn.DropoutWrapper(decoder_cell,
input_keep_prob=self.decoder_input_keep_prob,
output_keep_prob=self.decoder_output_keep_prob)
return encoder_cell, decoder_cell
def __build_rnn_cell(self):
with tf.name_scope('encoder_cell'):
encoder_cell = rnn.MultiRNNCell([self.RNN(num_units=self.hidden_layer_size)
for _ in range(self.encoder_layer_size)])
encoder_cell = rnn.DropoutWrapper(
cell=encoder_cell,
input_keep_prob=self.encoder_input_keep_prob,
output_keep_prob=self.encoder_output_keep_prob
)
with tf.name_scope('decoder_cell'):
decoder_cell = rnn.MultiRNNCell([self.RNN(num_units=self.hidden_layer_size)
for _ in range(self.decoder_layer_size)])
decoder_cell = rnn.DropoutWrapper(
cell=decoder_cell,
input_keep_prob=self.decoder_input_keep_prob,
output_keep_prob=self.decoder_output_keep_prob
)
return encoder_cell, decoder_cell
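The two cell builders above apply one DropoutWrapper around the whole MultiRNNCell, so dropout acts only at the stack's input and output. A per-layer alternative (a sketch assuming TensorFlow 1.x tf.contrib.rnn, not part of the source) wraps each layer before stacking:

import tensorflow as tf
from tensorflow.contrib import rnn

def per_layer_dropout_stack(num_layers, hidden_size, output_keep_prob):
    # each layer's outputs get their own dropout mask
    cells = [rnn.DropoutWrapper(rnn.GRUCell(hidden_size), output_keep_prob=output_keep_prob)
             for _ in range(num_layers)]
    return rnn.MultiRNNCell(cells)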
def _shared_layer(self, input_data, config, is_training):
"""Build the model up until decoding.
Args:
input_data = size batch_size X num_steps X embedding size
Returns:
output units
"""
with tf.variable_scope('encoder'):
lstm_cell = rnn.BasicLSTMCell(config.encoder_size, reuse=tf.get_variable_scope().reuse, forget_bias=1.0)
if is_training and config.keep_prob < 1:
lstm_cell = rnn.DropoutWrapper(
lstm_cell, output_keep_prob=config.keep_prob)
encoder_outputs, encoder_states = tf.nn.dynamic_rnn(lstm_cell,
input_data,
dtype=tf.float32,
scope="encoder_rnn")
return encoder_outputs
def buildRNN(self,x,scope):
print(x)
x = tf.transpose(x, [1, 0, 2])
#print(x)
x = tf.reshape(x, [-1,self.nfeatures])
#print(x)
x = tf.split(x, self.n_steps, 0)
print(x)
#lstm_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0) for _ in range(self.n_layers)], state_is_tuple=True)
#outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float64)
with tf.name_scope("fw"+scope),tf.variable_scope("fw"+scope):
fw_cell_array = []
print(tf.get_variable_scope().name)
for _ in range(self.n_layers):
fw_cell = rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0, state_is_tuple=True)
#fw_cell = rnn.DropoutWrapper(fw_cell,output_keep_prob=self.dropout)
fw_cell_array.append(fw_cell)
fw_cell = rnn.MultiRNNCell(fw_cell_array, state_is_tuple=True)
with tf.name_scope("bw"+scope),tf.variable_scope("bw"+scope):
bw_cell_array = []
print(tf.get_variable_scope().name)
for _ in range(self.n_layers):
bw_cell = rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0, state_is_tuple=True)
#bw_cell = rnn.DropoutWrapper(bw_cell,output_keep_prob=self.dropout)
bw_cell_array.append(bw_cell)
bw_cell = rnn.MultiRNNCell(bw_cell_array, state_is_tuple=True)
outputs, _,_ = tf.contrib.rnn.static_bidirectional_rnn(fw_cell, bw_cell, x, dtype=tf.float64)
#outputs, = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, x, dtype=tf.float64)
print(outputs)
print(outputs[-1])
return outputs[-1]
Source: RCNNModelWithLSTM.py (project: DeeplearningForTextClassification, author: zldeng)
def convertLayerWithRNN(self):
'''
use Bi-LSTM to get context
'''
lstm_fw_cell = rnn.BasicLSTMCell(self.context_size)
lstm_bw_cell = rnn.BasicLSTMCell(self.context_size)
if self.dropout_keep_prob is not None:
lstm_fw_cell = rnn.DropoutWrapper(lstm_fw_cell,
output_keep_prob = self.dropout_keep_prob)
lstm_bw_cell = rnn.DropoutWrapper(lstm_bw_cell,
output_keep_prob = self.dropout_keep_prob)
outputs,output_states = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,
lstm_bw_cell,self.embedded_words,dtype = tf.float32)
output_fw,output_bw = outputs
result_presentation = tf.concat([output_fw,self.embedded_words,output_bw],axis = 2)
return result_presentation
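Shape bookkeeping for the concat above (the concrete numbers are illustrative only, not from the source):

# output_fw, output_bw: [batch, seq_len, context_size]; embedded_words: [batch, seq_len, embed_size]
# result_presentation:  [batch, seq_len, 2*context_size + embed_size]
context_size, embed_size = 100, 128          # hypothetical values
result_dim = 2 * context_size + embed_size   # 328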
def BidirectionalGRUEncoder(self,inputs,name):
'''
inputs: [batch,max_time,embedding_size]
output: [batch,max_time,2*hidden_size]
'''
with tf.variable_scope(name):
fw_gru_cell = rnn.GRUCell(self.hidden_size)
bw_gru_cell = rnn.GRUCell(self.hidden_size)
fw_gru_cell = rnn.DropoutWrapper(fw_gru_cell,output_keep_prob = self.dropout_keep_prob)
bw_gru_cell = rnn.DropoutWrapper(bw_gru_cell,output_keep_prob = self.dropout_keep_prob)
(fw_outputs,bw_outputs),(fw_outputs_sta,bw_outputs_sta) = tf.nn.bidirectional_dynamic_rnn(
cell_fw = fw_gru_cell,
cell_bw = bw_gru_cell,
inputs = inputs,
sequence_length = getSequenceRealLength(inputs),
dtype = tf.float32)
outputs = tf.concat((fw_outputs,bw_outputs),2)
return outputs
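getSequenceRealLength is not defined in this snippet; a common sketch (an assumption, not the author's implementation) computes real lengths from zero-padded embeddings:

import tensorflow as tf

def get_sequence_real_length(inputs):
    # inputs: [batch, max_time, embedding_size]; a timestep counts as real if any dimension is non-zero
    used = tf.sign(tf.reduce_max(tf.abs(inputs), axis=2))    # [batch, max_time] of 0/1
    return tf.cast(tf.reduce_sum(used, axis=1), tf.int32)    # [batch]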
Source: state_saving_rnn_estimator.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def apply_dropout(
cell, input_keep_probability, output_keep_probability, random_seed=None):
"""Apply dropout to the outputs and inputs of `cell`.
Args:
cell: An `RNNCell`.
input_keep_probability: Probability to keep inputs to `cell`. If `None`,
no dropout is applied.
output_keep_probability: Probability to keep outputs of `cell`. If `None`,
no dropout is applied.
random_seed: Seed for random dropout.
Returns:
An `RNNCell`, the result of applying the supplied dropouts to `cell`.
"""
input_prob_none = input_keep_probability is None
output_prob_none = output_keep_probability is None
if input_prob_none and output_prob_none:
return cell
if input_prob_none:
input_keep_probability = 1.0
if output_prob_none:
output_keep_probability = 1.0
return rnn_cell.DropoutWrapper(
cell, input_keep_probability, output_keep_probability, random_seed)
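A hedged usage sketch for apply_dropout (it assumes the surrounding module's rnn_cell alias for tf.contrib.rnn; the numbers are illustrative):

base_cell = rnn_cell.GRUCell(64)
wrapped = apply_dropout(base_cell,
                        input_keep_probability=0.9,
                        output_keep_probability=0.5,
                        random_seed=42)
untouched = apply_dropout(base_cell, None, None)   # both None, so the cell is returned unchanged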
Source: dynamic_rnn_estimator.py (project: DeepLearning_VirtualReality_BigData_Project, author: rashmitripathi)
def apply_dropout(
cell, input_keep_probability, output_keep_probability, random_seed=None):
"""Apply dropout to the outputs and inputs of `cell`.
Args:
cell: An `RNNCell`.
input_keep_probability: Probability to keep inputs to `cell`. If `None`,
no dropout is applied.
output_keep_probability: Probability to keep outputs of `cell`. If `None`,
no dropout is applied.
random_seed: Seed for random dropout.
Returns:
An `RNNCell`, the result of applying the supplied dropouts to `cell`.
"""
input_prob_none = input_keep_probability is None
output_prob_none = output_keep_probability is None
if input_prob_none and output_prob_none:
return cell
if input_prob_none:
input_keep_probability = 1.0
if output_prob_none:
output_keep_probability = 1.0
return contrib_rnn.DropoutWrapper(
cell, input_keep_probability, output_keep_probability, random_seed)
Source: p9_BiLstmTextRelation_model.py (project: text_classification, author: brightmart)
def inference(self):
"""main computation graph here: 1. embeddding layer, 2.Bi-LSTM layer, 3.mean pooling, 4.FC layer, 5.softmax """
#1.get emebedding of words in the sentence
self.embedded_words = tf.nn.embedding_lookup(self.Embedding,self.input_x) #shape:[None,sentence_length,embed_size]
#2. Bi-lstm layer
# define lstm cells: get lstm cell output
lstm_fw_cell=rnn.BasicLSTMCell(self.hidden_size) #forward direction cell
lstm_bw_cell=rnn.BasicLSTMCell(self.hidden_size) #backward direction cell
if self.dropout_keep_prob is not None:
lstm_fw_cell=rnn.DropoutWrapper(lstm_fw_cell,output_keep_prob=self.dropout_keep_prob)
lstm_bw_cell=rnn.DropoutWrapper(lstm_bw_cell,output_keep_prob=self.dropout_keep_prob)
# bidirectional_dynamic_rnn: input: [batch_size, max_time, input_size]
# output: A tuple (outputs, output_states)
# where:outputs: A tuple (output_fw, output_bw) containing the forward and the backward rnn output `Tensor`.
outputs,_=tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell,self.embedded_words,dtype=tf.float32) #[batch_size,sequence_length,hidden_size] #creates a dynamic bidirectional recurrent neural network
print("outputs:===>",outputs) #outputs:(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose:0' shape=(?, 5, 100) dtype=float32>, <tf.Tensor 'ReverseV2:0' shape=(?, 5, 100) dtype=float32>))
#3. concat output
output_rnn=tf.concat(outputs,axis=2) #[batch_size,sequence_length,hidden_size*2]
output_rnn_pooled=tf.reduce_mean(output_rnn,axis=1) #[batch_size,hidden_size*2] #output_rnn_last=output_rnn[:,-1,:] ##[batch_size,hidden_size*2] #TODO
print("output_rnn_pooled:", output_rnn_pooled) # <tf.Tensor 'strided_slice:0' shape=(?, 200) dtype=float32>
#4. logits(use linear layer)
with tf.name_scope("output"): #inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
logits = tf.matmul(output_rnn_pooled, self.W_projection) + self.b_projection # [batch_size,num_classes]
return logits
def input_encoder_bi_lstm(self):
"""use bi-directional lstm to encode query_embedding:[batch_size,sequence_length,embed_size]
and story_embedding:[batch_size,story_length,sequence_length,embed_size]
output:query_embedding:[batch_size,hidden_size*2] story_embedding:[batch_size,self.story_length,self.hidden_size*2]
"""
#1. encode query: bi-lstm layer
lstm_fw_cell = rnn.BasicLSTMCell(self.hidden_size) # forward direction cell
lstm_bw_cell = rnn.BasicLSTMCell(self.hidden_size) # backward direction cell
if self.dropout_keep_prob is not None:
lstm_fw_cell = rnn.DropoutWrapper(lstm_fw_cell, output_keep_prob=self.dropout_keep_prob)
lstm_bw_cell = rnn.DropoutWrapper(lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)
query_hidden_output, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, self.query_embedding,dtype=tf.float32,scope="query_rnn") # [batch_size,sequence_length,hidden_size] #creates a dynamic bidirectional recurrent neural network
query_hidden_output = tf.concat(query_hidden_output, axis=2) #[batch_size,sequence_length,hidden_size*2]
self.query_embedding=tf.reduce_sum(query_hidden_output,axis=1) #[batch_size,hidden_size*2]
print("input_encoder_bi_lstm.self.query_embedding:",self.query_embedding)
#2. encode story
# self.story_embedding:[batch_size,story_length,sequence_length,embed_size]
self.story_embedding=tf.reshape(self.story_embedding,shape=(-1,self.story_length*self.sequence_length,self.embed_size)) #[batch_size,story_length*sequence_length,embed_size]
lstm_fw_cell_story = rnn.BasicLSTMCell(self.hidden_size) # forward direction cell
lstm_bw_cell_story = rnn.BasicLSTMCell(self.hidden_size) # backward direction cell
if self.dropout_keep_prob is not None:
lstm_fw_cell_story = rnn.DropoutWrapper(lstm_fw_cell_story, output_keep_prob=self.dropout_keep_prob)
lstm_bw_cell_story = rnn.DropoutWrapper(lstm_bw_cell_story, output_keep_prob=self.dropout_keep_prob)
story_hidden_output, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell_story, lstm_bw_cell_story, self.story_embedding,dtype=tf.float32,scope="story_rnn")
story_hidden_output=tf.concat(story_hidden_output,axis=2) #[batch_size,story_length*sequence_length,hidden_size*2]
story_hidden_output=tf.reshape(story_hidden_output,shape=(-1,self.story_length,self.sequence_length,self.hidden_size*2))
self.story_embedding = tf.reduce_sum(story_hidden_output, axis=2) # [batch_size,self.story_length,self.hidden_size*2]
def _add_encoders(self):
with tf.variable_scope('query_encoder'):
query_encoder_cell = GRUCell(self.encoder_cell_state_size)
if self.dropout_enabled and self.mode != 'decode':
query_encoder_cell = DropoutWrapper(cell=query_encoder_cell, output_keep_prob=0.8)
query_embeddings = tf.nn.embedding_lookup(self.embeddings, self.queries_placeholder)
query_encoder_outputs, _ = rnn.dynamic_rnn(query_encoder_cell, query_embeddings,
sequence_length=self.query_lengths_placeholder,
swap_memory=True, dtype=tf.float32)
self.query_last = query_encoder_outputs[:, -1, :]
with tf.variable_scope('encoder'):
fw_cell = GRUCell(self.encoder_cell_state_size)
bw_cell = GRUCell(self.encoder_cell_state_size)
if self.dropout_enabled and self.mode != 'decode':
fw_cell = DropoutWrapper(cell=fw_cell, output_keep_prob=0.8)
bw_cell = DropoutWrapper(cell=bw_cell, output_keep_prob=0.8)
embeddings = tf.nn.embedding_lookup(self.embeddings, self.documents_placeholder)
(encoder_outputs_fw, encoder_outputs_bw), _ = rnn.bidirectional_dynamic_rnn(
fw_cell, bw_cell,
embeddings,
sequence_length=self.document_lengths_placeholder,
swap_memory=True,
dtype=tf.float32)
self.encoder_outputs = tf.concat([encoder_outputs_fw, encoder_outputs_bw], 2)
self.final_encoder_state = self.encoder_outputs[:, -1, :]
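self.final_encoder_state above takes the output at the last time index, which for padded batches is a padding step; a common alternative (a sketch under that assumption, not from the source) gathers the last valid output per example:

import tensorflow as tf

def last_relevant_output(outputs, lengths):
    # outputs: [batch, max_time, dim]; lengths: [batch] int32 of true sequence lengths
    batch = tf.shape(outputs)[0]
    indices = tf.stack([tf.range(batch), lengths - 1], axis=1)   # [batch, 2] pairs of (example, last step)
    return tf.gather_nd(outputs, indices)                        # [batch, dim]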
Source: lstm_predictior.py (project: LSTM-Time-Series-Analysis-using-Tensorflow, author: pusj)
def lstm_model(time_steps, rnn_layers, dense_layers=None, learning_rate=0.01, optimizer='Adagrad',learning_rate_decay_fn = None): # [Ftrl, Adam, Adagrad, Momentum, SGD, RMSProp]
print(time_steps)
#exit(0)
"""
Creates a deep model based on:
* stacked lstm cells
* optional dense layers
:param num_units: the size of the cells.
:param rnn_layers: list of int or dict
* list of int: the steps used to instantiate the `BasicLSTMCell` cell
* list of dict: [{steps: int, keep_prob: int}, ...]
:param dense_layers: list of nodes for each layer
:return: the model definition
"""
def lstm_cells(layers):
print('-------------------------sdsdsdsdssd---------------------------------------------',layers)
if isinstance(layers[0], dict):
return [rnn.DropoutWrapper(rnn.BasicLSTMCell(layer['num_units'],state_is_tuple=True),layer['keep_prob'])
if layer.get('keep_prob')
else rnn.BasicLSTMCell(layer['num_units'], state_is_tuple=True)
for layer in layers]
return [rnn.BasicLSTMCell(steps, state_is_tuple=True) for steps in layers]
def dnn_layers(input_layers, layers):
if layers and isinstance(layers, dict):
return tflayers.stack(input_layers, tflayers.fully_connected,
layers['layers'],
activation=layers.get('activation'),
dropout=layers.get('dropout'))
elif layers:
return tflayers.stack(input_layers, tflayers.fully_connected, layers)
else:
return input_layers
def _lstm_model(X, y):
stacked_lstm = rnn.MultiRNNCell(lstm_cells(rnn_layers), state_is_tuple=True)
x_ = tf.unstack(X, num=time_steps, axis=1)
output, layers = rnn.static_rnn(stacked_lstm, x_, dtype=dtypes.float32)
output = dnn_layers(output[-1], dense_layers)
prediction, loss = tflearn.models.linear_regression(output, y)
train_op = tf.contrib.layers.optimize_loss(
loss, tf.contrib.framework.get_global_step(), optimizer=optimizer,
learning_rate = tf.train.exponential_decay(learning_rate, tf.contrib.framework.get_global_step(), decay_steps = 1000, decay_rate = 0.9, staircase=False, name=None))
print('learning_rate',learning_rate)
return prediction, loss, train_op
# https://www.tensorflow.org/versions/r0.10/api_docs/python/train/decaying_the_learning_rate
return _lstm_model
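A hedged usage sketch with hypothetical hyperparameters (the dict keys follow lstm_cells above):

model_fn = lstm_model(
    time_steps=10,
    rnn_layers=[{'num_units': 64, 'keep_prob': 0.8}, {'num_units': 64}],
    dense_layers=[32, 16],
    learning_rate=0.01,
    optimizer='Adagrad')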
def lstm_cell(self):
cell = rnn.LSTMCell(self.num_hidden)
cell = rnn.DropoutWrapper(cell,self.dropout)
return cell
def lstm_cell(self):
lstm_cell = rnn.LSTMCell(self.hidden_neural_size,forget_bias=2.0)
lstm_cell = rnn.DropoutWrapper(lstm_cell,output_keep_prob=self.dropout_keep_prob)
return lstm_cell
def lstm_cell(self):
cell = rnn.LSTMCell(self.hidden_neural_size,use_peepholes=True,initializer=self.initializer)
cell = rnn.DropoutWrapper(cell,self.dropout)
return cell
def lstm_cell(self):
lstm_cell = rnn.LSTMCell(self.hidden_neural_size,forget_bias=1.0)
lstm_cell = rnn.DropoutWrapper(lstm_cell,output_keep_prob=self.dropout_keep_prob)
return lstm_cell
def lstm_fw(self):
lstm_fw = rnn.LSTMCell(self.hidden_neural_size)
lstm_fw = rnn.DropoutWrapper(lstm_fw, self.dropout)
return lstm_fw
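In the helpers above that pass self.dropout positionally, the value lands on DropoutWrapper's second parameter, input_keep_prob; a keyword form (a sketch, assuming TensorFlow 1.x tf.contrib.rnn) makes the intent explicit:

import tensorflow as tf
from tensorflow.contrib import rnn

cell = rnn.DropoutWrapper(rnn.LSTMCell(128),
                          input_keep_prob=0.8,   # what the positional call sets
                          output_keep_prob=1.0)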
def _pos_private(self, encoder_units, config, is_training):
"""Decode model for pos
Args:
encoder_units - these are the encoder units
num_pos - the number of pos tags there are (output units)
returns:
logits
"""
with tf.variable_scope("pos_decoder"):
pos_decoder_cell = rnn.BasicLSTMCell(config.pos_decoder_size,
forget_bias=1.0, reuse=tf.get_variable_scope().reuse)
if is_training and config.keep_prob < 1:
pos_decoder_cell = rnn.DropoutWrapper(
pos_decoder_cell, output_keep_prob=config.keep_prob)
encoder_units = tf.transpose(encoder_units, [1, 0, 2])
decoder_outputs, decoder_states = tf.nn.dynamic_rnn(pos_decoder_cell,
encoder_units,
dtype=tf.float32,
scope="pos_rnn")
output = tf.reshape(tf.concat(decoder_outputs, 1),
[-1, config.pos_decoder_size])
softmax_w = tf.get_variable("softmax_w",
[config.pos_decoder_size,
config.num_pos_tags])
softmax_b = tf.get_variable("softmax_b", [config.num_pos_tags])
logits = tf.matmul(output, softmax_w) + softmax_b
return logits, decoder_states
def _chunk_private(self, encoder_units, pos_prediction, config, is_training):
"""Decode model for chunks
Args:
encoder_units - these are the encoder units:
[batch_size X encoder_size] with the one the pos prediction
pos_prediction:
must be the same size as the encoder_size
returns:
logits
"""
# concatenate the encoder_units and the pos_prediction
pos_prediction = tf.reshape(pos_prediction,
[self.batch_size, self.num_steps, self.pos_embedding_size])
encoder_units = tf.transpose(encoder_units, [1, 0, 2])
chunk_inputs = tf.concat([pos_prediction, encoder_units], 2)
with tf.variable_scope("chunk_decoder"):
cell = rnn.BasicLSTMCell(config.chunk_decoder_size, forget_bias=1.0, reuse=tf.get_variable_scope().reuse)
if is_training and config.keep_prob < 1:
cell = rnn.DropoutWrapper(
cell, output_keep_prob=config.keep_prob)
decoder_outputs, decoder_states = tf.nn.dynamic_rnn(cell,
chunk_inputs,
dtype=tf.float32,
scope="chunk_rnn")
output = tf.reshape(tf.concat(decoder_outputs, 1),
[-1, config.chunk_decoder_size])
softmax_w = tf.get_variable("softmax_w",
[config.chunk_decoder_size,
config.num_chunk_tags])
softmax_b = tf.get_variable("softmax_b", [config.num_chunk_tags])
logits = tf.matmul(output, softmax_w) + softmax_b
return logits, decoder_states
def cell_create(self,scope_name):
with tf.variable_scope(scope_name):
if self.cell_type == 'tanh':
cells = rnn.MultiRNNCell([rnn.BasicRNNCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
elif self.cell_type == 'LSTM':
cells = rnn.MultiRNNCell([rnn.BasicLSTMCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
elif self.cell_type == 'GRU':
cells = rnn.MultiRNNCell([rnn.GRUCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
elif self.cell_type == 'LSTMP':
cells = rnn.MultiRNNCell([rnn.LSTMCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True)
cells = rnn.DropoutWrapper(cells, input_keep_prob=self.dropout_ph,output_keep_prob=self.dropout_ph)
return cells
def rnn_segment(features, targets, mode, params):
seq_feature = features['seq_feature']
seq_length = features['seq_length']
with tf.variable_scope("emb"):
embeddings = tf.get_variable("char_emb", shape=[params['num_char'], params['emb_size']])
seq_emb = tf.nn.embedding_lookup(embeddings, seq_feature)
batch_size = tf.shape(seq_feature)[0]
time_step = tf.shape(seq_feature)[1]
flat_seq_emb = tf.reshape(seq_emb, shape=[batch_size, time_step, (params['k'] + 1) * params['emb_size']])
cell = rnn.LSTMCell(params['rnn_units'])
if mode == ModeKeys.TRAIN:
cell = rnn.DropoutWrapper(cell, params['input_keep_prob'], params['output_keep_prob'])
projection_cell = rnn.OutputProjectionWrapper(cell, params['num_class'])
logits, _ = tf.nn.dynamic_rnn(projection_cell, flat_seq_emb, sequence_length=seq_length, dtype=tf.float32)
weight_mask = tf.to_float(tf.sequence_mask(seq_length))
loss = seq2seq.sequence_loss(logits, targets, weights=weight_mask)
train_op = layers.optimize_loss(
loss=loss,
global_step=tf.contrib.framework.get_global_step(),
learning_rate=params["learning_rate"],
optimizer=tf.train.AdamOptimizer,
clip_gradients=params['grad_clip'],
summaries=[
"learning_rate",
"loss",
"gradients",
"gradient_norm",
])
pred_classes = tf.to_int32(tf.argmax(input=logits, axis=2))
pred_words = tf.logical_or(tf.equal(pred_classes, 0), tf.equal(pred_classes, 3))
target_words = tf.logical_or(tf.equal(targets, 0), tf.equal(targets, 3))
precision = metrics.streaming_precision(pred_words, target_words, weights=weight_mask)
recall = metrics.streaming_recall(pred_words, target_words, weights=weight_mask)
predictions = {
"classes": pred_classes
}
eval_metric_ops = {
"precision": precision,
"recall": recall
}
return learn.ModelFnOps(mode, predictions, loss, train_op, eval_metric_ops=eval_metric_ops)
def build_infer_graph(x, batch_size, vocab_size=VOCAB_SIZE, embedding_size=32,
rnn_size=128, num_layers=2, p_keep=1.0):
"""
builds inference graph
"""
infer_args = {"batch_size": batch_size, "vocab_size": vocab_size,
"embedding_size": embedding_size, "rnn_size": rnn_size,
"num_layers": num_layers, "p_keep": p_keep}
logger.debug("building inference graph: %s.", infer_args)
# other placeholders
p_keep = tf.placeholder_with_default(p_keep, [], "p_keep")
batch_size = tf.placeholder_with_default(batch_size, [], "batch_size")
# embedding layer
embed_seq = layers.embed_sequence(x, vocab_size, embedding_size)
# shape: [batch_size, seq_len, embedding_size]
embed_seq = tf.nn.dropout(embed_seq, keep_prob=p_keep)
# shape: [batch_size, seq_len, embedding_size]
# RNN layers
cells = [rnn.LSTMCell(rnn_size) for _ in range(num_layers)]
cells = [rnn.DropoutWrapper(cell, output_keep_prob=p_keep) for cell in cells]
cells = rnn.MultiRNNCell(cells)
input_state = cells.zero_state(batch_size, tf.float32)
# shape: [num_layers, 2, batch_size, rnn_size]
rnn_out, output_state = tf.nn.dynamic_rnn(cells, embed_seq, initial_state=input_state)
# rnn_out shape: [batch_size, seq_len, rnn_size]
# output_state shape: [num_layers, 2, batch_size, rnn_size]
with tf.name_scope("lstm"):
tf.summary.histogram("outputs", rnn_out)
for c_state, h_state in output_state:
tf.summary.histogram("c_state", c_state)
tf.summary.histogram("h_state", h_state)
# fully connected layer
logits = layers.fully_connected(rnn_out, vocab_size, activation_fn=None)
# shape: [batch_size, seq_len, vocab_size]
# predictions
with tf.name_scope("softmax"):
probs = tf.nn.softmax(logits)
# shape: [batch_size, seq_len, vocab_size]
with tf.name_scope("sequence"):
tf.summary.histogram("embeddings", embed_seq)
tf.summary.histogram("logits", logits)
model = {"logits": logits, "probs": probs,
"input_state": input_state, "output_state": output_state,
"p_keep": p_keep, "batch_size": batch_size, "infer_args": infer_args}
return model
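A hedged usage sketch for build_infer_graph (the placeholder shape and hyperparameters are illustrative, not from the source):

x = tf.placeholder(tf.int32, [None, None], name="x")   # [batch_size, seq_len] of token ids
model = build_infer_graph(x, batch_size=32, vocab_size=VOCAB_SIZE,
                          embedding_size=32, rnn_size=128, num_layers=2, p_keep=0.5)
logits, probs = model["logits"], model["probs"]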
def inference(self):
'''
1. embedding layer
2. Bi-LSTM layer
3. concat Bi-LSTM output
4. FC (fully connected) layer
5. softmax layer
'''
#embedding layer
with tf.device('/cpu:0'),tf.name_scope('embedding'):
self.embedded_words = tf.nn.embedding_lookup(self.Embedding,self.input_x)
#Bi-LSTM layer
lstm_fw_cell = rnn.BasicLSTMCell(self.hidden_size)
lstm_bw_cell = rnn.BasicLSTMCell(self.hidden_size)
if self.dropout_keep_prob is not None:
lstm_fw_cell = rnn.DropoutWrapper(lstm_fw_cell,output_keep_prob = self.dropout_keep_prob)
lstm_bw_cell = rnn.DropoutWrapper(lstm_bw_cell,output_keep_prob = self.dropout_keep_prob)
outputs,output_states = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell,self.embedded_words,dtype = tf.float32)
#BI-GRU layer
'''
gru_fw_cell = rnn.GRUCell(self.hidden_size)
gru_bw_cell = rnn.GRUCell(self.hidden_size)
if self.dropout_keep_prob is not None:
gru_fw_cell = rnn.DropoutWrapper(gru_fw_cell,output_keep_prob = self.dropout_keep_prob)
gru_bw_cell = rnn.DropoutWrapper(gru_bw_cell,output_keep_prob = self.dropout_keep_prob)
outputs,output_states = tf.nn.bidirectional_dynamic_rnn(gru_fw_cell,gru_bw_cell,self.embedded_words,dtype = tf.float32)
'''
#concat output
#each output in outputs is [batch sequence_length hidden_size]
#concat forward output and backward output
output_cnn = tf.concat(outputs,axis = 2) #[batch sequence_length 2*hidden_size]
output_cnn_last = tf.reduce_mean(output_cnn,axis = 1) #[batch_size,2*hidden_size]
#FC layer
with tf.name_scope('output'):
self.score = tf.matmul(output_cnn_last,self.W_projection) + self.b_projection
return self.score
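The method returns unnormalized class scores; a minimal loss sketch to pair with it (assumes integer class labels, not part of the original snippet):

import tensorflow as tf

def loss_from_score(score, input_y):
    # score: [batch, num_classes] logits; input_y: [batch] integer class labels
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=input_y, logits=score)
    return tf.reduce_mean(losses)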