def create_model(self):
    # Placeholders
    # Inputs: [batch_size, time_steps, num_features]
    self.input_data = tf.placeholder(tf.float32, [None, None, self.args.num_features], name='wave_input')
    # CTC targets are fed as a sparse tensor of label indices
    self.targets = tf.sparse_placeholder(tf.int32, name='target')
    # Unpadded length (in time steps) of each example in the batch
    self.seq_len = tf.placeholder(tf.int32, [None], name='sequence_length')
    skip = 0
    '''
    Build a stack of dilated causal convolution layers
    '''
    # Initial non-causal convolution to expand the inputs to num_hidden feature channels
    h = conv1d(self.input_data, self.args.num_hidden, filter_width=self.args.filter_width, name='conv_in', normalization=self.args.layer_norm, activation=tf.nn.tanh)
    # Stack num_blocks blocks; each block is one full set of dilated convolution layers
    for blocks in range(self.args.num_blocks):
        # Within a block, the dilation rate doubles at every layer
        for dilated in range(self.args.num_wavenet_layers):
            # 1, 2, 4, 8, 16, ...
            rate = 2**dilated
            # res_block returns the residual output h and the skip-path output s
            h, s = res_block(h, self.args.num_hidden, rate, self.args.causal, self.args.filter_width, normalization=self.args.layer_norm, activation=self.args.dilated_activation, name='{}block_{}layer'.format(blocks+1, dilated+1))
            # Accumulate the skip connections from every layer
            skip += s
    # Post-process the accumulated skip connections
    with tf.variable_scope('postprocessing'):
        # ReLU followed by two convolutions over the summed skip outputs
        skip = conv1d(tf.nn.relu(skip), self.args.num_hidden, filter_width=self.args.skip_filter_width, activation=tf.nn.relu, normalization=self.args.layer_norm, name='conv_out1')
        hidden = conv1d(skip, self.args.num_hidden, filter_width=self.args.skip_filter_width, activation=tf.nn.relu, normalization=self.args.layer_norm, name='conv_out2')
        # Final 1x1 convolution maps to per-frame class logits
        self.logits = conv1d(hidden, self.args.num_classes, filter_width=1, activation=None, normalization=self.args.layer_norm, name='conv_out3')
    self.probability = tf.nn.softmax(self.logits)
    # tf.nn.ctc_loss expects time-major logits: [max_time, batch_size, num_classes]
    self.logits_reshaped = tf.transpose(self.logits, [1, 0, 2])
    self.loss = tf.reduce_mean(tf.nn.ctc_loss(labels=self.targets, inputs=self.logits_reshaped, sequence_length=self.seq_len))
    self.decoded, _ = tf.nn.ctc_greedy_decoder(self.logits_reshaped, self.seq_len)
    # Label error rate: edit distance between the greedy decoding and the targets
    # (tf.edit_distance normalizes by the target length by default)
    self.ler = tf.reduce_mean(tf.edit_distance(tf.cast(self.decoded[0], tf.int32), self.targets))
    # When batch normalization (tf.contrib.layers.batch_norm) is used, its moving-average
    # update ops are placed in tf.GraphKeys.UPDATE_OPS, so they must be added as a control
    # dependency of the train op; layer_norm adds no such ops, so the dependency is then empty.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Simpler alternative without gradient clipping:
    # with tf.control_dependencies(update_ops):
    #     self.train_op = tf.train.AdamOptimizer(self.args.learning_rate).minimize(self.loss)
    # Log the trainable variables for inspection
    trainable_vr = tf.trainable_variables()
    for i in trainable_vr:
        print(i.name)
    optimizer = tf.train.AdamOptimizer(self.args.learning_rate)
    grad, vrbs = zip(*optimizer.compute_gradients(self.loss))
    # tf.clip_by_global_norm returns (list_clipped, global_norm), where global_norm is the
    # L2 norm of all gradients taken together. If global_norm exceeds maxgrad, every
    # gradient is rescaled by maxgrad / global_norm. This is the recommended way to clip
    # gradients, and entries whose gradient is None are passed through unchanged.
    grads, _ = tf.clip_by_global_norm(grad, self.args.maxgrad)
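    # Worked example (illustrative numbers only, not from this model): two gradients with
    # L2 norms 3.0 and 4.0 give global_norm = sqrt(3**2 + 4**2) = 5.0; with maxgrad = 2.5
    # each gradient is scaled by 2.5 / 5.0 = 0.5, so the clipped norms become 1.5 and 2.0
    # and the new global norm is exactly 2.5.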
    with tf.control_dependencies(update_ops):
        self.train_op = optimizer.apply_gradients(zip(grads, vrbs))
    self.saver = tf.train.Saver()
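
# A minimal training-step sketch, not part of the original code: it assumes `model` is an
# instance exposing the placeholders and ops built above, `sess` is a tf.Session with
# variables already initialized, `features` is a padded [batch, max_time, num_features]
# array, `seq_lengths` holds the unpadded length of each utterance, and the target batch
# has already been converted to the (indices, values, dense_shape) triplet that a
# tf.sparse_placeholder expects.
def run_train_step(sess, model, features, label_indices, label_values, label_shape, seq_lengths):
    feed = {
        model.input_data: features,
        model.targets: tf.SparseTensorValue(label_indices, label_values, label_shape),
        model.seq_len: seq_lengths,
    }
    # One optimization step; also fetch the CTC loss and label error rate for monitoring.
    loss, ler, _ = sess.run([model.loss, model.ler, model.train_op], feed_dict=feed)
    return loss, ler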