wavenet_model.py source code

python

Project: Speech-Recognition-CTC    Author: yjhong89
def create_model(self):
        # Placeholders
        # [batch size, step, features]
        self.input_data = tf.placeholder(tf.float32, [None, None, self.args.num_features], name='wave_input')
        self.targets = tf.sparse_placeholder(tf.int32, name='target')
        self.seq_len = tf.placeholder(tf.int32, [None], name='sequence_length')

        skip = 0
        '''
            Construct a stack of dilated causal convolutional layers
        '''
        # Initial (non-causal) convolution to expand the input features to num_hidden channels
        h = conv1d(self.input_data, self.args.num_hidden, filter_width=self.args.filter_width, name='conv_in', normalization=self.args.layer_norm, activation=tf.nn.tanh)
        # One pass per block; each block is a full stack of dilated convolution layers
        for blocks in range(self.args.num_blocks):
            # Build the dilated layers of this block
            for dilated in range(self.args.num_wavenet_layers):
                # [1,2,4,8,16..]
                rate = 2**dilated 
                h, s = res_block(h, self.args.num_hidden, rate, self.args.causal, self.args.filter_width, normalization=self.args.layer_norm, activation=self.args.dilated_activation, name='{}block_{}layer'.format(blocks+1, dilated+1))
                skip += s
        # Post-process the accumulated skip connections
        with tf.variable_scope('postprocessing'):
            # Output convolutions (skip_filter_width, then a final 1x1 for class logits)
            skip = conv1d(tf.nn.relu(skip), self.args.num_hidden, filter_width=self.args.skip_filter_width, activation=tf.nn.relu, normalization=self.args.layer_norm, name='conv_out1')
            hidden = conv1d(skip, self.args.num_hidden, filter_width=self.args.skip_filter_width, activation=tf.nn.relu, normalization=self.args.layer_norm, name='conv_out2')
            self.logits = conv1d(hidden, self.args.num_classes, filter_width=1, activation=None, normalization=self.args.layer_norm, name='conv_out3')

        self.probability = tf.nn.softmax(self.logits)

        # tf.nn.ctc_loss expects time-major logits: [max_time, batch_size, num_classes]
        self.logits_reshaped = tf.transpose(self.logits, [1,0,2])
        self.loss = tf.reduce_mean(tf.nn.ctc_loss(labels=self.targets, inputs=self.logits_reshaped, sequence_length=self.seq_len))
        self.decoded, _ = tf.nn.ctc_greedy_decoder(self.logits_reshaped, self.seq_len)  
        self.ler = tf.reduce_mean(tf.edit_distance(tf.cast(self.decoded[0], tf.int32), self.targets))
        # When tf.contrib.layers.layer_norm / batch_norm is used, its update ops are placed in tf.GraphKeys.UPDATE_OPS and must be added as a dependency of the train op
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        #with tf.control_dependencies(update_ops):
            #self.train_op = tf.train.AdamOptimizer(self.args.learning_rate).minimize(self.loss)
        trainable_vr = tf.trainable_variables()
        for i in trainable_vr:
            print(i.name)
        optimizer = tf.train.AdamOptimizer(self.args.learning_rate)
        grad, vrbs = zip(*optimizer.compute_gradients(self.loss))
        # clip_by_global_norm returns (list_clipped, global_norm); global_norm is the L2 norm of all gradients combined
        # If global_norm exceeds the clip norm (maxgrad), every gradient is scaled by maxgrad / global_norm
        # This is the recommended way to clip gradients; None gradients are passed through untouched
        grads, _ = tf.clip_by_global_norm(grad, self.args.maxgrad)
        with tf.control_dependencies(update_ops):
            self.train_op = optimizer.apply_gradients(zip(grads, vrbs)) 

        self.saver = tf.train.Saver()
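
A minimal usage sketch, not part of the project: it assumes a hypothetical batch of features batch_inputs of shape [batch, max_time, num_features], per-example lengths batch_seq_len, label ID lists batch_labels, and a hypothetical helper to_sparse_tuple that builds the (indices, values, dense_shape) triple expected when feeding tf.sparse_placeholder.

import numpy as np
import tensorflow as tf

def to_sparse_tuple(sequences):
    # Convert a list of label ID lists into the (indices, values, dense_shape)
    # triple that a tf.sparse_placeholder feed expects.
    indices, values = [], []
    for n, seq in enumerate(sequences):
        indices.extend([(n, t) for t in range(len(seq))])
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=np.int32)
    shape = np.asarray([len(sequences), max(len(s) for s in sequences)], dtype=np.int64)
    return indices, values, shape

# model = WaveNetModel(args); model.create_model()    # hypothetical construction
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     feed = {model.input_data: batch_inputs,          # [batch, max_time, num_features]
#             model.seq_len: batch_seq_len,            # [batch]
#             model.targets: to_sparse_tuple(batch_labels)}
#     loss, ler, _ = sess.run([model.loss, model.ler, model.train_op], feed_dict=feed)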
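For reference, the scaling described in the gradient-clipping comment can be reproduced in plain NumPy. This is only an illustration of the formula clipped_i = grad_i * maxgrad / max(global_norm, maxgrad), not code from the project.

import numpy as np

def clip_by_global_norm_np(grads, clip_norm):
    # NumPy illustration of tf.clip_by_global_norm: compute the global L2 norm
    # over all gradients; if it exceeds clip_norm, rescale every gradient by
    # clip_norm / global_norm so the rescaled global norm equals clip_norm.
    global_norm = np.sqrt(sum(np.sum(np.square(g)) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads], global_norm

grads = [np.array([3.0, 4.0]), np.array([12.0])]   # global norm = sqrt(9 + 16 + 144) = 13
clipped, norm = clip_by_global_norm_np(grads, clip_norm=5.0)
print(norm)        # 13.0
print(clipped[0])  # [3*5/13, 4*5/13] ≈ [1.15, 1.54]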
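The dilation schedule in the loop above (rates 1, 2, 4, ... repeated num_blocks times) fixes the receptive field of the stack. A small sketch of the standard formula, under the assumption that every residual block uses the same filter_width and that the initial conv_in layer is ignored:

def receptive_field(num_blocks, num_layers, filter_width):
    # Each dilated causal conv layer with dilation rate r adds (filter_width - 1) * r
    # time steps to the receptive field; rates 1, 2, 4, ... are repeated per block.
    # (Ignores the extra context contributed by the initial conv_in layer.)
    rf = 1
    for _ in range(num_blocks):
        for layer in range(num_layers):
            rf += (filter_width - 1) * (2 ** layer)
    return rf

# e.g. 3 blocks of 5 dilated layers with filter width 2 -> 1 + 3 * (1+2+4+8+16) = 94
print(receptive_field(num_blocks=3, num_layers=5, filter_width=2))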