python类l2_regularizer()的实例源码

ssd.py 文件源码 项目:SSD_tensorflow_VOC 作者: LevinJ 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def __arg_scope(self, weight_decay=0.0005, data_format='NHWC'):
        """Build the nested slim arg_scope used by the network layers.

        Three scopes are stacked:
          * conv2d / fully_connected: ReLU activation, L2 weight regularizer,
            Xavier weight initializer, zero bias initializer.
          * conv2d / max_pool2d: 'SAME' padding and the given data format.
          * custom pad2d / l2_normalization / channel_to_last layers: the
            given data format.

        Args:
          weight_decay: The l2 regularization coefficient.
          data_format: Data format string forwarded to the layers above.

        Returns:
          The scope object yielded by the innermost slim.arg_scope.
        """
        weighted_layers = [slim.conv2d, slim.fully_connected]
        spatial_layers = [slim.conv2d, slim.max_pool2d]
        weighted_defaults = dict(
            activation_fn=tf.nn.relu,
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            biases_initializer=tf.zeros_initializer())

        with slim.arg_scope(weighted_layers, **weighted_defaults):
            with slim.arg_scope(spatial_layers,
                                padding='SAME',
                                data_format=data_format):
                format_aware_layers = [custom_layers.pad2d,
                                       custom_layers.l2_normalization,
                                       custom_layers.channel_to_last]
                with slim.arg_scope(format_aware_layers,
                                    data_format=data_format) as sc:
                    return sc
began_model.py 文件源码 项目:Awesome-GANs 作者: kozistr 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def encoder(self, x, embedding, reuse=None):
        """Encode an image batch into a flat feature vector.

        Applies an initial 3x3 convolution, then `self.conv_repeat_num` stages
        of two 3x3 convolutions whose width grows as embedding * (stage + 1).
        Every stage except the last is followed by 2x2 average pooling.

        Args:
          x: Input tensor fed to slim.conv2d.
          embedding: Base number of convolution channels.
          reuse: Forwarded to tf.variable_scope("encoder").

        Returns:
          The final feature map reshaped to [-1, 8 * 8 * channel_num].
        """
        with tf.variable_scope("encoder", reuse=reuse):
            # slim.conv2d names this argument `biases_initializer`; the former
            # `bias_initializer` spelling is not an argument of slim.conv2d.
            with slim.arg_scope([slim.conv2d],
                                stride=1, activation_fn=tf.nn.elu, padding="SAME",
                                weights_initializer=tf.contrib.layers.variance_scaling_initializer(),
                                weights_regularizer=slim.l2_regularizer(5e-4),
                                biases_initializer=tf.zeros_initializer()):
                x = slim.conv2d(x, embedding, 3)

                # Channel count of the stem conv; keeps the final reshape
                # well-defined even when conv_repeat_num is 0.
                channel_num = embedding
                for i in range(self.conv_repeat_num):
                    channel_num = embedding * (i + 1)
                    x = slim.repeat(x, 2, slim.conv2d, channel_num, 3)
                    if i < self.conv_repeat_num - 1:
                        # Sub-sample with average pooling (strided conv and max
                        # pooling are the alternatives that were considered).
                        x = slim.avg_pool2d(x, kernel_size=2, stride=2)

                # NOTE(review): hard-codes an 8x8 final feature map -- confirm
                # against the input resolution and conv_repeat_num in use.
                x = tf.reshape(x, [-1, np.prod([8, 8, channel_num])])
        return x
began_model.py 文件源码 项目:Awesome-GANs 作者: kozistr 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def decoder(self, z, embedding, reuse=None):
        """Decode a latent vector into a 3-channel image tensor.

        Projects `z` to an 8x8x`embedding` map with a linear layer, then runs
        `self.conv_repeat_num` stages of two 3x3 ELU convolutions. Every stage
        except the last is followed by `resize_nn(x, 2)` up-sampling. A final
        linear 3x3 convolution produces 3 output channels.

        Args:
          z: Latent input fed to slim.fully_connected.
          embedding: Number of channels of every hidden convolution.
          reuse: Forwarded to tf.variable_scope("decoder").

        Returns:
          The output of the final 3-channel convolution (no activation).
        """
        with tf.variable_scope("decoder", reuse=reuse):
            # slim layers name this argument `biases_initializer`; the former
            # `bias_initializer` spelling is not accepted by slim.conv2d or
            # slim.fully_connected.
            with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                weights_initializer=tf.contrib.layers.variance_scaling_initializer(),
                                weights_regularizer=slim.l2_regularizer(5e-4),
                                biases_initializer=tf.zeros_initializer()):
                with slim.arg_scope([slim.conv2d], padding="SAME",
                                    activation_fn=tf.nn.elu, stride=1):
                    x = slim.fully_connected(z, 8 * 8 * embedding, activation_fn=None)
                    x = tf.reshape(x, [-1, 8, 8, embedding])

                    for i in range(self.conv_repeat_num):
                        x = slim.repeat(x, 2, slim.conv2d, embedding, 3)
                        if i < self.conv_repeat_num - 1:
                            x = resize_nn(x, 2)  # NN up-sampling

                    x = slim.conv2d(x, 3, 3, activation_fn=None)
        return x
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a model which uses a logistic classifier over the average of the
    frame-level features.

    This class is intended to be an example for implementors of frame level
    models. If you want to train a model over averaged features it is more
    efficient to average them beforehand rather than on the fly.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding).
      **unused_params: Ignored.

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
    feature_size = model_input.get_shape().as_list()[2]

    # Per-video frame count repeated across the feature axis so the summed
    # features can be divided element-wise.
    # NOTE(review): a video with num_frames == 0 divides by zero here.
    denominators = tf.reshape(
        tf.tile(num_frames, [1, feature_size]), [-1, feature_size])
    avg_pooled = tf.reduce_sum(model_input,
                               axis=[1]) / denominators
    output = slim.fully_connected(
        avg_pooled, vocab_size, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(1e-8))
    return {"predictions": output}
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def sub_moe(self,
                model_input,
                vocab_size,
                num_mixtures = None,
                l2_penalty=1e-8,
                scopename="",
                **unused_params):
        """Mixture-of-experts classifier head.

        A bias-free linear "gates" layer yields num_mixtures + 1 logits per
        class, which are soft-maxed; a linear "experts" layer yields
        num_mixtures logits per class, which are passed through a sigmoid.
        The class probability is the gate-weighted sum over the first
        num_mixtures gate entries (the extra gate entry carries no expert).

        Args:
          model_input: Input features for both linear layers.
          vocab_size: Number of classes.
          num_mixtures: Experts per class; FLAGS.moe_num_mixtures when falsy.
          l2_penalty: Scale of the L2 regularizer on both weight matrices.
          scopename: Suffix appended to the "gates" / "experts" scope names.
          **unused_params: Ignored.

        Returns:
          A tuple (model_input, final_probabilities) where final_probabilities
          is reshaped to [-1, vocab_size].
        """
        if not num_mixtures:
            num_mixtures = FLAGS.moe_num_mixtures
        num_gates = num_mixtures + 1

        gate_logits = slim.fully_connected(
            model_input,
            vocab_size * num_gates,
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates"+scopename)
        expert_logits = slim.fully_connected(
            model_input,
            vocab_size * num_mixtures,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="experts"+scopename)

        # (Batch * #Labels) x (num_mixtures + 1)
        gate_rows = tf.reshape(gate_logits, [-1, num_gates])
        gating = tf.nn.softmax(gate_rows)
        # (Batch * #Labels) x num_mixtures
        expert_rows = tf.reshape(expert_logits, [-1, num_mixtures])
        expert_probs = tf.nn.sigmoid(expert_rows)

        # Drop the last gate column before mixing the expert outputs.
        mixed = tf.reduce_sum(gating[:, :num_mixtures] * expert_probs, 1)

        final_probabilities = tf.reshape(mixed, [-1, vocab_size])
        return model_input, final_probabilities
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     **unused_params):
        """Ensemble three MoE heads fed with max-, mean- and min-pooled frames.

        Frames whose absolute feature sum is zero are excluded from the mean.
        The three heads are combined with a softmax weighting (over the three
        heads) computed from the pooled features and a learned
        "ensemble_weight2d" tensor.

        Args:
          model_input: Rank-3 frame features; axes 1 and 2 are read from the
            static shape.
          vocab_size: Number of classes.
          num_frames: Not referenced in this body.
          **unused_params: Ignored.

        Returns:
          A dict with "prediction_frames" (the per-head probabilities reshaped
          to [-1, vocab_size]) and "predictions" (the weighted combination).
        """
        static_shape = model_input.get_shape().as_list()

        # 1.0 for frames with any non-zero feature, 0.0 otherwise.
        abs_sum = tf.reduce_sum(tf.abs(model_input),axis=2)
        ones = tf.ones(tf.shape(abs_sum))
        zeros = tf.zeros(tf.shape(abs_sum))
        is_real_frame = tf.where(tf.greater(abs_sum, zeros), ones, zeros)
        frame_mask = tf.reshape(is_real_frame,[-1,static_shape[1],1])

        pooled_max = tf.reduce_max(model_input, axis=1)
        # 1e-6 guards against a video with no non-zero frames.
        pooled_mean = tf.reduce_sum(model_input*frame_mask, axis=1)/(tf.reduce_sum(frame_mask, axis=1)+1e-6)
        pooled_min = tf.reduce_min(model_input, axis=1)

        feats_max, probs_max = self.sub_moe(pooled_max,vocab_size,scopename="_max")
        feats_mean, probs_mean = self.sub_moe(pooled_mean,vocab_size,scopename="_mean")
        feats_min, probs_min = self.sub_moe(pooled_min,vocab_size,scopename="_min")
        head_probs = tf.stack((probs_max,probs_mean,probs_min),axis=1)

        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[static_shape[2], 3, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        head_feats = tf.stack((feats_max, feats_mean, feats_min), axis=2)
        head_logits = tf.einsum("aij,ijk->ajk", head_feats, weight2d)
        head_weights = tf.nn.softmax(head_logits, dim=1)

        return {
            "prediction_frames": tf.reshape(head_probs,[-1,vocab_size]),
            "predictions": tf.reduce_sum(head_probs*head_weights,axis=1),
        }
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def cnn(self,
            model_input,
            l2_penalty=1e-8,
            num_filters = [1024, 1024, 1024],
            filter_sizes = [1,2,3],
            sub_scope="",
            **unused_params):
        """Apply time-shifted linear filters of several widths, then batch norm.

        For each (nf, fs) pair from num_filters / filter_sizes the input is
        concatenated on the last axis with copies of itself delayed by
        1..fs-1 steps along axis 1 (zero-padded at the front and cropped to
        the original length), then projected to nf channels by a learned
        [num_features*fs, nf] matrix. The per-width outputs are concatenated
        on the last axis and passed through slim.batch_norm.

        Args:
          model_input: Rank-3 tensor whose axes 1 and 2 have static sizes.
          l2_penalty: Scale of the L2 regularizer on every filter matrix.
          num_filters: Output channels for each filter width.
          filter_sizes: Number of time steps each filter spans.
          sub_scope: Prefix for the variable and batch-norm scope names.
          **unused_params: Ignored.

        Returns:
          A tuple (cnn_output, max_frames): the batch-normalized features and
          the static size of axis 1 of the input.
        """
        static_shape = model_input.get_shape().as_list()
        max_frames, num_features = static_shape[1], static_shape[2]

        # Entry k is the input delayed by k steps along the time axis.
        delayed = [
            model_input if k == 0
            else tf.pad(model_input, paddings=[[0,0],[k,0],[0,0]])[:,:max_frames,:]
            for k in range(max(filter_sizes))
        ]

        branches = []
        for nf, fs in zip(num_filters, filter_sizes):
            window = tf.concat(delayed[:fs], axis=2)
            kernel = tf.get_variable(
                sub_scope+"cnn-filter-len%d"%fs,
                shape=[num_features*fs, nf],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
            branches.append(tf.einsum("ijk,kl->ijl", window, kernel))

        cnn_output = slim.batch_norm(
            tf.concat(branches, axis=2),
            center=True,
            scale=True,
            is_training=FLAGS.train,
            scope=sub_scope+"cluster_bn")
        return cnn_output, max_frames
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
        """Multi-scale CNN -> RNN -> MoE ensemble over FLAGS.moe_num_extend scales.

        At every scale the input goes through self.cnn and ReLU, is summarized
        by self.rnn and classified by self.sub_moe. The CNN features are then
        max-pooled in pairs along the time axis to form the next scale's input,
        and num_frames is floor-divided by two (minimum 1). The per-scale
        predictions are combined with a softmax weighting over scales computed
        from the per-scale RNN outputs and a learned "ensemble_weight2d".

        Args:
          model_input: Frame-level features passed to self.cnn.
          vocab_size: Number of classes.
          num_frames: Per-video frame counts passed to self.rnn.
          l2_penalty: Not referenced in this body.
          **unused_params: Ignored.

        Returns:
          A dict with "prediction_frames" (per-scale predictions reshaped to
          [-1, vocab_size]) and "predictions" (their weighted sum over scales).
        """
        num_extend = FLAGS.moe_num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size = 2
        num_filters = [256, 256, 512]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)

        scale_probs = []
        scale_feats = []
        scale_input = model_input
        for layer in range(num_extend):
            tag = layer + 1
            conv_out, num_t = self.cnn(scale_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%tag)
            conv_out = tf.nn.relu(conv_out)
            summary = self.rnn(conv_out,lstm_size, num_frames,sub_scope="rnn%d"%tag)
            scale_feats.append(summary)
            # NOTE(review): assumes self.sub_moe returns a single
            # [batch, vocab_size] tensor here -- confirm against the class.
            scale_probs.append(self.sub_moe(summary,vocab_size,scopename="moe%d"%tag))

            # Drop a trailing odd frame, then max-pool adjacent frame pairs.
            pooled_len = num_t//pool_size
            num_t = pool_size*pooled_len
            paired = tf.reshape(conv_out[:,:num_t,:],[-1,pooled_len,pool_size,features_size])
            scale_input = tf.reduce_max(paired, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(scale_probs,axis=1)
        moe_inputs = tf.stack(scale_feats,axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        scale_logits = tf.einsum("aij,ijk->aik", moe_inputs, weight2d)
        weight = tf.nn.softmax(scale_logits, dim=1)
        return {
            "prediction_frames": tf.reshape(final_probilities,[-1,vocab_size]),
            "predictions": tf.reduce_sum(final_probilities*weight,axis=1),
        }
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def cnn(self,
            model_input,
            l2_penalty=1e-8,
            num_filters = [1024, 1024, 1024],
            filter_sizes = [1,2,3],
            sub_scope="",
            **unused_params):
        """Multi-width temporal filter bank followed by batch normalization.

        Each filter of width fs sees the current step and the fs-1 previous
        steps along axis 1: the input is stacked on the last axis with
        front-zero-padded, length-cropped copies of itself and multiplied by a
        learned [num_features*fs, nf] matrix. Outputs for all widths are
        concatenated on the last axis and batch-normalized.

        Args:
          model_input: Rank-3 tensor whose axes 1 and 2 have static sizes.
          l2_penalty: Scale of the L2 regularizer on every filter matrix.
          num_filters: Output channels for each filter width.
          filter_sizes: Number of time steps each filter spans.
          sub_scope: Prefix for the variable and batch-norm scope names.
          **unused_params: Ignored.

        Returns:
          A tuple (cnn_output, max_frames): the batch-normalized features and
          the static size of axis 1 of the input.
        """
        dims = model_input.get_shape().as_list()
        max_frames = dims[1]
        num_features = dims[2]

        def _delay(steps):
            # Shift the sequence `steps` positions later, keeping its length.
            if steps == 0:
                return model_input
            padded = tf.pad(model_input, paddings=[[0,0],[steps,0],[0,0]])
            return padded[:,:max_frames,:]

        shifted = [_delay(steps) for steps in range(max(filter_sizes))]

        per_width = []
        for nf, fs in zip(num_filters, filter_sizes):
            stacked = tf.concat(shifted[:fs], axis=2)
            weights = tf.get_variable(
                sub_scope+"cnn-filter-len%d"%fs,
                shape=[num_features*fs, nf],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
            per_width.append(tf.einsum("ijk,kl->ijl", stacked, weights))

        merged = tf.concat(per_width, axis=2)
        normalized = slim.batch_norm(
            merged,
            center=True,
            scale=True,
            is_training=FLAGS.train,
            scope=sub_scope+"cluster_bn")
        return normalized, max_frames
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def create_model(self, model_input, vocab_size, num_frames, distill_labels=None, l2_penalty=1e-8, **unused_params):
        """Multi-scale CNN -> RNN -> MoE ensemble with optional distillation labels.

        Runs FLAGS.moe_num_extend scales. At each scale the input goes through
        self.cnn (widths 1/2/3 with cnn_size, cnn_size and 2*cnn_size filters,
        cnn_size = FLAGS.cnn_cells) and ReLU, is summarized by self.rnn and
        classified by self.sub_moe, which also receives distill_labels. CNN
        features are max-pooled in pairs along time for the next scale and
        num_frames is floor-divided by two (minimum 1). Per-scale predictions
        are merged by a softmax weighting over scales derived from the RNN
        outputs and a learned "ensemble_weight2d".

        Args:
          model_input: Frame-level features passed to self.cnn.
          vocab_size: Number of classes.
          num_frames: Per-video frame counts passed to self.rnn.
          distill_labels: Forwarded unchanged to self.sub_moe.
          l2_penalty: Not referenced in this body.
          **unused_params: Ignored.

        Returns:
          A dict with "prediction_frames" (per-scale predictions reshaped to
          [-1, vocab_size]) and "predictions" (their weighted sum over scales).
        """
        num_extend = FLAGS.moe_num_extend
        lstm_size = FLAGS.lstm_cells
        cnn_size = FLAGS.cnn_cells
        pool_size = 2
        num_filters = [cnn_size, cnn_size, cnn_size*2]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)

        scale_probs = []
        scale_feats = []
        scale_input = model_input
        for layer in range(num_extend):
            tag = layer + 1
            conv_out, num_t = self.cnn(scale_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%tag)
            conv_out = tf.nn.relu(conv_out)
            summary = self.rnn(conv_out,lstm_size, num_frames,sub_scope="rnn%d"%tag)
            scale_feats.append(summary)
            scale_probs.append(
                self.sub_moe(summary,vocab_size,distill_labels=distill_labels, scopename="moe%d"%tag))

            # Drop a trailing odd frame, then max-pool adjacent frame pairs.
            pooled_len = num_t//pool_size
            num_t = pool_size*pooled_len
            paired = tf.reshape(conv_out[:,:num_t,:],[-1,pooled_len,pool_size,features_size])
            scale_input = tf.reduce_max(paired, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(scale_probs,axis=1)
        moe_inputs = tf.stack(scale_feats,axis=1)
        # The weighting tensor is sized by lstm_size, i.e. it expects the RNN
        # summaries to have lstm_size features.
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, lstm_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        scale_logits = tf.einsum("aij,ijk->aik", moe_inputs, weight2d)
        weight = tf.nn.softmax(scale_logits, dim=1)
        return {
            "prediction_frames": tf.reshape(final_probilities,[-1,vocab_size]),
            "predictions": tf.reduce_sum(final_probilities*weight,axis=1),
        }
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def cnn(self,
            model_input,
            l2_penalty=1e-8,
            num_filters=[1024,1024,1024],
            filter_sizes=[1,2,3],
            sub_scope="",
            **unused_params):
        """Apply time-shifted linear filters of several widths, then batch norm.

        For each (nf, fs) pair from num_filters / filter_sizes the input is
        concatenated on the last axis with copies of itself delayed by
        1..fs-1 steps along axis 1 (zero-padded at the front and cropped to
        the original length), then projected to nf channels by a learned
        [num_features*fs, nf] matrix. The per-width outputs are concatenated
        on the last axis and passed through slim.batch_norm.

        Args:
          model_input: Rank-3 tensor whose axes 1 and 2 have static sizes.
          l2_penalty: Scale of the L2 regularizer on every filter matrix.
          num_filters: Output channels for each filter width.
          filter_sizes: Number of time steps each filter spans.
          sub_scope: Prefix for the variable and batch-norm scope names.
          **unused_params: Ignored.

        Returns:
          A tuple (cnn_output, max_frames): the batch-normalized features and
          the static size of axis 1 of the input.
        """
        static_shape = model_input.get_shape().as_list()
        max_frames, num_features = static_shape[1], static_shape[2]

        # Entry k is the input delayed by k steps along the time axis.
        delayed = [
            model_input if k == 0
            else tf.pad(model_input, paddings=[[0,0],[k,0],[0,0]])[:,:max_frames,:]
            for k in range(max(filter_sizes))
        ]

        branches = []
        for nf, fs in zip(num_filters, filter_sizes):
            window = tf.concat(delayed[:fs], axis=2)
            kernel = tf.get_variable(
                sub_scope+"cnn-filter-len%d"%fs,
                shape=[num_features*fs, nf],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
            branches.append(tf.einsum("ijk,kl->ijl", window, kernel))

        cnn_output = slim.batch_norm(
            tf.concat(branches, axis=2),
            center=True,
            scale=True,
            is_training=FLAGS.train,
            scope=sub_scope+"cluster_bn")
        return cnn_output, max_frames
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def cnn(self,
            model_input,
            l2_penalty=1e-8,
            num_filters=[1024,1024,1024],
            filter_sizes=[1,2,3],
            sub_scope="",
            **unused_params):
        """Multi-width temporal filter bank followed by batch normalization.

        Each filter of width fs sees the current step and the fs-1 previous
        steps along axis 1: the input is stacked on the last axis with
        front-zero-padded, length-cropped copies of itself and multiplied by a
        learned [num_features*fs, nf] matrix. Outputs for all widths are
        concatenated on the last axis and batch-normalized.

        Args:
          model_input: Rank-3 tensor whose axes 1 and 2 have static sizes.
          l2_penalty: Scale of the L2 regularizer on every filter matrix.
          num_filters: Output channels for each filter width.
          filter_sizes: Number of time steps each filter spans.
          sub_scope: Prefix for the variable and batch-norm scope names.
          **unused_params: Ignored.

        Returns:
          A tuple (cnn_output, max_frames): the batch-normalized features and
          the static size of axis 1 of the input.
        """
        dims = model_input.get_shape().as_list()
        max_frames = dims[1]
        num_features = dims[2]

        def _delay(steps):
            # Shift the sequence `steps` positions later, keeping its length.
            if steps == 0:
                return model_input
            padded = tf.pad(model_input, paddings=[[0,0],[steps,0],[0,0]])
            return padded[:,:max_frames,:]

        shifted = [_delay(steps) for steps in range(max(filter_sizes))]

        per_width = []
        for nf, fs in zip(num_filters, filter_sizes):
            stacked = tf.concat(shifted[:fs], axis=2)
            weights = tf.get_variable(
                sub_scope+"cnn-filter-len%d"%fs,
                shape=[num_features*fs, nf],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
            per_width.append(tf.einsum("ijk,kl->ijl", stacked, weights))

        merged = tf.concat(per_width, axis=2)
        normalized = slim.batch_norm(
            merged,
            center=True,
            scale=True,
            is_training=FLAGS.train,
            scope=sub_scope+"cluster_bn")
        return normalized, max_frames
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
        """Multi-scale CNN -> rnn_glu -> MoE model that averages the scales.

        Runs FLAGS.moe_num_extend scales. At each scale the input goes through
        self.cnn and ReLU, is summarized by self.rnn_glu and classified by
        self.sub_moe. CNN features are max-pooled in pairs along time for the
        next scale and num_frames is floor-divided by two (minimum 1). The
        final prediction is the plain mean of the per-scale predictions.

        Args:
          model_input: Frame-level features passed to self.cnn.
          vocab_size: Number of classes.
          num_frames: Per-video frame counts passed to self.rnn_glu.
          l2_penalty: Not referenced in this body.
          **unused_params: Ignored.

        Returns:
          A dict with "prediction_frames" (per-scale predictions reshaped to
          [-1, vocab_size]) and "predictions" (their mean over scales).
        """
        num_extend = FLAGS.moe_num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size = 2
        num_filters = [256, 256, 512]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)

        scale_probs = []
        scale_feats = []
        scale_input = model_input
        for layer in range(num_extend):
            tag = layer + 1
            conv_out, num_t = self.cnn(scale_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%tag)
            conv_out = tf.nn.relu(conv_out)
            summary = self.rnn_glu(conv_out, lstm_size, num_frames, sub_scope="rnn%d"%tag)
            scale_feats.append(summary)
            scale_probs.append(self.sub_moe(summary, vocab_size, scopename="moe%d"%tag))

            # Drop a trailing odd frame, then max-pool adjacent frame pairs.
            pooled_len = num_t//pool_size
            num_t = pool_size*pooled_len
            paired = tf.reshape(conv_out[:,:num_t,:],[-1,pooled_len,pool_size,features_size])
            scale_input = tf.reduce_max(paired, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(scale_probs, axis=1)
        moe_inputs = tf.stack(scale_feats, axis=1)
        # NOTE(review): the ensemble weighting below is still built (so the
        # "ensemble_weight2d" variable exists in the graph) but `weight` does
        # not feed into the returned predictions, which use a plain mean.
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        scale_logits = tf.einsum("aij,ijk->aik", moe_inputs, weight2d)
        weight = tf.nn.softmax(scale_logits, dim=1)
        return {
            "prediction_frames": tf.reshape(final_probilities,[-1, vocab_size]),
            "predictions": tf.reduce_mean(final_probilities, axis=1),
        }
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
        """Train only the ensemble weighting on top of LstmMultiscaleModel parts.

        Builds the same per-scale CNN -> RNN -> MoE pipeline using the cnn,
        rnn and sub_moe methods of freshly constructed LstmMultiscaleModel
        instances. Both the per-scale features and the per-scale predictions
        are wrapped in tf.stop_gradient before they meet "ensemble_weight2d",
        so gradients of the returned predictions reach only that variable.

        Args:
          model_input: Frame-level features passed to the cnn method.
          vocab_size: Number of classes.
          num_frames: Per-video frame counts passed to the rnn method.
          l2_penalty: Not referenced in this body.
          **unused_params: Ignored.

        Returns:
          A dict with a single "predictions" entry: the softmax-weighted sum
          of the per-scale predictions over the scale axis.
        """
        num_extend = FLAGS.moe_num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size = 2
        num_filters = [256, 256, 512]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)

        scale_probs = []
        scale_feats = []
        scale_input = model_input
        for layer in range(num_extend):
            tag = layer + 1
            conv_out, num_t = LstmMultiscaleModel().cnn(scale_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%tag)
            conv_out = tf.nn.relu(conv_out)
            summary = LstmMultiscaleModel().rnn(conv_out,lstm_size, num_frames,sub_scope="rnn%d"%tag)
            scale_feats.append(summary)
            scale_probs.append(
                LstmMultiscaleModel().sub_moe(summary,vocab_size,scopename="moe%d"%tag))

            # Drop a trailing odd frame, then max-pool adjacent frame pairs.
            pooled_len = num_t//pool_size
            num_t = pool_size*pooled_len
            paired = tf.reshape(conv_out[:,:num_t,:],[-1,pooled_len,pool_size,features_size])
            scale_input = tf.reduce_max(paired, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(scale_probs, axis=1)
        moe_inputs = tf.stack(scale_feats, axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        frozen_feats = tf.stop_gradient(moe_inputs)
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", frozen_feats, weight2d), dim=1)
        frozen_probs = tf.stop_gradient(final_probilities)
        return {"predictions": tf.reduce_sum(frozen_probs*weight, axis=1)}
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
        """Ten-layer CNN stack followed by self.kmax and a weighted MoE ensemble.

        Ten self.cnn layers are chained; the first three are each followed by
        pairwise max-pooling along the time axis, the remaining seven feed
        their output straight into the next layer. The result goes through
        self.kmax (called with filter_sizes=FLAGS.moe_num_extend), is flattened
        to rows of features_size, classified by self.sub_moe and regrouped to
        [-1, num_extend, vocab_size]. Those predictions are merged by a
        softmax weighting (over axis 1) computed from the kmax output and a
        learned "ensemble_weight2d".

        Args:
          model_input: Frame-level features passed to self.cnn.
          vocab_size: Number of classes.
          num_frames: Not referenced in this body.
          l2_penalty: Not referenced in this body.
          **unused_params: Ignored.

        Returns:
          A dict with a single "predictions" entry.
        """
        num_extend = FLAGS.moe_num_extend
        num_layers = 10
        pool_size = 2
        num_filters = [256, 256, 512]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)

        stage_input = model_input
        for layer in range(num_layers):
            conv_out, num_t = self.cnn(stage_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            if layer >= 3:
                # Later layers keep the temporal resolution.
                stage_input = conv_out
                continue
            # Drop a trailing odd frame, then max-pool adjacent frame pairs.
            pooled_len = num_t//pool_size
            num_t = pool_size*pooled_len
            paired = tf.reshape(conv_out[:,:num_t,:],[-1,pooled_len,pool_size,features_size])
            stage_input = tf.reduce_max(paired, axis=2)

        kmax_out, num_t = self.kmax(stage_input, num_filters=features_size, filter_sizes=num_extend, sub_scope="kmax")
        flat_feats = tf.reshape(kmax_out,[-1,features_size])
        # NOTE(review): assumes self.sub_moe returns a single tensor that can
        # be reshaped -- confirm against the class this method belongs to.
        flat_probs = self.sub_moe(flat_feats,vocab_size)
        final_probilities = tf.reshape(flat_probs,[-1,num_extend,vocab_size])

        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", kmax_out, weight2d), dim=1)
        return {"predictions": tf.reduce_sum(final_probilities*weight,axis=1)}
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def cnn(self,
            model_input,
            l2_penalty=1e-8,
            num_filters = [1024, 1024, 1024],
            filter_sizes = [1,2,3],
            sub_scope="",
            **unused_params):
        """Apply time-shifted linear filters of several widths, then batch norm.

        For each (nf, fs) pair from num_filters / filter_sizes the input is
        concatenated on the last axis with copies of itself delayed by
        1..fs-1 steps along axis 1 (zero-padded at the front and cropped to
        the original length), then projected to nf channels by a learned
        [num_features*fs, nf] matrix. The per-width outputs are concatenated
        on the last axis and passed through slim.batch_norm.

        Args:
          model_input: Rank-3 tensor whose axes 1 and 2 have static sizes.
          l2_penalty: Scale of the L2 regularizer on every filter matrix.
          num_filters: Output channels for each filter width.
          filter_sizes: Number of time steps each filter spans.
          sub_scope: Prefix for the variable and batch-norm scope names.
          **unused_params: Ignored.

        Returns:
          A tuple (cnn_output, max_frames): the batch-normalized features and
          the static size of axis 1 of the input.
        """
        static_shape = model_input.get_shape().as_list()
        max_frames, num_features = static_shape[1], static_shape[2]

        # Entry k is the input delayed by k steps along the time axis.
        delayed = [
            model_input if k == 0
            else tf.pad(model_input, paddings=[[0,0],[k,0],[0,0]])[:,:max_frames,:]
            for k in range(max(filter_sizes))
        ]

        branches = []
        for nf, fs in zip(num_filters, filter_sizes):
            window = tf.concat(delayed[:fs], axis=2)
            kernel = tf.get_variable(
                sub_scope+"cnn-filter-len%d"%fs,
                shape=[num_features*fs, nf],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
            branches.append(tf.einsum("ijk,kl->ijl", window, kernel))

        cnn_output = slim.batch_norm(
            tf.concat(branches, axis=2),
            center=True,
            scale=True,
            is_training=FLAGS.train,
            scope=sub_scope+"cluster_bn")
        return cnn_output, max_frames
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
        """Multi-scale CNN -> max-over-time -> MoE ensemble.

        Runs FLAGS.moe_num_extend scales. At each scale the input goes through
        self.cnn and ReLU, is reduced with a max over axis 1 and classified by
        self.sub_moe. The CNN features are then max-pooled in pairs along the
        time axis to form the next scale's input. Per-scale predictions are
        merged by a softmax weighting over scales computed from the pooled
        features and a learned "ensemble_weight2d".

        Args:
          model_input: Frame-level features passed to self.cnn.
          vocab_size: Number of classes.
          num_frames: Not referenced in this body.
          l2_penalty: Not referenced in this body.
          **unused_params: Ignored.

        Returns:
          A dict with "prediction_frames" (per-scale predictions reshaped to
          [-1, vocab_size]) and "predictions" (their weighted sum over scales).
        """
        num_extend = FLAGS.moe_num_extend
        pool_size = 2
        num_filters = [256, 256, 512]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)

        scale_probs = []
        scale_feats = []
        scale_input = model_input
        for layer in range(num_extend):
            tag = layer + 1
            conv_out, num_t = self.cnn(scale_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%tag)
            conv_out = tf.nn.relu(conv_out)
            summary = tf.reduce_max(conv_out,axis=1)
            scale_feats.append(summary)
            # NOTE(review): assumes self.sub_moe returns a single
            # [batch, vocab_size] tensor here -- confirm against the class.
            scale_probs.append(self.sub_moe(summary,vocab_size,scopename="moe%d"%tag))

            # Drop a trailing odd frame, then max-pool adjacent frame pairs.
            pooled_len = num_t//pool_size
            num_t = pool_size*pooled_len
            paired = tf.reshape(conv_out[:,:num_t,:],[-1,pooled_len,pool_size,features_size])
            scale_input = tf.reduce_max(paired, axis=2)

        final_probilities = tf.stack(scale_probs,axis=1)
        moe_inputs = tf.stack(scale_feats,axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        scale_logits = tf.einsum("aij,ijk->aik", moe_inputs, weight2d)
        weight = tf.nn.softmax(scale_logits, dim=1)
        return {
            "prediction_frames": tf.reshape(final_probilities,[-1,vocab_size]),
            "predictions": tf.reduce_sum(final_probilities*weight,axis=1),
        }
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def cnn(self,
            model_input,
            l2_penalty=1e-8,
            num_filters = [1024, 1024, 1024],
            filter_sizes = [1,2,3],
            sub_scope="",
            **unused_params):
        """Time-shifted linear filters with bias for several widths, then batch norm.

        For each (nf, fs) pair from num_filters / filter_sizes the input is
        concatenated on the last axis with copies of itself delayed by
        1..fs-1 steps along axis 1 (zero-padded at the front and cropped to
        the original length), projected to nf channels by a learned
        [num_features*fs, nf] matrix and offset by a learned [nf] bias. The
        per-width outputs are concatenated on the last axis and passed through
        slim.batch_norm.

        Args:
          model_input: Rank-3 tensor whose axes 1 and 2 have static sizes.
          l2_penalty: Scale of the L2 regularizer applied to every filter
            matrix and every bias vector.
          num_filters: Output channels for each filter width.
          filter_sizes: Number of time steps each filter spans.
          sub_scope: Prefix for the variable and batch-norm scope names.
          **unused_params: Ignored.

        Returns:
          A tuple (cnn_output, max_frames): the batch-normalized features and
          the static size of axis 1 of the input.
        """
        static_shape = model_input.get_shape().as_list()
        max_frames, num_features = static_shape[1], static_shape[2]

        # Entry k is the input delayed by k steps along the time axis.
        delayed = [
            model_input if k == 0
            else tf.pad(model_input, paddings=[[0,0],[k,0],[0,0]])[:,:max_frames,:]
            for k in range(max(filter_sizes))
        ]

        branches = []
        for nf, fs in zip(num_filters, filter_sizes):
            window = tf.concat(delayed[:fs], axis=2)
            kernel = tf.get_variable(
                sub_scope+"cnn-filter-len%d"%fs,
                shape=[num_features*fs, nf],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
            # The bias is drawn from the same truncated normal as the kernel
            # and is L2-regularized too.
            offset = tf.get_variable(
                sub_scope+"cnn-bias-len%d"%fs,
                shape=[nf],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
            projected = tf.einsum("ijk,kl->ijl", window, kernel)
            branches.append(projected + offset)

        cnn_output = slim.batch_norm(
            tf.concat(branches, axis=2),
            center=True,
            scale=True,
            is_training=FLAGS.train,
            scope=sub_scope+"cluster_bn")
        return cnn_output, max_frames
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
        """Multi-scale CNN -> max-over-time -> MoE ensemble.

        For each of FLAGS.moe_num_extend scales: apply self.cnn and ReLU, take
        the max over axis 1 as that scale's feature vector, classify it with
        self.sub_moe, then halve the temporal length of the CNN features by
        max-pooling adjacent frame pairs before the next scale. The per-scale
        predictions are combined using softmax weights (normalized across
        scales) obtained from the feature vectors and the learned
        "ensemble_weight2d" tensor.

        Args:
          model_input: Frame-level features passed to self.cnn.
          vocab_size: Number of classes.
          num_frames: Not referenced in this body.
          l2_penalty: Not referenced in this body.
          **unused_params: Ignored.

        Returns:
          A dict with "prediction_frames" (per-scale predictions reshaped to
          [-1, vocab_size]) and "predictions" (their weighted sum over scales).
        """
        num_extend = FLAGS.moe_num_extend
        pool_size = 2
        num_filters = [256, 256, 512]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)

        probs_by_scale = []
        feats_by_scale = []
        current = model_input
        for layer in range(num_extend):
            suffix = layer + 1
            activated, num_t = self.cnn(current, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%suffix)
            activated = tf.nn.relu(activated)
            peak = tf.reduce_max(activated,axis=1)
            feats_by_scale.append(peak)
            # NOTE(review): assumes self.sub_moe returns a single
            # [batch, vocab_size] tensor here -- confirm against the class.
            probs_by_scale.append(self.sub_moe(peak,vocab_size,scopename="moe%d"%suffix))

            # Trim to an even number of frames and keep the max of each pair.
            half_len = num_t//pool_size
            num_t = pool_size*half_len
            pairs = tf.reshape(activated[:,:num_t,:],[-1,half_len,pool_size,features_size])
            current = tf.reduce_max(pairs, axis=2)

        final_probilities = tf.stack(probs_by_scale,axis=1)
        moe_inputs = tf.stack(feats_by_scale,axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        logits = tf.einsum("aij,ijk->aik", moe_inputs, weight2d)
        weight = tf.nn.softmax(logits, dim=1)

        result = dict()
        result["prediction_frames"] = tf.reshape(final_probilities,[-1,vocab_size])
        result["predictions"] = tf.reduce_sum(final_probilities*weight,axis=1)
        return result
frame_level_models.py 文件源码 项目:youtube-8m 作者: wangheda 项目源码 文件源码 阅读 41 收藏 0 点赞 0 评论 0
def cnn(self,
            model_input,
            l2_penalty=1e-8,
            num_filters=[1024, 1024, 1024],
            filter_sizes=[1,2,3],
            sub_scope="",
            **unused_params):
        """Apply time-shifted linear filters of several widths (no normalization).

        For each (nf, fs) pair from num_filters / filter_sizes the input is
        concatenated on the last axis with copies of itself delayed by
        1..fs-1 steps along axis 1 (zero-padded at the front and cropped to
        the original length), then projected to nf channels by a learned
        [num_features*fs, nf] matrix. The per-width outputs are concatenated
        on the last axis and returned as-is.

        Args:
          model_input: Rank-3 tensor whose axes 1 and 2 have static sizes.
          l2_penalty: Scale of the L2 regularizer on every filter matrix.
          num_filters: Output channels for each filter width.
          filter_sizes: Number of time steps each filter spans.
          sub_scope: Prefix for the variable names.
          **unused_params: Ignored.

        Returns:
          A tuple (cnn_output, max_frames): the concatenated filter outputs
          and the static size of axis 1 of the input.
        """
        static_shape = model_input.get_shape().as_list()
        max_frames, num_features = static_shape[1], static_shape[2]

        # Entry k is the input delayed by k steps along the time axis.
        delayed = [
            model_input if k == 0
            else tf.pad(model_input, paddings=[[0,0],[k,0],[0,0]])[:,:max_frames,:]
            for k in range(max(filter_sizes))
        ]

        branches = []
        for nf, fs in zip(num_filters, filter_sizes):
            window = tf.concat(delayed[:fs], axis=2)
            kernel = tf.get_variable(
                sub_scope+"cnn-filter-len%d"%fs,
                shape=[num_features*fs, nf],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
                regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
            branches.append(tf.einsum("ijk,kl->ijl", window, kernel))

        return tf.concat(branches, axis=2), max_frames


问题


面经


文章

微信
公众号

扫码关注公众号