model.py source code (Python)


Project: NeuralSum    Author: cheng6076
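The function below relies on three helpers defined elsewhere in model.py: tdnn (a max-over-time CNN), highway (highway layers), and adict (an attribute-style dict). The stand-ins below are minimal sketches, assuming the standard formulations used in char-CNN language models; they are only here to make the listing self-contained and runnable on TensorFlow 1.x, and the repo's own definitions may differ in detail.

import tensorflow as tf  # TF 1.x graph API (placeholders, variable scopes)

class adict(dict):
    """A dict whose keys are also attributes, matching the repo's return type."""
    def __init__(self, *args, **kwargs):
        super(adict, self).__init__(*args, **kwargs)
        self.__dict__ = self

def tdnn(input_, kernels, kernel_features, scope='TDNN'):
    """Max-over-time CNN (sketch): one convolution per kernel width, tanh,
    a global max-pool over time, then concatenation of all feature maps.
    input_:  [batch, time, embed_size]
    output:  [batch, sum(kernel_features)]"""
    with tf.variable_scope(scope):
        layers = []
        for kernel_size, feature_size in zip(kernels, kernel_features):
            conv = tf.layers.conv1d(input_, feature_size, kernel_size,
                                    activation=tf.tanh,
                                    name='kernel_%d' % kernel_size)
            layers.append(tf.reduce_max(conv, axis=1))  # max over time
        return tf.concat(layers, 1) if len(layers) > 1 else layers[0]

def highway(input_, size, num_layers=1, bias=-2.0, f=tf.nn.relu, scope='Highway'):
    """Highway layers (sketch): y = t * f(W_H x) + (1 - t) * x,
    where t = sigmoid(W_T x + bias) is the transform gate."""
    size = int(size)  # accept a tf.Dimension as well as a plain int
    with tf.variable_scope(scope):
        for idx in range(num_layers):
            g = f(tf.layers.dense(input_, size, name='lin_%d' % idx))
            t = tf.sigmoid(tf.layers.dense(input_, size, name='gate_%d' % idx) + bias)
            input_ = t * g + (1.0 - t) * input_  # gated mix of transform and carry
        return input_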
def cnn_sen_enc(word_vocab_size,
                word_embed_size=50,
                batch_size=20,
                num_highway_layers=2,
                max_sen_length=65,
                kernels=[1, 2, 3, 4, 5, 6, 7],
                kernel_features=[50, 100, 150, 200, 200, 200, 200],
                max_doc_length=35,
                pretrained=None):

    # CNN sentence encoder
    assert len(kernels) == len(kernel_features), 'kernels and kernel_features must have the same length'

    input_ = tf.placeholder(tf.int32, shape=[batch_size, max_doc_length, max_sen_length], name="input")

    ''' First, embed the words of each sentence '''
    with tf.variable_scope('Embedding'):
        if pretrained is not None:
            word_embedding = tf.get_variable(name='word_embedding', shape=[word_vocab_size, word_embed_size], 
                                       initializer=tf.constant_initializer(pretrained))
        else:
            word_embedding = tf.get_variable(name='word_embedding', shape=[word_vocab_size, word_embed_size])

        ''' This op clears the embedding vector of the first symbol (the symbol at
        position 0, which by convention is the padding symbol). It can be used to mimic
        the Torch7 embedding operator, which keeps the padding mapped to a zero embedding
        vector and ignores its gradient updates. To do that in TF:
        1. after parameter initialization, apply this op to zero out the padding embedding vector
        2. after each gradient update, apply this op again to keep the padding at zero '''
        clear_word_embedding_padding = tf.scatter_update(word_embedding, [0], tf.constant(0.0, shape=[1, word_embed_size]))

        # [batch_size, max_doc_length, max_sen_length, word_embed_size]
        input_embedded = tf.nn.embedding_lookup(word_embedding, input_)

        # fold the document dimension into the batch:
        # [batch_size * max_doc_length, max_sen_length, word_embed_size]
        input_embedded = tf.reshape(input_embedded, [-1, max_sen_length, word_embed_size])

    ''' Second, apply convolutions '''
    # [batch_size x max_doc_length, cnn_size]  # where cnn_size=sum(kernel_features)
    input_cnn = tdnn(input_embedded, kernels, kernel_features)

    ''' Optionally, apply highway layers '''
    if num_highway_layers > 0:
        input_cnn = highway(input_cnn, input_cnn.get_shape()[-1], num_layers=num_highway_layers)

    return adict(
        input=input_,
        clear_word_embedding_padding=clear_word_embedding_padding,
        input_embedded=input_embedded,
        input_cnn=input_cnn
    )
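
A minimal sketch of how the encoder might be driven in a TF 1.x session. The vocabulary size and the random batch are hypothetical, and train_op stands in for whatever optimizer step a training script would build; note how the clear_word_embedding_padding op is run once after initialization and again after each update, as the comment in the code prescribes.

import numpy as np

tf.reset_default_graph()
enc = cnn_sen_enc(word_vocab_size=5000)  # hypothetical vocabulary size

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # step 1 from the comment above: zero the padding row right after init
    sess.run(enc.clear_word_embedding_padding)

    # hypothetical batch of word ids; id 0 is the padding symbol
    batch = np.random.randint(1, 5000, size=(20, 35, 65))
    vecs = sess.run(enc.input_cnn, feed_dict={enc.input: batch})
    print(vecs.shape)  # (20 * 35, sum(kernel_features)) == (700, 1100)

    # step 2 during training: after each gradient update, re-apply the clear op
    # sess.run(train_op, feed_dict=...); sess.run(enc.clear_word_embedding_padding)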