def LM(batch_size, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers-1):
        y = GCNN(hidden_dim, window_size=window_size,
                 name='gcnn{}'.format(i + 1))(y)
    y = GCNN(hidden_dim, window_size=window_size,
             name='gcnn{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
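# Hedged usage sketch (not part of the original source): assuming the custom GCNN layer used
# above is importable from the same repo, the language model can be built and compiled like
# this. Targets are next-token ids, so sparse categorical cross-entropy is a natural choice;
# all hyperparameter values below are illustrative.
lm = LM(batch_size=32, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=2)
lm.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
lm.summary()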
def LM(batch_size, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers-1):
        y = LayerNormLSTM(hidden_dim, return_sequences=True, name='lnlstm{}'.format(i + 1))(y)
    y = LayerNormLSTM(hidden_dim, return_sequences=True, name='lnlstm{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
def LM(batch_size, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers-1):
        y = WeightNormGRU(hidden_dim, return_sequences=True, name='wngru{}'.format(i + 1))(y)
    y = WeightNormGRU(hidden_dim, return_sequences=True, name='wngru{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
def LM(batch_size, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1):
    x = Input(batch_shape=(batch_size, None))
    # embedding
    y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x)
    for i in range(nb_layers-1):
        y = TernaryRNN(hidden_dim, return_sequences=True, name='trnn{}'.format(i + 1))(y)
    y = TernaryRNN(hidden_dim, return_sequences=True, name='trnn{}'.format(nb_layers))(y)
    y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y)
    model = Model(inputs=x, outputs=y)
    return model
def learnable_wiq(context, question, question_mask, layer_dim):
    """Aligned question embedding. Same as in DRQA paper."""
    question_enc = TimeDistributed(Dense(units=layer_dim, activation='relu'))(question)
    context_enc = TimeDistributed(Dense(units=layer_dim, activation='relu'))(context)
    question_enc = Lambda(lambda q: tf.transpose(q, [0, 2, 1]))(question_enc)
    matrix = Lambda(lambda q: tf.matmul(q[0], q[1]))([context_enc, question_enc])
    coefs = Lambda(lambda q: masked_softmax(q[0], q[1], axis=2, expand=1))([matrix, question_mask])
    aligned_question_enc = Lambda(lambda q: tf.matmul(q[0], q[1]))([coefs, question])
    return aligned_question_enc
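# Hedged usage sketch (not part of the original source): learnable_wiq expects 3-D context
# and question encodings plus a 2-D question mask; masked_softmax is a helper defined
# elsewhere in this module. All shapes below are illustrative only.
context_in = Input(shape=(300, 128))    # (batch, context_len, dim)
question_in = Input(shape=(30, 128))    # (batch, question_len, dim)
question_mask_in = Input(shape=(30,))   # (batch, question_len)
aligned_q = learnable_wiq(context_in, question_in, question_mask_in, layer_dim=128)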
def projection(encoding, W, dropout_rate):
    """Projection layer. Dense layer from keras.
    In FastQA it is applied after the encoder, to project context and question representations
    into different spaces."""
    proj = TimeDistributed(
        Dense(W,
              trainable=True,
              weights=np.concatenate((np.eye(W), np.eye(W)), axis=1)))(encoding)
    proj = Dropout(rate=dropout_rate)(proj)
    return proj
def question_attn_vector(question_encoding, question_mask, context_encoding, repeat=True):
    """Attention over question."""
    question_attention_vector = TimeDistributed(Dense(1))(question_encoding)
    # apply masking
    question_attention_vector = Lambda(lambda q: masked_softmax(q[0], q[1]))([question_attention_vector, question_mask])
    # apply the attention
    question_attention_vector = Lambda(lambda q: q[0] * q[1])([question_encoding, question_attention_vector])
    question_attention_vector = Lambda(lambda q: K.sum(q, axis=1))(question_attention_vector)
    if repeat:
        question_attention_vector = Lambda(lambda q: repeat_vector(q[0], q[1]))([question_attention_vector, context_encoding])
    return question_attention_vector
def answer_end_pred(context_encoding, question_attention_vector, context_mask, answer_start_distribution, W, dropout_rate):
    """Answer end prediction layer."""
    # Answer end prediction depends on the start prediction
    def s_answer_feature(x):
        maxind = K.argmax(
            x,
            axis=1,
        )
        return maxind

    x = Lambda(lambda x: K.tf.cast(s_answer_feature(x), dtype=K.tf.int32))(answer_start_distribution)
    start_feature = Lambda(lambda arg: K.tf.gather_nd(arg[0], K.tf.stack(
        [tf.range(K.tf.shape(arg[1])[0]), tf.cast(arg[1], K.tf.int32)], axis=1)))([context_encoding, x])
    start_feature = Lambda(lambda q: repeat_vector(q[0], q[1]))([start_feature, context_encoding])

    # Answer end prediction
    answer_end = Lambda(lambda arg: concatenate([
        arg[0],
        arg[1],
        arg[2],
        multiply([arg[0], arg[1]]),
        multiply([arg[0], arg[2]])
    ]))([context_encoding, question_attention_vector, start_feature])

    answer_end = TimeDistributed(Dense(W, activation='relu'))(answer_end)
    answer_end = Dropout(rate=dropout_rate)(answer_end)
    answer_end = TimeDistributed(Dense(1))(answer_end)

    # apply masking
    answer_end = Lambda(lambda q: masked_softmax(q[0], q[1]))([answer_end, context_mask])
    answer_end = Lambda(lambda q: flatten(q))(answer_end)
    return answer_end
def conv_unit(inp, n_gram, no_word=200, window=2):
    out = Conv1D(no_word, window, strides=1, padding="valid", activation='relu')(inp)
    out = TimeDistributed(Dense(5, input_shape=(n_gram, no_word)))(out)
    out = ZeroPadding1D(padding=(0, window-1))(out)
    return out
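# Hedged usage sketch (not part of the original source): conv_unit is typically applied
# several times with different window sizes and the results concatenated along the feature
# axis; the ZeroPadding1D call above restores the original sequence length so the branches
# line up. concatenate is keras.layers.concatenate (already used earlier in this file);
# all shapes and values below are illustrative.
char_input = Input(shape=(21, 64))   # (n_gram, char embedding dim)
a1 = conv_unit(char_input, n_gram=21, no_word=200, window=1)
a2 = conv_unit(char_input, n_gram=21, no_word=200, window=2)
a3 = conv_unit(char_input, n_gram=21, no_word=200, window=3)
merged = concatenate([a1, a2, a3], axis=-1)   # (batch, 21, 15)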
def __init__(self, num_classes, token_index, max_sents, max_tokens,
             embedding_type='glove.6B.100d', embedding_dims=100):
    """Creates a `SentenceModelFactory` instance for building various models that operate over
    (samples, max_sentences, max_tokens) input.

    Args:
        num_classes: The number of output classes.
        token_index: The dictionary of token and its corresponding integer index value.
        max_sents: The max number of sentences in a document.
        max_tokens: The max number of tokens in a sentence.
        embedding_type: The embedding type to use. Set to None to use random embeddings.
            (Default value: 'glove.6B.100d')
        embedding_dims: The number of embedding dims to use for representing a word. This argument will be ignored
            when `embedding_type` is set. (Default value: 100)
    """
    self.num_classes = num_classes
    self.token_index = token_index
    self.max_sents = max_sents
    self.max_tokens = max_tokens

    # This is required to make TimeDistributed(word_encoder_model) work.
    # TODO: Get rid of this restriction when https://github.com/fchollet/keras/issues/6917 resolves.
    if self.max_tokens is None:
        raise ValueError('`max_tokens` should be provided.')

    if embedding_type is not None:
        self.embeddings_index = get_embeddings_index(embedding_type)
        self.embedding_dims = next(iter(self.embeddings_index.values())).shape[-1]
    else:
        self.embeddings_index = None
        self.embedding_dims = embedding_dims
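# Hedged usage sketch (not part of the original source): instantiating the factory described
# in the docstring above. The import path of SentenceModelFactory is not shown here;
# token_index would normally come from the library's tokenizer, so the toy index below and
# embedding_type=None (random embeddings) are illustrative only.
toy_token_index = {'the': 1, 'cat': 2, 'sat': 3}
factory = SentenceModelFactory(num_classes=2,
                               token_index=toy_token_index,
                               max_sents=10,
                               max_tokens=50,
                               embedding_type=None,
                               embedding_dims=100)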
def test_sequential_model_saving():
    model = Sequential()
    model.add(Dense(2, input_dim=3))
    model.add(RepeatVector(3))
    model.add(TimeDistributed(Dense(3)))
    model.compile(loss=objectives.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy],
                  sample_weight_mode='temporal')

    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)
    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)

    new_model = load_model(fname)
    os.remove(fname)

    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)

    # test that new updates are the same with both models
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)
    new_model.train_on_batch(x, y)

    out = model.predict(x)
    out2 = new_model.predict(x)
    assert_allclose(out, out2, atol=1e-05)
def build_hcnn_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False):
    """Hierarchical CNN: a sentence-level CNN with in-sentence average pooling,
    followed by a document-level CNN over sentence representations and a sigmoid score."""
    N = maxnum
    L = maxlen

    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, nbfilters = %s, filter1_len = %s, filter2_len = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.nbfilters, opts.filter1_len, opts.filter2_len, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(Convolution1D(opts.nbfilters, opts.filter1_len, border_mode='valid'), name='z')(resh_W)
    avg_z = TimeDistributed(AveragePooling1D(pool_length=L-opts.filter1_len+1), name='avg_z')(z)  # shape = (N, 1, nbfilters)
    resh_z = Reshape((N, opts.nbfilters), name='resh_z')(avg_z)  # shape = (N, nbfilters)

    hz = Convolution1D(opts.nbfilters, opts.filter2_len, border_mode='valid', name='hz')(resh_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz)

    model = Model(input=word_input, output=y)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
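# Hedged usage sketch (not part of the original source): the build_* functions here read
# their hyperparameters from an `opts` object, so any namespace exposing the attributes they
# access (nbfilters, filter1_len, filter2_len, lstm_units, dropout, l2_value, init_bias)
# works. All values below are illustrative, and the module-level logger used above is
# assumed to be configured.
from argparse import Namespace
opts = Namespace(nbfilters=100, filter1_len=5, filter2_len=3, lstm_units=100,
                 dropout=0.5, l2_value=0.001, init_bias=False)
hcnn = build_hcnn_model(opts, vocab_size=4000, maxnum=50, maxlen=50, embedd_dim=50)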
def build_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger("Build model")
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    avg_z = TimeDistributed(GlobalAveragePooling1D(), name='avg_z')(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(avg_z)
    # TODO, random drop sentences
    drop_hz = Dropout(opts.dropout, name='drop_hz')(hz)
    avg_hz = GlobalAveragePooling1D(name='avg_hz')(drop_hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_bidirectional_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger("Build bidirectional model")
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z_fwd = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z_fwd')(resh_W)
    z_bwd = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True, go_backwards=True), name='z_bwd')(resh_W)
    z_merged = merge([z_fwd, z_bwd], mode='concat', name='z_merged')

    avg_z = TimeDistributed(GlobalAveragePooling1D(), name='avg_z')(z_merged)

    hz_fwd = LSTM(opts.lstm_units, return_sequences=True, name='hz_fwd')(avg_z)
    hz_bwd = LSTM(opts.lstm_units, return_sequences=True, go_backwards=True, name='hz_bwd')(avg_z)
    hz_merged = merge([hz_fwd, hz_bwd], mode='concat', name='hz_merged')
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz_merged)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(avg_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_attention_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger('Build attention pooling model')
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    avg_z = TimeDistributed(GlobalAveragePooling1D(), name='avg_z')(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(avg_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    # avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    attent_hz = Attention(name='attent_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(attent_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def build_attention2_model(opts, vocab_size=0, maxnum=50, maxlen=50, embedd_dim=50, embedding_weights=None, verbose=False, init_mean_value=None):
    N = maxnum
    L = maxlen

    logger = get_logger('Build attention pooling model')
    logger.info("Model parameters: max_sentnum = %d, max_sentlen = %d, embedding dim = %s, lstm_units = %s, drop rate = %s, l2 = %s" % (
        N, L, embedd_dim, opts.lstm_units, opts.dropout, opts.l2_value))

    word_input = Input(shape=(N*L,), dtype='int32', name='word_input')
    x = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=N*L, weights=embedding_weights, name='x')(word_input)
    drop_x = Dropout(opts.dropout, name='drop_x')(x)

    resh_W = Reshape((N, L, embedd_dim), name='resh_W')(drop_x)

    z = TimeDistributed(LSTM(opts.lstm_units, return_sequences=True), name='z')(resh_W)
    att_z = TimeDistributed(Attention(name='att_z'))(z)

    hz = LSTM(opts.lstm_units, return_sequences=True, name='hz')(att_z)
    # avg_h = MeanOverTime(mask_zero=True, name='avg_h')(hz)
    # avg_hz = GlobalAveragePooling1D(name='avg_hz')(hz)
    attent_hz = Attention(name='attent_hz')(hz)
    y = Dense(output_dim=1, activation='sigmoid', name='output')(attent_hz)

    model = Model(input=word_input, output=y)

    if opts.init_bias and init_mean_value:
        logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
        bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
        model.layers[-1].b.set_value(bias_value)

    if verbose:
        model.summary()

    start_time = time.time()
    model.compile(loss='mse', optimizer='rmsprop')
    total_time = time.time() - start_time
    logger.info("Model compiled in %.4f s" % total_time)

    return model
def __init__(self, rnn_dim, rnn_unit='gru', input_shape=(0,),
             dropout=0.0, highway=False, return_sequences=False,
             dense_dim=0):
    if rnn_unit == 'gru':
        rnn = GRU
    else:
        rnn = LSTM
    self.model = Sequential()
    self.model.add(
        Bidirectional(rnn(rnn_dim,
                          dropout=dropout,
                          recurrent_dropout=dropout,
                          return_sequences=return_sequences),
                      input_shape=input_shape))
    # self.model.add(rnn(rnn_dim,
    #                    dropout=dropout,
    #                    recurrent_dropout=dropout,
    #                    return_sequences=return_sequences,
    #                    input_shape=input_shape))
    if highway:
        if return_sequences:
            self.model.add(TimeDistributed(Highway(activation='tanh')))
        else:
            self.model.add(Highway(activation='tanh'))
    if dense_dim > 0:
        self.model.add(TimeDistributed(Dense(dense_dim,
                                             activation='relu')))
        self.model.add(TimeDistributed(Dropout(dropout)))
        self.model.add(TimeDistributed(BatchNormalization()))
def __init__(self, dense_dim, sequence_length=0,
             input_dim=0, dropout=0.0):
    self.dense_dim = dense_dim
    self.sequence_length = sequence_length
    self.input_dim = input_dim
    model = Sequential()
    model.add(Dense(dense_dim,
                    activation='relu',
                    input_shape=(input_dim,)))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    self.model = TimeDistributed(model)
def __init__(self, dense_dim, sequence_length=0,
             input_dim=0, dropout=0.0):
    model = Sequential()
    model.add(Dense(dense_dim,
                    activation='relu',
                    input_shape=(input_dim,)))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    self.model = TimeDistributed(model, input_shape=(sequence_length, input_dim,))
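# Hedged usage sketch (not part of the original source): the two constructors above wrap a
# small Sequential dense block in TimeDistributed, so the same block is applied independently
# at every timestep of a 3-D input. All shapes below are illustrative.
seq_input = Input(shape=(40, 300))                 # (timesteps, input_dim)
dense_block = Sequential()
dense_block.add(Dense(128, activation='relu', input_shape=(300,)))
dense_block.add(Dropout(0.2))
dense_block.add(BatchNormalization())
per_step_output = TimeDistributed(dense_block)(seq_input)   # (batch, 40, 128)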