def __init__(self, num_classes, token_index, max_sents, max_tokens,
             embedding_type='glove.6B.100d', embedding_dims=100):
    """Creates a `SentenceModelFactory` instance for building various models that operate over
    (samples, max_sentences, max_tokens) input.

    Args:
        num_classes: The number of output classes.
        token_index: The dictionary of token and its corresponding integer index value.
        max_sents: The max number of sentences in a document.
        max_tokens: The max number of tokens in a sentence.
        embedding_type: The embedding type to use. Set to None to use random embeddings.
            (Default value: 'glove.6B.100d')
        embedding_dims: The number of embedding dims to use for representing a word. This argument
            will be ignored when `embedding_type` is set. (Default value: 100)

    Raises:
        ValueError: If `max_tokens` is None.
    """
    self.num_classes = num_classes
    self.token_index = token_index
    self.max_sents = max_sents
    self.max_tokens = max_tokens

    # This is required to make TimeDistributed(word_encoder_model) work.
    # TODO: Get rid of this restriction when https://github.com/fchollet/keras/issues/6917 resolves.
    if self.max_tokens is None:
        raise ValueError('`max_tokens` should be provided.')

    if embedding_type is not None:
        self.embeddings_index = get_embeddings_index(embedding_type)
        # Infer the embedding dimensionality from any one embedding vector.
        # NOTE: `dict.values()` is a non-subscriptable view in Python 3, so the
        # original `values()[0]` (a Python 2 idiom) raised TypeError; use an
        # iterator to grab the first entry instead.
        self.embedding_dims = next(iter(self.embeddings_index.values())).shape[-1]
    else:
        self.embeddings_index = None
        self.embedding_dims = embedding_dims