# Imports assumed to sit at the top of the original file (TensorFlow 1.x API);
# the VOCABULARY_SIZE / EMBEDDINGS_SIZE / D2V_* constants, DIR_DATA_DOC2VEC
# and the _load_embeddings() helper are defined elsewhere in the repository.
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.python.training import training_util

def model(self,
          input_doc, input_words, output_label, batch_size,
          vocabulary_size=VOCABULARY_SIZE,
          embedding_size=EMBEDDINGS_SIZE,
          context_size=D2V_CONTEXT_SIZE,
          num_negative_samples=D2V_NEGATIVE_NUM_SAMPLES,
          learning_rate_initial=D2V_LEARNING_RATE_INITIAL,
          learning_rate_decay=D2V_LEARNING_RATE_DECAY,
          learning_rate_decay_steps=D2V_LEARNING_RATE_DECAY_STEPS):
self.global_step = training_util.get_or_create_global_step()
# inputs/outputs
input_doc = tf.reshape(input_doc, [batch_size])
input_words = tf.reshape(input_words, [batch_size, context_size])
output_label = tf.reshape(output_label, [batch_size, 1])
# embeddings: the pre-trained word embeddings are loaded as a frozen
# constant; only the per-document embeddings are trained here
word_embeddings = _load_embeddings(vocabulary_size, embedding_size,
filename_prefix='word_embeddings',
from_dir=DIR_DATA_DOC2VEC)
self.word_embeddings = tf.constant(value=word_embeddings,
shape=[vocabulary_size, embedding_size],
dtype=tf.float32, name='word_embeddings')
self.doc_embeddings = tf.get_variable(shape=[self.dataset.num_docs, embedding_size],
initializer=layers.xavier_initializer(),
dtype=tf.float32, name='doc_embeddings')
words_embed = tf.nn.embedding_lookup(self.word_embeddings, input_words)
doc_embed = tf.nn.embedding_lookup(self.doc_embeddings, input_doc)
# average the context word embeddings
words_embed_average = tf.reduce_mean(words_embed, axis=1)
embed = tf.concat([words_embed_average, doc_embed], axis=1)
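# embed has shape [batch_size, embedding_size * 2]: the averaged context
# words concatenated with the document vector, which is why the NCE
# weights below are twice the embedding width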
# NCE loss
nce_weights = tf.get_variable(shape=[vocabulary_size, embedding_size * 2],
initializer=layers.xavier_initializer(),
dtype=tf.float32, name='nce_weights')
nce_biases = tf.get_variable(shape=[vocabulary_size],
initializer=layers.xavier_initializer(),
dtype=tf.float32, name='nce_biases')
nce_loss = tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
labels=output_label,
inputs=embed, num_sampled=num_negative_samples,
num_classes=vocabulary_size)
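# tf.nn.nce_loss returns one loss value per example; reduce to a scalar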
self.loss = tf.reduce_mean(nce_loss)
tf.summary.scalar('loss', self.loss)
# learning rate & optimizer
self.learning_rate = tf.train.exponential_decay(learning_rate_initial, self.global_step,
learning_rate_decay_steps,
learning_rate_decay,
staircase=True, name='learning_rate')
tf.summary.scalar('learning_rate', self.learning_rate)
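# plain SGD; minimize() also increments global_step, which in turn
# advances the exponential decay schedule above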
sgd = tf.train.GradientDescentOptimizer(self.learning_rate)
self.optimizer = sgd.minimize(self.loss, global_step=self.global_step)
return None
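# --- Usage sketch (an assumption, not part of the original file) ---
# A minimal illustration of driving the graph built by model() for one SGD
# step in a TF 1.x session. The _train_step_sketch name, the placeholder
# shapes and the random id batch are hypothetical; num_docs should equal
# self.dataset.num_docs on the wrapped object.
def _train_step_sketch(net, num_docs, batch_size=32,
                       vocabulary_size=VOCABULARY_SIZE,
                       context_size=D2V_CONTEXT_SIZE):
    import numpy as np
    input_doc = tf.placeholder(tf.int64, [batch_size], name='input_doc')
    input_words = tf.placeholder(tf.int64, [batch_size, context_size],
                                 name='input_words')
    output_label = tf.placeholder(tf.int64, [batch_size, 1],
                                  name='output_label')
    net.model(input_doc, input_words, output_label, batch_size)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # one optimization step on a batch of random (hypothetical) ids
        _, loss = sess.run(
            [net.optimizer, net.loss],
            feed_dict={
                input_doc: np.random.randint(0, num_docs, [batch_size]),
                input_words: np.random.randint(0, vocabulary_size,
                                               [batch_size, context_size]),
                output_label: np.random.randint(0, vocabulary_size,
                                                [batch_size, 1]),
            })
    return loss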
Source: doc2vec_train_eval_word_embeds.py (Python)