def __init__(
    self,
    inputs=None,
    train_labels=None,
    vocabulary_size=80000,
    embedding_size=200,
    num_sampled=64,
    nce_loss_args={},
    E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
    E_init_args={},
    nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
    nce_W_init_args={},
    nce_b_init=tf.constant_initializer(value=0.0),
    nce_b_init_args={},
    name='word2vec_layer',
):
    Layer.__init__(self, name=name)
    self.inputs = inputs
    self.n_units = embedding_size
    print("  tensorlayer:Instantiate Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))
    # Look up embeddings for inputs.
    # Note: a row of 'embeddings' is the vector representation of one word.
    # For speed, it is better to slice rows out of the embedding matrix than
    # to convert a word id into a one-hot vector and multiply it by the
    # embedding matrix (see the equivalence sketch after this method).
    # 'embed' is the output of the hidden (embedding) layer: one row vector
    # of 'embedding_size' values per input word id.
    with tf.variable_scope(name) as vs:
        embeddings = tf.get_variable(name='embeddings',
                                     shape=(vocabulary_size, embedding_size),
                                     initializer=E_init,
                                     **E_init_args)
        embed = tf.nn.embedding_lookup(embeddings, self.inputs)
        # Construct the variables for the NCE loss (i.e. negative sampling).
        nce_weights = tf.get_variable(name='nce_weights',
                                      shape=(vocabulary_size, embedding_size),
                                      initializer=nce_W_init,
                                      **nce_W_init_args)
        nce_biases = tf.get_variable(name='nce_biases',
                                     shape=(vocabulary_size,),  # 1-D bias vector; bare (vocabulary_size) is an int, not a tuple
                                     initializer=nce_b_init,
                                     **nce_b_init_args)
    # Compute the average NCE loss for the batch.
    # tf.nn.nce_loss automatically draws a new sample of the negative labels
    # each time we evaluate the loss.
    self.nce_cost = tf.reduce_mean(
        tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
                       inputs=embed, labels=train_labels,
                       num_sampled=num_sampled, num_classes=vocabulary_size,
                       **nce_loss_args))
    self.outputs = embed
    self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)
    self.all_layers = [self.outputs]
    self.all_params = [embeddings, nce_weights, nce_biases]
    self.all_drop = {}
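
# ---------------------------------------------------------------------------
# A minimal usage sketch (not part of the original source). It assumes the
# TensorFlow 1.x / TensorLayer 1.x APIs used above; the batch size, optimizer
# and learning rate are illustrative choices.
import tensorflow as tf
import tensorlayer as tl

batch_size = 128
train_inputs = tf.placeholder(tf.int32, shape=(batch_size,))
train_labels = tf.placeholder(tf.int32, shape=(batch_size, 1))  # nce_loss expects rank-2 labels

emb_net = tl.layers.Word2vecEmbeddingInputlayer(
    inputs=train_inputs,
    train_labels=train_labels,
    vocabulary_size=80000,
    embedding_size=200,
    num_sampled=64,
    name='word2vec_layer')

# The layer exposes the average NCE cost directly; minimizing it trains the
# embeddings. After training, emb_net.normalized_embeddings (set above) can
# be used for cosine-similarity lookups between words.
train_op = tf.train.GradientDescentOptimizer(learning_rate=1.0).minimize(emb_net.nce_cost)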
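
# ---------------------------------------------------------------------------
# Equivalence sketch for the comment inside __init__ (not part of the
# original source): slicing the embedding matrix with tf.nn.embedding_lookup
# returns the same rows as multiplying a one-hot matrix by the embeddings,
# only without materializing the one-hot vectors. Shapes here are toy values.
import numpy as np
import tensorflow as tf

vocab, dim = 10, 4
ids = tf.constant([3, 7])
E = tf.constant(np.random.randn(vocab, dim), dtype=tf.float32)

fast = tf.nn.embedding_lookup(E, ids)              # direct row slicing
slow = tf.matmul(tf.one_hot(ids, depth=vocab), E)  # one-hot matmul

with tf.Session() as sess:
    a, b = sess.run([fast, slow])
    assert np.allclose(a, b)  # identical outputs, very different cost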