def loss_nce(self, l2_lambda=0.0001):  # l2_lambda: try 0.0001 -> 0.001
    """Calculate the loss using noise-contrastive estimation (NCE)."""
    # Compute the average NCE loss for the batch.
    # tf.nn.nce_loss automatically draws a new sample of the negative labels
    # each time we evaluate the loss.
    if self.is_training:  # training
        labels = tf.expand_dims(self.input_y, 1)  # [batch_size] -> [batch_size, 1]
        loss = tf.reduce_mean(
            tf.nn.nce_loss(
                weights=tf.transpose(self.W_projection),  # [hidden_size*2, num_classes] -> [num_classes, hidden_size*2]; nce_loss wants [num_classes, dim]
                biases=self.b_projection,        # [num_classes]
                labels=labels,                   # int64, [batch_size, num_true]: the target classes
                inputs=self.output_rnn_last,     # [batch_size, hidden_size*2]: the forward activations of the network
                num_sampled=self.num_sampled,    # scalar, e.g. 100
                num_classes=self.num_classes,    # scalar, e.g. 1999
                partition_strategy="div"))
    l2_losses = tf.add_n(
        [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
    loss = loss + l2_losses
    return loss
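
To make the shape contract above concrete, here is a minimal, self-contained run of tf.nn.nce_loss; all sizes are illustrative assumptions, not values from the model above.

import tensorflow as tf

batch_size, dim, num_classes, num_sampled = 4, 6, 20, 5  # illustrative sizes

inputs = tf.random_normal([batch_size, dim])             # stand-in for forward activations
labels = tf.constant([[3], [7], [0], [19]], tf.int64)    # [batch_size, num_true=1]
weights = tf.get_variable("nce_w", [num_classes, dim])   # note the [num_classes, dim] layout
biases = tf.get_variable("nce_b", [num_classes])

loss = tf.reduce_mean(tf.nn.nce_loss(
    weights=weights, biases=biases, labels=labels, inputs=inputs,
    num_sampled=num_sampled, num_classes=num_classes,
    partition_strategy="div"))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(loss))  # a scalar; varies run to run as negatives are resampled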
Example source code for Python's nce_loss()
Source file: p6_fastTextB_model_multilabel.py
Project: text_classification
Author: brightmart
def loss(self, l2_lambda=0.0001):
    """Calculate the loss using noise-contrastive estimation (NCE)."""
    # Compute the average NCE loss for the batch.
    # tf.nn.nce_loss automatically draws a new sample of the negative labels
    # each time we evaluate the loss.
    if self.is_training:  # training
        # nce_loss note: if you have a variable number of target classes per
        # example, pad them out to a constant number (num_true), either by
        # repeating labels or by padding with an otherwise unused class.
        loss = tf.reduce_mean(  # inputs must have shape [batch_size, dim]
            tf.nn.nce_loss(
                weights=tf.transpose(self.W),     # [embed_size, label_size] -> [label_size, embed_size]; nce_loss wants [num_classes, dim]
                biases=self.b,                    # [label_size]
                labels=self.labels,               # int64, [batch_size, max_label_per_example] = [batch_size, num_true]
                inputs=self.sentence_embeddings,  # [batch_size, embed_size]: the forward activations of the network
                num_sampled=self.num_sampled,     # scalar, e.g. 100
                num_true=self.max_label_per_example,
                num_classes=self.label_size,      # scalar, e.g. 1999
                partition_strategy="div"))
    else:  # eval / inference
        labels_multi_hot = self.labels_l1999  # [batch_size, label_size]
        # sigmoid_cross_entropy_with_logits measures the probability error in
        # tasks where classes are independent and not mutually exclusive,
        # e.g. multilabel classification (a picture can contain both an
        # elephant and a dog at the same time).
        loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels_multi_hot, logits=self.logits)  # both [batch_size, label_size]
        loss = tf.reduce_sum(loss, axis=1)  # [batch_size]
    # note: adding this regularization term made training fail to converge in some runs
    l2_losses = tf.add_n(
        [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
    loss = loss + l2_losses
    return loss
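
The padding trick mentioned in the training-branch comment is easy to do on the input-pipeline side. A minimal sketch, where pad_id and the sizes are illustrative assumptions rather than values from the model:

import numpy as np

def pad_labels(label_lists, num_true, pad_id):
    """Pad or truncate each example's label list to exactly num_true entries."""
    padded = []
    for labels in label_lists:
        labels = list(labels)[:num_true]               # truncate if too many
        labels += [pad_id] * (num_true - len(labels))  # pad with an unused class id
        padded.append(labels)
    return np.asarray(padded, dtype=np.int64)          # [batch_size, num_true]

# Two examples with 2 and 1 true labels, padded to num_true=3:
print(pad_labels([[4, 7], [1]], num_true=3, pad_id=1998))
# [[   4    7 1998]
#  [   1 1998 1998]]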
def loss(self, l2_lambda=0.01):  # l2_lambda: try 0.0001 -> 0.001
    """Calculate the loss using noise-contrastive estimation (NCE)."""
    # Compute the average NCE loss for the batch.
    # tf.nn.nce_loss automatically draws a new sample of the negative labels
    # each time we evaluate the loss.
    if self.is_training:  # training
        labels = tf.reshape(self.labels, [-1])  # [batch_size, 1] -> [batch_size]
        labels = tf.expand_dims(labels, 1)      # [batch_size] -> [batch_size, 1]
        loss = tf.reduce_mean(  # inputs must have shape [batch_size, dim]
            tf.nn.nce_loss(
                weights=tf.transpose(self.W),     # [embed_size, label_size] -> [label_size, embed_size]; nce_loss wants [num_classes, dim]
                biases=self.b,                    # [label_size]
                labels=labels,                    # int64, [batch_size, num_true]
                inputs=self.sentence_embeddings,  # [batch_size, embed_size]: the forward activations of the network
                num_sampled=self.num_sampled,     # scalar, e.g. 100
                num_classes=self.label_size,      # scalar, e.g. 1999
                partition_strategy="div"))
    else:  # eval / inference
        labels_one_hot = tf.one_hot(self.labels, self.label_size)  # [batch_size] -> [batch_size, label_size]
        # Under the sigmoid loss, classes are independent and not mutually
        # exclusive, which also makes it usable for multilabel classification.
        loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels_one_hot, logits=self.logits)  # both [batch_size, label_size]
        loss = tf.reduce_sum(loss, axis=1)  # [batch_size]
    l2_losses = tf.add_n(
        [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
    loss = loss + l2_losses
    return loss
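
At inference time the same parameters that nce_loss samples from can score every class at once. A minimal sketch of that correspondence, with illustrative sizes and names (not taken from the model above):

import tensorflow as tf

batch_size, embed_size, label_size = 4, 8, 10  # illustrative sizes

sentence_embeddings = tf.random_normal([batch_size, embed_size])
W = tf.get_variable("W", [embed_size, label_size])  # the matrix the loss above transposes for nce_loss
b = tf.get_variable("b", [label_size])

# Full logits over all classes, the tensor the eval branch above consumes:
logits = tf.nn.bias_add(tf.matmul(sentence_embeddings, W), b)  # [batch_size, label_size]

labels = tf.constant([1, 3, 5, 7], tf.int64)
labels_one_hot = tf.one_hot(labels, label_size)  # [batch_size, label_size]
per_example_loss = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_one_hot, logits=logits),
    axis=1)  # [batch_size]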
def __call__(self, embed, train_labels):
    with tf.name_scope("negative_sampling"):
        # Masking out skip or OOV labels, if switched on, triggers:
        # "UserWarning: Converting sparse IndexedSlices to a dense Tensor of
        # unknown shape. This may consume a large amount of memory."
        # mask = tf.greater(train_labels, NegativeSampling.IGNORE_LABEL_MAX)
        # # mask = tf.not_equal(train_labels, NegativeSampling.IGNORE_LABEL)
        # embed = tf.boolean_mask(embed, mask)
        # train_labels = tf.expand_dims(tf.boolean_mask(train_labels, mask), -1)
        train_labels = tf.expand_dims(train_labels, -1)
        # Compute the average NCE loss for the batch. tf.nn.nce_loss
        # automatically draws a new sample of the negative labels each time we
        # evaluate the loss. Its default sampler uses a log-uniform (Zipfian)
        # distribution and therefore assumes labels are sorted by frequency -
        # which they are!
        sampler = (self.freqs if self.freqs is None  # None falls through to nce_loss's default log-uniform sampler
                   else tf.nn.fixed_unigram_candidate_sampler(
                       train_labels, num_true=1, num_sampled=self.sample_size,
                       unique=True, range_max=self.vocab_size,
                       # num_reserved_ids=2,  # skip or OOV; only if not in unigrams
                       distortion=self.power, unigrams=list(self.freqs)))
        loss = tf.reduce_mean(
            # r0.12 positional order: weights, biases, inputs, labels,
            # num_sampled, num_classes.
            tf.nn.nce_loss(self.nce_weights, self.nce_biases,
                           embed,  # summed doc and context embedding
                           train_labels, self.sample_size, self.vocab_size,
                           sampled_values=sampler),  # log-uniform if not specified
            name="nce_batch_loss")
        # TODO: negative sampling versus NCE
        # TODO: uniform vs. Zipf with exponent `distortion` param
        # https://www.tensorflow.org/versions/r0.12/api_docs/python/nn.html#log_uniform_candidate_sampler
        return loss
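
To see what the frequency-based sampler above actually draws, here is a small standalone sketch; the toy counts and distortion value are illustrative assumptions:

import tensorflow as tf

# Toy vocabulary of 6 ids with raw counts, most frequent first.
unigrams = [100, 50, 20, 10, 5, 1]
true_labels = tf.constant([[2], [0]], dtype=tf.int64)  # [batch_size, num_true]

sampled, true_expected, sampled_expected = tf.nn.fixed_unigram_candidate_sampler(
    true_classes=true_labels,
    num_true=1,
    num_sampled=3,
    unique=True,
    range_max=6,
    distortion=0.75,      # flattens the distribution, as in word2vec
    unigrams=unigrams)

with tf.Session() as sess:
    print(sess.run(sampled))  # e.g. [1 0 3], ids drawn in proportion to count**0.75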
def __init__(
        self,
        inputs=None,
        train_labels=None,
        vocabulary_size=80000,
        embedding_size=200,
        num_sampled=64,
        nce_loss_args={},
        E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
        E_init_args={},
        nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
        nce_W_init_args={},
        nce_b_init=tf.constant_initializer(value=0.0),
        nce_b_init_args={},
        name='word2vec_layer',
):
    Layer.__init__(self, name=name)
    self.inputs = inputs
    print("  [TL] Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))
    # Look up embeddings for inputs.
    # Note: a row of 'embeddings' is the vector representation of one word.
    # For speed, it is better to slice the embedding matrix than to turn a
    # word id into a one-hot vector and multiply it by the embedding matrix.
    # embed is the output of the hidden (embedding) layer: one row vector of
    # 'embedding_size' values per input id.
    with tf.variable_scope(name):
        embeddings = tf.get_variable(name='embeddings',
                                     shape=(vocabulary_size, embedding_size),
                                     initializer=E_init,
                                     **E_init_args)
        embed = tf.nn.embedding_lookup(embeddings, self.inputs)
        # Construct the variables for the NCE loss (i.e. negative sampling).
        nce_weights = tf.get_variable(name='nce_weights',
                                      shape=(vocabulary_size, embedding_size),
                                      initializer=nce_W_init,
                                      **nce_W_init_args)
        nce_biases = tf.get_variable(name='nce_biases',
                                     shape=(vocabulary_size,),
                                     initializer=nce_b_init,
                                     **nce_b_init_args)
        # Compute the average NCE loss for the batch. tf.nn.nce_loss
        # automatically draws a new sample of the negative labels each time
        # we evaluate the loss.
        self.nce_cost = tf.reduce_mean(
            tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
                           inputs=embed, labels=train_labels,
                           num_sampled=num_sampled, num_classes=vocabulary_size,
                           **nce_loss_args))
    self.outputs = embed
    self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)
    self.all_layers = [self.outputs]
    self.all_params = [embeddings, nce_weights, nce_biases]
    self.all_drop = {}
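
Judging by the print statement, this __init__ belongs to TensorLayer's Word2vecEmbeddingInputlayer. Assuming that class and the TensorLayer 1.x Layer API, a minimal skip-gram wiring sketch might look like the following (batch size, vocabulary and embedding sizes are illustrative):

import tensorflow as tf
import tensorlayer as tl

batch_size = 128
train_inputs = tf.placeholder(tf.int32, shape=[batch_size])     # center word ids
train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])  # context word ids

emb_net = tl.layers.Word2vecEmbeddingInputlayer(
    inputs=train_inputs,
    train_labels=train_labels,
    vocabulary_size=50000,
    embedding_size=128,
    num_sampled=64,
    name='word2vec_layer')

# emb_net.nce_cost is the scalar defined above; minimize it directly.
train_op = tf.train.AdagradOptimizer(0.5).minimize(emb_net.nce_cost)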
def __init__(
        self,
        inputs=None,
        train_labels=None,
        vocabulary_size=80000,
        embedding_size=200,
        num_sampled=64,
        nce_loss_args={},
        E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
        E_init_args={},
        nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
        nce_W_init_args={},
        nce_b_init=tf.constant_initializer(value=0.0),
        nce_b_init_args={},
        name='word2vec_layer',
):
    Layer.__init__(self, name=name)
    self.inputs = inputs
    print("  [TL] Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))
    # Look up embeddings for inputs.
    # Note: a row of 'embeddings' is the vector representation of one word.
    # For speed, it is better to slice the embedding matrix than to turn a
    # word id into a one-hot vector and multiply it by the embedding matrix.
    # embed is the output of the hidden (embedding) layer: one row vector of
    # 'embedding_size' values per input id.
    with tf.variable_scope(name):
        embeddings = tf.get_variable(name='embeddings',
                                     shape=(vocabulary_size, embedding_size),
                                     initializer=E_init,
                                     dtype=D_TYPE,
                                     **E_init_args)
        embed = tf.nn.embedding_lookup(embeddings, self.inputs)
        # Construct the variables for the NCE loss (i.e. negative sampling).
        nce_weights = tf.get_variable(name='nce_weights',
                                      shape=(vocabulary_size, embedding_size),
                                      initializer=nce_W_init,
                                      dtype=D_TYPE,
                                      **nce_W_init_args)
        nce_biases = tf.get_variable(name='nce_biases',
                                     shape=(vocabulary_size,),
                                     initializer=nce_b_init,
                                     dtype=D_TYPE,
                                     **nce_b_init_args)
        # Compute the average NCE loss for the batch. tf.nn.nce_loss
        # automatically draws a new sample of the negative labels each time
        # we evaluate the loss.
        self.nce_cost = tf.reduce_mean(
            tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
                           inputs=embed, labels=train_labels,
                           num_sampled=num_sampled, num_classes=vocabulary_size,
                           **nce_loss_args))
    self.outputs = embed
    self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)
    self.all_layers = [self.outputs]
    self.all_params = [embeddings, nce_weights, nce_biases]
    self.all_drop = {}
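
The only difference in this variant is that a module-level D_TYPE constant is threaded through every tf.get_variable call, so the whole layer's precision can be switched in one place. A minimal sketch of the pattern, assuming D_TYPE = tf.float32 (its default in TensorLayer 1.x):

import tensorflow as tf

D_TYPE = tf.float32  # module-level dtype switch; set once, used by every variable

embeddings = tf.get_variable(
    name='embeddings', shape=(80000, 200), dtype=D_TYPE,
    initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0))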
def __init__(
        self,
        inputs=None,
        train_labels=None,
        vocabulary_size=80000,
        embedding_size=200,
        num_sampled=64,
        nce_loss_args={},
        E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
        E_init_args={},
        nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
        nce_W_init_args={},
        nce_b_init=tf.constant_initializer(value=0.0),
        nce_b_init_args={},
        name='word2vec_layer',
):
    Layer.__init__(self, name=name)
    self.inputs = inputs
    self.n_units = embedding_size
    print("  tensorlayer:Instantiate Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))
    # Look up embeddings for inputs.
    # Note: a row of 'embeddings' is the vector representation of one word.
    # For speed, it is better to slice the embedding matrix than to turn a
    # word id into a one-hot vector and multiply it by the embedding matrix.
    # embed is the output of the hidden (embedding) layer: one row vector of
    # 'embedding_size' values per input id.
    with tf.variable_scope(name):
        embeddings = tf.get_variable(name='embeddings',
                                     shape=(vocabulary_size, embedding_size),
                                     initializer=E_init,
                                     **E_init_args)
        embed = tf.nn.embedding_lookup(embeddings, self.inputs)
        # Construct the variables for the NCE loss (i.e. negative sampling).
        nce_weights = tf.get_variable(name='nce_weights',
                                      shape=(vocabulary_size, embedding_size),
                                      initializer=nce_W_init,
                                      **nce_W_init_args)
        nce_biases = tf.get_variable(name='nce_biases',
                                     shape=(vocabulary_size,),
                                     initializer=nce_b_init,
                                     **nce_b_init_args)
        # Compute the average NCE loss for the batch. tf.nn.nce_loss
        # automatically draws a new sample of the negative labels each time
        # we evaluate the loss.
        self.nce_cost = tf.reduce_mean(
            tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
                           inputs=embed, labels=train_labels,
                           num_sampled=num_sampled, num_classes=vocabulary_size,
                           **nce_loss_args))
    self.outputs = embed
    self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)
    self.all_layers = [self.outputs]
    self.all_params = [embeddings, nce_weights, nce_biases]
    self.all_drop = {}
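
After training, normalized_embeddings is the tensor to query for nearest neighbors, because cosine similarity between unit-norm rows reduces to a matrix product. A short sketch, reusing the emb_net instance from the usage example above (valid_ids is an illustrative choice):

import tensorflow as tf

valid_ids = tf.constant([12, 345, 6789])  # illustrative word ids to probe
valid_vecs = tf.nn.embedding_lookup(emb_net.normalized_embeddings, valid_ids)
similarity = tf.matmul(valid_vecs, emb_net.normalized_embeddings, transpose_b=True)
# similarity[i, j] = cosine(word valid_ids[i], word j); nearest neighbours per row:
top_k = tf.nn.top_k(similarity, k=8)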