def _to_tfidf(term_frequency, reduced_term_freq, corpus_size, smooth):
  """Calculates the inverse document frequency of terms in the corpus.

  Args:
    term_frequency: The `SparseTensor` output of _to_term_frequency.
    reduced_term_freq: A `Tensor` of shape (vocab_size,) that represents the
      count of the number of documents with each term.
    corpus_size: A scalar count of the number of documents in the corpus.
    smooth: A bool indicating if the idf value should be smoothed. See
      tfidf_weights documentation for details.

  Returns:
    A `SparseTensor` with indices=<doc_index_in_batch>, <term_index_in_vocab>,
    values=term frequency * inverse document frequency,
    and shape=(batch, vocab_size).
  """
  # The idf tensor has shape (vocab_size,).
  if smooth:
    idf = tf.log((tf.to_double(corpus_size) + 1.0) / (
        1.0 + tf.to_double(reduced_term_freq))) + 1
  else:
    idf = tf.log(tf.to_double(corpus_size) / (
        tf.to_double(reduced_term_freq))) + 1

  gathered_idfs = tf.gather(tf.squeeze(idf), term_frequency.indices[:, 1])
  tfidf_values = (tf.to_float(term_frequency.values) *
                  tf.to_float(gathered_idfs))

  return tf.SparseTensor(
      indices=term_frequency.indices,
      values=tfidf_values,
      dense_shape=term_frequency.dense_shape)
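For reference, a minimal NumPy sketch (not part of the original module) of the smoothed and unsmoothed idf formulas used above; the corpus size and per-term document counts are made-up illustration values:

import numpy as np

# Hypothetical corpus: 4 documents; each vocab term appears in this many of them.
corpus_size = 4.0
doc_counts = np.array([4.0, 2.0, 1.0])

# Smoothed idf, as in the `smooth=True` branch: log((N + 1) / (1 + df)) + 1.
smoothed_idf = np.log((corpus_size + 1.0) / (1.0 + doc_counts)) + 1
# -> approximately [1.0, 1.51, 1.92]

# Unsmoothed idf, as in the `else` branch: log(N / df) + 1.
unsmoothed_idf = np.log(corpus_size / doc_counts) + 1
# -> approximately [1.0, 1.69, 2.39]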
def per_image_standardizer(self, image):
    stand = SamplewiseStandardizer(clip=6)
    # Convert the image to float64 and apply the NumPy-based standardizer as a
    # Python op via tf.py_func; the result comes back as a float64 tensor.
    image = tf.py_func(stand, [tf.to_double(image), False], tf.float64)
    return image
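`SamplewiseStandardizer` comes from the surrounding project and is not shown here; as an assumption about what it likely does (per-image zero-mean/unit-variance scaling with clipping), a plain NumPy stand-in might look like this sketch:

import numpy as np

def samplewise_standardize(image, clip=6.0):
    # Hypothetical stand-in: standardize each image to zero mean and unit
    # variance, then clip extreme values to +/- `clip`.
    image = image.astype(np.float64)
    image = (image - image.mean()) / (image.std() + 1e-8)
    return np.clip(image, -clip, clip)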
def tf_apply(self, x, update):
    inputs_to_merge = list()
    for name in self.inputs:
        # Previous input, by name or "*", like a normal network_spec.
        # Not using named_tensors here, as that could have unintended outcomes.
        if name == "*" or name == "previous":
            inputs_to_merge.append(x)
        elif name in self.named_tensors:
            inputs_to_merge.append(self.named_tensors[name])
        else:
            # The key is not among the available inputs: report the available
            # keys to the user and raise an error.
            keys = list(self.named_tensors)
            raise TensorForceError(
                'ComplexNetwork input "{}" doesn\'t exist. Available inputs: {}'.format(name, keys)
            )

    # Inspect the inputs and cast everything to the most precise dtype present,
    # so TensorFlow does not raise an error when concatenating mixed dtypes.
    # Quick & dirty cast that only promotes types:
    # bool=0, int32=10, int64=20, float32=30, float64=40.
    cast_type_level = 0
    cast_type_dict = {
        'bool': 0,
        'int32': 10,
        'int64': 20,
        'float32': 30,
        'float64': 40
    }
    cast_type_func_dict = {
        0: tf.identity,
        10: tf.to_int32,
        20: tf.to_int64,
        30: tf.to_float,
        40: tf.to_double
    }

    # Scan the inputs for the highest cast_type level.
    for tensor in inputs_to_merge:
        key = str(tensor.dtype.name)
        if key in cast_type_dict:
            if cast_type_dict[key] > cast_type_level:
                cast_type_level = cast_type_dict[key]
        else:
            raise TensorForceError('Network spec input does not support dtype {}'.format(key))

    # Cast any lower-level inputs up to the highest level found.
    for index, tensor in enumerate(inputs_to_merge):
        key = str(tensor.dtype.name)
        if cast_type_dict[key] < cast_type_level:
            inputs_to_merge[index] = cast_type_func_dict[cast_type_level](tensor)

    input_tensor = tf.concat(inputs_to_merge, self.axis)
    return input_tensor
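A minimal sketch of the promotion rule the table above encodes, using the TF 1.x cast helpers on two illustrative tensors (names made up for the example):

import tensorflow as tf

a = tf.constant([1, 2], dtype=tf.int32)        # level 10
b = tf.constant([0.5, 1.5], dtype=tf.float32)  # level 30 -> highest level wins
# Cast the lower-level tensor up before concatenating, mirroring the loop above.
merged = tf.concat([tf.to_float(a), b], axis=0)  # dtype float32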
def _to_term_frequency(x, vocab_size):
  """Creates a SparseTensor of term frequency for every doc/term pair.

  Args:
    x: A `SparseTensor` of int64 representing string indices in vocab.
    vocab_size: An int - the count of vocab used to turn the string into int64s
      including any OOV buckets.

  Returns:
    A `SparseTensor` with the count of times a term appears in a document at
    indices <doc_index_in_batch>, <term_index_in_vocab>,
    with size (num_docs_in_batch, vocab_size).
  """
  # Construct an intermediary sparse tensor with indices
  # [<doc>, <term_index_in_doc>, <vocab_id>] and tf.ones values.
  split_indices = tf.to_int64(
      tf.split(x.indices, axis=1, num_or_size_splits=2))
  expanded_values = tf.to_int64(tf.expand_dims(x.values, 1))
  next_index = tf.concat(
      [split_indices[0], split_indices[1], expanded_values], axis=1)

  next_values = tf.ones_like(x.values)
  vocab_size_as_tensor = tf.constant([vocab_size], dtype=tf.int64)
  next_shape = tf.concat(
      [x.dense_shape, vocab_size_as_tensor], 0)

  next_tensor = tf.SparseTensor(
      indices=tf.to_int64(next_index),
      values=next_values,
      dense_shape=next_shape)

  # Take the intermediary tensor and reduce over the term_index_in_doc
  # dimension. This produces a tensor with indices [<doc_id>, <term_id>],
  # values [count_of_term_in_doc], and shape batch x vocab_size.
  term_count_per_doc = tf.sparse_reduce_sum_sparse(next_tensor, 1)

  dense_doc_sizes = tf.to_double(tf.sparse_reduce_sum(tf.SparseTensor(
      indices=x.indices,
      values=tf.ones_like(x.values),
      dense_shape=x.dense_shape), 1))

  gather_indices = term_count_per_doc.indices[:, 0]
  gathered_doc_sizes = tf.gather(dense_doc_sizes, gather_indices)

  term_frequency = (tf.to_double(term_count_per_doc.values) /
                    tf.to_double(gathered_doc_sizes))
  return tf.SparseTensor(
      indices=term_count_per_doc.indices,
      values=term_frequency,
      dense_shape=term_count_per_doc.dense_shape)
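To make the doc/term bookkeeping concrete, here is a small pure-Python sketch (toy data, not part of the original module) of the per-document term frequencies this function produces:

from collections import Counter

# Two documents, already mapped to vocab ids (vocab_size = 4).
docs = [[0, 1, 1], [2]]

for doc_index, doc in enumerate(docs):
    counts = Counter(doc)
    for term_id, count in sorted(counts.items()):
        # term frequency = count of term in doc / number of terms in doc
        print((doc_index, term_id), count / float(len(doc)))
# (0, 0) -> 0.333..., (0, 1) -> 0.666..., (1, 2) -> 1.0, matching the sparse
# (doc_index_in_batch, term_index_in_vocab) -> value layout described above.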
def classifier_score(images, classifier_fn, num_batches=1):
  """Classifier score for evaluating a conditional generative model.

  This is based on the Inception Score, but for an arbitrary classifier.

  This technique is described in detail in https://arxiv.org/abs/1606.03498. In
  summary, this function calculates

    exp( E[ KL(p(y|x) || p(y)) ] )

  which captures how different the network's classification prediction is from
  the prior distribution over classes.

  Args:
    images: Images to calculate the classifier score for.
    classifier_fn: A function that takes images and produces logits based on a
      classifier.
    num_batches: Number of batches to split `images` into in order to
      efficiently run them through the classifier network.

  Returns:
    The classifier score. A floating-point scalar of the same type as the
    output of `classifier_fn`.
  """
  generated_images_list = tf.split(
      images, num_or_size_splits=num_batches)

  # Compute the classifier splits using the memory-efficient `map_fn`.
  logits = tf.map_fn(
      fn=classifier_fn,
      elems=tf.stack(generated_images_list),
      parallel_iterations=1,
      back_prop=False,
      swap_memory=True,
      name='RunClassifier')
  logits = tf.concat(tf.unstack(logits), 0)
  logits.shape.assert_has_rank(2)

  # Use maximum precision for best results.
  logits_dtype = logits.dtype
  if logits_dtype != tf.float64:
    logits = tf.to_double(logits)

  p = tf.nn.softmax(logits)
  q = tf.reduce_mean(p, axis=0)
  kl = _kl_divergence(p, logits, q)
  kl.shape.assert_has_rank(1)
  log_score = tf.reduce_mean(kl)
  final_score = tf.exp(log_score)

  if logits_dtype != tf.float64:
    final_score = tf.cast(final_score, logits_dtype)

  return final_score
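A small NumPy sketch of the score formula above, exp(E[KL(p(y|x) || p(y))]), computed on made-up softmax outputs rather than real classifier logits:

import numpy as np

# Hypothetical softmax outputs p(y|x) for 3 images over 2 classes.
p = np.array([[0.9, 0.1],
              [0.1, 0.9],
              [0.5, 0.5]])
q = p.mean(axis=0)                                # marginal distribution p(y)
kl = np.sum(p * (np.log(p) - np.log(q)), axis=1)  # KL(p(y|x) || p(y)) per image
score = np.exp(kl.mean())                         # classifier / Inception score, ~1.28 here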