def read_and_decode(self, example_serialized):
    """ Read and decode binarized, raw MNIST dataset from .tfrecords file generated by MNIST.py """
    num = self.flags['num_classes']
    # Parse features from binary file
    features = tf.parse_single_example(
        example_serialized,
        features={
            'image': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([num], tf.int64, default_value=[-1] * num),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64),
        })
    # Return the converted data
    label = features['label']
    image = tf.decode_raw(features['image'], tf.float32)
    image.set_shape([784])
    image = tf.reshape(image, [28, 28, 1])
    image = (image - 0.5) * 2  # max value = 1, min value = -1
    return image, tf.cast(label, tf.int32)
def read_and_decode(self, example_serialized):
    """ Read and decode binarized, raw MNIST dataset from .tfrecords file generated by MNIST.py """
    features = tf.parse_single_example(
        example_serialized,
        features={
            'image': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([self.flags['num_classes']], tf.int64,
                                        default_value=[-1] * self.flags['num_classes']),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64),
        })
    # Now return the converted data
    label = features['label']
    image = tf.decode_raw(features['image'], tf.float32)
    image.set_shape([784])
    image = tf.reshape(image, [28, 28, 1])
    image = (image - 0.5) * 2  # max value = 1, min value = -1
    return image, tf.cast(label, tf.int32)
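A minimal sketch of how read_and_decode might be wired into a TF 1.x queue-based input pipeline. The filename, batch size, and the surrounding class instance are assumptions for illustration, not part of the original code.

filename_queue = tf.train.string_input_producer(['MNIST.tfrecords'])
reader = tf.TFRecordReader()
_, example_serialized = reader.read(filename_queue)
image, label = self.read_and_decode(example_serialized)  # assumes this runs inside the same class
image_batch, label_batch = tf.train.shuffle_batch(
    [image, label], batch_size=32, capacity=1024, min_after_dequeue=256)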
def loss(logits, labels):
    """Add L2Loss to all the trainable variables.
    Add summary for "Loss" and "Loss/avg".
    Args:
        logits: Logits from inference().
        labels: Labels from distorted_inputs or inputs(). 1-D tensor
                of shape [batch_size]
    Returns:
        Loss tensor of type float.
    """
    # Calculate the average cross entropy loss across the batch.
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # The total loss is defined as the cross entropy loss plus all of the weight
    # decay terms (L2 loss).
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
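loss() simply sums everything in the 'losses' collection, so it only includes weight decay if the code that creates the weights adds an L2 term to that same collection. A minimal sketch of that convention; the variable shape and the 0.004 decay factor are illustrative, not taken from the original source.

weights = tf.get_variable('weights', shape=[784, 10],
                          initializer=tf.truncated_normal_initializer(stddev=0.04))
weight_decay = tf.nn.l2_loss(weights) * 0.004
tf.add_to_collection('losses', weight_decay)  # picked up by tf.add_n in loss()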
def _convert_string_dtype(dtype):
    if dtype == 'float16':
        return tf.float16
    elif dtype == 'float32':
        return tf.float32
    elif dtype == 'float64':
        return tf.float64
    elif dtype == 'int16':
        return tf.int16
    elif dtype == 'int32':
        return tf.int32
    elif dtype == 'int64':
        return tf.int64
    elif dtype == 'uint8':
        return tf.uint8
    elif dtype == 'uint16':
        return tf.uint16
    else:
        raise ValueError('Unsupported dtype:', dtype)
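A hypothetical use of _convert_string_dtype, resolving a NumPy-style dtype string when creating a variable; the array and names below are invented for illustration.

import numpy as np
import tensorflow as tf

value = np.zeros((3, 3), dtype='int64')
v = tf.Variable(value, dtype=_convert_string_dtype('int64'), name='v')  # dtype resolved to tf.int64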
def loss(logits, labels):
    """Add L2Loss to all the trainable variables.
    Add summary for "Loss" and "Loss/avg".
    Args:
        logits: Logits from inference().
        labels: Labels from distorted_inputs or inputs(). 1-D tensor
                of shape [batch_size]
    Returns:
        Loss tensor of type float.
    """
    # Calculate the average cross entropy loss across the batch.
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # The total loss is defined as the cross entropy loss plus all of the weight
    # decay terms (L2 loss).
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
def _impute2D(self, X_2D):
    r"""Mean impute a rank 2 tensor."""
    # Fill zeros in for missing data initially
    data_zeroed_missing_tf = X_2D * self.real_val_mask
    # Sum the real values in each column
    col_tot = tf.reduce_sum(data_zeroed_missing_tf, 0)
    # Divide column totals by the number of non-nan values
    num_values_col = tf.reduce_sum(self.real_val_mask, 0)
    num_values_col = tf.maximum(num_values_col,
                                tf.ones(tf.shape(num_values_col)))
    col_nan_means = tf.div(col_tot, num_values_col)
    # Make a vector of the impute values for each missing point
    imputed_vals = tf.gather(col_nan_means, self.missing_ind[:, 1])
    # Fill the imputed values into the data tensor of zeros
    shape = tf.cast(tf.shape(data_zeroed_missing_tf), dtype=tf.int64)
    missing_imputed = tf.scatter_nd(self.missing_ind, imputed_vals, shape)
    X_with_impute = data_zeroed_missing_tf + missing_imputed
    return X_with_impute
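The same scatter_nd-based mean imputation written as a self-contained sketch, with real_val_mask and missing_ind derived directly from NaN positions; all names and the tiny example matrix are illustrative and not part of the original class.

import numpy as np
import tensorflow as tf

X = tf.constant(np.array([[1.0, np.nan], [3.0, 4.0]], dtype=np.float32))
nan_mask = tf.is_nan(X)
real_val_mask = tf.cast(tf.logical_not(nan_mask), tf.float32)  # plays the role of self.real_val_mask
missing_ind = tf.where(nan_mask)                               # plays the role of self.missing_ind (int64 indices)
data_zeroed = tf.where(nan_mask, tf.zeros_like(X), X)
col_means = tf.reduce_sum(data_zeroed, 0) / tf.maximum(tf.reduce_sum(real_val_mask, 0), 1.0)
imputed_vals = tf.gather(col_means, missing_ind[:, 1])
shape = tf.cast(tf.shape(data_zeroed), tf.int64)
X_imputed = data_zeroed + tf.scatter_nd(missing_ind, imputed_vals, shape)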
def _impute2D(self, X_2D):
    r"""Randomly impute a rank 2 tensor."""
    # Fill zeros in for missing data initially
    data_zeroed_missing_tf = X_2D * self.real_val_mask
    # Draw a random value for each column from its Normal distribution
    col_draws = [n.sample(seed=next(seedgen)) for n in self.normal_array]
    # Make a vector of the impute values for each missing point
    imputed_vals = tf.gather(col_draws, self.missing_ind[:, 1])
    # Fill the imputed values into the data tensor of zeros
    shape = tf.cast(tf.shape(data_zeroed_missing_tf), dtype=tf.int64)
    missing_imputed = tf.scatter_nd(self.missing_ind, imputed_vals, shape)
    X_with_impute = data_zeroed_missing_tf + missing_imputed
    return X_with_impute
def testIsIterable(self):
    self.assertTrue(base_info._is_iterable((1, 2, 3)))
    self.assertTrue(base_info._is_iterable([1, 2, 3]))
    self.assertTrue(base_info._is_iterable({1: 1, 2: 2, 3: 3}))
    self.assertTrue(base_info._is_iterable(
        collections.OrderedDict([(1, 1), (2, 2)])))
    self.assertTrue(base_info._is_iterable(DumbNamedTuple(1, 2)))
    tensor = tf.placeholder(dtype=tf.float32, shape=(1, 10,))
    self.assertFalse(base_info._is_iterable(set([1, 2, 3])))
    self.assertFalse(base_info._is_iterable(tensor))
    sparse_tensor = tf.SparseTensor(
        indices=tf.placeholder(dtype=tf.int64, shape=(10, 2,)),
        values=tf.placeholder(dtype=tf.float32, shape=(10,)),
        dense_shape=tf.placeholder(dtype=tf.int64, shape=(2,)))
    self.assertFalse(base_info._is_iterable(sparse_tensor))
    self.assertFalse(base_info._is_iterable(NotATensor()))
    self.assertFalse(base_info._is_iterable("foo"))

    def generator():
        for count in xrange(3):
            self.assertFalse(False)
            yield count

    self.assertFalse(base_info._is_iterable(generator))
def testModuleInfo_sparsetensor(self):
    # pylint: disable=not-callable
    tf.reset_default_graph()
    dumb = DumbModule(name="dumb_a")
    sparse_tensor = tf.SparseTensor(
        indices=tf.placeholder(dtype=tf.int64, shape=(10, 2,)),
        values=tf.placeholder(dtype=tf.float32, shape=(10,)),
        dense_shape=tf.placeholder(dtype=tf.int64, shape=(2,)))
    dumb(sparse_tensor)

    def check():
        sonnet_collection = tf.get_default_graph().get_collection(
            base_info.SONNET_COLLECTION_NAME)
        connected_subgraph = sonnet_collection[0].connected_subgraphs[0]
        self.assertIsInstance(
            connected_subgraph.inputs["inputs"], tf.SparseTensor)
        self.assertIsInstance(connected_subgraph.outputs, tf.SparseTensor)

    check()
    _copy_default_graph()
    check()
def parse_example_proto(example_serialized):
    """Parses an Example proto containing a training example of an image.
    The output of the build_image_data.py image preprocessing script is a dataset
    containing serialized Example protocol buffers.
    """
    # Dense features in Example proto.
    feature_map = {
        'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
                                            default_value=''),
        'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
                                                default_value=-1),
    }
    with tf.name_scope('decode_tfrecord'):
        features = tf.parse_single_example(example_serialized, feature_map)
        image = decode_jpeg(features['image/encoded'])
        label = tf.cast(features['image/class/label'], dtype=tf.int32)
    return image, label
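A sketch of feeding parse_example_proto through a tf.data input pipeline (TF 1.4+); the file name is a placeholder, and decode_jpeg is assumed to be the helper defined alongside this function.

dataset = tf.data.TFRecordDataset(['train-00000-of-01024'])
dataset = dataset.map(parse_example_proto)
image, label = dataset.make_one_shot_iterator().get_next()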
def loss(logits, labels):
    """Add L2Loss to all the trainable variables.
    Add summary for "Loss" and "Loss/avg".
    Args:
        logits: Logits from inference().
        labels: Labels from distorted_inputs or inputs(). 1-D tensor
                of shape [batch_size]
    Returns:
        Loss tensor of type float.
    """
    # Calculate the average cross entropy loss across the batch.
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits,
        labels,
        name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # The total loss is defined as the cross entropy loss plus all of the weight
    # decay terms (L2 loss).
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
def testUniquesAnalyzerWithTokenization(self):
    def preprocessing_fn(inputs):
        return {
            'index': tft.string_to_int(tf.string_split(inputs['a']))
        }

    input_data = [{'a': 'hello hello world'}, {'a': 'hello goodbye world'}]
    input_metadata = dataset_metadata.DatasetMetadata({
        'a': sch.ColumnSchema(tf.string, [], sch.FixedColumnRepresentation())
    })
    expected_data = [{'index': [0, 0, 1]}, {'index': [0, 2, 1]}]
    expected_metadata = dataset_metadata.DatasetMetadata({
        'index': sch.ColumnSchema(
            sch.IntDomain(tf.int64, -1, 2, True,
                          'vocab_string_to_int_uniques'),
            [None], sch.ListColumnRepresentation())
    })
    self.assertAnalyzeAndTransformResults(
        input_data, input_metadata, preprocessing_fn, expected_data,
        expected_metadata)
def test_example_proto_coder_error(self):
    input_schema = dataset_schema.from_feature_spec({
        '2d_vector_feature': tf.FixedLenFeature(shape=[2, 2], dtype=tf.int64),
    })
    coder = example_proto_coder.ExampleProtoCoder(input_schema)

    example_decoded_value = {
        '2d_vector_feature': [1, 2, 3]
    }
    example_proto_text = """
    features {
      feature { key: "1d_vector_feature"
                value { int64_list { value: [ 1, 2, 3 ] } } }
    }
    """
    example = tf.train.Example()
    text_format.Merge(example_proto_text, example)

    # Ensure that we raise an exception for trying to encode invalid data.
    with self.assertRaisesRegexp(ValueError, 'got wrong number of values'):
        _ = coder.encode(example_decoded_value)

    # Ensure that we raise an exception for trying to parse invalid data.
    with self.assertRaisesRegexp(ValueError, 'got wrong number of values'):
        _ = coder.decode(example.SerializeToString())
def test_valency(self):
    data = ('11|12,"this is a ,text",categorical_value|other_value,1|3,89.0|'
            '91.0,12.0|15.0,False')
    feature_spec = self._INPUT_SCHEMA.as_feature_spec().copy()
    feature_spec['numeric1'] = tf.FixedLenFeature(shape=[2], dtype=tf.int64)
    schema = dataset_schema.from_feature_spec(feature_spec)
    multivalent_columns = ['numeric1', 'numeric2', 'y']
    coder = csv_coder.CsvCoder(self._COLUMNS, schema,
                               delimiter=',', secondary_delimiter='|',
                               multivalent_columns=multivalent_columns)
    expected_decoded = {'category1': ['categorical_value|other_value'],
                        'numeric1': [11, 12],
                        'numeric2': [89.0, 91.0],
                        'boolean1': [False],
                        'text1': 'this is a ,text',
                        'y': ([1, 3], [12.0, 15.0])}
    self._assert_encode_decode(coder, data, expected_decoded)
# Test successful decoding with a single column.
def testInferFeatureSchema(self):
    d = tf.placeholder(tf.int64, None)
    tensors = {
        'a': tf.placeholder(tf.float32, (None,)),
        'b': tf.placeholder(tf.string, (1, 2, 3)),
        'c': tf.placeholder(tf.int64, None),
        'd': d
    }
    d_column_schema = sch.ColumnSchema(tf.int64, [1, 2, 3],
                                       sch.FixedColumnRepresentation())
    api.set_column_schema(d, d_column_schema)
    schema = impl_helper.infer_feature_schema(tf.get_default_graph(), tensors)
    expected_schema = sch.Schema(column_schemas={
        'a': sch.ColumnSchema(tf.float32, [],
                              sch.FixedColumnRepresentation()),
        'b': sch.ColumnSchema(tf.string, [2, 3],
                              sch.FixedColumnRepresentation()),
        'c': sch.ColumnSchema(tf.int64, None,
                              sch.FixedColumnRepresentation()),
        'd': sch.ColumnSchema(tf.int64, [1, 2, 3],
                              sch.FixedColumnRepresentation())
    })
    self.assertEqual(schema, expected_schema)
def _from_sparse_feature_dict(feature_dict):
    """Translate a JSON sparse feature dict into a ColumnSchema."""
    # assume there is only one value column
    value_feature = feature_dict['valueFeature'][0]
    domain = _from_domain_dict(value_feature['domain'])

    index_feature_dicts = feature_dict['indexFeature']
    # int() is needed because protobuf JSON encodes int64 as string
    axes = [sch.Axis(int(index_feature_dict['size']))
            for index_feature_dict in index_feature_dicts]

    value_field_name = value_feature['name']
    index_fields = [sch.SparseIndexField(index_feature_dict['name'],
                                         index_feature_dict['isSorted'])
                    for index_feature_dict in index_feature_dicts]
    representation = sch.SparseColumnRepresentation(value_field_name,
                                                    index_fields)
    return sch.ColumnSchema(domain, axes, representation)
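An illustrative input for _from_sparse_feature_dict, shaped only from the keys the function actually reads; the feature names and size are invented.

feature_dict = {
    'valueFeature': [{'name': 'values', 'domain': {'floats': {}}}],
    'indexFeature': [{'name': 'indices', 'size': '10', 'isSorted': True}],
}
column_schema = _from_sparse_feature_dict(feature_dict)
# -> ColumnSchema with a FloatDomain, one Axis of size 10, and a SparseColumnRepresentation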
def _from_domain_dict(domain):
    """Translate a JSON domain dict into a Domain."""
    if domain.get('ints') is not None:
        def maybe_to_int(s):
            return int(s) if s is not None else None
        return sch.IntDomain(
            tf.int64,
            maybe_to_int(domain['ints'].get('min')),
            maybe_to_int(domain['ints'].get('max')),
            domain['ints'].get('isCategorical'),
            domain['ints'].get('vocabularyFile', ''))
    if domain.get('floats') is not None:
        return sch.FloatDomain(tf.float32)
    if domain.get('strings') is not None:
        return sch.StringDomain(tf.string)
    if domain.get('bools') is not None:
        return sch.BoolDomain(tf.bool)
    raise ValueError('Unknown domain: {}'.format(domain))
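For illustration (values invented): an 'ints' domain arrives with min and max JSON-encoded as strings, and maybe_to_int converts them back to Python ints.

domain = _from_domain_dict({'ints': {'min': '-1', 'max': '9', 'isCategorical': True}})
# -> sch.IntDomain(tf.int64, -1, 9, True, '')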
def _make_raw_schema(shape, should_add_unused_feature=False):
    schema = sch.Schema()
    schema.column_schemas['raw_a'] = (sch.ColumnSchema(
        tf.int64, shape, sch.FixedColumnRepresentation(default_value=0)))
    schema.column_schemas['raw_b'] = (sch.ColumnSchema(
        tf.int64, shape, sch.FixedColumnRepresentation(default_value=1)))
    schema.column_schemas['raw_label'] = (sch.ColumnSchema(
        tf.int64, shape, sch.FixedColumnRepresentation(default_value=-1)))
    if should_add_unused_feature:
        schema.column_schemas['raw_unused'] = (sch.ColumnSchema(
            tf.int64, shape, sch.FixedColumnRepresentation(default_value=1)))
    return schema