def _test_metric_spec(self, metric_spec, hyps, refs, expected_scores):
    """Tests a MetricSpec."""
    predictions = {"predicted_tokens": tf.placeholder(dtype=tf.string)}
    labels = {"target_tokens": tf.placeholder(dtype=tf.string)}
    value, update_op = metric_spec.create_metric_ops(None, labels, predictions)
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        scores = []
        for hyp, ref in zip(hyps, refs):
            hyp = hyp.split(" ")
            ref = ref.split(" ")
            sess.run(update_op, {
                predictions["predicted_tokens"]: [hyp],
                labels["target_tokens"]: [ref]
            })
            scores.append(sess.run(value))
        for score, expected in zip(scores, expected_scores):
            np.testing.assert_almost_equal(score, expected, decimal=2)
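# The (value, update_op) pair returned by create_metric_ops above follows
# TensorFlow's streaming-metric contract. A minimal, self-contained sketch of
# that contract using tf.metrics.mean (assumes TF 1.x and the usual
# `import tensorflow as tf`):
x = tf.placeholder(tf.float32)
mean_value, mean_update = tf.metrics.mean(x)
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # streaming metrics use local variables
    sess.run(mean_update, {x: 2.0})
    sess.run(mean_update, {x: 4.0})
    print(sess.run(mean_value))  # 3.0 -- running mean over both updates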
def parse_example(serialized_example):
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since all keys are required.
        features={
            'shape': tf.FixedLenFeature([], tf.string),
            'img_raw': tf.FixedLenFeature([], tf.string),
            'gt_raw': tf.FixedLenFeature([], tf.string),
            'example_name': tf.FixedLenFeature([], tf.string)
        })
    with tf.variable_scope('decoder'):
        shape = tf.decode_raw(features['shape'], tf.int32)
        image = tf.decode_raw(features['img_raw'], tf.float32)
        ground_truth = tf.decode_raw(features['gt_raw'], tf.uint8)
        example_name = features['example_name']
    with tf.variable_scope('image'):
        # Reshape and prepend a size-1 batch dimension.
        image = tf.expand_dims(tf.reshape(image, shape), 0)
    with tf.variable_scope('ground_truth'):
        # Reshape, dropping the channel dimension.
        ground_truth = tf.cast(tf.reshape(ground_truth, shape[:-1]), tf.float32)
    return image, ground_truth, example_name
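# A minimal sketch of wiring parse_example into an input pipeline with tf.data
# (TF 1.x assumed; "train.tfrecords" is a hypothetical file name):
dataset = tf.data.TFRecordDataset(["train.tfrecords"])
dataset = dataset.map(parse_example)
iterator = dataset.make_one_shot_iterator()
image, ground_truth, example_name = iterator.get_next()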
def _build_synset_lookup(imagenet_metadata_file):
    """Build lookup for synset to human-readable label.
    Args:
      imagenet_metadata_file: string, path to file containing mapping from
        synset to human-readable label.
        Assumes each line of the file looks like:
          n02119247 black fox
          n02119359 silver fox
          n02119477 red fox, Vulpes fulva
        where each line corresponds to a unique mapping. Note that each line is
        formatted as <synset>\t<human readable label>.
    Returns:
      Dictionary of synset to human labels, such as:
        'n02119022' --> 'red fox, Vulpes vulpes'
    """
    lines = tf.gfile.FastGFile(imagenet_metadata_file, 'r').readlines()
    synset_to_human = {}
    for l in lines:
        if l:
            parts = l.strip().split('\t')
            assert len(parts) == 2
            synset = parts[0]
            human = parts[1]
            synset_to_human[synset] = human
    return synset_to_human
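# Example usage, assuming a metadata file in the documented two-column format
# ("imagenet_metadata.txt" is a hypothetical path):
synset_to_human = _build_synset_lookup('imagenet_metadata.txt')
print(synset_to_human.get('n02119022'))  # e.g. 'red fox, Vulpes vulpes'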
def prepare_reader(self, filename_queue, batch_size=1024):
    reader = tf.TFRecordReader()
    _, serialized_examples = reader.read_up_to(filename_queue, batch_size)
    # Set the mapping from the fields to data types in the proto.
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))
    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
        feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
            [self.feature_sizes[feature_index]], tf.float32)
    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)
    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
def accumulate_strings(values, name="strings"):
    """Accumulates strings into a vector.
    Args:
      values: A 1-d string tensor that contains values to add to the accumulator.
      name: Optional name for the accumulator variable.
    Returns:
      A tuple (value_tensor, update_op).
    """
    tf.assert_type(values, tf.string)
    strings = tf.Variable(
        name=name,
        initial_value=[],
        dtype=tf.string,
        trainable=False,
        collections=[],
        validate_shape=True)
    value_tensor = tf.identity(strings)
    update_op = tf.assign(
        ref=strings, value=tf.concat([strings, values], 0), validate_shape=False)
    return value_tensor, update_op
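# The core trick above is tf.assign(..., validate_shape=False), which lets a
# variable grow past its initial [0] shape. A self-contained sketch of that
# technique (TF 1.x assumed; unlike the function above, this variable stays in
# the global collection so the standard initializer covers it):
acc = tf.Variable([], dtype=tf.string, trainable=False)
new_values = tf.placeholder(tf.string, shape=[None])
grow = tf.assign(acc, tf.concat([acc, new_values], 0), validate_shape=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(grow, {new_values: [b"foo", b"bar"]})
    sess.run(grow, {new_values: [b"baz"]})
    print(sess.run(acc))  # [b'foo' b'bar' b'baz']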
def testSimple(self):
    labels = [9, 3, 0]
    records = [self._record(labels[0], 0, 128, 255),
               self._record(labels[1], 255, 0, 1),
               self._record(labels[2], 254, 255, 0)]
    contents = b"".join([record for record, _ in records])
    expected = [expected for _, expected in records]
    filename = os.path.join(self.get_temp_dir(), "cifar")
    with open(filename, "wb") as f:
        f.write(contents)
    with self.test_session() as sess:
        q = tf.FIFOQueue(99, [tf.string], shapes=())
        q.enqueue([filename]).run()
        q.close().run()
        result = cifar10_input.read_cifar10(q)
        for i in range(3):
            key, label, uint8image = sess.run([
                result.key, result.label, result.uint8image])
            self.assertEqual("%s:%d" % (filename, i), tf.compat.as_text(key))
            self.assertEqual(labels[i], label)
            self.assertAllEqual(expected[i], uint8image)
        with self.assertRaises(tf.errors.OutOfRangeError):
            sess.run([result.key, result.uint8image])
def decode_jpeg(image_buffer, scope=None):
    """Decode a JPEG string into one 3-D uint8 image Tensor.
    Args:
      image_buffer: scalar string Tensor.
      scope: Optional scope for name_scope.
    Returns:
      3-D uint8 Tensor with RGB channels.
    """
    with tf.name_scope(scope or 'decode_jpeg'):
        # Decode the string as an RGB JPEG.
        # Note that the resulting image contains an unknown height and width
        # that is set dynamically by decode_jpeg. In other words, the height
        # and width of the image are unknown at graph-construction time.
        image = tf.image.decode_jpeg(image_buffer, channels=3,
                                     fancy_upscaling=False,
                                     dct_method='INTEGER_FAST')
        return image
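# A sketch of running decode_jpeg over one file read from disk
# ("photo.jpg" is a hypothetical path):
jpeg_data = tf.placeholder(dtype=tf.string)
decoded = decode_jpeg(jpeg_data)
with tf.Session() as sess:
    with tf.gfile.FastGFile('photo.jpg', 'rb') as f:
        image = sess.run(decoded, feed_dict={jpeg_data: f.read()})
print(image.shape)  # (height, width, 3), dtype uint8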
def make_png_thumbnail(x, n):
    '''
    Input:
        `x`: Tensor, value range=[-1, 1), shape=[n*n, h, w, c]
        `n`: sqrt of the number of images
    Return:
        `tf.string` (bytes) of the PNG.
        (write these bytes directly into a file)
    '''
    with tf.name_scope('MakeThumbnail'):
        _, h, w, c = x.get_shape().as_list()
        x = tf.reshape(x, [n, n, h, w, c])
        x = tf.transpose(x, [0, 2, 1, 3, 4])
        x = tf.reshape(x, [n * h, n * w, c])
        x = x / 2. + .5
        x = tf.image.convert_image_dtype(x, tf.uint8, saturate=True)
        x = tf.image.encode_png(x)
        return x
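# Usage sketch: evaluate the op and write the returned bytes straight to disk
# (the random batch of 16 images and 'thumbnail.png' are illustrative):
images = tf.random_uniform([16, 32, 32, 3], minval=-1., maxval=1.)
png_bytes = make_png_thumbnail(images, n=4)
with tf.Session() as sess:
    with open('thumbnail.png', 'wb') as f:
        f.write(sess.run(png_bytes))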
def make_png_jet_thumbnail(x, n):
    '''
    Input:
        `x`: Tensor, value range=[-1, 1), shape=[n*n, h, w, c]
        `n`: sqrt of the number of images
    Return:
        `tf.string` (bytes) of the PNG.
        (write these bytes directly into a file)
    '''
    with tf.name_scope('MakeThumbnail'):
        _, h, w, c = x.get_shape().as_list()
        x = tf.reshape(x, [n, n, h, w, c])
        x = tf.transpose(x, [0, 2, 1, 3, 4])
        x = tf.reshape(x, [n * h, n * w, c])
        x = x / 2. + .5
        x = gray2jet(x)
        x = tf.image.convert_image_dtype(x, tf.uint8, saturate=True)
        x = tf.image.encode_png(x)
        return x
def get_shapes_and_dtypes(data):
    shapes = {}
    dtypes = {}
    for k in data.keys():
        if isinstance(data[k][0], str):
            shapes[k] = []
            dtypes[k] = tf.string
        elif isinstance(data[k][0], np.ndarray):
            shapes[k] = data[k][0].shape
            dtypes[k] = tf.uint8
        elif isinstance(data[k][0], np.bool_):
            shapes[k] = []
            dtypes[k] = tf.string
        else:
            raise TypeError('Unknown data type', type(data[k][0]))
    return shapes, dtypes
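# Example: derive placeholder shapes/dtypes from one sample per field
# (the data dict below is illustrative):
data = {'label': ['cat', 'dog'],
        'image': [np.zeros((32, 32, 3), np.uint8)] * 2}
shapes, dtypes = get_shapes_and_dtypes(data)
# shapes == {'label': [], 'image': (32, 32, 3)}
# dtypes == {'label': tf.string, 'image': tf.uint8}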
def make_example(filename, image_data, labels, text, height, width):
    """Build an Example proto for an example.
    Args:
      filename: string, path to an image file, e.g., '/path/to/example.JPG'
      image_data: string, JPEG encoding of grayscale image
      labels: integer list, identifiers for the ground truth for the network
      text: string, unique human-readable, e.g. 'dog'
      height: integer, image height in pixels
      width: integer, image width in pixels
    Returns:
      Example proto
    """
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': _bytes_feature(tf.compat.as_bytes(image_data)),
        'image/labels': _int64_feature(labels),
        'image/height': _int64_feature([height]),
        'image/width': _int64_feature([width]),
        'image/filename': _bytes_feature(tf.compat.as_bytes(filename)),
        'text/string': _bytes_feature(tf.compat.as_bytes(text)),
        'text/length': _int64_feature([len(text)])
    }))
    return example
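# make_example assumes two small helpers that this snippet does not include;
# typical definitions (an assumption here) follow the TensorFlow docs:
def _bytes_feature(value):
    """Wraps a bytes value in a tf.train.Feature."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    """Wraps an int or list of ints in a tf.train.Feature."""
    if not isinstance(value, (list, tuple)):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))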
def image_reading(path: str, resized_size: Tuple[int, int]=None, data_augmentation: bool=False,
                  padding: bool=False) -> Tuple[tf.Tensor, tf.Tensor]:
    # Read image
    image_content = tf.read_file(path, name='image_reader')
    image = tf.cond(tf.equal(tf.string_split([path], '.').values[1], tf.constant('jpg', dtype=tf.string)),
                    true_fn=lambda: tf.image.decode_jpeg(image_content, channels=1, try_recover_truncated=True),  # TODO channels = 3 ?
                    false_fn=lambda: tf.image.decode_png(image_content, channels=1),
                    name='image_decoding')
    # Data augmentation
    if data_augmentation:
        image = augment_data(image)
    # Padding
    if padding:
        with tf.name_scope('padding'):
            image, img_width = padding_inputs_width(image, resized_size,
                                                    increment=CONST.DIMENSION_REDUCTION_W_POOLING)
    # Resize
    else:
        image = tf.image.resize_images(image, size=resized_size)
        img_width = tf.shape(image)[1]
    with tf.control_dependencies([tf.assert_equal(image.shape[:2], resized_size)]):
        return image, img_width
def build_inputs_and_outputs(self):
    if self.frame_features:
        serialized_examples = tf.placeholder(tf.string, shape=(None,))
        fn = lambda x: self.build_prediction_graph(x)
        video_id_output, top_indices_output, top_predictions_output = (
            tf.map_fn(fn, serialized_examples,
                      dtype=(tf.string, tf.int32, tf.float32)))
    else:
        serialized_examples = tf.placeholder(tf.string, shape=(None,))
        video_id_output, top_indices_output, top_predictions_output = (
            self.build_prediction_graph(serialized_examples))
    inputs = {"example_bytes":
              saved_model_utils.build_tensor_info(serialized_examples)}
    outputs = {
        "video_id": saved_model_utils.build_tensor_info(video_id_output),
        "class_indexes": saved_model_utils.build_tensor_info(top_indices_output),
        "predictions": saved_model_utils.build_tensor_info(top_predictions_output)}
    return inputs, outputs
def prepare_serialized_examples(self, serialized_examples):
    # Set the mapping from the fields to data types in the proto.
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))
    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
        feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
            [self.feature_sizes[feature_index]], tf.float32)
    features = tf.parse_example(serialized_examples, features=feature_map)
    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)
    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
def _convert_to_example(filename, video_buffer, label, text, height, width, sequence_length):
    """Deprecated: use _convert_to_sequential_example instead.
    Build an Example proto for an example.
    Args:
      filename: string, path to a video file, e.g., '/path/to/example.avi'
      video_buffer: numpy array with the video frames, with dims [n_frames, height, width, n_channels]
      label: integer or list of integers, identifier for the ground truth for the network
      text: string, unique human-readable, e.g. 'dog'
      height: integer, image height in pixels
      width: integer, image width in pixels
      sequence_length: real length of the data, i.e. number of frames that are not zero-padding
    Returns:
      Example proto
    """
    example = tf.train.Example(features=tf.train.Features(feature={
        'sequence_length': _int64_feature(sequence_length),
        'height': _int64_feature(height),
        'width': _int64_feature(width),
        'class/label': _int64_feature(label),
        'class/text': _bytes_feature(text),
        'filename': _bytes_feature(os.path.basename(filename)),
        'frames': _bytes_feature(video_buffer.tostring())}))
    return example
def __init__(self):
    # Create a single Session to run all image coding calls.
    self._sess = tf.Session()
    # Initializes function that decodes video.
    self._video_path = tf.placeholder(dtype=tf.string)
    self._decode_video = decode_video(self._video_path)
    # Initialize function that resizes a frame.
    self._resize_video_data = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
    # Initialize functions to JPEG-encode a frame and PNG-encode a mask.
    self._raw_frame = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    self._raw_mask = tf.placeholder(dtype=tf.uint8, shape=[None, None, 1])
    self._encode_frame = tf.image.encode_jpeg(self._raw_frame, quality=100)
    self._encode_mask = tf.image.encode_png(self._raw_mask)
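# A hedged sketch of companion methods such a coder would expose, matching the
# placeholders wired up in __init__ (the method names are assumptions):
def encode_frame(self, frame):
    """JPEG-encodes a single uint8 RGB frame."""
    return self._sess.run(self._encode_frame, feed_dict={self._raw_frame: frame})

def encode_mask(self, mask):
    """PNG-encodes a single-channel uint8 mask."""
    return self._sess.run(self._encode_mask, feed_dict={self._raw_mask: mask})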
def setup_reader(self, image_paths, image_shape, num_concurrent, batch_size):
    # The path queue holds (index, image path) pairs that are consumed by the
    # processing queue below.
    num_images = len(image_paths)
    indices = tf.range(0, num_images, 1)
    self.path_queue = tf.FIFOQueue(capacity=num_images, dtypes=[tf.int32, tf.string], name='path_queue')
    self.enqueue_path = self.path_queue.enqueue_many([indices, image_paths])
    self.close_path = self.path_queue.close()
    processed_queue = tf.FIFOQueue(capacity=num_images,
                                   dtypes=[tf.int32, tf.float32],
                                   shapes=[(), image_shape],
                                   name='processed_queue')
    (idx, processed_image) = self.process()
    enqueue_process = processed_queue.enqueue([idx, processed_image])
    self.dequeue_batch = processed_queue.dequeue_many(batch_size)
    self.queue_runner = tf.train.QueueRunner(processed_queue, [enqueue_process] * num_concurrent)
def test_compiler_input_tensor(self):
    input_tensor = tf.Variable(['foobar', 'baz'],
                               dtype=tf.string, name='input_variable')
    init_op = tf.global_variables_initializer()
    root_block = tdb.InputTransform(len) >> tdb.Scalar()
    compiler = tdc.Compiler()
    compiler.compile(root_block)
    compiler.init_loom(max_depth=1, input_tensor=input_tensor)
    output_tensor, = compiler.output_tensors
    with self.test_session() as sess:
        sess.run(init_op)
        results = sess.run(output_tensor)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0], 6.)
        self.assertEqual(results[1], 3.)
        sess.run(input_tensor.assign(['foo', 'blah']))
        results = sess.run(output_tensor)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0], 3.)
        self.assertEqual(results[1], 4.)
def create(cls, mode):
    """Creates a plan.
    Args:
      mode: A string; 'train', 'eval', or 'infer'.
    Raises:
      ValueError: If `mode` is invalid.
    Returns:
      A Plan.
    """
    cases = {Plan.mode_keys.TRAIN: TrainPlan,
             Plan.mode_keys.EVAL: EvalPlan,
             Plan.mode_keys.INFER: InferPlan}
    if mode not in cases:
        raise ValueError('invalid mode %r not in %s' % (mode, sorted(cases)))
    return cases[mode]()
def parse_mnist_tfrec(tfrecord, features_shape):
    tfrecord_features = tf.parse_single_example(
        tfrecord,
        features={
            'features': tf.FixedLenFeature([], tf.string),
            'targets': tf.FixedLenFeature([], tf.string)
        }
    )
    features = tf.decode_raw(tfrecord_features['features'], tf.uint8)
    features = tf.reshape(features, features_shape)
    features = tf.cast(features, tf.float32)
    targets = tf.decode_raw(tfrecord_features['targets'], tf.uint8)
    targets = tf.reshape(targets, [])
    targets = tf.one_hot(indices=targets, depth=10, on_value=1, off_value=0)
    targets = tf.cast(targets, tf.float32)
    return features, targets
def parse_mnist_tfrec(tfrecord, name, features_shape, scalar_targs=False):
    tfrecord_features = tf.parse_single_example(
        tfrecord,
        features={
            'features': tf.FixedLenFeature([], tf.string),
            'targets': tf.FixedLenFeature([], tf.string)
        },
        name=name + '_data'
    )
    with tf.variable_scope('features'):
        features = tf.decode_raw(
            tfrecord_features['features'], tf.uint8
        )
        features = tf.reshape(features, features_shape)
        features = tf.cast(features, tf.float32)
    with tf.variable_scope('targets'):
        targets = tf.decode_raw(tfrecord_features['targets'], tf.uint8)
        if scalar_targs:
            targets = tf.reshape(targets, [])
        targets = tf.one_hot(
            indices=targets, depth=10, on_value=1, off_value=0
        )
        targets = tf.cast(targets, tf.float32)
    return features, targets
def get_placeholder_input_fn(metadata):
    """Wrap the get-input-features function to provide the metadata."""
    def get_input_features():
        """Read the input features from the given placeholder."""
        examples = tf.placeholder(
            dtype=tf.string,
            shape=(None,),
            name='input_example')
        features = ml.features.FeatureMetadata.parse_features(metadata, examples,
                                                              keep_target=False)
        features[EXAMPLES_PLACEHOLDER_KEY] = examples
        # The target feature column is not used for prediction, so return None.
        return features, None
    # Return a function that feeds the features into the model from a placeholder.
    return get_input_features
def make_input_schema(mode=tf.contrib.learn.ModeKeys.TRAIN):
    """Input schema definition.
    Args:
      mode: tf.contrib.learn.ModeKeys specifying if the schema is being used for
        train/eval or prediction.
    Returns:
      A `Schema` object.
    """
    result = ({} if mode == tf.contrib.learn.ModeKeys.INFER
              else {'clicked': tf.FixedLenFeature(shape=[], dtype=tf.int64)})
    for name in INTEGER_COLUMN_NAMES:
        result[name] = tf.FixedLenFeature(
            shape=[], dtype=tf.int64, default_value=-1)
    for name in CATEGORICAL_COLUMN_NAMES:
        result[name] = tf.FixedLenFeature(shape=[], dtype=tf.string,
                                          default_value='')
    return dataset_schema.from_feature_spec(result)
def example_serving_input_fn(default_batch_size=None):
    """Build the serving inputs.
    Args:
      default_batch_size (int): Batch size for the tf.placeholder shape.
    """
    feature_spec = {}
    for feat in CONTINUOUS_COLS:
        feature_spec[feat] = tf.FixedLenFeature(shape=[], dtype=tf.int64)
    for feat, _ in CATEGORICAL_COLS:
        feature_spec[feat] = tf.FixedLenFeature(shape=[], dtype=tf.string)
    example_bytestring = tf.placeholder(
        shape=[default_batch_size],
        dtype=tf.string,
    )
    features = tf.parse_example(example_bytestring, feature_spec)
    return features, {'example': example_bytestring}
def parse_label_column(label_string_tensor):
    """Parses a string tensor into the label tensor.
    Args:
      label_string_tensor: Tensor of dtype string. Result of parsing the
        CSV column specified by LABEL_COLUMN.
    Returns:
      A Tensor of the same shape as label_string_tensor: an int64 Tensor
      representing the label index for classification tasks, or a float32
      Tensor representing the value for a regression task.
    """
    # Build a lookup table inside the graph.
    table = tf.contrib.lookup.index_table_from_tensor(tf.constant(LABELS))
    # Use the table to convert string labels to label indices.
    return table.lookup(label_string_tensor)
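# Usage sketch: index_table_from_tensor builds a lookup table that must be
# initialized with tf.tables_initializer() before use (the LABELS values
# below are illustrative):
LABELS = ['<=50K', '>50K']
label_ids = parse_label_column(tf.constant(['>50K', '<=50K']))
with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(label_ids))  # [1 0]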
# ************************************************************************
# YOU NEED NOT MODIFY ANYTHING BELOW HERE TO ADAPT THIS MODEL TO YOUR DATA
# ************************************************************************
def _make_schema(columns, types, default_values):
    """Input schema definition.
    Args:
      columns: column names for fields appearing in input.
      types: column types for fields appearing in input.
      default_values: default values for fields appearing in input.
    Returns:
      feature_set dictionary of string to *Feature.
    """
    result = {}
    assert len(columns) == len(types)
    assert len(columns) == len(default_values)
    for c, t, v in zip(columns, types, default_values):
        if isinstance(t, list):
            result[c] = tf.VarLenFeature(dtype=t[0])
        else:
            result[c] = tf.FixedLenFeature(shape=[], dtype=t, default_value=v)
    return dataset_schema.from_feature_spec(result)
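# Example call (column names, dtypes, and defaults are illustrative; a list
# type such as [tf.string] selects the VarLenFeature branch, whose default
# value is ignored):
schema = _make_schema(columns=['id', 'label', 'tags'],
                      types=[tf.string, tf.int64, [tf.string]],
                      default_values=['', 0, None])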
def build_prediction_graph(self):
    """Builds prediction graph and registers appropriate endpoints."""
    tensors = self.build_graph(None, 1, GraphMod.PREDICT)
    keys_placeholder = tf.placeholder(tf.string, shape=[None])
    inputs = {
        'key': keys_placeholder,
        'image_bytes': tensors.input_jpeg
    }
    # To extract the id, we need to add the identity function.
    keys = tf.identity(keys_placeholder)
    outputs = {
        'key': keys,
        'prediction': tensors.predictions[0],
        'scores': tensors.predictions[1]
    }
    return inputs, outputs
def make_input_schema(mode=tf.contrib.learn.ModeKeys.TRAIN):
    """Input schema definition.
    Args:
      mode: tf.contrib.learn.ModeKeys specifying if the schema is being used for
        train/eval or prediction.
    Returns:
      A `Schema` object.
    """
    result = ({} if mode == tf.contrib.learn.ModeKeys.INFER else {
        'score': tf.FixedLenFeature(shape=[], dtype=tf.float32)
    })
    result.update({
        'subreddit': tf.FixedLenFeature(shape=[], dtype=tf.string),
        'author': tf.FixedLenFeature(shape=[], dtype=tf.string),
        'comment_body': tf.FixedLenFeature(shape=[], dtype=tf.string,
                                           default_value=''),
        'comment_parent_body': tf.FixedLenFeature(shape=[], dtype=tf.string,
                                                  default_value=''),
        'toplevel': tf.FixedLenFeature(shape=[], dtype=tf.int64),
    })
    return dataset_schema.from_feature_spec(result)
def __init__(self, dtype, shape, name, doc_string=None, shape_string=None):
    """Create a new TensorPort.
    Args:
      dtype: the (TF) data type of the port.
      shape: the shape of the tensor.
      name: the name of this port (should be a valid TF name).
      doc_string: a documentation string associated with this port.
      shape_string: a string of the form [size_1, size_2, size_3] where size_i
        is a text describing the size of the tensor's dimension i (such as
        "number of batches").
    """
    self.dtype = dtype
    self.shape = shape
    self.name = name
    self.__doc__ = doc_string
    self.shape_string = shape_string
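# Example port definition (the TensorPort class name is taken from the
# docstring above; the field values are illustrative):
question_port = TensorPort(dtype=tf.int32,
                           shape=[None, None],
                           name='question',
                           doc_string='Batch of token ids for the question.',
                           shape_string='[batch_size, max_num_tokens]')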