def prepare_serialized_examples(self, serialized_examples):
  """Parses a batch of serialized tf.Example protos into model inputs."""
  # Set the mapping from the fields to data types in the proto.
  num_features = len(self.feature_names)
  assert num_features > 0, "self.feature_names is empty!"
  assert len(self.feature_names) == len(self.feature_sizes), \
      "length of feature_names (={}) != length of feature_sizes (={})".format(
          len(self.feature_names), len(self.feature_sizes))

  feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                 "labels": tf.VarLenFeature(tf.int64)}
  for feature_index in range(num_features):
    feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
        [self.feature_sizes[feature_index]], tf.float32)

  features = tf.parse_example(serialized_examples, features=feature_map)
  labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
  labels.set_shape([None, self.num_classes])
  concatenated_features = tf.concat(
      [features[feature_name] for feature_name in self.feature_names], 1)

  return (features["video_id"], concatenated_features, labels,
          tf.ones([tf.shape(serialized_examples)[0]]))
def transform_wav_data_op(wav_data_tensor, hparams, is_training,
                          jitter_amount_sec):
  """Transforms wav data."""
  def transform_wav_data(wav_data):
    """Transforms wav data."""
    # Only do audio transformations during training.
    if is_training:
      wav_data = audio_io.jitter_wav_data(wav_data, hparams.sample_rate,
                                          jitter_amount_sec)
    # Normalize.
    if hparams.normalize_audio:
      wav_data = audio_io.normalize_wav_data(wav_data, hparams.sample_rate)
    return [wav_data]

  return tf.py_func(
      transform_wav_data,
      [wav_data_tensor],
      tf.string,
      name='transform_wav_data_op')
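A hedged usage sketch for the op above. The hparams fields are assumptions inferred from the function body; with is_training=False and normalize_audio=False neither audio_io call is reached, so the op passes the bytes through unchanged:

import collections
import tensorflow as tf

# Hypothetical hparams container holding only the fields referenced above.
Hparams = collections.namedtuple("Hparams", ["sample_rate", "normalize_audio"])
hparams = Hparams(sample_rate=16000, normalize_audio=False)

wav_placeholder = tf.placeholder(tf.string, shape=())
wav_op = transform_wav_data_op(wav_placeholder, hparams,
                               is_training=False, jitter_amount_sec=0.0)
with tf.Session() as sess:
  # Placeholder bytes; nothing decodes them on this code path.
  out = sess.run(wav_op, feed_dict={wav_placeholder: b"fake wav bytes"})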
def build_inputs_and_outputs(self):
  """Builds the serving input and output tensors for model export."""
  if self.frame_features:
    serialized_examples = tf.placeholder(tf.string, shape=(None,))
    fn = lambda x: self.build_prediction_graph(x)
    video_id_output, top_indices_output, top_predictions_output = (
        tf.map_fn(fn, serialized_examples,
                  dtype=(tf.string, tf.int32, tf.float32)))
  else:
    serialized_examples = tf.placeholder(tf.string, shape=(None,))
    video_id_output, top_indices_output, top_predictions_output = (
        self.build_prediction_graph(serialized_examples))

  inputs = {"example_bytes":
            saved_model_utils.build_tensor_info(serialized_examples)}
  outputs = {
      "video_id": saved_model_utils.build_tensor_info(video_id_output),
      "class_indexes": saved_model_utils.build_tensor_info(top_indices_output),
      "predictions": saved_model_utils.build_tensor_info(top_predictions_output)}
  return inputs, outputs
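A sketch of how the returned dicts are typically consumed when exporting a TF 1.x SavedModel. The signature_def_utils call is standard TF 1.x; `model` is a hypothetical instance of the class this method belongs to:

inputs, outputs = model.build_inputs_and_outputs()
signature = tf.saved_model.signature_def_utils.build_signature_def(
    inputs=inputs,
    outputs=outputs,
    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)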
def __init__(self):
  # Create a single Session to run all image coding calls.
  self._sess = tf.Session()

  # Initializes function that converts PNG to JPEG data.
  self._png_data = tf.placeholder(dtype=tf.string)
  image = tf.image.decode_png(self._png_data, channels=3)
  self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100)

  # Initializes function that converts CMYK JPEG data to RGB JPEG data.
  self._cmyk_data = tf.placeholder(dtype=tf.string)
  image = tf.image.decode_jpeg(self._cmyk_data, channels=0)
  self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100)

  # Initializes function that decodes RGB JPEG data.
  self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
  self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)
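Coder classes built this way usually expose small helpers that run the prebuilt ops; a sketch of one such method, assuming it lives on the same class as the constructor above:

def decode_jpeg(self, image_data):
  # Run the pre-wired decode op on raw JPEG bytes; returns an HxWx3 array.
  image = self._sess.run(self._decode_jpeg,
                         feed_dict={self._decode_jpeg_data: image_data})
  assert len(image.shape) == 3
  assert image.shape[2] == 3
  return image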
def _is_cmyk(filename):
  """Determine if file contains a CMYK JPEG format image.

  Args:
    filename: string, path of the image file.
  Returns:
    boolean indicating if the image is a JPEG encoded with CMYK color space.
  """
  # File list from:
  # https://github.com/cytsai/ilsvrc-cmyk-image-list
  blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG',
               'n02447366_23489.JPEG', 'n02492035_15739.JPEG',
               'n02747177_10752.JPEG', 'n03018349_4028.JPEG',
               'n03062245_4620.JPEG', 'n03347037_9675.JPEG',
               'n03467068_12171.JPEG', 'n03529860_11437.JPEG',
               'n03544143_17228.JPEG', 'n03633091_5218.JPEG',
               'n03710637_5125.JPEG', 'n03961711_5286.JPEG',
               'n04033995_2932.JPEG', 'n04258138_17003.JPEG',
               'n04264628_27969.JPEG', 'n04336792_7448.JPEG',
               'n04371774_5854.JPEG', 'n04596742_4225.JPEG',
               'n07583066_647.JPEG', 'n13037406_4650.JPEG']
  return filename.split('/')[-1] in blacklist
def _find_image_bounding_boxes(filenames, image_to_bboxes):
  """Find the bounding boxes for a given list of image files.

  Args:
    filenames: list of strings; each string is a path to an image file.
    image_to_bboxes: dictionary mapping image file names to a list of
      bounding boxes. This list contains 0+ bounding boxes.
  Returns:
    List of bounding boxes for each image. Each entry may contain 0+
    bounding boxes, one per annotation for the image.
  """
  num_image_bbox = 0
  bboxes = []
  for f in filenames:
    basename = os.path.basename(f)
    if basename in image_to_bboxes:
      bboxes.append(image_to_bboxes[basename])
      num_image_bbox += 1
    else:
      bboxes.append([])
  print('Found %d images with bboxes out of %d images' % (
      num_image_bbox, len(filenames)))
  return bboxes
def _process_dataset(name, directory, num_shards, synset_to_human,
                     image_to_bboxes):
  """Process a complete data set and save it as a TFRecord.

  Args:
    name: string, unique identifier specifying the data set.
    directory: string, root path to the data set.
    num_shards: integer number of shards for this data set.
    synset_to_human: dict of synset to human labels, e.g.,
      'n02119022' --> 'red fox, Vulpes vulpes'
    image_to_bboxes: dictionary mapping image file names to a list of
      bounding boxes. This list contains 0+ bounding boxes.
  """
  filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file)
  humans = _find_human_readable_labels(synsets, synset_to_human)
  bboxes = _find_image_bounding_boxes(filenames, image_to_bboxes)
  _process_image_files(name, filenames, synsets, labels,
                       humans, bboxes, num_shards)
def _build_synset_lookup(imagenet_metadata_file):
  """Build lookup for synset to human-readable label.

  Args:
    imagenet_metadata_file: string, path to file containing mapping from
      synset to human-readable label.

      Assumes each line of the file looks like:
        n02119247    black fox
        n02119359    silver fox
        n02119477    red fox, Vulpes fulva
      where each line corresponds to a unique mapping. Note that each line is
      formatted as <synset>\t<human readable label>.
  Returns:
    Dictionary of synset to human labels, such as:
      'n02119022' --> 'red fox, Vulpes vulpes'
  """
  lines = tf.gfile.FastGFile(imagenet_metadata_file, 'r').readlines()
  synset_to_human = {}
  for l in lines:
    if l:
      parts = l.strip().split('\t')
      assert len(parts) == 2
      synset = parts[0]
      human = parts[1]
      synset_to_human[synset] = human
  return synset_to_human
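A brief usage sketch (the metadata path is illustrative; the synset is the one from the docstring above):

synset_to_human = _build_synset_lookup('imagenet_metadata.txt')
print(synset_to_human.get('n02119022'))  # e.g. 'red fox, Vulpes vulpes'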
def read_and_decode(self, example_serialized):
  """Read and decode binarized, raw MNIST dataset from .tfrecords file generated by MNIST.py."""
  num = self.flags['num_classes']

  # Parse features from binary file.
  features = tf.parse_single_example(
      example_serialized,
      features={
          'image': tf.FixedLenFeature([], tf.string),
          'label': tf.FixedLenFeature([num], tf.int64, default_value=[-1] * num),
          'height': tf.FixedLenFeature([], tf.int64),
          'width': tf.FixedLenFeature([], tf.int64),
          'depth': tf.FixedLenFeature([], tf.int64),
      })

  # Return the converted data.
  label = features['label']
  image = tf.decode_raw(features['image'], tf.float32)
  image.set_shape([784])
  image = tf.reshape(image, [28, 28, 1])
  image = (image - 0.5) * 2  # max value = 1, min value = -1
  return image, tf.cast(label, tf.int32)
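A sketch of wiring this into a TF 1.x queue-based input pipeline. The filename is illustrative, and `loader` stands for a hypothetical instance of the class owning the method; batching works here because the image and label shapes are fully defined:

filename_queue = tf.train.string_input_producer(['mnist_train.tfrecords'])
reader = tf.TFRecordReader()
_, example_serialized = reader.read(filename_queue)
image, label = loader.read_and_decode(example_serialized)
image_batch, label_batch = tf.train.batch([image, label], batch_size=32)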
def read_and_decode(self, example_serialized):
  """Read and decode binarized, raw MNIST dataset from .tfrecords file generated by MNIST.py."""
  features = tf.parse_single_example(
      example_serialized,
      features={
          'image': tf.FixedLenFeature([], tf.string),
          'label': tf.FixedLenFeature([self.flags['num_classes']], tf.int64,
                                      default_value=[-1] * self.flags['num_classes']),
          'height': tf.FixedLenFeature([], tf.int64),
          'width': tf.FixedLenFeature([], tf.int64),
          'depth': tf.FixedLenFeature([], tf.int64),
      })

  # Now return the converted data.
  label = features['label']
  image = tf.decode_raw(features['image'], tf.float32)
  image.set_shape([784])
  image = tf.reshape(image, [28, 28, 1])
  image = (image - 0.5) * 2  # max value = 1, min value = -1
  return image, tf.cast(label, tf.int32)
cifar10_input_test.py, from the project facial-emotion-detection-dl (author: dllatas)
def testSimple(self):
  labels = [9, 3, 0]
  records = [self._record(labels[0], 0, 128, 255),
             self._record(labels[1], 255, 0, 1),
             self._record(labels[2], 254, 255, 0)]
  contents = b"".join([record for record, _ in records])
  expected = [expected for _, expected in records]
  filename = os.path.join(self.get_temp_dir(), "cifar")
  with open(filename, "wb") as f:
    f.write(contents)

  with self.test_session() as sess:
    q = tf.FIFOQueue(99, [tf.string], shapes=())
    q.enqueue([filename]).run()
    q.close().run()
    result = cifar10_input.read_cifar10(q)

    for i in range(3):
      key, label, uint8image = sess.run([
          result.key, result.label, result.uint8image])
      self.assertEqual("%s:%d" % (filename, i), tf.compat.as_text(key))
      self.assertEqual(labels[i], label)
      self.assertAllEqual(expected[i], uint8image)

    with self.assertRaises(tf.errors.OutOfRangeError):
      sess.run([result.key, result.uint8image])
def __init__(self, data_dir, data_list, input_size, random_scale, coord):
  '''Initialise an ImageReader.

  Args:
    data_dir: path to the directory with images and masks.
    data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
    input_size: a tuple with (height, width) values, to which all the images will be resized.
    random_scale: whether to randomly scale the images prior to random crop.
    coord: TensorFlow queue coordinator.
  '''
  self.data_dir = data_dir
  self.data_list = data_list
  self.input_size = input_size
  self.coord = coord

  self.image_list, self.label_list = read_labeled_image_list(self.data_dir, self.data_list)
  self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string)
  self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string)
  self.queue = tf.train.slice_input_producer([self.images, self.labels],
                                             shuffle=input_size is not None)  # Not shuffling if it is val.
  self.image, self.label = read_images_from_disk(self.queue, self.input_size, random_scale)
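A sketch of consuming the reader, assuming read_images_from_disk yields fixed-size crops so tf.train.batch can infer static shapes (paths and batch size are illustrative):

reader = ImageReader('/data/voc2012', 'train_list.txt',
                     input_size=(321, 321), random_scale=True,
                     coord=tf.train.Coordinator())
image_batch, label_batch = tf.train.batch([reader.image, reader.label],
                                          batch_size=8)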
def decode_jpeg(image_buffer, scope=None):
  """Decode a JPEG string into one 3-D float image Tensor.

  Args:
    image_buffer: scalar string Tensor.
    scope: Optional scope for op_scope.
  Returns:
    3-D float Tensor with values ranging from [0, 1).
  """
  with tf.op_scope([image_buffer], scope, 'decode_jpeg'):
    # Decode the string as an RGB JPEG.
    # Note that the resulting image contains an unknown height and width
    # that is set dynamically by decode_jpeg. In other words, the height
    # and width of image is unknown at compile-time.
    image = tf.image.decode_jpeg(image_buffer, channels=3)

    # After this point, all image pixels reside in [0,1)
    # until the very end, when they're rescaled to (-1, 1). The various
    # adjust_* ops all require this range for dtype float.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return image
def decode_jpeg(image_buffer, scope=None):
  """Decode a JPEG string into one 3-D float image Tensor.

  Args:
    image_buffer: scalar string Tensor.
    scope: Optional scope for op_scope.
  Returns:
    3-D float Tensor with values ranging from [0, 1).
  """
  with tf.name_scope(values=[image_buffer], name=scope, default_name='decode_jpeg'):
    # Decode the string as an RGB JPEG.
    # Note that the resulting image contains an unknown height and width
    # that is set dynamically by decode_jpeg. In other words, the height
    # and width of image is unknown at compile-time.
    image = tf.image.decode_jpeg(image_buffer, channels=3)

    # After this point, all image pixels reside in [0,1)
    # until the very end, when they're rescaled to (-1, 1). The various
    # adjust_* ops all require this range for dtype float.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return image
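A brief usage sketch for either decode_jpeg variant above (the file path is illustrative):

image_buffer = tf.read_file('example.jpg')  # scalar string tensor
image = decode_jpeg(image_buffer)           # 3-D float32 in [0, 1)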
def _test_metric_spec(self, metric_spec, hyps, refs, expected_scores):
  """Tests a MetricSpec."""
  predictions = {"predicted_tokens": tf.placeholder(dtype=tf.string)}
  labels = {"target_tokens": tf.placeholder(dtype=tf.string)}

  value, update_op = metric_spec.create_metric_ops(None, labels, predictions)

  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    scores = []
    for hyp, ref in zip(hyps, refs):
      hyp = hyp.split(" ")
      ref = ref.split(" ")
      sess.run(update_op, {
          predictions["predicted_tokens"]: [hyp],
          labels["target_tokens"]: [ref]
      })
      scores.append(sess.run(value))

  for score, expected in zip(scores, expected_scores):
    np.testing.assert_almost_equal(score, expected, decimal=2)