def read_instances(self, count, shuffle, epochs):
    """Builds the ops that read serialized instances from this DataSource.

    Arguments:
      count: Upper bound on the number of records fetched per evaluation;
        forwarded to the reader's `read_up_to`.
      shuffle: Forwarded to the filename queue's `shuffle` argument.
      epochs: The number of epochs or passes over the data to perform. A
        falsy value (0 or None) is turned into None, i.e. no limit.
    Returns:
      A tensor containing instances that are read.
    """
    # Only compressed sources need explicit (GZIP) record options.
    reader_options = (
        tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.GZIP)
        if self._compressed else None)

    matched_files = tf.train.match_filenames_once(self._path, name='files')
    filename_queue = tf.train.string_input_producer(
        matched_files,
        # None implies unlimited, so an epoch count of 0 is mapped to None.
        num_epochs=epochs or None,
        shuffle=shuffle,
        name='queue')

    record_reader = tf.TFRecordReader(options=reader_options, name='reader')
    _, instances = record_reader.read_up_to(filename_queue, count, name='read')
    return instances
python类TFRecordReader()的实例源码
def read_and_decode(filename_queue):
    """Reads one TFRecord from the queue and decodes it into an image tensor.

    Args:
      filename_queue: Queue of TFRecord filenames to read from.
    Returns:
      A float32 tensor of shape [128, 128, 3] scaled from the uint8 range
      [0, 255] to [-1, 1].
    """
    _, record = tf.TFRecordReader().read(filename_queue)
    parsed = tf.parse_single_example(
        record,
        features={'image_raw': tf.FixedLenFeature([], tf.string)})

    # The raw bytes are a flat uint8 buffer; pin its length before reshaping.
    pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
    pixels.set_shape(128 * 128 * 3)
    pixels = tf.reshape(pixels, [128, 128, 3])

    return tf.cast(pixels, tf.float32) * (2. / 255) - 1.
def read_and_decode_with_labels(filename_queue):
    """Reads one TFRecord and decodes it into an (image, label) pair.

    Args:
      filename_queue: Queue of TFRecord filenames to read from.
    Returns:
      A tuple `(image, label)`: `image` is a float32 tensor of shape
      [128, 128, 3] scaled to [-1, 1]; `label` is the record's int64
      'label' feature cast to int32.
    """
    _, record = tf.TFRecordReader().read(filename_queue)
    feature_spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label' : tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    # Flat uint8 buffer -> fixed-size HWC image in [-1, 1].
    pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
    pixels.set_shape(128 * 128 * 3)
    pixels = tf.reshape(pixels, [128, 128, 3])
    image = tf.cast(pixels, tf.float32) * (2. / 255) - 1.

    label = tf.cast(parsed['label'], tf.int32)
    return image, label
def batches(data_file_path, max_number_length, batch_size, size,
            num_preprocess_threads=1, is_training=True, channels=1):
    """Builds a batched input pipeline over a single TFRecord file.

    Args:
      data_file_path: Path of the TFRecord file to read.
      max_number_length: Length of the fixed-size 'label' feature; also the
        depth of the one-hot encoding of the length.
      batch_size: Number of examples per batch; the queue capacity is three
        times this value.
      size: Forwarded to `resize_image`.
      num_preprocess_threads: Number of independent decode/resize pipelines
        handed to `tf.train.batch_join`.
      is_training: Forwarded to `resize_image`.
      channels: Channel count used for PNG decoding and by `resize_image`.
    Returns:
      The result of `tf.train.batch_join` over
      [image, one-hot length, one-hot label] lists.
    """
    filename_queue = tf.train.string_input_producer([data_file_path])
    _, record = tf.TFRecordReader().read(filename_queue)
    feature_spec = {
        'image_png': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([max_number_length], tf.int64),
        'length': tf.FixedLenFeature([1], tf.int64),
        'bbox': tf.FixedLenFeature([4], tf.int64),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    encoded_png = parsed['image_png']
    label = parsed['label']
    length = parsed['length']
    bbox = tf.cast(parsed['bbox'], tf.int32)

    def _build_example():
        # One decode + resize pipeline; each call adds its own ops to the graph.
        decoded = tf.image.decode_png(encoded_png, channels)
        resized = resize_image(decoded, bbox, is_training, size, channels)
        # 'length' has shape [1], so [0] drops that leading axis after one-hot.
        return [resized,
                tf.one_hot(length - 1, max_number_length)[0],
                tf.one_hot(label, 11)]

    example_lists = [_build_example() for _ in range(num_preprocess_threads)]
    return tf.train.batch_join(example_lists, batch_size=batch_size,
                               capacity=batch_size * 3)
def read_and_decode(filename_queue):
    """Reads one TFRecord and decodes a volumetric image with its label volume.

    Both 'image_raw' and 'label_raw' are decoded as flat int16 buffers of
    IMG_DIM**3 elements.

    Args:
      filename_queue: Queue of TFRecord filenames to read from.
    Returns:
      A tuple `(image, labels)`: `image` is a float32 tensor of shape
      [IMG_DIM, IMG_DIM, IMG_DIM, 1]; `labels` is an int16 tensor of shape
      [IMG_DIM, IMG_DIM, IMG_DIM].
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label_raw': tf.FixedLenFeature([], tf.string),
        })

    image = tf.cast(tf.decode_raw(features['image_raw'], tf.int16), tf.float32)
    labels = tf.decode_raw(features['label_raw'], tf.int16)

    # PW 2017/03/03: Zero-center data here?
    # Dimensions (X, Y, Z, channels)
    image.set_shape([IMG_DIM * IMG_DIM * IMG_DIM])
    image = tf.reshape(image, [IMG_DIM, IMG_DIM, IMG_DIM, 1])

    labels.set_shape([IMG_DIM * IMG_DIM * IMG_DIM])
    # Reshape the decoded labels themselves; reshaping `image` here would
    # return a copy of the image and discard the label data.
    labels = tf.reshape(labels, [IMG_DIM, IMG_DIM, IMG_DIM])

    return image, labels
def read_examples(input_files, shuffle, num_epochs=None):
    """Creates the reader and filename queue for GZIP-compressed example protos.

    Args:
      input_files: Iterable of strings; each string may hold several
        comma-separated file patterns.
      shuffle: Forwarded to the filename queue's `shuffle` argument.
      num_epochs: Number of passes over the files; 0 or None means no limit.
    Returns:
      A tuple `(example_id, encoded_example)` as produced by
      `tf.TFRecordReader.read`.
    """
    # Expand every comma-separated pattern and keep the matches sorted.
    matched_files = sorted(
        match
        for entry in input_files
        for pattern in entry.split(',')
        for match in file_io.get_matching_files(pattern))

    # Build a queue of the filenames to be read (num_epochs == 0 -> None).
    filename_queue = tf.train.string_input_producer(
        matched_files, num_epochs=num_epochs or None, shuffle=shuffle)

    gzip_options = tf.python_io.TFRecordOptions(
        compression_type=tf.python_io.TFRecordCompressionType.GZIP)
    record_reader = tf.TFRecordReader(options=gzip_options)
    example_id, encoded_example = record_reader.read(filename_queue)
    return example_id, encoded_example
def read_and_decode(filename_queue):
    """Reads one TFRecord and returns its label and a combined feature vector.

    Categorical string features are hashed into BUCKET_SIZE buckets, cast to
    float32 and appended after the continuous features.

    Args:
      filename_queue: Queue of TFRecord filenames to read from.
    Returns:
      A tuple `(label, features)`: `label` is the float32 'label' feature and
      `features` is the concatenation of continuous and hashed categorical
      features.
    """
    _, record = tf.TFRecordReader().read(filename_queue)
    feature_spec = {
        "label": tf.FixedLenFeature([], tf.float32),
        "categorical_features": tf.FixedLenFeature(
            [CATEGORICAL_FEATURES_SIZE], tf.string),
        "continuous_features": tf.FixedLenFeature(
            [CONTINUOUS_FEATURES_SIZE], tf.float32),
    }
    parsed = tf.parse_single_example(record, features=feature_spec)

    label = parsed["label"]
    continuous = parsed["continuous_features"]
    hashed = tf.string_to_hash_bucket(parsed["categorical_features"], BUCKET_SIZE)
    categorical = tf.cast(hashed, tf.float32)

    # NOTE(review): the axis-first argument order looks like the pre-1.0
    # `tf.concat` signature - confirm the TensorFlow version this targets.
    return label, tf.concat(0, [continuous, categorical])
# Read serialized examples from filename queue
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: Maximum number of records read per evaluation.
    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    _, serialized_examples = tf.TFRecordReader().read_up_to(
        filename_queue, batch_size)

    # Validate the configured feature lists before building the parse spec.
    assert len(self.feature_names) > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    # Mapping from the fields in the proto to their data types.
    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for name, size in zip(self.feature_names, self.feature_sizes):
        feature_map[name] = tf.FixedLenFeature([size], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    # Sparse label ids -> indicator matrix of width num_classes.
    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    per_feature = [parsed[name] for name in self.feature_names]
    concatenated_features = tf.concat(per_feature, 1)

    # One 1.0 entry per example that was actually read.
    padding = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], concatenated_features, labels, padding
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Besides the usual fields, each record is expected to carry a fixed-length
    float "predictions" feature of width `self.num_classes`.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: Maximum number of records read per evaluation.
    Returns:
      A tuple of video indexes, features, labels, padding data, and the
      stored predictions.
    """
    _, serialized_examples = tf.TFRecordReader().read_up_to(
        filename_queue, batch_size)

    # Validate the configured feature lists before building the parse spec.
    assert len(self.feature_names) > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    # Mapping from the fields in the proto to their data types.
    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for name, size in zip(self.feature_names, self.feature_sizes):
        feature_map[name] = tf.FixedLenFeature([size], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    # Sparse label ids -> indicator matrix of width num_classes.
    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    per_feature = [parsed[name] for name in self.feature_names]
    concatenated_features = tf.concat(per_feature, 1)

    # One 1.0 entry per example that was actually read.
    padding = tf.ones([tf.shape(serialized_examples)[0]])
    return (parsed["video_id"], concatenated_features, labels, padding,
            parsed["predictions"])
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread that yields only stored predictions.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: Maximum number of records read per evaluation.
    Returns:
      The parsed "predictions" feature: a float32 tensor with
      `self.num_classes` values per example.
    """
    _, serialized_examples = tf.TFRecordReader().read_up_to(
        filename_queue, batch_size)

    # The per-feature lists are not parsed here, but the configuration is
    # still validated.
    assert len(self.feature_names) > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    # Mapping from the fields in the proto to their data types.
    feature_map = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_example(serialized_examples, features=feature_map)
    return parsed["predictions"]
def prepare_writer(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Despite its name, this method only builds reading/parsing ops.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: Maximum number of records read per evaluation.
    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # Validate the configured feature lists before building the parse spec.
    names, sizes = self.feature_names, self.feature_sizes
    assert len(names) > 0, "self.feature_names is empty!"
    assert len(names) == len(sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(names), len(sizes))

    # Mapping from the fields in the proto to their data types.
    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    feature_map.update(
        (name, tf.FixedLenFeature([size], tf.float32))
        for name, size in zip(names, sizes))

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    # Sparse label ids -> indicator matrix of width num_classes.
    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    concatenated_features = tf.concat([parsed[name] for name in names], 1)

    # One 1.0 entry per example that was actually read.
    padding = tf.ones([tf.shape(serialized_examples)[0]])
    return parsed["video_id"], concatenated_features, labels, padding
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
      filename_queue: A tensorflow queue of filename locations.
      batch_size: Maximum number of records read per evaluation.
    Returns:
      A tuple of video indexes, features, labels, and padding data.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(filename_queue, batch_size)

    # Validate the configured feature lists before building the parse spec.
    feature_count = len(self.feature_names)
    assert feature_count > 0, "self.feature_names is empty!"
    assert feature_count == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    # Mapping from the fields in the proto to their data types.
    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for idx, name in enumerate(self.feature_names):
        feature_map[name] = tf.FixedLenFeature(
            [self.feature_sizes[idx]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=feature_map)

    # Sparse label ids -> indicator matrix of width num_classes.
    labels = tf.sparse_to_indicator(parsed["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])

    concatenated_features = tf.concat(
        [parsed[name] for name in self.feature_names], 1)

    # One 1.0 entry per example that was actually read.
    examples_read = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], concatenated_features, labels,
            tf.ones([examples_read]))
def prepare_reader(self, filename_queue):
    """Creates a reader for one frame-level SequenceExample at a time.

    Args:
      filename_queue: A tensorflow queue of filename locations.
    Returns:
      A tuple `(video_ids, rgbs, audios, labels, num_frames)`, each with a
      leading batch axis of size 1 added via `tf.expand_dims`.
    """
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    context_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    sequence_spec = {
        "rgb": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "audio": tf.FixedLenSequenceFeature([], dtype=tf.string),
    }
    contexts, sequences = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # read ground truth labels: dense vector of size num_classes, cast to bool
    dense_labels = tf.sparse_to_dense(
        contexts["labels"].values, (self.num_classes,), 1,
        validate_indices=False)
    labels = tf.cast(dense_labels, tf.bool)

    # The frame count returned for the audio stream is the one that is kept.
    rgbs, _ = self.get_video_matrix(sequences["rgb"], 1024, self.max_frames)
    audios, num_frames = self.get_video_matrix(
        sequences["audio"], 1024, self.max_frames)

    return (tf.expand_dims(contexts["video_id"], 0),
            tf.expand_dims(rgbs, 0),
            tf.expand_dims(audios, 0),
            tf.expand_dims(labels, 0),
            tf.expand_dims(num_frames, 0))
def reader(self):
    """Returns a newly constructed `tf.TFRecordReader` on every call."""
    record_reader = tf.TFRecordReader()
    return record_reader
tensorflow_file_reader.py 文件源码
项目:US-image-prediction
作者: ChengruiWu008
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def read_and_decode(filename):
    """Reads one record from a TFRecord file and decodes it to (image, label).

    Args:
      filename: Path of the TFRecord file to read.
    Returns:
      A tuple `(img, label)`: `img` is a float32 tensor of shape
      [224, 224, 3] scaled from uint8 [0, 255] to [-0.5, 0.5]; `label` is
      the record's int64 'label' feature cast to int32.
    """
    # Queue holding the single input file.
    filename_queue = tf.train.string_input_producer([filename])
    # Read the next serialized example from the queue.
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw' : tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    raw_pixels = tf.decode_raw(parsed['img_raw'], tf.uint8)
    reshaped = tf.reshape(raw_pixels, [224, 224, 3])
    img = tf.cast(reshaped, tf.float32) * (1. / 255) - 0.5

    label = tf.cast(parsed['label'], tf.int32)
    return img, label
def __init__(self, fnames, shuffle=True, num_epochs=None):
    """Init from a list of filenames to enqueue.

    Also reads the first record in a throwaway graph and session to fill in
    `self.shapes` and `self.dtypes` for every key in `self.FEATURES`.

    Args:
      fnames: list of .tfrecords filenames to enqueue.
      shuffle: if true, shuffle the list at each epoch
      num_epochs: forwarded to the filename queue's `num_epochs` argument.
    """
    self._fnames = fnames
    self._fname_queue = tf.train.string_input_producer(
        self._fnames,
        capacity=1000,
        shuffle=shuffle,
        num_epochs=num_epochs,
        shared_name='input_files')
    self._reader = tf.TFRecordReader()

    # Read first record to initialize the shape parameters. This happens in a
    # separate graph so the probe ops do not end up in the caller's graph.
    with tf.Graph().as_default():
        probe_queue = tf.train.string_input_producer(self._fnames)
        _, serialized = tf.TFRecordReader().read(probe_queue)
        shape_ops = self._parse_shape(serialized)
        dtype_ops = self._parse_dtype(serialized)

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True

        with tf.Session(config=session_config) as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            raw_shapes = sess.run(shape_ops)
            self.shapes = {
                key: raw_shapes[key+'_sz'].tolist() for key in self.FEATURES}

            raw_dtypes = sess.run(dtype_ops)
            self.dtypes = {
                key: REVERSE_TYPEMAP[raw_dtypes[key+'_dtype'][0]]
                for key in self.FEATURES}

            coord.request_stop()
            coord.join(threads)
def _parse(self, filename_queue):
    """Reads and parses one example inside the "parsing" name scope.

    Args:
      filename_queue: Queue of TFRecord filenames to read from.
    Returns:
      A tuple `(features, label)`: `features` is the parsed feature dict
      (keys 'image' and 'label'); `label` is the 'label' feature cast to
      int32.
    """
    feature_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    with tf.name_scope("parsing"):
        _, serialized_example = tf.TFRecordReader().read(filename_queue)
        features = tf.parse_single_example(
            serialized_example, features=feature_spec)
        label = tf.cast(features['label'], tf.int32)
        return features, label
def get_input_op(self, fq, parsers):
    """Builds the op that reads and parses up to `self.batch_size` records.

    Args:
      fq: Filename queue to read TFRecords from.
      parsers: Feature spec passed to `tf.parse_example`.
    Returns:
      The result of `tf.parse_example` on the serialized records.
    """
    _, serialized_batch = tf.TFRecordReader().read_up_to(fq, self.batch_size)
    parsed_batch = tf.parse_example(serialized_batch, parsers)
    return parsed_batch
def create_input_fn(mode, input_files, batch_size, num_epochs):
    """Returns an `input_fn` closure for the given mode.

    Args:
      mode: A `tf.contrib.learn.ModeKeys` value; selects the feature columns
        and how the target is produced.
      input_files: File pattern passed to `read_batch_features`.
      batch_size: Number of examples per batch.
      num_epochs: Number of passes over the input files.
    Returns:
      A zero-argument function that returns `(feature_map, target)`.
    """
    training = mode == tf.contrib.learn.ModeKeys.TRAIN

    def input_fn():
        parsing_spec = tf.contrib.layers.create_feature_spec_for_parsing(
            get_feature_columns(mode))

        feature_map = tf.contrib.learn.io.read_batch_features(
            file_pattern=input_files,
            batch_size=batch_size,
            features=parsing_spec,
            reader=tf.TFRecordReader,
            randomize_input=True,
            num_epochs=num_epochs,
            queue_capacity=200000 + batch_size * 10,
            name="read_batch_features_{}".format(mode))

        if training:
            # This is an ugly hack because of a current bug in tf.learn:
            # during evaluation TF tries to restore the epoch variable, which
            # isn't defined during training, so it is defined manually here.
            tf.get_variable(
                "read_batch_features_eval/file_name_queue/limit_epochs/epochs",
                initializer=tf.constant(0, dtype=tf.int64))
            target = feature_map.pop("label")
        else:
            # In evaluation we have 10 classes (utterances).
            # The first one (index 0) is always the correct one
            target = tf.zeros([batch_size, 1], dtype=tf.int64)

        return feature_map, target

    return input_fn
def decode(filename_queue):
    """Reads one TFRecord and decodes its id, padded vector and label vector.

    Args:
      filename_queue: Queue of TFRecord filenames to read from.
    Returns:
      A tuple `(video_id, vector, label)`: `video_id` is the string 'id'
      feature; `vector` is a float32 tensor of shape [40, 300]; `label` is a
      float32 indicator vector of length 4716.
    """
    # Create TFRecords reader and pull the next serialized example.
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    # Feature keys in TFRecords example
    feature_spec = {
        'id': tf.FixedLenFeature([], tf.string),
        'vector': tf.FixedLenFeature([], tf.string),
        'label': tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)
    video_id = parsed['id']

    # Decode vector and zero-pad the row axis up to 40 rows.
    # NOTE(review): assumes at most 40 rows per record; a longer input would
    # give a negative padding amount - confirm against the data.
    rows = tf.reshape(tf.decode_raw(parsed['vector'], tf.float32), [-1, 300])
    missing_rows = 40 - tf.shape(rows)[0]
    vector = tf.pad(rows, [[0, missing_rows], [0, 0]])
    vector.set_shape([40, 300])

    # Get label index as an indicator vector, then convert to float.
    indicator = tf.sparse_to_indicator(parsed['label'], 4716)
    indicator.set_shape([4716])
    label = tf.cast(indicator, tf.float32)

    return video_id, vector, label
# Creates input pipeline for tensorflow networks