def _read_sequence_example(filename_queue,
n_labels=50, n_samples=59049, n_segments=10):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
context, sequence = tf.parse_single_sequence_example(
serialized_example,
context_features={
'raw_labels': tf.FixedLenFeature([], dtype=tf.string)
},
sequence_features={
'raw_segments': tf.FixedLenSequenceFeature([], dtype=tf.string)
})
segments = tf.decode_raw(sequence['raw_segments'], tf.float32)
segments.set_shape([n_segments, n_samples])
labels = tf.decode_raw(context['raw_labels'], tf.uint8)
labels.set_shape([n_labels])
labels = tf.cast(labels, tf.float32)
return segments, labels
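For reference, records matching this parser could be produced with a writer along these lines. This is a sketch: only the feature names ('raw_labels', 'raw_segments') come from the parser above; the NumPy array shapes and writer setup are assumptions.

import numpy as np
import tensorflow as tf

def _write_sequence_example(writer, segments, labels):
    # segments: float32 array of shape [n_segments, n_samples];
    # labels: uint8 array of shape [n_labels]. Both are stored as raw bytes.
    context = tf.train.Features(feature={
        'raw_labels': tf.train.Feature(bytes_list=tf.train.BytesList(
            value=[labels.astype(np.uint8).tobytes()]))
    })
    segment_feats = [
        tf.train.Feature(bytes_list=tf.train.BytesList(
            value=[seg.astype(np.float32).tobytes()]))
        for seg in segments
    ]
    sequence = tf.train.FeatureLists(feature_list={
        'raw_segments': tf.train.FeatureList(feature=segment_feats)
    })
    example = tf.train.SequenceExample(context=context, feature_lists=sequence)
    writer.write(example.SerializeToString())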
def read_tfrecord(filename_queue):
reader = tf.TFRecordReader()
_, examples = reader.read(filename_queue)
context_features = {
"length": tf.FixedLenFeature([], dtype=tf.int64)
}
sequence_features = {
"tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
"labels": tf.FixedLenSequenceFeature([], dtype=tf.int64)
}
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=examples,
context_features=context_features,
sequence_features=sequence_features
)
return context_parsed, sequence_parsed
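Because 'tokens' and 'labels' come back with data-dependent length, they batch naturally with dynamic padding. A minimal sketch using the queue-runner API (batch size and capacity are assumptions):

context_parsed, sequence_parsed = read_tfrecord(filename_queue)
lengths, tokens, labels = tf.train.batch(
    tensors=[context_parsed['length'],
             sequence_parsed['tokens'],
             sequence_parsed['labels']],
    batch_size=32,
    dynamic_pad=True,  # pad variable-length sequences to the longest in the batch
    capacity=1000)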
def example_parser(self, filename_queue):
reader = tf.TFRecordReader()
key, record_string = reader.read(filename_queue)
features = {
'labels': tf.FixedLenSequenceFeature([], tf.int64),
'tokens': tf.FixedLenSequenceFeature([], tf.int64),
'shapes': tf.FixedLenSequenceFeature([], tf.int64),
'chars': tf.FixedLenSequenceFeature([], tf.int64),
'seq_len': tf.FixedLenSequenceFeature([], tf.int64),
'tok_len': tf.FixedLenSequenceFeature([], tf.int64),
}
_, example = tf.parse_single_sequence_example(serialized=record_string, sequence_features=features)
labels = example['labels']
tokens = example['tokens']
shapes = example['shapes']
chars = example['chars']
seq_len = example['seq_len']
tok_len = example['tok_len']
# context = c['context']
return labels, tokens, shapes, chars, seq_len, tok_len
# return labels, tokens, labels, labels, labels
def read_and_decode_single_example(filenames, shuffle=False, num_epochs=None):
# first construct a queue containing a list of filenames.
# this lets a user split up their dataset across multiple files to keep size down
# filename_queue = tf.train.string_input_producer([filename], num_epochs=10)
filename_queue = tf.train.string_input_producer(filenames,
shuffle=shuffle, num_epochs=num_epochs)
reader = tf.TFRecordReader()
# One can read a single serialized example from a filename
# serialized_example is a Tensor of type string.
_, serialized_ex = reader.read(filename_queue)
context, sequences = tf.parse_single_sequence_example(serialized_ex,
context_features={
"seq_length": tf.FixedLenFeature([], dtype=tf.int64)
},
sequence_features={
"seq_feature": tf.VarLenFeature(dtype=tf.int64),
"label": tf.VarLenFeature(dtype=tf.int64)
})
return context, sequences
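The VarLenFeature entries come back as tf.SparseTensor objects; most downstream ops want dense tensors. A sketch of the conversion (filenames and default value are assumptions):

context, sequences = read_and_decode_single_example(['train.tfrecords'])
seq_len = context['seq_length']
# Densify the sparse sequence features before feeding them to dense ops.
dense_features = tf.sparse_tensor_to_dense(sequences['seq_feature'], default_value=0)
dense_labels = tf.sparse_tensor_to_dense(sequences['label'], default_value=0)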
def ReadInput(self, num_epochs=None, val=False, test=False):
if val:
filenames = tf.gfile.Glob(self.data_dir+'/surfing_val.tfrecords')
elif test:
filenames = tf.gfile.Glob(self.data_dir+'/surfing_test.tfrecords')
else:
filenames = tf.gfile.Glob(self.data_dir+'/surfing.tfrecords')
filename_queue = tf.train.string_input_producer(filenames,num_epochs=num_epochs, shuffle=True)
reader = tf.TFRecordReader()
_, example = reader.read(filename_queue)
feature_spec = {
self.features: tf.FixedLenSequenceFeature(
shape=[self.image_width * self.image_width * self.c_dim], dtype=tf.float32)}
_, features = tf.parse_single_sequence_example(
example, sequence_features=feature_spec)
moving_objs = tf.reshape(
features[self.features], [self.video_len, self.image_width, self.image_width, self.c_dim])
examples = tf.train.shuffle_batch(
[moving_objs],
batch_size=self.batch_size,
num_threads=self.batch_size,
capacity=self.batch_size * 100,
min_after_dequeue=self.batch_size * 4)
return examples
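All of the queue-based readers in this file need queue runners started before any tensor can be evaluated. A minimal TF1 driver loop, assuming `reader` is an instance of the surrounding class:

examples = reader.ReadInput(num_epochs=1)
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])  # num_epochs uses a local variable
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            batch = sess.run(examples)
    except tf.errors.OutOfRangeError:
        pass  # raised once num_epochs of data have been consumed
    finally:
        coord.request_stop()
        coord.join(threads)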
def prepare_reader(self, filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
contexts, features = tf.parse_single_sequence_example(
serialized_example,
context_features={
"video_id": tf.FixedLenFeature([], tf.string),
"labels": tf.VarLenFeature(tf.int64)},
sequence_features={
"rgb": tf.FixedLenSequenceFeature([], dtype=tf.string),
"audio": tf.FixedLenSequenceFeature([], dtype=tf.string),
})
# read ground truth labels
labels = (tf.cast(
tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
validate_indices=False),
tf.bool))
rgbs, num_frames = self.get_video_matrix(features["rgb"], 1024, self.max_frames)
audios, num_frames = self.get_video_matrix(features["audio"], 1024, self.max_frames)
batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
batch_rgbs = tf.expand_dims(rgbs, 0)
batch_audios = tf.expand_dims(audios, 0)
batch_labels = tf.expand_dims(labels, 0)
batch_frames = tf.expand_dims(num_frames, 0)
return batch_video_ids, batch_rgbs, batch_audios, batch_labels, batch_frames
def frame_example_2_np(seq_example_bytes,
max_quantized_value=2,
min_quantized_value=-2):
feature_names = ['rgb', 'audio']
feature_sizes = [1024, 128]
with tf.Graph().as_default():
contexts, features = tf.parse_single_sequence_example(
seq_example_bytes,
context_features={"video_id": tf.FixedLenFeature(
[], tf.string),
"labels": tf.VarLenFeature(tf.int64)},
sequence_features={
feature_name : tf.FixedLenSequenceFeature([], dtype=tf.string)
for feature_name in feature_names
})
decoded_features = { name: tf.reshape(
tf.cast(tf.decode_raw(features[name], tf.uint8), tf.float32),
[-1, size]) for name, size in zip(feature_names, feature_sizes)
}
feature_matrices = {
name: utils.Dequantize(decoded_features[name],
max_quantized_value, min_quantized_value) for name in feature_names}
with tf.Session() as sess:
    # Fetch all four tensors in a single run so the parse ops execute only once.
    vid, labs, rgb, audio = sess.run(
        [contexts['video_id'], contexts['labels'].values,
         feature_matrices['rgb'], feature_matrices['audio']])
return vid, labs, rgb, audio
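frame_example_2_np above and build_graph below both call utils.Dequantize, which is not defined in this file. A minimal sketch of the helper, matching the dequantization scheme used by the YouTube-8M starter code (an assumption, since the utils module is not shown):

def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
    """Maps uint8-quantized features in [0, 255] back to the float range."""
    assert max_quantized_value > min_quantized_value
    quantized_range = max_quantized_value - min_quantized_value
    scalar = quantized_range / 255.0
    bias = (quantized_range / 512.0) + min_quantized_value
    return feat_vector * scalar + bias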
#%% Split frame level file into three video level files: all, 1st half, 2nd half.
def build_graph():
feature_names = ['rgb', 'audio']
feature_sizes = [1024, 128]
max_quantized_value = 2
min_quantized_value = -2
seq_example_bytes = tf.placeholder(tf.string)
contexts, features = tf.parse_single_sequence_example(
seq_example_bytes,
context_features={"video_id": tf.FixedLenFeature(
[], tf.string),
"labels": tf.VarLenFeature(tf.int64)},
sequence_features={
feature_name : tf.FixedLenSequenceFeature([], dtype=tf.string)
for feature_name in feature_names
})
decoded_features = { name: tf.reshape(
tf.cast(tf.decode_raw(features[name], tf.uint8), tf.float32),
[-1, size]) for name, size in zip(feature_names, feature_sizes)
}
feature_matrices = {
name: utils.Dequantize(decoded_features[name],
max_quantized_value, min_quantized_value) for name in feature_names}
tf.add_to_collection("vid_tsr", contexts['video_id'])
tf.add_to_collection("labs_tsr", contexts['labels'].values)
tf.add_to_collection("rgb_tsr", feature_matrices['rgb'])
tf.add_to_collection("audio_tsr", feature_matrices['audio'])
tf.add_to_collection("seq_example_bytes", seq_example_bytes)
# with tf.Session() as sess:
# writer = tf.summary.FileWriter('./graphs', sess.graph)
def parse_record_fn(self):
def _parse(proto):
spec = self.record_spec()
ctx, seq = tf.parse_single_sequence_example(proto, **spec)
ctx.update(seq)
result = []
for field in self.FIELDS:
result.append(ctx[field])
return result
return _parse
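The returned parse function plugs into a tf.data pipeline. A sketch, assuming `reader` is an instance of this class and the file path is a placeholder; note that tf.data treats a list returned from a map function as a single tensor, so the list is unpacked into a tuple:

parse_fn = reader.parse_record_fn()
dataset = tf.data.TFRecordDataset(['train.tfrecord'])
dataset = dataset.map(lambda proto: tuple(parse_fn(proto)))
# Variable-length FIELDS would need dataset.padded_batch() before batching.
fields = dataset.make_one_shot_iterator().get_next()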
def parse_sequence_example(serialized, image_feature, caption_feature):
"""Parses a tensorflow.SequenceExample into an image and caption.
Args:
serialized: A scalar string Tensor; a single serialized SequenceExample.
image_feature: Name of SequenceExample context feature containing image
data.
caption_feature: Name of SequenceExample feature list containing integer
captions.
Returns:
encoded_image: A scalar string Tensor containing a JPEG encoded image.
caption: A 1-D int64 Tensor with dynamically specified length.
"""
context, sequence = tf.parse_single_sequence_example(
serialized,
context_features={
image_feature: tf.FixedLenFeature([], dtype=tf.string)
},
sequence_features={
caption_feature: tf.FixedLenSequenceFeature([], dtype=tf.int64),
})
encoded_image = context[image_feature]
caption = sequence[caption_feature]
return encoded_image, caption
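The returned encoded_image is still a JPEG byte string; decoding is a separate step. A sketch (the feature names are placeholders and must match whatever the records were written with):

encoded_image, caption = parse_sequence_example(
    serialized,
    image_feature='image/data',
    caption_feature='image/caption_ids')
image = tf.image.decode_jpeg(encoded_image, channels=3)
image = tf.image.convert_image_dtype(image, dtype=tf.float32)  # scale to [0, 1]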
def make_example(seq_len, spec_feat, labels):
''' Creates a SequenceExample for a single utterance.
This function makes a SequenceExample given the sequence length,
mfcc features and corresponding transcript.
These sequence examples are read using tf.parse_single_sequence_example
during training.
Note: Some of the tf modules used in this function (such as
tf.train.Feature) do not have comprehensive documentation in v0.12.
This function was put together using the test routines in the
tensorflow repo.
See: https://github.com/tensorflow/tensorflow/
blob/246a3724f5406b357aefcad561407720f5ccb5dc/
tensorflow/python/kernel_tests/parsing_ops_test.py
Args:
seq_len: integer represents the sequence length in time frames.
spec_feat: [TxF] matrix of mfcc features.
labels: list of ints representing the encoded transcript.
Returns:
Serialized sequence example.
'''
# Feature lists for the sequential features of the example
feats_list = [tf.train.Feature(float_list=tf.train.FloatList(value=frame))
for frame in spec_feat]
feat_dict = {"feats": tf.train.FeatureList(feature=feats_list)}
sequence_feats = tf.train.FeatureLists(feature_list=feat_dict)
# Context features for the entire sequence
len_feat = tf.train.Feature(int64_list=tf.train.Int64List(value=[seq_len]))
label_feat = tf.train.Feature(int64_list=tf.train.Int64List(value=labels))
context_feats = tf.train.Features(feature={"seq_len": len_feat,
"labels": label_feat})
ex = tf.train.SequenceExample(context=context_feats,
feature_lists=sequence_feats)
return ex.SerializeToString()
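A matching writer loop might look like the following sketch; the output path and the `utterances` iterable are hypothetical:

with tf.python_io.TFRecordWriter('train.tfrecord') as writer:
    # `utterances` is a hypothetical iterable of (seq_len, spec_feat, labels) tuples.
    for seq_len, spec_feat, labels in utterances:
        writer.write(make_example(seq_len, spec_feat, labels))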
# Source: tfrecord_read.py, from project Youtube8mdataset_kagglechallenge by jasonlee27
def prepare_reader(self,
filename_queue,
max_quantized_value=2,
min_quantized_value=-2):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
context_features, sequence_features = {"video_id": tf.FixedLenFeature([], tf.string),
"labels": tf.VarLenFeature(tf.int64)}, None
if self.sequence_data:
sequence_features = {self.feature_name[0]: tf.FixedLenSequenceFeature([], dtype=tf.string),
self.feature_name[1]: tf.FixedLenSequenceFeature([], dtype=tf.string), }
else:
context_features[self.feature_name[0]] = tf.FixedLenFeature(self.feature_size[0], tf.float32)
context_features[self.feature_name[1]] = tf.FixedLenFeature(self.feature_size[1], tf.float32)
contexts, features = tf.parse_single_sequence_example(serialized_example,
context_features=context_features,
sequence_features=sequence_features)
labels = (tf.cast(contexts["labels"].values, tf.int64))
if self.sequence_data:
decoded_features = tf.reshape(tf.cast(tf.decode_raw(features[self.feature_name[0]], tf.uint8), tf.float32),
[-1, self.feature_size[0]])
video_matrix = Dequantize(decoded_features, max_quantized_value, min_quantized_value)
decoded_features = tf.reshape(tf.cast(tf.decode_raw(features[self.feature_name[1]], tf.uint8), tf.float32),
[-1, self.feature_size[1]])
audio_matrix = Dequantize(decoded_features, max_quantized_value, min_quantized_value)
num_frames = tf.minimum(tf.shape(decoded_features)[0], self.max_frames)
else:
video_matrix = contexts[self.feature_name[0]]
audio_matrix = contexts[self.feature_name[1]]
num_frames = tf.constant(-1)
# Pad or truncate to 'max_frames' frames.
# video_matrix = resize_axis(video_matrix, 0, self.max_frames)
return contexts["video_id"], video_matrix, audio_matrix, labels, num_frames
def parse_example_queue(example_queue, config):
""" Read one example.
This function read one example and return context sequence and tag sequence
correspondingly.
Args:
filename_queue: A filename queue returned by string_input_producer
context_feature_name: Context feature name in TFRecord. Set in ModelConfig
tag_feature_name: Tag feature name in TFRecord. Set in ModelConfig
Returns:
input_seq: An int32 Tensor with different length.
tag_seq: An int32 Tensor with different length.
"""
#Parse one example
context, features = tf.parse_single_sequence_example(
example_queue,
context_features={
config.length_name: tf.FixedLenFeature([], dtype=tf.int64)
},
sequence_features={
config.context_feature_name:
tf.FixedLenSequenceFeature([], dtype=tf.int64),
config.tag_feature_name:
tf.FixedLenSequenceFeature([], dtype=tf.int64)
})
return (features[config.context_feature_name],
features[config.tag_feature_name], context[config.length_name])
# Source: sequence_example_decoder.py, from mozilla/automatic-summarization
def decode(self, serialized_example, items=None):
"""Decodes the given serialized TF-example.
Args:
serialized_example: a serialized TF-example tensor.
items: the list of items to decode. These must be a subset of the item
keys in self._items_to_handlers. If `items` is left as None, then all
of the items in self._items_to_handlers are decoded.
Returns:
the decoded items, a list of tensors.
"""
context, sequence = tf.parse_single_sequence_example(
serialized_example, self._context_keys_to_features,
self._sequence_keys_to_features)
# Merge context and sequence features
example = {}
example.update(context)
example.update(sequence)
all_features = {}
all_features.update(self._context_keys_to_features)
all_features.update(self._sequence_keys_to_features)
# Reshape non-sparse elements just once:
for k, value in all_features.items():
if isinstance(value, tf.FixedLenFeature):
example[k] = tf.reshape(example[k], value.shape)
if not items:
items = self._items_to_handlers.keys()
outputs = []
for item in items:
handler = self._items_to_handlers[item]
keys_to_tensors = {key: example[key] for key in handler.keys}
outputs.append(handler.tensors_to_item(keys_to_tensors))
return outputs
def parse_sequence_example(serialized_example):
context, sequence = tf.parse_single_sequence_example(
serialized=serialized_example,
context_features={
"length": tf.FixedLenFeature([], dtype=tf.int64)
},
sequence_features={
"source": tf.FixedLenSequenceFeature([], dtype=tf.int64),
"target": tf.FixedLenSequenceFeature([], dtype=tf.int64)
}
)
return (context['length'], sequence['source'], sequence['target'])
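This parser also works with tf.data; since `source` and `target` have variable length, padded_batch is the natural batching op. A sketch (file path and batch size are assumptions):

dataset = tf.data.TFRecordDataset(['train.tfrecord'])
dataset = dataset.map(parse_sequence_example)
dataset = dataset.padded_batch(
    batch_size=32,
    padded_shapes=([], [None], [None]))  # scalar length; sequences pad to the batch max
lengths, sources, targets = dataset.make_one_shot_iterator().get_next()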
def single_feature_file_reader(filename_queue, num_features):
""" Read and interpret data from a set of TFRecord files.
Args:
filename_queue: a queue of filenames to read through.
num_features: the depth of the features.
Returns:
A pair of tuples:
1. a context dictionary for the feature
2. the vessel movement features, tensor of dimension [width, num_features].
"""
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
# The serialized example is converted back to actual values.
context_features, sequence_features = tf.parse_single_sequence_example(
serialized_example,
# Defaults are not specified since both keys are required.
context_features={'mmsi': tf.FixedLenFeature([], tf.int64), },
sequence_features={
'movement_features': tf.FixedLenSequenceFeature(
shape=(num_features, ), dtype=tf.float32)
})
return context_features, sequence_features
def prepare_serialized_examples(self, serialized_example,
max_quantized_value=2, min_quantized_value=-2):
contexts, features = tf.parse_single_sequence_example(
serialized_example,
context_features={"video_id": tf.FixedLenFeature(
[], tf.string),
"labels": tf.VarLenFeature(tf.int64)},
sequence_features={
feature_name : tf.FixedLenSequenceFeature([], dtype=tf.string)
for feature_name in self.feature_names
})
# read ground truth labels
labels = (tf.cast(
tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
validate_indices=False),
tf.bool))
# loads (potentially) different types of features and concatenates them
num_features = len(self.feature_names)
assert num_features > 0, "No feature selected: feature_names is empty!"
assert len(self.feature_names) == len(self.feature_sizes), \
"length of feature_names (={}) != length of feature_sizes (={})".format( \
len(self.feature_names), len(self.feature_sizes))
num_frames = -1 # the number of frames in the video
feature_matrices = [None] * num_features # an array of different features
for feature_index in range(num_features):
feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
features[self.feature_names[feature_index]],
self.feature_sizes[feature_index],
self.max_frames,
max_quantized_value,
min_quantized_value)
if num_frames == -1:
num_frames = num_frames_in_this_feature
else:
    # Note: tf.assert_equal builds a check op but, as written, nothing runs it;
    # wire it in via tf.control_dependencies for the check to take effect.
    tf.assert_equal(num_frames, num_frames_in_this_feature)
feature_matrices[feature_index] = feature_matrix
# cap the number of frames at self.max_frames
num_frames = tf.minimum(num_frames, self.max_frames)
# concatenate different features
video_matrix = tf.concat(feature_matrices, 1)
# convert to batch format.
# TODO: Do proper batch reads to remove the IO bottleneck.
batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
batch_video_matrix = tf.expand_dims(video_matrix, 0)
batch_labels = tf.expand_dims(labels, 0)
batch_frames = tf.expand_dims(num_frames, 0)
return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
def read_my_file_format_dis(filename_queue, is_training):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
context_features = {
"height": tf.FixedLenFeature([], dtype=tf.int64),
"width": tf.FixedLenFeature([], dtype=tf.int64),
"sequence_length": tf.FixedLenFeature([], dtype=tf.int64),
"text": tf.FixedLenFeature([], dtype=tf.string),
"label": tf.FixedLenFeature([], dtype=tf.int64)
}
sequence_features = {
"frames": tf.FixedLenSequenceFeature([], dtype=tf.string),
"masks": tf.FixedLenSequenceFeature([], dtype=tf.string)
}
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=serialized_example,
context_features=context_features,
sequence_features=sequence_features
)
height = 128  # context_parsed['height'].eval()
width = 128  # context_parsed['width'].eval()
sequence_length = 32  # context_parsed['sequence_length'].eval()
clip = decode_frames(sequence_parsed['frames'], height, width, sequence_length)
# generate one hot vector
label = context_parsed['label']
label = tf.one_hot(label-1, FLAGS.num_class)
text = context_parsed['text']
# randomly sample clips of 16 frames
if is_training:
idx = tf.squeeze(tf.random_uniform([1], 0, sequence_length-FLAGS.seq_length+1, dtype=tf.int32))
else:
idx = 8
clip = clip[idx:idx+FLAGS.seq_length] / 255.0 * 2 - 1
if is_training:
# randomly reverse data
reverse = tf.squeeze(tf.random_uniform([1], 0, 2, dtype=tf.int32))
clip = tf.cond(tf.equal(reverse,0), lambda: clip, lambda: clip[::-1])
# randomly horizontally flip data
flip = tf.squeeze(tf.random_uniform([1], 0, 2, dtype=tf.int32))
clip = tf.cond(tf.equal(flip,0), lambda: clip, lambda: \
tf.map_fn(lambda img: tf.image.flip_left_right(img), clip))
clip.set_shape([FLAGS.seq_length, height, width, 3])
return clip, label, text
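decode_frames is an external helper not shown in this file; a minimal sketch of what it presumably does, under the assumption that each frame is stored as raw uint8 RGB bytes (for JPEG-encoded frames, tf.map_fn over tf.image.decode_jpeg would replace tf.decode_raw):

def decode_frames(raw_frames, height, width, sequence_length):
    # raw_frames: 1-D string tensor with one encoded frame per time step.
    frames = tf.decode_raw(raw_frames, tf.uint8)
    frames = tf.reshape(frames, [sequence_length, height, width, 3])
    # Return float32 so the caller's `/ 255.0 * 2 - 1` rescaling works directly.
    return tf.cast(frames, tf.float32)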
def read_record(self, record):
"""Parse record TFRecord into a set a set of values, names and types
that can be queued and then read.
Returns:
- queue_values: Dict with tensor values.
- queue_names: Names for each tensor.
- queue_types: Types for each tensor.
"""
# We parse variable length features (bboxes in a image) as sequence
# features
context_example, sequence_example = tf.parse_single_sequence_example(
record,
context_features=self.CONTEXT_FEATURES,
sequence_features=self.SEQUENCE_FEATURES
)
# Decode image
image_raw = tf.image.decode_image(
context_example['image_raw'], channels=3
)
image = tf.cast(image_raw, tf.float32)
height = tf.cast(context_example['height'], tf.int32)
width = tf.cast(context_example['width'], tf.int32)
image_shape = tf.stack([height, width, 3])
image = tf.reshape(image, image_shape)
label = self._sparse_to_tensor(sequence_example['label'])
xmin = self._sparse_to_tensor(sequence_example['xmin'])
xmax = self._sparse_to_tensor(sequence_example['xmax'])
ymin = self._sparse_to_tensor(sequence_example['ymin'])
ymax = self._sparse_to_tensor(sequence_example['ymax'])
# Stack parsed tensors to define bounding boxes of shape (num_boxes, 5)
bboxes = tf.stack([xmin, ymin, xmax, ymax, label], axis=1)
image, bboxes, preprocessing_details = self.preprocess(image, bboxes)
filename = tf.cast(context_example['filename'], tf.string)
# TODO: Send additional metadata through the queue (scale_factor,
# applied_augmentations)
queue_dtypes = [tf.float32, tf.int32, tf.string]
queue_names = ['image', 'bboxes', 'filename']
queue_values = {
'image': image,
'bboxes': bboxes,
'filename': filename,
}
return queue_values, queue_dtypes, queue_names
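self._sparse_to_tensor is defined elsewhere in the class; a plausible minimal implementation (an assumption, not the project's actual code):

def _sparse_to_tensor(self, sparse_tensor, default_value=0):
    # Variable-length per-image lists (labels, box coordinates) arrive as
    # SparseTensors, assumed here to hold one value per sequence step, i.e.
    # dense shape [num_boxes, 1]; densify and drop the inner axis.
    dense = tf.sparse_tensor_to_dense(sparse_tensor, default_value=default_value)
    return tf.squeeze(dense, axis=[1])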