def make_data_provider(self, **kwargs):
context_keys_to_features = {
self.params["image_field"]: tf.FixedLenFeature(
[], dtype=tf.string),
"image/format": tf.FixedLenFeature(
[], dtype=tf.string, default_value=self.params["image_format"]),
}
sequence_keys_to_features = {
self.params["caption_ids_field"]: tf.FixedLenSequenceFeature(
[], dtype=tf.int64),
self.params["caption_tokens_field"]: tf.FixedLenSequenceFeature(
[], dtype=tf.string)
}
items_to_handlers = {
"image": tfexample_decoder.Image(
image_key=self.params["image_field"],
format_key="image/format",
channels=3),
"target_ids":
tfexample_decoder.Tensor(self.params["caption_ids_field"]),
"target_tokens":
tfexample_decoder.Tensor(self.params["caption_tokens_field"]),
"target_len": tfexample_decoder.ItemHandlerCallback(
keys=[self.params["caption_tokens_field"]],
func=lambda x: tf.size(x[self.params["caption_tokens_field"]]))
}
decoder = TFSEquenceExampleDecoder(
context_keys_to_features, sequence_keys_to_features, items_to_handlers)
dataset = tf.contrib.slim.dataset.Dataset(
data_sources=self.params["files"],
reader=tf.TFRecordReader,
decoder=decoder,
num_samples=None,
items_to_descriptions={})
return tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
dataset=dataset,
shuffle=self.params["shuffle"],
num_epochs=self.params["num_epochs"],
**kwargs)
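For reference, a record in the context/feature-list layout this provider parses can be built as below. A minimal writer-side sketch, assuming the configurable field params resolve to the hypothetical names "image/data", "ids" and "tokens":

import tensorflow as tf

def make_caption_record(encoded_jpeg, caption_ids, caption_tokens):
  # Context: the encoded image and its format, matching the FixedLenFeatures.
  context = tf.train.Features(feature={
      "image/data": tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[encoded_jpeg])),
      "image/format": tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[b"jpeg"])),
  })
  # Feature lists: one feature per caption position, matching the
  # FixedLenSequenceFeatures above.
  feature_lists = tf.train.FeatureLists(feature_list={
      "ids": tf.train.FeatureList(feature=[
          tf.train.Feature(int64_list=tf.train.Int64List(value=[i]))
          for i in caption_ids]),
      "tokens": tf.train.FeatureList(feature=[
          tf.train.Feature(bytes_list=tf.train.BytesList(value=[t]))
          for t in caption_tokens]),
  })
  return tf.train.SequenceExample(
      context=context, feature_lists=feature_lists).SerializeToString()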
Python examples of the tf.FixedLenSequenceFeature() class (source code)
def prepare_serialized_examples(self, serialized_example,
max_quantized_value=2, min_quantized_value=-2):
contexts, features = tf.parse_single_sequence_example(
serialized_example,
context_features={"video_id": tf.FixedLenFeature(
[], tf.string),
"labels": tf.VarLenFeature(tf.int64)},
sequence_features={
feature_name : tf.FixedLenSequenceFeature([], dtype=tf.string)
for feature_name in self.feature_names
})
# read ground truth labels
labels = (tf.cast(
tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
validate_indices=False),
tf.bool))
# loads (potentially) different types of features and concatenates them
num_features = len(self.feature_names)
assert num_features > 0, "No feature selected: feature_names is empty!"
assert len(self.feature_names) == len(self.feature_sizes), \
"length of feature_names (={}) != length of feature_sizes (={})".format( \
len(self.feature_names), len(self.feature_sizes))
num_frames = -1 # the number of frames in the video
feature_matrices = [None] * num_features # an array of different features
for feature_index in range(num_features):
feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
features[self.feature_names[feature_index]],
self.feature_sizes[feature_index],
self.max_frames,
max_quantized_value,
min_quantized_value)
if num_frames == -1:
num_frames = num_frames_in_this_feature
      else:
        # NOTE: tf.assert_equal returns an op that is never wired into the
        # graph here, so the frame-count check is not actually enforced.
        tf.assert_equal(num_frames, num_frames_in_this_feature)
feature_matrices[feature_index] = feature_matrix
# cap the number of frames at self.max_frames
num_frames = tf.minimum(num_frames, self.max_frames)
# concatenate different features
video_matrix = tf.concat(feature_matrices, 1)
# convert to batch format.
# TODO: Do proper batch reads to remove the IO bottleneck.
batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
batch_video_matrix = tf.expand_dims(video_matrix, 0)
batch_labels = tf.expand_dims(labels, 0)
batch_frames = tf.expand_dims(num_frames, 0)
return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
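get_video_matrix is not shown on this page; in the YouTube-8M starter code it decodes each frame's byte string and reverses the 8-bit quantization. A hedged sketch of that step, following the starter code's Dequantize helper (including its half-bin bias):

import tensorflow as tf

def dequantize_frames(raw_bytes, feature_size,
                      max_quantized_value=2, min_quantized_value=-2):
  # raw_bytes: a string tensor with one packed uint8 vector per frame.
  decoded = tf.reshape(
      tf.cast(tf.decode_raw(raw_bytes, tf.uint8), tf.float32),
      [-1, feature_size])
  # Map [0, 255] back onto [min_quantized_value, max_quantized_value].
  quantized_range = max_quantized_value - min_quantized_value
  scalar = quantized_range / 255.0
  bias = (quantized_range / 512.0) + min_quantized_value
  return decoded * scalar + bias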
def prepare_serialized_examples(self, serialized_example,
max_quantized_value=2, min_quantized_value=-2):
contexts, features = tf.parse_single_sequence_example(
serialized_example,
context_features={"video_id": tf.FixedLenFeature(
[], tf.string),
"labels": tf.VarLenFeature(tf.int64)},
sequence_features={
feature_name : tf.FixedLenSequenceFeature([], dtype=tf.string)
for feature_name in self.feature_names
})
    # read ground truth labels (4716 is the hard-coded YouTube-8M class count;
    # the variant above uses self.num_classes instead)
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (4716,), 1,
validate_indices=False),
tf.bool))
# loads (potentially) different types of features and concatenates them
num_features = len(self.feature_names)
assert num_features > 0, "No feature selected: feature_names is empty!"
assert len(self.feature_names) == len(self.feature_sizes), \
"length of feature_names (={}) != length of feature_sizes (={})".format( \
len(self.feature_names), len(self.feature_sizes))
num_frames = -1 # the number of frames in the video
feature_matrices = [None] * num_features # an array of different features
for feature_index in range(num_features):
feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
features[self.feature_names[feature_index]],
self.feature_sizes[feature_index],
self.max_frames,
max_quantized_value,
min_quantized_value)
if num_frames == -1:
num_frames = num_frames_in_this_feature
else:
tf.assert_equal(num_frames, num_frames_in_this_feature)
feature_matrices[feature_index] = feature_matrix
# cap the number of frames at self.max_frames
num_frames = tf.minimum(num_frames, self.max_frames)
# concatenate different features
video_matrix = tf.concat(feature_matrices, 1)
# convert to batch format.
# TODO: Do proper batch reads to remove the IO bottleneck.
batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
batch_video_matrix = tf.expand_dims(video_matrix, 0)
batch_labels = tf.expand_dims(labels, 0)
batch_frames = tf.expand_dims(num_frames, 0)
return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
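The labels step above turns sparse class indices into a dense multi-hot vector. A small self-contained example, using 10 classes instead of 4716:

import tensorflow as tf

indices = tf.constant([2, 5], dtype=tf.int64)
multi_hot = tf.cast(
    tf.sparse_to_dense(indices, (10,), 1, validate_indices=False), tf.bool)
with tf.Session() as sess:
  print(sess.run(multi_hot))
  # [False False  True False False  True False False False False]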
def get_padded_batch(file_list, batch_size, input_size, output_size,
num_enqueuing_threads=4, num_epochs=1, shuffle=True):
"""Reads batches of SequenceExamples from TFRecords and pads them.
Can deal with variable length SequenceExamples by padding each batch to the
length of the longest sequence with zeros.
Args:
file_list: A list of paths to TFRecord files containing SequenceExamples.
batch_size: The number of SequenceExamples to include in each batch.
input_size: The size of each input vector. The returned batch of inputs
will have a shape [batch_size, num_steps, input_size].
num_enqueuing_threads: The number of threads to use for enqueuing
SequenceExamples.
Returns:
inputs: A tensor of shape [batch_size, num_steps, input_size] of floats32s.
labels: A tensor of shape [batch_size, num_steps] of float32s.
lengths: A tensor of shape [batch_size] of int32s. The lengths of each
SequenceExample before padding.
"""
file_queue = tf.train.string_input_producer(
file_list, num_epochs=num_epochs, shuffle=shuffle)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(file_queue)
sequence_features = {
'inputs': tf.FixedLenSequenceFeature(shape=[input_size],
dtype=tf.float32),
'inputs_cmvn': tf.FixedLenSequenceFeature(shape=[input_size],
dtype=tf.float32),
'labels1': tf.FixedLenSequenceFeature(shape=[output_size],
dtype=tf.float32),
'labels2': tf.FixedLenSequenceFeature(shape=[output_size],
dtype=tf.float32)}
_, sequence = tf.parse_single_sequence_example(
serialized_example, sequence_features=sequence_features)
length = tf.shape(sequence['inputs'])[0]
capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
queue = tf.PaddingFIFOQueue(
capacity=capacity,
dtypes=[tf.float32, tf.float32,tf.float32,tf.float32, tf.int32],
shapes=[(None, input_size),(None,input_size), (None, output_size),(None,output_size), ()])
enqueue_ops = [queue.enqueue([sequence['inputs'],
sequence['inputs_cmvn'],
sequence['labels1'],
sequence['labels2'],
length])] * num_enqueuing_threads
tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
return queue.dequeue_many(batch_size)
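A minimal usage sketch for get_padded_batch (the file name and feature sizes are hypothetical). The batch is produced by queue runners, which must be started before fetching, and num_epochs is backed by a local variable that needs initializing:

import tensorflow as tf

inputs, inputs_cmvn, labels1, labels2, lengths = get_padded_batch(
    file_list=["train.tfrecords"], batch_size=16,
    input_size=257, output_size=257)

with tf.Session() as sess:
  sess.run([tf.global_variables_initializer(),
            tf.local_variables_initializer()])
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  feats, lens = sess.run([inputs, lengths])  # padded to the longest sequence
  coord.request_stop()
  coord.join(threads)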
def read_and_decode(filename_queue, feature_columns):
"""
Read and decode one example from a TFRecords file
    :param filename_queue: filename queue containing the TFRecords filenames
    :param feature_columns: list of feature columns
:return: list of tensors representing one example
"""
with tf.device('/cpu:0'):
# New TFRecord file
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
# Contextual TFRecords features
context_features = {
"x_length": tf.FixedLenFeature([], dtype=tf.int64),
"x_id": tf.FixedLenFeature([], dtype=tf.string)
}
# Sequential TFRecords features
sequence_features = {
"x_tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
"x_chars": tf.FixedLenSequenceFeature([], dtype=tf.int64),
"x_chars_len": tf.FixedLenSequenceFeature([], dtype=tf.int64),
"y": tf.FixedLenSequenceFeature([], dtype=tf.int64),
}
for col in feature_columns:
sequence_features["x_att_{}".format(col)] = tf.FixedLenSequenceFeature([], dtype=tf.int64)
# Parsing contextual and sequential features
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=serialized_example,
context_features=context_features,
sequence_features=sequence_features
)
sequence_length = tf.cast(context_parsed["x_length"], tf.int32)
chars = tf.reshape(sequence_parsed["x_chars"], tf.stack([sequence_length, -1]))
# Preparing tensor list, casting values to 32 bits when necessary
tensor_list = [
context_parsed["x_id"],
tf.cast(context_parsed["x_length"], tf.int32),
tf.cast(sequence_parsed["x_tokens"], dtype=tf.int32),
tf.cast(chars, dtype=tf.int32),
tf.cast(sequence_parsed["x_chars_len"], dtype=tf.int32),
tf.cast(sequence_parsed["y"], dtype=tf.int32)
]
for col in feature_columns:
tensor_list.append(tf.cast(sequence_parsed["x_att_{}".format(col)], dtype=tf.int32))
return tensor_list
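A hedged sketch of batching the decoded tensors (the file name and feature columns are hypothetical); with dynamic_pad=True, tf.train.batch pads every variable-length tensor to the longest sequence in each batch:

import tensorflow as tf

filename_queue = tf.train.string_input_producer(["train.tfrecords"])
tensors = read_and_decode(filename_queue, feature_columns=["pos"])
batch = tf.train.batch(
    tensors, batch_size=32, capacity=256,
    dynamic_pad=True, allow_smaller_final_batch=True)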
def read_and_decode_test(filename_queue, feature_columns):
"""
Read and decode one example from a TFRecords file
    :param filename_queue: filename queue containing the TFRecords filenames
    :param feature_columns: list of feature columns
:return: list of tensors representing one example
"""
with tf.device('/cpu:0'):
# New TFRecord file
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
# Contextual TFRecords features
context_features = {
"x_length": tf.FixedLenFeature([], dtype=tf.int64),
"x_id": tf.FixedLenFeature([], dtype=tf.string)
}
# Sequential TFRecords features
sequence_features = {
"x_tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
"x_chars": tf.FixedLenSequenceFeature([], dtype=tf.int64),
"x_chars_len": tf.FixedLenSequenceFeature([], dtype=tf.int64),
}
for col in feature_columns:
sequence_features["x_att_{}".format(col)] = tf.FixedLenSequenceFeature([], dtype=tf.int64)
# Parsing contextual and sequential features
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=serialized_example,
context_features=context_features,
sequence_features=sequence_features
)
sequence_length = tf.cast(context_parsed["x_length"], tf.int32)
chars = tf.reshape(sequence_parsed["x_chars"], tf.stack([sequence_length, -1]))
# Preparing tensor list, casting values to 32 bits when necessary
tensor_list = [
context_parsed["x_id"],
tf.cast(context_parsed["x_length"], tf.int32),
tf.cast(sequence_parsed["x_tokens"], dtype=tf.int32),
tf.cast(chars, dtype=tf.int32),
tf.cast(sequence_parsed["x_chars_len"], dtype=tf.int32),
]
for col in feature_columns:
tensor_list.append(tf.cast(sequence_parsed["x_att_{}".format(col)], dtype=tf.int32))
return tensor_list
def _generate_feats_and_label_batch(filename_queue, batch_size):
"""Construct a queued batch of spectral features and transcriptions.
Args:
filename_queue: queue of filenames to read data from.
batch_size: Number of utterances per batch.
Returns:
feats: mfccs. 4D tensor of [batch_size, height, width, 3] size.
labels: transcripts. List of length batch_size.
seq_lens: Sequence Lengths. List of length batch_size.
"""
# Define how to parse the example
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
context_features = {
"seq_len": tf.FixedLenFeature([], dtype=tf.int64),
"labels": tf.VarLenFeature(dtype=tf.int64)
}
sequence_features = {
# mfcc features are 13 dimensional
"feats": tf.FixedLenSequenceFeature([13, ], dtype=tf.float32)
}
# Parse the example (returns a dictionary of tensors)
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=serialized_example,
context_features=context_features,
sequence_features=sequence_features
)
# Generate a batch worth of examples after bucketing
seq_len, (feats, labels) = tf.contrib.training.bucket_by_sequence_length(
input_length=tf.cast(context_parsed['seq_len'], tf.int32),
tensors=[sequence_parsed['feats'], context_parsed['labels']],
batch_size=batch_size,
bucket_boundaries=list(range(100, 1900, 100)),
allow_smaller_final_batch=True,
num_threads=16,
dynamic_pad=True)
return feats, tf.cast(labels, tf.int32), seq_len
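A hedged sketch of consuming the bucketed batch: the VarLenFeature labels arrive as a SparseTensor, which is the form tf.nn.ctc_loss expects. The single dense layer below stands in for a real acoustic model, and the 29 output units are an assumed label inventory (28 symbols plus the CTC blank):

import tensorflow as tf

filename_queue = tf.train.string_input_producer(["train.tfrecords"])
feats, labels, seq_lens = _generate_feats_and_label_batch(
    filename_queue, batch_size=32)
logits = tf.layers.dense(feats, units=29)  # stand-in for an acoustic model
loss = tf.reduce_mean(
    tf.nn.ctc_loss(labels, logits, seq_lens, time_major=False))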
def get_padded_batch(file_list, batch_size, input_size, output_size,
num_enqueuing_threads=4, num_epochs=1, shuffle=True):
"""Reads batches of SequenceExamples from TFRecords and pads them.
Can deal with variable length SequenceExamples by padding each batch to the
length of the longest sequence with zeros.
Args:
file_list: A list of paths to TFRecord files containing SequenceExamples.
batch_size: The number of SequenceExamples to include in each batch.
input_size: The size of each input vector. The returned batch of inputs
will have a shape [batch_size, num_steps, input_size].
num_enqueuing_threads: The number of threads to use for enqueuing
SequenceExamples.
Returns:
inputs: A tensor of shape [batch_size, num_steps, input_size] of floats32s.
labels: A tensor of shape [batch_size, num_steps] of float32s.
lengths: A tensor of shape [batch_size] of int32s. The lengths of each
SequenceExample before padding.
"""
file_queue = tf.train.string_input_producer(
file_list, num_epochs=num_epochs, shuffle=shuffle)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(file_queue)
sequence_features = {
'inputs': tf.FixedLenSequenceFeature(shape=[input_size],
dtype=tf.float32),
'labels': tf.FixedLenSequenceFeature(shape=[output_size],
dtype=tf.float32),
'genders': tf.FixedLenSequenceFeature(shape=[2], dtype=tf.float32)}
_, sequence = tf.parse_single_sequence_example(
serialized_example, sequence_features=sequence_features)
length = tf.shape(sequence['inputs'])[0]
capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
queue = tf.PaddingFIFOQueue(
capacity=capacity,
dtypes=[tf.float32, tf.float32, tf.float32, tf.int32],
shapes=[(None, input_size), (None, output_size),(1,2), ()])
enqueue_ops = [queue.enqueue([sequence['inputs'],
sequence['labels'],
sequence['genders'],
length])] * num_enqueuing_threads
tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
return queue.dequeue_many(batch_size)
def get_padded_batch_v2(file_list, batch_size, input_size, output_size,
num_enqueuing_threads=4, num_epochs=1, shuffle=True):
"""Reads batches of SequenceExamples from TFRecords and pads them.
Can deal with variable length SequenceExamples by padding each batch to the
length of the longest sequence with zeros.
Args:
file_list: A list of paths to TFRecord files containing SequenceExamples.
batch_size: The number of SequenceExamples to include in each batch.
input_size: The size of each input vector. The returned batch of inputs
will have a shape [batch_size, num_steps, input_size].
num_enqueuing_threads: The number of threads to use for enqueuing
SequenceExamples.
Returns:
inputs: A tensor of shape [batch_size, num_steps, input_size] of floats32s.
labels: A tensor of shape [batch_size, num_steps] of float32s.
lengths: A tensor of shape [batch_size] of int32s. The lengths of each
SequenceExample before padding.
"""
file_queue = tf.train.string_input_producer(
file_list, num_epochs=num_epochs, shuffle=shuffle)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(file_queue)
sequence_features = {
'inputs': tf.FixedLenSequenceFeature(shape=[input_size],dtype=tf.float32),
'inputs_cmvn': tf.FixedLenSequenceFeature(shape=[input_size],dtype=tf.float32),
'labels1': tf.FixedLenSequenceFeature(shape=[output_size],dtype=tf.float32),
'labels2': tf.FixedLenSequenceFeature(shape=[output_size],dtype=tf.float32),
}
_, sequence = tf.parse_single_sequence_example(
serialized_example, sequence_features=sequence_features)
length = tf.shape(sequence['inputs'])[0]
capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
queue = tf.PaddingFIFOQueue(
capacity=capacity,
dtypes=[tf.float32, tf.float32,tf.float32, tf.float32, tf.int32],
shapes=[(None, input_size),(None, input_size),(None, output_size), (None, output_size), ()])
enqueue_ops = [queue.enqueue([sequence['inputs'],
sequence['inputs_cmvn'],
sequence['labels1'],
sequence['labels2'],
length])] * num_enqueuing_threads
tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
return queue.dequeue_many(batch_size)
def make_data_provider(self, **kwargs):
"""Creates DataProvider instance for this input pipeline. Additional keyword arguments
are passed to the DataProvider.
"""
context_keys_to_features = {
self.image_field: tf.FixedLenFeature(
[], dtype=tf.string),
"image/format": tf.FixedLenFeature(
[], dtype=tf.string, default_value=self.image_format),
}
sequence_keys_to_features = {
self.caption_ids_field: tf.FixedLenSequenceFeature(
[], dtype=tf.int64),
self.caption_tokens_field: tf.FixedLenSequenceFeature(
[], dtype=tf.string)
}
items_to_handlers = {
'image': tfslim.tfexample_decoder.Image(
image_key=self.image_field,
format_key="image/format",
channels=3),
'target_ids': tfslim.tfexample_decoder.Tensor(self.caption_ids_field),
'target_token': tfslim.tfexample_decoder.Tensor(self.caption_tokens_field),
'target_len': tfslim.tfexample_decoder.ItemHandlerCallback(
keys=[self.caption_tokens_field],
func=lambda x: tf.size(x[self.caption_tokens_field]))
}
decoder = TFSequenceExampleDecoder(
context_keys_to_features, sequence_keys_to_features, items_to_handlers)
dataset = Dataset(
data_sources=self.files,
reader=tf.TFRecordReader,
decoder=decoder,
num_samples=None,
items_to_descriptions={})
return DatasetDataProvider(
dataset=dataset,
shuffle=self.shuffle,
num_epochs=self.num_epochs,
**kwargs)
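A minimal consumption sketch (`pipeline` is a hypothetical instance of the class above); slim's DatasetDataProvider returns the decoded items under the names registered in items_to_handlers:

provider = pipeline.make_data_provider()
image, target_ids, target_len = provider.get(
    ["image", "target_ids", "target_len"])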