def _shuffle_inputs(self, input_tensors, capacity,
                    min_after_dequeue, num_threads):
    """Shuffles tensors in `input_tensors`, maintaining grouping."""
    shuffle_queue = tf.RandomShuffleQueue(
        capacity, min_after_dequeue, dtypes=[t.dtype for t in input_tensors])
    enqueue_op = shuffle_queue.enqueue(input_tensors)
    runner = tf.train.QueueRunner(shuffle_queue, [enqueue_op] * num_threads)
    tf.train.add_queue_runner(runner)
    output_tensors = shuffle_queue.dequeue()
    for i in range(len(input_tensors)):
        output_tensors[i].set_shape(input_tensors[i].shape)
    return output_tensors
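
The snippet above shows the standard TF 1.x pattern: create the queue, register a `QueueRunner` for the enqueue op, then dequeue. Below is a minimal, self-contained sketch of that same pattern, assuming TensorFlow 1.x (`tf.RandomShuffleQueue` and `tf.train.QueueRunner` were removed in TF 2.x); the input tensor is hypothetical.

import tensorflow as tf

# Hypothetical input tensor; in practice this comes from a reader/decoder.
x = tf.random_uniform([4])
queue = tf.RandomShuffleQueue(capacity=100, min_after_dequeue=10,
                              dtypes=[tf.float32])
enqueue_op = queue.enqueue([x])
tf.train.add_queue_runner(tf.train.QueueRunner(queue, [enqueue_op] * 2))
shuffled = queue.dequeue()
shuffled.set_shape(x.shape)  # dequeue drops static shape information

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    print(sess.run(shuffled))
    coord.request_stop()
    coord.join(threads)
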
def _setup_base_graph(self):
    """Set up queue, variables and session."""
    self.graph = tf.Graph()
    with self.graph.as_default() as g:
        input_dim = self.input_dim
        batch_size = self.batch_size
        hidden_units = self.hidden_units
        layer_units = [input_dim] + hidden_units + [1]
        layer_num = len(layer_units)
        # Make a queue for fetching batches.
        self.queue = q = tf.RandomShuffleQueue(capacity=self.q_capacity,
                                               min_after_dequeue=self.min_after_dequeue,
                                               dtypes=["float", "float"],
                                               shapes=[[input_dim], [input_dim]])
        # Input data.
        self.data1, self.data2 = q.dequeue_many(batch_size, name="inputs")
        self._setup_variables()
        self._setup_training()
        self._setup_prediction()
        self._setup_pretraining()
def _shuffle(inputs, capacity, min_after_dequeue, num_threads):
    if isinstance(inputs, dict):
        names, dtypes = zip(*[(key, input_.dtype)
                              for key, input_ in inputs.items()])
    else:
        dtypes = [input_.dtype for input_ in inputs]
    queue = tf.RandomShuffleQueue(
        capacity,
        min_after_dequeue,
        dtypes,
        **({'names': names} if isinstance(inputs, dict) else {}))
    tf.train.add_queue_runner(tf.train.QueueRunner(
        queue,
        [queue.enqueue(inputs)] * num_threads))
    shuffled_inputs = queue.dequeue()
    for key, input_ in (inputs.items()
                        if isinstance(inputs, dict) else
                        enumerate(inputs)):
        shuffled_inputs[key].set_shape(input_.get_shape())
    return shuffled_inputs
def queue_setup(filename, mode, batch_size, num_readers, min_examples):
    """Sets up the queue runners for data input."""
    filename_queue = tf.train.string_input_producer([filename], shuffle=True, capacity=16)
    if mode == "train":
        examples_queue = tf.RandomShuffleQueue(capacity=min_examples + 3 * batch_size,
                                               min_after_dequeue=min_examples,
                                               dtypes=[tf.string])
    else:
        examples_queue = tf.FIFOQueue(capacity=min_examples + 3 * batch_size,
                                      dtypes=[tf.string])
    enqueue_ops = list()
    for _ in range(num_readers):
        reader = tf.TFRecordReader()
        _, value = reader.read(filename_queue)
        enqueue_ops.append(examples_queue.enqueue([value]))
    tf.train.queue_runner.add_queue_runner(
        tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
    example_serialized = examples_queue.dequeue()
    return example_serialized
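
A hedged usage sketch for `queue_setup`: the dequeued string is a serialized `tf.train.Example`, which would typically be parsed next. The file name and feature keys below are hypothetical.

serialized = queue_setup('train.tfrecords', mode='train', batch_size=32,
                         num_readers=4, min_examples=1000)
features = tf.parse_single_example(serialized, features={
    'image/encoded': tf.FixedLenFeature([], tf.string),  # hypothetical keys
    'image/label': tf.FixedLenFeature([], tf.int64),
})
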
def __init__(self, pkl_path, shuffle=False, distort=False,
             capacity=2000, image_per_thread=16):
    self._shuffle = shuffle
    self._distort = distort
    with open(pkl_path, 'rb') as fd:
        data = pickle.load(fd)
    self._images = data['data'].reshape([-1, 3, 32, 32]).transpose((0, 2, 3, 1)).copy(order='C')
    self._labels = data['labels']  # numpy 1-D array
    self.size = len(self._labels)
    self.queue = tf.FIFOQueue(shapes=[[32, 32, 3], []],
                              dtypes=[tf.float32, tf.int32],
                              capacity=capacity)
    # self.queue = tf.RandomShuffleQueue(shapes=[[32, 32, 3], []],
    #                                    dtypes=[tf.float32, tf.int32],
    #                                    capacity=capacity,
    #                                    min_after_dequeue=min_after_dequeue)
    self.dataX = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
    self.dataY = tf.placeholder(dtype=tf.int32, shape=[None])
    self.enqueue_op = self.queue.enqueue_many([self.dataX, self.dataY])
    self.image_per_thread = image_per_thread
    self._image_summary_added = False
def _gather_into_queue(*tensor_lists):
    assert len(tensor_lists) % FLAGS.batch_size == 0
    queue = tf.RandomShuffleQueue(FLAGS.batch_queue_capacity,
                                  FLAGS.batch_queue_capacity // 2,
                                  dtypes(*tensor_lists[0]))
    collections.add_metric(queue.size(), "sorted_batches_in_queue")
    add_queue_runner(
        queue,
        [tf.group(*[
            queue.enqueue(transform.batch(
                *tensor_lists[i:i + FLAGS.batch_size]))
            for i in range(0, len(tensor_lists), FLAGS.batch_size)])])
    results = queue.dequeue()
    for result, tensor in zip(results, tensor_lists[0]):
        result.set_shape([None, *static_shape(tensor)])
    return results
Source file: input.py — project: Saliency_Detection_Convolutional_Autoencoder, author: arthurmeyer
def __init__(self, hight, width, batch_size, folder_image, folder_label, format_image='.jpg', random=True):
    """
    Args:
        hight        : height of samples
        width        : width of samples
        batch_size   : batch size
        folder_image : the folder where the images are
        folder_label : the folder where the ground truth is
        format_image : format of the images (usually jpg)
        random       : whether the queue is shuffled (for training) or FIFO (for test-related tasks)
    """
    self.hight = hight
    self.width = width
    self.batch_size = batch_size
    self.image = np.array([f for f in os.listdir(folder_image) if format_image in f])
    self.f1 = folder_image
    self.f2 = folder_label
    self.size_epoch = len(self.image)
    if random:
        self.queue = tf.RandomShuffleQueue(
            shapes=[(self.hight, self.width, 3), (self.hight, self.width), []],
            dtypes=[tf.float32, tf.float32, tf.string],
            capacity=16 * self.batch_size,
            min_after_dequeue=8 * self.batch_size)
    else:
        self.queue = tf.FIFOQueue(
            shapes=[(self.hight, self.width, 3), (self.hight, self.width), []],
            dtypes=[tf.float32, tf.float32, tf.string],
            capacity=16 * self.batch_size)
    self.image_pl = tf.placeholder(tf.float32, shape=(batch_size, hight, width, 3))
    self.label_pl = tf.placeholder(tf.float32, shape=(batch_size, hight, width))
    self.name_pl = tf.placeholder(tf.string, shape=(batch_size))
    self.enqueue_op = self.queue.enqueue_many([self.image_pl, self.label_pl, self.name_pl])
def _create_queue(self, queue_id, ctor=tf.RandomShuffleQueue):
    # The enqueuing workers transform inputs into serialized loom
    # weaver messages, which are represented as strings.
    return ctor(
        capacity=self.queue_capacity or 4 * self.batch_size,
        min_after_dequeue=0, dtypes=[tf.string], shapes=[tf.TensorShape([])],
        shared_name='tensorflow_fold_plan_queue%s' % queue_id)
def _shuffle_inputs(input_tensors, capacity, min_after_dequeue, num_threads):
    """Shuffles tensors in `input_tensors`, maintaining grouping."""
    shuffle_queue = tf.RandomShuffleQueue(
        capacity, min_after_dequeue, dtypes=[t.dtype for t in input_tensors])
    enqueue_op = shuffle_queue.enqueue(input_tensors)
    runner = tf.train.QueueRunner(shuffle_queue, [enqueue_op] * num_threads)
    tf.train.add_queue_runner(runner)
    output_tensors = shuffle_queue.dequeue()
    for i in range(len(input_tensors)):
        output_tensors[i].set_shape(input_tensors[i].shape)
    return output_tensors
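
A minimal driving sketch for `_shuffle_inputs`, assuming TF 1.x; `image` and `label` are stand-ins for tensors produced by an upstream reader.

image = tf.zeros([32, 32, 3])
label = tf.constant(0)
shuffled_image, shuffled_label = _shuffle_inputs(
    [image, label], capacity=512, min_after_dequeue=128, num_threads=4)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img, lbl = sess.run([shuffled_image, shuffled_label])
    coord.request_stop()
    coord.join(threads)
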
def build_augmentation_graph(self):
    num_targets = len(self.dset.targets)
    # Outputs and queue of the data augmentation graph.
    train_queue = tf.RandomShuffleQueue(
        self.config["queueing"]["random_queue_size"],
        self.config["queueing"]["min_size"],
        [tf.float32] + [tf.int32] * num_targets,
        shapes=self.input_variables["shapes"]["crops"]
    )
    augmented_op = imaging.augmentations.aument_multiple(
        self.input_variables["labeled_crops"][0],
        self.config["queueing"]["augmentation_workers"]
    )
    train_enqueue_op = train_queue.enqueue_many(
        [augmented_op] +
        self.input_variables["labeled_crops"][1:]
    )
    train_inputs = train_queue.dequeue()  # _many(config["training"]["minibatch"])
    self.train_variables = {
        "image_batch": train_inputs[0],
        "queue": train_queue,
        "enqueue_op": train_enqueue_op
    }
    for i in range(num_targets):
        tname = "target_" + str(i)
        tgt = self.dset.targets[i]
        self.train_variables[tname] = tf.one_hot(train_inputs[i + 1], tgt.shape[1])

#################################################
## START TRAINING QUEUES
#################################################
Source file: audio_reader.py — project: CNN-for-single-channel-speech-enhancement, author: zhr1201
def __init__(self,
             audio_dir,
             noise_dir,
             coord,
             N_IN,
             frame_length,
             frame_move,
             is_val):
    '''coord: TensorFlow coordinator
    N_IN: number of input frames presented to the DNN
    frame_move: hop size'''
    self.audio_dir = audio_dir
    self.noise_dir = noise_dir
    self.coord = coord
    self.N_IN = N_IN
    self.frame_length = frame_length
    self.frame_move = frame_move
    self.is_val = is_val
    self.sample_placeholder_many = tf.placeholder(
        tf.float32, shape=(None, self.N_IN, 2, frame_length))
    # Queues to store the data.
    if not is_val:
        self.q = tf.RandomShuffleQueue(
            200000, 5000, tf.float32, shapes=(self.N_IN, 2, frame_length))
    else:
        self.q = tf.FIFOQueue(
            200000, tf.float32, shapes=(self.N_IN, 2, frame_length))
    self.enqueue_many = self.q.enqueue_many(
        self.sample_placeholder_many + 0)
    self.audiofiles = find_files(audio_dir)
    self.noisefiles = find_files(noise_dir)
    print('%d speech files found' % len(self.audiofiles))
    print('%d noise files found' % len(self.noisefiles))
def cifar_shuffle_queue_batch(image, label, batch_size, capacity, min_after_dequeue, threads):
    tensor_list = [image, label]
    dtypes = [tf.float32, tf.int32]
    shapes = [image.get_shape(), label.get_shape()]
    q = tf.RandomShuffleQueue(capacity=capacity, min_after_dequeue=min_after_dequeue,
                              dtypes=dtypes, shapes=shapes)
    enqueue_op = q.enqueue(tensor_list)
    # Add to the queue runner.
    tf.train.add_queue_runner(tf.train.QueueRunner(q, [enqueue_op] * threads))
    # Now extract the batch.
    image_batch, label_batch = q.dequeue_many(batch_size)
    return image_batch, label_batch
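
A hedged usage sketch: `image` and `label` stand in for one decoded CIFAR example from an upstream reader; since the queue enqueues single examples, `dequeue_many` yields a full mini-batch.

image = tf.zeros([24, 24, 3], dtype=tf.float32)  # hypothetical decoded example
label = tf.zeros([], dtype=tf.int32)
image_batch, label_batch = cifar_shuffle_queue_batch(
    image, label, batch_size=128, capacity=2000,
    min_after_dequeue=1000, threads=4)
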
def make_input(model_options):
    '''Prepare the input placeholders and queues.'''
    model_vars = {}
    if model_options['mode'] == 'train':
        images = tf.placeholder("float", [None, 224, 224, model_options['num_channels']])
        model_vars['images'] = images
        labels = tf.placeholder("uint8", [1])
        model_vars['labels'] = labels
        q = tf.RandomShuffleQueue(200, model_options['min_to_keep'], [tf.float32, tf.uint8],
                                  shapes=[[model_options['example_size'], 224, 224,
                                           model_options['num_channels']], 1])
        model_vars['queue'] = q
        enqueue_op = q.enqueue([images, labels])
        model_vars['enqueue_op'] = enqueue_op
    else:
        num_crops = 10 if model_options['flip'] else 5
        images = tf.placeholder("float", [num_crops, model_options['example_size'],
                                          224, 224, model_options['num_channels']])
        labels = tf.placeholder("uint8", [num_crops, 1])
        names = tf.placeholder("string", [num_crops, 1])
        model_vars['images'] = images
        model_vars['labels'] = labels
        model_vars['names'] = names
        q = tf.FIFOQueue(100, [tf.float32, tf.uint8, "string"],
                         shapes=[[model_options['example_size'], 224, 224,
                                  model_options['num_channels']], [1], [1]])
        model_vars['queue'] = q
        enqueue_op = q.enqueue_many([images, labels, names])
        model_vars['enqueue_op'] = enqueue_op
    return model_vars
def parallel_reader(self, min_queue_examples=1024):
    """Parallel record reader, primarily used for training ops.

    Args:
        min_queue_examples: minimum number of examples left in the queue after a dequeue

    Returns:
        a single item from the tfrecord files
    """
    with tf.name_scope('parallel_reader'):
        data_files = self.dataset.data_files()
        filename_queue = tf.train.string_input_producer(
            data_files, num_epochs=self.num_epochs, shuffle=self.shuffle)
        if self.shuffle:
            examples_queue = tf.RandomShuffleQueue(
                capacity=self.capacity, min_after_dequeue=min_queue_examples, dtypes=[tf.string])
        else:
            examples_queue = tf.FIFOQueue(
                capacity=self.capacity, dtypes=[tf.string])
        enqueue_ops = []
        for _reader in self._readers:
            _, value = _reader.read(filename_queue)
            enqueue_ops.append(examples_queue.enqueue([value]))
        tf.train.queue_runner.add_queue_runner(
            tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
        return examples_queue.dequeue()
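
A hedged usage sketch, assuming it is called from another method of the same reader class: the returned serialized record is usually parsed with `tf.parse_single_example` (the feature keys are hypothetical).

serialized = self.parallel_reader(min_queue_examples=1024)
features = tf.parse_single_example(serialized, features={
    'image/encoded': tf.FixedLenFeature([], tf.string),  # hypothetical keys
    'image/class/label': tf.FixedLenFeature([], tf.int64),
})
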
def training_inputs(self):
    fps, labels = self._load_training_labelmap()
    filepaths = tf.constant(fps)
    labels = tf.constant(labels, dtype=tf.int32)
    min_num_examples_in_queue = int(FLAGS.min_frac_examples_in_queue * len(fps))
    filename_queue = tf.RandomShuffleQueue(len(fps), min_num_examples_in_queue,
                                           [tf.string, tf.int32],
                                           name='training_filename_queue')
    enqueue_op = filename_queue.enqueue_many([filepaths, labels])
    qr = tf.train.QueueRunner(filename_queue, [enqueue_op])
    tf.train.add_queue_runner(qr)
    example_list = [self._read_and_preprocess_image_for_training(filename_queue)
                    for _ in xrange(FLAGS.num_consuming_threads)]
    image_batch, label_batch = tf.train.shuffle_batch_join(
        example_list,
        batch_size=FLAGS.batch_size,
        capacity=min_num_examples_in_queue + (FLAGS.num_consuming_threads + 2) * FLAGS.batch_size,
        min_after_dequeue=min_num_examples_in_queue,
        shapes=[[224, 224, 3], []],
        name='training_example_queue'
    )
    return image_batch, util.encode_one_hot(label_batch, self.num_classes)
def _create_input_queue(self, queue_capacity_factor=16):
    self.input_ops, self.target_ops = {}, {}
    self.queue_ops, self.enqueue_ops = {}, {}
    self.x, self.y, self.seq_length, self.mask = {}, {}, {}, {}
    for name in self.data_num.keys():
        self.input_ops[name] = tf.placeholder(tf.float32, shape=[None, None])
        self.target_ops[name] = tf.placeholder(tf.int32, shape=[None])
        min_after_dequeue = 1000
        capacity = min_after_dequeue + 3 * self.batch_size
        self.queue_ops[name] = tf.RandomShuffleQueue(
            capacity=capacity,
            min_after_dequeue=min_after_dequeue,
            dtypes=[tf.float32, tf.int32],
            shapes=[[self.max_length, 2], [self.max_length]],
            seed=self.random_seed,
            name="random_queue_{}".format(name))
        self.enqueue_ops[name] = \
            self.queue_ops[name].enqueue([self.input_ops[name], self.target_ops[name]])
        inputs, labels = self.queue_ops[name].dequeue()
        seq_length = tf.shape(inputs)[0]
        if self.use_terminal_symbol:
            mask = tf.ones([seq_length + 1], dtype=tf.float32)  # terminal symbol
        else:
            mask = tf.ones([seq_length], dtype=tf.float32)
        self.x[name], self.y[name], self.seq_length[name], self.mask[name] = \
            tf.train.batch(
                [inputs, labels, seq_length, mask],
                batch_size=self.batch_size,
                capacity=capacity,
                dynamic_pad=True,
                name="batch_and_pad")
def shuffle_tensor_list(input_tensors, **kwargs):
    dtypes = [tensor.dtype for tensor in input_tensors]
    shuffle_queue = tf.RandomShuffleQueue(dtypes=dtypes, **kwargs)
    shuffle_enqueue = shuffle_queue.enqueue(input_tensors)
    tf.train.add_queue_runner(
        tf.train.QueueRunner(shuffle_queue, [shuffle_enqueue])
    )
    output_tensors = shuffle_queue.dequeue()
    for output_tensor, input_tensor in zip(output_tensors, input_tensors):
        output_tensor.set_shape(input_tensor.get_shape())
    return tuple(output_tensors)
def shuffle_tensor_index(input_queue, dequeue_many=32, **kwargs):
    dequeue_op = input_queue.dequeue_many(dequeue_many)
    dtypes = [dequeue_op.dtype]
    shapes = [dequeue_op.get_shape()[1:]]
    shuffle_queue = tf.RandomShuffleQueue(
        dtypes=dtypes, shapes=shapes,
        **kwargs)
    shuffle_enqueue = shuffle_queue.enqueue_many([dequeue_op])
    tf.train.add_queue_runner(
        tf.train.QueueRunner(shuffle_queue, [shuffle_enqueue])
    )
    return shuffle_queue.dequeue()
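
A minimal sketch for the two helpers above, assuming TF 1.x; the keyword arguments are forwarded directly to `tf.RandomShuffleQueue`, and the tensors are hypothetical.

image = tf.zeros([64, 64, 3])
label = tf.constant(1)
shuffled_image, shuffled_label = shuffle_tensor_list(
    [image, label], capacity=256, min_after_dequeue=64)
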
def __init__(self, train=True):
    self.train = train  # training mode or not
    self.dataX = tf.placeholder(dtype=tf.float32, shape=[FLAGS.raw_size, FLAGS.raw_size, 3])
    self.dataY = tf.placeholder(dtype=tf.int64, shape=[])
    # Get the mean.
    mean_ = np.load(os.path.join(FLAGS.data_dir, FLAGS.mean_file))
    mean_ = mean_['data_mean'].astype(np.float32)
    self.mean_dataX = tf.constant(mean_, dtype=tf.float32)
    # Mean subtraction.
    self.mean_sub_image = self.dataX - self.mean_dataX
    # The actual queue of data. The queue contains a vector for an image and a scalar label.
    if self.train:
        self.queue = tf.RandomShuffleQueue(shapes=[[FLAGS.crop_size, FLAGS.crop_size, 3], []],
                                           dtypes=[tf.float32, tf.int64],
                                           capacity=2000, min_after_dequeue=1000)
        # Random crop.
        self.distorted_image = tf.random_crop(self.mean_sub_image,
                                              [FLAGS.crop_size, FLAGS.crop_size, 3])
        # Random flip.
        self.distorted_image = tf.image.random_flip_left_right(self.distorted_image)
        # Random brightness, saturation and contrast.
        self.distorted_image = tf.image.random_brightness(self.distorted_image, max_delta=63. / 255.)
        self.distorted_image = tf.image.random_saturation(self.distorted_image, lower=0.5, upper=1.5)
        self.distorted_image = tf.image.random_contrast(self.distorted_image, lower=0.2, upper=1.8)
    else:
        self.queue = tf.FIFOQueue(shapes=[[FLAGS.crop_size, FLAGS.crop_size, 3], []],
                                  dtypes=[tf.float32, tf.int64], capacity=20000)
        # Center crop.
        self.distorted_image = tf.image.resize_image_with_crop_or_pad(
            self.mean_sub_image, FLAGS.crop_size, FLAGS.crop_size)
        # Alternative: tf.image.central_crop(image, central_fraction)
    # Enqueue.
    self.enqueue_op = self.queue.enqueue([self.distorted_image, self.dataY])
    # self.enqueue_op = self.queue.enqueue([self.dataX, self.dataY])
def get_queue(nodes,
              queue_type='fifo',
              batch_size=256,
              capacity=None,
              min_after_dequeue=None,
              shape_flag=True,
              seed=0):
    """A generic queue for reading data.

    Built on top of https://indico.io/blog/tensorflow-data-input-part2-extensions/
    """
    if capacity is None:
        capacity = 2 * batch_size
    if min_after_dequeue is None:
        min_after_dequeue = capacity // 2
    names = []
    dtypes = []
    shapes = []
    for name in nodes.keys():
        names.append(name)
        dtypes.append(nodes[name].dtype)
        if shape_flag:
            shapes.append(nodes[name].get_shape()[1:])
        else:
            shapes.append(nodes[name].get_shape())
    if batch_size == 1:
        shapes = None
    if queue_type == 'random':
        queue = tf.RandomShuffleQueue(capacity=capacity,
                                      min_after_dequeue=min_after_dequeue,
                                      dtypes=dtypes,
                                      shapes=shapes,
                                      names=names,
                                      seed=seed)
    elif queue_type == 'fifo':
        queue = tf.FIFOQueue(capacity=capacity,
                             dtypes=dtypes,
                             shapes=shapes,
                             names=names)
    elif queue_type == 'padding_fifo':
        queue = tf.PaddingFIFOQueue(capacity=capacity,
                                    dtypes=dtypes,
                                    shapes=shapes,
                                    names=names)
    elif queue_type == 'priority':
        queue = tf.PriorityQueue(capacity=capacity,
                                 types=dtypes,
                                 shapes=shapes,
                                 names=names)
    else:
        raise Exception('Queue type %s not recognized' % queue_type)
    return queue
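
A hedged usage sketch for `get_queue`: with `shape_flag=True` the first dimension of each node is treated as the batch dimension, so `enqueue_many`/`dequeue_many` operate on dicts keyed by the node names (the placeholders here are hypothetical).

nodes = {
    'images': tf.placeholder(tf.float32, [None, 224, 224, 3]),
    'labels': tf.placeholder(tf.int32, [None]),
}
queue = get_queue(nodes, queue_type='random', batch_size=256)
enqueue_op = queue.enqueue_many(nodes)  # fed via the placeholders at run time
batch = queue.dequeue_many(256)         # a dict: {'images': ..., 'labels': ...}
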
def _build(self):
    # Find the split file from which we are going to read.
    split_path = os.path.join(
        self._dataset_dir, '{}.tfrecords'.format(self._split)
    )
    if not tf.gfile.Exists(split_path):
        raise InvalidDataDirectory(
            '"{}" does not exist.'.format(split_path)
        )
    # The string input producer allows for a variable number of files to read
    # from. We just know we have a single file.
    filename_queue = tf.train.string_input_producer(
        [split_path], num_epochs=self._num_epochs, seed=self._seed
    )
    # Define a reader to parse records.
    reader = tf.TFRecordReader()
    _, raw_record = reader.read(filename_queue)
    values, dtypes, names = self.read_record(raw_record)
    if self._random_shuffle:
        queue = tf.RandomShuffleQueue(
            capacity=100,
            min_after_dequeue=0,
            dtypes=dtypes,
            names=names,
            name='tfrecord_random_queue',
            seed=self._seed
        )
    else:
        queue = tf.FIFOQueue(
            capacity=100,
            dtypes=dtypes,
            names=names,
            name='tfrecord_fifo_queue'
        )
    # Generate queueing ops for the QueueRunner.
    enqueue_ops = [queue.enqueue(values)] * self._total_queue_ops
    self.queue_runner = tf.train.QueueRunner(queue, enqueue_ops)
    tf.train.add_queue_runner(self.queue_runner)
    return queue.dequeue()
def shuffle(tensors,
            capacity=32,
            min_after_dequeue=16,
            num_threads=1,
            dtypes=None,
            shapes=None,
            seed=None,
            shared_name=None,
            name='shuffle'):
    """Wrapper around a `tf.RandomShuffleQueue` creation.

    Returns a dequeue op that dequeues elements from `tensors` in a
    random order, through a `tf.RandomShuffleQueue` -- see its
    documentation for further details.

    Arguments:
      tensors: an iterable of tensors.
      capacity: (Optional) the capacity of the queue; default value set to 32.
      min_after_dequeue: (Optional) minimum number of elements to remain in the
        queue after a `dequeue` or `dequeue_many` has been performed,
        in order to ensure better mixing of elements; default value set to 16.
      num_threads: (Optional) the number of threads to be used for the queue runner;
        default value set to 1.
      dtypes: (Optional) list of `DType` objects, one for each tensor in `tensors`;
        if not provided, will be inferred from `tensors`.
      shapes: (Optional) list of shapes, one for each tensor in `tensors`.
      seed: (Optional) seed for random shuffling.
      shared_name: (Optional) if non-empty, this queue will be shared under
        the given name across multiple sessions.
      name: (Optional) name scope for the ops.

    Returns:
      The tuple of tensors that was randomly dequeued from `tensors`.
    """
    tensors = list(tensors)
    with tf.name_scope(name, values=tensors):
        dtypes = dtypes or list([t.dtype for t in tensors])
        queue = tf.RandomShuffleQueue(
            seed=seed,
            shared_name=shared_name,
            name='random_shuffle_queue',
            dtypes=dtypes,
            shapes=shapes,
            capacity=capacity,
            min_after_dequeue=min_after_dequeue)
        enqueue = queue.enqueue(tensors)
        runner = tf.train.QueueRunner(queue, [enqueue] * num_threads)
        tf.train.add_queue_runner(runner)
        dequeue = queue.dequeue()
        return dequeue
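
A minimal usage sketch for the `shuffle` wrapper (TF 1.x); note that without `shapes` the dequeued tensors have unknown static shapes.

a = tf.random_uniform([8])
b = tf.constant(3)
a_shuffled, b_shuffled = shuffle([a, b], capacity=64, min_after_dequeue=32,
                                 num_threads=2, seed=42)
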
def _build_train_pipeline(tfrecords_file_path, feature_columns, buckets=None, batch_size=None,
                          nb_instances=None):
    """
    Build the train pipeline. Sequences are grouped into buckets for faster training.
    :param tfrecords_file_path: train TFRecords file path
    :param feature_columns: feature columns used to decode each example
    :param buckets: train buckets
    :param batch_size: mini-batch size
    :param nb_instances: number of training instances
    :return: queue runner list, queues, symbolic link to mini-batch
    """
    with tf.device('/cpu:0'):
        # Create a list with the TFRecords files.
        tfrecords_list = [tfrecords_file_path]
        # Will contain queue runners for thread creation.
        queue_runner_list = list()
        # Filename queue; contains only one filename (the train TFRecords file).
        filename_queue = tf.train.string_input_producer(tfrecords_list)
        # Decode one example.
        tensor_list = read_and_decode(filename_queue, feature_columns)
        dtypes = [tf.string, tf.int32, tf.int32, tf.int32, tf.int32, tf.int32]
        for _ in feature_columns:
            dtypes.append(tf.int32)
        # Random shuffle queue; randomizes training instances
        # (at least 50% of the instances remain queued after a dequeue).
        shuffle_queue = tf.RandomShuffleQueue(nb_instances, nb_instances // 2, dtypes=dtypes)
        # Enqueue and dequeue ops + queue runner creation.
        enqueue_op_shuffle_queue = shuffle_queue.enqueue(tensor_list)
        inputs = shuffle_queue.dequeue()
        queue_runner_list.append(
            tf.train.QueueRunner(shuffle_queue, [enqueue_op_shuffle_queue] * 4))
        shapes = [[], [], [None], [None, None], [None], [None]]
        for _ in feature_columns:
            shapes.append([None])
        if buckets:
            # Bucketing according to the bucket boundaries passed as arguments.
            length, batch = tf.contrib.training.bucket_by_sequence_length(inputs[1], inputs,
                                                                          batch_size,
                                                                          sorted(buckets),
                                                                          num_threads=4,
                                                                          capacity=32,
                                                                          shapes=shapes,
                                                                          dynamic_pad=True)
        else:
            padding_queue = tf.PaddingFIFOQueue(nb_instances, dtypes=dtypes, shapes=shapes)
            enqueue_op_padding_queue = padding_queue.enqueue(inputs)
            batch = padding_queue.dequeue_many(batch_size)
            queue_runner_list.append(
                tf.train.QueueRunner(padding_queue, [enqueue_op_padding_queue] * 4))
        return queue_runner_list, [filename_queue, shuffle_queue], batch
def make_input(model_options):
    '''Prepare the input placeholders and queues.'''
    model_vars = {}
    if model_options['mode'] == 'train':
        images = tf.placeholder("float", [None, 224, 224, model_options['num_channels']])
        model_vars['images'] = images
        labels = tf.placeholder("uint8", [1])
        model_vars['labels'] = labels
        q = tf.RandomShuffleQueue(200, model_options['min_to_keep'], [tf.float32, tf.uint8],
                                  shapes=[[model_options['example_size'], 224, 224,
                                           model_options['num_channels']], 1])
        model_vars['queue'] = q
        enqueue_op = q.enqueue([images, labels])
        model_vars['enqueue_op'] = enqueue_op
    elif model_options['mode'] == 'test':
        num_crops = 10 if model_options['flip'] else 5
        images = tf.placeholder("float", [num_crops, model_options['example_size'],
                                          224, 224, model_options['num_channels']])
        labels = tf.placeholder("uint8", [num_crops, 1])
        names = tf.placeholder("string", [num_crops, 1])
        model_vars['images'] = images
        model_vars['labels'] = labels
        model_vars['names'] = names
        q = tf.FIFOQueue(200, [tf.float32, tf.uint8, "string"],
                         shapes=[[model_options['example_size'], 224, 224,
                                  model_options['num_channels']], [1], [1]])
        model_vars['queue'] = q
        enqueue_op = q.enqueue_many([images, labels, names])
        model_vars['enqueue_op'] = enqueue_op
    elif model_options['mode'] == 'save':
        images = tf.placeholder("float", [None, 224, 224, model_options['num_channels']],
                                name='images')
        model_vars['images'] = images
    return model_vars
def example_queue_shuffle(reader,
                          filename_queue,
                          is_training,
                          example_queue_name='example_queue',
                          capacity=50000,
                          num_reader_threads=1):
    """Shuffle the examples read from the filename queue.

    Since there is no padding option in `shuffle_batch`, we have to manually
    shuffle the example queue. The process is:
        create filename queue >> read examples from filename queue >>
        enqueue examples into an example queue (RandomShuffleQueue).
    However, this is not a fully random shuffle because of memory limitations,
    so a capacity has to be specified for the example queue.

    Args:
        reader: a TFRecord reader
        filename_queue: a queue generated by string_input_producer
        is_training: if not training, use a FIFOQueue (no need to shuffle)
        example_queue_name: name of the example queue
        capacity: value queue capacity; should be large enough for good mixing
        num_reader_threads: number of threads enqueuing the value queue

    Returns:
        example_queue: a shuffled example queue, ready for parsing and batching.
    """
    # Initialize the queue.
    if is_training:
        example_queue = tf.RandomShuffleQueue(
            capacity=capacity,
            min_after_dequeue=capacity // 2,  # `capacity % 2` in the original looks like a typo
            dtypes=[tf.string],
            name="random_" + example_queue_name)
    else:
        example_queue = tf.FIFOQueue(
            capacity=capacity,
            dtypes=[tf.string],
            name="fifo_" + example_queue_name)
    # Manually create ops to enqueue examples.
    enqueue_example_ops = []
    for _ in range(num_reader_threads):
        _, example = reader.read(filename_queue)
        enqueue_example_ops.append(example_queue.enqueue([example]))
    # Add the queue runner.
    tf.train.queue_runner.add_queue_runner(
        tf.train.queue_runner.QueueRunner(example_queue, enqueue_example_ops))
    tf.summary.scalar(
        "queue/%s/fraction_of_%d_full" % (example_queue.name, capacity),
        tf.cast(example_queue.size(), tf.float32) * (1. / capacity))
    return example_queue
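
A hedged wiring sketch for `example_queue_shuffle`, assuming TF 1.x; the TFRecord file name is hypothetical.

filename_queue = tf.train.string_input_producer(['train-00000.tfrecord'],
                                                shuffle=True)
reader = tf.TFRecordReader()
example_queue = example_queue_shuffle(reader, filename_queue,
                                      is_training=True,
                                      capacity=50000,
                                      num_reader_threads=4)
serialized_example = example_queue.dequeue()
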
def __init__(self, dataset_source, dataset_target, shuffle=True, num_epochs=None,
             common_queue_capacity=4096, common_queue_min=1024, seed=None):
    if seed is None:
        seed = np.random.randint(10e8)
    _, data_source = parallel_read(
        dataset_source.data_sources,
        reader_class=dataset_source.reader,
        num_epochs=num_epochs,
        num_readers=1,
        shuffle=False,
        capacity=common_queue_capacity,
        min_after_dequeue=common_queue_min,
        seed=seed)
    data_target = ""
    if dataset_target is not None:
        _, data_target = parallel_read(
            dataset_target.data_sources,
            reader_class=dataset_target.reader,
            num_epochs=num_epochs,
            num_readers=1,
            shuffle=False,
            capacity=common_queue_capacity,
            min_after_dequeue=common_queue_min,
            seed=seed)
    # Optionally shuffle the data.
    if shuffle:
        shuffle_queue = tf.RandomShuffleQueue(
            capacity=common_queue_capacity,
            min_after_dequeue=common_queue_min,
            dtypes=[tf.string, tf.string],
            seed=seed)
        enqueue_ops = [shuffle_queue.enqueue([data_source, data_target])]
        tf.train.add_queue_runner(
            tf.train.QueueRunner(shuffle_queue, enqueue_ops))
        data_source, data_target = shuffle_queue.dequeue()
    # Decode source items.
    items = dataset_source.decoder.list_items()
    tensors = dataset_source.decoder.decode(data_source, items)
    if dataset_target is not None:
        # Decode target items.
        items2 = dataset_target.decoder.list_items()
        tensors2 = dataset_target.decoder.decode(data_target, items2)
        # Merge items and results.
        items = items + items2
        tensors = tensors + tensors2
    super(ParallelDatasetProvider, self).__init__(
        items_to_tensors=dict(zip(items, tensors)),
        num_samples=dataset_source.num_samples)