def make_data_provider(self, **kwargs):
    """Create a parallel data provider over the source and target text files.

    The source dataset is always built. The target dataset is only built when
    ``self.params["target_files"]`` is non-empty; otherwise ``None`` is passed
    as the second dataset.

    Args:
        **kwargs: forwarded unchanged to ``ParallelDataProvider``.

    Returns:
        The ``parallel_data_provider.ParallelDataProvider`` instance.
    """

    def _line_dataset(files, decoder):
        # Both sides are read one text line per record and differ only in the
        # file list and the decoder used to split a line into tokens.
        return tf.contrib.slim.dataset.Dataset(
            data_sources=files,
            reader=tf.TextLineReader,
            decoder=decoder,
            num_samples=None,
            items_to_descriptions={})

    source_decoder = split_tokens_decoder.SplitTokensDecoder(
        tokens_feature_name="source_tokens",
        length_feature_name="source_len",
        append_token="SEQUENCE_END",
        delimiter=self.params["source_delimiter"])
    source_dataset = _line_dataset(self.params["source_files"], source_decoder)

    target_dataset = None
    if len(self.params["target_files"]) > 0:
        # NOTE(review): the target side is prefixed with "SEQUENCE_END" rather
        # than a dedicated start token -- presumably intentional; confirm
        # against the vocabulary and decoder conventions of this project.
        target_decoder = split_tokens_decoder.SplitTokensDecoder(
            tokens_feature_name="target_tokens",
            length_feature_name="target_len",
            prepend_token="SEQUENCE_END",
            append_token="SEQUENCE_END",
            delimiter=self.params["target_delimiter"])
        target_dataset = _line_dataset(
            self.params["target_files"], target_decoder)

    return parallel_data_provider.ParallelDataProvider(
        dataset1=source_dataset,
        dataset2=target_dataset,
        shuffle=self.params["shuffle"],
        num_epochs=self.params["num_epochs"],
        **kwargs)
python类TextLineReader()的实例源码
def batch_generator(filenames):
    """Build a shuffled batch pipeline over tab-separated text files.

    Args:
        filenames: list of files to read from (the original use case passes
            only heart.csv). The first line of each file is skipped as a
            header.

    Returns:
        A ``(data_batch, label_batch)`` pair of tensors. ``data_batch`` holds
        the first ``N_FEATURES - 1`` columns of each record stacked together,
        ``label_batch`` holds the last column. All columns are parsed as
        strings.
    """
    filename_queue = tf.train.string_input_producer(filenames)
    reader = tf.TextLineReader(skip_header_lines=1)  # skip the header line
    _, value = reader.read(filename_queue)

    # One string default per column, so every field is decoded as a string.
    record_defaults = [[''] for _ in range(N_FEATURES)]
    content = tf.decode_csv(
        value, record_defaults=record_defaults, field_delim='\t')

    # All columns but the last are features; the last column is the label.
    features = tf.stack(content[:N_FEATURES - 1])
    label = content[-1]

    # Minimum number of elements left in the queue after a dequeue, used to
    # ensure the samples are sufficiently mixed.
    min_after_dequeue = 10 * BATCH_SIZE
    # Maximum number of elements in the queue.
    capacity = 20 * BATCH_SIZE

    # tf.train.batch does not accept min_after_dequeue (it raises TypeError);
    # shuffle_batch is the variant that takes it and actually shuffles.
    data_batch, label_batch = tf.train.shuffle_batch(
        [features, label],
        batch_size=BATCH_SIZE,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        allow_smaller_final_batch=True)
    return data_batch, label_batch
def inputs_without_crop(lists, image_shape, batch_size):
    """Build an un-cropped, un-flipped batch pipeline of whitened images.

    Args:
        lists: list of text files; each line is handed to
            ``read_my_file_format`` to obtain an ``(image, label)`` pair.
        image_shape: static shape set on each image; its first two entries
            are used as the resize target.
        batch_size: number of examples per batch.

    Returns:
        A ``(image_batch, label_batch)`` pair of tensors. Labels are cast to
        float32.
    """
    filename_queue = tf.train.string_input_producer(lists, shuffle=True)
    reader = tf.TextLineReader()
    _, value = reader.read(filename_queue)

    image, label = read_my_file_format(value)
    image = tf.image.resize_images(image, [image_shape[0], image_shape[1]])
    label = tf.cast(label, tf.float32)
    # Pin the static shape so the batching op knows the element shape.
    image.set_shape(image_shape)

    # NOTE(review): per_image_whitening was renamed per_image_standardization
    # in later TensorFlow releases -- confirm the target TF version.
    float_image = tf.image.per_image_whitening(image)

    # Plain FIFO batching with a fixed queue capacity. The shuffle_batch
    # parameters the original computed here were never used, so they and the
    # commented-out shuffle variant have been removed.
    image_batch, label_batch = tf.train.batch(
        [float_image, label],
        batch_size=batch_size,
        capacity=128)
    return image_batch, label_batch
def read_audio_csv(filename_queue):
    """Read one CSV line from the queue and split it into features and labels.

    Each line is decoded as ``WIDE * FEATURE_DIM + OUT_DIM`` float columns.

    Args:
        filename_queue: queue of CSV file names to read from.

    Returns:
        ``(features, labels)``: the leading ``WIDE * FEATURE_DIM`` columns
        reshaped to ``[WIDE, FEATURE_DIM]``, and the remaining columns as a
        list of scalar tensors.
    """
    n_feature_cols = WIDE * FEATURE_DIM
    _, line = tf.TextLineReader().read(filename_queue)

    # A float default per column makes decode_csv parse every field as float.
    float_defaults = [[0.] for _ in range(n_feature_cols + OUT_DIM)]
    columns = tf.decode_csv(line, record_defaults=float_defaults)

    features = tf.reshape(columns[:n_feature_cols], [WIDE, FEATURE_DIM])
    labels = columns[n_feature_cols:]
    return features, labels
def read_audio_csv(filename_queue):
    """Read one CSV line from the queue and split it into features and labels.

    Each line is decoded as ``WIDE * FEATURE_DIM + OUT_DIM`` float columns.

    Args:
        filename_queue: queue of CSV file names to read from.

    Returns:
        ``(features, labels)``: the leading ``WIDE * FEATURE_DIM`` columns
        reshaped to ``[WIDE, FEATURE_DIM]``, and the remaining columns as a
        list of scalar tensors.
    """
    n_feature_cols = WIDE * FEATURE_DIM
    _, line = tf.TextLineReader().read(filename_queue)

    # A float default per column makes decode_csv parse every field as float.
    float_defaults = [[0.] for _ in range(n_feature_cols + OUT_DIM)]
    columns = tf.decode_csv(line, record_defaults=float_defaults)

    features = tf.reshape(columns[:n_feature_cols], [WIDE, FEATURE_DIM])
    labels = columns[n_feature_cols:]
    return features, labels
def read_audio_csv(filename_queue):
    """Read one CSV line from the queue and split it into features and labels.

    Each line is decoded as ``WIDE * FEATURE_DIM + OUT_DIM`` float columns.

    Args:
        filename_queue: queue of CSV file names to read from.

    Returns:
        ``(features, labels)``: the leading ``WIDE * FEATURE_DIM`` columns
        reshaped to ``[WIDE, FEATURE_DIM]``, and the remaining columns as a
        list of scalar tensors.
    """
    n_feature_cols = WIDE * FEATURE_DIM
    _, line = tf.TextLineReader().read(filename_queue)

    # A float default per column makes decode_csv parse every field as float.
    float_defaults = [[0.] for _ in range(n_feature_cols + OUT_DIM)]
    columns = tf.decode_csv(line, record_defaults=float_defaults)

    features = tf.reshape(columns[:n_feature_cols], [WIDE, FEATURE_DIM])
    labels = columns[n_feature_cols:]
    return features, labels
def test_keyed_read_text_lines(self):
    """Keyed reading yields each line together with a "<file>:<line>" key.

    Reads a three-line temp file for one epoch with batch size 1, checks the
    three (key, line) pairs in order, and expects OutOfRangeError afterwards.
    """
    # Assign the glob function saved on the test case back onto gfile.
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("ABC\nDEF\nGHK\n")
    batch_size = 1
    queue_capacity = 5
    name = "my_batch"
    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        keys, inputs = tf.contrib.learn.io.read_keyed_batch_examples(
            filename, batch_size,
            reader=tf.TextLineReader, randomize_input=False,
            num_epochs=1, queue_capacity=queue_capacity, name=name)
        # num_epochs is tracked in a local variable, so locals must be
        # initialized. Uses the non-deprecated initializer, matching the
        # other tests in this file.
        session.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(session, coord=coord)
        self.assertAllEqual(session.run([keys, inputs]),
                            [[filename.encode("utf-8") + b":1"], [b"ABC"]])
        self.assertAllEqual(session.run([keys, inputs]),
                            [[filename.encode("utf-8") + b":2"], [b"DEF"]])
        self.assertAllEqual(session.run([keys, inputs]),
                            [[filename.encode("utf-8") + b":3"], [b"GHK"]])
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)
        coord.request_stop()
        # Wait for the queue-runner threads so they do not outlive the test.
        coord.join(threads)
def test_keyed_parse_json(self):
    """Keyed reading with a JSON parse_fn yields parsed features and keys.

    Each line of the temp file is a JSON-encoded Example with a single int64
    ``age`` feature. The test reads the three records in order, checks the
    parsed value and the "<file>:<line>" key, and expects OutOfRangeError
    once the single epoch is exhausted.
    """
    # Assign the glob function saved on the test case back onto gfile.
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file(
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}\n'
    )
    batch_size = 1
    queue_capacity = 5
    name = "my_batch"
    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
        dtypes = {"age": tf.FixedLenFeature([1], tf.int64)}

        def parse_fn(example):
            # JSON text -> serialized Example -> dict of feature tensors.
            return tf.parse_single_example(
                tf.decode_json_example(example), dtypes)

        keys, inputs = tf.contrib.learn.io.read_keyed_batch_examples(
            filename, batch_size,
            reader=tf.TextLineReader, randomize_input=False,
            num_epochs=1, queue_capacity=queue_capacity,
            parse_fn=parse_fn, name=name)
        # Uses the non-deprecated initializer, matching the other tests in
        # this file.
        session.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(session, coord=coord)

        key, age = session.run([keys, inputs["age"]])
        self.assertAllEqual(age, [[0]])
        self.assertAllEqual(key, [filename.encode("utf-8") + b":1"])
        key, age = session.run([keys, inputs["age"]])
        self.assertAllEqual(age, [[1]])
        self.assertAllEqual(key, [filename.encode("utf-8") + b":2"])
        key, age = session.run([keys, inputs["age"]])
        self.assertAllEqual(age, [[2]])
        self.assertAllEqual(key, [filename.encode("utf-8") + b":3"])
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)
        coord.request_stop()
        # Wait for the queue-runner threads so they do not outlive the test.
        coord.join(threads)
def test_read_keyed_batch_features_mutual_exclusive_args(self):
    """Passing both num_queue_runners and num_enqueue_threads is rejected.

    Expects ``read_keyed_batch_features`` to raise a ValueError whose message
    matches "can not both be set".
    """
    path = self._create_temp_file("abcde")
    feature_spec = {"sequence": tf.FixedLenFeature([], tf.string)}
    # The two threading arguments below are the conflicting pair under test.
    conflicting_args = dict(
        randomize_input=False,
        num_queue_runners=2,
        num_enqueue_threads=2)
    with self.assertRaisesRegexp(ValueError, "can not both be set"):
        unused_keys, unused_features = (
            tf.contrib.learn.read_keyed_batch_features(
                path, 1, feature_spec, tf.TextLineReader,
                **conflicting_args))
def test_read_text_lines_multifile(self):
    """Lines are read in order across two files and the expected ops exist.

    Builds the reading pipeline under the name "my_batch", checks the output
    tensor's shape and name, verifies the op types created in the graph, then
    reads the three lines one by one and expects OutOfRangeError at the end
    of the single epoch.
    """
    # Assign the glob function saved on the test case back onto gfile.
    gfile.Glob = self._orig_glob
    paths = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"])
    scope = "my_batch"
    with tf.Graph().as_default() as graph, \
            self.test_session(graph=graph) as sess:
        lines = tf.contrib.learn.io.read_batch_examples(
            paths, 1, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1, queue_capacity=5,
            name=scope)
        self.assertAllEqual((None,), lines.get_shape().as_list())
        sess.run(tf.local_variables_initializer())
        coordinator = tf.train.Coordinator()
        runner_threads = tf.train.start_queue_runners(sess, coord=coordinator)
        self.assertEqual("%s:1" % scope, lines.name)

        # Op name -> op type expected in the graph for this pipeline.
        fname_queue = "%s/file_name_queue" % scope
        expected_ops = {
            "%s/input" % fname_queue: "Const",
            fname_queue: "FIFOQueue",
            "%s/read/TextLineReader" % scope: "TextLineReader",
            "%s/fifo_queue" % scope: "FIFOQueue",
            scope: "QueueDequeueUpTo",
        }
        test_util.assert_ops_in_graph(expected_ops, graph)

        for expected_line in (b"ABC", b"DEF", b"GHK"):
            self.assertAllEqual(sess.run(lines), [expected_line])
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(lines)
        coordinator.request_stop()
        coordinator.join(runner_threads)
def test_batch_text_lines(self):
    """Five lines read with batch size 3 come out as a 3-batch then a 2-batch.

    After the two batches the single epoch is exhausted and the next read is
    expected to raise OutOfRangeError.
    """
    # Assign the glob function saved on the test case back onto gfile.
    gfile.Glob = self._orig_glob
    path = self._create_temp_file("A\nB\nC\nD\nE\n")
    with tf.Graph().as_default() as graph, \
            self.test_session(graph=graph) as sess:
        lines = tf.contrib.learn.io.read_batch_examples(
            [path], 3, reader=tf.TextLineReader,
            randomize_input=False, num_epochs=1, queue_capacity=10,
            read_batch_size=10, name="my_batch")
        self.assertAllEqual((None,), lines.get_shape().as_list())
        sess.run(tf.local_variables_initializer())
        coordinator = tf.train.Coordinator()
        runner_threads = tf.train.start_queue_runners(sess, coord=coordinator)

        expected_batches = ([b"A", b"B", b"C"], [b"D", b"E"])
        for expected in expected_batches:
            self.assertAllEqual(sess.run(lines), expected)
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(lines)
        coordinator.request_stop()
        coordinator.join(runner_threads)
def read_single_line_example(filename):
    """Create ops that read one text line at a time from a single file.

    The file is queued for exactly one epoch.

    Args:
        filename: path of the text file to read.

    Returns:
        The ``(key, value)`` pair produced by ``TextLineReader.read``, in
        that order.
    """
    single_file_queue = tf.train.string_input_producer(
        [filename], num_epochs=1)
    line_reader = tf.TextLineReader()
    record_key, record_value = line_reader.read(single_file_queue)
    return record_key, record_value
def _load_samples(csv_name, image_type):
filename_queue = tf.train.string_input_producer(
[csv_name])
reader = tf.TextLineReader()
_, csv_filename = reader.read(filename_queue)
record_defaults = [tf.constant([], dtype=tf.string),
tf.constant([], dtype=tf.string)]
filename_i, filename_j = tf.decode_csv(
csv_filename, record_defaults=record_defaults)
file_contents_i = tf.read_file(filename_i)
file_contents_j = tf.read_file(filename_j)
if image_type == '.jpg':
image_decoded_A = tf.image.decode_jpeg(
file_contents_i, channels=model.IMG_CHANNELS)
image_decoded_B = tf.image.decode_jpeg(
file_contents_j, channels=model.IMG_CHANNELS)
elif image_type == '.png':
image_decoded_A = tf.image.decode_png(
file_contents_i, channels=model.IMG_CHANNELS, dtype=tf.uint8)
image_decoded_B = tf.image.decode_png(
file_contents_j, channels=model.IMG_CHANNELS, dtype=tf.uint8)
return image_decoded_A, image_decoded_B
def test_inputs(self, csv, batch_size, verbose=False):
    """Build the (unshuffled) evaluation input pipeline from a CSV listing.

    Each CSV line is ``<jpeg path>,<integer label>``. Images are decoded,
    resized to ``(FLAGS.scale_h, FLAGS.scale_w)`` and then cropped or padded
    to ``(FLAGS.input_h, FLAGS.input_w)``.

    Args:
        csv: path of the CSV file to read.
        batch_size: number of examples per batch.
        verbose: when true, print the static shape of the decoded image.

    Returns:
        Whatever ``self._generate_image_and_label_batch`` returns for the
        processed image and label with ``shuffle=False``.
    """
    print("input csv file path: %s, batch size: %d" % (csv, batch_size))
    filename_queue = tf.train.string_input_producer([csv], shuffle=False)
    reader = tf.TextLineReader()
    _, serialized_example = reader.read(filename_queue)
    filename, label = tf.decode_csv(serialized_example, [["path"], [0]])
    label = tf.cast(label, tf.int32)

    jpg = tf.read_file(filename)
    image = tf.image.decode_jpeg(jpg, channels=3)
    image = tf.cast(image, tf.float32)
    if verbose:
        # Function-call form of print works on both Python 2 and Python 3;
        # the former print statements were a SyntaxError on Python 3.
        print("original image shape:")
        print(image.get_shape())

    # Resize to the intermediate scale.
    dist = tf.image.resize_images(image, (FLAGS.scale_h, FLAGS.scale_w))
    # Crop or pad to the network input size (this op is not random).
    dist = tf.image.resize_image_with_crop_or_pad(
        dist, FLAGS.input_h, FLAGS.input_w)

    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(
        FLAGS.num_examples_per_epoch_for_train
        * min_fraction_of_examples_in_queue)
    print(
        'filling queue with %d train images before starting to train. This will take a few minutes.' % min_queue_examples)
    return self._generate_image_and_label_batch(
        dist, label, min_queue_examples, batch_size, shuffle=False)
def csv_inputs(self, csv, batch_size, distorted=False, verbose=False):
    """Build the training input pipeline from a CSV listing.

    Each CSV line is ``<jpeg path>,<integer label>``.

    Args:
        csv: path of the CSV file to read.
        batch_size: number of examples per batch.
        distorted: when true, resize to ``(FLAGS.scale_h, FLAGS.scale_w)``,
            crop or pad to ``(FLAGS.input_h, FLAGS.input_w)`` and randomly
            flip left/right. Otherwise just resize to the input size.
        verbose: when true, print the static shapes of the decoded and the
            processed image.

    Returns:
        Whatever ``self._generate_image_and_label_batch`` returns for the
        processed image and label.
    """
    print("input csv file path: %s, batch size: %d" % (csv, batch_size))
    filename_queue = tf.train.string_input_producer([csv], shuffle=True)
    reader = tf.TextLineReader()
    _, serialized_example = reader.read(filename_queue)
    filename, label = tf.decode_csv(serialized_example, [["path"], [0]])
    label = tf.cast(label, tf.int32)

    jpg = tf.read_file(filename)
    image = tf.image.decode_jpeg(jpg, channels=3)
    image = tf.cast(image, tf.float32)
    if verbose:
        # Function-call form of print works on both Python 2 and Python 3;
        # the former print statements were a SyntaxError on Python 3.
        print("original image shape:")
        print(image.get_shape())

    if distorted:
        # Resize to the intermediate scale.
        dist = tf.image.resize_images(image, (FLAGS.scale_h, FLAGS.scale_w))
        # Crop or pad to the network input size.
        dist = tf.image.resize_image_with_crop_or_pad(
            dist, FLAGS.input_h, FLAGS.input_w)
        # Random horizontal flip.
        dist = tf.image.random_flip_left_right(dist)
        # Color distortion (self.distort_color) is currently disabled.
    else:
        # Resize straight to the input size. The size is passed as a single
        # (height, width) pair like the distorted branch; passing the width
        # as a separate positional argument would land on `method`.
        dist = tf.image.resize_images(image, (FLAGS.input_h, FLAGS.input_w))

    if verbose:
        print("dist image shape:")
        print(dist.get_shape())

    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(
        FLAGS.num_examples_per_epoch_for_train
        * min_fraction_of_examples_in_queue)
    print('filling queue with %d train images before starting to train. This will take a few minutes.' % min_queue_examples)
    return self._generate_image_and_label_batch(
        dist, label, min_queue_examples, batch_size)
image_processing.py 文件源码
项目:single-image-depth-estimation
作者: liuhyCV
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def train_image(dataset, batch_size=None):
    """Build a batched RGB/depth training pipeline from a dataset's CSV file.

    Each CSV line is ``<rgb png path>,<depth png path>``. The RGB image is
    decoded with 3 channels and the depth image with 1 channel; depth is
    divided by 255 and both are resized to the module-level target sizes.

    Args:
        dataset: object whose ``file_name()`` returns the CSV path.
        batch_size: number of examples per batch. Required, despite the
            ``None`` default kept for signature compatibility.

    Returns:
        ``(images, depths, invalid_depths)`` batches, where
        ``invalid_depths`` is ``tf.sign`` of the resized depth.

    Raises:
        ValueError: if ``batch_size`` is None.
    """
    # This is a plain function, so there is no `self` to take the batch size
    # from (the original referenced self.batch_size and raised NameError).
    # Use the argument, and fail early and clearly when it is missing.
    if batch_size is None:
        raise ValueError("batch_size must be provided")

    filename_queue = tf.train.string_input_producer(
        [dataset.file_name()], shuffle=True)
    reader = tf.TextLineReader()
    _, serialized_example = reader.read(filename_queue)
    rgb_filename, depth_filename = tf.decode_csv(
        serialized_example, [["path"], ["meters"]])

    # Input image.
    rgb_png = tf.read_file(rgb_filename)
    image = tf.image.decode_png(rgb_png, channels=3)
    image = tf.cast(image, tf.float32)

    # Target depth, divided by 255.
    depth_png = tf.read_file(depth_filename)
    depth = tf.image.decode_png(depth_png, channels=1)
    depth = tf.cast(depth, tf.float32)
    depth = tf.div(depth, [255.0])

    # Resize both to the module-level sizes.
    image = tf.image.resize_images(image, (IMAGE_HEIGHT, IMAGE_WIDTH))
    depth = tf.image.resize_images(depth, (TARGET_HEIGHT, TARGET_WIDTH))
    # sign(depth) is 0 where depth is 0 and 1 where it is positive.
    invalid_depth = tf.sign(depth)

    # Generate the batch.
    images, depths, invalid_depths = tf.train.batch(
        [image, depth, invalid_depth],
        batch_size=batch_size,
        num_threads=4,
        capacity=50 + 3 * batch_size,
    )
    return images, depths, invalid_depths
image_processing.py 文件源码
项目:single-image-depth-estimation
作者: liuhyCV
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def train_batch_inputs(dataset_csv_file_path, batch_size):
    """Build a batched RGB/depth training pipeline from a CSV listing.

    Each CSV line is ``<rgb png path>,<depth png path>``. The depth PNG is
    decoded as 16-bit, single channel.

    Args:
        dataset_csv_file_path: path of the CSV file to read.
        batch_size: number of examples per batch.

    Returns:
        ``(images, depths, invalid_depths)`` batches, where
        ``invalid_depths`` is ``tf.sign`` of the resized depth.

    Raises:
        ValueError: if ``dataset_csv_file_path`` is not an existing file.
    """
    # Check the input before creating any graph ops.
    if not os.path.isfile(dataset_csv_file_path):
        raise ValueError('No data files found for this dataset')

    with tf.name_scope('batch_processing'):
        filename_queue = tf.train.string_input_producer(
            [dataset_csv_file_path], shuffle=True)
        reader = tf.TextLineReader()
        _, serialized_example = reader.read(filename_queue)
        filename, depth_filename = tf.decode_csv(
            serialized_example, [["path"], ["annotation"]])

        # Input image.
        png = tf.read_file(filename)
        image = tf.image.decode_png(png, channels=3)
        image = tf.cast(image, tf.float32)

        # Target depth. Cast to float32 rather than int16: uint16 values
        # above 32767 would wrap negative in int16 and flip the sign-based
        # mask below. The resize already produces floating-point output.
        depth_png = tf.read_file(depth_filename)
        depth = tf.image.decode_png(depth_png, dtype=tf.uint16, channels=1)
        depth = tf.cast(depth, dtype=tf.float32)

        # Resize both to the module-level sizes.
        image = tf.image.resize_images(image, (IMAGE_HEIGHT, IMAGE_WIDTH))
        depth = tf.image.resize_images(depth, (TARGET_HEIGHT, TARGET_WIDTH))
        # sign(depth) is 0 where depth is 0 and 1 where it is positive.
        invalid_depth = tf.sign(depth)

        # Generate the batch.
        images, depths, invalid_depths = tf.train.batch(
            [image, depth, invalid_depth],
            batch_size=batch_size,
            num_threads=4,
            capacity=50 + 3 * batch_size
        )
        return images, depths, invalid_depths
image_processing.py 文件源码
项目:single-image-depth-estimation
作者: liuhyCV
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def eval_batch_inputs(dataset_csv_file_path, batch_size):
    """Build a batched RGB/depth evaluation pipeline from a CSV listing.

    Each CSV line is ``<rgb png path>,<depth png path>``. The depth PNG is
    decoded as 16-bit, single channel.

    Args:
        dataset_csv_file_path: path of the CSV file to read.
        batch_size: number of examples per batch.

    Returns:
        ``(images, depths, invalid_depths)`` batches, where
        ``invalid_depths`` is ``tf.sign`` of the resized depth.

    Raises:
        ValueError: if ``dataset_csv_file_path`` is not an existing file.
    """
    # Check the input before creating any graph ops.
    if not os.path.isfile(dataset_csv_file_path):
        raise ValueError('No data files found for this dataset')

    with tf.name_scope('eval_batch_processing'):
        # NOTE(review): the file queue is shuffled even for evaluation --
        # confirm this is intended.
        filename_queue = tf.train.string_input_producer(
            [dataset_csv_file_path], shuffle=True)
        reader = tf.TextLineReader()
        _, serialized_example = reader.read(filename_queue)
        filename, depth_filename = tf.decode_csv(
            serialized_example, [["path"], ["annotation"]])

        # Input image.
        png = tf.read_file(filename)
        image = tf.image.decode_png(png, channels=3)
        image = tf.cast(image, tf.float32)

        # Target depth. Cast to float32 rather than int16: uint16 values
        # above 32767 would wrap negative in int16 and flip the sign-based
        # mask below. The resize already produces floating-point output.
        depth_png = tf.read_file(depth_filename)
        depth = tf.image.decode_png(depth_png, dtype=tf.uint16, channels=1)
        depth = tf.cast(depth, dtype=tf.float32)

        # Resize both to the module-level sizes.
        image = tf.image.resize_images(image, (IMAGE_HEIGHT, IMAGE_WIDTH))
        depth = tf.image.resize_images(depth, (TARGET_HEIGHT, TARGET_WIDTH))
        # sign(depth) is 0 where depth is 0 and 1 where it is positive.
        invalid_depth = tf.sign(depth)

        # Generate the batch.
        images, depths, invalid_depths = tf.train.batch(
            [image, depth, invalid_depth],
            batch_size=batch_size,
            num_threads=4,
            capacity=50 + 3 * batch_size
        )
        return images, depths, invalid_depths
def csv_inputs(self, csv_file_path):
    """Build the shuffled RGB/depth training batches from a three-column CSV.

    Each CSV line holds an RGB PNG path, a depth PNG path and a third
    ("meters") path that is parsed but not used here.

    Args:
        csv_file_path: path of the CSV file to read.

    Returns:
        ``(images, depths, invalid_depths)`` batches of size
        ``self.batch_size``, where ``invalid_depths`` is ``tf.sign`` of the
        resized depth.
    """
    csv_queue = tf.train.string_input_producer([csv_file_path], shuffle=True)
    _, csv_line = tf.TextLineReader().read(csv_queue)
    column_defaults = [["path"], ["annotation"], ["meters"]]
    rgb_path, depth_path, _unused_meters_path = tf.decode_csv(
        csv_line, column_defaults)

    # Input: 3-channel PNG as float32.
    rgb = tf.cast(
        tf.image.decode_png(tf.read_file(rgb_path), channels=3),
        tf.float32)

    # Target: 1-channel PNG as float32, divided by 255.
    depth_map = tf.cast(
        tf.image.decode_png(tf.read_file(depth_path), channels=1),
        tf.float32)
    depth_map = tf.div(depth_map, [255.0])

    # Resize both to the module-level sizes.
    rgb = tf.image.resize_images(rgb, (IMAGE_HEIGHT, IMAGE_WIDTH))
    depth_map = tf.image.resize_images(
        depth_map, (TARGET_HEIGHT, TARGET_WIDTH))
    # sign(depth) is 0 where depth is 0 and 1 where it is positive.
    depth_sign = tf.sign(depth_map)

    images, depths, invalid_depths = tf.train.batch(
        [rgb, depth_map, depth_sign],
        batch_size=self.batch_size,
        num_threads=4,
        capacity=50 + 3 * self.batch_size,
    )
    return images, depths, invalid_depths
def csv_inputs_test(self, csv_file_path):
    """Build the unshuffled RGB/depth test batches from a three-column CSV.

    Same processing as the training pipeline, but the file queue is not
    shuffled and the RGB and depth file names are batched alongside the
    tensors. The third ("meters") CSV column is parsed but not used here.

    Args:
        csv_file_path: path of the CSV file to read.

    Returns:
        ``(images, depths, invalid_depths, filenames, depth_filenames)``
        batches of size ``self.batch_size``.
    """
    csv_queue = tf.train.string_input_producer([csv_file_path], shuffle=False)
    _, csv_line = tf.TextLineReader().read(csv_queue)
    column_defaults = [["path"], ["annotation"], ["meters"]]
    rgb_path, depth_path, _unused_meters_path = tf.decode_csv(
        csv_line, column_defaults)

    # Input: 3-channel PNG as float32.
    rgb = tf.cast(
        tf.image.decode_png(tf.read_file(rgb_path), channels=3),
        tf.float32)

    # Target: 1-channel PNG as float32, divided by 255.
    depth_map = tf.cast(
        tf.image.decode_png(tf.read_file(depth_path), channels=1),
        tf.float32)
    depth_map = tf.div(depth_map, [255.0])

    # Resize both to the module-level sizes.
    rgb = tf.image.resize_images(rgb, (IMAGE_HEIGHT, IMAGE_WIDTH))
    depth_map = tf.image.resize_images(
        depth_map, (TARGET_HEIGHT, TARGET_WIDTH))
    # sign(depth) is 0 where depth is 0 and 1 where it is positive.
    depth_sign = tf.sign(depth_map)

    # The paths are batched too, so each result can be traced to its files.
    images, depths, invalid_depths, filenames, depth_filenames = tf.train.batch(
        [rgb, depth_map, depth_sign, rgb_path, depth_path],
        batch_size=self.batch_size,
        num_threads=4,
        capacity=50 + 3 * self.batch_size,
    )
    return images, depths, invalid_depths, filenames, depth_filenames