def _abspath_no_label_load_file(path, epochs=None, shuffle=True, seed=0):
    """Build ops that read one unlabeled image per line of a list file.

    Every line of ``path`` is used verbatim as the image path: no CSV
    parsing is done and no root directory is prepended.

    Args:
        path: Text file whose lines are image paths.
        epochs: Forwarded as ``num_epochs`` to the filename producer.
        shuffle: Forwarded to the filename producer.
        seed: Forwarded to the filename producer.

    Returns:
        A tuple ``(image, imgshape, image_path)``: the decoded 3-channel
        image, the first two entries of its dynamic shape, and the line
        that was read from the list file.
    """
    list_file_queue = tf.train.string_input_producer(
        [path], num_epochs=epochs, shuffle=shuffle, seed=seed)
    _, image_path = tf.TextLineReader().read(list_file_queue)

    # The line itself is the path to open.
    raw_bytes = tf.read_file(image_path)
    image = decode_image(raw_bytes, channels=3)
    # Height and width are unknown statically; only the channel count is.
    image.set_shape([None, None, 3])
    return image, tf.shape(image)[:2], image_path
python类decode_csv()的实例源码
def my_input(file_path, perform_shuffle=False, repeat_count=1):
    """Create batched input tensors from a CSV file with the Dataset API.

    The first line of the file is skipped. Every other line is parsed as
    four float columns followed by one integer column; the last column is
    the label and the remaining ones are keyed by ``feature_names``.

    Args:
        file_path: CSV file to read.
        perform_shuffle: When true, shuffle with a buffer of 256 elements.
        repeat_count: Passed to ``Dataset.repeat``.

    Returns:
        ``(batch_features, batch_labels)`` taken from a one-shot iterator
        over batches of 32 elements.
    """
    def decode_csv(line):
        columns = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        # The label stays a one-element list; everything before it is a
        # feature column.
        label = columns[-1:]
        features = columns[:-1]
        return dict(zip(feature_names, features)), label

    lines = tf.data.TextLineDataset(file_path).skip(1)
    dataset = lines.map(decode_csv)
    if perform_shuffle:
        dataset = dataset.shuffle(buffer_size=256)
    dataset = dataset.repeat(repeat_count).batch(32)

    batch_features, batch_labels = dataset.make_one_shot_iterator().get_next()
    return batch_features, batch_labels
PASCALVOC2012Localization.py 文件源码
项目:dynamic-training-bench
作者: galeone
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def _read_image_and_box(self, bboxes_csv):
    """Read one CSV row from the queue, load its image and build its box.

    Rows have the layout ``file,y_min,x_min,y_max,x_max,label``, e.g.
    ``2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0``.

    Args:
        bboxes_csv: Queue handed to ``TextLineReader.read``.

    Returns:
        image, [y_min, x_min, y_max, x_max, label]
    """
    # True is passed where a line count is expected, i.e. one header line.
    line_reader = tf.TextLineReader(skip_header_lines=True)
    _, csv_row = line_reader.read(bboxes_csv)

    # One string column (the file name) followed by five float columns.
    defaults = [[""], [0.], [0.], [0.], [0.], [0.]]
    columns = tf.decode_csv(csv_row, defaults)
    filename, box_and_label = columns[0], columns[1:]

    jpeg_dir = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012',
                            'JPEGImages')
    image_path = jpeg_dir + "/" + filename + ".jpg"
    # Per the original author's note, the image comes back normalized
    # in [-1, 1].
    image = read_image_jpg(image_path)
    return image, tf.stack(box_and_label)
def read_csv(batch_size, file_name):
    """Return shuffled batches of integer columns parsed from ``file_name``.

    Each line is split on single spaces into
    ``FLAGS.max_sentence_len * 2`` columns, with ``0`` as the default
    value of every column.

    Args:
        batch_size: Rows per batch; also used to size the shuffle queue.
        file_name: Path of the text file to read.

    Returns:
        The result of ``tf.train.shuffle_batch`` over the decoded columns.
    """
    num_columns = FLAGS.max_sentence_len * 2
    name_queue = tf.train.string_input_producer([file_name])
    _, line = tf.TextLineReader(skip_header_lines=0).read(name_queue)

    # The defaults fix both the number of columns and the dtype of each
    # one (int here).
    columns = tf.decode_csv(
        line,
        record_defaults=[[0] for _ in range(num_columns)],
        field_delim=' ')

    # Rows are only pulled from the file when the batch op is evaluated.
    return tf.train.shuffle_batch(
        columns,
        batch_size=batch_size,
        capacity=batch_size * 50,
        min_after_dequeue=batch_size)
def read_image_and_label(image_label_q):
    """Dequeue one ``"<png path> <hour> <minute>"`` record and load it.

    Args:
        image_label_q: Queue whose elements are space-delimited strings
            with three fields.

    Returns:
        Three tensors: the whitened float32 image, the hour and the
        minute (both int32).
    """
    record = image_label_q.dequeue()
    filename, hour_str, minute_str = tf.decode_csv(
        record, [[""], [""], [""]], " ")

    png_bytes = tf.read_file(filename)
    decoded = tf.image.decode_png(png_bytes, channels=image_channels)
    image = tf.cast(decoded, tf.float32)
    # The decoder leaves the static shape open, so declare it by hand.
    image.set_shape([image_size, image_size, image_channels])
    # Whiten each image (zero mean, unit standard deviation). Original
    # author's note: without it the gradient is too big and training
    # diverges almost immediately.
    image = tf.image.per_image_whitening(image)

    # Labels arrive as text and are converted to integers.
    hour = tf.string_to_number(hour_str, out_type=tf.int32)
    minute = tf.string_to_number(minute_str, out_type=tf.int32)
    return image, hour, minute
PASCALVOC2012Classification.py 文件源码
项目:dynamic-training-bench
作者: galeone
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def _read_image_and_box(self, bboxes_csv):
    """Read one CSV row from the queue, load its image and build its box.

    Rows have the layout ``file,y_min,x_min,y_max,x_max,label``, e.g.
    ``2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0``.

    Args:
        bboxes_csv: Queue handed to ``TextLineReader.read``.

    Returns:
        image, box
    """
    # True is passed where a line count is expected, i.e. one header line.
    line_reader = tf.TextLineReader(skip_header_lines=True)
    _, csv_row = line_reader.read(bboxes_csv)

    # One string column (the file name) followed by five float columns.
    defaults = [[""], [0.], [0.], [0.], [0.], [0.]]
    columns = tf.decode_csv(csv_row, defaults)
    filename, box_and_label = columns[0], columns[1:]

    jpeg_dir = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012',
                            'JPEGImages')
    image_path = jpeg_dir + "/" + filename + ".jpg"
    # Per the original author's note, the image comes back normalized in
    # [-1, 1] and converted to self._image_depth channels.
    image = read_image_jpg(image_path, depth=self._image_depth)
    return image, tf.stack(box_and_label)
def _voc_seg_load_file(path, epochs=None, shuffle=True, seed=0):
    """Build ops that read an (image, segmentation) pair per list line.

    Every line of ``path`` holds two space-separated paths. Both are
    appended directly to the ``VOC_DIR`` environment variable.

    Args:
        path: List file with ``"<image path> <segmentation path>"`` lines.
        epochs: Forwarded as ``num_epochs`` to the filename producer.
        shuffle: Forwarded to the filename producer.
        seed: Forwarded to the filename producer.

    Returns:
        ``(image, seg, imgshape, imgname)``: the decoded 3-channel image,
        the single-channel PNG segmentation cast to int32, the first two
        entries of the image's dynamic shape, and the image path as it
        appears in the list file.

    Raises:
        KeyError: If ``VOC_DIR`` is not set in the environment.
    """
    voc_root = os.environ['VOC_DIR']
    list_queue = tf.train.string_input_producer(
        [path], num_epochs=epochs, shuffle=shuffle, seed=seed)
    _, line = tf.TextLineReader().read(list_queue)
    image_path, seg_path = tf.decode_csv(
        line, record_defaults=[[''], ['']], field_delim=' ')

    # Plain concatenation: no separator is inserted after the root.
    image_abspath = voc_root + image_path
    seg_abspath = voc_root + seg_path

    image = decode_image(tf.read_file(image_abspath), channels=3)
    image.set_shape([None, None, 3])
    imgshape = tf.shape(image)[:2]

    seg_png = tf.image.decode_png(tf.read_file(seg_abspath), channels=1)
    seg = tf.cast(seg_png, tf.int32)
    return image, seg, imgshape, image_path
def _imagenet_load_file(path, epochs=None, shuffle=True, seed=0, subset='train', prepare_path=True):
    """Build ops that read an (image, label) pair per list line.

    Every line holds ``"<image path> <label>"`` separated by one space.

    Args:
        path: A list file, or a list of list files.
        epochs: Forwarded as ``num_epochs`` to the filename producer.
        shuffle: Forwarded to the filename producer.
        seed: Forwarded to the filename producer.
        subset: Directory name placed between ``/images/`` and the image
            path when ``prepare_path`` is true.
        prepare_path: When true the image is opened at
            ``$IMAGENET_DIR + '/images/' + subset + image_path``;
            otherwise the path from the list file is used unchanged.

    Returns:
        ``(image, label, imgshape, image_path)``: the decoded 3-channel
        image, the int32 label, the first two entries of the image's
        dynamic shape, and the path as it appears in the list file.
    """
    imagenet_root = os.environ.get('IMAGENET_DIR', '')
    list_files = path if isinstance(path, list) else [path]
    list_queue = tf.train.string_input_producer(
        list_files, num_epochs=epochs, shuffle=shuffle, seed=seed)
    _, line = tf.TextLineReader().read(list_queue)
    image_path, label_str = tf.decode_csv(
        line, record_defaults=[[''], ['']], field_delim=' ')

    # No separator is inserted between subset and image_path.
    # NOTE(review): presumably the listed paths start with '/' - confirm.
    image_abspath = (imagenet_root + '/images/' + subset + image_path
                     if prepare_path else image_path)

    image = decode_image(tf.read_file(image_abspath), channels=3)
    image.set_shape([None, None, 3])
    imgshape = tf.shape(image)[:2]
    label = tf.string_to_number(label_str, out_type=tf.int32)
    return image, label, imgshape, image_path
def _relpath_no_label_load_file(path, root_path, epochs=None, shuffle=True, seed=0):
    """Build ops that read one unlabeled image per line of a list file.

    Every line of ``path`` is treated as a path relative to
    ``root_path``; the two are joined with a single ``'/'``.

    Args:
        path: Text file whose lines are relative image paths.
        root_path: Prefix prepended to every line.
        epochs: Forwarded as ``num_epochs`` to the filename producer.
        shuffle: Forwarded to the filename producer.
        seed: Forwarded to the filename producer.

    Returns:
        A tuple ``(image, imgshape, image_path)``: the decoded 3-channel
        image, the first two entries of its dynamic shape, and the
        relative path that was read from the list file.
    """
    list_file_queue = tf.train.string_input_producer(
        [path], num_epochs=epochs, shuffle=shuffle, seed=seed)
    _, image_path = tf.TextLineReader().read(list_file_queue)

    raw_bytes = tf.read_file(root_path + '/' + image_path)
    image = decode_image(raw_bytes, channels=3)
    # Height and width are unknown statically; only the channel count is.
    image.set_shape([None, None, 3])
    return image, tf.shape(image)[:2], image_path
def _get_image(self):
    """Read the next record and load its image / segmentation pair.

    A record is a CSV line with two string fields; both are appended to
    ``self.base_folder`` to obtain the PNG files to open.

    Returns:
        ``(image, seg, image_name)``: the 16-bit PNG image and the 8-bit
        PNG segmentation, each cast to float32 and reshaped to
        ``self.image_size``, plus the first field of the record.
    """
    _, record = self.reader.read(self.input_queue)
    # Empty string constants as defaults, one per column.
    defaults = [tf.constant([], tf.string) for _ in range(2)]
    image_name, seg_name = tf.decode_csv(
        record, defaults, field_delim=None, name=None)

    image_bytes = tf.read_file(self.base_folder + image_name)
    seg_bytes = tf.read_file(self.base_folder + seg_name)

    image_png = tf.image.decode_png(image_bytes, channels=1, dtype=tf.uint16)
    image = tf.reshape(
        tf.cast(image_png, tf.float32), self.image_size, name='input_image')

    seg_png = tf.image.decode_png(seg_bytes, channels=1, dtype=tf.uint8)
    seg = tf.reshape(
        tf.cast(seg_png, tf.float32), self.image_size, name='input_seg')
    return image, seg, image_name
def acquire_data_ops(filename_queue, processing_method, record_defaults=None):
    """Read a CSV line from the queue and hand its first field on.

    Args:
        filename_queue: Queue of file names for the line reader.
        processing_method: Callable invoked as
            ``processing_method(first_field, 3)``.
        record_defaults: Column defaults for ``tf.decode_csv``; a single
            string column is assumed when omitted.

    Returns:
        A pair: whatever ``processing_method`` returns, and a constant
        boolean ``True`` tensor.
    """
    with tf.name_scope("acquire_data"):
        defaults = [[""]] if record_defaults is None else record_defaults
        _, line = tf.TextLineReader().read(filename_queue)
        fields = tf.decode_csv(line, record_defaults=defaults)
        # The 3 is there because this feeds training, which works on
        # triplets.
        processed = processing_method(fields[0], 3)
        return processed, tf.constant(True, dtype=tf.bool)
def read_data(filename_queue, bucket):
    """Read one CSV line and split it into encoder and decoder inputs.

    Args:
        filename_queue: Queue of file names for the line reader.
        bucket: ``(encoder_length, decoder_length)``; the line is parsed
            into that many integer columns in total.

    Returns:
        An object with two attributes: ``encoder`` (the first
        ``encoder_length`` columns packed together) and ``decoder`` (the
        remaining columns packed together).
    """
    class DataRecord(object):
        pass

    encoder_length = bucket[0]
    total_length = bucket[0] + bucket[1]

    _, line = tf.TextLineReader().read(filename_queue)
    columns = tf.decode_csv(
        line, record_defaults=[[1] for _ in range(total_length)])

    record = DataRecord()
    record.encoder = tf.pack(columns[:encoder_length])
    record.decoder = tf.pack(columns[encoder_length:])
    return record
def make_dataset(self, filenames, batch_size, shuffle_buffer_size=100, num_dataset_parallel=4):
    """Build the batched CSV dataset iterator and record the sample count.

    Every line of every file is parsed as three comma-separated string
    columns. Sets ``self.dataset_iterator`` (an initializable iterator
    over batches) and ``self.num_samples``.

    Args:
        filenames: Files to read; more than one entry switches to a
            ``flat_map`` over the individual files.
        batch_size: Number of lines per batch.
        shuffle_buffer_size: Shuffle buffer size; values <= 0 disable
            shuffling.
        num_dataset_parallel: Second argument given to ``Dataset.map``.
    """
    def decode_line(line):
        return tf.decode_csv(line, [[""], [""], [""]], field_delim=",")

    def parsed_lines(source):
        # Text lines of `source`, each decoded into its three columns.
        return tf.data.TextLineDataset(source).map(
            decode_line, num_dataset_parallel)

    if len(filenames) > 1:
        per_file = tf.data.Dataset.from_tensor_slices(filenames)
        dataset = per_file.flat_map(parsed_lines)
    else:
        dataset = parsed_lines(filenames)

    if shuffle_buffer_size > 0:
        dataset = dataset.shuffle(shuffle_buffer_size)

    batched = dataset.batch(batch_size)
    self.dataset_iterator = batched.make_initializable_iterator()
    self.num_samples = Dataset.get_number_of_items(filenames)
def read_csv(batch_size, file_name):
    """Return shuffled batches of integer columns parsed from ``file_name``.

    Each line is split on single spaces into
    ``FLAGS.max_sentence_len * 2`` columns, with ``0`` as the default
    value of every column.

    Args:
        batch_size: Rows per batch; also used to size the shuffle queue.
        file_name: Path of the text file to read.

    Returns:
        The result of ``tf.train.shuffle_batch`` over the decoded columns.
    """
    num_columns = FLAGS.max_sentence_len * 2
    name_queue = tf.train.string_input_producer([file_name])
    _, line = tf.TextLineReader(skip_header_lines=0).read(name_queue)

    # The defaults fix both the number of columns and the dtype of each
    # one (int here).
    columns = tf.decode_csv(
        line,
        record_defaults=[[0] for _ in range(num_columns)],
        field_delim=' ')

    # Rows are only pulled from the file when the batch op is evaluated.
    return tf.train.shuffle_batch(
        columns,
        batch_size=batch_size,
        capacity=batch_size * 50,
        min_after_dequeue=batch_size)
def batch_generator(filenames):
    """Produce shuffled (features, label) batches from CSV files.

    The header line of each file is skipped. A row has ``N_FEATURES``
    feature columns followed by one integer label column. Column 4 is
    text and is mapped to 1.0 when it equals ``'Present'``, else 0.0.

    Args:
        filenames: List of files to read from (the original author used
            it with just heart.csv).

    Returns:
        ``(data_batch, label_batch)`` with ``BATCH_SIZE`` rows each.
    """
    name_queue = tf.train.string_input_producer(filenames)
    _, line = tf.TextLineReader(skip_header_lines=1).read(name_queue)

    # Float defaults everywhere, except the text column and the trailing
    # integer label.
    defaults = [[1.0] for _ in range(N_FEATURES)]
    defaults[4] = ['']
    defaults.append([1])
    columns = tf.decode_csv(line, record_defaults=defaults)

    # Turn the text column into a float so it can be stacked with the
    # other features.
    is_present = tf.equal(columns[4], tf.constant('Present'))
    columns[4] = tf.cond(
        is_present, lambda: tf.constant(1.0), lambda: tf.constant(0.0))

    features = tf.stack(columns[:N_FEATURES])
    label = columns[-1]

    data_batch, label_batch = tf.train.shuffle_batch(
        [features, label],
        batch_size=BATCH_SIZE,
        capacity=20 * BATCH_SIZE,
        min_after_dequeue=10 * BATCH_SIZE)
    return data_batch, label_batch
def read_csv(batch_size, file_name):
    """Return shuffled batches of integer columns parsed from ``file_name``.

    Each line is split on single spaces into
    ``FLAGS.max_sentence_len * 2`` columns, with ``0`` as the default
    value of every column.

    Args:
        batch_size: Rows per batch; also used to size the shuffle queue.
        file_name: Path of the text file to read.

    Returns:
        The result of ``tf.train.shuffle_batch`` over the decoded columns.
    """
    num_columns = FLAGS.max_sentence_len * 2
    name_queue = tf.train.string_input_producer([file_name])
    _, line = tf.TextLineReader(skip_header_lines=0).read(name_queue)

    # The defaults fix both the number of columns and the dtype of each
    # one (int here).
    columns = tf.decode_csv(
        line,
        record_defaults=[[0] for _ in range(num_columns)],
        field_delim=' ')

    # Rows are only pulled from the file when the batch op is evaluated.
    return tf.train.shuffle_batch(
        columns,
        batch_size=batch_size,
        capacity=batch_size * 50,
        min_after_dequeue=batch_size)
def read_csv(batch_size, file_name):
    """Return shuffled batches of integer columns parsed from ``file_name``.

    Each line is split on single spaces into
    ``FLAGS.max_sentence_len * 2`` columns, with ``0`` as the default
    value of every column.

    Args:
        batch_size: Rows per batch; also used to size the shuffle queue.
        file_name: Path of the text file to read.

    Returns:
        The result of ``tf.train.shuffle_batch`` over the decoded columns.
    """
    num_columns = FLAGS.max_sentence_len * 2
    name_queue = tf.train.string_input_producer([file_name])
    _, line = tf.TextLineReader(skip_header_lines=0).read(name_queue)

    # The defaults fix both the number of columns and the dtype of each
    # one (int here).
    columns = tf.decode_csv(
        line,
        record_defaults=[[0] for _ in range(num_columns)],
        field_delim=' ')

    # Rows are only pulled from the file when the batch op is evaluated.
    return tf.train.shuffle_batch(
        columns,
        batch_size=batch_size,
        capacity=batch_size * 50,
        min_after_dequeue=batch_size)
wide_deep_evaluate_predict.py 文件源码
项目:provectus-final-project
作者: eds-uga
项目源码
文件源码
阅读 32
收藏 0
点赞 0
评论 0
def input_fn(batch_size, file_name):
    """Create the feature dict and label tensor for cross-validation.

    All ``COLUMNS`` are read as comma-separated strings in a single pass
    over the file (one epoch, no input randomization).

    :param batch_size: number of examples per batch
    :param file_name: CSV file to read
    :return: ``(feature_cols, label)``; continuous columns are converted
        to float32, categorical columns go through ``dense_to_sparse``,
        and the label column is converted to int32
    """
    def parse_line(line):
        # Every column is read as a string and converted afterwards.
        defaults = [tf.constant([''], dtype=tf.string)] * len(COLUMNS)
        return tf.decode_csv(line, defaults, field_delim=",")

    examples_op = tf.contrib.learn.read_batch_examples(
        file_name,
        batch_size=batch_size,
        reader=tf.TextLineReader,
        num_threads=5,
        num_epochs=1,
        randomize_input=False,
        parse_fn=parse_line)

    # Column i of the batch belongs to the i-th header.
    examples_dict = {
        header: examples_op[:, i] for i, header in enumerate(COLUMNS)
    }

    feature_cols = {
        name: tf.string_to_number(examples_dict[name], out_type=tf.float32)
        for name in CONTINUOUS_COLUMNS
    }
    feature_cols.update({
        name: dense_to_sparse(examples_dict[name])
        for name in CATEGORICAL_COLUMNS
    })
    label = tf.string_to_number(examples_dict[LABEL_COLUMN], out_type=tf.int32)
    return feature_cols, label
def smiles_labels_batch_queue(eval_params):
    """Build a shuffled batch queue of (smiles, label) pairs.

    Rows of ``eval_params['substances_fname']`` hold three fields
    (substance id, smiles, label) separated by
    ``eval_params['substances_field_delim']``; the header line is
    skipped and the substance id is not batched.

    Args:
        eval_params: Dict providing ``substances_fname``,
            ``substances_field_delim``, ``batch_size``,
            ``queue_capacity``, ``queue_min_after_dequeue``,
            ``queue_num_threads`` and ``queue_seed``.

    Returns:
        ``(smiles_batch, labels_batch)``.
    """
    substances_file = eval_params['substances_fname']
    fname_queue = tf.train.string_input_producer(
        [substances_file],
        num_epochs=None,
        shuffle=True,
        name="substances_fname_queue")

    line_reader = tf.TextLineReader(
        skip_header_lines=1, name="substance_file_reader")
    _, record = line_reader.read(queue=fname_queue)

    # Two string columns followed by a float label.
    _, smiles, label = tf.decode_csv(
        records=record,
        record_defaults=[[""], [""], [1.0]],
        field_delim=eval_params['substances_field_delim'])

    smiles_batch, labels_batch = tf.train.shuffle_batch(
        tensors=[smiles, label],
        batch_size=eval_params['batch_size'],
        capacity=eval_params['queue_capacity'],
        min_after_dequeue=eval_params['queue_min_after_dequeue'],
        num_threads=eval_params['queue_num_threads'],
        seed=eval_params['queue_seed'])
    return smiles_batch, labels_batch
def smiles_triple_batch_queue(eval_params):
    """Read one seven-field triple record from the substances file.

    Rows of ``eval_params['substances_fname']`` are split on
    ``eval_params['substances_field_delim']`` into seven string fields;
    the header line is skipped.

    NOTE(review): the visible body stops after decoding the record and
    returns nothing - the function looks truncated; confirm against the
    upstream source.

    Args:
        eval_params: Dict providing ``substances_fname`` and
            ``substances_field_delim``.
    """
    substances_file = eval_params['substances_fname']
    fname_queue = tf.train.string_input_producer(
        [substances_file],
        num_epochs=None,
        shuffle=True,
        name="substances_fname_queue")

    line_reader = tf.TextLineReader(
        skip_header_lines=1, name="substance_file_reader")
    _, record = line_reader.read(queue=fname_queue)

    # Field order, per the original author:
    #   target_id,
    #   substance_id, smiles,
    #   substance_plus_id, smiles_plus,
    #   substance_minus_id, smiles_minus
    entries = tf.decode_csv(
        records=record,
        record_defaults=[[""], [""], [""], [""], [""], [""], [""]],
        field_delim=eval_params['substances_field_delim'])
def read_pascifar(pascifar_path, queue):
    """Read and parse one ``file,label`` row from the queue.

    Args:
        pascifar_path: a constant string tensor representing the path of
            the PASCIFAR dataset
        queue: A queue of strings in the format: file, label

    Returns:
        image_path: a tf.string tensor, ``pascifar_path`` joined to the
            file field with a ``/``
        label: an int64 tensor with the label
    """
    # Line reader that skips the header row.
    line_reader = tf.TextLineReader(skip_header_lines=1)
    _, csv_row = line_reader.read(queue)

    # Two columns: file (string) and label (int).
    relative_path, raw_label = tf.decode_csv(
        csv_row, [[""], [0]], field_delim=",")

    image_path = pascifar_path + tf.constant("/") + relative_path
    label = tf.cast(raw_label, tf.int64)
    return image_path, label
def read_bbbc006(all_files_queue):
    """Read and parse one example from the BBBC006 data files.

    Recommendation from the original author: for N-way read parallelism,
    call this function N times. That gives N independent readers at
    different files and positions, which mixes the examples better.

    Args:
        all_files_queue: A queue of strings with the CSV files to read.
            Each row lists three paths: image, contours, segments.

    Returns:
        An object representing a single example, with the fields:
            uint8image: ``read_from_queue`` applied to the image file
                contents.
            label: the contour and segment tensors concatenated along
                axis 2 (contours first).
    """
    class BBBC006Record(object):
        pass

    record = BBBC006Record()

    # One row of the CSV names the three files of a single example.
    _, csv_row = tf.TextLineReader().read(all_files_queue)
    image_file, contour_file, segment_file = tf.decode_csv(
        csv_row, record_defaults=[[""], [""], [""]])

    record.uint8image = read_from_queue(tf.read_file(image_file))
    contour = read_from_queue(tf.read_file(contour_file))
    segment = read_from_queue(tf.read_file(segment_file))

    record.label = tf.concat([contour, segment], 2)
    return record
def data_loader(csv_filename: str, params: Params, batch_size: int=128, data_augmentation: bool=False,
                num_epochs: int=None, image_summaries: bool=False):
    """Return an ``input_fn`` that batches images listed in CSV file(s).

    Every CSV row holds an image path and a label, separated by
    ``params.csv_delimiter``.

    Args:
        csv_filename: One CSV file name, or a list of them.
        params: Supplies ``csv_delimiter`` and ``input_shape``.
        batch_size: Number of examples per batch.
        data_augmentation: Forwarded to ``image_reading``.
        num_epochs: Forwarded to the filename producer.
        image_summaries: When true, add image / label / width summaries.

    Returns:
        A zero-argument function returning ``(prepared_batch, labels)``,
        where ``prepared_batch`` is a dict with the keys ``images``,
        ``images_widths``, ``filenames`` and ``labels``.
    """
    def input_fn():
        # Accept either a single csv file or a list of csv files.
        csv_files = csv_filename if isinstance(csv_filename, list) else [csv_filename]
        filename_queue = tf.train.string_input_producer(
            csv_files, num_epochs=num_epochs, name='filename_queue')

        # No header lines are skipped.
        reader = tf.TextLineReader(name='CSV_Reader', skip_header_lines=0)
        _, line = reader.read(filename_queue, name='file_reading_op')

        path, label = tf.decode_csv(
            line,
            record_defaults=[['None'], ['None']],
            field_delim=params.csv_delimiter,
            name='csv_reading_op')

        image, img_width = image_reading(
            path,
            resized_size=params.input_shape,
            data_augmentation=data_augmentation,
            padding=True)

        prepared_batch = tf.train.shuffle_batch(
            {
                'images': image,
                'images_widths': img_width,
                'filenames': path,
                'labels': label,
            },
            batch_size=batch_size,
            min_after_dequeue=500,
            num_threads=15,
            capacity=4000,
            allow_smaller_final_batch=False,
            name='prepared_batch_queue')

        if image_summaries:
            tf.summary.image('input/image', prepared_batch.get('images'), max_outputs=1)
            tf.summary.text('input/labels', prepared_batch.get('labels')[:10])
            tf.summary.text('input/widths', tf.as_string(prepared_batch.get('images_widths')))

        return prepared_batch, prepared_batch.get('labels')

    return input_fn
def parse_csv(rows_string_tensor):
    """Parse CSV rows into a dict of tensors with a trailing unit axis.

    The rows are decoded with ``CSV_COLUMN_DEFAULTS``, keyed by
    ``CSV_COLUMNS``, stripped of ``UNUSED_COLUMNS``, and every remaining
    column gets an extra last dimension.

    Args:
        rows_string_tensor: String tensor holding the CSV rows.

    Returns:
        Dict mapping each kept column name to its expanded tensor.

    Raises:
        KeyError: If an entry of ``UNUSED_COLUMNS`` is not a parsed column.
    """
    parsed = tf.decode_csv(
        rows_string_tensor, record_defaults=CSV_COLUMN_DEFAULTS)
    features = dict(zip(CSV_COLUMNS, parsed))

    # Drop the columns that are not used.
    for unused in UNUSED_COLUMNS:
        del features[unused]

    # Append a unit axis to every remaining column.
    return {
        name: tf.expand_dims(tensor, -1)
        for name, tensor in features.items()
    }
def parse_csv(rows_string_tensor):
    """Parse CSV rows into a dict of column tensors.

    The rows are decoded with ``CSV_COLUMN_DEFAULTS``, keyed by
    ``CSV_COLUMNS`` and stripped of ``UNUSED_COLUMNS``.

    NOTE(review): the original docstring promised rank-2 tensors, but
    this version never calls ``tf.expand_dims``, so no dimension is added
    to the input here - confirm which behavior callers expect.

    Args:
        rows_string_tensor: String tensor holding the CSV rows.

    Returns:
        Dict mapping each kept column name to its parsed tensor.

    Raises:
        KeyError: If an entry of ``UNUSED_COLUMNS`` is not a parsed column.
    """
    parsed = tf.decode_csv(
        rows_string_tensor, record_defaults=CSV_COLUMN_DEFAULTS)
    features = dict(zip(CSV_COLUMNS, parsed))

    # Drop the columns that are not used.
    for unused in UNUSED_COLUMNS:
        del features[unused]
    return features
def parse_csv(rows_string_tensor):
    """Parse CSV rows into a dict of column tensors with one added axis.

    A trailing dimension is added to the input before decoding. The
    original author's example: ``['csv,line,1', 'csv,line,2', ..]``
    becomes ``[['csv,line,1'], ['csv,line,2']]``, which parses into the
    tuple ``[['csv'], ['csv']], [['line'], ['line']], [[1], [2]]``.

    Args:
        rows_string_tensor: String tensor holding the CSV rows.

    Returns:
        Dict mapping each kept column name to its parsed tensor.

    Raises:
        KeyError: If an entry of ``UNUSED_COLUMNS`` is not a parsed column.
    """
    expanded_rows = tf.expand_dims(rows_string_tensor, -1)
    parsed = tf.decode_csv(expanded_rows, record_defaults=CSV_COLUMN_DEFAULTS)
    features = dict(zip(CSV_COLUMNS, parsed))

    # Drop the columns that are not used.
    for unused in UNUSED_COLUMNS:
        del features[unused]
    return features
def read_my_file_format(self, filename_queue):
    """Read one ``image,depth`` CSV row and decode both PNG files.

    Args:
        filename_queue: Queue of CSV file names for the line reader.

    Returns:
        ``(image_decoded, depth_decoded)``: both decoded as single-channel
        PNGs with their static shape declared as ``[512, 512, 1]``.
    """
    _, csv_row = tf.TextLineReader().read(filename_queue)
    # The "a" defaults only tell decode_csv that both cells are strings.
    image_file, depth_file = tf.decode_csv(csv_row, [["a"], ["a"]])

    image_bytes = tf.read_file(image_file)
    depth_bytes = tf.read_file(depth_file)

    # channels=1 reads the PNG as gray-scale.
    image_decoded = tf.image.decode_png(image_bytes, channels=1)
    image_decoded.set_shape([512, 512, 1])
    depth_decoded = tf.image.decode_png(depth_bytes, channels=1)
    depth_decoded.set_shape([512, 512, 1])
    return image_decoded, depth_decoded
def batch_generator(filenames):
    """Produce shuffled (features, label) batches from tab-separated files.

    The first line of each file is skipped. Every row is parsed into
    ``N_FEATURES`` string columns: the last one is the label and all the
    columns before it are stacked into the feature tensor.

    The batching op is ``tf.train.shuffle_batch``. The previous
    ``tf.train.batch`` call passed ``min_after_dequeue``, which that
    function does not accept, so it could not run; shuffling is what the
    surrounding comments describe.

    Args:
        filenames: List of files to read from (the original author used
            it with just heart.csv).

    Returns:
        ``(data_batch, label_batch)`` with up to ``BATCH_SIZE`` rows each
        (a smaller final batch is allowed).
    """
    filename_queue = tf.train.string_input_producer(filenames)
    # Skip the first line in the file.
    reader = tf.TextLineReader(skip_header_lines=1)
    _, value = reader.read(filename_queue)

    # Read every column of the row as a string.
    record_defaults = [[''] for _ in range(N_FEATURES)]
    content = tf.decode_csv(
        value, record_defaults=record_defaults, field_delim='\t')

    # Stack all columns but the last into one feature tensor.
    features = tf.stack(content[:N_FEATURES - 1])
    # The last column is the label.
    label = content[-1]

    # Minimum number of elements left in the queue after a dequeue, so
    # the samples stay sufficiently mixed; the author judged 10 batches
    # to be enough.
    min_after_dequeue = 10 * BATCH_SIZE
    # Maximum number of elements in the queue.
    capacity = 20 * BATCH_SIZE

    # Shuffle the data to generate BATCH_SIZE sample pairs.
    data_batch, label_batch = tf.train.shuffle_batch(
        [features, label],
        batch_size=BATCH_SIZE,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        allow_smaller_final_batch=True)
    return data_batch, label_batch
def read_my_file_format(filename):
    """Parse an ``"<image path> <label>"`` record and decode the JPEG.

    Args:
        filename: String tensor holding the space-delimited record.

    Returns:
        ``(img, label)``: the image decoded with 3 channels, and a
        one-element list holding the integer label tensor.
    """
    fields = tf.decode_csv(
        filename, record_defaults=[[""], [0]], field_delim=" ")
    # First field is the image path; whatever follows is the label.
    img_name, label = fields[0], fields[1:]
    img = tf.image.decode_jpeg(tf.read_file(img_name), channels=3)
    return img, label
def read_audio_csv(filename_queue):
    """Read one CSV row and split it into a feature matrix and labels.

    A row holds ``WIDE * FEATURE_DIM`` float feature values followed by
    ``OUT_DIM`` float label values.

    Args:
        filename_queue: Queue of CSV file names for the line reader.

    Returns:
        ``(features, labels)``: the feature values reshaped to
        ``[WIDE, FEATURE_DIM]``, and the list of remaining column tensors.
    """
    num_feature_values = WIDE * FEATURE_DIM

    _, line = tf.TextLineReader().read(filename_queue)
    defaults = [[0.] for _ in range(num_feature_values + OUT_DIM)]
    values = tf.decode_csv(line, record_defaults=defaults)

    features = tf.reshape(values[:num_feature_values], [WIDE, FEATURE_DIM])
    labels = values[num_feature_values:]
    return features, labels