def testWithUnicode(self):
    def preprocessing_fn(inputs):
        return {'a b': tf.string_join([inputs['a'], inputs['b']], separator=' ')}

    input_data = [{'a': 'Hello', 'b': 'world'}, {'a': 'Hello', 'b': u'κόσμε'}]
    input_metadata = dataset_metadata.DatasetMetadata({
        'a': sch.ColumnSchema(tf.string, [], sch.FixedColumnRepresentation()),
        'b': sch.ColumnSchema(tf.string, [], sch.FixedColumnRepresentation()),
    })
    expected_data = [
        {'a b': 'Hello world'},
        {'a b': u'Hello κόσμε'.encode('utf-8')}
    ]
    expected_metadata = dataset_metadata.DatasetMetadata({
        'a b': sch.ColumnSchema(tf.string, [], sch.FixedColumnRepresentation())
    })
    self.assertAnalyzeAndTransformResults(
        input_data, input_metadata, preprocessing_fn, expected_data,
        expected_metadata)
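For reference, here is a minimal standalone sketch (assuming the same TF 1.x API as the test above) of what tf.string_join does with a separator and non-ASCII input; the evaluated result is UTF-8 bytes:

import tensorflow as tf

joined = tf.string_join([tf.constant(u'Hello'), tf.constant(u'κόσμε')],
                        separator=' ')
with tf.Session() as sess:
    # String tensors hold bytes, so unicode input comes back UTF-8 encoded.
    print(sess.run(joined))  # b'Hello \xce\xba...'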
Python tf.string_join() example source code
def markdown_table(step):
    # The text summary can also contain Markdown, including Markdown
    # tables. Markdown tables look like this:
    #
    #     | hello | there |
    #     |-------|-------|
    #     | this  | is    |
    #     | a     | table |
    #
    # The leading and trailing pipes in each row are optional, and the text
    # doesn't actually have to be neatly aligned, so we can create these
    # pretty easily. Let's do so.
    header_row = 'Pounds of chocolate | Happiness'
    chocolate = tf.range(step)
    happiness = tf.square(chocolate + 1)
    chocolate_column = tf.as_string(chocolate)
    happiness_column = tf.as_string(happiness)
    table_rows = tf.string_join([chocolate_column, " | ", happiness_column])
    table_body = tf.reduce_join(table_rows, separator='\n')
    table = tf.string_join([header_row, "---|---", table_body], separator='\n')
    preamble = 'We conducted an experiment and found the following data:\n\n'
    result = tf.string_join([preamble, table])
    tf.summary.text('chocolate_study', result)
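To actually render the table, the summary op built above still has to be evaluated and written to disk. A minimal usage sketch, assuming TF 1.x and a hypothetical log directory /tmp/markdown_demo:

import tensorflow as tf

step = tf.placeholder(tf.int32, shape=[])
markdown_table(step)
summary_op = tf.summary.merge_all()
writer = tf.summary.FileWriter('/tmp/markdown_demo')
with tf.Session() as sess:
    for i in range(1, 5):
        # Each step logs a progressively longer Markdown table.
        writer.add_summary(sess.run(summary_op, feed_dict={step: i}),
                           global_step=i)
writer.close()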
def __init__(self, config, batch_size, one_hot=False):
    self.lookup = None
    reader = tf.TextLineReader()
    filename_queue = tf.train.string_input_producer(["chargan.txt"])
    key, x = reader.read(filename_queue)
    vocabulary = self.get_vocabulary()
    table = tf.contrib.lookup.string_to_index_table_from_tensor(
        mapping=vocabulary, default_value=0)
    # Pad each line with spaces, then truncate to exactly 64 characters.
    x = tf.string_join([x, tf.constant(" " * 64)])
    x = tf.substr(x, [0], [64])
    # Split into individual characters and map them to vocabulary indices.
    x = tf.string_split(x, delimiter='')
    x = tf.sparse_tensor_to_dense(x, default_value=' ')
    x = tf.reshape(x, [64])
    x = table.lookup(x)
    self.one_hot = one_hot
    if one_hot:
        x = tf.one_hot(x, len(vocabulary))
        x = tf.cast(x, dtype=tf.float32)
        x = tf.reshape(x, [1, int(x.get_shape()[0]), int(x.get_shape()[1]), 1])
    else:
        # Center and scale the indices to roughly [-1, 1].
        x = tf.cast(x, dtype=tf.float32)
        x -= len(vocabulary) / 2.0
        x /= len(vocabulary) / 2.0
        x = tf.reshape(x, [1, 1, 64, 1])
    num_preprocess_threads = 8
    x = tf.train.shuffle_batch(
        [x],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=5000,
        min_after_dequeue=500,
        enqueue_many=True)
    self.x = x
    self.table = table
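The pad-then-truncate idiom above (tf.string_join with a run of spaces, followed by tf.substr) forces variable-length lines to a fixed width. A standalone sketch under the same TF 1.x assumptions:

import tensorflow as tf

line = tf.constant('Hello')
padded = tf.string_join([line, tf.constant(' ' * 8)])  # guarantee at least 8 chars
fixed = tf.substr(padded, 0, 8)                        # then cut back to exactly 8
with tf.Session() as sess:
    print(repr(sess.run(fixed)))  # b'Hello   '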
def simple_example(step):
    # Text summaries log arbitrary text. This can be encoded with ASCII or
    # UTF-8. Here's a simple example, wherein we greet the user on each
    # step:
    step_string = tf.as_string(step)
    greeting = tf.string_join(['Hello from step ', step_string, '!'])
    tf.summary.text('greeting', greeting)
def higher_order_tensors(step):
    # We're not limited to passing scalar tensors to the summary
    # operation. If we pass a rank-1 or rank-2 tensor, it'll be visualized
    # as a table in TensorBoard. (For higher-ranked tensors, you'll see
    # just a 2D slice of the data.)
    #
    # To demonstrate this, let's create a multiplication table.
    # First, we'll create the table body, a `step`-by-`step` array of
    # strings.
    numbers = tf.range(step)
    numbers_row = tf.expand_dims(numbers, 0)  # shape: [1, step]
    numbers_column = tf.expand_dims(numbers, 1)  # shape: [step, 1]
    products = tf.matmul(numbers_column, numbers_row)  # shape: [step, step]
    table_body = tf.as_string(products)
    # Next, we'll create a header row and column, and a little
    # multiplication sign to put in the corner.
    bold_numbers = tf.string_join(['**', tf.as_string(numbers), '**'])
    bold_row = tf.expand_dims(bold_numbers, 0)
    bold_column = tf.expand_dims(bold_numbers, 1)
    corner_cell = tf.constant(u'\u00d7'.encode('utf-8'))  # MULTIPLICATION SIGN
    # Now, we have to put the pieces together. Using `axis=0` stacks
    # vertically; using `axis=1` juxtaposes horizontally.
    table_body_and_top_row = tf.concat([bold_row, table_body], axis=0)
    table_left_column = tf.concat([[[corner_cell]], bold_column], axis=0)
    table_full = tf.concat([table_left_column, table_body_and_top_row], axis=1)
    # The result, `table_full`, is a rank-2 string tensor of shape
    # `[step + 1, step + 1]`. We can pass it directly to the summary, and
    # we'll get a nicely formatted table in TensorBoard.
    tf.summary.text('multiplication_table', table_full)
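As a quick sanity check of the shapes involved, here is a hypothetical evaluation of just the table body for a small step; the bold header row and column get concatenated around this the same way:

import tensorflow as tf

numbers = tf.range(3)
products = tf.matmul(tf.expand_dims(numbers, 1), tf.expand_dims(numbers, 0))
with tf.Session() as sess:
    print(sess.run(tf.as_string(products)))
    # [[b'0' b'0' b'0']
    #  [b'0' b'1' b'2']
    #  [b'0' b'2' b'4']]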
def generate_run(self, run_name, include_graph):
    """Create a run with a text summary, metadata, and optionally a graph."""
    tf.reset_default_graph()
    k1 = tf.constant(math.pi, name='k1')
    k2 = tf.constant(math.e, name='k2')
    result = (k1 ** k2) - k1
    expected = tf.constant(20.0, name='expected')
    error = tf.abs(result - expected, name='error')
    message_prefix_value = 'error ' * 1000
    true_length = len(message_prefix_value)
    assert true_length > self._MESSAGE_PREFIX_LENGTH_LOWER_BOUND, true_length
    message_prefix = tf.constant(message_prefix_value, name='message_prefix')
    error_message = tf.string_join([message_prefix,
                                    tf.as_string(error, name='error_string')],
                                   name='error_message')
    summary_message = tf.summary.text('summary_message', error_message)
    sess = tf.Session()
    writer = tf.summary.FileWriter(os.path.join(self.logdir, run_name))
    if include_graph:
        writer.add_graph(sess.graph)
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    s = sess.run(summary_message, options=options, run_metadata=run_metadata)
    writer.add_summary(s)
    writer.add_run_metadata(run_metadata, self._METADATA_TAG)
    writer.close()
def _read_from_disk_spatial(fpath, nframes, num_samples=25, start_frame=0,
                            file_prefix='', file_zero_padding=4, file_index=1,
                            dataset_dir='', step=None):
    duration = nframes
    if step is None:
        if num_samples == 1:
            step = tf.random_uniform([1], 0, nframes, dtype='int32')[0]
        else:
            step = tf.cast((duration - tf.constant(1)) /
                           (tf.constant(num_samples - 1)), 'int32')
    allimgs = []
    with tf.variable_scope('read_rgb_video'):
        for i in range(num_samples):
            if num_samples == 1:
                i = 1  # so that the random step value can be used
            with tf.variable_scope('read_rgb_image'):
                prefix = file_prefix + '_' if file_prefix else ''
                impath = tf.string_join([
                    tf.constant(dataset_dir + '/'),
                    fpath, tf.constant('/'),
                    prefix,
                    tf.as_string(start_frame + i * step + file_index,
                                 width=file_zero_padding, fill='0'),
                    tf.constant('.jpg')])
                img_str = tf.read_file(impath)
                allimgs.append(img_str)
    return allimgs
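The zero-padded frame numbering above comes from tf.as_string's width and fill arguments. A minimal sketch (the directory and file names here are made up) of the joined path:

import tensorflow as tf

frame = tf.constant(7)
impath = tf.string_join([
    tf.constant('frames/video_01/'),  # hypothetical dataset_dir + clip path
    'img_',                           # hypothetical file prefix
    tf.as_string(frame, width=4, fill='0'),
    tf.constant('.jpg')])
with tf.Session() as sess:
    print(sess.run(impath))  # b'frames/video_01/img_0007.jpg'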
def _read_from_disk_temporal(
        fpath, nframes, num_samples=25,
        optical_flow_frames=10, start_frame=0,
        file_prefix='', file_zero_padding=4, file_index=1,
        dataset_dir='', step=None):
    duration = nframes
    if step is None:
        if num_samples == 1:
            step = tf.random_uniform([1], 0, nframes - optical_flow_frames - 1,
                                     dtype='int32')[0]
        else:
            step = tf.cast((duration - tf.constant(optical_flow_frames)) /
                           (tf.constant(num_samples)), 'int32')
    allimgs = []
    with tf.variable_scope('read_flow_video'):
        for i in range(num_samples):
            if num_samples == 1:
                i = 1  # so that the random step value can be used
            with tf.variable_scope('read_flow_image'):
                flow_img = []
                for j in range(optical_flow_frames):
                    with tf.variable_scope('read_flow_channels'):
                        for dr in ['x', 'y']:
                            prefix = file_prefix + '_' if file_prefix else ''
                            impath = tf.string_join([
                                tf.constant(dataset_dir + '/'),
                                fpath, tf.constant('/'),
                                prefix, '%s_' % dr,
                                tf.as_string(start_frame + i * step + file_index + j,
                                             width=file_zero_padding, fill='0'),
                                tf.constant('.jpg')])
                            img_str = tf.read_file(impath)
                            flow_img.append(img_str)
                allimgs.append(flow_img)
    return allimgs
def __init__(self, data_path, filenames_file, params, dataset, mode):
    self.data_path = data_path
    self.params = params
    self.dataset = dataset
    self.mode = mode
    self.left_image_batch = None
    self.right_image_batch = None
    # we load only one image for test, except if we trained a stereo model
    if mode == 'test' and not self.params.do_stereo:
        left_image_path = data_path
        left_image_o = self.read_image(left_image_path)
    else:
        # read left/right image paths from the filenames file
        input_queue = tf.train.string_input_producer([filenames_file], shuffle=False)
        line_reader = tf.TextLineReader()
        _, line = line_reader.read(input_queue)
        split_line = tf.string_split([line]).values
        left_image_path = tf.string_join([self.data_path, split_line[0]])
        right_image_path = tf.string_join([self.data_path, split_line[1]])
        left_image_o = self.read_image(left_image_path)
        right_image_o = self.read_image(right_image_path)
    if mode == 'train':
        # randomly flip images
        do_flip = tf.random_uniform([], 0, 1)
        left_image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(right_image_o), lambda: left_image_o)
        right_image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(left_image_o), lambda: right_image_o)
        # randomly augment images
        do_augment = tf.random_uniform([], 0, 1)
        left_image, right_image = tf.cond(do_augment > 0.5, lambda: self.augment_image_pair(left_image, right_image), lambda: (left_image, right_image))
        left_image.set_shape([None, None, 3])
        right_image.set_shape([None, None, 3])
        # capacity = min_after_dequeue + (num_threads + a small safety margin) * batch_size
        min_after_dequeue = 2048
        capacity = min_after_dequeue + 4 * params.batch_size
        self.left_image_batch, self.right_image_batch = tf.train.shuffle_batch(
            [left_image, right_image], params.batch_size, capacity,
            min_after_dequeue, params.num_threads)
    elif mode == 'test':
        self.left_image_batch = tf.stack([left_image_o, tf.image.flip_left_right(left_image_o)], 0)
        self.left_image_batch.set_shape([2, None, None, 3])
        if self.params.do_stereo:
            self.right_image_batch = tf.stack([right_image_o, tf.image.flip_left_right(right_image_o)], 0)
            self.right_image_batch.set_shape([2, None, None, 3])
def op(name,
       guest,
       display_name=None,
       description=None,
       collections=None):
    """Create a TensorFlow summary op to greet the given guest.

    Arguments:
      name: A name for this summary operation.
      guest: A rank-0 string `Tensor`.
      display_name: If set, will be used as the display name in
        TensorBoard. Defaults to `name`.
      description: A longform readable description of the summary data.
        Markdown is supported.
      collections: Which TensorFlow graph collections to add the summary
        op to. Defaults to `['summaries']`. Can usually be ignored.
    """
    # The `name` argument is used to generate the summary op node name.
    # That node name will also involve the TensorFlow name scope.
    # By having the display_name default to the name argument, we make
    # the TensorBoard display clearer.
    if display_name is None:
        display_name = name
    # We could put additional metadata other than the PLUGIN_NAME,
    # but we don't need any metadata for this simple example.
    summary_metadata = tf.SummaryMetadata(
        display_name=display_name,
        summary_description=description,
        plugin_data=tf.SummaryMetadata.PluginData(
            plugin_name=PLUGIN_NAME,
            content=''))
    message = tf.string_join(['Hello, ', guest, '!'])
    # Return a summary op that is properly configured.
    return tf.summary.tensor_summary(
        name,
        message,
        summary_metadata=summary_metadata,
        collections=collections)
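A hedged usage sketch for this op: it assumes PLUGIN_NAME is defined as in the surrounding plugin example, and uses a standard TF 1.x FileWriter with a made-up log directory:

import tensorflow as tf

guest = tf.placeholder(tf.string, shape=[])
greet_op = op('greeting', guest)
with tf.Session() as sess:
    serialized = sess.run(greet_op, feed_dict={guest: 'world'})
writer = tf.summary.FileWriter('/tmp/greeter_demo')  # hypothetical logdir
writer.add_summary(serialized)
writer.close()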
def parse_csv(schema, instances, prediction):
    """A wrapper around decode_csv that parses csv instances based on provided Schema information.
    """
    if prediction:
        # For training and evaluation data, the expectation is the target column is always present.
        # For prediction however, the target may or may not be present.
        # - In true prediction use-cases, the target is unknown and never present.
        # - In prediction for model evaluation use-cases, the target is present.
        # To use a single prediction graph, the missing target needs to be detected by comparing
        # number of columns in instances with number of columns defined in the schema. If there are
        # fewer columns, then prepend a ',' (with the assumption that the target is always the
        # first column).
        #
        # To get the number of columns in instances, split on the ',' on the first instance, and
        # use the first dimension of the shape of the resulting substring values.
        columns = tf.shape(tf.string_split([instances[0]], delimiter=',').values)[0]
        instances = tf.cond(tf.less(columns, len(schema)),
                            lambda: tf.string_join([tf.constant(','), instances]),
                            lambda: instances)
    # Convert the schema into a set of tensor defaults, to be used for parsing csv data.
    defaults = []
    for field in schema:
        if field.length != 1:
            # TODO: Support variable length, and list columns in csv.
            raise ValueError('Unsupported schema field "%s". Length must be 1.' % field.name)
        if field.type == SchemaFieldType.integer:
            field_default = tf.constant(0, dtype=tf.int64)
        elif field.type == SchemaFieldType.real:
            field_default = tf.constant(0.0, dtype=tf.float32)
        else:
            # discrete, text, binary
            field_default = tf.constant('', dtype=tf.string)
        defaults.append([field_default])
    values = tf.decode_csv(instances, defaults, name='csv')
    parsed_instances = {}
    for field, value in zip(schema, values):
        # The parsed values are scalars, so each tensor is of shape (None,); turn them into
        # tensors of shape (None, 1).
        parsed_instances[field.name] = tf.expand_dims(value, axis=1, name=field.name)
    return parsed_instances
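To see the missing-target handling in isolation, here is a minimal sketch with a made-up row and schema length: a prediction instance that lacks the leading target column gets a ',' prepended so decode_csv still sees the expected number of columns:

import tensorflow as tf

instances = tf.constant(['3.5,1.2'])  # hypothetical row with the target missing
schema_len = 3                        # hypothetical schema: target + two features
columns = tf.shape(tf.string_split([instances[0]], delimiter=',').values)[0]
instances = tf.cond(tf.less(columns, schema_len),
                    lambda: tf.string_join([tf.constant(','), instances]),
                    lambda: instances)
with tf.Session() as sess:
    print(sess.run(instances))  # [b',3.5,1.2']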
def __init__(self, data_path, filenames_file, params, dataset, mode):
    self.data_path = data_path
    self.params = params
    self.dataset = dataset
    self.mode = mode
    self.left_image_batch = None
    self.right_image_batch = None
    input_queue = tf.train.string_input_producer([filenames_file], shuffle=False)
    line_reader = tf.TextLineReader()
    _, line = line_reader.read(input_queue)
    split_line = tf.string_split([line]).values
    # we load only one image for test, except if we trained a stereo model
    if mode == 'test' and not self.params.do_stereo:
        left_image_path = tf.string_join([self.data_path, split_line[0]])
        left_image_o = self.read_image(left_image_path)
    else:
        left_image_path = tf.string_join([self.data_path, split_line[0]])
        right_image_path = tf.string_join([self.data_path, split_line[1]])
        left_image_o = self.read_image(left_image_path)
        right_image_o = self.read_image(right_image_path)
    if mode == 'train':
        # randomly flip images
        do_flip = tf.random_uniform([], 0, 1)
        left_image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(right_image_o), lambda: left_image_o)
        right_image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(left_image_o), lambda: right_image_o)
        # randomly augment images
        do_augment = tf.random_uniform([], 0, 1)
        left_image, right_image = tf.cond(do_augment > 0.5, lambda: self.augment_image_pair(left_image, right_image), lambda: (left_image, right_image))
        left_image.set_shape([None, None, 3])
        right_image.set_shape([None, None, 3])
        # capacity = min_after_dequeue + (num_threads + a small safety margin) * batch_size
        min_after_dequeue = 2048
        capacity = min_after_dequeue + 4 * params.batch_size
        self.left_image_batch, self.right_image_batch = tf.train.shuffle_batch(
            [left_image, right_image], params.batch_size, capacity,
            min_after_dequeue, params.num_threads)
    elif mode == 'test':
        self.left_image_batch = tf.stack([left_image_o, tf.image.flip_left_right(left_image_o)], 0)
        self.left_image_batch.set_shape([2, None, None, 3])
        if self.params.do_stereo:
            self.right_image_batch = tf.stack([right_image_o, tf.image.flip_left_right(right_image_o)], 0)
            self.right_image_batch.set_shape([2, None, None, 3])