import tensorflow as tf


def get_evaluation_input(inputs, params):
    with tf.device("/cpu:0"):
        # Create one dataset per input: inputs[0] holds the source
        # sentences, inputs[1:] hold the reference translations
        datasets = []

        for data in inputs:
            dataset = tf.data.Dataset.from_tensor_slices(data)
            # Split each line into a 1-D tensor of tokens
            dataset = dataset.map(lambda x: tf.string_split([x]).values,
                                  num_parallel_calls=params.num_threads)
            # Append <eos>
            dataset = dataset.map(
                lambda x: tf.concat([x, [tf.constant(params.eos)]], axis=0),
                num_parallel_calls=params.num_threads
            )
            datasets.append(dataset)

        dataset = tf.data.Dataset.zip(tuple(datasets))

        # Convert tuple to dictionary
        dataset = dataset.map(
            lambda *x: {
                "source": x[0],
                "source_length": tf.shape(x[0])[0],
                "references": x[1:]
            },
            num_parallel_calls=params.num_threads
        )

        dataset = dataset.padded_batch(
            params.eval_batch_size,
            {
                "source": [tf.Dimension(None)],
                "source_length": [],
                "references": (tf.Dimension(None),) * (len(inputs) - 1)
            },
            {
                "source": params.pad,
                "source_length": 0,
                "references": (params.pad,) * (len(inputs) - 1)
            }
        )

        iterator = dataset.make_one_shot_iterator()
        features = iterator.get_next()

        # Map tokens to vocabulary ids; out-of-vocabulary tokens fall
        # back to the <unk> id
        src_table = tf.contrib.lookup.index_table_from_tensor(
            tf.constant(params.vocabulary["source"]),
            default_value=params.mapping["source"][params.unk]
        )
        tgt_table = tf.contrib.lookup.index_table_from_tensor(
            tf.constant(params.vocabulary["target"]),
            default_value=params.mapping["target"][params.unk]
        )
        features["source"] = src_table.lookup(features["source"])
        features["references"] = tuple(
            tgt_table.lookup(item) for item in features["references"]
        )

        return features
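
The lookup tables created with tf.contrib.lookup.index_table_from_tensor are not initialized automatically, so a consumer must run tf.tables_initializer() before the first fetch. Below is a minimal driver sketch; the EvalParams class and its toy vocabularies are hypothetical, invented only to exercise the attributes this function actually reads:

import tensorflow as tf


class EvalParams(object):
    # Hypothetical params object; only the fields read by
    # get_evaluation_input are defined.
    num_threads = 4
    eval_batch_size = 2
    eos = "<eos>"
    pad = "<pad>"
    unk = "<unk>"
    vocabulary = {"source": ["<pad>", "<eos>", "<unk>", "a", "b"],
                  "target": ["<pad>", "<eos>", "<unk>", "x", "y"]}
    mapping = {"source": {"<unk>": 2}, "target": {"<unk>": 2}}


sources = ["a b", "b a a"]        # source sentences
references = ["x y", "y x x"]     # one set of reference translations

features = get_evaluation_input([sources, references], EvalParams())

with tf.Session() as sess:
    sess.run(tf.tables_initializer())  # lookup tables need explicit init
    print(sess.run(features["source"]))  # padded batch of token ids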
def __init__(self, data_path, filenames_file, params, dataset, mode):
    self.data_path = data_path
    self.params = params
    self.dataset = dataset
    self.mode = mode
    self.left_image_batch = None
    self.right_image_batch = None

    input_queue = tf.train.string_input_producer([filenames_file], shuffle=False)
    line_reader = tf.TextLineReader()
    _, line = line_reader.read(input_queue)
    split_line = tf.string_split([line]).values

    # we load only one image for test, except if we trained a stereo model
    if mode == 'test' and not self.params.do_stereo:
        left_image_path = tf.string_join([self.data_path, split_line[0]])
        left_image_o = self.read_image(left_image_path)
    else:
        left_image_path = tf.string_join([self.data_path, split_line[0]])
        right_image_path = tf.string_join([self.data_path, split_line[1]])
        left_image_o = self.read_image(left_image_path)
        right_image_o = self.read_image(right_image_path)

    if mode == 'train':
        # randomly flip images
        do_flip = tf.random_uniform([], 0, 1)
        left_image = tf.cond(do_flip > 0.5,
                             lambda: tf.image.flip_left_right(right_image_o),
                             lambda: left_image_o)
        right_image = tf.cond(do_flip > 0.5,
                              lambda: tf.image.flip_left_right(left_image_o),
                              lambda: right_image_o)

        # randomly augment images
        do_augment = tf.random_uniform([], 0, 1)
        left_image, right_image = tf.cond(
            do_augment > 0.5,
            lambda: self.augment_image_pair(left_image, right_image),
            lambda: (left_image, right_image))

        left_image.set_shape([None, None, 3])
        right_image.set_shape([None, None, 3])

        # capacity = min_after_dequeue + (num_threads + a small safety margin) * batch_size
        min_after_dequeue = 2048
        capacity = min_after_dequeue + 4 * params.batch_size
        self.left_image_batch, self.right_image_batch = tf.train.shuffle_batch(
            [left_image, right_image], params.batch_size, capacity,
            min_after_dequeue, params.num_threads)

    elif mode == 'test':
        self.left_image_batch = tf.stack(
            [left_image_o, tf.image.flip_left_right(left_image_o)], 0)
        self.left_image_batch.set_shape([2, None, None, 3])

        if self.params.do_stereo:
            self.right_image_batch = tf.stack(
                [right_image_o, tf.image.flip_left_right(right_image_o)], 0)
            self.right_image_batch.set_shape([2, None, None, 3])
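
Because this loader is built on TF 1.x input queues (tf.train.string_input_producer, tf.train.shuffle_batch), batches only flow once queue runners are started. A hypothetical consumption sketch follows; MonodepthDataloader is an assumed name for the class this __init__ belongs to, and LoaderParams is a stand-in for its params object:

import tensorflow as tf


class LoaderParams(object):
    # Hypothetical params; only fields read by the loader are defined.
    do_stereo = False
    batch_size = 8
    num_threads = 4


# Each line of filenames.txt is assumed to hold "left.jpg right.jpg",
# which split_line indexes after tf.string_split.
loader = MonodepthDataloader('/data/kitti/', 'filenames.txt',
                             LoaderParams(), 'kitti', 'train')

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    # string_input_producer and shuffle_batch enqueue from background
    # threads, which must be started before the first run() call.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    left, right = sess.run([loader.left_image_batch,
                            loader.right_image_batch])
    coord.request_stop()
    coord.join(threads)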
def make_preprocessing_fn(frequency_threshold):
    """Creates a preprocessing function for reddit.

    Args:
      frequency_threshold: The frequency threshold used when generating
        vocabularies for categorical and text features.

    Returns:
      A preprocessing function.
    """
    def preprocessing_fn(inputs):
        """User defined preprocessing function for reddit columns.

        Args:
          inputs: dictionary of input `tensorflow_transform.Column`.

        Returns:
          A dictionary of `tensorflow_transform.Column` representing the
          transformed columns.
        """
        # TODO(b/35001605) Make this "passthrough" more DRY.
        result = {'score': inputs['score'], 'toplevel': inputs['toplevel']}

        result['subreddit_id'] = tft.string_to_int(
            inputs['subreddit'], frequency_threshold=frequency_threshold)

        # TODO(b/35318962): Obviate the need for this workaround on Dense
        # features. FeatureColumns expect shape (batch_size, 1), not just
        # (batch_size). All features added to result up to this point are
        # dense and require this workaround. All following features will be
        # sparse.
        result = {
            k: tft.map(lambda x: tf.expand_dims(x, -1), v)
            for k, v in result.items()
        }

        for name in ('author', 'comment_body', 'comment_parent_body'):
            words = tft.map(tf.string_split, inputs[name])
            # TODO(b/33467613) Translate these to bag-of-words style sparse
            # features.
            result[name + '_bow'] = tft.string_to_int(
                words, frequency_threshold=frequency_threshold)

        return result

    return preprocessing_fn
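
All three excerpts rely on the same core behavior: tf.string_split returns a SparseTensor, not a dense string tensor, which is why the first example takes .values after splitting a single-element list. A minimal standalone sketch (TF 1.x):

import tensorflow as tf

lines = tf.constant(["a b c", "d e"])
sparse = tf.string_split(lines)  # default delimiter is a single space

with tf.Session() as sess:
    indices, values, shape = sess.run(
        [sparse.indices, sparse.values, sparse.dense_shape])
    print(indices)  # [[0 0] [0 1] [0 2] [1 0] [1 1]]
    print(values)   # [b'a' b'b' b'c' b'd' b'e']
    print(shape)    # [2 3]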