def prepare_reader(self, filename_queue, batch_size=1024):
    """Read serialized examples from a filename queue and parse them.

    Args:
        filename_queue: Queue of TFRecord filenames handed to the reader.
        batch_size: Upper bound on the number of records read per call.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(
        filename_queue, batch_size)

    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
# Python example source code for parse_example()
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def example_serving_input_fn(default_batch_size=None):
    """Build the serving inputs from a placeholder of serialized examples.

    Args:
        default_batch_size (int): Batch size for the tf.placeholder shape.

    Returns:
        A pair of (dict of parsed feature tensors, dict mapping 'example'
        to the string placeholder that receives the serialized protos).
    """
    # Continuous columns are parsed as int64 scalars, categorical ones as
    # string scalars; categorical entries are applied last, so they take
    # precedence on a name clash.
    parse_spec = {
        column: tf.FixedLenFeature(shape=[], dtype=tf.int64)
        for column in CONTINUOUS_COLS
    }
    parse_spec.update({
        column: tf.FixedLenFeature(shape=[], dtype=tf.string)
        for column, _ in CATEGORICAL_COLS
    })

    serialized_input = tf.placeholder(
        shape=[default_batch_size], dtype=tf.string)
    parsed = tf.parse_example(serialized_input, parse_spec)
    return parsed, {'example': serialized_input}
def example_serving_input_fn():
    """Build the serving inputs.

    Returns:
        A tf.estimator.export.ServingInputReceiver whose features are the
        tensors parsed from a string placeholder with the spec derived from
        INPUT_COLUMNS, and whose receiver tensors map 'example_proto' to
        that placeholder.
    """
    example_bytestring = tf.placeholder(
        shape=[None],
        dtype=tf.string,
    )
    # The parsed tensors are what the receiver must expose; previously the
    # result was bound to a different name than the one returned, which
    # raised NameError at call time.
    features = tf.parse_example(
        example_bytestring,
        tf.feature_column.make_parse_example_spec(INPUT_COLUMNS)
    )
    return tf.estimator.export.ServingInputReceiver(
        features,
        {'example_proto': example_bytestring}
    )
# [START serving-function]
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def serving_input_receiver_fn():
    """Build the ServingInputReceiver used for input processing at serving.

    Returns:
        A tf.estimator.export.ServingInputReceiver whose features are the
        result of full_onehot_process_line_as_2d_input applied to the
        flattened 'str' feature, and whose receiver tensors map 'example'
        to the placeholder named 'input_example_tensor'.

    NOTE: According to the original author this should still work, but it
    has not been re-tested since roughly TensorFlow 1.2.1.
    """
    serialized_tf_example = tf.placeholder(
        dtype=tf.string, name='input_example_tensor')
    parsed = tf.parse_example(
        serialized_tf_example,
        {'str': tf.FixedLenFeature([1], tf.string)})

    # Each example carries a length-1 string feature; flatten it to rank 1
    # before it goes to the one-hot processing helper.
    flat_lines = tf.reshape(parsed['str'], [-1])
    processed = full_onehot_process_line_as_2d_input(flat_lines)

    return tf.estimator.export.ServingInputReceiver(
        processed, {'example': serialized_tf_example})
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def example_serving_input_fn(default_batch_size=None):
    """Build the serving inputs.

    Args:
        default_batch_size (int): Batch size for the tf.placeholder shape.

    Returns:
        A pair of (dict of feature tensors, dict mapping 'example' to the
        string placeholder that receives the serialized protos). Every
        parsed scalar feature gets a trailing axis added via tf.expand_dims.
    """
    feature_spec = {}
    for feat in CONTINUOUS_COLS:
        feature_spec[feat] = tf.FixedLenFeature(shape=[], dtype=tf.float32)
    # Categorical entries are (name, <unused>) pairs; only the name matters
    # for the parsing spec.
    for feat, _ in CATEGORICAL_COLS:
        feature_spec[feat] = tf.FixedLenFeature(shape=[], dtype=tf.string)

    example_bytestring = tf.placeholder(
        shape=[default_batch_size],
        dtype=tf.string,
    )
    feature_scalars = tf.parse_example(example_bytestring, feature_spec)

    # dict.items() is used instead of iteritems(): the latter exists only
    # on Python 2 and raises AttributeError on Python 3.
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_scalars.items()
    }
    return features, {'example': example_bytestring}
def get_placeholder_input_fn(config, model_type, vocab_sizes, use_crosses):
    """Wrap the input-features function so it carries the model metadata.

    Args:
        config: Forwarded unchanged to feature_columns.
        model_type: Forwarded unchanged to feature_columns.
        vocab_sizes: Forwarded unchanged to feature_columns.
        use_crosses: Forwarded unchanged to feature_columns.

    Returns:
        A zero-argument function that builds the placeholder-based inputs
        and returns a (features, None) pair.
    """
    def get_input_features():
        """Read the input features from a fresh string placeholder."""
        parsing_spec = tf.contrib.layers.create_feature_spec_for_parsing(
            feature_columns(config, model_type, vocab_sizes, use_crosses))
        # Dense feature for the keys; falls back to '' when the key is not
        # present on the tf.Example proto.
        parsing_spec[KEY_FEATURE_COLUMN] = tf.FixedLenFeature(
            [1], dtype=tf.string, default_value='')

        # Placeholder for the serialized tf.Example proto input.
        serialized = tf.placeholder(tf.string, shape=(None,))
        parsed = tf.parse_example(serialized, parsing_spec)

        # Expose the input tensor alongside the features so it can be used
        # for export.
        parsed[EXAMPLES_PLACEHOLDER_KEY] = serialized
        return parsed, None

    # Hand back the function that feeds the features from a placeholder.
    return get_input_features
def read_and_decode_batch(filename_queue, batch_size, capacity, min_after_dequeue):
    """Dequeue a shuffled batch of records from the TFRecord and parse it.

    Args:
        filename_queue: Filename queue of the TFRecord.
        batch_size: How many records are dequeued each time.
        capacity: The capacity of the shuffle queue.
        min_after_dequeue: Ensures a minimum amount of shuffling of examples.

    Returns:
        A tuple (batch_label, batch_ids, batch_values) holding the parsed
        "label", "ids" and "values" features of the batch.
    """
    record_reader = tf.TFRecordReader()
    _, single_record = record_reader.read(filename_queue)
    shuffled_records = tf.train.shuffle_batch(
        [single_record],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)

    # This schema has to stay consistent with the LibSVM-to-TFRecord
    # conversion that produced the records.
    record_schema = {
        "label": tf.FixedLenFeature([], tf.float32),
        "ids": tf.VarLenFeature(tf.int64),
        "values": tf.VarLenFeature(tf.float32),
    }
    parsed = tf.parse_example(shuffled_records, features=record_schema)
    return parsed["label"], parsed["ids"], parsed["values"]
def prepare_serialized_examples(self, serialized_examples):
    """Parse serialized examples into scaled images and one-hot labels.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos holding
            an int64 'image_raw' feature of length 784 and a scalar int64
            'label' feature.

    Returns:
        A pair (images, labels): images are the 'image_raw' values cast to
        float32 and multiplied by 1/255; labels are the int32 one-hot
        encodings of 'label', reshaped to [-1, 10].
    """
    num_classes = 10

    parsed = tf.parse_example(
        serialized_examples,
        features={
            'image_raw': tf.FixedLenFeature([784], tf.int64),
            'label': tf.FixedLenFeature([], tf.int64),
        })
    scaled_images = tf.cast(parsed["image_raw"], tf.float32) * (1. / 255)
    int_labels = tf.cast(parsed['label'], tf.int32)

    def encode_one(label):
        """One-hot encode a single label as an int32 tensor."""
        return tf.cast(slim.one_hot_encoding(label, num_classes), tf.int32)

    encoded = tf.map_fn(encode_one, int_labels)
    return scaled_images, tf.reshape(encoded, [-1, num_classes])
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
# cluster_measurements.py (file source)
# Project: scalable_analytics
# Author: broadinstitute
# Project source
# File source
# Views: 22
# Favorites: 0
# Likes: 0
# Comments: 0
def _predict_input_fn():
    """Supply the input to the model.

    Returns:
        An input_fn_utils.InputFnOps whose features map DENSE_KEY to the
        dense tensor built from the parsed examples, whose labels are None
        (as the original notes, clustering requires no targets), and whose
        default inputs map EXAMPLE_KEY to the examples placeholder.
    """
    # Placeholder for the serialized tf.Example proto input.
    serialized = tf.placeholder(tf.string, shape=(None,), name="examples")
    parsed = tf.parse_example(serialized, _get_feature_columns())
    dense_features = {DENSE_KEY: _raw_features_to_dense_tensor(parsed)}
    return input_fn_utils.InputFnOps(
        features=dense_features,
        labels=None,
        default_inputs={EXAMPLE_KEY: serialized})
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def prepare_serialized_examples(self, serialized_examples):
    """Parse serialized examples into inputs with fine and coarse labels.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 5-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        of the sparse "labels" feature with self.num_classes columns, the
        indicator tensor of the coarse labels with self.num_coarse_classes
        columns, a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    # Translate each label id through self.label_belongs while keeping the
    # sparse layout (indices and dense shape) of the original labels.
    sparse_labels = parsed["labels"]
    coarse_lookup = tf.constant(self.label_belongs, dtype=tf.int64)
    coarse_values = tf.reshape(
        tf.gather(coarse_lookup, sparse_labels.values), [-1])
    sparse_coarse = tf.SparseTensor(
        indices=sparse_labels.indices,
        values=coarse_values,
        dense_shape=sparse_labels.dense_shape)
    coarse_indicator = tf.sparse_to_indicator(
        sparse_coarse, self.num_coarse_classes, name='coarse_transfer')
    coarse_indicator.set_shape([None, self.num_coarse_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            coarse_indicator, tf.ones([example_count]))
def prepare_serialized_examples(self, serialized_examples):
    """Parse a batch of serialized examples into model inputs.

    Args:
        serialized_examples: Tensor of serialized tf.Example protos, passed
            straight to tf.parse_example.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def _predict_input_fn():
    """Supply the input to the model.

    Returns:
        An input_fn_utils.InputFnOps whose features are the tensors parsed
        from the examples placeholder (plus a PREDICTION_KEY entry that
        aliases the id field), whose labels are None, and whose default
        inputs map PREDICTION_EXAMPLES to the placeholder.
    """
    parsing_spec = tf.contrib.layers.create_feature_spec_for_parsing(
        feature_columns=_get_feature_columns(include_target_column=False))
    # The id field and the "<target>_string" field are parsed as scalar
    # strings in addition to the model's own feature columns.
    parsing_spec[FLAGS.id_field] = tf.FixedLenFeature([], dtype=tf.string)
    target_string_field = FLAGS.target_field + "_string"
    parsing_spec[target_string_field] = tf.FixedLenFeature(
        [], dtype=tf.string)

    # Placeholder for the serialized tf.Example proto input.
    serialized = tf.placeholder(tf.string, shape=(None,), name="examples")
    parsed = tf.parse_example(serialized, parsing_spec)
    parsed[PREDICTION_KEY] = parsed[FLAGS.id_field]

    return input_fn_utils.InputFnOps(
        features=parsed,
        labels=None,
        default_inputs={PREDICTION_EXAMPLES: serialized})
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: Upper bound on the number of records read per call.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(
        filename_queue, batch_size)

    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: Upper bound on the number of records read per call.

    Returns:
        A 5-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example, the parsed
        float "predictions" feature of width self.num_classes).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(
        filename_queue, batch_size)

    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields (including the stored
    # predictions) first, then one dense float feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]), parsed["predictions"])
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Only the stored predictions are returned; the configured feature names
    are validated but not parsed.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: Upper bound on the number of records read per call.

    Returns:
        The parsed float "predictions" feature of width self.num_classes.

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(
        filename_queue, batch_size)

    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "predictions": tf.FixedLenFeature([self.num_classes], tf.float32),
        "labels": tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_example(serialized_examples, features=parse_spec)
    return parsed["predictions"]
def prepare_writer(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Despite its name, this method only reads and parses records.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: Upper bound on the number of records read per call.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(
        filename_queue, batch_size)

    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def prepare_reader(self, filename_queue, batch_size=1024):
    """Creates a single reader thread for pre-aggregated YouTube 8M Examples.

    Args:
        filename_queue: A tensorflow queue of filename locations.
        batch_size: Upper bound on the number of records read per call.

    Returns:
        A 4-tuple of (the parsed "video_id" feature, the features named in
        self.feature_names concatenated along axis 1, the indicator tensor
        built from the sparse "labels" feature with self.num_classes columns,
        a tensor of ones with one entry per serialized example).

    Raises:
        AssertionError: If self.feature_names is empty or its length differs
            from that of self.feature_sizes.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_examples = record_reader.read_up_to(
        filename_queue, batch_size)

    name_count = len(self.feature_names)
    assert name_count > 0, "self.feature_names is empty!"
    size_count = len(self.feature_sizes)
    assert name_count == size_count, (
        "length of feature_names (={}) != length of feature_sizes (={})"
        .format(name_count, size_count))

    # Schema of the proto: the fixed fields first, then one dense float
    # feature per configured name.
    parse_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    for position, name in enumerate(self.feature_names):
        parse_spec[name] = tf.FixedLenFeature(
            [self.feature_sizes[position]], tf.float32)

    parsed = tf.parse_example(serialized_examples, features=parse_spec)

    label_indicator = tf.sparse_to_indicator(
        parsed["labels"], self.num_classes)
    label_indicator.set_shape([None, self.num_classes])

    dense_columns = [parsed[name] for name in self.feature_names]
    merged_features = tf.concat(dense_columns, 1)

    example_count = tf.shape(serialized_examples)[0]
    return (parsed["video_id"], merged_features, label_indicator,
            tf.ones([example_count]))
def get_input_op(self, fq, parsers):
    """Read up to self.batch_size records from a queue and parse them.

    Args:
        fq: Filename queue handed to the TFRecordReader.
        parsers: Feature spec passed to tf.parse_example.

    Returns:
        The result of tf.parse_example on the records that were read.
    """
    record_reader = tf.TFRecordReader()
    read_result = record_reader.read_up_to(fq, self.batch_size)
    # read_up_to yields (keys, values); only the serialized values are
    # parsed.
    _, serialized_records = read_result
    parsed = tf.parse_example(serialized_records, parsers)
    return parsed
def example_serving_input_fn():
    """Build the serving inputs.

    Returns:
        A tf.estimator.export.ServingInputReceiver whose features are the
        tensors parsed from a string placeholder with the spec derived from
        INPUT_COLUMNS, and whose receiver tensors map 'example_proto' to
        that placeholder.
    """
    serialized_input = tf.placeholder(shape=[None], dtype=tf.string)
    parsing_spec = tf.feature_column.make_parse_example_spec(INPUT_COLUMNS)
    parsed = tf.parse_example(serialized_input, parsing_spec)
    receiver_tensors = {'example_proto': serialized_input}
    return tf.estimator.export.ServingInputReceiver(parsed, receiver_tensors)
def build_prediction_graph(self):
    """Builds prediction graph and registers appropriate endpoints.

    Parses 'image' and 'key' from a placeholder of serialized examples,
    runs the inference model on the images, and records the input and
    output tensor names as JSON in the 'inputs' and 'outputs' collections.
    """
    serialized = tf.placeholder(tf.string, shape=(None,))
    parse_spec = {
        'image': tf.FixedLenFeature(shape=[IMAGE_PIXELS], dtype=tf.float32),
        'key': tf.FixedLenFeature(shape=[], dtype=tf.string),
    }
    parsed = tf.parse_example(serialized, parse_spec)
    keys = parsed['key']

    # Graph that computes predictions from the inference model.
    logits = inference(parsed['image'], self.hidden1, self.hidden2)
    scores = tf.nn.softmax(logits)
    predicted_class = tf.argmax(scores, 1)

    # Mark the inputs and the outputs.
    # The input alias carries the suffix _bytes to indicate that the tensor
    # value is raw bytes and will be base64 encoded over HTTP. Likewise, any
    # output tensor whose alias ends in _bytes is base64 encoded in the HTTP
    # response and must be base64 decoded to recover the binary value.
    input_aliases = {'examples_bytes': serialized.name}
    output_aliases = {
        'key': keys.name,
        'prediction': predicted_class.name,
        'scores': scores.name
    }
    tf.add_to_collection('inputs', json.dumps(input_aliases))
    tf.add_to_collection('outputs', json.dumps(output_aliases))