python类VarLenFeature()的实例源码-第2页-面圈网

data.py 文件源码项目：seglink 作者: bgshih 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def input_stream(record_path, scope=None):
  """
  Build the ops that read and decode one example from a TFRecord file.

  Every op is created on '/cpu:0' and inside a variable scope named `scope`
  (or 'input_stream' when `scope` is falsy).

  ARGS
    `record_path`: tf records file path
    `scope`: optional variable scope name
  RETURN
    `streams`: dict of data streams with the keys
      'image'         : JPEG decoded with 3 channels, cast to float32
      'image_name'    : parsed 'image_name' string feature
      'image_jpeg'    : parsed (still encoded) 'image_jpeg' string feature
      'word_polygons' : densified polygons reshaped to [-1, WORD_POLYGON_DIM]
  """
  # TODO: the 'words' feature (tf.VarLenFeature(tf.string)) is left out
  # because of a problem with parsing words.
  feature_spec = {
      'image_jpeg': tf.FixedLenFeature([], tf.string),
      'image_name': tf.FixedLenFeature([], tf.string),
      'word_polygons': tf.VarLenFeature(tf.float32),
  }

  with tf.device('/cpu:0'), tf.variable_scope(scope or 'input_stream'):
    reader = tf.TFRecordReader()
    # num_epochs=None: the single-file queue is not limited to a fixed
    # number of epochs.
    filename_queue = tf.train.string_input_producer([record_path],
                                                    num_epochs=None)
    _, serialized = reader.read(filename_queue)
    parsed = tf.parse_single_example(serialized, feature_spec)

    # decode jpeg image
    decoded_jpeg = tf.image.decode_jpeg(parsed['image_jpeg'], channels=3)
    image = tf.cast(decoded_jpeg, tf.float32)

    # extract bounding polygons: sparse -> dense -> one polygon per row
    dense_polygons = tf.sparse_tensor_to_dense(parsed['word_polygons'])
    word_polygons = tf.reshape(dense_polygons, [-1, WORD_POLYGON_DIM])

    # output streams
    return {
        'image': image,
        'image_name': parsed['image_name'],
        'image_jpeg': parsed['image_jpeg'],
        'word_polygons': word_polygons,
    }

sequence_example_decoder.py 文件源码项目：conv_seq2seq 作者: tobyyouup 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def __init__(self, context_keys_to_features, sequence_keys_to_features,
               items_to_handlers):
    """Constructs the decoder by storing the parsing specs and handlers.

    Args:
      context_keys_to_features: stored as `_context_keys_to_features`.
        Presumably a dictionary from context feature keys to
        tf.VarLenFeature or tf.FixedLenFeature instances (see tensorflow's
        parsing_ops.py) -- confirm against callers.
      sequence_keys_to_features: stored as `_sequence_keys_to_features`.
        Presumably the equivalent dictionary for the sequence (feature list)
        part of a SequenceExample -- confirm against callers.
      items_to_handlers: a dictionary from items (strings) to ItemHandler
        instances. Note that the ItemHandler's are provided the keys that they
        use to return the final item Tensors.
    """
    self._items_to_handlers = items_to_handlers
    self._sequence_keys_to_features = sequence_keys_to_features
    self._context_keys_to_features = context_keys_to_features

impl_helper_test.py 文件源码项目：transform 作者: tensorflow 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def testMakeOutputDictError(self):
    """Checks that make_output_dict raises ValueError for bad sparse values.

    Each case feeds a SparseTensorValue for column 'a' (declared as a
    VarLenFeature of strings) and expects a ValueError whose message matches
    the given pattern.
    """
    schema = self.toSchema({'a': tf.VarLenFeature(tf.string)})
    sparse_values = [10.0, 20.0, 30.0]

    # (indices, dense_shape, expected error pattern)
    invalid_cases = [
        # SparseTensor that cannot be represented as VarLenFeature.
        ([(0, 2), (0, 4), (0, 8)], (1, 20),
         'cannot be decoded by ListColumnRepresentation'),
        # SparseTensor of invalid rank.
        ([(0, 0, 1), (0, 0, 2), (0, 0, 3)], (1, 10, 10),
         'cannot be decoded by ListColumnRepresentation'),
        # SparseTensor with indices that are out of order.
        ([(0, 2), (2, 4), (1, 8)], (3, 20),
         'Encountered out-of-order sparse index'),
    ]

    for indices, dense_shape, error_pattern in invalid_cases:
      fetches = {
          'a': tf.SparseTensorValue(indices=np.array(indices),
                                    values=np.array(sparse_values),
                                    dense_shape=dense_shape)
      }
      with self.assertRaisesRegexp(ValueError, error_pattern):
        _ = impl_helper.make_output_dict(schema, fetches)

impl_helper_test.py 文件源码项目：transform 作者: tensorflow 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def testRunPreprocessingFn(self):
    """Checks dtypes and shapes of tensors from run_preprocessing_fn.

    Runs a small preprocessing function against a schema with dense,
    var-len and sparse columns, then verifies every returned input and
    output tensor against its expected dtype and (compatible) static shape.
    """
    def preprocessing_fn(inputs):
      scaled = mappers.scale_to_0_1(inputs['dense_1'])
      reshaped = tf.sparse_reshape(inputs['sparse'], (1, 10))
      return {'dense_out': scaled, 'sparse_out': reshaped}

    schema = self.toSchema({
        'dense_1': tf.FixedLenFeature((), tf.float32),
        'dense_2': tf.FixedLenFeature((1, 2), tf.int64),
        'var_len': tf.VarLenFeature(tf.string),
        'sparse': tf.SparseFeature('ix', 'val', tf.float32, 100)
    })

    _, inputs, outputs = impl_helper.run_preprocessing_fn(
        preprocessing_fn, schema)

    # Expected (dtype, shape) per tensor; the leading None is the batch
    # dimension.
    expected_dtype_and_shape = {
        'dense_1': (tf.float32, tf.TensorShape([None])),
        'dense_2': (tf.int64, tf.TensorShape([None, 1, 2])),
        'var_len': (tf.string, tf.TensorShape([None, None])),
        'sparse': (tf.float32, tf.TensorShape([None, None])),
        'dense_out': (tf.float32, tf.TensorShape([None])),
        'sparse_out': (tf.float32, tf.TensorShape([None, None])),
    }

    # Verify the inputs first, then the outputs.
    for tensors in (inputs, outputs):
      for key, tensor in six.iteritems(tensors):
        expected_dtype, expected_shape = expected_dtype_and_shape[key]
        self.assertEqual(tensor.dtype, expected_dtype)
        tensor.get_shape().assert_is_compatible_with(expected_shape)

dataset_schema.py 文件源码项目：transform 作者: tensorflow 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def as_feature_spec(self):
    """Returns this ColumnSchema expressed as a feature spec.

    The work is delegated to `self.representation.as_feature_spec`, which
    receives this object as its argument. A feature spec (for a specific
    column) is one of a FixedLenFeature, SparseFeature or VarLenFeature.

    Returns:
      Whatever the representation produces for this ColumnSchema.
    """
    representation = self.representation
    return representation.as_feature_spec(self)

dataset_schema.py 文件源码项目：transform 作者: tensorflow 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def as_feature_spec(self, column):
    """Returns a tf.VarLenFeature carrying `column`'s domain dtype.

    Args:
      column: object whose `domain.dtype` is the dtype of the feature.

    Returns:
      `tf.VarLenFeature(column.domain.dtype)`.

    Raises:
      ValueError: if the dtype is not in `_TF_EXAMPLE_ALLOWED_TYPES`.
    """
    dtype = column.domain.dtype
    if dtype in _TF_EXAMPLE_ALLOWED_TYPES:
      return tf.VarLenFeature(dtype)

    message = ('tf.Example parser supports only types {}, so it is '
               'invalid to generate a feature_spec with type '
               '{}.').format(_TF_EXAMPLE_ALLOWED_TYPES, repr(dtype))
    raise ValueError(message)

dataset_schema.py 文件源码项目：transform 作者: tensorflow 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def from_feature_spec(feature_spec):
  """Convert a feature_spec to a Schema.

  Every entry of `feature_spec` is converted on its own with
  `_from_parse_feature` and stored under the same key.

  Args:
    feature_spec: a features specification in the format expected by
        tf.parse_example(), i.e.
        `{name: FixedLenFeature(...), name: VarLenFeature(...), ...}`

  Returns:
    A Schema representing the provided set of columns.
  """
  column_schemas = {}
  for name, spec in six.iteritems(feature_spec):
    column_schemas[name] = _from_parse_feature(spec)
  return Schema(column_schemas)

dataset_schema.py 文件源码项目：transform 作者: tensorflow 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def _from_parse_feature(parse_feature):
  """Convert a single feature spec to a ColumnSchema.

  Args:
    parse_feature: a tf.FixedLenFeature, tf.VarLenFeature or
        tf.SparseFeature instance.

  Returns:
    A ColumnSchema with the feature's dtype and a representation matching
    the kind of feature.

  Raises:
    ValueError: for tf.FixedLenSequenceFeature (not supported yet) and for
        any other unrecognised feature spec type.
  """
  if isinstance(parse_feature, tf.FixedLenFeature):
    # Dense column: keeps the declared shape and default value.
    fixed = FixedColumnRepresentation(parse_feature.default_value)
    return ColumnSchema(parse_feature.dtype, parse_feature.shape, fixed)
  elif isinstance(parse_feature, tf.FixedLenSequenceFeature):
    raise ValueError('DatasetSchema does not support '
                     'FixedLenSequenceFeature yet.')
  elif isinstance(parse_feature, tf.VarLenFeature):
    # Variable-length list: a single axis of unknown size.
    return ColumnSchema(parse_feature.dtype, [None],
                        ListColumnRepresentation())
  elif isinstance(parse_feature, tf.SparseFeature):
    # Sparse column: one index field plus one value field.
    sparse = SparseColumnRepresentation(
        value_field_name=parse_feature.value_key,
        index_fields=[
            SparseIndexField(name=parse_feature.index_key,
                             is_sorted=parse_feature.already_sorted)
        ])
    return ColumnSchema(parse_feature.dtype, [parse_feature.size], sparse)

  raise ValueError('Cannot interpret feature spec {} with type {}'.format(
      parse_feature, type(parse_feature)))

dataset_schema.py 文件源码项目：transform 作者: tensorflow 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def infer_column_schema_from_tensor(tensor):
  """Infer a ColumnSchema from a tensor.

  Args:
    tensor: a tf.SparseTensor or a dense tensor.

  Returns:
    A ColumnSchema with the tensor's dtype. Dense tensors get a
    FixedColumnRepresentation with axes derived from the static shape minus
    the batch dimension; sparse tensors get a ListColumnRepresentation with
    one axis of unknown size.
  """
  if not isinstance(tensor, tf.SparseTensor):
    dense_axes = _shape_to_axes(tensor.get_shape(),
                                remove_batch_dimension=True)
    return ColumnSchema(tensor.dtype, dense_axes,
                        FixedColumnRepresentation())

  # For SparseTensor, there's insufficient information to distinguish between
  # ListColumnRepresentation and SparseColumnRepresentation. So we just guess
  # the former, and callers are expected to handle the latter case on their
  # own (e.g. by requiring the user to provide the schema). This is a policy
  # motivated by the prevalence of VarLenFeature in current tf.Learn code.
  return ColumnSchema(tensor.dtype, [Axis(None)], ListColumnRepresentation())

test_input.py 文件源码项目：LiTeFlow 作者: petrux 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def _decode(message):
    """Parse one serialized example into its key and dense vector.

    Args:
        message: serialized example passed to `tf.parse_single_example`.

    Returns:
        A `(key, vector)` pair: the scalar int64 'key' feature and the
        variable-length int64 'vector' feature converted from sparse to
        dense.
    """
    parsed = tf.parse_single_example(
        serialized=message,
        features={
            'key': tf.FixedLenFeature([], tf.int64),
            'vector': tf.VarLenFeature(tf.int64),
        })
    return parsed['key'], tf.sparse_tensor_to_dense(parsed['vector'])

split_video.py 文件源码项目：Y8M 作者: mpekalski 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def frame_example_2_np(seq_example_bytes,
                       max_quantized_value=2,
                       min_quantized_value=-2):
  """Decode one serialized frame-level SequenceExample into evaluated values.

  A fresh graph and session are created on every call. The context carries
  'video_id' (string) and 'labels' (variable-length int64); the sequence part
  carries the 'rgb' and 'audio' features as byte strings. Each sequence
  feature is decoded as uint8, cast to float32, reshaped to [-1, size]
  (1024 for 'rgb', 128 for 'audio') and passed through `utils.Dequantize`.

  Args:
    seq_example_bytes: serialized SequenceExample to parse.
    max_quantized_value: forwarded to `utils.Dequantize`.
    min_quantized_value: forwarded to `utils.Dequantize`.

  Returns:
    A tuple `(vid, labs, rgb, audio)` of values evaluated by the session:
    the video id, the label values, and the dequantized 'rgb' and 'audio'
    matrices.
  """
  feature_names = ['rgb', 'audio']
  feature_sizes = [1024, 128]
  with tf.Graph().as_default():
    contexts, features = tf.parse_single_sequence_example(
        seq_example_bytes,
        context_features={
            "video_id": tf.FixedLenFeature([], tf.string),
            "labels": tf.VarLenFeature(tf.int64),
        },
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in feature_names
        })

    decoded_features = {
        name: tf.reshape(
            tf.cast(tf.decode_raw(features[name], tf.uint8), tf.float32),
            [-1, size])
        for name, size in zip(feature_names, feature_sizes)
    }
    feature_matrices = {
        name: utils.Dequantize(decoded_features[name],
                               max_quantized_value, min_quantized_value)
        for name in feature_names
    }

    with tf.Session() as sess:
      # Fetch everything in one run: separate run() calls would each
      # re-execute the parsing and decoding ops.
      vid, labs, rgb, audio = sess.run([
          contexts['video_id'],
          contexts['labels'].values,
          feature_matrices['rgb'],
          feature_matrices['audio'],
      ])

  return vid, labs, rgb, audio


#%% Split frame level file into three video level files: all, 1st half, 2nd half.

split_video.py 文件源码项目：Y8M 作者: mpekalski 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def build_graph():
    """Add the SequenceExample decoding ops to the default graph.

    Creates a string placeholder for one serialized SequenceExample, parses
    its 'video_id' and 'labels' context features and its 'rgb' / 'audio'
    sequence features, decodes each sequence feature (uint8 -> float32,
    reshaped to [-1, size]) and passes it through `utils.Dequantize` with
    the quantization range 2 / -2.

    Nothing is returned. The tensors are registered in graph collections:
    "vid_tsr", "labs_tsr", "rgb_tsr", "audio_tsr" and, for the placeholder,
    "seq_example_bytes".
    """
    max_quantized_value = 2
    min_quantized_value = -2
    # (feature name, row width) of every frame-level feature.
    name_and_size = [('rgb', 1024), ('audio', 128)]

    seq_example_bytes = tf.placeholder(tf.string)

    context_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    sequence_spec = {}
    for name, _ in name_and_size:
        sequence_spec[name] = tf.FixedLenSequenceFeature([], dtype=tf.string)

    contexts, features = tf.parse_single_sequence_example(
        seq_example_bytes,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # First decode every feature, then dequantize every feature.
    decoded_features = {}
    for name, size in name_and_size:
        raw_bytes = tf.decode_raw(features[name], tf.uint8)
        decoded_features[name] = tf.reshape(
            tf.cast(raw_bytes, tf.float32), [-1, size])

    feature_matrices = {}
    for name, _ in name_and_size:
        feature_matrices[name] = utils.Dequantize(
            decoded_features[name], max_quantized_value, min_quantized_value)

    collections = [
        ("vid_tsr", contexts['video_id']),
        ("labs_tsr", contexts['labels'].values),
        ("rgb_tsr", feature_matrices['rgb']),
        ("audio_tsr", feature_matrices['audio']),
        ("seq_example_bytes", seq_example_bytes),
    ]
    for collection_name, tensor in collections:
        tf.add_to_collection(collection_name, tensor)

#   with tf.Session() as sess:
#       writer = tf.summary.FileWriter('./graphs', sess.graph)

record.py 文件源码项目：XMUNMT 作者: XMUNLP 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def input_pipeline(file_pattern, mode, capacity=64):
    """Build the ops that read and decode "inputs"/"targets" examples.

    Args:
        file_pattern: pattern of the TFRecord files to read.
        mode: "train" enables shuffling, an unlimited number of epochs and
            up to 4 parallel readers; any other value reads a single epoch
            with one reader and no shuffling.
        capacity: used as `min_after_dequeue`; the queue capacity passed to
            the reader is twice this value.

    Returns:
        A dict with the keys "inputs" and "targets" whose values are the
        decoded tensors cast to int32.
    """
    keys_to_features = {
        "inputs": tf.VarLenFeature(tf.int64),
        "targets": tf.VarLenFeature(tf.int64)
    }

    items_to_handlers = {
        "inputs": tfexample_decoder.Tensor("inputs"),
        "targets": tfexample_decoder.Tensor("targets")
    }

    # Fix the item order once. The decoder returns tensors in the order of
    # the `items` argument, so the same list must be used to label them
    # (pairing with the keys of a different dict relies on both dicts
    # happening to iterate in the same order).
    item_names = list(items_to_handlers)

    # Now the non-trivial case construction.
    with tf.name_scope("examples_queue"):
        training = (mode == "train")
        # Read serialized examples using slim parallel_reader.
        num_epochs = None if training else 1
        data_files = parallel_reader.get_data_files(file_pattern)
        # Never start more readers than there are files.
        num_readers = min(4 if training else 1, len(data_files))
        _, serialized = parallel_reader.parallel_read(
            [file_pattern],
            tf.TFRecordReader,
            num_epochs=num_epochs,
            shuffle=training,
            capacity=2 * capacity,
            min_after_dequeue=capacity,
            num_readers=num_readers)

        decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                     items_to_handlers)
        decoded = decoder.decode(serialized, items=item_names)

        # We do not want int64s as they are not supported on GPUs.
        return {name: tf.to_int32(tensor)
                for name, tensor in zip(item_names, decoded)}

readfromtfrecord_batch.py 文件源码项目：SSD_tensorflow_VOC 作者: LevinJ 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def read_and_decode_single_example(filename_queue):
    """Read one serialized example from the queue and parse its features.

    Args:
        filename_queue: queue of file names handed to `TFRecordReader.read`.

    Returns:
        A tuple `(label, image, image_format)`: the 'image/class/label'
        int64 feature, the 'image/encoded' string feature (returned as
        parsed, not decoded) and the 'image/format' string feature.
    """
    # Unlike the TFRecordWriter, the TFRecordReader is symbolic: read()
    # yields a string Tensor holding a single serialized example.
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # Describe the format of the objects to be returned. The length of every
    # field is known; otherwise tf.VarLenFeature could be used.
    feature_spec = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/class/label': tf.FixedLenFeature(
            [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
    }
    parsed = tf.parse_single_example(serialized_example,
                                     features=feature_spec)

    # The image is handed back still encoded; JPEG decoding is left to the
    # caller.
    return (parsed['image/class/label'],
            parsed['image/encoded'],
            parsed['image/format'])

test_dataflow.py 文件源码项目：tefla 作者: openAGI 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def example_reading_spec(self):
        """Return the parsing spec for the example fields.

        Returns:
            A `(data_fields, data_items_to_decoders)` tuple. `data_fields`
            maps "inputs" and "targets" to variable-length int64 features
            and "floats" to a variable-length float32 feature;
            `data_items_to_decoders` is None.
        """
        int_field_names = ("inputs", "targets")
        spec = {name: tf.VarLenFeature(tf.int64) for name in int_field_names}
        spec["floats"] = tf.VarLenFeature(tf.float32)
        return (spec, None)

test_decoder.py 文件源码项目：tefla 作者: openAGI 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def example_reading_spec(self):
        """Return the parsing spec for the example fields.

        Returns:
            A `(data_fields, data_items_to_decoders)` tuple. `data_fields`
            maps "inputs" and "targets" to variable-length int64 features
            and "floats" to a variable-length float32 feature;
            `data_items_to_decoders` is None.
        """
        int_field_names = ("inputs", "targets")
        spec = {name: tf.VarLenFeature(tf.int64) for name in int_field_names}
        spec["floats"] = tf.VarLenFeature(tf.float32)
        return (spec, None)

textdataset.py 文件源码项目：tefla 作者: openAGI 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def example_reading_spec(self):
        """Return the parsing spec for the example fields.

        Returns:
            A `(data_fields, data_items_to_decoders)` tuple. `data_fields`
            maps "inputs" and "targets" to variable-length int64 features;
            `data_items_to_decoders` is None.
        """
        field_names = ("inputs", "targets")
        spec = {name: tf.VarLenFeature(tf.int64) for name in field_names}
        return (spec, None)

tfrecord_read.py 文件源码项目：Youtube8mdataset_kagglechallenge 作者: jasonlee27 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def prepare_reader(self,
                       filename_queue,
                       max_quantized_value=2,
                       min_quantized_value=-2):
        """Build the ops that read and decode one example from the queue.

        When `self.sequence_data` is true, the two features named by
        `self.feature_name[0]` and `self.feature_name[1]` are read as byte
        string sequences, decoded as uint8, cast to float32, reshaped to
        [-1, feature_size] and passed through `Dequantize`. Otherwise both
        are read as fixed-length float32 context features.

        Args:
            filename_queue: queue of file names for `TFRecordReader.read`.
            max_quantized_value: forwarded to `Dequantize`.
            min_quantized_value: forwarded to `Dequantize`.

        Returns:
            A tuple `(video_id, video_matrix, audio_matrix, labels,
            num_frames)`. In sequence mode `num_frames` is the row count of
            the decoded second (audio) feature capped at `self.max_frames`;
            otherwise it is the constant -1.
        """
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        video_key, audio_key = self.feature_name[0], self.feature_name[1]
        context_features = {
            "video_id": tf.FixedLenFeature([], tf.string),
            "labels": tf.VarLenFeature(tf.int64),
        }
        if self.sequence_data:
            sequence_features = {
                video_key: tf.FixedLenSequenceFeature([], dtype=tf.string),
                audio_key: tf.FixedLenSequenceFeature([], dtype=tf.string),
            }
        else:
            # Without frame sequences both features live in the context.
            sequence_features = None
            context_features[video_key] = tf.FixedLenFeature(
                self.feature_size[0], tf.float32)
            context_features[audio_key] = tf.FixedLenFeature(
                self.feature_size[1], tf.float32)

        contexts, features = tf.parse_single_sequence_example(
            serialized_example,
            context_features=context_features,
            sequence_features=sequence_features)
        labels = tf.cast(contexts["labels"].values, tf.int64)

        if not self.sequence_data:
            video_matrix = contexts[video_key]
            audio_matrix = contexts[audio_key]
            num_frames = tf.constant(-1)
        else:
            video_bytes = tf.decode_raw(features[video_key], tf.uint8)
            video_frames = tf.reshape(tf.cast(video_bytes, tf.float32),
                                      [-1, self.feature_size[0]])
            video_matrix = Dequantize(video_frames,
                                      max_quantized_value,
                                      min_quantized_value)

            audio_bytes = tf.decode_raw(features[audio_key], tf.uint8)
            audio_frames = tf.reshape(tf.cast(audio_bytes, tf.float32),
                                      [-1, self.feature_size[1]])
            audio_matrix = Dequantize(audio_frames,
                                      max_quantized_value,
                                      min_quantized_value)

            # The frame count comes from the audio feature's rows.
            num_frames = tf.minimum(tf.shape(audio_frames)[0],
                                    self.max_frames)

        # NOTE: the matrices are not padded or truncated to 'max_frames'
        # frames here.
        return (contexts["video_id"], video_matrix, audio_matrix, labels,
                num_frames)

tensorflow_dataframe_test.py 文件源码项目：lsdc 作者: febert 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def testFromCSVWithFeatureSpec(self):
    """Compares a sparse CSV loaded by pandas and by TensorFlowDataFrame.

    The same CSV file is read with `pd.read_csv` and with
    `TensorFlowDataFrame.from_csv_with_feature_spec`; the sparse "float" and
    "bool" columns are densified before both frames are compared batch by
    batch. Returns immediately (without checking anything) when pandas is
    not available.
    """
    if not HAS_PANDAS:
      return
    num_batches = 100
    batch_size = 8

    data_path = _make_test_csv_sparse()
    feature_spec = {
        "int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
        "float": tf.VarLenFeature(dtypes.float16),
        "bool": tf.VarLenFeature(dtypes.bool),
        "string": tf.FixedLenFeature(None, dtypes.string, "")
    }

    pandas_df = pd.read_csv(data_path, dtype={"string": object})
    # Pandas insanely uses NaN for empty cells in a string column.
    # And, we can't use Pandas replace() to fix them because nan != nan
    s = pandas_df["string"]
    for i in range(len(s)):
      if isinstance(s[i], float) and math.isnan(s[i]):
        # `.at` is the supported scalar setter; DataFrame.set_value is
        # deprecated and no longer exists in pandas >= 1.0.
        pandas_df.at[i, "string"] = ""
    tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
        [data_path],
        batch_size=batch_size,
        shuffle=False,
        feature_spec=feature_spec)

    # These columns were sparse; re-densify them for comparison
    tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
    tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

    self._assert_pandas_equals_tensorflow(pandas_df,
                                          tensorflow_df,
                                          num_batches=num_batches,
                                          batch_size=batch_size)

tensorflow_dataframe_test.py 文件源码项目：lsdc 作者: febert 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def testFromCSVWithFeatureSpec(self):
    """Compares a sparse CSV loaded by pandas and by TensorFlowDataFrame.

    The same CSV file is read with `pd.read_csv` and with
    `TensorFlowDataFrame.from_csv_with_feature_spec`; the sparse "float" and
    "bool" columns are densified before both frames are compared batch by
    batch. Returns immediately (without checking anything) when pandas is
    not available.
    """
    if not HAS_PANDAS:
      return
    num_batches = 100
    batch_size = 8

    data_path = _make_test_csv_sparse()
    feature_spec = {
        "int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
        "float": tf.VarLenFeature(dtypes.float16),
        "bool": tf.VarLenFeature(dtypes.bool),
        "string": tf.FixedLenFeature(None, dtypes.string, "")
    }

    pandas_df = pd.read_csv(data_path, dtype={"string": object})
    # Pandas insanely uses NaN for empty cells in a string column.
    # And, we can't use Pandas replace() to fix them because nan != nan
    s = pandas_df["string"]
    for i in range(len(s)):
      if isinstance(s[i], float) and math.isnan(s[i]):
        # `.at` is the supported scalar setter; DataFrame.set_value is
        # deprecated and no longer exists in pandas >= 1.0.
        pandas_df.at[i, "string"] = ""
    tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
        [data_path],
        batch_size=batch_size,
        shuffle=False,
        feature_spec=feature_spec)

    # These columns were sparse; re-densify them for comparison
    tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
    tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

    self._assert_pandas_equals_tensorflow(pandas_df,
                                          tensorflow_df,
                                          num_batches=num_batches,
                                          batch_size=batch_size)