def bucketize(x, num_buckets, epsilon=None, name=None):
  """Returns a bucketized column, with a bucket index assigned to each input.

  Args:
    x: A numeric input `Tensor` whose values should be mapped to buckets.
    num_buckets: Values in the input `x` are divided into approximately
      equal-sized buckets, where the number of buckets is `num_buckets`.
    epsilon: (Optional) Error tolerance, typically a small fraction close to
      zero. If a value is not specified by the caller, a suitable value is
      computed based on experimental results. For `num_buckets` less than 100,
      the value of 0.01 is chosen to handle a dataset of up to ~1 trillion
      input data values. If `num_buckets` is larger, then epsilon is set to
      (1/`num_buckets`) to enforce a stricter error tolerance, because more
      buckets will result in a smaller range for each bucket, and so we want
      the boundaries to be less fuzzy. See analyzers.quantiles() for details.
    name: (Optional) A name for this operation.

  Returns:
    A `Tensor` of the same shape as `x`, with each element in the
    returned tensor representing the bucketized value. Bucketized value is
    in the range [0, num_buckets).

  Raises:
    TypeError: If `num_buckets` is not an int.
    ValueError: If `num_buckets` is less than 1.
  """
  with tf.name_scope(name, 'bucketize'):
    if not isinstance(num_buckets, int):
      # Fix: apply %-formatting; previously the format arg was passed as a
      # second exception-constructor argument, so the message never rendered.
      raise TypeError('num_buckets must be an int, got %s' % type(num_buckets))
    if num_buckets < 1:
      raise ValueError('Invalid num_buckets %d' % num_buckets)
    if epsilon is None:
      # See explanation in args documentation for epsilon: 0.01 suffices for
      # <100 buckets; larger bucket counts need proportionally tighter error.
      epsilon = min(1.0 / num_buckets, 0.01)
    bucket_boundaries = analyzers.quantiles(x, num_buckets, epsilon)
    buckets = quantile_ops.bucketize_with_input_boundaries(
        x,
        boundaries=bucket_boundaries,
        name='assign_buckets')
    # Convert to int64 because int32 is not compatible with tf.Example parser.
    # See _TF_EXAMPLE_ALLOWED_TYPES in FixedColumnRepresentation()
    # in tf_metadata/dataset_schema.py
    return tf.to_int64(buckets)