def test_dask_iris_classification(self):
  if HAS_DASK and HAS_PANDAS:
    import pandas as pd  # pylint: disable=g-import-not-at-top
    import dask.dataframe as dd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    data = pd.DataFrame(iris.data)
    data = dd.from_pandas(data, npartitions=2)
    labels = pd.DataFrame(iris.target)
    labels = dd.from_pandas(labels, npartitions=2)
    classifier = learn.LinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(data),
        n_classes=3)
    classifier.fit(data, labels, steps=100)
    predictions = data.map_partitions(classifier.predict).compute()
    score = accuracy_score(labels.compute(), predictions)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def make_input_pipeline_from_def(def_dict, mode, **kwargs):
  """Creates an InputPipeline object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "class" and "params" that correspond to the class
      name and constructor parameters of an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys

  Returns:
    A new InputPipeline object
  """
  if "class" not in def_dict:
    raise ValueError("Input Pipeline definition must have a class property.")
  class_ = def_dict["class"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Input Pipeline class: {}".format(class_))
  pipeline_class = getattr(sys.modules[__name__], class_)
  # Constructor arguments
  params = {}
  if "params" in def_dict:
    params.update(def_dict["params"])
  params.update(kwargs)
  return pipeline_class(params=params, mode=mode)
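A hedged usage sketch: the pipeline class name and file paths below are illustrative placeholders, not guaranteed members of this module; any InputPipeline subclass defined here would be resolved the same way.

# Illustrative only: "ParallelTextInputPipeline" and its params are
# assumptions; substitute any InputPipeline subclass defined in this module.
pipeline_def = {
    "class": "ParallelTextInputPipeline",
    "params": {
        "source_files": ["data/train.sources.txt"],
        "target_files": ["data/train.targets.txt"],
    }
}
pipeline = make_input_pipeline_from_def(
    pipeline_def, mode=tf.contrib.learn.ModeKeys.TRAIN)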
def get_feature_columns(mode):
  feature_columns = []
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="context", dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="context_len", dimension=1, dtype=tf.int64))
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="utterance", dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="utterance_len", dimension=1, dtype=tf.int64))
  if mode == tf.contrib.learn.ModeKeys.TRAIN:
    # During training we have a label feature
    feature_columns.append(tf.contrib.layers.real_valued_column(
        column_name="label", dimension=1, dtype=tf.int64))
  if mode == tf.contrib.learn.ModeKeys.EVAL:
    # During evaluation we have distractors
    for i in range(9):
      feature_columns.append(tf.contrib.layers.real_valued_column(
          column_name="distractor_{}".format(i),
          dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
      feature_columns.append(tf.contrib.layers.real_valued_column(
          column_name="distractor_{}_len".format(i), dimension=1,
          dtype=tf.int64))
  return set(feature_columns)
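A quick illustration of how the returned column set depends on the mode; this is a sketch based solely on the definition above, and the counts follow directly from it.

# TRAIN adds one "label" column to the 4 base columns; EVAL instead adds
# 9 distractor columns plus their lengths.
train_cols = get_feature_columns(tf.contrib.learn.ModeKeys.TRAIN)
eval_cols = get_feature_columns(tf.contrib.learn.ModeKeys.EVAL)
assert len(train_cols) == 5   # 4 base columns + label
assert len(eval_cols) == 22   # 4 base columns + 9 * 2 distractor columns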
def create_input_fn(mode, input_files, batch_size, num_epochs):
  def input_fn():
    features = tf.contrib.layers.create_feature_spec_for_parsing(
        get_feature_columns(mode))
    feature_map = tf.contrib.learn.io.read_batch_features(
        file_pattern=input_files,
        batch_size=batch_size,
        features=features,
        reader=tf.TFRecordReader,
        randomize_input=True,
        num_epochs=num_epochs,
        queue_capacity=200000 + batch_size * 10,
        name="read_batch_features_{}".format(mode))
    # This is an ugly hack because of a current bug in tf.learn:
    # during evaluation TF tries to restore the epoch variable, which
    # isn't defined during training, so we define the variable manually here.
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
      tf.get_variable(
          "read_batch_features_eval/file_name_queue/limit_epochs/epochs",
          initializer=tf.constant(0, dtype=tf.int64))
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
      target = feature_map.pop("label")
    else:
      # In evaluation we have 10 classes (utterances);
      # the first one (index 0) is always the correct one.
      target = tf.zeros([batch_size, 1], dtype=tf.int64)
    return feature_map, target
  return input_fn
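A hedged wiring sketch: the file names are placeholders, and `estimator` stands in for a dual-encoder estimator constructed elsewhere.

# Placeholder paths and estimator; illustrative wiring only.
train_input_fn = create_input_fn(
    mode=tf.contrib.learn.ModeKeys.TRAIN,
    input_files=["train.tfrecords"],
    batch_size=128,
    num_epochs=None)  # None cycles through the data indefinitely
eval_input_fn = create_input_fn(
    mode=tf.contrib.learn.ModeKeys.EVAL,
    input_files=["validation.tfrecords"],
    batch_size=16,
    num_epochs=1)
estimator.fit(input_fn=train_input_fn, steps=20000)
estimator.evaluate(input_fn=eval_input_fn, steps=None)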
def test_pandas_dataframe(self):
  if HAS_PANDAS:
    import pandas as pd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    data = pd.DataFrame(iris.data)
    labels = pd.DataFrame(iris.target)
    classifier = learn.LinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(data),
        n_classes=3)
    classifier.fit(data, labels, steps=100)
    score = accuracy_score(labels[0], classifier.predict(data))
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
  else:
    print("No pandas installed. pandas-related tests are skipped.")
def test_pandas_series(self):
  if HAS_PANDAS:
    import pandas as pd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    data = pd.DataFrame(iris.data)
    labels = pd.Series(iris.target)
    classifier = learn.LinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(data),
        n_classes=3)
    classifier.fit(data, labels, steps=100)
    score = accuracy_score(labels, classifier.predict(data))
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def test_string_data_formats(self):
  if HAS_PANDAS:
    import pandas as pd  # pylint: disable=g-import-not-at-top
    with self.assertRaises(ValueError):
      learn.io.extract_pandas_data(pd.DataFrame({"Test": ["A", "B"]}))
    with self.assertRaises(ValueError):
      learn.io.extract_pandas_labels(pd.DataFrame({"Test": ["A", "B"]}))
def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False,
                    scope=None):
  """Creates an input function that can be used with tf.learn estimators.

  Note that you must pass "factory functions" for both the data provider and
  featurizer to ensure that everything will be created in the same graph.

  Args:
    pipeline: An instance of `seq2seq.data.InputPipeline`.
    batch_size: Create batches of this size. A queue to hold a
      reasonable number of batches in memory is created.
    bucket_boundaries: int list, increasing non-negative numbers.
      If None, no bucketing is performed.
    allow_smaller_final_batch: If True, the final batch may be smaller
      than batch_size.
    scope: Optional variable scope name for the input function.

  Returns:
    An input function that returns `(feature_batch, labels_batch)`
    tuples when called.
  """
  def input_fn():
    """Creates features and labels."""
    with tf.variable_scope(scope or "input_fn"):
      data_provider = pipeline.make_data_provider()
      features_and_labels = pipeline.read_from_data_provider(data_provider)
      if bucket_boundaries:
        _, batch = tf.contrib.training.bucket_by_sequence_length(
            input_length=features_and_labels["source_len"],
            bucket_boundaries=bucket_boundaries,
            tensors=features_and_labels,
            batch_size=batch_size,
            keep_input=features_and_labels["source_len"] >= 1,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="bucket_queue")
      else:
        batch = tf.train.batch(
            tensors=features_and_labels,
            enqueue_many=False,
            batch_size=batch_size,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="batch_queue")

      # Separate features and labels
      features_batch = {k: batch[k] for k in pipeline.feature_keys}
      if set(batch.keys()).intersection(pipeline.label_keys):
        labels_batch = {k: batch[k] for k in pipeline.label_keys}
      else:
        labels_batch = None
      return features_batch, labels_batch

  return input_fn
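A hedged usage sketch, assuming `pipeline` is an InputPipeline instance (e.g. built with make_input_pipeline_from_def above); the bucket boundaries are arbitrary example values.

# Bucketing groups sequences of similar length to reduce padding waste.
train_input_fn = create_input_fn(
    pipeline=pipeline,
    batch_size=32,
    bucket_boundaries=[10, 20, 30, 40],  # example boundaries
    allow_smaller_final_batch=False)
# Typically handed to an Estimator or Experiment; calling it directly
# builds the read-and-batch queues in the current graph:
features_batch, labels_batch = train_input_fn()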
def get_estimator(args, output_dir, features, stats, target_vocab_size):
  # Check layers used for dnn models.
  if is_dnn_model(args.model) and not args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* must be used with DNN models')
  if is_linear_model(args.model) and args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* cannot be used with linear models')

  # Build tf.learn features
  feature_columns = build_feature_columns(features, stats, args.model)

  # Set how often to run checkpointing in terms of steps.
  config = tf.contrib.learn.RunConfig(
      save_checkpoints_steps=args.min_eval_frequency)
  train_dir = os.path.join(output_dir, 'train')

  if args.model == 'dnn_regression':
    estimator = tf.contrib.learn.DNNRegressor(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_regression':
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=feature_columns,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  elif args.model == 'dnn_classification':
    estimator = tf.contrib.learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_classification':
    estimator = tf.contrib.learn.LinearClassifier(
        feature_columns=feature_columns,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  else:
    raise ValueError('bad --model-type value')

  return estimator
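A hedged invocation sketch: the args namespace mirrors the flags referenced above, and the features/stats objects are placeholders for whatever build_feature_columns actually expects.

import argparse

# Placeholders: the real objects come from the surrounding training script.
features, stats = {}, {}
args = argparse.Namespace(
    model='dnn_classification',
    hidden_layer_sizes=[64, 32],
    learning_rate=0.01,
    epsilon=0.0005,
    l1_regularization=0.0,
    l2_regularization=0.0,
    min_eval_frequency=100)
estimator = get_estimator(args, '/tmp/model', features, stats,
                          target_vocab_size=10)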
def read_examples(input_files, batch_size, shuffle, num_epochs=None):
  """Creates readers and queues for reading example protos."""
  files = []
  for e in input_files:
    for path in e.split(','):
      files.extend(file_io.get_matching_files(path))
  thread_count = multiprocessing.cpu_count()

  # The minimum number of instances in a queue from which examples are drawn
  # randomly. The larger this number, the more randomness at the expense of
  # higher memory requirements.
  min_after_dequeue = 1000

  # When batching data, the queue's capacity will be larger than the
  # batch_size by some factor. The recommended formula is (num_threads + a
  # small safety margin). For now, we use a single thread for reading, so
  # this can be small.
  queue_size_multiplier = thread_count + 3

  # Convert num_epochs == 0 -> num_epochs is None, if necessary
  num_epochs = num_epochs or None

  # Build a queue of the filenames to be read.
  filename_queue = tf.train.string_input_producer(files, num_epochs, shuffle)

  example_id, encoded_example = tf.TextLineReader().read_up_to(
      filename_queue, batch_size)

  if shuffle:
    capacity = min_after_dequeue + queue_size_multiplier * batch_size
    return tf.train.shuffle_batch(
        [example_id, encoded_example],
        batch_size,
        capacity,
        min_after_dequeue,
        enqueue_many=True,
        num_threads=thread_count)
  else:
    capacity = queue_size_multiplier * batch_size
    return tf.train.batch(
        [example_id, encoded_example],
        batch_size,
        capacity=capacity,
        enqueue_many=True,
        num_threads=thread_count)
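Queue-based readers such as this one only produce data once queue runners are started. A minimal driving loop, assuming TF 1.x session semantics; the file pattern is a placeholder.

example_ids, examples = read_examples(
    input_files=['data/train*.csv'],  # placeholder pattern
    batch_size=64,
    shuffle=True,
    num_epochs=1)
with tf.Session() as sess:
  # local_variables_initializer is needed for the num_epochs counter.
  sess.run([tf.global_variables_initializer(),
            tf.local_variables_initializer()])
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  try:
    while not coord.should_stop():
      ids, rows = sess.run([example_ids, examples])
  except tf.errors.OutOfRangeError:
    pass  # input exhausted after num_epochs passes
  finally:
    coord.request_stop()
    coord.join(threads)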
# ==============================================================================
# Building the TF learn estimators
# ==============================================================================
def _build_input_fn(input_file_pattern, batch_size, mode):
  """Build input function.

  Args:
    input_file_pattern: The file pattern for examples
    batch_size: Batch size
    mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

  Returns:
    An input function that returns a tuple of a dictionary mapping
    feature column names to tensors, and the target labels.
  """
  def _input_fn():
    """Supplies the input to the model.

    Returns:
      A tuple consisting of 1) a dictionary of tensors whose keys are
      the feature names, and 2) a tensor of target labels if the mode
      is not INFER (and None, otherwise).
    """
    logging.info("Reading files from %s", input_file_pattern)
    input_files = sorted(list(tf.gfile.Glob(input_file_pattern)))
    logging.info("Reading files from %s", input_files)
    include_target_column = (mode != tf.contrib.learn.ModeKeys.INFER)
    features_spec = tf.contrib.layers.create_feature_spec_for_parsing(
        feature_columns=_get_feature_columns(include_target_column))

    if FLAGS.use_gzip:
      def gzip_reader():
        return tf.TFRecordReader(
            options=tf.python_io.TFRecordOptions(
                compression_type=TFRecordCompressionType.GZIP))
      reader_fn = gzip_reader
    else:
      reader_fn = tf.TFRecordReader

    features = tf.contrib.learn.io.read_batch_features(
        file_pattern=input_files,
        batch_size=batch_size,
        queue_capacity=3 * batch_size,
        randomize_input=(mode == tf.contrib.learn.ModeKeys.TRAIN),
        feature_queue_capacity=FLAGS.feature_queue_capacity,
        reader=reader_fn,
        features=features_spec)

    target = None
    if include_target_column:
      target = features.pop(FLAGS.target_field)
    return features, target

  return _input_fn
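A hedged sketch of plugging the factory into training; the file pattern is a placeholder, and `estimator` is built as in the sketch after _build_model_fn below.

train_input_fn = _build_input_fn(
    input_file_pattern='data/train-*.tfrecord',  # placeholder
    batch_size=256,
    mode=tf.contrib.learn.ModeKeys.TRAIN)
estimator.fit(input_fn=train_input_fn, steps=1000)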
def _build_model_fn():
  """Build model function.

  Returns:
    A model function that can be passed to `Estimator` constructor.
  """
  def _model_fn(features, labels, mode):
    """Creates the prediction and its loss.

    Args:
      features: A dictionary of tensors keyed by the feature name.
      labels: A tensor representing the labels.
      mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

    Returns:
      A tuple consisting of the prediction, loss, and train_op.
    """
    # Generate one embedding per sparse feature column and concatenate them.
    concat_embeddings = tf.contrib.layers.input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=_get_feature_columns(include_target_column=False))

    # Add one hidden layer.
    hidden_layer_0 = tf.contrib.layers.relu(
        concat_embeddings, FLAGS.hidden_units)

    # Output and logistic loss.
    logits = tf.contrib.layers.linear(hidden_layer_0, FLAGS.num_classes)
    predictions = tf.contrib.layers.softmax(logits)

    if mode == tf.contrib.learn.ModeKeys.INFER:
      predictions = {
          tf.contrib.learn.PredictionKey.PROBABILITIES: predictions,
          PREDICTION_KEY: features[PREDICTION_KEY]
      }
      output_alternatives = {
          DEFAULT_OUTPUT_ALTERNATIVE: (tf.contrib.learn.ProblemType.UNSPECIFIED,
                                       predictions)
      }
      return model_fn.ModelFnOps(
          mode=mode,
          predictions=predictions,
          output_alternatives=output_alternatives)

    target_one_hot = tf.one_hot(labels, FLAGS.num_classes)
    target_one_hot = tf.reduce_sum(
        input_tensor=target_one_hot, reduction_indices=[1])
    loss = tf.losses.softmax_cross_entropy(target_one_hot, logits)
    if mode == tf.contrib.learn.ModeKeys.EVAL:
      return predictions, loss, None

    opt = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=FLAGS.learning_rate,
        optimizer=opt)
    return model_fn.ModelFnOps(
        mode=mode, predictions=predictions, loss=loss, train_op=train_op)

  return _model_fn
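Because _build_model_fn returns a plain model_fn, it plugs into the generic tf.contrib.learn.Estimator; a minimal sketch with a placeholder model directory:

estimator = tf.contrib.learn.Estimator(
    model_fn=_build_model_fn(),
    model_dir='/tmp/model')  # placeholder directory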
def _def_experiment(
    train_file_pattern, eval_file_pattern, batch_size):
  """Creates the function used to configure the experiment runner.

  This function creates a function that is used by the learn_runner
  module to create an Experiment.

  Args:
    train_file_pattern: The directory the train data can be found in.
    eval_file_pattern: The directory the test data can be found in.
    batch_size: Batch size

  Returns:
    A function that creates an Experiment object for the runner.
  """
  def _experiment_fn(output_dir):
    """Experiment function used by learn_runner to run training/eval/etc.

    Args:
      output_dir: String path of directory to use for outputs.

    Returns:
      tf.learn `Experiment`.
    """
    estimator = tf.contrib.learn.Estimator(
        model_fn=_build_model_fn(),
        model_dir=output_dir)
    train_input_fn = _build_input_fn(
        input_file_pattern=train_file_pattern,
        batch_size=batch_size,
        mode=tf.contrib.learn.ModeKeys.TRAIN)
    eval_input_fn = _build_input_fn(
        input_file_pattern=eval_file_pattern,
        batch_size=batch_size,
        mode=tf.contrib.learn.ModeKeys.EVAL)

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        train_steps=FLAGS.num_train_steps,
        eval_input_fn=eval_input_fn,
        eval_steps=FLAGS.num_eval_steps,
        eval_metrics=_create_evaluation_metrics(),
        min_eval_frequency=100,
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                _predict_input_fn,
                exports_to_keep=5,
                default_output_alternative_key=DEFAULT_OUTPUT_ALTERNATIVE)
        ])

  return _experiment_fn
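A hedged sketch of driving the experiment factory with learn_runner, as the docstring describes; the paths and batch size are placeholders.

from tensorflow.contrib.learn.python.learn import learn_runner

experiment_fn = _def_experiment(
    train_file_pattern='data/train-*.tfrecord',  # placeholder
    eval_file_pattern='data/eval-*.tfrecord',    # placeholder
    batch_size=128)
learn_runner.run(experiment_fn=experiment_fn, output_dir='/tmp/experiment')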