def test_dask_iris_classification(self):
  if HAS_DASK and HAS_PANDAS:
    import pandas as pd  # pylint: disable=g-import-not-at-top
    import dask.dataframe as dd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    data = pd.DataFrame(iris.data)
    data = dd.from_pandas(data, npartitions=2)
    labels = pd.DataFrame(iris.target)
    labels = dd.from_pandas(labels, npartitions=2)
    classifier = learn.LinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(data),
        n_classes=3)
    classifier.fit(data, labels, steps=100)
    predictions = data.map_partitions(classifier.predict).compute()
    score = accuracy_score(labels.compute(), predictions)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def make_input_pipeline_from_def(def_dict, mode, **kwargs):
  """Creates an InputPipeline object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "class" and "params" that correspond to the class
      name and constructor parameters of an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys

  Returns:
    A new InputPipeline object
  """
  if "class" not in def_dict:
    raise ValueError("Input Pipeline definition must have a class property.")
  class_ = def_dict["class"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Input Pipeline class: {}".format(class_))
  pipeline_class = getattr(sys.modules[__name__], class_)
  # Constructor arguments
  params = {}
  if "params" in def_dict:
    params.update(def_dict["params"])
  params.update(kwargs)
  return pipeline_class(params=params, mode=mode)
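A hedged usage sketch: the pipeline class name and file paths below are illustrative placeholders, not guaranteed members of this module; any InputPipeline subclass defined here would be resolved the same way.

# Illustrative only: "ParallelTextInputPipeline" and its params are
# assumptions; substitute any InputPipeline subclass defined in this module.
pipeline_def = {
    "class": "ParallelTextInputPipeline",
    "params": {
        "source_files": ["data/train.sources.txt"],
        "target_files": ["data/train.targets.txt"],
    }
}
pipeline = make_input_pipeline_from_def(
    pipeline_def, mode=tf.contrib.learn.ModeKeys.TRAIN)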
def get_feature_columns(mode):
  feature_columns = []
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="context", dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="context_len", dimension=1, dtype=tf.int64))
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="utterance", dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="utterance_len", dimension=1, dtype=tf.int64))
  if mode == tf.contrib.learn.ModeKeys.TRAIN:
    # During training we have a label feature
    feature_columns.append(tf.contrib.layers.real_valued_column(
        column_name="label", dimension=1, dtype=tf.int64))
  if mode == tf.contrib.learn.ModeKeys.EVAL:
    # During evaluation we have distractors
    for i in range(9):
      feature_columns.append(tf.contrib.layers.real_valued_column(
          column_name="distractor_{}".format(i),
          dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
      feature_columns.append(tf.contrib.layers.real_valued_column(
          column_name="distractor_{}_len".format(i), dimension=1,
          dtype=tf.int64))
  return set(feature_columns)
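A quick illustration of how the returned column set depends on the mode; this is a sketch based solely on the definition above, and the counts follow directly from it.

# TRAIN adds one "label" column to the 4 base columns; EVAL instead adds
# 9 distractor columns plus their lengths.
train_cols = get_feature_columns(tf.contrib.learn.ModeKeys.TRAIN)
eval_cols = get_feature_columns(tf.contrib.learn.ModeKeys.EVAL)
assert len(train_cols) == 5   # 4 base columns + label
assert len(eval_cols) == 22   # 4 base columns + 9 * 2 distractor columns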
def create_input_fn(mode, input_files, batch_size, num_epochs):
  def input_fn():
    features = tf.contrib.layers.create_feature_spec_for_parsing(
        get_feature_columns(mode))
    feature_map = tf.contrib.learn.io.read_batch_features(
        file_pattern=input_files,
        batch_size=batch_size,
        features=features,
        reader=tf.TFRecordReader,
        randomize_input=True,
        num_epochs=num_epochs,
        queue_capacity=200000 + batch_size * 10,
        name="read_batch_features_{}".format(mode))
    # This is an ugly hack because of a current bug in tf.learn:
    # during evaluation TF tries to restore the epoch variable, which
    # isn't defined during training, so we define the variable manually here.
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
      tf.get_variable(
          "read_batch_features_eval/file_name_queue/limit_epochs/epochs",
          initializer=tf.constant(0, dtype=tf.int64))
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
      target = feature_map.pop("label")
    else:
      # In evaluation we have 10 classes (utterances);
      # the first one (index 0) is always the correct one.
      target = tf.zeros([batch_size, 1], dtype=tf.int64)
    return feature_map, target
  return input_fn
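A hedged wiring sketch: the file names are placeholders, and `estimator` stands in for a dual-encoder estimator constructed elsewhere.

# Placeholder paths and estimator; illustrative wiring only.
train_input_fn = create_input_fn(
    mode=tf.contrib.learn.ModeKeys.TRAIN,
    input_files=["train.tfrecords"],
    batch_size=128,
    num_epochs=None)  # None cycles through the data indefinitely
eval_input_fn = create_input_fn(
    mode=tf.contrib.learn.ModeKeys.EVAL,
    input_files=["validation.tfrecords"],
    batch_size=16,
    num_epochs=1)
estimator.fit(input_fn=train_input_fn, steps=20000)
estimator.evaluate(input_fn=eval_input_fn, steps=None)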
def test_pandas_dataframe(self):
  if HAS_PANDAS:
    import pandas as pd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    data = pd.DataFrame(iris.data)
    labels = pd.DataFrame(iris.target)
    classifier = learn.LinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(data),
        n_classes=3)
    classifier.fit(data, labels, steps=100)
    score = accuracy_score(labels[0], classifier.predict(data))
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
  else:
    print("No pandas installed. pandas-related tests are skipped.")
def test_pandas_series(self):
  if HAS_PANDAS:
    import pandas as pd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    data = pd.DataFrame(iris.data)
    labels = pd.Series(iris.target)
    classifier = learn.LinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(data),
        n_classes=3)
    classifier.fit(data, labels, steps=100)
    score = accuracy_score(labels, classifier.predict(data))
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def test_string_data_formats(self):
  if HAS_PANDAS:
    import pandas as pd  # pylint: disable=g-import-not-at-top
    with self.assertRaises(ValueError):
      learn.io.extract_pandas_data(pd.DataFrame({"Test": ["A", "B"]}))
    with self.assertRaises(ValueError):
      learn.io.extract_pandas_labels(pd.DataFrame({"Test": ["A", "B"]}))
def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False,
                    scope=None):
  """Creates an input function that can be used with tf.learn estimators.

  Note that you must pass "factory functions" for both the data provider and
  featurizer to ensure that everything will be created in the same graph.

  Args:
    pipeline: An instance of `seq2seq.data.InputPipeline`.
    batch_size: Create batches of this size. A queue to hold a
      reasonable number of batches in memory is created.
    bucket_boundaries: int list, increasing non-negative numbers.
      If None, no bucketing is performed.
    allow_smaller_final_batch: If True, the final batch may be smaller
      than batch_size.
    scope: Optional variable scope name for the input function.

  Returns:
    An input function that returns `(feature_batch, labels_batch)`
    tuples when called.
  """
  def input_fn():
    """Creates features and labels."""
    with tf.variable_scope(scope or "input_fn"):
      data_provider = pipeline.make_data_provider()
      features_and_labels = pipeline.read_from_data_provider(data_provider)
      if bucket_boundaries:
        _, batch = tf.contrib.training.bucket_by_sequence_length(
            input_length=features_and_labels["source_len"],
            bucket_boundaries=bucket_boundaries,
            tensors=features_and_labels,
            batch_size=batch_size,
            keep_input=features_and_labels["source_len"] >= 1,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="bucket_queue")
      else:
        batch = tf.train.batch(
            tensors=features_and_labels,
            enqueue_many=False,
            batch_size=batch_size,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="batch_queue")

      # Separate features and labels
      features_batch = {k: batch[k] for k in pipeline.feature_keys}
      if set(batch.keys()).intersection(pipeline.label_keys):
        labels_batch = {k: batch[k] for k in pipeline.label_keys}
      else:
        labels_batch = None
      return features_batch, labels_batch

  return input_fn
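A hedged usage sketch, assuming `pipeline` is an InputPipeline instance (e.g. built with make_input_pipeline_from_def above); the bucket boundaries are arbitrary example values.

# Bucketing groups sequences of similar length to reduce padding waste.
train_input_fn = create_input_fn(
    pipeline=pipeline,
    batch_size=32,
    bucket_boundaries=[10, 20, 30, 40],  # example boundaries
    allow_smaller_final_batch=False)
# Typically handed to an Estimator or Experiment; calling it directly
# builds the read-and-batch queues in the current graph:
features_batch, labels_batch = train_input_fn()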
def get_estimator(args, output_dir, features, stats, target_vocab_size):
  # Check layers used for dnn models.
  if is_dnn_model(args.model) and not args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* must be used with DNN models')
  if is_linear_model(args.model) and args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* cannot be used with linear models')

  # Build tf.learn features
  feature_columns = build_feature_columns(features, stats, args.model)

  # Set how often to run checkpointing in terms of steps.
  config = tf.contrib.learn.RunConfig(
      save_checkpoints_steps=args.min_eval_frequency)
  train_dir = os.path.join(output_dir, 'train')

  if args.model == 'dnn_regression':
    estimator = tf.contrib.learn.DNNRegressor(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_regression':
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=feature_columns,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  elif args.model == 'dnn_classification':
    estimator = tf.contrib.learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_classification':
    estimator = tf.contrib.learn.LinearClassifier(
        feature_columns=feature_columns,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  else:
    raise ValueError('bad --model-type value')

  return estimator
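A hedged invocation sketch: the args namespace mirrors the flags referenced above, and the features/stats objects are placeholders for whatever build_feature_columns actually expects.

import argparse

# Placeholders: the real objects come from the surrounding training script.
features, stats = {}, {}
args = argparse.Namespace(
    model='dnn_classification',
    hidden_layer_sizes=[64, 32],
    learning_rate=0.01,
    epsilon=0.0005,
    l1_regularization=0.0,
    l2_regularization=0.0,
    min_eval_frequency=100)
estimator = get_estimator(args, '/tmp/model', features, stats,
                          target_vocab_size=10)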
def read_examples(input_files, batch_size, shuffle, num_epochs=None):
  """Creates readers and queues for reading example protos."""
  files = []
  for e in input_files:
    for path in e.split(','):
      files.extend(file_io.get_matching_files(path))
  thread_count = multiprocessing.cpu_count()

  # The minimum number of instances in a queue from which examples are drawn
  # randomly. The larger this number, the more randomness at the expense of
  # higher memory requirements.
  min_after_dequeue = 1000

  # When batching data, the queue's capacity will be larger than the
  # batch_size by some factor. The recommended formula is (num_threads + a
  # small safety margin). For now, we use a single thread for reading, so
  # this can be small.
  queue_size_multiplier = thread_count + 3

  # Convert num_epochs == 0 -> num_epochs is None, if necessary
  num_epochs = num_epochs or None

  # Build a queue of the filenames to be read.
  filename_queue = tf.train.string_input_producer(files, num_epochs, shuffle)

  example_id, encoded_example = tf.TextLineReader().read_up_to(
      filename_queue, batch_size)

  if shuffle:
    capacity = min_after_dequeue + queue_size_multiplier * batch_size
    return tf.train.shuffle_batch(
        [example_id, encoded_example],
        batch_size,
        capacity,
        min_after_dequeue,
        enqueue_many=True,
        num_threads=thread_count)
  else:
    capacity = queue_size_multiplier * batch_size
    return tf.train.batch(
        [example_id, encoded_example],
        batch_size,
        capacity=capacity,
        enqueue_many=True,
        num_threads=thread_count)
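Queue-based readers such as this one only produce data once queue runners are started. A minimal driving loop, assuming TF 1.x session semantics; the file pattern is a placeholder.

example_ids, examples = read_examples(
    input_files=['data/train*.csv'],  # placeholder pattern
    batch_size=64,
    shuffle=True,
    num_epochs=1)
with tf.Session() as sess:
  # local_variables_initializer is needed for the num_epochs counter.
  sess.run([tf.global_variables_initializer(),
            tf.local_variables_initializer()])
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  try:
    while not coord.should_stop():
      ids, rows = sess.run([example_ids, examples])
  except tf.errors.OutOfRangeError:
    pass  # input exhausted after num_epochs passes
  finally:
    coord.request_stop()
    coord.join(threads)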
# ==============================================================================
# Building the TF learn estimators
# ==============================================================================
def _build_input_fn(input_file_pattern, batch_size, mode):
  """Build input function.

  Args:
    input_file_pattern: The file pattern for examples
    batch_size: Batch size
    mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

  Returns:
    An input function that returns a tuple of a dictionary mapping
    feature column names to tensors, and the target labels.
  """
  def _input_fn():
    """Supplies the input to the model.

    Returns:
      A tuple consisting of 1) a dictionary of tensors whose keys are
      the feature names, and 2) a tensor of target labels if the mode
      is not INFER (and None, otherwise).
    """
    logging.info("Reading files from %s", input_file_pattern)
    input_files = sorted(list(tf.gfile.Glob(input_file_pattern)))
    logging.info("Reading files from %s", input_files)
    include_target_column = (mode != tf.contrib.learn.ModeKeys.INFER)
    features_spec = tf.contrib.layers.create_feature_spec_for_parsing(
        feature_columns=_get_feature_columns(include_target_column))

    if FLAGS.use_gzip:
      def gzip_reader():
        return tf.TFRecordReader(
            options=tf.python_io.TFRecordOptions(
                compression_type=TFRecordCompressionType.GZIP))
      reader_fn = gzip_reader
    else:
      reader_fn = tf.TFRecordReader

    features = tf.contrib.learn.io.read_batch_features(
        file_pattern=input_files,
        batch_size=batch_size,
        queue_capacity=3 * batch_size,
        randomize_input=(mode == tf.contrib.learn.ModeKeys.TRAIN),
        feature_queue_capacity=FLAGS.feature_queue_capacity,
        reader=reader_fn,
        features=features_spec)

    target = None
    if include_target_column:
      target = features.pop(FLAGS.target_field)
    return features, target

  return _input_fn
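A hedged sketch of plugging the factory into training; the file pattern is a placeholder, and `estimator` is built as in the sketch after _build_model_fn below.

train_input_fn = _build_input_fn(
    input_file_pattern='data/train-*.tfrecord',  # placeholder
    batch_size=256,
    mode=tf.contrib.learn.ModeKeys.TRAIN)
estimator.fit(input_fn=train_input_fn, steps=1000)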
def _build_model_fn():
  """Build model function.

  Returns:
    A model function that can be passed to `Estimator` constructor.
  """
  def _model_fn(features, labels, mode):
    """Creates the prediction and its loss.

    Args:
      features: A dictionary of tensors keyed by the feature name.
      labels: A tensor representing the labels.
      mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

    Returns:
      A tuple consisting of the prediction, loss, and train_op.
    """
    # Generate one embedding per sparse feature column and concatenate them.
    concat_embeddings = tf.contrib.layers.input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=_get_feature_columns(include_target_column=False))

    # Add one hidden layer.
    hidden_layer_0 = tf.contrib.layers.relu(
        concat_embeddings, FLAGS.hidden_units)

    # Output and logistic loss.
    logits = tf.contrib.layers.linear(hidden_layer_0, FLAGS.num_classes)
    predictions = tf.contrib.layers.softmax(logits)

    if mode == tf.contrib.learn.ModeKeys.INFER:
      predictions = {
          tf.contrib.learn.PredictionKey.PROBABILITIES: predictions,
          PREDICTION_KEY: features[PREDICTION_KEY]
      }
      output_alternatives = {
          DEFAULT_OUTPUT_ALTERNATIVE: (tf.contrib.learn.ProblemType.UNSPECIFIED,
                                       predictions)
      }
      return model_fn.ModelFnOps(
          mode=mode,
          predictions=predictions,
          output_alternatives=output_alternatives)

    target_one_hot = tf.one_hot(labels, FLAGS.num_classes)
    target_one_hot = tf.reduce_sum(
        input_tensor=target_one_hot, reduction_indices=[1])
    loss = tf.losses.softmax_cross_entropy(target_one_hot, logits)
    if mode == tf.contrib.learn.ModeKeys.EVAL:
      return predictions, loss, None

    opt = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=FLAGS.learning_rate,
        optimizer=opt)
    return model_fn.ModelFnOps(
        mode=mode, predictions=predictions, loss=loss, train_op=train_op)

  return _model_fn
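Because _build_model_fn returns a plain model_fn, it plugs into the generic tf.contrib.learn.Estimator; a minimal sketch with a placeholder model directory:

estimator = tf.contrib.learn.Estimator(
    model_fn=_build_model_fn(),
    model_dir='/tmp/model')  # placeholder directory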
def _def_experiment(
    train_file_pattern, eval_file_pattern, batch_size):
  """Creates the function used to configure the experiment runner.

  This function creates a function that is used by the learn_runner
  module to create an Experiment.

  Args:
    train_file_pattern: The directory the train data can be found in.
    eval_file_pattern: The directory the test data can be found in.
    batch_size: Batch size

  Returns:
    A function that creates an Experiment object for the runner.
  """
  def _experiment_fn(output_dir):
    """Experiment function used by learn_runner to run training/eval/etc.

    Args:
      output_dir: String path of directory to use for outputs.

    Returns:
      tf.learn `Experiment`.
    """
    estimator = tf.contrib.learn.Estimator(
        model_fn=_build_model_fn(),
        model_dir=output_dir)
    train_input_fn = _build_input_fn(
        input_file_pattern=train_file_pattern,
        batch_size=batch_size,
        mode=tf.contrib.learn.ModeKeys.TRAIN)
    eval_input_fn = _build_input_fn(
        input_file_pattern=eval_file_pattern,
        batch_size=batch_size,
        mode=tf.contrib.learn.ModeKeys.EVAL)

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        train_steps=FLAGS.num_train_steps,
        eval_input_fn=eval_input_fn,
        eval_steps=FLAGS.num_eval_steps,
        eval_metrics=_create_evaluation_metrics(),
        min_eval_frequency=100,
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                _predict_input_fn,
                exports_to_keep=5,
                default_output_alternative_key=DEFAULT_OUTPUT_ALTERNATIVE)
        ])

  return _experiment_fn
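A hedged sketch of driving the experiment factory with learn_runner, as the docstring describes; the paths and batch size are placeholders.

from tensorflow.contrib.learn.python.learn import learn_runner

experiment_fn = _def_experiment(
    train_file_pattern='data/train-*.tfrecord',  # placeholder
    eval_file_pattern='data/eval-*.tfrecord',    # placeholder
    batch_size=128)
learn_runner.run(experiment_fn=experiment_fn, output_dir='/tmp/experiment')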