def testTrainOptimizerWithL1Reg(self):
"""Tests l1 regularized model has higher loss."""
def input_fn():
return {
'language': tf.SparseTensor(values=['hindi'],
indices=[[0, 0]],
shape=[1, 1])
}, tf.constant([[1]])
language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
classifier_no_reg = tf.contrib.learn.LinearClassifier(
feature_columns=[language])
classifier_with_reg = tf.contrib.learn.LinearClassifier(
feature_columns=[language],
optimizer=tf.train.FtrlOptimizer(learning_rate=1.0,
l1_regularization_strength=100.))
loss_no_reg = classifier_no_reg.fit(
input_fn=input_fn, steps=100).evaluate(
input_fn=input_fn, steps=1)['loss']
loss_with_reg = classifier_with_reg.fit(
input_fn=input_fn, steps=100).evaluate(
input_fn=input_fn, steps=1)['loss']
self.assertLess(loss_no_reg, loss_with_reg)
python类SparseTensor()的实例源码
def testSdcaOptimizerSparseFeatures(self):
"""Tests LinearClasssifier with SDCAOptimizer and sparse features."""
def input_fn():
return {
'example_id': tf.constant(['1', '2', '3']),
'price': tf.constant([[0.4], [0.6], [0.3]]),
'country': tf.SparseTensor(values=['IT', 'US', 'GB'],
indices=[[0, 0], [1, 3], [2, 1]],
shape=[3, 5]),
'weights': tf.constant([[1.0], [1.0], [1.0]])
}, tf.constant([[1], [0], [1]])
price = tf.contrib.layers.real_valued_column('price')
country = tf.contrib.layers.sparse_column_with_hash_bucket(
'country', hash_bucket_size=5)
sdca_optimizer = tf.contrib.linear_optimizer.SDCAOptimizer(
example_id_column='example_id')
classifier = tf.contrib.learn.LinearClassifier(
feature_columns=[price, country],
weight_column_name='weights',
optimizer=sdca_optimizer)
classifier.fit(input_fn=input_fn, steps=50)
scores = classifier.evaluate(input_fn=input_fn, steps=1)
self.assertGreater(scores['accuracy'], 0.9)
def testEval(self):
"""Tests that eval produces correct metrics.
"""
def input_fn():
return {
'age': tf.constant([[1], [2]]),
'language': tf.SparseTensor(values=['greek', 'chinese'],
indices=[[0, 0], [1, 0]],
shape=[2, 1]),
}, tf.constant([[1], [0]])
language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
age = tf.contrib.layers.real_valued_column('age')
classifier = tf.contrib.learn.LinearClassifier(
feature_columns=[age, language])
# Evaluate on trained mdoel
classifier.fit(input_fn=input_fn, steps=100)
classifier.evaluate(input_fn=input_fn, steps=1)
# TODO(ispir): Enable accuracy check after resolving the randomness issue.
# self.assertLess(evaluated_values['loss/mean'], 0.3)
# self.assertGreater(evaluated_values['accuracy/mean'], .95)
def testRegression(self):
"""Tests that loss goes down with training."""
def input_fn():
return {
'age': tf.constant([1]),
'language': tf.SparseTensor(values=['english'],
indices=[[0, 0]],
shape=[1, 1])
}, tf.constant([[10.]])
language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
age = tf.contrib.layers.real_valued_column('age')
classifier = tf.contrib.learn.LinearRegressor(
feature_columns=[age, language])
classifier.fit(input_fn=input_fn, steps=100)
loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
classifier.fit(input_fn=input_fn, steps=200)
loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
self.assertLess(loss2, loss1)
self.assertLess(loss2, 0.5)
def testSparseFeatures(self):
"""Tests SVM classifier with (hashed) sparse features."""
def input_fn():
return {
'example_id': tf.constant(['1', '2', '3']),
'price': tf.constant([[0.8], [0.6], [0.3]]),
'country': tf.SparseTensor(
values=['IT', 'US', 'GB'],
indices=[[0, 0], [1, 0], [2, 0]],
shape=[3, 1]),
}, tf.constant([[0], [1], [1]])
price = tf.contrib.layers.real_valued_column('price')
country = tf.contrib.layers.sparse_column_with_hash_bucket(
'country', hash_bucket_size=5)
svm_classifier = tf.contrib.learn.SVM(feature_columns=[price, country],
example_id_column='example_id',
l1_regularization=0.0,
l2_regularization=1.0)
svm_classifier.fit(input_fn=input_fn, steps=30)
accuracy = svm_classifier.evaluate(input_fn=input_fn, steps=1)['accuracy']
self.assertAlmostEqual(accuracy, 1.0, places=3)
def testWeightedSparseColumn(self):
ids = tf.contrib.layers.sparse_column_with_keys(
"ids", ["marlo", "omar", "stringer"])
ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
features = {"ids": ids_tensor,
"weights": weights_tensor}
output = feature_column_ops._Transformer(features).transform(weighted_ids)
with self.test_session():
tf.initialize_all_tables().run()
self.assertAllEqual(output[0].shape.eval(), ids_tensor.shape.eval())
self.assertAllEqual(output[0].indices.eval(), ids_tensor.indices.eval())
self.assertAllEqual(output[0].values.eval(), [2, 2, 0])
self.assertAllEqual(output[1].shape.eval(), weights_tensor.shape.eval())
self.assertAllEqual(output[1].indices.eval(),
weights_tensor.indices.eval())
self.assertEqual(output[1].values.dtype, tf.float32)
self.assertAllEqual(output[1].values.eval(), weights_tensor.values.eval())
def testCrossColumn(self):
language = tf.contrib.layers.sparse_column_with_hash_bucket(
"language", hash_bucket_size=3)
country = tf.contrib.layers.sparse_column_with_hash_bucket(
"country", hash_bucket_size=5)
country_language = tf.contrib.layers.crossed_column(
[language, country], hash_bucket_size=15)
features = {
"language": tf.SparseTensor(values=["english", "spanish"],
indices=[[0, 0], [1, 0]],
shape=[2, 1]),
"country": tf.SparseTensor(values=["US", "SV"],
indices=[[0, 0], [1, 0]],
shape=[2, 1])
}
output = feature_column_ops._Transformer(features).transform(
country_language)
with self.test_session():
self.assertEqual(output.values.dtype, tf.int64)
self.assertTrue(all(x < 15 and x >= 0 for x in output.values.eval()))
def testCrossWithBucketizedColumn(self):
price_bucket = tf.contrib.layers.bucketized_column(
tf.contrib.layers.real_valued_column("price"),
boundaries=[0., 10., 100.])
country = tf.contrib.layers.sparse_column_with_hash_bucket(
"country", hash_bucket_size=5)
country_price = tf.contrib.layers.crossed_column(
[country, price_bucket], hash_bucket_size=15)
features = {
"price": tf.constant([[20.]]),
"country": tf.SparseTensor(values=["US", "SV"],
indices=[[0, 0], [0, 1]],
shape=[1, 2])
}
output = feature_column_ops._Transformer(features).transform(country_price)
with self.test_session():
self.assertEqual(output.values.dtype, tf.int64)
self.assertTrue(all(x < 15 and x >= 0 for x in output.values.eval()))
def testCrossWithCrossedColumn(self):
price_bucket = tf.contrib.layers.bucketized_column(
tf.contrib.layers.real_valued_column("price"),
boundaries=[0., 10., 100.])
country = tf.contrib.layers.sparse_column_with_hash_bucket(
"country", hash_bucket_size=5)
country_price = tf.contrib.layers.crossed_column(
[country, price_bucket], hash_bucket_size=15)
wire = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
wire_country_price = tf.contrib.layers.crossed_column(
[wire, country_price], hash_bucket_size=15)
features = {
"price": tf.constant([[20.]]),
"country": tf.SparseTensor(values=["US", "SV"],
indices=[[0, 0], [0, 1]],
shape=[1, 2]),
"wire": tf.SparseTensor(values=["omar", "stringer", "marlo"],
indices=[[0, 0], [0, 1], [0, 2]],
shape=[1, 3])
}
output = feature_column_ops._Transformer(features).transform(
wire_country_price)
with self.test_session():
self.assertEqual(output.values.dtype, tf.int64)
self.assertTrue(all(x < 15 and x >= 0 for x in output.values.eval()))
def testAllDNNColumns(self):
sparse_column = tf.contrib.layers.sparse_column_with_keys(
"ids", ["a", "b", "c", "unseen"])
real_valued_column = tf.contrib.layers.real_valued_column("income", 2)
one_hot_column = tf.contrib.layers.one_hot_column(sparse_column)
embedding_column = tf.contrib.layers.embedding_column(sparse_column, 10)
features = {
"ids": tf.SparseTensor(
values=["c", "b", "a"],
indices=[[0, 0], [1, 0], [2, 0]],
shape=[3, 1]),
"income": tf.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]])
}
output = tf.contrib.layers.input_from_feature_columns(features,
[one_hot_column,
embedding_column,
real_valued_column])
with self.test_session():
tf.initialize_all_variables().run()
tf.initialize_all_tables().run()
self.assertAllEqual(output.eval().shape, [3, 2 + 4 + 10])
def testOneHotColumnFromWeightedSparseColumnFails(self):
ids_column = tf.contrib.layers.sparse_column_with_keys(
"ids", ["a", "b", "c", "unseen"])
ids_tensor = tf.SparseTensor(
values=["c", "b", "a", "c"],
indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
shape=[3, 2])
weighted_ids_column = tf.contrib.layers.weighted_sparse_column(ids_column,
"weights")
weights_tensor = tf.SparseTensor(
values=[10.0, 20.0, 30.0, 40.0],
indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
shape=[3, 2])
features = {"ids": ids_tensor, "weights": weights_tensor}
one_hot_column = tf.contrib.layers.one_hot_column(weighted_ids_column)
with self.test_session():
tf.initialize_all_variables().run()
tf.initialize_all_tables().run()
with self.assertRaisesRegexp(
ValueError,
"one_hot_column does not yet support weighted_sparse_column"):
_ = tf.contrib.layers.input_from_feature_columns(features,
[one_hot_column])
def testOneHotColumnFromMultivalentSparseColumnWithKeysSucceedsForDNN(self):
ids_column = tf.contrib.layers.sparse_column_with_keys(
"ids", ["a", "b", "c", "unseen"])
ids_tensor = tf.SparseTensor(
values=["c", "b", "a", "c"],
indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
shape=[3, 2])
one_hot_sparse = tf.contrib.layers.one_hot_column(ids_column)
features = {"ids": ids_tensor}
output = tf.contrib.layers.input_from_feature_columns(features,
[one_hot_sparse])
with self.test_session():
tf.initialize_all_variables().run()
tf.initialize_all_tables().run()
self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]],
output.eval())
def testHashedEmbeddingColumnSucceedsForDNN(self):
wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo", "omar"],
indices=[[0, 0], [1, 0], [1, 1], [2, 0]],
shape=[3, 2])
features = {"wire": wire_tensor}
# Big enough hash space so that hopefully there is no collision
embedded_sparse = tf.contrib.layers.hashed_embedding_column("wire", 1000, 3)
output = tf.contrib.layers.input_from_feature_columns(
features, [embedded_sparse], weight_collections=["my_collection"])
weights = tf.get_collection("my_collection")
grad = tf.gradients(output, weights)
with self.test_session():
tf.initialize_all_variables().run()
gradient_values = []
# Collect the gradient from the different partitions (one in this test)
for p in range(len(grad)):
gradient_values.extend(grad[p].values.eval())
gradient_values.sort()
self.assertAllEqual(gradient_values, [0.5]*6 + [2]*3)
def testEmbeddingColumnWithInitializerSucceedsForDNN(self):
hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
features = {"wire": wire_tensor}
init_value = 133.7
embeded_sparse = tf.contrib.layers.embedding_column(
hashed_sparse,
10, initializer=tf.constant_initializer(init_value))
output = tf.contrib.layers.input_from_feature_columns(features,
[embeded_sparse])
with self.test_session():
tf.initialize_all_variables().run()
output_eval = output.eval()
self.assertAllEqual(output_eval.shape, [2, 10])
self.assertAllClose(output_eval, np.tile(init_value, [2, 10]))
def testEmbeddingColumnWithMultipleInitializersFails(self):
hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
features = {"wire": wire_tensor}
embedded_sparse = tf.contrib.layers.embedding_column(
hashed_sparse,
10,
initializer=tf.truncated_normal_initializer(mean=42,
stddev=1337))
embedded_sparse_alternate = tf.contrib.layers.embedding_column(
hashed_sparse,
10,
initializer=tf.truncated_normal_initializer(mean=1337,
stddev=42))
# Makes sure that trying to use different initializers with the same
# embedding column explicitly fails.
with self.test_session():
with self.assertRaisesRegexp(
ValueError,
"Duplicate feature column key found for column: wire_embedding"):
tf.contrib.layers.input_from_feature_columns(
features, [embedded_sparse, embedded_sparse_alternate])
def testEmbeddingColumnWithCrossedColumnSucceedsForDNN(self):
a = tf.contrib.layers.sparse_column_with_hash_bucket("aaa",
hash_bucket_size=100)
b = tf.contrib.layers.sparse_column_with_hash_bucket("bbb",
hash_bucket_size=100)
crossed = tf.contrib.layers.crossed_column(
set([a, b]), hash_bucket_size=10000)
wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
features = {"aaa": wire_tensor, "bbb": wire_tensor}
embeded_sparse = tf.contrib.layers.embedding_column(crossed, 10)
output = tf.contrib.layers.input_from_feature_columns(features,
[embeded_sparse])
with self.test_session():
tf.initialize_all_variables().run()
self.assertAllEqual(output.eval().shape, [2, 10])
def testWeightedSparseColumnFailsForDNN(self):
ids = tf.contrib.layers.sparse_column_with_keys(
"ids", ["marlo", "omar", "stringer"])
ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
features = {"ids": ids_tensor,
"weights": weights_tensor}
with self.test_session():
with self.assertRaisesRegexp(
ValueError,
"Error creating input layer for column: ids_weighted_by_weights"):
tf.initialize_all_tables().run()
tf.contrib.layers.input_from_feature_columns(features, [weighted_ids])
def testCrossedColumnFailsForDNN(self):
a = tf.contrib.layers.sparse_column_with_hash_bucket("aaa",
hash_bucket_size=100)
b = tf.contrib.layers.sparse_column_with_hash_bucket("bbb",
hash_bucket_size=100)
crossed = tf.contrib.layers.crossed_column(
set([a, b]), hash_bucket_size=10000)
wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
features = {"aaa": wire_tensor, "bbb": wire_tensor}
with self.test_session():
with self.assertRaisesRegexp(
ValueError, "Error creating input layer for column: aaa_X_bbb"):
tf.initialize_all_variables().run()
tf.contrib.layers.input_from_feature_columns(features, [crossed])
def testDeepColumnsSucceedForDNN(self):
real_valued = tf.contrib.layers.real_valued_column("income", 3)
bucket = tf.contrib.layers.bucketized_column(
tf.contrib.layers.real_valued_column("price", 2),
boundaries=[0., 10., 100.])
hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
features = {
"income": tf.constant([[20., 10, -5], [110, 0, -7], [-3, 30, 50]]),
"price": tf.constant([[20., 200], [110, 2], [-20, -30]]),
"wire": tf.SparseTensor(values=["omar", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [2, 0]],
shape=[3, 1])
}
embeded_sparse = tf.contrib.layers.embedding_column(
hashed_sparse,
10, initializer=tf.constant_initializer(133.7))
output = tf.contrib.layers.input_from_feature_columns(
features, [real_valued, bucket, embeded_sparse])
with self.test_session():
tf.initialize_all_variables().run()
# size of output = 3 (real_valued) + 2 * 4 (bucket) + 10 (embedding) = 21
self.assertAllEqual(output.eval().shape, [3, 21])
def testEmbeddingColumnWithWeightedSparseColumnForDNN(self):
ids = tf.contrib.layers.sparse_column_with_keys(
"ids", ["marlo", "omar", "stringer"])
ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
features = {"ids": ids_tensor,
"weights": weights_tensor}
embeded_sparse = tf.contrib.layers.embedding_column(
weighted_ids, 1, combiner="sum", initializer=init_ops.ones_initializer)
output = tf.contrib.layers.input_from_feature_columns(features,
[embeded_sparse])
with self.test_session():
tf.initialize_all_variables().run()
tf.initialize_all_tables().run()
# score: (sum of weights)
self.assertAllEqual(output.eval(), [[10.], [50.]])