def testBucketizedColumnHavingMultiDimensions(self):
  """Checks the weighted sum for a two-dimensional bucketized price column.

  The model is built from the bucketized column together with a hashed
  "country" column and a single output. Only the bucket weights are
  assigned; every expected row is written as the sum of two of them, one
  per price dimension.
  """
  country = tf.contrib.layers.sparse_column_with_hash_bucket(
      "country", hash_bucket_size=5)
  price = tf.contrib.layers.real_valued_column("price", 2)
  bucket = tf.contrib.layers.bucketized_column(
      price, boundaries=[0., 10., 100.])
  with tf.Graph().as_default():
    # buckets 2, 3, 0
    price_tensor = tf.constant([[20., 210], [110, 50], [-3, -30]])
    country_tensor = tf.SparseTensor(
        values=["US", "SV"], indices=[[0, 0], [1, 0]], shape=[3, 2])
    features = {"price": price_tensor, "country": country_tensor}
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [bucket, country], num_outputs=1))
    with self.test_session() as sess:
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()
      # dimension = 2, bucket_size = 4, num_classes = 1
      bucket_weights = column_to_variable[bucket][0]
      sess.run(bucket_weights.assign(
          [[0.1], [0.2], [0.3], [0.4], [1], [2], [3], [4]]))
      expected = [[0.3 + 4], [0.4 + 3], [0.1 + 1]]
      self.assertAllClose(output.eval(), expected)
python类initialize_all_tables()的实例源码
def testIntegerizedColumn(self):
  """Checks that ids 0, 4 and 2 yield the weights stored at those rows."""
  product = tf.contrib.layers.sparse_column_with_integerized_feature(
      "product", bucket_size=5)
  with tf.Graph().as_default():
    product_ids = tf.SparseTensor(
        values=[0, 4, 2], indices=[[0, 0], [1, 0], [2, 0]], shape=[3, 1])
    features = {"product": product_ids}
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [product], num_outputs=1))
    with self.test_session() as sess:
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()
      # One weight per bucket, so the expected output can be read off by id.
      weight_rows = [[0.1], [0.2], [0.3], [0.4], [0.5]]
      product_weights = column_to_variable[product][0]
      sess.run(product_weights.assign(weight_rows))
      self.assertAllClose(output.eval(), [[0.1], [0.5], [0.3]])
def testIntegerizedColumnWithInvalidId(self):
  """Checks the output when some ids are not below bucket_size.

  Ids 5, 4 and 7 are fed to a column with bucket_size=5; the assertion
  expects the weights stored at rows 0, 4 and 2 (presumably the ids are
  reduced modulo bucket_size -- confirm against the column implementation).
  """
  product = tf.contrib.layers.sparse_column_with_integerized_feature(
      "product", bucket_size=5)
  with tf.Graph().as_default():
    product_ids = tf.SparseTensor(
        values=[5, 4, 7], indices=[[0, 0], [1, 0], [2, 0]], shape=[3, 1])
    features = {"product": product_ids}
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [product], num_outputs=1))
    with self.test_session() as sess:
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()
      weight_rows = [[0.1], [0.2], [0.3], [0.4], [0.5]]
      product_weights = column_to_variable[product][0]
      sess.run(product_weights.assign(weight_rows))
      self.assertAllClose(output.eval(), [[0.1], [0.5], [0.3]])
def testMulticlassWithSparseColumn(self):
  """Checks three-output predictions for a keyed sparse "language" column.

  The weight variable is asserted to have shape (5, 3); a distinct row is
  then assigned per key and every example is expected to output the row
  belonging to its language.
  """
  with tf.Graph().as_default():
    vocabulary = ["english", "arabic", "hindi", "russian", "swahili"]
    column = tf.contrib.layers.sparse_column_with_keys(
        column_name="language", keys=vocabulary)
    language_tensor = tf.SparseTensor(
        values=["hindi", "english", "arabic", "russian"],
        indices=[[0, 0], [1, 0], [2, 0], [3, 0]],
        shape=[4, 1])
    features = {"language": language_tensor}
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [column], num_outputs=3))
    with self.test_session() as sess:
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()
      weights = column_to_variable[column][0]
      self.assertEqual(weights.get_shape(), (5, 3))
      sess.run(weights.assign([[0.1, 0.4, 0.7],
                               [0.2, 0.5, 0.8],
                               [0.3, 0.6, 0.9],
                               [0.4, 0.7, 1.0],
                               [0.5, 0.8, 1.1]]))
      # Rows follow the example order: hindi, english, arabic, russian.
      expected = [[0.3, 0.6, 0.9],
                  [0.1, 0.4, 0.7],
                  [0.2, 0.5, 0.8],
                  [0.4, 0.7, 1.0]]
      self.assertAllClose(output.eval(), expected)
def testMulticlassWithBucketizedColumn(self):
  """Checks three-output predictions for a one-dimensional bucketized price.

  The weight variable is asserted to have shape (5, 3); after assigning one
  row per bucket, each example is expected to output the row of its bucket.
  """
  price = tf.contrib.layers.real_valued_column("price")
  column = tf.contrib.layers.bucketized_column(
      price, boundaries=[0., 100., 500., 1000.])
  with tf.Graph().as_default():
    # buckets 0, 2, 1, 2
    features = {"price": tf.constant([[-3], [110], [20.], [210]])}
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [column], num_outputs=3))
    with self.test_session() as sess:
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()
      weights = column_to_variable[column][0]
      self.assertEqual(weights.get_shape(), (5, 3))
      sess.run(weights.assign([[0.1, 0.4, 0.7],
                               [0.2, 0.5, 0.8],
                               [0.3, 0.6, 0.9],
                               [0.4, 0.7, 1.0],
                               [0.5, 0.8, 1.1]]))
      expected = [[0.1, 0.4, 0.7],
                  [0.3, 0.6, 0.9],
                  [0.2, 0.5, 0.8],
                  [0.3, 0.6, 0.9]]
      self.assertAllClose(output.eval(), expected)
def testParseExample(self):
  """Checks parse_feature_columns_from_examples on one serialized Example.

  The Example carries three prices and two "wire_cast" strings. The parsed
  result must contain both columns; the bucketized prices are expected to be
  [[2, 3, 0]] and the cast strings to come back as ids [2, 0] at indices
  [[0, 0], [0, 1]].
  """
  bucket = tf.contrib.layers.bucketized_column(
      tf.contrib.layers.real_valued_column("price", dimension=3),
      boundaries=[0., 10., 100.])
  wire_cast = tf.contrib.layers.sparse_column_with_keys(
      "wire_cast", ["marlo", "omar", "stringer"])
  # buckets 2, 3, 0
  price_feature = tf.train.Feature(
      float_list=tf.train.FloatList(value=[20., 110, -3]))
  cast_feature = tf.train.Feature(
      bytes_list=tf.train.BytesList(value=[b"stringer", b"marlo"]))
  example = tf.train.Example(features=tf.train.Features(
      feature={"price": price_feature, "wire_cast": cast_feature}))
  output = tf.contrib.layers.parse_feature_columns_from_examples(
      serialized=[example.SerializeToString()],
      feature_columns=[bucket, wire_cast])
  for parsed_column in (bucket, wire_cast):
    self.assertIn(parsed_column, output)
  with self.test_session():
    tf.initialize_all_tables().run()
    self.assertAllEqual(output[bucket].eval(), [[2, 3, 0]])
    parsed_cast = output[wire_cast]
    self.assertAllEqual(parsed_cast.indices.eval(), [[0, 0], [0, 1]])
    self.assertAllEqual(parsed_cast.values.eval(), [2, 0])
def new_model(session):
  """Initialize a model from scratch and return the global step.

  Runs, in this order, the global-variable, local-variable and table
  initializers in the given session.

  Args:
    session: Tensorflow session in which the initializer ops are run.
  Returns:
    The result of get_global_step() (described by the original author as
    the global step variable).
  """
  logger.info('Initializing model from scratch ...')
  initializer_factories = (
      tf.global_variables_initializer,
      tf.local_variables_initializer,
      tf.initialize_all_tables,
  )
  # Each op is created right before it is run, as in a straight-line version.
  for make_init_op in initializer_factories:
    session.run(make_init_op())
  return get_global_step()
def restore_model(session, saver, path):
  """Restore a previously trained model and return the global step.

  Local variables and tables are initialized first; the saver then restores
  the checkpoint found at `path` into `session`.

  Args:
    session: Tensorflow session
    saver: Tensorflow saver
    path: Path where model to be loaded is
  Returns:
    The result of get_global_step() (described by the original author as
    the global step variable).
  """
  # Hand the path to the logger as an argument: formatting is deferred until
  # the record is emitted, and a tuple-valued path cannot be mis-unpacked by
  # an eager %-interpolation.
  logger.info('Starting model from %s', path)
  session.run(tf.local_variables_initializer())
  session.run(tf.initialize_all_tables())
  saver.restore(session, path)
  return get_global_step()
def testSparseColumnWithKeys(self):
  """Checks the transform of a keyed sparse column.

  The transformed values must be int64 and equal [1, 2, 0] for the inputs
  "omar", "stringer", "marlo"; indices and shape must match the input
  tensor's.
  """
  keys_sparse = tf.contrib.layers.sparse_column_with_keys(
      "wire", ["marlo", "omar", "stringer"])
  wire_tensor = tf.SparseTensor(
      values=["omar", "stringer", "marlo"],
      indices=[[0, 0], [1, 0], [1, 1]],
      shape=[2, 2])
  transformer = feature_column_ops._Transformer({"wire": wire_tensor})
  output = transformer.transform(keys_sparse)
  with self.test_session():
    tf.initialize_all_tables().run()
    self.assertEqual(output.values.dtype, tf.int64)
    self.assertAllEqual(output.values.eval(), [1, 2, 0])
    # The sparsity pattern has to pass through unchanged.
    self.assertAllEqual(output.indices.eval(), wire_tensor.indices.eval())
    self.assertAllEqual(output.shape.eval(), wire_tensor.shape.eval())
def testOneHotColumnFromSparseColumnWithKeysSucceedsForDNN(self):
  """Checks the dense one-hot input built from a keyed sparse column.

  With keys ["a", "b", "c", "unseen"] and inputs "c", "b", "a", the output
  of input_from_feature_columns is expected to be the matching one-hot rows
  of width 4.
  """
  ids_column = tf.contrib.layers.sparse_column_with_keys(
      "ids", ["a", "b", "c", "unseen"])
  ids_tensor = tf.SparseTensor(
      values=["c", "b", "a"], indices=[[0, 0], [1, 0], [2, 0]], shape=[3, 1])
  one_hot_sparse = tf.contrib.layers.one_hot_column(ids_column)
  features = {"ids": ids_tensor}
  output = tf.contrib.layers.input_from_feature_columns(features,
                                                        [one_hot_sparse])
  with self.test_session():
    # Non-deprecated initializer, matching the other tests in this file.
    tf.global_variables_initializer().run()
    tf.initialize_all_tables().run()
    self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]],
                        output.eval())
def testOneHotColumnFromSparseColumnWithHashBucketSucceedsForDNN(self):
  """Checks the shape of the one-hot input built from a hashed column.

  Three examples are fed through a hash-bucket column with 10 buckets; only
  the evaluated output shape, [3, 10], is asserted.
  """
  hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("feat", 10)
  wire_tensor = tf.SparseTensor(
      values=["a", "b", "c1", "c2"],
      indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
      shape=[3, 2])
  features = {"feat": wire_tensor}
  one_hot_sparse = tf.contrib.layers.one_hot_column(hashed_sparse)
  output = tf.contrib.layers.input_from_feature_columns(features,
                                                        [one_hot_sparse])
  with self.test_session():
    # Non-deprecated initializer, matching the other tests in this file.
    tf.global_variables_initializer().run()
    tf.initialize_all_tables().run()
    self.assertAllEqual([3, 10], output.eval().shape)
def testJointPredictions(self):
  """Checks joint_weighted_sum_from_feature_columns on two keyed columns.

  Asserts that a single weight variable of shape [5, 1] is created, that the
  output is zero right after initialization, that it equals the bias once
  only the bias is set, and finally that it matches the expected scores
  after the joint weights are assigned.
  """
  country = tf.contrib.layers.sparse_column_with_keys(
      column_name="country",
      keys=["us", "finland"])
  language = tf.contrib.layers.sparse_column_with_keys(
      column_name="language",
      keys=["english", "finnish", "hindi"])
  with tf.Graph().as_default():
    features = {
        "country": tf.SparseTensor(values=["finland", "us"],
                                   indices=[[0, 0], [1, 0]],
                                   shape=[2, 1]),
        "language": tf.SparseTensor(values=["hindi", "english"],
                                    indices=[[0, 0], [1, 0]],
                                    shape=[2, 1]),
    }
    output, variables, bias = (
        tf.contrib.layers.joint_weighted_sum_from_feature_columns(
            features, [country, language], num_outputs=1))
    # Assert that only a single weight is created.
    self.assertEqual(len(variables), 1)
    with self.test_session() as sess:
      # Non-deprecated initializer, matching the other tests in this file.
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()

      self.assertAllClose(output.eval(), [[0.], [0.]])

      sess.run(bias.assign([0.1]))
      self.assertAllClose(output.eval(), [[0.1], [0.1]])

      # shape is [5,1] because 1 class and 2 + 3 features.
      self.assertEqual(variables[0].get_shape().as_list(), [5, 1])

      # score: bias + country_weight + language_weight
      sess.run(variables[0].assign([[0.1], [0.2], [0.3], [0.4], [0.5]]))
      self.assertAllClose(output.eval(), [[0.8], [0.5]])
def testPredictionsWithWeightedSparseColumn(self):
  """Checks predictions for a sparse column weighted by an "age" feature.

  The output is asserted to be zero after initialization, equal to the bias
  once only the bias is set, and bias + age * language weight after the
  column weights are assigned.
  """
  language = tf.contrib.layers.sparse_column_with_keys(
      column_name="language",
      keys=["english", "finnish", "hindi"])
  weighted_language = tf.contrib.layers.weighted_sparse_column(
      sparse_id_column=language,
      weight_column_name="age")
  with tf.Graph().as_default():
    features = {
        "language": tf.SparseTensor(values=["hindi", "english"],
                                    indices=[[0, 0], [1, 0]],
                                    shape=[2, 1]),
        "age": tf.SparseTensor(values=[10.0, 20.0],
                               indices=[[0, 0], [1, 0]],
                               shape=[2, 1])
    }
    output, column_to_variable, bias = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [weighted_language], num_outputs=1))
    with self.test_session() as sess:
      # Non-deprecated initializer, matching the other tests in this file.
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()

      self.assertAllClose(output.eval(), [[0.], [0.]])

      sess.run(bias.assign([0.1]))
      self.assertAllClose(output.eval(), [[0.1], [0.1]])

      # score: bias + age*language_weight[index]
      sess.run(column_to_variable[weighted_language][0].assign(
          [[0.1], [0.2], [0.3]]))
      self.assertAllClose(output.eval(), [[3.1], [2.1]])
def testCrossUsageInPredictions(self):
  """Checks predictions for a language x country crossed column.

  Each example has one language and one country. After 0.4 is added to
  every cross weight, each example is expected to score 0.4.
  """
  language = tf.contrib.layers.sparse_column_with_hash_bucket(
      "language", hash_bucket_size=3)
  country = tf.contrib.layers.sparse_column_with_hash_bucket(
      "country", hash_bucket_size=5)
  country_language = tf.contrib.layers.crossed_column(
      [language, country], hash_bucket_size=10)
  with tf.Graph().as_default():
    features = {
        "language": tf.SparseTensor(values=["english", "spanish"],
                                    indices=[[0, 0], [1, 0]],
                                    shape=[2, 1]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [1, 0]],
                                   shape=[2, 1])
    }
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [country_language],
            num_outputs=1))
    with self.test_session() as sess:
      # Non-deprecated initializer, matching the other tests in this file.
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()

      weights = column_to_variable[country_language][0]
      sess.run(weights.assign(weights + 0.4))
      self.assertAllClose(output.eval(), [[0.4], [0.4]])
def testMultivalentCrossUsageInPredictions(self):
  """Checks a crossed column when one example holds several values.

  The single example carries two languages and two countries. After 0.4 is
  added to every cross weight the expected score is 1.6.
  """
  language = tf.contrib.layers.sparse_column_with_hash_bucket(
      "language", hash_bucket_size=3)
  country = tf.contrib.layers.sparse_column_with_hash_bucket(
      "country", hash_bucket_size=5)
  country_language = tf.contrib.layers.crossed_column(
      [language, country], hash_bucket_size=10)
  with tf.Graph().as_default():
    features = {
        "language": tf.SparseTensor(values=["english", "spanish"],
                                    indices=[[0, 0], [0, 1]],
                                    shape=[1, 2]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [0, 1]],
                                   shape=[1, 2])
    }
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [country_language],
            num_outputs=1))
    with self.test_session() as sess:
      # Non-deprecated initializer, matching the other tests in this file.
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()

      weights = column_to_variable[country_language][0]
      sess.run(weights.assign(weights + 0.4))
      # There are four crosses each with 0.4 weight.
      # score = 0.4 + 0.4 + 0.4 + 0.4
      self.assertAllClose(output.eval(), [[1.6]])
def testMultivalentCrossUsageInPredictionsWithPartition(self):
  """Checks multivalent cross predictions with partitioned weights.

  The model is built inside a variable scope that carries a min/max
  variable partitioner. The test asserts how many variables each column
  ends up with (2 for country, 3 for language, 2 for the cross), adds 0.4
  to every partition of the cross weights and expects a score of 1.6.
  """
  # bucket size has to be big enough to allow sharding.
  language = tf.contrib.layers.sparse_column_with_hash_bucket(
      "language", hash_bucket_size=64 << 19)
  country = tf.contrib.layers.sparse_column_with_hash_bucket(
      "country", hash_bucket_size=64 << 18)
  country_language = tf.contrib.layers.crossed_column(
      [language, country], hash_bucket_size=64 << 18)
  with tf.Graph().as_default():
    features = {
        "language": tf.SparseTensor(values=["english", "spanish"],
                                    indices=[[0, 0], [0, 1]],
                                    shape=[1, 2]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [0, 1]],
                                   shape=[1, 2])
    }
    with tf.variable_scope(
        "weighted_sum_from_feature_columns",
        features.values(),
        partitioner=tf.min_max_variable_partitioner(
            max_partitions=10, min_slice_size=((64 << 20) - 1))) as scope:
      output, column_to_variable, _ = (
          tf.contrib.layers.weighted_sum_from_feature_columns(
              features, [country, language, country_language],
              num_outputs=1,
              scope=scope))
    with self.test_session() as sess:
      # Non-deprecated initializer, matching the other tests in this file.
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()

      self.assertEqual(2, len(column_to_variable[country]))
      self.assertEqual(3, len(column_to_variable[language]))
      self.assertEqual(2, len(column_to_variable[country_language]))

      weights = column_to_variable[country_language]
      for partition_variable in weights:
        sess.run(partition_variable.assign(partition_variable + 0.4))
      # There are four crosses each with 0.4 weight.
      # score = 0.4 + 0.4 + 0.4 + 0.4
      self.assertAllClose(output.eval(), [[1.6]])
def testMulticlassWithRealValuedColumnHavingMultiDimensions(self):
  """Checks five-output predictions with a three-dimensional real column.

  The model uses a hashed "country" column, a scalar "age" column and the
  three-dimensional "incomes" column. Only the incomes weights are
  assigned, and the expected output equals the incomes values multiplied by
  that weight matrix.
  """
  country = tf.contrib.layers.sparse_column_with_hash_bucket(
      "country", hash_bucket_size=5)
  age = tf.contrib.layers.real_valued_column("age")
  # The following RealValuedColumn has 3 dimensions.
  incomes = tf.contrib.layers.real_valued_column("incomes", 3)
  with tf.Graph().as_default():
    features = {"age": tf.constant([[1], [1]]),
                "incomes": tf.constant([[100., 200., 300.], [10., 20., 30.]]),
                "country": tf.SparseTensor(values=["US", "SV"],
                                           indices=[[0, 0], [1, 0]],
                                           shape=[2, 2])}
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [country, age, incomes],
            num_outputs=5))
    with self.test_session() as sess:
      # Non-deprecated initializer, matching the other tests in this file.
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()

      incomes_weights = column_to_variable[incomes][0]
      sess.run(incomes_weights.assign([[0.01, 0.1, 1., 10., 100.],
                                       [0.02, 0.2, 2., 20., 200.],
                                       [0.03, 0.3, 3., 30., 300.]]))
      self.assertAllClose(output.eval(), [[14., 140., 1400., 14000., 140000.],
                                          [1.4, 14., 140., 1400., 14000.]])
def testMulticlassWithBucketizedColumnHavingMultiDimensions(self):
  """Checks five-output predictions for a two-dimensional bucketized price.

  The model is built from the bucketized column together with a hashed
  "country" column. Only the bucket weights (8 rows, 5 outputs) are
  assigned; every expected row is written as the sum of two weight rows,
  one per price dimension.
  """
  country = tf.contrib.layers.sparse_column_with_hash_bucket(
      "country", hash_bucket_size=5)
  bucket = tf.contrib.layers.bucketized_column(
      tf.contrib.layers.real_valued_column("price", 2),
      boundaries=[0., 10., 100.])
  with tf.Graph().as_default():
    # buckets 2, 3, 0
    features = {"price": tf.constant([[20., 210], [110, 50], [-3, -30]]),
                "country": tf.SparseTensor(values=["US", "SV"],
                                           indices=[[0, 0], [1, 0]],
                                           shape=[3, 2])}
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [bucket, country], num_outputs=5))
    with self.test_session() as sess:
      # Non-deprecated initializer, matching the other tests in this file.
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()

      # dimension = 2, bucket_size = 4, num_classes = 5
      sess.run(column_to_variable[bucket][0].assign(
          [[0.1, 1, 10, 100, 1000], [0.2, 2, 20, 200, 2000],
           [0.3, 3, 30, 300, 3000], [0.4, 4, 40, 400, 4000],
           [5, 50, 500, 5000, 50000], [6, 60, 600, 6000, 60000],
           [7, 70, 700, 7000, 70000], [8, 80, 800, 8000, 80000]]))
      self.assertAllClose(
          output.eval(),
          [[0.3 + 8, 3 + 80, 30 + 800, 300 + 8000, 3000 + 80000],
           [0.4 + 7, 4 + 70, 40 + 700, 400 + 7000, 4000 + 70000],
           [0.1 + 5, 1 + 50, 10 + 500, 100 + 5000, 1000 + 50000]])
def testCrossWithBucketizedColumn(self):
  """Checks predictions for a country x bucketized-price crossed column.

  The single example has one price and two countries. After 0.4 is added
  to every cross weight the expected score is 0.8.
  """
  price_bucket = tf.contrib.layers.bucketized_column(
      tf.contrib.layers.real_valued_column("price"),
      boundaries=[0., 10., 100.])
  country = tf.contrib.layers.sparse_column_with_hash_bucket(
      "country", hash_bucket_size=5)
  country_price = tf.contrib.layers.crossed_column(
      [country, price_bucket], hash_bucket_size=10)
  with tf.Graph().as_default():
    features = {
        "price": tf.constant([[20.]]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [0, 1]],
                                   shape=[1, 2])
    }
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [country_price], num_outputs=1))
    with self.test_session() as sess:
      # Non-deprecated initializer, matching the other tests in this file.
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()

      weights = column_to_variable[country_price][0]
      sess.run(weights.assign(weights + 0.4))
      # There are two crosses each with 0.4 weight.
      # score = 0.4 + 0.4
      self.assertAllClose(output.eval(), [[0.8]])
def testCrossWithCrossedColumn(self):
  """Checks predictions when a crossed column is itself crossed again.

  The language x country cross is crossed with a bucketized price. The
  single example has two languages, two countries and one price. After 0.4
  is added to every cross weight the expected score is 1.6.
  """
  price_bucket = tf.contrib.layers.bucketized_column(
      tf.contrib.layers.real_valued_column("price"),
      boundaries=[0., 10., 100.])
  language = tf.contrib.layers.sparse_column_with_hash_bucket(
      "language", hash_bucket_size=3)
  country = tf.contrib.layers.sparse_column_with_hash_bucket(
      "country", hash_bucket_size=5)
  country_language = tf.contrib.layers.crossed_column(
      [language, country], hash_bucket_size=10)
  country_language_price = tf.contrib.layers.crossed_column(
      {country_language, price_bucket},
      hash_bucket_size=15)
  with tf.Graph().as_default():
    features = {
        "price": tf.constant([[20.]]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [0, 1]],
                                   shape=[1, 2]),
        "language": tf.SparseTensor(values=["english", "spanish"],
                                    indices=[[0, 0], [0, 1]],
                                    shape=[1, 2])
    }
    output, column_to_variable, _ = (
        tf.contrib.layers.weighted_sum_from_feature_columns(
            features, [country_language_price],
            num_outputs=1))
    with self.test_session() as sess:
      # Non-deprecated initializer, matching the other tests in this file.
      tf.global_variables_initializer().run()
      tf.initialize_all_tables().run()

      weights = column_to_variable[country_language_price][0]
      sess.run(weights.assign(weights + 0.4))
      # There are four crosses each with 0.4 weight.
      # score = 0.4 + 0.4 + 0.4 + 0.4
      self.assertAllClose(output.eval(), [[1.6]])