Example source code for the Python class SparseTensor()
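The snippets below are collected from the test suites of the lsdc project (author: febert) and show tf.SparseTensor being fed through TensorFlow's tf.contrib feature columns and estimators. They assume the module-level imports of the original test files, in particular import tensorflow as tf, import numpy as np, and from tensorflow.python.ops import init_ops.

As a quick orientation, here is a minimal, self-contained sketch (not taken from those tests) that builds a SparseTensor and converts it to a dense tensor. It assumes the same TF 0.x/1.0-era API as the snippets, where the constructor still takes shape rather than the later dense_shape:

def sparse_tensor_demo():
    import tensorflow as tf

    # A 2x3 string matrix with two non-zero entries, listed in row-major order.
    sp = tf.SparseTensor(values=['a', 'b'],
                         indices=[[0, 0], [1, 2]],
                         shape=[2, 3])
    # Densify; default_value must be given explicitly because the values are strings.
    dense = tf.sparse_tensor_to_dense(sp, default_value='')
    with tf.Session() as sess:
        # Expected result: [['a', '', ''], ['', '', 'b']]
        return sess.run(dense)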

linear_test.py (project: lsdc, author: febert)
def testTrainOptimizerWithL1Reg(self):
    """Tests l1 regularized model has higher loss."""

    def input_fn():
      return {
          'language': tf.SparseTensor(values=['hindi'],
                                      indices=[[0, 0]],
                                      shape=[1, 1])
      }, tf.constant([[1]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    classifier_no_reg = tf.contrib.learn.LinearClassifier(
        feature_columns=[language])
    classifier_with_reg = tf.contrib.learn.LinearClassifier(
        feature_columns=[language],
        optimizer=tf.train.FtrlOptimizer(learning_rate=1.0,
                                         l1_regularization_strength=100.))
    loss_no_reg = classifier_no_reg.fit(
        input_fn=input_fn, steps=100).evaluate(
            input_fn=input_fn, steps=1)['loss']
    loss_with_reg = classifier_with_reg.fit(
        input_fn=input_fn, steps=100).evaluate(
            input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss_no_reg, loss_with_reg)

linear_test.py (project: lsdc, author: febert)
def testSdcaOptimizerSparseFeatures(self):
    """Tests LinearClasssifier with SDCAOptimizer and sparse features."""

    def input_fn():
      return {
          'example_id': tf.constant(['1', '2', '3']),
          'price': tf.constant([[0.4], [0.6], [0.3]]),
          'country': tf.SparseTensor(values=['IT', 'US', 'GB'],
                                     indices=[[0, 0], [1, 3], [2, 1]],
                                     shape=[3, 5]),
          'weights': tf.constant([[1.0], [1.0], [1.0]])
      }, tf.constant([[1], [0], [1]])

    price = tf.contrib.layers.real_valued_column('price')
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    sdca_optimizer = tf.contrib.linear_optimizer.SDCAOptimizer(
        example_id_column='example_id')
    classifier = tf.contrib.learn.LinearClassifier(
        feature_columns=[price, country],
        weight_column_name='weights',
        optimizer=sdca_optimizer)
    classifier.fit(input_fn=input_fn, steps=50)
    scores = classifier.evaluate(input_fn=input_fn, steps=1)
    self.assertGreater(scores['accuracy'], 0.9)

linear_test.py (project: lsdc, author: febert)
def testEval(self):
    """Tests that eval produces correct metrics.
    """

    def input_fn():
      return {
          'age': tf.constant([[1], [2]]),
          'language': tf.SparseTensor(values=['greek', 'chinese'],
                                      indices=[[0, 0], [1, 0]],
                                      shape=[2, 1]),
      }, tf.constant([[1], [0]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    age = tf.contrib.layers.real_valued_column('age')
    classifier = tf.contrib.learn.LinearClassifier(
        feature_columns=[age, language])

    # Evaluate on the trained model.
    classifier.fit(input_fn=input_fn, steps=100)
    classifier.evaluate(input_fn=input_fn, steps=1)

    # TODO(ispir): Enable accuracy check after resolving the randomness issue.
    # self.assertLess(evaluated_values['loss/mean'], 0.3)
    # self.assertGreater(evaluated_values['accuracy/mean'], .95)

linear_test.py (project: lsdc, author: febert)
def testRegression(self):
    """Tests that loss goes down with training."""

    def input_fn():
      return {
          'age': tf.constant([1]),
          'language': tf.SparseTensor(values=['english'],
                                      indices=[[0, 0]],
                                      shape=[1, 1])
      }, tf.constant([[10.]])

    language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
    age = tf.contrib.layers.real_valued_column('age')

    regressor = tf.contrib.learn.LinearRegressor(
        feature_columns=[age, language])
    regressor.fit(input_fn=input_fn, steps=100)
    loss1 = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
    regressor.fit(input_fn=input_fn, steps=200)
    loss2 = regressor.evaluate(input_fn=input_fn, steps=1)['loss']

    self.assertLess(loss2, loss1)
    self.assertLess(loss2, 0.5)

svm_test.py (project: lsdc, author: febert)
def testSparseFeatures(self):
    """Tests SVM classifier with (hashed) sparse features."""

    def input_fn():
      return {
          'example_id': tf.constant(['1', '2', '3']),
          'price': tf.constant([[0.8], [0.6], [0.3]]),
          'country': tf.SparseTensor(
              values=['IT', 'US', 'GB'],
              indices=[[0, 0], [1, 0], [2, 0]],
              shape=[3, 1]),
      }, tf.constant([[0], [1], [1]])

    price = tf.contrib.layers.real_valued_column('price')
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    svm_classifier = tf.contrib.learn.SVM(feature_columns=[price, country],
                                          example_id_column='example_id',
                                          l1_regularization=0.0,
                                          l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    accuracy = svm_classifier.evaluate(input_fn=input_fn, steps=1)['accuracy']
    self.assertAlmostEqual(accuracy, 1.0, places=3)

feature_column_ops_test.py (project: lsdc, author: febert)
def testWeightedSparseColumn(self):
    ids = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer"])
    ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
                                 indices=[[0, 0], [1, 0], [1, 1]],
                                 shape=[2, 2])
    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
    weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
                                     indices=[[0, 0], [1, 0], [1, 1]],
                                     shape=[2, 2])
    features = {"ids": ids_tensor,
                "weights": weights_tensor}
    output = feature_column_ops._Transformer(features).transform(weighted_ids)
    with self.test_session():
      tf.initialize_all_tables().run()
      self.assertAllEqual(output[0].shape.eval(), ids_tensor.shape.eval())
      self.assertAllEqual(output[0].indices.eval(), ids_tensor.indices.eval())
      self.assertAllEqual(output[0].values.eval(), [2, 2, 0])
      self.assertAllEqual(output[1].shape.eval(), weights_tensor.shape.eval())
      self.assertAllEqual(output[1].indices.eval(),
                          weights_tensor.indices.eval())
      self.assertEqual(output[1].values.dtype, tf.float32)
      self.assertAllEqual(output[1].values.eval(), weights_tensor.values.eval())

feature_column_ops_test.py (project: lsdc, author: febert)
def testCrossColumn(self):
    language = tf.contrib.layers.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=3)
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_language = tf.contrib.layers.crossed_column(
        [language, country], hash_bucket_size=15)
    features = {
        "language": tf.SparseTensor(values=["english", "spanish"],
                                    indices=[[0, 0], [1, 0]],
                                    shape=[2, 1]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [1, 0]],
                                   shape=[2, 1])
    }
    output = feature_column_ops._Transformer(features).transform(
        country_language)
    with self.test_session():
      self.assertEqual(output.values.dtype, tf.int64)
      self.assertTrue(all(x < 15 and x >= 0 for x in output.values.eval()))

feature_column_ops_test.py (project: lsdc, author: febert)
def testCrossWithBucketizedColumn(self):
    price_bucket = tf.contrib.layers.bucketized_column(
        tf.contrib.layers.real_valued_column("price"),
        boundaries=[0., 10., 100.])
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_price = tf.contrib.layers.crossed_column(
        [country, price_bucket], hash_bucket_size=15)
    features = {
        "price": tf.constant([[20.]]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [0, 1]],
                                   shape=[1, 2])
    }
    output = feature_column_ops._Transformer(features).transform(country_price)
    with self.test_session():
      self.assertEqual(output.values.dtype, tf.int64)
      self.assertTrue(all(x < 15 and x >= 0 for x in output.values.eval()))

feature_column_ops_test.py (project: lsdc, author: febert)
def testCrossWithCrossedColumn(self):
    price_bucket = tf.contrib.layers.bucketized_column(
        tf.contrib.layers.real_valued_column("price"),
        boundaries=[0., 10., 100.])
    country = tf.contrib.layers.sparse_column_with_hash_bucket(
        "country", hash_bucket_size=5)
    country_price = tf.contrib.layers.crossed_column(
        [country, price_bucket], hash_bucket_size=15)
    wire = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
    wire_country_price = tf.contrib.layers.crossed_column(
        [wire, country_price], hash_bucket_size=15)
    features = {
        "price": tf.constant([[20.]]),
        "country": tf.SparseTensor(values=["US", "SV"],
                                   indices=[[0, 0], [0, 1]],
                                   shape=[1, 2]),
        "wire": tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                indices=[[0, 0], [0, 1], [0, 2]],
                                shape=[1, 3])
    }
    output = feature_column_ops._Transformer(features).transform(
        wire_country_price)
    with self.test_session():
      self.assertEqual(output.values.dtype, tf.int64)
      self.assertTrue(all(x < 15 and x >= 0 for x in output.values.eval()))

feature_column_ops_test.py (project: lsdc, author: febert)
def testAllDNNColumns(self):
    sparse_column = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])

    real_valued_column = tf.contrib.layers.real_valued_column("income", 2)
    one_hot_column = tf.contrib.layers.one_hot_column(sparse_column)
    embedding_column = tf.contrib.layers.embedding_column(sparse_column, 10)
    features = {
        "ids": tf.SparseTensor(
            values=["c", "b", "a"],
            indices=[[0, 0], [1, 0], [2, 0]],
            shape=[3, 1]),
        "income": tf.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]])
    }
    output = tf.contrib.layers.input_from_feature_columns(features,
                                                          [one_hot_column,
                                                           embedding_column,
                                                           real_valued_column])
    with self.test_session():
      tf.initialize_all_variables().run()
      tf.initialize_all_tables().run()
      self.assertAllEqual(output.eval().shape, [3, 2 + 4 + 10])

feature_column_ops_test.py (project: lsdc, author: febert)
def testOneHotColumnFromWeightedSparseColumnFails(self):
    ids_column = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])
    ids_tensor = tf.SparseTensor(
        values=["c", "b", "a", "c"],
        indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
        shape=[3, 2])
    weighted_ids_column = tf.contrib.layers.weighted_sparse_column(ids_column,
                                                                   "weights")
    weights_tensor = tf.SparseTensor(
        values=[10.0, 20.0, 30.0, 40.0],
        indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
        shape=[3, 2])
    features = {"ids": ids_tensor, "weights": weights_tensor}
    one_hot_column = tf.contrib.layers.one_hot_column(weighted_ids_column)
    with self.test_session():
      tf.initialize_all_variables().run()
      tf.initialize_all_tables().run()
      with self.assertRaisesRegexp(
          ValueError,
          "one_hot_column does not yet support weighted_sparse_column"):
        _ = tf.contrib.layers.input_from_feature_columns(features,
                                                         [one_hot_column])

feature_column_ops_test.py (project: lsdc, author: febert)
def testOneHotColumnFromMultivalentSparseColumnWithKeysSucceedsForDNN(self):
    ids_column = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["a", "b", "c", "unseen"])
    ids_tensor = tf.SparseTensor(
        values=["c", "b", "a", "c"],
        indices=[[0, 0], [1, 0], [2, 0], [2, 1]],
        shape=[3, 2])
    one_hot_sparse = tf.contrib.layers.one_hot_column(ids_column)
    features = {"ids": ids_tensor}
    output = tf.contrib.layers.input_from_feature_columns(features,
                                                          [one_hot_sparse])

    with self.test_session():
      tf.initialize_all_variables().run()
      tf.initialize_all_tables().run()
      self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]],
                          output.eval())

feature_column_ops_test.py (project: lsdc, author: febert)
def testHashedEmbeddingColumnSucceedsForDNN(self):
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo", "omar"],
                                  indices=[[0, 0], [1, 0], [1, 1], [2, 0]],
                                  shape=[3, 2])

    features = {"wire": wire_tensor}
    # Big enough hash space so that hopefully there is no collision
    embedded_sparse = tf.contrib.layers.hashed_embedding_column("wire", 1000, 3)
    output = tf.contrib.layers.input_from_feature_columns(
        features, [embedded_sparse], weight_collections=["my_collection"])
    weights = tf.get_collection("my_collection")
    grad = tf.gradients(output, weights)
    with self.test_session():
      tf.initialize_all_variables().run()
      gradient_values = []
      # Collect the gradient from the different partitions (one in this test).
      # Row 1 contains two ids whose embeddings are averaged, so each of their
      # 3 embedding parameters gets a gradient of 0.5 (6 values); "omar"
      # appears alone in rows 0 and 2, so its 3 parameters accumulate a
      # gradient of 2 (3 values).
      for p in range(len(grad)):
        gradient_values.extend(grad[p].values.eval())
      gradient_values.sort()
      self.assertAllEqual(gradient_values, [0.5]*6 + [2]*3)

feature_column_ops_test.py (project: lsdc, author: febert)
def testEmbeddingColumnWithInitializerSucceedsForDNN(self):
    hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                  indices=[[0, 0], [1, 0], [1, 1]],
                                  shape=[2, 2])
    features = {"wire": wire_tensor}
    init_value = 133.7
    embedded_sparse = tf.contrib.layers.embedding_column(
        hashed_sparse,
        10, initializer=tf.constant_initializer(init_value))
    output = tf.contrib.layers.input_from_feature_columns(features,
                                                          [embedded_sparse])

    with self.test_session():
      tf.initialize_all_variables().run()
      output_eval = output.eval()
      self.assertAllEqual(output_eval.shape, [2, 10])
      self.assertAllClose(output_eval, np.tile(init_value, [2, 10]))

feature_column_ops_test.py (project: lsdc, author: febert)
def testEmbeddingColumnWithMultipleInitializersFails(self):
    hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                  indices=[[0, 0], [1, 0], [1, 1]],
                                  shape=[2, 2])
    features = {"wire": wire_tensor}
    embedded_sparse = tf.contrib.layers.embedding_column(
        hashed_sparse,
        10,
        initializer=tf.truncated_normal_initializer(mean=42,
                                                    stddev=1337))
    embedded_sparse_alternate = tf.contrib.layers.embedding_column(
        hashed_sparse,
        10,
        initializer=tf.truncated_normal_initializer(mean=1337,
                                                    stddev=42))

    # Makes sure that trying to use different initializers with the same
    # embedding column explicitly fails.
    with self.test_session():
      with self.assertRaisesRegexp(
          ValueError,
          "Duplicate feature column key found for column: wire_embedding"):
        tf.contrib.layers.input_from_feature_columns(
            features, [embedded_sparse, embedded_sparse_alternate])

feature_column_ops_test.py (project: lsdc, author: febert)
def testEmbeddingColumnWithCrossedColumnSucceedsForDNN(self):
    a = tf.contrib.layers.sparse_column_with_hash_bucket("aaa",
                                                         hash_bucket_size=100)
    b = tf.contrib.layers.sparse_column_with_hash_bucket("bbb",
                                                         hash_bucket_size=100)
    crossed = tf.contrib.layers.crossed_column(
        set([a, b]), hash_bucket_size=10000)
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                  indices=[[0, 0], [1, 0], [1, 1]],
                                  shape=[2, 2])
    features = {"aaa": wire_tensor, "bbb": wire_tensor}
    embedded_sparse = tf.contrib.layers.embedding_column(crossed, 10)
    output = tf.contrib.layers.input_from_feature_columns(features,
                                                          [embedded_sparse])
    with self.test_session():
      tf.initialize_all_variables().run()
      self.assertAllEqual(output.eval().shape, [2, 10])

feature_column_ops_test.py (project: lsdc, author: febert)
def testWeightedSparseColumnFailsForDNN(self):
    ids = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer"])
    ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
                                 indices=[[0, 0], [1, 0], [1, 1]],
                                 shape=[2, 2])
    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
    weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
                                     indices=[[0, 0], [1, 0], [1, 1]],
                                     shape=[2, 2])
    features = {"ids": ids_tensor,
                "weights": weights_tensor}
    with self.test_session():
      with self.assertRaisesRegexp(
          ValueError,
          "Error creating input layer for column: ids_weighted_by_weights"):
        tf.initialize_all_tables().run()
        tf.contrib.layers.input_from_feature_columns(features, [weighted_ids])

feature_column_ops_test.py (project: lsdc, author: febert)
def testCrossedColumnFailsForDNN(self):
    a = tf.contrib.layers.sparse_column_with_hash_bucket("aaa",
                                                         hash_bucket_size=100)
    b = tf.contrib.layers.sparse_column_with_hash_bucket("bbb",
                                                         hash_bucket_size=100)
    crossed = tf.contrib.layers.crossed_column(
        set([a, b]), hash_bucket_size=10000)
    wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                  indices=[[0, 0], [1, 0], [1, 1]],
                                  shape=[2, 2])
    features = {"aaa": wire_tensor, "bbb": wire_tensor}
    with self.test_session():
      with self.assertRaisesRegexp(
          ValueError, "Error creating input layer for column: aaa_X_bbb"):
        tf.initialize_all_variables().run()
        tf.contrib.layers.input_from_feature_columns(features, [crossed])

feature_column_ops_test.py (project: lsdc, author: febert)
def testDeepColumnsSucceedForDNN(self):
    real_valued = tf.contrib.layers.real_valued_column("income", 3)
    bucket = tf.contrib.layers.bucketized_column(
        tf.contrib.layers.real_valued_column("price", 2),
        boundaries=[0., 10., 100.])
    hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
    features = {
        "income": tf.constant([[20., 10, -5], [110, 0, -7], [-3, 30, 50]]),
        "price": tf.constant([[20., 200], [110, 2], [-20, -30]]),
        "wire": tf.SparseTensor(values=["omar", "stringer", "marlo"],
                                indices=[[0, 0], [1, 0], [2, 0]],
                                shape=[3, 1])
    }
    embedded_sparse = tf.contrib.layers.embedding_column(
        hashed_sparse,
        10, initializer=tf.constant_initializer(133.7))
    output = tf.contrib.layers.input_from_feature_columns(
        features, [real_valued, bucket, embedded_sparse])
    with self.test_session():
      tf.initialize_all_variables().run()
      # size of output = 3 (real_valued) + 2 * 4 (bucket) + 10 (embedding) = 21
      self.assertAllEqual(output.eval().shape, [3, 21])

feature_column_ops_test.py (project: lsdc, author: febert)
def testEmbeddingColumnWithWeightedSparseColumnForDNN(self):
    ids = tf.contrib.layers.sparse_column_with_keys(
        "ids", ["marlo", "omar", "stringer"])
    ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
                                 indices=[[0, 0], [1, 0], [1, 1]],
                                 shape=[2, 2])
    weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
    weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
                                     indices=[[0, 0], [1, 0], [1, 1]],
                                     shape=[2, 2])
    features = {"ids": ids_tensor,
                "weights": weights_tensor}
    embedded_sparse = tf.contrib.layers.embedding_column(
        weighted_ids, 1, combiner="sum", initializer=init_ops.ones_initializer)
    output = tf.contrib.layers.input_from_feature_columns(features,
                                                          [embedded_sparse])
    with self.test_session():
      tf.initialize_all_variables().run()
      tf.initialize_all_tables().run()
      # With a ones initializer and combiner="sum", each score is the sum of
      # the example's weights: 10 for the first row and 20 + 30 = 50 for the second.
      self.assertAllEqual(output.eval(), [[10.], [50.]])

