impl_test.py 文件源码

python
阅读 29 收藏 0 点赞 0 评论 0

项目:transform 作者: tensorflow 项目源码 文件源码
def testStringToTFIDFEmptyDoc(self):
    """Checks tf-idf output when one of the input documents is empty.

    Four documents are tokenized, mapped to integer ids and passed to
    `tft.tfidf`; the second document is the empty string and is expected to
    produce empty 'tf_idf' and 'index' lists.
    """
    def preprocessing_fn(inputs):
      inputs_as_ints = tft.string_to_int(tf.string_split(inputs['a']))
      # The inputs contain six distinct tokens; 6 is presumably the vocabulary
      # size expected by tft.tfidf -- confirm against its signature.
      out_index, out_values = tft.tfidf(inputs_as_ints, 6)
      return {
          'tf_idf': out_values,
          'index': out_index
      }
    input_data = [{'a': 'hello hello world'},
                  {'a': ''},
                  {'a': 'hello goodbye hello world'},
                  {'a': 'I like pie pie pie'}]
    input_schema = dataset_metadata.DatasetMetadata({
        'a': sch.ColumnSchema(tf.string, [], sch.FixedColumnRepresentation())
    })

    # NOTE: despite the names, these constants numerically equal
    # 1 + ln(5/2) and 1 + ln(5/3). Presumably this is the smoothed idf,
    # 1 + log((N + 1) / (df + 1)), with N = 4 documents and df = 1 or 2 --
    # confirm against the tft.tfidf documentation.
    log_5_over_2 = 1.91629073187
    log_5_over_3 = 1.51082562376
    # Term-frequency fractions use float literals so the expected values do not
    # collapse to 0 under Python 2 integer division when
    # `from __future__ import division` is not in effect.
    expected_transformed_data = [{
        'tf_idf': [(2.0/3)*log_5_over_3, (1.0/3)*log_5_over_3],
        'index': [0, 2]
    }, {
        # Empty document: no terms, hence no indices and no weights.
        'tf_idf': [],
        'index': []
    }, {
        'tf_idf': [(2.0/4)*log_5_over_3, (1.0/4)*log_5_over_3,
                   (1.0/4)*log_5_over_2],
        'index': [0, 2, 4]
    }, {
        'tf_idf': [(3.0/5)*log_5_over_2, (1.0/5)*log_5_over_2,
                   (1.0/5)*log_5_over_2],
        'index': [1, 3, 5]
    }]
    expected_transformed_schema = dataset_metadata.DatasetMetadata({
        'tf_idf': sch.ColumnSchema(tf.float32, [None],
                                   sch.ListColumnRepresentation()),
        'index': sch.ColumnSchema(tf.int64, [None],
                                  sch.ListColumnRepresentation())
    })
    self.assertAnalyzeAndTransformResults(
        input_data, input_schema, preprocessing_fn, expected_transformed_data,
        expected_transformed_schema)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号