def testUniquesAnalyzerWithTokenization(self):
  """Tests string_to_int applied to tokens produced by tf.string_split."""
  def preprocessing_fn(inputs):
    # Split each space-delimited string in column 'a', then map every token
    # to its integer id in the generated vocabulary.
    return {
        'index': tft.string_to_int(tf.string_split(inputs['a']))
    }

  input_metadata = dataset_metadata.DatasetMetadata({
      'a': sch.ColumnSchema(tf.string, [], sch.FixedColumnRepresentation())
  })
  input_data = [
      {'a': 'hello hello world'},
      {'a': 'hello goodbye world'},
  ]
  # Per the expected indices below, the vocabulary maps
  # hello -> 0, world -> 1, goodbye -> 2 (out-of-vocab would be -1,
  # matching the IntDomain lower bound in expected_metadata).
  expected_data = [
      {'index': [0, 0, 1]},
      {'index': [0, 2, 1]},
  ]
  expected_metadata = dataset_metadata.DatasetMetadata({
      'index': sch.ColumnSchema(
          sch.IntDomain(tf.int64, -1, 2, True,
                        'vocab_string_to_int_uniques'),
          [None], sch.ListColumnRepresentation())
  })

  self.assertAnalyzeAndTransformResults(
      input_data, input_metadata, preprocessing_fn, expected_data,
      expected_metadata)