def testNGrams(self):
    # Verifies `mappers.ngrams` on character tokens with n-gram sizes 1..5 and
    # an empty separator: every expected value is a contiguous substring of
    # its input string, at most five characters long.
    inputs = tf.constant(['abc', 'def', 'fghijklm', 'z', ''])
    # Splitting with an empty delimiter yields the individual characters of
    # each string as tokens.
    char_tokens = tf.string_split(inputs, delimiter='')
    ngram_output = mappers.ngrams(
        tokens=char_tokens, ngram_range=(1, 5), separator='')

    # Expected n-grams, one list per input string (the row of the sparse
    # output). The empty input string contributes no entries.
    expected_rows = [
        ['a', 'ab', 'abc', 'b', 'bc', 'c'],
        ['d', 'de', 'def', 'e', 'ef', 'f'],
        [
            'f', 'fg', 'fgh', 'fghi', 'fghij',
            'g', 'gh', 'ghi', 'ghij', 'ghijk',
            'h', 'hi', 'hij', 'hijk', 'hijkl',
            'i', 'ij', 'ijk', 'ijkl', 'ijklm',
            'j', 'jk', 'jkl', 'jklm',
            'k', 'kl', 'klm',
            'l', 'lm',
            'm',
        ],
        ['z'],
        [],
    ]
    # Flatten the table into [row, column] coordinates and the matching
    # values, both in row-major order.
    flat_indices = [
        [row, col]
        for row, grams in enumerate(expected_rows)
        for col, _ in enumerate(grams)
    ]
    flat_values = [gram for grams in expected_rows for gram in grams]

    self.assertSparseOutput(
        expected_indices=flat_indices,
        expected_values=flat_values,
        # 5 input strings; the widest row ('fghijklm') holds 30 n-grams.
        expected_shape=[5, 30],
        actual_sparse_tensor=ngram_output,
        close_values=False)
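# Page-navigation residue from the source web page (not part of the code):
# "评论列表" (comment list), "文章目录" (table of contents).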