def make_copy(num_examples, min_len, max_len):
    """
    Generates a dataset where the target is equal to the source.

    Sequence lengths are chosen randomly from [min_len, max_len] (both ends
    inclusive). Tokens are sampled with replacement from the module-level
    VOCABULARY using NumPy's global random state, so seeding `np.random`
    makes the output reproducible.

    Args:
      num_examples: Number of examples to generate
      min_len: Minimum sequence length
      max_len: Maximum sequence length

    Returns:
      An iterator of (source, target) string tuples. Each string is the
      space-joined token sequence, and target is identical to source.
    """
    # The candidate lengths and the vocabulary list are the same for every
    # example, so build them once rather than on each iteration.
    candidate_lengths = np.arange(min_len, max_len + 1)
    vocabulary = list(VOCABULARY)

    for _ in range(num_examples):
        turn_length = np.random.choice(candidate_lengths)
        source_tokens = np.random.choice(
            vocabulary, size=turn_length, replace=True)
        # Copy task: the target is the source, so join once and reuse it.
        source = " ".join(source_tokens)
        yield source, source
评论列表
文章目录