def randomly_grouped_by(key_from_example: Callable[[LabeledExample], Any], training_share: float = .9) -> Callable[
[List[LabeledExample]], Tuple[List[LabeledExample], List[LabeledExample]]]:
def split(examples: List[LabeledExample]) -> Tuple[List[LabeledExample], List[LabeledExample]]:
examples_by_directory = group(examples, key=key_from_example)
directories = examples_by_directory.keys()
# split must be the same every time:
random.seed(42)
keys = set(random.sample(directories, int(training_share * len(directories))))
training_examples = [example for example in examples if key_from_example(example) in keys]
test_examples = [example for example in examples if key_from_example(example) not in keys]
return training_examples, test_examples
return split
评论列表
文章目录