def _sort_dataset_by_padding(dataset: Dataset,
                             sorting_keys: List[Tuple[str, str]],  # pylint: disable=invalid-sequence-index
                             padding_noise: float = 0.0) -> Dataset:
    """
    Returns a new ``Dataset`` holding the ``Instances`` of ``dataset`` in ascending order of
    their padding lengths.

    ``sorting_keys`` is a list of ``(field_name, padding_key)`` tuples.  Each instance is
    ranked by the list of padding lengths those tuples select, compared in the order the
    tuples are given, so a later key only breaks ties left by the earlier ones.  Instances
    with equal keys keep their original relative order.

    When ``padding_noise`` is greater than zero, the lengths of every field are passed
    through ``add_noise_to_dict_values`` before the sort key is read from them.
    """
    def sort_key(instance) -> List[float]:
        # The key is built exactly once per instance, in dataset order, so the noise helper
        # is invoked in the same sequence regardless of how the sort proceeds.
        lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths())
        if padding_noise > 0.0:
            lengths = {name: add_noise_to_dict_values(per_field, padding_noise)
                       for name, per_field in lengths.items()}
        return [lengths[field_name][padding_key] for field_name, padding_key in sorting_keys]

    return Dataset(sorted(dataset.instances, key=sort_key))
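# Web-page navigation residue, not part of the module:
# "评论列表" (comment list), "文章目录" (article table of contents).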