def __init__(self, base_directory: Path):
    """Set up the "german-speechdata-package-v2" corpus.

    Every setting is handed to the parent initializer as a keyword argument;
    nothing else is done here.

    :param base_directory: forwarded unchanged to the parent initializer as
        ``base_directory``.
    """
    # Exclude those 7 audio files because the first 2 are corrupt, the last 5 are empty.
    excluded_ids = (
        "2014-03-24-13-39-24_Kinect-RAW",
        "2014-03-27-11-50-33_Kinect-RAW",
        "2014-03-18-15-34-19_Realtek",
        "2014-06-17-13-46-27_Kinect-RAW",
        "2014-06-17-13-46-27_Realtek",
        "2014-06-17-13-46-27_Samson",
        "2014-06-17-13-46-27_Yamaha",
    )

    # One negative lookahead per excluded id, so that a string starting with
    # any of them is rejected; the trailing group then captures the whole string.
    rejections = "".join("(?!^" + excluded_id + ")" for excluded_id in excluded_ids)
    id_filter = re.compile(rejections + "(^.*$)")

    super().__init__(
        corpus_name="german-speechdata-package-v2",
        base_directory=base_directory,
        base_source_url_or_directory="http://www.repository.voxforge1.org/downloads/de/",
        tar_gz_extension=".tar.gz",
        subdirectory_depth=1,
        umlaut_decoder=UmlautDecoder.none,
        training_test_split=TrainingTestSplit.by_directory(),
        tags_to_ignore=[],
        id_filter_regex=id_filter,
    )
评论列表
文章目录