def clarin_corpora_sorted_by_size(base_directory: Path) -> List[GermanClarinCorpus]:
    """Return the fixed list of German CLARIN corpora located under ``base_directory``.

    The order of the returned list is hard-coded, not computed; going by the
    function name it is meant to reflect corpus size (direction of the
    ordering is not visible here -- confirm before relying on it).

    :param base_directory: Directory handed unchanged to every corpus
        factory/constructor.
    :return: Ten corpora: SC1, PD2, ZIPTEL, SC10, HEMPEL, PD1, VM1, RVG-J,
        ALC and VM2, in that order.
    """
    # Corpora that have dedicated factory functions.
    sc1_corpus = sc1(base_directory)
    pd2_corpus = pd2(base_directory)
    ziptel_corpus = ziptel(base_directory)
    sc10_corpus = sc10(base_directory)

    # Corpora that need nothing beyond their CLARIN name.
    hempel_corpus = GermanClarinCorpus(
        "all.HEMPEL.4.cmdi.11610.1490680796", base_directory)
    pd1_corpus = GermanClarinCorpus(
        "all.PD1.3.cmdi.16312.1490681066", base_directory)

    # VM1 is restricted by an id filter regex and used for training only.
    vm1_corpus = GermanClarinCorpus(
        "all.VM1.3.cmdi.1508.1490625070",
        base_directory,
        id_filter_regex=vm1_id_german_filter_regex,
        training_test_split=TrainingTestSplit.training_only)

    rvg_j_corpus = GermanClarinCorpus(
        "all.RVG-J.1.cmdi.18181.1490681704", base_directory)

    # ALC is split by groups keyed on the first three characters of each
    # entry's id (presumably so related entries stay on the same side of
    # the split -- confirm against TrainingTestSplit.randomly_grouped_by).
    alc_split = TrainingTestSplit.randomly_grouped_by(lambda entry: entry.id[:3])
    alc_corpus = GermanClarinCorpus(
        "all.ALC.4.cmdi.16602.1490632862",
        base_directory,
        training_test_split=alc_split)

    # VM2 mirrors VM1: id filter regex plus training-only usage.
    vm2_corpus = GermanClarinCorpus(
        "all.VM2.3.cmdi.4260.1490625316",
        base_directory,
        id_filter_regex=vm2_id_german_filter_regex,
        training_test_split=TrainingTestSplit.training_only)

    return [
        sc1_corpus,
        pd2_corpus,
        ziptel_corpus,
        sc10_corpus,
        hempel_corpus,
        pd1_corpus,
        vm1_corpus,
        rvg_j_corpus,
        alc_corpus,
        vm2_corpus,
    ]
评论列表
文章目录