def load(corpus_csv_file: Path,
         sampled_training_example_count: Optional[int] = None) -> 'Corpus':
    """Build a Corpus from a CSV index file.

    Every non-empty row must contain exactly five columns, in this order:
    id, audio file path, label, phase, positional label.

    * A relative audio file path is resolved against the directory that
      contains ``corpus_csv_file``; an absolute path is used unchanged.
    * The phase column is looked up by name via ``Phase[phase]``. Rows whose
      phase is ``Phase.training`` become training examples, rows whose phase
      is ``Phase.test`` become test examples; rows with any other phase end
      up in neither list.
    * An empty positional label column is passed on as ``None``; anything
      else is parsed with ``PositionalLabel.deserialize``.

    Completely empty rows (for example a trailing blank line) are skipped.

    Args:
        corpus_csv_file: Path of the UTF-8 encoded CSV file to read.
        sampled_training_example_count: Forwarded unchanged to ``Corpus``.

    Returns:
        The ``Corpus`` built from the rows of the CSV file.

    Raises:
        ValueError: If a non-empty row does not have exactly five columns.
        KeyError: Presumably raised by ``Phase[phase]`` for an unknown phase
            name (assumes ``Phase`` is an ``Enum`` -- confirm).
    """
    import csv

    csv_directory = Path(corpus_csv_file.parent)

    def to_absolute(audio_file_path: Path) -> Path:
        # Relative audio paths are interpreted relative to the CSV file.
        if audio_file_path.is_absolute():
            return audio_file_path
        return csv_directory / audio_file_path

    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise newlines inside quoted fields can be
    # misinterpreted.
    with corpus_csv_file.open(encoding='utf8', newline='') as opened_csv:
        reader = csv.reader(opened_csv, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # csv.reader yields [] for blank lines; drop those before unpacking.
        non_empty_rows = (row for row in reader if row)
        # Materialise all rows while the file is still open.
        examples = [
            (
                LabeledExampleFromFile(
                    audio_file=to_absolute(Path(audio_file_path)),
                    id=example_id,
                    label=label,
                    positional_label=(
                        None if positional_label == ""
                        else PositionalLabel.deserialize(positional_label))),
                Phase[phase])
            for example_id, audio_file_path, label, phase, positional_label in non_empty_rows]

    return Corpus(training_examples=[e for e, phase in examples if phase == Phase.training],
                  test_examples=[e for e, phase in examples if phase == Phase.test],
                  sampled_training_example_count=sampled_training_example_count)
评论列表
文章目录