def __init__(self,
             audio_file: Path,
             id: Optional[str] = None,
             sample_rate_to_convert_to: int = 16000,
             label: Optional[str] = "nolabel",
             fourier_window_length: int = 512,
             hop_length: int = 128,
             mel_frequency_count: int = 128,
             label_with_tags: Optional[str] = None,
             positional_label: Optional[PositionalLabel] = None):
    """Create an example whose raw audio is read lazily from ``audio_file``.

    Args:
        audio_file: Path of the audio file; stored as ``self.audio_file``.
        id: Identifier passed to the base initializer. When ``None``, it is
            derived via ``name_without_extension(audio_file)``.
        sample_rate_to_convert_to: Passed to the base initializer as
            ``sample_rate``.
        label: Passed through to the base initializer (default ``"nolabel"``).
        fourier_window_length: Passed through to the base initializer.
        hop_length: Passed through to the base initializer.
        mel_frequency_count: Passed through to the base initializer.
        label_with_tags: Passed through to the base initializer; may be
            ``None``.
        positional_label: Passed through to the base initializer; may be
            ``None``.
    """
    # The default values for hop_length and fourier_window_length are powers
    # of 2 near the values specified in the Wav2Letter paper.
    if id is None:
        id = name_without_extension(audio_file)

    # Must be assigned before the loader below is ever invoked, because the
    # loader reads self.audio_file at call time rather than capturing the
    # argument.
    self.audio_file = audio_file

    super().__init__(
        id=id,
        # Deferred load: nothing is read from disk here. librosa.load returns
        # an (audio, sample_rate) pair; only the audio is kept.
        # NOTE(review): self.sample_rate is not assigned in this method -
        # presumably the base initializer sets it from `sample_rate=` below;
        # confirm against the base class.
        get_raw_audio=lambda: librosa.load(str(self.audio_file), sr=self.sample_rate)[0],
        label=label,
        sample_rate=sample_rate_to_convert_to,
        fourier_window_length=fourier_window_length,
        hop_length=hop_length,
        mel_frequency_count=mel_frequency_count,
        label_with_tags=label_with_tags,
        positional_label=positional_label)
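# NOTE: the two lines originally here ("comment list" / "article table of contents")
# were navigation text scraped from a web page, not part of the program.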