def _generate_spectrograms(self):
    """Compute and cache perceptually weighted mel spectrograms.

    For every row of ``self.meta`` the clip ``<data_dir>audio/<filename>``
    is loaded with ``load_audio``, turned into a mel spectrogram
    (``self.FFT``, ``self.HOP``, ``self.BANDS``, ``self.FMAX``), passed
    through ``librosa.core.perceptual_weighting`` and written to
    ``self.work_dir`` as two float16 ``.npy`` files:

    * ``<filename>.mel.spec.npy``    -- the weighted spectrogram
    * ``<filename>.mel.spec.ds.npy`` -- the same array mean-pooled over
      blocks of 3 (first axis) by 2 (second axis)

    A clip is skipped only when *both* output files already exist, so a
    run interrupted between the two writes is repaired on the next call
    instead of leaving the reduced file missing forever.
    """
    sample_rate = 44100

    # The mel band frequencies depend only on self.BANDS / self.FMAX, so
    # compute them once rather than once per clip.
    freqs = librosa.core.mel_frequencies(n_mels=self.BANDS, fmax=self.FMAX)

    for row in tqdm(self.meta.itertuples(), total=len(self.meta)):
        specfile = self.work_dir + row.filename + '.mel.spec.npy'
        # Strip the trailing '.npy' and append the down-sampled suffix.
        reduced_specfile = specfile[:-4] + '.ds.npy'

        if os.path.exists(specfile) and os.path.exists(reduced_specfile):
            continue

        # Second argument is presumably the target sample rate -- confirm
        # against load_audio's definition.
        audio = load_audio(self.data_dir + 'audio/' + row.filename, sample_rate)
        # Disabled alternative: peak-normalise the waveform first.
        # audio *= 1.0 / np.max(np.abs(audio))

        # NOTE(review): the positional `audio` argument and the
        # `ref_power=` keyword follow an older librosa API; newer releases
        # appear to expect `y=` and `ref=` -- confirm against the librosa
        # version this project pins before upgrading.
        spec = librosa.feature.melspectrogram(audio, sr=sample_rate, n_fft=self.FFT,
                                              fmax=self.FMAX, hop_length=self.HOP,
                                              n_mels=self.BANDS)
        # Disabled alternative: plain log-amplitude scaling.
        # spec = librosa.logamplitude(spec)
        spec = librosa.core.perceptual_weighting(spec, freqs, ref_power=np.max)

        reduced_spec = skim.measure.block_reduce(spec, block_size=(3, 2), func=np.mean)

        np.save(specfile, spec.astype('float16'), allow_pickle=False)
        np.save(reduced_specfile, reduced_spec.astype('float16'), allow_pickle=False)
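# 评论列表 ("comment list") -- web-page navigation residue, not part of the code
# 文章目录 ("table of contents") -- web-page navigation residue, not part of the code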