def compute_spectrograms(filename):
    """Yield fixed-width log-mel spectrogram segments for an audio file.

    The file is loaded as mono audio resampled to 12 kHz, turned into a mel
    spectrogram (``N_MEL_BANDS`` bands, 512-sample FFT, 256-sample hop),
    converted to a log-amplitude scale, and then cut into consecutive,
    non-overlapping slices of ``SEGMENT_DUR`` spectrogram frames. A trailing
    remainder shorter than ``SEGMENT_DUR`` frames is dropped.

    Args:
        filename: Audio file location, passed straight to ``librosa.load``.

    Yields:
        2-D slices ``x[:, start:start + SEGMENT_DUR]`` of the log-mel
        spectrogram, in order of increasing ``start``.

    Raises:
        ValueError: If the loaded audio holds fewer than three seconds of
            samples. Because this function is a generator, the error
            surfaces on the first iteration, not when it is called.
    """
    out_rate = 12000
    n_fft = 512
    hop_len = 256
    min_seconds = 3

    # The sample rate returned by librosa.load equals ``out_rate`` here, so
    # it is not needed again.
    frames, _ = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate * min_seconds:
        # Less than 3 seconds of audio cannot be processed.
        raise ValueError("Audio duration is too short")

    # ``librosa.logamplitude`` was removed in librosa 0.6; ``power_to_db`` is
    # its replacement (``ref_power`` was renamed to ``ref``). Prefer the old
    # name when it exists so behaviour on old installs is untouched.
    if hasattr(librosa, "logamplitude"):
        def to_log(power):
            return librosa.logamplitude(power, ref_power=1.0)
    else:
        def to_log(power):
            return librosa.power_to_db(power, ref=1.0)

    mel = librosa.feature.melspectrogram(
        y=frames,
        sr=out_rate,
        hop_length=hop_len,
        n_fft=n_fft,
        n_mels=N_MEL_BANDS,
    )
    # NOTE(review): melspectrogram appears to return a power spectrogram by
    # default already; the extra squaring is kept so existing outputs do not
    # change -- confirm whether it is intentional.
    x = to_log(mel ** 2)

    # Walk the spectrogram with a stride of one segment; only full-width
    # segments are produced.
    for start_idx in range(0, x.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield x[:, start_idx:start_idx + SEGMENT_DUR]
# Comment list
# Table of contents