def preprocess_input(audio_path, dim_ordering='default'):
'''Reads an audio file and outputs a Mel-spectrogram.
'''
if dim_ordering == 'default':
dim_ordering = K.image_dim_ordering()
assert dim_ordering in {'tf', 'th'}
if librosa_exists():
import librosa
else:
raise RuntimeError('Librosa is required to process audio files.\n' +
'Install it via `pip install librosa` \nor visit ' +
'http://librosa.github.io/librosa/ for details.')
# mel-spectrogram parameters
SR = 12000
N_FFT = 512
N_MELS = 96
HOP_LEN = 256
DURA = 29.12
src, sr = librosa.load(audio_path, sr=SR)
n_sample = src.shape[0]
n_sample_wanted = int(DURA * SR)
# trim the signal at the center
if n_sample < n_sample_wanted: # if too short
src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
elif n_sample > n_sample_wanted: # if too long
src = src[(n_sample - n_sample_wanted) / 2:
(n_sample + n_sample_wanted) / 2]
logam = librosa.logamplitude
melgram = librosa.feature.melspectrogram
x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
n_fft=N_FFT, n_mels=N_MELS) ** 2,
ref_power=1.0)
if dim_ordering == 'th':
x = np.expand_dims(x, axis=0)
elif dim_ordering == 'tf':
x = np.expand_dims(x, axis=3)
return x
audio_conv_utils.py 文件源码
python
阅读 17
收藏 0
点赞 0
评论 0
评论列表
文章目录