def _mfcc_and_labels(audio, labels):
    """Convert audio to MFCC features and matching per-frame labels.

    MFCCs are computed with 25 ms Hamming windows at a rate of 100 frames
    per second, over the band 85 Hz .. Nyquist (``timit.SAMPLE_RATE / 2``).
    Each frame is then given the label of the audio sample nearest in time
    to the frame's start.

    Args:
        audio: Array of audio samples at ``timit.SAMPLE_RATE``.
            Presumably 1-D (mono) -- the time axis below is built from
            ``audio.shape[0]`` and ``audio.size``, which only agree for
            1-D input.
        labels: Sequence with one label per audio sample (same length as
            ``audio``).

    Returns:
        A tuple ``(mfcc_features, mfcc_labels)``.
        ``mfcc_features`` is the array returned by
        ``python_speech_features.mfcc``, with one entry per frame along its
        first axis. ``mfcc_labels`` is a 1-D array holding one label per
        frame, in the same dtype as ``labels``.
    """
    mfcc_sample_rate = 100.0  # MFCC frames per second (i.e. 10 ms hop).
    mfcc_features = python_speech_features.mfcc(
        audio,
        samplerate=timit.SAMPLE_RATE,
        winlen=0.025,
        winstep=1.0 / mfcc_sample_rate,
        lowfreq=85.0,
        highfreq=timit.SAMPLE_RATE / 2,
        winfunc=np.hamming,
    )
    num_frames = mfcc_features.shape[0]

    # Time stamps (seconds) of every audio sample and of every frame start.
    t_audio = np.linspace(0.0, audio.shape[0] * 1.0 / timit.SAMPLE_RATE,
                          audio.size, endpoint=False)
    t_mfcc = np.linspace(0.0, num_frames * 1.0 / mfcc_sample_rate,
                         num_frames, endpoint=False)

    labels = np.asarray(labels)
    interp_func = scipy.interpolate.interp1d(t_audio, labels, kind='nearest')
    # interp1d promotes integer data to float64. Nearest-neighbour lookup
    # only ever returns values already present in `labels`, so casting back
    # to the input dtype is lossless and yields the integer labels callers
    # expect.
    mfcc_labels = interp_func(t_mfcc).astype(labels.dtype)
    return mfcc_features, mfcc_labels
评论列表
文章目录