def feature_extract(songfile_name, desire_spect_len=2580):
    """Compute a fixed-width, uint16-quantised log-power CQT for one audio file.

    The file is loaded with ``librosa.load``, transformed with an 84-bin
    constant-Q transform (12 bins per octave, hop length 512), converted to
    dB relative to its own peak, shifted and scaled onto the ``uint16``
    range, and finally cropped or zero-padded along the time axis.

    **Assumes a relative ``songfile_name`` can be resolved from the current
    working directory** (it is expanded with ``os.path.abspath``).

    Args:
        songfile_name: Path of the audio file to analyse.
        desire_spect_len: Number of time frames (columns) in the returned
            array. Defaults to 2580, the value that used to be hard-coded.

    Returns:
        A tuple ``(songfile_name, C)`` where ``C`` is a ``uint16`` numpy
        array with 84 rows (one per CQT bin) and ``desire_spect_len``
        columns. The file name is returned unchanged so results can be
        matched back to their inputs.

    Raises:
        ValueError: If ``desire_spect_len`` is negative.
    """
    if desire_spect_len < 0:
        raise ValueError("desire_spect_len must be non-negative")

    song_loc = os.path.abspath(songfile_name)
    y, sr = librosa.load(song_loc)

    # NOTE(review): `librosa.logamplitude` and the `real=` keyword of
    # `librosa.cqt` appear to exist only in older librosa releases --
    # confirm against the librosa version this project pins.
    C = librosa.cqt(y=y, sr=sr, hop_length=512, fmin=None,
                    n_bins=84, bins_per_octave=12, tuning=None,
                    filter_scale=1, norm=1, sparsity=0.01, real=False)

    # Log-power spectrogram relative to the loudest bin, floored at -80 dB.
    # The floor is passed explicitly (instead of relying on the library
    # default) because the shift below depends on it: a lower floor would
    # leave negative values that wrap around in the unsigned cast.
    noise_floor_db = 80.0
    C = librosa.logamplitude(C**2, ref_power=np.max, top_db=noise_floor_db)

    # Map [-noise_floor_db, 0] dB onto [0, 65535] so the features can be
    # stored as uint16 for a smaller footprint. `noise_floor_db` is a float
    # so this is true division on Python 2 as well (integer division would
    # give 819 and never reach the top of the range).
    scaling_factor = (2**16 - 1) / noise_floor_db
    C += noise_floor_db
    C *= scaling_factor
    C = C.astype('uint16')

    # Force a fixed number of frames: crop when too long, otherwise
    # zero-pad on the right of the time axis.
    n_frames = C.shape[1]
    if n_frames >= desire_spect_len:
        C = C[:, :desire_spect_len]
    else:
        C = np.pad(C, ((0, 0), (0, desire_spect_len - n_frames)), 'constant')
    return songfile_name, C
preprocess_audio.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录