preprocess_audio.py 文件源码-python代码片段

def feature_extract(songfile_name, desire_spect_len=2580):
    """
    Compute a fixed-width, uint16-quantized log-power CQT for one audio file.

    takes:
        songfile_name: path to the audio file; a relative path is resolved
            against the current working directory, so the working directory
            is expected to contain the raw song files
        desire_spect_len: number of frames (columns) in the returned array;
            longer spectrograms are cropped at the end, shorter ones are
            zero-padded on the right (default 2580)
    returns:
        tuple (songfile_name, C) where C is a uint16 numpy array holding the
        log-power CQT (n_bins=84) rescaled from [-80 dB, 0 dB] to [0, 65535],
        with exactly desire_spect_len columns
    raises:
        ValueError if desire_spect_len is negative
    """
    # Validate before any I/O: a negative length would otherwise be treated
    # as a negative slice bound and silently drop frames from the end.
    if desire_spect_len < 0:
        raise ValueError(
            "desire_spect_len must be non-negative, got %r" % (desire_spect_len,))

    song_loc = os.path.abspath(songfile_name)
    y, sr = librosa.load(song_loc)
    C = librosa.cqt(y=y, sr=sr, hop_length=512, fmin=None,
                    n_bins=84, bins_per_octave=12, tuning=None,
                    filter_scale=1, norm=1, sparsity=0.01, real=False)

    # Log-power spectrogram in dB relative to the loudest bin. top_db is
    # passed explicitly so the floor always matches the offset added below
    # (previously this relied on the library default happening to be 80).
    noise_floor_db = 80
    C = librosa.logamplitude(C**2, ref_power=np.max, top_db=noise_floor_db)

    # Scale the log-power spectrogram to unsigned 16-bit integers for a
    # smaller footprint: shift [-floor, 0] dB to [0, floor], then stretch to
    # [0, 65535]. float() forces true division, so the scale factor is the
    # same under Python 2 integer-division semantics as under Python 3.
    uint16_max = 2**16 - 1
    scaling_factor = uint16_max / float(noise_floor_db)
    C += noise_floor_db
    C *= scaling_factor
    # Clip before casting so an out-of-range value saturates instead of
    # wrapping around in uint16.
    C = np.clip(C, 0, uint16_max).astype('uint16')

    # If the spectral representation is too long, crop it; otherwise zero-pad
    # on the right so every song yields the same number of columns.
    n_frames = C.shape[1]
    if n_frames >= desire_spect_len:
        C = C[:, :desire_spect_len]
    else:
        C = np.pad(C, ((0, 0), (0, desire_spect_len - n_frames)), 'constant')
    return songfile_name, C