preprocess_audio.py 文件源码

python
阅读 17 收藏 0 点赞 0 评论 0

项目:coversongs-dual-convnet 作者: markostam 项目源码 文件源码
def create_feature_matrix_spark(song_files, desired_spect_len=2580):
    """Compute fixed-width log-CQT features for audio files using Spark.

    Each file is loaded with ``librosa.load``, converted to a log-power
    constant-Q spectrogram rescaled into ``uint16``, and then truncated or
    zero-padded along axis 1 to ``desired_spect_len`` columns. Files that
    fail to load are reported on stderr and skipped.

    The Spark context is read from the name ``sc``, which must already be
    defined outside this function.

    Args:
        song_files: Iterable of audio file paths.
        desired_spect_len: Number of columns every returned spectrogram is
            truncated or padded to. Defaults to 2580, the value that was
            previously hard-coded.

    Returns:
        A list of ``(basename, spectrogram)`` tuples as produced by
        ``RDD.collect()``; an empty list when ``song_files`` is empty.
    """
    # Materialise once so generators are supported and emptiness can be
    # checked without consuming the input.
    song_files = list(song_files)
    if not song_files:
        # Nothing to process: avoid scheduling a Spark job at all.
        return []

    # cqt wrapper
    def log_cqt(y, sr):
        """Return the log-power CQT of signal ``y`` rescaled to ``uint16``."""
        C = librosa.cqt(y=y, sr=sr, hop_length=512, fmin=None,
                        n_bins=84, bins_per_octave=12, tuning=None,
                        filter_scale=1, norm=1, sparsity=0.01, real=True)
        # get log-power spectrogram with noise floor of -80dB
        C = librosa.logamplitude(C**2, ref_power=np.max)
        # scale log-power spectrogram to positive integer values for a
        # smaller footprint
        noise_floor_db = 80
        # NOTE(review): under Python 2 this is integer division (819), under
        # Python 3 it is true division (819.1875) -- confirm which scale the
        # downstream consumers expect before changing it.
        scaling_factor = (2**16 - 1)/noise_floor_db
        C += noise_floor_db
        C *= scaling_factor
        return C.astype('uint16')

    # padding wrapper
    def padding(C, target_len):
        """Truncate or zero-pad ``C`` along axis 1 to ``target_len`` columns."""
        if C.shape[1] >= target_len:
            return C[:, 0:target_len]
        missing = target_len - C.shape[1]
        return np.pad(C, ((0, 0), (0, missing)), 'constant')

    # load try-catch wrapper
    def try_load(filename):
        """Load ``filename`` with librosa; return ``None`` if loading fails."""
        try:
            sys.stdout.write('Processing: %s \r' % os.path.basename(filename))
            sys.stdout.flush()
            return librosa.load(filename)
        except Exception as exc:
            # Best effort: one unreadable file must not abort the whole job,
            # but the failure should be visible rather than silently dropped.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            sys.stderr.write('Skipping %s: %s\n' % (filename, exc))
            return None

    # transformations
    filesRDD = sc.parallelize(song_files)
    rawAudioRDD = (
        filesRDD
        .map(lambda path: (os.path.basename(path), try_load(path)))
        # try_load returns None for files that could not be loaded
        .filter(lambda item: item[1] is not None)
    )
    # item[1] is whatever librosa.load returned; its first two elements are
    # passed on as (y, sr)
    rawCQT = rawAudioRDD.map(
        lambda item: (item[0], log_cqt(item[1][0], item[1][1])))
    paddedCQT = rawCQT.map(
        lambda item: (item[0], padding(item[1], desired_spect_len)))
    return paddedCQT.collect()
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号