MFCC_old.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:Speaker_recognition 作者: Mgajurel 项目源码 文件源码
def extract(self, signal, filename):
    """Compute MFCC features for ``signal`` and write them to ``filename`` as CSV.

    Processing steps, in order: channel averaging (for multi-dimensional
    input), pre-emphasis, framing with zero padding, Hamming windowing,
    power spectrum, filterbank projection, log, DCT-II, sinusoidal
    liftering and, optionally, replacing column 0 with the log frame energy.

    Settings read from ``self``:
        FRAME_LEN, FRAME_STEP: frame length and hop, used as sample counts.
        PRE_EMP: pre-emphasis coefficient.
        NFFT: FFT size handed to ``np.fft.rfft``.
        num_ceps: number of leading cepstral coefficients to keep.
        cep_lifter: liftering parameter; ``0`` disables liftering.
        appendEnergy: when true, column 0 becomes the log frame energy.
        get_filterbanks(): must return a matrix that ``np.dot(pspec, fbank)``
            accepts, i.e. with one row per rFFT bin (``NFFT // 2 + 1``).
        dprint(msg): used for informational / warning messages.

    Args:
        signal: NumPy array of samples. If it has more than one dimension,
            axis 1 is averaged away (treated as the channel axis).
        filename: destination passed straight to ``np.savetxt``; one row per
            frame, comma separated, formatted with ``%.8f``.

    Raises:
        AssertionError: if the signal does not contain more than
            ``5 * FRAME_LEN`` samples.
    """
    if signal.ndim > 1:
        self.dprint("INFO: Input signal has more than 1 channel; the channels will be averaged.")
        signal = np.mean(signal, axis=1)

    # Raised explicitly (instead of using ``assert``) so the check still runs
    # under ``python -O``; exception type and message are unchanged.
    if len(signal) <= 5 * self.FRAME_LEN:
        raise AssertionError("Signal too short!")

    # Pre-emphasis: y[0] = x[0], y[n] = x[n] - PRE_EMP * x[n - 1]
    signal = np.append(signal[0], signal[1:] - self.PRE_EMP * signal[:-1])

    # Framing. The length check above guarantees the signal is longer than a
    # single frame, so no special case for a one-frame signal is needed.
    signal_length = len(signal)
    num_frames = 1 + int(math.ceil((1.0 * signal_length - self.FRAME_LEN) / self.FRAME_STEP))

    # Zero-pad so that the last frame is complete.
    pad_signal_length = int((num_frames - 1) * self.FRAME_STEP + self.FRAME_LEN)
    padding = np.zeros((pad_signal_length - signal_length,))
    pad_signal = np.concatenate((signal, padding))

    # indices[i, j] is the sample index of the j-th sample of the i-th frame.
    within_frame = np.arange(0, self.FRAME_LEN)
    frame_starts = np.arange(num_frames) * self.FRAME_STEP
    indices = (frame_starts[:, np.newaxis] + within_frame[np.newaxis, :]).astype(np.int32)
    frames = pad_signal[indices]

    # Apply a Hamming window to every frame. Out-of-place multiply so an
    # integer-typed frame matrix is promoted to float instead of raising.
    frames = frames * np.hamming(self.FRAME_LEN)

    # Magnitude spectrum
    frame_width = np.shape(frames)[1]
    if frame_width > self.NFFT:
        self.dprint("Warning, frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid."%(frame_width, self.NFFT))
    mag_frames = np.absolute(np.fft.rfft(frames, self.NFFT))

    # Power spectrum
    pspec = (1.0 / self.NFFT) * (mag_frames ** 2)

    # Exact zeros are replaced by machine epsilon so the logs below stay finite.
    eps = np.finfo(float).eps
    pspec = np.where(pspec == 0, eps, pspec)

    # Total energy of each frame.
    energy = np.sum(pspec, 1)
    energy = np.where(energy == 0, eps, energy)

    # Filterbank energies
    fbank = self.get_filterbanks()
    filter_banks = np.dot(pspec, fbank)
    filter_banks = np.where(filter_banks == 0, eps, filter_banks)

    # MFCC: DCT-II of the log filterbank energies, keeping only the first
    # ``num_ceps`` coefficients (indices 0 .. num_ceps - 1).
    log_banks = np.log(filter_banks)
    mfcc = dct(log_banks, type=2, axis=1, norm='ortho')[:, :self.num_ceps]

    # Sinusoidal liftering. A lifter of 0 would divide by zero and turn every
    # coefficient into NaN, so it is treated as "liftering disabled".
    if self.cep_lifter != 0:
        n = np.arange(np.shape(mfcc)[1])
        lift = 1 + (self.cep_lifter / 2.0) * np.sin(np.pi * n / self.cep_lifter)
        mfcc = mfcc * lift

    if self.appendEnergy:
        # Replace the first cepstral coefficient with the log of the frame energy.
        mfcc[:, 0] = np.log(energy)

    np.savetxt(filename, mfcc, fmt='%.8f', delimiter=',')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号