python类dct()的实例源码

audioFeaGen.py 文件源码 项目:UWdepressionX 作者: jyfeather 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def a_dct(l, m):
    """Return the ``m`` largest-magnitude type-II DCT coefficients of ``l``.

    The coefficients are ordered by decreasing absolute value (ties keep
    their original index order) and returned as a plain Python list.
    """
    coeffs = dct(l, type=2)
    # Negated magnitudes: an ascending, stable sort then lists the largest
    # coefficients first.
    neg_mags = [-abs(c) for c in coeffs]
    ranking = sorted(range(len(neg_mags)), key=neg_mags.__getitem__)
    strongest = ranking[:m]
    return coeffs[strongest].tolist()
mel_coefficients.py 文件源码 项目:Speaker-Recognition 作者: orchidas 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def mfcc(s, fs, nfiltbank):
    """Compute mel-frequency cepstral coefficients of a 1-D signal.

    The signal is cut into 25 ms frames whose neighbours overlap by 10 ms
    (i.e. a 15 ms hop). Each frame is Hamming-windowed, its 512-point
    magnitude spectrum is weighted by the mel filterbank, and the log10
    filterbank outputs are transformed with a DCT across the filters.

    :param s: 1-D sequence of samples
    :param fs: sampling rate in Hz
    :param nfiltbank: number of mel filters (one output coefficient each)
    :return: array of shape (nfiltbank, nFrames); row 0 (the 0th-order
             coefficient) is set to zero
    :raises ValueError: if ``fs`` is too small to give a positive hop size
    """
    # Frame geometry in samples; plain ints so they can be used as shapes
    # and slice bounds on any Python/numpy version.
    nSamples = int(0.025 * fs)
    overlap = int(0.01 * fs)
    step = nSamples - overlap
    if step <= 0:
        raise ValueError("fs is too small to form analysis frames")
    # Explicit float division: plain `/` floors under Python 2.
    nFrames = int(np.ceil(len(s) / float(step)))

    # Zero-pad so the LAST frame (which starts at (nFrames-1)*step) still
    # has nSamples samples available.
    padding = (nFrames - 1) * step + nSamples - len(s)
    if nFrames > 0 and padding > 0:
        signal = np.append(s, np.zeros(padding))
    else:
        signal = s

    # Frame i starts at i*step. (Previously the start offset was updated
    # after use, so frames 0 and 1 were identical and later frames lagged.)
    segment = np.empty((nSamples, nFrames))
    for i in range(nFrames):
        start = i * step
        segment[:, i] = signal[start:start + nSamples]

    # One-sided magnitude spectrum: bins 0 .. nfft/2 of the unshifted FFT.
    # NOTE(review): assumes row j of mel_filterbank's output corresponds to
    # FFT bin j (0 Hz first) -- confirm against mel_filterbank.
    nfft = 512
    nbins = nfft // 2 + 1
    window = hamming(nSamples)
    periodogram = np.empty((nFrames, nbins))
    for i in range(nFrames):
        spectrum = fft(segment[:, i] * window, nfft)
        periodogram[i, :] = np.abs(spectrum[:nbins]) / nSamples

    # Filterbank outputs: mel_coeff[i, k] = sum_j periodogram[k, j] * fbank[j, i]
    fbank = mel_filterbank(nfft, nfiltbank, fs)
    mel_coeff = np.dot(periodogram, np.asarray(fbank)[:, :nfiltbank]).T

    # Floor before the log so silent (all-zero) frames do not yield -inf.
    mel_coeff = np.log10(np.maximum(mel_coeff, np.finfo(float).eps))
    # The DCT must run across the filters (axis 0), not across time.
    mel_coeff = dct(mel_coeff, axis=0)
    # exclude 0th order coefficient (much larger than others)
    mel_coeff[0, :] = 0
    return mel_coeff
celebA_estimators.py 文件源码 项目:csgm 作者: AshishBora 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def dct2(image_channel):
    """Apply an orthonormal DCT along both axes of a 2-D image channel.

    The transform is done as two 1-D passes, transposing in between so that
    each pass operates on a different axis of the input.
    """
    # First pass on the transposed input, i.e. along the original first axis.
    first_pass = fftpack.dct(image_channel.T, norm='ortho')
    # Transpose back and transform along the remaining axis.
    second_pass = fftpack.dct(first_pass.T, norm='ortho')
    return second_pass
test_cnn.py 文件源码 项目:CNN_denoise 作者: weedwind 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def gen_post(feat_list, stat_file, model, win_size_before=15, win_size_after=15, num_targets=31):
    """Run ``model`` over every feature file in ``feat_list`` and save cepstra.

    For each non-empty line (a file path) of ``feat_list``: the features are
    read, mean/variance normalised with the statistics from ``stat_file``,
    expanded by ``org_data`` and scored by the model in blocks of 100 rows.
    The scores are DCT-transformed, first and second deltas are appended,
    and the result is written next to the input with ``.fea`` replaced by
    ``.mfc``.

    ``eps``, ``gpu_dtype`` and ``numcep`` are read from the enclosing module.

    :raises Exception: if the mean or variance vector cannot be read
    """
    chunk_len = 100   # rows per forward pass, to prevent gpu out of memory

    def _forward(chunk):
        # Score one block of rows and hand the result back as a numpy array.
        tensor = torch.from_numpy(chunk).type(gpu_dtype)
        inputs = Variable(tensor.type(gpu_dtype), volatile=True)
        return model(inputs).data.cpu().numpy()

    # Put the model in test mode (the opposite of model.train(), essentially)
    model.eval()

    m, v = read_mv(stat_file)
    if m is None or v is None:
        raise Exception("mean or variance vector does not exist")

    with open(feat_list) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            print("generating features for file", line)

            reader = htk_io.fopen(line)
            utt_feat = reader.getall()
            utt_feat -= m                        # normalize mean
            utt_feat /= (np.sqrt(v) + eps)       # normalize var
            feat_numpy = org_data(utt_feat, win_size_before, win_size_after)
            out_feat = np.zeros((utt_feat.shape[0], num_targets))

            n_full, num_remain = divmod(feat_numpy.shape[0], chunk_len)
            # Full-size blocks first ...
            for start_idx in range(0, n_full * chunk_len, chunk_len):
                end_idx = start_idx + chunk_len
                out_feat[start_idx:end_idx] = _forward(feat_numpy[start_idx:end_idx])
            # ... then whatever is left over, aligned to the end of the arrays.
            if num_remain > 0:
                out_feat[-num_remain:] = _forward(feat_numpy[-num_remain:])

            # Drop DCT coefficient 0, keep the next `numcep`, append deltas.
            cepstra = dct(out_feat, type=2, axis=1, norm='ortho')[:, 1:numcep + 1]
            cepstra_delta = delta(cepstra, 2)
            cepstra_ddelta = delta(cepstra_delta, 2)
            stacked = np.concatenate((cepstra, cepstra_delta, cepstra_ddelta), axis=1)

            out_file = line.replace(".fea", ".mfc")
            writer = htk_io.fopen(out_file, mode="wb", veclen=stacked.shape[1])
            writer.writeall(stacked)
            print("features saved in %s\n" % out_file)
MFCC_old.py 文件源码 项目:Speaker_recognition 作者: Mgajurel 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def extract(self, signal, filename):
    """Compute MFCC features for ``signal`` and write them to ``filename``.

    Steps: average multi-channel input, pre-emphasise, split into
    Hamming-windowed frames, take the power spectrum, apply the filterbank
    from ``self.get_filterbanks()``, take the log and an orthonormal DCT,
    keep the first ``self.num_ceps`` coefficients, lifter them, and
    optionally replace coefficient 0 with the log frame energy.

    :param signal: numpy array of samples; 2-D input is averaged over axis 1
    :param filename: destination passed to ``np.savetxt`` (comma-separated,
                     one row per frame)
    :return: None; the features are only written to ``filename``
    """
    if signal.ndim > 1:
        self.dprint("INFO: Input signal has more than 1 channel; the channels will be averaged.")
        signal = mean(signal, axis=1)
    assert len(signal) > 5 * self.FRAME_LEN, "Signal too short!"

    # Pre-emphasis: y[0] = x[0], y[n] = x[n] - PRE_EMP * x[n-1]
    signal = np.append(signal[0], signal[1:] - self.PRE_EMP * signal[:-1])

    # Framing: zero-pad so the last frame is complete.
    signal_length = len(signal)
    if signal_length <= self.FRAME_LEN:
        num_frames = 1
    else:
        num_frames = 1 + int(math.ceil((1.0 * signal_length - self.FRAME_LEN) / self.FRAME_STEP))

    pad_signal_length = int((num_frames - 1) * self.FRAME_STEP + self.FRAME_LEN)
    tail = np.zeros((pad_signal_length - signal_length,))
    pad_signal = np.concatenate((signal, tail))

    # indices[f, j] = f * FRAME_STEP + j. The frame starts are built from an
    # integer arange so there are always exactly num_frames of them, even if
    # FRAME_STEP is a float.
    offsets = np.arange(0, self.FRAME_LEN)
    starts = np.arange(num_frames) * self.FRAME_STEP
    indices = (starts[:, np.newaxis] + offsets[np.newaxis, :]).astype(np.int32)
    frames = pad_signal[indices]

    # Windowing: pass every frame through a Hamming window.
    frames *= np.hamming(self.FRAME_LEN)

    # Magnitude spectrum
    if np.shape(frames)[1] > self.NFFT:
        self.dprint("Warning, frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid."%(np.shape(frames)[1], self.NFFT))
    mag_frames = np.absolute(np.fft.rfft(frames, self.NFFT))

    # Power spectrum; exact zeros are replaced so the logs below stay finite.
    pspec = (1.0 / self.NFFT) * (mag_frames ** 2)
    pspec = np.where(pspec == 0, np.finfo(float).eps, pspec)

    # Total energy per frame
    energy = np.sum(pspec, 1)
    energy = np.where(energy == 0, np.finfo(float).eps, energy)

    # Filter bank
    fbank = self.get_filterbanks()
    filter_banks = np.dot(pspec, fbank)
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)

    # MFCC: log filterbank energies -> DCT, keep coefficients 0 .. num_ceps-1
    filter_banks = np.log(filter_banks)
    ceps = dct(filter_banks, type=2, axis=1, norm='ortho')[:, :self.num_ceps]

    # Sinusoidal liftering. Skipped when cep_lifter is 0, which would
    # otherwise divide by zero and turn every coefficient into NaN.
    if self.cep_lifter != 0:
        n = np.arange(np.shape(ceps)[1])
        # 2.0 keeps the half-lifter exact for odd integer values on Python 2.
        lift = 1 + (self.cep_lifter / 2.0) * np.sin(np.pi * n / self.cep_lifter)
        ceps *= lift

    if self.appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        ceps[:, 0] = np.log(energy)
    np.savetxt(filename, ceps, fmt='%.8f', delimiter=',')
preprocessing.py 文件源码 项目:ip-avsr 作者: lzuwei 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def compute_dct_features(X, image_shape, no_coeff=30, method='zigzag'):
    """
    Compute DCT features for a batch of flattened images.

    An orthonormal DCT (``fft.dct`` with its default type and axis) is taken
    over ``X`` and ``no_coeff`` coefficients per row are selected.
    NOTE(review): this is a 1-D DCT over each flattened row, not a 2-D DCT of
    the reshaped image -- confirm that this is intended.

    :param X: 2-D array, one flattened ('c' order) image per row
    :param image_shape: shape each row is reshaped to (used by 'zigzag' only)
    :param no_coeff: number of coefficients to extract
    :param method: how coefficients are chosen; the first coefficient is
        always skipped:
        'zigzag'       - coefficients 1..no_coeff of ``zigzag(image)`` per row
        'variance'     - columns with the largest standard deviation
        'rel_variance' - as 'variance', computed on the mean-removed columns
        'energy'       - columns with the largest summed absolute value
    :return: array of shape (len(X), no_coeff) for 'zigzag'; for the other
        methods, the selected columns of the DCT (at most no_coeff of them)
    :raises NotImplementedError: if ``method`` is not one of the above
    """
    X_dct = fft.dct(X, norm='ortho')

    if method == 'zigzag':
        out = np.zeros((len(X_dct), no_coeff), dtype=X_dct.dtype)
        for i, row in enumerate(X_dct):
            out[i] = zigzag(row.reshape(image_shape))[1:no_coeff + 1]
        return out

    if method not in ('rel_variance', 'variance', 'energy'):
        raise NotImplementedError(
            "method not implemented, use only 'zigzag', 'variance', "
            "'rel_variance', 'energy'")

    # All remaining methods rank the columns (frequencies) by a per-column
    # score after discarding the first coefficient.
    X_dct = X_dct[:, 1:]
    if method == 'rel_variance':
        # standard deviation of the mean-normalized coefficients
        score = np.std(X_dct - np.mean(X_dct, 0), 0)
    elif method == 'variance':
        score = np.std(X_dct, 0)
    else:  # 'energy'
        score = np.sum(np.abs(X_dct), 0)

    # Highest score first, keep the top no_coeff columns.
    idxs = np.argsort(score)[::-1][:no_coeff]
    return X_dct[:, idxs]


问题


面经


文章

微信
公众号

扫码关注公众号