python类dct()的实例源码

linalg.py 文件源码 项目:sporco 作者: bwohlberg 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def dctii(x, axes=None):
    """
    Compute a multi-dimensional DCT-II over the specified array axes.

    The transform is built by applying the one-dimensional DCT-II
    :func:`scipy.fftpack.dct` (normalization mode 'ortho') along each
    requested axis in turn.

    Parameters
    ----------
    x : array_like
      Input array (must expose ``ndim`` when ``axes`` is None)
    axes : sequence of ints, optional (default None)
      Axes over which to compute the DCT-II. When None, every axis of
      ``x`` is transformed.

    Returns
    -------
    y : ndarray
      DCT-II of input array
    """

    selected = range(x.ndim) if axes is None else axes
    y = x
    for axis in selected:
        y = fftpack.dct(y, type=2, axis=axis, norm='ortho')
    return y
audio_tools.py 文件源码 项目:tools 作者: kastnerkyle 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def run_fft_dct_example():
    """
    Demo: perturb a speech signal in an STFT -> DCT domain and resynthesize.

    The first sample returned by ``fetch_sample_speech_fruit`` is passed
    through ``stft``, ``complex_to_real_view`` and an orthonormal DCT along
    the last axis. The row-to-row differences of the DCT output are
    perturbed with normally distributed noise scaled to 1% of each column's
    standard deviation, and the chain is then undone (cumulative sum,
    inverse DCT, ``real_to_complex_view``, ``istft``). An error figure in dB
    is printed and both signals are written to ``fftdct_orig.wav`` and
    ``fftdct_rec.wav`` in the current directory.
    """
    # Fixed seed so the injected noise is reproducible between runs
    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    # Only the first entry of the fetched data is used
    X = d[0]
    X_stft = stft(X, n_fft)
    X_rr = complex_to_real_view(X_stft)
    X_dct = fftpack.dct(X_rr, axis=-1, norm='ortho')
    # First differences along axis 0 (one row fewer than X_dct)
    X_dct_sub = X_dct[1:] - X_dct[:-1]
    std = X_dct_sub.std(axis=0, keepdims=True)
    # Add noise with 1% of the per-column standard deviation
    X_dct_sub += .01 * std * random_state.randn(
        X_dct_sub.shape[0], X_dct_sub.shape[1])
    # NOTE(review): the cumulative sum rebuilds X_dct[k + 1] - X_dct[0]; the
    # offset X_dct[0] is not added back -- confirm this is intended.
    X_dct_unsub = np.cumsum(X_dct_sub, axis=0)
    X_idct = fftpack.idct(X_dct_unsub, axis=-1, norm='ortho')
    X_irr = real_to_complex_view(X_idct)
    # Trim the resynthesized signal to the original length
    X_r = istft(X_irr, n_fft)[:len(X)]

    # NOTE(review): the ratio is error norm over signal norm, i.e. the
    # reciprocal of the usual SNR definition -- confirm the intended sign.
    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(X))
    wavfile.write("fftdct_rec.wav", fs, soundsc(X_r))
chebfft.py 文件源码 项目:spmpython 作者: mikaem 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def chebfft(v):
    """
    Differentiate data sampled at the Chebyshev points using the FFT.

    The samples ``v[j]`` are taken to be function values at
    ``x[j] = cos(pi*j/N)`` for ``j = 0..N`` with ``N = len(v) - 1``. The
    data is extended to an even periodic sequence in the angle variable,
    differentiated there with the FFT, and mapped back to ``x``; the two
    end points, where that mapping is singular, are handled by separate
    closed-form sums.

    Parameters
    ----------
    v : array_like
        One-dimensional sequence of ``N + 1`` real samples.

    Returns
    -------
    w : ndarray
        Array of length ``N + 1`` with the derivative values at the same
        points. A single sample (``N == 0``) yields ``zeros(1)``.
    """
    v = hstack([v])
    N = len(v) - 1
    if N == 0:
        # One sample carries no slope information; returning here also
        # avoids the divisions by N below.
        return zeros(1)

    x = cos(pi*arange(0, N+1)/N)
    # Integer wavenumbers 0..N-1, -N..-1 for the length-2N transform; the
    # entry at index N is zeroed before differentiating.
    K = fft.fftfreq(2*N, 0.5/N)
    K[N] = 0
    ii = arange(0, N)

    # Even extension of the samples (x -> theta) and its spectrum
    V = hstack([v, flipud(v[1:N])])
    U = real(fft.fft(V))
    # Derivative with respect to theta
    W = real(fft.ifft(1j*K*U))

    w = zeros(N+1)
    # Interior points: chain rule theta -> x
    w[1:N] = -W[1:N]/sqrt(1-x[1:N]**2)
    # End points x = 1 and x = -1
    w[0] = sum(ii**2*U[0:N])/float(N) + 0.5*N*U[N]
    w[N] = sum(((-1)**(ii+1))*(ii**2)*U[0:N])/float(N) + 0.5*(-1)**(N+1)*N*U[N]

    return w
qmf_realtime_class.py 文件源码 项目:ASP 作者: TUIlmenauAMS 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def DCT4(samples):
    """
        Method to create DCT4 transformation using DCT3

        The input is upsampled by two (values placed on the odd indices of
        a zero buffer), transformed with an orthonormal DCT-III, scaled by
        sqrt(2) and truncated to the input length.

        The block length is taken from ``samples`` itself instead of a
        module-level ``N``; the slice assignment below only ever accepted
        a full block of that length.

        Arguments   :

            samples : (1D Array) Input samples to be transformed

        Returns     :

            y       :  (1D Array) Transformed output samples, same length
                       as the input

    """

    block_len = len(samples)

    # Zero buffer of twice the block length (single precision)
    upsampled = np.zeros(2 * block_len, dtype=np.float32)
    # Upsample signal: the samples occupy the odd positions
    upsampled[1::2] = samples

    y = spfft.dct(upsampled, type=3, norm='ortho') * np.sqrt(2)

    return y[0:block_len]

#The DST4 transform:
base.py 文件源码 项目:Artificial-Intelligence-with-Python 作者: PacktPublishing 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    Steps: filterbank energies from ``fbank`` -> natural log -> orthonormal
    DCT-II along axis 1 -> first ``numcep`` columns -> ``lifter``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    fb_energies, frame_energy = fbank(signal, samplerate, winlen, winstep, nfilt,
                                      nfft, lowfreq, highfreq, preemph)
    log_fb = numpy.log(fb_energies)
    cepstra = dct(log_fb, type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
base.py 文件源码 项目:tfkaldi 作者: vrenkens 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def mfcc(signal, samplerate, conf):
    '''
    Compute MFCC features from an audio signal.

    The filterbank energies returned by ``fbank`` are log-compressed,
    transformed with an orthonormal DCT-II along axis 1, truncated to
    ``int(conf['numcep'])`` columns and passed through ``lifter`` with
    ``float(conf['ceplifter'])``.

    Args:
        signal: the audio signal from which to compute features. Should be an
            N*1 array
        samplerate: the samplerate of the signal we are working with.
        conf: feature configuration; the keys 'numcep' and 'ceplifter' are
            read here, and the whole mapping is forwarded to ``fbank``

    Returns:
        A numpy array of size (NUMFRAMES by numcep) containing features. Each
        row holds 1 feature vector, a numpy vector containing the signal
        log-energy
    '''

    num_cepstra = int(conf['numcep'])
    lifter_coeff = float(conf['ceplifter'])

    fb_energies, frame_energy = fbank(signal, samplerate, conf)
    cepstra = dct(numpy.log(fb_energies), type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :num_cepstra], lifter_coeff)
    log_energy = numpy.log(frame_energy)
    return cepstra, log_energy
base.py 文件源码 项目:aenet 作者: znaoya 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    The log of the ``fbank`` filterbank energies is decorrelated with an
    orthonormal DCT-II (axis 1); the leading ``numcep`` coefficients are kept
    and liftered.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    filterbank_out = fbank(signal, samplerate, winlen, winstep, nfilt, nfft,
                           lowfreq, highfreq, preemph)
    fb_energies, frame_energy = filterbank_out
    all_cepstra = dct(numpy.log(fb_energies), type=2, axis=1, norm='ortho')
    features = lifter(all_cepstra[:, :numcep], ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        features[:, 0] = numpy.log(frame_energy)
    return features
base.py 文件源码 项目:aenet 作者: znaoya 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def mfccVTLP(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True, alpha=1.0):
    """Compute MFCC features from an audio signal using ``fbankVTLP``.

    Same pipeline as a plain MFCC (log filterbank energies -> orthonormal
    DCT-II along axis 1 -> first ``numcep`` columns -> ``lifter``), except
    that the filterbank energies come from ``fbankVTLP``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param alpha: forwarded unchanged as the last argument of ``fbankVTLP`` (presumably the VTLP warping factor -- confirm against ``fbankVTLP``). Default is 1.0.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    # The literal False is the positional argument that precedes alpha in
    # fbankVTLP's signature; its meaning is defined there.
    fb_energies, frame_energy = fbankVTLP(signal, samplerate, winlen, winstep, nfilt,
                                          nfft, lowfreq, highfreq, preemph, False, alpha)
    log_fb = numpy.log(fb_energies)
    cepstra = dct(log_fb, type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
__init__.py 文件源码 项目:shenfun 作者: spectralDNS 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def clenshaw_curtis1D(u, quad="GC"):
    """Return the square root of a quadrature-weighted sum of ``u``.

    Parameters
    ----------
    u : ndarray
        One-dimensional array of values (asserted).
    quad : str, optional
        'GC' (default): the weights are the type-3 DCT of a sequence whose
        even entries are ``(2/N) / (1 - k**2)`` for even ``k``, and the
        result is ``sqrt(sum(u * w))``.
        'GL': ``u`` is transformed with a type-1 DCT scaled by ``1/(N-1)``
        and the even-indexed coefficients are combined with the weights
        ``1`` (index 0) and ``2 / (1 - k**2)``.

    Returns
    -------
    float
        The square root of the weighted sum, or None for any other
        ``quad`` value.
    """
    assert u.ndim == 1
    N = u.shape[0]

    if quad == 'GC':
        d = np.zeros(N)
        k = 2*(1 + np.arange((N-1)//2))
        d[::2] = (2./N)/np.hstack((1., 1.-k*k))
        weights = dct(d, type=3)
        return np.sqrt(np.sum(u*weights))

    if quad == 'GL':
        coeffs = dct(u, 1)
        coeffs /= (N-1)
        weights = np.arange(0, N, 1, dtype=float)
        weights[2:] = 2./(1-weights[2:]**2)
        weights[0] = 1
        # Odd-indexed coefficients do not contribute
        weights[1::2] = 0
        return np.sqrt(np.sum(coeffs*weights))

    return None
numpy_fft.py 文件源码 项目:mpiFFT4py 作者: spectralDNS 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def dct(a, b, type=2, axis=0, **kw):
    """Write the DCT of ``a`` into the preallocated array ``b`` and return it.

    Complex input is handled by transforming the real and imaginary parts
    separately with ``dct1``. Extra keyword arguments are accepted but not
    used.
    """
    if not iscomplexobj(a):
        b[:] = dct1(a, type=type, axis=axis)
        return b

    # Real and imaginary parts are transformed independently
    b.real[:] = dct1(a.real, type=type, axis=axis)
    b.imag[:] = dct1(a.imag, type=type, axis=axis)
    return b

# Define functions taking both input array and output array
MFCC.py 文件源码 项目:Speaker_recognition 作者: Mgajurel 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True,
         winfunc=lambda x:numpy.ones((x,))):
    """Compute MFCC features from an audio signal.

    Steps: filterbank energies from ``fbank`` -> natural log -> orthonormal
    DCT-II along axis 1 -> first ``numcep`` columns -> ``lifter``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """

    fb_energies, frame_energy = fbank(signal, samplerate, winlen, winstep, nfilt,
                                      nfft, lowfreq, highfreq, preemph, winfunc)
    log_fb = numpy.log(fb_energies)
    cepstra = dct(log_fb, type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
audio_tools.py 文件源码 项目:tools 作者: kastnerkyle 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def dct_compress(X, n_components, window_size=128):
    """
    Compress using the DCT

    The signal is zero-padded to a whole number of windows, cut into
    non-overlapping windows and each window is transformed with an
    orthonormal DCT; only the leading coefficients are kept.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        The input signal to compress. Should be 1-dimensional

    n_components : int or None
        The number of DCT components to keep. Setting n_components to about
        .5 * window_size can give compression with fairly good reconstruction.
        None keeps every coefficient.

    window_size : int
        The input X is broken into windows of window_size, each of which are
        then compressed with the DCT.

    Returns
    -------
    X_compressed : ndarray, shape=(num_windows, n_components)
       A 2D array of non-overlapping DCT coefficients (window_size columns
       when n_components is None). For use with uncompress

    Reference
    ---------
    http://nbviewer.ipython.org/github/craffel/crucialpython/blob/master/week3/stride_tricks.ipynb
    """
    leftover = len(X) % window_size
    if leftover != 0:
        # Zero-pad the tail so the signal fills the last window
        padding = np.zeros(window_size - leftover)
        X = np.hstack((X, padding))
    n_windows = len(X) // window_size
    windows = X.reshape((n_windows, window_size))
    coefficients = fftpack.dct(windows, norm='ortho')
    if n_components is None:
        return coefficients
    return coefficients[:, :n_components]
audio_tools.py 文件源码 项目:tools 作者: kastnerkyle 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def overlap_dct_compress(X, n_components, window_size):
    """
    Overlap (at 50% of window_size) and compress X.

    The windows are produced by ``halfoverlap`` and each one is transformed
    with an orthonormal DCT along its last axis.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to compress

    n_components : int or None
        number of DCT components to keep; None keeps all of them

    window_size : int
        Size of windows to take

    Returns
    -------
    X_dct : ndarray, shape=(n_windows, n_components)
        Windowed and compressed version of X
    """
    windows = halfoverlap(X, window_size)
    coefficients = fftpack.dct(windows, norm='ortho')
    if n_components is None:
        return coefficients
    return coefficients[:, :n_components]


# Evil voice is caused by adding double the zeros before inverse DCT...
# Very cool bug but makes sense
audio_tools.py 文件源码 项目:tools 作者: kastnerkyle 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def dct_compress(X, n_components, window_size=128):
    """
    Compress using the DCT

    X is padded with zeros up to a multiple of window_size, reshaped into
    consecutive windows, and every window is transformed with an
    orthonormal DCT. Only the first n_components coefficients of each
    window are returned.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        The input signal to compress. Should be 1-dimensional

    n_components : int or None
        The number of DCT components to keep. Setting n_components to about
        .5 * window_size can give compression with fairly good reconstruction.
        With None, nothing is discarded.

    window_size : int
        The input X is broken into windows of window_size, each of which are
        then compressed with the DCT.

    Returns
    -------
    X_compressed : ndarray, shape=(num_windows, n_components)
       A 2D array of non-overlapping DCT coefficients (window_size columns
       when n_components is None). For use with uncompress

    Reference
    ---------
    http://nbviewer.ipython.org/github/craffel/crucialpython/blob/master/week3/stride_tricks.ipynb
    """
    remainder = len(X) % window_size
    if remainder:
        # Fill the incomplete final window with zeros
        X = np.hstack((X, np.zeros(window_size - remainder)))
    frame_count = len(X) // window_size
    framed = X.reshape((frame_count, window_size))
    transformed = fftpack.dct(framed, norm='ortho')
    if n_components is not None:
        transformed = transformed[:, :n_components]
    return transformed
audio_tools.py 文件源码 项目:tools 作者: kastnerkyle 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def overlap_dct_compress(X, n_components, window_size):
    """
    Overlap (at 50% of window_size) and compress X.

    ``halfoverlap`` supplies the windows; an orthonormal DCT is applied to
    each window and the result is truncated to n_components columns.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to compress

    n_components : int or None
        number of DCT components to keep; None disables the truncation

    window_size : int
        Size of windows to take

    Returns
    -------
    X_dct : ndarray, shape=(n_windows, n_components)
        Windowed and compressed version of X
    """
    overlapped = halfoverlap(X, window_size)
    transformed = fftpack.dct(overlapped, norm='ortho')
    keep_all = n_components is None
    return transformed if keep_all else transformed[:, :n_components]


# Evil voice is caused by adding double the zeros before inverse DCT...
# Very cool bug but makes sense
test_audio_extract.py 文件源码 项目:tools 作者: kastnerkyle 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def run_world_dct_example():
    """
    Demo: analyse a speech sample, DCT-truncate its spectrogram and
    resynthesize, printing how long each stage takes.

    The sample from ``fetch_sample_speech_tapestry`` is analysed with
    ``harvest``, ``cheaptrick`` and ``d4c``. The resulting spectrogram is
    transformed with ``fftpack.dct``, cut to 20 coefficients, zero-padded
    and inverted. The signal produced by ``world_synthesis`` is written to
    ``out_dct.wav`` in the current directory.
    """
    # Reference timings recorded by the original author:
    # on chromebook
    # enc 114.229
    # synth 5.165
    fs, d = fetch_sample_speech_tapestry()
    # Scale by 2 ** 15 (presumably 16-bit PCM input -- confirm against the
    # fetch helper)
    d = d.astype("float32") / 2 ** 15

    def enc():
        # Run the three analysis stages; harvest's outputs feed both
        # cheaptrick and d4c.
        temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
        temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
                temporal_positions_h, f0_h, vuv_h)
        temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
                temporal_positions_h, f0_h, vuv_h)

        return spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c

    start = time.time()
    spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
    # DCT with scipy's default arguments
    dct_buf = fftpack.dct(spectrogram_ct)
    n_fft = 512
    n_dct = 20
    # Keep only the first n_dct coefficients of every row
    dct_buf = dct_buf[:, :n_dct]
    # Zero-pad back to n_fft + 1 columns (presumably the spectrogram width
    # -- confirm against cheaptrick's output)
    idct_buf = np.zeros((dct_buf.shape[0], n_fft + 1))
    idct_buf[:, :n_dct] = dct_buf
    # NOTE(review): ispectrogram_ct is never used below -- synthesis runs on
    # the original spectrogram_ct, so the DCT truncation does not affect the
    # written audio. Confirm whether that is intended.
    ispectrogram_ct = fftpack.idct(idct_buf)
    enc_done = time.time()

    y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, spectrogram_ct, fs)
    synth_done = time.time()

    print("enc time: {}".format(enc_done - start))
    print("synth time: {}".format(synth_done - enc_done))
    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
    wavfile.write("out_dct.wav", fs, soundsc(y))


#run_world_mgc_example()
#run_world_base_example()
soundfunc.py 文件源码 项目:SONGSHTR 作者: songeater 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def get_config():
    """Return a fresh dictionary with the hard-coded run settings.

    ``compressed_blocksize`` and ``win_edge`` are derived from
    ``blocksize``; ``save_file`` is derived from ``sound_file``.
    """
    sound_file = "harvestmoon-mono-hp500.wav"
    blocksize = 13000
    config = {
        'sound_file': sound_file,
        'save_file': sound_file + "_modelsave_",
        'blocksize': blocksize,
        'compressed_blocksize': (blocksize//2+1),
        'seqlen': 80,  # in number of blocks...
        'win_edge': int(blocksize / 2),
        'out_step': 1,  # in number of blocks...
        'batchsize': 5,  # in number of blocks...
        'domain': "rfft",  # either "rfft" or "dct"
    }
    if config['domain'] == "dct":
        # if dct, have to set this
        config['win_edge'] = int(config['blocksize'] / 2)
    return config
soundfunc.py 文件源码 项目:SONGSHTR 作者: songeater 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def conv_to_dct(signal, blocksize, edge, out_blocksize):
    """Cut ``signal`` into overlapping blocks and return their DCTs.

    Blocks of ``blocksize`` samples start every ``blocksize - edge``
    samples, for every start position below
    ``signal.shape[0] - blocksize - edge``. Each block is transformed with
    an orthonormal DCT and truncated to its first ``out_blocksize``
    coefficients.

    When ``out_blocksize`` is at least ``blocksize`` the blocks are returned
    untruncated. Previously that case returned an empty list and silently
    dropped every block.

    :param signal: 1-D array of samples
    :param blocksize: number of samples per block
    :param edge: number of samples shared by neighbouring blocks
    :param out_blocksize: number of DCT coefficients kept per block
    :returns: list of 1-D arrays, one per block
    """
    hop = blocksize - edge
    stop = signal.shape[0] - blocksize - edge
    return [
        # Slicing past the end is a no-op, so short blocks stay whole
        dct(signal[start:start + blocksize], norm='ortho')[0:out_blocksize]
        for start in range(0, stop, hop)
    ]
signal_data_features.py 文件源码 项目:PIE_ISAE_Essais_Vol 作者: YuanxiangFranck 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def get_dct(x, n_dct):
    """
    Return the n_dct largest-magnitude coefficients of the discrete
    cosine transform of each signal.

    ``x`` is transposed before the transform, so the DCT (scipy defaults:
    last axis) runs along the first axis of ``x``. Each row of the
    transposed array is sorted separately by decreasing absolute value.

    The previous version applied the per-row sort indices to the flattened
    coefficient array, so every output row was built from the first row's
    coefficients.

    :param x: two-dimensional array
    :param n_dct: number of coefficients to keep per row
    :returns: array of shape (x.shape[1], n_dct)
    """
    # Compute DCT coefficients (real)
    coeffs = dct(x.transpose())
    # Column order of decreasing absolute value, one ordering per row
    order = np.argsort(-np.abs(coeffs))
    # Pair every row index with that row's own ordering
    rows = np.arange(coeffs.shape[0])[:, np.newaxis]
    coeffs_sorted = coeffs[rows, order]
    # n_dct largest coefficients of each row
    return coeffs_sorted[:, :n_dct]
preprocessing.py 文件源码 项目:sbrt2017 作者: igormq 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def _call(self, signal):
        """Compute MFCC features from an audio signal.

        The parent class's ``_call`` provides the filterbank features and
        the frame energies. The features are log-compressed, transformed
        with an orthonormal DCT-II along axis 1, truncated to
        ``self.num_cep`` columns and liftered. Depending on ``self.d`` and
        ``self.dd``, the output of ``delta`` and of a second ``delta`` pass
        are appended as extra columns.

        Args:
            signal: the audio signal from which to compute features. Should be
            an N*1 array

        Returns:
            A numpy array of size (NUMFRAMES by numcep) containing features.
            Each row holds 1 feature vector.
        """
        fb_feat, energy = super(MFCC, self)._call(signal)

        cepstra = dct(np.log(fb_feat), type=2, axis=1, norm='ortho')
        cepstra = self._lifter(cepstra[:, :self.num_cep], self.cep_lifter)

        if self.append_energy:
            # replace first cepstral coefficient with log of frame energy
            cepstra[:, 0] = np.log(energy + self.eps)

        if not self.d:
            return cepstra

        # The second-pass delta is only added when the first one is enabled
        first_delta = delta(cepstra, 2)
        stacked = np.hstack([cepstra, first_delta])
        if self.dd:
            stacked = np.hstack([stacked, delta(first_delta, 2)])

        return stacked
base.py 文件源码 项目:DTW_physionet2016 作者: JJGO 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13,
         nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
         winfunc=lambda x: numpy.ones((x, ))):
    """Compute MFCC features from an audio signal.

    Steps: filterbank energies from ``fbank`` -> natural log -> orthonormal
    DCT-II along axis 1 -> first ``numcep`` columns -> ``lifter``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    fb_energies, frame_energy = fbank(signal, samplerate, winlen, winstep, nfilt,
                                      nfft, lowfreq, highfreq, preemph, winfunc)
    log_fb = numpy.log(fb_energies)
    cepstra = dct(log_fb, type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
base.py 文件源码 项目:DTW_physionet2016 作者: JJGO 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13,
         nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
         winfunc=lambda x: numpy.ones((x, ))):
    """Compute MFCC features from an audio signal.

    The log of the ``fbank`` filterbank energies is decorrelated with an
    orthonormal DCT-II (axis 1); the leading ``numcep`` coefficients are kept
    and liftered.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    filterbank_out = fbank(signal, samplerate, winlen, winstep, nfilt, nfft,
                           lowfreq, highfreq, preemph, winfunc)
    fb_energies, frame_energy = filterbank_out
    all_cepstra = dct(numpy.log(fb_energies), type=2, axis=1, norm='ortho')
    features = lifter(all_cepstra[:, :numcep], ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        features[:, 0] = numpy.log(frame_energy)
    return features
dct.py 文件源码 项目:website-4mb 作者: dettoman 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def dct_gen(frame):
    """
    Zero the DC coefficient of a blockwise DCT of a frame and rebuild it.

    The frame is converted to grayscale and scaled by 1/255. A length-8
    orthonormal DCT is applied along rows and then along columns, the
    coefficients at every 8th row and column are zeroed, and the inverse
    transforms are applied in the same order. The result is multiplied by
    255 and copied into channels 0, 1 and 2 of the output.

    NOTE(review): the reshapes into rows of 8 presumably require both image
    dimensions to be multiples of 8 -- confirm with the caller.

    :param frame: array with three dimensions (rows, columns, channels);
        converted with ``cv2.COLOR_BGR2GRAY``, so presumably a BGR image
    :returns: array with the shape and dtype of ``frame``
    """
    # read Frame
    [r, c, d] = frame.shape
    framebw = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    ##########  DCT  ##########
    # Reshape in 8x8 type array
    # (each row of framedct holds 8 consecutive values of the flattened image)
    framedct = np.reshape(framebw / 255.0, (-1, 8), order='C')
    # Apply DCT line and column wise
    X = sft.dct(framedct, axis=1, norm='ortho')

    # Back to rows of c values, then transpose so the second pass runs over
    # runs of 8 values taken along the other image axis
    X = np.reshape(X, (-1, c), order='C')
    X = np.reshape(X.T, (-1, 8), order='C')
    X = sft.dct(X, axis=1, norm='ortho')
    # shape back to original shape
    X = (np.reshape(X, (-1, r), order='C')).T

    # Zeroing DC-Coefficients
    X[::8,::8] = 0

    ##########  IDCT  ##########
    # Reshape in 8x8 type array
    X = np.reshape(X, (-1, 8), order='C')
    # Apply IDCT line and column wise
    X = sft.idct(X, axis=1, norm='ortho')
    X = np.reshape(X, (-1, c), order='C')
    X = np.reshape(X.T, (-1, 8), order='C')
    x = sft.idct(X, axis=1, norm='ortho')
    # shape back to original shape
    x = (np.reshape(x, (-1, r), order='C')).T
    # cast into output image
    x = x*255

    # The same plane is written to three channels; the assignment converts
    # the values to frame's dtype
    frameout = np.zeros_like(frame)
    frameout[:, :, 0] = x
    frameout[:, :, 1] = x
    frameout[:, :, 2] = x
    return frameout
calcmfcc.py 文件源码 项目:Automatic_Speech_Recognition 作者: zzw922cn 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def calcMFCC(signal,samplerate=16000,win_length=0.025,win_step=0.01,feature_len=13,filters_num=26,NFFT=512,low_freq=0,high_freq=None,pre_emphasis_coeff=0.97,cep_lifter=22,appendEnergy=True,mode='mfcc'):
    """Calculate MFCC or log filterbank features.

    Args:
        signal: 1-D numpy array.
        samplerate: Sampling rate. Defaulted to 16KHz.
        win_length: Window length. Defaulted to 0.025, which is 25ms/frame.
        win_step: Interval between the start points of adjacent frames.
            Defaulted to 0.01, which is 10ms.
        feature_len: Numbers of features. Defaulted to 13.
        filters_num: Numbers of filters. The value passed in is ignored:
            the function always uses 2 * feature_len.
        NFFT: Size of FFT. Defaulted to 512.
        low_freq: Lowest frequency.
        high_freq: Highest frequency.
        pre_emphasis_coeff: Coefficient for pre-emphasis. Pre-emphasis increase
            the energy of signal at higher frequency. Defaulted to 0.97.
        cep_lifter: Numbers of lifter for cepstral. Defaulted to 22.
        appendEnergy: Whether to replace the first feature column with the
            log of the frame energy. Defaulted to True.
        mode: 'mfcc' or 'fbank'.
            'mfcc': Mel-Frequency Cepstral Coefficients(MFCC).
                    Complete process: Mel filtering -> log -> DCT.
            'fbank': Apply Mel filtering -> log.
            Any other value leaves the log filterbank output untruncated.

    Returns:
        2-D numpy array with shape (NUMFRAMES, features). Each frame containing feature_len of features.
    """
    # The filterbank size is always tied to the requested feature count
    filters_num = 2*feature_len
    fb_energies, frame_energy = fbank(signal, samplerate, win_length, win_step,
                                      filters_num, NFFT, low_freq, high_freq,
                                      pre_emphasis_coeff)
    features = numpy.log(fb_energies)
    if mode == 'mfcc':
        # Perform the DCT and keep the first feature_len coefficients
        cepstra = dct(features, type=2, axis=1, norm='ortho')
        features = lifter(cepstra[:, :feature_len], cep_lifter)
    elif mode == 'fbank':
        features = features[:, :feature_len]
    if appendEnergy:
        # Replace the first coefficient with the log frame energy
        features[:, 0] = numpy.log(frame_energy)
    return features
qmf_realtime_synthesis.py 文件源码 项目:ASP 作者: TUIlmenauAMS 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def DCT4(samples):
    """Compute an orthonormal DCT-IV of a block by means of a DCT-III.

    The samples are placed on the odd indices of a zero buffer of twice the
    block length; the orthonormal DCT-III of that buffer, scaled by sqrt(2)
    and cut to the block length, is returned.

    The block length is read from ``samples`` rather than from a
    module-level ``N``; the slice assignment only ever accepted a full
    block of that length.

    :param samples: 1-D sequence of input samples
    :returns: 1-D array with as many values as ``samples``
    """
    block_len = len(samples)
    # use a DCT3 to implement a DCT4:
    upsampled = np.zeros(2 * block_len)
    # upsample signal:
    upsampled[1::2] = samples
    y = spfft.dct(upsampled, type=3, norm='ortho') * np.sqrt(2)
    return y[0:block_len]
qmf_realtime_analysis.py 文件源码 项目:ASP 作者: TUIlmenauAMS 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def DCT4(samples):
    """Compute an orthonormal DCT-IV of a block by means of a DCT-III.

    The input is upsampled by two (values on the odd indices of a zero
    buffer), transformed with an orthonormal DCT-III, multiplied by sqrt(2)
    and truncated to the input length.

    The block length comes from ``len(samples)`` instead of a module-level
    ``N``, so the function no longer depends on hidden global state.

    :param samples: 1-D sequence of input samples
    :returns: 1-D array with as many values as ``samples``
    """
    size = len(samples)
    # use a DCT3 to implement a DCT4:
    stretched = np.zeros(2 * size)
    # upsample signal:
    stretched[1::2] = samples
    transformed = spfft.dct(stretched, type=3, norm='ortho') * np.sqrt(2)
    return transformed[0:size]
qmf_realtime.py 文件源码 项目:ASP 作者: TUIlmenauAMS 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def DCT4(samples):
    """Compute an orthonormal DCT-IV of a block by means of a DCT-III.

    A zero buffer of twice the block length receives the samples on its odd
    indices. Its orthonormal DCT-III, scaled by sqrt(2), is then cut back
    to the block length.

    The block length is derived from the input, which removes the reliance
    on a module-level ``N`` that had to equal ``len(samples)``.

    :param samples: 1-D sequence of input samples
    :returns: 1-D array with as many values as ``samples``
    """
    count = len(samples)
    # use a DCT3 to implement a DCT4:
    padded = np.zeros(2 * count)
    # upsample signal:
    padded[1::2] = samples
    spectrum = spfft.dct(padded, type=3, norm='ortho') * np.sqrt(2)
    return spectrum[0:count]
foldingmat_filterbank.py 文件源码 项目:ASP 作者: TUIlmenauAMS 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def DCT4(samples):
    """Compute an unnormalized DCT-IV of a block by means of a DCT-III.

    The samples are placed on the odd indices of a zero buffer of twice the
    block length; half of the (unnormalized) DCT-III of that buffer, cut to
    the block length, is returned. The result is
    ``y[k] = sum_m samples[m] * cos(pi*(2m+1)*(2k+1)/(4*len(samples)))``.

    The block length is read from ``samples`` rather than from a
    module-level ``N``; the slice assignment only ever accepted a full
    block of that length.

    :param samples: 1-D sequence of input samples
    :returns: 1-D array with as many values as ``samples``
    """
    block_len = len(samples)
    # use a DCT3 to implement a DCT4:
    upsampled = np.zeros(2 * block_len)
    # upsample signal:
    upsampled[1::2] = samples
    y = spfft.dct(upsampled, type=3) / 2
    return y[0:block_len]
feature_computer.py 文件源码 项目:ASRT_SpeechRecognition 作者: nl8590687 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def mfcc(signal, samplerate, conf):
    '''
    Compute MFCC features from an audio signal.

    The filterbank energies returned by ``fbank`` are log-compressed,
    transformed with an orthonormal DCT-II along axis 1, truncated to
    ``int(conf['numcep'])`` columns and passed through ``lifter`` with
    ``float(conf['ceplifter'])``.

    Args:
        signal: the audio signal from which to compute features. Should be an
            N*1 array
        samplerate: the samplerate of the signal we are working with.
        conf: feature configuration; the keys 'numcep' and 'ceplifter' are
            read here, and the whole mapping is forwarded to ``fbank``

    Returns:
        A numpy array of size (NUMFRAMES by numcep) containing features. Each
        row holds 1 feature vector, a numpy vector containing the signal
        log-energy
    '''

    fb_energies, frame_energy = fbank(signal, samplerate, conf)
    log_fb = numpy.log(fb_energies)
    cepstra = dct(log_fb, type=2, axis=1, norm='ortho')
    kept = cepstra[:, :int(conf['numcep'])]
    liftered = lifter(kept, float(conf['ceplifter']))
    return liftered, numpy.log(frame_energy)
checkDCT.py 文件源码 项目:UWdepressionX 作者: jyfeather 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def a_dct(l, m):
    """Return the orthonormal DCT-II of ``l`` with all but the ``m``
    largest-magnitude coefficients set to zero.

    :param l: 1-D sequence of samples
    :param m: number of coefficients to keep
    :returns: array of the same length as ``l``
    """
    coeffs = dct(l, type=2, norm='ortho')
    # Indices ordered by decreasing magnitude; ties keep their original order
    by_magnitude = sorted(range(len(coeffs)),
                          key=lambda idx: abs(coeffs[idx]),
                          reverse=True)
    discarded = by_magnitude[m:]
    coeffs[discarded] = 0
    return coeffs


问题


面经


文章

微信
公众号

扫码关注公众号