def dctii(x, axes=None):
"""
Compute a multi-dimensional DCT-II over specified array axes. This
function is implemented by calling the one-dimensional DCT-II
:func:`scipy.fftpack.dct` with normalization mode 'ortho' for each
of the specified axes.
Parameters
----------
a : array_like
Input array
axes : sequence of ints, optional (default None)
Axes over which to compute the DCT-II.
Returns
-------
y : ndarray
DCT-II of input array
"""
if axes is None:
axes = list(range(x.ndim))
for ax in axes:
x = fftpack.dct(x, type=2, axis=ax, norm='ortho')
return x
python类dct()的实例源码
def run_fft_dct_example():
random_state = np.random.RandomState(1999)
fs, d = fetch_sample_speech_fruit()
n_fft = 64
X = d[0]
X_stft = stft(X, n_fft)
X_rr = complex_to_real_view(X_stft)
X_dct = fftpack.dct(X_rr, axis=-1, norm='ortho')
X_dct_sub = X_dct[1:] - X_dct[:-1]
std = X_dct_sub.std(axis=0, keepdims=True)
X_dct_sub += .01 * std * random_state.randn(
X_dct_sub.shape[0], X_dct_sub.shape[1])
X_dct_unsub = np.cumsum(X_dct_sub, axis=0)
X_idct = fftpack.idct(X_dct_unsub, axis=-1, norm='ortho')
X_irr = real_to_complex_view(X_idct)
X_r = istft(X_irr, n_fft)[:len(X)]
SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
print(SNR)
wavfile.write("fftdct_orig.wav", fs, soundsc(X))
wavfile.write("fftdct_rec.wav", fs, soundsc(X_r))
def chebfft(v):
N = len(v) - 1
if (N == 0): w = 0;
x = cos(pi*arange(0,N+1)/N)
K = fft.fftfreq(2*N, 0.5/N)
K[N] = 0
ii = arange(0,N)
v = hstack([v])
V = hstack([v, flipud(v[1:N])])
U = real(fft.fft(V))
uu = dct(v, 1)
uv = hstack((uu, uu[1:-1][::-1]))
W = real(fft.ifft(1j*K*U))
ww = dst(K*uv, 1)
from IPython import embed; embed()
w = zeros(N+1)
w[1:N] = -W[1:N]/sqrt(1-x[1:N]**2)
w[0] = sum(ii.T**2*U[0:len(ii)])/float(N) + 0.5*N*U[N]
w[N] = sum(((-1)**(ii+1))*(ii.T**2)*U[0:len(ii)])/float(N) + 0.5*(-1)**(N+1)*N*U[N]
return w
def DCT4(samples):
"""
Method to create DCT4 transformation using DCT3
Arguments :
samples : (1D Array) Input samples to be transformed
Returns :
y : (1D Array) Transformed output samples
"""
# Initialize
samplesup=np.zeros(2*N, dtype = np.float32)
# Upsample signal:
samplesup[1::2]=samples
y=spfft.dct(samplesup,type=3,norm='ortho')*np.sqrt(2)#/2
return y[0:N]
#The DST4 transform:
base.py 文件源码
项目:Artificial-Intelligence-with-Python
作者: PacktPublishing
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
"""Compute MFCC features from an audio signal.
:param signal: the audio signal from which to compute features. Should be an N*1 array
:param samplerate: the samplerate of the signal we are working with.
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
:param numcep: the number of cepstrum to return, default 13
:param nfilt: the number of filters in the filterbank, default 26.
:param nfft: the FFT size. Default is 512.
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
:param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
:param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
"""
feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph)
feat = numpy.log(feat)
feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
feat = lifter(feat,ceplifter)
if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
return feat
def mfcc(signal, samplerate, conf):
'''
Compute MFCC features from an audio signal.
Args:
signal: the audio signal from which to compute features. Should be an
N*1 array
samplerate: the samplerate of the signal we are working with.
conf: feature configuration
Returns:
A numpy array of size (NUMFRAMES by numcep) containing features. Each
row holds 1 feature vector, a numpy vector containing the signal
log-energy
'''
feat, energy = fbank(signal, samplerate, conf)
feat = numpy.log(feat)
feat = dct(feat, type=2, axis=1, norm='ortho')[:, :int(conf['numcep'])]
feat = lifter(feat, float(conf['ceplifter']))
return feat, numpy.log(energy)
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
"""Compute MFCC features from an audio signal.
:param signal: the audio signal from which to compute features. Should be an N*1 array
:param samplerate: the samplerate of the signal we are working with.
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
:param numcep: the number of cepstrum to return, default 13
:param nfilt: the number of filters in the filterbank, default 26.
:param nfft: the FFT size. Default is 512.
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
:param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
:param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
"""
feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph)
feat = numpy.log(feat)
feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
feat = lifter(feat,ceplifter)
if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
return feat
def mfccVTLP(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True, alpha=1.0):
"""Compute MFCC features from an audio signal.
:param signal: the audio signal from which to compute features. Should be an N*1 array
:param samplerate: the samplerate of the signal we are working with.
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
:param numcep: the number of cepstrum to return, default 13
:param nfilt: the number of filters in the filterbank, default 26.
:param nfft: the FFT size. Default is 512.
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
:param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
:param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
"""
feat,energy = fbankVTLP(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,False,alpha)
feat = numpy.log(feat)
feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
feat = lifter(feat,ceplifter)
if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
return feat
def clenshaw_curtis1D(u, quad="GC"):
assert u.ndim == 1
N = u.shape[0]
if quad == 'GL':
w = np.arange(0, N, 1, dtype=float)
w[2:] = 2./(1-w[2:]**2)
w[0] = 1
w[1::2] = 0
ak = dct(u, 1)
ak /= (N-1)
return np.sqrt(np.sum(ak*w))
elif quad == 'GC':
d = np.zeros(N)
k = 2*(1 + np.arange((N-1)//2))
d[::2] = (2./N)/np.hstack((1., 1.-k*k))
w = dct(d, type=3)
return np.sqrt(np.sum(u*w))
def dct(a, b, type=2, axis=0, **kw):
if iscomplexobj(a):
b.real[:] = dct1(a.real, type=type, axis=axis)
b.imag[:] = dct1(a.imag, type=type, axis=axis)
return b
else:
b[:] = dct1(a, type=type, axis=axis)
return b
# Define functions taking both input array and output array
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True,
winfunc=lambda x:numpy.ones((x,))):
"""Compute MFCC features from an audio signal.
:param signal: the audio signal from which to compute features. Should be an N*1 array
:param samplerate: the samplerate of the signal we are working with.
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
:param numcep: the number of cepstrum to return, default 13
:param nfilt: the number of filters in the filterbank, default 26.
:param nfft: the FFT size. Default is 512.
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
:param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
:param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
:param winfunc: the analysis window to apply to each frame. By default no window is applied.
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
"""
feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc)
feat = numpy.log(feat)
feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
feat = lifter(feat,ceplifter)
if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
return feat
def dct_compress(X, n_components, window_size=128):
"""
Compress using the DCT
Parameters
----------
X : ndarray, shape=(n_samples,)
The input signal to compress. Should be 1-dimensional
n_components : int
The number of DCT components to keep. Setting n_components to about
.5 * window_size can give compression with fairly good reconstruction.
window_size : int
The input X is broken into windows of window_size, each of which are
then compressed with the DCT.
Returns
-------
X_compressed : ndarray, shape=(num_windows, window_size)
A 2D array of non-overlapping DCT coefficients. For use with uncompress
Reference
---------
http://nbviewer.ipython.org/github/craffel/crucialpython/blob/master/week3/stride_tricks.ipynb
"""
if len(X) % window_size != 0:
append = np.zeros((window_size - len(X) % window_size))
X = np.hstack((X, append))
num_frames = len(X) // window_size
X_strided = X.reshape((num_frames, window_size))
X_dct = fftpack.dct(X_strided, norm='ortho')
if n_components is not None:
X_dct = X_dct[:, :n_components]
return X_dct
def overlap_dct_compress(X, n_components, window_size):
"""
Overlap (at 50% of window_size) and compress X.
Parameters
----------
X : ndarray, shape=(n_samples,)
Input signal to compress
n_components : int
number of DCT components to keep
window_size : int
Size of windows to take
Returns
-------
X_dct : ndarray, shape=(n_windows, n_components)
Windowed and compressed version of X
"""
X_strided = halfoverlap(X, window_size)
X_dct = fftpack.dct(X_strided, norm='ortho')
if n_components is not None:
X_dct = X_dct[:, :n_components]
return X_dct
# Evil voice is caused by adding double the zeros before inverse DCT...
# Very cool bug but makes sense
def dct_compress(X, n_components, window_size=128):
"""
Compress using the DCT
Parameters
----------
X : ndarray, shape=(n_samples,)
The input signal to compress. Should be 1-dimensional
n_components : int
The number of DCT components to keep. Setting n_components to about
.5 * window_size can give compression with fairly good reconstruction.
window_size : int
The input X is broken into windows of window_size, each of which are
then compressed with the DCT.
Returns
-------
X_compressed : ndarray, shape=(num_windows, window_size)
A 2D array of non-overlapping DCT coefficients. For use with uncompress
Reference
---------
http://nbviewer.ipython.org/github/craffel/crucialpython/blob/master/week3/stride_tricks.ipynb
"""
if len(X) % window_size != 0:
append = np.zeros((window_size - len(X) % window_size))
X = np.hstack((X, append))
num_frames = len(X) // window_size
X_strided = X.reshape((num_frames, window_size))
X_dct = fftpack.dct(X_strided, norm='ortho')
if n_components is not None:
X_dct = X_dct[:, :n_components]
return X_dct
def overlap_dct_compress(X, n_components, window_size):
"""
Overlap (at 50% of window_size) and compress X.
Parameters
----------
X : ndarray, shape=(n_samples,)
Input signal to compress
n_components : int
number of DCT components to keep
window_size : int
Size of windows to take
Returns
-------
X_dct : ndarray, shape=(n_windows, n_components)
Windowed and compressed version of X
"""
X_strided = halfoverlap(X, window_size)
X_dct = fftpack.dct(X_strided, norm='ortho')
if n_components is not None:
X_dct = X_dct[:, :n_components]
return X_dct
# Evil voice is caused by adding double the zeros before inverse DCT...
# Very cool bug but makes sense
def run_world_dct_example():
# on chromebook
# enc 114.229
# synth 5.165
fs, d = fetch_sample_speech_tapestry()
d = d.astype("float32") / 2 ** 15
def enc():
temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
temporal_positions_h, f0_h, vuv_h)
temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
temporal_positions_h, f0_h, vuv_h)
return spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c
start = time.time()
spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
dct_buf = fftpack.dct(spectrogram_ct)
n_fft = 512
n_dct = 20
dct_buf = dct_buf[:, :n_dct]
idct_buf = np.zeros((dct_buf.shape[0], n_fft + 1))
idct_buf[:, :n_dct] = dct_buf
ispectrogram_ct = fftpack.idct(idct_buf)
enc_done = time.time()
y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, spectrogram_ct, fs)
synth_done = time.time()
print("enc time: {}".format(enc_done - start))
print("synth time: {}".format(synth_done - enc_done))
#y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
wavfile.write("out_dct.wav", fs, soundsc(y))
#run_world_mgc_example()
#run_world_base_example()
def get_config():
config = {}
config['sound_file'] = "harvestmoon-mono-hp500.wav"
config['save_file'] = config['sound_file'] + "_modelsave_"
config['blocksize']=13000
config['compressed_blocksize'] = (config['blocksize']//2+1)
config['seqlen'] = 80 # in number of blocks...
config['win_edge'] = int(config['blocksize'] / 2)
config['out_step'] = 1 # in number of blocks...
config['batchsize'] = 5 # in number of blocks...
config['domain'] = "rfft" # either "rfft" or "dct"
if config['domain'] == "dct": config['win_edge'] = int(config['blocksize'] / 2) # if dct, have to set this
return config
def conv_to_dct(signal, blocksize, edge, out_blocksize):
blocks1 = []
blocks2 = []
for i in range(0, signal.shape[0]-blocksize-edge, blocksize-edge):
dct_block = dct(signal[i:i+blocksize], norm='ortho')
blocks1.append(dct_block)
if blocksize > out_blocksize:
for opw in range(len(blocks1)): blocks2.append(blocks1[opw][0:out_blocksize])
return blocks2
signal_data_features.py 文件源码
项目:PIE_ISAE_Essais_Vol
作者: YuanxiangFranck
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def get_dct(x, n_dct):
"""
Renvoie les n_dct plus grands coefficients de la transformée en
cosinus discrète
"""
# Compute FFT coefficients (complex)
coeffs = dct(x.transpose())
# Sort according to absolute value
coeffs_sorted = coeffs.ravel()[np.argsort(-np.abs(coeffs)).ravel()] \
.reshape(coeffs.shape)
# n_fft largest coefficients
n_coeffs = coeffs_sorted[:,:n_dct]
# Return the coefficients
return n_coeffs
def _call(self, signal):
"""Compute MFCC features from an audio signal.
Args:
signal: the audio signal from which to compute features. Should be
an N*1 array
Returns:
A numpy array of size (NUMFRAMES by numcep) containing features.
Each row holds 1 feature vector.
"""
feat, energy = super(MFCC, self)._call(signal)
feat = np.log(feat)
feat = dct(feat, type=2, axis=1, norm='ortho')[:, :self.num_cep]
feat = self._lifter(feat, self.cep_lifter)
if self.append_energy:
# replace first cepstral coefficient with log of frame energy
feat[:, 0] = np.log(energy + self.eps)
if self.d:
d = delta(feat, 2)
feat = np.hstack([feat, d])
if self.dd:
feat = np.hstack([feat, delta(d, 2)])
return feat
def mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13,
nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
winfunc=lambda x: numpy.ones((x, ))):
"""Compute MFCC features from an audio signal.
:param signal: the audio signal from which to compute features. Should be an N*1 array
:param samplerate: the samplerate of the signal we are working with.
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
:param numcep: the number of cepstrum to return, default 13
:param nfilt: the number of filters in the filterbank, default 26.
:param nfft: the FFT size. Default is 512.
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
:param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
:param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
:param winfunc: the analysis window to apply to each frame. By default no window is applied.
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
"""
feat, energy = fbank(signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph, winfunc)
feat = numpy.log(feat)
feat = dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
feat = lifter(feat, ceplifter)
if appendEnergy: feat[:, 0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
return feat
def mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13,
nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
winfunc=lambda x: numpy.ones((x, ))):
"""Compute MFCC features from an audio signal.
:param signal: the audio signal from which to compute features. Should be an N*1 array
:param samplerate: the samplerate of the signal we are working with.
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
:param numcep: the number of cepstrum to return, default 13
:param nfilt: the number of filters in the filterbank, default 26.
:param nfft: the FFT size. Default is 512.
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
:param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
:param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
:param winfunc: the analysis window to apply to each frame. By default no window is applied.
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
"""
feat, energy = fbank(signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph, winfunc)
feat = numpy.log(feat)
feat = dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
feat = lifter(feat, ceplifter)
if appendEnergy: feat[:, 0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
return feat
def dct_gen(frame):
# read Frame
[r, c, d] = frame.shape
framebw = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
########## DCT ##########
# Reshape in 8x8 type array
framedct = np.reshape(framebw / 255.0, (-1, 8), order='C')
# Apply DCT line and column wise
X = sft.dct(framedct, axis=1, norm='ortho')
X = np.reshape(X, (-1, c), order='C')
X = np.reshape(X.T, (-1, 8), order='C')
X = sft.dct(X, axis=1, norm='ortho')
# shape back to original shape
X = (np.reshape(X, (-1, r), order='C')).T
# Zeroing DC-Coefficients
X[::8,::8] = 0
########## IDCT ##########
# Reshape in 8x8 type array
X = np.reshape(X, (-1, 8), order='C')
# Apply IDCT line and column wise
X = sft.idct(X, axis=1, norm='ortho')
X = np.reshape(X, (-1, c), order='C')
X = np.reshape(X.T, (-1, 8), order='C')
x = sft.idct(X, axis=1, norm='ortho')
# shape back to original shape
x = (np.reshape(x, (-1, r), order='C')).T
# cast into output image
x = x*255
frameout = np.zeros_like(frame)
frameout[:, :, 0] = x
frameout[:, :, 1] = x
frameout[:, :, 2] = x
return frameout
def calcMFCC(signal,samplerate=16000,win_length=0.025,win_step=0.01,feature_len=13,filters_num=26,NFFT=512,low_freq=0,high_freq=None,pre_emphasis_coeff=0.97,cep_lifter=22,appendEnergy=True,mode='mfcc'):
"""Caculate Features.
Args:
signal: 1-D numpy array.
samplerate: Sampling rate. Defaulted to 16KHz.
win_length: Window length. Defaulted to 0.025, which is 25ms/frame.
win_step: Interval between the start points of adjacent frames.
Defaulted to 0.01, which is 10ms.
feature_len: Numbers of features. Defaulted to 13.
filters_num: Numbers of filters. Defaulted to 26.
NFFT: Size of FFT. Defaulted to 512.
low_freq: Lowest frequency.
high_freq: Highest frequency.
pre_emphasis_coeff: Coefficient for pre-emphasis. Pre-emphasis increase
the energy of signal at higher frequency. Defaulted to 0.97.
cep_lifter: Numbers of lifter for cepstral. Defaulted to 22.
appendEnergy: Wheter to append energy. Defaulted to True.
mode: 'mfcc' or 'fbank'.
'mfcc': Mel-Frequency Cepstral Coefficients(MFCC).
Complete process: Mel filtering -> log -> DCT.
'fbank': Apply Mel filtering -> log.
Returns:
2-D numpy array with shape (NUMFRAMES, features). Each frame containing feature_len of features.
"""
filters_num = 2*feature_len
feat,energy=fbank(signal,samplerate,win_length,win_step,filters_num,NFFT,low_freq,high_freq,pre_emphasis_coeff)
feat=numpy.log(feat)
# Performing DCT and get first 13 coefficients
if mode == 'mfcc':
feat=dct(feat,type=2,axis=1,norm='ortho')[:,:feature_len]
feat=lifter(feat,cep_lifter)
elif mode == 'fbank':
feat = feat[:,:feature_len]
if appendEnergy:
# Replace the first coefficient with logE and get 2-13 coefficients.
feat[:,0]=numpy.log(energy)
return feat
def DCT4(samples):
#use a DCT3 to implement a DCT4:
samplesup=np.zeros(2*N)
#upsample signal:
samplesup[1::2]=samples
y=spfft.dct(samplesup,type=3,norm='ortho')*np.sqrt(2)#/2
return y[0:N]
def DCT4(samples):
#use a DCT3 to implement a DCT4:
samplesup=np.zeros(2*N)
#upsample signal:
samplesup[1::2]=samples
y=spfft.dct(samplesup,type=3,norm='ortho')*np.sqrt(2)#/2
return y[0:N]
def DCT4(samples):
#use a DCT3 to implement a DCT4:
samplesup=np.zeros(2*N)
#upsample signal:
samplesup[1::2]=samples
y=spfft.dct(samplesup,type=3,norm='ortho')*np.sqrt(2)#/2
return y[0:N]
def DCT4(samples):
#use a DCT3 to implement a DCT4:
samplesup=np.zeros(2*N)
#upsample signal:
samplesup[1::2]=samples
y=spfft.dct(samplesup,type=3)/2
return y[0:N]
def mfcc(signal, samplerate, conf):
'''
Compute MFCC features from an audio signal.
??????????MFCC????
Args:
???
signal: the audio signal from which to compute features. Should be an
N*1 array
?????????????????????N*1???
samplerate: the samplerate of the signal we are working with.
??????????
conf: feature configuration
?????
Returns:
????
A numpy array of size (NUMFRAMES by numcep) containing features. Each
row holds 1 feature vector, a numpy vector containing the signal
log-energy
???????????numpy?????????????????numpy???????????
'''
feat, energy = fbank(signal, samplerate, conf)
feat = numpy.log(feat)
feat = dct(feat, type=2, axis=1, norm='ortho')[:, :int(conf['numcep'])]
feat = lifter(feat, float(conf['ceplifter']))
return feat, numpy.log(energy)
def a_dct(l, m):
tmp = dct(l, type=2, norm = 'ortho')
tmp_idx = sorted(range(len(tmp)), key=lambda k: -abs(tmp[k]))
tmp[tmp_idx[m:]] = 0
return tmp