def a_dct(l, m):
    """Return the ``m`` largest-magnitude type-2 DCT coefficients of ``l``.

    The coefficients are ordered by decreasing absolute value; coefficients
    of equal magnitude keep their original (ascending index) order.  The
    result is a plain Python list of signed coefficient values.
    """
    coeffs = dct(l, type=2)
    # Stable sort: reverse=True still keeps equal keys in input order.
    by_magnitude = sorted(
        range(len(coeffs)),
        key=lambda idx: abs(coeffs[idx]),
        reverse=True,
    )
    strongest = by_magnitude[:m]
    return coeffs[strongest].tolist()
# Example source code using Python's dct()
def mfcc(s, fs, nfiltbank):
    """Compute mel-frequency cepstral coefficients for a 1-D signal.

    The signal is cut into 25 ms frames that overlap by 10 ms, each frame is
    Hamming-windowed and transformed with a 512-point FFT, the one-sided
    magnitude spectrum is weighted by the mel filterbank, and a DCT of the
    log filterbank energies is taken across the filter axis.

    :param s: 1-D sequence of samples
    :param fs: sampling rate in Hz
    :param nfiltbank: number of mel filters (and of returned coefficients)
    :return: array of shape ``(nfiltbank, nFrames)``; row 0 (the 0th order
        coefficient) is set to zero
    :raises ValueError: if ``fs`` is too low to give a positive frame step
    """
    # divide into segments of 25 ms with overlap of 10 ms
    nSamples = int(0.025 * fs)
    overlap = int(0.01 * fs)
    step = nSamples - overlap
    if step <= 0:
        raise ValueError("sampling rate too low to form 25 ms frames")
    # float() keeps this a true division on Python 2 as well
    nFrames = int(np.ceil(len(s) / float(step)))

    # zero padding so that the LAST frame (which starts at step*(nFrames-1))
    # still has nSamples samples available
    padding = step * (nFrames - 1) + nSamples - len(s)
    if padding > 0:
        signal = np.append(s, np.zeros(padding))
    else:
        signal = s

    segment = np.empty((nSamples, nFrames))
    for i in range(nFrames):
        # compute the offset BEFORE slicing; updating it afterwards made
        # frames 0 and 1 identical and shifted every later frame by one hop
        start = step * i
        segment[:, i] = signal[start:start + nSamples]

    # compute one-sided magnitude spectrum of every frame
    # NOTE(review): frames longer than nfft (fs > 20480 Hz) are truncated by
    # fft(x, nfft) -- confirm the sampling rates this is used with.
    nfft = 512
    nbins = nfft // 2 + 1  # integer division: shapes/indices must be ints
    window = hamming(nSamples)  # loop-invariant, build once
    periodogram = np.empty((nFrames, nbins))
    for i in range(nFrames):
        spectrum = fft(segment[:, i] * window, nfft)
        # bins 0..nfft/2 (DC up to Nyquist); the former fftshift slice began
        # one bin before DC and never reached the Nyquist bin
        periodogram[i, :] = np.abs(spectrum[:nbins]) / nSamples

    # calculating mfccs
    # assumes mel_filterbank returns an array of shape (nbins, >= nfiltbank),
    # as implied by the column indexing fbank[:, i] -- TODO confirm
    fbank = mel_filterbank(nfft, nfiltbank, fs)
    # nfiltbank filter energies for each frame: mel_coeff[i, k] is the
    # spectrum of frame k weighted by filter i
    mel_coeff = np.dot(periodogram, fbank[:, :nfiltbank]).T
    # avoid log10(0) = -inf for silent frames / empty filters
    mel_coeff = np.where(mel_coeff == 0, np.finfo(float).eps, mel_coeff)
    mel_coeff = np.log10(mel_coeff)
    # DCT across the filter axis (axis 0) so that row index == cepstral order
    mel_coeff = dct(mel_coeff, axis=0)
    # exclude 0th order coefficient (much larger than others)
    mel_coeff[0, :] = 0
    return mel_coeff
def dct2(image_channel):
    """Return the orthonormal 2-D DCT (type 2) of ``image_channel``.

    The transform is separable: a 1-D DCT is taken along the first axis
    (by transforming the transposed array), then along the last axis.
    """
    # First pass: the last axis of the transpose is the first axis of the input.
    first_axis_pass = fftpack.dct(image_channel.T, norm='ortho')
    # Transpose back and transform along the remaining (last) axis.
    both_axes = fftpack.dct(first_axis_pass.T, norm='ortho')
    return both_axes
def gen_post(feat_list, stat_file, model, win_size_before = 15, win_size_after = 15, num_targets = 31):
    """Run ``model`` over every feature file in ``feat_list`` and save the results.

    For each non-empty line (a file path) of ``feat_list`` the features are
    read with ``htk_io``, normalised with the mean/variance loaded from
    ``stat_file``, expanded by ``org_data`` with the given context window,
    and scored by ``model`` in chunks of 100 rows.  The scores are DCT
    transformed, columns ``1..numcep`` are kept, two successive ``delta``
    passes are appended, and the result is written to the input path with
    ".fea" replaced by ".mfc".

    Relies on the module-level names ``eps``, ``gpu_dtype`` and ``numcep``.

    :raises Exception: if the mean or variance vector could not be read
    """
    model.eval()  # test mode, the counterpart of model.train()
    m, v = read_mv(stat_file)
    if m is None or v is None:
        raise Exception("mean or variance vector does not exist")

    chunk_len = 100  # rows per forward pass, to prevent gpu out of memory

    def forward_chunk(chunk):
        # Score one block of rows and hand the result back as a numpy array.
        tensor = torch.from_numpy(chunk).type(gpu_dtype)
        net_input = Variable(tensor.type(gpu_dtype), volatile = True)
        return model(net_input).data.cpu().numpy()

    with open(feat_list) as list_file:
        for line in list_file:
            line = line.strip()
            if not line:
                continue
            print ("generating features for file", line)

            reader = htk_io.fopen(line)
            utt_feat = reader.getall()
            utt_feat -= m  # normalize mean
            utt_feat /= (np.sqrt(v) + eps)  # normalize var
            feat_numpy = org_data(utt_feat, win_size_before, win_size_after)
            out_feat = np.zeros((utt_feat.shape[0], num_targets))

            n_full, num_remain = divmod(feat_numpy.shape[0], chunk_len)
            # full chunks are addressed from the front ...
            for start_idx in range(0, n_full * chunk_len, chunk_len):
                end_idx = start_idx + chunk_len
                out_feat[start_idx:end_idx] = forward_chunk(feat_numpy[start_idx:end_idx])
            # ... and the leftover rows from the back
            if num_remain > 0:
                out_feat[-num_remain:] = forward_chunk(feat_numpy[-num_remain:])

            cepstra = dct(out_feat, type=2, axis=1, norm='ortho')[:,1:numcep+1]
            cepstra_delta = delta(cepstra, 2)
            cepstra_ddelta = delta(cepstra_delta, 2)
            out_feat = np.concatenate((cepstra, cepstra_delta, cepstra_ddelta), axis = 1)

            out_file = line.replace(".fea", ".mfc")
            writer = htk_io.fopen(out_file, mode="wb", veclen = out_feat.shape[1])
            writer.writeall(out_feat)
            print ("features saved in %s\n" %out_file)
def extract(self, signal, filename):
    """Compute MFCC features of ``signal`` and write them to ``filename``.

    Steps: average channels if the input has more than one, pre-emphasis,
    framing with zero padding, Hamming window, power spectrum, filterbank
    energies (from ``self.get_filterbanks()``), log, orthonormal type-2 DCT
    truncated to ``self.num_ceps`` coefficients, optional cepstral
    liftering, and optional replacement of the first coefficient with the
    log frame energy.

    :param signal: numpy array of samples; for multi-channel input the
        channels are averaged over axis 1
    :param filename: destination passed to ``np.savetxt``; one frame per
        row, comma separated, 8 decimals
    :raises AssertionError: if the signal is not longer than
        ``5 * self.FRAME_LEN`` samples
    """
    if signal.ndim > 1:
        self.dprint("INFO: Input signal has more than 1 channel; the channels will be averaged.")
        signal = mean(signal, axis=1)
    assert len(signal) > 5 * self.FRAME_LEN, "Signal too short!"

    # Pre-emphasis: y[0] = x[0], y[n] = x[n] - PRE_EMP * x[n-1]
    signal = np.append(signal[0], signal[1:] - self.PRE_EMP * signal[:-1])

    # Framing the signal
    signal_length = len(signal)
    if signal_length <= self.FRAME_LEN:
        num_frames = 1
    else:
        num_frames = 1 + int(math.ceil((1.0 * signal_length - self.FRAME_LEN) / self.FRAME_STEP))
    pad_signal_length = int((num_frames - 1) * self.FRAME_STEP + self.FRAME_LEN)
    padding = np.zeros((pad_signal_length - signal_length,))
    pad_signal = np.concatenate((signal, padding))

    # indices[f, j] = f * FRAME_STEP + j.  Built by broadcasting so there is
    # always exactly one row per frame, even for a non-integer FRAME_STEP.
    frame_starts = np.arange(num_frames) * self.FRAME_STEP
    within_frame = np.arange(0, self.FRAME_LEN)
    indices = (within_frame[np.newaxis, :] + frame_starts[:, np.newaxis]).astype(np.int32)
    frames = pad_signal[indices]

    # Windowing: pass every frame through a Hamming window (out of place,
    # so the dtype of ``frames`` cannot block the multiplication)
    frames = frames * np.hamming(self.FRAME_LEN)

    # Magnitude spectrum
    if np.shape(frames)[1] > self.NFFT:
        self.dprint("Warning, frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid."%(np.shape(frames)[1], self.NFFT))
    mag_frames = np.absolute(np.fft.rfft(frames, self.NFFT))

    # Power spectrum
    pspec = (1.0 / self.NFFT) * (mag_frames ** 2)

    # Filter bank
    pspec = np.where(pspec == 0, np.finfo(float).eps, pspec)  # if things are all zeros we get problems
    energy = np.sum(pspec, 1)  # total energy in each frame
    energy = np.where(energy == 0, np.finfo(float).eps, energy)  # zero energy would break the log below
    fbank = self.get_filterbanks()
    filter_banks = np.dot(pspec, fbank)
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)  # numerical stability

    # MFCC calculation
    filter_banks = np.log(filter_banks)
    # keep the first num_ceps coefficients (indices 0 .. num_ceps-1)
    ceps = dct(filter_banks, type=2, axis=1, norm='ortho')[:, :self.num_ceps]

    # Cepstral liftering.  A lifter of 0 means "no liftering"; evaluating the
    # formula with 0 would divide by zero and turn every coefficient into NaN.
    if self.cep_lifter != 0:
        n = np.arange(ceps.shape[1])
        lift = 1 + (self.cep_lifter / 2.0) * np.sin(np.pi * n / self.cep_lifter)
        ceps = ceps * lift

    if self.appendEnergy:
        ceps[:, 0] = np.log(energy)  # replace first cepstral coefficient with log of frame energy
    np.savetxt(filename, ceps, fmt='%.8f', delimiter=',')
def compute_dct_features(X, image_shape, no_coeff=30, method='zigzag'):
    """
    Compute DCT features for a batch of flattened images.

    An orthonormal type 2 DCT is taken along the last axis of ``X`` (one
    transform per row) and ``no_coeff`` coefficients are selected per row.
    The first (DC) coefficient is never part of the result.

    :param X: 2-D array, one flattened image ('C' order) per row
    :param image_shape: shape each row is reshaped to before the zigzag
        scan; only used by the 'zigzag' method
    :param no_coeff: number of coefficients to extract
    :param method: how the coefficients are chosen:
        'zigzag'       - entries 1..no_coeff of ``zigzag(image)`` per row;
        'variance'     - columns with the largest standard deviation;
        'rel_variance' - columns with the largest standard deviation after
                         subtracting the per-column mean (in exact
                         arithmetic this ranks like 'variance', since the
                         standard deviation ignores a constant shift);
        'energy'       - columns with the largest summed absolute value
    :return: array with one row per input row and ``no_coeff`` columns
        (fewer if not enough coefficients are available)
    :raises NotImplementedError: for an unknown ``method``
    """
    X_dct = fft.dct(X, norm='ortho')

    if method == 'zigzag':
        out = np.zeros((len(X_dct), no_coeff), dtype=X_dct.dtype)
        for i, row in enumerate(X_dct):
            image = row.reshape(image_shape)
            out[i] = zigzag(image)[1:no_coeff + 1]
        return out

    if method not in ('rel_variance', 'variance', 'energy'):
        raise NotImplementedError(
            "method not implemented, use only 'zigzag', 'variance', "
            "'rel_variance' or 'energy'")

    # drop the DC coefficient before ranking the remaining columns
    X_dct = X_dct[:, 1:]
    if method == 'rel_variance':
        # mean normalize, then standard deviation per frequency component
        score = np.std(X_dct - np.mean(X_dct, 0), 0)
    elif method == 'variance':
        # standard deviation per frequency component
        score = np.std(X_dct, 0)
    else:  # 'energy'
        score = np.sum(np.abs(X_dct), 0)

    # columns with the largest score first
    idxs = np.argsort(score)[::-1][:no_coeff]
    return X_dct[:, idxs]