def phormants(x, Fs):
    """Estimate formant frequencies of one signal frame from its LPC roots.

    Args:
        x: 1-D array of samples; it is multiplied element-wise by a Hamming
            window of the same length.
        Fs: Sampling rate in Hz.

    Returns:
        list: Angles of the LPC polynomial roots converted to Hz, sorted in
        ascending order. Only roots with a non-negative imaginary part are
        kept.
    """
    # Window the frame, then run it through the filter 1 / (1 + 0.63 z^-1).
    windowed = x * numpy.hamming(len(x))
    filtered = lfilter([1], [1., 0.63], windowed)

    # LPC order from the "2 + Fs/1000" rule. It must be an integer: under
    # true division the expression is a float even for an integer Fs.
    ncoeff = int(2 + Fs / 1000)
    # NOTE(review): `lpc` is defined outside this block; it is assumed to
    # return (coefficients, error, reflection coefficients) -- confirm.
    A, _, _ = lpc(filtered, ncoeff)

    # Keep one root of every complex-conjugate pair.
    roots = [r for r in numpy.roots(A) if numpy.imag(r) >= 0]

    # Root angle (radians per sample) -> frequency in Hz.
    angles = numpy.arctan2(numpy.imag(roots), numpy.real(roots))
    return sorted(angles * (Fs / (2 * math.pi)))
# Example source code for Python's hamming()
def hamming(M):
    """Returns the Hamming window.

    The Hamming window is defined as

    .. math::
        w(n) = 0.54 - 0.46\\cos\\left(\\frac{2\\pi{n}}{M-1}\\right)
        \\qquad 0 \\leq n \\leq M-1

    Args:
        M (:class:`~int`):
            Number of points in the output window. If zero or less, an empty
            array is returned.

    Returns:
        ~cupy.ndarray: Output ndarray.

    .. seealso:: :func:`numpy.hamming`
    """
    if M < 1:
        return from_data.array([])
    if M == 1:
        # Handled separately: the general formula divides by M - 1.
        return basic.ones(1, float)
    n = ranges.arange(0, M)
    return 0.54 - 0.46 * trigonometric.cos(2.0 * numpy.pi * n / (M - 1))
# Source file: cochleagram_extractor.py
# Project: speech_feature_extractor
# Author: ZhihaoDU
# Project source
# File source
# Views: 26
# Favorites: 0
# Likes: 0
# Comments: 0
def cochleagram_extractor(xx, sr, win_len, shift_len, channel_number, win_type):
    """Compute a framed cochleagram from a filter-bank decomposition.

    Args:
        xx: Input signal, handed to ``erb_frilter_bank``.
        sr: Sampling rate, handed to ``make_erb_filters``.
        win_len: Frame length in samples.
        shift_len: Hop between consecutive frames in samples.
        channel_number: Number of filter-bank channels (rows of the result).
        win_type: ``'hanning'``, ``'hamming'`` or ``'triangle'``; any other
            value selects an all-ones weighting.

    Returns:
        numpy.ndarray: Array of shape ``(channel_number, frames)`` holding,
        per channel and frame, the square root of the mean weighted squared
        filter output. Exact zeros are replaced by the float epsilon.
    """
    # NOTE(review): make_erb_filters / erb_frilter_bank live outside this
    # block; the literal 50 is presumably the lowest centre frequency in Hz.
    fcoefs, _ = make_erb_filters(sr, channel_number, 50)
    fcoefs = np.flipud(fcoefs)
    xf = erb_frilter_bank(xx, fcoefs)

    # The weighting has one value per channel (length channel_number).
    if win_type == 'hanning':
        window = np.hanning(channel_number)
    elif win_type == 'hamming':
        window = np.hamming(channel_number)
    elif win_type == 'triangle':
        window = (1 - (np.abs(channel_number - 1 - 2 * np.arange(1, channel_number + 1, 1)) / (channel_number + 1)))
    else:
        window = np.ones(channel_number)
    # Column vector: broadcasts over all samples of a frame, which equals
    # repeating the column win_len times.
    window = window.reshape((channel_number, 1))

    xe = np.power(xf, 2.0)
    frames = 1 + ((np.size(xe, 1) - win_len) // shift_len)
    cochleagram = np.zeros((channel_number, frames))
    for i in range(frames):
        start = i * shift_len
        one_frame = xe[:, start:start + win_len] * window
        cochleagram[:, i] = np.sqrt(np.mean(one_frame, 1))

    # Avoid exact zeros so callers can safely take logarithms.
    cochleagram = np.where(cochleagram == 0.0, np.finfo(float).eps, cochleagram)
    return cochleagram
def log_power_spectrum_extractor(x, win_len, shift_len, win_type, is_log=False):
    """Compute the framed (optionally log) power spectrum of a 1-D signal.

    Args:
        x: 1-D numpy array of samples.
        win_len: Frame and FFT length in samples.
        shift_len: Hop between consecutive frames in samples.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.
        is_log: If true, return the natural log of the power spectrum.

    Returns:
        numpy.ndarray: float64 array of shape ``(win_len // 2 + 1, frames)``
        with ``frames = (len(x) - win_len) // shift_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    if win_type == 'hanning':
        window = np.hanning(win_len)
    elif win_type == 'hamming':
        window = np.hamming(win_len)
    elif win_type == 'rectangle':
        window = np.ones(win_len)
    else:
        # Previously `window` stayed unbound and the loop failed with an
        # unrelated UnboundLocalError.
        raise ValueError("unsupported win_type: %r" % (win_type,))

    n_bins = win_len // 2 + 1
    frames = (x.shape[0] - win_len) // shift_len
    spect = np.zeros((n_bins, frames), dtype=np.float64)
    for i in range(frames):
        start = i * shift_len
        windowed_frame = np.multiply(x[start:start + win_len], window)
        # The complex64 cast keeps single-precision numerics, matching the
        # complex64 buffer this function used to store spectra in.
        half = np.fft.fft(windowed_frame, win_len).astype(np.complex64)[:n_bins]
        power = np.power(np.abs(half), 2.)
        spect[:, i] = np.log(power) if is_log else power
    return spect
def stft_extractor(x, win_len, shift_len, win_type):
    """Compute the one-sided short-time Fourier transform of a 1-D signal.

    Args:
        x: 1-D numpy array of samples.
        win_len: Frame and FFT length in samples.
        shift_len: Hop between consecutive frames in samples.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.

    Returns:
        numpy.ndarray: complex64 array of shape
        ``(win_len // 2 + 1, frames)`` with
        ``frames = (len(x) - win_len) // shift_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    if win_type == 'hanning':
        window = np.hanning(win_len)
    elif win_type == 'hamming':
        window = np.hamming(win_len)
    elif win_type == 'rectangle':
        window = np.ones(win_len)
    else:
        # Previously `window` stayed unbound and the loop failed with an
        # unrelated UnboundLocalError.
        raise ValueError("unsupported win_type: %r" % (win_type,))

    n_bins = win_len // 2 + 1
    frames = (x.shape[0] - win_len) // shift_len
    spect = np.zeros((n_bins, frames), dtype=np.complex64)
    for i in range(frames):
        start = i * shift_len
        windowed_frame = np.multiply(x[start:start + win_len], window)
        # Only the non-negative frequency bins are kept.
        spect[:, i] = np.fft.fft(windowed_frame, win_len)[:n_bins]
    return spect
def rasta_plp_extractor(x, sr, plp_order=0, do_rasta=True):
    """Run the RASTA-PLP feature pipeline on a signal.

    Args:
        x: Input signal, handed to ``log_power_spectrum_extractor``.
        sr: Sampling rate in Hz. Frames are ``int(sr * 0.02)`` samples long
            with a hop of ``int(sr * 0.01)`` samples.
        plp_order: If greater than zero, the result of ``do_lpc`` on the
            post-processed spectrum is returned; otherwise the spectrum
            itself.
        do_rasta: If true, the bark spectrum is filtered with ``rasta_filt``
            in the log domain.

    Returns:
        The output of ``do_lpc`` when ``plp_order > 0``, else the output of
        ``postaud``.
    """
    # NOTE(review): freq2bark, get_fft_bark_mat, rasta_filt, postaud and
    # do_lpc are defined outside this block; their contracts are assumed.
    win_len = int(sr * 0.02)
    spec = log_power_spectrum_extractor(x, win_len, int(sr * 0.01), 'hamming', False)

    bark_filters = int(np.ceil(freq2bark(sr // 2)))
    wts = get_fft_bark_mat(sr, win_len, bark_filters)
    bark_spec = np.matmul(wts, spec)

    if do_rasta:
        # Replace exact zeros so the logarithm stays finite.
        bark_spec = np.where(bark_spec == 0.0, np.finfo(float).eps, bark_spec)
        log_bark_spec = np.log(bark_spec)
        rasta_log_bark_spec = rasta_filt(log_bark_spec)
        bark_spec = np.exp(rasta_log_bark_spec)

    post_spec = postaud(bark_spec, sr / 2.)
    if plp_order > 0:
        return do_lpc(post_spec, plp_order)
    return post_spec
def spectrum_extractor(x, win_len, shift_len, win_type, is_log):
    """Compute the framed (optionally log) magnitude spectrum of a signal.

    Args:
        x: 1-D numpy array of samples.
        win_len: Frame and FFT length in samples.
        shift_len: Hop between consecutive frames in samples.
        win_type: ``'hanning'``, ``'hamming'`` or ``'triangle'``; any other
            value selects a rectangular (all-ones) window.
        is_log: If true, return the natural log of the magnitudes.

    Returns:
        numpy.ndarray: float64 array of shape ``(win_len // 2 + 1, frames)``
        with ``frames = (len(x) - win_len) // shift_len``.
    """
    if win_type == 'hanning':
        window = np.hanning(win_len)
    elif win_type == 'hamming':
        window = np.hamming(win_len)
    elif win_type == 'triangle':
        window = (1 - (np.abs(win_len - 1 - 2 * np.arange(1, win_len + 1, 1)) / (win_len + 1)))
    else:
        window = np.ones(win_len)

    n_bins = win_len // 2 + 1
    frames = (x.shape[0] - win_len) // shift_len
    spectrum = np.zeros((n_bins, frames), dtype=np.float64)
    for i in range(frames):
        start = i * shift_len
        windowed_frame = np.multiply(x[start:start + win_len], window)
        # The complex64 cast keeps single-precision numerics, matching the
        # complex64 buffer this function used to store spectra in.
        half = np.fft.fft(windowed_frame, win_len).astype(np.complex64)[:n_bins]
        magnitude = np.abs(half)
        spectrum[:, i] = np.log(magnitude) if is_log else magnitude
    return spectrum
def smooth(s, lengthscale, parallel=True):
    """Smooth ``s`` along its first axis with a normalised Hamming kernel.

    Args:
        s: 1-D or 2-D numpy array; a 1-D input is treated as one column.
        lengthscale: Requested kernel length. It is rounded to an even
            integer and capped at ``s.shape[0]``.
        parallel: For multi-column input, use ``JOBLIB_NCORES`` workers when
            true, otherwise a single worker.

    Returns:
        numpy.ndarray: Smoothed data with the columns along axis 1.
    """
    if len(s.shape) == 1:
        s = s[..., None]
    nChans = s.shape[1]

    # Explicit int: round() returns a float on Python 2, and the window
    # length must be an integer.
    lengthscale = int(2 * round(float(lengthscale) / 2))
    W = np.hamming(min(lengthscale, s.shape[0]))
    W /= np.sum(W)

    if nChans > 1:
        njobs = JOBLIB_NCORES if parallel else 1
        # NOTE(review): smoothLine and the JOBLIB_* settings are defined
        # outside this block.
        slidingMean = (Parallel(n_jobs=njobs, backend=JOBLIB_BACKEND, temp_folder=JOBLIB_TEMPFOLDER)
                       (delayed(smoothLine)(s[:, chan], W) for chan in range(nChans)))
        return np.array(slidingMean).T
    return smoothLine(s[:, 0], W)[..., None]
def __init__(self, sampling_rate=16000, frame_width=0.032, frame_shift=0.01, num_mel_filters=40, window_func="hanning",
             using_delta=True, using_delta_delta=True):
    """Store the feature-extraction settings and build the filter bank.

    Args:
        sampling_rate: Sampling rate in Hz.
        frame_width: Frame length in seconds; the FFT size is
            ``int(sampling_rate * frame_width)``.
        frame_shift: Frame shift, stored unchanged.
        num_mel_filters: Number of filters requested from the filter bank.
        window_func: ``"hanning"`` or ``"hamming"``.
        using_delta: Stored flag.
        using_delta_delta: Stored flag.

    Raises:
        AssertionError: If ``window_func`` is not a supported name.
    """
    assert window_func in ["hanning", "hamming"]
    self.sampling_rate = sampling_rate
    self.frame_width = frame_width
    self.frame_shift = frame_shift
    self.num_fft = int(sampling_rate * frame_width)
    self.num_mel_filters = num_mel_filters
    # The second branch used to test the undefined name `winfunc`, so
    # asking for "hamming" raised NameError.
    if window_func == "hanning":
        self.window_func = np.hanning
    elif window_func == "hamming":
        self.window_func = np.hamming
    self.using_delta = using_delta
    self.using_delta_delta = using_delta_delta
    # NOTE(review): `fft` here is a module defined outside this block that
    # provides get_filterbanks; it is not numpy.fft.
    self.fbank = fft.get_filterbanks(nfft=self.num_fft, nfilt=num_mel_filters, samplerate=sampling_rate)
def Specgram(X, W, H):
    """A function to compute the spectrogram of a signal

    :param X: N x 1 Audio Signal
    :param W: Window Size (integer number of samples)
    :param H: Hop Size (integer number of samples)
    :returns: S, an NWin x NBins magnitude spectrogram array. NBins is W/2
        for even W and (W-1)/2 + 1 for odd W. S has zero rows when X is
        shorter than one window.
    """
    if W % H != 0:
        print('Warning: Window size is not integer multiple of hop size\n')
    win = np.hamming(W)
    # Clamp at zero so a too-short signal gives an empty spectrogram rather
    # than a negative array dimension.
    NWin = max(0, int(np.floor((len(X) - W) / float(H)) + 1))
    S = np.zeros((NWin, W))
    for i in range(NWin):
        x = X[i * H:i * H + W]
        S[i, :] = np.abs(np.fft.fft(win * x))
    # Second half of the spectrum is redundant for real signals. Floor
    # division keeps the slice bounds integral.
    if W % 2 == 0:
        # Even Case
        S = S[:, 0:W // 2]
    else:
        # Odd Case
        S = S[:, 0:(W - 1) // 2 + 1]
    return S
def _mfcc_and_labels(audio, labels):
    """Convert audio to MFCC features and matching (interpolated) labels.

    Args:
        audio: numpy array of audio samples at ``timit.SAMPLE_RATE``.
        labels: One label per audio sample.

    Returns:
        A tuple ``(mfcc_features, mfcc_labels)``. ``mfcc_features`` is the
        array returned by ``python_speech_features.mfcc`` and is indexed by
        frame along axis 0; ``mfcc_labels`` holds one nearest-neighbour
        label per frame.
    """
    mfcc_sample_rate = 100.0
    mfcc_features = python_speech_features.mfcc(audio, samplerate=timit.SAMPLE_RATE, winlen=0.025,
                                                winstep=1.0 / mfcc_sample_rate, lowfreq=85.0,
                                                highfreq=timit.SAMPLE_RATE / 2, winfunc=np.hamming)

    # Time stamps (seconds) of each audio sample and of each MFCC frame.
    t_audio = np.linspace(0.0, audio.shape[0] * 1.0 / timit.SAMPLE_RATE, audio.size, endpoint=False)
    t_mfcc = np.linspace(0.0, mfcc_features.shape[0] * 1.0 / mfcc_sample_rate, mfcc_features.shape[0], endpoint=False)

    # Pick the label of the audio sample nearest to each frame time.
    interp_func = scipy.interpolate.interp1d(t_audio, labels, kind='nearest')
    mfcc_labels = interp_func(t_mfcc)
    return mfcc_features, mfcc_labels
def make_spectrogram(self, seg_length, window_func=np.hamming):
    """Computes the spectrogram of the wave.

    seg_length: number of samples in each segment
    window_func: function used to compute the window

    returns: Spectrogram
    """
    n = len(self.ys)
    window = window_func(seg_length)

    # Segments overlap by half. Floor division keeps the slice bounds
    # integral, and the floor of 1 stops a one-sample segment from giving a
    # zero step, which would never terminate.
    step = max(1, seg_length // 2)
    start, end = 0, seg_length
    spec_map = {}

    while end < n:
        ys = self.ys[start:end] * window
        hs = np.fft.rfft(ys)

        # Key each spectrum by the time (seconds) at the segment midpoint.
        t = (start + end) / 2.0 / self.framerate
        spec_map[t] = Spectrum(hs, self.framerate)

        start += step
        end += step

    return Spectrogram(spec_map, seg_length, window_func)
def discontinuity(num_periods=30, hamming=False):
    """Plots the spectrum of a sinusoid with/without windowing.

    num_periods: how many periods to compute
    hamming: boolean whether to apply Hamming window
    """
    signal = thinkdsp.SinSignal(freq=440)
    wave = signal.make_wave(signal.period * num_periods)

    if hamming:
        wave.hamming()

    # Print the sample count and the first and last sample values.
    print(len(wave.ys), wave.ys[0], wave.ys[-1])
    wave.make_spectrum().plot(high=60)
def three_spectrums():
    """Makes a plot showing three spectrums for a sinusoid.

    The panels show, left to right: 30 periods unwindowed, 30.25 periods
    unwindowed, and 30.25 periods with a Hamming window. The figure is
    saved under the root name 'windowing1'.
    """
    thinkplot.preplot(rows=1, cols=3)
    pyplot.subplots_adjust(wspace=0.3, hspace=0.4,
                           right=0.95, left=0.1,
                           top=0.95, bottom=0.05)

    xticks = range(0, 900, 200)
    panels = [(30, False), (30.25, False), (30.25, True)]
    for index, (num_periods, use_hamming) in enumerate(panels, start=1):
        thinkplot.subplot(index)
        thinkplot.config(xticks=xticks)
        discontinuity(num_periods=num_periods, hamming=use_hamming)

    thinkplot.save(root='windowing1')
def _smooth(params, win, type="HAMMING"):
    """Smooth a 1-D array with a normalised window, preserving its length.

    Args:
        params: 1-D numpy array to smooth.
        win: Requested window length; rounded to the nearest integer, capped
            at ``len(params) - 1`` and bumped to the next odd number.
        type: ``"HAMMING"`` for a Hamming window; any other value selects a
            flat (moving-average) window.

    Returns:
        numpy.ndarray: Smoothed array with the same length as ``params``.
    """
    win = int(win + 0.5)
    if win >= len(params) - 1:
        win = len(params) - 1
    if win % 2 == 0:
        win += 1

    # Mirror win - 1 samples at both ends so the 'valid' convolution covers
    # every original sample.
    s = np.r_[params[win - 1:0:-1], params, params[-1:-win:-1]]

    if type == "HAMMING":
        w = np.hamming(win)
    else:
        w = np.ones(win)

    y = np.convolve(w / w.sum(), s, mode='valid')

    # Trim the mirrored margins. The end index is written explicitly because
    # a negative-zero stop (`y[0:-0]`) would give an empty array for win == 1.
    half = win // 2
    return y[half:len(y) - half]
def __init__(self, master, input_connect=None):
    """Set up the component's state from the module defaults.

    Args:
        master: Owning object; forwarded to ``Component.__init__`` and kept
            on ``self.master``.
        input_connect: Forwarded to ``Component.__init__``.
    """
    Component.__init__(self, master, input_connect)
    self.master = master
    self.progeny = []
    self.dur_since_last_birth = 0

    # Current values and their jitter amounts, seeded from `default`.
    self.curr_period = default.CURR_GEN_PERIOD
    self.curr_dur = default.CURR_GRAIN_LEN
    self.curr_lag = default.CURR_GEN_LAG
    self.curr_period_jitter = default.CURR_GEN_PERIOD_JITTER
    self.curr_dur_jitter = default.CURR_GRAIN_LEN_JITTER
    self.curr_lag_jitter = default.CURR_LAG_JITTER

    def generate_envelope(length):
        """ Generates hamming windowing function. """
        return list(np.hamming(length))

    self.envelope_generator = generate_envelope
def iFFT(Y, output_length=None, window=False):
    """ Inverse real-valued Fourier Transform

    Parameters
    ----------
    Y : array_like
       Frequency domain data [Nsignals x Nbins]
    output_length : int, optional
       Length of returned time-domain signal, passed to ``irfft`` as ``n``
       (Default: None, which lets ``irfft`` choose the length)
    window : str or False, optional
       If truthy, must be one of 'hann', 'hamming', 'blackman' or 'kaiser';
       the time-domain signal is weighted with that window (the Kaiser
       window uses beta = 3). Default: False (no weighting)

    Returns
    -------
    y : array_like
       Reconstructed time-domain signal [Nsignals x Nsamples]

    Raises
    ------
    ValueError
       If `window` is truthy but not one of the supported names.
    """
    Y = _np.atleast_2d(Y)
    y = _np.fft.irfft(Y, n=output_length)

    if window:
        window_makers = {
            'hann': _np.hanning,
            'hamming': _np.hamming,
            'blackman': _np.blackman,
            'kaiser': lambda count: _np.kaiser(count, 3),
        }
        if window not in window_makers:
            raise ValueError('Selected window must be one of hann, hamming, blackman or kaiser')

        _, no_of_samples = y.shape
        # The 1-D window broadcasts over every signal (row).
        y = window_makers[window](no_of_samples) * y

    return y
def synthesis_speech(noisy_speech, ideal_mask, win_type, win_len, shift_len, syn_method='A&R'):
samples = noisy_speech.shape[0]
frames = (samples - win_len) // shift_len
if win_type == 'hanning':
window = np.hanning(win_len)
elif win_type == 'hamming':
window = np.hamming(win_len)
elif win_type == 'rectangle':
window = np.ones(win_len)
to_ifft = np.zeros(win_len, dtype=np.complex64)
clean_speech = np.zeros((frames-1)*shift_len+win_len, dtype=np.float32)
window_sum = np.zeros((frames-1)*shift_len+win_len, dtype=np.float32)
for i in range(frames):
one_frame = noisy_speech[i * shift_len: i * shift_len + win_len]
windowed_frame = np.multiply(one_frame, window)
stft = np.fft.fft(windowed_frame, win_len)
masked_abs = np.abs(stft[:win_len//2+1]) * ideal_mask[:, i]
to_ifft[:win_len//2+1] = masked_abs * np.exp(1j * np.angle(stft[:win_len//2+1]))
to_ifft[win_len//2+1:] = np.conj(to_ifft[win_len//2-1:0:-1])
speech_seg = np.real(np.fft.ifft(to_ifft, win_len))
if syn_method == 'A&R' or syn_method == 'ALLEN & RABINER':
clean_speech[i*shift_len:i*shift_len+win_len] += speech_seg
window_sum[i*shift_len:i*shift_len+win_len] += window
elif syn_method == 'G&L' or syn_method == 'GRIFFIN & LIM':
speech_seg = np.multiply(speech_seg, window)
clean_speech[i * shift_len:i * shift_len + win_len] += speech_seg
window_sum[i * shift_len:i * shift_len + win_len] += np.power(window, 2.)
# if i > 0:
# clean_speech[i*shift_len: (i-1)*shift_len+win_len] *= 0.5
window_sum = np.where(window_sum < 1e-2, 1e-2, window_sum)
return clean_speech / window_sum
def synthesis_speech(ns, mk, win_type, win_len, shift_len, syn_method='A&R'):
samples = ns.shape[0]
frames = (samples - win_len) // shift_len
if win_type == 'hanning':
window = np.hanning(win_len)
elif win_type == 'hamming':
window = np.hamming(win_len)
elif win_type == 'rectangle':
window = np.ones(win_len)
to_ifft = np.zeros(win_len, dtype=np.complex64)
clean_speech = np.zeros((frames-1)*shift_len+win_len, dtype=np.float32)
window_sum = np.zeros((frames-1)*shift_len+win_len, dtype=np.float32)
for i in range(frames):
one_frame = ns[i * shift_len: i * shift_len + win_len]
windowed_frame = np.multiply(one_frame, window)
stft = np.fft.fft(windowed_frame, win_len)
masked_abs = np.abs(stft[:win_len//2+1]) * mk[:, i]
to_ifft[:win_len//2+1] = masked_abs * np.exp(1j * np.angle(stft[:win_len//2+1]))
to_ifft[win_len//2+1:] = np.conj(to_ifft[win_len//2-1:0:-1])
speech_seg = np.real(np.fft.ifft(to_ifft, 320))
if syn_method == 'A&R' or syn_method == 'ALLEN & RABINER':
clean_speech[i*shift_len:i*shift_len+win_len] += speech_seg
window_sum[i*shift_len:i*shift_len+win_len] += window
elif syn_method == 'G&L' or syn_method == 'GRIFFIN & LIM':
speech_seg = np.multiply(speech_seg, window)
clean_speech[i * shift_len:i * shift_len + win_len] += speech_seg
window_sum[i * shift_len:i * shift_len + win_len] += np.power(window, 2.)
# if i > 0:
# clean_speech[i*shift_len: (i-1)*shift_len+win_len] *= 0.5
window_sum = np.where(window_sum < 1e-2, 1e-2, window_sum)
return clean_speech / window_sum
def fir1(n, wn):
    """Design a Hamming-windowed FIR filter for the band ``wn``.

    Args:
        n: Filter order; the filter has ``n + 1`` coefficients.
        wn: Two band-edge frequencies ``(low, high)``, used as points of a
            frequency grid that runs from 0 to 1.

    Returns:
        tuple: ``(b, 1)`` where ``b`` holds the windowed coefficients,
        scaled so the response magnitude at the band centre is one, and
        ``1`` is the denominator.
    """
    # Variables definition :
    nbands = len(wn) + 1
    ff = np.array((0, wn[0], wn[0], wn[1], wn[1], 1))
    f0 = np.mean(ff[2:4])
    ntaps = n + 1
    # Alternating 0/1 band gains, each repeated for both edges of its band.
    # np.repeat gives the same vector as a column-major ravel of a 2-row
    # repmat, without needing numpy.matlib.
    mags = np.arange(nbands) % 2
    aa = np.repeat(mags, 2)

    # Get filter coefficients :
    # NOTE(review): `firls` is defined outside this block; its result is
    # assumed to be 1-D or a single column of n + 1 values -- confirm.
    h = firls(ntaps - 1, ff, aa)

    # Apply a window to coefficients :
    b = np.ravel(h) * np.hamming(ntaps)
    # Normalise by the magnitude of the response at the centre frequency f0.
    c = np.exp(-1j * 2 * np.pi * (f0 / 2) * np.arange(ntaps))
    b = b / abs(np.dot(c, b))

    return np.squeeze(b), 1
def fir1(N, Wn):
    """Design a Hamming-windowed FIR filter for the band ``Wn``.

    Args:
        N: Filter order; the filter has ``N + 1`` coefficients.
        Wn: Two band-edge frequencies ``(low, high)``, used as points of a
            frequency grid that runs from 0 to 1.

    Returns:
        tuple: ``(b, 1)`` where ``b`` holds the windowed coefficients,
        scaled so the response magnitude at the band centre is one, and
        ``1`` is the denominator.
    """
    # Variables definition :
    nbands = len(Wn) + 1
    ff = np.array((0, Wn[0], Wn[0], Wn[1], Wn[1], 1))
    f0 = np.mean(ff[2:4])
    ntaps = N + 1
    # Alternating 0/1 band gains, each repeated for both edges of its band.
    # np.repeat gives the same vector as a column-major ravel of a 2-row
    # repmat.
    mags = np.arange(nbands) % 2
    aa = np.repeat(mags, 2)

    # Get filter coefficients :
    # NOTE(review): `firls` is defined outside this block; its result is
    # assumed to be 1-D or a single column of N + 1 values -- confirm.
    h = firls(ntaps - 1, ff, aa)

    # Apply a window to coefficients :
    b = np.ravel(h) * np.hamming(ntaps)
    # Normalise by the magnitude of the response at the centre frequency f0.
    c = np.exp(-1j * 2 * np.pi * (f0 / 2) * np.arange(ntaps))
    b = b / abs(np.dot(c, b))

    return np.squeeze(b), 1
####################################################################
# - Filt the signal :
####################################################################
def __init__(self, sample_rate, frame_length):
    '''
    Initialize MFCC Calculator.

    @param sample_rate: audio sample rate
    @param frame_length: analysis frame (FFT) length in samples; the
        magnitude-spectrum length ``input_size`` is taken as half of it
    '''
    self.sample_rate = sample_rate
    self.nyquist = sample_rate / 2.0
    self.min_hz = 0
    self.max_hz = self.nyquist
    # Floor division: a size must stay an integer under true division.
    self.input_size = frame_length // 2
    self.num_bands = 40
    self.window = np.hamming(frame_length)
    # Starts empty; nothing in this method builds the filter matrix.
    self.filter_matrix = None
def __init__(self, samplerate, framelen):
    '''
    Initialize perceptual loudness using samplerate and framelength

    @param samplerate: audio sample rate
    @param framelen: analysis frame length in samples
    '''
    self.framelen = framelen
    self.window = np.hamming(framelen)
    # Floor division keeps the bin count an integer, and range replaces the
    # Python-2-only xrange.
    half = framelen // 2
    self.filter = np.zeros(half)
    for i in range(half):
        # Bin i corresponds to the frequency i * samplerate / framelen.
        # NOTE(review): __weightA is defined elsewhere on the class;
        # presumably an A-weighting curve -- confirm.
        self.filter[i] = self.__weightA(float(i * samplerate) / framelen)
def __init__(self, path, time_wnd=25, time_off=10):
    """Load a WAV file and precompute the analysis window.

    Args:
        path: Path of the WAV file to read.
        time_wnd: Window length in milliseconds.
        time_off: Window shift in milliseconds.
    """
    wave_src = wave.open(path, "rb")
    try:
        para_src = wave_src.getparams()
        raw = wave_src.readframes(wave_src.getnframes())
    finally:
        # Close the file even if reading fails; it used to be left open.
        wave_src.close()

    # getparams() order: (nchannels, sampwidth, framerate, nframes, ...).
    self.rate = int(para_src[2])
    self.cur_size = 0
    self.tot_size = int(para_src[3])
    # default 400 160
    self.wnd_size = int(self.rate * 0.001 * time_wnd)
    self.wnd_rate = int(self.rate * 0.001 * time_off)

    # Square-root Hamming window of wnd_size points, taken from a window
    # one point longer.
    ham = np.sqrt(np.hamming(self.wnd_size + 1)[0:self.wnd_size])
    # Scale so the squared samples taken every wnd_rate points sum to one.
    self.ham = ham / np.sqrt(np.sum(np.square(ham[0:self.wnd_size:self.wnd_rate])))

    # frombuffer replaces the deprecated binary mode of fromstring; the copy
    # keeps the array writable, as fromstring's result was.
    self.data = np.frombuffer(raw, dtype=np.int16).copy()
    self.upper_bound = np.max(np.abs(self.data))
def __init__(self, path, time_wnd=25, time_off=10):
    """Load a WAV file and precompute the analysis window.

    Args:
        path: Path of the WAV file to read.
        time_wnd: Window length in milliseconds.
        time_off: Window shift in milliseconds.
    """
    wave_src = wave.open(path, "rb")
    try:
        para_src = wave_src.getparams()
        raw = wave_src.readframes(wave_src.getnframes())
    finally:
        # Close the file even if reading fails; it used to be left open.
        wave_src.close()

    # getparams() order: (nchannels, sampwidth, framerate, nframes, ...).
    self.rate = int(para_src[2])
    self.cur_size = 0
    self.tot_size = int(para_src[3])
    # default 400 160
    self.wnd_size = int(self.rate * 0.001 * time_wnd)
    self.wnd_rate = int(self.rate * 0.001 * time_off)

    # Square-root Hamming window of wnd_size points, taken from a window
    # one point longer.
    ham = np.sqrt(np.hamming(self.wnd_size + 1)[0:self.wnd_size])
    # Scale so the squared samples taken every wnd_rate points sum to one.
    self.ham = ham / np.sqrt(np.sum(np.square(ham[0:self.wnd_size:self.wnd_rate])))

    # frombuffer replaces the deprecated binary mode of fromstring; the copy
    # keeps the array writable, as fromstring's result was.
    self.data = np.frombuffer(raw, dtype=np.int16).copy()
    self.upper_bound = np.max(np.abs(self.data))
def getFFT(data, rate):
    """Given some data and rate, returns FFTfreq and FFT (half).

    Args:
        data: 1-D numpy array of samples; a Hamming window is applied to it
            before the transform.
        rate: Sampling rate in Hz.

    Returns:
        tuple: ``(freq, magnitude)``, the first ``len(data) // 2`` bin
        frequencies in Hz and the matching FFT magnitudes.
    """
    windowed = data * np.hamming(len(data))
    magnitude = np.abs(np.fft.fft(windowed))
    freq = np.fft.fftfreq(len(magnitude), 1.0 / rate)
    # Keep only the first half of the bins.
    half = len(magnitude) // 2
    return freq[:half], magnitude[:half]
def smooth(x, window_len=11, window='hanning'):
    """Smooth a 1-D array by convolving it with a normalised window.

    Args:
        x: 1-D numpy array.
        window_len: Window length in samples. Inputs shorter than the
            window, and windows shorter than 3, are returned unchanged.
        window: One of 'flat' (moving average), 'hanning', 'hamming',
            'bartlett' or 'blackman'.

    Returns:
        numpy.ndarray: The smoothed signal, or ``x`` itself when no
        smoothing is applied.

    Raises:
        ValueError: If ``x`` is not 1-D or ``window`` is not supported.
    """
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")
    if x.size < window_len:
        return x
    if window_len < 3:
        return x
    if window not in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Mirror window_len - 1 samples at both ends before convolving.
    s = numpy.r_[x[window_len - 1:0:-1], x, x[-1:-window_len:-1]]
    if window == 'flat':  # moving average
        w = numpy.ones(window_len, 'd')
    else:
        # Look up the validated window function by name instead of using
        # eval on a constructed string.
        w = getattr(numpy, window)(window_len)

    y = numpy.convolve(w / w.sum(), s, mode='valid')
    # Floor division keeps the slice bounds integral.
    y = y[(window_len // 2 - 1):-(window_len // 2) - 1]
    return y
def smooth(values, window):
    """Smooth ``values`` with a Hamming kernel, keeping the mean magnitude.

    Args:
        values: 1-D sequence of numbers.
        window: Length of the Hamming kernel in samples.

    Returns:
        numpy.ndarray: The 'valid' part of the convolution (shorter than the
        input by ``window - 1`` samples), rescaled so its mean absolute
        value equals that of ``values``.
    """
    oavg = numpy.mean(numpy.abs(values))
    weights = numpy.hamming(window)
    sma = numpy.convolve(values, weights, 'valid')
    sma = sma[0:len(values)]
    navg = numpy.mean(numpy.abs(sma))
    if navg == 0:
        # Nothing to rescale; dividing would turn the output into NaNs.
        return sma
    return sma * (oavg / navg)
def hamming(self):
    """Apply a Hamming window to the wave.

    The samples in ``self.ys`` are scaled in place by a Hamming window of
    the same length; nothing is returned.
    """
    self.ys *= np.hamming(len(self.ys))
def window_plot():
    """Makes a plot showing a sinusoid, hamming window, and their product.

    The three panels are stacked vertically and the figure is saved under
    the root name 'windowing2'.
    """
    signal = thinkdsp.SinSignal(freq=440)
    duration = signal.period * 10.25
    wave1 = signal.make_wave(duration)
    wave2 = signal.make_wave(duration)

    # The window itself, wrapped as a Wave so it can be plotted like one.
    ys = numpy.hamming(len(wave1.ys))
    window = thinkdsp.Wave(ys, wave1.framerate)

    wave2.hamming()

    thinkplot.preplot(rows=3, cols=1)
    pyplot.subplots_adjust(wspace=0.3, hspace=0.3,
                           right=0.95, left=0.1,
                           top=0.95, bottom=0.05)

    axis = [0, duration, -1.07, 1.07]
    # Only the bottom panel carries the x-axis label.
    panels = [(wave1, {}), (window, {}), (wave2, {'xlabel': 'time (s)'})]
    for index, (curve, extra) in enumerate(panels, start=1):
        thinkplot.subplot(index)
        curve.plot()
        thinkplot.config(axis=axis, **extra)

    thinkplot.save(root='windowing2')