def stft(wav, n_fft=1024, overlap=4, dt=tf.int32, absp=False):
assert (wav.shape[0] > n_fft)
X = tf.placeholder(dtype=dt,shape=wav.shape)
X = tf.cast(X,tf.float32)
hop = n_fft / overlap
## prepare constant variable
Pi = tf.constant(np.pi, dtype=tf.float32)
W = tf.constant(scipy.hanning(n_fft), dtype=tf.float32)
S = tf.pack([tf.fft(tf.cast(tf.multiply(W,X[i:i+n_fft]),\
tf.complex64)) for i in range(1, wav.shape[0] - n_fft, hop)])
abs_S = tf.complex_abs(S)
sess = tf.Session()
if absp:
return sess.run(abs_S, feed_dict={X:wav})
else:
return sess.run(S, feed_dict={X:wav})
python类hanning()的实例源码
def istft(X, scale = 1, overlap=4):
fftsize=(X.shape[1]-1)*2
hop = fftsize / overlap
w = scipy.hanning(fftsize+1)[:-1]
x = scipy.zeros(X.shape[0]*hop)
wsum = scipy.zeros(X.shape[0]*hop)
for n,i in enumerate(range(0, len(x)-fftsize, hop)):
x[i:i+fftsize] += scipy.real(np.fft.irfft(X[n])) * w # overlap-add
wsum[i:i+fftsize] += w ** 2.
pos = wsum != 0
x[pos] /= wsum[pos]
x = x * scale
return x.astype(np.int16)
def ltsd_vad(x, fs, threshold=9, winsize=8192):
# winsize based on sample rate
# 1024 for fs = 16000
orig_dtype = x.dtype
orig_scale_min = x.min()
orig_scale_max = x.max()
x = (x - x.min()) / (x.max() - x.min())
# works with 16 bit
x = x * (2 ** 15)
x = x.astype("int32")
window = sp.hanning(winsize)
ltsd = LTSD(winsize, window, 5)
s_vad = ltsd.compute(x)
# LTSD is 50% overlap, so each "step" covers 4096 samples
# +1 to cover the extra edge window
n_samples = int(((len(s_vad) + 1) * winsize) // 2)
time_s = n_samples / float(fs)
time_points = np.linspace(0, time_s, len(s_vad))
time_samples = (fs * time_points).astype(np.int32)
time_samples = time_samples
f_vad = np.zeros_like(x, dtype=np.bool)
offset = winsize
for n, (ss, es) in enumerate(zip(time_samples[:-1], time_samples[1:])):
sss = ss - offset
if sss < 0:
sss = 0
ses = es - offset
if ses < 0:
ses = 0
if s_vad[n + 1] < threshold:
f_vad[sss:ses] = False
else:
f_vad[sss:ses] = True
f_vad[ses:] = False
x = x.astype("float64")
x = x / float(2 ** 15)
x = x * (orig_scale_max - orig_scale_min) + orig_scale_min
x = x.astype(orig_dtype)
return x[f_vad], f_vad
def ltsd_vad(x, fs, threshold=9, winsize=8192):
# winsize based on sample rate
# 1024 for fs = 16000
orig_dtype = x.dtype
orig_scale_min = x.min()
orig_scale_max = x.max()
x = (x - x.min()) / (x.max() - x.min())
# works with 16 bit
x = x * (2 ** 15)
x = x.astype("int32")
window = sp.hanning(winsize)
ltsd = LTSD(winsize, window, 5)
s_vad = ltsd.compute(x)
# LTSD is 50% overlap, so each "step" covers 4096 samples
# +1 to cover the extra edge window
n_samples = int(((len(s_vad) + 1) * winsize) // 2)
time_s = n_samples / float(fs)
time_points = np.linspace(0, time_s, len(s_vad))
time_samples = (fs * time_points).astype(np.int32)
time_samples = time_samples
f_vad = np.zeros_like(x, dtype=np.bool)
offset = winsize
for n, (ss, es) in enumerate(zip(time_samples[:-1], time_samples[1:])):
sss = ss - offset
if sss < 0:
sss = 0
ses = es - offset
if ses < 0:
ses = 0
if s_vad[n + 1] < threshold:
f_vad[sss:ses] = False
else:
f_vad[sss:ses] = True
f_vad[ses:] = False
x = x.astype("float64")
x = x / float(2 ** 15)
x = x * (orig_scale_max - orig_scale_min) + orig_scale_min
x = x.astype(orig_dtype)
return x[f_vad], f_vad
def stft(x, fftsize=1024, overlap=4):
hop = fftsize / overlap
w = scipy.hanning(fftsize+1)[:-1] # better reconstruction with this trick +1)[:-1]
return np.array([np.fft.rfft(w*x[i:i+fftsize]) for i in range(0, len(x)-fftsize, hop)])
def istft(X, scale = 1, overlap=4):
fftsize=(X.shape[1]-1)*2
hop = fftsize / overlap
w = scipy.hanning(fftsize+1)[:-1]
x = scipy.zeros(X.shape[0]*hop)
wsum = scipy.zeros(X.shape[0]*hop)
for n,i in enumerate(range(0, len(x)-fftsize, hop)):
x[i:i+fftsize] += scipy.real(np.fft.irfft(X[n])) * w # overlap-add
wsum[i:i+fftsize] += w ** 2.
pos = wsum != 0
x[pos] /= wsum[pos]
x = x * scale
return x.astype(np.int16)
def stft(x, fftsize=1024, overlap=4):
hop = fftsize / overlap
w = scipy.hanning(fftsize)
return np.array([np.fft.rfft(w*x[i:i+fftsize]) for i in range(0, len(x)-fftsize, hop)])
def create_window(size):
"""
A normalized Hanning window of given size. Useful for analyzing sinusoidal
signals.
"""
return normalized_window(scipy.hanning(size))
def stft(x, fftsize=1024, overlap=4):
hop = fftsize / overlap
w = scipy.hanning(fftsize+1)[:-1] # better reconstruction with this trick +1)[:-1]
return np.array([np.fft.rfft(w*x[i:i+fftsize]) for i in range(0, len(x)-fftsize, hop)])
def istft(X, overlap=4):
fftsize=(X.shape[1]-1)*2
hop = fftsize / overlap
w = scipy.hanning(fftsize+1)[:-1]
x = scipy.zeros(X.shape[0]*hop)
wsum = scipy.zeros(X.shape[0]*hop)
for n,i in enumerate(range(0, len(x)-fftsize, hop)):
x[i:i+fftsize] += scipy.real(np.fft.irfft(X[n])) * w # overlap-add
wsum[i:i+fftsize] += w ** 2.
pos = wsum != 0
x[pos] /= wsum[pos]
return x
signal_processing.py 文件源码
项目:bird-species-classification
作者: johnmartinsson
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def stft(x, fs, framesz, hop):
framesamp = int(framesz*fs)
hopsamp = int(hop*fs)
w = scipy.hanning(framesamp)
X = scipy.array([scipy.fft(w*x[i:i+framesamp])
for i in range(0, len(x)-framesamp, hopsamp)])
return X
def sinusoid_analysis(X, input_sample_rate, resample_block=128, copy=True):
"""
Contruct a sinusoidal model for the input signal.
Parameters
----------
X : ndarray
Input signal to model
input_sample_rate : int
The sample rate of the input signal
resample_block : int, optional (default=128)
Controls the step size of the sinusoidal model
Returns
-------
frequencies_hz : ndarray
Frequencies for the sinusoids, in Hz.
magnitudes : ndarray
Magnitudes of sinusoids returned in ``frequencies``
References
----------
D. P. W. Ellis (2004), "Sinewave Speech Analysis/Synthesis in Matlab",
Web resource, available: http://www.ee.columbia.edu/ln/labrosa/matlab/sws/
"""
X = np.array(X, copy=copy)
resample_to = 8000
if input_sample_rate != resample_to:
if input_sample_rate % resample_to != 0:
raise ValueError("Input sample rate must be a multiple of 8k!")
# Should be able to use resample... ?
# resampled_count = round(len(X) * resample_to / input_sample_rate)
# X = sg.resample(X, resampled_count, window=sg.hanning(len(X)))
X = sg.decimate(X, input_sample_rate // resample_to, zero_phase=True)
step_size = 2 * round(resample_block / input_sample_rate * resample_to / 2.)
a, g, e = lpc_analysis(X, order=8, window_step=step_size,
window_size=2 * step_size)
f, m = lpc_to_frequency(a, g)
f_hz = f * resample_to / (2 * np.pi)
return f_hz, m
def sinusoid_analysis(X, input_sample_rate, resample_block=128, copy=True):
"""
Contruct a sinusoidal model for the input signal.
Parameters
----------
X : ndarray
Input signal to model
input_sample_rate : int
The sample rate of the input signal
resample_block : int, optional (default=128)
Controls the step size of the sinusoidal model
Returns
-------
frequencies_hz : ndarray
Frequencies for the sinusoids, in Hz.
magnitudes : ndarray
Magnitudes of sinusoids returned in ``frequencies``
References
----------
D. P. W. Ellis (2004), "Sinewave Speech Analysis/Synthesis in Matlab",
Web resource, available: http://www.ee.columbia.edu/ln/labrosa/matlab/sws/
"""
X = np.array(X, copy=copy)
resample_to = 8000
if input_sample_rate != resample_to:
if input_sample_rate % resample_to != 0:
raise ValueError("Input sample rate must be a multiple of 8k!")
# Should be able to use resample... ?
# resampled_count = round(len(X) * resample_to / input_sample_rate)
# X = sg.resample(X, resampled_count, window=sg.hanning(len(X)))
X = sg.decimate(X, input_sample_rate // resample_to, zero_phase=True)
step_size = 2 * round(resample_block / input_sample_rate * resample_to / 2.)
a, g, e = lpc_analysis(X, order=8, window_step=step_size,
window_size=2 * step_size)
f, m = lpc_to_frequency(a, g)
f_hz = f * resample_to / (2 * np.pi)
return f_hz, m
def __init__(self, signal_frames, window=scipy.hanning, positive_only=True):
"""
:param signal_frames: signal represented as SignalFrames instance
:param window: STFT window function - produces 1D window which will
be normalized
"""
self.signal_frames = signal_frames
x_frames = signal_frames.frames
w = normalized_window(window(signal_frames.frame_size))
# complex spectra of windowed blocks of signal - STFT
self.X_complex = np.fft.fft(x_frames * w)
# linear magnitude spectrogram
self.X_mag = abs(self.X_complex) / self.X_complex.shape[1]
# spectra of signal shifted in time
# This fakes looking at the previous frame shifted by one sample.
# In order to work only with one frame of size N and not N + 1, we fill the
# missing value with zero. This should not introduce a large error, since the
# borders of the amplitude frame will go to zero anyway due to applying a
# window function in the STFT tranform.
X_prev_time = np.fft.fft(shift_right(x_frames) * w)
# spectra shifted in frequency
X_prev_freq = shift_right(self.X_complex)
# cross-spectra - ie. spectra of cross-correlation between the
# respective time-domain signals
X_cross_time = cross_spectrum(self.X_complex, X_prev_time)
X_cross_freq = cross_spectrum(self.X_complex, X_prev_freq)
# instantaneous frequency estimates
# normalized frequencies in range [0.0, 1.0] - from DC to sample rate
self.X_inst_freqs = estimate_instant_freqs(X_cross_time)
# instantaneous group delay estimates
# relative coordinates within the frame with range [-0.5, 0.5] where
# 0.0 is the frame center
self.X_group_delays = estimate_group_delays(X_cross_freq)
if positive_only:
self.X_mag = positive_freq_magnitudes(self.X_mag)
self.X_complex, self.X_inst_freqs, self.X_group_delays = [
select_positive_freq_fft(values) for values in
[self.X_complex, self.X_inst_freqs, self.X_group_delays]
]
def STFT(x, window_size, noverlap, time_start, Ts, mode='psd'):
print 'Ts:', Ts
f0 = 1.0/Ts
print 'F Zero:', f0
print 'Frquencia de Nyquist:', f0/2, 'Hz'
print 'Resolução em frquencia:', f0/window_size, 'Hz'
print 'Resolução temporal:', Ts, 'seconds'
#
window = scipy.hanning(window_size)
stft_data = np.array([np.fft.fft(window*data)
for data in SlidingWindow(x, len(window), noverlap)]
)
#
if len(window) % 2:
numFreqs = stft_data.shape[1]/2+1
else:
numFreqs = stft_data.shape[1]/2
freqs = np.fft.fftfreq(window_size, Ts)[:numFreqs]
stft_data = stft_data[:, :numFreqs]
time_values = np.arange(window_size/2,
len(x)-window_size/2+1,
window_size-noverlap) * Ts + time_start
#
if mode == 'psd':
# Also include scaling factors for one-sided densities and dividing by
# the sampling frequency, if desired. Scale everything, except the DC
# component and the NFFT/2 component:
stft_data *= 2.
# MATLAB divides by the sampling frequency so that density function
# has units of dB/Hz and can be integrated by the plotted frequency
# values. Perform the same scaling here.
#if scale_by_freq:
scale_by_freq = True
if scale_by_freq:
Fs = 1/Ts
stft_data /= Fs
# Scale the spectrum by the norm of the window to compensate for
# windowing loss; see Bendat & Piersol Sec 11.5.2.
stft_data /= (np.abs(window)**2).sum()
else:
# In this case, preserve power in the segment, not amplitude
stft_data /= np.abs(window).sum()**2
stft_data = stft_data * np.conjugate(stft_data)
stft_data = stft_data.real
elif mode == 'magnitude':
stft_data = np.absolute(stft_data)
elif mode == 'angle' or mode == 'phase':
stft_data = np.angle(stft_data)
if mode == 'phase':
stft_data = np.unwrap(stft_data, axis=0)
return stft_data.T, freqs, time_values