def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Compute the short-time Fourier transform of a 1-D signal.

    The signal is padded with ``frameSize // 2`` zeros in front (so the first
    window is centred on sample 0) and ``frameSize`` zeros at the end (so the
    last frame is fully covered), cut into overlapping frames, windowed and
    transformed with a real FFT.

    :param sig: 1-D array-like of samples
    :param frameSize: frame length in samples (integer)
    :param overlapFac: fraction of ``frameSize`` shared by consecutive frames
    :param window: callable returning a window of the requested length
    :return: complex array of shape ``(n_frames, frameSize // 2 + 1)``
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    # NumPy requires an integer size here, so the floor result is cast.
    samples = np.append(np.zeros(int(np.floor(frameSize / 2.0))), sig)
    # cols for windowing; must be an int to be usable as an array shape
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    # Overlapping view on the padded signal; copied so that the in-place
    # windowing below does not write through aliased memory.
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0]),
    ).copy()
    frames *= win
    return np.fft.rfft(frames)
# Example source code using Python's hanning()
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Return the windowed real-FFT frames (STFT) of a 1-D signal.

    :param sig: 1-D array-like of samples
    :param frameSize: frame length in samples (integer)
    :param overlapFac: fraction of ``frameSize`` by which frames overlap
    :param window: callable mapping a length to a window array
    :return: complex array, one row per frame, ``frameSize // 2 + 1`` columns
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # Leading zeros so that the first window is centred on sample 0.
    # The pad length is cast to int because array sizes must be integers.
    lead = int(np.floor(frameSize / 2.0))
    samples = np.append(np.zeros(lead), sig)

    # Number of frames; cast to int so it is a valid shape entry.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1

    # Trailing zeros so that every frame is fully backed by data.
    samples = np.append(samples, np.zeros(frameSize))

    item = samples.strides[0]
    frames = stride_tricks.as_strided(
        samples, shape=(cols, frameSize), strides=(item * hopSize, item)
    ).copy()  # copy: the strided view aliases memory and is windowed in place
    frames *= win
    return np.fft.rfft(frames)
def make_filterbank(self):
    """Build one Hann-windowed complex exponential filter per bin.

    Reads ``self.sr`` and ``self.n_bins``; writes ``self.hz_freqs``,
    ``self.widths`` and ``self.filters``. Centre frequencies are spaced
    linearly between 0 and ``hz2erb(self.sr / 2)`` and mapped back with
    ``erb2hz`` (both helpers are defined outside this block).
    """
    erb_max = hz2erb(self.sr/2.0)
    erb_freqs = np.arange(0, self.n_bins) * erb_max / float(self.n_bins - 1)
    self.hz_freqs = erb2hz(erb_freqs)
    # Half-width (in samples) of each filter, rounded to whole samples.
    self.widths = np.round(0.5 * (self.n_bins - 1) / erb_max *
                           9.26 * 0.00437 * self.sr * np.exp(-erb_freqs / 9.26) - 0.5)
    self.filters = []
    for b in range(self.n_bins):
        # widths holds rounded floats; use an int for lengths and ranges.
        w = int(self.widths[b])
        f = self.hz_freqs[b]
        # ``np.complex`` was removed from NumPy; the literal 1j is equivalent.
        exponential = np.exp(
            1j * 2 * np.pi * f / self.sr *
            np.arange(-w, w + 1))
        self.filters.append(np.hanning(2 * w + 1) * exponential)
def __init__(self, window = None, fft_size = 1024, hop_size = 512):
    """Store the analysis window and framing parameters.

    :param window: window array; a Hann window of ``fft_size`` samples is
        created when this is ``None``
    :param fft_size: FFT length, also the default window length
    :param hop_size: hop between consecutive frames
    """
    # Fall back to a Hann window only when the caller supplied none.
    self.window = np.hanning(fft_size) if window is None else window
    self.fft_size = fft_size
    self.hop_size = hop_size
    n_window = len(self.window)
    self.window_size = n_window
    self.half_window = int(np.floor(n_window / 2.0))
def __init__(self, window = None, fft_size = 1024, hop_size = 512, sample_rate = 44100):
    """Store the analysis window, framing parameters and sample rate.

    :param window: window array; defaults to a Hann window of ``fft_size``
        samples when ``None``
    :param fft_size: FFT length, also the default window length
    :param hop_size: hop between consecutive frames
    :param sample_rate: sample rate stored on the instance
    """
    # Only build the default Hann window when none was passed in.
    self.window = np.hanning(fft_size) if window is None else window
    self.fft_size = fft_size
    self.hop_size = hop_size
    self.sample_rate = sample_rate
    n_window = len(self.window)
    self.window_size = n_window
    self.half_window = int(np.floor(n_window / 2.0))
def __init__(self, img, start_pos, HOG_flag=0, dataformat=1, resize=1):
    """Initialise the tracker state from a first frame and train on it.

    :param img: image array; only ``img.shape[0]`` and ``img.shape[1]`` are
        read here before it is handed to ``cv2`` / ``tracker`` helpers
    :param start_pos: 4-element box. With ``dataformat`` truthy it is
        converted from two corners to ``(p0, p1, extent0, extent1)``;
        otherwise it is used as given
    :param HOG_flag: truthy selects the fixed 32x32 target size and the
        corresponding parameter set
    :param dataformat: see ``start_pos``
    :param resize: truthy halves the box and the image before training
    """
    self.HOG_flag = HOG_flag
    self.padding = 2
    self.dataformat = dataformat
    self.resize = resize
    self.img_size = img.shape[0], img.shape[1]

    if self.dataformat:
        # Corner pair -> origin plus extents.
        w, h = start_pos[2] - start_pos[0], start_pos[3] - start_pos[1]
        self.pos = start_pos[0], start_pos[1], w, h
    else:
        self.pos = start_pos

    if self.resize:
        # NOTE(review): on Python 3 this is true division, so pos becomes
        # float even for integer input - confirm the tracker helpers accept it.
        self.pos = tuple([ele/2 for ele in self.pos])
        # cv2.resize needs an integer dsize, so the halved image size must
        # use floor division (plain `/` yields floats on Python 3).
        self.img_size = img.shape[0] // 2, img.shape[1] // 2
        img = cv2.resize(img, self.img_size[::-1])

    object_size = self.pos[2:]
    if self.HOG_flag:
        self.target_size = 32, 32
        self.l = 0.0001
        self.sigma = 0.6
        self.f = 0.012
    else:
        self.target_size = object_size[0]*self.padding, object_size[1]*self.padding
        self.l = 0.0001
        self.sigma = 0.2
        self.f = 0.02

    output_sigma_factor = 1/float(8)
    output_sigma = np.sqrt(np.prod(self.target_size)) * output_sigma_factor
    # Separable 2-D Hann window matching the target size.
    self.cos_window = np.outer(np.hanning(self.target_size[0]), np.hanning(self.target_size[1]))
    self.y = tracker.generate_gaussian(self.target_size, output_sigma)
    x = tracker.get_window(img, self.pos, self.padding)
    x = tracker.getFeature(x, self.cos_window, self.HOG_flag)
    self.alpha = tracker.train(x, self.y, self.sigma, self.l)
    self.z = x
def ideal_binary_mask(noisy_speech, clean_speech, snr, sr):
    """Compute a binary time-frequency mask from noisy and clean speech.

    A cell is 1 where the clean-to-noise power ratio exceeds the threshold
    ``snr`` (in dB, converted with ``10 ** (0.1 * snr)``), else 0. Spectra are
    taken with ``stft_extractor`` using a 20 ms Hann window and a 10 ms shift.

    :param noisy_speech: noisy signal
    :param clean_speech: clean reference signal
    :param snr: local SNR threshold in dB
    :param sr: sample rate in Hz
    :return: integer array of 0/1 with the shape of the spectra
    """
    # sr * 0.02 is always a float; frame lengths are sample counts, so pass
    # whole numbers (rounded to absorb floating-point error in the product).
    win_len = int(round(sr * 0.02))
    shift_len = int(round(sr * 0.01))
    noisy_spect = stft_extractor(noisy_speech, win_len, shift_len, 'hanning')
    clean_spect = stft_extractor(clean_speech, win_len, shift_len, 'hanning')
    # The noise spectrum is taken as the difference noisy - clean.
    spect_snr = np.power(np.abs(clean_spect), 2.0) / np.power(np.abs(noisy_spect - clean_spect), 2.0)
    ibm = np.where(spect_snr > 10**(0.1*snr), 1, 0)
    return ibm
def ideal_ratio_mask(noisy_speech, clean_speech, sr):
    """Compute the magnitude ratio ``|clean| / |noisy|`` per spectrogram cell.

    Spectra are taken with ``stft_extractor`` using a 20 ms Hann window and a
    10 ms shift.

    :param noisy_speech: noisy signal
    :param clean_speech: clean reference signal
    :param sr: sample rate in Hz
    :return: array of magnitude ratios with the shape of the spectra
    """
    # sr * 0.02 is always a float; frame lengths are sample counts, so pass
    # whole numbers (rounded to absorb floating-point error in the product).
    win_len = int(round(sr * 0.02))
    shift_len = int(round(sr * 0.01))
    noisy_spect = stft_extractor(noisy_speech, win_len, shift_len, 'hanning')
    clean_spect = stft_extractor(clean_speech, win_len, shift_len, 'hanning')
    irm = np.abs(clean_spect) / np.abs(noisy_spect)
    return irm
def synthesis_speech(noisy_speech, ideal_mask, win_type, win_len, shift_len, syn_method='A&R'):
    """Apply a time-frequency mask to a signal and resynthesise by overlap-add.

    Each frame is windowed, transformed, multiplied by the matching mask
    column (the noisy phase is kept) and transformed back. Frames are
    overlap-added and divided by the accumulated window weight.

    :param noisy_speech: 1-D array of samples
    :param ideal_mask: array of shape ``(win_len // 2 + 1, n_frames)``
    :param win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``
    :param win_len: frame length in samples (even or odd)
    :param shift_len: hop between frames in samples
    :param syn_method: ``'A&R'`` / ``'ALLEN & RABINER'`` (normalise by the sum
        of windows) or ``'G&L'`` / ``'GRIFFIN & LIM'`` (window again after the
        inverse transform, normalise by the sum of squared windows)
    :return: float32 array of length ``(n_frames - 1) * shift_len + win_len``
    :raises ValueError: for an unknown ``win_type`` or ``syn_method``
    """
    window_makers = {'hanning': np.hanning, 'hamming': np.hamming, 'rectangle': np.ones}
    if win_type not in window_makers:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError("unsupported win_type: %r" % (win_type,))
    window = window_makers[win_type](win_len)

    if syn_method in ('A&R', 'ALLEN & RABINER'):
        rewindow = False
        weight = window
    elif syn_method in ('G&L', 'GRIFFIN & LIM'):
        rewindow = True
        weight = np.power(window, 2.)
    else:
        # Previously an unknown method silently produced an all-zero signal.
        raise ValueError("unsupported syn_method: %r" % (syn_method,))

    samples = noisy_speech.shape[0]
    frames = (samples - win_len) // shift_len
    out_len = (frames - 1) * shift_len + win_len
    clean_speech = np.zeros(out_len, dtype=np.float32)
    window_sum = np.zeros(out_len, dtype=np.float32)

    for i in range(frames):
        start = i * shift_len
        stop = start + win_len
        half_spec = np.fft.rfft(noisy_speech[start:stop] * window, win_len)
        # |X| * mask * exp(j * angle(X)) equals X * mask. irfft rebuilds the
        # conjugate-symmetric half itself, correctly for even and odd win_len.
        speech_seg = np.fft.irfft(half_spec * ideal_mask[:, i], win_len)
        if rewindow:
            speech_seg = speech_seg * window
        clean_speech[start:stop] += speech_seg
        window_sum[start:stop] += weight

    # Avoid dividing by (near) zero where the window tapers to nothing.
    window_sum = np.where(window_sum < 1e-2, 1e-2, window_sum)
    return clean_speech / window_sum
def cochleagram_extractor(xx, sr, win_len, shift_len, channel_number, win_type):
    """Compute a cube-root compressed cochleagram of a signal.

    The signal is passed through ``make_erb_filters`` / ``erb_frilter_bank``
    (defined outside this block), squared, weighted per channel, averaged over
    frames of ``win_len`` samples every ``shift_len`` samples, square-rooted
    and finally raised to the power 1/3.

    :param xx: input signal handed to ``erb_frilter_bank``
    :param sr: sample rate handed to ``make_erb_filters``
    :param win_len: frame length in samples
    :param shift_len: hop between frames in samples
    :param channel_number: number of filter channels (rows of the output)
    :param win_type: ``'hanning'``, ``'hamming'``, ``'triangle'``; anything
        else gives uniform weights
    :return: array of shape ``(channel_number, n_frames)``
    """
    fcoefs, _ = make_erb_filters(sr, channel_number, 50)
    filtered = erb_frilter_bank(xx, np.flipud(fcoefs))

    # The weights have one entry per channel, i.e. they vary along rows.
    if win_type == 'hanning':
        weights = np.hanning(channel_number)
    elif win_type == 'hamming':
        weights = np.hamming(channel_number)
    elif win_type == 'triangle':
        weights = (1 - (np.abs(channel_number - 1 - 2 * np.arange(1, channel_number + 1, 1)) / (channel_number + 1)))
    else:
        weights = np.ones(channel_number)
    # Same column of weights repeated for every sample of a frame.
    frame_weights = np.repeat(weights.reshape((channel_number, 1)), win_len, 1)

    energy = np.power(filtered, 2.0)
    frames = 1 + ((np.size(energy, 1) - win_len) // shift_len)
    cochleagram = np.zeros((channel_number, frames))
    for idx in range(frames):
        begin = idx * shift_len
        weighted = np.multiply(energy[:, begin:begin + win_len], frame_weights)
        cochleagram[:, idx] = np.sqrt(np.mean(weighted, 1))

    # Replace exact zeros by machine epsilon before the cube-root compression.
    cochleagram = np.where(cochleagram == 0.0, np.finfo(float).eps, cochleagram)
    return np.power(cochleagram, 1./3)
def ideal_binary_mask(noisy_speech, clean_speech, snr):
    """Compute a binary time-frequency mask from noisy and clean speech.

    Spectra come from ``stft_extractor`` with a 320-sample Hann window and a
    160-sample shift. A cell is 1 where the clean-to-noise power ratio exceeds
    ``10 ** (0.1 * snr)``, else 0.

    :param noisy_speech: noisy signal
    :param clean_speech: clean reference signal
    :param snr: local SNR threshold in dB
    :return: integer array of 0/1 with the shape of the spectra
    """
    noisy_spect, clean_spect = [
        stft_extractor(signal, 320, 160, 'hanning')
        for signal in (noisy_speech, clean_speech)
    ]
    speech_power = np.power(np.abs(clean_spect), 2.0)
    # The noise spectrum is taken as the difference noisy - clean.
    noise_power = np.power(np.abs(noisy_spect - clean_spect), 2.0)
    threshold = 10**(0.1*snr)
    return np.where(speech_power / noise_power > threshold, 1, 0)
def synthesis_speech(ns, mk, win_type, win_len, shift_len, syn_method='A&R'):
    """Apply a time-frequency mask to a signal and resynthesise by overlap-add.

    Each frame of ``ns`` is windowed, transformed, multiplied by the matching
    column of ``mk`` (the original phase is kept) and transformed back. Frames
    are overlap-added and divided by the accumulated window weight.

    :param ns: 1-D array of samples
    :param mk: mask of shape ``(win_len // 2 + 1, n_frames)``
    :param win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``
    :param win_len: frame length in samples (any length, even or odd)
    :param shift_len: hop between frames in samples
    :param syn_method: ``'A&R'`` / ``'ALLEN & RABINER'`` (normalise by the sum
        of windows) or ``'G&L'`` / ``'GRIFFIN & LIM'`` (window again after the
        inverse transform, normalise by the sum of squared windows)
    :return: float32 array of length ``(n_frames - 1) * shift_len + win_len``
    :raises ValueError: for an unknown ``win_type`` or ``syn_method``
    """
    window_makers = {'hanning': np.hanning, 'hamming': np.hamming, 'rectangle': np.ones}
    if win_type not in window_makers:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError("unsupported win_type: %r" % (win_type,))
    window = window_makers[win_type](win_len)

    if syn_method in ('A&R', 'ALLEN & RABINER'):
        rewindow = False
        weight = window
    elif syn_method in ('G&L', 'GRIFFIN & LIM'):
        rewindow = True
        weight = np.power(window, 2.)
    else:
        # Previously an unknown method silently produced an all-zero signal.
        raise ValueError("unsupported syn_method: %r" % (syn_method,))

    samples = ns.shape[0]
    frames = (samples - win_len) // shift_len
    out_len = (frames - 1) * shift_len + win_len
    clean_speech = np.zeros(out_len, dtype=np.float32)
    window_sum = np.zeros(out_len, dtype=np.float32)

    for i in range(frames):
        start = i * shift_len
        stop = start + win_len
        half_spec = np.fft.rfft(ns[start:stop] * window, win_len)
        # The inverse transform length follows win_len; the former hard-coded
        # 320 only worked for that single frame size. |X| * mask *
        # exp(j * angle(X)) equals X * mask, and irfft rebuilds the
        # conjugate-symmetric half itself.
        speech_seg = np.fft.irfft(half_spec * mk[:, i], win_len)
        if rewindow:
            speech_seg = speech_seg * window
        clean_speech[start:stop] += speech_seg
        window_sum[start:stop] += weight

    # Avoid dividing by (near) zero where the window tapers to nothing.
    window_sum = np.where(window_sum < 1e-2, 1e-2, window_sum)
    return clean_speech / window_sum
# Source file: AlternativePeriodicityScoring.py
# Project: SlidingWindowVideoTDA
# Author: ctralie
# (page links: project source, file source)
# Views: 19 | Favorites: 0 | Likes: 0 | Comments: 0
def getCutlerDavisFrequencyScore(I, doPlot = False):
    """
    Compute the frequency score suggested by Cutler and Davis from the
    averaged power spectrum of the columns of the self-similarity matrix.
    The returned score is (max - mean) / std of that spectrum; the kurtosis
    is also computed but only shown in the plot title.
    :param I: An Nxd matrix representing a video with N frames at a resolution of
        d pixels
    :param doPlot: If true, show the SSM and average power spectrum across all columns
    :return: (max(F) - mean(F)) / std(F) of the normalized power spectrum F
    """
    N = I.shape[0]
    (D, _) = getSSM(I, N)
    F = np.zeros(N)
    #For linearly detrending
    A = np.ones((N, 2))
    A[:, 1] = np.arange(N)
    #Hann window is the same for every column, so build it once
    hann = np.hanning(N)
    #Compute the power spectrum column by column
    for i in range(N):
        x = D[:, i]
        #Linearly detrend
        mb = np.linalg.lstsq(A, x, rcond=None)[0]
        y = x - A.dot(mb)
        #Apply Hann Window
        y = y*hann
        #Add on power spectrum
        F += np.abs(np.fft.fft(y))**2
    #Compute kurtosis of normalized averaged power spectrum
    F = F/np.sum(F)
    F[0:2] = 0 #Ignore DC component
    F[-1] = 0
    kurt = scipy.stats.kurtosis(F, fisher = False)
    M = np.mean(F)
    S = np.std(F)
    if doPlot:
        plt.subplot(121)
        plt.imshow(D, cmap='afmhot', interpolation = 'none')
        plt.subplot(122)
        #plt.hold was removed from matplotlib; successive plot calls already
        #draw onto the same axes
        plt.plot(F)
        plt.plot([0, N], [M, M], 'b')
        plt.plot([0, N], [M+2*S, M+2*S])
        plt.title("Kurtosis = %.3g"%kurt)
    return (np.max(F) - M)/S
def hanningWindow(nPix, percentage):
    """
    Return a 2D apodization mask whose borders follow a Hanning taper

    Args:
        nPix (int): size of the final (square) image
        percentage (float): percentage of the image side that is apodized,
            split evenly between the two edges

    Returns:
        real: 2D apodization mask of shape (nPix, nPix)
    """
    # Lengths and slice bounds must be integers; np.ceil returns a float.
    M = int(np.ceil(nPix*percentage/100.0))
    half = M // 2
    win = np.hanning(M)

    winOut = np.ones(nPix)
    # With half == 0 the slice [-0:] would cover the whole array, so the
    # taper is only applied when there is at least one sample per edge.
    if half > 0:
        winOut[:half] = win[:half]
        winOut[-half:] = win[-half:]

    return np.outer(winOut, winOut)
# @jit
# def conv(spec, psf, nPixBorder):
# nx, ny, nlambda = spec.shape
# nxPSF, nyPSF, nPSF = psf.shape
# out = np.zeros_like(spec)
# for i in range(nx-2*nPixBorder):
# for j in range(ny-2*nPixBorder):
# for k in range(nxPSF):
# for l in range(nyPSF):
# out[i,j,0] += spec[i+k-nxPSF/2+nPixBorder,j+l-nyPSF/2+nPixBorder,0] * psf[k,l,i]
# return out
def fft(data, fs):
    """Return the one-sided, Hann-windowed amplitude spectrum of ``data``.

    :param data: array whose last axis holds the samples
    :param fs: sample rate, used to scale the frequency axis
    :return: ``(freq_half, amplitude)`` where ``freq_half`` holds the first
        ``ceil(n / 2)`` non-negative frequencies and ``amplitude`` is
        ``|FFT| * 2 / n`` over the same bins along the last axis
    """
    n = data.shape[-1]
    window = np.hanning(n)
    windowed = data * window
    spectrum = np.fft.fft(windowed)
    # Float literals keep the arithmetic correct under integer division too.
    freq = np.fft.fftfreq(n, 1.0 / fs)
    # Slice bounds must be integers; np.ceil returns a float.
    half_n = int(np.ceil(n / 2.0))
    spectrum_half = (2.0 / n) * spectrum[..., :half_n]
    freq_half = freq[:half_n]
    return freq_half, np.abs(spectrum_half)
def smooth1d(x, window_len):
    """Smooth a 1-D array with a normalised Hann window.

    The signal is extended at both ends by point reflection about its first
    and last sample so the output has the same length as the input.

    :param x: 1-D numpy array with at least ``window_len`` samples
    :param window_len: window length in samples; values below 3 leave the
        signal unchanged (a Hann window that short is all zeros or a single 1)
    :return: smoothed array with ``len(x)`` samples
    :raises ValueError: if ``x`` is shorter than ``window_len``
    """
    if window_len < 3:
        return np.array(x, dtype=float)
    if len(x) < window_len:
        raise ValueError("input must have at least window_len samples")

    # Reflect window_len - 1 samples about each end point. The reflected runs
    # start at the neighbour of the end point (x[1] and x[-2]); starting one
    # sample off, as before, put a kink into the padded signal.
    head = 2*x[0] - x[window_len - 1:0:-1]
    tail = 2*x[-1] - x[-2:-window_len - 1:-1]
    s = np.r_[head, x, tail]

    w = np.hanning(window_len)
    y = np.convolve(w/w.sum(), s, mode='same')
    # Drop the padding again.
    return y[window_len-1:-window_len+1]
def _init_stretching(self):
    """Reset the state used for stretching ``self.y``.

    Reads ``self.chunk_size`` and ``self.y``; sets the chunk/phase indices,
    the frame size ``_N``, the hop ``_H`` (a quarter of the chunk size), a Hann
    window, zeroed phase and output buffers, and a default stretch factor of
    1.0 when ``_sf`` is not set yet. Finishes by calling
    ``self._zero_padding()``.
    """
    # Resp. index of current audio chunk and computed phase
    self._i1 = 0
    self._i2 = 0

    chunk = self.chunk_size
    self._N = chunk
    self._H = int(chunk / 4)

    self._win = numpy.hanning(self._N)
    sample_dtype = self.y.dtype
    self._phi = numpy.zeros(self._N, dtype=sample_dtype)
    self._sy = numpy.zeros(len(self.y), dtype=sample_dtype)

    factor_unset = not hasattr(self, '_sf')
    if factor_unset:
        self.stretch_factor = 1.0

    self._zero_padding()
def get_spectral_features(audio, fs, lf_limit=20):
    """
    Compute the spectral centroid and spectral spread of an audio array.

    The audio is Hann-windowed, zero-padded to the next power of two and
    transformed; only the bins at or above ``lf_limit`` are used.

    :param audio: Audio array
    :param fs: Sample rate of audio file
    :param lf_limit: Low frequency limit, in Hz, to be analysed. Defaults to 20Hz.
    :return: Returns the spectral centroid and spectral spread
    """
    taper = np.hanning(len(audio))
    fft_len = int(pow(2, np.ceil(np.log2(len(taper)))))

    # magnitude of the non-negative frequency half of the spectrum
    full_spectrum = np.fft.fft((taper * audio), fft_len)
    magnitudes = np.absolute(full_spectrum[0:int(len(full_spectrum) / 2) + 1])
    hz_per_bin = fs / (2.0 * (len(magnitudes) - 1))
    freqs = np.arange(0, len(magnitudes), 1) * hz_per_bin

    # first bin at or above the low frequency limit
    first_bin = np.where(freqs >= lf_limit)[0][0]
    magnitudes = magnitudes[first_bin:]
    freqs = freqs[first_bin:]

    # magnitude-weighted mean frequency and standard deviation around it
    centroid = sum(magnitudes * freqs) / float(sum(magnitudes))
    variance = sum(((freqs - centroid) ** 2) * magnitudes) / sum(magnitudes)
    spread = np.sqrt(variance)

    return centroid, spread
def spectrogram(samples, sample_rate, frame_len, fps, batch=50):
    """
    Computes a magnitude spectrogram for a given vector of samples at a given
    sample rate (in Hz), frame length (in samples) and frame rate (in Hz).
    Allows to transform multiple frames at once for improved performance (with
    a default value of 50, more is not always better). Returns a numpy array
    with one row per frame and ``frame_len // 2 + 1`` columns; the result is
    empty when there are fewer than ``frame_len`` samples.
    """
    if len(samples) < frame_len:
        return np.empty((0, frame_len // 2 + 1), dtype=samples.dtype)
    win = np.hanning(frame_len)
    # The hop is used as a range step, a stride factor and in a shape
    # computation, all of which need an integer even for float inputs.
    hopsize = int(sample_rate // fps)
    num_frames = max(0, (len(samples) - frame_len) // hopsize + 1)
    batch = min(batch, num_frames)
    if batch <= 1 or not samples.flags.c_contiguous:
        # Frame-by-frame path.
        rfft = rfft_builder(samples[:frame_len], n=frame_len)
        # np.vstack needs a real sequence; a bare generator is rejected.
        spect = np.vstack([np.abs(rfft(samples[pos:pos + frame_len] * win))
                           for pos in range(0, len(samples) - frame_len + 1,
                                            hopsize)])
    else:
        # Batched path: transform `batch` frames per call on a strided view.
        rfft = rfft_builder(np.empty((batch, frame_len), samples.dtype),
                            n=frame_len, threads=1)
        frames = np.lib.stride_tricks.as_strided(
            samples, shape=(num_frames, frame_len),
            strides=(samples.strides[0] * hopsize, samples.strides[0]))
        spect = [np.abs(rfft(frames[pos:pos + batch] * win))
                 for pos in range(0, num_frames - batch + 1, batch)]
        if num_frames % batch:
            # Frames left over after the last full batch go through the
            # frame-by-frame path.
            spect.extend(spectrogram(
                samples[(num_frames // batch * batch) * hopsize:],
                sample_rate, frame_len, fps, batch=1))
        spect = np.vstack(spect)
    return spect
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Compute the short-time Fourier transform of a 1-D signal.

    The signal gets ``frameSize // 2`` leading zeros (centring the first
    window on sample 0) and ``frameSize`` trailing zeros, is cut into
    overlapping frames, windowed and passed through a real FFT.

    :param sig: 1-D array-like of samples
    :param frameSize: frame length in samples (integer)
    :param overlapFac: fraction of ``frameSize`` shared by consecutive frames
    :param window: callable returning a window of the requested length
    :return: complex array of shape ``(n_frames, frameSize // 2 + 1)``
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    # Floor division keeps the pad length an integer on Python 3 as well.
    samples = np.append(np.zeros(frameSize // 2), sig)
    # cols for windowing
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    # Copy the strided view: it aliases memory and is windowed in place.
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0]),
    ).copy()
    frames *= win
    return np.fft.rfft(frames)