def __nearest_pow_2(self,x):
"""
Find power of two nearest to x
>>> _nearest_pow_2(3)
2.0
>>> _nearest_pow_2(15)
16.0
:type x: float
:param x: Number
:rtype: Int
:return: Nearest power of 2 to x
"""
a = math.pow(2, math.ceil(np.log2(x)))
b = math.pow(2, math.floor(np.log2(x)))
if abs(a - x) < abs(b - x):
return a
else:
return b
# calculate spectrogram of signals
python类spectrogram()的实例源码
def __nearest_pow_2(self,x):
"""
Find power of two nearest to x
>>> _nearest_pow_2(3)
2.0
>>> _nearest_pow_2(15)
16.0
:type x: float
:param x: Number
:rtype: Int
:return: Nearest power of 2 to x
"""
a = math.pow(2, math.ceil(np.log2(x)))
b = math.pow(2, math.floor(np.log2(x)))
if abs(a - x) < abs(b - x):
return a
else:
return b
# calculate spectrogram of signals
def __spectrogram(self,data,samp_rate,window,per_lap,wlen,mult):
samp_rate = float(samp_rate)
if not wlen:
wlen = samp_rate/100.0
npts=len(data)
nfft = int(self.__nearest_pow_2(wlen * samp_rate))
if nfft > npts:
nfft = int(self.__nearest_pow_2(npts / 8.0))
if mult is not None:
mult = int(self.__nearest_pow_2(mult))
mult = mult * nfft
nlap = int(nfft * float(per_lap))
end = npts / samp_rate
window = signal.get_window(window,nfft)
specgram, freq, time = mlab.specgram(data, Fs=samp_rate,window=window,NFFT=nfft,
pad_to=mult, noverlap=nlap)
return specgram,freq,time
# Sort data by experimental conditions and plot spectrogram for analog signals (e.g. LFP)
def calculate_features_for_VAD(sound_frames, frequencies_axis, spectrogram):
features = numpy.empty((spectrogram.shape[0], 3))
# smooted_spectrogram, smoothed_frequencies_axis = smooth_spectrogram(spectrogram, frequencies_axis, 24)
for time_ind in range(spectrogram.shape[0]):
mean_spectrum = spectrogram[time_ind].mean()
if mean_spectrum > 0.0:
sfm = -10.0 * math.log10(stats.gmean(spectrogram[time_ind]) / mean_spectrum)
else:
sfm = 0.0
# max_freq = smoothed_frequencies_axis[smooted_spectrogram[time_ind].argmax()]
max_freq = frequencies_axis[spectrogram[time_ind].argmax()]
features[time_ind][0] = numpy.square(sound_frames[time_ind]).mean()
features[time_ind][1] = sfm
features[time_ind][2] = max_freq
"""medfilt_order = 3
for feature_ind in range(features.shape[0]):
features[feature_ind] = signal.medfilt(features[feature_ind], medfilt_order)"""
return features
def __init__(self, **params):
"""Create an AudioAnalyzer
All keyword arguments are gathered and stored in the instance's
self.params. Keyword arguments relate to STFT parameters,
"""
# List of loaded songs
self.songs = []
self.motifs = []
# Reference to and spectrogram of currently active song
self.active_song = None
self.Sxx = None
self.params = params
# Reference to the neural net used for processing
self.classifier = None
plot_spectrograms.py 文件源码
项目:BirdAudioDetectionChallenge2016
作者: RSPB
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def plot_spectrogram(data, rate, name, dir_label):
window_size = 2 ** 10
overlap = window_size // 8
window = sig.tukey(M=window_size, alpha=0.25)
freq, time, spectrogram = sig.spectrogram(data, fs=rate, window=window, nperseg=window_size, scaling='density', noverlap=overlap)
spectrogram = np.log10(np.flipud(spectrogram))
try:
if spectrogram.shape[1] > 512:
spec_padded = spectrogram[:512,:512]
elif spectrogram.shape[1] < 512:
spec_padded = np.pad(spectrogram, ((0, 0), (0, 512 - spectrogram.shape[1])), mode='median')[:512, :]
else:
spec_padded = spectrogram
except Exception as e:
print('ERROR!')
print('Fault in: {}'.format(name))
raise
spec_padded = transform.downscale_local_mean(spec_padded, (2, 2))
final_path = os.path.join(output_dir, dir_label, name + '.png')
plt.imsave(final_path, spec_padded, cmap=plt.get_cmap('gray'))
def __spectrogram(self,data,samp_rate,window,per_lap,wlen,mult):
samp_rate = float(samp_rate)
if not wlen:
wlen = samp_rate/100.0
npts=len(data)
nfft = int(self.__nearest_pow_2(wlen * samp_rate))
if nfft > npts:
nfft = int(self.__nearest_pow_2(npts / 8.0))
if mult is not None:
mult = int(self.__nearest_pow_2(mult))
mult = mult * nfft
nlap = int(nfft * float(per_lap))
end = npts / samp_rate
window = signal.get_window(window,nfft)
specgram, freq, time = mlab.specgram(data, Fs=samp_rate,window=window,NFFT=nfft,
pad_to=mult, noverlap=nlap)
return specgram,freq,time
# Sort data by experimental conditions and plot spectrogram for analog signals (e.g. LFP)
def __init__(self, data, Fs, name='', start=0):
"""Create a SongFile for storing signal data
Inputs:
data: a numpy array with time series data. For use with PyAudio,
ensure the format of data is the same as the player
Fs: sampling frequency, ideally a float
Keyword Arguments:
name: a string identifying where this SongFile came from
start: a value in seconds indicating that the SongFile's data does
not come from the start of a longer signal
"""
# Values passed into the init
self.data = data
self.Fs = Fs
# Post-processed values (does not include spectrogram)
self.time = None
self.freq = None
self.classification = None
self.entropy = None
self.power = None
self.name = name
self.start = start
self.length = len(self.data) / self.Fs
def power_spectrum(signal: np.ndarray,
fs: int,
window_width: int,
window_overlap: int) -> (np.ndarray, np.ndarray, np.ndarray):
"""
Computes the power spectrum of the specified signal.
A periodic Hann window with the specified width and overlap is used.
Parameters
----------
signal: numpy.ndarray
The input signal
fs: int
Sampling frequency of the input signal
window_width: int
Width of the Hann windows in samples
window_overlap: int
Overlap between Hann windows in samples
Returns
-------
f: numpy.ndarray
Array of frequency values for the first axis of the returned spectrogram
t: numpy.ndarray
Array of time values for the second axis of the returned spectrogram
sxx: numpy.ndarray
Power spectrogram of the input signal with axes [frequency, time]
"""
f, t, sxx = spectrogram(x=signal,
fs=fs,
window=hann(window_width, sym=False),
noverlap=window_overlap,
mode="magnitude")
return f, t, (1.0 / window_width) * (sxx ** 2)
def mel_spectrum(power_spectrum: np.ndarray,
mel_fbank: np.ndarray = None,
fs: int = None,
window_width: int = None,
n_filt: int = 40) -> np.ndarray:
"""
Computes a Mel spectrogram from the specified power spectrogram.
Optionally, precomputed Mel filter banks can be passed to this function, in which case the n_filt, fs, and
window_width parameters are ignored. If precomputed Mel filter banks are used, the caller has to ensure that they
have correct shape.
Parameters
----------
power_spectrum: numpy.ndarray
The power spectrogram from which a Mel spectrogram should be computed
mel_fbank: numpy.ndarray, optional
Precomputed Mel filter banks
fs: int
Sampling frequency of the signal from which the power spectrogram was computed. Ignored if precomputed Mel
filter banks are used.
window_width: int
Window width in samples that was used to comput the power spectrogram. Ignored if precomputed Mel filter banks
are used.
n_filt: int
Number of Mel filter banks to use. Ignored if precomputed Mel filter banks are used.
Returns
-------
numpy.ndarray
Mel spectrogram computed from the specified power spectrogram
"""
if mel_fbank is None:
_, mel_fbank = mel_filter_bank(fs, window_width, n_filt)
filter_banks = np.dot(mel_fbank, power_spectrum)
return filter_banks
def power_to_db(spectrum: np.ndarray,
clip_below: float = None,
clip_above: float = None) -> np.ndarray:
"""
Convert a spectrogram to the Decibel scale.
Optionally, frequencies with amplitudes below or above a certain threshold can be clipped.
Parameters
----------
spectrum: numpy.ndarray
The spectrogram to convert
clip_below: float, optional
Clip frequencies below the specified amplitude in dB
clip_above: float, optional
Clip frequencies above the specified amplitude in dB
Returns
-------
numpy.ndarray
The spectrogram on the Decibel scale
"""
# there might be zeros, fix them to the lowest non-zero power in the spectrogram
epsilon = np.min(spectrum[np.where(spectrum > 0)])
sxx = np.where(spectrum > epsilon, spectrum, epsilon)
sxx = 10 * np.log10(sxx / np.max(sxx))
if clip_below is not None:
sxx = np.maximum(sxx, clip_below)
if clip_above is not None:
sxx = np.minimum(sxx, clip_above)
return sxx
def get_spectrogram(audio, sampling_rate):
return signal.spectrogram(audio, fs=sampling_rate, window='hanning',
nperseg=M, noverlap=M-window_shift,
detrend=False, scaling='spectrum')
# def plot_spectrogram(freqs, times, Sx):
# print(Sx.shape)
# f, ax = plt.subplots()
# ax.pcolormesh(times, freqs / 1000, 10 * np.log10(Sx), cmap='viridis')
# ax.set_ylabel('Frequency [kHz]')
# ax.set_xlabel('Time [s]')
# plt.show()
def show_VAD_features(sound_data, sampling_frequency):
assert sampling_frequency >= 8000, 'Sampling frequency is inadmissible!'
n_data = len(sound_data)
assert (n_data > 0) and ((n_data % 2) == 0), 'Sound data are wrong!'
frame_size = int(round(FRAME_DURATION * float(sampling_frequency)))
n_fft_points = 2
while n_fft_points < frame_size:
n_fft_points *= 2
sound_signal = numpy.empty((int(n_data / 2),))
for ind in range(sound_signal.shape[0]):
sound_signal[ind] = float(struct.unpack('<h', sound_data[(ind * 2):(ind * 2 + 2)])[0])
frequencies_axis, time_axis, spectrogram = signal.spectrogram(
sound_signal, fs=sampling_frequency, window='hamming', nperseg=frame_size, noverlap=0, nfft=n_fft_points,
scaling='spectrum', mode='psd'
)
spectrogram = spectrogram.transpose()
if spectrogram.shape[0] <= INIT_SILENCE_FRAMES:
return []
if (sound_signal.shape[0] % frame_size) == 0:
sound_frames = numpy.reshape(sound_signal, (spectrogram.shape[0], frame_size))
else:
sound_frames = numpy.reshape(sound_signal[0:int(sound_signal.shape[0] / frame_size) * frame_size],
(spectrogram.shape[0], frame_size))
features = calculate_features_for_VAD(sound_frames, frequencies_axis, spectrogram).transpose()
time_axis = time_axis.transpose()
del spectrogram
del frequencies_axis
plt.subplot(411)
plt.plot(time_axis, features[0])
plt.title('Short-time Energy')
plt.grid(True)
plt.subplot(412)
plt.plot(time_axis, features[1])
plt.title('Spectral Flatness Measure')
plt.grid(True)
plt.subplot(413)
plt.plot(time_axis, features[2])
plt.title('Most Dominant Frequency Component')
plt.grid(True)
plt.subplot(414)
x = numpy.repeat(time_axis, 4)
y = []
for time_ind in range(time_axis.shape[0]):
y += [sound_frames[time_ind][0], sound_frames[time_ind].max(), sound_frames[time_ind].min(),
sound_frames[time_ind][-1]]
y = numpy.array(y)
plt.plot(x, y)
plt.title('Wave File')
plt.grid(True)
plt.show()
del sound_frames
del time_axis
def get_data_sample(self, idx):
"""Get a sample from the spectrogram and the corresponding class
Inputs:
idx: an ndarray of integer indices
Returns:
data: data is a
(1,1,img_rows,img_cols) slice from Sxx and classification is
the corresponding integer class from
self.active_song.classification
If idx exceeds the dimensions of the data, throws IndexError
If there is not a processed, active song, throws TypeError
"""
img_rows = self.params.get('img_rows', self.Sxx.shape[0])
img_cols = self.params.get('img_cols', 1)
if self.Sxx is None or self.active_song.classification is None:
raise TypeError('No active song from which to get data')
if np.amax(idx) > self.Sxx.shape[1]:
raise IndexError('Data index of sample out of bounds, only {0} '
'samples in the dataset'.format(self.Sxx.shape[1] - img_cols))
if np.amin(idx) < 0:
raise IndexError('Data index of sample out of bounds, '
'negative index requested')
# index out the data
max_idx = (self.Sxx.shape[1] - 1)
data_slices = [self.Sxx[0:img_rows, np.minimum(
max_idx, idx + i)].T.reshape(idx.size, 1, img_rows) for i in range(img_cols)]
data = np.stack(data_slices, axis=-1)
# scale the input
data = np.log10(data)
data -= np.amin(data)
data /= np.amax(data)
return data
def mel_filter_bank(fs: int,
window_width: int,
n_filt: int = 40) -> (np.ndarray, np.ndarray):
"""
Computes Mel filter banks for the specified parameters.
A power spectrogram can be converted to a Mel spectrogram by multiplying it with the filter bank. This method exists
so that the computation of Mel filter banks does not have to be repeated for each computation of a Mel spectrogram.
The coefficients of Mel filter banks depend on the sampling frequency and the window width that were used to
generate power spectrograms.
Parameters
----------
fs: int
The sampling frequency of signals
window_width: int
The window width in samples used to generate spectrograms
n_filt: int
The number of filters to compute
Returns
-------
f: numpy.ndarray
Array of Hertz frequency values for the filter banks
filters: numpy.ndarray
Array of Mel filter bank coefficients. The first axis corresponds to different filters, and the second axis
corresponds to the original frequency bands
"""
n_fft = window_width
low_freq_mel = 0
high_freq_mel = (2595 * np.log10(1 + (fs / 2) / 700)) # Convert Hz to Mel
mel_points = np.linspace(low_freq_mel, high_freq_mel, n_filt + 2) # Equally spaced in Mel scale
hz_points = (700 * (10 ** (mel_points / 2595) - 1)) # Convert Mel to Hz
bin = np.floor((n_fft + 1) * hz_points / fs)
fbank = np.zeros((n_filt, int(np.floor(n_fft / 2 + 1))))
for m in range(1, n_filt + 1):
f_m_minus = int(bin[m - 1]) # left
f_m = int(bin[m]) # center
f_m_plus = int(bin[m + 1]) # right
for k in range(f_m_minus, f_m):
fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
for k in range(f_m, f_m_plus):
fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
return hz_points[1:n_filt + 1], fbank