def get_seeds(self, audio_filepath):
    """Get the seeds file to pass to the HLL tracker.

    The audio is loaded at 44.1 kHz, its harmonic component is extracted,
    a CQT is computed with ``self._compute_cqt`` and seeds are picked from
    it with ``self._pick_seeds_cqt``. The seeds are then written, one per
    row, to a newly created temporary CSV file.

    Parameters
    ----------
    audio_filepath : str
        Path to audio file.

    Returns
    -------
    seeds_fpath : str
        Path to the seeds output file. The file is not removed by this
        method; cleaning it up is left to the caller.
    """
    y, sr = librosa.load(audio_filepath, sr=44100)
    y_harmonic = librosa.effects.harmonic(y)
    cqt, samples, freqs = self._compute_cqt(y_harmonic, sr)
    seeds = self._pick_seeds_cqt(cqt, freqs, samples)

    # NOTE(review): assumes `tmp` is the stdlib `tempfile` module (the
    # original called `tmp.mktemp('.csv')`). mktemp only returns a name, so
    # another process could create the file first; NamedTemporaryFile
    # creates and opens the file atomically. delete=False keeps the file on
    # disk after the handle is closed so the path can be returned.
    with tmp.NamedTemporaryFile(
            mode='w', suffix='.csv', delete=False) as fhandle:
        writer = csv.writer(fhandle, delimiter=',')
        writer.writerows(seeds)
        seeds_fpath = fhandle.name
    return seeds_fpath
# Example source code for Python cqt()
def _pick_seeds_cqt(self, cqt, cqt_freqs, samples):
    """Pick (time, frequency) seeds from the peaks of each CQT band.

    Each frequency band (row of ``cqt``) is smoothed with
    ``self._moving_average`` and peak-picked with
    ``librosa.util.peak_pick``. Peaks whose unsmoothed value does not
    exceed ``self.peak_thresh`` are discarded.

    Parameters
    ----------
    cqt : np.array [n_freqs, n_samples]
        CQT, indexed as ``cqt[frequency_bin, time_frame]``.
    cqt_freqs : np.array [n_freqs]
        CQT frequencies.
    samples : np.array [n_samples]
        CQT time stamps.

    Returns
    -------
    seeds : np.array [n_seeds, 2]
        Array of time, frequency seeds. Has shape (0, 2) when no peak
        is found.
    """
    seeds = []
    for i, freq in enumerate(cqt_freqs):
        freq_band = cqt[i, :]
        freq_band_smooth = self._moving_average(freq_band)
        # Keyword arguments keep this call valid on librosa versions where
        # the peak_pick parameters are keyword-only.
        peak_locs = librosa.util.peak_pick(
            freq_band_smooth,
            pre_max=self.pre_max, post_max=self.post_max,
            pre_avg=self.pre_avg, post_avg=self.post_avg,
            delta=self.delta, wait=self.wait
        )
        if len(peak_locs) > 0:
            # Threshold on the raw band, not the smoothed one.
            peak_locs = peak_locs[freq_band[peak_locs] > self.peak_thresh]
        seeds.extend([samples[peak_loc], freq] for peak_loc in peak_locs)

    # reshape so that an empty result still has two columns
    return np.array(seeds).reshape(-1, 2)
def transform_audio(self, y):
    '''Compute the CQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray
            The CQT magnitude (in dB relative to its maximum when
            ``self.log`` is set), transposed, cast to float32 and
            indexed by ``self.idx``.
            NOTE(review): the original documentation gives the shape as
            (n_frames, n_bins) - confirm against ``self.idx``.

        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase angle, processed the same way.
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
            fmin=self.fmin,
            n_bins=(self.n_octaves * self.over_sample * 12),
            bins_per_octave=(self.over_sample * 12))

    # `size` is passed by keyword so the call also works on librosa
    # versions where it is keyword-only.
    C = fix_length(C, size=n_frames)

    cqtm, phase = magphase(C)
    if self.log:
        cqtm = amplitude_to_db(cqtm, ref=np.max)

    return {'mag': cqtm.T.astype(np.float32)[self.idx],
            'phase': np.angle(phase).T.astype(np.float32)[self.idx]}
def transform_audio(self, y):
    '''Compute the HCQT

    One CQT is computed per entry of ``self.harmonics``, each with its
    minimum frequency scaled by that harmonic, and the results are
    stacked.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray
            The stacked CQT magnitudes (in dB relative to each CQT's
            maximum when ``self.log`` is set), cast to float32 and passed
            through ``self._index``.
            NOTE(review): the original documentation gives the shape as
            (n_frames, n_bins, n_harmonics) - confirm against
            ``self._index``.

        data['phase']: np.ndarray, shape = mag.shape
            The stacked CQT phase angles, processed the same way.
    '''
    cqtm, phase = [], []

    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    for h in self.harmonics:
        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin * h,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        # `size` is passed by keyword so the call also works on librosa
        # versions where it is keyword-only.
        C = fix_length(C, size=n_frames)

        C, P = magphase(C)
        if self.log:
            C = amplitude_to_db(C, ref=np.max)
        cqtm.append(C)
        phase.append(P)

    cqtm = np.asarray(cqtm).astype(np.float32)
    phase = np.angle(np.asarray(phase)).astype(np.float32)

    return {'mag': self._index(cqtm),
            'phase': self._index(phase)}
def compute_spec(audio_file, spectro_file):
    """Compute a spectrogram of an audio file and pickle it to disk.

    The audio is loaded at ``config['resample_sr']`` and the spectrogram
    kind is selected by ``config['spectrogram_type']`` ('cqt', 'mel' or
    'stft'); the remaining analysis settings are read from ``config`` too.

    Parameters
    ----------
    audio_file : str
        Path of the audio file to analyse.
    spectro_file : str
        Path the pickled spectrogram is written to.

    Raises
    ------
    ValueError
        If ``config['spectrogram_type']`` is not one of the supported kinds.
    """
    # Get actual audio
    audio, sr = librosa.load(audio_file, sr=config['resample_sr'])

    # Compute spectrogram
    spectrogram_type = config['spectrogram_type']
    if spectrogram_type == 'cqt':
        spec = librosa.cqt(audio, sr=sr, hop_length=config['hop'],
                           n_bins=config['cqt_bins'], real=False)
    elif spectrogram_type == 'mel':
        spec = librosa.feature.melspectrogram(
            y=audio, sr=sr, hop_length=config['hop'],
            n_fft=config['n_fft'], n_mels=config['n_mels'])
    elif spectrogram_type == 'stft':
        spec = librosa.stft(y=audio, n_fft=config['n_fft'])
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError(
            'Unsupported spectrogram_type: %r' % (spectrogram_type,))

    # Write results. Pickle output is bytes, so the file must be opened in
    # binary mode (text mode fails on Python 3).
    with open(spectro_file, "wb") as f:
        pickle.dump(spec, f, protocol=-1)  # spec shape: MxN.
def rcqt(PS, iters=500, sr=22050, hop_length=512, n_bins=84, bins_per_octave=12):
    """Reconstruct a time-domain signal from a CQT magnitude.

    Starts from a random phase and repeats ``iters`` times: combine the
    given magnitude with the current phase, invert with ``icqt``, fix the
    signal length, and take the phase of the CQT of that signal as the
    next phase estimate. The iteration index is printed on every pass.

    Parameters
    ----------
    PS : np.ndarray
        CQT magnitude; ``PS.shape[1]`` is used as the number of frames.
    iters : int
        Number of iterations (must be at least 1).
    sr : int
        Sample rate passed to ``icqt`` and ``cqt``.
    hop_length : int
        Hop length passed to ``icqt`` and ``cqt``.
    n_bins : int
        Number of bins for the forward ``cqt``.
    bins_per_octave : int
        Bins per octave passed to ``icqt`` and ``cqt``.

    Returns
    -------
    X : np.ndarray
        The signal produced by the last iteration.

    Raises
    ------
    ValueError
        If ``iters`` is smaller than 1 (no signal would be produced).
    """
    if iters < 1:
        raise ValueError('iters must be at least 1, got %r' % (iters,))

    sig_len = (PS.shape[1] - 1) * hop_length
    # Random initial phase, uniform in [-pi, pi).
    p = 2 * np.pi * np.random.random_sample(PS.shape) - np.pi
    for i in range(iters):
        # Function-call form is valid on both Python 2 and Python 3.
        print(i)
        S = PS * np.exp(1j * p)
        X = icqt(S, sr=sr, hop_length=hop_length,
                 bins_per_octave=bins_per_octave)
        # `size` by keyword: keyword-only on newer librosa versions.
        X = librosa.util.fix_length(X, size=sig_len)
        p = np.angle(cqt(X, sr=sr, hop_length=hop_length, n_bins=n_bins,
                         bins_per_octave=bins_per_octave))
    return X
def feature_extract(songfile_name):
    '''
    Extract a fixed-width CQT feature matrix from one song file.

    takes: filename (resolved to an absolute path before loading, so the
    working directory is expected to contain the raw song files)
    returns: tuple of (songfile_name, uint16 numpy array of song features)
    '''
    target_frames = 2580
    db_floor = 80

    audio, rate = librosa.load(os.path.abspath(songfile_name))

    spect = librosa.cqt(y=audio, sr=rate, hop_length=512, fmin=None,
                        n_bins=84, bins_per_octave=12, tuning=None,
                        filter_scale=1, norm=1, sparsity=0.01, real=False)

    # log-power spectrogram relative to its own maximum
    spect = librosa.logamplitude(spect**2, ref_power=np.max)

    # Shift and scale into the uint16 range for a smaller footprint.
    # NOTE(review): assumes the dB values lie in [-80, 0] - confirm the
    # logamplitude defaults for the librosa version in use.
    scale = (2**16 - 1)/db_floor
    spect += db_floor
    spect *= scale
    spect = spect.astype('uint16')

    # Zero-pad short spectrograms on the right; crop long ones.
    n_frames = spect.shape[1]
    if n_frames < target_frames:
        spect = np.pad(spect, ((0, 0), (0, target_frames - n_frames)),
                       'constant')
    else:
        spect = spect[:, 0:target_frames]

    return songfile_name, spect
def create_feature_matrix_spark(song_files):
    """Compute fixed-width CQT features for many song files with Spark.

    Files are distributed with the module-level Spark context ``sc``,
    loaded with librosa, converted to a scaled uint16 log-power CQT and
    cropped or zero-padded to 2580 frames. Files that fail to load are
    reported on stderr and skipped.

    Parameters
    ----------
    song_files : list of str
        Paths of the song files to process.

    Returns
    -------
    list of (str, np.ndarray)
        One (file base name, feature matrix) pair per successfully loaded
        file, as collected from the RDD.
    """
    # cqt wrapper
    def log_cqt(y, sr):
        C = librosa.cqt(y=y, sr=sr, hop_length=512, fmin=None,
                        n_bins=84, bins_per_octave=12, tuning=None,
                        filter_scale=1, norm=1, sparsity=0.01, real=True)
        # log-power spectrogram relative to its own maximum
        C = librosa.logamplitude(C**2, ref_power=np.max)
        # scale log-power spectrogram to positive integer values for a
        # smaller footprint
        noise_floor_db = 80
        scaling_factor = (2**16 - 1)/noise_floor_db
        C += noise_floor_db
        C *= scaling_factor
        C = C.astype('uint16')
        return C

    # padding wrapper: crop long spectrograms, zero-pad short ones
    def padding(C, desired_spect_len):
        if C.shape[1] >= desired_spect_len:
            C = C[:, 0:desired_spect_len]
        else:
            C = np.pad(C, ((0, 0), (0, desired_spect_len - C.shape[1])),
                       'constant')
        return C

    # best-effort loader: returns None for files that cannot be loaded
    def try_load(filename):
        try:
            sys.stdout.write('Processing: %s \r' % os.path.basename(filename))
            sys.stdout.flush()
            return librosa.load(filename)
        except Exception as err:
            # `except Exception` lets KeyboardInterrupt/SystemExit through,
            # and the failure is reported instead of silently dropped.
            sys.stderr.write('Skipping %s: %s\n' % (filename, err))
            return None

    # transformations
    filesRDD = sc.parallelize(song_files)
    rawAudioRDD = filesRDD.map(
        lambda x: (os.path.basename(x), try_load(x))
    ).filter(lambda x: x[1] is not None)
    # x[1] is the (audio, sample_rate) pair returned by librosa.load
    rawCQT = rawAudioRDD.map(lambda x: (x[0], log_cqt(x[1][0], x[1][1])))
    paddedCQT = rawCQT.map(lambda x: (x[0], padding(x[1], 2580)))
    return paddedCQT.collect()
def _compute_cqt(self, y, sr):
    """Compute a normalized CQT magnitude with its time and frequency axes.

    Parameters
    ----------
    y : np.array
        Audio signal
    sr : float
        Audio signal sample rate

    Returns
    -------
    cqt : np.array [n_freqs, n_samples]
        CQT magnitude after ``self._norm_matrix``.
    samples : np.array [n_samples]
        CQT time stamps, as sample indices of each frame.
    freqs : np.array [n_freqs]
        CQT frequencies.
    """
    lowest_hz = librosa.note_to_hz(self.min_note)
    per_octave = 12
    total_bins = per_octave * self.n_octaves

    magnitude = np.abs(
        librosa.cqt(
            y,
            sr=sr,
            hop_length=self.hop_size,
            fmin=lowest_hz,
            filter_scale=self.filter_scale,
            bins_per_octave=per_octave,
            n_bins=total_bins,
        )
    )
    normalized = self._norm_matrix(magnitude)

    # Frequency of every CQT bin.
    freqs = librosa.cqt_frequencies(
        fmin=lowest_hz, bins_per_octave=per_octave, n_bins=total_bins
    )

    # Axis 1 of the CQT is time: convert each frame index to a sample index.
    frame_indices = range(normalized.shape[1])
    samples = librosa.frames_to_samples(
        frame_indices, hop_length=self.hop_size
    )

    return normalized, samples, freqs