def get_mfcc(x, sr, n_mfcc=20):
    """Compute MFCC features from raw audio, using librosa.

    Librosa must be installed.

    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate
        n_mfcc (int): number of coefficients to retain

    Returns:
        tuple: ``(t, mfcc)`` where ``t`` holds one time stamp per analysis
        frame (as computed by ``librosa.frames_to_time``) and ``mfcc`` is a
        2d-array with one row per frame and one column per coefficient.
    """
    # One hop length shared by the analysis and the time axis, so the two
    # cannot drift apart if the value is ever changed.
    hop_length = 512

    # Audio and sample rate are passed by keyword, and n_mfcc is forwarded so
    # that requests for more coefficients than librosa's default are honoured
    # instead of being silently truncated.
    mfcc_all = librosa.feature.mfcc(
        y=x, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length
    )
    n_frames = mfcc_all.shape[1]
    t = librosa.frames_to_time(
        np.arange(n_frames), sr=sr, hop_length=hop_length
    )
    # The slice is kept as a guard so that at most n_mfcc columns are returned.
    return t, mfcc_all[:n_mfcc].T
python类frames_to_time()的实例源码
def get_beats(x, sr):
    """Track beats in an audio excerpt, using librosa's standard
    beat tracker.

    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate

    Returns:
        tuple: ``(t, beat_intervals)`` where ``t`` holds the time of every
        beat except the last and ``beat_intervals`` holds the difference
        between each pair of consecutive beat times. Both arrays have the
        same length.
    """
    # The signal is passed by keyword (y=...) rather than positionally.
    _, beat_frames = librosa.beat.beat_track(y=x, sr=sr)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    # The last beat has no following beat, hence no interval: drop it so
    # that times and intervals line up one-to-one.
    t = beat_times[:-1]
    beat_intervals = np.diff(beat_times)
    return t, beat_intervals
def get_onsets(x, sr):
    """Compute inter-onset intervals (IOI) from audio, using librosa.

    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate

    Returns:
        tuple: ``(t, onset_intervals)`` where ``t`` holds the time of every
        detected onset except the last and ``onset_intervals`` holds the
        difference between each pair of consecutive onset times (the IOI).
        Both arrays have the same length.
    """
    # The signal is passed by keyword (y=...) rather than positionally.
    onset_frames = librosa.onset.onset_detect(y=x, sr=sr)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr)
    # The last onset has no successor, hence no interval: drop it so that
    # times and intervals line up one-to-one.
    t = onset_times[:-1]
    onset_intervals = np.diff(onset_times)
    return t, onset_intervals
def decode_events(self, encoded):
    '''Turn a frame-level event encoding back into (time, value) pairs.

    Parameters
    ----------
    encoded : np.ndarray, shape=(n_frames, m)
        Frame-level annotation encodings as produced by ``encode_events``.
        Non-complex inputs are binarized with a ``>= 0.5`` threshold.

    Returns
    -------
    [(time, value)] : iterable of tuples
        One pair per frame: `time` is the frame's time stamp, computed
        from ``self.sr`` and ``self.hop_length``, and `value` is the
        corresponding row of the (thresholded) encoding, shape=(m,).
    '''
    is_real = np.isrealobj(encoded)
    if is_real:
        # Binarize: anything at or above 0.5 counts as an active event.
        encoded = (encoded >= 0.5)

    n_frames = encoded.shape[0]
    frame_indices = np.arange(n_frames)
    times = frames_to_time(frame_indices,
                           sr=self.sr,
                           hop_length=self.hop_length)

    # Pair every frame time with its row of the encoding.
    return zip(times, encoded)
def _crossFadeRegion(self):
    """Compute the cross-fade region for the mixed song.

    Every candidate beat count from 2 up to (but excluding) a quarter of
    the incoming track's beat count is scored with ``self._score``; the
    best-scoring count is then split in half between the end of the 'in'
    track and the start of the 'out' track.

    Reads ``self.beats['in']``, ``self.beats['out']``, ``self.Yin`` and
    ``self.sr``. Prints a short diagnostic report and stores
    ``[fadeInStart, fadeOut]`` (converted to milliseconds) in
    ``self.crossFade``.

    Raises:
        ValueError: if the 'in' track has too few beats for any candidate
            cross-fade length to be scored.
    """
    Na = self.beats['in'].shape[0] - 1
    # range() and // behave the same on Python 2 and 3 here; the original
    # xrange/print statements only ran on Python 2.
    scores = [self._score(i, Na) for i in range(2, Na // 4)]
    if not scores:
        # np.argmax on an empty list raises an opaque ValueError; raise the
        # same exception type with a message that names the real cause.
        raise ValueError(
            "not enough beats in the 'in' track to compute a cross fade "
            "region"
        )

    # Candidate i is stored at index i - 2, hence the +2 offset.
    noBeats = int(np.argmax(scores)) + 2
    halfBeats = noBeats // 2

    inDuration = librosa.get_duration(y=self.Yin, sr=self.sr)
    fadeInStart = librosa.frames_to_time(
        self.beats['in'], sr=self.sr)[-halfBeats]
    fadeIn = inDuration - fadeInStart
    fadeOut = librosa.frames_to_time(
        self.beats['out'], sr=self.sr)[halfBeats]

    report = (
        ("Best Power Corelation Scores=", np.max(scores)),
        ("Number of beats in cross fade region=", noBeats),
        ("fadeInStart=", fadeInStart),
        ("fadeOutEnd=", fadeOut),
        ("Cross Fade Time=", fadeIn + fadeOut),
    )
    for label, value in report:
        # Single-argument print(...) with a space-joined string produces
        # the same output as the Python 2 statement `print label, value`.
        print("{0} {1}".format(label, value))

    self.crossFade = [fadeInStart * 1000, fadeOut * 1000]  # In milliseconds
def calculateFeatures(filename):
    """Build a fixed-length, beat-synchronous feature vector for an audio file.

    The file is loaded with librosa and beat-tracked. Per beat, the first
    MFCC coefficient, its delta, and a CQT chromagram of the harmonic
    component are aggregated. Four sections of 30 beats each are then
    picked greedily at the positions where the delta of the sliding-window
    sum of squared beat-level MFCC deltas is lowest, rejecting positions
    that would overlap an already chosen section.

    Args:
        filename: path of the audio file, passed to ``librosa.load``.

    Returns:
        1d-array of length ``4 * 30 * 2``: for every selected beat, in
        section order, the beat-level MFCC value divided by 250 followed by
        the index of the strongest chroma bin divided by 11.

    Raises:
        ValueError: if fewer than 30 beats are detected. librosa may raise
            its own errors for tracks that are only slightly longer; that
            is not checked here.
    """
    hop_length = 512
    section_length = 30  # beats per selected section
    permit_length = 0    # No overlap
    n_paras = 4          # number of sections to select
    # Minimum distance (in beats) between the starts of two sections.
    min_gap = section_length - permit_length

    y, sr = librosa.load(filename)
    _, beat_frames = librosa.beat.beat_track(
        y=y, sr=sr, hop_length=hop_length)
    n_beats = len(beat_frames)
    if n_beats < section_length:
        # Fail early with a clear message instead of a negative-dimension
        # or index error further down.
        raise ValueError(
            "need at least %d beats to build features, found %d"
            % (section_length, n_beats)
        )

    y_harmonic, _ = librosa.effects.hpss(y)

    # Beat-synchronous first MFCC coefficient (row 0) and its delta (row 1).
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, hop_length=hop_length, n_mfcc=1)
    mfcc_delta = librosa.feature.delta(mfcc)
    beat_mfcc_delta = librosa.util.sync(
        np.vstack([mfcc, mfcc_delta]), beat_frames)

    # Beat-synchronous chroma of the harmonic part, median per beat.
    chromagram = librosa.feature.chroma_cqt(
        y=y_harmonic, sr=sr, hop_length=hop_length)
    beat_chroma = librosa.util.sync(
        chromagram, beat_frames, aggregate=np.median)

    # Sliding-window sum (window = section_length beats) of the squared
    # beat-level MFCC delta, updated incrementally.
    delta_rms = np.square(beat_mfcc_delta[1])
    prev_delta_sums = np.empty(n_beats - section_length + 1)
    total_sum = np.sum(delta_rms[:section_length])
    prev_delta_sums[0] = total_sum
    for pos in range(n_beats - section_length):
        total_sum = (total_sum - delta_rms[pos]
                     + delta_rms[pos + section_length])
        prev_delta_sums[pos + 1] = total_sum
    prev_delta_sums_delta = librosa.feature.delta(prev_delta_sums)

    # Greedy selection of section start positions.
    para_init_locs = []
    for _ in range(n_paras):
        # Only values at or below 50 can be selected; if none qualifies,
        # position 0 is used as the fallback.
        lowest = 50
        lowest_loc = 0
        for loc, each_sum_delta in enumerate(prev_delta_sums_delta):
            # `<=` keeps the last position among equal minima.
            if each_sum_delta <= lowest and not any(
                abs(loc - chosen) < min_gap for chosen in para_init_locs
            ):
                lowest = each_sum_delta
                lowest_loc = loc
        para_init_locs.append(lowest_loc)
    para_init_locs.sort()

    # Column 0 - mfcc, column 1 - chroma
    all_features = np.empty((n_paras * section_length, 2))
    for n_p, start in enumerate(para_init_locs):
        rows = slice(n_p * section_length, (n_p + 1) * section_length)
        beats = slice(start, start + section_length)
        all_features[rows, 0] = beat_mfcc_delta[0][beats] / 250.0
        # Float divisor so the scaling is never an integer division.
        # presumably 11 is the highest chroma bin index (12 bins) - confirm
        all_features[rows, 1] = np.argmax(
            beat_chroma[:, beats], axis=0) / 11.0
    return all_features.reshape(-1)