def midi_notes_encoding(audio):
    """
    Compute frame-based midi encoding of audio.

    For every frame returned by ``librosa.piptrack`` the (at most) six bins
    with the largest magnitudes are selected, bins whose pitch lies above
    MIDI note 96 are dropped, and the L1-normalised magnitudes of the
    remaining bins are written into an 88-wide row at column
    ``round(midi_note) - 9``.

    :param audio: 1-D array of audio time series
    :return: float32 array of shape (number of frames, 88); rows of frames
        without any usable pitch stay all-zero
    """
    # Pass the signal by keyword: recent librosa releases accept keyword
    # arguments only, and older ones accept ``y=`` as well.
    pitches, magnitudes = librosa.piptrack(y=audio)
    # piptrack output is transposed so that each row is one frame.
    pitches = np.transpose(pitches)
    magnitudes = np.transpose(magnitudes)
    lc = np.zeros((pitches.shape[0], 88), dtype=np.float32)

    # Loop-invariant upper pitch bound. Depending on the librosa version
    # midi_to_hz returns a scalar or a 1-element array; ravel handles both.
    overtone_limit = float(np.ravel(librosa.midi_to_hz(96))[0])

    for row, frame_pitches, frame_mags in zip(lc, pitches, magnitudes):
        # Keep a maximum of 6 detected pitches
        num_to_keep = min(int(np.count_nonzero(frame_pitches)), 6)
        if num_to_keep == 0:
            continue

        # Indices of the bins with the largest magnitudes (unordered)
        candidates = np.argpartition(
            frame_mags, -num_to_keep)[-num_to_keep:]

        # Boolean mask instead of filter(): on Python 3 filter() yields a
        # lazy iterator, which cannot be used as a NumPy index. The > 0
        # test keeps log2(0) out of hz_to_midi.
        candidate_pitches = frame_pitches[candidates]
        in_range = (candidate_pitches > 0) & \
            (candidate_pitches <= overtone_limit)
        kept = candidates[in_range]
        if kept.size == 0:
            continue

        # Convert the largest pitches to midi notes
        midi_notes = np.round(librosa.hz_to_midi(frame_pitches[kept]))

        # Normalize magnitudes of pitches (L1 norm)
        kept_mags = frame_mags[kept]
        total = np.linalg.norm(kept_mags, 1)
        if total == 0:
            # Nothing to normalise; avoid writing NaNs into the encoding.
            continue

        # ``row`` is a view into ``lc``, so this writes into the result.
        np.put(row, midi_notes.astype(np.int64) - 9, kept_mags / total)
    return lc
# (web-page residue) Comment list
# (web-page residue) Table of contents