def test_dtw_aligner():
    """Check that the DTW aligners pull a signal and its sped-up copy together.

    An example recording is loaded and time-stretched with ``rate=2.0``.
    Features are extracted from both versions with ``_get_mcep``, the two
    feature sequences are length-adjusted, and each aligner is then required
    to (a) return equally shaped arrays and (b) yield a smaller Frobenius
    distance between the pair than the unaligned inputs have.

    Three aligner configurations are exercised: the default ``DTWAligner``,
    ``IterativeDTWAligner``, and ``DTWAligner`` with ``melcd`` as a custom
    distance function.
    """
    x, fs = librosa.load(example_audio_file(), sr=None)
    assert fs == 16000

    # ``rate`` is passed by keyword: recent librosa releases reject it as a
    # positional argument, while older ones accept both spellings.
    x_fast = librosa.effects.time_stretch(x, rate=2.0)

    X = _get_mcep(x, fs)
    Y = _get_mcep(x_fast, fs)
    D = X.shape[-1]

    # Create padded pair
    X, Y = adjast_frame_lengths(X, Y, divisible_by=2)

    # Add utterance axis
    X = X.reshape(1, -1, D)
    Y = Y.reshape(1, -1, D)

    # Distance between the unaligned pair; every aligner must beat this.
    baseline = np.linalg.norm(X - Y)

    # Default DTW
    X_aligned, Y_aligned = DTWAligner().transform((X, Y))
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < baseline

    # Iterative DTW
    X_aligned, Y_aligned = IterativeDTWAligner(
        n_iter=2, max_iter_gmm=10, n_components_gmm=2).transform((X, Y))
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < baseline

    # Custom dist function
    from nnmnkwii.metrics import melcd
    X_aligned, Y_aligned = DTWAligner(dist=melcd).transform((X, Y))
    # Same shape check as the other aligner cases above.
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < baseline
评论列表
文章目录