def griffin_lim(mag, phase_angle, n_fft, hop, num_iters):
"""Iterative algorithm for phase retrival from a magnitude spectrogram.
Args:
mag: Magnitude spectrogram.
phase_angle: Initial condition for phase.
n_fft: Size of the FFT.
hop: Stride of FFT. Defaults to n_fft/2.
num_iters: Griffin-Lim iterations to perform.
Returns:
audio: 1-D array of float32 sound samples.
"""
fft_config = dict(n_fft=n_fft, win_length=n_fft, hop_length=hop, center=True)
ifft_config = dict(win_length=n_fft, hop_length=hop, center=True)
complex_specgram = inv_magphase(mag, phase_angle)
for i in range(num_iters):
audio = librosa.istft(complex_specgram, **ifft_config)
if i != num_iters - 1:
complex_specgram = librosa.stft(audio, **fft_config)
_, phase = librosa.magphase(complex_specgram)
phase_angle = np.angle(phase)
complex_specgram = inv_magphase(mag, phase_angle)
return audio
python类magphase()的实例源码
def transform_audio(self, y):
'''Compute the STFT magnitude and phase.
Parameters
----------
y : np.ndarray
The audio buffer
Returns
-------
data : dict
data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
STFT magnitude
data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
STFT phase
'''
n_frames = self.n_frames(get_duration(y=y, sr=self.sr))
D = stft(y, hop_length=self.hop_length,
n_fft=self.n_fft)
D = fix_length(D, n_frames)
mag, phase = magphase(D)
if self.log:
mag = amplitude_to_db(mag, ref=np.max)
return {'mag': mag.T[self.idx].astype(np.float32),
'phase': np.angle(phase.T)[self.idx].astype(np.float32)}
def transform_audio(self, y):
'''Compute the CQT
Parameters
----------
y : np.ndarray
The audio buffer
Returns
-------
data : dict
data['mag'] : np.ndarray, shape = (n_frames, n_bins)
The CQT magnitude
data['phase']: np.ndarray, shape = mag.shape
The CQT phase
'''
n_frames = self.n_frames(get_duration(y=y, sr=self.sr))
C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
fmin=self.fmin,
n_bins=(self.n_octaves * self.over_sample * 12),
bins_per_octave=(self.over_sample * 12))
C = fix_length(C, n_frames)
cqtm, phase = magphase(C)
if self.log:
cqtm = amplitude_to_db(cqtm, ref=np.max)
return {'mag': cqtm.T.astype(np.float32)[self.idx],
'phase': np.angle(phase).T.astype(np.float32)[self.idx]}
def transform_audio(self, y):
'''Compute the HCQT
Parameters
----------
y : np.ndarray
The audio buffer
Returns
-------
data : dict
data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
The CQT magnitude
data['phase']: np.ndarray, shape = mag.shape
The CQT phase
'''
cqtm, phase = [], []
n_frames = self.n_frames(get_duration(y=y, sr=self.sr))
for h in self.harmonics:
C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
fmin=self.fmin * h,
n_bins=(self.n_octaves * self.over_sample * 12),
bins_per_octave=(self.over_sample * 12))
C = fix_length(C, n_frames)
C, P = magphase(C)
if self.log:
C = amplitude_to_db(C, ref=np.max)
cqtm.append(C)
phase.append(P)
cqtm = np.asarray(cqtm).astype(np.float32)
phase = np.angle(np.asarray(phase)).astype(np.float32)
return {'mag': self._index(cqtm),
'phase': self._index(phase)}
def parse_audio(self, audio_path):
if self.augment:
y = load_randomly_augmented_audio(audio_path, self.sample_rate)
else:
y = load_audio(audio_path)
if self.noiseInjector:
add_noise = np.random.binomial(1, self.noise_prob)
if add_noise:
y = self.noiseInjector.inject_noise(y)
n_fft = int(self.sample_rate * self.window_size)
win_length = n_fft
hop_length = int(self.sample_rate * self.window_stride)
# STFT
D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
win_length=win_length, window=self.window)
spect, phase = librosa.magphase(D)
# S = log(S+1)
spect = np.log1p(spect)
spect = torch.FloatTensor(spect)
if self.normalize:
mean = spect.mean()
std = spect.std()
spect.add_(-mean)
spect.div_(std)
return spect
def parse_audio(self, audio_path):
if self.augment:
y = load_randomly_augmented_audio(audio_path)
else:
y = load_audio(audio_path)
if self.noiseInjector:
add_noise = np.random.binomial(1, self.noise_prob)
if add_noise:
y = self.noiseInjector.inject_noise(y)
n_fft = int(self.sample_rate * self.window_size)
win_length = n_fft
hop_length = int(self.sample_rate * self.window_stride)
# STFT
D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
win_length=win_length, window=self.window)
spect, phase = librosa.magphase(D)
# S = log(S+1)
spect = np.log1p(spect)
spect = torch.FloatTensor(spect)
if self.normalize:
mean = spect.mean()
std = spect.std()
spect.add_(-mean)
spect.div_(std)
return spect
def parse_audio(self, audio_path):
if self.augment:
y = load_randomly_augmented_audio(audio_path)
else:
y = load_audio(audio_path)
if self.noiseInjector:
add_noise = np.random.binomial(1, self.noise_prob)
if add_noise:
y = self.noiseInjector.inject_noise(y)
n_fft = int(self.sample_rate * self.window_size)
win_length = n_fft
hop_length = int(self.sample_rate * self.window_stride)
# STFT
D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
win_length=win_length, window=self.window)
spect, phase = librosa.magphase(D)
# S = log(S+1)
spect = np.log1p(spect)
spect = torch.FloatTensor(spect)
if self.normalize:
mean = spect.mean()
std = spect.std()
spect.add_(-mean)
spect.div_(std)
return spect