def ispec(spec, frame_length, step_length=None, window="hann",
          nb_iter=48, normalize=True, db=False, padding=False):
    """ Invert a spectrogram back to a raw waveform.

    The phase is not known, so it is estimated iteratively: the current
    estimate is inverted with `istft`, re-analysed with `stft`, and the
    unit-modulus phase of that analysis is combined with the given
    spectrogram to form the next estimate.

    Parameters
    ----------
    spec : np.ndarray [shape=(t, n_fft / 2 + 1)]
        magnitude, power, or DB spectrogram of STFT
    frame_length: int
        number of samples point for 1 frame
    step_length: int
        number of samples point for 1 step (when shifting the frames)
        If unspecified, defaults `frame_length // 4`.
    window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)]
        - a window specification (string, tuple, or number);
          see `scipy.signal.get_window`
        - a window function, such as `scipy.signal.hanning`
        - a vector or array of length `n_fft`
    nb_iter: int
        number of iteration, the higher the better audio quality
    normalize: bool
        normalize output raw signal to have mean=0., and std=1.
        A signal with zero standard deviation is only mean-centered
        (dividing by zero would fill the output with NaN).
    db: bool
        if the given spectrogram is in decibel (dB) units (used logarithm);
        when True the input is first passed through `db2power`.
    padding: bool
        forwarded unchanged to every `stft` and `istft` call.

    Returns
    -------
    y : np.ndarray
        real part of the final `istft` reconstruction, optionally
        normalized.
    """
    # `astype` returns a new float64 array, so the caller's data is untouched
    spec = spec.astype('float64')
    # ====== check arguments ====== #
    frame_length = int(frame_length)
    if step_length is None:
        step_length = frame_length // 4
    else:
        step_length = int(step_length)
    # ====== convert to power spectrogram ====== #
    if db:
        spec = db2power(spec)
    # ====== iteratively estimate best phase ====== #
    # NOTE(review): `spec` is used directly as the target modulus below;
    # confirm whether callers are expected to pass magnitude or power here.
    # Keyword arguments shared by every analysis / synthesis call.
    frame_kwargs = dict(frame_length=frame_length, step_length=step_length,
                        window=window, padding=padding)
    X_best = copy.deepcopy(spec)
    # FFT size implied by the number of frequency bins (n_fft / 2 + 1)
    nfft = (spec.shape[1] - 1) * 2
    for _ in range(nb_iter):
        X_t = istft(X_best, **frame_kwargs)
        est = stft(X_t, nfft=nfft, energy=False, **frame_kwargs)
        # keep only the phase; the floor avoids dividing by zero on
        # silent bins
        phase = est / np.maximum(1e-8, np.abs(est))
        X_best = spec * phase
    # final synthesis from the best estimate (also the only synthesis when
    # nb_iter == 0)
    X_t = istft(X_best, **frame_kwargs)
    y = np.real(X_t)
    if normalize:
        # std is taken on the un-centered signal, exactly as before, so the
        # regular path is numerically unchanged
        std = y.std()
        y = y - y.mean()
        if std > 0:
            y = y / std
    return y
# ===========================================================================
# F0 analysis
# ===========================================================================
# [web-page residue] Comment list
# [web-page residue] Table of contents