def expand(self, audio):
    """Run the network on the magnitude spectrogram of ``audio`` and resynthesize.

    The STFT (``n_fft=1024``) of ``audio`` is separated into magnitude and
    phase. The first ``n_input`` frequency rows of the magnitude are cut
    into consecutive chunks of ``n_len`` frames (trailing frames that do
    not fill a whole chunk are dropped), compressed with ``log1p(x) / 12``
    and fed to ``self.input_op``. The output of ``self.eva_op`` is clipped
    to ``[0, 5e3]``, stitched back together along the time axis, multiplied
    by the input phase and inverted with ``librosa.istft``.

    ``n_input`` and ``n_len`` are names defined outside this method.

    Side effects: evaluates ``self.debug_op`` and writes its value to
    ``debug.npy`` in the current working directory; prints diagnostics to
    stdout.

    Args:
        audio: Signal handed to ``librosa.stft`` (presumably a 1-D mono
            waveform -- TODO confirm against callers).

    Returns:
        The time-domain signal returned by ``librosa.istft``.

    Raises:
        ValueError: If ``audio`` yields fewer than ``n_len`` STFT frames,
            so that not even one network input chunk can be formed.
    """
    # NOTE(review): the previous body down-sampled ``audio`` by 2 and
    # resampled it back, but the result was overwritten before being read,
    # so it had no effect on the output. The dead computation was removed;
    # if the network is meant to see the band-limited signal, the STFT
    # below should be taken of that signal instead -- confirm intent.
    stft = librosa.stft(audio, n_fft=1024)
    magnitude = np.abs(stft)

    # Unit-modulus phase. Bins with zero magnitude get phase 1 instead of
    # the NaN that a plain stft / |stft| division would produce.
    phase = np.ones_like(stft)
    np.divide(stft, magnitude, out=phase, where=magnitude > 0)

    n_chunks = magnitude.shape[1] // n_len
    if n_chunks == 0:
        raise ValueError(
            'audio too short: {} STFT frame(s), need at least {}'.format(
                magnitude.shape[1], n_len))

    # (n_input, n_chunks * n_len) -> (n_chunks, n_input, n_len, 1)
    spec_input = magnitude[:n_input, :n_chunks * n_len]
    spec_input = np.asarray(np.split(spec_input, n_chunks, axis=1))
    spec_input = np.expand_dims(spec_input, axis=-1)
    feed_dict = {self.input_op: np.log1p(spec_input) / 12.0}

    debug = self.sess.run(self.debug_op, feed_dict=feed_dict)
    np.save('debug.npy', debug)

    S = self.sess.run(self.eva_op, feed_dict=feed_dict)
    S = np.clip(S, 0, 5e3)
    print('mean', np.mean(S))
    print(np.sum(np.isinf(S)))

    # Concatenate the per-chunk outputs along the time axis, then drop the
    # leading batch axis and the trailing singleton axis.
    S = np.squeeze(np.concatenate(np.split(S, S.shape[0]), axis=2),
                   axis=(0, -1))

    # Keep only as many phase frames as the network produced.
    phase = phase[..., :S.shape[1]]
    print(phase.shape)
    print(S.shape)

    spectrum = np.multiply(S, phase)
    print(np.sum(np.isinf(spectrum)))
    return librosa.istft(spectrum)
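# (Web-page residue, not part of the code: "Comment list")
# (Web-page residue, not part of the code: "Table of contents")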