def _high_frequency_completion(self, x, transformed):
    """Add a high-pass-filtered unvoiced resynthesis of `x` onto `transformed`.

    The original waveform `x` is analyzed, resynthesized with an all-zero
    F0 contour (i.e. as unvoiced speech), high-pass filtered, and added
    sample by sample onto the beginning of `transformed`.

    Please see Sect. 3.2 and 3.3 in the following paper to know why the
    unvoiced synthesized voice of the original voice is completed into the
    high frequency range of the F0 transformed voice.

    - K. Kobayashi et al., "F0 transformation techniques for statistical voice
      conversion with direct waveform modification with spectral differential,"
      Proc. IEEE SLT 2016, pp. 693-700. 2016.

    Parameters
    ----------
    x : array-like
        Original waveform; passed unchanged to ``FeatureExtractor.analyze``.
    transformed : array-like
        F0-transformed waveform (presumably 1-D and at the same sampling
        rate as `x` -- confirm against callers). It is never modified in
        place.

    Returns
    -------
    numpy.ndarray
        A new array with the same length as `transformed`. Samples beyond
        the end of the filtered signal are copied through untouched; any
        filtered samples beyond the end of `transformed` are dropped.
    """
    # Analyze the original waveform. Only the spectral envelope and the
    # aperiodicity are reused; the F0 contour is replaced with zeros.
    analyzer = FeatureExtractor(fs=self.fs)
    f0, spc, ap = analyzer.analyze(x)
    unvoiced_f0 = np.zeros(len(f0))

    # Resynthesize with the zeroed F0 contour.
    synthesizer = Synthesizer(fs=self.fs)
    unvoiced_wav = synthesizer.synthesis_spc(unvoiced_f0, spc, ap)

    # 255-tap high-pass FIR (pass_zero=False). No `fs` is given to firwin,
    # so the cutoff `self.f0rate` is interpreted relative to the Nyquist
    # frequency; firwin rejects cutoffs outside the open interval (0, 1).
    # NOTE(review): lfilter is causal, so the filtered signal lags the input
    # by roughly 127 samples -- confirm that this delay is intended.
    hpf_taps = firwin(255, self.f0rate, pass_zero=False)
    hpf_unvoiced = lfilter(hpf_taps, 1, unvoiced_wav)

    # Always work on a copy with the promoted dtype. Previously one branch
    # returned a new array while the other mutated the caller's array in
    # place (and failed on integer input because of the in-place cast).
    transformed = np.asarray(transformed)
    n_overlap = min(len(transformed), len(hpf_unvoiced))
    completed = np.array(
        transformed, dtype=np.result_type(transformed, hpf_unvoiced)
    )
    completed[:n_overlap] += hpf_unvoiced[:n_overlap]
    return completed
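# NOTE: the two lines that followed here ("comment list", "table of contents")
# were web-page navigation residue from a scrape, not part of the program.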