def get_vowel_segments(media_path, n_fft=2048, *, tolerance=0.6, max_pitch_hz=1000):
    """Flag, hop by hop, which frames of an audio file carry a confident pitch.

    The file is read in consecutive, non-overlapping frames of ``n_fft``
    samples. Each frame is passed to a "yin" pitch detector, and a frame is
    flagged ``True`` when the detector's confidence is not below
    ``tolerance`` and the detected pitch is not above ``max_pitch_hz``.

    NOTE(review): ``source`` and ``pitch`` are taken from the enclosing
    module (presumably aubio -- confirm against the file's imports). The
    function name suggests the flagged frames are treated as vowel-like
    segments by callers; nothing in this function checks that.

    Args:
        media_path: Path handed to ``source`` to open the audio.
        n_fft: Frame length in samples; used as both window and hop size.
        tolerance: Value given to the detector's ``set_tolerance`` and also
            used as the minimum accepted confidence.
        max_pitch_hz: Frames whose detected pitch exceeds this are rejected.

    Returns:
        A list with one boolean per frame read (always at least one frame),
        ``True`` where the frame passed both checks.
    """
    # Requested rate; replaced below by whatever the opened source reports.
    samplerate = 44100
    win_s = n_fft  # analysis window size, in samples
    hop_s = n_fft  # hop equals the window, so frames do not overlap

    s = source(media_path, samplerate, hop_s)
    samplerate = s.samplerate

    pitch_o = pitch("yin", win_s, hop_s, samplerate)
    pitch_o.set_unit("Hz")
    pitch_o.set_tolerance(tolerance)

    pitches = []
    confidences = []
    while True:
        samples, read = s()
        pitches.append(pitch_o(samples)[0])
        confidences.append(pitch_o.get_confidence())
        # A short read marks the last frame; it has still been analysed above.
        if read < hop_s:
            break

    pitches = np.array(pitches)
    confidences = np.array(confidences)

    # Plain boolean arrays are used instead of numpy.ma: a masked array
    # collapses an all-False mask to a scalar, which previously made the
    # function return [] whenever every frame was accepted.
    rejected = (confidences < tolerance) | (pitches > max_pitch_hz)
    return list(np.logical_not(rejected))
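# Comment list (web-page residue, not part of the program)
# Article table of contents (web-page residue, not part of the program)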