def ltsd_vad(x, fs, threshold=9, winsize=8192):
    """Select the samples of ``x`` that an LTSD detector scores as active.

    The signal is min-max normalised, scaled by ``2 ** 15`` and truncated to
    int32 before being passed to ``LTSD``.  Each score returned by
    ``LTSD.compute`` is compared with ``threshold`` and mapped onto a range
    of sample indices to build a boolean mask.

    Parameters
    ----------
    x : numpy.ndarray
        Input signal.  Presumably 1-D mono audio -- TODO confirm with callers.
    fs : int or float
        Sample rate in Hz, used to convert window times to sample indices.
    threshold : int or float, optional
        Windows whose score is not below this value are marked active.
    winsize : int, optional
        Analysis window length in samples.  The original author's guidance
        is to pick it from the sample rate (e.g. 1024 for ``fs = 16000``).

    Returns
    -------
    tuple
        ``(x_active, f_vad)``: ``f_vad`` is a boolean mask shaped like ``x``
        and ``x_active`` is the masked signal.  The returned samples come
        from the quantised (int32 round-tripped) signal rescaled to the
        original range and cast back to the input dtype, so they may differ
        slightly from the input values.
    """
    orig_dtype = x.dtype
    # Python floats keep the range arithmetic out of the input dtype; with
    # int16 input ``max - min`` and ``x - min`` would otherwise wrap around.
    orig_scale_min = float(x.min())
    orig_scale_max = float(x.max())
    scale_range = orig_scale_max - orig_scale_min

    # Built-in ``bool``: the ``np.bool`` alias is missing on NumPy 1.24-1.26.
    f_vad = np.zeros_like(x, dtype=bool)

    if scale_range == 0:
        # A constant signal cannot be normalised (division by zero) and has
        # no activity to detect: return an empty selection.
        return x[f_vad], f_vad

    x = (x.astype("float64") - orig_scale_min) / scale_range
    # works with 16 bit
    x = (x * (2 ** 15)).astype("int32")

    # NOTE(review): if ``sp`` is SciPy, the top-level ``hanning`` alias is
    # gone from recent releases; ``np.hanning`` is the likely replacement.
    window = sp.hanning(winsize)
    # NOTE(review): meaning of the third argument (5) is defined by LTSD,
    # which is not visible here -- confirm against its definition.
    ltsd = LTSD(winsize, window, 5)
    s_vad = ltsd.compute(x)

    # Per the original author: LTSD uses 50% overlap, so each step covers
    # winsize // 2 samples; the +1 covers the extra edge window.
    n_samples = ((len(s_vad) + 1) * winsize) // 2
    time_s = n_samples / float(fs)
    time_points = np.linspace(0, time_s, len(s_vad))
    time_samples = (fs * time_points).astype(np.int32)

    # Window boundaries are shifted back by one window length and clamped
    # at the start of the signal.
    offset = winsize
    for n, (ss, es) in enumerate(zip(time_samples[:-1], time_samples[1:])):
        if s_vad[n + 1] < threshold:
            # Mask is already False here; nothing to do.
            continue
        f_vad[max(ss - offset, 0):max(es - offset, 0)] = True

    # Undo the 16-bit scaling and the min-max normalisation.
    x = x.astype("float64") / float(2 ** 15)
    x = x * scale_range + orig_scale_min
    x = x.astype(orig_dtype)
    return x[f_vad], f_vad
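# 评论列表 (comment list) -- web-page navigation text, not part of the code
# 文章目录 (article table of contents) -- web-page navigation text, not part of the code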