# vad.py source file - Python code snippet

def show_VAD_features(sound_data, sampling_frequency):
    """Plot per-frame VAD features and a waveform summary of a PCM buffer.

    The buffer is decoded as little-endian signed 16-bit samples, cut into
    non-overlapping frames of ``FRAME_DURATION * sampling_frequency`` samples
    (trailing samples that do not fill a frame are dropped), and passed,
    together with a Hamming-windowed power spectrogram, to
    ``calculate_features_for_VAD``. Four stacked panels are then shown with
    matplotlib; ``plt.show()`` is called before the function returns.

    Args:
        sound_data: Bytes-like object holding the raw samples; its length
            must be a positive even number of bytes.
            NOTE(review): the data is treated as one sample stream, i.e.
            presumably mono - confirm against the callers.
        sampling_frequency: Sampling rate in Hz; must be at least 8000.

    Returns:
        An empty list when the signal yields no more than
        ``INIT_SILENCE_FRAMES`` frames (nothing is plotted in that case);
        otherwise ``None`` after the figure has been shown.

    Raises:
        AssertionError: If the sampling frequency or the data length is
            rejected by the checks above (note that asserts are skipped when
            Python runs with ``-O``).
    """
    assert sampling_frequency >= 8000, 'Sampling frequency is inadmissible!'
    n_data = len(sound_data)
    assert (n_data > 0) and ((n_data % 2) == 0), 'Sound data are wrong!'
    frame_size = int(round(FRAME_DURATION * float(sampling_frequency)))

    # FFT length: smallest power of two (at least 2) not below the frame size.
    n_fft_points = 2
    while n_fft_points < frame_size:
        n_fft_points *= 2

    # Decode all samples in one call instead of one struct.unpack per sample;
    # '<i2' is the numpy spelling of struct's '<h'.
    sound_signal = numpy.frombuffer(sound_data, dtype='<i2').astype(numpy.float64)

    frequencies_axis, time_axis, spectrogram = signal.spectrogram(
        sound_signal, fs=sampling_frequency, window='hamming', nperseg=frame_size, noverlap=0, nfft=n_fft_points,
        scaling='spectrum', mode='psd'
    )
    spectrogram = spectrogram.transpose()  # -> one row per frame
    n_frames = spectrogram.shape[0]
    if n_frames <= INIT_SILENCE_FRAMES:
        return []

    # Keep only whole frames. The reshape relies on the spectrogram having
    # exactly one row per whole frame, as the original code did.
    n_whole_samples = (sound_signal.shape[0] // frame_size) * frame_size
    sound_frames = numpy.reshape(sound_signal[:n_whole_samples], (n_frames, frame_size))

    features = calculate_features_for_VAD(sound_frames, frequencies_axis, spectrogram).transpose()
    # The spectral data is no longer needed; release it before the blocking
    # plt.show() call below.
    del spectrogram
    del frequencies_axis

    feature_titles = ('Short-time Energy', 'Spectral Flatness Measure', 'Most Dominant Frequency Component')
    for row, title in enumerate(feature_titles):
        plt.subplot(4, 1, row + 1)
        plt.plot(time_axis, features[row])
        plt.title(title)
        plt.grid(True)

    # Waveform panel: every frame is summarised by four points drawn at the
    # frame's time stamp - first sample, maximum, minimum, last sample.
    plt.subplot(4, 1, 4)
    x = numpy.repeat(time_axis, 4)
    y = numpy.stack(
        (sound_frames[:, 0], sound_frames.max(axis=1), sound_frames.min(axis=1), sound_frames[:, -1]),
        axis=1
    ).ravel()
    plt.plot(x, y)
    plt.title('Wave File')
    plt.grid(True)
    plt.show()