python类logamplitude()的实例源码

features.py 文件源码 项目:dcase2016_task4 作者: pafoster 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def feature_extraction(y=None, fs=None, statistics=True, include_mfcc0=True, include_delta=True, include_acceleration=True, mfcc_params=None, delta_params=None, acceleration_params=None):
    # Extract features, Mel Frequency Cepstral Coefficients
    eps = numpy.spacing(1)

    # Windowing function
    if mfcc_params['window'] == 'hamming_asymmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=False)
    elif mfcc_params['window'] == 'hamming_symmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=True)
    elif mfcc_params['window'] == 'hann_asymmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=False)
    elif mfcc_params['window'] == 'hann_symmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=True)
    else:
        window = None

    # Calculate Static Coefficients
    magnitude_spectrogram = numpy.abs(librosa.stft(y + eps, n_fft=mfcc_params['n_fft'], win_length=mfcc_params['win_length'], hop_length=mfcc_params['hop_length'], window=window))**2
    mel_basis = librosa.filters.mel(sr=fs, n_fft=mfcc_params['n_fft'], n_mels=mfcc_params['n_mels'], fmin=mfcc_params['fmin'], fmax=mfcc_params['fmax'], htk=mfcc_params['htk'])
    mel_spectrum = numpy.dot(mel_basis, magnitude_spectrogram)
    mfcc = librosa.feature.mfcc(S=librosa.logamplitude(mel_spectrum))

    # Collect the feature matrix
    feature_matrix = mfcc
    if include_delta:
        # Delta coefficients
        mfcc_delta = librosa.feature.delta(mfcc, **delta_params)

        # Add Delta Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta))

    if include_acceleration:
        # Acceleration coefficients (aka delta)
        mfcc_delta2 = librosa.feature.delta(mfcc, order=2, **acceleration_params)

        # Add Acceleration Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta2))


    if not include_mfcc0:
        # Omit mfcc0
        feature_matrix = feature_matrix[1:, :]

    feature_matrix = feature_matrix.T

    # Collect into data structure
    if statistics:
        return {
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix ** 2, axis=0),
            }
        }
    else:
        return {
            'feat': feature_matrix}
audio_conv_utils.py 文件源码 项目:keras 作者: NVIDIA 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def preprocess_input(audio_path, dim_ordering='default'):
    """Reads an audio file and outputs a Mel-spectrogram.

    # Arguments
        audio_path: path to the target audio file.
        dim_ordering: data format for the output spectrogram image.

    # Returns
        3D Numpy tensor encoding the Mel-spectrogram.

    # Raises
        ImportError: if librosa is not available.
    """
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa is None:
        raise ImportError('Librosa is required to process audio files. '
                          'Install it via `pip install librosa` or visit '
                          'http://librosa.github.io/librosa/ for details.')

    # mel-spectrogram parameters
    sr = 12000
    n_fft = 512
    n_mels = 96
    hop_length = 256
    duration = 29.12

    src, sr = librosa.load(audio_path, sr=sr)
    n_sample = src.shape[0]
    n_sample_wanted = int(duration * sr)

    # trim the signal at the center
    if n_sample < n_sample_wanted:  # if too short
        src = np.hstack((src, np.zeros((int(duration * sr) - n_sample,))))
    elif n_sample > n_sample_wanted:  # if too long
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    x = logam(melgram(y=src, sr=sr, hop_length=hop_length,
                      n_fft=n_fft, n_mels=n_mels) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        x = np.expand_dims(x, axis=3)
    return x


问题


面经


文章

微信
公众号

扫码关注公众号