import numpy as np
import scipy.signal
import scipy.io.wavfile
import librosa  # used by most of the snippets below
import tensorflow as tf  # this first snippet targets the TF 1.x graph API (placeholder/Session)

def stft(wav, n_fft=1024, overlap=4, dt=tf.int32, absp=False):
    assert wav.shape[0] > n_fft
    X = tf.placeholder(dtype=dt, shape=wav.shape)
    X = tf.cast(X, tf.float32)
    hop = n_fft // overlap  # integer hop; plain `/` would break range() under Python 3
    ## prepare the constant analysis window (scipy.hanning was removed; use get_window)
    W = tf.constant(scipy.signal.get_window('hann', n_fft), dtype=tf.float32)
    # one FFT per windowed frame, stacked into the STFT matrix
    # (tf.pack and tf.complex_abs were renamed tf.stack and tf.abs in TF 1.0)
    S = tf.stack([tf.fft(tf.cast(tf.multiply(W, X[i:i + n_fft]), tf.complex64))
                  for i in range(1, wav.shape[0] - n_fft, hop)])
    abs_S = tf.abs(S)
    sess = tf.Session()
    if absp:
        return sess.run(abs_S, feed_dict={X: wav})
    else:
        return sess.run(S, feed_dict={X: wav})
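A minimal usage sketch for the graph-mode stft() above; the file name and the 16-bit mono WAV assumption are placeholders:

sr, samples = scipy.io.wavfile.read('example.wav')           # hypothetical mono file
mag = stft(samples.astype(np.int32), n_fft=1024, absp=True)  # feed dtype must match dt
print(mag.shape)                                             # (n_frames, n_fft) magnitudes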
def griffin_lim(mag, phase_angle, n_fft, hop, num_iters):
    """Iterative algorithm for phase retrieval from a magnitude spectrogram.

    Args:
        mag: Magnitude spectrogram.
        phase_angle: Initial condition for the phase.
        n_fft: Size of the FFT.
        hop: Stride of the FFT, typically n_fft // 2.
        num_iters: Number of Griffin-Lim iterations to perform.

    Returns:
        audio: 1-D array of float32 sound samples.
    """
    fft_config = dict(n_fft=n_fft, win_length=n_fft, hop_length=hop, center=True)
    ifft_config = dict(win_length=n_fft, hop_length=hop, center=True)
    complex_specgram = inv_magphase(mag, phase_angle)
    for i in range(num_iters):
        audio = librosa.istft(complex_specgram, **ifft_config)
        if i != num_iters - 1:
            complex_specgram = librosa.stft(audio, **fft_config)
            _, phase = librosa.magphase(complex_specgram)
            phase_angle = np.angle(phase)
            complex_specgram = inv_magphase(mag, phase_angle)
    return audio
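inv_magphase is referenced but not defined in this snippet; a minimal version consistent with its use above:

def inv_magphase(mag, phase_angle):
    # recombine a magnitude spectrogram and a phase angle into a complex STFT
    return mag * np.exp(1j * phase_angle)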
def make_spectrum(self, filename, use_normalize):
    sr, y = wav.read(filename)
    if sr != 16000:
        raise ValueError('Sampling rate is expected to be 16kHz!')
    if y.dtype != 'float32':
        y = np.float32(y / 32767.)  # scale int16 PCM into [-1, 1]
    # 512-point Hamming STFT with 50% overlap -> 257 frequency bins
    D = librosa.stft(y, n_fft=512, hop_length=256, win_length=512,
                     window=scipy.signal.windows.hamming)
    Sxx = np.log10(abs(D) ** 2)
    if use_normalize:
        mean = np.mean(Sxx, axis=1).reshape((257, 1))
        std = np.std(Sxx, axis=1).reshape((257, 1)) + 1e-12
        Sxx = (Sxx - mean) / std
    slices = []
    for i in range(0, Sxx.shape[1] - self.FRAMELENGTH, self.OVERLAP):
        slices.append(Sxx[:, i:i + self.FRAMELENGTH])
    return np.array(slices)
def test_stft_istft(self):
    try:
        import librosa
        ds = F.load_digit_wav()
        name = list(ds.keys())[0]  # dict views are not indexable in Python 3
        path = ds[name]
        y, _ = speech.read(path, pcm=True)
        hop_length = int(0.01 * 8000)
        stft = signal.stft(y, n_fft=256, hop_length=hop_length, window='hann')
        stft_ = librosa.stft(y, n_fft=256, hop_length=hop_length, window='hann')
        self.assertTrue(np.allclose(stft, stft_.T))
        y1 = signal.istft(stft, hop_length=hop_length, window='hann')
        y2 = librosa.istft(stft_, hop_length=hop_length, window='hann')
        self.assertTrue(np.allclose(y1, y2))
    except ImportError:
        print("test_stft_istft requires librosa.")
def griffinlim(spectrogram, n_iter=50, window='hann', n_fft=2048, win_length=2048, hop_length=-1, verbose=False):
    if hop_length == -1:
        hop_length = n_fft // 4
    # start from uniformly random phase
    angles = np.exp(2j * np.pi * np.random.rand(*spectrogram.shape))
    t = tqdm(range(n_iter), ncols=100, mininterval=2.0, disable=not verbose)
    for i in t:
        full = np.abs(spectrogram).astype(np.complex128) * angles  # the np.complex alias was removed in NumPy 1.24
        inverse = librosa.istft(full, hop_length=hop_length, win_length=win_length, window=window)
        rebuilt = librosa.stft(inverse, n_fft=n_fft, hop_length=hop_length, win_length=win_length, window=window)
        angles = np.exp(1j * np.angle(rebuilt))
        if verbose:
            diff = np.abs(spectrogram) - np.abs(rebuilt)
            t.set_postfix(loss=np.linalg.norm(diff, 'fro'))
    full = np.abs(spectrogram).astype(np.complex128) * angles
    inverse = librosa.istft(full, hop_length=hop_length, win_length=win_length, window=window)
    return inverse
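A minimal usage sketch for griffinlim(); the input file is a placeholder:

y, sr = librosa.load('example.wav', sr=None)      # hypothetical input
mag = np.abs(librosa.stft(y, n_fft=2048))
y_hat = griffinlim(mag, n_iter=50, verbose=True)  # audio re-estimated from magnitude alone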
def __call__(self, y):
    """Short-time Fourier transform (STFT).

    Returns a complex-valued matrix D such that
    `np.abs(D[f, t])` is the magnitude of frequency bin `f` at frame `t`, and
    `np.angle(D[f, t])` is the phase of frequency bin `f` at frame `t`.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)], real-valued
        the input signal (audio time series)

    Returns
    -------
    D : np.ndarray [shape=(1 + n_fft/2, t), dtype=dtype]
        STFT matrix
    """
    return librosa.stft(y, **self.__dict__)
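The call above forwards every instance attribute to librosa.stft via **self.__dict__; a minimal constructor consistent with that trick (class name and defaults are assumptions):

class STFT(object):
    def __init__(self, n_fft=2048, hop_length=512, win_length=None, window='hann'):
        # every attribute set here becomes a keyword argument of librosa.stft
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        self.window = window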
trainModel.py (project: Sound-classification-on-Raspberry-Pi-with-Tensorflow, author: GianlucaPaolocci)
def extract_features(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.array(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=8).T)
    chroma = np.array(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T)
    mel = np.array(librosa.feature.melspectrogram(y=X, sr=sample_rate).T)  # keyword y=; positional audio was dropped in librosa 0.10
    contrast = np.array(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T)
    tonnetz = np.array(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T)
    return mfccs, chroma, mel, contrast, tonnetz
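A hedged usage sketch that stacks the per-frame features, mirroring what classiPi.py below does with np.hstack (the file name is a placeholder; the shared default hop length keeps all five matrices on the same frame count):

mfccs, chroma, mel, contrast, tonnetz = extract_features('example.wav')
frame_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])  # (n_frames, n_features)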
classiPi.py (project: Sound-classification-on-Raspberry-Pi-with-Tensorflow, author: GianlucaPaolocci)
def extract_features():
    # `duration` and `sample_rate` are module-level settings in the original project
    X = sounddevice.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
    sounddevice.wait()
    X = np.squeeze(X)
    stft = np.abs(librosa.stft(X))
    mfccs = np.array(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=8).T)
    chroma = np.array(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T)
    mel = np.array(librosa.feature.melspectrogram(y=X, sr=sample_rate).T)
    contrast = np.array(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T)
    tonnetz = np.array(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T)
    # stack all features for this recording into one matrix
    ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
    return ext_features
def _complex_spectrogram(self) -> ndarray:
    return librosa.stft(y=self.get_raw_audio(), n_fft=self.fourier_window_length, hop_length=self.hop_length)
def _griffin_lim(S, n_fft, win_length, hop_length, num_iters):
    # random initial phase
    angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
    S_complex = np.abs(S).astype(np.complex128)
    for i in range(num_iters):
        if i > 0:
            angles = np.exp(1j * np.angle(librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)))
        y = librosa.istft(S_complex * angles, hop_length=hop_length, win_length=win_length)
    return y
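A hedged usage sketch for _griffin_lim; the parameter values are assumptions:

y0, _ = librosa.load('example.wav', sr=None)  # hypothetical input
mag = np.abs(librosa.stft(y0, n_fft=1024, hop_length=256, win_length=1024))
y_hat = _griffin_lim(mag, n_fft=1024, win_length=1024, hop_length=256, num_iters=60)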
def create_spectrogram_from_audio(data):
    global setting
    spectrogram = librosa.stft(data, n_fft=Config.n_fft, hop_length=Config.hop_length).transpose()
    # divide the real and imaginary components of each element, then
    # concatenate the matrix with the real components and the matrix with the imaginary components
    # (DataCorruptionError when saving complex numbers in TFRecords)
    # concatenated = np.concatenate([np.real(spectrogram), np.imag(spectrogram)], axis=1)
    return spectrogram  # [num_time_frames, num_freq_bins]
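If the commented-out real/imag split is used for TFRecords, inverting it is straightforward; a minimal sketch (the axis=1 halves follow from the concatenation above):

concatenated = np.concatenate([np.real(spectrogram), np.imag(spectrogram)], axis=1)
half = concatenated.shape[1] // 2
restored = concatenated[:, :half] + 1j * concatenated[:, half:]  # back to complex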
neural_network_audioset.py (project: TensorFlow_AudioSet_Example, author: DantesLegacy)
def plot_log_power_specgram(sound_names, raw_sounds):
    i = 1
    fig = plt.figure(figsize=(25, 60), dpi=900)
    for n, f in zip(sound_names, raw_sounds):
        plt.subplot(10, 1, i)
        # librosa.logamplitude was removed; power_to_db(ref=np.max) is the current equivalent
        D = librosa.power_to_db(np.abs(librosa.stft(f))**2, ref=np.max)
        librosa.display.specshow(D, x_axis='time', y_axis='log')
        plt.title(n.title())
        i += 1
    plt.suptitle('Figure 3: Log power spectrogram', x=0.5, y=0.915, fontsize=18)
    plt.show()
neural_network_audioset.py (same project as above)
def extract_feature(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
def get_spectrograms(sound_file):
    '''Extracts melspectrogram and log magnitude from given `sound_file`.

    Args:
        sound_file: A string. Full path of a sound file.

    Returns:
        Transposed S: A 2d array. A transposed melspectrogram with shape of (T, n_mels).
        Transposed magnitude: A 2d array. Has shape of (T, 1 + hp.n_fft//2).
    '''
    # Load the sound file at its native rate (or set sr to hp.sr)
    y, sr = librosa.load(sound_file, sr=None)
    # STFT. D: (1 + n_fft//2, T)
    D = librosa.stft(y=y,
                     n_fft=hp.n_fft,
                     hop_length=hp.hop_length,
                     win_length=hp.win_length)
    # magnitude spectrogram
    magnitude = np.abs(D)  # (1 + n_fft//2, T)
    # power spectrogram
    power = magnitude ** 2  # (1 + n_fft//2, T)
    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels)  # (n_mels, T)
    return np.transpose(S.astype(np.float32)), np.transpose(magnitude.astype(np.float32))  # (T, n_mels), (T, 1 + n_fft//2)
def spectrogram2wav(spectrogram):
    '''
    spectrogram: [t, f], i.e. [t, n_fft // 2 + 1]
    '''
    spectrogram = spectrogram.T  # [f, t]
    X_best = copy.deepcopy(spectrogram)  # [f, t]
    for i in range(hp.n_iter):
        X_t = invert_spectrogram(X_best)
        # re-estimate phase from the inverted signal (keyword args; they became keyword-only in librosa 0.10)
        est = librosa.stft(X_t, n_fft=hp.n_fft, hop_length=hp.hop_length, win_length=hp.win_length)  # [f, t]
        phase = est / np.maximum(1e-8, np.abs(est))  # [f, t]
        X_best = spectrogram * phase  # [f, t]
    X_t = invert_spectrogram(X_best)
    return np.real(X_t)
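invert_spectrogram is not shown in this snippet; a minimal version consistent with its use, plus a hypothetical round trip through get_spectrograms above:

def invert_spectrogram(spectrogram):
    # spectrogram: [f, t] complex values; assumes the same hop and window as the forward STFT
    return librosa.istft(spectrogram, hop_length=hp.hop_length, win_length=hp.win_length, window='hann')

mel, mag = get_spectrograms('example.wav')  # hypothetical input file
audio = spectrogram2wav(mag)                # Griffin-Lim style reconstruction from magnitude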
def make_spectrum_phase(y, FRAMESIZE, OVERLAP, FFTSIZE):
    D = librosa.stft(y, n_fft=FRAMESIZE, hop_length=OVERLAP, win_length=FFTSIZE,
                     window=scipy.signal.windows.hamming)
    Sxx = np.log10(abs(D)**2)
    phase = np.exp(1j * np.angle(D))
    # the hard-coded 257 bins assume FRAMESIZE == 512
    mean = np.mean(Sxx, axis=1).reshape((257, 1))
    std = np.std(Sxx, axis=1).reshape((257, 1)) + 1e-12
    Sxx = (Sxx - mean) / std
    return Sxx, phase, mean, std
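A matching reconstruction sketch for the normalized log-power spectrum above; the function name is an assumption:

def recons_spectrum_phase(Sxx, phase, mean, std, OVERLAP, FFTSIZE):
    Sxx = Sxx * std + mean    # undo the per-bin normalization
    mag = np.sqrt(10 ** Sxx)  # log10 power back to linear magnitude
    return librosa.istft(mag * phase, hop_length=OVERLAP, win_length=FFTSIZE,
                         window=scipy.signal.windows.hamming)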
def get_feature_aqibsaeed_1(X, sr, au_path=None):
    """
    http://aqibsaeed.github.io/2016-09-03-urban-sound-classification-part-1/
    """
    import librosa
    if au_path is not None:
        X, sr = librosa.load(au_path)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sr, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sr).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sr).T, axis=0)
    feature = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
    return feature
def transform_audio(self, y):
    '''Compute the STFT magnitude and phase.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT magnitude
        data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))
    D = stft(y, hop_length=self.hop_length, n_fft=self.n_fft)
    D = fix_length(D, size=n_frames)  # size= is keyword-only in librosa >= 0.10
    mag, phase = magphase(D)
    if self.log:
        mag = amplitude_to_db(mag, ref=np.max)
    return {'mag': mag.T[self.idx].astype(np.float32),
            'phase': np.angle(phase.T)[self.idx].astype(np.float32)}
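A hedged usage sketch, assuming the transformer object exposes sr, hop_length and n_fft (the variable name op is hypothetical):

y, _ = librosa.load('example.wav', sr=op.sr)  # hypothetical input
out = op.transform_audio(y)
print(out['mag'].shape, out['phase'].shape)   # (n_frames, 1 + n_fft//2) each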
def compute_spec(audio_file, spectro_file):
    # Get the actual audio
    audio, sr = librosa.load(audio_file, sr=config['resample_sr'])
    # Compute the spectrogram
    if config['spectrogram_type'] == 'cqt':
        # note: the real= flag only exists in older librosa versions
        spec = librosa.cqt(audio, sr=sr, hop_length=config['hop'], n_bins=config['cqt_bins'], real=False)
    elif config['spectrogram_type'] == 'mel':
        spec = librosa.feature.melspectrogram(y=audio, sr=sr, hop_length=config['hop'], n_fft=config['n_fft'], n_mels=config['n_mels'])
    elif config['spectrogram_type'] == 'stft':
        spec = librosa.stft(y=audio, n_fft=config['n_fft'])
    # Write the result (binary mode is required for pickle)
    with open(spectro_file, "wb") as f:
        pickle.dump(spec, f, protocol=-1)  # spec shape: MxN
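The matching read-back, for completeness (a minimal sketch):

with open(spectro_file, "rb") as f:
    spec = pickle.load(f)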
def expand(self, audio):
    ori_len = audio.shape[0]
    # downsample by 2x, then resample back to the original length
    tmp = resample(audio, r=0.5, type='sinc_best')
    down_len = tmp.shape[0]
    tmp = resample(tmp, r=(ori_len + 1) / float(down_len), type='sinc_best')
    tmp = librosa.stft(audio, n_fft=1024)
    phase = np.divide(tmp, np.abs(tmp))  # unit-magnitude phase of the original signal
    # n_input and n_len are module-level model dimensions in the original project
    spec_input = np.abs(librosa.stft(audio, n_fft=1024))[0:n_input, ::]
    spec_input = spec_input[::, 0:spec_input.shape[1] // n_len * n_len]
    spec_input = np.split(spec_input, spec_input.shape[1] // n_len, axis=1)
    spec_input = np.asarray(spec_input)
    spec_input = np.expand_dims(spec_input, axis=-1)
    feed_dict = {self.input_op: np.log1p(spec_input) / 12.0}
    debug = self.sess.run(self.debug_op, feed_dict=feed_dict)
    np.save('debug.npy', debug)
    S = self.sess.run(self.eva_op, feed_dict=feed_dict)
    # clip the network output into a sane magnitude range
    S[S >= 5e3] = 5e3
    S[S <= 0] = 0
    print('mean', np.mean(S))
    print(np.sum(np.isinf(S)))
    S = np.squeeze(np.concatenate(np.split(S, S.shape[0]), axis=2), axis=(0, -1))
    phase = phase[..., :S.shape[1]]
    print(phase.shape)
    print(S.shape)
    print(np.sum(np.isinf(np.multiply(S, phase))))
    X = librosa.istft(np.multiply(S, phase))
    return X
def get_spectrograms(sound_file):
    '''Extracts melspectrogram and log magnitude from given `sound_file`.

    Args:
        sound_file: A string. Full path of a sound file.

    Returns:
        Transposed S: A 2d array. A transposed melspectrogram with shape of (T, n_mels).
        Transposed magnitude: A 2d array. Has shape of (T, 1 + hp.n_fft//2).
    '''
    # This variant resamples to the project rate hp.sr on load
    y, sr = librosa.load(sound_file, sr=hp.sr)
    # STFT. D: (1 + n_fft//2, T)
    D = librosa.stft(y=y,
                     n_fft=hp.n_fft,
                     hop_length=hp.hop_length,
                     win_length=hp.win_length)
    # magnitude spectrogram
    magnitude = np.abs(D)  # (1 + n_fft//2, T)
    # power spectrogram
    power = magnitude ** 2  # (1 + n_fft//2, T)
    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels)  # (n_mels, T)
    return np.transpose(S.astype(np.float32)), np.transpose(magnitude.astype(np.float32))  # (T, n_mels), (T, 1 + n_fft//2)
def source_separation(self, x):
    if not Duration()(x) > 10:
        stftx = librosa.stft(x)
        real = stftx.real
        imag = stftx.imag
        ssp = find_sparse_source_points(real, imag)  # find sparsity in the signal
        cos_dist = cosine_distance(ssp)  # cosine distance from sparse data
        sources = find_number_of_sources(cos_dist)  # find the likely number of sources
        if (sources == 0) or (sources == 1):  # x is an instrumental track with no more than one source
            print("There's only one visible source")
            return x
        else:
            print("Separating sources")
            xs = NMF(stftx, sources)
            return xs[0]  # take the bass part  # TODO: correct NMF to return a noiseless reconstruction
    else:
        stftx = librosa.stft(x[:441000])  # take 10 seconds of signal data (at 44.1 kHz) to find sources
        print("It can take some time to find any source in this signal")
        real = stftx.real
        imag = stftx.imag
        ssp = find_sparse_source_points(real, imag)  # find sparsity in the signal
        cos_dist = cosine_distance(ssp)  # cosine distance from sparse data
        sources = find_number_of_sources(cos_dist)  # find the likely number of sources
        if (sources == 0) or (sources == 1):  # x is an instrumental track with no more than one source
            print("There's only one visible source")
            return x
        else:
            print("Separating sources")
            xs = NMF(librosa.stft(x), sources)
            return xs[0]  # take the bass part  # TODO: correct NMF to return a noiseless reconstruction
def sad_music_remix(self, neg_arous_dir, files, decisions, harmonic=None):
    for subdirs, dirs, sounds in os.walk(neg_arous_dir):
        fx = random.choice(sounds[::-1])
        fy = random.choice(sounds[:])
    x = MonoLoader(filename=neg_arous_dir + '/' + fx)()
    y = MonoLoader(filename=neg_arous_dir + '/' + fy)()
    fx = fx.split('.')[0]
    fy = fy.split('.')[0]
    fx = np.where(files == fx)[0][0]
    fy = np.where(files == fy)[0][0]
    if harmonic is False or harmonic is None:  # treat both False and None as the non-harmonic case
        dec_x = get_coordinate(fx, 1, decisions)
        dec_y = get_coordinate(fy, 1, decisions)
    else:
        dec_x = get_coordinate(fx, 2, decisions)
        dec_y = get_coordinate(fy, 2, decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    x = x[np.nonzero(x)]
    y = scratch_music(y, dec_y)
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    negative_arousal_samples = [i / i.max() for i in (x, y)]
    negative_arousal_x = np.array(negative_arousal_samples).sum(axis=0)
    negative_arousal_x = 0.5 * negative_arousal_x / negative_arousal_x.max()
    if harmonic is True:
        return librosa.decompose.hpss(librosa.stft(negative_arousal_x), margin=(1.0, 5.0))[0]
    if harmonic is False or harmonic is None:
        onsets = hfc_onsets(np.float32(negative_arousal_x))
        interv = seconds_to_indices(onsets)
        steps = overlapped_intervals(interv)
        output = librosa.effects.remix(negative_arousal_x, steps[::-1], align_zeros=False)
        output = librosa.effects.pitch_shift(output, sr=44100, n_steps=3)
        remix_filename = 'data/emotions/remixes/sad/' + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg'
        MonoWriter(filename=remix_filename, format='ogg', sampleRate=44100)(np.float32(output))
        subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
def happy_music_remix(self, pos_arous_dir, files, decisions, harmonic=None):
    for subdirs, dirs, sounds in os.walk(pos_arous_dir):
        fx = random.choice(sounds[::-1])
        fy = random.choice(sounds[:])
    x = MonoLoader(filename=pos_arous_dir + '/' + fx)()
    y = MonoLoader(filename=pos_arous_dir + '/' + fy)()
    fx = fx.split('.')[0]
    fy = fy.split('.')[0]
    fx = np.where(files == fx)[0][0]
    fy = np.where(files == fy)[0][0]
    if harmonic is False or harmonic is None:  # treat both False and None as the non-harmonic case
        dec_x = get_coordinate(fx, 3, decisions)
        dec_y = get_coordinate(fy, 3, decisions)
    else:
        dec_x = get_coordinate(fx, 0, decisions)
        dec_y = get_coordinate(fy, 0, decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    y = scratch_music(y, dec_y)
    x = x[np.nonzero(x)]
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    positive_arousal_samples = [i / i.max() for i in (x, y)]
    positive_arousal_x = np.float32(positive_arousal_samples).sum(axis=0)
    positive_arousal_x = 0.5 * positive_arousal_x / positive_arousal_x.max()
    if harmonic is True:
        return librosa.decompose.hpss(librosa.stft(positive_arousal_x), margin=(1.0, 5.0))[0]
    if harmonic is False or harmonic is None:
        interv = RhythmExtractor2013()(positive_arousal_x)[1] * 44100
        steps = overlapped_intervals(interv)
        output = librosa.effects.remix(positive_arousal_x, steps, align_zeros=False)
        output = librosa.effects.pitch_shift(output, sr=44100, n_steps=4)
        remix_filename = 'data/emotions/remixes/happy/' + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg'
        MonoWriter(filename=remix_filename, format='ogg', sampleRate=44100)(np.float32(output))
        subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
def not_angry_music_remix(self, neg_arous_dir, files, decisions):
    sounds = []
    for i in range(len(neg_arous_dir)):
        for subdirs, dirs, s in os.walk(neg_arous_dir[i]):
            sounds.append(subdirs + '/' + random.choice(s))
    fx = random.choice(sounds[::-1])
    fy = random.choice(sounds[:])
    x = MonoLoader(filename=fx)()
    y = MonoLoader(filename=fy)()
    fx = fx.split('/')[1].split('.')[0]
    fy = fy.split('/')[1].split('.')[0]
    fx = np.where(files == fx)[0]
    fy = np.where(files == fy)[0]
    dec_x = get_coordinate(fx, choice(range(1, 3)), decisions)
    dec_y = get_coordinate(fy, choice(range(1, 3)), decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    y = scratch_music(y, dec_y)
    x = x[np.nonzero(x)]
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    morph = stft.morph(x1=x, x2=y, fs=44100, w1=np.hanning(1025), N1=2048, w2=np.hanning(1025), N2=2048, H1=512, smoothf=0.1, balancef=0.7)
    onsets = hfc_onsets(np.float32(morph))
    interv = seconds_to_indices(onsets)
    steps = overlapped_intervals(interv)
    output = librosa.effects.remix(morph, steps[::-1], align_zeros=False)
    output = librosa.effects.pitch_shift(output, sr=44100, n_steps=4)
    remix_filename = 'data/emotions/remixes/not angry/' + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg'
    MonoWriter(filename=remix_filename, sampleRate=44100, format='ogg')(np.float32(output))
    subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
def not_relaxed_music_remix(self, pos_arous_dir, files, decisions):
    sounds = []
    for i in range(len(pos_arous_dir)):
        for subdirs, dirs, s in os.walk(pos_arous_dir[i]):
            sounds.append(subdirs + '/' + random.choice(s))
    fx = random.choice(sounds[::-1])
    fy = random.choice(sounds[:])
    x = MonoLoader(filename=fx)()
    y = MonoLoader(filename=fy)()
    fx = fx.split('/')[1].split('.')[0]
    fy = fy.split('/')[1].split('.')[0]
    fx = np.where(files == fx)[0]
    fy = np.where(files == fy)[0]
    dec_x = get_coordinate(fx, choice([0, 1, 3]), decisions)
    dec_y = get_coordinate(fy, choice([0, 1, 3]), decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    y = scratch_music(y, dec_y)
    x = x[np.nonzero(x)]
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    morph = stft.morph(x1=x, x2=y, fs=44100, w1=np.hanning(1025), N1=2048, w2=np.hanning(1025), N2=2048, H1=512, smoothf=0.01, balancef=0.7)
    interv = RhythmExtractor2013()(np.float32(morph))[1] * 44100
    steps = overlapped_intervals(interv)
    output = librosa.effects.remix(morph, steps[::-1], align_zeros=False)
    output = librosa.effects.pitch_shift(output, sr=44100, n_steps=3)
    remix_filename = 'data/emotions/remixes/not relaxed/' + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg'
    MonoWriter(filename=remix_filename, sampleRate=44100, format='ogg')(np.float32(output))
    subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
def parse_audio(self, audio_path):
    if self.augment:
        y = load_randomly_augmented_audio(audio_path, self.sample_rate)
    else:
        y = load_audio(audio_path)
    if self.noiseInjector:
        add_noise = np.random.binomial(1, self.noise_prob)
        if add_noise:
            y = self.noiseInjector.inject_noise(y)
    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)
    # STFT
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, phase = librosa.magphase(D)
    # S = log(S + 1)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if self.normalize:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
def __init__(self, tex_wnd, fft_len=512, sr=22050):
    self.tex_wnd = tex_wnd
    self.an_wnd_len = fft_len
    self.sr = sr
    # magnitude spectrum of the texture window; hop_length == fft_len gives non-overlapping frames
    self.fft_tex_wnds = np.abs(
        librosa.stft(
            y=tex_wnd,
            n_fft=fft_len,
            hop_length=fft_len,
        )
    )