Python: example source code for load()

speech_data.py (project: skill-voice-recognition, author: TREE-Edu)
def load(self, image_names):
        print("loading %d images" % len(image_names))
        # map() returns a lazy iterator in Python 3, so materialize it as a list.
        return list(map(self.load_image, image_names))
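A quick aside on the comment above: in Python 3, map() returns a one-shot iterator rather than a list, which is why the snippet wraps it in list(). A minimal, self-contained illustration (not taken from the project):

names = ["a.png", "b.png", "c.png"]

lazy = map(str.upper, names)
print(list(lazy))   # ['A.PNG', 'B.PNG', 'C.PNG']
print(list(lazy))   # [] -- the iterator is already exhausted

materialized = list(map(str.upper, names))
print(len(materialized))  # 3 -- reusable like any list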
speech_data.py (project: skill-voice-recognition, author: TREE-Edu)
def next_batch(self, batch_size, fake_data=False):
        """Return the next `batch_size` examples from this data set."""
        if fake_data:
            fake_image = [1] * width * height
            if self.one_hot:
                fake_label = [1] + [0] * 9
            else:
                fake_label = 0
            return [fake_image for _ in range(batch_size)], [
                    fake_label for _ in range(batch_size)]
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle the data
            perm = numpy.arange(self._num_examples)
            numpy.random.shuffle(perm)
            # self._images = self._images[perm]
            self._image_names = self._image_names[perm]
            self._labels = self._labels[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples
        end = self._index_in_epoch
        return self.load(self._image_names[start:end]), self._labels[start:end]
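For context, next_batch() is usually driven by a training loop along these lines; the dataset object and step count below are hypothetical placeholders, not part of the project:

# Hypothetical sketch of how next_batch() is typically consumed.
# `train_set` stands in for an object exposing the method above.
batch_size = 64
for step in range(1000):
    images, labels = train_set.next_batch(batch_size)
    # feed `images` and `labels` to the model's training step here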


# multi-label
utils.py (project: magenta, author: tensorflow)
def load_audio(path, sample_length=64000, sr=16000):
  """Loading of a wave file.

  Args:
    path: Location of a wave file to load.
    sample_length: The truncated total length of the final wave file.
    sr: Samples per second.

  Returns:
    out: The audio as float samples in [-1.0, 1.0].
  """
  audio, _ = librosa.load(path, sr=sr)
  audio = audio[:sample_length]
  return audio
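A minimal usage sketch, assuming a local file called example.wav (the path is illustrative):

import librosa  # the snippet above assumes librosa is already imported

samples = load_audio("example.wav", sample_length=64000, sr=16000)
print(samples.shape)                  # at most (64000,)
print(samples.min(), samples.max())   # librosa returns float samples roughly in [-1.0, 1.0]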
audio_reader.py (project: the-wavenet-pianist, author: 821760408-sp)
def load_generic_audio(directory, sample_rate):
    """Generator that yields audio waveforms from the directory."""

    def randomize_files(fns):
        for _ in fns:
            file_index = random.randint(0, len(fns) - 1)
            yield fns[file_index]

    files = find_files(directory)
    id_reg_exp = re.compile(FILE_PATTERN)
    print("files length: {}".format(len(files)))
    randomized_files = randomize_files(files)
    for filename in randomized_files:
        ids = id_reg_exp.findall(filename)
        if not ids:
            # The file name does not match the pattern containing ids, so
            # there is no id.
            category_id = None
        else:
            # The file name matches the pattern for containing ids.
            category_id = int(ids[0][0])
        audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
        # Normalize audio
        audio = librosa.util.normalize(audio) * 0.8
        # Trim the last 5 seconds to account for music rollout
        audio = audio[:-5 * sample_rate]
        audio = np.reshape(audio, (-1, 1))
        yield audio, filename, category_id
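The generator above is consumed lazily; a hedged usage sketch (the directory and sample rate are placeholders, and find_files/FILE_PATTERN must come from the surrounding module):

for audio, filename, category_id in load_generic_audio("/path/to/corpus", sample_rate=16000):
    print(filename, category_id, audio.shape)  # audio has shape (n_samples, 1)
    break  # stop after the first randomly chosen file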
load_wavfile_and_save_lc_embedding.py (project: the-wavenet-pianist, author: 821760408-sp)
def load_wav(wavfile, sr, mono=True):
    audio, _ = librosa.load(wavfile, sr=sr, mono=mono)
    # Normalize audio
    audio = librosa.util.normalize(audio) * 0.8
    lc = AudioReader.midi_notes_encoding(audio)

    # str.strip('.wav') strips characters, not a suffix, so use splitext to drop the extension.
    fn = os.path.splitext(os.path.abspath(wavfile))[0]
    fn = "{}_lc_embedding.npy".format(fn)
    # np.save writes binary data, so the file must be opened in 'wb' mode.
    with open(fn, 'wb') as f:
        np.save(f, lc)
generate.py (project: the-wavenet-pianist, author: 821760408-sp)
def create_seed(filename,
                sample_rate,
                quantization_channels,
                window_size):
    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
    quantized = mu_law_encode(audio, quantization_channels)
    cut_index = tf.cond(tf.size(quantized) < tf.constant(window_size),
                        lambda: tf.size(quantized),
                        lambda: tf.constant(window_size))

    return quantized[:cut_index]
generate.py (project: the-wavenet-pianist, author: 821760408-sp)
def load_lc_embedding(lc_embedding):
    # np.load reads binary data, so the file must be opened in 'rb' mode.
    with open(lc_embedding, 'rb') as f:
        return np.load(f)
utils.py (project: pyVSR, author: georgesterpu)
def read_wav_file(file):
    r"""
    Loads wav files from disk and resamples to 22050 Hz
    The output is shaped as [timesteps, 1]
    Parameters
    ----------
    file

    Returns
    -------

    """
    import librosa
    data, sr = librosa.load(file)
    return np.expand_dims(data, axis=-1)
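Usage is straightforward; the file name below is a placeholder:

data = read_wav_file("speech.wav")   # placeholder file name
print(data.shape)                    # (timesteps, 1); librosa resamples to 22050 Hz by default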
neural_network_audioset.py (project: TensorFlow_AudioSet_Example, author: DantesLegacy)
def load_sound_files(file_paths):
    raw_sounds = []
    for fp in file_paths:
        X, sr = librosa.load(fp)
        raw_sounds.append(X)
    return raw_sounds
neural_network_audioset.py (project: TensorFlow_AudioSet_Example, author: DantesLegacy)
def extract_feature(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    return mfccs,chroma,mel,contrast,tonnetz
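The five returned arrays are typically stacked into a single feature vector, exactly as the gcForest snippet further down does with np.hstack; a short sketch with a placeholder file name:

import numpy as np

mfccs, chroma, mel, contrast, tonnetz = extract_feature("clip.wav")
feature = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
# With librosa defaults: 40 MFCCs + 12 chroma + 128 mel + 7 contrast + 6 tonnetz = 193 values
print(feature.shape)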
audio_reader.py (project: CNN-for-single-channel-speech-enhancement, author: zhr1201)
def norm_audio(self):
        '''Normalize the audio files;
        used before training by an independent script.'''
        for file in self.audiofiles:
            audio, sr = librosa.load(file, sr=16000)
            # Scale so that the peak amplitude is one third of full scale.
            div_fac = 1 / np.max(np.abs(audio)) / 3.0
            audio = audio * div_fac
            librosa.output.write_wav(file, audio, sr)
        for file in self.noisefiles:
            audio, sr = librosa.load(file, sr=16000)
            div_fac = 1 / np.max(np.abs(audio)) / 3.0
            audio = audio * div_fac
            librosa.output.write_wav(file, audio, sr)
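Note that librosa.output.write_wav was removed in librosa 0.8; on recent versions the same write can be done with the soundfile package. A sketch under that assumption, not part of the original project:

import librosa
import numpy as np
import soundfile as sf

audio, sr = librosa.load("noisy.wav", sr=16000)   # placeholder file name
div_fac = 1 / np.max(np.abs(audio)) / 3.0         # scale peak amplitude to 1/3
sf.write("noisy.wav", audio * div_fac, sr)        # replaces librosa.output.write_wav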
dataset_recorder_generator.py (project: Personal_AI_Assistant, author: PratylenClub)
def callback(recognizer, audio):
    try:
        sentence = recognizer.recognize_google(audio, language=language)
        wave_file_name = "train.wav"
        wav_file = open(wave_file_name,"wb")
        wav_file.write(audio.get_wav_data())
        wav_file.close()
        wave, sample_rate = librosa.load(wave_file_name, mono=True, sr=None)
        wave = wave[::3]  # keep every third sample (crude downsampling)
        save_recording(wave_file_name,wave,sentence,CSV_BIG_ONE)

    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
timit_for_srnn.py (project: srnn, author: marcofraccaro)
def load_wav_files(files):
    wav_files = []
    for i, f in enumerate(files):
        print(i, f)
        wav_files += [librosa.load(f, sr=SAMPLINGRATE)[0]]
    return wav_files
utils.py (project: tacotron, author: jinfagang)
def get_spectrograms(sound_file):
    '''Extracts melspectrogram and log magnitude from given `sound_file`.
    Args:
      sound_file: A string. Full path of a sound file.

    Returns:
      Transposed S: A 2d array. A transposed mel spectrogram with shape (T, n_mels).
      Transposed magnitude: A 2d array with shape (T, 1+hp.n_fft//2).
    '''
    # Loading sound file
    y, sr = librosa.load(sound_file, sr=None)  # or set sr to hp.sr.

    # stft. D: (1+n_fft//2, T)
    D = librosa.stft(y=y,
                     n_fft=hp.n_fft,
                     hop_length=hp.hop_length,
                     win_length=hp.win_length)

    # magnitude spectrogram
    magnitude = np.abs(D)  # (1+n_fft/2, T)

    # power spectrogram
    power = magnitude ** 2  # (1+n_fft/2, T)

    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels)  # (n_mels, T)

    return np.transpose(S.astype(np.float32)), np.transpose(magnitude.astype(np.float32))  # (T, n_mels), (T, 1+n_fft/2)
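get_spectrograms() depends on a hyperparameter module hp that is not shown here; a hedged sketch of the fields it would need (the concrete values are illustrative, not the tacotron project's actual settings):

# Hypothetical stand-in for the `hp` module referenced above.
class hp:
    sr = 22050            # sampling rate, if you choose to resample on load
    n_fft = 2048          # FFT window size
    hop_length = 256      # hop between successive frames
    win_length = 2048     # analysis window length
    n_mels = 80           # number of mel bands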
test_sound_loading.py (project: aupyom, author: pierre-rouanet)
def test_load_sound(self):
        s1 = Sound.from_file(self.audio_file)

        y, sr = librosa.load(self.audio_file)
        s2 = Sound(y, sr)

        self.assertTrue(numpy.all(s1.y == s2.y))

        s3 = Sound(numpy.random.rand(random.randint(1, 100000)),
                   random.choice((88200, 44100, 22050, 11025)))
sound.py (project: aupyom, author: pierre-rouanet)
def from_file(cls, filename, sr=22050):
        """ Loads an audiofile, uses sr=22050 by default. """
        y, sr = librosa.load(filename, sr=sr)
        return cls(y, sr)

    # Chunk iterator
music_parsing.py (project: DeepRemix, author: DeepRemix)
def parse_wav(filename, n_mfcc=40):
    '''
    Parses a single wav file into MFCCs and its sample rate.

    Arguments:
        filename - Name of input wav file.
        n_mfcc   - Number of coefficients to use.

    Returns:
        A tuple with a numpy array with cepstrum coefficients, and sample rate.

    Raises:
        SystemExit - If the wav file cannot be loaded or parsed.
    '''

    song_data = np.array([])
    sample_rate = -1
    if filename[-4:] == '.wav':
        try:
            y_data, sample_rate = librosa.load(filename)
            #  will need to experiment with different values for n_mfcc
            song_data = librosa.feature.mfcc(y=y_data,
                                             sr=sample_rate,
                                             n_mfcc=n_mfcc)
        except Exception:
            sys.exit(1)

    return (song_data, sample_rate)
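A small usage sketch; the file name is a placeholder, and the returned sample_rate stays -1 when the extension check fails:

mfcc, sample_rate = parse_wav("track01.wav", n_mfcc=40)
if sample_rate > 0:
    print(mfcc.shape)   # (n_mfcc, n_frames)
else:
    print("not a .wav file, nothing was parsed")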
han16.py (project: EUSIPCO2017, author: Veleslavia)
def compute_spectrograms(filename):
    out_rate = 22050

    frames, rate = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate:
        # Shorter than 1 second: too short to process.
        raise Exception("Audio duration is too short")

    normalized_audio = _normalize(frames)
    melspectr = librosa.feature.melspectrogram(y=normalized_audio, sr=out_rate, n_mels=N_MEL_BANDS, fmax=out_rate/2)
    logmelspectr = librosa.logamplitude(melspectr**2, ref_power=1.0)

    # now going through spectrogram with the stride of the segment duration
    for start_idx in range(0, logmelspectr.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield logmelspectr[:, start_idx:start_idx + SEGMENT_DUR]
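librosa.logamplitude was removed in later librosa releases; on current versions the equivalent call is power_to_db. A sketch under that assumption, with a placeholder spectrogram:

import librosa
import numpy as np

melspectr = np.random.rand(80, 100)   # placeholder mel spectrogram
# Equivalent of librosa.logamplitude(melspectr**2, ref_power=1.0) on recent librosa:
logmelspectr = librosa.power_to_db(melspectr ** 2, ref=1.0)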
audio_utils.py (project: gcForest, author: kingfengji)
def get_feature_aqibsaeed_1(X, sr, au_path=None):
    """
    http://aqibsaeed.github.io/2016-09-03-urban-sound-classification-part-1/
    """
    import librosa
    if au_path is not None:
        X, sr = librosa.load(au_path)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sr, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sr).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sr).T,axis=0)
    feature = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
    return feature
gtzan.py (project: gcForest, author: kingfengji)
def __init__(self, cache=None, **kwargs):
        super(GTZAN, self).__init__(**kwargs)
        if kwargs.get('conf') is not None:
            conf = kwargs['conf']
            cache = conf.get('cache', None)
        data_set_path = osp.join(DEFAULT_IMAGEST_BASE, self.data_set)
        self.data_set_path = data_set_path
        self.cache = cache
        X, y = parse_anno_file(data_set_path)
        if cache == 'raw':
            import librosa
            from tqdm import trange
            X_new = np.zeros((len(X), 1, 661500, 1))
            for i in trange(len(X)):
                x,_ = librosa.load(osp.join(DEFAULT_DATA_BASE, X[i]))
                x_len = min(661500, len(x))
                X_new[i,:,:x_len,0] = x[:x_len]
        if cache is not None and cache != 'raw':
            X = self.load_cache_X(X, cache)
            if cache == 'mfcc':
                X_new = np.zeros((len(X), X[0].shape[0], 1280, 1))
                for i, x in enumerate(X):
                    x_len = min(x.shape[1], 1280)
                    X_new[i,:,:x_len,0] = x[:,:x_len]
                X = X_new

        # layout_X
        if self.layout_x == 'rel_path':
            self.X = X
        else:
            self.X = self.init_layout_X(X)
        # layout_y
        self.y = self.init_layout_y(y)

