def load(self, image_names):
    print("loading %d images" % len(image_names))
    # Python 3's map() returns a lazy iterator, so materialize it as a list.
    return list(map(self.load_image, image_names))
Example source code for Python load() implementations
def next_batch(self, batch_size, fake_data=False):
    """Return the next `batch_size` examples from this data set."""
    if fake_data:
        fake_image = [1] * width * height
        if self.one_hot:
            fake_label = [1] + [0] * 9
        else:
            fake_label = 0
        return ([fake_image for _ in range(batch_size)],
                [fake_label for _ in range(batch_size)])
    start = self._index_in_epoch
    self._index_in_epoch += batch_size
    if self._index_in_epoch > self._num_examples:
        # Finished epoch
        self._epochs_completed += 1
        # Shuffle the data (image names and labels stay aligned)
        perm = numpy.arange(self._num_examples)
        numpy.random.shuffle(perm)
        self._image_names = self._image_names[perm]
        self._labels = self._labels[perm]
        # Start next epoch
        start = 0
        self._index_in_epoch = batch_size
        assert batch_size <= self._num_examples
    end = self._index_in_epoch
    return self.load(self._image_names[start:end]), self._labels[start:end]
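A minimal driver sketch, assuming `dataset` is an instance of the class that defines next_batch above (the constructor is not shown here, so the object is passed in as a parameter):

def train_on_batches(dataset, num_steps=100, batch_size=32):
    # Pull successive shuffled batches; load() materializes the images.
    for _ in range(num_steps):
        images, labels = dataset.next_batch(batch_size)
        # ... feed images/labels to the training step here ...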
# multi-label
def load_audio(path, sample_length=64000, sr=16000):
    """Load a wave file.

    Args:
        path: Location of a wave file to load.
        sample_length: The truncated total length of the final waveform.
        sr: Samples per second.

    Returns:
        out: The audio as samples in the range [-1.0, 1.0].
    """
    audio, _ = librosa.load(path, sr=sr)
    audio = audio[:sample_length]
    return audio
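A quick usage sketch for load_audio; the path is a placeholder:

def four_second_clip(path):
    # 4 seconds at 16 kHz = 64000 samples, the function's default length.
    return load_audio(path, sample_length=4 * 16000, sr=16000)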
def load_generic_audio(directory, sample_rate):
    """Generator that yields audio waveforms from the directory."""
    def randomize_files(fns):
        # Sample files uniformly at random, with replacement.
        for _ in fns:
            file_index = random.randint(0, len(fns) - 1)
            yield fns[file_index]

    files = find_files(directory)
    id_reg_exp = re.compile(FILE_PATTERN)
    print("files length: {}".format(len(files)))
    randomized_files = randomize_files(files)
    for filename in randomized_files:
        ids = id_reg_exp.findall(filename)
        if not ids:
            # The file name does not match the pattern containing ids, so
            # there is no id.
            category_id = None
        else:
            # The file name matches the pattern for containing ids.
            category_id = int(ids[0][0])
        audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
        # Normalize audio
        audio = librosa.util.normalize(audio) * 0.8
        # Trim the last 5 seconds to account for music rollout
        audio = audio[:-5 * sample_rate]
        audio = np.reshape(audio, (-1, 1))
        yield audio, filename, category_id
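A hedged consumer sketch for the generator above (find_files and FILE_PATTERN are defined elsewhere in that project):

def preview_waveforms(directory, sample_rate=16000, limit=3):
    # Pull a few randomized waveforms and report their shapes.
    gen = load_generic_audio(directory, sample_rate)
    for _ in range(limit):
        audio, filename, category_id = next(gen)
        print(filename, category_id, audio.shape)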
File: load_wavfile_and_save_lc_embedding.py
Project: the-wavenet-pianist
Author: 821760408-sp
def load_wav(wavfile, sr, mono=True):
    audio, _ = librosa.load(wavfile, sr=sr, mono=mono)
    # Normalize audio
    audio = librosa.util.normalize(audio) * 0.8
    lc = AudioReader.midi_notes_encoding(audio)
    # str.strip('.wav') removes characters, not the suffix; use splitext instead.
    fn = os.path.splitext(os.path.abspath(wavfile))[0]
    fn = "{}_lc_embedding.npy".format(fn)
    # np.save writes binary data, so the file must be opened in 'wb' mode.
    with open(fn, 'wb') as f:
        np.save(f, lc)
def create_seed(filename,
                sample_rate,
                quantization_channels,
                window_size):
    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
    quantized = mu_law_encode(audio, quantization_channels)
    # Cut at the window size, or at the full length if the clip is shorter.
    cut_index = tf.cond(tf.size(quantized) < tf.constant(window_size),
                        lambda: tf.size(quantized),
                        lambda: tf.constant(window_size))
    return quantized[:cut_index]
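A sketch of evaluating the seed, assuming TensorFlow 1.x graph mode (mu_law_encode is defined elsewhere in the project; the parameter values are illustrative):

def make_seed_array(filename, sample_rate=16000,
                    quantization_channels=256, window_size=8000):
    # create_seed returns a tensor, so run it in a session to get a numpy array.
    with tf.Session() as sess:
        return sess.run(create_seed(filename, sample_rate,
                                    quantization_channels, window_size))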
def load_lc_embedding(lc_embedding):
    # np.load reads binary data, so open in 'rb' mode.
    with open(lc_embedding, 'rb') as f:
        return np.load(f)
def read_wav_file(file):
    r"""
    Loads a wav file from disk and resamples it to 22050 Hz.
    The output is shaped as [timesteps, 1].

    Parameters
    ----------
    file : str
        Path of the wav file to read.

    Returns
    -------
    numpy.ndarray of shape [timesteps, 1].
    """
    import librosa
    data, sr = librosa.load(file)
    return np.expand_dims(data, axis=-1)
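A small sanity check of the shape contract described in the docstring; the path argument is a placeholder:

def check_wav_shape(path):
    # read_wav_file promises a [timesteps, 1] array.
    wav = read_wav_file(path)
    assert wav.ndim == 2 and wav.shape[1] == 1
    return wav.shape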
File: neural_network_audioset.py
Project: TensorFlow_AudioSet_Example
Author: DantesLegacy
def load_sound_files(file_paths):
    raw_sounds = []
    for fp in file_paths:
        X, sr = librosa.load(fp)
        raw_sounds.append(X)
    return raw_sounds
File: neural_network_audioset.py
Project: TensorFlow_AudioSet_Example
Author: DantesLegacy
def extract_feature(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
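A hedged follow-up showing how these five groups are typically stacked into one vector (with librosa's defaults the lengths are 40 + 12 + 128 + 7 + 6 = 193):

def feature_vector(file_name):
    # Concatenate the per-group means into a single 1-D feature vector.
    mfccs, chroma, mel, contrast, tonnetz = extract_feature(file_name)
    return np.hstack([mfccs, chroma, mel, contrast, tonnetz])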
File: audio_reader.py
Project: CNN-for-single-channel-speech-enhancement
Author: zhr1201
def norm_audio(self):
    '''Normalize the audio files in place;
    run once before training via an independent script.'''
    for file in self.audiofiles:
        audio, sr = librosa.load(file, sr=16000)
        div_fac = 1 / np.max(np.abs(audio)) / 3.0
        audio = audio * div_fac
        librosa.output.write_wav(file, audio, sr)
    for file in self.noisefiles:
        audio, sr = librosa.load(file, sr=16000)
        div_fac = 1 / np.max(np.abs(audio)) / 3.0
        audio = audio * div_fac
        librosa.output.write_wav(file, audio, sr)
File: dataset_recorder_generator.py
Project: Personal_AI_Assistant
Author: PratylenClub
def callback(recognizer, audio):
    try:
        sentence = recognizer.recognize_google(audio, language=language)
        wave_file_name = "train.wav"
        with open(wave_file_name, "wb") as wav_file:
            wav_file.write(audio.get_wav_data())
        wave, sample_rate = librosa.load(wave_file_name, mono=True, sr=None)
        # Keep every third sample (crude 3x downsampling).
        wave = wave[::3]
        save_recording(wave_file_name, wave, sentence, CSV_BIG_ONE)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
def load_wav_files(files):
    wav_files = []
    for i, f in enumerate(files):
        print(i, f)
        wav_files += [librosa.load(f, sr=SAMPLINGRATE)[0]]
    return wav_files
def get_spectrograms(sound_file):
    '''Extracts the mel spectrogram and log magnitude from the given `sound_file`.

    Args:
        sound_file: A string. Full path of a sound file.

    Returns:
        Transposed S: A 2-D array. The transposed mel spectrogram with shape (T, n_mels).
        Transposed magnitude: A 2-D array with shape (T, 1 + hp.n_fft // 2).
    '''
    # Loading sound file
    y, sr = librosa.load(sound_file, sr=None)  # or set sr to hp.sr
    # stft. D: (1 + n_fft // 2, T)
    D = librosa.stft(y=y,
                     n_fft=hp.n_fft,
                     hop_length=hp.hop_length,
                     win_length=hp.win_length)
    # magnitude spectrogram
    magnitude = np.abs(D)  # (1 + n_fft // 2, T)
    # power spectrogram
    power = magnitude ** 2  # (1 + n_fft // 2, T)
    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels)  # (n_mels, T)
    return np.transpose(S.astype(np.float32)), np.transpose(magnitude.astype(np.float32))  # (T, n_mels), (T, 1 + n_fft // 2)
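A quick inspection sketch for the two return values, assuming the hp hyperparameter module from the snippet is available:

def spectrogram_shapes(sound_file):
    # Expect (T, hp.n_mels) and (T, 1 + hp.n_fft // 2).
    mel, mag = get_spectrograms(sound_file)
    return mel.shape, mag.shape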
def test_load_sound(self):
    s1 = Sound.from_file(self.audio_file)
    y, sr = librosa.load(self.audio_file)
    s2 = Sound(y, sr)
    self.assertTrue(numpy.all(s1.y == s2.y))
    # Construct from a random signal at a random standard sample rate.
    s3 = Sound(numpy.random.rand(random.randint(1, 100000)),
               random.choice((88200, 44100, 22050, 11025)))
@classmethod
def from_file(cls, filename, sr=22050):
    """Loads an audio file; uses sr=22050 by default."""
    y, sr = librosa.load(filename, sr=sr)
    return cls(y, sr)
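A one-line usage sketch, assuming Sound wraps the (y, sr) pair as in the test above; the filename is a placeholder:

def load_clip(filename):
    # Resample to 16 kHz while loading through the classmethod.
    return Sound.from_file(filename, sr=16000)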
# Chunk iterator
def parse_wav(filename, n_mfcc=40):
    '''
    Parses a single wav file into MFCCs and sample rate.

    Arguments:
        filename - Name of input wav file.
        n_mfcc - Number of coefficients to use.

    Returns:
        A tuple of (numpy array of cepstrum coefficients, sample rate).
        For a non-wav input, returns an empty array and a sample rate of -1.
    '''
    song_data = np.array([])
    sample_rate = -1
    if filename[-4:] == '.wav':
        try:
            y_data, sample_rate = librosa.load(filename)
            # Will need to experiment with different values for n_mfcc.
            song_data = librosa.feature.mfcc(y=y_data,
                                             sr=sample_rate,
                                             n_mfcc=n_mfcc)
        except Exception:
            sys.exit(1)
    return (song_data, sample_rate)
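A hedged usage sketch; the fallback values follow the function's own defaults:

def mfcc_shape(filename):
    # Returns (n_mfcc, T) for a readable .wav, or an empty array and -1 otherwise.
    coeffs, rate = parse_wav(filename, n_mfcc=40)
    return coeffs.shape, rate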
def compute_spectrograms(filename):
    out_rate = 22050
    frames, rate = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate:
        # Less than one second of audio - too short to process.
        raise Exception("Audio duration is too short")
    normalized_audio = _normalize(frames)
    melspectr = librosa.feature.melspectrogram(y=normalized_audio, sr=out_rate, n_mels=N_MEL_BANDS, fmax=out_rate / 2)
    logmelspectr = librosa.logamplitude(melspectr ** 2, ref_power=1.0)
    # Slide through the spectrogram with a stride of the segment duration.
    for start_idx in range(0, logmelspectr.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield logmelspectr[:, start_idx:start_idx + SEGMENT_DUR]
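A short consumer sketch that materializes every fixed-length segment the generator yields; N_MEL_BANDS and SEGMENT_DUR come from the surrounding project:

def collect_segments(filename):
    # Each element has shape (N_MEL_BANDS, SEGMENT_DUR).
    return list(compute_spectrograms(filename))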
def get_feature_aqibsaeed_1(X, sr, au_path=None):
    """
    http://aqibsaeed.github.io/2016-09-03-urban-sound-classification-part-1/
    """
    import librosa
    if au_path is not None:
        X, sr = librosa.load(au_path)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sr, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sr).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sr).T, axis=0)
    feature = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
    return feature
def __init__(self, cache=None, **kwargs):
    super(GTZAN, self).__init__(**kwargs)
    if kwargs.get('conf') is not None:
        conf = kwargs['conf']
        cache = conf.get('cache', None)
    data_set_path = osp.join(DEFAULT_IMAGEST_BASE, self.data_set)
    self.data_set_path = data_set_path
    self.cache = cache
    X, y = parse_anno_file(data_set_path)
    if cache == 'raw':
        import librosa
        from tqdm import trange
        # Load each clip as raw samples, zero-padded/truncated to 661500
        # samples (30 s at 22050 Hz).
        X_new = np.zeros((len(X), 1, 661500, 1))
        for i in trange(len(X)):
            x, _ = librosa.load(osp.join(DEFAULT_DATA_BASE, X[i]))
            x_len = min(661500, len(x))
            X_new[i, :, :x_len, 0] = x[:x_len]
        # Use the padded array from here on.
        X = X_new
    if cache is not None and cache != 'raw':
        X = self.load_cache_X(X, cache)
        if cache == 'mfcc':
            X_new = np.zeros((len(X), X[0].shape[0], 1280, 1))
            for i, x in enumerate(X):
                x_len = min(x.shape[1], 1280)
                X_new[i, :, :x_len, 0] = x[:, :x_len]
            X = X_new
    # layout_X
    if self.layout_x == 'rel_path':
        self.X = X
    else:
        self.X = self.init_layout_X(X)
    # layout_y
    self.y = self.init_layout_y(y)