def load_wav_chunks(filenames):
    """Load WAV files and stack their fixed-size chunks into one array.

    Each file is read with ``wavfile.read``, passed through ``resample`` when
    its rate differs from ``desired_rate`` or its data has more than one
    dimension, scaled by 1/32768 and split with ``create_chunks``.

    Args:
        filenames: sequence of paths to WAV files.

    Returns:
        A float64 array of shape ``(total_chunks, chunk_size)`` holding the
        chunks of all files, in file order.

    Raises:
        ValueError: if ``create_chunks`` yields chunks whose width is not
            ``chunk_size``.
    """
    # Accumulate one array per file and concatenate at the end. The previous
    # implementation wrote into a preallocated 100000-row float64 buffer,
    # which wasted memory for small inputs and raised once the total number
    # of chunks exceeded that fixed limit.
    per_file = []
    for file_idx, filename in enumerate(filenames):
        print("[" + str(file_idx).zfill(3) + "]: " + filename)
        rate, data = wavfile.read(filename)
        if rate != desired_rate or len(data.shape) > 1:
            data = resample(data, rate, desired_rate)
        # NOTE(review): the 1/32768 scale assumes 16-bit PCM samples -- confirm.
        data = data.astype(np.float32) * (1.0 / 32768.0)
        chunks = create_chunks(data)
        if len(chunks) == 0:
            # A file too short to produce any chunk contributes nothing.
            continue
        file_chunks = np.asarray(chunks, dtype=np.float64)
        if file_chunks.ndim != 2 or file_chunks.shape[1] != chunk_size:
            raise ValueError(
                "unexpected chunk shape %r for %s" % (file_chunks.shape, filename))
        per_file.append(file_chunks)
    if not per_file:
        return np.zeros([0, chunk_size])
    return np.concatenate(per_file, axis=0)
# Example source code for Python's read()
def encode(self, s):
    """Read the waveform file at path `s` and return its samples as floats.

    A path that does not end in ".wav" is first converted with the external
    `sox` tool into a 16 kHz, 16-bit, single-channel WAV written next to the
    input as `s + ".wav"`; an already existing converted file is reused.

    Args:
      s: path to the file with a waveform.

    Returns:
      samples: list of floats. Integer sample data is divided by the maximum
        value of its dtype; float32/float64 data is returned unscaled.

    Raises:
      IOError: if the `sox` conversion exits with a non-zero status.
      AssertionError: if the sample rate differs from `self._sample_rate` or
        the data is not one-dimensional.
    """
    # Make sure that the data is a single channel, 16bit, 16kHz wave.
    # TODO(chorowski): the directory may not be writable, this should fallback
    # to a temp path, and provide instructions for installing sox.
    if not s.endswith(".wav"):
        out_filepath = s + ".wav"
        if not os.path.exists(out_filepath):
            status = call(
                ["sox", "-r", "16k", "-b", "16", "-c", "1", s, out_filepath])
            if status != 0:
                # Remove any partial output so a later call does not mistake
                # it for a finished conversion and silently reuse it.
                if os.path.exists(out_filepath):
                    os.remove(out_filepath)
                raise IOError(
                    "sox failed with exit status %d while converting %r"
                    % (status, s))
        s = out_filepath
    rate, data = wavfile.read(s)
    assert rate == self._sample_rate, (
        "expected sample rate %r, got %r" % (self._sample_rate, rate))
    assert len(data.shape) == 1, "expected single-channel audio"
    if data.dtype not in [np.float32, np.float64]:
        data = data.astype(np.float32) / np.iinfo(data.dtype).max
    return data.tolist()
def load_audio(filename, b_normalize=True):
    """Load the audiofile at the provided filename using scipy.io.wavfile.

    Optionally normalizes the audio to the maximum value.

    Parameters
    ----------
    filename : str
        File to load.
    b_normalize : bool, optional
        Convert to float32, divide by the maximum absolute value and
        subtract the mean.

    Returns
    -------
    s : np.ndarray
        The samples as read from the file when `b_normalize` is False,
        otherwise the normalized float32 samples.
    """
    _, s = wavfile.read(filename)
    if b_normalize:
        s = s.astype(np.float32)
        if s.size == 0:
            # Nothing to normalize; np.max would raise on an empty array.
            return s
        peak = np.max(np.abs(s))
        # An all-zero (silent) signal would otherwise be divided by zero and
        # come back filled with NaN.
        if peak > 0:
            s = s / peak
        s -= np.mean(s)
    return s
def get_audio_feature(audio_filename="audio.wav"):
    '''
    Read a WAV file and return its standardized MFCC features.

    Args:
        audio_filename: path of the WAV file to read; defaults to
            "audio.wav", the file name that used to be hard-coded.

    Returns:
        feature_inputs: the MFCC output with a new leading axis of length 1,
            shifted by its mean and divided by its standard deviation.
        feature_seq_len: one-element list holding feature_inputs.shape[1].
    '''
    # Read the WAV file: fs is the sample rate, audio the sample data.
    fs, audio = wav.read(audio_filename)
    # Extract the MFCC features.
    inputs = mfcc(audio, samplerate=fs)
    # Add a leading axis, then standardize over the whole feature array.
    feature_inputs = np.asarray(inputs[np.newaxis, :])
    feature_inputs = (feature_inputs - np.mean(feature_inputs)) / np.std(feature_inputs)
    # Sequence length is the size of the second axis.
    feature_seq_len = [feature_inputs.shape[1]]
    return feature_inputs, feature_seq_len
def midiwrap():
    """
    Wrapper to midi read and midi write

    Imports ``midiread`` and ``midiwrite`` from ``midi.utils`` found in the
    resource directory. If the import fails, the midi.zip archive is
    downloaded (unless already present), extracted into the resource
    directory, and the import is retried.

    Returns:
        (midiread, midiwrite): the two functions from ``midi.utils``.

    Raises:
        ImportError: if ``midi.utils`` still cannot be imported after the
            archive has been extracted.
    """
    def _import_midi_utils():
        # Temporarily expose the resource directory on sys.path; the finally
        # clause removes the entry even when the import fails, so a failed
        # first attempt no longer leaks a sys.path entry.
        sys.path.insert(1, get_resource_dir(""))
        try:
            from midi.utils import midiread, midiwrite
        finally:
            sys.path.pop(1)
        return midiread, midiwrite

    try:
        return _import_midi_utils()
    except ImportError:
        url = "http://www.iro.umontreal.ca/~lisa/deep/midi.zip"
        # The URL is passed as a lazy %-argument; previously it was passed
        # without a placeholder, which made the logging call fail to format.
        logger.info("Need GPL licensed midi utils, downloading... %s", url)
        partial_path = get_resource_dir("")
        full_path = os.path.join(partial_path, "midi.zip")
        if not os.path.exists(full_path):
            download(url, full_path)
        # Context manager closes the archive even if extraction raises.
        with zipfile.ZipFile(full_path, 'r') as zip_ref:
            zip_ref.extractall(partial_path)
        return _import_midi_utils()
def fetch_sample_audio_chords(n_samples=None):
    """Download (if needed) and load the piano chords sample archive.

    Args:
        n_samples: maximum number of WAV members to load; None loads all.

    Returns:
        (fs, chords, wav_names): ``fs`` is the sample rate of the last file
        read (None when no file was read), ``chords`` is a list of float32
        arrays scaled by 1 / 2**15, and ``wav_names`` lists every WAV member
        name in the archive (it is not truncated to ``n_samples``).
    """
    url = "https://dl.dropboxusercontent.com/u/15378192/piano_chords.tar.gz"
    partial_path = get_resource_dir("chords")
    full_path = os.path.join(partial_path, "piano_chords.tar.gz")
    if not os.path.exists(full_path):
        download(url, full_path)
    # Defined up front so an empty selection returns None instead of raising
    # UnboundLocalError at the return statement.
    fs = None
    chords = []
    tf = tarfile.open(full_path)
    try:
        wav_names = [fname for fname in tf.getnames()
                     if ".wav" in fname.split(os.sep)[-1]]
        logger.info("Loading audio files...")
        for wav_name in wav_names[:n_samples]:
            f = tf.extractfile(wav_name)
            fs, d = wavfile.read(f)
            d = d.astype('float32') / (2 ** 15)
            chords.append(d)
    finally:
        # Always release the archive handle, even if a member fails to load.
        tf.close()
    return fs, chords, wav_names
def fetch_sample_speech_fruit(n_samples=None):
    """Download (if needed) and load the fruit speech sample archive.

    Args:
        n_samples: maximum number of WAV members to load; None loads all.

    Returns:
        (fs, speech, wav_names): ``fs`` is the sample rate of the last file
        read (None when no file was read), ``speech`` is a list of float32
        arrays scaled by 1 / 2**15, and ``wav_names`` lists every WAV member
        name in the archive (it is not truncated to ``n_samples``).
    """
    url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
    partial_path = get_resource_dir("fruit")
    full_path = os.path.join(partial_path, "audio.tar.gz")
    if not os.path.exists(full_path):
        download(url, full_path)
    # Defined up front so an empty selection returns None instead of raising
    # UnboundLocalError at the return statement.
    fs = None
    speech = []
    tf = tarfile.open(full_path)
    try:
        wav_names = [fname for fname in tf.getnames()
                     if ".wav" in fname.split(os.sep)[-1]]
        logger.info("Loading speech files...")
        for wav_name in wav_names[:n_samples]:
            f = tf.extractfile(wav_name)
            fs, d = wavfile.read(f)
            d = d.astype('float32') / (2 ** 15)
            speech.append(d)
    finally:
        # Always release the archive handle, even if a member fails to load.
        tf.close()
    return fs, speech, wav_names
def readwav(file):
    # wavio.py
    # Author: Warren Weckesser
    # License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
    """
    Read a wav file.

    Returns the frame rate, sample width (in bytes) and a numpy array
    containing the data.

    This function does not read compressed wav files.

    Args:
        file: value passed straight to ``wave.open``.

    Returns:
        (rate, sampwidth, array): frame rate, sample width in bytes, and the
        result of ``_wav2array`` applied to the raw frame bytes.
    """
    wav = wave.open(file)
    try:
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        sampwidth = wav.getsampwidth()
        nframes = wav.getnframes()
        data = wav.readframes(nframes)
    finally:
        # Close the handle even if reading the header or frames raises.
        wav.close()
    array = _wav2array(nchannels, sampwidth, data)
    return rate, sampwidth, array
def midiwrap():
    """
    Wrapper to midi read and midi write

    Imports ``midiread`` and ``midiwrite`` from ``midi.utils`` found in the
    resource directory. If the import fails, the midi.zip archive is
    downloaded (unless already present), extracted into the resource
    directory, and the import is retried.

    Returns:
        (midiread, midiwrite): the two functions from ``midi.utils``.

    Raises:
        ImportError: if ``midi.utils`` still cannot be imported after the
            archive has been extracted.
    """
    def _import_midi_utils():
        # Temporarily expose the resource directory on sys.path; the finally
        # clause removes the entry even when the import fails, so a failed
        # first attempt no longer leaks a sys.path entry.
        sys.path.insert(1, get_resource_dir(""))
        try:
            from midi.utils import midiread, midiwrite
        finally:
            sys.path.pop(1)
        return midiread, midiwrite

    try:
        return _import_midi_utils()
    except ImportError:
        url = "http://www.iro.umontreal.ca/~lisa/deep/midi.zip"
        # The URL is passed as a lazy %-argument; previously it was passed
        # without a placeholder, which made the logging call fail to format.
        logger.info("Need GPL licensed midi utils, downloading... %s", url)
        partial_path = get_resource_dir("")
        full_path = os.path.join(partial_path, "midi.zip")
        if not os.path.exists(full_path):
            download(url, full_path)
        # Context manager closes the archive even if extraction raises.
        with zipfile.ZipFile(full_path, 'r') as zip_ref:
            zip_ref.extractall(partial_path)
        return _import_midi_utils()
def fetch_sample_audio_chords(n_samples=None):
    """Download (if needed) and load the piano chords sample archive.

    Args:
        n_samples: maximum number of WAV members to load; None loads all.

    Returns:
        (fs, chords, wav_names): ``fs`` is the sample rate of the last file
        read (None when no file was read), ``chords`` is a list of float32
        arrays scaled by 1 / 2**15, and ``wav_names`` lists every WAV member
        name in the archive (it is not truncated to ``n_samples``).
    """
    url = "https://dl.dropboxusercontent.com/u/15378192/piano_chords.tar.gz"
    partial_path = get_resource_dir("chords")
    full_path = os.path.join(partial_path, "piano_chords.tar.gz")
    if not os.path.exists(full_path):
        download(url, full_path)
    # Defined up front so an empty selection returns None instead of raising
    # UnboundLocalError at the return statement.
    fs = None
    chords = []
    tf = tarfile.open(full_path)
    try:
        wav_names = [fname for fname in tf.getnames()
                     if ".wav" in fname.split(os.sep)[-1]]
        logger.info("Loading audio files...")
        for wav_name in wav_names[:n_samples]:
            f = tf.extractfile(wav_name)
            fs, d = wavfile.read(f)
            d = d.astype('float32') / (2 ** 15)
            chords.append(d)
    finally:
        # Always release the archive handle, even if a member fails to load.
        tf.close()
    return fs, chords, wav_names
def readwav(file):
    # wavio.py
    # Author: Warren Weckesser
    # License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
    """
    Read a wav file.

    Returns the frame rate, sample width (in bytes) and a numpy array
    containing the data.

    This function does not read compressed wav files.

    Args:
        file: value passed straight to ``wave.open``.

    Returns:
        (rate, sampwidth, array): frame rate, sample width in bytes, and the
        result of ``_wav2array`` applied to the raw frame bytes.
    """
    wav = wave.open(file)
    try:
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        sampwidth = wav.getsampwidth()
        nframes = wav.getnframes()
        data = wav.readframes(nframes)
    finally:
        # Close the handle even if reading the header or frames raises.
        wav.close()
    array = _wav2array(nchannels, sampwidth, data)
    return rate, sampwidth, array
def midiwrap():
    """
    Wrapper to midi read and midi write

    Imports ``midiread`` and ``midiwrite`` from ``midi.utils`` found in the
    resource directory. If the import fails, the midi.zip archive is
    downloaded (unless already present), extracted into the resource
    directory, and the import is retried.

    Returns:
        (midiread, midiwrite): the two functions from ``midi.utils``.

    Raises:
        ImportError: if ``midi.utils`` still cannot be imported after the
            archive has been extracted.
    """
    def _import_midi_utils():
        # Temporarily expose the resource directory on sys.path; the finally
        # clause removes the entry even when the import fails, so a failed
        # first attempt no longer leaks a sys.path entry.
        sys.path.insert(1, get_resource_dir(""))
        try:
            from midi.utils import midiread, midiwrite
        finally:
            sys.path.pop(1)
        return midiread, midiwrite

    try:
        return _import_midi_utils()
    except ImportError:
        url = "http://www.iro.umontreal.ca/~lisa/deep/midi.zip"
        # The URL is passed as a lazy %-argument; previously it was passed
        # without a placeholder, which made the logging call fail to format.
        logger.info("Need GPL licensed midi utils, downloading... %s", url)
        partial_path = get_resource_dir("")
        full_path = os.path.join(partial_path, "midi.zip")
        if not os.path.exists(full_path):
            download(url, full_path)
        # Context manager closes the archive even if extraction raises.
        with zipfile.ZipFile(full_path, 'r') as zip_ref:
            zip_ref.extractall(partial_path)
        return _import_midi_utils()
def fetch_sample_speech_fruit(n_samples=None):
    """Download (if needed) and load the fruit speech sample archive.

    Args:
        n_samples: maximum number of WAV members to load; None loads all.

    Returns:
        (fs, speech, wav_names): ``fs`` is the sample rate of the last file
        read (None when no file was read), ``speech`` is a list of float32
        arrays scaled by 1 / 2**15, and ``wav_names`` lists every WAV member
        name in the archive (it is not truncated to ``n_samples``).
    """
    url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
    partial_path = get_resource_dir("fruit")
    full_path = os.path.join(partial_path, "audio.tar.gz")
    if not os.path.exists(full_path):
        download(url, full_path)
    # Defined up front so an empty selection returns None instead of raising
    # UnboundLocalError at the return statement.
    fs = None
    speech = []
    tf = tarfile.open(full_path)
    try:
        wav_names = [fname for fname in tf.getnames()
                     if ".wav" in fname.split(os.sep)[-1]]
        logger.info("Loading speech files...")
        for wav_name in wav_names[:n_samples]:
            f = tf.extractfile(wav_name)
            fs, d = wavfile.read(f)
            d = d.astype('float32') / (2 ** 15)
            speech.append(d)
    finally:
        # Always release the archive handle, even if a member fails to load.
        tf.close()
    return fs, speech, wav_names
def readwav(file):
    # wavio.py
    # Author: Warren Weckesser
    # License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
    """
    Read a wav file.

    Returns the frame rate, sample width (in bytes) and a numpy array
    containing the data.

    This function does not read compressed wav files.

    Args:
        file: value passed straight to ``wave.open``.

    Returns:
        (rate, sampwidth, array): frame rate, sample width in bytes, and the
        result of ``_wav2array`` applied to the raw frame bytes.
    """
    wav = wave.open(file)
    try:
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        sampwidth = wav.getsampwidth()
        nframes = wav.getnframes()
        data = wav.readframes(nframes)
    finally:
        # Close the handle even if reading the header or frames raises.
        wav.close()
    array = _wav2array(nchannels, sampwidth, data)
    return rate, sampwidth, array
def midiwrap():
    """
    Wrapper to midi read and midi write

    Imports ``midiread`` and ``midiwrite`` from ``midi.utils`` found in the
    resource directory. If the import fails, the midi.zip archive is
    downloaded (unless already present), extracted into the resource
    directory, and the import is retried.

    Returns:
        (midiread, midiwrite): the two functions from ``midi.utils``.

    Raises:
        ImportError: if ``midi.utils`` still cannot be imported after the
            archive has been extracted.
    """
    def _import_midi_utils():
        # Temporarily expose the resource directory on sys.path; the finally
        # clause removes the entry even when the import fails, so a failed
        # first attempt no longer leaks a sys.path entry.
        sys.path.insert(1, get_resource_dir(""))
        try:
            from midi.utils import midiread, midiwrite
        finally:
            sys.path.pop(1)
        return midiread, midiwrite

    try:
        return _import_midi_utils()
    except ImportError:
        url = "http://www.iro.umontreal.ca/~lisa/deep/midi.zip"
        # The URL is passed as a lazy %-argument; previously it was passed
        # without a placeholder, which made the logging call fail to format.
        logger.info("Need GPL licensed midi utils, downloading... %s", url)
        partial_path = get_resource_dir("")
        full_path = os.path.join(partial_path, "midi.zip")
        if not os.path.exists(full_path):
            download(url, full_path)
        # Context manager closes the archive even if extraction raises.
        with zipfile.ZipFile(full_path, 'r') as zip_ref:
            zip_ref.extractall(partial_path)
        return _import_midi_utils()
def fetch_sample_audio_chords(n_samples=None):
    """Download (if needed) and load the piano chords sample archive.

    Args:
        n_samples: maximum number of WAV members to load; None loads all.

    Returns:
        (fs, chords, wav_names): ``fs`` is the sample rate of the last file
        read (None when no file was read), ``chords`` is a list of float32
        arrays scaled by 1 / 2**15, and ``wav_names`` lists every WAV member
        name in the archive (it is not truncated to ``n_samples``).
    """
    url = "https://dl.dropboxusercontent.com/u/15378192/piano_chords.tar.gz"
    partial_path = get_resource_dir("chords")
    full_path = os.path.join(partial_path, "piano_chords.tar.gz")
    if not os.path.exists(full_path):
        download(url, full_path)
    # Defined up front so an empty selection returns None instead of raising
    # UnboundLocalError at the return statement.
    fs = None
    chords = []
    tf = tarfile.open(full_path)
    try:
        wav_names = [fname for fname in tf.getnames()
                     if ".wav" in fname.split(os.sep)[-1]]
        logger.info("Loading audio files...")
        for wav_name in wav_names[:n_samples]:
            f = tf.extractfile(wav_name)
            fs, d = wavfile.read(f)
            d = d.astype('float32') / (2 ** 15)
            chords.append(d)
    finally:
        # Always release the archive handle, even if a member fails to load.
        tf.close()
    return fs, chords, wav_names
def fetch_sample_speech_fruit(n_samples=None):
    """Download (if needed) and load the fruit speech sample archive.

    Args:
        n_samples: maximum number of WAV members to load; None loads all.

    Returns:
        (fs, speech, wav_names): ``fs`` is the sample rate of the last file
        read (None when no file was read), ``speech`` is a list of float32
        arrays scaled by 1 / 2**15, and ``wav_names`` lists every WAV member
        name in the archive (it is not truncated to ``n_samples``).
    """
    url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
    partial_path = get_resource_dir("fruit")
    full_path = os.path.join(partial_path, "audio.tar.gz")
    if not os.path.exists(full_path):
        download(url, full_path)
    # Defined up front so an empty selection returns None instead of raising
    # UnboundLocalError at the return statement.
    fs = None
    speech = []
    tf = tarfile.open(full_path)
    try:
        wav_names = [fname for fname in tf.getnames()
                     if ".wav" in fname.split(os.sep)[-1]]
        logger.info("Loading speech files...")
        for wav_name in wav_names[:n_samples]:
            f = tf.extractfile(wav_name)
            fs, d = wavfile.read(f)
            d = d.astype('float32') / (2 ** 15)
            speech.append(d)
    finally:
        # Always release the archive handle, even if a member fails to load.
        tf.close()
    return fs, speech, wav_names
def readwav(file):
    # wavio.py
    # Author: Warren Weckesser
    # License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
    """
    Read a wav file.

    Returns the frame rate, sample width (in bytes) and a numpy array
    containing the data.

    This function does not read compressed wav files.

    Args:
        file: value passed straight to ``wave.open``.

    Returns:
        (rate, sampwidth, array): frame rate, sample width in bytes, and the
        result of ``_wav2array`` applied to the raw frame bytes.
    """
    wav = wave.open(file)
    try:
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        sampwidth = wav.getsampwidth()
        nframes = wav.getnframes()
        data = wav.readframes(nframes)
    finally:
        # Close the handle even if reading the header or frames raises.
        wav.close()
    array = _wav2array(nchannels, sampwidth, data)
    return rate, sampwidth, array
def fetch_sample_audio_chords(n_samples=None):
    """Download (if needed) and load the piano chords sample archive.

    Args:
        n_samples: maximum number of WAV members to load; None loads all.

    Returns:
        (fs, chords, wav_names): ``fs`` is the sample rate of the last file
        read (None when no file was read), ``chords`` is a list of float32
        arrays scaled by 1 / 2**15, and ``wav_names`` lists every WAV member
        name in the archive (it is not truncated to ``n_samples``).
    """
    url = "https://dl.dropboxusercontent.com/u/15378192/piano_chords.tar.gz"
    partial_path = get_resource_dir("chords")
    full_path = os.path.join(partial_path, "piano_chords.tar.gz")
    if not os.path.exists(full_path):
        download(url, full_path)
    # Defined up front so an empty selection returns None instead of raising
    # UnboundLocalError at the return statement.
    fs = None
    chords = []
    tf = tarfile.open(full_path)
    try:
        wav_names = [fname for fname in tf.getnames()
                     if ".wav" in fname.split(os.sep)[-1]]
        logger.info("Loading audio files...")
        for wav_name in wav_names[:n_samples]:
            f = tf.extractfile(wav_name)
            fs, d = wavfile.read(f)
            d = d.astype('float32') / (2 ** 15)
            chords.append(d)
    finally:
        # Always release the archive handle, even if a member fails to load.
        tf.close()
    return fs, chords, wav_names
def fetch_sample_speech_fruit(n_samples=None):
    """Download (if needed) and load the fruit speech sample archive.

    Args:
        n_samples: maximum number of WAV members to load; None loads all.

    Returns:
        (fs, speech, wav_names): ``fs`` is the sample rate of the last file
        read (None when no file was read), ``speech`` is a list of float32
        arrays scaled by 1 / 2**15, and ``wav_names`` lists every WAV member
        name in the archive (it is not truncated to ``n_samples``).
    """
    url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
    partial_path = get_resource_dir("fruit")
    full_path = os.path.join(partial_path, "audio.tar.gz")
    if not os.path.exists(full_path):
        download(url, full_path)
    # Defined up front so an empty selection returns None instead of raising
    # UnboundLocalError at the return statement.
    fs = None
    speech = []
    tf = tarfile.open(full_path)
    try:
        wav_names = [fname for fname in tf.getnames()
                     if ".wav" in fname.split(os.sep)[-1]]
        logger.info("Loading speech files...")
        for wav_name in wav_names[:n_samples]:
            f = tf.extractfile(wav_name)
            fs, d = wavfile.read(f)
            d = d.astype('float32') / (2 ** 15)
            speech.append(d)
    finally:
        # Always release the archive handle, even if a member fails to load.
        tf.close()
    return fs, speech, wav_names