import numpy as np
from scipy.io.wavfile import write

def wavWrite(y, fs, nbits, audioFile):
    """ Write samples to a WAV file.
    Args:
        y:         (ndarray / 2D ndarray) Floating point sample vector
                   mono:   DIM: nSamples
                   stereo: DIM: nSamples x nChannels
        fs:        (int)    Sample rate in Hz
        nbits:     (int)    Number of bits
        audioFile: (string) WAV file name to write
    """
    # AudioIO.normFact maps 'int8'/'int16' to the full-scale factor of that dtype
    if nbits == 8:
        intsamples = (y + 1.0) * AudioIO.normFact['int' + str(nbits)]
        fX = np.int8(intsamples)
    elif nbits == 16:
        intsamples = y * AudioIO.normFact['int' + str(nbits)]
        fX = np.int16(intsamples)
    elif nbits > 16:
        fX = y
    else:
        raise ValueError("unsupported bit depth: %d" % nbits)
    write(audioFile, fs, fX)
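# A usage sketch, assuming AudioIO.normFact maps 'int16' to the int16
# full-scale value (e.g. 32767) so a float vector in [-1, 1] round-trips:
fs = 44100
t = np.arange(fs) / float(fs)
y = 0.5 * np.sin(2 * np.pi * 440 * t)  # one second of A4 at half amplitude
wavWrite(y, fs, 16, 'tone.wav')        # scaled to int16 and written to disk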
def sound(x, fs):
    """ Play a sample vector using the pyglet library. The samples are first
    written to a temporary WAV file; playback stops on any keyboard input
    followed by Enter.
    Args:
        x:  (array) Floating point samples
        fs: (int)   The sampling rate
    """
    import os
    import pyglet as pg
    global player
    # Write the samples to a temporary file
    AudioIO.wavWrite(x, fs, 16, 'testPlayback.wav')
    # Initialize the playback engine
    player = pg.media.Player()
    # Load the audio file and queue it on the player
    playback = pg.media.load('testPlayback.wav')
    player.queue(playback)
    # Start playback
    player.play()
    # Any line of input (including an empty one) stops playback
    input()
    AudioIO.stop()
    # Remove the temporary file
    os.remove('testPlayback.wav')
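# If the detour through a temporary file is unwanted, the third-party
# sounddevice package (an assumption, not used by this source) can play
# the float buffer directly:
import sounddevice as sd

sd.play(y, fs)  # non-blocking playback of the float vector
sd.wait()       # block until playback has finished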
def download_bundle(bundle_name, target_dir, force_reload=False):
    """Downloads a Magenta bundle to target directory.
    Args:
        bundle_name: A string Magenta bundle name to download.
        target_dir: A string local directory in which to write the bundle.
        force_reload: A boolean that when True, reloads the bundle even if present.
    """
    bundle_target = os.path.join(target_dir, bundle_name)
    if not os.path.exists(bundle_target) or force_reload:
        response = urllib.request.urlopen(
            'http://download.magenta.tensorflow.org/models/%s' % bundle_name)
        data = response.read()
        with open(bundle_target, 'wb') as local_file:
            local_file.write(data)
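# A usage sketch; 'attention_rnn.mag' is one published Magenta bundle name,
# used here only as an example:
import os
import urllib.request

target = os.path.expanduser('~/magenta_bundles')
os.makedirs(target, exist_ok=True)
download_bundle('attention_rnn.mag', target)  # skipped if already present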
def run_phase_reconstruction_example():
    fs, d = fetch_sample_speech_tapestry()
    # stft returns as many components as requested, so use double the
    # value given in the reference .m file
    fftsize = 512
    step = 64
    X_s = np.abs(stft(d, fftsize=fftsize, step=step, real=False,
                      compute_onesided=False))
    X_t = iterate_invert_spectrogram(X_s, fftsize, step, verbose=True)
    """
    import matplotlib.pyplot as plt
    plt.specgram(d, cmap="gray")
    plt.savefig("1.png")
    plt.close()
    plt.imshow(X_s, cmap="gray")
    plt.savefig("2.png")
    plt.close()
    """
    wavfile.write("phase_original.wav", fs, soundsc(d))
    wavfile.write("phase_reconstruction.wav", fs, soundsc(X_t))
def run_fft_dct_example():
    random_state = np.random.RandomState(1999)
    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    X = d[0]
    X_stft = stft(X, n_fft)
    X_rr = complex_to_real_view(X_stft)
    X_dct = fftpack.dct(X_rr, axis=-1, norm='ortho')
    # first-order difference along time, lightly perturbed with noise
    X_dct_sub = X_dct[1:] - X_dct[:-1]
    std = X_dct_sub.std(axis=0, keepdims=True)
    X_dct_sub += .01 * std * random_state.randn(
        X_dct_sub.shape[0], X_dct_sub.shape[1])
    X_dct_unsub = np.cumsum(X_dct_sub, axis=0)
    X_idct = fftpack.idct(X_dct_unsub, axis=-1, norm='ortho')
    X_irr = real_to_complex_view(X_idct)
    X_r = istft(X_irr, n_fft)[:len(X)]
    # signal-to-noise ratio of the reconstruction, in dB
    SNR = 20 * np.log10(np.linalg.norm(X) / np.linalg.norm(X - X_r))
    print(SNR)
    wavfile.write("fftdct_orig.wav", fs, soundsc(X))
    wavfile.write("fftdct_rec.wav", fs, soundsc(X_r))
def run_ltsd_example():
    fs, d = fetch_sample_speech_tapestry()
    winsize = 1024
    d = d.astype("float32") / 2 ** 15
    d -= d.mean()
    # prepend 3 seconds of low-power noise so the VAD has a noise floor
    pad = 3 * fs
    noise_pwr = np.percentile(d, 1) ** 2
    noise_pwr = max(1E-9, noise_pwr)
    d = np.concatenate((np.zeros((pad,)) + noise_pwr * np.random.randn(pad), d))
    _, vad_segments = ltsd_vad(d, fs, winsize=winsize)
    # keep everything from the first voiced sample to half a second past the last
    v_up = np.where(vad_segments)[0]
    s = v_up[0]
    st = v_up[-1] + int(.5 * fs)
    d = d[s:st]
    bname = "tapestry.wav".split(".")[0]
    wavfile.write("%s_out.wav" % bname, fs, soundsc(d))
def urlretrieve(url, filename, reporthook=None, data=None):
    '''
    This function is adapted from: https://github.com/fchollet/keras
    Original work Copyright (c) 2014-2015 keras contributors
    '''
    def chunk_read(response, chunk_size=8192, reporthook=None):
        total_size = response.info().get('Content-Length').strip()
        total_size = int(total_size)
        count = 0
        while True:
            chunk = response.read(chunk_size)
            if not chunk:
                break
            count += 1
            if reporthook:
                reporthook(count, chunk_size, total_size)
            yield chunk
    response = urlopen(url, data)
    with open(filename, 'wb') as fd:
        for chunk in chunk_read(response, reporthook=reporthook):
            fd.write(chunk)
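# A usage sketch showing the reporthook contract (chunks read so far,
# chunk size, total size in bytes); the URL is a placeholder:
def print_progress(count, chunk_size, total_size):
    pct = min(100, 100 * count * chunk_size // total_size)
    print("\rdownloaded %3d%%" % pct, end="", flush=True)

# urlretrieve("http://example.com/archive.zip", "archive.zip",
#             reporthook=print_progress)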
def generate_and_save_samples(sample_fn, length, count, dir, rate, levels):
    def save_samples(data):
        # collapse the one-hot level axis to a scalar amplitude in [0, 1]
        data = (data * np.reshape(np.arange(levels) / (levels - 1), [levels, 1, 1])).sum(
            axis=1, keepdims=True)
        value = np.iinfo(np.int16).max
        audio = (utils.inverse_mulaw(data * 2 - 1) * value).astype(np.int16)
        for idx, sample in enumerate(audio):
            filename = os.path.join(dir, 'sample_{}.wav'.format(idx))
            wavfile.write(filename, rate, np.squeeze(sample))

    samples = chainer.Variable(
        chainer.cuda.cupy.zeros([count, levels, 1, length], dtype='float32'))
    one_hot_ref = chainer.cuda.cupy.eye(levels).astype('float32')
    # autoregressive sampling: each step conditions on all previous samples
    with tqdm.tqdm(total=length) as bar:
        for i in range(length):
            probs = F.softmax(sample_fn(samples))[:, :, 0, 0, i]
            samples.data[:, :, 0, i] = one_hot_ref[utils.sample_from(probs.data.get())]
            bar.update()
    samples.to_cpu()
    save_samples(samples.data)
def save_audio_file(filename, quantized_signal, quantization_steps=256, format="16bit_pcm", sampling_rate=48000):
    quantized_signal = quantized_signal.astype(float)
    normalized_signal = (quantized_signal / quantization_steps - 0.5) * 2.0
    # inverse mu-law companding transformation (ITU-T G.711, 1988)
    mu = quantization_steps - 1
    signals_1d = np.sign(normalized_signal) * ((1 + mu) ** np.absolute(normalized_signal) - 1) / mu
    if format == "16bit_pcm":
        scale = (1 << 15) - 1
        dtype = np.int16
    elif format == "32bit_pcm":
        scale = (1 << 31) - 1
        dtype = np.int32
    elif format == "8bit_pcm":
        scale = (1 << 8) - 1
        dtype = np.uint8
    if dtype == np.uint8:
        # unsigned 8-bit PCM is offset binary: map [-1, 1] to [0, 255]
        signals_1d = (signals_1d * 0.5 + 0.5) * scale
    else:
        signals_1d *= scale
    # duplicate the mono signal into two identical stereo channels
    audio = signals_1d.reshape((-1, 1)).astype(dtype)
    audio = np.repeat(audio, 2, axis=1)
    wavfile.write(filename, sampling_rate, audio)
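# For reference, the standard mu-law companding pair (ITU-T G.711) that
# utils.inverse_mulaw and the inverse transformation above undo, as a
# minimal numpy sketch:
import numpy as np

def mulaw(x, mu=255):
    # compress: [-1, 1] -> [-1, 1], roughly logarithmic in |x|
    return np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)

def inverse_mulaw(y, mu=255):
    # expand: the exact inverse of mulaw
    return np.sign(y) * ((1.0 + mu) ** np.abs(y) - 1.0) / mu

x = np.linspace(-1, 1, 5)
assert np.allclose(inverse_mulaw(mulaw(x)), x)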
# convert signal to 1xW image
def test():
    return  # early return disables this test; delete this line to run it
    num_test = 100
    x_cpu = sampler0.memories_test[0:num_test]
    x = Variable(torch.FloatTensor(x_cpu))
    y = generator(x).data.cpu().numpy()
    chunk_size = y.shape[2]
    output_stft = np.zeros([num_test, chunk_size], dtype=np.complex128)
    for i in range(num_test):
        output_flat = y[i]
        # the two channels hold the real and imaginary parts of the STFT
        output_complex = output_flat[0] + 1j * output_flat[1]
        output_stft[i] = output_complex
    output_wav = stft.istft(output_stft / spectrogram.spec_norm)
    wavfile.write("test.wav", 8000, output_wav.reshape(-1))
def test():
    print("testing...")
    fake = generator(fixed_noise).data.cpu().numpy()
    print(fake.shape)
    fake = fake.reshape(-1, sampler.num_memory_channels, sampler.num_history, sampler.memory_size)
    print(fake.shape)
    # channel 0 holds the real part, channel 1 the imaginary part
    fake_real = fake[:, 0:1, :sampler.num_history, :sampler.memory_size]
    fake_imag = fake[:, 1:2, :sampler.num_history, :sampler.memory_size]
    fake = fake_real + 1j * fake_imag
    print(fake.shape)
    fake = fake.reshape(-1, sampler.memory_size)
    print(fake.shape)
    output_wav = stft.istft(fake / spectrogram.spec_norm)
    print(output_wav.shape)
    wavfile.write("test.wav", 8000, output_wav.reshape(-1))
def test():
    model.load("music.tflearn")
    # seed the generator; the dataset frame below is immediately replaced
    # by uniform noise (both starting points were evidently being tried)
    X = fft_sequences[10].reshape([1, num_history, fft_stored_size])
    X = np.random.uniform(low=-0.1, high=0.1, size=[1, num_history, fft_stored_size])
    num_test = 10
    output = np.zeros([num_test, num_history, fft_stored_size])
    for test_i in range(num_test):
        Y = np.array(model.predict(X)[0])
        output[test_i] = Y.reshape([num_history, fft_stored_size])
        # feed the prediction back as the next input
        X = Y.reshape([1, num_history, fft_stored_size])
        # zero out near-silent bins
        X[np.where(np.square(X) < 0.0001)] = 0.0
        #X *= 1.5
        #raw test
        #output[test_i] = fft_next[test_i]
        #print(output[test_i])
    wav = convert(output)
    print("wav: " + str(wav.shape))
    wavfile.write("test.wav", 8000, wav)
def main():
    # Select filename
    filename = 'bach10sec.wav'
    # Test loudest_band with Bach; send only one of the two channels
    music, frame_rate, nframes, nchannels = read_wave(filename, False)
    # Plot FFT of input signal
    #plot_fft(music[:,0], frame_rate, -5000, 5000)
    (low, high, loudest) = loudest_band(music[:, 0], frame_rate, 75)
    print("low: ", low, " Hz")
    print("high: ", high, " Hz")
    # Plot FFT of output signal
    #plot_fft(loudest, frame_rate, -5000, 5000)
    # Write file
    loudest = loudest / np.max(loudest)   # normalize to amplitude one
    loudest = loudest.astype(np.float32)  # convert to 32-bit float
    wavfile.write(filename[0:-4] + '_filtered.wav', frame_rate, loudest)
def main():
    # Select filename
    filename = 'scary.wav'
    # Test loudest_band; send only one of the two channels
    music, frame_rate, nframes, nchannels = read_wave(filename, False)
    # Plot FFT of input signal
    plot_fft(music[:, 0], frame_rate, -5000, 5000)
    (low, high, loudest) = loudest_band(music[:, 0], frame_rate, 1000)
    print("low: ", low, " Hz")
    print("high: ", high, " Hz")
    # Plot FFT of output signal
    plot_fft(loudest, frame_rate, -5000, 5000)
    # Write file
    loudest = loudest / np.max(loudest)   # normalize to amplitude one
    loudest = loudest.astype(np.float32)  # convert to 32-bit float
    wavfile.write(filename[0:-4] + '_filtered.wav', frame_rate, loudest)
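# read_wave, plot_fft, and loudest_band are helpers from the source repo.
# A rough sketch of what loudest_band plausibly does (an assumption: slide
# a fixed-width band over the power spectrum and band-pass the strongest):
import numpy as np

def loudest_band_sketch(x, fs, bandwidth):
    X = np.fft.rfft(x)
    freqs = np.fft.rfftfreq(len(x), 1.0 / fs)
    width = max(1, int(bandwidth / (freqs[1] - freqs[0])))
    power = np.abs(X) ** 2
    # a cumulative sum makes every band-energy query O(1)
    csum = np.concatenate(([0.0], np.cumsum(power)))
    start = int(np.argmax(csum[width:] - csum[:-width]))
    mask = np.zeros_like(X)
    mask[start:start + width] = 1.0
    filtered = np.fft.irfft(X * mask, n=len(x))
    return freqs[start], freqs[min(start + width, len(freqs) - 1)], filtered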
def midiwrap():
    """
    Wrapper around midi read and midi write; fetches the GPL-licensed
    midi utilities on first use.
    """
    try:
        sys.path.insert(1, get_resource_dir(""))
        from midi.utils import midiread, midiwrite
        sys.path.pop(1)
    except ImportError:
        url = "http://www.iro.umontreal.ca/~lisa/deep/midi.zip"
        logger.info("Need GPL licensed midi utils, downloading from %s", url)
        partial_path = get_resource_dir("")
        full_path = os.path.join(partial_path, "midi.zip")
        if not os.path.exists(full_path):
            download(url, full_path)
        with zipfile.ZipFile(full_path, 'r') as zip_ref:
            zip_ref.extractall(partial_path)
        sys.path.insert(1, get_resource_dir(""))
        from midi.utils import midiread, midiwrite
        sys.path.pop(1)
    return midiread, midiwrite
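# Typical use, following the conventions of the Montreal midi.utils module
# (piano rolls indexed as time x pitch; the file names, pitch range
# (21, 109), and time step 0.3 below are illustrative):
midiread, midiwrite = midiwrap()
roll = midiread("input.mid", (21, 109), 0.3).piano_roll
midiwrite("roundtrip.mid", roll, (21, 109), 0.3)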