def _words_list_to_file(anim_words):
"""
Creates an audio file from the words specified in the given list
"""
anim_files = [os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"audio",
word.lower() + ".wav"
)
for word in anim_words
]
out_audio = AudioSegment.from_wav(anim_files[0])
for word_file in anim_files[1:]:
out_audio += AudioSegment.from_wav(word_file)
out_file = out_audio.export("/tmp/file.wav", format="wav")
return "/tmp/file.wav"
# Example source code for the Python from_wav() method
def cut_audio(line):
    ''' Cuts one audio file between the given start and stop times and
    exports the excerpt to the designated output folder.

    line format: [AUDIO_FILE_NAME, START, STOP]

    The last four characters of the file name select the handling:
    '.wav' and '.mp3' are cut and exported in the same format; anything
    else is reported on stdout and skipped. Input and output folders are
    read from the module-level ``args`` (``in_dir`` / ``out_dir``).
    '''
    source_name = line[0]
    extension = source_name[-4:]

    # Guard clause: report and skip anything that is not .wav / .mp3
    if extension not in ('.wav', '.mp3'):
        print('**** ' + source_name + ' caused an error')
        print('**** ' + extension + ' incompatible file type')
        print('**** skipping file\n')
        return

    if extension == '.wav':
        loader, export_format = AudioSegment.from_wav, "wav"
    else:
        loader, export_format = AudioSegment.from_mp3, "mp3"

    loaded = loader(args.in_dir + source_name)
    excerpt = loaded[cvt_time(line[1]):cvt_time(line[2])]

    # Build the name of the excerpt: <stem>_cut_<n><original extension>
    sequence_number = get_num(source_name)
    excerpt_name = source_name[0:-4] + '_cut_%d' % sequence_number + extension

    # Write the excerpt to the output folder and report it
    excerpt.export(args.out_dir + excerpt_name, format=export_format)
    print('---> ' + excerpt_name + '\n')
def test_text_to_speech(self):
    """
    A short text must synthesize to audio exactly as long as the reference.

    Calls ``utils.text_to_speech`` with the single word 'hello', exports the
    result to '.test_utils/test.mp3', and compares its length with the
    reference clip in 'tests/test_files/test.wav'.
    """
    # NOTE(review): catch_requests() is defined outside this block; it
    # presumably stubs outgoing HTTP requests - confirm.
    catch_requests()
    text = ('hello')
    audio = utils.text_to_speech(
        text=text,
        synthesizer=self.synthesizer,
        synth_args=self.synth_args,
        sentence_break='. ',
    )
    sample = AudioSegment.from_wav('tests/test_files/test.wav')
    audio.export('.test_utils/test.mp3', format='mp3')
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(len(audio), len(sample))
def test_text_to_speech_sentence_break(self):
    """
    Text of 51 words split on single spaces must yield audio that is
    longer than fifty times the reference clip.
    """
    # NOTE(review): catch_requests() is defined outside this block; it
    # presumably stubs outgoing HTTP requests - confirm.
    catch_requests()
    repeated_words = 'hello ' * 51
    speech = utils.text_to_speech(
        text=repeated_words,
        synthesizer=self.synthesizer,
        synth_args=self.synth_args,
        sentence_break=' ',
    )
    reference = AudioSegment.from_wav('tests/test_files/test.wav')
    minimum_length = len(reference) * 50
    self.assertGreater(len(speech), minimum_length)
def tags_to_wav(media_path, out_dir, tag_pairs):
    """
    Export one mono WAV clip per (start, end) pair from a media file.

    MP3 and WAV inputs are loaded directly. Any other input is first
    converted with ffmpeg to a temporary WAV under /var/tmp, which is
    removed again before the function returns.

    :param media_path:
        Path of the source media file.
    :param out_dir:
        Folder that receives the clips. Each clip is named
        ``<basename>_start_<start>_dur_<duration>.wav``; clips that already
        exist are left untouched.
    :param tag_pairs:
        Iterable of pairs whose first item is the clip start and second item
        the clip end. Both are multiplied by 1000 before slicing, so they
        are presumably seconds - verify against the caller.
    :return:
        None. If the audio cannot be loaded, the exception is printed and
        the process exits with status 2.
    """
    basename = os.path.splitext(os.path.basename(media_path))[0]
    temp_path = None
    if media_path.lower()[-4:] not in ('.mp3', '.wav'):
        # Input is neither MP3 nor WAV (e.g. MP4): convert it to a temp WAV.
        temp_path = '/var/tmp/' + os.path.basename(media_path) + '_temp.wav'
        # '-y' overwrites an existing file if present.
        subprocess.call(['ffmpeg', '-y', '-i', media_path, temp_path])
        audio_path = temp_path
    else:
        audio_path = media_path

    try:
        try:
            if audio_path[-4:].lower() == '.mp3':
                song = AudioSegment.from_mp3(audio_path)
            else:
                song = AudioSegment.from_wav(audio_path)
        except Exception as inst:
            print(inst)
            sys.exit(2)

        for pair in tag_pairs:
            start = pair[0]
            duration = pair[1] - pair[0]
            clip_pathname = os.path.join(
                out_dir,
                basename + "_start_" + str(start) + "_dur_" + str(duration) + ".wav",
            )
            start_msec = float(start) * 1000.0
            duration_msec = float(duration) * 1000
            if not os.path.exists(clip_pathname):
                clip_data = song[start_msec:start_msec + duration_msec]
                clip_data = clip_data.set_channels(1)
                clip_data.export(clip_pathname, format="wav")
    finally:
        # Remove the converted temp WAV, also when loading failed and we exit.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best effort: ffmpeg may never have produced the file.
                pass
def excerpt_segments(segments_df, inputfile, out_dir, mono):
    """
    Export the part of a WAV file between its <START> and <END> markers.

    :param segments_df:
        Table with a 'Names' column and an 'Instants' column; the rows named
        "<START>" and "<END>" give the excerpt boundaries. Presumably a
        pandas DataFrame with instants in seconds (they are multiplied by
        1000 before slicing) - verify against the caller.
    :param inputfile:
        Path of the source WAV file.
    :param out_dir:
        Output folder; it is concatenated directly with the file name, so it
        must end with a path separator.
    :param mono:
        When equal to True the excerpt is reduced to one channel.
    :return:
        None on success, or an "ERROR: ..." string if the WAV file could not
        be loaded.
    """
    try:
        song = AudioSegment.from_wav(inputfile)
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
        return "ERROR: "+inputfile+" can't be found."

    # Take the first matching row explicitly; calling float() on a whole
    # Series is deprecated in pandas.
    start = float(segments_df[segments_df['Names'] == "<START>"]['Instants'].iloc[0])
    end = float(segments_df[segments_df['Names'] == "<END>"]['Instants'].iloc[0])
    start_msec = start * 1000.0
    end_msec = end * 1000
    clip_data = song[start_msec:end_msec]
    clip_pathname = out_dir + inputfile.split('/')[-1][:-4] + '_reading_excerpt' + '.wav'
    # Kept as an equality test so only True/1 trigger the down-mix, as before.
    if mono == True:
        clip_data = clip_data.set_channels(1)
    # Each ffmpeg flag and its value must be a separate argument; a fused
    # "-ar 48000" token is not a valid option.
    clip_data.export(
        clip_pathname,
        format="wav",
        parameters=["-ar", "48000", "-acodec", "pcm_s24le"],
    )
def play(filepath, content_type='audio/wav'):
    """
    Will attempt to play various audio file types (wav, ogg, mp3).

    :param filepath:
        Path of the audio file to play.
    :param content_type:
        MIME-style content type used to pick the decoder. Matching is by
        substring: 'wav' selects WAV, 'ogg' or 'opus' selects Ogg, 'mp3' or
        'mpeg' selects MP3.
    :raises ValueError:
        If ``content_type`` matches none of the supported formats.
    """
    if 'wav' in content_type:
        sound = AudioSegment.from_wav(filepath)
    elif 'ogg' in content_type or 'opus' in content_type:
        sound = AudioSegment.from_ogg(filepath)
    elif 'mp3' in content_type or 'mpeg' in content_type:
        sound = AudioSegment.from_mp3(filepath)
    else:
        # Previously fell through and crashed on the unbound 'sound' name.
        raise ValueError('Unsupported audio content type: ' + repr(content_type))
    pydub_play(sound)
def synthesize(self, text, src, dst):
    """
    Build "generated.wav" by overlaying per-syllable recordings.

    text is converted to tone-numbered pinyin syllables; for each syllable
    the file <src><syllable>.wav is overlaid onto a silent track, 355 ms
    after the previous one. Punctuation advances the position the same way
    with a 500 ms silent clip; syllables without a recording are skipped
    without advancing.

    src is the folder prefix holding the syllable .wav files
    dst is the folder prefix the result is written to (created if missing)

    Both are joined by plain string concatenation, so they need a trailing
    path separator.
    """
    print("Synthesizing ...")
    step_ms = 355  # spacing between consecutive syllables
    punctuation_pause_ms = 500  # length of the clip used for punctuation
    offset_ms = 0
    pinyin_syllables = lazy_pinyin(text, style=pypinyin.TONE3)
    # start from pure silence, budgeting ~500 ms per input character
    track = AudioSegment.silent(duration=500*len(text))

    for current in pinyin_syllables:
        wav_path = src+current+".wav"
        wav_file = Path(wav_path)
        if current in TextToSpeech.punctuation:
            # punctuation: overlay a silent clip and move on
            clip = AudioSegment.silent(duration=punctuation_pause_ms)
        elif wav_file.is_file():
            clip = AudioSegment.from_wav(wav_path)
        else:
            # no recording for this syllable: leave the position unchanged
            continue
        track = track.overlay(clip, position=offset_ms)
        offset_ms += step_ms

    if not os.path.exists(dst):
        os.makedirs(dst)
    track.export(dst+"generated.wav", format="wav")
    print("Exported.")
def text_to_speech(text, synthesizer, synth_args, sentence_break='. '):
    """
    Converts given text to a pydub AudioSegment using a specified speech
    synthesizer. At the moment, IBM Watson's text-to-speech API is the only
    available synthesizer.

    Text shorter than 50 words is sent in one request. Longer text is split
    on ``sentence_break``; each non-blank piece is synthesized separately and
    the resulting segments are concatenated in order.

    :param text:
        The text that will be synthesized to audio.
    :param synthesizer:
        The text-to-speech synthesizer to use. At the moment, 'watson' is the
        only available input.
    :param synth_args:
        A dictionary of arguments to pass to the synthesizer. Parameters for
        authorization (username/password) should be passed here.
    :param sentence_break:
        A string that identifies a sentence break or another logical break in
        the text. Necessary for text longer than 50 words. Defaults to '. '.
    :return:
        A pydub AudioSegment holding the synthesized speech.
    :raises ValueError:
        If ``synthesizer`` is not supported, or if splitting long text on
        ``sentence_break`` leaves nothing to synthesize.
    """
    import tempfile

    # Validate once up front; no request is made for an unknown synthesizer.
    if synthesizer != 'watson':
        raise ValueError('"' + synthesizer + '" synthesizer not found.')

    def _synthesize(chunk):
        # Use a uniquely named temp file (not a fixed name in the working
        # directory) and always remove it, even if the request or the WAV
        # decoding raises.
        handle, temp_path = tempfile.mkstemp(suffix='.wav')
        try:
            with os.fdopen(handle, 'wb') as temp:
                temp.write(watson_request(text=chunk, synth_args=synth_args).content)
            return AudioSegment.from_wav(temp_path)
        finally:
            os.remove(temp_path)

    if len(text.split()) < 50:
        return _synthesize(text)

    # A trailing or doubled separator yields blank pieces; skip them rather
    # than sending empty text to the synthesizer.
    sentences = [piece for piece in text.split(sentence_break) if piece.strip()]
    if not sentences:
        raise ValueError('No text left to synthesize after splitting.')

    segments = [_synthesize(sentence) for sentence in sentences]
    response = segments[0]
    for segment in segments[1:]:
        response = response + segment
    return response
def audio_generator(dict_dir, text, output_dest):
    """
    Join the recorded clips for the words of *text* into one WAV file.

    ``<dict_dir>/myDict.py`` is read as a Python literal mapping each known
    word to a WAV file name inside ``dict_dir``. *text* is split on single
    spaces; words missing from the mapping are ignored. The joined audio is
    written to ``<output_dest>/output<N>.wav`` using the first N (from 0)
    whose file does not exist yet, and the outcome is printed.

    Raises Exception when none of the words is in the mapping.
    """
    with open(dict_dir + "/myDict.py") as dict_file:
        word_to_file = ast.literal_eval(dict_file.read())

    tokens = text.split(" ")
    known_words = word_to_file.keys()
    clips = [
        AudioSegment.from_wav(dict_dir + "/" + word_to_file[token])
        for token in tokens
        if token in known_words
    ]

    # At least one word must have produced a clip
    if not clips:
        raise Exception('\033[91m' + "None of the words you entered was" +
                        " spoken by your figure." + '\033[0m')

    # Pick the first numeric suffix whose output file does not exist yet
    suffix = 0
    while os.path.exists(output_dest + "/output" + str(suffix) + ".wav"):
        suffix += 1
    target = output_dest + "/output" + str(suffix) + ".wav"

    # Concatenate the selected clips in order
    joined = clips[0]
    for clip in clips[1:]:
        joined += clip

    # Export the joined audio and report whether the file appeared
    joined.export(target, format="wav")
    if not os.path.exists(target):
        print("Speech-Hacker: " '\033[91m' +
              "Failed to generate your requested audio." + '\033[0m')
    else:
        print('\033[94m' + "Speech-Hacker: " +
              "Your audio was generated at: " + target + '\033[0m')
def wavconvert(wav, codec):
    """
    Convert a WAV file to another format next to the original.

    The output path is the input path with its extension replaced by
    *codec* exactly as given. The quality argument passed to the encoder is
    '-q:a 6' for mp3/mp2/mpa and '-q:a 0' for everything else; aac/mp4/m4a
    are exported using the 'mp4' container format. Fixed album tags are
    embedded, with the current date as the year.

    Returns the path of the converted file.
    """
    from pydub import AudioSegment

    source = AudioSegment.from_wav(wav)
    stem = os.path.splitext(wav)[0]
    destination = stem + '.' + codec

    metadata = {
        'artist': 'Various Artists',
        'album': 'WeChat Voice',
        'year': time.strftime('%Y-%m-%d'),
        'comments': 'This album is awesome!',
    }

    normalized = codec.lower()
    quality = '6' if normalized in ['mp3', 'mp2', 'mpa'] else '0'
    encoder_args = ['-q:a', quality]
    # The AAC family is exported through the mp4 container format
    export_format = 'mp4' if normalized in ['aac', 'mp4', 'm4a'] else codec

    source.export(destination, format=export_format,
                  parameters=encoder_args, tags=metadata)
    return destination
def audio_to_export(sourcepath, wavepath, start, end):
    """
    Write the slice of a WAV file between *start* and *end* to a new WAV.

    sourcepath is the WAV file to read and wavepath the file to write.
    start and end are multiplied by 1000 before slicing, so they are
    presumably given in seconds - verify against the caller.
    """
    source_audio = AudioSegment.from_wav(sourcepath)
    begin_ms = start*1000
    finish_ms = end*1000
    excerpt = source_audio[begin_ms:finish_ms]
    excerpt.export(wavepath, format="wav")