def cut_audio(line):
    '''Cut one audio file between two timestamps and export the clip.

    line format: [AUDIO_FILE_NAME, START, STOP]

    The input is read from ``args.in_dir + AUDIO_FILE_NAME`` and the clip is
    written to ``args.out_dir`` as ``<name>_cut_<n>.<ext>``, where ``<n>`` is
    whatever ``get_num`` returns for the file name. START and STOP are passed
    through ``cvt_time`` before being used as slice bounds on the loaded
    audio (presumably converting them to milliseconds - confirm against
    ``cvt_time``).

    Only ``.wav`` and ``.mp3`` files are handled; the extension check is
    case-insensitive. Any other file is reported on stdout and skipped.
    '''
    audio_file_name = line[0]
    file_type = audio_file_name[-4:]
    # Maps the (lower-cased) four-character suffix to the pydub format name.
    supported_formats = {'.wav': 'wav', '.mp3': 'mp3'}
    audio_format = supported_formats.get(file_type.lower())

    if audio_format is None:
        # error, incompatible file type
        print('**** ' + audio_file_name + ' caused an error')
        print('**** ' + file_type + ' incompatible file type')
        print('**** skipping file\n')
        return

    if audio_format == 'wav':
        loader = AudioSegment.from_wav
    else:
        loader = AudioSegment.from_mp3
    audio = loader(args.in_dir + audio_file_name)
    cut = audio[cvt_time(line[1]):cvt_time(line[2])]

    # create name for cut audio file
    file_num = get_num(audio_file_name)
    cut_name = audio_file_name[0:-4] + '_cut_%d' % file_num + '.' + audio_format

    # export to output folder
    cut.export(args.out_dir + cut_name, format=audio_format)
    print('---> ' + cut_name + '\n')
python类from_mp3()的实例源码
def test_episode_text_setter(self):
    """Check that assigning new text to a published episode lengthens its audio.

    Publishes a one-word episode, measures the rendered MP3, replaces the
    episode text with sixty repetitions of the word, and asserts that the MP3
    at the same path is now more than fifty times longer.
    """
    catch_requests()
    title = 'Test Episode 1'
    self.podcast.add_episode(
        'hello',
        'plain',
        title,
        'Test Episode Author',
        sentence_break=' ',
        synth_args=self.synth_args,
    )
    self.podcast.publish(title)

    # Length of the audio rendered from the original single-word text.
    length_before = len(AudioSegment.from_mp3(self.output_path + '/test_episode_1.mp3'))

    # Assigning to .text is expected to update the file on disk.
    self.podcast.episodes[title].text = 'hello ' * 60

    length_after = len(AudioSegment.from_mp3(self.output_path + '/test_episode_1.mp3'))
    self.assertGreater(length_after, length_before * 50)
def covert_voice_to_text(self, file_name, msg_content, msg, user, msg_type_id):
    """Convert a received MP3 voice clip to WAV and submit it for recognition.

    The clip ``file_name`` is read from ``self.temp_pwd`` and re-encoded to
    ``temp_voice.wav`` in the same directory. The message context is stored
    on the instance (``temp_msg_content``, ``temp_msg``, ``temp_user``,
    ``temp_msg_type_id``) before the WAV is handed to
    ``self.baidu_yuyin.voice_to_text`` with ``self.voice_to_text_complete``
    as the callback.

    Any exception from decoding, exporting or the recognition call
    propagates to the caller unchanged. The previous catch-and-re-raise
    wrapper added nothing and, on Python 2, replaced the original traceback.
    """
    from pydub import AudioSegment

    source_path = os.path.join(self.temp_pwd, file_name)
    temp_file = os.path.join(self.temp_pwd, 'temp_voice.wav')

    sound = AudioSegment.from_mp3(source_path)
    # Debug trace of the intermediate WAV path; same output on Python 2 and 3.
    print('xdc:::::::::: ' + temp_file)
    # export() returns the open output file; close it before the WAV is reused.
    sound.export(temp_file, format="wav").close()

    # Stash the message context on the instance before recognition starts.
    self.temp_msg_content = msg_content
    self.temp_msg = msg
    self.temp_user = user
    self.temp_msg_type_id = msg_type_id
    self.baidu_yuyin.voice_to_text(temp_file, self.voice_to_text_complete)
def tags_to_wav(media_path, out_dir, tag_pairs):
    """Export tagged time ranges of a media file as mono WAV clips.

    Args:
        media_path: Path to the source media. Files ending in ``.mp3`` or
            ``.wav`` (case-insensitive) are loaded directly; anything else is
            first converted with ``ffmpeg`` to a temporary WAV under
            ``/var/tmp``.
        out_dir: Directory that receives the clips.
        tag_pairs: Iterable of pairs whose first two items are the start and
            end of a clip, in seconds.

    Each clip is written as ``<basename>_start_<start>_dur_<duration>.wav``
    and is skipped when a file with that name already exists. If the audio
    cannot be loaded the error is printed and the process exits with
    status 2. The temporary WAV, when one was created, is removed before
    returning or exiting.
    """
    basename = os.path.splitext(os.path.basename(media_path))[0]
    wav_source = media_path.lower()[-4:] in ('.mp3', '.wav')

    if wav_source:
        audio_path = media_path
    else:
        # Input is some other container (e.g. MP4): create a temporary WAV.
        temp_filename = media_path.split('/')[-1] + '_temp.wav'
        audio_path = '/var/tmp/' + temp_filename
        # '-y' overwrites an existing file if present.
        subprocess.call(['ffmpeg', '-y', '-i', media_path, audio_path])

    try:
        try:
            if audio_path[-4:].lower() == '.mp3':
                song = AudioSegment.from_mp3(audio_path)
            else:
                song = AudioSegment.from_wav(audio_path)
        except Exception as inst:
            print(inst)
            sys.exit(2)

        for pair in tag_pairs:
            start = pair[0]
            duration = pair[1] - pair[0]
            clip_pathname = os.path.join(
                out_dir,
                basename + "_start_" + str(start) + "_dur_" + str(duration) + ".wav",
            )
            if os.path.exists(clip_pathname):
                continue
            # The loaded audio is sliced in milliseconds.
            start_msec = float(start) * 1000.0
            duration_msec = float(duration) * 1000
            clip_data = song[start_msec:start_msec + duration_msec]
            clip_data = clip_data.set_channels(1)
            clip_data.export(clip_pathname, format="wav")
    finally:
        # Only delete the file this function created, never the caller's input.
        if not wav_source and os.path.exists(audio_path):
            os.remove(audio_path)
def play_mp3_pydub(filename):
    """Play an MP3 from a local path or an HTTP(S) URL.

    Args:
        filename: Local file path, or a URL starting with ``http``. A falsy
            value (``None`` or ``""``) is ignored and the function returns
            immediately.

    URL sources are downloaded into a temporary file, decoded and played.
    Any ordinary error on that path is reported on stdout as
    "Open file From Url failed" and swallowed. ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed, so playback can be interrupted.
    Errors while loading or playing a local file propagate to the caller.
    """
    if not filename:
        return

    if not filename.startswith("http"):
        song = AudioSegment.from_mp3(filename)
        play(song)
        return

    try:
        # Both the HTTP response and the temp file are closed on exit.
        with urllib.request.urlopen(filename) as mp3file:
            with tempfile.NamedTemporaryFile() as tmpfile:
                tmpfile.write(mp3file.read())
                # Rewind so the decoder reads the download from the start.
                tmpfile.seek(0)
                song = AudioSegment.from_mp3(tmpfile)
                play(song)
    except Exception:
        print("Open file From Url failed")
def play(filepath, content_type='audio/wav'):
    """
    Will attempt to play various audio file types (wav, ogg, mp3).

    Args:
        filepath: Path (or file object) handed to the matching
            ``AudioSegment`` loader.
        content_type: MIME-like type string. It is matched by substring, in
            this order: ``wav``; ``ogg`` or ``opus``; ``mp3`` or ``mpeg``.

    Raises:
        ValueError: If ``content_type`` matches none of the supported types.
            Previously this case failed with an ``UnboundLocalError``.
    """
    if 'wav' in content_type:
        sound = AudioSegment.from_wav(filepath)
    elif 'ogg' in content_type or 'opus' in content_type:
        sound = AudioSegment.from_ogg(filepath)
    elif 'mp3' in content_type or 'mpeg' in content_type:
        sound = AudioSegment.from_mp3(filepath)
    else:
        raise ValueError('Unsupported audio content type: %r' % (content_type,))
    pydub_play(sound)
def mp3_to_wav(mp3_file):
    """Convert an MP3 file to a WAV file placed next to it.

    Args:
        mp3_file: Path of the MP3 file to convert.

    Returns:
        The path of the WAV file that was written, or ``None`` (after
        printing a message) when ``mp3_file`` does not exist.

    The target name is built by swapping only the final extension for
    ``.wav``. The earlier ``str.replace`` also rewrote any ``.mp3`` found
    elsewhere in the path, and left the target equal to the source when the
    name did not contain ``.mp3``.
    """
    if not os.path.exists(mp3_file):
        # Parenthesised form prints identically on Python 2 and Python 3.
        print(u"????")
        return None

    target = os.path.splitext(mp3_file)[0] + ".wav"
    voice = AudioSegment.from_mp3(mp3_file)
    # export() returns the open output file; close it rather than leak it.
    voice.export(target, format="wav").close()
    return target
def use_cloud(self, sourcefile_path, token):
    """POST the audio frames of a WAV file to the Baidu speech endpoint.

    Args:
        sourcefile_path: Path of the WAV file whose frames are uploaded.
        token: Access token appended to the request URL.

    The response body is delivered to ``self.dump_res`` through curl's write
    callback; this method itself returns nothing. The request declares the
    payload as ``audio/pcm; rate=8000`` and uses 30-second connect and
    transfer timeouts.
    """
    # Read all frames, and always release the file handle.
    fp = wave.open(sourcefile_path, 'rb')
    try:
        audio_data = fp.readframes(fp.getnframes())
    finally:
        fp.close()
    # Use the real payload size. The old `nframes * 2` is only right when
    # every frame is exactly two bytes.
    f_len = len(audio_data)

    cuid = "xxxxxxxxxx"  # device identifier sent as `cuid` (placeholder value)
    srv_url = 'http://vop.baidu.com/server_api' + '?cuid=' + cuid + '&token=' + token
    http_header = [
        'Content-Type: audio/pcm; rate=8000',
        'Content-Length: %d' % f_len
    ]

    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, str(srv_url))  # curl doesn't support unicode
        c.setopt(c.HTTPHEADER, http_header)  # must be list, not dict
        c.setopt(c.POST, 1)
        c.setopt(c.CONNECTTIMEOUT, 30)
        c.setopt(c.TIMEOUT, 30)
        c.setopt(c.WRITEFUNCTION, self.dump_res)
        c.setopt(c.POSTFIELDS, audio_data)
        c.setopt(c.POSTFIELDSIZE, f_len)
        c.perform()  # pycurl.perform() has no return val
    finally:
        # Release the curl handle even when the transfer fails.
        c.close()
def getOutput(sourcefile_path, targetfile_path):
    """Convert an MP3 to WAV, run it through ``VoiceService``, return the result.

    Args:
        sourcefile_path: Path of the MP3 file to recognise.
        targetfile_path: Path where the intermediate WAV is written.

    Returns:
        The ``'result'`` entry of the JSON document in ``voiceService.buff``.

    Blocks until ``voiceService.isOk`` becomes truthy. There is no timeout,
    so this never returns if the service never sets that flag.
    """
    import time

    # export() returns the open output file; close it before it is reused.
    exported = AudioSegment.from_mp3(sourcefile_path).export(targetfile_path, format="wav")
    exported.close()

    voiceService = VoiceService()
    voiceService.voicepro(targetfile_path)

    # Poll with a short sleep instead of spinning a CPU core at 100%.
    while not voiceService.isOk:
        time.sleep(0.05)

    usage = json.loads(voiceService.buff)
    return usage['result']
def load_files(self, mp3_path: str, srt_path: str):
    """
    Read the audio and subtitle files and keep them on the instance.

    The decoded audio is stored as ``self.mp3`` and the parsed subtitles as
    ``self.srt``.

    :param mp3_path: location of the mp3 file to decode
    :param srt_path: location of the srt subtitle file to parse
    """
    decoded_audio = AudioSegment.from_mp3(mp3_path)
    self.mp3 = decoded_audio
    self.srt = pysrt.open(srt_path)
def _filter(ctx_msg):
if ctx_msg.get('format') == 'media' and ctx_msg['raw_ctx'].get('media_type') == 'voice':
m = re.match('\[??\]\(([/_A-Za-z0-9]+\.mp3)\)', ctx_msg.get('content'))
if m:
core.echo('????????????……', ctx_msg)
mp3_path = m.group(1)
wav_path = os.path.splitext(mp3_path)[0] + '.wav'
voice = AudioSegment.from_mp3(mp3_path)
voice.export(wav_path, format='wav')
service = os.environ.get('SPEECH_RECOGNITION_SERVICE', '').lower()
text = None
service_full_name = None
if service == 'baidu':
service_full_name = '??????'
text = _recognize_baidu(
wav_path,
get_source(ctx_msg),
os.environ.get('BAIDU_SPEECH_API_KEY'),
os.environ.get('BAIDU_SPEECH_SECRET_KEY'),
language='zh'
)
elif service == 'bing':
service_full_name = '??????'
text = _recognize_bing(
wav_path,
os.environ.get('BING_SPEECH_API_KEY'),
language='zh-CN'
)
else:
print('Unknown speech recognition service name.', file=sys.stderr)
if text:
reply = '?????' + service_full_name + '??\n%s\n\n??????????????????……' % text
ctx_msg['text'] = text
ctx_msg['from_voice'] = True
else:
reply = '???????????????'
core.echo(reply, ctx_msg)
os.remove(wav_path)
def transcode(file):
    '''Transcode an MP3 file to a WAV file with the same base name.

    The output path is the input path with its final extension replaced by
    ``.wav``. The earlier ``file.replace('mp3', 'wav')`` rewrote every
    ``mp3`` substring in the path, directory names included.
    '''
    import os

    rec = AudioSegment.from_mp3(file)
    wav_path = os.path.splitext(file)[0] + '.wav'
    # export() returns the open output file, which must be closed.
    rec_wav = rec.export(wav_path, format='wav')
    rec_wav.close()
def convert_mp3_to_wav(mp3_path, wav_path, max_length=10):
    """Convert the start of an MP3 file to WAV.

    Args:
        mp3_path: Path of the MP3 file to read.
        wav_path: Path of the WAV file to write.
        max_length: Maximum clip length in seconds; the audio is truncated to
            its first ``max_length * 1000`` milliseconds.

    Returns:
        ``True`` if ``wav_path`` already exists (it is not validated or
        regenerated) or the conversion succeeded. ``False`` if reading or
        decoding failed with ``IOError`` or ``CouldntDecodeError``, in which
        case a message is printed.
    """
    if os.path.exists(wav_path):
        # TODO: ideally check here whether the existing file is valid.
        return True
    try:
        sound = AudioSegment.from_mp3(mp3_path)
        sound = sound[:max_length * 1000]
        # export() returns the open output file; close it rather than leak it.
        sound.export(wav_path, format='wav').close()
    except (IOError, CouldntDecodeError):
        # Parenthesised form prints identically on Python 2 and Python 3.
        print("Error while converting mp3 to wav: %s" % mp3_path)
        return False
    return True
def extract(file):
    """
    Extracts an MFCC-based feature vector from a given audio file.

    The first 30 seconds of the audio are converted to WAV next to the input
    (same base name, ``.wav`` extension). MFCCs are computed from that WAV
    and summarised as the per-coefficient mean followed by the diagonals of
    the coefficient covariance matrix (main diagonal first, then each upper
    off-diagonal).

    Returns a 2-D array of shape ``(1, n_features)``, or ``None`` when the
    features could not be computed. Errors are printed, not raised. If the
    conversion step fails, feature extraction is still attempted on the
    original file.

    NOTE(review): when the input already ends in ``.wav`` the converted,
    30-second version is written over the input file, as before.
    """
    file_format = file.rsplit('.', 1)[-1]
    # splitext handles extensions of any length (the old `file[:-3]` slice
    # only worked for three-character extensions).
    wav_path = os.path.splitext(file)[0] + '.wav'
    made_temp_wav = False

    try:
        song = AudioSegment.from_file(file, file_format)
        song = song[:30 * 1000]
        # export() returns the open output file; close it before it is reused.
        song.export(wav_path, format="wav").close()
        # Only a WAV distinct from the input counts as a temp file to delete.
        made_temp_wav = wav_path != file
        file = wav_path
    except Exception as e:
        print(e)

    try:
        (rate, data) = scipy.io.wavfile.read(file)
        mfcc_feat = mfcc(data, rate)
        # Reduce the MFCC matrix to a fixed-size vector (104 values when
        # there are 13 coefficients - presumably mfcc()'s default; confirm).
        mm = np.transpose(mfcc_feat)
        mf = np.mean(mm, axis=1)
        cf = np.cov(mm)
        ff = mf
        for i in range(mm.shape[0]):
            ff = np.append(ff, np.diag(cf, i))
        return ff.reshape(1, -1)
    except Exception as e:
        print(e)
        return None
    finally:
        # Remove the intermediate WAV on success and on failure, but never
        # the caller's own input file.
        if made_temp_wav and os.path.exists(wav_path):
            os.remove(wav_path)