def audio2mp3(folder, verbose=True):
    """
    @brief Convert any audio files to mp3
    @param folder The folder containing audio files to be converted in mp3
    @param verbose When True, print a progress line per entry and a final message
    """
    folder = utils.abs_path_dir(folder)
    filelist = os.listdir(folder)
    total = len(filelist)
    for index, entire_fn in enumerate(filelist, start=1):
        if verbose:
            print(str(index) + "/" + str(total) + " " + entire_fn)
        # splitext only splits on the last dot, so names such as
        # "live.2017.wav" keep their full stem and real extension.
        filen, extension = os.path.splitext(entire_fn)
        source = os.path.join(folder, entire_fn)
        # Sub-directories and entries without an extension cannot be decoded
        # by format name, so they are skipped instead of raising.
        if not extension or not os.path.isfile(source):
            continue
        audio = AudioSegment.from_file(source, format=extension[1:])
        audio.export(os.path.join(folder, filen + ".mp3"), format="mp3")
    if verbose:
        print("Conversion done")
# Example source code using Python's from_file()
def _play(self):
    """Play all files listed in ``self._wfs`` simultaneously.

    One file is played as-is; several files are overlaid, in order, onto the
    first one and the mix is played. Nothing happens when the list is empty.
    """
    segments = [AudioSegment.from_file(wav_path) for wav_path in self._wfs]
    if not segments:
        return
    mixed = segments[0]
    for layer in segments[1:]:
        mixed = mixed.overlay(layer)
    playback.play(mixed)
# example
def _play(self):
    """Load every path in ``self._wfs`` and play them on top of each other.

    With a single file that file is played directly; with more, each later
    file is overlaid onto the running mix before playback. An empty list
    plays nothing.
    """
    loaded = []
    for source in self._wfs:
        loaded.append(AudioSegment.from_file(source))

    count = len(loaded)
    if count == 0:
        return
    if count == 1:
        playback.play(loaded[0])
        return

    combined = loaded[0]
    index = 1
    while index < count:
        combined = combined.overlay(loaded[index])
        index += 1
    playback.play(combined)
# example
def _playback(filename, start, end=None, playback_time=None):
    """
    Plays back an audio file from the start point (in seconds) to the end point (in seconds)
    :param filename: filename to playback; its extension is passed on as the decoder format
    :param start: start time, in seconds. No more than 3 places after decimal or loss of precision
    :param end: end time, in seconds. Same as above
    :param playback_time: time to play back, in seconds. use instead of end

    If neither or both of ``end`` and ``playback_time`` are given, the whole
    file is played and ``start`` is ignored.
    """
    file_extension = os.path.splitext(filename)[1]
    # The decoder format is taken from the file extension (without the dot).
    audio = AudioSegment.from_file(filename, format=file_extension[1:])

    def to_ms(seconds):
        # pydub slices are indexed in milliseconds, while this function's
        # contract is in seconds; round to avoid float truncation (1.001 s).
        return int(round(seconds * 1000))

    if end is None and playback_time is not None:
        # Play the track starting from start for playback_time seconds
        segment = audio[to_ms(start):to_ms(start + playback_time)]
    elif end is not None and playback_time is None:
        # Play the track starting from start and ending at end
        segment = audio[to_ms(start):to_ms(end)]
    else:
        # Play the whole thing
        segment = audio
    play(segment)
def adjust_volume(in_fp):
    """Export one gain-adjusted wav copy of ``in_fp`` per entry of ``volume_list``.

    The input is loaded with librosa at sr=44100, written to a temporary wav
    under ``tmp/`` and re-read from there, then each volume offset is added
    to the segment and exported to
    ``<out_dir>/<volume>db/<name of the input's directory>/<stem>_<volume>db.wav``.

    :param in_fp: path of the audio file to process
    """
    in_dir, fn = path.split(in_fp)
    fn = path.splitext(fn)[0]

    y, sr = librosa.load(in_fp, sr=44100)
    tmp_in_fp = "tmp/" + fn + "_tmp.wav"
    try:
        librosa.output.write_wav(tmp_in_fp, y, sr, norm=False)
        audio = aseg.from_file(tmp_in_fp, "wav")
    finally:
        # Remove the intermediate file even when writing or loading fails.
        if path.exists(tmp_in_fp):
            os.remove(tmp_in_fp)

    if audio is None:
        return

    parent_name = path.split(in_dir)[-1]
    for volume in volume_list:
        # Adding a number to the segment applies that offset as gain
        # (named "db" in the output path).
        audio_p = audio + volume
        fn_p = fn + "_" + str(volume) + "db" + ".wav"
        audio_p.export(path.join(out_dir, str(volume) + 'db', parent_name, fn_p), format="wav")
def cut_and_send(infile, outfile, length):
    """Split a wav file into chunks and append each chunk's transcript to ``outfile``.

    :param infile: path of the wav file to transcribe
    :param outfile: text file the recognised text is appended to
    :param length: chunk length in milliseconds (pydub calculates in millisec)

    Chunks that are not understood, or whose request fails, are reported on
    stdout and skipped. Each chunk is written to ``chunk<i>.wav`` in the
    current directory and removed again afterwards.
    """
    myaudio = AudioSegment.from_file(infile, "wav")
    chunks = make_chunks(myaudio, length)
    # One recognizer is enough for all chunks.
    recognizer = sr.Recognizer()
    for i, chunk in enumerate(chunks):
        chunk_name = "chunk{0}.wav".format(i)
        print("exporting", chunk_name)
        try:
            chunk.export(chunk_name, format="wav")
            with sr.AudioFile(chunk_name) as source:
                audio = recognizer.record(source)
            # recognize speech using Google Speech Recognition
            try:
                # to use another API key, use
                # `recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
                txt = recognizer.recognize_google(audio) + " "
            except sr.UnknownValueError:
                print("Ehm... sorry not understood this one.")
            except sr.RequestError as e:
                print("Request failed; {0}".format(e))
            else:
                with open(outfile, 'a') as f:
                    f.write(txt)
        finally:
            # Never leave chunk files behind, even on unexpected errors.
            if os.path.exists(chunk_name):
                os.remove(chunk_name)
def main():
    """Prompt for a flag and render it as an audio challenge file.

    The flag is converted with ``morse()``; each symbol of the result is
    mapped to a clip from ``files/`` and the clips are concatenated. The
    overlay clip, repeated to the message length, is mixed on top, the intro
    is put in front and the result is exported to ``./challenge.mp4``.
    """
    flag = raw_input("flag> ")

    dot_clip = AudioSegment.from_file("files/cuir.wav")
    dash_clip = AudioSegment.from_file("files/moustache.wav")
    dot_dash_clip = AudioSegment.from_file("files/cuir_moustache.wav")
    space_clip = AudioSegment.from_file("files/space.wav")
    blank_clip = AudioSegment.from_file("files/blank.wav")
    background = AudioSegment.from_file("files/overlay.wav")
    intro_clip = AudioSegment.from_file("files/intro.wav")

    # Empty slice of the intro: an empty segment to append clips to.
    message = intro_clip[0:0]
    code = morse(flag)
    print(code)

    position = 0
    while position < len(code):
        symbol = code[position]
        if symbol == '.':
            followed_by_dash = (position < len(code) - 1
                                and code[position + 1] == '-')
            if followed_by_dash:
                # A ".-" pair has its own combined clip; consume both symbols.
                message += dot_dash_clip
                position += 1
            else:
                message += dot_clip
        elif symbol == '-':
            message += dash_clip
        elif symbol == ' ':
            message += space_clip
        elif symbol == '!':
            if ADD_BLANKS:
                message += blank_clip
        position += 1

    looped_background = repeat_to_length(background, len(message))
    mixed_message = message.overlay(looped_background)
    challenge = intro_clip + mixed_message
    challenge.export("./challenge.mp4", format="mp4")
def convert_to_wav(path):
    """Write the first 30 seconds of an audio file as wav next to the original.

    :param path: path of the audio file to convert
    :return: path of the written wav file (same location and stem, ``.wav`` extension)
    """
    # Imported locally because the ``path`` parameter would shadow any
    # module-level ``path`` alias of os.path.
    import os

    # splitext handles extensions of any length ("song.flac" -> "song.wav");
    # slicing off three characters would have produced "song.fwav".
    wav_path = os.path.splitext(path)[0] + ".wav"
    song = AudioSegment.from_file(path)
    # pydub slices are in milliseconds: keep the first 30 s only.
    song[:30000].export(wav_path, format='wav')
    return wav_path
def load_pydub(filename):
    """ Loads an MP3 or FLAC file.

        Multi-channel audio is mixed down by overlaying its mono channels.
        Returns a dict with the samples ('signal'), 'sample_rate',
        'sample_width' (the segment's sample width times 8) and 'channels'.
        Raises ImportError if pydub is missing, and DependencyError if the
        load raises FileNotFoundError although the file exists.
    """
    try:
        from pydub import AudioSegment
        segment = AudioSegment.from_file(filename)
    except ImportError:
        logger.exception('"pydub" is a required Python dependency for '
            'handling this audio file: %s.', filename)
        raise
    except FileNotFoundError:
        # The file itself is missing: let the original error through.
        if not os.path.isfile(filename):
            raise
        # The file exists, so something else the load needs was not found.
        raise DependencyError()

    if segment.channels > 1:
        mono_tracks = segment.split_to_mono()
        segment = mono_tracks[0]
        for track in mono_tracks[1:]:
            segment = segment.overlay(track)

    samples = segment.get_array_of_samples()
    signal = numpy.frombuffer(samples, dtype=samples.typecode)
    return {
        'signal' : signal,
        'sample_rate' : segment.frame_rate,
        'sample_width' : segment.sample_width * 8,
        'channels' : segment.channels
    }
###############################################################################
def get_mime_type(filename):
    """ Returns the MIME type associated with a particular audio file.

        With the "magic" package available the type is read from the file
        contents, following symlinks to their target. Without it, the type is
        guessed from the file extension and 'unknown' is returned for
        extensions other than .wav, .mp3 and .flac.
    """
    try:
        import magic
    except ImportError:
        # `warn` is a function attribute used as a warn-once flag; treat a
        # missing attribute as "not warned yet" instead of raising.
        if getattr(get_mime_type, 'warn', True):
            logger.warning('Python package "magic" could not be loaded, '
                'possibly because system library "libmagic" could not be '
                'found. We are falling back on our own heuristics.')
            get_mime_type.warn = False

        ext = os.path.splitext(filename)[1].lower()
        return {
            '.wav' : 'audio/x-wav',
            '.mp3' : 'audio/mpeg',
            '.flac' : 'audio/x-flac'
        }.get(ext, 'unknown')
    else:
        # Read off magic numbers and return MIME types
        mime_magic = magic.Magic(mime=True)

        def detect(target):
            # from_file may return bytes or str; normalise to str.
            ftype = mime_magic.from_file(target)
            return ftype.decode('utf-8') if isinstance(ftype, bytes) else ftype

        # If we are dealing with a symlink, read the link
        # and try again with the target file. We do this in
        # a while loop to cover the case of symlinks which
        # point to other symlinks
        current_filename = filename
        ftype = detect(current_filename)
        visited = set()
        while ftype == 'inode/symlink' and current_filename not in visited:
            visited.add(current_filename)
            # A relative link target is relative to the directory holding the
            # link, not to the current working directory. os.path.join keeps
            # absolute targets unchanged.
            current_filename = os.path.normpath(os.path.join(
                os.path.dirname(current_filename),
                os.readlink(current_filename)))
            ftype = detect(current_filename)
        # On a symlink cycle the loop stops and 'inode/symlink' is returned.
        return ftype
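# Source file: reader_file.py
# Project: audio-fingerprint-identifying-python
# Author: itspoma
# (Listing links from the source page: project source, file source)
# Page counters: views 20, favorites 0, likes 0, comments 0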
def parse_audio(self):
    """Decode ``self.filename`` and split its samples into one array per channel.

    Returns a dict with:
      - "songname" / "extension": base name of the file split at its extension
      - "channels": list with one sample array per audio channel
      - "Fs": frame rate of the decoded audio
      - "file_hash": result of ``self.parse_file_hash()``

    If decoding raises ``audioop.error``, a message is printed and the dict
    is still returned, with an empty "channels" list and "Fs" set to None.
    """
    # Set to a number of seconds to only decode the beginning of the file.
    limit = None

    songname, extension = os.path.splitext(os.path.basename(self.filename))

    # Defaults used when decoding fails, so the return below never refers to
    # names that were not assigned.
    channels = []
    fs = None

    try:
        audiofile = AudioSegment.from_file(self.filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        # Raw bytes read as int16 samples (assumes 16-bit audio - TODO confirm).
        # frombuffer replaces the deprecated binary mode of fromstring; the
        # copy keeps the arrays writable as before.
        data = np.frombuffer(audiofile._data, np.int16).copy()

        # Samples are interleaved, so channel n is every n-th value.
        channels = [data[chn::audiofile.channels]
                    for chn in range(audiofile.channels)]

        fs = audiofile.frame_rate
    except audioop.error:
        # NOTE(review): a wavio-based fallback used to sit here, commented out.
        print('audioop.error')

    return {
        "songname": songname,
        "extension": extension,
        "channels": channels,
        "Fs": fs,
        "file_hash": self.parse_file_hash()
    }
def run(self):
    """Play ``self.filepath`` through PyAudio, repeating while ``self.loop`` is true.

    ``self.time`` is reset to the start position at the beginning of every
    pass and advanced by the chunk duration after each chunk is written.
    Clearing ``self.loop`` stops playback after the current chunk.
    """
    # Open an audio segment
    sound = AudioSegment.from_file(self.filepath)

    player = pyaudio.PyAudio()
    sample_format = player.get_format_from_width(sound.sample_width)
    stream = player.open(format=sample_format,
                         channels=sound.channels,
                         rate=sound.frame_rate,
                         output=True)

    # PLAYBACK LOOP
    start = 0
    length = sound.duration_seconds
    volume = 100.0
    # With volume at 100.0 this evaluates to 0, i.e. nothing is subtracted.
    attenuation = 60 - (60 * (volume/100.0))
    playchunk = sound[start*1000.0:(start+length)*1000.0] - attenuation
    millisecondchunk = 50 / 1000.0
    end_time = start + length

    while self.loop:
        self.time = start
        for piece in make_chunks(playchunk, millisecondchunk*1000):
            self.time += millisecondchunk
            stream.write(piece._data)
            if not self.loop or self.time >= end_time:
                break

    stream.close()
    player.terminate()
def get_length(filename):
    """
    Report how long an audio file is, for use in playback()
    :param filename: Location of audio file; its extension (without the dot) is used as the format
    :return: length of file in seconds
    """
    extension = os.path.splitext(filename)[1]
    segment = AudioSegment.from_file(filename, extension[1:])
    # len() of the segment is divided by 1000 to get seconds.
    duration_ms = len(segment)
    return float(duration_ms) / 1000
def __make_chunks(self):
    """Load the audio at ``self.__path`` and cut it into chunks of length 100.

    Stores the loaded segment in ``self.__segment`` and the list returned by
    ``make_chunks`` in ``self.__chunks``.
    """
    loaded = AudioSegment.from_file(self.__path)
    self.__segment = loaded
    self.__chunks = make_chunks(loaded, 100)
def load_pydub(filename):
    """ Loads an MP3 or FLAC file.

        Returns a dictionary holding the samples as a numpy array ('signal'),
        the frame rate ('sample_rate'), the sample width times 8
        ('sample_width') and the channel count ('channels'). Audio with more
        than one channel is first reduced by overlaying its mono channels.
    """
    def mix(left, right):
        # Overlay one mono channel onto the running mix.
        return left.overlay(right)

    try:
        from pydub import AudioSegment
        audio = AudioSegment.from_file(filename)
    except ImportError:
        logger.exception('"pydub" is a required Python dependency for '
            'handling this audio file: %s.', filename)
        raise
    except FileNotFoundError:
        file_present = os.path.isfile(filename)
        if file_present:
            # The audio file exists, so a different required file is missing.
            raise DependencyError()
        raise

    has_many_channels = audio.channels > 1
    if has_many_channels:
        audio = functools.reduce(mix, audio.split_to_mono())

    sample_array = audio.get_array_of_samples()
    result = {}
    result['signal'] = numpy.frombuffer(sample_array, dtype=sample_array.typecode)
    result['sample_rate'] = audio.frame_rate
    result['sample_width'] = audio.sample_width * 8
    result['channels'] = audio.channels
    return result
###############################################################################
def get_mime_type(filename):
    """ Returns the MIME type associated with a particular audio file.

        Uses the "magic" package when it can be imported, following chains of
        symlinks to the final target. Otherwise falls back to a lookup by
        file extension (.wav, .mp3, .flac) and returns 'unknown' for anything
        else; a warning is logged the first time this fallback is taken.
    """
    try:
        import magic
    except ImportError:
        # The warn-once flag is stored on the function object itself. A
        # missing flag is treated as True so the first call cannot fail with
        # AttributeError.
        if getattr(get_mime_type, 'warn', True):
            logger.warning('Python package "magic" could not be loaded, '
                'possibly because system library "libmagic" could not be '
                'found. We are falling back on our own heuristics.')
            get_mime_type.warn = False

        ext = os.path.splitext(filename)[1].lower()
        return {
            '.wav' : 'audio/x-wav',
            '.mp3' : 'audio/mpeg',
            '.flac' : 'audio/x-flac'
        }.get(ext, 'unknown')
    else:
        # Read off magic numbers and return MIME types
        mime_magic = magic.Magic(mime=True)

        def mime_of(target):
            # Normalise the bytes/str result of from_file to str.
            found = mime_magic.from_file(target)
            if isinstance(found, bytes):
                found = found.decode('utf-8')
            return found

        # If we are dealing with a symlink, read the link
        # and try again with the target file. We do this in
        # a while loop to cover the case of symlinks which
        # point to other symlinks
        current_filename = filename
        ftype = mime_of(current_filename)
        seen = set()
        while ftype == 'inode/symlink':
            if current_filename in seen:
                # Symlink cycle: give up instead of looping forever.
                break
            seen.add(current_filename)
            link_dir = os.path.dirname(current_filename)
            # Relative targets are resolved against the link's own directory;
            # os.path.join leaves absolute targets as they are.
            current_filename = os.path.normpath(
                os.path.join(link_dir, os.readlink(current_filename)))
            ftype = mime_of(current_filename)
        return ftype
def mp3_to_array(file):
    """Decode an mp3 into a mono sample array.

    The audio is converted to one channel and cut to its first
    ``millis_per_sec * 30`` milliseconds.

    Returns a ``(samples, frame_rate)`` tuple, with ``samples`` a numpy array.
    """
    mono = AudioSegment.from_file(file, format='mp3').set_channels(1)
    clip = mono[:(millis_per_sec * 30)]
    samples = np.array(clip.get_array_of_samples())
    return samples, clip.frame_rate
def splitAudioIntoParts(uploadedFilepath, extension, basedir):
    """ Yields (filename, sampleRate) tuples, one per chunk of the track.
        The filename belongs to a NamedTemporaryFile holding the chunk as
        wav; it is deleted at the next iteration.

        uploadedFilepath -- path of the audio file to split (must exist)
        extension        -- file extension including the leading dot
        basedir          -- directory in which the temporary files are created

        Chunks start every 10 seconds and run 2 seconds past the next chunk's
        start (clamped to the end of the track), so neighbours overlap.
    """
    assert os.path.exists(uploadedFilepath)
    track = AudioSegment.from_file(uploadedFilepath, extension[1:])
    sampleRate = track.frame_rate
    tracklen = len(track)
    overlap = 2*1000 # 2 seconds
    chunkBaseLength = 10*1000 # 10 seconds
    # Integer ceiling division: unlike math.ceil(a / b) this also counts the
    # final partial chunk when "/" is integer division (Python 2).
    num_segments = (tracklen + chunkBaseLength - 1) // chunkBaseLength
    basename = basenameNoExt(uploadedFilepath)

    # Iterate over every chunkBaseLength segment
    for i in range(num_segments):
        start_time = i*chunkBaseLength
        end_time = min((i+1)*chunkBaseLength+overlap, tracklen)
        curr_track = track[start_time:end_time]

        # Save the segment to a NamedTemporaryFile in basedir,
        # named after both the original filename and the part of the track.
        # Once the with completes, the file is deleted.
        with tempfile.NamedTemporaryFile(
                prefix = basename+"_",
                suffix = "_part%02d.wav"%i,
                dir = basedir) as currFilename:
            curr_track.export(currFilename.name, format="wav", bitrate="192k")
            yield (currFilename.name, sampleRate)
def validateAudio(uploadedFilepath, extension):
    """ Checks that the audio can be processed. At the moment that means:
        1. Exactly one channel
            - required by Google Speech API

        Returns None when the check passes and raises AudioError otherwise.
        `extension` includes the leading dot, which is stripped before it is
        used as the format.
    """
    track = AudioSegment.from_file(uploadedFilepath, extension[1:])
    channel_count = track.channels
    if channel_count == 1:
        return None
    raise AudioError("Google Speech API requires single-channel audio, "\
        "but you have %d channels in your audio." % (channel_count,))
def extract(file):
    """
    Extracts an MFCC-based feature vector from a given audio file.
    First the first 30 seconds of the audio are converted into wav format
    (in a temporary file, so the input file is never modified); if that
    conversion fails the input file itself is read as wav.

    Returns the features as an array reshaped to a single row, or None when
    the file could not be read or processed (the error is printed).
    """
    # Imported locally so the block does not rely on module-level imports.
    import os
    import tempfile

    # Extension without the dot, regardless of its length.
    file_format = os.path.splitext(file)[1][1:]

    wav_path = file
    tmp_path = None
    try:
        try:
            song = AudioSegment.from_file(file, file_format)
            # pydub slices are in milliseconds: keep 30 seconds.
            song = song[: 30 * 1000]
            fd, tmp_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            song.export(tmp_path, format="wav")
            wav_path = tmp_path
        except Exception as e:
            # Best effort: fall back to reading the input directly as wav.
            print(e)

        try:
            (rate, data) = scipy.io.wavfile.read(wav_path)
            mfcc_feat = mfcc(data, rate)
            # Summarise the MFCC matrix: per-coefficient means followed by
            # every upper diagonal of the covariance matrix.
            mm = np.transpose(mfcc_feat)
            mf = np.mean(mm, axis=1)
            cf = np.cov(mm)
            ff = mf
            # per the original author, ff ends up as a vector of size 104
            for i in range(mm.shape[0]):
                ff = np.append(ff, np.diag(cf, i))
            return ff.reshape(1, -1)
        except Exception as e:
            print(e)
            return None
    finally:
        # The temporary wav is always removed, on success and on failure.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)
def createViz(pin):
    """Draw a bar-style waveform picture of "audio.mp3", save it and send it to ``lpr``.

    :param pin: key into the local ``questions`` dict; the matching question
                text is drawn in the top-left corner of the picture
                (a KeyError is raised for any other value)
    """
    src = "audio.mp3"
    questions = {23: "What's your motto?", 24: "Give us your best laugh!", 25:"Make a wish!", 8:"What are you grateful for today?"}
    audio = AudioSegment.from_file(src)
    # Raw sample bytes read as int16 values (assumes 16-bit audio - TODO confirm)
    data = np.fromstring(audio._data, np.int16)
    fs = audio.frame_rate
    BARS = 500
    BAR_HEIGHT = 300
    LINE_WIDTH = 5
    length = len(data)
    # Number of samples summarised by one bar
    RATIO = length/BARS
    count = 0
    maximum_item = 0
    max_array = []
    highest_line = 0
    # Walk the samples, tracking the largest absolute value per bucket;
    # when a bucket is full its peak is stored and the counters restart.
    for d in data:
        if count < RATIO:
            count = count + 1
            if abs(d) > maximum_item:
                maximum_item = abs(d)
        else:
            # NOTE: the sample that closes a bucket is not itself compared
            max_array.append(maximum_item)
            if maximum_item > highest_line:
                highest_line = maximum_item
            maximum_item = 0
            count = 1
    # Scale factor so that the tallest bar is BAR_HEIGHT pixels high
    # NOTE(review): this is 0 when no bucket has a non-zero peak, and the
    # division further down then divides by zero
    line_ratio = highest_line/BAR_HEIGHT
    fnt = ImageFont.truetype('vcr.ttf', 50)
    im = Image.new('RGB', (BARS * LINE_WIDTH, BAR_HEIGHT), (255, 255, 255))
    draw = ImageDraw.Draw(im)
    draw.text((0,0), questions[pin] , font=fnt, fill=(0,0,0,255))
    current_x = 1
    # One vertical line per bucket, centred vertically in the image
    for item in max_array:
        item_height = item/line_ratio
        current_y = (BAR_HEIGHT - item_height)/2
        draw.line((current_x, current_y, current_x, current_y + item_height), fill=(0, 0, 0), width=4)
        current_x = current_x + LINE_WIDTH
    im.show()
    # File name carries the current GMT timestamp
    filename = "audioViz_" + strftime("%Y_%m_%d%H-%M-%S", gmtime()) + ".bmp"
    im.save("static/images/" + filename)
    # Hand the saved picture to the lpr command
    call(["lpr","-o","fit-to-page","static/images/" + filename])
def hello():
    """Webhook endpoint: verifies the hook on GET and answers messages on POST.

    GET:  returns the ``hub.challenge`` argument when ``hub.verify_token``
          matches VERIFY_TOKEN, otherwise an error string.
    POST: for every message in the payload, text is answered through
          ``AI.process_message`` and each attachment is downloaded,
          transcribed and the transcript sent back to the sender.
          Returns "Success".
    """
    if request.method == 'GET':
        if request.args.get("hub.verify_token") == VERIFY_TOKEN:
            return request.args.get("hub.challenge")
        return 'Invalid verification token'

    if request.method == 'POST':
        output = request.get_json()
        for event in output['entry']:
            for x in event['messaging']:
                message = x.get('message')
                if not message:
                    continue
                recipient_id = x['sender']['id']
                if message.get('text'):
                    msg = AI.process_message(message['text'])
                    bot.send_text_message(recipient_id, msg)
                for att in message.get('attachments') or []:
                    url = att['payload']['url']
                    print(url)
                    sentence = _transcribe_voice_attachment(url)
                    bot.send_text_message(recipient_id, sentence)
                    #bot.send_attachment_url(recipient_id, att['type'], att['payload']['url'])
        return "Success"


def _transcribe_voice_attachment(url):
    """Download the audio at ``url`` and return the decoder's transcript.

    Works through fixed file names in the current directory
    (voicemsg.aac, rawmsg.wav, temp.wav, rawmsg.raw).
    """
    # NOTE(review): the URL comes from the request payload; confirm it is
    # trusted before fetching it.
    with urllib.request.urlopen(url) as response, \
            open('voicemsg.aac', 'wb') as f:
        f.write(response.read())

    # export() hands back an open file object; close it so handles do not
    # accumulate over requests.
    aac_file = AudioSegment.from_file('voicemsg.aac', format='aac')
    aac_file.export('rawmsg.wav', format='wav').close()
    # Resample to 16000 Hz with sox.
    os.system("sox rawmsg.wav -r 16000 temp.wav")
    wav_file = AudioSegment.from_file('temp.wav', format='wav')
    wav_file.export('rawmsg.raw', format='raw').close()

    decoder.start_utt()
    with open('rawmsg.raw', 'rb') as stream:
        # Feed the decoder in 1024-byte blocks until the file is exhausted.
        for buf in iter(lambda: stream.read(1024), b''):
            decoder.process_raw(buf, False, False)
    decoder.end_utt()
    return " ".join([seg.word for seg in decoder.seg()])
def split_audio_file(self, file=""):
    """Return a list of mp3 file paths no larger than ``self.MAX_TG_FILE_SIZE`` each.

    - A small mp3 is returned unchanged as the only list element.
    - A small m4a/mp4 is converted to ``<root>.mp3``.
    - A larger file is cut into ``file_size // MAX_TG_FILE_SIZE + 1`` parts of
      equal duration, exported as mp3 to ``<root>.part<N><ext>``; ID3 tags of
      the source are copied to every part when they can be read.

    :param file: path of the audio file
    :return: list of resulting file paths
    :raises FileNotSupportedError: extension is not mp3, m4a or mp4
    :raises FileTooLargeError: file exceeds ``self.MAX_CONVERT_FILE_SIZE``
    :raises FileNotConvertedError: conversion failed (OSError/MemoryError)
    :raises FileConvertedPartiallyError: splitting failed part-way; carries
        the parts written so far
    """
    file_root, file_ext = os.path.splitext(file)
    file_format = file_ext.replace(".", "").lower()
    if file_format not in ["mp3", "m4a", "mp4"]:
        raise FileNotSupportedError(file_format)
    file_size = os.path.getsize(file)
    if file_size > self.MAX_CONVERT_FILE_SIZE:
        raise FileTooLargeError(file_size)
    file_parts = []
    if file_size <= self.MAX_TG_FILE_SIZE:
        if file_format == "mp3":
            file_parts.append(file)
        else:
            logger.info("Converting: %s", file)
            try:
                sound = AudioSegment.from_file(file, file_format)
                # Build the name from the root: str.replace would also rewrite
                # the extension text wherever else it occurs in the path.
                file_converted = file_root + ".mp3"
                sound.export(file_converted, format="mp3")
                del sound
                gc.collect()
                file_parts.append(file_converted)
            except (OSError, MemoryError):
                gc.collect()
                raise FileNotConvertedError
    else:
        logger.info("Splitting: %s", file)
        try:
            id3 = mutagen.id3.ID3(file, translate=False)
        except Exception:
            # Tags are optional: carry on without them.
            id3 = None
        parts_number = file_size // self.MAX_TG_FILE_SIZE + 1
        try:
            sound = AudioSegment.from_file(file, file_format)
            part_size = len(sound) / parts_number
            for i in range(parts_number):
                # NOTE(review): parts keep the source extension although they
                # are exported as mp3 - confirm this is intended.
                file_part = file_root + ".part{}{}".format(i + 1, file_ext)
                part = sound[part_size * i:part_size * (i + 1)]
                part.export(file_part, format="mp3")
                del part
                gc.collect()
                if id3:
                    try:
                        id3.save(file_part, v1=2, v2_version=4)
                    except Exception:
                        # Tagging is best-effort; the part itself is valid.
                        pass
                file_parts.append(file_part)
            # Free the decoded audio explicitly, see:
            # https://github.com/jiaaro/pydub/issues/135
            # https://github.com/jiaaro/pydub/issues/89#issuecomment-75245610
            del sound
            gc.collect()
        except (OSError, MemoryError):
            gc.collect()
            raise FileConvertedPartiallyError(file_parts)
    return file_parts