async def process_listen(self):
    """Transcribe the captured voice input, report it, and speak a reply.

    Converts the raw PCM capture at ``data/temp/heard.pcm`` to WAV with
    ffmpeg, sends it to Google Speech Recognition, posts the recognized text
    to ``self.text_channel`` and then answers through ``self.play_tts``.
    The reply comes from the AIML kernel, falling back to the "Games" cog's
    ``cleverbot_get_reply`` when the kernel returns nothing.

    Returns early (after posting a message where applicable) when there is no
    captured input, when recognition fails, or when the "Games" cog is not
    loaded.
    """
    pcm_path = "data/temp/heard.pcm"
    wav_path = "data/temp/heard.wav"

    if not os.path.isfile(pcm_path) or os.stat(pcm_path).st_size == 0:
        await self.bot.send_embed(self.text_channel, ":warning: No input found")
        return

    # The argument list is passed without shell=True: on POSIX, combining
    # shell=True with a list runs only "ffmpeg" and hands the remaining items
    # to the shell itself, so the conversion would never happen.
    convert = functools.partial(
        subprocess.call,
        ["ffmpeg", "-f", "s16le", "-y", "-ar", "44.1k", "-ac", "2",
         "-i", pcm_path, wav_path],
    )
    # ffmpeg blocks, so run it off the event loop.
    await self.bot.loop.run_in_executor(None, convert)

    with speech_recognition.AudioFile(wav_path) as source:
        audio = self.recognizer.record(source)

    # Keep the try body to the one call that raises the handled errors.
    # NOTE(review): recognize_google looks like a blocking network call made
    # on the event loop - confirm whether it should also go to an executor.
    try:
        text = self.recognizer.recognize_google(audio)
    except speech_recognition.UnknownValueError:
        await self.bot.send_embed(self.text_channel, ":no_entry: I couldn't understand that")
        return
    except speech_recognition.RequestError as e:
        await self.bot.send_embed(self.text_channel, ":warning: Could not request results from Google Speech Recognition service; {}".format(e))
        return

    await self.bot.send_embed(self.text_channel, "I think you said: `{}`".format(text))

    response = clients.aiml_kernel.respond(text)
    # TODO: Handle brain not loaded?
    if not response:
        # NOTE(review): `client` and `clients` are both resolved from outside
        # this block - confirm both names exist at module level.
        games_cog = client.get_cog("Games")
        if not games_cog:
            return
        response = await games_cog.cleverbot_get_reply(text)
    await self.bot.send_embed(self.text_channel, "Responding with: `{}`".format(response))
    await self.play_tts(response, self.bot.user)
    # TODO: decide whether heard.pcm should be truncated or removed afterwards.
python类AudioFile()的实例源码
def cut_and_send(infile, outfile, length):
    """Split a WAV file into chunks and append each chunk's transcript to a file.

    Each chunk is exported to a temporary ``chunk<N>.wav`` in the current
    directory, sent to Google Speech Recognition, and removed again. Recognized
    text (followed by a space) is appended to ``outfile``; chunks that cannot
    be understood or whose request fails are reported on stdout and skipped.

    :param infile: path of the WAV file to split
    :param outfile: path of the text file the transcripts are appended to
    :param length: chunk length in milliseconds (pydub works in milliseconds)
    """
    myaudio = AudioSegment.from_file(infile, "wav")
    chunks = make_chunks(myaudio, length)

    # One recognizer is enough for all chunks.
    r = sr.Recognizer()

    for i, chunk in enumerate(chunks):
        chunk_name = "chunk{0}.wav".format(i)
        print("exporting", chunk_name)
        try:
            chunk.export(chunk_name, format="wav")
            with sr.AudioFile(chunk_name) as source:
                audio = r.record(source)
            # recognize speech using Google Speech Recognition
            try:
                # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
                # instead of `r.recognize_google(audio)`
                txt = r.recognize_google(audio) + " "
            except sr.UnknownValueError:
                print("Ehm... sorry not understood this one.")
            except sr.RequestError as e:
                print("Request failed; {0}".format(e))
            else:
                with open(outfile, 'a') as f:
                    f.write(txt)
        finally:
            # Always clean up the temporary chunk, even when a step above
            # raised; the guard covers a failure before the file was created.
            if os.path.exists(chunk_name):
                os.remove(chunk_name)
def __init__(self, audio_file=None):
    """
    Thread used to capture audio from the microphone and pass it to a callback method.

    When no audio file is given, the recognizer's energy threshold is set up
    from the microphone (ambient-noise calibration or the configured value).
    When an audio file is given, its whole content is recorded into
    ``self.audio_stream`` instead.

    :param audio_file: optional path of an audio file to use instead of the microphone
    """
    super(SpeechRecognition, self).__init__()
    self.recognizer = sr.Recognizer()
    self.microphone = sr.Microphone()
    self.callback = None
    self.stop_thread = None
    self.kill_yourself = False
    self.audio_stream = None

    # get global configuration
    self.settings = SettingLoader().settings

    if audio_file is not None:
        # audio file provided: read the entire file up front
        with sr.AudioFile(audio_file) as audio_source:
            self.audio_stream = self.recognizer.record(audio_source)
        return

    # audio file not set, we need to capture a sample from the microphone
    with self.microphone as mic_source:
        options = self.settings.recognition_options
        ambient_seconds = options.adjust_for_ambient_noise_second
        if ambient_seconds > 0:
            # threshold is calculated from capturing ambient sound
            logger.debug("[SpeechRecognition] threshold calculated by "
                         "capturing ambient noise during %s seconds" % ambient_seconds)
            Utils.print_info("[SpeechRecognition] capturing ambient sound during %s seconds" %
                             ambient_seconds)
            self.recognizer.adjust_for_ambient_noise(mic_source, duration=ambient_seconds)
        else:
            # threshold is defined manually
            manual_threshold = options.energy_threshold
            logger.debug("[SpeechRecognition] threshold defined by settings: %s" % manual_threshold)
            self.recognizer.energy_threshold = manual_threshold

        Utils.print_info("Threshold set to: %s" % self.recognizer.energy_threshold)
def transcribe(inputfile, outputfile='', to_txt=True):
    """Transcribe an audio file with Sphinx and optionally save the text.

    Inputs whose name does not end in ``.wav`` are first converted to a
    temporary WAV with ffmpeg; the temporary file is always removed again.
    Recognition errors are reported on stdout and yield an empty transcript.

    :param inputfile: path of the audio file to transcribe
    :param outputfile: path of the text file to write; when empty, the input
        path with its extension replaced by ``.pocketsphinx.txt`` is used
    :param to_txt: when true, write the transcript to ``outputfile``
    :return: the transcript, or ``''`` when nothing was recognized
    """
    # Imported locally so the block does not depend on the file's import section.
    import shutil
    import tempfile

    temp_dir = None
    transcript = ''
    try:
        if inputfile.lower().endswith('.wav'):
            wav_path = inputfile
        else:
            # A private temp directory avoids the hard-coded /var/tmp path and
            # name clashes between concurrent runs on same-named inputs.
            temp_dir = tempfile.mkdtemp()
            wav_path = os.path.join(temp_dir, os.path.basename(inputfile) + '_temp.wav')
            # '-y' option overwrites existing file if present
            subprocess.call(['ffmpeg', '-y', '-i', inputfile, wav_path])

        r = sr.Recognizer()
        with sr.AudioFile(wav_path) as source:
            audio = r.record(source)  # read the entire audio file

        try:  # recognize speech using Sphinx
            print('Processing ...')
            transcript = r.recognize_sphinx(audio)
        except sr.UnknownValueError:
            print("Sphinx error: No speech detected.")
        except sr.RequestError as e:
            print("Sphinx error; {0}".format(e))
    finally:
        # Remove the temporary WAV even when reading or recognition raised.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)

    if to_txt:
        if not outputfile:
            # splitext handles extensions of any length (e.g. ".flac"),
            # unlike slicing off the last four characters.
            outputfile = os.path.splitext(inputfile)[0] + '.pocketsphinx.txt'
        with open(outputfile, 'w') as fo:
            fo.write(transcript)
    return transcript
def _recognize_bing(wav_path, api_key, language='zh-CN'):
    """Return the Bing transcription of a WAV file, or None on failure.

    :param wav_path: path of the audio file to transcribe
    :param api_key: key passed to ``recognize_bing``
    :param language: language code passed to ``recognize_bing``
    :return: the recognized text, or ``None`` when the audio was not
        understood or the request failed
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as audio_file:
        recording = recognizer.record(audio_file)

    try:
        return recognizer.recognize_bing(recording, key=api_key, language=language)
    except (sr.UnknownValueError, sr.RequestError):
        return None
def listen_translate():
    """Listen on the microphone forever and print what Sphinx recognizes.

    Each round opens the microphone with ``sample_rate=8000``, captures one
    phrase, saves it to ``microphone-results.wav`` and prints the Sphinx
    transcription (or an error message). The loop never exits on its own.
    """
    while True:
        # obtain audio from the microphone
        recognizer = sr.Recognizer()
        with sr.Microphone(sample_rate=8000) as mic:
            print("Say something!")
            captured = recognizer.listen(mic)

        # write audio to a WAV file
        with open("microphone-results.wav", "wb") as wav_file:
            wav_file.write(captured.get_wav_data())

        # recognize speech using Sphinx
        try:
            heard = recognizer.recognize_sphinx(captured)
            print("Sphinx thinks you said :" + heard)
        except sr.UnknownValueError:
            print("Sphinx could not understand audio")
        except sr.RequestError as err:
            print("Sphinx error; {0}".format(err))
```
def play_audio():
    """Transcribe ``./english.wav`` with Sphinx and print the result.

    Prints the recorded audio object, then either the recognized text or an
    error message when the audio is not understood or recognition fails.
    """
    r = sr.Recognizer()
    with sr.AudioFile('./english.wav') as source:
        audio = r.record(source)  # read the entire audio file
    # print() call instead of the Python 2 print statement, which is a
    # SyntaxError on Python 3 and inconsistent with the calls below.
    print(audio)
    # recognize speech using Sphinx
    try:
        print("Sphinx thinks you said :" + r.recognize_sphinx(audio))
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))