def transcribe(inputfile, outputfile='', to_txt=True):
    """Transcribe an audio file to text using CMU PocketSphinx.

    Non-WAV inputs (e.g. MP3) are first converted to a temporary WAV in
    /var/tmp via ffmpeg; the temporary file is removed afterwards.

    Args:
        inputfile: path to the source audio file.
        outputfile: path for the transcript text file. When empty,
            defaults to '<inputfile minus extension>.pocketsphinx.txt'.
        to_txt: when True, also write the transcript to `outputfile`.

    Returns:
        The recognized text, or '' when recognition fails.
    """
    wav_source = inputfile.lower().endswith('.wav')
    if wav_source:
        wav_path = inputfile
    else:
        # Build a temp WAV next to other temp files; '-y' makes ffmpeg
        # overwrite an existing file of the same name.
        temp_filename = inputfile.split('/')[-1] + '_temp.wav'
        wav_path = '/var/tmp/' + temp_filename
        subprocess.call(['ffmpeg', '-y', '-i', inputfile, wav_path])
    transcript = ''
    r = sr.Recognizer()
    with sr.AudioFile(wav_path) as source:
        audio = r.record(source)  # read the entire audio file
    try:  # recognize speech using Sphinx
        print('Processing ...')
        transcript = r.recognize_sphinx(audio)
    except sr.UnknownValueError:
        print("Sphinx error: No speech detected.")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))
    if not wav_source:
        os.remove(wav_path)  # delete the temporary WAV
    if to_txt:
        if not outputfile:
            outputfile = inputfile[:-4] + '.pocketsphinx.txt'
        with open(outputfile, 'w') as fo:
            fo.write(transcript)
    # The original duplicated this return in both branches of `to_txt`.
    return transcript
# Example source snippets using the speech_recognition Recognizer() class
def _recognize_bing(wav_path, api_key, language='zh-CN'):
    """Recognize the speech in a WAV file with the Bing Speech API.

    Returns the recognized text, or None when the audio is
    unintelligible or the service request fails.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as audio_source:
        clip = recognizer.record(audio_source)
    try:
        return recognizer.recognize_bing(clip, key=api_key, language=language)
    except (sr.UnknownValueError, sr.RequestError):
        return None
def initialize_recognizer():
    """Create and return a fresh speech_recognition.Recognizer."""
    return speech_recognition.Recognizer()
def sound_to_text():
    """Recognize each 8-second chunk file 'nlp_<i>.wav' (Turkish) with
    Google Speech Recognition and return the list of recognized strings.

    Relies on the module-level ``length`` (total audio duration in
    seconds) to determine how many chunk files exist.

    Returns:
        list[str]: one entry per successfully recognized chunk.
    """
    NEWS = []
    r = sr.Recognizer()  # one recognizer serves every chunk
    for i in range(int(length / 8)):
        WAV_FILE = path.join(path.dirname(path.realpath(__file__)),
                             'nlp_' + str(i) + '.wav')
        # AudioFile is the current name for the deprecated WavFile
        with sr.AudioFile(WAV_FILE) as source:
            audio = r.record(source)  # read the entire WAV file
        # recognize speech using Google Speech Recognition; to use a
        # custom key: r.recognize_google(audio, key="...")
        try:
            # Call the API once per chunk — the original called it twice
            # (once to print, once to append), doubling quota usage.
            text = r.recognize_google(audio, language="tr")
            print(i, ". part: ", text)
            NEWS.append(text)
        except sr.UnknownValueError:
            pass  # chunk was unintelligible; skip it
        except sr.RequestError:
            pass  # service unreachable; skip this chunk
    return NEWS
def __init__(self, engine='witai', wavfile='/files/sound.wav'):
    """Locate the WAV file relative to this module and pre-record it.

    Args:
        engine: name of the recognition engine to use later.
        wavfile: path of the WAV file relative to this module's
            directory; must start with '/' (plain string concatenation
            is used, not os.path.join).
    """
    self.directorycurrent = os.path.dirname(os.path.realpath(__file__))
    self.wavfile = self.directorycurrent + wavfile
    # Parenthesized print is valid in both Python 2 and 3; the original
    # used the Python-2-only statement form.
    print(self.wavfile)
    self.engine = engine
    self.r = sr.Recognizer()
    with sr.WavFile(self.wavfile) as self.source:
        self.audio = self.r.record(self.source)
def extract(self, filename, **kwargs):
    """Return the speech recognized in *filename* plus a trailing newline.

    Non-WAV inputs are converted to a temporary WAV (deleted when done)
    and handled by a recursive call.
    """
    base, ext = os.path.splitext(filename)
    if ext != '.wav':
        # convert to wav, then recurse on the temporary file
        temp_filename = self.convert_to_wav(filename)
        try:
            # The recursive call already appends the trailing newline;
            # the original added a second one here (double-newline bug).
            return self.extract(temp_filename, **kwargs)
        finally:  # make sure temp_file is deleted
            os.remove(temp_filename)
    speech = ''
    r = sr.Recognizer()
    with sr.WavFile(filename) as source:
        audio = r.record(source)
    try:
        speech = r.recognize_google(audio)
    except LookupError:  # audio is not understandable
        speech = ''
    # add a newline, to make output cleaner
    return speech + '\n'
def speak_response(response, name):
    """Strip characters that trip up the TTS engine, then speak the text."""
    cleaned = response
    # remove quotes, newlines, parentheses and semicolons
    for ch in ("'", "\n", "(", ")", ";"):
        cleaned = cleaned.replace(ch, "")
    speak_who(cleaned, name)
# obtain audio from the microphone
# warnings.filterwarnings("ignore")
# r = sr.Recognizer()
def v2t(self):
    """Loop on the microphone until the phrase 'quit listening' is heard;
    otherwise re-store the current value of the bound variable self.n."""
    active = True
    while active:
        recog = sr.Recognizer()
        with sr.Microphone() as mic:
            heard = recog.listen(mic)
            engine = pyttsx3.init()  # re-initialized each pass, as before
            spoken = recog.recognize_google(heard)
            if spoken == "quit listening":
                active = False
            else:
                current = self.n.get()
                self.n.set(current)
# Hotword-detected callback: stop the detector, play a confirmation
# chime, record one phrase, transcribe it via the project's `stt`
# backend, forward the text on the messagebus in a worker thread, then
# re-arm the detector.
# NOTE(review): the source's indentation was lost; the lines below are
# kept verbatim with comments only — nesting must be reconstructed
# against the original project before running.
def detected_callback(self):
self.detector.terminate()
play_wav(onyx.__path__[0] + "/client/speech/resources/ding.wav")
r = sr.Recognizer()
with sr.Microphone() as source:
print("Say something!")
# wait up to 1 s for speech to start, cap the phrase at 5 s
audio = r.listen(source, timeout=1, phrase_time_limit=5)
try:
# stt.execute: project speech-to-text backend (defined elsewhere)
result = stt.execute(audio, language=self.lang)
print("You said: " + result)
# worker that connects a websocket client and emits the utterance;
# closes over `result` and the thread handle `t` below
def create_ws():
def onConnected(event=None):
print ("Sending message...")
payload = {
'utterances': [result]
}
ws.emit(Message('recognizer_loop:utterance', payload))
# NOTE(review): threading.Thread has no close(); this was
# presumably meant to be ws.close() — confirm upstream.
t.close()
#self.detector.start(self.detected_callback)
ws = WebsocketClient()
ws.on('connected', onConnected)
# This will block until the client gets closed
ws.run_forever()
t = threading.Thread(target=create_ws)
t.start()
time.sleep(2)
# re-arm the hotword detector for the next activation
self.detector.start(self.detected_callback)
except sr.UnknownValueError:
print("Speech Recognition could not understand audio")
except sr.RequestError as e:
print("Could not request results from Speech Recognition service; {0}".format(e))
def __init__(self):
    """Set up a recognizer and read the configured language code."""
    self.recognizer = Recognizer()
    self.lang = config.get("Base", "lang")
def ears():
    """Listen on the microphone and return the recognized text.

    Retries while the audio is unintelligible; returns an apology string
    when the Google Speech Recognition service cannot be reached.
    """
    r = sr.Recognizer()
    while True:
        # obtain audio from the microphone
        with sr.Microphone() as source:
            audio = r.listen(source)
        # recognize speech using Google Speech Recognition
        try:
            return r.recognize_google(audio)
        except sr.UnknownValueError:
            # The original recursed here (`return ears()`), risking a
            # stack overflow on persistently noisy input; loop instead.
            continue
        except sr.RequestError as e:
            return "I do not understand; {0}".format(e)
def extract(self, filename, **kwargs):
    """Return the speech recognized in *filename* plus a trailing newline.

    Non-WAV inputs are converted to a temporary WAV (deleted when done)
    and handled by a recursive call.
    """
    base, ext = os.path.splitext(filename)
    if ext != '.wav':
        # convert to wav, then recurse on the temporary file
        temp_filename = self.convert_to_wav(filename)
        try:
            # The recursive call already appends the trailing newline;
            # the original added a second one here (double-newline bug).
            return self.extract(temp_filename, **kwargs)
        finally:  # make sure temp_file is deleted
            os.remove(temp_filename)
    speech = ''
    r = sr.Recognizer()
    with sr.WavFile(filename) as source:
        audio = r.record(source)
    try:
        speech = r.recognize_google(audio)
    except LookupError:  # audio is not understandable
        speech = ''
    # add a newline, to make output cleaner
    return speech + '\n'
def listen_translate():
    """Continuously capture microphone audio at 8 kHz, save each capture
    to 'microphone-results.wav', and print the Sphinx transcription.

    Runs forever; interrupt the process to stop.
    """
    r = sr.Recognizer()  # hoisted: no need to rebuild each iteration
    while True:
        # obtain audio from the microphone
        with sr.Microphone(sample_rate=8000) as source:
            print("Say something!")
            audio = r.listen(source)
        # write audio to a WAV file so the capture can be replayed later
        with open("microphone-results.wav", "wb") as f:
            f.write(audio.get_wav_data())
        # recognize speech using Sphinx
        try:
            print("Sphinx thinks you said :" + r.recognize_sphinx(audio))
        except sr.UnknownValueError:
            print("Sphinx could not understand audio")
        except sr.RequestError as e:
            print("Sphinx error; {0}".format(e))
def listen_and_recognize():
    """Record microphone phrases in the background and, every 10 seconds,
    merge all captured phrases into 'microphone-results.wav'.

    Relies on the module-level ``audiolist`` (appended to by the
    module-level ``callback`` passed to listen_in_background).
    Runs forever; interrupt the process to stop.
    """
    r = sr.Recognizer()
    m = sr.Microphone(sample_rate=8000)
    r.listen_in_background(m, callback, phrase_time_limit=1)
    # Moved out of the loop: the original reset lastlen to 0 on every
    # pass, so the "nothing new captured" check below never fired and
    # the WAV was rewritten even when no new audio had arrived.
    lastlen = 0
    while True:
        if len(audiolist) == 0:
            time.sleep(10)
            continue
        if lastlen == len(audiolist):
            time.sleep(10)
            continue
        output = wave.open('microphone-results.wav', 'wb')
        output.setnchannels(1)
        setparam = False
        para = None
        for audio in audiolist:
            # round-trip each AudioData through a temp WAV so the wave
            # module can read its frames
            with open("temps.wav", "wb") as f:
                f.write(audio.get_wav_data())
            temps = wave.open('temps.wav', 'rb')
            if not setparam:
                # take stream parameters from the first chunk
                para = temps.getparams()
                output.setparams(para)
                setparam = True
            output.writeframes(temps.readframes(temps.getnframes()))
            temps.close()  # the original leaked one reader per chunk
        output.close()
        lastlen = len(audiolist)
        time.sleep(10)
def play_audio():
    """Read './english.wav' and print what Sphinx recognizes in it."""
    r = sr.Recognizer()
    with sr.AudioFile('./english.wav') as source:
        audio = r.record(source)  # read the entire audio file
    # Parenthesized print is valid in both Python 2 and 3; the original
    # used the Python-2-only statement form.
    print(audio)
    # recognize speech using Sphinx
    try:
        print("Sphinx thinks you said :" + r.recognize_sphinx(audio))
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))
# Main assistant loop: optionally check for updates, then run in one of
# three modes per config — wake-up-word mode, always-on mode, or a plain
# single-session loop. `self.status` = "actively serving commands";
# `self.active` = "woken up / triggered".
# NOTE(review): the source's indentation was lost; the lines below are
# kept verbatim with comments only — the nesting of the mode loops must
# be reconstructed against the original project before running.
def initiate(self):
print("Stephanie is on and loading, wait for the beep sound to give your command.")
if self.c.config.getboolean("APPLICATION", "update_check"):
self.updater.check_for_update()
self.status = True
# either engine flag starts the assistant dormant until triggered
if self.c.config.getboolean("SYSTEM", "wake_up_engine"):
self.status = False
self.active = False
if self.c.config.getboolean("SYSTEM", "always_on_engine"):
self.status = False
self.active = False
r = sr.Recognizer()
# Activity / VirtualAssistant are project classes (defined elsewhere);
# both receive the speech_recognition module and the shared recognizer
act = Activity(sr, r, self.events)
assistant = VirtualAssistant(sr, r, self.events)
if self.c.config.getboolean("SYSTEM", "wake_up_engine"):
# wake-word mode: poll the microphone until the wake word is heard
while not self.active:
with sr.Microphone() as source:
self.active = act.check(source)
self.status = self.active
self.events.sleep_status = not self.status
if self.active:
self.speaker.speak("How may I help you?")
# serve commands until an event flips status/active
while self.status:
with sr.Microphone() as source:
assistant.main(source)
if self.events.active_status:
self.status = False
self.active = True
elif self.events.sleep_status:
self.status = False
self.active = False
elif self.c.config.getboolean("SYSTEM", "always_on_engine"):
# always-on mode: trigger check, then handle one command per wake
while not self.active:
with sr.Microphone() as source:
self.active = act.check_always_on(source)
self.status = self.active
if self.active:
while self.status:
with sr.Microphone() as source:
assistant.main(source)
self.status = False
self.active = False
if self.events.active_status:
self.status = False
self.active = True
else:
# plain mode: greet once, then serve until active_status is raised
self.speaker.speak("How may I help you?")
while self.status:
with sr.Microphone() as source:
assistant.main(source)
if self.events.active_status:
self.status = False
# ROS image callback (Python 2 — uses print statements): convert the
# incoming image message to OpenCV BGR, track faces, run the LBPH-style
# `recognizer.predict` on each face, greet a recognized person via TTS
# after 10 consecutive hits, then listen for a spoken yes/no reply with
# Sphinx.
# NOTE(review): the source's indentation was lost; the lines below are
# kept verbatim with comments only — nesting must be reconstructed
# against the original project before running.
def callback(self,data):
i=0
rg=spr.Recognizer()
try:
# ROS sensor_msgs/Image -> OpenCV BGR frame
frame = self.bridge.imgmsg_to_cv2(data, "bgr8")
frame = libs.resize(frame, width=600)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# et.track: project face tracker (defined elsewhere)
(rects, i, facess) = et.track(gray, i)
for rect in rects:
cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (0, 255, 0), 2)
if facess != []:
for face in facess:
# pred: predicted label id; conf: distance (lower = better match)
pred, conf = recognizer.predict(face)
if conf < 120:
print "Reconozco a Lucas con una confianza de {}".format(conf)
self.num=self.num+1
# greet only after 10 consecutive recognitions
if self.num==10:
self.engine.say('Hi ')
# reverse-lookup the person's name from the label id
self.engine.say( list(dictid.keys())[list(dictid.values()).index(pred)])
self.engine.runAndWait()
with spr.Microphone() as source:
rg.adjust_for_ambient_noise(source)
print 'Escuchando'
audio=rg.listen(source)
try:
respuesta= rg.recognize_sphinx(audio)
print respuesta
# any answer other than 'no' triggers data collection
if respuesta!='no':
self.engine.say('OKEY ')
self.engine.say('Getting')
self.engine.say('new')
self.engine.say('data')
self.engine.runAndWait()
except spr.UnknownValueError:
print 'error'
else:
print "Desconocido"
cv2.imshow("Tracking", frame)
cv2.waitKey(1)
except CvBridgeError as e:
print(e)
def write_by_speak(self):
    """Capture one utterance from the microphone and return the text
    recognized by Google Speech Recognition (British English).

    Returns:
        The recognized string, or None when the audio is unintelligible
        or the service cannot be reached.
    """
    r = sr.Recognizer()
    with sr.Microphone() as source:
        # calibrate against one second of ambient noise...
        r.adjust_for_ambient_noise(source, duration=1)
        print("Say something!...")
        # ...then raise the threshold further so quiet background
        # chatter is ignored
        r.energy_threshold += 280
        audio = r.listen(source)
    # Speech recognition using Google Speech Recognition; the default
    # API key is used — pass key="..." to recognize_google for another.
    try:
        print("Parsing ...")
        text = r.recognize_google(audio, language='en-GB')
        print("You said: " + text)
        return text
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
        return
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
        return
    except sr.HTTPError as e:
        # NOTE(review): speech_recognition may not define HTTPError in
        # current versions — confirm, or this clause raises
        # AttributeError instead of matching anything.
        print("Couldn't connect to the websites perhaps , Hyper text transfer protocol error; {0}".format(e))
        return