def listen_for_speech(threshold=THRESHOLD, num_phrases=1):
    """
    Listen to the microphone, cut the input into phrases and transcribe them.

    A "phrase" is sound surrounded by silence. A chunk counts as sound when
    the square root of its absolute average amplitude exceeds `threshold`;
    a phrase ends once no chunk in the sliding window (SILENCE_LIMIT * RATE
    / CHUNK chunks long) exceeds it. Each finished phrase is handed to
    `save_speech` together with the buffered audio that preceded it, then
    transcribed with `transcribe_audio` and printed.

    threshold   -- loudness level above which a chunk counts as sound.
    num_phrases -- how many phrases to process before returning
                   (-1 to keep listening indefinitely).

    Returns the list of `transcribe_audio` results, one per phrase. Results
    are not collected when num_phrases is -1, since that mode only ends via
    an exception. The stream and the PyAudio instance are released even
    when listening is interrupted by an exception.
    """
    # Integer number of chunks read per second; deque(maxlen=...) needs ints.
    chunks_per_sec = RATE // CHUNK
    silence_len = int(SILENCE_LIMIT * chunks_per_sec)
    pre_roll_len = int(PREV_AUDIO * chunks_per_sec)

    p = pyaudio.PyAudio()
    stream = None
    response = []
    try:
        # Open stream
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        print("* Listening mic. ")

        audio2send = []
        # Loudness of the most recent chunks, used to detect silence.
        slid_win = deque(maxlen=silence_len)
        # Audio from just before noise was detected, prepended to the phrase.
        prev_audio = deque(maxlen=pre_roll_len)
        started = False
        remaining = num_phrases

        while num_phrases == -1 or remaining > 0:
            cur_data = stream.read(CHUNK)
            # NOTE(review): the sample width is hard-coded to 4 bytes here;
            # confirm it matches FORMAT.
            slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))

            if any(level > threshold for level in slid_win):
                if not started:
                    print("Starting record of phrase")
                    started = True
                audio2send.append(cur_data)
            elif started:
                print("Finished")
                # The silence limit was reached: finish capture and deliver.
                # NOTE(review): the value returned by save_speech is ignored
                # and a fixed path is transcribed; confirm that save_speech
                # writes 'speech.wav'.
                save_speech(list(prev_audio) + audio2send, p)
                result = transcribe_audio('speech.wav')
                print(result)
                text = result['data']
                print("Text: " + text + "\n")
                if num_phrases != -1:
                    response.append(result)

                # Reset all state for the next phrase.
                started = False
                slid_win = deque(maxlen=silence_len)
                prev_audio = deque(maxlen=pre_roll_len)
                audio2send = []
                remaining -= 1
            else:
                # Still silent and not recording: keep a short rolling
                # buffer so the start of the next phrase is not clipped.
                prev_audio.append(cur_data)

        print("* Done recording")
    finally:
        if stream is not None:
            stream.close()
        p.terminate()

    return response
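# [web page navigation text captured with the snippet, not code] Comment list
# [web page navigation text captured with the snippet, not code] Table of contents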