def wav_to_input(wav_file_name):
    """Read a wav file and return MFCC + delta + delta-delta features.

    The result has shape (n_frames - 2, 3 * n_cepstra): the first two frames
    are dropped so the three feature streams line up row-for-row.
    """
    signal, sample_rate = sf.read(wav_file_name)
    cepstra = mfcc(signal, sample_rate)
    # First-order frame-to-frame differences (one fewer row than cepstra).
    deltas = cepstra[:-1] - cepstra[1:]
    # Second-order differences (two fewer rows than cepstra).
    delta_deltas = deltas[:-1] - deltas[1:]
    # Trim the longer streams so all three have n_frames - 2 rows, then stack
    # them side by side.
    return np.concatenate((cepstra[2:], deltas[1:], delta_deltas), axis=1)
# Example source code using the Python mfcc() function
def get_data(rootdir=TIMIT_main_dir):
    """Walk *rootdir*, extract MFCC features for every .wav file and read the
    matching transcription from the .txt file with the same base name.

    Returns:
        (inputs, targets): list of per-utterance MFCC feature arrays, and a
        parallel list of cleaned, lower-cased transcription strings.
    """
    inputs = []
    targets = []
    for dir_path, sub_dirs, files in os.walk(rootdir):
        for file_name in files:  # renamed: `file` shadows the builtin
            wav_file_name = os.path.join(dir_path, file_name)
            if not wav_file_name.endswith('.wav'):
                continue
            input_data, f_s = sf.read(wav_file_name)
            mfcc_feat = mfcc(input_data, f_s)
            # Rakeshvar wants one frame along each column but i am using Lasagne
            inputs.append(mfcc_feat)
            text_file_name = wav_file_name[:-4] + '.txt'
            # Context manager so the transcript handle is always closed
            # (the original leaked it).
            with open(text_file_name) as target_data_file:
                target_data = target_data_file.read()
            # Strip punctuation; str.translate(None, ...) was Python-2 only
            # (the Python-3 form was already present as a commented line).
            target_data = target_data.lower().translate(
                str.maketrans('', '', '!:,".;?'))
            target_data = target_data[8:-1]  # No '.' in lexfree dictionary
            targets.append(target_data)
    return inputs, targets
def get_data(rootdir=TIMIT_main_dir):
    """Walk *rootdir* and build (MFCC + delta + delta-delta) feature arrays
    for every .wav file, plus the matching cleaned transcription strings.

    Returns:
        (inputs, targets): list of float arrays of shape
        (n_frames - 2, 3 * n_cepstra) cast to theano's floatX, and a parallel
        list of transcription strings.
    """
    inputs = []
    targets = []
    for dir_path, sub_dirs, files in os.walk(rootdir):
        for file_name in files:  # renamed: `file` shadows the builtin
            wav_file_name = os.path.join(dir_path, file_name)
            if not wav_file_name.endswith('.wav'):
                continue
            input_data, f_s = sf.read(wav_file_name)
            mfcc_feat = mfcc(input_data, f_s)
            # Delta features (first-order frame differences).
            delta_feat = mfcc_feat[:-1] - mfcc_feat[1:]
            # Delta-delta features (second-order differences).
            deltadelta_feat = delta_feat[:-1] - delta_feat[1:]
            # Drop the first frames so all three streams have n_frames - 2
            # rows, then concatenate mfcc, delta and delta-delta features.
            full_input = np.concatenate(
                (mfcc_feat[2:], delta_feat[1:], deltadelta_feat), axis=1)
            # Rakeshvar wants one frame along each column but i am using Lasagne
            inputs.append(np.asarray(full_input, dtype=theano.config.floatX))
            text_file_name = wav_file_name[:-4] + '.txt'
            # Context manager so the transcript handle is always closed
            # (the original leaked it).
            with open(text_file_name) as target_data_file:
                target_data = target_data_file.read()
            # Strip punctuation; str.translate(None, ...) was Python-2 only.
            target_data = target_data.lower().translate(
                str.maketrans('', '', '!:,".;?'))
            target_data = target_data[8:-1]  # No '.' in lexfree dictionary
            targets.append(target_data)
    return inputs, targets
def read_wavs_trng(emotions, trng_path, pickle_path, use_pickle=False):
    """
    Utility function to read wav files, convert them into MFCC vectors and store in a pickle file
    (Pickle file is useful in case you re-train on the same data changing hyperparameters)
    """
    trng_data = {}
    if use_pickle and os.path.isfile(pickle_path):
        # Cached features available: load and skip the expensive wav pass.
        # Context manager fixes the original's leaked file handle.
        with open(pickle_path, "rb") as pickle_file:
            trng_data = pickle.load(pickle_file)
    else:
        for emo in emotions:
            mfccs = []
            for wavfile in glob.glob(trng_path + '/' + emo + '/*.wav'):
                rate, sig = wvf.read(wavfile)
                mfccs.append(mfcc(sig, rate))
            trng_data[emo] = mfccs
        # Persist for future runs (replaces the redundant write_pickle flag).
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(trng_data, pickle_file)
    return trng_data
def test_emo(test_file, gmms):
    """
    NOTE: Use only after training.
    Test a given file and predict an emotion for it.

    Returns the 2-best emotions plus the full per-emotion score dict.
    """
    sample_rate, signal = wvf.read(test_file)
    features = mfcc(signal, sample_rate)
    # Score the utterance against every trained per-emotion GMM.
    pred = {emotion: model.score(features) for emotion, model in gmms.items()}
    return emotions_nbest(pred, 2), pred
# speech_recognizer.py — source file
# Project: Artificial-Intelligence-with-Python
# Author: PacktPublishing
# (site metadata: 24 views, 0 favorites, 0 likes, 0 comments)
def run_tests(test_files):
    """Classify each input wav file with every trained HMM and print the
    folder-derived original label next to the predicted one.
    """
    for test_file in test_files:
        # Read input file
        sampling_freq, signal = wavfile.read(test_file)
        # Extract MFCC features (suppress library deprecation chatter).
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            features_mfcc = mfcc(signal, sampling_freq)
        max_score = -float('inf')
        # BUG FIX: the original initialized an unused `output_label` but
        # printed `predicted_label`, a NameError when speech_models is empty.
        predicted_label = None
        # Run the current feature vector through all the HMM models and pick
        # the one with the highest score.
        for model, label in speech_models:
            score = model.compute_score(features_mfcc)
            if score > max_score:
                max_score = score
                predicted_label = label
        # The true label is the immediate parent directory of the test file.
        start_index = test_file.find('/') + 1
        end_index = test_file.rfind('/')
        original_label = test_file[start_index:end_index]
        print('\nOriginal: ', original_label)
        print('Predicted:', predicted_label)
def make_split_audio_array(folder, num_splits=5):
    """Split every wav file in *folder* into overlapping chunks and return
    them as a 2-D array (one flattened chunk per row).

    Each of the num_splits - 1 chunks is two base-chunks long, so consecutive
    chunks overlap by half.
    """
    chunks = []
    for filename in os.listdir(folder):
        if filename.endswith('wav'):
            # BUG FIX: os.listdir yields bare names; join with the folder so
            # the file can be found regardless of the working directory.
            normed_sig = make_standard_length(os.path.join(folder, filename))
            # BUG FIX: use floor division — a float chunk size raises
            # TypeError when used as a slice index on Python 3.
            chunk = normed_sig.shape[0] // num_splits
            for i in range(num_splits - 1):
                chunks.append(normed_sig[i * chunk:(i + 2) * chunk])
    arr = np.array(chunks)
    return arr.reshape(arr.shape[0], -1)
# for input wav file outputs (13, 2999) mfcc np array
def make_normed_mfcc(filename, outrate=8000):
    """Return the MFCC array of a length-normalized signal, transposed so
    each column holds one frame (e.g. (13, 2999) for the standard length)."""
    signal = make_standard_length(filename)
    features = mfcc(signal, outrate)
    return features.T
# make mfcc np array from wav file using librosa package
def make_librosa_mfcc(filename):
    """Compute a 13-coefficient MFCC array from a wav file using librosa."""
    samples, sample_rate = librosa.load(filename)
    return librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=13)
# make mfcc np array from wav file using speech features package
def make_mfcc(filename):
    """MFCC features of a wav file (speech-features package), transposed so
    each column is one frame."""
    rate, signal = wav.read(filename)
    features = mfcc(signal, rate)
    return features.T
# for folder containing wav files, output numpy array of normed mfcc
def make_mean_mfcc(filename):
    """Return the per-coefficient mean MFCC vector of a wav file, or None if
    the file cannot be read or processed (best-effort, as in the original)."""
    try:
        rate, sig = wav.read(filename)
        mfcc_feat = mfcc(sig, rate)
        return np.mean(mfcc_feat, axis=0)
    except Exception:
        # FIX: was a bare `except: pass`, which also swallowed
        # KeyboardInterrupt/SystemExit. Keep the best-effort contract
        # (return None on any processing failure) but let exits propagate.
        return None
# write new csv corresponding to dataframe of given language and gender
def make_mean_mfcc_df(folder):
    """Build a DataFrame with one row per wav file in *folder*, each row being
    that file's mean MFCC vector (averaged over frames)."""
    mean_vectors = []
    for filename in os.listdir(folder):
        # FIX: skip non-wav entries (sibling helpers filter the same way);
        # the original crashed on any stray file in the folder.
        if not filename.endswith('wav'):
            continue
        # FIX: os.listdir yields bare names — join with the folder so wav.read
        # works regardless of the current working directory.
        rate, sig = wav.read(os.path.join(folder, filename))
        mfcc_feat = mfcc(sig, rate)
        mean_vectors.append(np.mean(mfcc_feat, axis=0))
    flat = [vec.ravel() for vec in mean_vectors]
    stacked = np.vstack(flat)
    return pd.DataFrame(stacked)
# speech_recognizer.py — source file
# Project: Artificial-Intelligence-with-Python
# Author: PacktPublishing
# (site metadata: 16 views, 0 favorites, 0 likes, 0 comments)
def build_models(input_folder):
    """Train one HMM per sub-folder of *input_folder*.

    Each sub-folder name is the word label; the last wav file of each folder
    is left out for testing. Returns a list of (model, label) tuples.
    """
    # Initialize the variable to store all the models
    speech_models = []
    # Parse the input directory
    for dirname in os.listdir(input_folder):
        subfolder = os.path.join(input_folder, dirname)
        if not os.path.isdir(subfolder):
            continue
        # FIX: the directory name IS the label; slicing on rfind('/') gave
        # the wrong answer on Windows-style separators.
        label = dirname
        # Create a list of files to be used for training.
        # We will leave one file per folder for testing.
        training_files = [x for x in os.listdir(subfolder)
                          if x.endswith('.wav')][:-1]
        # Collect per-file feature blocks and stack once at the end —
        # np.append in a loop re-copies X every iteration (quadratic).
        feature_blocks = []
        for filename in training_files:
            filepath = os.path.join(subfolder, filename)
            # Read the audio signal from the input file
            sampling_freq, signal = wavfile.read(filepath)
            # Extract the MFCC features (suppress library warnings).
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                feature_blocks.append(mfcc(signal, sampling_freq))
        # Empty folder behaves like the original: train on an empty array.
        X = np.vstack(feature_blocks) if feature_blocks else np.array([])
        # Create and train the HMM for the current word.
        model = ModelHMM()
        model.train(X)
        speech_models.append((model, label))
    return speech_models
# Define a function to run tests on input files