def get_data(rootdir=TIMIT_main_dir):
    """Collect acoustic features and transcripts for every .wav under rootdir.

    For each ``.wav`` file found by walking ``rootdir`` the audio is read,
    ``mfcc`` features are computed, and first- and second-order frame
    differences are appended to them column-wise. The transcript is read
    from the file with the same path but a ``.txt`` extension.

    Args:
        rootdir: Directory that is walked recursively for ``.wav`` files.

    Returns:
        A tuple ``(inputs, targets)`` of two parallel lists: ``inputs`` holds
        one ``theano.config.floatX`` array per utterance, ``targets`` holds
        the matching lower-cased, punctuation-free transcript string.
    """
    # Characters removed from transcripts (no '.' in lexfree dictionary).
    punctuation = '!:,".;?'

    inputs = []
    targets = []
    for dir_path, _sub_dirs, files in os.walk(rootdir):
        for file_name in files:
            if not file_name.endswith('.wav'):
                continue
            wav_file_name = os.path.join(dir_path, file_name)

            input_data, f_s = sf.read(wav_file_name)
            mfcc_feat = mfcc(input_data, f_s)
            # Delta features: each frame minus the following frame.
            delta_feat = mfcc_feat[:-1] - mfcc_feat[1:]
            # Delta-delta features: the same difference applied to the deltas.
            deltadelta_feat = delta_feat[:-1] - delta_feat[1:]
            # Each differencing step yields one row fewer, so drop the first
            # two MFCC rows and the first delta row to align all three.
            mfcc_feat = mfcc_feat[2:]
            delta_feat = delta_feat[1:]
            # Concatenate mfcc, delta and delta-delta features per frame.
            full_input = np.concatenate(
                (mfcc_feat, delta_feat, deltadelta_feat), axis=1)
            # One frame per row (Lasagne layout), not one frame per column.
            inputs.append(np.asarray(full_input, dtype=theano.config.floatX))

            text_file_name = wav_file_name[:-4] + '.txt'
            # Context manager closes the transcript file even on error.
            with open(text_file_name) as target_data_file:
                raw_text = target_data_file.read()
            # Character filter instead of str.translate(None, ...), which
            # only exists on Python 2; this form behaves the same on 2 and 3.
            target_data = ''.join(
                ch for ch in raw_text.lower() if ch not in punctuation)
            # Drop the first 8 characters and the final character.
            # NOTE(review): presumably this strips a "<start> <end> " sample
            # index header and the trailing newline; a header of a different
            # width would not be removed cleanly -- confirm against the data.
            target_data = target_data[8:-1]
            targets.append(target_data)
    return inputs, targets
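# (scraped web-page navigation text, not part of the code) Comment list
# (scraped web-page navigation text, not part of the code) Table of contents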