def process_data(wav_files, phn_files):
    """Build per-utterance feature matrices and label sequences, then batch them.

    For every index ``i``, ``wav_files[i]`` is read and converted to a
    feature matrix (``mfcc`` and ``logfbank`` outputs combined by
    ``join_features``), and ``phn_files[i]`` is parsed as a space-delimited
    file whose third column holds a phoneme symbol.

    Args:
        wav_files: Sequence of audio file paths.
        phn_files: Sequence of label file paths, index-aligned with
            ``wav_files``.

    Returns:
        The result of ``lists_batches(inputs, targets)``, where ``inputs`` is
        the list of feature matrices and ``targets`` the list of integer
        label lists, both in the order of ``wav_files``.

    Raises:
        IndexError: If ``phn_files`` is shorter than ``wav_files``, or a
            non-empty label row has fewer than three fields.
    """
    inputs = []
    targets = []
    for i, wav_path in enumerate(tqdm(wav_files)):
        # Extract acoustic features from the audio file.
        # NOTE(review): `wav`, `mfcc` and `logfbank` look like
        # scipy.io.wavfile / python_speech_features -- confirm against the
        # file's imports.
        rate, sig = wav.read(wav_path)
        mfcc_feat = mfcc(sig, rate)
        fbank_feat = logfbank(sig, rate)
        # time_stamp x n_features
        acoustic_features = join_features(mfcc_feat, fbank_feat)

        # Extract the label sequence from the phoneme file.
        phn_labels = []
        # csv.reader needs a text-mode file on Python 3 (binary mode raises
        # "iterator should return strings, not bytes"); newline='' is the
        # mode recommended by the csv documentation.
        with open(phn_files[i], newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=' '):
                if not row:
                    # Blank line: nothing to label.
                    continue
                symbol = row[2]
                if symbol == 'q':
                    # Rows labelled 'q' are dropped from the target sequence.
                    continue
                # Symbols missing from phoneme_48_39 are looked up unchanged.
                # NOTE(review): the "- 1" presumably turns 1-based ids in
                # phoneme_set_39 into 0-based class indices -- confirm.
                folded = phoneme_48_39.get(symbol, symbol)
                phn_labels.append(phoneme_set_39[folded] - 1)

        inputs.append(acoustic_features)
        targets.append(phn_labels)
    return lists_batches(inputs, targets)
评论列表
文章目录