utils.py 文件源码-python代码片段

utils.py 文件源码

python

阅读 21 收藏 0 点赞 0 评论 0

def process_data(wav_files, phn_files):
    """Build per-file acoustic features and phoneme label sequences.

    Each ``wav_files[i]`` is read with ``wav.read`` and converted to MFCC and
    log filterbank features, which ``join_features`` combines into a single
    feature array. The matching ``phn_files[i]`` is parsed as space-delimited
    rows whose third field is the phoneme symbol.

    Args:
        wav_files: Audio file paths.
        phn_files: Phoneme label file paths, index-aligned with ``wav_files``.

    Returns:
        Whatever ``lists_batches(inputs, targets)`` returns, where ``inputs``
        is the list of per-file feature arrays and ``targets`` is the list of
        per-file label lists.

    Raises:
        IndexError: If ``phn_files`` has fewer entries than ``wav_files``, or
            a non-blank label row has fewer than three fields.
    """
    inputs = []
    targets = []
    for i, wav_file in enumerate(tqdm(wav_files)):
        # Extract acoustic features from the audio file.
        rate, sig = wav.read(wav_file)
        mfcc_feat = mfcc(sig, rate)
        fbank_feat = logfbank(sig, rate)
        # Per the original author's note: time_stamp x n_features.
        acoustic_features = join_features(mfcc_feat, fbank_feat)

        # Extract the label sequence from the phoneme file.
        phn_labels = []
        # Text mode is required: on Python 3, csv.reader rejects the bytes
        # produced by a file opened with 'rb'.
        with open(phn_files[i], 'r') as csvfile:
            for row in csv.reader(csvfile, delimiter=' '):
                if not row:
                    # Blank line (e.g. trailing newline at end of file).
                    continue
                symbol = row[2]
                if symbol == 'q':
                    # Rows labelled 'q' contribute no target.
                    continue
                # Symbols missing from phoneme_48_39 map to themselves.
                folded = phoneme_48_39.get(symbol, symbol)
                # NOTE(review): the -1 presumably converts 1-based ids in
                # phoneme_set_39 to 0-based labels -- confirm against its
                # definition.
                phn_labels.append(phoneme_set_39[folded] - 1)

        inputs.append(acoustic_features)
        targets.append(phn_labels)

    return lists_batches(inputs, targets)