utils.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:phoneme_ctc 作者: tbornt 项目源码 文件源码
def process_data(wav_files, phn_files):
    """Turn paired audio / phoneme-label files into batched inputs and targets.

    For each index ``i``, ``wav_files[i]`` is read and converted to acoustic
    features, and ``phn_files[i]`` is parsed into a list of integer phoneme
    labels. The two resulting lists are handed to ``lists_batches``.

    Args:
        wav_files: Sequence of audio file paths, passed one by one to
            ``wav.read``.
        phn_files: Sequence of label file paths, index-aligned with
            ``wav_files``. Each file is parsed as space-delimited rows whose
            third field is the phoneme symbol.

    Returns:
        Whatever ``lists_batches(inputs, targets)`` returns (defined outside
        this function).

    Raises:
        IndexError: If ``phn_files`` is shorter than ``wav_files``, or a
            non-empty label row has fewer than three fields.
        KeyError: If a phoneme (after the ``phoneme_48_39`` mapping) is not a
            key of ``phoneme_set_39``.
    """
    inputs = []
    targets = []
    for i, wav_path in enumerate(tqdm(wav_files)):
        # Extract MFCC and log filter-bank features from the waveform.
        rate, sig = wav.read(wav_path)
        mfcc_feat = mfcc(sig, rate)
        fbank_feat = logfbank(sig, rate)
        acoustic_features = join_features(mfcc_feat, fbank_feat)  # time_stamp x n_features

        # Extract the label sequence from the matching phoneme file.
        phn_labels = []
        # The csv module needs a text-mode file opened with newline='' on
        # Python 3; a binary handle makes csv.reader raise on the first row.
        with open(phn_files[i], newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=' '):
                if not row:
                    # Blank line (e.g. trailing newline) carries no label.
                    continue
                phoneme = row[2]
                if phoneme == 'q':
                    # 'q' rows are dropped entirely.
                    # NOTE(review): presumably the glottal-stop symbol - confirm.
                    continue
                # Symbols missing from phoneme_48_39 are looked up unchanged.
                # NOTE(review): the -1 presumably makes a 1-based id
                # zero-based - confirm against phoneme_set_39.
                reduced = phoneme_48_39.get(phoneme, phoneme)
                phn_labels.append(phoneme_set_39[reduced] - 1)

        inputs.append(acoustic_features)
        targets.append(phn_labels)

    return lists_batches(inputs, targets)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号