# data_handling.py -- source file (Python code snippet)

def XsSeg2Xae(Xs, Xs_mask, segs, maxUtt, maxLen, nResample=None, check_output=False):
    """Cut each utterance into segments ("words") and pack them into a fixed-size array.

    For every utterance, masked frames are dropped, the remaining frames are
    split at the segment boundaries, and each resulting word is written into
    one slot of a zero-initialised target array. Words are right-aligned in
    the utterance (unused slots come first) and, when no resampling is done,
    frames are right-aligned inside their slot.

    Args:
        Xs: Array indexed as (utterance, timestep, frame feature).
        Xs_mask: Per-utterance mask over timesteps; truthy entries mark
            frames that are discarded before segmentation.
        segs: Per-utterance segmentation indicators. A nonzero value at
            position ``t`` (counted over the unmasked frames) starts a new
            word at frame ``t``.
        maxUtt: Number of word slots per utterance. Words beyond this count
            are dropped.
        maxLen: Maximum number of frames kept per word. Longer words are
            truncated to their first ``maxLen`` frames.
        nResample: If truthy, every (truncated) word is resampled to exactly
            this many frames with ``resample`` (presumably
            ``scipy.signal.resample`` -- confirm against the file's imports);
            single-frame words are repeated instead. Each slot then has
            ``nResample`` rows rather than ``maxLen``.
        check_output: If true, rebuild the frame sequence from the output and
            assert that it matches the unmasked input frames.

    Returns:
        A tuple ``(Xae, deletedChars, oneLetter)``:

        * ``Xae`` -- array of shape
          ``(len(Xs), maxUtt, nResample or maxLen, frame_size)``.
        * ``deletedChars`` -- array of shape ``(len(Xs), maxUtt)`` with the
          number of frames truncated from each kept word, plus an equal share
          of the frames lost in words dropped for exceeding ``maxUtt``.
        * ``oneLetter`` -- array of shape ``(len(Xs), maxUtt)`` holding 1 for
          each kept word whose (truncated) length is a single frame.

    Raises:
        AssertionError: If ``check_output`` is set and a reconstituted frame
            differs from the source frame.
        IndexError: If ``check_output`` is set and the reconstituted sequence
            is shorter than the source (e.g. after clipping).
    """
    Xae = np.split(Xs, len(Xs))
    FRAME_SIZE = Xs.shape[-1]
    # Number of frame rows in every word slot of the output.
    slot_rows = nResample if nResample else maxLen
    deletedChars = np.zeros((len(Xae), maxUtt))
    oneLetter = np.zeros((len(Xae), maxUtt))
    for i, utt in enumerate(Xae):
        utt_target = np.zeros((maxUtt, slot_rows, FRAME_SIZE))
        # Drop masked frames, then split what is left at the segment onsets.
        utt = np.squeeze(utt, 0)[np.logical_not(Xs_mask[i])]
        utt = np.split(utt, np.where(segs[i, :len(utt)])[0])
        # A boundary at frame 0 yields an empty leading piece; discard it.
        if len(utt[0]) == 0:
            utt.pop(0)
        n_words = min(len(utt), maxUtt)
        padwords = maxUtt - n_words
        for j in range(n_words):
            n_frames = len(utt[j])
            w_len = min(n_frames, maxLen)
            slot = padwords + j
            deletedChars[i, slot] += max(0, n_frames - maxLen)
            oneLetter[i, slot] += int(w_len == 1)
            if nResample:
                if w_len > 1:
                    word = resample(utt[j][:w_len], nResample)
                else:
                    word = np.repeat(utt[j][:w_len], nResample, axis=0)
                # The resampled word always has nResample rows, so it fills
                # the whole slot regardless of how maxLen compares to it.
                w_len = nResample
            else:
                word = utt[j][:w_len]
            utt_target[slot, -w_len:] = word
        # Frames belonging to words that did not fit into maxUtt slots.
        extraWDel = sum(len(word) for word in utt[maxUtt:])
        ## Uniformly distribute clipping penalty for excess words
        deletedChars[i, :] += float(extraWDel) / maxUtt
        Xae[i] = utt_target
    Xae = np.stack(Xae)
    ## NOTE: Reconstitution will fail if there has been any clipping.
    ## Do not use this feature unless maxutt and maxlen are large enough
    ## to make clipping very unlikely.
    ## Currently only works in acoustic mode.
    if check_output:
        for i in range(len(Xs)):
            src = Xs[i][np.logical_not(Xs_mask[i])]
            target = Xae[i]
            reconstituted = np.zeros((0, FRAME_SIZE))
            for wi in range(maxUtt):
                # Padding rows are all-zero, so keep only rows with some
                # nonzero feature (an all-zero source frame is dropped too).
                w = target[wi][np.where(target[wi].any(-1))]
                reconstituted = np.concatenate([reconstituted, w])
            for j in range(len(src)):
                # Clamp the context window so j == 0 does not produce an
                # empty slice in the failure message.
                lo = max(j - 1, 0)
                assert np.allclose(src[j], reconstituted[j]), \
                       '''Reconstitution of MFCC frames failed at timestep %d.
                       Source region: %s\n Reconstituted region: %s''' \
                       %(j, src[lo:j+2], reconstituted[lo:j+2])

    return Xae, deletedChars, oneLetter