def XsSeg2XaePhon(Xs, Xs_mask, segs, maxLen, nResample=None):
    """Cut utterances into segments and pack each one into a fixed-size window.

    For every utterance ``Xs[i]`` the frames whose ``Xs_mask[i]`` entry is
    truthy are dropped, the surviving frames are split at the positions where
    ``segs[i]`` is non-zero (each such position starts a new segment), and
    every segment is written into its own zero-initialised window.

    * Without ``nResample`` a segment is truncated to its first ``maxLen``
      frames and right-aligned in a ``maxLen``-frame window, so shorter
      segments are zero-padded on the left.
    * With ``nResample`` the truncated segment is stretched to exactly
      ``nResample`` frames: multi-frame segments go through ``resample`` and
      single-frame segments are repeated. The window then has ``nResample``
      frames and is filled completely.

    NOTE(review): ``resample`` is not defined here; it is looked up at module
    scope (presumably ``scipy.signal.resample`` -- confirm against the file's
    imports).

    Args:
        Xs: Array indexed as ``[utterance, frame, feature]``; the size of the
            last axis is used as the per-frame feature size.
        Xs_mask: Per-utterance frame mask; truthy entries mark frames that are
            removed before segmentation.
        segs: Array indexed as ``[utterance, frame]``; non-zero entries mark
            the first frame of a segment. Only the first ``k`` entries are
            read, where ``k`` is the number of unmasked frames.
        maxLen: Maximum number of frames kept per segment.
        nResample: Optional number of frames every segment is resampled to.
            A falsy value (``None`` or ``0``) disables resampling.

    Returns:
        A tuple ``(Xae_phon, deletedChars, oneLetter)``:

        * ``Xae_phon``: array of shape ``(n_segments, window, feature)`` where
          ``window`` is ``nResample`` if given, else ``maxLen``.
        * ``deletedChars``: per segment, the number of frames cut off by the
          ``maxLen`` truncation.
        * ``oneLetter``: per segment, ``1`` if its truncated length is exactly
          one frame, else ``0``.

        When there are no segments at all, the three arrays are empty and
        ``Xae_phon`` has shape ``(0, window, feature)``.
    """
    frame_size = Xs.shape[-1]
    # The window length does not depend on the segment, so compute it once.
    window_len = nResample if nResample else maxLen

    deletedChars = []
    oneLetter = []
    Xae_phon = []

    # Iterating the array yields one (frame, feature) slice per utterance and,
    # unlike np.split(Xs, len(Xs)), also works for an empty batch.
    for i, utt in enumerate(Xs):
        frames = utt[np.logical_not(Xs_mask[i])]
        boundaries = np.where(segs[i, :len(frames)])[0]
        segments = np.split(frames, boundaries)
        # A boundary at frame 0 makes np.split emit an empty leading chunk.
        if len(segments[0]) == 0:
            segments.pop(0)

        for segment in segments:
            w_len = min(len(segment), maxLen)
            deletedChars.append(max(0, len(segment) - maxLen))
            oneLetter.append(int(w_len == 1))

            word = segment[:w_len]
            if nResample:
                if w_len > 1:
                    word = resample(word, nResample)
                else:
                    # A single frame cannot be resampled; repeat it instead.
                    word = np.repeat(word, nResample, axis=0)
                # The word now spans the whole nResample-frame window. Using
                # maxLen here would break whenever nResample > maxLen.
                w_len = nResample

            w_target = np.zeros((window_len, frame_size))
            # Right-align: any padding stays as leading zero frames.
            w_target[-w_len:] = word
            Xae_phon.append(w_target)

    if Xae_phon:
        Xae_phon = np.stack(Xae_phon)
    else:
        # np.stack rejects an empty list; return a correctly shaped empty array.
        Xae_phon = np.zeros((0, window_len, frame_size))
    deletedChars = np.array(deletedChars, dtype=int)
    oneLetter = np.array(oneLetter, dtype=int)
    return Xae_phon, deletedChars, oneLetter
评论列表
文章目录