def main():
args = check_argv()
print "Reading:", args.input_npz_fn
input_npz = np.load(args.input_npz_fn)
d_frame = input_npz[input_npz.keys()[0]].shape[1]
print "Frame dimensionality:", d_frame
if args.frame_dims is not None and args.frame_dims < d_frame:
d_frame = args.frame_dims
print "Reducing frame dimensionality:", d_frame
print "Downsampling:", args.technique
output_npz = {}
for key in input_npz:
# Limit input dimensionailty
y = input_npz[key][:, :args.frame_dims].T
# Downsample
if args.technique == "interpolate":
x = np.arange(y.shape[1])
f = interpolate.interp1d(x, y, kind="linear")
x_new = np.linspace(0, y.shape[1] - 1, args.n)
y_new = f(x_new).flatten(flatten_order) #.flatten("F")
elif args.technique == "resample":
y_new = signal.resample(y, args.n, axis=1).flatten(flatten_order) #.flatten("F")
elif args.technique == "rasanen":
# Taken from Rasenen et al., Interspeech, 2015
n_frames_in_multiple = int(np.floor(y.shape[1] / args.n)) * args.n
y_new = np.mean(
y[:, :n_frames_in_multiple].reshape((d_frame, args.n, -1)), axis=-1
).flatten(flatten_order) #.flatten("F")
# This was done in Rasenen et al., Interspeech, 2015, but didn't help here
# last_term = args.n/3. * np.log10(y.shape[1] * 10e-3) # Not sure if this should be in frames or ms
# y_new = np.hstack([y_new, last_term])
# Save result
output_npz[key] = y_new
print "Output dimensionality:", output_npz[output_npz.keys()[0]].shape[0]
print "Writing:", args.output_npz_fn
np.savez_compressed(args.output_npz_fn, **output_npz)
评论列表
文章目录