def interpolate_test_policy(old_policy, new_policy, interpolate, reference_path, context, old_coeff, new_coeff, weight, method):
    """Roll out a linear blend of two policies autoregressively along a path.

    The first ``tao`` entries of the output are copied from
    ``reference_path`` (``tao`` is a module-level name defined outside this
    function). Every later entry ``i`` is predicted from a feature vector
    made of ``context[i]``, the ``tao - 1`` most recent differences between
    consecutive outputs, and the ``tao`` most recent outputs (newest first).
    Each policy's prediction is combined with a linear autoregression over
    those recent outputs, and the two combined results are mixed linearly.

    Args:
        old_policy: Object exposing ``predict(feature)``.
        new_policy: Object exposing ``predict(feature)``.
        interpolate: Weight given to the new model's result; the old model's
            result receives ``1 - interpolate``.
        reference_path: Array whose shape fixes the output shape and whose
            first ``tao`` entries seed the rollout.
        context: Per-step inputs; ``context[i]`` is the leading part of the
            feature vector at step ``i``.
        old_coeff: Coefficients applied (via ``np.inner``) to the ``tao``
            most recent outputs, newest first, for the old model.
        new_coeff: Same as ``old_coeff`` but for the new model.
        weight: Relative weight of the autoregressive term; each model's
            result is ``(prediction + weight * autoregression) / (1 + weight)``.
        method: Rollout scheme. Only ``"stack_vel_pos"`` is implemented.

    Returns:
        Array with the shape of ``reference_path``. It is all zeros when
        ``method`` is not ``"stack_vel_pos"``.
    """
    Y_predict = np.zeros(reference_path.shape)

    # Compare by value: `is` tests object identity and only matched when the
    # caller's string happened to be the very same interned object.
    if method != "stack_vel_pos":
        # Unrecognised methods keep the existing behaviour of returning zeros.
        return Y_predict

    # Must stay a sequential loop: step i depends on the outputs of the
    # previous tao steps.
    for i in range(len(reference_path)):
        if i < tao:
            # Note: the first tao frames are taken from the reference as-is.
            Y_predict[i] = reference_path[i]
            continue

        # The tao most recent outputs, newest first: Y[i-1], ..., Y[i-tao].
        previous_prediction = Y_predict[i - tao:i][::-1]
        # Differences between consecutive recent outputs, newest first:
        # Y[i-1]-Y[i-2], ..., Y[i-tao+1]-Y[i-tao].
        velocities = previous_prediction[:-1] - previous_prediction[1:]
        feature = np.hstack(
            (context[i], velocities.ravel(), previous_prediction.ravel())
        )

        old_model_predict = (
            old_policy.predict(feature)
            + np.inner(old_coeff, previous_prediction) * weight
        ) / (1 + weight)
        new_model_predict = (
            new_policy.predict(feature)
            + np.inner(new_coeff, previous_prediction) * weight
        ) / (1 + weight)

        Y_predict[i] = interpolate * new_model_predict + (1 - interpolate) * old_model_predict

    return Y_predict
评论列表
文章目录