import numpy as np

# Globals expected from the surrounding script: `human` (expert trajectory),
# `inPlay` (list of (start, end) segments), `autoreg_game_context`
# (per-frame context features), and `tao` (autoregressive lookback length).
def interpolate_learned_policy(old_policy, new_policy, interpolate, old_coeff, new_coeff, weight, method):
    if method == "stack_vel_pos":  # `is` checks identity, not string equality
        learned_trajectory = np.zeros(human.shape)
        for item in inPlay:
            # Seed the first `tao` frames of each segment with the expert trajectory.
            for index in np.arange(item[0], item[0] + tao):
                learned_trajectory[index] = human[index]
            # Roll the policy out autoregressively over the rest of the segment.
            for index in np.arange(item[0] + tao, item[1] + 1):
                # Feature = game context stacked with tao-1 velocity terms and
                # tao position terms taken from the trajectory predicted so far.
                feature = autoreg_game_context[index, :]
                for i in range(tao - 1):
                    feature = np.append(feature, learned_trajectory[index - (i + 1)] - learned_trajectory[index - (i + 2)])
                for i in range(tao):
                    feature = np.append(feature, learned_trajectory[index - (i + 1)])
                # Most recent tao predictions, newest first, for the smoothing term.
                previous_prediction = learned_trajectory[index - tao:index].copy()
                previous_prediction = previous_prediction[::-1]
                # Blend each policy's raw prediction with its autoregressive smoother.
                old_model_predict = (old_policy.predict(feature) + np.inner(old_coeff, previous_prediction) * weight) / (1 + weight)
                new_model_predict = (new_policy.predict(feature) + np.inner(new_coeff, previous_prediction) * weight) / (1 + weight)
                # Interpolate between the smoothed old and new policy outputs
                # (interpolating the raw predictions directly would drop the smoothing term).
                learned_trajectory[index] = interpolate * new_model_predict + (1 - interpolate) * old_model_predict
        return learned_trajectory
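For context, here is a minimal usage sketch. Everything in it is an assumption for illustration: the `DummyPolicy` class stands in for whatever trained regressor exposes `.predict(feature)`, and the globals (`tao`, `human`, `inPlay`, `autoreg_game_context`) are filled with synthetic values with the shapes the function expects.

import numpy as np

# Hypothetical stand-in for a trained regressor exposing .predict(feature).
class DummyPolicy:
    def __init__(self, w):
        self.w = w
    def predict(self, feature):
        # Linear map from the stacked feature vector to a scalar prediction.
        return float(np.dot(self.w, feature))

# Assumed globals matching what interpolate_learned_policy reads.
tao = 5                                            # autoregressive lookback
human = np.cumsum(np.random.randn(200) * 0.1)      # synthetic 1-D expert trajectory
inPlay = [(0, 199)]                                # one (start, end) segment
autoreg_game_context = np.random.randn(200, 10)    # per-frame context features

# Feature layout: context (10) + (tao - 1) velocity terms + tao position terms.
feat_dim = autoreg_game_context.shape[1] + (tao - 1) + tao
old_policy = DummyPolicy(np.random.randn(feat_dim) * 0.01)
new_policy = DummyPolicy(np.random.randn(feat_dim) * 0.01)

# Equal-weight autoregressive smoothing coefficients (length tao).
old_coeff = np.ones(tao) / tao
new_coeff = np.ones(tao) / tao

traj = interpolate_learned_policy(old_policy, new_policy, interpolate=0.5,
                                  old_coeff=old_coeff, new_coeff=new_coeff,
                                  weight=1.0, method="stack_vel_pos")
print(traj.shape)  # same shape as `human`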