def discount_rewards(x, gamma):
    """Return the discounted cumulative sum of ``x`` along its first axis.

    Computes a vector ``y`` such that::

        y[i] = x[i] + gamma * x[i+1] + gamma^2 * x[i+2] + ...

    Args:
        x: Sequence or array of values (e.g. per-step rewards). It must
            support reverse slicing (``x[::-1]``); for multi-dimensional
            input the sum runs along axis 0.
        gamma: Discount factor applied per step.

    Returns:
        An array with the same shape as ``x`` holding the discounted sums.
    """
    # Source: http://stackoverflow.com/a/12201744/1735784
    # The IIR filter y[n] = x[n] + gamma * y[n-1] accumulates forwards, so
    # feed it the reversed input and reverse the output again to accumulate
    # from the end of the sequence backwards.
    reversed_sums = signal.lfilter([1], [1, -gamma], x[::-1], axis=0)
    return reversed_sums[::-1]
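# Web-page navigation residue from the scraped source (not part of the code):
# "Comment list" / "Article table of contents".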