def get_greedy(self, v):
    """
    Compute optimal actions taking v as the value function.

    For every grid point (w, π) the value of accepting,
    ``w / (1 - β)``, is compared with the value of rejecting,
    ``c + β * integral``, where the integral is taken over
    ``[0, self.w_max]`` by fixed-order Gaussian quadrature.

    Parameters
    ----------
    v : array_like(float, ndim=1, length=len(self.grid_points))
        An approximate value function represented as a
        one-dimensional array, with one entry per row of
        ``self.grid_points``.

    Returns
    -------
    policy : ndarray(int, ndim=1, length=len(v))
        The decision to accept or reject an offer where 1 indicates
        accept and 0 indicates reject

    """
    # NOTE(review): the Greek identifiers β and π were restored from
    # mis-encoded `?` characters -- confirm the attribute really is
    # spelled `self.β` in the enclosing class.

    # == Simplify names == #
    f, g, β, c, q = self.f, self.g, self.β, self.c, self.q
    w_max = self.w_max

    # Interpolate v over the (w, π) grid so it can be evaluated off-grid.
    # NOTE(review): LinearNDInterpolator yields nan outside the convex
    # hull of the grid by default; a nan comparison below is False, so
    # such points come out as "reject" -- confirm q stays inside the grid.
    vf = LinearNDInterpolator(self.grid_points, v)

    def integrand(m, π):
        # Interpolated value at (m, q(m, π)) weighted by the π-mixture
        # of f and g (presumably the two candidate densities of the
        # draw m, with q the updated belief -- verify against the class).
        return vf(m, q(m, π)) * (π * f(m) + (1 - π) * g(m))

    policy = np.zeros(len(v), dtype=int)

    for i, (w, π) in enumerate(self.grid_points):
        v1 = w / (1 - β)  # value of accepting
        integral, _ = fixed_quad(integrand, 0, w_max, args=(π,))
        v2 = c + β * integral  # value of rejecting
        policy[i] = v1 > v2  # Evaluates to 1 or 0

    return policy
评论列表
文章目录