def _plot_value_function(self, value_functions, n_iter):
    """Display the value function as a 3D surface over (dealer card, player sum).

    Each entry of ``self.states`` is parsed as ``"<dealer>#<player>"``, where
    the dealer part is either ``'A'`` or an integer string and the player part
    is an integer string. Only states whose player value lies in 12..21 are
    plotted; every other state is ignored and unfilled cells stay at 0.

    Args:
        value_functions: Indexable by state position (same ordering as
            ``self.states``); supplies the value plotted for each state.
        n_iter: Iteration number shown in the plot title.

    Blocks in ``plt.show()`` until the figure window is closed (when an
    interactive backend is in use).
    """
    # Rows: player sum 12..21 (offset by 12). Columns: dealer card index,
    # with the ace at column 0 and card "k" at column k - 1.
    value_matrix = numpy.zeros((10, 10), dtype='float')
    for stateid, state in enumerate(self.states):
        dealer_showing, player_state = state.split('#')
        dealer_showing = 0 if dealer_showing == 'A' else int(dealer_showing) - 1
        player_state = int(player_state)
        if 12 <= player_state < 22:
            value_matrix[player_state - 12, dealer_showing] = value_functions[stateid]

    fig = plt.figure()
    # Create the 3D axes through the figure. Instantiating Axes3D(fig) directly
    # no longer attaches the axes to the figure on current Matplotlib releases
    # (the auto-add behaviour was deprecated in 3.4), which yields a blank plot.
    # The '3d' projection is registered by the file's existing mplot3d import.
    ax = fig.add_subplot(111, projection='3d')

    # meshgrid returns the x-coordinates first: dealer index varies along
    # columns and player sum along rows, matching value_matrix's layout.
    dealer_grid, player_grid = numpy.meshgrid(range(10), range(12, 22))
    ax.plot_surface(dealer_grid, player_grid, value_matrix,
                    rstride=1, cstride=1, cmap='coolwarm')
    ax.set_title('value function in iteration %i' % n_iter)
    ax.set_xlabel('dealer showing')
    ax.set_ylabel('player sum')
    ax.set_zlabel('value function')
    plt.show()
monte_carlo_control.py 文件源码
python
阅读 30
收藏 0
点赞 0
评论 0
评论列表
文章目录