def _evaluate_rollout(self, state, limit):
# _, player, legal_moves = Game.possible_moves(state)
winner = 0
# old_board = Board()
# old_board.stones = state
player = None
for i in range(limit):
legal_states, p, legal_moves = Game.possible_moves(state)
if player is None:
player = p
if len(legal_states) == 0:
break
probs = self._rollout(state, legal_moves)
mask = np.full_like(probs, -0.01)
mask[:, legal_moves] = probs[:, legal_moves]
probs = mask
best_move = np.argmax(probs, 1)[0]
idx = np.where(legal_moves == best_move)[0]
# if idx.size == 0:
# print(i, idx)
# print(best_move)
# print(probs.shape)
# print(legal_moves)
# print(probs)
assert idx.size == 1
idx = idx[0]
st1 = legal_states[idx]
over, winner, last_loc = st1.is_over(state)
if over:
break
state = st1
else:
# If no break from the loop, issue a warning.
print("WARNING: rollout reached move limit")
if winner == 0:
return 0
else:
return 1 if winner == player else -1
# (Web page residue: "comment list" / "table of contents" navigation labels; not part of the program.)