def get_stochastic_network_move(session, input_layer, output_layer, board_state, side,
                                valid_only=False, game_spec=None):
    """Choose a move for the given board_state using a stochastic policy.

    The values from the output_layer are used as a categorical probability
    distribution from which a single move is sampled.

    Args:
        session (tf.Session): Session used to run this network.
        input_layer (tf.Placeholder): Placeholder to the network used to feed in the board_state.
        output_layer (tf.Tensor): Tensor that will output the probabilities of the moves, we expect this to be of
            dimensions (None, board_squares) and the sum of values across the board_squares to be 1.
        board_state: The board_state we want to get the move for.
        side: The side that is making the move; if -1 the board is negated so the
            network always sees the position from the mover's perspective.
        valid_only (bool): If True, zero out the probability of moves that
            game_spec reports as unavailable before sampling.
        game_spec: Game specification object; required when valid_only is True.
            Must provide available_moves, board_squares and tuple_move_to_flat.

    Returns:
        (np.array) Its shape is (board_squares), and it is a 1 hot encoding for the move the network has chosen.
    """
    np_board_state = np.array(board_state)
    if side == -1:
        # present the position from the current mover's point of view
        np_board_state = -np_board_state

    np_board_state = np_board_state.reshape(1, *input_layer.get_shape().as_list()[1:])
    probability_of_actions = session.run(output_layer,
                                         feed_dict={input_layer: np_board_state})[0]

    if valid_only:
        available_moves = list(game_spec.available_moves(board_state))
        if len(available_moves) == 1:
            # only one legal move: no need to sample, return it directly
            move = np.zeros(game_spec.board_squares())
            np.put(move, game_spec.tuple_move_to_flat(available_moves[0]), 1)
            return move

        available_moves_flat = [game_spec.tuple_move_to_flat(x) for x in available_moves]
        for i in range(game_spec.board_squares()):
            if i not in available_moves_flat:
                probability_of_actions[i] = 0.

        prob_mag = probability_of_actions.sum()
        if prob_mag != 0.:
            # renormalize the remaining mass over the valid moves
            probability_of_actions /= prob_mag
        else:
            # BUG FIX: previously an all-zero distribution fell through to
            # np.random.multinomial, which assigns the leftover probability mass
            # to the LAST square — potentially an invalid move even though
            # valid_only=True. Fall back to a uniform choice over valid moves.
            for i in available_moves_flat:
                probability_of_actions[i] = 1. / len(available_moves_flat)

    try:
        move = np.random.multinomial(1, probability_of_actions)
    except ValueError:
        # sometimes because of rounding errors we end up with probability_of_actions
        # summing to greater than 1, so shrink slightly to get a valid pvals vector
        move = np.random.multinomial(1, probability_of_actions / (1. + 1e-6))

    return move