def get_stochastic_network_move(session, input_layer, output_layer, board_state, side,
                                valid_only=False, game_spec=None):
    """Choose a move for the given board_state using a stochastic policy.

    The values from the output_layer are used as a categorical probability
    distribution from which a single move is sampled.

    Args:
        session (tf.Session): Session used to run this network.
        input_layer (tf.Placeholder): Placeholder to the network used to feed in the board_state.
        output_layer (tf.Tensor): Tensor that will output the probabilities of the moves, we expect this to be of
            dimensions (None, board_squares) and the sum of values across the board_squares to be 1.
        board_state: The board_state we want to get the move for.
        side: The side that is making the move; if -1 the board is negated so the
            network always sees the position from the mover's perspective.
        valid_only (bool): If True, zero out the probability of moves that
            game_spec reports as unavailable before sampling.
        game_spec: Game specification object; required when valid_only is True.
            Must provide available_moves, board_squares and tuple_move_to_flat.

    Returns:
        (np.array) Its shape is (board_squares), and it is a 1 hot encoding for the move the network has chosen.
    """
    np_board_state = np.array(board_state)
    if side == -1:
        # present the position from the current mover's point of view
        np_board_state = -np_board_state

    np_board_state = np_board_state.reshape(1, *input_layer.get_shape().as_list()[1:])
    probability_of_actions = session.run(output_layer,
                                         feed_dict={input_layer: np_board_state})[0]

    if valid_only:
        available_moves = list(game_spec.available_moves(board_state))
        if len(available_moves) == 1:
            # only one legal move: no need to sample, return it directly
            move = np.zeros(game_spec.board_squares())
            np.put(move, game_spec.tuple_move_to_flat(available_moves[0]), 1)
            return move

        available_moves_flat = [game_spec.tuple_move_to_flat(x) for x in available_moves]
        for i in range(game_spec.board_squares()):
            if i not in available_moves_flat:
                probability_of_actions[i] = 0.

        prob_mag = probability_of_actions.sum()
        if prob_mag != 0.:
            # renormalize the remaining mass over the valid moves
            probability_of_actions /= prob_mag
        else:
            # BUG FIX: previously an all-zero distribution fell through to
            # np.random.multinomial, which assigns the leftover probability mass
            # to the LAST square — potentially an invalid move even though
            # valid_only=True. Fall back to a uniform choice over valid moves.
            for i in available_moves_flat:
                probability_of_actions[i] = 1. / len(available_moves_flat)

    try:
        move = np.random.multinomial(1, probability_of_actions)
    except ValueError:
        # sometimes because of rounding errors we end up with probability_of_actions
        # summing to greater than 1, so shrink slightly to get a valid pvals vector
        move = np.random.multinomial(1, probability_of_actions / (1. + 1e-6))

    return move