genData.py 文件源码-python代码片段

def genRegressionData(n_samples: int = 100, n_features: int = 2, n_redundant: int = 0, strRel: int = 1,
                      n_repeated: int = 0, noise: float = 1, random_state: object = None,
                      partition = None) -> object:
    """Generate synthetic regression data

    Parameters
    ----------
    n_samples : int, optional
        Number of samples
    n_features : int, optional
        Number of features
    n_redundant : int, optional
        Number of features which are part of redundant subsets (weakly relevant)
    strRel : int, optional
        Number of features which are mandatory for the underlying model (strongly relevant)
    n_repeated : int, optional
        Number of features which are clones of existing ones. 
    noise : float, optional
        Noise of the created samples around ground truth.
    random_state : object, optional
        Randomstate object used for generation.

    Returns
    -------
    X : array of shape [n_samples, n_features]
        The generated samples.
    y : array of shape [n_samples]
        The output values (target).

    Raises
    ------
    ValueError
    Wrong parameters for specified amonut of features/samples.
    """ 

    _checkParam(**locals())
    random_state = check_random_state(random_state)

    X = np.zeros((int(n_samples), int(n_features)))

    # Find partitions which defíne the weakly relevant subsets
    if partition is None:
        # Legacy behaviour yielding subsets of size 2
        partition =  int(n_redundant / 2) * [2]
    part_size = len(partition) 

    X_informative, Y = make_regression(n_features=int(strRel + part_size),
                                        n_samples=int(n_samples),
                                        noise=noise,
                                        n_informative=int(strRel),
                                        random_state=random_state,
                                        shuffle=False)

    X = _fillVariableSpace(**locals())

    return X, Y