def genRegressionData(n_samples: int = 100, n_features: int = 2, n_redundant: int = 0, strRel: int = 1,
                      n_repeated: int = 0, noise: float = 1, random_state: object = None,
                      partition=None) -> tuple:
    """Generate synthetic regression data.

    Parameters
    ----------
    n_samples : int, optional
        Number of samples
    n_features : int, optional
        Number of features
    n_redundant : int, optional
        Number of features which are part of redundant subsets (weakly relevant)
    strRel : int, optional
        Number of features which are mandatory for the underlying model (strongly relevant)
    n_repeated : int, optional
        Number of features which are clones of existing ones.
    noise : float, optional
        Noise of the created samples around ground truth.
    random_state : object, optional
        Randomstate object used for generation (normalised with
        ``check_random_state`` before use).
    partition : list of int, optional
        Sizes of the weakly relevant subsets, one entry per subset.
        If None, ``int(n_redundant / 2)`` subsets of size 2 are used
        (legacy behaviour).

    Returns
    -------
    X : array of shape [n_samples, n_features]
        The generated samples.
    y : array of shape [n_samples]
        The output values (target).

    Raises
    ------
    ValueError
        Wrong parameters for specified amount of features/samples
        (presumably raised by the ``_checkParam`` validation -- its body is
        not visible from this function).
    """
    # Validate the raw arguments. They are passed explicitly (rather than via
    # ``**locals()``) so the call does not depend on which locals happen to exist.
    _checkParam(
        n_samples=n_samples,
        n_features=n_features,
        n_redundant=n_redundant,
        strRel=strRel,
        n_repeated=n_repeated,
        noise=noise,
        random_state=random_state,
        partition=partition,
    )
    random_state = check_random_state(random_state)

    X = np.zeros((int(n_samples), int(n_features)))

    # Find partitions which define the weakly relevant subsets
    if partition is None:
        # Legacy behaviour yielding subsets of size 2
        partition = int(n_redundant / 2) * [2]
    part_size = len(partition)

    # One base column per strongly relevant feature plus one per weakly
    # relevant subset; shuffle=False keeps the column order deterministic.
    # NOTE(review): only ``strRel`` columns are marked informative, so the
    # ``part_size`` extra columns do not contribute to Y -- confirm intended.
    X_informative, Y = make_regression(n_features=int(strRel + part_size),
                                       n_samples=int(n_samples),
                                       noise=noise,
                                       n_informative=int(strRel),
                                       random_state=random_state,
                                       shuffle=False)

    # Forward exactly the names the previous ``**locals()`` call supplied.
    # X, part_size and Y are kept because the helper's signature is not
    # visible here; whether it actually uses them should be verified.
    X = _fillVariableSpace(
        n_samples=n_samples,
        n_features=n_features,
        n_redundant=n_redundant,
        strRel=strRel,
        n_repeated=n_repeated,
        noise=noise,
        random_state=random_state,
        partition=partition,
        X=X,
        part_size=part_size,
        X_informative=X_informative,
        Y=Y,
    )
    return X, Y
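# 评论列表  (stray web-page text: "comment list" -- not part of the module)
# 文章目录  (stray web-page text: "article table of contents" -- not part of the module)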