def create_regression_dataset(n_samples, n_features, n_informative, effective_rank, tail_strength,
                              noise, random_state=None):
    """
    Build a single-target regression dataset and return it as a data frame.

    The data is produced by ``make_regression`` with ``n_targets`` fixed to 1,
    so the target is stored in a single column named ``'y'``.

    :param n_samples: number of observations (rows)
    :param n_features: number of feature columns
    :param n_informative: number of informative features
    :param effective_rank: approximate number of singular vectors required to explain the data
    :param tail_strength: relative importance of the fat noisy tail of the singular values profile
    :param noise: standard deviation of the gaussian noise applied to the output
    :param random_state: value handed to ``get_random_state``; the result is
        forwarded to ``make_regression`` as its ``random_state``
    :return: data frame holding the feature columns (named by ``rename_columns``)
        followed by the target column ``'y'``
    """
    rng = get_random_state(random_state)
    features, target = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_targets=1,
        effective_rank=effective_rank,
        tail_strength=tail_strength,
        noise=noise,
        random_state=rng,
    )
    # Wrap the feature matrix in a data frame and give its columns their final names.
    frame = rename_columns(pd.DataFrame(features))
    # Append the regression target as the last column.
    frame['y'] = target
    return frame
评论列表
文章目录