def h2o_f_classif(X, feature_names, target_feature):
    """Compute the ANOVA F-value for the provided sample.

    This method is adapted from ``sklearn.feature_selection.f_classif``
    to function on H2OFrames. The rows of ``X`` are split into one group
    per unique value of the target column, and the groups (restricted to
    ``feature_names``) are passed to ``h2o_f_oneway``.

    Parameters
    ----------
    X : ``H2OFrame``, shape=(n_samples, n_features)
        The feature matrix. Each feature will be tested
        sequentially.

    feature_names : array_like (str)
        The names of the columns of ``X`` to test. Required.

    target_feature : str
        The name of the target column in ``X``. Its unique values
        define the groups of rows that are compared. Required.

    Returns
    -------
    f : float
        The computed F-value of the test, as returned by
        ``h2o_f_oneway``.

    prob : float
        The associated p-value from the F-distribution, as returned
        by ``h2o_f_oneway``.
    """
    frame = check_frame(X, copy=False)

    # Read the target from the validated frame (not the raw input) so the
    # row mask built from ``y`` below is applied to the same object it
    # was derived from.
    y = frame[target_feature]

    # ``_unq_vals_col`` yields a key and a container of unique values that
    # is indexed by that key.
    # NOTE(review): presumably (column name, table of uniques) -- confirm
    # against the helper's definition.
    unq_key, unq_vals = _unq_vals_col(y)
    levels = unq_vals[unq_key]

    # If y is an enum (factor) column, compare against string labels.
    if y.isfactor()[0]:
        levels = [str(level) for level in levels]

    # One sub-frame per target level: mask the rows, then keep only the
    # requested feature columns.
    groups = [frame[y == level, :][feature_names] for level in levels]

    f, prob = h2o_f_oneway(*groups)
    return f, prob
# The following function is a rewriting (of the sklearn rewriting) of
# scipy.stats.f_oneway. Contrary to the scipy.stats.f_oneway implementation,
# it does not copy the data while keeping the inputs unchanged. Furthermore,
# contrary to the sklearn implementation, it does not use np.ndarrays; rather,
# it amends 1d H2OFrames in place.
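# NOTE: stray web-page navigation text ("comment list", "article table of
# contents") was captured here during extraction; it is not part of the code.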