def get_cv_method(method, **kwargs):
    """Build the cross-validation splitter selected by ``method``.

    Recognised names are 'kfold', 'skfold', 'loo', 'shuffle_split',
    'split', 's_shuffle_split' and 'time_series'.  ``kwargs`` are passed
    to the splitter constructor, except for 'loo', whose ``LeaveOneOut``
    is always built without arguments.

    Raises AttributeError when ``method`` matches none of the names.
    """
    # Each factory is wrapped in a lambda so that a splitter class is only
    # looked up and instantiated once its name has actually been matched.
    factories = (
        ('kfold', lambda: KFold(**kwargs)),
        ('skfold', lambda: StratifiedKFold(**kwargs)),
        ('loo', lambda: LeaveOneOut()),
        ('shuffle_split', lambda: ShuffleSplit(**kwargs)),
        ('split', lambda: TrainTestSplit(**kwargs)),
        ('s_shuffle_split', lambda: StratifiedShuffleSplit(**kwargs)),
        ('time_series', lambda: TimeSeriesSplit(**kwargs)),
    )
    for name, build in factories:
        if method == name:
            return build()
    raise AttributeError('Invalid CV method - %s!' % method)
python类LeaveOneOut()的实例源码
def _leave_one_out(algr, X, y):
    """Print and return the leave-one-out mean squared error of ``algr``.

    Every sample is held out once: ``algr`` is refit on the remaining
    samples and asked to predict the held-out one.

    Parameters
    ----------
    algr : estimator whose ``fit(X, y)`` returns the fitted model and
        whose fitted model exposes ``predict(X)``.
    X, y : arrays that can be indexed with integer index arrays; the
        training targets are flattened with ``ravel()`` before fitting.

    Returns
    -------
    The mean of the squared held-out residuals (the same value that is
    printed).
    """
    import numpy as np  # local import keeps the block self-contained

    loo = LeaveOneOut()
    squared_error_sum = 0.0
    for train_index, test_index in loo.split(X):
        model = algr.fit(X[train_index], y[train_index].ravel())
        predicted_y = model.predict(X[test_index])
        # Exactly one sample is held out, so the residual holds a single
        # value; .item() extracts it explicitly instead of relying on the
        # deprecated float() conversion of a one-element array.
        residual = np.asarray(y[test_index][0] - predicted_y).item()
        squared_error_sum += float(residual) ** 2
    mse = squared_error_sum / X.shape[0]
    # Single-argument print calls behave the same on Python 2 and 3.
    print('-----------------------')
    print('Leave One Out?mse  %s' % (mse,))
    print('-----------------------')
    return mse
def cv_LinearRegression_Bias(xM, yV):
    """
    Leave-one-out evaluation of the bias-only model ``y = x + bias``.

    For every sample, the bias is estimated as the mean of ``y - x`` over
    all the other samples, and the held-out sample is predicted as its own
    ``x`` plus that bias.

    Parameters
    ----------
    xM, yV : 2-D array-likes; only the first column of each is used.

    Returns
    -------
    dict with the keys
        'median_abs_err', 'mean_abs_err', 'std_abs_err' : statistics of the
            per-sample absolute errors,
        'list' : the per-sample absolute errors,
        'ci' : the placeholder string "t.b.d",
        'yVp' : the leave-one-out predictions (x plus the per-fold bias).
    """
    X, y = np.array(xM)[:, 0], np.array(yV)[:, 0]
    # only 1-dim is allowed for both X and y
    assert X.ndim == 1 and y.ndim == 1, \
        "xM and yV must be 2-D so that their first column is 1-D"

    # Float buffer: copying y directly would truncate the fractional
    # predictions whenever y has an integer dtype.
    yP = y.astype(float)
    for train, test in model_selection.LeaveOneOut().split(X):
        bias = np.mean(y[train] - X[train])
        yP[test] = X[test] + bias

    cv_score_le = np.abs(y - yP).tolist()
    return {
        'median_abs_err': np.median(cv_score_le),
        'mean_abs_err': np.mean(cv_score_le),
        'std_abs_err': np.std(cv_score_le),  # this can be std(err)
        'list': cv_score_le,
        'ci': "t.b.d",
        # Report the bias-corrected predictions rather than the raw inputs.
        'yVp': yP.tolist(),
    }
def cv_LinearRegression_Bias(xM, yV):
    """
    Leave-one-out evaluation of the bias-only model ``y = x + bias``.

    For every sample, the bias is estimated as the mean of ``y - x`` over
    all the other samples, and the held-out sample is predicted as its own
    ``x`` plus that bias.

    Parameters
    ----------
    xM, yV : 2-D array-likes; only the first column of each is used.

    Returns
    -------
    dict with the keys
        'median_abs_err', 'mean_abs_err', 'std_abs_err' : statistics of the
            per-sample absolute errors,
        'list' : the per-sample absolute errors,
        'ci' : the placeholder string "t.b.d",
        'yVp' : the leave-one-out predictions (x plus the per-fold bias).
    """
    X, y = np.array(xM)[:, 0], np.array(yV)[:, 0]
    # only 1-dim is allowed for both X and y
    assert X.ndim == 1 and y.ndim == 1, \
        "xM and yV must be 2-D so that their first column is 1-D"

    # Float buffer: copying y directly would truncate the fractional
    # predictions whenever y has an integer dtype.
    yP = y.astype(float)
    for train, test in model_selection.LeaveOneOut().split(X):
        bias = np.mean(y[train] - X[train])
        yP[test] = X[test] + bias

    cv_score_le = np.abs(y - yP).tolist()
    return {
        'median_abs_err': np.median(cv_score_le),
        'mean_abs_err': np.mean(cv_score_le),
        'std_abs_err': np.std(cv_score_le),  # this can be std(err)
        'list': cv_score_le,
        'ci': "t.b.d",
        # Report the bias-corrected predictions rather than the raw inputs.
        'yVp': yP.tolist(),
    }
signal_extractor.py 文件源码
项目:Automatic-feature-extraction-from-signal
作者: VVVikulin
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def basic_quality(self, target, feature_vector):
    """Score ``feature_vector`` against ``target`` with the configured measure.

    The measure is selected by ``self.quality``:

    * 'NWP'         - the smaller of ``QualityMeasure.calc_nwp`` applied to
                      the targets ordered by the feature and by the
                      negated feature;
    * 'corrcoef'    - one minus the absolute correlation coefficient;
    * 'mutual_info' - one minus the MINE ``mic()`` score;
    * 'chi2'        - one minus the first chi2 statistic of the absolute
                      feature values;
    * 'distcorr'    - one minus ``distcorr(target, feature_vector)``;
    * 'distree'     - one minus the importance a depth-5 decision tree
                      gives the feature when paired with
                      ``self.random_feature``;
    * 'knnscore'    - one minus the mean leave-one-out accuracy of a
                      default k-nearest-neighbours classifier.

    ``target`` and ``feature_vector`` must have the same length.  For an
    unrecognised ``self.quality`` the string 'WRONG QUALITY NAME' is
    returned instead of a number.
    """
    assert (len(target) == len(feature_vector))
    if self.quality == 'NWP':
        # Pair every feature value with its target and sort on the feature
        # only, so ties keep their original relative order.
        by_feature = sorted(zip(feature_vector, target),
                            key=lambda pair: pair[0])
        by_negated = sorted(zip(-1.0 * feature_vector, target),
                            key=lambda pair: pair[0])
        sort_data_p = np.array([label for _, label in by_feature])
        sort_data_n = np.array([label for _, label in by_negated])
        p_nwp = QualityMeasure.calc_nwp(sort_data_p)
        n_nwp = QualityMeasure.calc_nwp(sort_data_n)
        return min(n_nwp, p_nwp)
    if self.quality == 'corrcoef':
        return 1 - abs(np.corrcoef(target, feature_vector)[0][1])
    if self.quality == 'mutual_info':
        m = MINE()
        m.compute_score(target, feature_vector)
        return 1.0 - m.mic()
    if self.quality == 'chi2':
        # chi2 receives the feature as a single non-negative column.
        column = abs(feature_vector.reshape(len(feature_vector), 1))
        return 1 - chi2(column, target)[0][0]
    if self.quality == 'distcorr':
        return 1 - distcorr(target, feature_vector)
    if self.quality == 'distree':
        data = np.column_stack((feature_vector, self.random_feature))
        clf = DecisionTreeClassifier(max_depth=5, random_state=0)
        clf.fit(data, target)
        return 1.0 - clf.feature_importances_[0]
    if self.quality == 'knnscore':
        data = np.array([feature_vector]).transpose()
        accuracies = []
        for train, test in LeaveOneOut().split(data):
            # A fresh classifier per fold; none is needed outside the loop.
            clf = KNeighborsClassifier()
            clf.fit(data[train], target[train])
            accuracies.append(
                accuracy_score(target[test], clf.predict(data[test])))
        return 1.0 - np.mean(accuracies)
    return 'WRONG QUALITY NAME'
def test_nested_cv():
    """Smoke-test nested cross-validation for every pairing of splitters."""
    # A grid search (inner loop) is wrapped in cross_val_score (outer loop);
    # each inner/outer combination only has to run without raising.
    random_state = np.random.RandomState(0)
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    labels = random_state.randint(0, 5, 15)

    splitters = [
        LeaveOneLabelOut(),
        LeaveOneOut(),
        LabelKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_iter=3, random_state=0),
    ]

    for inner_cv, outer_cv in combinations_with_replacement(splitters, 2):
        search = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
                              cv=inner_cv)
        cross_val_score(search, X=X, y=y, labels=labels, cv=outer_cv,
                        fit_params={'labels': labels})
def test_cross_val_predict():
    """Check cross_val_predict on dense, sparse and unsupervised inputs."""
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = KFold()

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv.split(X, y):
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    # Without an explicit cv argument.
    preds = cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    cv = LeaveOneOut()
    preds = cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    # Sparse input: zero every entry that is not above the global median,
    # then convert to COO format.
    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cross_val_predict(est, Xsp, y)
    # Lengths are plain integers, so compare them exactly like the other
    # length checks in this test.
    assert_equal(len(preds), len(y))

    # Estimator fitted without a target.
    preds = cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    class BadCV():
        # Yields the same train/test index arrays four times; the
        # assert_raises below expects cross_val_predict to reject this
        # with ValueError.
        def split(self, X, y=None, labels=None):
            for i in range(4):
                yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cross_val_predict, est, X, y, cv=BadCV())
def test_cross_validator_with_default_params():
    """Check split counts, 1-d input handling, index dtypes and reprs of
    the cross-validators built with (mostly) default parameters."""
    n_samples = 4
    n_unique_labels = 4
    n_folds = 2
    p = 2
    n_iter = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_folds)
    skf = StratifiedKFold(n_folds)
    lolo = LeaveOneLabelOut()
    lopo = LeavePLabelOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = np of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_folds=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_folds=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneLabelOut()"
    lopo_repr = "LeavePLabelOut(n_labels=2)"
    ss_repr = ("ShuffleSplit(n_iter=10, random_state=0, test_size=0.1, "
               "train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    # Expected number of splits, in the same order as the splitters below.
    n_splits = [n_samples, comb(n_samples, p), n_folds, n_folds,
                n_unique_labels, comb(n_unique_labels, p), n_iter, 2]

    for i, (cv, cv_repr) in enumerate(zip(
            [loo, lpo, kf, skf, lolo, lopo, ss, ps],
            [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr,
             ss_repr, ps_repr])):
        # Test if get_n_splits works correctly
        assert_equal(n_splits[i], cv.get_n_splits(X, y, labels))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, labels)),
                                list(cv.split(X_1d, y, labels)))
        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, labels):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            # Check the test indices as well, not the train indices twice.
            assert_equal(np.asarray(test).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))