import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import LeaveOneOut


def loo_proba(x, y, clf_used='rf', use_pca=False, params=None):
    """Perform leave-one-out cross-validation and collect per-sample predictions.

    Parameters
    ----------
    x : np.ndarray
        Feature matrix of shape (n_samples, n_features).
    y : np.ndarray
        Class labels of shape (n_samples,).
    clf_used : str
        Identifier of the classifier to build (passed to ``init_clf``).
    use_pca : bool
        If True, reduce ``x`` with principal component analysis first.
    params : dict
        Parameters forwarded to the classifier.

    Returns
    -------
    np.ndarray, np.ndarray
        Per-sample class probabilities and hard class predictions.
    """
# print "Performing LOO with %s and %d features. Using PCA: %s" % \
# (clf_used, x.shape[1], str(use_pca))
if use_pca:
old_dim = x.shape[1]
pca = PCA(n_components=0.999)
x = pca.fit_transform(x)
# print pca.explained_variance_ratio_
# print "Reduced feature space dimension %d, instead of %d" % (x.shape[1],
# old_dim)
nans_in_X = np.sum(np.isnan(x))
if nans_in_X > 0:
# print np.where(np.isnan(x))
# print "Found %d nans in features, converting to number." % nans_in_X
x = np.nan_to_num(x)
    loo = LeaveOneOut()
    n_classes = len(np.unique(y))
    prob = np.zeros((len(x), n_classes), dtype=float)
    pred = np.zeros(len(x), dtype=int)
    for cnt, (train_ixs, test_ixs) in enumerate(loo.split(x)):
        x_train, x_test = x[train_ixs], x[test_ixs]
        y_train = y[train_ixs]
        # Train a fresh classifier on all samples but one and score the
        # held-out sample.
        clf = init_clf(clf_used, params)
        clf.fit(x_train, y_train)
        prob[cnt] = clf.predict_proba(x_test)
        pred[cnt] = clf.predict(x_test)[0]
    return prob, pred
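

# ``init_clf`` is called above but not defined in this snippet. The sketch
# below is a minimal, assumed implementation that maps the ``clf_used``
# string to a scikit-learn estimator; the supported names ('rf', 'svm') and
# their defaults are illustrative, not the original author's code.
def init_clf(clf_used, params=None):
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC

    params = params or {}
    if clf_used == 'rf':
        return RandomForestClassifier(**params)
    if clf_used == 'svm':
        # probability=True is required for predict_proba to be available.
        return SVC(probability=True, **params)
    raise ValueError("Unknown classifier: %s" % clf_used)


# Usage sketch on synthetic data, assuming the ``init_clf`` stand-in above.
if __name__ == '__main__':
    from sklearn.datasets import make_classification

    x_demo, y_demo = make_classification(n_samples=30, n_features=10,
                                         random_state=0)
    prob, pred = loo_proba(x_demo, y_demo, clf_used='rf', use_pca=True,
                           params={'n_estimators': 100, 'random_state': 0})
    # Fraction of held-out samples classified correctly across all folds.
    print("LOO accuracy: %.3f" % np.mean(pred == y_demo))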