def predictKFoldRandomForest(X, y, estimators=10, criterion="gini", maxdepth=None, selectKBest=0, kfold=10):
"""
Classifies the data using decision trees and k-fold CV
:param X: The matrix of feature vectors
:type X: list
:param y: The vector containing labels corresponding to the feature vectors
:type y: list
:param estimators: The number of random trees to use in classification
:type estimators: int
:param criterion: The splitting criterion employed by the decision tree
:type criterion: str
:param splitter: The method used to split the data
:type splitter: str
:param maxDepth: The maximum depth the tree is allowed to grow
:type maxDepth: int
:param selectKBest: The number of best features to select
:type selectKBest: int
:param kfold: The number of folds to use in K-fold CV
:type kfold: int
:return: A list of predicted labels across the k-folds
"""
    try:
        # Prepare data
        X, y = numpy.array(X), numpy.array(y)
        # Define classifier
        clf = ensemble.RandomForestClassifier(n_estimators=estimators, criterion=criterion, max_depth=maxdepth)
        # Optionally keep only the selectKBest highest-scoring features (chi-squared test)
        X_new = SelectKBest(chi2, k=selectKBest).fit_transform(X, y) if selectKBest > 0 else X
        # Generate out-of-fold predictions for every sample via k-fold CV
        predicted = cross_val_predict(clf, X_new, y, cv=kfold).tolist()
    except Exception as e:
        prettyPrintError(e)
        return []
    return predicted
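

# Minimal usage sketch (an illustration, not part of the original module). It assumes
# numpy, sklearn's ensemble, SelectKBest, chi2, cross_val_predict, and prettyPrintError
# are imported at the top of this file; the toy feature matrix and labels below are
# hypothetical.
if __name__ == "__main__":
    toy_X = [[1, 0, 3, 2], [0, 2, 1, 0], [2, 1, 0, 1], [0, 3, 2, 2]]
    toy_y = [1, 0, 1, 0]
    # Use 2 folds because the toy set only has four samples (two per class)
    predictions = predictKFoldRandomForest(toy_X, toy_y, estimators=5, kfold=2)
    print(predictions)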