def predictAndTestEnsemble(X, y, Xtest, ytest, classifiers=None, selectKBest=0):
    """
    Trains an ensemble of classifiers (with default params) on a training dataset,
    and returns the ensemble's majority-vote predictions on the same training
    dataset and on an out-of-sample test dataset.

    :param X: The matrix of training feature vectors
    :type X: list
    :param y: The labels corresponding to the training feature vectors
    :type y: list
    :param Xtest: The matrix of test feature vectors
    :type Xtest: list
    :param ytest: The labels corresponding to the test feature vectors
    :type ytest: list
    :param classifiers: A list of classifier names to use in the ensemble; "knn"
                        and "forest" entries carry their hyperparameter after a
                        dash (e.g. "KNN-5", "forest-100"); unrecognized names
                        are skipped
    :type classifiers: list of str
    :param selectKBest: The number of best features to select (0 keeps all features)
    :type selectKBest: int
    :return: Two arrays: predictions on the training data and predictions on the
             test data (two empty lists on error)
    """
    try:
        # Avoid the shared mutable-default-argument pitfall
        classifiers = [] if classifiers is None else classifiers
        predicted, predicted_test = [], []
        # Prepare the data
        X, y = numpy.array(X), numpy.array(y)
        Xtest, ytest = numpy.array(Xtest), numpy.array(ytest)
        # Define classifiers
        ensembleClassifiers = []
        for c in classifiers:
            if c.lower().find("knn") != -1:
                K = int(c.split('-')[-1])
                clf = neighbors.KNeighborsClassifier(n_neighbors=K)
            elif c.lower().find("svm") != -1:
                clf = svm.SVC(kernel='linear', C=1)
            elif c.lower().find("forest") != -1:
                E = int(c.split('-')[-1])
                clf = ensemble.RandomForestClassifier(n_estimators=E)
            else:
                # Original code would re-append the previous clf (or raise
                # NameError on the first iteration) for unknown names
                continue
            # Add to list
            ensembleClassifiers.append((c, clf))
        # Select K best features if applicable. Fit the selector on the TRAINING
        # data only and reuse it to transform the test data, so both splits use
        # the same feature columns (fitting a second selector on (Xtest, ytest)
        # leaked test labels and could select different features).
        if selectKBest > 0:
            selector = SelectKBest(chi2, k=selectKBest).fit(X, y)
            X_new = selector.transform(X)
            Xtest_new = selector.transform(Xtest)
        else:
            X_new, Xtest_new = X, Xtest
        # Train and fit the voting classifier
        voting = VotingClassifier(estimators=ensembleClassifiers, voting='hard')
        prettyPrint("Fitting ensemble model")
        voting = voting.fit(X_new, y)
        prettyPrint("Validating model")
        predicted = voting.predict(X_new)
        # Same for the test dataset
        prettyPrint("Testing the model")
        predicted_test = voting.predict(Xtest_new)
    except Exception as e:
        prettyPrintError(e)
        return [], []
    return predicted, predicted_test