def predictKFoldSVMSSK(X, y, kfold=10, subseqLength=3, selectKBest=0):
    """Classifies the data using Support vector machines with the SSK kernel and k-fold CV

    The Gram matrix is obtained from ``string_kernel(X, X)``. Without feature
    selection it is fed to an SVM with a precomputed kernel. When
    ``selectKBest`` picks a strict subset of the Gram-matrix columns, the
    reduced matrix is no longer square and cannot serve as a precomputed
    kernel, so the selected columns are used as plain feature vectors for a
    linear SVM instead.

    :param X: The list of text documents containing traces
    :type X: list
    :param y: The labels of documents in 'X'
    :type y: list
    :param kfold: The number of folds
    :type kfold: int (default: 10)
    :param subseqLength: Length of subsequence used by the SSK.
        NOTE(review): this value is currently not forwarded to
        ``string_kernel``; confirm whether that helper accepts it.
    :type subseqLength: int (default: 3)
    :param selectKBest: The number of best features (Gram-matrix columns) to
        select. Values <= 0, or values not smaller than the number of
        columns, disable the selection.
    :type selectKBest: int (default: 0)
    :return: A list of predicted classes, or an empty list if anything fails
    """
    try:
        # Retrieve Gram Matrix from string kernel
        if verboseON():
            prettyPrint("Generating Gram Matrix from documents", "debug")
        X_gram = string_kernel(X, X)
        y = numpy.array(y)
        n_columns = numpy.shape(X_gram)[1]
        if 0 < selectKBest < n_columns:
            # Dropping columns makes the matrix non-square, which a
            # "precomputed" kernel rejects; treat the kept kernel columns as
            # ordinary features and train a linear SVM on them instead.
            # NOTE(review): as before, the selector is fitted on all samples
            # prior to cross-validation, so the selection sees every label.
            X_gram_new = SelectKBest(chi2, k=selectKBest).fit_transform(X_gram, y)
            clf = svm.SVC(kernel="linear")
            prettyPrint("Performing %s-fold CV on the %s best features" % (kfold, selectKBest))
        else:
            # Full square Gram matrix: usable directly as a precomputed kernel.
            X_gram_new = X_gram
            clf = svm.SVC(kernel="precomputed")
            prettyPrint("Performing %s-fold CV on the full Gram matrix" % kfold)
        predicted = cross_val_predict(clf, X_gram_new, y, cv=kfold).tolist()
    except Exception as e:
        # Best-effort contract of this function: report the error and hand
        # back an empty prediction list rather than propagating.
        prettyPrintError(e)
        return []
    return predicted
评论列表
文章目录