def analyseReasonWithXsqure(anamolySample, normalSample, topk, name):
    # Label anomalous samples 1 and normal samples 0, then keep the
    # topk features with the highest chi-squared scores and print their names.
    data = pd.concat([anamolySample, normalSample])  # DataFrame.append was removed in pandas 2.0
    target = [1] * len(anamolySample) + [0] * len(normalSample)
    name = list(data.columns)  # note: this shadows the unused `name` parameter
    X_new = SelectKBest(chi2, k=topk).fit(data, target)
    outcome = X_new.get_support()
    for i in range(len(name)):
        if outcome[i]:
            print(name[i])
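A minimal usage sketch, assuming pandas is imported as pd and SelectKBest/chi2 come from sklearn.feature_selection; the two frames and their columns are hypothetical:

anomalies = pd.DataFrame({'cpu': [90, 95, 97], 'mem': [10, 12, 11]})
normals = pd.DataFrame({'cpu': [20, 25, 22], 'mem': [11, 13, 12]})
# prints the single column whose distribution differs most between the groups
analyseReasonWithXsqure(anomalies, normals, 1, None)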
Python SelectKBest() usage examples
Source: onlinedetectWithlittleData.py — project onlineDetectForHadoop, author DawnsonLi
def test_SelectKBest():
    '''
    Test the SelectKBest method.
    :return: None
    '''
    X = [[1, 2, 3, 4, 5],
         [5, 4, 3, 2, 1],
         [3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1]]
    y = [0, 1, 0, 1]
    print("before transform:", X)
    selector = SelectKBest(score_func=f_classif, k=3)
    selector.fit(X, y)
    print("scores_:", selector.scores_)
    print("pvalues_:", selector.pvalues_)
    print("selected index:", selector.get_support(True))
    print("after transform:", selector.transform(X))
Source: birchForChangeWindowSize.py — project onlineDetectForHadoop, author DawnsonLi
def analyseReasonWithXsqure(anamolySample, normalSample, topk):
    # Same chi-squared selection as above, deriving feature names from the columns.
    data = pd.concat([anamolySample, normalSample])
    target = [1] * len(anamolySample) + [0] * len(normalSample)
    name = list(data.columns)
    X_new = SelectKBest(chi2, k=topk).fit(data, target)
    outcome = X_new.get_support()
    for i in range(len(name)):
        if outcome[i]:
            print(name[i])
Source: feat_regress.py — project Stock-Market-Analysis-and-Prediction, author samshara
def select_kbest_reg(data_frame, target, k=5):
    """
    Selecting K-Best features for regression
    :param data_frame: A pandas DataFrame with the training data
    :param target: target variable name in DataFrame
    :param k: desired number of features from the data
    :returns feature_scores: scores for each feature in the data as a
             pandas DataFrame
    """
    feat_selector = SelectKBest(f_regression, k=k)
    _ = feat_selector.fit(data_frame.drop(target, axis=1), data_frame[target])
    feat_scores = pd.DataFrame()
    feat_scores["F Score"] = feat_selector.scores_
    feat_scores["P Value"] = feat_selector.pvalues_
    feat_scores["Support"] = feat_selector.get_support()
    feat_scores["Attribute"] = data_frame.drop(target, axis=1).columns
    return feat_scores
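A hedged usage sketch for select_kbest_reg; the DataFrame and the target column 'y' here are made up:

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.rand(50, 3), columns=['a', 'b', 'c'])
df['y'] = 2.0 * df['a'] + 0.1 * rng.rand(50)  # 'a' should dominate the F scores
scores = select_kbest_reg(df, 'y', k=2)
print(scores.sort_values('F Score', ascending=False))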
def test_build_param_grid_set_estimator():
    clf1 = SVC()
    clf2 = LogisticRegression()
    clf3 = SVC()
    clf4 = SGDClassifier()
    estimator = set_grid(Pipeline([('sel', set_grid(SelectKBest(), k=[2, 3])),
                                   ('clf', None)]),
                         clf=[set_grid(clf1, kernel=['linear']),
                              clf2,
                              set_grid(clf3, kernel=['poly'], degree=[2, 3]),
                              clf4])
    param_grid = [{'clf': [clf1], 'clf__kernel': ['linear'], 'sel__k': [2, 3]},
                  {'clf': [clf3], 'clf__kernel': ['poly'],
                   'clf__degree': [2, 3], 'sel__k': [2, 3]},
                  {'clf': [clf2, clf4], 'sel__k': [2, 3]}]
    assert build_param_grid(estimator) == param_grid
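set_grid and build_param_grid here appear to come from the searchgrid package; under that assumption, the built grid plugs straight into GridSearchCV, reusing the estimator constructed in the test above (X_train and y_train are placeholders):

from sklearn.model_selection import GridSearchCV

search = GridSearchCV(estimator, build_param_grid(estimator), cv=3)
search.fit(X_train, y_train)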
def select_percentile_selector(data, target):
    # Select Model
    selector = SelectPercentile(percentile=75)  # default is 10%
    # Fit, Format, and Return
    return format_selector(selector, data, target)

# http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
def de_c2(X, y):
    """ chi2 """
    dim = X.shape[1]
    de = min(2000, dim)
    clf = SelectKBest(chi2, k=de)
    clf.fit(X, y)
    def _func(X1, X2):
        return clf.transform(X1), clf.transform(X2)
    return _func

# def de_mic(X, y):
#     """ MIC """
#     dim = X.shape[1]
#     de = min(2000, dim)
#     clf = SelectKBest(MIC, k=de)
#     clf.fit(X, y)
#     def _func(X1, X2):
#         return clf.transform(X1), clf.transform(X2)
#     return _func
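A sketch of using the closure returned by de_c2; the data here is synthetic, and the features are made non-negative because chi2 requires it:

import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=100, n_features=50, random_state=0)
X = np.abs(X)  # chi2 rejects negative feature values
transform = de_c2(X[:80], y[:80])       # fit the selector on the training split
X_tr, X_te = transform(X[:80], X[80:])  # apply the same selection to both splits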
def analyseReasonWithXsqure(anamolySample, normalSample, topk, name):
    # Variant that takes feature names as a parameter and returns a
    # "; "-joined warning string instead of printing each name.
    target = []
    for i in range(0, len(anamolySample)):
        target.append(1)
    data = pd.concat([anamolySample, normalSample])
    for i in range(0, len(normalSample)):
        target.append(0)
    X_new = SelectKBest(chi2, k=topk).fit(data, target)
    outcome = X_new.get_support()
    warnstr = ""
    for i in range(0, len(name)):
        if outcome[i]:
            warnstr += name[i]
            warnstr += " ; "
    print('x2:', warnstr)
    return warnstr
Source: feat_select.py — project Stock-Market-Analysis-and-Prediction, author samshara
def select_kbest_clf(data_frame, target, k=4):
    """
    Selecting K-Best features for classification
    :param data_frame: A pandas DataFrame with the training data
    :param target: target variable name in DataFrame
    :param k: desired number of features from the data
    :returns feature_scores: scores for each feature in the data as a
             pandas DataFrame
    """
    feat_selector = SelectKBest(f_classif, k=k)
    _ = feat_selector.fit(data_frame.drop(target, axis=1), data_frame[target])
    feat_scores = pd.DataFrame()
    feat_scores["F Score"] = feat_selector.scores_
    feat_scores["P Value"] = feat_selector.pvalues_
    feat_scores["Support"] = feat_selector.get_support()
    feat_scores["Attribute"] = data_frame.drop(target, axis=1).columns
    return feat_scores
def reduceDimensionality(X, y, method="selectkbest", targetDim=10):
    """ Reduces the dimensionality of [X] to [targetDim] """
    try:
        # Check for the required methodology first
        if method.lower() == "selectkbest":
            prettyPrint("Selecting %s best features from dataset" % targetDim, "debug")
            kBestSelector = SelectKBest(k=targetDim)
            X_new = kBestSelector.fit_transform(X, y).tolist()
        elif method.lower() == "pca":
            prettyPrint("Extracting %s features from dataset using PCA" % targetDim, "debug")
            pcaExtractor = PCA(n_components=targetDim)
            # Make sure vectors in X are positive
            X_new = pcaExtractor.fit_transform(X, y).tolist()
        else:
            prettyPrint("Unknown dimensionality reduction method \"%s\"" % method, "warning")
            return X
    except Exception as e:
        prettyPrint("Error encountered in \"reduceDimensionality\": %s" % e, "error")
        return X
    # Return the reduced dataset
    return X_new
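A hedged call sketch, assuming the project's prettyPrint helper is importable; the data is synthetic:

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(40, 30).tolist()
y = rng.randint(0, 2, 40).tolist()
X_small = reduceDimensionality(X, y, method="selectkbest", targetDim=10)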
def ngrams_selection(train_data, train_labels, ind, model_file,
                     ngram_range_=(1, 1), max_num_features=100,
                     analyzer_type='word'):
    """Create and save vectorizers and feature selectors on given train data.

    Args:
        train_data: list of train text samples
        train_labels: list of train labels
        ind: index of vectorizer/selector to save file
        model_file: model filename
        ngram_range_: range of n-grams
        max_num_features: maximum number of features to select
        analyzer_type: analyzer type for TfidfVectorizer 'word' or 'char'

    Returns:
        nothing
    """
    vectorizer = TfidfVectorizer(ngram_range=ngram_range_, sublinear_tf=True, analyzer=analyzer_type)
    X_train = vectorizer.fit_transform(train_data)
    if max_num_features < X_train.shape[1]:
        ch2 = SelectKBest(chi2, k=max_num_features)
        ch2.fit(X_train, train_labels)
        data_struct = {'vectorizer': vectorizer, 'selector': ch2}
        print('creating', model_file + '_ngrams_vect_' + ind + '.bin')
        with open(model_file + '_ngrams_vect_' + ind + '.bin', 'wb') as f:
            pickle.dump(data_struct, f)
    else:
        data_struct = {'vectorizer': vectorizer}
        print('creating', model_file + '_ngrams_vect_' + ind + '.bin')
        with open(model_file + '_ngrams_vect_' + ind + '.bin', 'wb') as f:
            pickle.dump(data_struct, f)
    return
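A hedged sketch of reading the saved artefact back at prediction time, assuming the same pickle layout written above; model_file and ind mirror the parameters of ngrams_selection, and new_texts is a placeholder list of strings:

import pickle

with open(model_file + '_ngrams_vect_' + ind + '.bin', 'rb') as f:
    data_struct = pickle.load(f)
features = data_struct['vectorizer'].transform(new_texts)
if 'selector' in data_struct:
    features = data_struct['selector'].transform(features)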
def chiSquare(train_data, train_classes, topK):
    vectorizer = DictVectorizer()
    # Fit and transform the train data.
    x_train = vectorizer.fit_transform(train_data)
    y_train = train_classes
    if (x_train.shape[1] < topK):
        topK = x_train.shape[1]
    selector = SelectKBest(chi2, k=topK)
    x_new = selector.fit_transform(x_train, y_train)
    return vectorizer.inverse_transform(selector.inverse_transform(x_new))
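A small usage sketch; the dict-encoded features and labels are invented, and chi2 assumes non-negative values:

train_data = [{'len': 4, 'caps': 1}, {'len': 2, 'caps': 0}, {'len': 5, 'caps': 2}]
train_classes = [1, 0, 1]
# returns the surviving features as a list of dicts, one per sample
print(chiSquare(train_data, train_classes, topK=1))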
Source: feature_selection.py — project MultimodalAutoencoder, author natashamjaques
def transform_select_K_best(X_train, Y_train, X_all, K=100):
    """Selects the best K features given the training data.

    Args:
        X_train: A matrix containing training data
        Y_train: Classification labels for the training data
        X_all: A matrix containing all the data
        K: The number of features to select
    """
    skb = SelectKBest(f_classif, k=K)
    skb.fit(X_train, Y_train)
    return skb.transform(X_all)
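A quick sketch, assuming f_classif is imported from sklearn.feature_selection; the data is synthetic:

import numpy as np

rng = np.random.RandomState(0)
X_all = rng.rand(30, 10)
y_train = rng.randint(0, 2, 20)
X_reduced = transform_select_K_best(X_all[:20], y_train, X_all, K=5)
print(X_reduced.shape)  # (30, 5)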
def test_make_pipeline():
    t1 = SelectKBest()
    t2 = SelectKBest()
    t3 = SelectKBest()
    t4 = SelectKBest()
    t5 = SelectPercentile()
    t6 = SelectKBest()
    t7 = SelectKBest()
    t8 = SelectKBest()
    t9 = SelectPercentile()
    in_steps = [[t1, None],
                [t2, t3],
                [t4, t5],  # mixed
                t6,
                [None, t7],
                [t8, None, t9],  # mixed
                None]
    pipe = make_pipeline(*in_steps, memory='/path/to/nowhere')
    union = make_union(*in_steps)
    for est, est_steps in [(pipe, pipe.steps),
                           (union, union.transformer_list)]:
        names, steps = zip(*est_steps)
        assert names == ('selectkbest-1', 'selectkbest-2', 'alt-1',
                         'selectkbest-3', 'selectkbest-4', 'alt-2', 'nonetype')
        assert steps == (t1, t2, t4, t6, None, t8, None)
        assert len(est._param_grid) == 5
        assert est._param_grid[names[0]] == [t1, None]
        assert est._param_grid[names[1]] == [t2, t3]
        assert est._param_grid[names[2]] == [t4, t5]
        assert est._param_grid[names[4]] == [None, t7]
        assert est._param_grid[names[5]] == [t8, None, t9]
    assert type(pipe) is Pipeline
    assert type(union) is FeatureUnion
    assert pipe.memory == '/path/to/nowhere'
Source: analysis_utilities.py — project oss-github-analysis-project, author itu-oss-project-team
def export_best_feature_names(self, df, labels, out_folder_path, k):
    columns, repos, observations = self.decompose_df(df)
    feature_scores = SelectKBest(chi2, k=k).fit(observations, labels).scores_
    feature_scores = np.nan_to_num(feature_scores)
    k_best_features = np.argpartition(feature_scores.ravel(), (-1) * k)[(-1) * k:]
    k_best_feature_names = columns[k_best_features]
    out_file_path = os.path.join(out_folder_path, "feature_selection.txt")
    with open(out_file_path, "w") as output_file:
        for feature_name in k_best_feature_names:
            output_file.write(feature_name + "\n")
def __init__(self, selector, return_array=False):
    '''
    Wrap a feature selector from sklearn so it can be used in this framework.
    selector: a selector from sklearn.feature_selection, e.g.
        sklearn.feature_selection.SelectKBest(sklearn.feature_selection.f_classif, k=4).
    return_array: if True, transform returns a numpy.ndarray; if False, it
        returns the same type as X.
    Attributes:
        selector_: the fitted selector.
    '''
    BaseSelector.__init__(self, return_array=return_array)
    self.selector = selector
def test():
    # Construct test data
    np.random.seed(13)
    X = pd.DataFrame(np.random.randn(20, 10))
    X.columns = ['x%d' % i for i in range(10)]
    y = pd.Series(np.random.choice([0, 1], 20))
    # Wrap selectors taken directly from sklearn
    clf_sklearn = feature_selection.SelectKBest(feature_selection.f_classif, k=4)
    clf = SklearnSelector(estimator=clf_sklearn)
    clf.fit(X, y)
    clf.transform(X)
    print(clf.feature_selected)
    clf_sklearn = SelectFromModel(LogisticRegression())
    clf = SklearnSelector(estimator=clf_sklearn)
    clf.fit(X, y)
    clf.transform(X)
    print(clf.feature_selected)
    # Selection by voting
    clf_selectkbest = feature_selection.SelectKBest(feature_selection.f_classif, k=4)
    clf_selectfrommodel = SelectFromModel(LogisticRegression())
    clf_baseselector = SklearnSelector(clf_selectkbest)
    clf = VotingSelector(selectors=[('clf_selectkbest', clf_selectkbest),
                                    ('clf_selectfrommodel', clf_selectfrommodel),
                                    ('clf_baseselector', clf_baseselector)],
                         threshold=0.5)
    clf.fit(X, y)
    clf.transform(X)
    print(clf.feature_selected)
    print(clf.df_voting)
    print(clf.score)
def __init__(self, conf):
    SemiSupervisedFeatureSelection.__init__(self, conf)
    self.projection = SelectKBest(mutual_info_classif, k=conf.num_components)

def __init__(self, conf):
    SemiSupervisedFeatureSelection.__init__(self, conf)
    self.projection = SelectKBest(chi2, k=conf.num_components)

def __init__(self, conf):
    SemiSupervisedFeatureSelection.__init__(self, conf)
    self.projection = SelectKBest(f_classif, k=conf.num_components)
def getFeature():
    # Each line of "data" is expected to look like:
    #   <rating> <col>:<value> <col>:<value> ...
    # Ratings >= 7 become positive samples, <= 4 negative; the rest are skipped.
    fileData = open("data")
    row = []
    col = []
    data = []
    evalRes = []
    rowIndex = -1
    fileList = fileData.readlines()
    random.shuffle(fileList)
    for line in fileList:
        line = line.rstrip('\n')
        dataList = re.split(' |:', line)
        if int(dataList[0]) >= 7:
            evalRes.append(1)
        else:
            if int(dataList[0]) <= 4:
                evalRes.append(-1)
            else:
                continue
        del dataList[0]
        rowIndex = rowIndex + 1
        row.extend([rowIndex] * int(len(dataList) / 2))
        col.extend(map(int, dataList[::2]))
        data.extend(map(int, dataList[1::2]))
    featureMatrix = csr_matrix((data, (row, col)))
    featureMNew = SelectKBest(chi2, k=20000).fit_transform(featureMatrix, evalRes)
    return featureMNew, evalRes
def build_model_random_forest(df, features, categorical_features, target, split=0.70):
    print("using %d features (%d columns) on %d rows and target %s. Split %f." % (
        len(features), len(df.columns), len(df), target, split))
    df['is_train'] = np.random.uniform(0, 1, len(df)) <= split
    train, test = df[df['is_train'] == True], df[df['is_train'] == False]
    # one_hot_encoding because it doesn't work in pipeline for some reason
    # for f in categorical_features:
    #     dummies = pd.get_dummies(df[f], prefix=f)
    #     for dummy in dummies.columns:
    #         df[dummy] = dummies[dummy]
    #         features.append(dummy)
    #     df = df.drop(f, 1)
    #     features.remove(f)
    clf = Pipeline([
        ("imputer", Imputer(strategy="mean", axis=0)),  # Imputer was removed in sklearn 0.22; use SimpleImputer on newer versions
        ('feature_selection', SelectKBest(k=5)),
        ("forest", RandomForestClassifier())])
    clf.fit(train[features], train[target])
    score = clf.score(test[features], test[target])
    predicted = clf.predict(test[features])
    cm = confusion_matrix(test[target], predicted)
    print("Random Forest score: %f" % score)
    print("confusion_matrix :\n%s" % cm)
    return clf
def make_predictions_random_forest(df, features, target, split=0.70):
    print("using %d features (%d columns) on %d rows and target %s. Split %f." % (
        len(features), len(df.columns), len(df), target, split))
    # print("unused features: ", '\n\t\t'.join([f for f in df.columns if f not in features]))
    # print("columns: ", '\n\t\t'.join(df.columns))
    df['is_train'] = np.random.uniform(0, 1, len(df)) <= split
    train, test = df[df['is_train'] == True], df[df['is_train'] == False]
    clf = Pipeline([
        ("imputer", Imputer(strategy="mean", axis=0)),
        ('feature_selection', SelectKBest(k=200)),
        ("forest", RandomForestClassifier(
            min_samples_leaf=1, min_samples_split=10, n_estimators=60, max_depth=None, criterion='gini'))])
    clf.fit(train[features], train[target])
    score = clf.score(test[features], test[target])
    predicted = clf.predict(test[features])
    cm = confusion_matrix(test[target], predicted)
    # print(classification_report(test[target], predicted))
    return score, cm
# Utility function to report best scores
def select_k_best_selector(data, target):
    # Select Model
    selector = SelectKBest(k=3)  # default is 10 features
    # Fit, Format, and Return
    return format_selector(selector, data, target)
Source: xgb_classification.py — project jingjuSingingPhraseMatching, author ronggong
def buildEstimators(mode):
    if mode == 'train' or mode == 'cv':
        # best parameters found by GridSearchCV, best score: 1
        estimators = [('anova_filter', SelectKBest(f_classif, k='all')),
                      ('xgb', xgb.XGBClassifier(learning_rate=0.1, n_estimators=300, max_depth=3))]
        clf = Pipeline(estimators)
    elif mode == 'test':
        # pickles must be opened in binary mode on Python 3
        clf = pickle.load(open(join(classifier_path, "xgb_classifier.plk"), "rb"))
    return clf
def main():
    from sklearn import svm
    from sklearn.datasets import samples_generator
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import f_regression
    from sklearn.preprocessing import MinMaxScaler
    X, y = samples_generator.make_classification(n_samples=1000, n_informative=5, n_redundant=4,
                                                 random_state=_random_state)
    anova_filter = SelectKBest(f_regression, k=5)
    scaler = MinMaxScaler()
    clf = svm.SVC(kernel='linear')
    steps = [scaler, anova_filter, clf]
    cached_run(steps, X, y)
def main():
    sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-i', '--input', help='Input file', required=True)
    parser.add_argument('-t', '--test', help='Test file', required=True)
    parser.add_argument('-o', '--output', help='Output filename prefix', required=True)
    parser.add_argument('-c', '--c', help='C value for SVM', type=float, default=1.0)
    parser.add_argument('-k', '--k', help='Number of features to keep', type=int, default=1000)
    args = parser.parse_args()
    data = read_semeval_regression(args.input, encoding='windows-1252')
    analyzer = get_rich_analyzer(word_ngrams=[2, 3], char_ngrams=[4])
    pipeline = Pipeline([
        ('vect', CountVectorizer(analyzer=analyzer)),
        ('tfidf', TfidfTransformer()),
        ('sel', SelectKBest(chi2, k=args.k)),
        ('clf', BinaryTreeRegressor(base_estimator=LinearSVC(C=args.c), verbose=False)),
    ])
    test = read_test_data(args.test, encoding='windows-1252')
    regressor = pipeline.fit(data[0], data[1])
    y = regressor.predict(test[2])
    with open('%sc%f-k%i-C.output' % (args.output, args.c, args.k), 'w', encoding='utf8') as outfile:
        for id_, topic, rate in zip(test[0], test[1], y):
            print(id_, topic, rate, sep='\t', file=outfile)
def select_feats(X, y, nb_features, nb_features_to_keep=2048):
    X, y = preproc_for_sklearn(X, y, nb_features)
    if nb_features < nb_features_to_keep:
        nb_features_to_keep = nb_features_to_keep // 4  # integer division: k must be an int
    feature_selector = SelectKBest(chi2, k=nb_features_to_keep).fit(X, y)
    selected_indices = feature_selector.get_support(indices=True)
    return selected_indices
def predictKFoldKNN(X, y, K=10, kfold=10, selectKBest=0):
    """
    Classifies the data using K-nearest neighbors and k-fold CV
    :param X: The list of feature vectors
    :type X: list
    :param y: The list of labels corresponding to the feature vectors
    :type y: list
    :param K: The number of nearest neighbors to consider in classification
    :type K: int
    :param kfold: The number of folds in the CV
    :type kfold: int
    :param selectKBest: The number of best features to select
    :type selectKBest: int
    :return: An array of predicted classes
    """
    try:
        # Prepare data
        X, y = numpy.array(X), numpy.array(y)
        # Define classifier
        clf = neighbors.KNeighborsClassifier(n_neighbors=K)
        # Select K Best features if enabled
        X_new = SelectKBest(chi2, k=selectKBest).fit_transform(X, y) if selectKBest > 0 else X
        predicted = cross_val_predict(clf, X_new, y, cv=kfold).tolist()
    except Exception as e:
        prettyPrintError(e)
        return []
    return predicted
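A usage sketch with synthetic data; the features are made non-negative since the chi2 branch requires it:

from sklearn.datasets import make_classification
import numpy

X, y = make_classification(n_samples=100, n_features=20, random_state=0)
X = numpy.abs(X)
preds = predictKFoldKNN(X.tolist(), y.tolist(), K=5, kfold=5, selectKBest=10)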