python类cross_val_score()的实例源码-面圈网

marketing_predict.py 文件源码项目：playground 作者: Pennsy 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def learn_decision_tree(data):
    """Cross-validate a depth-7 decision tree five times and print the scores.

    Each round runs 10-fold cross-validation on ``data.X_train`` /
    ``data.y_train`` with a Matthews-correlation scorer, then prints the mean
    and the per-fold scores.

    Returns:
        The ``DecisionTreeClassifier`` instance that was passed to
        ``cross_val_score``. This function never calls ``fit`` on it itself.
    """
    classifier = tree.DecisionTreeClassifier(max_depth=7)
    mcc_scorer = make_scorer(matthews_corrcoef)
    for round_no in range(5):
        fold_scores = cross_val_score(
            classifier,
            data.X_train,
            data.y_train,
            cv=10,
            scoring=mcc_scorer,
        )
        print("iteration", round_no, "dt mean:", fold_scores.mean())
        print("Decision Tree train scores:\n", list(fold_scores))
    return classifier
    # DT = DT.fit(train_data[:, :-1], train_data[:, -1])
    # predictionsDT = DT.predict(validation_data[:, :-1])

    # validating predicions
    # dtError = 0
    # for i in range(0, len(validation_data)):
    #         if(validation_data[i][20] != predictionsDT[i]):
    #                 dtError = dtError + 1
    # print("DT Error : ", float(dtError)/len(validation_data)*100.0)

mvpa_voxelselector.py 文件源码项目：brainiak 作者: brainiak 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def _sfn(l, mask, myrad, bcast_var):
    """Score classifier on searchlight data using cross-validation.

    The classifier is in `bcast_var[2]`. The labels are in `bcast_var[0]`. The
    number of cross-validation folds is in `bcast_var[1]`.

    `myrad` is accepted but not used by this function.

    Returns the mean of the per-fold cross-validation scores.
    """
    classifier = bcast_var[2]
    # Select the masked entries of the first element of `l` and transpose the
    # result before handing it to cross_val_score.
    samples = l[0][mask, :].T
    folds = model_selection.StratifiedKFold(n_splits=bcast_var[1],
                                            shuffle=False)
    fold_scores = model_selection.cross_val_score(classifier, samples,
                                                  y=bcast_var[0],
                                                  cv=folds,
                                                  n_jobs=1)
    return np.mean(fold_scores)

classification.py 文件源码项目：brainiak 作者: brainiak 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Run stratified K-fold cross-validation with a precomputed-kernel SVM.

    NOTE: this method does not work for sklearn.svm.SVC with precomputed
    kernel when the kernel matrix is computed in portions; also, this method
    only works for self-correlation, i.e. correlation between the same data
    matrix.

    The per-fold scores are printed and their mean is logged at INFO level.
    Nothing is returned.
    """
    # No shrinking, C fixed to 1.
    base_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    wrapped_clf = Classifier(base_svm, epochs_per_subj=num_epochs_per_subj)
    # One fold per subject, no shuffling (intended as leave-one-subject-out).
    folds = model_selection.StratifiedKFold(n_splits=num_subjects,
                                            shuffle=False)
    # Each sample is the pair (x, x): the data is correlated with itself.
    paired_data = list(zip(raw_data, raw_data))
    fold_scores = model_selection.cross_val_score(wrapped_clf, paired_data,
                                                  y=labels,
                                                  cv=folds)
    print(fold_scores)
    logger.info(
        'the overall cross validation accuracy is %.2f' %
        np.mean(fold_scores)
    )

test_pyglmnet.py 文件源码项目：pyglmnet 作者: glm-tools 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def test_cv():
    """Simple CV check: cross_val_score yields 5 scores and GridSearchCV fits."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    # NOTE(review): positional (n_samples, n_folds) call looks like the legacy
    # sklearn.cross_validation.KFold signature -- confirm against the import.
    folds = KFold(X.shape[0], 5)

    gaussian_glm = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)

    # One score per fold is expected.
    fold_scores = cross_val_score(gaussian_glm, X, y, cv=folds)
    assert_equal(len(fold_scores), 5)

    alpha_grid = {'alpha': np.linspace(0.01, 0.99, 2)}
    lambda_grid = {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
                                             10, base=np.exp(1))}
    search = GridSearchCV(gaussian_glm, [alpha_grid, lambda_grid], cv=folds)
    search.fit(X, y)

modeltest.py 文件源码项目：strategy 作者: kanghua309 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def model_cross_valid(X, Y):
    """Print the mean 10-fold negative MSE for a set of regression models.

    Each model class is instantiated with its default arguments and scored
    with ``cross_val_score`` using the ``'neg_mean_squared_error'`` scorer.
    The class and its mean score are printed; nothing is returned.

    Args:
        X: Feature matrix passed straight to ``cross_val_score``.
        Y: Target values passed straight to ``cross_val_score``.
    """
    # No shuffling is requested, so the folds are deterministic. The original
    # passed random_state here as well; without shuffle=True it had no effect
    # and recent scikit-learn releases reject that combination outright.
    kfold = model_selection.KFold(n_splits=10)
    scoring = 'neg_mean_squared_error'

    # Candidates to add later: random forest, boosting, LSTM, GBDT.
    for model_class in (LinearRegression, ElasticNet):
        model = model_class()
        results = model_selection.cross_val_score(
            model, X, Y, cv=kfold, scoring=scoring)
        print(model_class, results.mean())

regularize.py 文件源码项目：DSI-personal-reference-kit 作者: teb311 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def fit_regression(X, y, regression_class=LinearRegression, regularization_const=.001):
    '''
        Fit a regressor on (X, y) and report its cross-validated RMSE.

        X (pandas DataFrame): The data.
        y (pandas DataFrame or Series): The answers.
        regression_class (class): One of sklearn.linear_model.[LinearRegression, Ridge, Lasso]
        regularization_const: the regularization parameter for Ridge or Lasso.
                              Passed to the regressor as ``alpha``; ignored when
                              regression_class is LinearRegression.

        Return:
            tuple, (the_fitted_regressor, mean_of_per_fold_rmse).
    '''
    if regression_class is not LinearRegression:
        # Regularized regressors get the penalty strength and normalization.
        predictor = regression_class(alpha=regularization_const, normalize=True)
    else:
        predictor = regression_class()

    predictor.fit(X, y)

    neg_mse_per_fold = cross_val_score(predictor, X, y=y, scoring='neg_mean_squared_error')
    # The scorer reports negated MSE; flip the sign and take the root per fold.
    rmse_per_fold = np.sqrt(-1 * neg_mse_per_fold)

    return (predictor, np.mean(rmse_per_fold))

tests.py 文件源码项目：scikit-mdr 作者: EpistasisLab 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def test_mdr_sklearn_pipeline():
    """Ensure that MDR can be used as a transformer in a scikit-learn pipeline"""
    rows = [
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ]
    features = np.array(rows)
    # Ten positive samples followed by five negative ones.
    classes = np.array([1] * 10 + [0] * 5)

    pipeline = make_pipeline(MDR(), LogisticRegression())
    splitter = StratifiedKFold(n_splits=5, shuffle=True)
    cv_scores = cross_val_score(pipeline, features, classes, cv=splitter)
    assert np.mean(cv_scores) > 0.

tests.py 文件源码项目：scikit-mdr 作者: EpistasisLab 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def test_mdr_sklearn_pipeline_parallel():
    """Ensure that MDR can be used as a transformer in a parallelized scikit-learn pipeline"""
    rows = [
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ]
    features = np.array(rows)
    # Ten positive samples followed by five negative ones.
    classes = np.array([1] * 10 + [0] * 5)

    pipeline = make_pipeline(MDR(), LogisticRegression())
    splitter = StratifiedKFold(n_splits=5, shuffle=True)
    # n_jobs=-1 is what distinguishes this test from the serial variant.
    cv_scores = cross_val_score(pipeline, features, classes, cv=splitter, n_jobs=-1)
    assert np.mean(cv_scores) > 0.

main.py 文件源码项目：xplore 作者: fahd09 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def eval_models(eda_objs, clfs):
    '''
    Cross-validate every classifier against every pipeline object and collect
    the per-fold R^2 scores.

    Parameters
    ----------
    eda_objs : iterable of (pipe_name, obj) pairs
                each obj exposes ``df``, ``y`` and ``_get_input_features()``
    clfs     : list of (clf_name, clf) pairs
                a non-list value is wrapped in a one-element list

    Returns
    -------
    list of (clf_name, pipe_name, fold_score) tuples, one per CV fold
    '''
    if not isinstance(clfs, list):
        clfs = [clfs]

    records = []
    for clf_name, clf in clfs:
        for pipe_name, obj in eda_objs:
            X = obj.df[obj._get_input_features()]
            y = obj.df[obj.y]
            # Alternative scorer considered originally: neg_mean_squared_error
            fold_scores = cross_val_score(estimator=clf, X=X, y=y, cv=5, scoring='r2')
            records.extend((clf_name, pipe_name, v) for v in fold_scores)
    return records

alphas.py 文件源码项目：yellowbrick 作者: DistrictDataLabs 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def __init__(self, model, ax=None, alphas=None,
             cv=None, scoring=None, **kwargs):
    """Set up manual alpha selection for a non-CV regularization model.

    Args:
        model: The estimator to wrap. Its class name must not end in "CV".
        ax: Forwarded to the parent initializer.
        alphas: Candidate alpha values. ``None`` or an empty sequence selects
            the default ``np.logspace(-10, -2, 200)``.
        cv: Forwarded to ``cross_val_score`` as ``cv``.
        scoring: Forwarded to ``cross_val_score`` as ``scoring``.
        **kwargs: Forwarded to the parent initializer.

    Raises:
        YellowbrickTypeError: If the model's class name ends with "CV".
    """
    # Check to make sure this is not a "RegressorCV"
    name = model.__class__.__name__
    if name.endswith("CV"):
        raise YellowbrickTypeError((
            "'{}' is a CV regularization model;"
            " try AlphaSelection instead."
        ).format(name))

    # Call super to initialize the class
    super(ManualAlphaSelection, self).__init__(model, ax=ax, **kwargs)

    # Set manual alpha selection parameters.
    # `alphas or default` raised "truth value of an array is ambiguous" for
    # multi-element numpy arrays, so test for None / emptiness explicitly.
    # Empty input still falls back to the default, as before.
    if alphas is None or np.size(alphas) == 0:
        alphas = np.logspace(-10, -2, 200)
    self.alphas = alphas
    self.errors = None
    self.score_method = partial(cross_val_score, cv=cv, scoring=scoring)

test_prediction.py 文件源码项目：nba-prediction 作者: 395299296 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def train_model(team_stats, result_data, test_data):
    """Fit a logistic regression, report CV accuracy, and predict test games.

    Args:
        team_stats: Passed to ``build_dataSet`` and ``predict_winner``.
        result_data: Passed to ``build_dataSet`` to build the training set.
        test_data: Object with ``iterrows()``; each row is read at
            ``'Vteam'`` and ``'Hteam'``.

    Returns:
        A list holding ``pred[0][0]`` for every row of ``test_data``, where
        ``pred`` is the value returned by ``predict_winner``.
    """
    # Build the training set.
    X, y = build_dataSet(team_stats, result_data)

    # Train the model on all samples.
    print("Fitting on %d game samples.." % len(X))
    model = LogisticRegression()
    model.fit(X, y)

    # Report the mean accuracy over 10-fold cross-validation.
    print("Doing cross-validation..")
    cv_accuracy = cross_val_score(model, X, y, cv = 10, scoring='accuracy', n_jobs=-1)
    print(cv_accuracy.mean())

    # Use the fitted model on the test games.
    print('Predicting on test data..')
    return [
        predict_winner(row['Vteam'], row['Hteam'], model, team_stats)[0][0]
        for _, row in test_data.iterrows()
    ]

k_fold_predictor.py 文件源码项目：movie-quality-profitability-predictor 作者: wbowditch 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def compute_cross_fold(data):
    """Cross-validate a linear SVM that predicts the 'Profitable' column.

    The features come from ``data`` (standardized here); the target is read
    from the ``'Profitable'`` column of ``total_set.csv`` in the working
    directory.

    Returns:
        tuple, (mean of the 10 fold scores, twice their standard deviation).
    """
    table = pd.read_csv("total_set.csv", index_col=0)

    # Standardize the features with statistics computed on `data` itself.
    features = preprocessing.StandardScaler().fit(data).transform(data)
    target = table['Profitable']

    classifier = svm.LinearSVC(C=0.001, verbose=True, tol=.1)
    classifier.fit(features, target)
    fold_scores = cross_val_score(classifier, features, target, cv=10)

    mean_score = fold_scores.mean()
    spread = fold_scores.std() * 2
    return (mean_score, spread)

knn_classify_sklearn.py 文件源码项目：python_utils 作者: Jayhello 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def cross_validation():
    """Pick the best k for k-NN by 10-fold cross-validation and plot accuracy.

    For every k in 1..29 a ``KNeighborsClassifier`` is cross-validated on the
    training split returned by ``load_data()``. The k with the lowest
    misclassification error (first one on ties) is printed, and mean accuracy
    versus k is plotted. Nothing is returned.
    """
    # Only the training split is used here.
    x_train, _x_test, y_train, _y_test = load_data()
    k_lst = list(range(1, 30))
    lst_scores = []

    for k in k_lst:
        knn = KNeighborsClassifier(n_neighbors=k)
        scores = cross_val_score(knn, x_train, y_train, cv=10, scoring='accuracy')
        lst_scores.append(scores.mean())

    # Convert mean accuracy to misclassification error.
    errors = [1 - x for x in lst_scores]
    optimal_k = k_lst[errors.index(min(errors))]
    # Parenthesized single-argument form: valid on both Python 2 and 3.
    print("The optimal number of neighbors is %d" % optimal_k)

    # Plot mean accuracy against k.
    plt.plot(k_lst, lst_scores)
    plt.xlabel('Number of Neighbors K')
    plt.ylabel('correct classification rate')
    plt.show()

test_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 45 收藏 0 点赞 0 评论 0

def test_cross_val_score_predict_labels():
    # Label-based CV iterators need `labels`; when it is missing, the
    # ValueError must surface through both cross_val_score and
    # cross_val_predict.
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)
    linear_svc = SVC(kernel="linear")
    expected_message = "The labels parameter should not be None"

    label_cvs = [LeaveOneLabelOut(), LeavePLabelOut(2), LabelKFold(),
                 LabelShuffleSplit()]
    for cv in label_cvs:
        for cv_function in (cross_val_score, cross_val_predict):
            assert_raise_message(ValueError, expected_message,
                                 cv_function,
                                 estimator=linear_svc, X=X, y=y, cv=cv)

test_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def test_cross_val_score_pandas():
    # cross_val_score must hand the estimator the same container types it was
    # given (mock frames always, real pandas types when pandas is installed).
    type_pairs = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
    except ImportError:
        pass
    else:
        type_pairs.append((Series, DataFrame))

    for TargetType, InputFeatureType in type_pairs:
        # X as a dataframe, y as a series. The default CV is used, so every
        # class needs enough samples to appear in each fold.
        X_df = InputFeatureType(X)
        y_ser = TargetType(y2)

        def is_frame(x):
            return isinstance(x, InputFeatureType)

        def is_series(x):
            return isinstance(x, TargetType)

        clf = CheckingClassifier(check_X=is_frame, check_y=is_series)
        cross_val_score(clf, X_df, y_ser)

test_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def test_cross_val_score_precomputed():
    # A precomputed linear Gram matrix must score exactly like kernel="linear".
    iris = load_iris()
    X, y = iris.data, iris.target
    gram = np.dot(X, X.T)

    precomputed_scores = cross_val_score(SVC(kernel="precomputed"), gram, y)
    linear_scores = cross_val_score(SVC(kernel="linear"), X, y)
    assert_array_equal(precomputed_scores, linear_scores)

    precomputed_svc = SVC(kernel="precomputed")

    # A non-square X is not a valid precomputed kernel.
    assert_raises(ValueError, cross_val_score, precomputed_svc, X, y)

    # The precomputed kernel must be array-like or sparse; a plain nested
    # list is rejected.
    assert_raises(ValueError, cross_val_score, precomputed_svc,
                  gram.tolist(), y)

test_validation.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def test_cross_val_score_with_score_func_classification():
    # On iris, the default score, explicit accuracy and weighted F1 are all
    # expected to give the same per-fold values (to two decimals).
    iris = load_iris()
    clf = SVC(kernel='linear')
    expected = [0.97, 1., 0.97, 0.97, 1.]

    # Default score (should be the accuracy score)
    default_scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(default_scores, expected, 2)

    # Explicit accuracy (zero / one score): same as the estimator default.
    accuracy_scores = cross_val_score(clf, iris.data, iris.target,
                                      scoring="accuracy", cv=5)
    assert_array_almost_equal(accuracy_scores, expected, 2)

    # Weighted F1: classes are balanced, so it should match accuracy.
    weighted_f1_scores = cross_val_score(clf, iris.data, iris.target,
                                         scoring="f1_weighted", cv=5)
    assert_array_almost_equal(weighted_f1_scores, expected, 2)

two_sigma_financial_modelling.py 文件源码项目：PortfolioTimeSeriesAnalysis 作者: MizioAnd 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def rmse_cv(model, x_train, y_train):
    """Return the per-fold RMSE of ``model`` from 5-fold cross-validation."""
    # The scorer yields negated MSE, so negate before taking the root.
    neg_mse = cross_val_score(
        model, x_train, y_train, scoring='neg_mean_squared_error', cv=5
    )
    return np.sqrt(-neg_mse)

genericmodelclass.py 文件源码项目：easyML 作者: aarshayj 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def KFold_CrossValidation(self, scoring_metric):
    """Cross-validate ``self.alg`` on the training data.

    Args:
        scoring_metric: Passed to ``cross_val_score`` as ``scoring``.

    Returns:
        dict with keys ``'mean_error'``, ``'std_error'`` and ``'all_error'``
        (the mean, standard deviation and raw array of the per-fold scores).
    """
    train_frame = self.datablock.train
    features = train_frame[self.predictors].values
    target = train_frame[self.datablock.target].values

    fold_scores = model_selection.cross_val_score(
        estimator=self.alg,
        X=features,
        y=target,
        cv=self.cv_folds,
        scoring=scoring_metric,
        n_jobs=-1,
    )

    summary = {}
    summary['mean_error'] = np.mean(fold_scores)
    summary['std_error'] = np.std(fold_scores)
    summary['all_error'] = fold_scores
    return summary

plot_hyperopt.py 文件源码项目：fluentopt 作者: mehdidc 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def feval(d):
    """Score a random forest built from the hyperparameters in ``d``.

    ``d`` must provide ``'max_depth'`` and ``'n_estimators'``. The result is
    the mean 5-fold accuracy on the module-level ``data_X`` / ``data_y``
    minus the standard deviation of the fold accuracies.
    """
    forest = RandomForestClassifier(
        n_jobs=-1,
        max_depth=d['max_depth'],
        n_estimators=d['n_estimators'],
    )
    fold_accuracy = cross_val_score(forest, data_X, data_y, cv=5, scoring='accuracy')
    # Subtracting the spread penalizes unstable configurations.
    return np.mean(fold_accuracy) - np.std(fold_accuracy)

voxelselector.py 文件源码项目：brainiak 作者: brainiak 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def _cross_validation_for_one_voxel(clf, vid, num_folds, subject_data, labels):
    """Score classifier on data using cross validation.

    Returns a ``(vid, mean_score)`` tuple, where ``mean_score`` is the mean of
    the per-fold scores from unshuffled stratified ``num_folds``-fold CV.
    """
    folds = model_selection.StratifiedKFold(n_splits=num_folds,
                                            shuffle=False)
    fold_scores = model_selection.cross_val_score(clf, subject_data,
                                                  y=labels,
                                                  cv=folds, n_jobs=1)
    logger.debug(
        'cross validation for voxel %d is done' %
        vid
    )
    mean_score = fold_scores.mean()
    return (vid, mean_score)

dmonscilearnclassification.py 文件源码项目：dmon-adp 作者: igabriel85 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def adaBoost(self, settings, data=None, dropna=True):
    """Cross-validate and fit a 500-tree AdaBoost classifier, printing scores.

    The frame returned by ``self.__loadData(data, dropna)`` is split into
    features (all columns but the last) and target (the last column). The
    mean 10-fold CV score and the score on the full data are printed.

    Args:
        settings: Accepted but not used by this method.
        data: Forwarded to ``self.__loadData``.
        dropna: Forwarded to ``self.__loadData``.

    Returns:
        Always ``True``.
    """
    df = self.__loadData(data, dropna)
    features = df.columns[:-1]
    X = df[features]
    y = df.iloc[:, -1].values
    seed = 7
    num_trees = 500
    # Folds are not shuffled, so they are deterministic. random_state had no
    # effect without shuffle=True and recent scikit-learn releases reject it.
    kfold = model_selection.KFold(n_splits=10)
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(kfold)
    model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)
    results = model_selection.cross_val_score(model, X, y, cv=kfold)
    model.fit(X, y)
    print(results.mean())
    print(model.score(X, y))
    return True

8voting_classifier.py 文件源码项目：Machine-Learning-Algorithms 作者: PacktPublishing 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def compute_accuracies(lr, dt, svc, vc, X, Y):
    """Return the mean 10-fold accuracy of each estimator, in argument order.

    The four means (for ``lr``, ``dt``, ``svc``, ``vc``) are also printed as
    a numpy array.
    """
    accuracies = [
        cross_val_score(estimator, X, Y, scoring='accuracy', cv=10).mean()
        for estimator in (lr, dt, svc, vc)
    ]

    print('Accuracies:')
    print(np.array(accuracies))

    return accuracies

__init__.py 文件源码项目：data_utilities 作者: fmv1992 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def multiprocessing_grid_search(queue, shared_list, persistent_object):
    """Explore cross validation grid using multiprocessing.

    Pulls ``(grid, cvs_args, cvs_kwargs)`` work items from ``queue`` until a
    ``None`` sentinel arrives. For each item the estimator is configured with
    ``grid``, scores are taken from ``persistent_object`` when it already has
    them (otherwise computed with ``cross_val_score`` and stored), and a copy
    of ``grid`` extended with a ``'scores'`` entry is appended to
    ``shared_list``.
    """
    while True:
        work_item = queue.get()
        if work_item is None:
            # Sentinel: no more work for this worker.
            break

        grid, cvs_args, cvs_kwargs = work_item
        estimator, x = cvs_args
        estimator.set_params(**grid)

        # Reuse a previously stored result when there is one.
        scores = persistent_object.retrieve(estimator, grid)
        if scores is None:
            scores = cross_val_score(estimator, x, **cvs_kwargs)
            persistent_object.update(estimator, grid, scores)

        outcome = grid.copy()
        outcome['scores'] = scores
        shared_list.append(outcome)

dsb_create_voxel_model_predictions.py 文件源码项目：data-science-bowl-2017 作者: tondonia 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def score(self, params):
    """Evaluate one hyperparameter setting with 5-fold cross-validation.

    ``params`` is first normalized by ``self.change_to_int`` and then applied
    to ``self.level0``. The mean and standard deviation of the fold scores
    are printed.

    Returns:
        dict with ``'loss'`` (the mean fold score) and ``'status'``.
    """
    self.change_to_int(params, self.to_int_params)
    self.level0.set_params(**params)
    fold_scores = model_selection.cross_val_score(
        self.level0, self.trainX, self.trainY, cv=5, n_jobs=-1
    )
    mean_score = fold_scores.mean()
    print('%s ------ Score Mean:%f, Std:%f' % (params, mean_score, fold_scores.std()))
    # NOTE(review): the mean CV score is reported as 'loss' unchanged. If the
    # caller minimizes 'loss' and the estimator's score is higher-is-better,
    # the sign may need flipping -- confirm against the optimizer setup.
    return {'loss': mean_score, 'status': STATUS_OK}

house_prices.py 文件源码项目：HousePrices 作者: MizioAnd 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def rmse_cv(model, x_train, y_train):
    """Return the per-fold RMSE of ``model`` from 5-fold cross-validation."""
    # The scorer yields negated MSE, so negate before taking the root.
    neg_mse = cross_val_score(
        model, x_train, y_train, scoring='neg_mean_squared_error', cv=5
    )
    return np.sqrt(-neg_mse)

sklearnexample.py 文件源码项目：pyGPGO 作者: hawk31 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def evaluateModel(C, gamma):
    """Return the mean CV score of an SVC with ``C`` and ``gamma`` given as base-10 exponents.

    Uses the module-level ``X`` and ``y`` and the default CV settings of
    ``cross_val_score``.
    """
    classifier = SVC(C=10**C, gamma=10**gamma)
    fold_scores = cross_val_score(classifier, X, y)
    return np.average(fold_scores)

train.py 文件源码项目：tensorflow_kaggle_house_price 作者: Cuongvn08 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def rmsle_cv(model):
    """Return the per-fold RMSE of ``model`` under shuffled K-fold CV.

    Uses the module-level ``train``, ``y_train`` and ``n_folds``.
    NOTE(review): the name says RMSLE; that only holds if ``y_train`` is
    already log-transformed -- confirm against where it is built.
    """
    # Pass the splitter itself as `cv`. The previous code called
    # .get_n_splits(), which returns a plain integer, so the shuffle and the
    # fixed seed were silently discarded.
    kf = KFold(n_folds, shuffle=True, random_state=42)
    rmse = np.sqrt(-cross_val_score(model, train.values, y_train,
                                    scoring="neg_mean_squared_error", cv=kf))
    return rmse

validate.py 文件源码项目：DSI-personal-reference-kit 作者: teb311 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def cross_validate(estimator, training_data, training_targets):
    """Cross-validate ``estimator`` with two scorers.

    Returns:
        tuple, (negated mean of the ``root_mean_log_squared_error`` fold
        scores, mean of the ``'r2'`` fold scores).
    """
    error_scores = cross_val_score(
        estimator,
        X=training_data,
        y=training_targets,
        scoring=root_mean_log_squared_error,
    )
    r2_scores = cross_val_score(
        estimator,
        X=training_data,
        y=training_targets,
        scoring='r2',
    )

    # The mean of the custom-scorer values is negated before being returned.
    mean_error = -1 * np.mean(error_scores)
    mean_r2 = np.mean(r2_scores)
    return (mean_error, mean_r2)

Adaboost.py 文件源码项目：Machine-Learning-Tools-on-Iris-Dataset 作者: debjitpaul 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def perform_adaboost(self, X_train_std, y_train, X_test_std, y_test):
    """Fit a 10-estimator AdaBoost classifier, print CV scores, plot regions.

    The printed "training" and "test" accuracies are the mean
    ``cross_val_score`` results on the training split and on the test split
    respectively. The decision surface is drawn over the first two columns
    of ``X_test_std`` with the test points overlaid. Nothing is returned.
    """
    ada = AdaBoostClassifier(n_estimators=10)
    ada.fit(X_train_std, y_train)

    train_score = cross_val_score(ada, X_train_std, y_train)
    print('The training accuracy is {:.2f}%'.format(train_score.mean()*100))
    test_score = cross_val_score(ada, X_test_std, y_test)
    print('The test accuracy is {:.2f}%'.format(test_score.mean()*100))

    # Plot settings: one marker and one color per class.
    resolution = 0.01
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'green', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y_test))])

    X, y = X_test_std, y_test

    # Build a grid that extends one unit beyond the data on both axes.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    # Classify every grid point and draw the resulting regions.
    grid_points = np.array([xx1.ravel(), xx2.ravel()]).T
    Z = ada.predict(grid_points).reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Overlay the test samples, one scatter series per class.
    for idx, cl in enumerate(np.unique(y)):
        in_class = y == cl
        plt.scatter(x=X[in_class, 0], y=X[in_class, 1],
                    alpha=0.5, c=cmap(idx),
                    marker=markers[idx], label=cl)
    plt.show()