Python KNeighborsClassifier() example source code
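The snippets below are collected from open-source projects and show common usage patterns of sklearn.neighbors.KNeighborsClassifier. Each entry lists the source file, project, and author.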

main.py (project: MachineLearningBasics, author: zoebchhatriwala)
# Imports assumed by this snippet (the original file presumably imported them at module level):
from sklearn import datasets, tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier


def main():

    iris = datasets.load_iris()
    x = iris.data
    y = iris.target

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.5)

    clrTree = tree.DecisionTreeClassifier()
    clrTree = clrTree.fit(x_train, y_train)
    outTree = clrTree.predict(x_test)

    clrKN = KNeighborsClassifier()
    clrKN = clrKN.fit(x_train, y_train)
    outKN = clrKN.predict(x_test)

    # Prediction accuracy
    print("Accuracy for Decision Tree Classifier: " + str(accuracy_score(y_test, outTree)*100)+"%")
    print("Accuracy for KNeighbors Classifier: " + str(accuracy_score(y_test, outKN)*100)+"%")
models.py (project: johnson-county-ddj-public, author: dssg)
def get_feature_importance(self, clf, model_name):
        clfs = {'RandomForestClassifier':'feature_importances',
                'ExtraTreesClassifier': 'feature_importances',
                'AdaBoostClassifier': 'feature_importances',
                'LogisticRegression': 'coef',
                'svm.SVC': 'coef',
                'GradientBoostingClassifier': 'feature_importances',
                'GaussianNB': None,
                'DecisionTreeClassifier': 'feature_importances',
                'SGDClassifier': 'coef',
                'KNeighborsClassifier': None,
                'linear.SVC': 'coef'}

        if clfs[model_name] == 'feature_importances':
            return list(clf.feature_importances_)
        elif clfs[model_name] == 'coef':
            return clf.coef_.tolist()
        else:
            return None
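A minimal usage sketch for this helper, assuming a fitted estimator (the `self` parameter is unused, so None stands in for it; the fitted forest below is illustrative):

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
forest = RandomForestClassifier(n_estimators=50).fit(X, y)
# 'RandomForestClassifier' maps to 'feature_importances' in the table above.
importances = get_feature_importance(None, forest, 'RandomForestClassifier')
print(importances)  # one value per feature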
classifier.py (project: rltk, author: usc-isi-i2)
def get_classifier_class(class_name):
    name_table = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
    }

    if class_name not in name_table:
        raise ValueError('No such classifier: {}'.format(class_name))

    return name_table[class_name]
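A brief usage sketch, assuming the classifier classes in the table are imported from scikit-learn as in the original module:

# Look up a classifier class by its short name, then instantiate it.
knn_cls = get_classifier_class('k_neighbors')
clf = knn_cls(n_neighbors=5)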
classify.py (project: oss-github-analysis-project, author: itu-oss-project-team)
def __create_classifiers(self):
        classifiers = list()
        classifiers.append({"func": linear_model.SGDClassifier(loss="log"),
                            "name": "sgd"})
        classifiers.append({"func": neighbors.KNeighborsClassifier(1, weights='distance'),
                            "name": "knn1"})
        classifiers.append({"func": neighbors.KNeighborsClassifier(3, weights='distance'),
                            "name": "knn3"})
        classifiers.append({"func": neighbors.KNeighborsClassifier(5, weights='distance'),
                            "name": "knn5"})
        classifiers.append({"func": GaussianNB(),
                            "name": "naive_bayes"})

        # classifiers.append({"func": tree.DecisionTreeClassifier(), "name": "decision_tree"})
        # classifiers.append({"func": MLPClassifier(max_iter=10000), "name": "mlp"})
        # classifiers.append({"func": RandomForestClassifier(), "name": "random_forest"})
        return classifiers
models.py (project: johnson-county-ddj-public, author: dssg)
def define_model(self, model, parameters, n_cores = 0):
        clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
                'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
                'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
                'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
                'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
                'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
                'GaussianNB': GaussianNB(),
                'DecisionTreeClassifier': DecisionTreeClassifier(),
                'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
                'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3), 
                'linear.SVC': svm.LinearSVC() }

        if model not in clfs:
            raise ConfigError("Unsupported model {}".format(model))

        clf = clfs[model]
        clf.set_params(**parameters)
        return clf
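A usage sketch, assuming an instance of the surrounding class (called `model_builder` here for illustration) and that the requested parameters are valid for the chosen estimator:

# Pick the KNN entry from the table and override one hyperparameter.
clf = model_builder.define_model('KNeighborsClassifier', {'n_neighbors': 5})
clf.fit(X_train, y_train)  # X_train / y_train are assumed to exist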
p-final.py (project: Stock-Market-Prediction, author: Diptiranjan1)
def do_ml(ticker):
    X, y, df = extract_featuresets(ticker)

    # Note: sklearn.cross_validation was replaced by sklearn.model_selection
    # in scikit-learn 0.18; newer code should import train_test_split from there.
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.25)

    #clf = neighbors.KNeighborsClassifier()

    clf = VotingClassifier([('lsvc',svm.LinearSVC()),
                            ('knn',neighbors.KNeighborsClassifier()),
                            ('rfor',RandomForestClassifier())])


    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('accuracy:',confidence)
    predictions = clf.predict(X_test)
    print('predicted class counts:',Counter(predictions))
    print()
    print()
    return confidence

# examples of running:
knn_missing_data.py (project: Generative-ConvACs, author: HUJI-Deep)
def knn_masked_data(trX,trY,missing_data_dir, input_shape, k):

    raw_im_data = np.loadtxt(join(script_dir,missing_data_dir,'index.txt'),delimiter=' ',dtype=str)
    raw_mask_data = np.loadtxt(join(script_dir,missing_data_dir,'index_mask.txt'),delimiter=' ',dtype=str)
    # Using 'brute' method since we only want to do one query per classifier
    # so this will be quicker as it avoids overhead of creating a search tree
    knn_m = KNeighborsClassifier(algorithm='brute',n_neighbors=k)
    prob_Y_hat = np.zeros((raw_im_data.shape[0],int(np.max(trY)+1)))
    total_images = raw_im_data.shape[0]
    pbar = progressbar.ProgressBar(widgets=[progressbar.FormatLabel('\rProcessed %(value)d of %(max)d Images '), progressbar.Bar()], maxval=total_images, term_width=50).start()
    for i in range(total_images):
        mask_im=load_image(join(script_dir,missing_data_dir,raw_mask_data[i][0]), input_shape,1).reshape(np.prod(input_shape))
        mask = np.logical_not(mask_im > eps) # since mask is 1 at missing locations
        v_im=load_image(join(script_dir,missing_data_dir,raw_im_data[i][0]), input_shape, 255).reshape(np.prod(input_shape))
        rep_mask = np.tile(mask,(trX.shape[0],1))
        # Corrupt whole training set according to the current mask
        corr_trX = np.multiply(trX, rep_mask)        
        knn_m.fit(corr_trX, trY)
        prob_Y_hat[i,:] = knn_m.predict_proba(v_im.reshape(1,-1))
        pbar.update(i)
    pbar.finish()
    return prob_Y_hat
fit.py (project: sudokuextract, author: hbldh)
def _load_sklearn_default_classifier():
    if sys.version_info[0] == 2:
        file_name = "sklearn_classifier_py2.pklz"
        protocol = 2
    else:
        file_name = "sklearn_classifier_py3.pklz"
        protocol = 3

    file_path = resource_filename('sudokuextract.data', file_name)
    if resource_exists('sudokuextract.data', file_name):
        f = gzip.open(file_path, 'rb')
        classifier = pickle.load(f)
        f.close()
    else:
        classifier = KNeighborsClassifier(n_neighbors=10)
        classifier = fit_combined_classifier(classifier)
        f = gzip.open(file_path, 'wb')
        pickle.dump(classifier, f, protocol=protocol)
        f.close()

    return classifier
fit.py (project: sudokuextract, author: hbldh)
def _load_sudokuextract_default_classifier():
    file_name = "sudokuextract_classifier.pklz"
    protocol = 2

    file_path = resource_filename('sudokuextract.data', file_name)
    if resource_exists('sudokuextract.data', file_name):
        f = gzip.open(file_path, 'rb')
        classifier_json = pickle.load(f)
        # Use keyword arguments: positionally, the third and fourth parameters
        # of KNeighborsClassifier are `algorithm` and `leaf_size`, not `metric`
        # and `p`, so the original positional call bound them incorrectly.
        classifier = KNeighborsClassifier(n_neighbors=classifier_json.get('n_neighbors'),
                                          weights=classifier_json.get('weights'),
                                          metric=classifier_json.get('metric'),
                                          p=classifier_json.get('p'))
        classifier.fit(np.array(classifier_json.get('data')),
                       np.array(classifier_json.get('labels')))
        f.close()
    else:
        classifier = KNeighborsClassifier(n_neighbors=10)
        classifier = fit_combined_classifier(classifier)
        f = gzip.open(file_path, 'wb')
        pickle.dump(classifier.to_json(), f, protocol=protocol)
        f.close()
    return classifier
stats.py (project: gpam_stats, author: ricoms)
def n3_error_rate_nearest_neighbor_classifier(data):

    mistakes = 0
    n = data.shape[0]

    # Leave-one-out 1-NN error rate (the N3 data-complexity measure): each row
    # is classified by its single nearest neighbor among all remaining rows.
    for i in range(n):
        bad_df = data.index.isin([i])
        good_df = ~bad_df

        knn = KNeighborsClassifier(n_neighbors=1)
        knn.fit(data.iloc[good_df].iloc[:, :-1], data.iloc[good_df].iloc[:, -1])
        temp = np.array(data.iloc[i, :-1]).reshape(1, -1)
        mistake = 1 if data.iloc[i, -1] != knn.predict(temp) else 0

        mistakes = mistakes + mistake

    n3 = (1.0 * mistakes) / n
    if n3 > 1:
        n3 = 1
    return n3
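A usage sketch under the snippet's assumptions (a pandas DataFrame with the label in its last column, and numpy, pandas, and KNeighborsClassifier imported):

import numpy as np
import pandas as pd

# Toy dataset: two features plus a label column, as the function expects.
df = pd.DataFrame({'f1': [0.0, 0.1, 1.0, 1.1],
                   'f2': [0.0, 0.2, 1.0, 0.9],
                   'label': [0, 0, 1, 1]})
print(n3_error_rate_nearest_neighbor_classifier(df))  # 0.0: classes are well separated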
ClassificationKNN.py (project: AirTicketPredicting, author: junlulocky)
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,60)
                             }
                            ]


        clf = GridSearchCV(neighbors.KNeighborsClassifier(), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
embeddings_maker.py (project: nlvr_tau_nlp_final_proj, author: udiNaveh)
def check_word2vec(embed_dict, embeds, key_words = ['of', 'is', 'a', 'yellow', 'circle', 'box']):

    KN = KNeighborsClassifier(n_neighbors=3)

    print('fitting pseudo-KNN...')
    KN.fit(embeds, [1]*len(embeds))
    inds = KN.kneighbors(embeds, return_distance=False)
    # print(inds)

    embeds_list = embeds.tolist()
    for word in key_words:
        req_words = []
        ind = embeds_list.index(embed_dict[word].tolist())
        req_inds = inds[ind]
        for idx in req_inds:
            for w in embed_dict:
                if (embed_dict[w] == embeds[idx]).all():
                    req_words.append(w)
        print('for:', word, ', the 3nn are:', req_words)
model.py (project: 5th_place_solution_facebook_check_ins, author: aikinogard)
def knn_ps2(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["year"] = (1 + df["year"]) * 10.
        df_new["hour"] = (1 + df["hour"]) * 4.
        df_new["weekday"] = (1 + df["weekday"]) * 3.11
        df_new["month"] = (1 + df["month"]) * 2.11
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x)) * 10.
        df_new["x"] = df["x"] * 465.
        df_new["y"] = df["y"] * 975.
        return df_new
    logging.info("train knn_ps2 model")
    df_cell_train_feats_knn = prepare_feats(df_cell_train_feats)
    # k scales with the square root of the training-set size; the callable
    # passed to `weights` gives inverse-square distance weighting.
    clf = KNeighborsClassifier(n_neighbors=np.floor(np.sqrt(len(y_train)) / 5.3).astype(int),
                               weights=lambda x: x ** -2, metric='manhattan', n_jobs=-1)
    clf.fit(df_cell_train_feats_knn, y_train)
    df_cell_test_feats_knn = prepare_feats(df_cell_test_feats)
    y_test_pred = clf.predict_proba(df_cell_test_feats_knn)
    return y_test_pred
ClassificationLibCalculator.py (project: TextStageProcessor, author: mhyhre)
def classification_knn(self):
        self.signals.PrintInfo.emit("KNN classification")
        output_dir = self.output_dir + 'knn_out/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Vectorize the documents and split them into training and test sets;
        # the hashing vectorizer keeps the feature space at a fixed size.
        vectorizer = HashingVectorizer()
        fdata = vectorizer.fit_transform(self.fdata)
        trainingSet = fdata[:self.split]
        testSet = fdata[self.split:]

        # Build and fit the classifier, then predict labels and probabilities.
        classificator = KNeighborsClassifier(n_neighbors=self.knn_n_neighbors)
        classificator.fit(trainingSet, self.trainingClass)
        results = classificator.predict(testSet)
        proba = classificator.predict_proba(testSet)

        self.write_results_to_file(output_dir + 'results.csv', results, proba, classificator.classes_, self.test_filenames)
        out_text = self.compile_result_string(results, proba, classificator.classes_, self.test_filenames)
        self.signals.PrintInfo.emit(out_text)
classifier.py (project: stock-price-prediction, author: chinuy)
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf

    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)

    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()

    elif method == 'SVM':
        c = parameters[0]
        g =  parameters[1]
        clf = SVC(C=c, gamma=g)

    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown'])
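A usage sketch, assuming a DataFrame whose last column is the binary `UpDown` target (the frame and its name are illustrative, not from the original project):

# Train a KNN model on a hypothetical feature frame.
model = buildModel(train_df, 'KNN', None)  # parameters are only used by 'SVM'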
training.py (project: static-gesture-recognition, author: windmark)
def splitValidateModel(self, visualizePredictions = False):
    (label_vector, input_vector) = loadData(self.featureFile)

    indexArray = range(0, len(input_vector))
    trainData, testData, trainLabels, expectedLabels, trainIndices, testIndices = \
      cross_validation.train_test_split(input_vector, label_vector, indexArray, test_size=(1.0 - self.percentSplit))

    kNNClassifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
    kNNClassifier.fit(trainData, trainLabels) 
    predictedLabels = kNNClassifier.predict(testData)

    print("Classification report for classifier %s:\n%s\n"
          % ('k-NearestNeighbour', metrics.classification_report(expectedLabels, predictedLabels)))
    print("Confusion matrix:\n%s" % metrics.confusion_matrix(expectedLabels, predictedLabels))
    print('Split Validation training :: Done.\n')

    if visualizePredictions:
      self.__visualizePredictedDataset__(input_vector, testIndices, predictedLabels, expectedLabels)
training.py (project: static-gesture-recognition, author: windmark)
def trainLimited(self, featureFile, n_datapoints):
    (label_vector, input_vector) = loadData(featureFile)

    trainData, testData, trainLabels, testLabels = \
      cross_validation.train_test_split(input_vector, label_vector, test_size=(0))

    n_totalrows = int((len(label_vector)/n_datapoints))
    for n in range(0, n_totalrows):
      limited_label_vector = trainLabels[0: (n+1) * n_datapoints]
      limited_input_vector = trainData[0: (n+1) * n_datapoints]

      kNNClassifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
      kNNClassifier.fit(limited_input_vector, limited_label_vector)

      scores = cross_validation.cross_val_score(kNNClassifier, limited_input_vector, limited_label_vector, cv = 5)
      print('%f on %d datapoints' % ((sum(scores) / len(scores)), len(limited_label_vector)))
User_Interface.py (project: yttresearch-machine-learning-algorithms-analysis, author: gdemos01)
def KNNClassifier(action):

        # Set the classifier to K-Nearest Neighbors
        clf = KNeighborsClassifier(n_neighbors=5)

        dir = input('Give Data Directory: ')

        if int(action) == 1:
                print('Loading Data')
                PopularityClassifier.loadData(dir)   
                PopularityClassifier.youtubePopular(dir,clf,0)
                PopularityClassifier.twitterPopular(dir,clf,0)       
                PopularityClassifier.bothPopular(dir,clf,0)
        elif int(action) == 2:
                print('Loading Data')
                ViralityClassifier.loadData(dir)
                ViralityClassifier.youtubeViral(dir,clf,0)
                ViralityClassifier.twitterViral(dir,clf,0)
                ViralityClassifier.bothViral(dir,clf,0)
        else:
                print('Loading Data')
                ViralityAndPopularityClassifier.loadData(dir)
                ViralityAndPopularityClassifier.youtubeViralAndPopular(dir,clf,0)
                ViralityAndPopularityClassifier.twitterViralAndPopular(dir,clf,0)
                ViralityAndPopularityClassifier.bothViralAndPopular(dir,clf,0)
test_boundaries.py (project: yellowbrick, author: DistrictDataLabs)
def test_init(self):
        """
        Testing the init method
        """
        model = neighbors.KNeighborsClassifier(3)
        viz = DecisionBoundariesVisualizer(model)

        self.assertEqual(viz.step_size, 0.0025)
        self.assertEqual(viz.name, 'KNeighborsClassifier')
        self.assertEqual(viz.estimator, model)

        self.assertIsNone(viz.classes_)
        self.assertIsNone(viz.features_)
        self.assertIsNotNone(viz.markers)
        self.assertIsNotNone(viz.scatter_alpha)
        self.assertTrue(viz.show_scatter)

        self.assertIsNone(viz.Z)
        self.assertIsNone(viz.xx)
        self.assertIsNone(viz.yy)
        self.assertIsNone(viz.class_labels)
        self.assertIsNone(viz.title)
        self.assertIsNone(viz.x)
        self.assertIsNone(viz.y)
test_boundaries.py (project: yellowbrick, author: DistrictDataLabs)
def test_draw_ax_show_scatter_False(self):
        """Test that the matplotlib functions are being called when the
        scatter plot isn't drawn
        """
        model = neighbors.KNeighborsClassifier(3)
        viz = DecisionBoundariesVisualizer(
            model, features=['one', 'two'], show_scatter=False)
        fitted_viz = viz.fit(X_two_cols, y=y)
        fitted_viz.ax = mock.Mock()
        fitted_viz.ax.pcolormesh = mock.MagicMock()
        fitted_viz.ax.scatter = mock.MagicMock()
        fitted_viz.ax.legend = mock.MagicMock()

        fitted_viz.draw(X_two_cols, y=y)
        self.assertEqual(len(fitted_viz.ax.pcolormesh.mock_calls), 1)
        self.assertEqual(len(fitted_viz.ax.scatter.mock_calls), 0)
        self.assertEqual(len(fitted_viz.ax.legend.mock_calls), 1)
test_boundaries.py (project: yellowbrick, author: DistrictDataLabs)
def test_finalize(self):
        model = neighbors.KNeighborsClassifier(3)
        viz = DecisionBoundariesVisualizer(
            model, features=['one', 'two'], show_scatter=False)
        fitted_viz = viz.fit(X_two_cols, y=y)
        fitted_viz.draw(X_two_cols, y=y)

        fitted_viz.ax = mock.Mock()
        fitted_viz.ax.legend = mock.MagicMock()
        fitted_viz.ax.set_xlabel = mock.MagicMock()
        fitted_viz.ax.set_ylabel = mock.MagicMock()

        fitted_viz.poof()

        fitted_viz.ax.legend.assert_called_once_with(loc='best', frameon=True)
        fitted_viz.ax.set_xlabel.assert_called_once_with('one')
        fitted_viz.ax.set_ylabel.assert_called_once_with('two')
knn.py (project: DataMiningCompetitionFirstPrize, author: lzddzh)
def learn(x, y, test_x):
    # Per-class weights collected from config; note that scikit-learn's
    # KNeighborsClassifier has no per-sample weights: its second positional
    # parameter `weights` expects 'uniform', 'distance', or a callable, so
    # passing weight_list there (as the original code did) raises an error.
    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_gdbt)
        if y[j] == "1000":
            weight_list.append(variables.weight_1000_gdbt)
        if y[j] == "1500":
            weight_list.append(variables.weight_1500_gdbt)
        if y[j] == "2000":
            weight_list.append(variables.weight_2000_gdbt)

    clf = KNeighborsClassifier(n_neighbors=1).fit(x, y)

    prediction_list = clf.predict(test_x)
    return prediction_list
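If per-class weighting is genuinely needed with KNN, one workaround is to replicate training samples in proportion to their class weight before fitting. A minimal sketch under that assumption; the helper name and `class_weight` dict are illustrative:

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

def fit_weighted_knn(x, y, class_weight, n_neighbors=1):
    # Repeat each sample round(weight) times so heavier classes get more votes.
    x, y = np.asarray(x), np.asarray(y)
    reps = np.array([max(1, int(round(class_weight.get(label, 1.0)))) for label in y])
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(
        np.repeat(x, reps, axis=0), np.repeat(y, reps))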
chapter_4.py (project: python-machine-learning-book, author: jeremyn)
def use_sbs_with_knn(columns, X_train, X_test, y_train, y_test):
    knn = KNeighborsClassifier(n_neighbors=2)
    sbs = SBS(knn, k_features=1)
    sbs.fit(X_train, y_train)

    k_feat = [len(k) for k in sbs.subsets_]
    plt.plot(k_feat, sbs.scores_, marker='o')
    plt.ylim([0.7, 1.1])
    plt.ylabel('Accuracy')
    plt.xlabel('Number of features')
    plt.grid()
    plt.show()

    k5 = list(sbs.subsets_[8])  # with the Wine data's 13 features, index 8 is the best 5-feature subset
    print(columns[1:][k5])

    knn.fit(X_train, y_train)
    print("Training accuracy: %s" % knn.score(X_train, y_train))
    print("Test accuracy: %s" % knn.score(X_test, y_test))

    knn.fit(X_train[:, k5], y_train)
    print("Training accuracy: %s" % knn.score(X_train[:, k5], y_train))
    print("Test accuracy: %s" % knn.score(X_test[:, k5], y_test))
knn_cv.py (project: ML, author: saurabhsuman47)
def knn_cv(post_features, post_class, n_folds, n_neighbors, length_dataset = -1):

    if(length_dataset == -1):
        length_dataset = len(post_class)
    # Old scikit-learn (< 0.18) KFold signature; newer versions use
    # KFold(n_splits=...).split(X) instead.
    cv = KFold(n=length_dataset, n_folds=n_folds, shuffle=True)
    train_accuracy = []
    test_accuracy = []

    for train,test in cv:
        knn = neighbors.KNeighborsClassifier(n_neighbors = n_neighbors)
        knn.fit(post_features[train],post_class[train])
        train_accuracy.append(knn.score(post_features[train], post_class[train]))
        test_accuracy.append(knn.score(post_features[test], post_class[test]))

#    return (sum(train_accuracy)/n_folds), (sum(test_accuracy)/n_folds)
    return np.mean(train_accuracy), np.mean(test_accuracy)
classify.py (project: Stock-Market-Analysis-and-Prediction, author: samshara)
def performKNNClass(X_train, y_train, X_test, y_test, parameters, fout, savemodel):
    """
    KNN binary Classification
    """
    clf = KNeighborsClassifier(3)
    clf.fit(X_train, y_train)

    if savemodel:
        #fname_out = '{}-{}.pickle'.format(fout, datetime.now().date())
        fname_out = fout+'.pickle'
        with open(fname_out, 'wb') as f:
            pickle.dump(clf, f, -1)    

    accuracy = clf.score(X_test, y_test)

    return accuracy
knn_classify_sklearn.py (project: python_utils, author: Jayhello)
def cross_validation():
    x_train, x_test, y_train, y_test = load_data()
    k_lst = list(range(1, 30))
    lst_scores = []

    for k in k_lst:
        knn = KNeighborsClassifier(n_neighbors=k)
        scores = cross_val_score(knn, x_train, y_train, cv=10, scoring='accuracy')
        lst_scores.append(scores.mean())

    # changing to misclassification error
    MSE = [1 - x for x in lst_scores]
    optimal_k = k_lst[MSE.index(min(MSE))]
    print "The optimal number of neighbors is %d" % optimal_k
    # plot misclassification error vs k
    # plt.plot(k_lst, MSE)
    # plt.ylabel('Misclassification Error')
    plt.plot(k_lst, lst_scores)
    plt.xlabel('Number of Neighbors K')
    plt.ylabel('correct classification rate')
    plt.show()
test_neighbors.py (project: Parallel-SGD, author: angadgill)
def test_neighbors_iris():
    # Sanity checks on the iris dataset
    # Puts three points of each label in the plane and performs a
    # nearest neighbor query on points near the decision boundary.

    for algorithm in ALGORITHMS:
        clf = neighbors.KNeighborsClassifier(n_neighbors=1,
                                             algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_array_equal(clf.predict(iris.data), iris.target)

        clf.set_params(n_neighbors=9, algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_true(np.mean(clf.predict(iris.data) == iris.target) > 0.95)

        rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm)
        rgs.fit(iris.data, iris.target)
        assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target),
                       0.95)
test_neighbors.py (project: Parallel-SGD, author: angadgill)
def test_neighbors_digits():
    # Sanity check on the digits dataset
    # the 'brute' algorithm has been observed to fail if the input
    # dtype is uint8 due to overflow in distance calculations.

    X = digits.data.astype('uint8')
    Y = digits.target
    (n_samples, n_features) = X.shape
    train_test_boundary = int(n_samples * 0.8)
    train = np.arange(0, train_test_boundary)
    test = np.arange(train_test_boundary, n_samples)
    (X_train, Y_train, X_test, Y_test) = X[train], Y[train], X[test], Y[test]

    clf = neighbors.KNeighborsClassifier(n_neighbors=1, algorithm='brute')
    score_uint8 = clf.fit(X_train, Y_train).score(X_test, Y_test)
    score_float = clf.fit(X_train.astype(float), Y_train).score(
        X_test.astype(float), Y_test)
    assert_equal(score_uint8, score_float)
test_cross_validation.py (project: Parallel-SGD, author: angadgill)
def test_cross_val_score_multilabel():
    X = np.array([[-3, 4], [2, 4], [3, 3], [0, 2], [-3, 1],
                  [-2, 1], [0, 0], [-2, -1], [-1, -2], [1, -2]])
    y = np.array([[1, 1], [0, 1], [0, 1], [0, 1], [1, 1],
                  [0, 1], [1, 0], [1, 1], [1, 0], [0, 0]])
    clf = KNeighborsClassifier(n_neighbors=1)
    scoring_micro = make_scorer(precision_score, average='micro')
    scoring_macro = make_scorer(precision_score, average='macro')
    scoring_samples = make_scorer(precision_score, average='samples')
    score_micro = cval.cross_val_score(clf, X, y, scoring=scoring_micro, cv=5)
    score_macro = cval.cross_val_score(clf, X, y, scoring=scoring_macro, cv=5)
    score_samples = cval.cross_val_score(clf, X, y,
                                         scoring=scoring_samples, cv=5)
    assert_almost_equal(score_micro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 3])
    assert_almost_equal(score_macro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
    assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
knn_clf.py (project: classify_dream_of_the_red_chamber, author: MrQianJinSi)
def knn_clf(observations, n_neighbors):
  # Training chapters: rows 20-29 get label 1, rows 110-119 get label 2.
  range1 = [20, 30]
  len1 = len(range(range1[0], range1[1]))
  range2 = [110, 120]
  len2 = len(range(range2[0], range2[1]))

  training_index = list(range(range1[0], range1[1])) + list(range(range2[0],
    range2[1]))
  training_data = observations[training_index, :]
  training_label = np.ones(len1 + len2, dtype='int32')
  training_label[len1:] = 2
  # Fit the classifier (weights='distance' is a possible variant).
  knn = KNeighborsClassifier(n_neighbors=n_neighbors)
  knn.fit(training_data, training_label)
  # Predict a label for every chapter.
  knn_pre = knn.predict(observations)

  print('Predictions for the first 80 chapters:')
  for i in range(8):
    print(knn_pre[i*10:(i+1)*10])

  print('Predictions for the last 40 chapters:')
  for i in range(8, 12):
    print(knn_pre[i*10:(i+1)*10])

