python类KNeighborsClassifier()的实例源码

identify_singing_voice_gender.py 文件源码 项目:ISM2017 作者: ybayle 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Fit the classifier(s) listed below and return test-set predictions.

    Data can be supplied either pre-loaded via ``data`` (a dict holding
    "train_features", "train_groundtruths", "test_features" and
    "test_groundtruths") or as ``train``/``test`` file paths parsed with
    ``read_file``.

    NOTE(review): every classifier in ``classifiers`` is fitted, but only
    the predictions of the last key iterated are returned -- harmless with
    the single active entry, but re-enabling the commented-out models would
    silently discard all but one result.  ``disp`` and ``outfilename`` are
    currently unused.
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        # Features and ground truths already loaded by the caller.
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        # Resolve file paths and parse the feature files from disk.
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    # Create the result directory if missing, otherwise normalise its path.
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions
generater.py 文件源码 项目:Machine-Learning 作者: grasses 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def __init__(self, config = conf, split = 0.3, clf = None, auto_rebuild = False, debug = False):
        """Store configuration and classifier, then initialise the generator.

        :param config: configuration object (defaults to the module-level ``conf``)
        :param split: train/test split ratio
        :param clf: scikit-learn estimator; a fresh ``KNeighborsClassifier``
            is created when omitted
        :param auto_rebuild: whether to rebuild data automatically
        :param debug: enable debug output
        """
        # Create a fresh classifier per instance: the original default
        # `clf=KNeighborsClassifier()` was evaluated once at definition time,
        # so every instance silently shared (and re-fitted) one estimator.
        self.clf = KNeighborsClassifier() if clf is None else clf
        # BUG FIX: the original assigned the module-level `conf`, silently
        # ignoring the `config` argument passed by the caller.
        self.conf = config
        self.split = split
        self.debug = debug
        self.auto_rebuild = auto_rebuild
        self.init()
main.py 文件源码 项目:Machine-Learning 作者: grasses 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def __init__(self, conf = conf, clf = None, debug = False):
        """Store configuration, classifier and per-run bookkeeping state.

        :param conf: configuration object (defaults to the module-level ``conf``)
        :param clf: scikit-learn estimator; a fresh ``KNeighborsClassifier``
            is created when omitted
        :param debug: enable debug output
        """
        # Fresh classifier per instance: the original default
        # `clf=KNeighborsClassifier()` is a mutable default argument, so all
        # instances created without an explicit clf shared one estimator.
        self.clf = KNeighborsClassifier() if clf is None else clf
        self.conf = conf
        self.debug = debug
        # Directory containing this source file.
        self.base = os.path.dirname(os.path.realpath(__file__))
        self.vote_db = {}
        self.letter_db = {}
        self.writer_db = {}
        # Running totals for accuracy tracking.
        self.total = self.right = 0
Prediction.py 文件源码 项目:XTREE 作者: ai-se 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def knn(train, test, smoteit=True):
  """Train a default k-NN on `train` and return predictions for `test`.

  When `smoteit` is true the training set is rebalanced with SMOTE first.
  The last two columns of the formatted frames are treated as non-feature
  columns; the second-to-last holds the class label.
  """
  if smoteit:
    train = SMOTE(train)
  model = KNeighborsClassifier()
  train_frame = formatData(train)
  test_frame = formatData(test)
  feature_cols = train_frame.columns[:-2]
  target = train_frame[train_frame.columns[-2]]
  # set_trace()
  model.fit(train_frame[feature_cols], target)
  return model.predict(test_frame[test_frame.columns[:-2]]).tolist()
knnmodel.py 文件源码 项目:Supply-demand-forecasting 作者: LevinJ 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def setClf(self):
        """Build the estimator: min-max feature scaling followed by 33-NN.

        The fitted attribute is a scikit-learn Pipeline stored on self.clf.
        """
        knn = KNeighborsClassifier(n_neighbors = 33)
        scaler = preprocessing.MinMaxScaler()
        self.clf = Pipeline([('scaler', scaler), ('estimator', knn)])
sklearn_usage.py 文件源码 项目:base_function 作者: Rockyzsu 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def lession_4():
    """Demo: train a default k-NN on the iris dataset and print predictions
    alongside the held-out true labels.

    Note: the split is random (no fixed random_state), so output varies
    between runs.
    """
    iris = datasets.load_iris()
    iris_X = iris.data
    iris_y = iris.target
    # print iris_X[:2]
    # print iris_y
    # Hold out 30% of the samples for testing.
    X_train,X_test,y_train,y_test = train_test_split(iris_X,iris_y,test_size=0.3)
    knn = KNeighborsClassifier()
    knn.fit(X_train,y_train)
    # print() call form works on both Python 2 and 3; the original used the
    # Python-2-only print statement, a SyntaxError under Python 3.
    print(knn.predict(X_test))
    print(y_test)

# dataset usage
classification.py 文件源码 项目:oss-github-analysis-project 作者: itu-oss-project-team 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def knn_classify(self, out_folder_path, training_set, test_set, training_labels, test_labels, k=1, msg=""):
        """Fit a distance-weighted k-NN and evaluate it on the test split.

        Returns a (confusion_matrix, success) pair, where success is the raw
        count of correctly classified test samples.
        """
        print("message: " + msg)
        # Any output file should extend this path
        out_file_pre_path = os.path.join(out_folder_path, "knn" + str(k) + msg)

        model = neighbors.KNeighborsClassifier(k, weights='distance')
        model.fit(training_set, training_labels)
        predicted = model.predict(test_set)

        # normalize=False -> number of correct predictions, not a ratio.
        success = accuracy_score(test_labels, predicted, normalize=False)
        conf_matrix = self.__retrieve_confusion_matrix(test_labels, predicted, out_file_pre_path)
        return conf_matrix, success
sklearn_module.py 文件源码 项目:srep 作者: Answeror 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def __init__(self):
        """Wrap a default scikit-learn k-nearest-neighbours classifier."""
        from sklearn.neighbors import KNeighborsClassifier
        self.clf = KNeighborsClassifier()
1.py 文件源码 项目:sentiment-analysis 作者: lplping 项目源码 文件源码 阅读 42 收藏 0 点赞 0 评论 0
def KnnClass(x_train,y_train):
    """Fit and return a default k-NN classifier on the given training data."""
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier()
    model.fit(x_train,y_train)
    return model


#========Decision Tree ========#
model_selection.py 文件源码 项目:StrepHit 作者: Wikidata 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def get_models(test):
    """Return (estimator class, hyper-parameter grid) pairs for model search.

    In test mode only the fast LinearSVC grid is returned; otherwise the
    slower k-NN / SVC / random-forest grids are included as well.
    """
    models = [
        (LinearSVC, {
            'C': [0.01, 0.1, 1.0, 10.0],
            'multi_class': ['ovr', 'crammer_singer'],
        }),
    ]
    if not test:
        models += [
            (KNeighborsClassifier, {
                'weights': ['uniform', 'distance'],
            }),
            (SVC, {
                'C': [0.01, 0.1, 1.0, 10.0, 100.0],
                'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                'decision_function_shape': ['ovr', 'ovo'],
            }),
            (RandomForestClassifier, {
                'criterion': ['gini', 'entropy'],
                'min_samples_split': [5, 10, 25],
                'min_samples_leaf': [5, 10, 25],
                'n_estimators': [5, 10, 50, 100],
            }),
        ]
    return models
stats.py 文件源码 项目:gpam_stats 作者: ricoms 项目源码 文件源码 阅读 41 收藏 0 点赞 0 评论 0
def n4_non_linearity_of_nearest_neighbor_classifier( data, random_seed = 42, iterations = 20 ):
    """Estimate the N4 complexity measure (non-linearity of a 1-NN classifier).

    Per iteration: synthetic points are generated by linearly interpolating
    random same-class pairs, and the error of a 1-NN trained on the original
    data over those synthetic points is recorded.  Returns the mean error
    over ``iterations`` runs.

    Assumes ``data`` is a pandas DataFrame whose LAST column is the class
    label -- TODO confirm with callers.
    """

    def generate_interpolated_data_cl(data, cl, features, labels):
        # Build new points of class `cl` by interpolating random pairs of
        # existing points of that class.
        points_in_class = data[data[labels] == cl].index.tolist()
        data_interpolated = pd.DataFrame(columns = features + [labels])

        for a, b in random_combinations(points_in_class):
            new_point = linear_interpolation(data.iloc[a, :-1], data.iloc[b, :-1] )
            df = pd.DataFrame([new_point + [cl]], columns = features + [labels] )
            # NOTE(review): DataFrame.append was removed in pandas 2.0; this
            # only runs on the pandas version the project pins.
            data_interpolated = data_interpolated.append(df)

        return data_interpolated

    def get_n4_for_iteration(data):  

        labels = data.columns[-1]
        features = data.columns[:-1,].tolist()
        classes = data.iloc[:, -1].unique()
        data_to_interpolate = data.copy()

        # 1-NN trained on the original (non-interpolated) data.
        knn = KNeighborsClassifier(n_neighbors=1)
        knn.fit(data[features], data[labels])

        # NOTE(review): each pass overwrites `data_interpolated`, so only the
        # LAST class's synthetic points are scored below.  It looks like the
        # per-class frames should be concatenated -- confirm intent.
        for cl in classes:
            data_interpolated = generate_interpolated_data_cl(data_to_interpolate, cl, features, labels)

        # Error rate of the 1-NN on the synthetic points.
        mistakes = 1 - knn.score(data_interpolated[features], data_interpolated[labels])

        return mistakes

    random.seed( random_seed )
    n4 = []

    for i in range(iterations):
        mistakes = get_n4_for_iteration(data)
        n4.append(mistakes)

    return np.mean(n4)
model.py 文件源码 项目:meinkurve 作者: michgur 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def __init__(self,data_file):
        """Load a CSV dataset, hold out 20% of it, and fit a k-NN classifier.

        :param data_file: path to a CSV file with a 'class' column holding
            the labels; all other columns are used as features.
        """
        self.file = data_file
        df = pd.read_csv(data_file)
        X = np.array(df.drop(['class'], 1))
        y = np.array(df['class'])
        # Count lines with an explicit context manager so the file handle is
        # closed deterministically (the original `open()` inside a genexp
        # leaked the handle until garbage collection).
        with open(data_file) as f:
            self.size = sum(1 for line in f)

        X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)

        clf = neighbors.KNeighborsClassifier()
        # NOTE: fit() returns the estimator itself, so self.prediction holds
        # the fitted classifier, not a prediction array -- the attribute name
        # is kept for backward compatibility.
        self.prediction = clf.fit(X_train, y_train)
classifiers.py 文件源码 项目:avito-contest 作者: fmilepe 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def KNN(X, y):
    """Fit a k-NN classifier on (X, y), print its score and return it.

    :param X: training feature matrix
    :param y: training labels
    :return: the fitted KNeighborsClassifier
    """
    print("Iniciando treinamento do KNN")
    clf = KNeighborsClassifier(n_jobs=6,leaf_size=15)
    # Removed the unused `kf = KFold(len(y), n_folds=20)` -- it was created
    # but never iterated, so no cross-validation actually happened.
    clf.fit(X,y)

    # NOTE(review): the first 10000 samples come from the training data, so
    # this is a training-set score, not a validation score.
    X_score = X[:10000]
    y_score = y[:10000]
    score = clf.score(X_score, y_score)
    print("KNN score: ", score)

    return clf
knn.py 文件源码 项目:MixtureOfExperts 作者: krishnakalyan3 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def knn_model(X, y):
    """Return a 3-nearest-neighbours classifier fitted on (X, y)."""
    classifier = KNeighborsClassifier(n_neighbors=3)
    # fit() returns the estimator itself, so the fitted model is returned
    # directly.
    return classifier.fit(X, y)
service.py 文件源码 项目:intellead-classification 作者: intellead 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def classification(lead):
    """Classify a lead as qualified/unqualified with a 7-NN model.

    Trains a fresh classifier on the dataset pulled from the database, then
    predicts the status of ``lead`` (a dict of lead attributes) and returns
    a dict with the predicted ``value`` ("0"/"1") and its ``proba``, both as
    strings.
    """
    #classifiers = [
    #    ('ab', AdaBoostClassifier()),
    #    ('dt', DecisionTreeClassifier(max_depth=5)),
    #    ('kn', KNeighborsClassifier(16)),
    #]
    # Fetch features matching this lead's keys, plus the known outcomes.
    inputs = get_dataset_input_from_database(lead.keys())
    outputs = get_dataset_output_from_database()
    print('The total number of examples in the dataset is: %d' % (len(inputs)))
    # Fixed random_state makes the split (and reported score) reproducible.
    inputs_training, inputs_test, outputs_training, outputs_test = train_test_split(inputs, outputs, test_size=0.3, random_state=42)
    print('The number of examples used for training are: %d' % (len(inputs_training)))
    print('The number of examples used for testing are: %d' % (len(inputs_test)))
    # p=2 -> Euclidean distance.
    knn = KNeighborsClassifier(n_neighbors=7, p=2)
    knn.fit(inputs_training, np.ravel(outputs_training))
    print('[K=7] The probability of the algorithm to be right is: %f%%' % (knn.score(inputs_test, outputs_test) * 100))
    #voting_classifier = VotingClassifier(estimators=classifiers, voting='hard')
    #voting_classifier = voting_classifier.fit(inputs_training, np.ravel(outputs_training))
    #print('The probability of the machine to be right is: %f%%' % (voting_classifier.score(inputs_test, outputs_test) * 100))
    print('Lead data:')
    print(lead)
    # Shape the lead dict into the tuple layout the model expects.
    data_to_predict = convert_dict_to_tuple(lead)
    print('Lead data to predict:')
    print(data_to_predict)
    lead_status = knn.predict(data_to_predict)
    lead_status_value = lead_status[0]
    #lead_status = voting_classifier.predict(data_to_predict)
    print('According to lead data, his status is: %d' % (lead_status_value))
    print('[0] unqualified [1] qualified')
    # Probability of the winning class for the single predicted sample.
    proba = knn.predict_proba(data_to_predict)
    max_proba = max(proba[0])
    print('Proba is: %d%%' %(max_proba*100))
    lead_status_dict = dict()
    dict.update(lead_status_dict, value=str(lead_status_value))
    dict.update(lead_status_dict, proba=str(max_proba))
    return lead_status_dict
test.py 文件源码 项目:Audio-classification-using-Bag-of-Frames-approach 作者: amogh3892 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def knn_predict(training_samples, training_labels, test_samples, test_lables,k_neighbours = 5,weights = "uniform",algorithm = "auto"):
    """Fit a k-NN classifier, time fit/predict, and write results to disk.

    Writes a human-readable summary to ``Temp\\results.txt`` and the raw
    predicted labels to ``Temp\\result_labels.csv`` (Windows-style relative
    paths -- assumes a ``Temp`` directory exists next to the CWD).

    NOTE(review): the parameters ``training_samples``/``test_samples`` are
    rebound to their row counts near the end, shadowing the original data;
    fine here because the data is no longer needed, but fragile to extend.
    """
    from sklearn.neighbors import KNeighborsClassifier

    clf = KNeighborsClassifier(n_neighbors = k_neighbours, weights =weights, algorithm = algorithm)

    # Wall-clock timing of the fit step.
    t0 = time()
    clf.fit(training_samples,training_labels)
    training_time = round(time()-t0, 3)

    # Wall-clock timing of the predict step.
    t0 = time()
    pred = clf.predict(test_samples)
    test_time = round(time()-t0, 3)

    from sklearn.metrics import accuracy_score

    acc = accuracy_score(pred,test_lables)

    no_features = np.array(training_samples).shape[1]
    # Rebinds the parameter names to sample counts (see note above).
    training_samples = np.array(training_samples).shape[0]
    test_samples = np.array(test_samples).shape[0]

    with open("Temp\\results.txt","w") as outfile:
        outfile.write("Alogirthm : {}\n".format("KNN"))
        outfile.write("K  = {}\n".format(k_neighbours))
        outfile.write("weight = {}\n".format(weights))
        outfile.write("algorithm = {}\n".format(algorithm))
        outfile.write("No of features : {}\n".format(no_features))
        outfile.write("No of training samples : {}\n".format(training_samples))
        outfile.write("No of test samples : {}\n".format(test_samples))
        outfile.write("Training time : {}\n".format(training_time))
        outfile.write("Test time : {}\n".format(test_time))
        outfile.write("Accuracy : {}\n".format(acc))

    with open("Temp\\result_labels.csv","wb") as outfile:
        np.savetxt(outfile,pred)
scikit_classification_learners.py 文件源码 项目:MENGEL 作者: CodeSpaceHQ 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def train_random_forest():
    """Return default model properties paired with a 100-tree random forest."""
    # scikit-learn's historical default was 10 estimators; 100 is sturdier.
    forest = RandomForestClassifier(n_estimators=100)
    return mp.ModelProperties(), forest


# http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html
scikit_classification_learners.py 文件源码 项目:MENGEL 作者: CodeSpaceHQ 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def train_knn():
    """Return default model properties paired with a default (5-neighbour) k-NN."""
    classifier = neighbors.KNeighborsClassifier()
    return mp.ModelProperties(), classifier


# http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn-svm-svc
ClassificationKNN.py 文件源码 项目:AirTicketPredicting 作者: junlulocky 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __init__(self, isTrain, isOutlierRemoval=0):
        """Prepare the data and build a 2-nearest-neighbour classifier.

        :param isTrain: training-mode flag forwarded to the base class
        :param isOutlierRemoval: whether to remove outliers (0 = keep all),
            forwarded to the base class
        """
        super(ClassificationKNN, self).__init__(isTrain, isOutlierRemoval)
        # data preprocessing
        self.dataPreprocessing()

        # first parameter is the K neighbors
        # 'uniform' assigns uniform weights to each neighbor
        # 'distance' assigns weights proportional to the inverse of the distance from the query point
        # default metric is euclidean distance
        self.clf = neighbors.KNeighborsClassifier(2, weights='uniform')
knn.py 文件源码 项目:Sentences-analysis 作者: sungminoh 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def build_classifier(self):
        """Fit a 1-nearest-neighbour classifier on the stored coordinates
        and labels, keeping it on self.classifier."""
        model = KNeighborsClassifier(n_neighbors=1)
        model.fit(self.coordinates, self.labels)
        self.classifier = model


问题


面经


文章

微信
公众号

扫码关注公众号