# Train a decision tree and a k-NN classifier on a 50/50 split of iris and compare accuracy.
from sklearn import datasets, tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier


def main():
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)
    clrTree = tree.DecisionTreeClassifier()
    clrTree = clrTree.fit(x_train, y_train)
    outTree = clrTree.predict(x_test)
    clrKN = KNeighborsClassifier()
    clrKN = clrKN.fit(x_train, y_train)
    outKN = clrKN.predict(x_test)
    # Prediction accuracy
    print("Accuracy for Decision Tree Classifier: " + str(accuracy_score(y_test, outTree) * 100) + "%")
    print("Accuracy for KNeighbors Classifier: " + str(accuracy_score(y_test, outKN) * 100) + "%")
Python KNeighborsClassifier() example source code
def get_feature_importance(self, clf, model_name):
    # Map each model name to the kind of attribute that exposes its feature
    # weights; None means the model offers no per-feature importance.
    clfs = {'RandomForestClassifier': 'feature_importances',
            'ExtraTreesClassifier': 'feature_importances',
            'AdaBoostClassifier': 'feature_importances',
            'LogisticRegression': 'coef',
            'svm.SVC': 'coef',
            'GradientBoostingClassifier': 'feature_importances',
            'GaussianNB': None,
            'DecisionTreeClassifier': 'feature_importances',
            'SGDClassifier': 'coef',
            'KNeighborsClassifier': None,
            'linear.SVC': 'coef'}
    if clfs[model_name] == 'feature_importances':
        return list(clf.feature_importances_)
    elif clfs[model_name] == 'coef':
        return clf.coef_.tolist()  # tolist() already returns a list; the extra list() was redundant
    else:
        return None
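A minimal standalone sketch of the same dispatch, assuming a fitted tree ensemble (the wrapper object that holds this method is not shown in the snippet):

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)
rf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)
# 'RandomForestClassifier' maps to 'feature_importances' in the table above,
# so the method would return list(rf.feature_importances_):
print(list(rf.feature_importances_))  # one weight per feature, summing to 1.0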
def get_classifier_class(class_name):
name_table = {
'svm': SVC,
'k_neighbors': KNeighborsClassifier,
'gaussian_process': GaussianProcessClassifier,
'decision_tree': DecisionTreeClassifier,
'random_forest': RandomForestClassifier,
'ada_boost': AdaBoostClassifier,
'mlp': MLPClassifier,
'gaussian_naive_bayes': GaussianNB,
'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
}
if class_name not in name_table:
        raise ValueError('No such classifier: {}'.format(class_name))
return name_table[class_name]
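A short usage sketch, assuming the sklearn classes named in name_table are already imported (as they must be for the table itself to evaluate):

cls = get_classifier_class('k_neighbors')  # returns the class, not an instance
clf = cls(n_neighbors=5)                   # constructor kwargs stay caller-side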
classify.py (project: oss-github-analysis-project, author: itu-oss-project-team)
def __create_classifiers(self):
classifiers = list()
classifiers.append({"func": linear_model.SGDClassifier(loss="log"),
"name": "sgd"})
classifiers.append({"func": neighbors.KNeighborsClassifier(1, weights='distance'),
"name": "knn1"})
classifiers.append({"func": neighbors.KNeighborsClassifier(3, weights='distance'),
"name": "knn3"})
classifiers.append({"func": neighbors.KNeighborsClassifier(5, weights='distance'),
"name": "knn5"})
classifiers.append({"func": GaussianNB(),
"name": "naive_bayes"})
# classifiers.append({"func": tree.DecisionTreeClassifier(), "name": "decision_tree"})
# classifiers.append({"func": MLPClassifier(max_iter=10000), "name": "mlp"})
# classifiers.append({"func": RandomForestClassifier(), "name": "random_forest"})
return classifiers
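A plausible way to consume the list from inside the same class (X_train, y_train, X_test, y_test are illustrative names, not taken from the original project):

for entry in self.__create_classifiers():
    model = entry["func"].fit(X_train, y_train)
    print(entry["name"], model.score(X_test, y_test))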
def define_model(self, model, parameters, n_cores = 0):
clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
'GaussianNB': GaussianNB(),
'DecisionTreeClassifier': DecisionTreeClassifier(),
'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
'linear.SVC': svm.LinearSVC() }
if model not in clfs:
raise ConfigError("Unsupported model {}".format(model))
clf = clfs[model]
clf.set_params(**parameters)
return clf
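A hedged call sketch from another method of the same class; the parameters dict is forwarded to set_params, so it overrides the baseline values hard-coded in the table (ConfigError is assumed to be defined elsewhere in the project):

clf = self.define_model('KNeighborsClassifier',
                        {'n_neighbors': 7, 'weights': 'distance'})
# set_params replaces the table's default n_neighbors=3 before any fitting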
def do_ml(ticker):
X, y, df = extract_featuresets(ticker)
    # sklearn.cross_validation was removed in 0.20; model_selection provides the same function
    X_train, X_test, y_train, y_test = model_selection.train_test_split(X,
                                                                        y,
                                                                        test_size=0.25)
#clf = neighbors.KNeighborsClassifier()
clf = VotingClassifier([('lsvc',svm.LinearSVC()),
('knn',neighbors.KNeighborsClassifier()),
('rfor',RandomForestClassifier())])
clf.fit(X_train, y_train)
confidence = clf.score(X_test, y_test)
print('accuracy:',confidence)
predictions = clf.predict(X_test)
print('predicted class counts:',Counter(predictions))
print()
print()
return confidence
# examples of running:
def knn_masked_data(trX,trY,missing_data_dir, input_shape, k):
raw_im_data = np.loadtxt(join(script_dir,missing_data_dir,'index.txt'),delimiter=' ',dtype=str)
raw_mask_data = np.loadtxt(join(script_dir,missing_data_dir,'index_mask.txt'),delimiter=' ',dtype=str)
# Using 'brute' method since we only want to do one query per classifier
# so this will be quicker as it avoids overhead of creating a search tree
knn_m = KNeighborsClassifier(algorithm='brute',n_neighbors=k)
prob_Y_hat = np.zeros((raw_im_data.shape[0],int(np.max(trY)+1)))
total_images = raw_im_data.shape[0]
pbar = progressbar.ProgressBar(widgets=[progressbar.FormatLabel('\rProcessed %(value)d of %(max)d Images '), progressbar.Bar()], maxval=total_images, term_width=50).start()
for i in range(total_images):
mask_im=load_image(join(script_dir,missing_data_dir,raw_mask_data[i][0]), input_shape,1).reshape(np.prod(input_shape))
        mask = np.logical_not(mask_im > eps)  # mask is 1 at missing locations; eps is a module-level threshold constant
v_im=load_image(join(script_dir,missing_data_dir,raw_im_data[i][0]), input_shape, 255).reshape(np.prod(input_shape))
rep_mask = np.tile(mask,(trX.shape[0],1))
# Corrupt whole training set according to the current mask
corr_trX = np.multiply(trX, rep_mask)
knn_m.fit(corr_trX, trY)
prob_Y_hat[i,:] = knn_m.predict_proba(v_im.reshape(1,-1))
pbar.update(i)
pbar.finish()
return prob_Y_hat
def _load_sklearn_default_classifier():
if sys.version_info[0] == 2:
file_name = "sklearn_classifier_py2.pklz"
protocol = 2
else:
file_name = "sklearn_classifier_py3.pklz"
protocol = 3
file_path = resource_filename('sudokuextract.data', file_name)
if resource_exists('sudokuextract.data', file_name):
f = gzip.open(file_path, 'rb')
classifier = pickle.load(f)
f.close()
else:
classifier = KNeighborsClassifier(n_neighbors=10)
classifier = fit_combined_classifier(classifier)
f = gzip.open(file_path, 'wb')
pickle.dump(classifier, f, protocol=protocol)
f.close()
return classifier
def _load_sudokuextract_default_classifier():
file_name = "sudokuextract_classifier.pklz"
protocol = 2
file_path = resource_filename('sudokuextract.data', file_name)
if resource_exists('sudokuextract.data', file_name):
f = gzip.open(file_path, 'rb')
classifier_json = pickle.load(f)
        # Pass the stored hyperparameters by keyword: positionally, the third
        # argument of KNeighborsClassifier is `algorithm`, not `metric`.
        classifier = KNeighborsClassifier(n_neighbors=classifier_json.get('n_neighbors'),
                                          weights=classifier_json.get('weights'),
                                          metric=classifier_json.get('metric'),
                                          p=classifier_json.get('p'))
classifier.fit(np.array(classifier_json.get('data')),
np.array(classifier_json.get('labels')))
f.close()
else:
classifier = KNeighborsClassifier(n_neighbors=10)
classifier = fit_combined_classifier(classifier)
f = gzip.open(file_path, 'wb')
pickle.dump(classifier.to_json(), f, protocol=protocol)
f.close()
return classifier
def n3_error_rate_nearest_neighbor_classifier(data):
    # N3: leave-one-out error rate of a 1-NN classifier (label in the last column).
    mistakes = 0
    n = data.shape[0]
    for i in range(n):
        bad_df = data.index.isin([i])
        good_df = ~bad_df
        knn = KNeighborsClassifier(n_neighbors=1)
        knn.fit(data.iloc[good_df].iloc[:, :-1], data.iloc[good_df].iloc[:, -1])
        temp = np.array(data.iloc[i, :-1]).reshape(1, -1)
        mistake = 1 if data.iloc[i, -1] != knn.predict(temp) else 0
        mistakes = mistakes + mistake
    return (1.0 * mistakes) / n  # always in [0, 1], so no clamping is needed
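The loop above is exactly the leave-one-out error of a 1-NN classifier (the N3 complexity measure); a more compact sketch of the same computation using sklearn's cross-validation utilities, assuming data is a DataFrame with the label in its last column:

from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.neighbors import KNeighborsClassifier

def n3_leave_one_out(data):
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=1), X, y, cv=LeaveOneOut())
    return 1.0 - scores.mean()  # error rate = 1 - mean per-sample accuracy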
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'weights': ['uniform', 'distance'],
                         'n_neighbors': range(2, 60)}]
    clf = GridSearchCV(neighbors.KNeighborsClassifier(), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())
    print("Best parameters set found on development set:\n")
    print(clf.best_params_)
    print("Grid scores on development set:\n")
    # grid_scores_ was removed in sklearn 0.20; cv_results_ carries the same information
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r\n" % (mean, std * 2, params))
    print("Detailed classification report:\n")
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print(classification_report(y_true, y_pred))
def check_word2vec(embed_dict, embeds, key_words = ['of', 'is', 'a', 'yellow', 'circle', 'box']):
KN = KNeighborsClassifier(n_neighbors=3)
print('fitting pseudo-KNN...')
    KN.fit(embeds, [1]*len(embeds))  # dummy labels; only the neighbor index structure is used
inds = KN.kneighbors(embeds, return_distance=False)
# print(inds)
embeds_list = embeds.tolist()
for word in key_words:
req_words = []
ind = embeds_list.index(embed_dict[word].tolist())
req_inds = inds[ind]
        for idx in req_inds:
            for w in embed_dict:
                if (embed_dict[w] == embeds[idx]).all():
                    req_words.append(w)
print('for:', word, ', the 3nn are:', req_words)
model.py (project: 5th_place_solution_facebook_check_ins, author: aikinogard)
def knn_ps2(df_cell_train_feats, y_train, df_cell_test_feats):
def prepare_feats(df):
df_new = pd.DataFrame()
df_new["year"] = (1 + df["year"]) * 10.
df_new["hour"] = (1 + df["hour"]) * 4.
df_new["weekday"] = (1 + df["weekday"]) * 3.11
df_new["month"] = (1 + df["month"]) * 2.11
df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x)) * 10.
df_new["x"] = df["x"] * 465.
df_new["y"] = df["y"] * 975.
return df_new
logging.info("train knn_ps2 model")
df_cell_train_feats_knn = prepare_feats(df_cell_train_feats)
clf = KNeighborsClassifier(n_neighbors=np.floor(np.sqrt(len(y_train))/5.3).astype(int),
weights=lambda x: x ** -2, metric='manhattan', n_jobs=-1)
clf.fit(df_cell_train_feats_knn, y_train)
df_cell_test_feats_knn = prepare_feats(df_cell_test_feats)
y_test_pred = clf.predict_proba(df_cell_test_feats_knn)
return y_test_pred
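The hand-tuned multipliers in prepare_feats act as feature weights: with the Manhattan metric, scaling a feature by a constant scales its contribution to every distance by the same constant. A tiny numeric check of that claim:

import numpy as np

a = np.array([1.0, 2.0])
b = np.array([2.0, 4.0])
print(np.abs(a - b).sum())           # 3.0, unweighted L1 distance
w = np.array([2.0, 1.0])             # count the first feature twice as heavily
print(np.abs(a * w - b * w).sum())   # 4.0, the first coordinate's gap now counts double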
def classification_knn(self):
    self.signals.PrintInfo.emit("KNN classification")
    output_dir = self.output_dir + 'knn_out/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Vectorize the documents; HashingVectorizer needs no stored vocabulary.
    vectorizer = HashingVectorizer()
    fdata = vectorizer.fit_transform(self.fdata)
    trainingSet = fdata[:self.split]
    testSet = fdata[self.split:]
    # Create and fit the classifier, then predict the held-out documents
    classificator = KNeighborsClassifier(n_neighbors=self.knn_n_neighbors)
    classificator.fit(trainingSet, self.trainingClass)
    results = classificator.predict(testSet)
    proba = classificator.predict_proba(testSet)
    self.write_results_to_file(output_dir + 'results.csv', results, proba, classificator.classes_, self.test_filenames)
    out_text = self.compile_result_string(results, proba, classificator.classes_, self.test_filenames)
    self.signals.PrintInfo.emit(out_text)
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]
    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf
    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()
    else:
        raise ValueError('Unknown method: {}'.format(method))  # avoid a NameError on an unmatched method
    return clf.fit(dataset[features], dataset['UpDown'])
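A minimal call sketch under the snippet's assumed layout (feature columns followed by an 'UpDown' label column; train_df and test_df are illustrative names, and parameters is only read for 'SVM'):

model = buildModel(train_df, 'KNN', None)
preds = model.predict(test_df[train_df.columns[0:-1]])  # test_df shares the feature columns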
def splitValidateModel(self, visualizePredictions = False):
(label_vector, input_vector) = loadData(self.featureFile)
    indexArray = range(0, len(input_vector))
    # sklearn.cross_validation was removed in 0.20; use model_selection instead
    trainData, testData, trainLabels, expectedLabels, trainIndices, testIndices = \
        model_selection.train_test_split(input_vector, label_vector, indexArray, test_size=(1.0 - self.percentSplit))
kNNClassifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
kNNClassifier.fit(trainData, trainLabels)
predictedLabels = kNNClassifier.predict(testData)
print("Classification report for classifier %s:\n%s\n"
% ('k-NearestNeighbour', metrics.classification_report(expectedLabels, predictedLabels)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expectedLabels, predictedLabels))
print('Split Validation training :: Done.\n')
if visualizePredictions:
self.__visualizePredictedDataset__(input_vector, testIndices, predictedLabels, expectedLabels)
def trainLimited(self, featureFile, n_datapoints):
(label_vector, input_vector) = loadData(featureFile)
    # sklearn.cross_validation was removed in 0.20; use model_selection instead
    trainData, testData, trainLabels, testLabels = \
        model_selection.train_test_split(input_vector, label_vector, test_size=0)
n_totalrows = int((len(label_vector)/n_datapoints))
for n in range(0, n_totalrows):
limited_label_vector = trainLabels[0: (n+1) * n_datapoints]
limited_input_vector = trainData[0: (n+1) * n_datapoints]
kNNClassifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
kNNClassifier.fit(limited_input_vector, limited_label_vector)
        scores = model_selection.cross_val_score(kNNClassifier, limited_input_vector, limited_label_vector, cv=5)
        print('%f on %d datapoints' % ((sum(scores) / len(scores)), len(limited_label_vector)))
User_Interface.py (project: yttresearch-machine-learning-algorithms-analysis, author: gdemos01)
def KNNClassifier(action):
    # Setting our classifier to k-nearest neighbors (the original comment said Logistic Regression)
    clf = KNeighborsClassifier(n_neighbors=5)
dir = input('Give Data Directory: ')
if int(action) == 1:
print('Loading Data')
PopularityClassifier.loadData(dir)
PopularityClassifier.youtubePopular(dir,clf,0)
PopularityClassifier.twitterPopular(dir,clf,0)
PopularityClassifier.bothPopular(dir,clf,0)
elif int(action) == 2:
print('Loading Data')
ViralityClassifier.loadData(dir)
ViralityClassifier.youtubeViral(dir,clf,0)
ViralityClassifier.twitterViral(dir,clf,0)
ViralityClassifier.bothViral(dir,clf,0)
else:
print('Loading Data')
ViralityAndPopularityClassifier.loadData(dir)
ViralityAndPopularityClassifier.youtubeViralAndPopular(dir,clf,0)
ViralityAndPopularityClassifier.twitterViralAndPopular(dir,clf,0)
ViralityAndPopularityClassifier.bothViralAndPopular(dir,clf,0)
def test_init(self):
"""
Testing the init method
"""
model = neighbors.KNeighborsClassifier(3)
viz = DecisionBoundariesVisualizer(model)
    self.assertEqual(viz.step_size, 0.0025)
self.assertEqual(viz.name, 'KNeighborsClassifier')
self.assertEqual(viz.estimator, model)
self.assertIsNone(viz.classes_)
self.assertIsNone(viz.features_)
self.assertIsNotNone(viz.markers)
self.assertIsNotNone(viz.scatter_alpha)
self.assertTrue(viz.show_scatter)
self.assertIsNone(viz.Z)
self.assertIsNone(viz.xx)
self.assertIsNone(viz.yy)
self.assertIsNone(viz.class_labels)
self.assertIsNone(viz.title)
self.assertIsNone(viz.x)
self.assertIsNone(viz.y)
def test_draw_ax_show_scatter_False(self):
"""Test that the matplotlib functions are being called when the
scatter plot isn't drawn
"""
model = neighbors.KNeighborsClassifier(3)
viz = DecisionBoundariesVisualizer(
model, features=['one', 'two'], show_scatter=False)
fitted_viz = viz.fit(X_two_cols, y=y)
fitted_viz.ax = mock.Mock()
fitted_viz.ax.pcolormesh = mock.MagicMock()
fitted_viz.ax.scatter = mock.MagicMock()
fitted_viz.ax.legend = mock.MagicMock()
fitted_viz.draw(X_two_cols, y=y)
    self.assertEqual(len(fitted_viz.ax.pcolormesh.mock_calls), 1)
    self.assertEqual(len(fitted_viz.ax.scatter.mock_calls), 0)
    self.assertEqual(len(fitted_viz.ax.legend.mock_calls), 1)
def test_finalize(self):
model = neighbors.KNeighborsClassifier(3)
viz = DecisionBoundariesVisualizer(
model, features=['one', 'two'], show_scatter=False)
fitted_viz = viz.fit(X_two_cols, y=y)
fitted_viz.draw(X_two_cols, y=y)
fitted_viz.ax = mock.Mock()
fitted_viz.ax.legend = mock.MagicMock()
fitted_viz.ax.set_xlabel = mock.MagicMock()
fitted_viz.ax.set_ylabel = mock.MagicMock()
fitted_viz.poof()
fitted_viz.ax.legend.assert_called_once_with(loc='best', frameon=True)
fitted_viz.ax.set_xlabel.assert_called_once_with('one')
fitted_viz.ax.set_ylabel.assert_called_once_with('two')
def learn(x, y, test_x):
    # weight_list collects per-sample class weights, but KNeighborsClassifier
    # has no per-sample weight support: its `weights` parameter only accepts
    # 'uniform', 'distance', or a callable on distances, so the list must not
    # be passed to the constructor as in the original snippet.
    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_gdbt)
        if y[j] == "1000":
            weight_list.append(variables.weight_1000_gdbt)
        if y[j] == "1500":
            weight_list.append(variables.weight_1500_gdbt)
        if y[j] == "2000":
            weight_list.append(variables.weight_2000_gdbt)
    clf = KNeighborsClassifier(n_neighbors=1).fit(x, y)
    prediction_list = clf.predict(test_x)
    return prediction_list
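Since KNeighborsClassifier offers no per-sample weights, one hedged workaround (an assumption, not the original author's method) is to resample the training set in proportion to weight_list before fitting:

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

def weighted_resample_fit(x, y, weight_list, n_neighbors=1):
    x, y = np.asarray(x), np.asarray(y)
    w = np.asarray(weight_list, dtype=float)
    # Draw len(y) training rows with probability proportional to their weight
    idx = np.random.choice(len(y), size=len(y), replace=True, p=w / w.sum())
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(x[idx], y[idx])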
def use_sbs_with_knn(columns, X_train, X_test, y_train, y_test):
knn = KNeighborsClassifier(n_neighbors=2)
sbs = SBS(knn, k_features=1)
sbs.fit(X_train, y_train)
k_feat = [len(k) for k in sbs.subsets_]
plt.plot(k_feat, sbs.scores_, marker='o')
plt.ylim([0.7, 1.1])
plt.ylabel('Accuracy')
plt.xlabel('Number of features')
plt.grid()
plt.show()
k5 = list(sbs.subsets_[8])
print(columns[1:][k5])
knn.fit(X_train, y_train)
print("Training accuracy: %s" % knn.score(X_train, y_train))
print("Test accuracy: %s" % knn.score(X_test, y_test))
knn.fit(X_train[:, k5], y_train)
print("Training accuracy: %s" % knn.score(X_train[:, k5], y_train))
print("Test accuracy: %s" % knn.score(X_test[:, k5], y_test))
def knn_cv(post_features, post_class, n_folds, n_neighbors, length_dataset=-1):
    if length_dataset == -1:
        length_dataset = len(post_class)
    # The old KFold(n=..., n_folds=...) API was removed in sklearn 0.20;
    # split index positions over the first length_dataset samples explicitly.
    cv = KFold(n_splits=n_folds, shuffle=True)
    train_accuracy = []
    test_accuracy = []
    for train, test in cv.split(np.arange(length_dataset)):
        knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(post_features[train], post_class[train])
        train_accuracy.append(knn.score(post_features[train], post_class[train]))
        test_accuracy.append(knn.score(post_features[test], post_class[test]))
    # return (sum(train_accuracy)/n_folds), (sum(test_accuracy)/n_folds)
    return np.mean(train_accuracy), np.mean(test_accuracy)
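A hedged sweep over k using the helper above (post_features and post_class stand for whatever arrays the project loads):

for k in (1, 3, 5, 7, 9):
    train_acc, test_acc = knn_cv(post_features, post_class, n_folds=5, n_neighbors=k)
    print("k=%d  train=%.3f  test=%.3f" % (k, train_acc, test_acc))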
classify.py (project: Stock-Market-Analysis-and-Prediction, author: samshara)
def performKNNClass(X_train, y_train, X_test, y_test, parameters, fout, savemodel):
"""
KNN binary Classification
"""
    clf = KNeighborsClassifier(n_neighbors=3)
    clf.fit(X_train, y_train)
    if savemodel:
#fname_out = '{}-{}.pickle'.format(fout, datetime.now().date())
fname_out = fout+'.pickle'
with open(fname_out, 'wb') as f:
pickle.dump(clf, f, -1)
accuracy = clf.score(X_test, y_test)
return accuracy
def cross_validation():
x_train, x_test, y_train, y_test = load_data()
k_lst = list(range(1, 30))
lst_scores = []
for k in k_lst:
knn = KNeighborsClassifier(n_neighbors=k)
scores = cross_val_score(knn, x_train, y_train, cv=10, scoring='accuracy')
lst_scores.append(scores.mean())
# changing to misclassification error
MSE = [1 - x for x in lst_scores]
optimal_k = k_lst[MSE.index(min(MSE))]
print "The optimal number of neighbors is %d" % optimal_k
# plot misclassification error vs k
# plt.plot(k_lst, MSE)
# plt.ylabel('Misclassification Error')
plt.plot(k_lst, lst_scores)
plt.xlabel('Number of Neighbors K')
plt.ylabel('correct classification rate')
plt.show()
def test_neighbors_iris():
# Sanity checks on the iris dataset
# Puts three points of each label in the plane and performs a
# nearest neighbor query on points near the decision boundary.
for algorithm in ALGORITHMS:
clf = neighbors.KNeighborsClassifier(n_neighbors=1,
algorithm=algorithm)
clf.fit(iris.data, iris.target)
assert_array_equal(clf.predict(iris.data), iris.target)
clf.set_params(n_neighbors=9, algorithm=algorithm)
clf.fit(iris.data, iris.target)
assert_true(np.mean(clf.predict(iris.data) == iris.target) > 0.95)
rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm)
rgs.fit(iris.data, iris.target)
assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target),
0.95)
def test_neighbors_digits():
# Sanity check on the digits dataset
# the 'brute' algorithm has been observed to fail if the input
# dtype is uint8 due to overflow in distance calculations.
X = digits.data.astype('uint8')
Y = digits.target
(n_samples, n_features) = X.shape
train_test_boundary = int(n_samples * 0.8)
train = np.arange(0, train_test_boundary)
test = np.arange(train_test_boundary, n_samples)
(X_train, Y_train, X_test, Y_test) = X[train], Y[train], X[test], Y[test]
clf = neighbors.KNeighborsClassifier(n_neighbors=1, algorithm='brute')
score_uint8 = clf.fit(X_train, Y_train).score(X_test, Y_test)
score_float = clf.fit(X_train.astype(float), Y_train).score(
X_test.astype(float), Y_test)
assert_equal(score_uint8, score_float)
def test_cross_val_score_multilabel():
X = np.array([[-3, 4], [2, 4], [3, 3], [0, 2], [-3, 1],
[-2, 1], [0, 0], [-2, -1], [-1, -2], [1, -2]])
y = np.array([[1, 1], [0, 1], [0, 1], [0, 1], [1, 1],
[0, 1], [1, 0], [1, 1], [1, 0], [0, 0]])
clf = KNeighborsClassifier(n_neighbors=1)
scoring_micro = make_scorer(precision_score, average='micro')
scoring_macro = make_scorer(precision_score, average='macro')
scoring_samples = make_scorer(precision_score, average='samples')
score_micro = cval.cross_val_score(clf, X, y, scoring=scoring_micro, cv=5)
score_macro = cval.cross_val_score(clf, X, y, scoring=scoring_macro, cv=5)
score_samples = cval.cross_val_score(clf, X, y,
scoring=scoring_samples, cv=5)
assert_almost_equal(score_micro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 3])
assert_almost_equal(score_macro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
def knn_clf(observations, n_neighbors):
    # Build the training set: rows 20-29 are labeled class 1, rows 110-119 class 2
    range1 = [20, 30]
    len1 = len(range(range1[0], range1[1]))
    range2 = [110, 120]
    len2 = len(range(range2[0], range2[1]))
    training_index = list(range(range1[0], range1[1])) + list(range(range2[0],
                                                                    range2[1]))
    training_data = observations[training_index, :]
    training_label = np.ones(len1 + len2, dtype='int32')
    training_label[len1:] = 2
    # Train the classifier; use the n_neighbors argument rather than a hard-coded 3
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)  # , weights='distance')
    knn.fit(training_data, training_label)
    # Predict every observation and print the labels in rows of ten
    knn_pre = knn.predict(observations)
    print('Predictions for the first block of observations:')
    for i in range(8):
        print(knn_pre[i * 10:(i + 1) * 10])
    print('Predictions for the second block of observations:')
    for i in range(8, 12):
        print(knn_pre[i * 10:(i + 1) * 10])