# Shared imports and module-level fixtures for the snippets below. The fixture
# values are assumed to match scikit-learn's own NearestCentroid test data:
# X/y is a tiny two-class toy sample, T/true_result the matching query points.
import numpy as np
from scipy import sparse as sp
from numpy.testing import assert_array_equal, assert_equal
from sklearn import datasets
from sklearn.neighbors import NearestCentroid
from sklearn.metrics.pairwise import pairwise_distances

X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
X_csr = sp.csr_matrix(X)  # sparse copy of the toy sample
y = [-1, -1, -1, 1, 1, 1]
T = np.array([[-1, -1], [2, 2], [3, 2]])
T_csr = sp.csr_matrix(T)
true_result = [-1, 1, 1]

iris = datasets.load_iris()


def test_classification_toy():
# Check classification on a toy dataset, including sparse versions.
clf = NearestCentroid()
clf.fit(X, y)
assert_array_equal(clf.predict(T), true_result)
# Same test, but with a sparse matrix to fit and test.
clf = NearestCentroid()
clf.fit(X_csr, y)
assert_array_equal(clf.predict(T_csr), true_result)
# Fit with sparse, test with non-sparse
clf = NearestCentroid()
clf.fit(X_csr, y)
assert_array_equal(clf.predict(T), true_result)
# Fit with non-sparse, test with sparse
clf = NearestCentroid()
clf.fit(X, y)
assert_array_equal(clf.predict(T_csr), true_result)
# Fit and predict with non-CSR sparse matrices
clf = NearestCentroid()
clf.fit(X_csr.tocoo(), y)
assert_array_equal(clf.predict(T_csr.tolil()), true_result)
def evaluate_ncc(train_data, train_labels, test_data, test_labels):
    # Fit a nearest-centroid classifier on the training split and return its
    # mean accuracy on the held-out test split.
    ncc = NearestCentroid()
    ncc.fit(train_data, train_labels)
    return ncc.score(test_data, test_labels)
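# A minimal usage sketch for evaluate_ncc (hypothetical, not part of the
# original source), assuming the iris fixture defined above and
# scikit-learn's train_test_split:
def demo_evaluate_ncc():
    from sklearn.model_selection import train_test_split
    X_tr, X_te, y_tr, y_te = train_test_split(
        iris.data, iris.target, test_size=0.3, random_state=0)
    acc = evaluate_ncc(X_tr, y_tr, X_te, y_te)
    assert 0.0 <= acc <= 1.0  # score() returns mean accuracy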
# LinearSEF and KernelSEF come from the SEF (similarity embedding framework)
# project; their import is not shown in the source.
def test_linear_sef():
    """Basic sanity checks for LinearSEF: projection shape, decreasing loss,
    and improved nearest-centroid accuracy after supervised fitting."""
np.random.seed(1)
train_data = np.random.randn(100, 50)
train_labels = np.random.randint(0, 2, 100)
proj = LinearSEF(50, output_dimensionality=12)
proj._initialize(train_data)
proj_data = proj.transform(train_data, batch_size=8)
assert proj_data.shape[0] == 100
assert proj_data.shape[1] == 12
ncc = NearestCentroid()
ncc.fit(proj_data, train_labels)
acc_before = ncc.score(proj_data, train_labels)
loss = proj.fit(data=train_data, target_labels=train_labels, epochs=200,
target='supervised', batch_size=8, regularizer_weight=0, learning_rate=0.0001, verbose=False)
# Ensure that loss is reducing
assert loss[0] > loss[-1]
proj_data = proj.transform(train_data, batch_size=8)
assert proj_data.shape[0] == 100
assert proj_data.shape[1] == 12
ncc = NearestCentroid()
ncc.fit(proj_data, train_labels)
acc_after = ncc.score(proj_data, train_labels)
assert acc_after > acc_before
def test_kernel_sef():
    """Basic sanity checks for KernelSEF: projection shape, decreasing loss,
    and improved nearest-centroid accuracy after supervised fitting."""
np.random.seed(1)
train_data = np.random.randn(100, 50)
train_labels = np.random.randint(0, 2, 100)
proj = KernelSEF(train_data, 50, output_dimensionality=12, kernel_type='rbf')
proj._initialize(train_data)
proj_data = proj.transform(train_data, batch_size=8)
assert proj_data.shape[0] == 100
assert proj_data.shape[1] == 12
ncc = NearestCentroid()
ncc.fit(proj_data, train_labels)
acc_before = ncc.score(proj_data, train_labels)
loss = proj.fit(data=train_data, target_labels=train_labels, epochs=200,
target='supervised', batch_size=8, regularizer_weight=0, learning_rate=0.0001, verbose=False)
# Ensure that loss is reducing
assert loss[0] > loss[-1]
proj_data = proj.transform(train_data, batch_size=8)
assert proj_data.shape[0] == 100
assert proj_data.shape[1] == 12
ncc = NearestCentroid()
ncc.fit(proj_data, train_labels)
acc_after = ncc.score(proj_data, train_labels)
assert acc_after > acc_before
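# The LinearSEF and KernelSEF tests above repeat the same fit-and-score
# pattern; a small helper (hypothetical, not in the original suite) would
# express that shared step once:
def ncc_accuracy(features, labels):
    # Train a nearest-centroid classifier on the projected features and
    # report its accuracy on the same data.
    ncc = NearestCentroid()
    ncc.fit(features, labels)
    return ncc.score(features, labels)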
# From outlier_heuristics.py in the tf_literature_based_discovery project
# (author: xflows).
def _d_cs_indices(self):
    # Return the indices of documents that a cosine-metric nearest-centroid
    # classifier misclassifies under 10-fold cross-validation.
    classifier = NearestCentroid(metric='cosine')
    # classifier = KNeighborsClassifier(n_neighbors=5)
    return MisclassificationIndices.calculate(
        classifier,
        BowDataset(self._tfidf_matrix(), self._classes),
        n_folds=10)['inds']
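# MisclassificationIndices and BowDataset belong to the host project. A rough
# scikit-learn-only approximation of the same idea (assumed, not the
# project's actual implementation) collects the indices that a cosine-metric
# nearest-centroid classifier gets wrong under 10-fold cross-validation:
def misclassified_indices(feature_matrix, labels, n_splits=10):
    from sklearn.model_selection import cross_val_predict
    predicted = cross_val_predict(NearestCentroid(metric='cosine'),
                                  feature_matrix, labels, cv=n_splits)
    return np.flatnonzero(predicted != np.asarray(labels))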
def test_precomputed():
clf = NearestCentroid(metric="precomputed")
clf.fit(X, y)
S = pairwise_distances(T, clf.centroids_)
assert_array_equal(clf.predict(S), true_result)
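# Note: metric="precomputed" for NearestCentroid only exists in older
# scikit-learn releases. An equivalent check against the default euclidean
# metric (a sketch, assuming the toy fixtures above) is:
def test_precomputed_equivalent():
    clf = NearestCentroid()
    clf.fit(X, y)
    # predict() assigns each query point to the class of its nearest centroid
    nearest = pairwise_distances(T, clf.centroids_).argmin(axis=1)
    assert_array_equal(clf.classes_[nearest], true_result)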
def test_iris():
# Check consistency on dataset iris.
for metric in ('euclidean', 'cosine'):
clf = NearestCentroid(metric=metric).fit(iris.data, iris.target)
score = np.mean(clf.predict(iris.data) == iris.target)
assert score > 0.9, "Failed with score = " + str(score)
def test_iris_shrinkage():
# Check consistency on dataset iris, when using shrinkage.
for metric in ('euclidean', 'cosine'):
for shrink_threshold in [None, 0.1, 0.5]:
clf = NearestCentroid(metric=metric,
shrink_threshold=shrink_threshold)
clf = clf.fit(iris.data, iris.target)
score = np.mean(clf.predict(iris.data) == iris.target)
assert score > 0.8, "Failed with score = " + str(score)
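# shrink_threshold enables the "nearest shrunken centroid" rule: each
# centroid's per-feature deviation from the overall data mean is
# soft-thresholded, which pulls centroids inward and can zero out
# uninformative features. A quick illustration (a sketch on the iris data):
def demo_shrinkage_effect():
    plain = NearestCentroid().fit(iris.data, iris.target)
    shrunk = NearestCentroid(shrink_threshold=0.5).fit(iris.data, iris.target)
    # Shrinkage moves the centroids toward the grand mean, so they differ.
    assert not np.allclose(plain.centroids_, shrunk.centroids_)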
def test_pickle():
import pickle
# classification
obj = NearestCentroid()
obj.fit(iris.data, iris.target)
score = obj.score(iris.data, iris.target)
s = pickle.dumps(obj)
obj2 = pickle.loads(s)
assert_equal(type(obj2), obj.__class__)
score2 = obj2.score(iris.data, iris.target)
assert_array_equal(score, score2,
"Failed to generate same score"
" after pickling (classification).")
def test_predict_translated_data():
# Test that NearestCentroid gives same results on translated data
rng = np.random.RandomState(0)
X = rng.rand(50, 50)
y = rng.randint(0, 3, 50)
noise = rng.rand(50)
clf = NearestCentroid(shrink_threshold=0.1)
clf.fit(X, y)
y_init = clf.predict(X)
clf = NearestCentroid(shrink_threshold=0.1)
X_noise = X + noise
clf.fit(X_noise, y)
y_translate = clf.predict(X_noise)
assert_array_equal(y_init, y_translate)
def test_manhattan_metric():
# Test the manhattan metric.
clf = NearestCentroid(metric='manhattan')
clf.fit(X, y)
dense_centroid = clf.centroids_
clf.fit(X_csr, y)
assert_array_equal(clf.centroids_, dense_centroid)
assert_array_equal(dense_centroid, [[-1, -1], [1, 1]])
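# With metric='manhattan', NearestCentroid uses the feature-wise median (the
# L1 minimizer) as the class centroid instead of the mean, which is why the
# toy centroids above land exactly on [-1, -1] and [1, 1]. A quick check
# (a sketch against the toy fixtures):
def demo_manhattan_median():
    clf = NearestCentroid(metric='manhattan').fit(X, y)
    medians = np.array([np.median(X[np.asarray(y) == c], axis=0)
                        for c in clf.classes_])
    assert_array_equal(clf.centroids_, medians)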