def DBCV(X, labels, dist_function=euclidean):
    """
    Density-Based Clustering Validation.

    Args:
        X (np.ndarray): ndarray with dimensions [n_samples, n_features];
            the data whose clustering is to be validated
        labels (np.array): clustering assignments for data X
        dist_function (func): function to determine distance between objects;
            func args must be [np.array, np.array] where each array is a point

    Returns:
        cluster_validity (float): score in range [-1, 1] indicating validity
            of clustering assignments
    """
    graph = _mutual_reach_dist_graph(X, labels, dist_function)
    mst = _mutual_reach_dist_MST(graph)
    cluster_validity = _clustering_validity_index(mst, labels)
    return cluster_validity
def patchIt(i, testInst):
    testInst = testInst.values
    C = changes()

    def proj(one, two, test):
        # Scalar projection of `test` onto the segment (one, two), via the
        # law of cosines: x = (a^2 - b^2 + c^2) / (2c).
        a = edist(one, test)
        b = edist(two, test)
        c = edist(one, two)
        return (a**2 - b**2 + c**2) / (2 * c)

    # Pick the pair for which the test instance has the largest projection.
    better = sorted(i.pairs, key=lambda x: proj(x[0].sample, x[1].sample, testInst),
                    reverse=True)[0]
    # Move halfway toward the pair member with the lower score (last column).
    (toMe, notToMe) = (better[0], better[1]) \
        if better[0].sample[-1] <= better[1].sample[-1] else (better[1], better[0])
    newInst = testInst + 0.5 * (toMe.sample - testInst)
    if i.fsel:
        # Feature selection: only patch the top third of the ranked features.
        indx = i.lbs[:int(len(i.lbs) * 0.33)]
        for n in indx:
            C.save(name=i.testDF.columns.values[n], old=testInst[n], new=newInst[n])
            testInst[n] = newInst[n]
        i.change.append(C.log)
        return testInst
    else:
        return newInst
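To see what `proj` computes, here is a standalone check of the law-of-cosines projection against a direct dot-product projection, assuming `edist` is scipy's euclidean:

# Sanity check for the law-of-cosines projection used in patchIt;
# assumes `edist` is scipy.spatial.distance.euclidean.
import numpy as np
from scipy.spatial.distance import euclidean as edist

def proj(one, two, test):
    a, b, c = edist(one, test), edist(two, test), edist(one, two)
    return (a**2 - b**2 + c**2) / (2 * c)

one, two, test = np.array([0., 0.]), np.array([4., 0.]), np.array([1., 3.])
# Direct projection onto the unit vector along the segment agrees:
direct = np.dot(test - one, (two - one) / np.linalg.norm(two - one))
assert abs(proj(one, two, test) - direct) < 1e-12  # both give 1.0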
def gap(data, refs=None, nrefs=20, ks=range(1, 11), method=None):
    # Gap statistic (Tibshirani et al.): compare within-cluster dispersion on
    # `data` against `nrefs` uniform reference datasets drawn over the same
    # bounding box, for each candidate k in `ks`. `method` must be a clusterer
    # class exposing cluster_centers_ and labels_ (e.g. sklearn's KMeans).
    shape = data.shape
    if refs is None:
        tops = data.max(axis=0)
        bots = data.min(axis=0)
        rands = np.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            # Rescale each uniform sample to the bounding box of the data.
            rands[:, :, i] = rands[:, :, i] * (tops - bots) + bots
    else:
        rands = refs
    gaps = np.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        g1 = method(n_clusters=k).fit(data)
        (kmc, kml) = (g1.cluster_centers_, g1.labels_)
        disp = sum(euclidean(data[m, :], kmc[kml[m], :]) for m in range(shape[0]))
        refdisps = np.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            g2 = method(n_clusters=k).fit(rands[:, :, j])
            (kmc, kml) = (g2.cluster_centers_, g2.labels_)
            refdisps[j] = sum(euclidean(rands[m, :, j], kmc[kml[m], :])
                              for m in range(shape[0]))
        gaps[i] = np.log(np.mean(refdisps)) - np.log(disp)
    return gaps
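A minimal usage sketch for the gap statistic above, assuming scikit-learn's KMeans as `method`; picking k by the largest gap is a simplification of the original one-standard-error rule:

# Hypothetical usage of gap(); assumes sklearn's KMeans and random demo data.
import numpy as np
from sklearn.cluster import KMeans

data = np.random.rand(200, 2)
ks = range(1, 11)
gaps = gap(data, nrefs=20, ks=ks, method=KMeans)
best_k = list(ks)[int(np.argmax(gaps))]
print("estimated number of clusters:", best_k)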
def in_euclidean_distance(self, pos, center, r):
    """
    Helper that decides whether a position (in fractional
    coordinates) lies within Euclidean distance `r` of
    `center`, taking periodic images into account.
    """
    from scipy.spatial.distance import euclidean as euclidean_distance
    from itertools import product
    cart_cent = self.get_cartesian_from_frac(center)
    # All 27 periodic images of `pos` (translations of -1, 0, +1 per axis).
    trans = np.array([i for i in product([-1, 0, 1], repeat=3)])
    allpos = pos + trans
    for p in allpos:
        cart_p = self.get_cartesian_from_frac(p)
        if euclidean_distance(cart_p, cart_cent) < r:
            return True
    return False
def find_nearest_instances_subprocess(training_data_instances, training_data_labels,
                                      test_data_instances, test_instance_start_index,
                                      test_instance_end_index, classified_results):
    for test_instance_index in range(test_instance_start_index, test_instance_end_index):
        test_instance = test_data_instances[test_instance_index]
        # find the nearest training instance by euclidean distance
        minimal_euclidean_distance = euclidean(test_instance, training_data_instances[0])
        minimal_euclidean_distance_index = 0
        for training_instance_index, training_instance in enumerate(training_data_instances):
            # compute the euclidean distance
            euclidean_distance = euclidean(test_instance, training_instance)
            if euclidean_distance < minimal_euclidean_distance:
                minimal_euclidean_distance = euclidean_distance
                minimal_euclidean_distance_index = training_instance_index
        classified_results[test_instance_index] = \
            training_data_labels[minimal_euclidean_distance_index]
def compute_fitness(target, candidate):
    # Truncate the longer array so both have equal length.
    n = min(len(target), len(candidate))
    a1 = target[:n]
    a2 = candidate[:n]
    # Fitness is the inverse of the summed pointwise euclidean distance.
    dist = sum(euclidean(a1[i], a2[i]) for i in range(n))
    if dist == 0:
        return 1
    return 1 / dist
def train(X, y, k, C):
    # Batch gradient ascent for L2-regularized logistic regression, with
    # learning rate k and regularization strength C:
    # w_j <- w_j + k * (1/n) * sum_i y_i x_ij (1 - sigmoid(y_i w.x_i)) - k*C*w_j
    n = X.shape[0]
    m = X.shape[1]
    w = np.zeros(m)
    threshold = 1e-5
    for iteration in range(10000):
        new_w = np.zeros(m)
        for j in range(m):
            total = 0
            for i in range(n):
                total += y[i] * X[i, j] * (1 - 1 / (1 + np.exp(-y[i] * np.dot(X[i], w))))
            new_w[j] = w[j] + k * total / float(n) - k * C * w[j]
        # Stop once the weight vector has converged.
        if distance.euclidean(w, new_w) <= threshold:
            return new_w
        w = new_w
    return w
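The doubly nested loop above can be collapsed with NumPy; a vectorized sketch of the same update rule (a rewrite for illustration, not the original author's code):

# Vectorized form of the L2-regularized logistic-regression update above;
# an equivalent sketch, not the original project's code.
import numpy as np

def train_vectorized(X, y, k, C, max_iter=10000, threshold=1e-5):
    n, m = X.shape
    w = np.zeros(m)
    for _ in range(max_iter):
        margins = y * (X @ w)                     # y_i * w.x_i
        coeffs = 1 - 1 / (1 + np.exp(-margins))   # 1 - sigmoid(margin)
        grad = (X * (y * coeffs)[:, None]).sum(axis=0) / n
        new_w = w + k * grad - k * C * w
        if np.linalg.norm(w - new_w) <= threshold:
            return new_w
        w = new_w
    return w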
def calc_arom_facing_norms(arom_a_coords, arom_b_coords):
    """Given two aromatic rings get the normal vectors that face the other ring."""
    centroids = [calc_centroid(arom_coords) for arom_coords in [arom_a_coords, arom_b_coords]]
    arom_norms = calc_arom_norms(arom_a_coords, arom_b_coords)
    face_norms = []
    for i, arom_norm in enumerate(arom_norms):
        # get the index of the other ring
        j = 1 if i == 0 else 0
        # flip the normal if necessary so it points toward the other ring
        norm = calc_facing_vector(arom_norm + centroids[i], centroids[j])
        face_norms.append(norm)
    return face_norms
def push_to_b(self, xn, xp, e):
    # Binary search along the segment between a negative point xn and a
    # positive point xp until they are within (normalized) distance e.
    assert self.query(xn, count=False) == self.NEG
    assert self.query(xp, count=False) == self.POS
    # Normalize by the diameter of the unit hypercube, sqrt(n_features).
    d = distance.euclidean(xn, xp) / \
        distance.euclidean(np.ones(self.n_features), np.zeros(self.n_features))
    if d < e:
        logger.debug('bin search done with %f', d)
        return xn, xp
    mid = .5 * np.add(xn, xp)
    try:
        label = self.query(mid)
        if label == self.NEG:
            return self.push_to_b(mid, xp, e)
        else:
            return self.push_to_b(xn, mid, e)
    except RunOutOfBudget:
        logger.debug('Run out of budget %d, push_to_b failed', self.budget)
        raise
def travelling_salesman(colors):
    colors_length = len(colors)
    # Distance matrix (the ranges must cover the last index as well)
    A = np.zeros([colors_length, colors_length])
    for x in range(colors_length):
        for y in range(colors_length):
            A[x, y] = distance.euclidean(colors[x], colors[y])
    # Nearest neighbour algorithm (see the sketch below)
    path = NN(A, 0)
    # Final array
    colors_nn = [colors[i] for i in path]
    return colors_nn
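`NN` is not defined in this snippet; a minimal greedy nearest-neighbour helper matching the call `NN(A, 0)` above (an assumption about the missing function, not the project's implementation):

# Hypothetical NN helper: greedy nearest-neighbour path over distance
# matrix A, starting at node `start`, visiting every node once.
import numpy as np

def NN(A, start):
    n = A.shape[0]
    path = [start]
    unvisited = set(range(n)) - {start}
    while unvisited:
        current = path[-1]
        nxt = min(unvisited, key=lambda j: A[current, j])
        path.append(nxt)
        unvisited.remove(nxt)
    return path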
def test_local_search(self):
def acquisition_function(point):
point = [p.get_array() for p in point]
opt = np.array([1, 1, 1, 1])
dist = [euclidean(point, opt)]
return np.array([-np.min(dist)])
l = LocalSearch(acquisition_function, self.cs, epsilon=1e-10,
max_iterations=100000)
start_point = self.cs.sample_configuration()
acq_val_start_point = acquisition_function([start_point])
acq_val_incumbent, _ = l._one_iter(start_point)
    # Local search needs to find something that is at least as good as the
    # start point
self.assertLessEqual(acq_val_start_point, acq_val_incumbent)
def ckmeans(X, K, datasetID=-1, params=()):
global kmeans_init
pnames = [ 'init', 'max_iter', 'n_jobs', 'distance']
#dflts = ['k-means++', 300, -1, 'euclidean']
dflts = [ 'KA', 300, -1, 'euclidean']
if isinstance(params, np.ndarray):
paramsloc = params.tolist()
else:
paramsloc = params
(init, max_iter, n_jobs, distance) = ds.resolveargumentpairs(pnames, dflts, paramsloc)
if datasetID in kmeans_init:
init = kmeans_init[datasetID][0:K]
elif init == 'KA':
init = initclusterKA(X, K, distance)
elif init == 'KA_memorysaver':
init = initclusterKA_memorysaver(X, K, distance)
C = skcl.KMeans(K, init=init, max_iter=max_iter, n_jobs=n_jobs).fit(X).labels_
return clustVec2partMat(C, K)
def csoms(X, D, params=()):
pnames = ['neighbour', 'learning_rate', 'input_length_ratio']
dflts = [ 0.1, 0.2, -1]
if isinstance(params, np.ndarray):
paramsloc = params.tolist()
else:
paramsloc = params
(neighbour, learning_rate, input_length_ratio) = ds.resolveargumentpairs(pnames, dflts, paramsloc)
Xloc = np.array(X)
K = D[0] * D[1] # Number of clusters
N = Xloc.shape[0] # Number of genes
Ndim = Xloc.shape[1] # Number of dimensions in X
som = sompy.SOM(D, Xloc)
som.set_parameter(neighbor=neighbour, learning_rate=learning_rate, input_length_ratio=input_length_ratio)
centres = som.train(N).reshape(K, Ndim)
dists = [[spdist.euclidean(c, x) for c in centres] for x in Xloc]
C = [np.argmin(d) for d in dists]
return clustVec2partMat(C, K)
# Source: drowsiness detection.py, from project driver-fatigue-detection-system by raja434
def eye_aspect_ratio(eye):
# compute the euclidean distances between the two sets of
# vertical eye landmarks (x, y)-coordinates
A = dist.euclidean(eye[1], eye[5])
B = dist.euclidean(eye[2], eye[4])
    # compute the euclidean distance between the horizontal
    # eye landmark (x, y)-coordinates
C = dist.euclidean(eye[0], eye[3])
# compute the eye aspect ratio
ear = (A + B) / (2.0 * C)
# return the eye aspect ratio
return ear
# construct the argument parser and parse the arguments
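The EAR stays roughly constant while the eye is open and falls toward zero during a blink; a self-contained sketch of the usual thresholding logic (the 0.25 cutoff and 3-frame run are illustrative assumptions, not values from this project):

# Illustrative blink counter over a stream of per-frame EAR values; the
# threshold and consecutive-frame count are assumed example values.
def count_blinks(ear_values, ear_threshold=0.25, consec_frames=3):
    blinks, run = 0, 0
    for ear in ear_values:
        if ear < ear_threshold:
            run += 1
        else:
            if run >= consec_frames:
                blinks += 1
            run = 0
    if run >= consec_frames:
        blinks += 1
    return blinks

print(count_blinks([0.3, 0.3, 0.2, 0.2, 0.2, 0.3]))  # -> 1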
# Source: decisionboundaryplot.py, from project highdimensional-decision-boundary-plot by tmadl
def _find_decision_boundary_on_hypersphere(self, centroid, R, penalize_known=False):
    def objective(phi, grad=0):
        # search on hypersphere surface in polar coordinates - map back to cartesian
        cx = centroid + polar_to_cartesian(phi, R)
        try:
            cx2d = self.dimensionality_reduction.transform([cx])[0]
            error = self.decision_boundary_distance(cx)
            if penalize_known:
                # slight penalty for being too close to already known decision boundary
                # keypoints
                db_distances = [euclidean(cx2d, self.decision_boundary_points_2d[k])
                                for k in range(len(self.decision_boundary_points_2d))]
                error += 1e-8 * ((self.mean_2d_dist - np.min(db_distances)) /
                                 self.mean_2d_dist)**2
            return error
        except Exception as ex:
            print("Error in objective function:", ex)
            return np.inf

    optimizer = self._get_optimizer(
        D=self.X.shape[1] - 1, upper_bound=2 * np.pi,
        iteration_budget=self.hypersphere_iteration_budget)
    optimizer.set_min_objective(objective)
    db_phi = optimizer.optimize([rnd.random() * 2 * np.pi for k in range(self.X.shape[1] - 1)])
    db_point = centroid + polar_to_cartesian(db_phi, R)
    return db_point
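`polar_to_cartesian` maps D-1 hyperspherical angles and a radius onto a point in R^D; a sketch using the standard hyperspherical-coordinate formulas (an assumption about the helper, not the project's own code):

# Hypothetical polar_to_cartesian: standard hyperspherical coordinates,
# mapping angles phi (length D-1) and radius R to a point on the sphere in R^D.
import numpy as np

def polar_to_cartesian(phi, R):
    phi = np.asarray(phi)
    D = len(phi) + 1
    x = np.full(D, float(R))
    for i in range(D - 1):
        x[i] *= np.cos(phi[i])
        x[i + 1:] *= np.sin(phi[i])
    return x  # e.g. D=2 gives [R*cos(phi), R*sin(phi)]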
def test_dbscan_feature():
# Tests the DBSCAN algorithm with a feature vector array.
# Parameters chosen specifically for this task.
# Different eps to other test, because distance is not normalised.
eps = 0.8
min_samples = 10
metric = 'euclidean'
# Compute DBSCAN
# parameters chosen for task
core_samples, labels = dbscan(X, metric=metric, eps=eps,
min_samples=min_samples)
# number of clusters, ignoring noise if present
n_clusters_1 = len(set(labels)) - int(-1 in labels)
assert_equal(n_clusters_1, n_clusters)
db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples)
labels = db.fit(X).labels_
n_clusters_2 = len(set(labels)) - int(-1 in labels)
assert_equal(n_clusters_2, n_clusters)
def test_dbscan_callable():
# Tests the DBSCAN algorithm with a callable metric.
# Parameters chosen specifically for this task.
# Different eps to other test, because distance is not normalised.
eps = 0.8
min_samples = 10
# metric is the function reference, not the string key.
metric = distance.euclidean
# Compute DBSCAN
# parameters chosen for task
core_samples, labels = dbscan(X, metric=metric, eps=eps,
min_samples=min_samples,
algorithm='ball_tree')
# number of clusters, ignoring noise if present
n_clusters_1 = len(set(labels)) - int(-1 in labels)
assert_equal(n_clusters_1, n_clusters)
db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples,
algorithm='ball_tree')
labels = db.fit(X).labels_
n_clusters_2 = len(set(labels)) - int(-1 in labels)
assert_equal(n_clusters_2, n_clusters)
def exponential(x, y, sigma=1):
    """Compute an exponential kernel.

    The exponential kernel is closely related to the Gaussian kernel, with
    only the square of the norm left out. It is also a radial basis function
    kernel:

        K(x, y) = exp(-||x - y|| / (2σ^2))

    where `x` and `y` are vectors in the input space (i.e., vectors of
    features computed from training or test samples), ``||x - y||`` is the
    Euclidean norm, and the adjustable parameter `sigma` is used to adjust
    the kernel 'bandwidth'. It is important to note that the observations made
    about the `sigma` parameter for the Gaussian kernel also apply to the
    Exponential and Laplacian kernels.

    See Also
    --------
    gaussian
    """
    # Parenthesize the denominator; `/ 2*sigma**2` would compute (d/2)*sigma^2.
    return exp(-dist.euclidean(x, y) / (2 * sigma**2))
def circular(x, y, sigma):
    """Compute a circular kernel.

    The circular kernel is used in geostatic applications. It is an example
    of an isotropic stationary kernel and is positive definite in ℝ^2:

        K(x, y) = 2/π arccos(||x - y|| / σ) -
                  2/π (||x - y|| / σ) sqrt(1 - (||x - y|| / σ)^2)

    if ||x - y|| < σ, zero otherwise. Here `x` and `y` are vectors in the
    input space (i.e., vectors of features computed from training or test
    samples), ||x - y|| is the Euclidean norm, and sigma is a free parameter
    with no reasonable default. In other words, `sigma` should be defined
    *a priori* based on some geostatistical analysis, such as semi-variogram
    analysis.
    """
    norm_sigma = dist.euclidean(x, y) / sigma
    if norm_sigma >= 1:
        # outside the kernel's support; also avoids sqrt of a negative number
        return 0.0
    pi2 = 2 / PI
    # arccos takes +norm_sigma, so K decays continuously to 0 at ||x - y|| = sigma
    return pi2 * acos(norm_sigma) - pi2 * norm_sigma * sqrt(1 - norm_sigma**2)
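A quick sanity check of the circular kernel's endpoints (assuming `PI`, `acos`, and `sqrt` come from the `math` module, as the function requires):

# Sanity check: K(x, x) = 1, and K decays to 0 at ||x - y|| = sigma.
import numpy as np
x = np.array([0.0, 0.0])
assert abs(circular(x, x, sigma=1.0) - 1.0) < 1e-12          # d = 0 -> K = 1
assert circular(x, np.array([1.0, 0.0]), sigma=1.0) == 0.0   # d = sigma -> K = 0
assert circular(x, np.array([2.0, 0.0]), sigma=1.0) == 0.0   # outside support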
def kernel_dist(kernel=linear, **kw):
    """Generic kernel-induced distance metric.

    Returns the squared distance induced by `kernel` in feature space:
    d^2(x, y) = K(x, x) - 2*K(x, y) + K(y, y).

    To use, partially apply this function with a kernel argument.

    Examples
    --------
    >>> from functools import partial
    >>> dist = partial(kernel_dist, kernel=sigmoid)
    """
    if kernel is None:
        # Don't use a kernel! (Note: this branch returns the plain,
        # non-squared euclidean distance.)
        return dist.euclidean
    elif kernel.__name__ == "gaussian":
        # Shortcut for Gaussian kernels: K(x, x) = K(y, y) = 1, so the
        # squared induced distance reduces to 2 - 2*K(x, y).
        return lambda x, y: 2 - 2 * gaussian(x, y, kw.get("sigma", 1))
    else:
        kern = partial(kernel, **kw)
        return lambda x, y: kern(x, x) - 2 * kern(x, y) + kern(y, y)
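Since `kernel_dist` returns a callable, a short usage sketch (assuming the module's `gaussian` kernel takes the standard form exp(-||x - y||^2 / (2*sigma^2))):

# Usage sketch: build a kernel-induced squared distance and evaluate it.
import numpy as np

d = kernel_dist(kernel=gaussian, sigma=0.5)
x, y = np.array([0.0, 1.0]), np.array([1.0, 1.0])
print(d(x, y))  # 2 - 2*exp(-1/(2*0.25)) = 2 - 2*exp(-2)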
def getPointOnHull(hullPoints, t, totalLength):
    # Walk along the hull perimeter until the remaining fraction t falls
    # within a side, then interpolate along that side.
    lh = len(hullPoints)
    for j in range(lh + 1):
        sideLength = distance.euclidean(hullPoints[j % lh], hullPoints[(j + 1) % lh])
        t_sub = sideLength / totalLength
        if t > t_sub:
            t -= t_sub
        else:
            return lerp(hullPoints[j % lh], hullPoints[(j + 1) % lh], t / t_sub)
def euc(a, b):  # return the euclidean distance between two points
    return distance.euclidean(a, b)
def euc(a, b):
return distance.euclidean(a, b)
# New Classifier
def test_DBCV(data):
kmeans = KMeans(n_clusters=2)
kmeans_labels = kmeans.fit_predict(data)
hdbscanner = hdbscan.HDBSCAN()
hdbscan_labels = hdbscanner.fit_predict(data)
kmeans_score = DBCV.DBCV(data, kmeans_labels, dist_function=euclidean)
hdbscan_score = DBCV.DBCV(data, hdbscan_labels, dist_function=euclidean)
assert hdbscan_score > kmeans_score
def test__core_dist(data):
target = 0.09325490419185979
point = data[0]
core_dist = DBCV._core_dist(point, data, euclidean)
assert abs(core_dist - target) < 0.001
def test__mutual_reachability_dist(data):
target = 0.074196034579080888
point_1 = data[0]
point_2 = data[1]
dist = DBCV._mutual_reachability_dist(point_1, point_2, data, data,
euclidean)
assert dist == euclidean(point_1, point_2)
point_3 = data[5]
point_4 = data[46]
dist_2 = DBCV._mutual_reachability_dist(point_3, point_4, data, data,
euclidean)
assert abs(dist_2 - target) < 0.001
def test__mutual_reach_dist_graph(data):
target = 0.09872567819414102
hdbscanner = hdbscan.HDBSCAN()
hdbscan_labels = hdbscanner.fit_predict(data)
graph = DBCV._mutual_reach_dist_graph(data, hdbscan_labels,
euclidean)
assert graph.shape == (data.shape[0], data.shape[0])
    assert abs(graph[0][0] - target) < 0.001
def closest(i, arr):
    """
    :param arr: np array (len=No. Indep var + No. Depen var)
    :return: list of cluster members sorted by euclidean distance to arr
    """
    return sorted(i.clstr, key=lambda x: edist(arr[:-1], x.sample[:-1]))
def closest(i, arr):
    """
    :param arr: np array (len=No. Indep var + No. Depen var)
    :return: list of cluster members sorted by euclidean distance to arr
    """
    return sorted(i.clstr, key=lambda x: edist(arr.sample[:-1], x.sample[:-1]))
def calculate_distance(self, vector1, vector2):
    vectordist = distance.euclidean(vector1, vector2)
    return vectordist