def find_nearest_point(points, p):
    # initialize with the first point
    minimal_distance = euclidean(p, points[0])
    minimal_distance_point_index = 0
    # scan the remaining points for a closer one
    for i in range(1, len(points)):
        distance = euclidean(p, points[i])
        if distance < minimal_distance:
            minimal_distance = distance
            minimal_distance_point_index = i
    return minimal_distance_point_index, minimal_distance
# compute k-medians cost function (sum of unsquared Euclidean distances to assigned centers)
def k_medians_cost_function(points, k_centers, points_labels):
    cost_function = 0.0
    for i in range(len(points)):
        cost_function += euclidean(points[i], k_centers[points_labels[i]])
    return cost_function
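The two helpers above compose into a single assignment-and-cost step. A minimal usage sketch, assuming euclidean comes from scipy.spatial.distance and using invented toy points:

import numpy as np
from scipy.spatial.distance import euclidean

points = np.array([[0.0, 0.0], [0.2, 0.1], [5.0, 5.1], [4.9, 5.0]])
k_centers = np.array([[0.1, 0.0], [5.0, 5.0]])

# assign each point to its nearest center, then evaluate the cost
labels = [find_nearest_point(k_centers, p)[0] for p in points]
print(labels)                                              # e.g. [0, 0, 1, 1]
print(k_medians_cost_function(points, k_centers, labels))  # sum of distances to assigned centers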
def plot_distortion(training_data_instances):
    # dimension of a training data instance
    d = training_data_instances.shape[1]
    # first m instances considered
    m = 20
    fig, axes = plt.subplots(1, 1)
    fig.suptitle("Distortion of random projection", fontsize="x-large")
    for k in [50, 100, 500]:
        ## generate random projection matrix
        random_projection_matrix = generate_random_projection_matrix(k, d)
        ## random projection
        m_instances = training_data_instances[0:m]
        projected_m_instances = np.dot(m_instances, np.transpose(random_projection_matrix))
        ## evaluate distortion - line chart
        m_instances_distortions = []
        for i in range(m):
            for j in range(i + 1, m):
                m_instances_distortions.append(
                    euclidean(projected_m_instances[i], projected_m_instances[j])
                    / euclidean(m_instances[i], m_instances[j]))
        m_instances_distortions = np.array(m_instances_distortions)
        mean, std = np.mean(m_instances_distortions), np.std(m_instances_distortions)
        # line chart of pairwise distortions plus their mean
        axes.plot(m_instances_distortions, label="k=" + str(k))
        axes.plot([0, m_instances_distortions.size], [mean, mean],
                  label="k=" + str(k) + ", mean = " + str(round(mean, 4)))
        print("k =", k, "distortion =", mean, "+-", std)
    axes.set_xlabel("pairs of instances", fontsize="large")
    axes.set_ylabel("distortion", fontsize="large")
    axes.legend(loc="center right", fontsize="medium")
    plt.show()
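plot_distortion calls a generate_random_projection_matrix(k, d) helper that is not shown in the snippet. A minimal sketch of one possible implementation, assuming a dense Gaussian projection scaled by 1/sqrt(k), a common choice for Johnson-Lindenstrauss style projections:

import numpy as np

def generate_random_projection_matrix(k, d):
    # k x d matrix with i.i.d. Gaussian entries, scaled so that projected
    # pairwise distances are approximately preserved in expectation
    return np.random.randn(k, d) / np.sqrt(k)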
def rwmd(self, tf_doc1, tf_doc2):
    # relaxed word mover's distance: each word of doc1 travels to its nearest
    # word in doc2, weighted by its normalized term frequency
    rwmdistance = 0
    sumtf1 = sum(tf_doc1.values())
    for word1 in tf_doc1:
        dst = []
        for word2 in tf_doc2:
            dst.append(euclidean(self.emb[word1], self.emb[word2]))
        rwmdistance += np.min(dst) * tf_doc1[word1] / sumtf1
    return rwmdistance
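rwmd computes a one-sided relaxed word mover's distance, moving each word of the first document to its nearest neighbour in the second. A usage sketch, assuming self.emb maps words to embedding vectors; the embeddings and term frequencies below are invented:

import numpy as np
from scipy.spatial.distance import euclidean
from types import SimpleNamespace

emb = {"cat": np.array([1.0, 0.0]), "dog": np.array([0.9, 0.1]), "car": np.array([0.0, 1.0])}
docs = SimpleNamespace(emb=emb)          # stands in for self
print(rwmd(docs, {"cat": 2, "car": 1}, {"dog": 1}))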
def rect_distance(rect1, rect2):
    # memoize on the byte representation of the two rectangles
    key = (rect1.tobytes(), rect2.tobytes())
    if key in rect_distance_cache:
        return rect_distance_cache[key]
    x1, y1, x1b, y1b = rect1
    x2, y2, x2b, y2b = rect2
    distance = None
    left = x2b < x1
    right = x1b < x2
    bottom = y2b < y1
    top = y1b < y2
    if top and left:
        distance = euclidean((x1, y1b), (x2b, y2))
    elif left and bottom:
        distance = euclidean((x1, y1), (x2b, y2b))
    elif bottom and right:
        distance = euclidean((x1b, y1), (x2, y2b))
    elif right and top:
        distance = euclidean((x1b, y1b), (x2, y2))
    elif left:
        distance = x1 - x2b
    elif right:
        distance = x2 - x1b
    elif bottom:
        distance = y1 - y2b
    elif top:
        distance = y2 - y1b
    else:  # rectangles intersect
        distance = 0
    rect_distance_cache[key] = distance
    return distance
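rect_distance memoizes its results in a rect_distance_cache that the snippet does not define. A usage sketch, assuming the cache is a plain module-level dict and rectangles are numpy arrays of (x, y, x_max, y_max):

import numpy as np
from scipy.spatial.distance import euclidean

rect_distance_cache = {}          # assumed module-level memoization dict

r1 = np.array([0, 0, 2, 2])       # (x, y, x_max, y_max)
r2 = np.array([5, 0, 7, 2])       # separated horizontally
r3 = np.array([5, 5, 7, 7])       # separated diagonally
print(rect_distance(r1, r2))      # horizontal gap: 5 - 2 = 3
print(rect_distance(r1, r3))      # corner-to-corner Euclidean gap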
def build_tree(self, space, words, imdb_name, num_trees=1000, vector_list=None):
    """
    Build an annoy tree used to compute distances. If vector_list is None, the full
    tree is built with all words in the language model; otherwise only the words in
    the vector list are indexed.
    """
    # If a saved tree exists, load it; otherwise build and save it
    tree_path = osp.join(self._devkit_path, self.name + '_' + imdb_name + str(space) + '.ann')
    pckl_path = osp.join(self._devkit_path, self.name + '_' + imdb_name + str(space) + 'array' + ".pkl")
    t = AnnoyIndex(self._dimension, metric="euclidean")
    if osp.exists(tree_path):
        print("Tree exists, loading from file...")
        t.load(tree_path)
        self._tree = t
        with open(pckl_path, 'rb') as file:
            self._labels = pickle.load(file)
    else:
        print("Building tree...")
        counter = 0
        word_list = []
        if space == 0:
            for word, feature in self._vectors.items():
                word_list.append(word)
                t.add_item(counter, feature)
                counter += 1
        else:
            for w in words:
                word_list.append(w)
                t.add_item(counter, self.word_vector(w))
                counter += 1
        t.build(num_trees)
        self._tree = t
        self._labels = word_list
        # Save tree and its labels
        t.save(tree_path)
        with open(pckl_path, 'wb') as handle:
            pickle.dump(word_list, handle)
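Once built, the Annoy index can be queried for nearest neighbours. A small sketch with invented 3-dimensional vectors; get_nns_by_vector and include_distances are part of the annoy package API:

from annoy import AnnoyIndex

t = AnnoyIndex(3, metric="euclidean")   # dimension must match the stored vectors
t.add_item(0, [1.0, 0.0, 0.0])
t.add_item(1, [0.0, 1.0, 0.0])
t.add_item(2, [0.9, 0.1, 0.0])
t.build(10)                             # more trees: better accuracy, slower build
print(t.get_nns_by_vector([1.0, 0.0, 0.0], 2, include_distances=True))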
def get_closest(self, img_feature, lbl_features):
    # linear scan for the label feature nearest to the image feature
    smallest_distance = float("inf")
    smallest_ind = None
    for i in range(len(lbl_features)):
        value = distance.euclidean(img_feature, lbl_features[i])
        if value < smallest_distance:
            smallest_distance = value
            smallest_ind = i
    return smallest_distance, smallest_ind
def rewardFunction(a, b):
    return distance.euclidean(a, b)
def visit_all(orig, gx0, gy0, gz0, gx1, gy1, gz1, model):
    if gx0 == gx1 and gy0 == gy1 and gz0 == gz1:
        return True
    if .99 < model[gx0, gy0, gz0] < 1.01 and orig != (gx0, gy0, gz0):
        return False
    miner = 1000000
    move = [0, 0, 0]
    for i in [-1, 0, 1]:
        for j in [-1, 0, 1]:
            for k in [-1, 0, 1]:
                dist = distance.euclidean((gx0 + i, gy0 + j, gz0 + k), (gx1, gy1, gz1))
                if dist < miner:
                    miner = dist
                    move = (gx0 + i, gy0 + j, gz0 + k)
    return visit_all(orig, move[0], move[1], move[2], gx1, gy1, gz1, model)
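A usage sketch for visit_all, assuming model is a 3-D numpy occupancy grid in which values near 1 mark blocked cells; the grid below is invented:

import numpy as np
from scipy.spatial import distance

model = np.zeros((10, 10, 10))                        # empty occupancy grid
print(visit_all((0, 0, 0), 0, 0, 0, 5, 5, 5, model))  # True: greedy diagonal walk reaches the goal
model[2, 2, 2] = 1.0                                  # block a cell on the diagonal
print(visit_all((0, 0, 0), 0, 0, 0, 5, 5, 5, model))  # False: the walk hits the blocked cell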
def compute_distance(query_channel, channel, mean_vec, distance_type='eucos'):
    """ Compute the specified distance type between channels of the mean vector and the query image.
    In the caffe library, the FC8 layer consists of 10 channels. Here, we compute the distance
    of each channel (from the query image) to the respective channel of the
    Mean Activation Vector. In the paper, we considered a hybrid distance, eucos, which
    combines euclidean and cosine distance for bounding open space. Alternatively,
    other distances such as euclidean or cosine can also be used.
    Input:
    --------
    query_channel: Particular FC8 channel of query image
    channel: channel number under consideration
    mean_vec: mean activation vector
    Output:
    --------
    query_distance : Distance between respective channels
    """
    if distance_type == 'eucos':
        query_distance = spd.euclidean(mean_vec[channel, :], query_channel) / 200. + spd.cosine(mean_vec[channel, :], query_channel)
    elif distance_type == 'euclidean':
        query_distance = spd.euclidean(mean_vec[channel, :], query_channel) / 200.
    elif distance_type == 'cosine':
        query_distance = spd.cosine(mean_vec[channel, :], query_channel)
    else:
        print("distance type not known: enter either of eucos, euclidean or cosine")
        query_distance = None
    return query_distance
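A usage sketch for compute_distance, assuming spd is scipy.spatial.distance and a mean activation matrix of 10 channels with invented shapes and values:

import numpy as np
import scipy.spatial.distance as spd

mean_vec = np.random.rand(10, 4096)     # 10 channels x feature dimension (invented)
query_channel = np.random.rand(4096)
print(compute_distance(query_channel, 3, mean_vec, distance_type='eucos'))
print(compute_distance(query_channel, 3, mean_vec, distance_type='cosine'))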
def compute_s(i, x, labels, clusters):
    # sum of distances from every cluster center to center i
    s = 0
    for center in clusters:
        s += distance.euclidean(center, clusters[i])
    return s
def compute_Rij(i, j, x, labels, clusters, nc):
    # pairwise term R_ij = (S_i + S_j) / d(c_i, c_j)
    Rij = 0
    try:
        d = distance.euclidean(clusters[i], clusters[j])
        Rij = (compute_s(i, x, labels, clusters) + compute_s(j, x, labels, clusters)) / d
    except ZeroDivisionError:
        Rij = 0
    return Rij
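compute_Rij has the shape of the pairwise term R_ij = (S_i + S_j) / d(c_i, c_j) of the Davies-Bouldin index. A self-contained sketch of the full index, assuming S_i is taken as the mean distance of cluster i's points to its center (scikit-learn's davies_bouldin_score is a reference implementation):

import numpy as np
from scipy.spatial.distance import euclidean

def davies_bouldin(x, labels, clusters):
    # S_i: mean distance of the points labelled i to center i
    s = [np.mean([euclidean(p, clusters[i]) for p, l in zip(x, labels) if l == i] or [0.0])
         for i in range(len(clusters))]
    db = 0.0
    for i in range(len(clusters)):
        # worst-case similarity ratio of cluster i against every other cluster
        db += max((s[i] + s[j]) / euclidean(clusters[i], clusters[j])
                  for j in range(len(clusters)) if j != i)
    return db / len(clusters)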
def compute_distmat(self, dataframe):
    """
    Computes the pairwise Euclidean distances between every atom.
    Design choice: a DataFrame is passed in to enable easier testing on
    dummy data.
    """
    self.eucl_dists = pdist(dataframe[['x', 'y', 'z']], metric='euclidean')
    self.eucl_dists = pd.DataFrame(squareform(self.eucl_dists))
    self.eucl_dists.index = dataframe.index
    self.eucl_dists.columns = dataframe.index
    return self.eucl_dists
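As the docstring suggests, the method is easy to exercise on dummy data. A sketch using a plain pandas DataFrame with x/y/z columns and a stand-in object in place of self:

import pandas as pd
from scipy.spatial.distance import pdist, squareform   # used inside compute_distmat
from types import SimpleNamespace

dummy = pd.DataFrame({'x': [0.0, 3.0], 'y': [0.0, 4.0], 'z': [0.0, 0.0]}, index=['A1', 'A2'])
holder = SimpleNamespace()                  # stands in for self
distmat = compute_distmat(holder, dummy)
print(distmat.loc['A1', 'A2'])              # 5.0 for this 3-4-5 right triangle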
def test_features_centroid_distance(cls, arom, cation):
    arom_heavy_atom_coords = np.array([atom.coords for atom in arom.atoms
                                       if atom.atom_type.element in cls.heavy_atoms])
    arom_centroid = calc_centroid(arom_heavy_atom_coords)
    cation_coords = cation.atoms[0].coords
    centroid_distance = euclidean(arom_centroid, cation_coords)
    if cls.check_centroid_distance(centroid_distance) is False:
        return False, centroid_distance
    else:
        return True, centroid_distance
def calc_facing_vector(vec_up, point):
    vec_down = -1 * vec_up
    d_up = euclidean(vec_up, point)
    d_down = euclidean(vec_down, point)
    face_vec = vec_up if d_up < d_down else vec_down
    return face_vec
def calculate_euclidaen_distance(self, user_keywords, matched_article):
    "Calculate the Euclidean distance between the matched URL's keywords and the recorded URL's keywords"
    new_article_kws = {i['name']: i['score'] for i in user_keywords if i['score'] >= 25}
    matched_article_kws = {i['name']: i['score'] for i in matched_article.keywords if i['score'] >= 25}
    all_keywords = [i["name"] for i in user_keywords if i["score"] >= 25] + \
                   [i["name"] for i in matched_article.keywords if i["score"] >= 25]
    a = tuple(new_article_kws.get(i, 0) for i in all_keywords)
    b = tuple(matched_article_kws.get(i, 0) for i in all_keywords)
    try:
        dst = distance.euclidean(a, b)
        return dst
    except Exception:
        return 0
def are_similar(self, first, second):
    return dist.euclidean(first, second)
def calc_euclidean_dist_using_scipy(self, val1, val2):
    """ SciPy distance.euclidean() function used to calculate Euclidean Distance """
    if np.isnan(val1) or np.isnan(val2):
        return 2**5  # sentinel returned for NaN inputs so they sort towards the end
    return distance.euclidean(val1, val2)  # int(math.sqrt(abs(val1 - val2)**2))
def simulate(self, car_state_, ticks_count):
    # advance the car simulation for ticks_count steps and measure the displacement
    car_state = copy.deepcopy(car_state_)
    start_pos = copy.deepcopy(car_state.pos)
    while ticks_count:
        self.calc(car_state)
        ticks_count -= 1
    dist = distance.euclidean(start_pos, car_state.pos)
    return dist, car_state