Python euclidean() usage examples (source code)

File: DBCV.py, project: DBCV, author: christopherjenness
def DBCV(X, labels, dist_function=euclidean):
    """
    Density Based clustering validation

    Args:
        X (np.ndarray): ndarray with dimensions [n_samples, n_features]
            data to check validity of clustering
        labels (np.array): clustering assignments for data X
        dist_function (func): function to determine distance between objects
            func args must be [np.array, np.array] where each array is a point

    Returns: cluster_validity (float)
        score in range [-1, 1] indicating validity of clustering assignments
    """
    graph = _mutual_reach_dist_graph(X, labels, dist_function)
    mst = _mutual_reach_dist_MST(graph)
    cluster_validity = _clustering_validity_index(mst, labels)
    return cluster_validity
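
A minimal usage sketch (not part of the project's files): it assumes the DBCV function above is in scope and pairs it with scipy's euclidean and scikit-learn's KMeans; the random data is purely illustrative.

import numpy as np
from scipy.spatial.distance import euclidean
from sklearn.cluster import KMeans

X = np.random.rand(100, 2)                        # toy data, illustrative only
labels = KMeans(n_clusters=3).fit_predict(X)
score = DBCV(X, labels, dist_function=euclidean)  # assumes the function above is importable
print(score)                                      # value in [-1, 1]; higher suggests denser, better-separated clusters
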
File: CD.py, project: XTREE, author: ai-se
def patchIt(i,testInst):
    testInst = testInst.values
    C = changes()
    def proj(one, two, test):
      a = edist(one, test)
      b = edist(two, test)
      c = edist(one, two)
      return (a**2-b**2+c**2)/(2*c)
    better = sorted(i.pairs, key= lambda x: proj(x[0].sample, x[1].sample, testInst), reverse=True)[0]
    (toMe, notToMe) = (better[0], better[1]) if better[0].sample[-1]<=better[1].sample[-1] else (better[1], better[0])
    newInst = testInst + 0.5*(toMe.sample-testInst)
    # set_trace()
    if i.fsel:
      old=testInst
      indx = i.lbs[:int(len(i.lbs)*0.33)]
      for n in indx:
        C.save(name=i.testDF.columns.values[n], old=testInst[n], new=newInst[n])
        testInst[n] = newInst[n]
      i.change.append(C.log)
      return testInst
    else:
      return newInst
File: network.py, project: sakmapper, author: szairis
def gap(data, refs=None, nrefs=20, ks=range(1,11), method=None):
    shape = data.shape
    if refs is None:
        tops = data.max(axis=0)
        bots = data.min(axis=0)
        dists = scipy.matrix(scipy.diag(tops-bots))

        rands = scipy.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            rands[:, :, i] = rands[:, :, i]*dists+bots
    else:
        rands = refs
    gaps = scipy.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        g1 = method(n_clusters=k).fit(data)
        (kmc, kml) = (g1.cluster_centers_, g1.labels_)
        disp = sum([euclidean(data[m, :], kmc[kml[m], :]) for m in range(shape[0])])

        refdisps = scipy.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            g2 = method(n_clusters=k).fit(rands[:, :, j])
            (kmc, kml) = (g2.cluster_centers_, g2.labels_)
            refdisps[j] = sum([euclidean(rands[m, :, j], kmc[kml[m],:]) for m in range(shape[0])])
        gaps[i] = scipy.log(scipy.mean(refdisps))-scipy.log(disp)
    return gaps
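
A hedged usage sketch: gap() expects a clustering class via the method argument, so scikit-learn's KMeans is assumed here, and the snippet relies on an older scipy where aliases like scipy.matrix and scipy.zeros still exist; data and ks are illustrative.

import numpy as np
from sklearn.cluster import KMeans

data = np.random.rand(200, 3)                     # illustrative data
ks = range(1, 6)
gaps = gap(data, nrefs=10, ks=ks, method=KMeans)
best_k = list(ks)[int(np.argmax(gaps))]           # the k with the largest gap statistic
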
File: scaffold.py, project: ababe, author: unkcpz
def in_euclidean_discance(self, pos, center, r):
        """
            A helper function that returns True or False,
            deciding whether a position (in fractional coordinates)
            lies within distance r of the given center.
        """
        from scipy.spatial.distance import euclidean as euclidean_discance
        from itertools import product

        cart_cent = self.get_cartesian_from_frac(center)
        trans = np.array([i for i in product([-1, 0, 1], repeat=3)])
        allpos = pos + trans
        for p in allpos:
            cart_p = self.get_cartesian_from_frac(p)
            if euclidean_discance(cart_p, cart_cent) < r:
                return True

        return False
File: random_projection.py, project: DataMining, author: lidalei
def find_nearest_instances_subprocess(training_data_instances, training_data_labels, test_data_instances, test_instance_start_index, test_instance_end_index,\
                                      classified_results):
    # print test_instance_start_index, test_instance_end_index
    for test_instance_index in range(test_instance_start_index, test_instance_end_index):
        test_instance = test_data_instances[test_instance_index]
        # find the nearest training instance with euclidean distance
        minimal_euclidean_distance = euclidean(test_instance, training_data_instances[0])
        minimal_euclidean_distance_index = 0
        for training_instance_index, training_instance in enumerate(training_data_instances):
            # compute the euclidean distance
            euclidean_distance = euclidean(test_instance, training_instance)
            if euclidean_distance < minimal_euclidean_distance:
                minimal_euclidean_distance = euclidean_distance
                minimal_euclidean_distance_index = training_instance_index
        classified_results[test_instance_index] =\
         training_data_labels[int(minimal_euclidean_distance_index)]
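
For comparison only, the same one-nearest-neighbour assignment can be written in a vectorized form with scipy's cdist; this sketch is not part of the project's code.

import numpy as np
from scipy.spatial.distance import cdist

def nearest_labels(train_X, train_y, test_X):
    # pairwise Euclidean distances, shape (n_test, n_train)
    d = cdist(test_X, train_X, metric='euclidean')
    # label of the closest training instance for every test instance
    return np.asarray(train_y)[d.argmin(axis=1)]
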
File: ga.py, project: dx7-programmer, author: yeeking
def compute_fitness(target, candidate):
    # truncate the longer array so both sequences have the same length
    a1 = target
    a2 = candidate
    if len(target) > len(candidate):# truncate a1, the target
        a1 = a1[0:len(candidate)]
    if len(candidate) > len(target):# truncate a2, the candidate
        a2 = a2[0:len(target)]
    dist = 0
    for i in range(0, len(a1)):
        dist = dist + euclidean(a1[i], a2[i])
    if dist == 0:
        return 1
    else:
        return 1/dist
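
An illustrative call with made-up arrays: element-wise Euclidean distances are summed, so an exact match scores 1 and larger deviations score lower.

import numpy as np

target    = np.array([0.1, 0.5, 0.9])
candidate = np.array([0.1, 0.4, 0.8])
print(compute_fitness(target, candidate))   # roughly 1 / (0.1 + 0.1) = 5.0
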
File: main.py, project: coursera-machine-learning-yandex, author: dstarcev
def train(X, y, k, C):
    n = X.shape[0]
    m = X.shape[1]
    w = np.zeros(m)
    c = cost(X, y, w, C)
    threshold = 1e-5
    for iteration in xrange(10000):
        new_w = np.zeros(m)
        for j in xrange(m):
            sum = 0
            for i in xrange(n):
                sum += y[i] * X[i, j] * (1 - 1 / (1 + np.exp(-y[i] * np.dot(X[i], w))))
            new_w[j] = w[j] + k * sum / np.double(n) - k * C * w[j]
        new_cost = cost(X, y, new_w, C)
        if distance.euclidean(w, new_w) <= threshold:
            return new_w
        c = new_cost
        w = new_w
    return w
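
The cost helper referenced above is not included in this snippet; a compatible definition might be a regularized logistic loss with labels in {-1, +1}, sketched below under that assumption.

import numpy as np

def cost(X, y, w, C):
    # average logistic loss plus an L2 penalty (assumed form, matching the gradient used in train)
    margins = y * X.dot(w)
    return np.mean(np.log(1.0 + np.exp(-margins))) + 0.5 * C * w.dot(w)
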
File: pi_cation.py, project: mastic, author: ADicksonLab
def calc_arom_facing_norms(arom_a_coords, arom_b_coords):
    """Given two aromatic rings get the normal vectors that face the other ring"""

    centroids = [calc_centroid(arom_coords) for arom_coords in [arom_a_coords, arom_b_coords]]
    arom_norms = calc_arom_norms(arom_a_coords, arom_b_coords)

    face_norms = []
    for i, arom_norm in enumerate(arom_norms):
        # get the index of the other arom
        j = 1 if i ==0 else 0
        norm = calc_facing_vector(arom_norm + centroids[i], centroids[j])
        # norm_up = arom_norm
        # norm_down = -1 * arom_norm
        # # get the norm so that it points to the other ring
        # d_up = euclidean(norm_up + centroids[i], centroids[j])
        # d_down = cdist(norm_down + centroids[i], centroids[j])
        # norm = norm_up if d_up < d_down else norm_down
        face_norms.append(norm)

    return face_norms
File: OnlineBase.py, project: Steal-ML, author: ftramer
def push_to_b(self, xn, xp, e):
        assert self.query(xn, count=False) == self.NEG
        assert self.query(xp, count=False) == self.POS

        d = distance.euclidean(xn, xp) / \
            distance.euclidean(np.ones(self.n_features), np.zeros(self.n_features))
        if d < e:
            logger.debug('bin search done with %f', d)
            return xn, xp

        mid = .5 * np.add(xn, xp)
        try:
            l = self.query(mid)
            if l == self.NEG:
                return self.push_to_b(mid, xp, e)
            else:
                return self.push_to_b(xn, mid, e)
        except RunOutOfBudget:
            logger.debug('Run out of budget %d, push_to_b failed' % self.budget)
            raise RunOutOfBudget
File: color-sorter.py, project: rubiks-color-resolver, author: dwalton76
def travelling_salesman(colors):
    colors_length = len(colors)

    # Distance matrix
    A = np.zeros([colors_length, colors_length])
    for x in range(0, colors_length):
        for y in range(0, colors_length):
            A[x,y] = distance.euclidean(colors[x], colors[y])

    # Nearest neighbour algorithm
    path = NN(A, 0)

    # Final array
    colors_nn = []
    for i in path:
        colors_nn.append(colors[i])

    return colors_nn
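
The NN helper used above is not shown in this snippet; a plausible nearest-neighbour path construction, sketched here as an assumption about its behaviour, visits the closest unvisited colour at each step starting from the given index.

import numpy as np

def NN(A, start):
    # greedy nearest-neighbour tour over the distance matrix A (hypothetical stand-in)
    n = A.shape[0]
    path = [start]
    visited = {start}
    while len(path) < n:
        last = path[-1]
        nxt = min((j for j in range(n) if j not in visited), key=lambda j: A[last, j])
        path.append(nxt)
        visited.add(nxt)
    return path
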
File: test_ei_optimization.py, project: SMAC3, author: automl
def test_local_search(self):

        def acquisition_function(point):
            point = [p.get_array() for p in point]
            opt = np.array([1, 1, 1, 1])
            dist = [euclidean(point, opt)]
            return np.array([-np.min(dist)])

        l = LocalSearch(acquisition_function, self.cs, epsilon=1e-10,
                        max_iterations=100000)

        start_point = self.cs.sample_configuration()
        acq_val_start_point = acquisition_function([start_point])

        acq_val_incumbent, _ = l._one_iter(start_point)

        # Local search needs to find something that is as least as good as the
        # start point
        self.assertLessEqual(acq_val_start_point, acq_val_incumbent)
File: clustering.py, project: clust, author: BaselAbujamous
def ckmeans(X, K, datasetID=-1, params=()):
    global kmeans_init

    pnames  = [     'init', 'max_iter', 'n_jobs',  'distance']
    #dflts  = ['k-means++',        300,       -1, 'euclidean']
    dflts   = [       'KA',        300,       -1, 'euclidean']
    if isinstance(params, np.ndarray):
        paramsloc = params.tolist()
    else:
        paramsloc = params
    (init, max_iter, n_jobs, distance) = ds.resolveargumentpairs(pnames, dflts, paramsloc)

    if datasetID in kmeans_init:
        init = kmeans_init[datasetID][0:K]
    elif init == 'KA':
        init = initclusterKA(X, K, distance)
    elif init == 'KA_memorysaver':
        init = initclusterKA_memorysaver(X, K, distance)

    C = skcl.KMeans(K, init=init, max_iter=max_iter, n_jobs=n_jobs).fit(X).labels_
    return clustVec2partMat(C, K)
File: clustering.py, project: clust, author: BaselAbujamous
def csoms(X, D, params=()):
    pnames = ['neighbour', 'learning_rate', 'input_length_ratio']
    dflts  = [        0.1,             0.2,                   -1]
    if isinstance(params, np.ndarray):
        paramsloc = params.tolist()
    else:
        paramsloc = params
    (neighbour, learning_rate, input_length_ratio) = ds.resolveargumentpairs(pnames, dflts, paramsloc)

    Xloc = np.array(X)

    K = D[0] * D[1] # Number of clusters
    N = Xloc.shape[0] # Number of genes
    Ndim = Xloc.shape[1] # Number of dimensions in X

    som = sompy.SOM(D, Xloc)
    som.set_parameter(neighbor=neighbour, learning_rate=learning_rate, input_length_ratio=input_length_ratio)

    centres = som.train(N).reshape(K, Ndim)
    dists = [[spdist.euclidean(c, x) for c in centres] for x in Xloc]
    C = [np.argmin(d) for d in dists]
    return clustVec2partMat(C, K)
File: drowsiness detection.py, project: driver-fatigue-detection-system, author: raja434
def eye_aspect_ratio(eye):
    # compute the euclidean distances between the two sets of
    # vertical eye landmarks (x, y)-coordinates
    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])

    # compute the euclidean distance between the horizontal
    # eye landmark (x, y)-coordinates
    C = dist.euclidean(eye[0], eye[3])

    # compute the eye aspect ratio
    ear = (A + B) / (2.0 * C)

    # return the eye aspect ratio
    return ear
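
An illustrative call with six made-up (x, y) eye landmarks in the p1..p6 ordering the function expects; the ratio drops toward zero as the eye closes.

import numpy as np

eye = np.array([(0, 3), (2, 5), (4, 5), (6, 3), (4, 1), (2, 1)])  # made-up landmarks
print(eye_aspect_ratio(eye))   # (4 + 4) / (2 * 6) = 0.667 for this open-eye shape
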

# construct the argument parser and parse the arguments
File: decisionboundaryplot.py, project: highdimensional-decision-boundary-plot, author: tmadl
def _find_decision_boundary_on_hypersphere(self, centroid, R, penalize_known=False):
        def objective(phi, grad=0):
            # search on hypersphere surface in polar coordinates - map back to cartesian
            cx = centroid + polar_to_cartesian(phi, R)
            try:
                cx2d = self.dimensionality_reduction.transform([cx])[0]
                error = self.decision_boundary_distance(cx)
                if penalize_known:
                    # slight penalty for being too close to already known decision boundary
                    # keypoints
                    db_distances = [euclidean(cx2d, self.decision_boundary_points_2d[k])
                                    for k in range(len(self.decision_boundary_points_2d))]
                    error += 1e-8 * ((self.mean_2d_dist - np.min(db_distances)) /
                                     self.mean_2d_dist)**2
                return error
            except Exception as ex:
                print("Error in objective function:", ex)
                return np.infty

        optimizer = self._get_optimizer(
            D=self.X.shape[1] - 1, upper_bound=2 * np.pi, iteration_budget=self.hypersphere_iteration_budget)
        optimizer.set_min_objective(objective)
        db_phi = optimizer.optimize([rnd.random() * 2 * np.pi for k in range(self.X.shape[1] - 1)])
        db_point = centroid + polar_to_cartesian(db_phi, R)
        return db_point
File: test_dbscan.py, project: Parallel-SGD, author: angadgill
def test_dbscan_feature():
    # Tests the DBSCAN algorithm with a feature vector array.
    # Parameters chosen specifically for this task.
    # Different eps to other test, because distance is not normalised.
    eps = 0.8
    min_samples = 10
    metric = 'euclidean'
    # Compute DBSCAN
    # parameters chosen for task
    core_samples, labels = dbscan(X, metric=metric, eps=eps,
                                  min_samples=min_samples)

    # number of clusters, ignoring noise if present
    n_clusters_1 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_1, n_clusters)

    db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples)
    labels = db.fit(X).labels_

    n_clusters_2 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_2, n_clusters)
File: test_dbscan.py, project: Parallel-SGD, author: angadgill
def test_dbscan_callable():
    # Tests the DBSCAN algorithm with a callable metric.
    # Parameters chosen specifically for this task.
    # Different eps to other test, because distance is not normalised.
    eps = 0.8
    min_samples = 10
    # metric is the function reference, not the string key.
    metric = distance.euclidean
    # Compute DBSCAN
    # parameters chosen for task
    core_samples, labels = dbscan(X, metric=metric, eps=eps,
                                  min_samples=min_samples,
                                  algorithm='ball_tree')

    # number of clusters, ignoring noise if present
    n_clusters_1 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_1, n_clusters)

    db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples,
                algorithm='ball_tree')
    labels = db.fit(X).labels_

    n_clusters_2 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_2, n_clusters)
File: kernel.py, project: addc, author: carsonfarmer
def exponential(x, y, sigma=1):
    """Compute an exponential kernel.

    The exponential kernel is closely related to the Gaussian kernel, with
    only the square of the norm left out. It is also a radial basis function
    kernel:
                    K(x, y) = exp(-||x - y|| / (2 * sigma^2))
    where `x` and `y` are vectors in the input space (i.e., vectors of
    features computed from training or test samples), `||x - y||` is the
    Euclidean norm, and the adjustable parameter `sigma` is used to adjust
    the kernel 'bandwidth'. It is important to note that the observations made
    about the `sigma` parameter for the Gaussian kernel also apply to the
    Exponential and Laplacian kernels.

    See Also
    --------
    gaussian
    """
    return exp(-dist.euclidean(x, y) / (2 * sigma ** 2))
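
A quick check of the kernel value; the module-level imports (exp and dist) from the snippet's kernel.py are assumed, and sigma is chosen arbitrarily.

import numpy as np

x = np.array([0.0, 1.0])
y = np.array([1.0, 1.0])
print(exponential(x, y, sigma=1))   # ||x - y|| = 1, so exp(-1 / 2) is roughly 0.607
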
File: kernel.py, project: addc, author: carsonfarmer
def circular(x, y, sigma):
    """Compute a circular kernel.

    The circular kernel is used in geostatic applications. It is an example
    of an isotropic stationary kernel and is positive definite in R^2:
    K(x, y) = 2/pi arccos(-(||x - y|| / sigma)) -
              2/pi (||x - y|| / sigma) sqrt(1 - (||x - y|| / sigma)^2)
    if ||x - y|| < sigma, zero otherwise.
    where `x` and `y` are vectors in the input space (i.e., vectors of
    features computed from training or test samples), ||x - y|| is the
    Euclidean norm, and sigma is a free parameter with no reasonable default.
    In other words, `sigma` should be defined *a priori* based on some
    geostatistical analysis, such as semi-variogram analysis.
    """
    pi2 = 2/PI
    norm_sigma = dist.euclidean(x, y) / sigma
    if norm_sigma >= 1:
        # the kernel is zero outside ||x - y|| < sigma (also avoids a math domain error)
        return 0.0
    return pi2*acos(-norm_sigma) - pi2*norm_sigma*sqrt(1 - norm_sigma**2)
File: kernel.py, project: addc, author: carsonfarmer
def kernel_dist(kernel=linear, **kw):
    """Generic kernel-induced distance metric.

    To use, `partially` apply this function with a kernel argument.

    Examples
    --------
    >>> from functools import partial
    >>> dist = partial(kernel_dist, kernel=sigmoid)
    """
    if kernel is None:  # Don't use a kernel!
        return dist.euclidean
    elif getattr(kernel, "__name__") == "gaussian":
        # We have a 'shortcut' for Gaussian kernels... this is kinda hacky
        # But maybe worth it given the speedup our shortcut gets us?
        return lambda x, y: 2 - 2*gaussian(x, y, kw.get("sigma", 1))
    else:
        kern = partial(kernel, **kw)
        return lambda x, y: kern(x, x) - 2*kern(x, y) + kern(y, y)
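
An illustrative use, assuming the module's linear kernel is a plain dot product: the induced distance k(x,x) - 2k(x,y) + k(y,y) then equals the squared Euclidean distance.

import numpy as np

d = kernel_dist(kernel=linear)            # returns a callable distance (linear assumed to be a dot product)
x, y = np.array([1.0, 0.0]), np.array([0.0, 1.0])
print(d(x, y))                            # 1 - 0 + 1 = 2, the squared Euclidean distance
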
File: coonswarp.py, project: RasterFairy, author: Quasimondo
def getPointOnHull(hullPoints, t, totalLength):
    lh = len(hullPoints)
    for j in range(lh + 1):
        sideLength = distance.euclidean(hullPoints[j % lh], hullPoints[(j + 1) % lh])
        t_sub = sideLength / totalLength
        if t > t_sub:
            t -= t_sub
        else:
            return lerp(hullPoints[j % lh], hullPoints[(j + 1) % lh], t / t_sub)
File: ep5.py, project: MachineLearningRecipes, author: officialgupta
def euc(a,b): #function to return distance between points
    return distance.euclidean(a,b)
File: main.py, project: MachineLearningBasics, author: zoebchhatriwala
def euc(a, b):
    return distance.euclidean(a, b)


# New Classifier
File: test_dbcb.py, project: DBCV, author: christopherjenness
def test_DBCV(data):
    kmeans = KMeans(n_clusters=2)
    kmeans_labels = kmeans.fit_predict(data)
    hdbscanner = hdbscan.HDBSCAN()
    hdbscan_labels = hdbscanner.fit_predict(data)
    kmeans_score = DBCV.DBCV(data, kmeans_labels, dist_function=euclidean)
    hdbscan_score = DBCV.DBCV(data, hdbscan_labels, dist_function=euclidean)
    assert hdbscan_score > kmeans_score
File: test_dbcb.py, project: DBCV, author: christopherjenness
def test__core_dist(data):
    target = 0.09325490419185979
    point = data[0]
    core_dist = DBCV._core_dist(point, data, euclidean)
    assert abs(core_dist - target) < 0.001
File: test_dbcb.py, project: DBCV, author: christopherjenness
def test__mutual_reachability_dist(data):
    target = 0.074196034579080888
    point_1 = data[0]
    point_2 = data[1]
    dist = DBCV._mutual_reachability_dist(point_1, point_2, data, data,
                                          euclidean)
    assert dist == euclidean(point_1, point_2)
    point_3 = data[5]
    point_4 = data[46]
    dist_2 = DBCV._mutual_reachability_dist(point_3, point_4, data, data,
                                            euclidean)
    assert abs(dist_2 - target) < 0.001
File: test_dbcb.py, project: DBCV, author: christopherjenness
def test__mutual_reach_dist_graph(data):
    target = 0.09872567819414102
    hdbscanner = hdbscan.HDBSCAN()
    hdbscan_labels = hdbscanner.fit_predict(data)
    graph = DBCV._mutual_reach_dist_graph(data, hdbscan_labels,
                                          euclidean)
    assert graph.shape == (data.shape[0], data.shape[0])
    assert abs(graph[0][0] - target) < 0.001
File: CD.py, project: XTREE, author: ai-se
def closest(i, arr):
    """
    :param arr: np array (len=No. Indep var + No. Depen var)
    :return: members of i.clstr sorted by Euclidean distance to arr (closest first)
    """
    return sorted(i.clstr, key= lambda x: edist(arr[:-1], x.sample[:-1]))
File: CD.py, project: XTREE, author: ai-se
def closest(i, arr):
    """
    :param arr: np array (len=No. Indep var + No. Depen var)
    :return: members of i.clstr sorted by Euclidean distance to arr (closest first)
    """
    return sorted(i.clstr, key= lambda x: edist(arr.sample[:-1], x.sample[:-1]))
File: lda_visualizer.py, project: quoll, author: LanguageMachines
def calculate_distance(self,vector1,vector2):
        vectordist = distance.euclidean(vector1,vector2)
        return vectordist

