Example source code for Python's DBSCAN class

cluster_points.py (project: eclipse2017, author: google)
def cluster_points(coordinates, eps, min_samples, n_jobs=1):
    """Given coordinates, function returns the number of clusters in the
    set of coordinates and a list of integer labels corresponding to
    the input coordinate list

    Arguments:
      coordinates: a sequence of (lat, lon) tuples
      eps: the cluster size in radial degrees
      min_samples: the size of the smallest cluster
      n_jobs: number of CPUs to use to compute the clusters
    Returns:
      n_clusters: number of clusters
      labels: the labels of the clusters
    """

    db = DBSCAN(eps=eps,
                min_samples=min_samples,
                n_jobs=n_jobs).fit(coordinates)

    labels = db.labels_
    # -1 marks noise points, which do not count as a cluster
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    return n_clusters, labels
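A minimal usage sketch for the function above, assuming DBSCAN is imported from sklearn.cluster at module level; the sample coordinates are made up:

from sklearn.cluster import DBSCAN

# Two tight groups plus one far-away outlier; eps is in degrees.
coords = [(37.000, -122.000), (37.001, -122.001), (37.002, -122.000),
          (40.000, -75.000), (40.001, -75.001), (40.000, -75.002),
          (0.0, 0.0)]
n_clusters, labels = cluster_points(coords, eps=0.01, min_samples=2)
print(n_clusters)        # 2
print(labels.tolist())   # e.g. [0, 0, 0, 1, 1, 1, -1], -1 marks noise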
iFruitFly_Testing_weka.py (project: iFruitFly, author: AdnanMuhib)
def dbFun(_x, _original_vals, f):
    db = DBSCAN(eps=0.3, min_samples=20).fit(_x)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True

    labels = db.labels_
    #print(labels)
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    #gettingCharacteristics(_x, core_samples_mask, labels, n_clusters_,
    #_original_vals)
    print("Wait plotting clusters.....")
    plotCluster(_x, labels, core_samples_mask, n_clusters_, f)
    return
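The noise-count conditional above is easy to get wrong: a bare "1 if -1 else 0" always subtracts one, because the literal -1 is truthy. A quick check of the membership test:

labels = [0, 0, 1, 1]                                 # no noise points
print(len(set(labels)) - (1 if -1 else 0))            # 1, wrong: -1 is truthy
print(len(set(labels)) - (1 if -1 in labels else 0))  # 2, correct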

##############################################################################################
# Plotting the cluster after the result of DBSCAN
sklearn_basic.py (project: base_function, author: Rockyzsu)
def dbscan(fig):
    global X_iris, geo
    ax = fig.add_subplot(geo + 5, projection='3d', title='dbscan')
    dbscan = cluster.DBSCAN()
    dbscan.fit(X_iris)
    res = dbscan.labels_
    core = dbscan.core_sample_indices_
    print repr(core)
    size = [5 if i not in core else 40 for i in range(len(X_iris))]
    print repr(size)
    for n, i in enumerate(X_iris):
        ax.scatter(*i[: 3], s=size[n], c='bgrcmyk'[res[n] % 7],
                   alpha=0.8, marker='o')

    ax.set_xlabel('X Label')
    ax.set_ylabel('Y Label')
    ax.set_zlabel('Z Label')
    return res
alg.py (project: image-segmentation, author: alexlouden)
def cluster_dbscan(self, image_cols):
        print 'DBSCAN'
        # TODO handle outliers/noise
        # Look at different metrics?

        db = DBSCAN(eps=self.params.epsilon, min_samples=10, metric='euclidean')
        db.fit(image_cols)

        # from IPython import embed; embed(); import ipdb; ipdb.set_trace()
        self.number_of_clusters = np.max(db.labels_) + 1
        # Ignore -1 cluster, it's noise

        print 'number of clusters', self.number_of_clusters

        # Clusters
        centers = np.zeros((self.number_of_clusters, 3))
        for i in range(0, self.number_of_clusters):
            cluster_points = image_cols[db.labels_ == i]
            cluster_mean = np.mean(cluster_points, axis=0)
            centers[i, :] = cluster_mean

        return centers
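The same centre computation as a standalone sketch; the synthetic colour data and parameter values below are invented for illustration:

import numpy as np
from sklearn.cluster import DBSCAN

rng = np.random.default_rng(0)
# Two synthetic colour blobs standing in for image_cols (N x 3 pixels).
image_cols = np.vstack([rng.normal(50, 2, (100, 3)),
                        rng.normal(200, 2, (100, 3))])
db = DBSCAN(eps=10, min_samples=10).fit(image_cols)
number_of_clusters = np.max(db.labels_) + 1      # the -1 noise label is ignored
centers = np.zeros((number_of_clusters, 3))
for i in range(number_of_clusters):
    centers[i, :] = image_cols[db.labels_ == i].mean(axis=0)
print(number_of_clusters, centers.round(0))      # 2 clusters, near 50 and 200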
dbscan.py (project: icing, author: slipguru)
def train(self, data, sample_weight=None):
        """
        :type data: pyspark.RDD
        :param data: (key, k-dim vector like)
        Train the model using a (key, vector) RDD
        """
        parts = KDPartitioner(data, self.max_partitions)
        self.data = data
        self.bounding_boxes = parts.bounding_boxes
        self.expanded_boxes = {}
        self._create_neighborhoods()
        # repartition data set on the partition label
        self.data = self.data.map(lambda ((k, p), v): (p, (k, v))) \
            .partitionBy(len(parts.partitions)) \
            .map(lambda (p, (k, v)): ((k, p), v))
        # create parameters for sklearn DBSCAN
        params = self.dbscan_params or {
            'eps': self.eps,
            'min_samples': self.min_samples,
            'metric': self.metric}
        # perform dbscan on each part
        self.data = self.data.mapPartitions(
            lambda iterable: dbscan_partition(iterable, params, sample_weight))
        self.data.cache()
        self._remap_cluster_ids()
calibrate.py (project: pyhiro, author: wanweiwei07)
def __init__(self, ompath, density = 4.0):
        """

        :param ompath: path of the mesh template

        author: weiwei
        date: 20170711
        """

        cadtemp = CADTemp.CADTemp(ompath = ompath, density = density)

        self.objnp = pg.packpandanp(cadtemp.objtrimesh.vertices,
                               cadtemp.objtrimesh.face_normals,
                               cadtemp.objtrimesh.faces,
                               name='')
        self.temppnt = cadtemp.pcdtemp

        self.kinect = PyKinectRuntime.PyKinectRuntime(PyKinectV2.FrameSourceTypes_Depth)
        self.dbscan = DBSCAN(eps=50, min_samples=100, n_jobs=-1)
        self.randsac = linear_model.RANSACRegressor(linear_model.LinearRegression(), residual_threshold = 15)
        self.tablepnt = []
        self.objectpnt = []
dbscan.py (project: scikit-discovery, author: MITHaystack)
def process(self, obj_data):
        ''' 
        Run DBScan on data. Stores result in data wrapper

        @param obj_data: Data wrapper to be processed
        '''

        epsilon = self.ap_paramList[0]()
        min_points = self.ap_paramList[1]()

        results = dict()


        for label, data in obj_data.getIterator():
            results[label] = DBSCAN(eps=epsilon, min_samples = min_points).fit_predict(data.loc[:,self.column_names])

        obj_data.addResult(self.str_description, results)
test_base.py (project: yellowbrick, author: DistrictDataLabs)
def test_clusterer_enforcement(self):
        """
        Assert that only clustering estimators can be passed to cluster viz
        """
        nomodels = [
            SVC, SVR, Ridge, RidgeCV, LinearRegression, RandomForestClassifier
        ]

        for nomodel in nomodels:
            with self.assertRaises(YellowbrickTypeError):
                visualizer = ClusteringScoreVisualizer(nomodel())

        models = [
            KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift, DBSCAN, Birch
        ]

        for model in models:
            try:
                visualizer = ClusteringScoreVisualizer(model())
            except YellowbrickTypeError:
                self.fail("could not pass clustering estimator to visualizer")
cluster_engine.py (project: artorithmia, author: alichtner)
def fit(self, model, n_clusters=5):
        """
        Fits clusters to the feature set using a Kmeans model.

        Input:  n_clusters (int) number of clusters to use during clustering
        Output: None
        """
        self.n_clusters = n_clusters
        scaler = StandardScaler()
        self.features = scaler.fit_transform(self.features)

        if model == 'kmeans':
            self.model = KMeans(self.n_clusters)
        elif model == 'DBSCAN':
            self.model = DBSCAN(eps=0.3, min_samples = 3)
        self.cluster_fit = self.model.fit(self.features)
        print ('-- Running clustering on {} piece collection --'
               .format(self.n_artworks))
echoDoc0.1.py (project: EchoBurst, author: TyJK)
def newDBSCANModel(vectorFile, outputFile):
    model = Doc2Vec.load("Models\\" + vectorFile)
    vecs = []
    for doc in range(0, len(model.docvecs)):
        doc_vec = model.docvecs[doc]
        # print doc_vec
        vecs.append(doc_vec.reshape((1, 300)))

    doc_vecs = np.array(vecs, dtype='float')  # TSNE expects float type values

    # print doc_vecs
    docs = []
    for i in doc_vecs:
        docs.append(i[0])
    db = DBSCAN(eps=0.03, algorithm="brute", metric='cosine').fit(docs)
    joblib.dump(db, outputFile)


    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    clusters = db.labels_.tolist()
    cluster_info = {'labels': model.docvecs.offset2doctag,
                    "index, wordcount and repeated words": [model.docvecs.doctags[x] for x in
                                                            model.docvecs.offset2doctag],
                    'clusters': clusters}
    sentenceDF = pd.DataFrame(cluster_info, index=[clusters],
                              columns=['labels', "index, wordcount and repeated words", 'clusters'])
    print(sentenceDF)
    sentenceDF.to_csv("DBSCAN.csv")

    print('Estimated number of clusters: %d' % n_clusters_)
dbscan.py (project: pypardis, author: bwoneill)
def dbscan_partition(iterable, params):
    """
    :type iterable: iter
    :param iterable: iterator yielding ((key, partition), vector)
    :type params: dict
    :param params: dictionary containing sklearn DBSCAN parameters
    :rtype: iter
    :return: ((key, cluster_id), v)
    Performs a DBSCAN on a given partition of the data
    """
    # read iterable into local memory
    data = list(iterable)
    (key, part), vector = data[0]
    x = np.array([v for (_, __), v in data])
    y = np.array([k for (k, _), __ in data])
    # perform DBSCAN
    model = skc.DBSCAN(**params)
    c = model.fit_predict(x)
    cores = set(model.core_sample_indices_)
    # yield (key, cluster_id), non-core samples labeled with *
    for i in xrange(len(c)):
        flag = '' if i in cores else '*'
        yield (y[i], '%i:%i%s' % (part, c[i], flag))
cluster_tools.py (project: SUPPA, author: comprna)
def DBSCAN_cluster(psi_matrix, eventid_lst, dist, minpts, metric):

    # Setting logging preferences
    logger = logging.getLogger(__name__)

    # The "cosine" metric works only with the "brute" algorithm
    if metric == "cosine":
        alg = 'brute'
    else:
        alg = 'auto'

    try:
        db = DBSCAN(eps=dist, min_samples=minpts, metric=metric, algorithm=alg).fit(psi_matrix)
        labels = db.labels_
    except Exception:
        logger.error("Unknown error: {}".format(sys.exc_info()))
        sys.exit(1)

    eventid_labels_dict = {k: v for k, v in zip(eventid_lst, labels)}

    return eventid_labels_dict, labels
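A hypothetical call, assuming the module-level imports (DBSCAN, logging, sys) that cluster_tools.py relies on; the PSI values and event ids are invented:

import numpy as np

psi = np.array([[0.10, 0.20, 0.10],     # 4 events x 3 conditions
                [0.12, 0.21, 0.09],
                [0.90, 0.85, 0.95],
                [0.88, 0.90, 0.93]])
ids = ["ev1", "ev2", "ev3", "ev4"]
mapping, labels = DBSCAN_cluster(psi, ids, dist=0.1, minpts=2, metric="euclidean")
print(mapping)   # e.g. {'ev1': 0, 'ev2': 0, 'ev3': 1, 'ev4': 1}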
cluster_tools.py (project: SUPPA, author: comprna)
def cluster_analysis(dpsi, psivec, sig_threshold, dpsi_threshold, eps, minpts, metric, indexes, clustering,
                     separation, output):

    path = os.path.dirname(os.path.realpath(dpsi))
    os.chdir(path)

    psi_matrix, eventid_lst = process_cluster_input(dpsi, psivec, sig_threshold, dpsi_threshold, indexes)

    if(clustering=="DBSCAN"):
        eventid_labels_dict, labels = DBSCAN_cluster(psi_matrix, eventid_lst, eps, minpts, metric)
        # eventid_labels_dict maps each event id to its cluster label

        write_averaged_cluster_output(psi_matrix, eventid_lst, eventid_labels_dict, output)
        calculate_cluster_scores(psi_matrix, labels, output)

    else:
        #OPTICS
        points_list = create_points_list(psi_matrix, eventid_lst) #Transform the points on psi_matrix to Points from optics.py
        optics = Optics(points_list, eps, minpts)  # Maximum radius to be considered, cluster size >= 2 points
        optics.run()  # run the algorithm
        clusters = optics.cluster(separation)  # minimum threshold for clustering (upper limit to separate the clusters)
        eventid_labels_dict, labels = generate_labels(clusters, eventid_lst)
        write_averaged_cluster_output(psi_matrix, eventid_lst, eventid_labels_dict, output)
        calculate_cluster_scores(psi_matrix, labels, output)
clusters.py (project: extract, author: dblalock)
def makeDBScan(X=None, k=-1):
    return cluster.DBSCAN(eps=.2)
_cluster.py (project: ananke, author: beiko-lab)
def sts_matrix_generator(ind, slope_matrix):
    """Work-horse function. Computes the short time-series (STS) distance for
    an index, ind of the slope matrix.

    Parameters
    ----------
    ind: int
        The index of the slope matrix that is being computed.
    slope_matrix: np.matrix
        The slope matrix.

    Returns
    -------
        (ind, dists): ind is the index and dists is a np.matrix containing the
                      STS distances
    """
    mx = slope_matrix[ind, :]
    mv = slope_matrix[ind:, :]
    mx_rep = np.vstack((mx,)*mv.shape[0])
    diff = mx_rep - mv
    diff = np.square(diff)
    sts_squared = diff.sum(axis=1)
    dists = np.sqrt(sts_squared)
    return (ind, dists)
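sts_matrix_generator computes one row block at a time; below is a sketch (the assembly loop is my assumption, not part of the source) of stacking the per-index results into a full symmetric distance matrix:

import numpy as np

def sts_distance_matrix(slope_matrix):
    """Assemble the symmetric STS distance matrix row block by row block."""
    n = slope_matrix.shape[0]
    dists = np.zeros((n, n))
    for ind in range(n):
        _, d = sts_matrix_generator(ind, slope_matrix)
        d = np.asarray(d).ravel()   # distances from row ind to rows ind..n-1
        dists[ind, ind:] = d
        dists[ind:, ind] = d        # mirror into the lower triangle
    return dists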

#  DBSCAN from scikit learn
_cluster.py (project: ananke, author: beiko-lab)
def cluster_dbscan(matrix, distance_measure="sts", eps=1):
    """Clusters the distance matrix for a given epsilon value, if distance
    measure is sts. Other distance measures are: [‘cityblock’, ‘cosine’, 
    ‘euclidean’, ‘l1’, ‘l2’, ‘manhattan’, ‘braycurtis’, ‘canberra’, 
    ‘chebyshev’, ‘correlation’, ‘dice’, ‘hamming’, ‘jaccard’, ‘kulsinski’, 
    ‘mahalanobis’, ‘matching’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, 
    ‘seuclidean’, ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’]

    Parameters
    ----------
    matrix: np.matrix
        The input matrix. If distance measure is sts, this should be the sts
        distance matrix. If other distance, this should be the time-series
        matrix of size ngenes x nsamples.
    distance_measure: str
        The distance measure, default is sts, short time-series distance.
        Any distance measure available in scikit-learn is available here.
        Note: multiple time-series are NOT supported for distances other
        than "sts".

    Returns
    -------
    cluster_labels: list of int
        A list of size ngenes that defines cluster membership.
    """
    if (distance_measure == "sts"):
        dbs = DBSCAN(eps=eps, metric='precomputed', min_samples=2)
    else:
        dbs = DBSCAN(eps=eps, metric=distance_measure, min_samples=2)
    cluster_labels = dbs.fit_predict(matrix)
    return cluster_labels
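A short usage sketch with a precomputed STS matrix. Since sts_matrix_generator reduces to Euclidean distances between slope rows, pdist can stand in here; the slope values are invented:

import numpy as np
from scipy.spatial.distance import pdist, squareform

slopes = np.array([[0.10, 0.20],    # 3 time-series, 2 slopes each
                   [0.11, 0.19],
                   [1.50, -0.70]])
sts = squareform(pdist(slopes))     # Euclidean distance between slope rows
print(cluster_dbscan(sts, distance_measure="sts", eps=0.5))  # e.g. [ 0  0 -1]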
setup_ifruitfly.py (project: iFruitFly, author: AdnanMuhib)
def dbFun(_x, _original_vals, f):
    db = DBSCAN(eps=0.3, min_samples=20).fit(_x)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True

    labels = db.labels_
    #print(labels)
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    #gettingCharacteristics(_x, core_samples_mask, labels, n_clusters_,
    #_original_vals)
    print("Wait plotting clusters.....")
    plotCluster(_x, labels, core_samples_mask, n_clusters_, f)
    return
iFruitFly_v2.0.py (project: iFruitFly, author: AdnanMuhib)
def dbFun(_x, _original_vals, f):
    db = DBSCAN(eps=0.3, min_samples=20).fit(_x)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True

    labels = db.labels_
    #print(labels)
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    #gettingCharacteristics(_x, core_samples_mask, labels, n_clusters_,
    #_original_vals)
    print("Wait plotting clusters.....")
    plotCluster(_x, labels, core_samples_mask, n_clusters_, f)
    return
iFruitFly_Testing_weka.py (project: iFruitFly, author: AdnanMuhib)
def demo_printing_picture(anomaly_file, prefix, rgb_directory, pre_prefix, dir, file_name):
    #clusters = webDemo.main(anomaly_file,
    #"D:\\ifruitly_junk\\results\\result.jpg")
    clusters = v_demo(anomaly_file, prefix, pre_prefix, file_name, dir)
    return

##############################################################################################
# Running the DBSCAN for output
dbscan.py (project: lol-category, author: vonum)
def db_scan(data, eps, min_samples, metric):
  dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric=metric).fit(data)
  print 'DBSCAN'
  print metrics.silhouette_score(data, dbscan.labels_)
  print collections.Counter(dbscan.labels_)
  reduced_data = reduce_with_pca(data)
  plot_2d_data(reduced_data, dbscan.labels_)
dmonscilearncluster.py (project: dmon-adp, author: igabriel85)
def sdbscanTrain(self, settings, mname, data):
        '''
        :param data: -> dataframe with data
        :param settings: -> settings dictionary
        :param mname: -> name of serialized clusterer
        :return: -> clusterer
        :example settings: -> {eps:0.9, min_samples:10, metric:'euclidean' ,
        algorithm:'auto, leaf_size:30, p:0.2, n_jobs:1}
        '''
        for k, v in settings.iteritems():
            logger.info('[%s] : [INFO] SDBSCAN %s set to %s',
                         datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v)
            print "SDBSCAN %s set to %s" % (k, v)
        sdata = StandardScaler().fit_transform(data)
        try:
            db = DBSCAN(eps=float(settings['eps']), min_samples=int(settings['min_samples']), metric=settings['metric'],
                        algorithm=settings['algorithm'], leaf_size=int(settings['leaf_size']), p=float(settings['p']),
                        n_jobs=int(settings['n_jobs'])).fit(sdata)
        except Exception as inst:
            logger.error('[%s] : [ERROR] Cannot instantiate sDBSCAN with %s and %s',
                           datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args)
            print "Error while instantiating sDBSCAN with %s and %s" % (type(inst), inst.args)
            sys.exit(1)
        labels = db.labels_
        print labels
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        print 'Estimated number of clusters: %d' % n_clusters_
        self.__serializemodel(db, 'sdbscan', mname)
        return db
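The same settings plumbing outside the class, as a runnable sketch; the settings values and random data are invented:

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

settings = {'eps': 0.9, 'min_samples': 10, 'metric': 'euclidean',
            'algorithm': 'auto', 'leaf_size': 30, 'p': 2, 'n_jobs': 1}
sdata = StandardScaler().fit_transform(np.random.rand(200, 4))
db = DBSCAN(eps=float(settings['eps']), min_samples=int(settings['min_samples']),
            metric=settings['metric'], algorithm=settings['algorithm'],
            leaf_size=int(settings['leaf_size']), p=float(settings['p']),
            n_jobs=int(settings['n_jobs'])).fit(sdata)
print(len(set(db.labels_)) - (1 if -1 in db.labels_ else 0))  # cluster count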
alg.py (project: image-segmentation, author: alexlouden)
def __init__(self, image, colour_space='hsv', cluster_method='ward', scale=None, num_clusters=None, quantile=None):
        self.image = image
        self.colour_space = colour_space
        self.cluster_method = cluster_method

        self.params = Parameters()

        # Scaling colour space
        if scale is None:
            self.params.scale = (1, 1, 1)
        else:
            # TODO validate 3 float tuple
            self.params.scale = scale

        # K-means param
        if num_clusters is None:
            self.params.num_clusters = 8
        else:
            # TODO validate
            self.params.num_clusters = int(num_clusters)

        # Mean-shift param
        if quantile is None:
            self.params.quantile = 0.1
        else:
            self.params.quantile = float(quantile)

        # DBSCAN param
        # if epsilon is None:
        self.params.epsilon = 255*0.1

        # Log
        h, w = self.image.shape[:2]
        msg = 'Clustering a {}x{} image: cluster_method={} colour_space={} num_clusters={} quantile={}'.format(
            w, h, cluster_method, colour_space, num_clusters, quantile
        )
        print msg
cluster.py (project: FreeDiscovery, author: FreeDiscovery)
def dbscan(self, n_clusters=None, eps=0.5, min_samples=10,
               algorithm='auto', leaf_size=30):
        """
        Perform DBSCAN clustering

        This can also be used for Duplicate Detection (when eps is set to a small value).

        Parameters
        ----------
        n_clusters : int
            number of clusters (not used; present only for API compatibility)
        lsi_components : int
            apply LSA before the clustering algorithm
        eps : float
            The maximum distance between two samples for them to be considered
             as in the same neighborhood.
        min_samples : int
            The number of samples (or total weight) in a neighborhood
            for a point to be considered as a core point.
            This includes the point itself.
        """
        from sklearn.cluster import DBSCAN
        pars = {'is_hierarchical': False, "metric": self.metric}

        km = DBSCAN(eps=eps, min_samples=min_samples, algorithm=algorithm,
                    leaf_size=leaf_size)

        return self._cluster_func(n_clusters, km, pars)
cluster.py (project: Particle-Picking-Cryo-EM, author: hqythu)
def main():
    centers = get_list('out_center.txt')
    labels = get_list('142-label.txt')
    judge(centers, labels)
    n_class = int(len(centers) * 0.18)
    est = KMeans(n_clusters=n_class, max_iter=1000)
    est.fit(centers)
    new_list = []
    for x, y in est.cluster_centers_:
        min_num = 10000
        min_x = -1
        min_y = -1
        for x_, y_ in centers:
            dist = distance(x, y, x_, y_)
            if (dist < min_num) or (min_x == -1):
                min_num = dist
                min_x = x_
                min_y = y_
        new_list.append([min_x, min_y])
    judge(new_list, labels)
    judge(est.cluster_centers_, labels)

    # db = DBSCAN(eps=0.3, min_samples=180).fit(centers)
    # print(db.core_sample_indices_)
    # judge(new_list, labels)
    # print(est.cluster_centers_)
    # save_list('result.txt', est.cluster_centers_)
    # af = AffinityPropagation(preference=180).fit(centers)
    # judge(af.cluster_centers_, labels)
dbscan.py (project: icing, author: slipguru)
def dbscan_partition(iterable, params, sample_weight=None):
    """
    :type iterable: iter
    :param iterable: iterator yielding ((key, partition), vector)
    :type params: dict
    :param params: dictionary containing sklearn DBSCAN parameters
    :rtype: iter
    :return: ((key, cluster_id), v)
    Performs a DBSCAN on a given partition of the data
    """
    # read iterable into local memory
    data = list(iterable)
    (key, part), vector = data[0]
    x = np.array([v for (_, __), v in data])
    y = np.array([k for (k, _), __ in data])
    # perform DBSCAN
    model = skc.DBSCAN(**params)
    # import sys
    # print(model, file=sys.stderr)
    weights = [sample_weight[k[0]] for k in x]
    c = model.fit_predict(x, sample_weight=weights)
    cores = set(model.core_sample_indices_)
    # yield (key, cluster_id), non-core samples labeled with *
    for i in xrange(len(c)):
        flag = '' if i in cores else '*'
        yield (y[i], '%i:%i%s' % (part, c[i], flag))
car_recognizer.py (project: Vision-based-parking-lot-availability-OpenCV, author: Saar1312)
def dbscan(points,eps,min_samples):
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(points) # eps=5 min_samples = 80

    # Labeling pixels by cluster
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    # Creating list of clusters
    return [points[labels == i] for i in xrange(n_clusters_)]
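A toy call (note the snippet is Python 2, per xrange; the pixel coordinates are invented):

import numpy as np
from sklearn.cluster import DBSCAN

points = np.array([[10, 10], [11, 10], [10, 11],
                   [200, 200], [201, 200], [200, 201]])
clusters = dbscan(points, eps=5, min_samples=2)
print(len(clusters))   # 2
print(clusters[0])     # the pixels in the first cluster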
cluster.py (project: textcatvis, author: cod3licious)
def cluster_texts(textdict, eps=0.45, min_samples=3):
    """
    cluster the given texts

    Input:
        textdict: dictionary with {docid: text}
    Returns:
        doccats: dictionary with {docid: cluster_id}
    """
    doc_ids = list(textdict.keys())
    # transform texts into length normalized kpca features
    ft = FeatureTransform(norm='max', weight=True, renorm='length', norm_num=False)
    docfeats = ft.texts2features(textdict)
    X, featurenames = features2mat(docfeats, doc_ids)
    e_lkpca = KernelPCA(n_components=250, kernel='linear')
    X = e_lkpca.fit_transform(X)
    xnorm = np.linalg.norm(X, axis=1)
    X = X/xnorm.reshape(X.shape[0], 1)
    # compute cosine similarity
    D = 1. - linear_kernel(X)
    # and cluster with dbscan
    clst = DBSCAN(eps=eps, metric='precomputed', min_samples=min_samples)
    y_pred = clst.fit_predict(D)
    return {did: y_pred[i] for i, did in enumerate(doc_ids)}
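The precomputed cosine-distance idiom from the tail of this function, isolated into a self-contained sketch (random vectors stand in for the kernel-PCA features):

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import linear_kernel

X = np.random.rand(20, 5)
X = X / np.linalg.norm(X, axis=1).reshape(-1, 1)   # length-normalize rows
D = np.clip(1. - linear_kernel(X), 0., None)       # cosine distance, clipped at 0
y_pred = DBSCAN(eps=0.45, metric='precomputed', min_samples=3).fit_predict(D)
print(y_pred)   # -1 for noise, otherwise cluster ids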
tst7.py (project: pyhiro, author: wanweiwei07)
def getRotMat(verts):
    """

    find the table and do calibration

    :param verts: see depthToXYZ
    :return:

    author: weiwei
    date: 20170711
    """

    cutverts = []
    for vert in verts:
        if vert[0] < 700.0 and vert[0] > -700.0:
            if vert[1] < 200.0 and vert[1] > -600.0:
                if vert[2] < -1000.0 and vert[2] > -1500.0:
                    cutverts.append([vert[0], vert[1], vert[2]])

    # clustering using DBSCAN
    X = np.array(cutverts)
    db = DBSCAN(eps=20, min_samples = 100, n_jobs = -1).fit(X)
    print db.labels_
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    returnvertslist = []
    unique_labels = set(labels)
    for k in unique_labels:
        class_member_mask = (labels == k)
        print class_member_mask, core_samples_mask
        xyzlist = X[class_member_mask & core_samples_mask]
        print xyzlist
        returnvertslist.append(xyzlist.tolist())

    return returnvertslist
    # return verts
kinectinterface.py (project: pyhiro, author: wanweiwei07)
def __init__(self):
        """
        Kinect interface

        author: weiwei
        date: 20170715
        """

        self.kinect = PyKinectRuntime.PyKinectRuntime(PyKinectV2.FrameSourceTypes_Depth)
        self.dbscan = DBSCAN(eps=50, min_samples=100, n_jobs=-1)
        self.randsac = linear_model.RANSACRegressor(linear_model.LinearRegression(), residual_threshold = 15)
classifiers.py (project: MasterDegree, author: Waszker)
def _get_dbscan(parameters):
    if parameters is None:
        parameters = {
        }
    return DBSCAN(**parameters)
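Both calls below return an unfitted estimator; the parameter values are invented:

model = _get_dbscan(None)                            # all scikit-learn defaults
model = _get_dbscan({'eps': 0.5, 'min_samples': 5})  # passed through as kwargs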

