Python cluster() usage examples from open-source projects

friends.py (project: massivedatans, author: JohannesBuchner)
def debugplot(self, u = None):
        print('creating plot...')
        n = len(self.region['members'][0]) // 2
        plt.figure(figsize=(6, n/2*4+1))
        m = self.region['members']
        d = self.region['maxdistance']
        for i in range(n):
            plt.subplot(int(numpy.ceil(n / 2.)), 2, 1 + i)
            j = i * 2
            k = i * 2 + 1
            plt.plot(m[:,j], m[:,k], 'x', color='b', ms=1)
            plt.gca().add_artist(plt.Circle((m[0,j], m[0,k]), d, color='g', alpha=0.3))
            if u is not None:
                plt.plot(u[j], u[k], 's', color='r')
                plt.gca().add_artist(plt.Circle((u[j], u[k]), d, color='r', alpha=0.3))
        prefix='friends%s-%s_' % ('1' if self.jackknife else '', self.metric)
        plt.savefig(prefix + 'cluster.pdf')
        plt.close()
        print('creating plot... done')
pyxpose.py (project: pyxpose, author: PetitPrince)
def find_a_dominant_color(image):
    # K-means clustering to find the k most dominant colors, from:
    # http://stackoverflow.com/questions/3241929/python-find-dominant-most-common-color-in-an-image
    n_clusters = 5

    # Get image into a workable form
    im = image.copy()
    im = im.resize((150, 150))      # optional, to reduce time
    ar = np.asarray(im)                 # scipy.misc.fromimage was removed from SciPy
    im_shape = ar.shape
    ar = ar.reshape(np.prod(im_shape[:2]), im_shape[2])
    ar = ar.astype(float)

    # Compute clusters
    codes, dist = scipy.cluster.vq.kmeans(ar, n_clusters)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
    counts, bins = np.histogram(vecs, len(codes))    # count occurrences

    # Get the indexes of the most frequent, 2nd most frequent, 3rd, ...
    sorted_idxs = np.argsort(counts)[::-1]           # most frequent first

    # Get the color
    peak = codes[sorted_idxs[1]] # get second most frequent color

    return [int(i) for i in peak.tolist()] # list comprehension to quickly cast everything to int
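A minimal usage sketch for the function above (hypothetical image file; the np and scipy.cluster.vq imports are assumed to come from the source module, and Pillow supplies the image object):

from PIL import Image

im = Image.open('photo.jpg').convert('RGB')   # hypothetical input file
r, g, b = find_a_dominant_color(im)
print('dominant color: #%02x%02x%02x' % (r, g, b))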
dbscan.py (project: BioNLP-2016, author: cambridgeltl)
def process_options(args):    
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.eps <= 0.0:
        raise ValueError('eps must be > 0')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        logging.info('normalize features to unit variance')
        vectors = scipy.cluster.vq.whiten(vectors)

    return words, vectors, options
control_enrichment.py (project: MIRA, author: comprna)
def write_fasta(file_list):
    '''
    Writes fasta files for cluster sequences from bedfile
    '''

    for infile in file_list:

        ifi = open(infile, "r")
        out_fa_file = infile + ".fa"

        # Create/truncate the output file, then write one header/sequence pair per record
        of = open(out_fa_file, "w")

        for line in ifi:
            f_header, f_seq = fasta_header(line)
            of.write("{}\n{}\n".format(f_header, f_seq))

        print("done {}".format(out_fa_file))
        ifi.close()
        of.close()
utils.py (project: scanpy, author: theislab)
def compute_group_overlap_score(ref_labels, pred_labels,
                                threshold_overlap_pred=0.5,
                                threshold_overlap_ref=0.5):
    """How well do the pred_labels explain the ref_labels?

    A predicted cluster explains a reference cluster if it is contained within the reference
    cluster with at least 50% (threshold_overlap_pred) of its points and these correspond
    to at least 50% (threshold_overlap_ref) of the reference cluster.
    """
    ref_unique, ref_counts = np.unique(ref_labels, return_counts=True)
    ref_dict = dict(zip(ref_unique, ref_counts))
    pred_unique, pred_counts = np.unique(pred_labels, return_counts=True)
    pred_dict = dict(zip(pred_unique, pred_counts))
    summary = []
    for true in ref_unique:
        sub_pred_unique, sub_pred_counts = np.unique(pred_labels[true == ref_labels], return_counts=True)
        relative_overlaps_pred = [sub_pred_counts[i] / pred_dict[n] for i, n in enumerate(sub_pred_unique)]
        relative_overlaps_ref = [sub_pred_counts[i] / ref_dict[true] for i, n in enumerate(sub_pred_unique)]
        pred_best_index = np.argmax(relative_overlaps_pred)
        summary.append(1 if (relative_overlaps_pred[pred_best_index] >= threshold_overlap_pred and
                             relative_overlaps_ref[pred_best_index] >= threshold_overlap_ref)
                       else 0)
        # print(true, sub_pred_unique[pred_best_index], relative_overlaps_pred[pred_best_index],
        #       relative_overlaps_ref[pred_best_index], summary[-1])
    return sum(summary)/len(summary)
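A toy check of the score's behavior (hypothetical labels): the prediction matches reference cluster 'b' exactly but splits 'a' into three pieces, none covering at least 50% of it, so half of the reference clusters count as explained:

import numpy as np

ref = np.array(['a'] * 6 + ['b'] * 4)
pred = np.array([0, 0, 1, 1, 2, 2, 3, 3, 3, 3])
print(compute_group_overlap_score(ref, pred))  # -> 0.5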
utils.py (project: scanpy, author: theislab)
def hierarch_cluster(M):
    """Cluster matrix using hierarchical clustering.

    Parameters
    ----------
    M : np.ndarray
        Matrix, for example, distance matrix.

    Returns
    -------
    Mclus : np.ndarray
        Clustered matrix.
    indices : np.ndarray
        Indices used to cluster the matrix.
    """
    import scipy as sp
    import scipy.cluster
    link = sp.cluster.hierarchy.linkage(M)
    indices = sp.cluster.hierarchy.leaves_list(link)
    Mclus = np.array(M[:, indices])
    Mclus = Mclus[indices, :]
    if False:
        pl.matshow(Mclus)
        pl.colorbar()
    return Mclus, indices
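A minimal sketch of calling the helper above on random data (hypothetical input; note that scipy's linkage treats the full square matrix as a set of row observations, which is what this function relies on):

import numpy as np
from scipy.spatial.distance import pdist, squareform

np.random.seed(0)
M = squareform(pdist(np.random.rand(10, 3)))  # symmetric distance matrix
Mclus, indices = hierarch_cluster(M)
print(indices)  # row/column order after clustering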
cluster.py (project: PyMaid, author: schlegelp)
def get_leafs(self, use_labels=False):
        """ Use to retrieve labels.

        Parameters
        ----------
        use_labels :    bool, optional
                        If True, self.labels will be returned. If False, will
                        use either columns (if matrix is pandas DataFrame)
                        or indices (if matrix is np.ndarray)
        """

        if isinstance(self.mat, pd.DataFrame):
            if use_labels:
                return [self.labels[i] for i in scipy.cluster.hierarchy.leaves_list(self.linkage)]
            else:
                return [self.mat.columns.tolist()[i] for i in scipy.cluster.hierarchy.leaves_list(self.linkage)]
        else:
            return scipy.cluster.hierarchy.leaves_list(self.linkage)
cluster.py (project: PyMaid, author: schlegelp)
def calc_agg_coeff(self):
        """ Returns the agglomerative coefficient, measuring the clustering
        structure of the linkage matrix. Because it grows with the number of 
        observations, this measure should not be used to compare datasets of 
        very different sizes.

        For each observation i, denote by m(i) its dissimilarity to the first 
        cluster it is merged with, divided by the dissimilarity of the merger 
        in the final step of the algorithm. The agglomerative coefficient is 
        the average of all 1 - m(i).
        """

        # Turn into pandas DataFrame for fancy indexing
        Z = pd.DataFrame(self.linkage, columns = ['obs1','obs2','dist','n_org'] )

        # Get all distances at which an original observation is merged 
        all_dist = Z[ ( Z.obs1.isin(self.leafs) ) | (Z.obs2.isin(self.leafs) ) ].dist.values

        # Divide all distances by last merger
        all_dist /= self.linkage[-1][2]

        # Calc final coefficient
        coeff = np.mean( 1 - all_dist )

        return coeff
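A standalone sketch of the same quantity computed per observation, following the docstring's definition (assumes Z is a scipy linkage matrix over n observations, with leaf ids 0..n-1):

import numpy as np

def agg_coeff(Z, n):
    # m(i): distance at which observation i is first merged into a cluster
    first_merge = np.zeros(n)
    for obs1, obs2, dist, _ in Z:
        for obs in (obs1, obs2):
            if obs < n:                    # a leaf, not an intermediate cluster
                first_merge[int(obs)] = dist
    return np.mean(1.0 - first_merge / Z[-1, 2])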
clustering.py (project: ccCluster, author: gsantoni)
def main():
    from optparse import OptionParser
    parser = OptionParser(usage="%prog --XSCALEfile=<LP filename> --outname=<output dendrogram>")

    parser.add_option("-o","--outname", dest="outname", default='Dendrogram', help="output dendogram file name")
    parser.add_option("-t", "--threshold", dest="threshold", default='0.4', help="Distance threshold for clustering")
    parser.add_option("-c", "--count",action="store_true", dest="count", default=False, help="Counts datasets in the biggest cluster and exit")
    (options, args) = parser.parse_args()

    thr = float(options.threshold)
    CC = Clustering('Cluster_log.txt')
    link = CC.tree()
    if options.count:
        CC.checkMultiplicity(thr)
        print(CC.thrEstimation())
    else:
        CC.checkMultiplicity(thr) 
        CC.merge('ano', thr)
plotting.py (project: treecat, author: posterior)
def plot_feature_overlap(df, cmap='binary', method='cluster'):
    """Plot feature-feature presence overlap of a pandas dataframe.

    Args:
        df: A pandas dataframe.
        cmap: A matplotlib colormap.
        method: Method of clustering, one of 'cluster' or 'tree'.
    """
    V = len(df.columns)
    present = (df == df).to_numpy().astype(np.float32)  # True where a value is present (NaN != NaN)
    overlap = np.dot(present.T, present)
    assert overlap.shape == (V, V)

    # Sort features to make blocks contiguous.
    if method == 'tree':
        # TODO(fritzo) Fix this to not look awful.
        grid = make_complete_graph(V)
        weights = np.empty(grid.shape[1], dtype=np.float32)
        for k, v1, v2 in grid.T:
            weights[k] = overlap[v1, v2]
        edges = estimate_tree(grid, weights)
        order, order_inv = order_vertices(edges)
    elif method == 'cluster':
        distance = scipy.spatial.distance.pdist(overlap)
        clustering = scipy.cluster.hierarchy.complete(distance)
        order_inv = scipy.cluster.hierarchy.leaves_list(clustering)
    else:
        raise ValueError(method)
    overlap = overlap[order_inv, :]
    overlap = overlap[:, order_inv]
    assert overlap.shape == (V, V)

    pyplot.imshow(overlap**0.5, cmap=cmap)
    pyplot.axis('off')
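A reduced sketch of the 'cluster' branch on its own (hypothetical overlap matrix): rows of the matrix are treated as observations, complete-linkage clustered, and reordered by dendrogram leaf order:

import numpy as np
import scipy.cluster.hierarchy
import scipy.spatial.distance

overlap = np.random.rand(6, 6)
overlap = overlap + overlap.T                     # symmetric toy matrix
distance = scipy.spatial.distance.pdist(overlap)  # rows as observations
clustering = scipy.cluster.hierarchy.complete(distance)
order_inv = scipy.cluster.hierarchy.leaves_list(clustering)
print(overlap[order_inv, :][:, order_inv])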
friends.py (project: massivedatans, author: JohannesBuchner)
def cluster(self, u, ndim, keepRadius=False):
        """

        """
        if self.verbose: print('building region ...')
        if len(u) > 10:
            if keepRadius and self.region is not None and 'maxdistance' in self.region:
                maxdistance = self.region['maxdistance']
            else:
                if self.radial:
                    if self.jackknife:
                        #maxdistance = initial_rdistance_guess(u, k=1, metric=self.metric)
                        maxdistance = nearest_rdistance_guess(u, metric=self.metric)
                    else:
                        maxdistance = find_rdistance(u, nbootstraps=20, metric=self.metric, verbose=self.verbose)
                else:
                    maxdistance = find_maxdistance(u)
            if self.force_shrink and self.region is not None and 'maxdistance' in self.region:
                maxdistance = min(maxdistance, self.region['maxdistance'])
            if self.keep_phantom_points and len(self.phantom_points) > 0:
                # add phantoms to u now
                print('including phantom points in cluster members', self.phantom_points)
                u = numpy.vstack((u, self.phantom_points))
            ulow  = numpy.max([u.min(axis=0) - maxdistance, numpy.zeros(ndim)], axis=0)
            uhigh = numpy.min([u.max(axis=0) + maxdistance, numpy.ones(ndim)], axis=0)
        else:
            maxdistance = None
            ulow = numpy.zeros(ndim)
            uhigh = numpy.ones(ndim)
        if self.verbose: print('setting sampling region:', (ulow, uhigh), maxdistance)
        self.region = dict(members=u, maxdistance=maxdistance, ulow=ulow, uhigh=uhigh)
        self.generator = None
friends.py (project: massivedatans, author: JohannesBuchner)
def rebuild(self, u, ndim, keepRadius=False):
        if self.last_cluster_points is None or \
            len(self.last_cluster_points) != len(u) or \
            numpy.any(self.last_cluster_points != u):
            self.cluster(u=self.transform_new_points(u), ndim=ndim, keepRadius=keepRadius)
            self.last_cluster_points = u

            # reset generator
            self.generator = self.generate(ndim=ndim)
hiermetriclearn.py (project: massivedatans, author: JohannesBuchner)
def rebuild(self, u, ndim, keepMetric=False):
        if self.last_cluster_points is not None and \
            len(self.last_cluster_points) == len(u) and \
            numpy.all(self.last_cluster_points == u):
            # do nothing if everything stayed the same
            return

        self.cluster(u=u, ndim=ndim, keepMetric=keepMetric)
        self.last_cluster_points = u

        print('maxdistance:', self.region.maxdistance)
        self.generator = self.generate(ndim)
ORFsClustering.py (project: genepred, author: egorbarsukoff)
def start_clustering(self):
        functions.log('Calculate {0} distances...'.format(int(len(self.orfs) * (len(self.orfs) + 1) / 2)))
        self.distances = self.create_distance_matrix()
        functions.log('Start clustering...')
        self.linkage_matrix = scipy.cluster.hierarchy.linkage(ssd.squareform(self.distances), method='complete')
        functions.log('Clustering done.')
ORFsClustering.py (project: genepred, author: egorbarsukoff)
def clustering(self, clustering_distance):
        """
        Create clusters
        :param clustering_distance: max distance between ORFs in one cluster
        :return:
        """
        functions.log('Create clusters...')
        for ind, o in enumerate(scipy.cluster.hierarchy.fcluster(self.linkage_matrix, clustering_distance, 'distance')):
            if self.clusters.get(o) is None:
                self.clusters[o] = set()
            self.clusters[o].add(ind)
            self.orfs_clusters[ind] = o
ORFsClustering.py (project: genepred, author: egorbarsukoff)
def get_args():
    parser = argparse.ArgumentParser(description='Clustering ORFs by position in the graph')
    parser.add_argument('sequences', type=str, help='Path to ORFs sequences file')
    parser.add_argument('paths', type=str, help='Path to ORFs paths file from ORFFinderInGraph.py')
    parser.add_argument('graph', type=str, help='Path to graph in GFA format')
    parser.add_argument('threshold', type=float, help='Max level of dissimilarity of ORFs in one cluster (between 0 and 1)')
    parser.add_argument('-o', '--output', type=str, default='', help='Output folder')
    parser.add_argument('-sd', '--savedistances', default=False, action='store_true',
                        help='Save pairwise distances matrix (in .npy format)')
    return parser.parse_args()
leveler.py (project: Maybe-Useful-Cogs, author: AznStevy)
async def _auto_color(self, url: str, ranks):
        phrases = ["Calculating colors..."] # in case I want more
        #try:
        await self.bot.say("**{}**".format(random.choice(phrases)))
        clusters = 10

        async with self.session.get(url) as r:
            image = await r.content.read()
        with open('data/leveler/temp_auto.png','wb') as f:
            f.write(image)

        im = Image.open('data/leveler/temp_auto.png').convert('RGBA')
        im = im.resize((290, 290)) # resized to reduce time
        ar = numpy.asarray(im)          # scipy.misc.fromimage was removed from SciPy
        shape = ar.shape
        ar = ar.reshape(numpy.prod(shape[:2]), shape[2])

        codes, dist = scipy.cluster.vq.kmeans(ar.astype(float), clusters)
        vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
        counts, bins = numpy.histogram(vecs, len(codes))    # count occurrences

        # sort (index, count) pairs by count, most frequent first
        sorted_list = sorted(enumerate(counts), key=operator.itemgetter(1), reverse=True)

        colors = []
        for rank in ranks:
            color_index = min(rank, len(codes) - 1)
            peak = codes[sorted_list[color_index][0]] # gets the original index
            peak = peak.astype(int)

            colors.append(''.join(format(c, '02x') for c in peak))
        return colors # returns array
        #except:
            #await self.bot.say("```Error or no scipy. Install scipy doing 'pip3 install numpy' and 'pip3 install scipy' or read here: https://github.com/AznStevy/Maybe-Useful-Cogs/blob/master/README.md```")

    # converts hex to rgb
dbscan.py (project: BioNLP-2016, author: cambridgeltl)
def write_cluster_ids(words, cluster_ids, out=None):
    """Write given list of words and their corresponding cluster ids to out."""

    assert len(words) == len(cluster_ids), 'word/cluster ids number mismatch'

    if out is None:
        out = sys.stdout
    for word, cid in zip(words, cluster_ids):
        print('%s\t%d' % (word, cid), file=out)
dbscan.py (project: BioNLP-2016, author: cambridgeltl)
def main(argv=None):
    if argv is None:
        argv = sys.argv

    try:
        words, vectors, options = process_options(argv[1:])
    except Exception as e:
        if str(e):
            print('Error: %s' % str(e), file=sys.stderr)
            return 1
        else:
            raise

    dbscan = sklearn.cluster.DBSCAN(eps=options.eps, metric=options.metric)
    dbscan.fit(numpy.array(vectors))
    noisy = sum(1 for l in dbscan.labels_ if l == -1)
    unique = len(set(dbscan.labels_))
    logging.info('%d clusters, %d noisy, %d vectors' % (unique, noisy,
                                                        len(vectors)))
    if noisy >= len(vectors) / 4:
        logging.warning('%d/%d noisy (-1) labels (try higher eps?)' % \
                            (noisy, len(vectors)))
    elif unique < (len(vectors)/2)**0.5:
        logging.warning('only %d clusters (try lower eps?)' % unique)
    write_cluster_ids(words, dbscan.labels_)

    return 0
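A reduced sketch of the DBSCAN step by itself (hypothetical vectors; eps and metric stand in for the parsed options above):

import numpy
import sklearn.cluster

vectors = numpy.random.rand(100, 20)
dbscan = sklearn.cluster.DBSCAN(eps=0.5, metric='euclidean')
dbscan.fit(vectors)
print(sorted(set(dbscan.labels_)))  # -1 marks noise points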
kmeans.py (project: BioNLP-2016, author: cambridgeltl)
def process_options(args):    
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.k is not None and options.k < 2:
        raise ValueError('cluster number must be >= 2')

    if options.method == MINIBATCH_KMEANS and not with_sklearn:
        logging.warning('minibatch kmeans not available, using kmeans (slow)')
        options.method = KMEANS

    if options.jobs != 1 and (options.method != KMEANS or not with_sklearn):
        logging.warning('jobs > 1 only supported with scikit-learn %s' % KMEANS)
        options.jobs = 1

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.k is None:
        options.k = int(math.ceil((len(wv.words())/2)**0.5))
        logging.info('set k=%d (%d words)' % (options.k, len(wv.words())))

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        logging.info('normalize features to unit variance')
        vectors = scipy.cluster.vq.whiten(vectors)

    return words, vectors, options
kmeans.py (project: BioNLP-2016, author: cambridgeltl)
def minibatch_kmeans(vectors, k):
    if not with_sklearn:
        raise NotImplementedError
    # Sculley (http://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf)
    # uses batch size 1000. sklearn KMeans defaults to n_init 10
    kmeans = sklearn.cluster.MiniBatchKMeans(k, batch_size=1000, n_init=10)
    kmeans.fit(vectors)
    return kmeans.labels_
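A minimal usage sketch with random vectors (hypothetical data; assumes scikit-learn imported successfully, so with_sklearn is True in the source module):

import numpy as np

vectors = np.random.rand(5000, 50).astype(np.float32)
labels = minibatch_kmeans(vectors, k=20)
print('%d clusters assigned' % len(set(labels)))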
kmeans.py (project: BioNLP-2016, author: cambridgeltl)
def write_cluster_ids(words, cluster_ids, out=None):
    """Write given list of words and their corresponding cluster ids to out."""

    assert len(words) == len(cluster_ids), 'word/cluster ids number mismatch'

    if out is None:
        out = sys.stdout
    for word, cid in zip(words, cluster_ids):
        print('%s\t%d' % (word, cid), file=out)
cluster.py (project: PyMaid, author: schlegelp)
def __getattr__(self, key):
        if key == 'linkage':
            self.cluster()
            return self.linkage
        elif key in ['leafs', 'leaves']:
            return self.get_leafs()
        elif key == 'cophenet':
            return self.calc_cophenet()
        elif key == 'agg_coeff':
            return self.calc_agg_coeff()
        else:
            raise AttributeError(key)
cluster.py (project: PyMaid, author: schlegelp)
def calc_cophenet(self):
        """ Returns Cophenetic Correlation coefficient of your clustering.
        This (very very briefly) compares (correlates) the actual pairwise 
        distances of all your samples to those implied by the hierarchical 
        clustering. The closer the value is to 1, the better the clustering 
        preserves the original distances,
        """        

        return scipy.cluster.hierarchy.cophenet( self.linkage, self.condensed_dist_mat )
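A standalone sketch of the same check on random points: scipy.cluster.hierarchy.cophenet returns the coefficient together with the cophenetic distances when given the condensed distance matrix alongside the linkage:

import numpy as np
import scipy.cluster.hierarchy
import scipy.spatial.distance

X = np.random.rand(20, 4)
condensed = scipy.spatial.distance.pdist(X)
link = scipy.cluster.hierarchy.linkage(condensed, method='ward')
c, coph_dists = scipy.cluster.hierarchy.cophenet(link, condensed)
print('cophenetic correlation: %.3f' % c)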
cluster.py (project: PyMaid, author: schlegelp)
def cluster(self, method='ward'):
        """ Cluster distance matrix. This will automatically be called when
        attribute linkage is requested for the first time.

        Parameters
        ----------
        method :    str, optional
                    Clustering method (see scipy.cluster.hierarchy.linkage 
                    for reference)
        """

        # First, convert similarity matrix to distance matrix
        if self.mat_type != 'distance':
            if isinstance(self.mat, pd.DataFrame):
                self.dist_mat = (self.mat.values - self.mat.max().max()) * -1
            else:
                self.dist_mat = (self.mat - self.mat.max()) * -1
        else:
            if isinstance(self.mat, pd.DataFrame):
                self.dist_mat = self.mat.values
            else:
                self.dist_mat = self.mat

        # Second, convert into condensed distance matrix - otherwise clustering
        # thinks we are passing observations instead of final scores
        self.condensed_dist_mat = scipy.spatial.distance.squareform( self.dist_mat, checks=False )

        self.linkage = scipy.cluster.hierarchy.linkage(self.condensed_dist_mat, method=method)

        # Save method in case we want to look it up later
        self.method = method

        module_logger.info('Clustering done using method "{0}"'.format(method) )
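A compact sketch of the similarity-to-distance conversion used above (hypothetical similarity matrix): subtracting the maximum and negating maps the highest similarity to distance zero, and squareform then condenses the matrix so linkage does not mistake it for raw observations:

import numpy as np
import scipy.cluster.hierarchy
import scipy.spatial.distance

sim = np.array([[1.0, 0.8, 0.1],
                [0.8, 1.0, 0.2],
                [0.1, 0.2, 1.0]])
dist = (sim - sim.max()) * -1
condensed = scipy.spatial.distance.squareform(dist, checks=False)
link = scipy.cluster.hierarchy.linkage(condensed, method='ward')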
cluster.py (project: PyMaid, author: schlegelp)
def plot3d(self, k=5, criterion='maxclust', **kwargs):
        """Plot neuron using :func:`pymaid.plot.plot3d`. Will only work if
        instance has neurons attached to it.

        Parameters
        ----------
        k :         {int, float}
        criterion : {'maxclust','distance'}, optional
                    If `maxclust`, `k` clusters will be formed. If `distance`, 
                    clusters will be created at threshold `k`.
        **kwargs
                will be passed to plot.plot3d() 
                see help(plot.plot3d) for a list of keywords      

        See Also
        --------
        :func:`pymaid.plot.plot3d` 
                    Function called to generate 3d plot                  
        """

        if 'neurons' not in self.__dict__:
            module_logger.error(
                'This works only with cluster results from neurons')
            return None       

        cmap = self.get_colormap(k=k, criterion=criterion)

        kwargs.update({'color': cmap})

        return plotting.plot3d(self.neurons, **kwargs)
cluster.py (project: PyMaid, author: schlegelp)
def to_json(self, fname='cluster.json', k=5, criterion='maxclust'):
        """ Convert clustered neurons into json file that can be loaded into 
        CATMAID selection table.

        Parameters
        ----------
        fname :     str, optional
                    Filename to save selection to
        k :         {int, float}
        criterion : {'maxclust','distance'}, optional
                    If `maxclust`, `k` clusters will be formed. If `distance`, 
                    clusters will be created at threshold `k`.

        See Also
        --------
        :func:`pymaid.plot.plot3d` 
                    Function called to generate 3d plot                  
        """        

        cmap = self.get_colormap(k=k, criterion=criterion)

        # Convert to 0-255
        cmap = { n : [ int(v*255) for v in cmap[n] ] for n in cmap }  

        data = [ dict(skeleton_id=int(n),
                     color="#{:02x}{:02x}{:02x}".format( cmap[n][0],cmap[n][1],cmap[n][2] ),
                     opacity=1
                     ) for n in cmap ]

        with open(fname, 'w') as outfile:
            json.dump(data, outfile)

        module_logger.info('Selection saved as %s in %s' % (fname, os.getcwd()))    

        return
cluster.py (project: PyMaid, author: schlegelp)
def get_clusters(self, k, criterion='maxclust', return_type='labels'):
        """ Wrapper for cluster.hierarchy.fcluster to get clusters.

        Parameters
        ----------
        k :             {int, float}
        criterion :     {'maxclust','distance'}, optional
                        If `maxclust`, `k` clusters will be formed. If 
                        `distance`, clusters will be created at threshold `k`.
        return_type :   {'labels','indices','columns','rows'}
                        Determines what to construct the clusters of. 'labels'
                        only works if labels are provided. 'indices' refers
                        to index in distance matrix. 'columns'/'rows' works
                        if distance matrix is pandas DataFrame                        

        Returns
        -------
        list 
                    list of clusters [ [leaf1, leaf5], [leaf2, ...], ... ]
        """

        cl = scipy.cluster.hierarchy.fcluster(self.linkage, k, criterion=criterion)

        if self.labels and return_type.lower()=='labels':
            return [[self.labels[j] for j in range(len(cl)) if cl[j] == i] for i in range(min(cl), max(cl) + 1)]
        elif return_type.lower() == 'rows':
            return [[self.mat.columns.tolist()[j] for j in range(len(cl)) if cl[j] == i] for i in range(min(cl), max(cl) + 1)]
        elif return_type.lower() == 'columns':
            return [[self.mat.index.tolist()[j] for j in range(len(cl)) if cl[j] == i] for i in range(min(cl), max(cl) + 1)]
        else:
            return [[j for j in range(len(cl)) if cl[j] == i] for i in range(min(cl), max(cl) + 1)]
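A minimal sketch of the underlying fcluster call (hypothetical data): with criterion='maxclust', k caps the number of flat clusters, while with criterion='distance', k is the height at which the dendrogram is cut:

import numpy as np
import scipy.cluster.hierarchy
import scipy.spatial.distance

X = np.random.rand(12, 3)
link = scipy.cluster.hierarchy.linkage(scipy.spatial.distance.pdist(X), method='ward')
print(scipy.cluster.hierarchy.fcluster(link, 3, criterion='maxclust'))
print(scipy.cluster.hierarchy.fcluster(link, 1.0, criterion='distance'))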
clustering.py (project: ccCluster, author: gsantoni)
def checkMultiplicity(self, thr):
        FlatC = hierarchy.fcluster(self.Tree, thr, criterion='distance')
        counter=collections.Counter(FlatC)
        Best = max(counter.items(), key=operator.itemgetter(1))[0]
        print('You are clustering with a threshold of %s'%(thr))
        print('The biggest cluster contains %s datasets from a total of %s'%(counter[Best], len(self.labelList)))
clustering.py (project: ccCluster, author: gsantoni)
def minimalForCompleteness(self):
        print("Running estimator for minimal threshold for completeness")
        labels=self.createLabels()
        x = 0.00
        dx = 0.05
        countsList = {}
        x_list = []
        while x < 1:
            Arrays= {}
            FlatC = hierarchy.fcluster(self.Tree, x, criterion='distance')
            counter=collections.Counter(FlatC)
            Best = max(counter.items(), key=operator.itemgetter(1))[0]
            toProcess=[Best]
            y=0
            for cluster, filename in zip(FlatC,labels):
                if cluster in toProcess:
                    hklFile = any_reflection_file(filename)
                    b= hklFile.as_miller_arrays()
                    for column in b:
                        if column.is_xray_intensity_array():
                            Arrays[y]=column
                            break
                    y+=1
            try:
                Arr = Arrays[0]
            except KeyError:
                countsList[x] = 0
                x += dx
                continue
            for label in range(1, y):
                try:
                    Arr = Arr.concatenate(Arrays[label])
                except Exception:
                    pass
            countsList[x] = Arr.completeness()
            x += dx
        # return the smallest threshold that reaches completeness > 0.98
        L = []
        for key in countsList:
            if countsList[key]>0.98:
                L.append(key)
        L.sort()
        return L[0]

