def compress_image(img, num_clusters):
    # Convert the input image into a (num_samples, num_features)
    # array to run the k-means clustering algorithm
    X = img.reshape((-1, 1))

    # Run k-means on the input data
    kmeans = cluster.KMeans(n_clusters=num_clusters, n_init=4, random_state=5)
    kmeans.fit(X)
    centroids = kmeans.cluster_centers_.squeeze()
    labels = kmeans.labels_

    # Assign each value to the nearest centroid and
    # reshape the result to the original image shape
    input_image_compressed = np.choose(labels, centroids).reshape(img.shape)
    return input_image_compressed
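A minimal usage sketch for compress_image() above, assuming `cluster` is sklearn.cluster and the input is a 2-D grayscale array; the file names and the 8-level quantization are illustrative, not from the original project.

import numpy as np
from PIL import Image
from sklearn import cluster

# Hypothetical driver: load a grayscale image and quantize it to 8 gray levels.
img = np.array(Image.open("input.jpg").convert("L"), dtype=np.uint8)  # illustrative file name
compressed = compress_image(img, num_clusters=8)
Image.fromarray(compressed.astype(np.uint8)).save("compressed.jpg")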
Python KMeans() usage examples
Source file: vector_quantization.py (project: Python-Machine-Learning-Cookbook, author: PacktPublishing)
def k_means_cluster_Predict(data_list, info):
    array_diagnal = np.array([[data_list[0][x], data_list[1][x]] for x in range(len(data_list[0]))])
    ks = list(range(1, len(info)))
    KMeans = [cluster.KMeans(n_clusters=i, init="k-means++").fit(array_diagnal) for i in ks]
    BIC = [compute_bic(kmeansi, array_diagnal) for kmeansi in KMeans]
    ks_picked = ks[BIC.index(max(BIC))]
    if ks_picked == 1:
        return [data_list]
    else:
        out = []
        std_rec = [scipy.std(data_list[0]), scipy.std(data_list[1])]
        whitened = whiten(array_diagnal)
        centroids, distortion = kmeans(whitened, ks_picked)
        idx, _ = vq(whitened, centroids)
        for x in range(ks_picked):
            group1 = [[int(i) for i in array_diagnal[idx == x, 0]], [int(i) for i in array_diagnal[idx == x, 1]]]
            out.append(group1)
        return out
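compute_bic() is not included in this snippet. A commonly used sketch of a BIC score for a fitted k-means model, under a spherical-Gaussian approximation, is shown below; the helper shipped by the original project may differ.

import numpy as np

def compute_bic(kmeans_model, X):
    # Sketch only: BIC of a fitted k-means model assuming spherical Gaussian clusters.
    centers = kmeans_model.cluster_centers_
    labels = kmeans_model.labels_
    k = centers.shape[0]
    n, d = X.shape
    sizes = np.bincount(labels, minlength=k)
    # pooled within-cluster variance estimate
    ssw = sum(np.sum((X[labels == i] - centers[i]) ** 2) for i in range(k))
    cl_var = max(ssw / max(n - k, 1) / d, 1e-12)
    log_likelihood = sum(
        sizes[i] * np.log(max(sizes[i], 1)) - sizes[i] * np.log(n)
        - 0.5 * sizes[i] * d * np.log(2 * np.pi * cl_var)
        - 0.5 * (sizes[i] - 1) * d
        for i in range(k)
    )
    # penalize the log-likelihood by the number of free parameters
    return log_likelihood - 0.5 * k * (d + 1) * np.log(n)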
def run_kmeans(transformed_pca_matrix, n_clusters, random_state=None):
    if random_state is None:
        random_state = cr_constants.RANDOM_STATE
    kmeans = sk_cluster.KMeans(n_clusters=n_clusters, random_state=random_state)
    clusters = kmeans.fit_predict(transformed_pca_matrix) + 1
    cluster_score = compute_db_index(transformed_pca_matrix, kmeans)
    clusters = cr_clustering.relabel_by_size(clusters)
    clustering_key = cr_clustering.format_clustering_key(cr_clustering.CLUSTER_TYPE_KMEANS, n_clusters)
    return cr_clustering.create_clustering(clusters=clusters,
                                           num_clusters=n_clusters,
                                           cluster_score=cluster_score,
                                           clustering_type=cr_clustering.CLUSTER_TYPE_KMEANS,
                                           global_sort_key=n_clusters,
                                           description=cr_clustering.humanify_clustering_key(clustering_key))
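compute_db_index() is not shown here; it scores the clustering with a Davies-Bouldin style index (lower is better). For a self-contained equivalent outside this codebase, scikit-learn's metric can be used; note that sklearn's function takes the labels rather than the fitted estimator, and the random matrix below is only a stand-in for a PCA-transformed matrix.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score  # scikit-learn >= 0.20

X = np.random.RandomState(0).rand(200, 10)   # stand-in for transformed_pca_matrix
km = KMeans(n_clusters=5, random_state=0).fit(X)
print(davies_bouldin_score(X, km.labels_))   # lower values indicate better separation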
def step4():
    key_vec = pickle.loads(open("key_vec.pkl", "rb").read())
    vecs = []
    for ev, vec in enumerate(key_vec.values()):
        x = np.array(vec)
        if np.isnan(x).any():
            # print(vec)
            continue
        vecs.append(x)
    vecs = np.array(vecs)
    kmeans = KMeans(n_clusters=128, init='k-means++', n_init=10, max_iter=300,
                    tol=0.0001, precompute_distances='auto', verbose=0,
                    random_state=None, copy_x=True, n_jobs=1)
    print("now fitting...")
    kmeans.fit(vecs)
    open("kmeans.model", "wb").write(pickle.dumps(kmeans))
    for p in kmeans.predict(vecs):
        print(p)
def cluster(data, true_labels, n_clusters=3):
    km = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    km.fit(data)
    km_means_labels = km.labels_
    km_means_cluster_centers = km.cluster_centers_
    km_means_labels_unique = np.unique(km_means_labels)
    colors_ = cycle(colors.cnames.keys())

    initial_dim = np.shape(data)[1]
    data_2 = tsne(data, 2, initial_dim, 30)

    plt.figure(figsize=(12, 6))
    plt.scatter(data_2[:, 0], data_2[:, 1], c=true_labels)
    plt.title('True Labels')

    return km_means_labels
def init_centers_widths(self, R):
    """Initialize prior of centers and widths

    Returns
    -------
    centers : 2D array, with shape [K, n_dim]
        Prior of factors' centers.
    widths : 1D array, with shape [K, 1]
        Prior of factors' widths.
    """
    kmeans = KMeans(
        init='k-means++',
        n_clusters=self.K,
        n_init=10,
        random_state=100)
    kmeans.fit(R)
    centers = kmeans.cluster_centers_
    widths = self._get_max_sigma(R) * np.ones((self.K, 1))
    return centers, widths
def all_cluster():
    # One-off preprocessing (commented out): parse the 'between' vectors from
    # sort_between.txt and dump them to dataframe.csv.
    # bet_dic = {}
    # fin = open('sort_between.txt', 'r')
    # while True:
    #     line = fin.readline()
    #     if line:
    #         line = line.strip()
    #         between, vec = line.split('^')
    #         vec = vec.strip('[')
    #         vec = vec.strip(']')
    #         vec = vec.split(',')
    #         bet_dic[between] = vec
    #     else:
    #         break
    # bet_dic = pd.DataFrame(bet_dic)
    # bet_dic = bet_dic.T
    # bet_dic.to_csv('dataframe.csv')
    # fin.close()
    df = pd.read_csv('dataframe.csv')
    clf = KMeans(n_clusters=50)
    s = clf.fit(df.iloc[1:, 1:])  # positional slicing; df[1:, 1:] is not valid on a DataFrame
    print(s)
Source file: clustering.py (project: oss-github-analysis-project, author: itu-oss-project-team)
def k_means_clustering(self, out_path, pd_data, number_of_clusters):
    headers, repos, features = self.__fetch_data(pd_data)
    kmeans = KMeans(n_clusters=number_of_clusters, random_state=0, n_init=200).fit(features)  # apply k-means algorithm

    # form clusters
    clusters = []
    for i in range(0, number_of_clusters):  # k clusters
        repo_list = []
        for j in range(0, len(kmeans.labels_)):  # a label for each repo
            if i == kmeans.labels_[j]:  # if the repo's label is equal to the cluster number
                repo_list.append(repos[j])  # add the repo to cluster i's list
        clusters.append(repo_list)

    out_file_path = os.path.join(out_path, "kmeans_noOfClusters" + str(number_of_clusters))
    self.__export_k_means_results(kmeans, headers, clusters, out_file_path)  # avoid ".csv"
def cluster(X, seed=0, n_clusters=20, alg='kmeans'):
    """
    Perform k-means on the given X data. For alg, use one of:
    'kmeans' (sklearn KMeans) or 'spherical' (SphericalKMeans).
    Returns (X's predicted clusters, cluster centers).
    NOTE: euclidean tends to perform very poorly
    """
    # log("Clustering k-means with {} clusters".format(n_clusters))
    if alg == 'kmeans':
        Model = KMeans
    elif alg == 'spherical':
        # in-place l2 normalization (spherical k-means assumes this)
        normalize(X, 'l2', copy=False)
        Model = SphericalKMeans
    kmeans = Model(
        n_clusters=int(n_clusters), random_state=seed
    )
    pred_clusters = kmeans.fit_predict(X)
    return pred_clusters, kmeans.cluster_centers_
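A brief usage sketch for the cluster() helper above; it assumes SphericalKMeans comes from the third-party spherecluster package (only needed for alg='spherical'), and the random matrix is purely illustrative.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
# from spherecluster import SphericalKMeans   # only required when alg='spherical'

X = np.random.RandomState(0).rand(500, 64)
labels, centers = cluster(X, seed=0, n_clusters=10, alg='kmeans')
print(labels.shape, centers.shape)   # (500,), (10, 64)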
def __init__(self, edges, branching_factor=50, threshold=0.1):
    # Make the features list.
    features = []
    for i in range(len(edges)):
        edge = edges[i]
        features.append([edge['perimeter'], edge['area'],
                         edge['shape_factor'], edge['radius_deviation']])
    features = np.array(features)

    # Normalize features
    normed_features = features.copy()
    for i in range(features.shape[1]):
        avg = np.median(features[::, i])
        std = np.std(features[::, i])
        normed_features[::, i] -= avg
        normed_features[::, i] /= avg

    self.features = features
    self.normed_features = normed_features
    self.branching_factor = branching_factor
    self.threshold = threshold

    #self.run(Birch, branching_factor=50, threshold=0.1, n_clusters=2)
    self.run(KMeans, n_clusters=2)
    #self.run(AgglomerativeClustering, n_clusters=2)
def color_differenciate(img: Image, k: int):
    imgarr = img2array(img)
    imgarr_r = imgarr.reshape((imgarr.shape[0] * imgarr.shape[1], 3))
    clt = KMeans(n_clusters=k)
    clt.fit(imgarr_r)
    numLabels = np.arange(0, len(np.unique(clt.labels_)) + 1)
    images = []
    for i in range(len(numLabels)):
        images.append(np.ones(imgarr_r.shape, dtype=np.int32) * 255)
    for idx in range(len(clt.labels_)):
        label = clt.labels_[idx]
        images[label][idx][0] = imgarr_r[idx][0]
        images[label][idx][1] = imgarr_r[idx][1]
        images[label][idx][2] = imgarr_r[idx][2]
    new_images = []
    for i in range(len(numLabels)):
        new_img = array2img(images[i].reshape(imgarr.shape))
        new_img.save('test_' + str(i) + '.jpg')
        new_images.append(new_img)
    return new_images
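img2array() and array2img() are not defined in this snippet; a plausible pair of PIL-based helpers is sketched below (the names are taken from the call sites above, the implementations are assumed).

import numpy as np
from PIL import Image

def img2array(img: Image.Image) -> np.ndarray:
    # Convert a PIL image to an (H, W, 3) uint8 array.
    return np.asarray(img.convert('RGB'), dtype=np.uint8)

def array2img(arr: np.ndarray) -> Image.Image:
    # Convert an (H, W, 3) array back to a PIL image.
    return Image.fromarray(arr.astype(np.uint8))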
def get_plot(x, y, k, iris=iris):
    k_means = KMeans(n_clusters=k)
    k_means.fit(iris.data)
    colormap = rainbow(np.linspace(0, 1, k))

    fig = plt.figure()
    splt = fig.add_subplot(1, 1, 1)
    splt.scatter(iris.data[:, x], iris.data[:, y], c=colormap[k_means.labels_], s=40)
    splt.scatter(k_means.cluster_centers_[:, x], k_means.cluster_centers_[:, y], c='black', marker='x')
    splt.set_xlabel(iris.feature_names[x])
    splt.set_ylabel(iris.feature_names[y])

    figfile = BytesIO()
    plt.savefig(figfile, format='png')
    figfile.seek(0)
    figdata_png = base64.b64encode(figfile.getvalue()).decode()
    return figdata_png
def update():
    # Get the current slider values
    N = clusters.value
    x_var = axis_map[x_axis.value]
    y_var = axis_map[y_axis.value]

    k_means = KMeans(n_clusters=N)
    k_means.fit(iris.data)
    centroids = k_means.cluster_centers_

    palette = sns.palettes.color_palette('hls', N)
    colormap = np.array(palette.as_hex())[k_means.labels_]  # as_hex() is necessary for Bokeh to render the colors properly

    plot.xaxis.axis_label = x_axis.value
    plot.yaxis.axis_label = y_axis.value
    source.data = dict(
        x=iris.data[:, x_var],
        y=iris.data[:, y_var],
        colors=colormap)
    centers.data = dict(
        cx=centroids[:, x_var],
        cy=centroids[:, y_var])
def KmeansWrapper(true_k, data, load=False):
    from sklearn.externals import joblib

    modelName = 'doc_cluster.%s.plk' % true_k
    if load:
        km = joblib.load(modelName)
        labels = km.labels_
    else:
        km = KMeans(n_clusters=true_k,
                    init='k-means++',
                    # max_iter=1000,
                    n_init=10,
                    n_jobs=-1,
                    random_state=0,
                    verbose=0)
        km.fit_predict(data)
        labels = km.labels_
        joblib.dump(km, modelName)
    return labels, km.cluster_centers_
def elbowMethod(X, k=21):
    distortions = []
    for i in range(1, k):
        km2 = KMeans(n_clusters=i,
                     init='k-means++',
                     n_init=10,
                     random_state=0,
                     n_jobs=-1,
                     verbose=0)
        km2.fit(X)
        distortions.append(km2.inertia_)
        print('k=%s, Distortion: %.2f' % (i, km2.inertia_))

    plt.plot(range(1, k), distortions, marker='o')
    plt.xlabel('Number of clusters')
    plt.ylabel('Distortion')
    plt.show()
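A quick self-contained run of elbowMethod() on synthetic blobs; the dataset and the range of k are illustrative, and note that n_jobs was removed from KMeans in recent scikit-learn releases, so that argument may need to be dropped.

import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=600, centers=4, cluster_std=1.2, random_state=0)
elbowMethod(X, k=11)   # look for the "elbow" where the distortion curve flattens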
def gridSearch(data, params, true_k):
    tfidf = TfidfVectorizer(strip_accents=None,
                            lowercase=True,
                            sublinear_tf=True,
                            analyzer='word')

    lr_tfidf = Pipeline([('vect', tfidf),
                         ('clf', KMeans(init='k-means++',
                                        n_jobs=-1,
                                        random_state=0,
                                        verbose=0))])

    gsTfIdf = GridSearchCV(
        lr_tfidf, params, n_jobs=1, verbose=1)

    gsTfIdf.fit(data)
    print()
    print("Best score: %0.3f" % gsTfIdf.best_score_)
    print("Best parameters set:")
    best_parameters = gsTfIdf.best_estimator_.get_params()
    for param_name in sorted(params.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
def k_means_cluster(data_list):
    if max(data_list[0]) - min(data_list[0]) > 10 and max(data_list[1]) - min(data_list[1]) > 10:
        array_diagnal = np.array([[data_list[0][x], data_list[1][x]] for x in range(len(data_list[0]))])
        ks = list(range(1, min([5, len(data_list[0]) + 1])))
        KMeans = [cluster.KMeans(n_clusters=i, init="k-means++").fit(array_diagnal) for i in ks]
        KMeans_predict = [cluster.KMeans(n_clusters=i, init="k-means++").fit_predict(array_diagnal) for i in ks]
        BIC = []
        BIC_rec = []
        for x in ks:
            if KMeans_predict[x - 1].max() < x - 1:
                continue
            else:
                BIC_i = compute_bic(KMeans[x - 1], array_diagnal)
                if abs(BIC_i) < 10 ** 8:
                    BIC.append(BIC_i)
                    BIC_rec.append(x)
        # BIC = [compute_bic(kmeansi, array_diagnal) for kmeansi in KMeans]
        # ks_picked = ks[BIC.index(max(BIC))]
        ks_picked = BIC_rec[BIC.index(max(BIC))]
        if ks_picked == 1:
            return [data_list]
        else:
            out = []
            std_rec = [scipy.std(data_list[0]), scipy.std(data_list[1])]
            whitened = whiten(array_diagnal)
            centroids, distortion = kmeans(whitened, ks_picked)
            idx, _ = vq(whitened, centroids)
            for x in range(ks_picked):
                group1 = [[int(i) for i in array_diagnal[idx == x, 0]], [int(i) for i in array_diagnal[idx == x, 1]]]
                out.append(group1)
            return out
    else:
        return [data_list]
def kmeans_aic(model, X, **kwargs):
    '''AIC (Akaike Information Criterion) for k-means, used for model selection

    Parameters:
        :model: An elm.pipeline.Pipeline with KMeans or MiniBatchKMeans as the final step in the Pipeline
        :X: The X data that were just given to "fit" or "partial_fit"
        :kwargs: placeholder - ignored

    Returns:
        :AIC: float
    '''
    k, m = model._estimator.cluster_centers_.shape
    if isinstance(X, xr.DataArray):
        n = X.flat.values.shape[0]
    else:
        n = X.shape[0]
    d = model._estimator.inertia_
    aic = d + 2 * m * k
    delattr(model._estimator, 'labels_')
    return aic
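The same score can be checked without the elm Pipeline wrapper; a minimal sketch with a plain scikit-learn KMeans on synthetic data (illustrative only, with inertia_ serving as the deviance term used by kmeans_aic above).

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=500, centers=4, n_features=3, random_state=0)
km = KMeans(n_clusters=4, random_state=0).fit(X)
k, m = km.cluster_centers_.shape        # number of clusters, number of features
aic = km.inertia_ + 2 * m * k           # same penalty form as kmeans_aic above
print(aic)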
def _init(self, X, lengths=None):
    super(GaussianHMM, self)._init(X, lengths=lengths)

    _, n_features = X.shape
    if hasattr(self, 'n_features') and self.n_features != n_features:
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (n_features, self.n_features))

    self.n_features = n_features
    if 'm' in self.init_params or not hasattr(self, "means_"):
        kmeans = cluster.KMeans(n_clusters=self.n_components,
                                random_state=self.random_state)
        kmeans.fit(X)
        self.means_ = kmeans.cluster_centers_
    if 'c' in self.init_params or not hasattr(self, "covars_"):
        cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
        if not cv.shape:
            cv.shape = (1, 1)
        self._covars_ = distribute_covar_matrix_to_match_covariance_type(
            cv, self.covariance_type, self.n_components).copy()
def ConsensusCluster(self, data, subsamples, subsample_fraction, norm_var, kvalues):
    """
    Performs the consensus clustering algorithm here!!!
    """
    return  # early return: everything below is never executed

    partition = dict()
    stuff = []
    nb_clusters = 0  # the number of clusters the dataset is supposed to be partitioned into
    distances = nx.to_numpy_matrix(data)
    for i in kvalues:
        # nclusters/npass is a Pycluster-style k-means call, not sklearn's KMeans
        clusterid, error, nfound = KMeans(distances, nclusters=i, npass=300)
        uniq_ids = list(set(clusterid))
        new_ids = [uniq_ids.index(val) for val in clusterid]
        for i, value in enumerate(new_ids):
            partition[i] = value
        stuff.append(partition)
def fit(self, data):
    """ fit model on data """
    self.data = data
    kmeans = KMeans(n_clusters=self.n_clusters)
    kmeans.fit(data)
    self.clusterer = kmeans
    logging.info('Fit has been completed')
    self.data_clusters = self.clusterer.predict(data)
    self.cluster_centers = self.clusterer.cluster_centers_
    logging.info('Cluster calculation has been completed')
    self.__clusters_separation()
    logging.info('Cluster separation has been completed')
    self.__cluster_avg_distances()
    logging.info('Cluster avg distances has been calculated')
def stratify_by_features(features, n_strata, **kwargs):
    """Stratify by clustering the items in feature space

    Parameters
    ----------
    features : array-like, shape=(n_items, n_features)
        feature matrix for the pool, where rows correspond to items and
        columns correspond to features.

    n_strata : int
        number of strata to create.

    **kwargs :
        passed to sklearn.cluster.KMeans

    Returns
    -------
    Strata instance
    """
    n_items = features.shape[0]
    km = KMeans(n_clusters=n_strata, **kwargs)
    allocations = km.fit_predict(X=features)
    return Strata(allocations)
def cluster(centers):
    n_class = int(len(centers) * 0.18)
    est = KMeans(n_clusters=n_class, max_iter=1000)
    est.fit(centers)
    new_list = []
    for x, y in est.cluster_centers_:
        min_num = 10000
        min_x = -1
        min_y = -1
        for x_, y_ in centers:
            dist = distance(x, y, x_, y_)
            if (dist < min_num) or (min_x == -1):
                min_num = dist
                min_x = x_
                min_y = y_
        new_list.append([min_x, min_y])
    return new_list
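The nested loop above snaps each k-means centroid to the closest original point. A vectorized alternative sketch with scipy's KD-tree, under the assumption that centers is an (N, 2) array-like of points (the 0.18 ratio is kept from the original).

import numpy as np
from scipy.spatial import cKDTree
from sklearn.cluster import KMeans

def cluster_snap(centers, ratio=0.18):
    # Fit k-means, then replace every centroid with its nearest original point.
    pts = np.asarray(centers, dtype=float)
    n_class = max(1, int(len(pts) * ratio))
    est = KMeans(n_clusters=n_class, max_iter=1000).fit(pts)
    _, idx = cKDTree(pts).query(est.cluster_centers_)
    return pts[idx].tolist()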
def noise_removal(aud_sample):
    if min(abs(aud_sample)) == 0:
        return aud_sample

    data = abs(np.copy(aud_sample))
    clf = KMeans(n_clusters=2, n_init=5)
    data = data.reshape(-1, 1)
    clf.fit(data)

    if clf.cluster_centers_[0] < clf.cluster_centers_[1]:
        noise = 0
    else:
        noise = 1

    aud = np.copy(aud_sample)
    window = 500
    windowStride = 50
    for i in range(0, len(clf.labels_), windowStride):
        if sum(clf.labels_[i:i + window] == noise) == window:
            aud[i:i + window] = 0
    return aud
def calculate():
    from sklearn.metrics import mean_squared_error
    import os

    if not os.path.exists('plots'):
        os.makedirs('plots')

    for k in range(2, 22):
        cluster = KMeans(k, init='k-means++', random_state=241)
        cluster.fit(X)
        reduced_image = recreate_image(cluster.cluster_centers_, cluster.labels_, h, w, d)
        mse = np.mean((image - reduced_image) ** 2)
        psnr = 10 * np.log10(1.0 / mse)
        plot(reduced_image, "plots/plot%d.png" % k)
        print("k: %d, mse: %.2f psnr: %.2f" % (k, mse, psnr))
        if psnr > 20:
            return k
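recreate_image() is not shown in this snippet; a sketch consistent with the call above, rebuilding an (h, w, d) image by looking up each pixel's cluster center (the argument order follows the call site, the implementation itself is assumed).

import numpy as np

def recreate_image(codebook, labels, h, w, d):
    # Map each pixel's cluster label back to its centroid color.
    return np.asarray(codebook)[labels].reshape(h, w, d)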
def evaluate_kmeans(X, model):
    """ Evaluate a K-Means model that has been trained on X using the
    Silhouette score.

    Args:
        X: the TF-IDF matrix where each line represents a document and each
           column represents a word, typically obtained by running
           transform_text() from the TP2.
        model: the KMeans model trained on X.

    Returns:
        A double that corresponds to the Silhouette score of the model.
    """
    return silhouette_score(X, model.labels_)
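A small self-contained check of evaluate_kmeans(), using synthetic blobs instead of the TP2 TF-IDF matrix (purely illustrative).

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

X, _ = make_blobs(n_samples=300, centers=3, random_state=0)
model = KMeans(n_clusters=3, random_state=0).fit(X)
print(evaluate_kmeans(X, model))   # values near 1.0 indicate well-separated clusters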
# Ex2
def agglomerative_clustering(X, k=10):
    """ Run an agglomerative clustering on X.

    Args:
        X: the TF-IDF matrix where each line represents a document and each
           column represents a word, typically obtained by running
           transform_text() from the TP2.
        k: the number of clusters we want (default: 10).

    Returns:
        An AgglomerativeClustering model trained on X.
    """
    model = AgglomerativeClustering(n_clusters=k)
    model.fit(X)
    # Note: all the other functions are the same, except that we use
    # 'AgglomerativeClustering' instead of 'KMeans'.
    return model
# Ex4.1
def cluster_kmeans(X_train, model_args=None, gridsearch=True):
    from sklearn.cluster import KMeans
    print('KMeans')

    if gridsearch is True:
        param_grid = {
            'n_clusters': np.arange(1, 20, 2),
            'max_iter': [50, 100, 300],
            'tol': [1e-5, 1e-4, 1e-3]
        }
        prune(param_grid, model_args)
    else:
        if 'n_clusters' not in model_args:
            raise KeyError('Need to define n_clusters for KMeans')
        param_grid = None

    return ModelWrapper(KMeans, X=X_train, model_args=model_args, param_grid=param_grid, unsupervised=True)
def getFlatVolume(series_volumes):
    """Return the typical (flat) volume level of a volume series. return: float"""
    results = np.array(series_volumes)
    results_n = np.zeros((len(results), 2))
    results_n[:, 0] = 1
    results_n[:, 1] = np.array(results)
    # Cluster the volumes into 3 groups and return the center of the most populated group
    k = KMeans(3)
    k.fit(results_n)
    df = pd.DataFrame(k.labels_)
    df_c = pd.DataFrame(k.cluster_centers_)
    v = []
    for i in range(3):
        v.append(df[df[0] == i].count()[0])
    df_c[2] = v
    return df_c.iloc[df_c[2].argmax()][1]
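An illustrative call to getFlatVolume() with a synthetic volume series; reading the result as "the center of the most common volume level" is inferred from the code rather than from the original docstring.

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

volumes = list(np.random.RandomState(0).normal(1000, 50, 200)) + [5000, 5200, 10]
print(getFlatVolume(volumes))   # roughly the dominant volume level around 1000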
#
#----------------------------------------------------------------------
def clusterFacetSamplesKNN(self, reduceRatio=3, maxNPnts=5):
    """
    cluster the samples of each facet using k nearest neighbors
    the cluster centers and their corresponding normals will be saved
    in self.objsamplepnts_refcls and self.objsamplenrmals_refcls

    :param reduceRatio: the ratio of points to reduce
    :param maxNPnts: the maximum number of points on a facet
    :return: None

    author: weiwei
    date: 20161129, tsukuba
    """
    self.objsamplepnts_refcls = np.ndarray(shape=(self.facets.shape[0],), dtype=np.object)
    self.objsamplenrmls_refcls = np.ndarray(shape=(self.facets.shape[0],), dtype=np.object)
    for i, facet in enumerate(self.facets):
        self.objsamplepnts_refcls[i] = np.empty(shape=(0, 0))
        self.objsamplenrmls_refcls[i] = np.empty(shape=(0, 0))
        X = self.objsamplepnts_ref[i]
        nX = X.shape[0]
        if nX > reduceRatio:
            # integer division keeps n_clusters an int on Python 3
            kmeans = KMeans(n_clusters=maxNPnts if nX // reduceRatio > maxNPnts else nX // reduceRatio,
                            random_state=0).fit(X)
            self.objsamplepnts_refcls[i] = kmeans.cluster_centers_
            # one copy of the facet normal per cluster center
            self.objsamplenrmls_refcls[i] = np.tile(self.facetnormals[i],
                                                    [self.objsamplepnts_refcls[i].shape[0], 1])