Python 函数 k_means() 的实例源码

index.py 文件源码 项目:Davies_Bouldin_Index_KMeans 作者: akankshadara 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def main():
    """Cluster the rows of ``dataset.csv`` and print the Davies-Bouldin index.

    Reads ``dataset.csv``, drops rows with missing values, removes the
    ``Customer`` and ``Effective To Date`` columns, one-hot encodes the
    remaining columns with ``pd.get_dummies`` and runs ``k_means`` with
    10 clusters.  The result of ``compute_DB_index`` for that clustering is
    printed to stdout.
    """
    df = pd.read_csv("dataset.csv")
    df = df.dropna()

    # ``drop`` returns a new frame, so the cleaned ``df`` is left untouched.
    features = df.drop(['Customer', 'Effective To Date'], axis=1)
    encoded = pd.get_dummies(features)

    n = 10
    clf = k_means(encoded, n_clusters=n)
    centroids = clf[0]
    labels = clf[1]

    index_db_val = compute_DB_index(encoded, labels, centroids, n)
    # Single-argument call form: valid on Python 3 and prints the same text
    # on Python 2, where the parentheses merely group the expression.
    print("The value of Davies Bouldin index for a K-Means cluser of size " + str(n) + " is: " + str(index_db_val))
test_k_means.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_k_means_non_collapsed():
    """Check that a bad initialization does not collapse the centers.

    Two of the three initial centers lie far away from the data.  The fit
    must still end with three distinct labels and with centers that are at
    least 0.1 apart from each other, so the clustering does not depend on
    numerical instabilities.
    """
    points = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 1.1]])
    bad_init = np.array([[1.0, 1.0], [5.0, 5.0], [-5.0, -5.0]])
    model = KMeans(init=bad_init, n_clusters=3, random_state=42, n_init=1)
    model.fit(points)

    # Every one of the three clusters must own at least one sample.
    assert_equal(len(np.unique(model.labels_)), 3)

    # All pairs of fitted centers must stay clearly separated.
    fitted = model.cluster_centers_
    for a, b in ((0, 1), (0, 2), (1, 2)):
        assert_true(np.linalg.norm(fitted[a] - fitted[b]) >= 0.1)
neural_doodle.py 文件源码 项目:keras 作者: GeekLiB 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def kmeans(xs, k):
    """Return the k-means cluster label of every row of ``xs``.

    ``xs`` must be two-dimensional.  scikit-learn's ``k_means`` is used on a
    float64 copy of the data when it can be imported; otherwise SciPy's
    ``kmeans2`` is used with ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignment, _inertia = k_means(xs.astype("float64"), k)
        return assignment
    except ImportError:
        # scikit-learn is unavailable: fall back to SciPy.
        from scipy.cluster.vq import kmeans2
        _codebook, assignment = kmeans2(xs, k, missing='raise')
        return assignment
neural_doodle.py 文件源码 项目:pCVR 作者: xjtushilei 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def kmeans(xs, k):
    """Cluster the rows of the 2-D array ``xs`` into ``k`` groups.

    Returns the per-row cluster labels.  scikit-learn's ``k_means`` is
    preferred (run on a float64 copy); if it cannot be imported, SciPy's
    ``kmeans2`` with ``missing='raise'`` is used instead.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        result = k_means(xs.astype('float64'), k)
        _centroids, row_labels, _inertia = result
    except ImportError:
        # No scikit-learn available: use the SciPy implementation.
        from scipy.cluster.vq import kmeans2
        result = kmeans2(xs, k, missing='raise')
        _centroids, row_labels = result
    return row_labels
nbof.py 文件源码 项目:cbof 作者: passalis 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def initialize_dictionary(self, X, max_iter=100, redo=5, n_samples=50000, normalize=False):
        """
        Samples some feature vectors from X and learns an initial dictionary.

        A random subset of rows is drawn from every object, the subsets are
        stacked into one float64 matrix, optionally normalized with
        ``feature_normalizer``, and clustered with ``cluster.k_means`` into
        ``self.Nk`` clusters.  The first element of the k-means result is
        stored in ``self.V`` via ``set_value``.

        :param X: list of objects
        :param max_iter: maximum k-means iters
        :param redo: number of times to repeat k-means clustering
        :param n_samples: number of feature vectors to sample from the objects
        :param normalize: use l_2 norm normalization for the feature vectors
        """

        # Sample only a small number of feature vectors from each object
        # NOTE(review): on Python 2 without ``from __future__ import division``
        # the quotient is already floored before ``np.ceil`` - confirm intent.
        samples_per_object = int(np.ceil(n_samples / len(X)))

        print("Sampling feature vectors...")
        sampled = []
        for obj in X:
            # Random row order; keep the first ``samples_per_object + 1`` rows.
            idx = np.random.permutation(obj.shape[0])[:samples_per_object + 1]
            sampled.append(obj[idx, :])
        # Stack once at the end instead of re-copying the growing matrix on
        # every iteration.
        features = np.vstack(sampled)

        print("Clustering feature vectors...")
        features = np.float64(features)
        if normalize:
            features = feature_normalizer(features)

        result = cluster.k_means(features, n_clusters=self.Nk, max_iter=max_iter, n_init=redo)
        self.V.set_value(np.asarray(result[0], dtype=theano.config.floatX))
Gap_stats.py 文件源码 项目:PySCUBA 作者: GGiecold 项目源码 文件源码 阅读 49 收藏 0 点赞 0 评论 0
def KMEANS(data, k):
    """Cluster ``data`` into ``k`` groups and return centroids and labels.

    Inputs with fewer than 20000 rows are clustered with ``k_means``
    (k-means++ initialization, 20 restarts, at most 200 iterations).
    Larger inputs are clustered with ``MiniBatchKMeans`` (k-means++
    initialization, 20 restarts, at most 100 iterations) using a batch size
    of ``data.shape[0] // k``.

    Returns a ``(centroids, cluster_IDs)`` tuple.
    """
    n_rows = data.shape[0]

    if n_rows < 20000:
        # ``precompute_distances`` is not passed: 'auto' was its default and
        # the parameter no longer exists in current scikit-learn releases.
        centroids, cluster_IDs, _ = k_means(data, k, init='k-means++', n_init=20, max_iter=200)
    else:
        # ``batch_size`` must be an integer, so use floor division (``/``
        # yields a float on Python 3).  ``init`` is passed by keyword because
        # it is keyword-only in current scikit-learn releases.
        mbkm = MiniBatchKMeans(n_clusters=k, init='k-means++', max_iter=100,
                               batch_size=n_rows // k, n_init=20)
        mbkm.fit(data)

        centroids = mbkm.cluster_centers_
        cluster_IDs = mbkm.labels_

    return centroids, cluster_IDs
improved_neural_doodle.py 文件源码 项目:Neural-Style-Transfer-Windows 作者: titu1994 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def kmeans(xs, k):
    """Assign each row of the 2-D array ``xs`` to one of ``k`` clusters.

    Tries scikit-learn's ``k_means`` on a float64 copy first and falls back
    to SciPy's ``kmeans2`` (``missing='raise'``) when scikit-learn cannot be
    imported.  Only the label array is returned.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, cluster_ids, _inertia = k_means(xs.astype("float64"), k)
        return cluster_ids
    except ImportError:
        # scikit-learn missing: SciPy provides the fallback implementation.
        from scipy.cluster.vq import kmeans2
        _centers, cluster_ids = kmeans2(xs, k, missing='raise')
        return cluster_ids
neural_doodle.py 文件源码 项目:Neural-Style-Transfer-Windows 作者: titu1994 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def kmeans(xs, k):
    """Return k-means labels for the rows of ``xs`` (which must be 2-D).

    Uses scikit-learn's ``k_means`` on a float64 copy when available and
    SciPy's ``kmeans2`` with ``missing='raise'`` otherwise.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        outcome = k_means(xs.astype("float64"), k)
        _centroids, membership, _inertia = outcome
    except ImportError:
        # Fall back to SciPy when scikit-learn is not installed.
        from scipy.cluster.vq import kmeans2
        outcome = kmeans2(xs, k, missing='raise')
        _centroids, membership = outcome
    return membership
neural_doodle.py 文件源码 项目:keras-customized 作者: ambrite 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def kmeans(xs, k):
    """Label every row of the 2-D array ``xs`` with its k-means cluster.

    scikit-learn's ``k_means`` (on a float64 copy) is the primary backend;
    SciPy's ``kmeans2`` with ``missing='raise'`` is used if scikit-learn
    cannot be imported.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _means, tags, _inertia = k_means(xs.astype("float64"), k)
        return tags
    except ImportError:
        # Secondary backend for environments without scikit-learn.
        from scipy.cluster.vq import kmeans2
        _means, tags = kmeans2(xs, k, missing='raise')
        return tags
cluster.py 文件源码 项目:betasqaud 作者: AJacobs15 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def test():
    """Run ``cluster.k_means`` with 3 clusters on three unit vectors.

    Returns whatever ``cluster.k_means`` returns for that input.
    """
    unit_vectors = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    return cluster.k_means(unit_vectors, 3)
cluster.py 文件源码 项目:betasqaud 作者: AJacobs15 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test():
    """Smoke-test ``cluster.k_means``: three 3-D unit vectors, three clusters.

    The raw result of the ``cluster.k_means`` call is returned unchanged.
    """
    basis = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    outcome = cluster.k_means(basis, 3)
    return outcome
neural_doodle.py 文件源码 项目:keras 作者: NVIDIA 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def kmeans(xs, k):
    """Compute k-means labels for the rows of ``xs``.

    ``xs`` has to be two-dimensional.  The clustering is done by
    scikit-learn's ``k_means`` on a float64 copy, or by SciPy's ``kmeans2``
    (``missing='raise'``) if scikit-learn cannot be imported.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, groups, _inertia = k_means(xs.astype("float64"), k)
        return groups
    except ImportError:
        # scikit-learn not importable: delegate to SciPy instead.
        from scipy.cluster.vq import kmeans2
        _centers, groups = kmeans2(xs, k, missing='raise')
        return groups
test_k_means.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def test_k_means_function():
    """Exercise the functional ``k_means`` API directly."""
    # Route stdout into an in-memory buffer while the verbose run executes;
    # the ``finally`` clause restores the real stream even if k_means raises.
    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        centers, labels, inertia = k_means(
            X, n_clusters=n_clusters, verbose=True)
    finally:
        sys.stdout = saved_stdout

    # One center per requested cluster, each with ``n_features`` coordinates.
    assert_equal(centers.shape, (n_clusters, n_features))

    n_distinct_labels = np.unique(labels).shape[0]
    assert_equal(n_distinct_labels, n_clusters)

    # The label assignment must be perfect (up to a permutation).
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # Passing explicit centers as ``init`` must emit a RuntimeWarning.
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 init=centers)

    # Asking for more clusters than there are samples must fail.
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1)
plaid.py 文件源码 项目:biclustlib 作者: padilha 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def _kmeans_initialization(self, residuals):
        """Computes k-means with k = 2 to find the initial components (rows or columns) of a new layer/bicluster.

        ``k_means`` is run on ``residuals`` with random initialization and
        ``self.initialization_iterations`` restarts.  The indices of the
        samples in the smaller of the two clusters are returned; on a tie the
        cluster labelled 0 is chosen.
        """
        # ``n_jobs`` is deliberately not passed: its old default was a single
        # job and the parameter no longer exists in current scikit-learn.
        _, labels, _ = k_means(residuals, n_clusters=2, n_init=self.initialization_iterations, init='random')
        count0, count1 = np.bincount(labels)

        smaller_cluster = 0 if count0 <= count1 else 1
        return np.where(labels == smaller_cluster)[0]
neural_doodle.py 文件源码 项目:keras-101 作者: burness 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def kmeans(xs, k):
    """Partition the rows of the 2-D array ``xs`` into ``k`` clusters.

    Returns the cluster label of each row.  The work is delegated to
    scikit-learn's ``k_means`` (on a float64 copy) or, when scikit-learn
    cannot be imported, to SciPy's ``kmeans2`` with ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        fit = k_means(xs.astype("float64"), k)
        _centroids, partition, _inertia = fit
    except ImportError:
        # Use SciPy when scikit-learn is not present.
        from scipy.cluster.vq import kmeans2
        fit = kmeans2(xs, k, missing='raise')
        _centroids, partition = fit
    return partition


问题


面经


文章

微信
公众号

扫码关注公众号