import pandas as pd
from sklearn.cluster import k_means


def main():
    df = pd.read_csv("dataset.csv")
    df = df.dropna()
    # Drop the identifier and date columns before clustering
    x1 = df.copy()
    del x1['Customer']
    del x1['Effective To Date']
    # One-hot encode the remaining categorical columns
    x4 = pd.get_dummies(x1)
    n = 10
    clf = k_means(x4, n_clusters=n)
    centroids = clf[0]
    labels = clf[1]
    # compute_DB_index is defined elsewhere in this project
    index_db_val = compute_DB_index(x4, labels, centroids, n)
    print("The value of the Davies-Bouldin index for a k-means clustering of size "
          + str(n) + " is: " + str(index_db_val))
Python k_means() usage examples
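Most snippets below call sklearn.cluster.k_means directly, which returns a (centroids, labels, inertia) tuple. For reference, a minimal self-contained call (toy data, not from any of the projects below):

import numpy as np
from sklearn.cluster import k_means

X = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 5.0]])
centroids, labels, inertia = k_means(X, n_clusters=2, random_state=0)
print(centroids.shape, labels, inertia)  # (2, 2), e.g. [0 0 1 1], small inertia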
def test_k_means_non_collapsed():
    # Check that k-means with a bad initialization does not yield a
    # singleton. Starting with bad centers that are quickly ignored should
    # not reposition the centers to the center of mass, which would lead
    # to collapsed centers and make the clustering dependent on numerical
    # instabilities.
    my_X = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 1.1]])
    array_init = np.array([[1.0, 1.0], [5.0, 5.0], [-5.0, -5.0]])
    km = KMeans(init=array_init, n_clusters=3, random_state=42, n_init=1)
    km.fit(my_X)

    # The centers must not have collapsed
    assert_equal(len(np.unique(km.labels_)), 3)

    centers = km.cluster_centers_
    assert_true(np.linalg.norm(centers[0] - centers[1]) >= 0.1)
    assert_true(np.linalg.norm(centers[0] - centers[2]) >= 0.1)
    assert_true(np.linalg.norm(centers[1] - centers[2]) >= 0.1)
def kmeans(xs, k):
    assert xs.ndim == 2
    try:
        # Prefer scikit-learn when it is available
        from sklearn.cluster import k_means
        _, labels, _ = k_means(xs.astype("float64"), k)
    except ImportError:
        # Fall back to SciPy; missing='raise' errors out on empty clusters
        from scipy.cluster.vq import kmeans2
        _, labels = kmeans2(xs, k, missing='raise')
    return labels
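A quick smoke test for this fallback helper (synthetic data; the shapes are only for illustration):

import numpy as np

points = np.random.RandomState(0).rand(100, 2)  # 100 two-dimensional points
labels = kmeans(points, 3)
assert labels.shape == (100,)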
def initialize_dictionary(self, X, max_iter=100, redo=5, n_samples=50000, normalize=False):
    """
    Samples feature vectors from X and learns an initial dictionary.

    :param X: list of objects (one feature matrix per object)
    :param max_iter: maximum number of k-means iterations
    :param redo: number of times to repeat the k-means clustering
    :param n_samples: number of feature vectors to sample from the objects
    :param normalize: use l_2 normalization for the feature vectors
    """
    # Sample only a small number of feature vectors from each object
    samples_per_object = int(np.ceil(n_samples / len(X)))
    features = None
    print("Sampling feature vectors...")
    for i in range(len(X)):
        idx = np.random.permutation(X[i].shape[0])[:samples_per_object + 1]
        cur_features = X[i][idx, :]
        if features is None:
            features = cur_features
        else:
            features = np.vstack((features, cur_features))
    print("Clustering feature vectors...")
    features = np.float64(features)
    if normalize:
        features = feature_normalizer(features)
    # cluster.k_means returns (centroids, labels, inertia); keep the centroids
    V = cluster.k_means(features, n_clusters=self.Nk, max_iter=max_iter, n_init=redo)
    self.V.set_value(np.asarray(V[0], dtype=theano.config.floatX))
def KMEANS(data, k):
    if data.shape[0] < 20000:
        centroids, cluster_IDs, _ = k_means(data, k, init='k-means++',
                                            precompute_distances='auto',
                                            n_init=20, max_iter=200)
    else:
        # Integer division: batch_size must be an int
        mbkm = MiniBatchKMeans(n_clusters=k, init='k-means++', max_iter=100,
                               batch_size=data.shape[0] // k, n_init=20)
        mbkm.fit(data)
        centroids = mbkm.cluster_centers_
        cluster_IDs = mbkm.labels_
    return centroids, cluster_IDs
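For reference, a small call that exercises the exact-k_means branch (synthetic data; note that precompute_distances was removed in scikit-learn 1.0, so this sketch assumes an older release):

import numpy as np

data = np.random.RandomState(1).rand(500, 4)  # 500 samples, well under the 20000 cutoff
centroids, cluster_IDs = KMEANS(data, 5)
print(centroids.shape)  # (5, 4)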
def test():
    vectors = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    s = cluster.k_means(vectors, 3)
    return s
def test_k_means_function():
    # Test calling the k_means function directly.
    # Catch the verbose output.
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                                   verbose=True)
    finally:
        sys.stdout = old_stdout

    assert_equal(cluster_centers.shape, (n_clusters, n_features))
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # Check that the label assignment is perfect (up to a permutation)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # Check the warning when centers are passed in
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 init=cluster_centers)

    # Too many clusters desired
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1)
def _kmeans_initialization(self, residuals):
    """Runs k-means with k=2 to find the initial components (rows or columns) of a new layer/bicluster."""
    _, labels, _ = k_means(residuals, n_clusters=2,
                           n_init=self.initialization_iterations,
                           init='random', n_jobs=1)
    count0, count1 = np.bincount(labels)
    # Return the indices belonging to the smaller of the two clusters
    if count0 <= count1:
        return np.where(labels == 0)[0]
    return np.where(labels == 1)[0]
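Outside the class, the split this helper performs can be illustrated on synthetic residuals (hypothetical shapes; a literal n_init=10 stands in for self.initialization_iterations):

import numpy as np
from sklearn.cluster import k_means

residuals = np.vstack([np.full((3, 4), 5.0), np.zeros((7, 4))])
_, labels, _ = k_means(residuals, n_clusters=2, n_init=10, init='random')
count0, count1 = np.bincount(labels)
# Keep the indices of the smaller cluster, as _kmeans_initialization does
smaller = np.where(labels == (0 if count0 <= count1 else 1))[0]
print(smaller)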