def __init__(self, edges, branching_factor=50, threshold=0.1):
    """Build the raw and normalized feature matrices, then run clustering.

    Args:
        edges: Sequence of mappings, each providing the keys 'perimeter',
            'area', 'shape_factor' and 'radius_deviation'.
        branching_factor: Stored on the instance as ``branching_factor``.
        threshold: Stored on the instance as ``threshold``.

    After the attributes are set, ``self.run(KMeans, n_clusters=2)`` is
    invoked; ``run`` is defined elsewhere on the class.
    """
    # One row per edge, one column per feature. A float dtype is forced so
    # that integer-valued inputs can be centred and scaled.
    features = np.array(
        [
            [edge['perimeter'], edge['area'],
             edge['shape_factor'], edge['radius_deviation']]
            for edge in edges
        ],
        dtype=float,
    )

    # Normalize each column: centre on the median and scale by the standard
    # deviation. The previous code computed the standard deviation but then
    # divided by the median, which is not a scale and fails when it is 0.
    center = np.median(features, axis=0)
    spread = np.std(features, axis=0)
    # A constant column has zero spread; leave it centred instead of
    # dividing by zero.
    scale = np.where(spread > 0, spread, 1.0)
    normed_features = (features - center) / scale

    self.features = features
    self.normed_features = normed_features
    self.branching_factor = branching_factor
    self.threshold = threshold
    # Alternative algorithms tried previously: Birch (branching_factor=50,
    # threshold=0.1, n_clusters=2) and AgglomerativeClustering(n_clusters=2).
    self.run(KMeans, n_clusters=2)
python类Birch()的实例源码
def birch_cluster_partitioning(X, points_per_expert):
    """Partition the sample indices of ``X`` along Birch clusters.

    Args:
        X: Array-like with samples along the first axis (``X.shape[0]`` is
            used as the number of samples).
        points_per_expert: Target number of samples per partition; the
            number of requested clusters is ``len(X) / points_per_expert``
            rounded down, but never less than one.

    Returns:
        A list of lists. Each inner list holds the indices of the samples
        that received the same Birch label, ordered by ascending label.
    """
    num_samples = X.shape[0]
    # With fewer samples than points_per_expert the quotient truncates to 0,
    # which is not a valid cluster count; fall back to a single partition.
    num_clusters = max(1, int(float(num_samples) / points_per_expert))
    labels = Birch(n_clusters=num_clusters, threshold=0.2).fit_predict(X)
    # One vectorized lookup per label instead of scanning every index in
    # Python for every label.
    return [
        np.flatnonzero(labels == label).tolist()
        for label in np.unique(labels)
    ]
def GetItemPixels(self, I):
    '''
    Locates items that should be picked up on the screen.

    I is compared against two reference colours along axis 2, the matches
    are counted per non-overlapping 8x14 window, and the origins of the
    windows that pass the count test are grouped with Birch. Returns the
    Birch subcluster centres, or an empty list when no window qualifies.
    '''
    window_shape = [8, 14]
    reference_a = np.array([10.8721, 12.8995, 13.9932])
    reference_b = np.array([118.1302, 116.0938, 106.9063])

    # Per-pixel masks: summed absolute difference to each reference colour.
    mask_a = np.abs(I - reference_a).sum(axis=2) < 15
    mask_b = np.abs(I - reference_b).sum(axis=2) < 76

    # Number of matching pixels inside each window (step == window size).
    count_a = view_as_windows(mask_a, window_shape, window_shape).sum(axis=(2, 3))
    count_b = view_as_windows(mask_b, window_shape, window_shape).sum(axis=(2, 3))

    # NOTE(review): operator precedence divides only count_b by the window
    # area here; confirm whether (count_a + count_b) / area was intended.
    combined = (count_a + count_b / np.prod(window_shape)) >= 1.0
    selected = combined & (count_a > 10) & (count_b > 10)

    # Window indices scaled by the window size.
    origins = np.argwhere(selected) * np.array(window_shape)
    if len(origins) == 0:
        return []

    clusterer = Birch(threshold=50, n_clusters=None)
    clusterer.fit(origins)
    return clusterer.subcluster_centers_
def cluster_kmeans(X_train, model_args=None, gridsearch=True):
    """Wrap a KMeans clusterer in a ``ModelWrapper``.

    Args:
        X_train: Training data handed to ``ModelWrapper`` as ``X``.
        model_args: Optional mapping of KMeans keyword arguments. Must
            contain 'n_clusters' when ``gridsearch`` is not ``True``.
        gridsearch: When exactly ``True``, a default parameter grid is built
            and passed through ``prune`` together with ``model_args``;
            otherwise no grid is used.

    Returns:
        The ``ModelWrapper`` built for KMeans in unsupervised mode.

    Raises:
        KeyError: If grid search is off and 'n_clusters' is not supplied
            (including when ``model_args`` is ``None``).
    """
    print('KMeans')
    if gridsearch is True:
        param_grid = {
            'n_clusters': np.arange(1, 20, 2),
            'max_iter': [50, 100, 300],
            'tol': [1e-5, 1e-4, 1e-3],
        }
        prune(param_grid, model_args)
    else:
        # model_args defaults to None; treat that the same as a missing key
        # instead of failing with a TypeError on the membership test.
        if model_args is None or 'n_clusters' not in model_args:
            raise KeyError('Need to define n_clusters for KMeans')
        param_grid = None

    # Imported only after validation so argument errors surface first.
    from sklearn.cluster import KMeans
    return ModelWrapper(KMeans, X=X_train, model_args=model_args,
                        param_grid=param_grid, unsupervised=True)
def cluster_birch(X_train, model_args=None, gridsearch=True):
    """Wrap a Birch clusterer in a ``ModelWrapper``.

    Args:
        X_train: Training data handed to ``ModelWrapper`` as ``X``.
        model_args: Mapping of Birch keyword arguments; must contain
            'n_clusters'.
        gridsearch: Must not be ``True``; hyperparameter search is not
            available for this model.

    Returns:
        The ``ModelWrapper`` built for Birch in unsupervised mode, with no
        parameter grid.

    Raises:
        NotImplementedError: If ``gridsearch`` is ``True``.
        KeyError: If 'n_clusters' is not supplied (including when
            ``model_args`` is ``None``).
    """
    print('Birch')
    if gridsearch is True:
        # TODO: add hyperparameter searching. No scoring method is available
        # for this model, so grid searching cannot easily be used.
        raise NotImplementedError('No hyperparameter optimization available yet for this model. Set gridsearch to False')

    # model_args defaults to None; treat that the same as a missing key
    # instead of failing with a TypeError on the membership test.
    if model_args is None or 'n_clusters' not in model_args:
        raise KeyError('Need to define n_clusters for Birch')

    # Imported only after validation so argument errors surface first.
    from sklearn.cluster import Birch
    return ModelWrapper(Birch, X=X_train, model_args=model_args,
                        param_grid=None, unsupervised=True)
def clusteringReminMost(window):
    """Keep the elements of ``window`` that fall in the chosen Birch cluster.

    ``window`` is clustered into three Birch clusters, the members of labels
    0, 1 and "anything else" are counted, and ``chooseMax`` (defined
    elsewhere) picks a label from the three counts. Elements whose predicted
    label equals that choice are returned as a new list.
    """
    model = Birch(branching_factor=50, n_clusters=3, threshold=0.5,
                  compute_labels=True)
    model.fit(window)
    labels = model.predict(window)

    # Tally the cluster sizes; every label other than 0 or 1 counts as the
    # third bucket.
    num0 = sum(1 for value in labels if value == 0)
    num1 = sum(1 for value in labels if value == 1)
    num2 = len(labels) - num0 - num1
    lable = chooseMax(num0, num1, num2)

    # NOTE(review): the scan starts at index 1, so window[0] is never
    # returned; confirm this is intended.
    return [window[idx] for idx in range(1, len(labels))
            if labels[idx] == lable]
onlinedetectWithlittleData.py 文件源码
项目:onlineDetectForHadoop
作者: DawnsonLi
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def clusteringReminMost(window):
    """Keep the rows of ``window`` selected by the chosen Birch cluster.

    ``window`` is clustered into three Birch clusters, the members of labels
    0, 1 and "anything else" are counted, and ``chooseMax`` (defined
    elsewhere) picks a label from the three counts.

    Assumes ``window`` is a pandas object that supports positional slicing
    (the previous implementation relied on its ``append`` method) -- TODO
    confirm against callers.

    Returns:
        The first row of ``window`` followed by row ``i - 1`` for every
        ``i >= 1`` whose predicted label equals the chosen one.
    """
    # Local import: pd.concat replaces DataFrame.append, which was removed
    # in pandas 2.0.
    import pandas as pd

    brc = Birch(branching_factor=50, n_clusters=3, threshold=0.5,
                compute_labels=True)
    brc.fit(window)
    labels = brc.predict(window)

    # Tally the cluster sizes; every label other than 0 or 1 counts as the
    # third bucket.
    num0 = sum(1 for value in labels if value == 0)
    num1 = sum(1 for value in labels if value == 1)
    num2 = len(labels) - num0 - num1
    lable = chooseMax(num0, num1, num2)

    # Row selection is kept exactly as before.
    # NOTE(review): row 0 is always included and label i selects row i - 1;
    # confirm whether that offset is intended.
    pieces = [window[0:1]]
    pieces.extend(window[i - 1:i] for i in range(1, len(labels))
                  if labels[i] == lable)
    # A single concat avoids re-copying the accumulated frame on every match.
    return pd.concat(pieces)
birchForChangeWindowSize.py 文件源码
项目:onlineDetectForHadoop
作者: DawnsonLi
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def clusteringReminMost(window):
    """Keep the rows of ``window`` selected by the chosen Birch cluster.

    ``window`` is clustered into three Birch clusters, the members of labels
    0, 1 and "anything else" are counted, and ``chooseMax`` (defined
    elsewhere) picks a label from the three counts.

    Assumes ``window`` is a pandas object that supports positional slicing
    (the previous implementation relied on its ``append`` method) -- TODO
    confirm against callers.

    Returns:
        The first row of ``window`` followed by row ``i - 1`` for every
        ``i >= 1`` whose predicted label equals the chosen one.
    """
    # Local import: pd.concat replaces DataFrame.append, which was removed
    # in pandas 2.0.
    import pandas as pd

    brc = Birch(branching_factor=50, n_clusters=3, threshold=0.5,
                compute_labels=True)
    brc.fit(window)
    labels = brc.predict(window)

    # Tally the cluster sizes; every label other than 0 or 1 counts as the
    # third bucket.
    num0 = sum(1 for value in labels if value == 0)
    num1 = sum(1 for value in labels if value == 1)
    num2 = len(labels) - num0 - num1
    lable = chooseMax(num0, num1, num2)

    # Row selection is kept exactly as before.
    # NOTE(review): row 0 is always included and label i selects row i - 1;
    # confirm whether that offset is intended.
    pieces = [window[0:1]]
    pieces.extend(window[i - 1:i] for i in range(1, len(labels))
                  if labels[i] == lable)
    # A single concat avoids re-copying the accumulated frame on every match.
    return pd.concat(pieces)
def clusteringReminMost(window):
    """Keep the rows of ``window`` selected by the chosen Birch cluster.

    ``window`` is clustered into three Birch clusters, the members of labels
    0, 1 and "anything else" are counted, and ``chooseMax`` (defined
    elsewhere) picks a label from the three counts.

    Assumes ``window`` is a pandas object that supports positional slicing
    (the previous implementation relied on its ``append`` method) -- TODO
    confirm against callers.

    Returns:
        The first row of ``window`` followed by row ``i - 1`` for every
        ``i >= 1`` whose predicted label equals the chosen one.
    """
    # Local import: pd.concat replaces DataFrame.append, which was removed
    # in pandas 2.0.
    import pandas as pd

    brc = Birch(branching_factor=50, n_clusters=3, threshold=0.5,
                compute_labels=True)
    brc.fit(window)
    labels = brc.predict(window)

    # Tally the cluster sizes; every label other than 0 or 1 counts as the
    # third bucket.
    num0 = sum(1 for value in labels if value == 0)
    num1 = sum(1 for value in labels if value == 1)
    num2 = len(labels) - num0 - num1
    lable = chooseMax(num0, num1, num2)

    # Row selection is kept exactly as before.
    # NOTE(review): row 0 is always included and label i selects row i - 1;
    # confirm whether that offset is intended.
    pieces = [window[0:1]]
    pieces.extend(window[i - 1:i] for i in range(1, len(labels))
                  if labels[i] == lable)
    # A single concat avoids re-copying the accumulated frame on every match.
    return pd.concat(pieces)
def make_birch_clustering(self, short_filenames, input_texts):
    """Cluster ``input_texts`` with Birch and report the result.

    The texts are vectorized with ``CountVectorizer``, reduced to two
    components with ``TruncatedSVD`` and normalized before Birch is fitted
    with the instance's ``birch_threshold``, ``birch_branching_factor`` and
    ``birch_clusters_count``. A per-cluster listing of ``short_filenames``
    is emitted through ``self.signals.PrintInfo``, written to
    'clusters.txt' under ``<self.output_dir>birch/`` and passed to
    ``self.draw_clusters_plot``. When ``self.need_tf_idf`` is set, TF-IDF
    is calculated and written to 'tf_idf.csv' first.

    NOTE(review): several user-facing strings look mis-encoded in this copy
    of the file; they are reproduced verbatim.
    """
    emit = self.signals.PrintInfo.emit

    output_dir = self.output_dir + 'birch/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if self.need_tf_idf:
        emit("?????? TF-IDF...")
        emit(self.calculate_and_write_tf_idf(output_dir + 'tf_idf.csv',
                                             input_texts))

    # Bag-of-words counts -> 2-component LSA projection with normalization.
    term_counts = CountVectorizer().fit_transform(input_texts)
    lsa = make_pipeline(TruncatedSVD(2), Normalizer(copy=False))
    X = lsa.fit_transform(term_counts)

    predict_result = Birch(
        threshold=self.birch_threshold,
        branching_factor=self.birch_branching_factor,
        n_clusters=self.birch_clusters_count,
    ).fit_predict(X)

    emit('\n??????? ?? ??????????:\n')

    # One section per cluster index: a header line, the member documents,
    # then a blank line.
    report_parts = []
    for cluster_id in range(max(predict_result) + 1):
        report_parts.append('??????? ' + str(cluster_id) + ':\n')
        report_parts.extend(
            ' ' + str(document) + '\n'
            for predicted, document in zip(predict_result, short_filenames)
            if predicted == cluster_id
        )
        report_parts.append('\n')
    clasters_output = ''.join(report_parts)
    emit(clasters_output)

    clusters_path = output_dir + 'clusters.txt'
    emit('????????? ?:' + str(clusters_path))
    writeStringToFile(clasters_output, clusters_path)

    self.draw_clusters_plot(X, predict_result, short_filenames)
def birchclustering(datalist):
    """Fit a Birch model on ``datalist`` and return the fitted estimator.

    The model uses branching_factor=50, threshold=0.17, no final cluster
    count (``n_clusters=None``) and computes labels during fitting.
    """
    model = Birch(
        branching_factor=50,
        n_clusters=None,
        threshold=0.17,
        compute_labels=True,
    )
    # fit() returns the estimator itself, so the fitted model is handed back.
    return model.fit(datalist)
def get_subtrees_sklearn(d, bin_chr, bin_position, method="ward", nchrom=1000, distfrac=0.4):
    """Group names into subtrees according to KMeans cluster assignment.

    Args:
        d: Data passed to ``KMeans(n_clusters=10).fit_predict``.
        bin_chr, bin_position: Forwarded to ``get_names`` (defined
            elsewhere) to obtain one name per row of ``d``.
        method, nchrom, distfrac: Accepted for interface compatibility; not
            used by this implementation.

    Returns:
        A list with ``max(label) + 1`` inner lists; inner list ``k`` holds
        the names assigned to cluster ``k``.

    Progress information (the five most common labels, then the number of
    subtrees and the largest label) is printed to stdout.
    """
    names = get_names(bin_chr, bin_position)
    assignments = KMeans(n_clusters=10).fit_predict(d)

    # print() with a single pre-formatted argument gives the same output on
    # Python 2 and Python 3; the old print statements fail to parse on 3.
    print(Counter(assignments).most_common(5))

    largest_label = max(assignments)
    subtrees = [[] for _ in range(largest_label + 1)]
    print("%s %s" % (len(subtrees), largest_label))

    for chrom, label in zip(names, assignments):
        subtrees[label].append(chrom)
    return subtrees