def cluster(self, u, ndim, keepMetric=False):
    """Rebuild ``self.region`` from the points ``u``, optionally re-learning the metric.

    With ``keepMetric`` true, only the region is rebuilt, using the points
    transformed by the current ``self.metric``. Otherwise a metric is selected
    according to ``self.metriclearner`` (``'none'``, ``'simplescaling'`` or
    ``'truncatedscaling'``), stored in ``self.metric``, and the region is
    rebuilt from the points transformed by that metric.

    When ``self.force_shrink`` is set, a previous ``maxdistance`` is known and
    the metric did not change, the region's ``maxdistance`` is not allowed to
    exceed the value recorded by the previous call. The resulting value is
    always stored in ``self.prev_maxdistance``.

    Parameters
    ----------
    u : array-like
        Points to build the region from; passed to ``metric.transform`` and
        to ``numpy.mean(..., axis=0)``.
    ndim : int
        Not used by this method; kept for interface compatibility.
    keepMetric : bool
        If true, skip the metric update and only rebuild the region.

    Raises
    ------
    AssertionError
        If ``self.metriclearner`` is not one of the supported names.
    """
    w = self.metric.transform(u)
    if keepMetric:
        self.region = RadFriendsRegion(members=w)
        # Guard against the first call, when no previous distance exists yet
        # (same condition as in the re-learning path below).
        if (self.force_shrink and self.prev_maxdistance is not None
                and self.region.maxdistance > self.prev_maxdistance):
            self.region = RadFriendsRegion(members=w, maxdistance=self.prev_maxdistance)
        self.prev_maxdistance = self.region.maxdistance
        print('keeping metric, not reclustering.')
        return

    print('computing distances for clustering...')
    # Overlay all clusters (shift by cluster mean)
    print('Metric update ...')
    cluster_mean = numpy.mean(u, axis=0)
    shifted_cluster_members = u - cluster_mean

    # Using original points and new metric, compute RadFriends bootstrapped
    # distance and store
    if self.metriclearner == 'none':
        metric = self.metric  # stay with the current metric
        metric_updated = False
    elif self.metriclearner == 'simplescaling':
        metric = SimpleScaling()
        metric.fit(shifted_cluster_members)
        metric_updated = True
    elif self.metriclearner == 'truncatedscaling':
        metric = TruncatedScaling()
        metric.fit(shifted_cluster_members)
        # Only counts as an update if the scale actually changed (or there
        # was no learned scale before).
        metric_updated = self.metric == IdentityMetric() or not numpy.all(self.metric.scale == metric.scale)
    else:
        # Explicit raise so the check is not stripped under ``python -O``.
        raise AssertionError(self.metriclearner)

    self.metric = metric
    wnew = self.metric.transform(u)

    print('Region update ...')
    self.region = RadFriendsRegion(members=wnew)
    if not metric_updated and self.force_shrink and self.prev_maxdistance is not None:
        if self.region.maxdistance > self.prev_maxdistance:
            # The metric is unchanged here, so ``w`` (transformed with the
            # previous metric) is presumably equivalent to ``wnew``.
            self.region = RadFriendsRegion(members=w, maxdistance=self.prev_maxdistance)
    self.prev_maxdistance = self.region.maxdistance
    print('done.')
# Example source code for the Python cluster() function
def plot_matrix(self):
    """Plot distance matrix and dendrogram using matplotlib.

    Draws two dendrograms of ``self.linkage`` (one on the left, one on top),
    the matrix ``self.mat`` with rows and columns reordered to follow the
    dendrogram leaves, and a colorbar. The figure is created but not shown.

    Returns
    -------
    matplotlib figure
    """
    # Compute and plot first dendrogram for all nodes.
    fig = pylab.figure(figsize=(8, 8))
    ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
    Z1 = scipy.cluster.hierarchy.dendrogram(
        self.linkage, orientation='left', labels=self.labels)
    ax1.set_xticks([])
    ax1.set_yticks([])

    # Compute and plot second dendrogram.
    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    Z2 = scipy.cluster.hierarchy.dendrogram(self.linkage, labels=self.labels)
    ax2.set_xticks([])
    ax2.set_yticks([])

    # Plot distance matrix.
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    D = self.mat.copy()
    if isinstance(D, pd.DataFrame):
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # yields the same ndarray and works on old and new pandas alike.
        D = D.values
    # Reorder rows by the left dendrogram and columns by the top dendrogram.
    D = D[idx1, :]
    D = D[:, idx2]
    im = axmatrix.matshow(
        D, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar.
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(im, cax=axcolor)

    module_logger.info(
        'Use matplotlib.pyplot.show() to render figure.')
    return fig
def merge(self, anomFlag, thr):
    """Write XSCALE/pointless inputs for the largest cluster and run XSCALE.

    The tree ``self.Tree`` is cut at distance ``thr`` into flat clusters and
    the most populated one is selected. If it has more than one member and
    ``[thr, label, anomFlag]`` is not listed in ``self.alreadyDone``, a
    directory ``cc_Cluster_<thr>_<label>_<anomFlag>`` is created under
    ``self.CurrentDir`` containing:

    * ``XSCALE.INP`` listing every file of ``self.labelList`` that belongs
      to the cluster,
    * ``launch_pointless.sh`` (made executable) with the matching
      ``XDSIN`` lines,
    * ``Dendrogram.png`` saved from the current matplotlib figure.

    XSCALE is then executed inside that directory and a line is appended to
    ``.cc_cluster.log`` in ``self.CurrentDir``.

    Parameters
    ----------
    anomFlag : str
        ``'ano'`` writes ``FRIEDEL'S_LAW= FALSE``, ``'no_ano'`` writes
        ``FRIEDEL'S_LAW= TRUE``; any other value writes neither line.
    thr : float or str
        Distance threshold for the flat clustering; formatted with
        ``float(thr)`` in directory names and the log.

    Raises
    ------
    OSError
        If the cluster directory already exists or cannot be created, or if
        the XSCALE executable cannot be started.
    """
    xscale_bin = '/opt/pxsoft/xds/vdefault/linux-x86_64/xscale_par'

    def cluster_dir(label):
        # Working directory for one cluster label at this threshold.
        return self.CurrentDir + '/cc_Cluster_%.2f_%s_%s' % (float(thr), label, anomFlag)

    flat_clusters = hierarchy.fcluster(self.Tree, thr, criterion='distance')
    counter = collections.Counter(flat_clusters)
    # Label of the most populated cluster (first one wins on ties).
    best = max(counter.items(), key=operator.itemgetter(1))[0]
    # Only the most populated cluster is processed, and a cluster with a
    # single member is skipped entirely.
    to_process = [best] if counter[best] != 1 else []
    # Compute the work list once so that every step below agrees on it and
    # already-processed clusters never get input lines appended again.
    pending = [label for label in to_process
               if [thr, label, anomFlag] not in self.alreadyDone]

    # NOTE(review): collected but not consumed in the visible code; confirm
    # whether it is meant to be merged into ``self.alreadyDone``.
    newProcesses = []

    with open(self.CurrentDir + '/.cc_cluster.log', 'a') as log:
        for label in pending:
            workdir = cluster_dir(label)
            xscale_inp = workdir + '/XSCALE.INP'
            pointless_sh = workdir + '/launch_pointless.sh'
            members = [filename
                       for cluster, filename in zip(flat_clusters, self.labelList)
                       if cluster == label]

            os.mkdir(workdir)

            # Processing pipeline: write the XSCALE input and the first part
            # of the pointless launcher.
            with open(xscale_inp, 'a') as xscale, open(pointless_sh, 'a') as pointless:
                print('OUTPUT_FILE=scaled.hkl', file=xscale)
                print('MERGE= TRUE', file=xscale)
                print('pointless hklout clustered.mtz << eof', file=pointless)
                if anomFlag == 'ano':
                    print('FRIEDEL\'S_LAW= FALSE', file=xscale)
                elif anomFlag == 'no_ano':
                    print('FRIEDEL\'S_LAW= TRUE', file=xscale)
                for filename in members:
                    print('INPUT_FILE= ../%s' % (filename), file=xscale)
                    print('MINIMUM_I/SIGMA= 0', file=xscale)
                    print('XDSIN ../%s' % (filename), file=pointless)

            # Run XSCALE in the cluster directory and wait for it to finish.
            plt.savefig(workdir + '/Dendrogram.png')
            process = subprocess.Popen(xscale_bin, cwd=workdir + '/')
            process.wait()
            print('Cluster, %s , %s , %s' % (float(thr), label, anomFlag), file=log)

            # Terminate the pointless here-document.
            with open(pointless_sh, 'a') as pointless:
                print('COPY \n bg\n TOLERANCE 4 \n eof', file=pointless)

            # Add execute permission for user, group and others on top of the
            # script's current mode.
            st = os.stat(pointless_sh)
            os.chmod(pointless_sh, st.st_mode | 0o111)
            newProcesses.append([thr, label, anomFlag])