def cluster_helices(helices, cluster_distance=12.0):
    """Cluster helices by the minimum distance between their backbone line segments.

    Notes
    -----
    Each helix is represented as a line segment joining the CA of its first
    Residue to the CA of its final Residue. The minimal distance between each
    pair of line segments is calculated and stored in a condensed distance
    matrix, which is then clustered using 'single' linkage: every member of
    cluster i is no more than `cluster_distance` away from at least one other
    member of cluster i.
    Helices belonging to the same cluster are grouped together as values of
    the returned cluster_dict.

    Parameters
    ----------
    helices: Assembly
        Iterable of helices; each helix must support `h[0]['CA']` and
        `h[-1]['CA']` lookups.
    cluster_distance: float
        Distance threshold passed to `fcluster` with the 'distance' criterion.

    Returns
    -------
    cluster_dict: dict
        Keys: int
            cluster number (numbering starts at 1)
        Values: [Polymer]
            helices assigned to that cluster, in input order.
        An empty input gives an empty dict; a single helix gives `{1: [helix]}`.
    """
    # Materialise once: the helices are walked twice below (pairwise distances,
    # then label assignment), which would exhaust a one-shot iterator.
    helices = list(helices)
    # Hierarchical clustering needs at least one pairwise distance; with fewer
    # than two helices the answer is trivial, so skip scipy entirely.
    if len(helices) < 2:
        return {1: helices} if helices else {}
    # itertools.combinations yields pairs in the same order scipy expects for a
    # condensed distance matrix: (0,1), (0,2), ..., (n-2,n-1).
    condensed_distance_matrix = [
        minimal_distance_between_lines(
            h1[0]['CA']._vector, h1[-1]['CA']._vector,
            h2[0]['CA']._vector, h2[-1]['CA']._vector,
            segments=True)
        for h1, h2 in itertools.combinations(helices, 2)
    ]
    z = linkage(condensed_distance_matrix, method='single')
    clusters = fcluster(z, t=cluster_distance, criterion='distance')
    cluster_dict = {}
    for h, k in zip(helices, clusters):
        # fcluster returns numpy integers; convert so keys are plain ints.
        cluster_dict.setdefault(int(k), []).append(h)
    return cluster_dict
评论列表
文章目录