def generate_subsets_graph(data_mask, live_pointsp, graph, _):
# generate data subsets which share points.
firstmember = numpy.where(data_mask)[0][0]
allp = numpy.unique(live_pointsp[:,data_mask].flatten())
if len(live_pointsp[:,firstmember]) == len(allp):
# trivial case: all live points are the same across data sets
yield data_mask, live_pointsp[:,firstmember]
return
subgraphs = list(networkx.connected_component_subgraphs(graph, copy=False))
if len(subgraphs) == 1:
yield data_mask, allp
return
# then identify disjoint subgraphs
for subgraph in subgraphs:
print 'networkx subgraph:', subgraph.nodes()
member_data_mask = numpy.zeros(len(data_mask), dtype=bool)
member_live_pointsp = []
for nodetype, i in subgraph.nodes():
if nodetype == 0:
member_data_mask[i] = True
else:
member_live_pointsp.append(i)
yield member_data_mask, member_live_pointsp
python类connected_component_subgraphs()的实例源码
profile_generate_subsets.py 文件源码
项目:massivedatans
作者: JohannesBuchner
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def run(self, ips, imgs, para = None):
edges, nodes = [], []
ntitles = ['PartID', 'NodeID', 'Degree','X', 'Y']
etitles = ['PartID', 'StartID', 'EndID', 'Length']
k, unit = ips.unit
comid = 0
for g in nx.connected_component_subgraphs(ips.data, False):
for idx in g.nodes():
o = g.node[idx]['o']
print(idx, g.degree(idx))
nodes.append([comid, idx, g.degree(idx), round(o[1]*k,2), round(o[0]*k,2)])
for (s, e) in g.edges():
eds = g[s][e]
for i in eds:
edges.append([comid, s, e, round(eds[i]['weight']*k, 2)])
comid += 1
IPy.table(ips.title+'-nodes', nodes, ntitles)
IPy.table(ips.title+'-edges', edges, etitles)
def run(self, ips, imgs, para = None):
edges, nodes = [], []
ntitles = ['PartID', 'NodeID', 'Degree','X', 'Y', 'Z']
etitles = ['PartID', 'StartID', 'EndID', 'Length', 'Distance']
k, unit = ips.unit
comid = 0
for g in nx.connected_component_subgraphs(ips.data, False):
for idx in g.nodes():
o = g.node[idx]['o']
nodes.append([comid, idx, g.degree(idx), round(o[1]*k,2), round(o[0]*k,2), round(o[2])])
for (s, e) in g.edges():
eds = g[s][e]
for i in eds:
l = round(eds[i]['weight']*k, 2)
dis = round(np.linalg.norm(g.node[s]['o']-g.node[e]['o'])*k, 2)
edges.append([comid, s, e, l, dis])
comid += 1
IPy.table(ips.title+'-nodes', nodes, ntitles)
IPy.table(ips.title+'-edges', edges, etitles)
def read_net(fname, weighted, directed, log):
if weighted:
G = nx.read_edgelist(inodetype=int, data=(('weight', float),),
create_using=nx.DiGraph())
else:
G = nx.read_edgelist(fname, nodetype=int, create_using=nx.DiGraph())
for edge in G.edges():
G[edge[0]][edge[1]]['weight'] = 1
if not directed:
G = G.to_undirected()
log.info('N: %d E: %d' % (G.number_of_nodes(), G.number_of_edges()))
log.info('CC: %d' % nx.number_connected_components(G))
giant = max(nx.connected_component_subgraphs(G), key=len)
log.info('N: %d E: %d' % (giant.number_of_nodes(), giant.number_of_edges()))
return giant
def getNetworkGraph(segments,segmentlengths):
"""
Builds a networkx graph from the network file, inluding segment length taken from arcpy.
It selects the largest connected component of the network (to prevent errors from routing between unconnected parts)
"""
#generate the full network path for GDAL to be able to read the file
path =str(os.path.join(arcpy.env.workspace,segments))
print path
if arcpy.Exists(path):
g = nx.read_shp(path)
#This selects the largest connected component of the graph
sg = list(nx.connected_component_subgraphs(g.to_undirected()))[0]
print "graph size (excluding unconnected parts): "+str(len(g))
# Get the length for each road segment and append it as an attribute to the edges in the graph.
for n0, n1 in sg.edges_iter():
oid = sg[n0][n1]["OBJECTID"]
sg.edge[n0][n1]['length'] = segmentlengths[oid]
return sg
else:
print "network file not found on path: "+path
def getNetworkGraph(segments,segmentlengths):
"""
Builds a networkx graph from the network file, inluding segment length taken from arcpy.
It selects the largest connected component of the network (to prevent errors from routing between unconnected parts)
"""
#generate the full network path for GDAL to be able to read the file
path =str(os.path.join(arcpy.env.workspace,segments))
print path
if arcpy.Exists(path):
g = nx.read_shp(path)
#This selects the largest connected component of the graph
sg = list(nx.connected_component_subgraphs(g.to_undirected()))[0]
print "graph size (excluding unconnected parts): "+str(len(g))
# Get the length for each road segment and append it as an attribute to the edges in the graph.
for n0, n1 in sg.edges_iter():
oid = sg[n0][n1]["OBJECTID"]
sg.edge[n0][n1]['length'] = segmentlengths[oid]
return sg
else:
print "network file not found on path: "+path
def Partition(UnclusteredGraph, code, type, scale):
# Make edges on Unclustered graph
# between all nodes separated by distance 'scale'
# on Dual Lattice
for node1 in UnclusteredGraph.nodes():
for node2 in UnclusteredGraph.nodes():
if node1 != node2:
d = code.distance(type, node1, node2)
if d <= scale:
UnclusteredGraph.add_edge(*(node1, node2), weight=d)
Clusters = []
# Networkx connected components analysis
subgraphs = nx.connected_component_subgraphs(UnclusteredGraph)
for i, sg in enumerate(subgraphs):
Clusters.append(sg.nodes(data=True))
return Clusters
# Choose fixed node for cluster fusion
def summarise(skelimage):
ndim = skelimage.ndim
g, counts, skelimage_labeled = skeleton_to_nx(skelimage)
coords = np.nonzero(skelimage)
ids = skelimage_labeled[coords]
sorted_coords = np.transpose(coords)[np.argsort(ids)]
tables = []
for i, cc in enumerate(nx.connected_component_subgraphs(g)):
stats = branch_statistics(cc)
if stats.size == 0:
continue
coords0 = sorted_coords[stats[:, 0].astype(int) - 1]
coords1 = sorted_coords[stats[:, 1].astype(int) - 1]
distances = np.sqrt(np.sum((coords0 - coords1)**2, axis=1))
skeleton_id = np.full(distances.shape, i, dtype=float)
tables.append(np.column_stack((skeleton_id, stats,
coords0, coords1, distances)))
columns = (['skeleton-id', 'node-id-0', 'node-id-1', 'branch-distance',
'branch-type'] +
['coord-0-%i' % i for i in range(ndim)] +
['coord-1-%i' % i for i in range(ndim)] +
['euclidean-distance'])
column_types = [int, int, int, float, int] + 2*ndim*[int] + [float]
arr = np.row_stack(tables).T
data_dict = {col: dat.astype(dtype)
for col, dat, dtype in zip(columns, arr, column_types)}
df = pd.DataFrame(data_dict)
return df
profile_generate_subsets.py 文件源码
项目:massivedatans
作者: JohannesBuchner
项目源码
文件源码
阅读 31
收藏 0
点赞 0
评论 0
def generate_subsets_graph_simple(data_mask, live_pointsp, graph, _):
# generate data subsets which share points.
firstmember = numpy.where(data_mask)[0][0]
# then identify disjoint subgraphs
for subgraph in networkx.connected_component_subgraphs(graph, copy=False):
member_data_mask = numpy.zeros(len(data_mask), dtype=bool)
member_live_pointsp = []
for nodetype, i in subgraph.nodes():
if nodetype == 0:
member_data_mask[i] = True
else:
member_live_pointsp.append(i)
yield member_data_mask, member_live_pointsp
def parsimony_grouping(g, peps):
''' Group peptides to proteins using the rule of parsimony
Inputs:
g: an undirected graph with peptide <-> protein as edges
peps: the set of peptide sequences, nodes not listed in the peptide set
are protein IDs.
'''
not_peps = set(g.nodes()) - set(peps)
prot_groups = dict()
for cc in nx.connected_component_subgraphs(g):
in_group_peptides = set(cc.nodes()) - not_peps
in_group_proteins = not_peps.intersection(cc.nodes())
if len(in_group_proteins) == 1:
prot_groups[in_group_proteins.pop()] = in_group_peptides
elif len(in_group_proteins) > 1:
reported = set()
while len(in_group_proteins - reported) > 0:
candidate_proteins = sorted(in_group_proteins - reported,
key=lambda p: (
len(set(cc[p].keys()) - reported), p),
reverse=True)
p = candidate_proteins[0]
current_peps = set(cc[p].keys())
plabel = [p]
for i in range(1, len(candidate_proteins)):
_p = candidate_proteins[i]
_peps = set(cc[_p].keys())
if _peps == current_peps:
plabel.append(_p)
if len(_peps - current_peps) == 0:
reported.add(_p)
plabel = ';'.join(sorted(plabel))
if len(current_peps - reported) > 0:
prot_groups[plabel] = current_peps
reported = reported.union(current_peps)
reported.add(p)
return prot_groups
def solve():
g = networkx.Graph()
# ?? 1
with open("144047638844506.txt", "r") as f:
for each_line in f:
# ???????????
each_line = each_line.strip()
point0, point1 = each_line.split(" ")
g.add_edge(point0, point1)
# ??????????????????????????????????????
print("????: {}".format(len(list(networkx.connected_component_subgraphs(g)))))
def solve():
g = networkx.Graph()
with open("144341511030664.txt", "r") as f:
for each_line in f:
# ???????????
each_line = each_line.strip()
point0, point1 = each_line.split(" ")
g.add_edge(point0, point1)
# ??????????????????????????????????????
print(g.number_of_nodes())
# ????????
length = len(list(networkx.connected_component_subgraphs(g)))
print("????: {}".format(100000 - g.number_of_nodes() + length))
def GetGmlSubG():
'read Data'
Gt=nx.Graph()
fedge = fname + '.edge'
try:
fdobj = open(fedge,'r')
except IOError as e:
print "***file open error:",e
else:
s = 'source' #s = 'source '
tepstr = ''
Result = []
eline = fdobj.readline()
while eline:
if s in eline:
source = eline[11:]
source = source.strip('\n')
tline = fdobj.readline()
target = tline[11:]
target = target.strip('\n')
tep = (source,target)
Result.append(tep)
eline = fdobj.readline()
#print Result
Gt.add_edges_from(Result)
graphs = nx.connected_component_subgraphs(Gt)
SubG = graphs[0].edges()
return SubG
def GetTxtSubG():
'read Data'
Gt=nx.Graph()
try:
fdobj = open(fname,'r')
except IOError as e:
print "***file open error:",e
else:
tepstr = ''
Result = []
eline = fdobj.readline()
while eline:
line = eline.strip().split()
#self.G.add_edge(line[0],line[1])
tep = (line[0],line[1])
Result.append(tep)
eline = fdobj.readline()
#print Result
Gt.add_edges_from(Result)
graphs = nx.connected_component_subgraphs(Gt)
SubG = graphs[0].edges()
return SubG
def GetGmlSubG():
'read Data'
Gt=nx.Graph()
fedge = fname + '.edge'
try:
fdobj = open(fedge,'r')
except IOError as e:
print "***file open error:",e
else:
s = 'source' #s = 'source '
tepstr = ''
Result = []
eline = fdobj.readline()
while eline:
if s in eline:
source = eline[11:]
source = source.strip('\n')
tline = fdobj.readline()
target = tline[11:]
target = target.strip('\n')
tep = (source,target)
Result.append(tep)
eline = fdobj.readline()
#print Result
Gt.add_edges_from(Result)
graphs = nx.connected_component_subgraphs(Gt)
SubG = graphs[0].edges()
return SubG
def GetTxtSubG():
'read Data'
Gt=nx.Graph()
try:
fdobj = open(fname,'r')
except IOError as e:
print "***file open error:",e
else:
tepstr = ''
Result = []
eline = fdobj.readline()
while eline:
line = eline.strip().split()
#self.G.add_edge(line[0],line[1])
tep = (line[0],line[1])
Result.append(tep)
eline = fdobj.readline()
#print Result
Gt.add_edges_from(Result)
graphs = nx.connected_component_subgraphs(Gt)
SubG = graphs[0].edges()
return SubG
def main():
edges = [] # ???????
domain_name = 'taobao.com'
domain_pkts = get_data(domain_name)
for i in domain_pkts[0]['details']:
for v in i['answers']:
edges.append((v['domain_name'],v['dm_data']))
plt.figure(1, figsize=(10, 8))
G = nx.Graph()
G.add_edges_from(edges)
pos = graphviz_layout(G, prog="fdp") #neato fdp
C = nx.connected_component_subgraphs(G) # ?????????????
for g in C:
c = [random.random()] * nx.number_of_nodes(g)
nx.draw(g,
pos,
node_size=90,
node_color=c,
vmin=0.0,
vmax=1.0,
with_labels=False
)
plt.savefig('./graph/'+domain_name+"_relation.png", dpi=75)
plt.show()
def atlas6():
""" Return the atlas of all connected graphs of 6 nodes or less.
Attempt to check for isomorphisms and remove.
"""
Atlas = graph_atlas_g()[0:100] # 208
# remove isolated nodes, only connected graphs are left
U = nx.Graph() # graph for union of all graphs in atlas
for G in Atlas:
zerodegree = [n for n in G if G.degree(n)==0]
for n in zerodegree:
G.remove_node(n)
U = nx.disjoint_union(U, G)
# list of graphs of all connected components
C = nx.connected_component_subgraphs(U)
UU = nx.Graph()
# do quick isomorphic-like check, not a true isomorphism checker
nlist = [] # list of nonisomorphic graphs
for G in C:
# check against all nonisomorphic graphs so far
if not iso(G, nlist):
nlist.append(G)
UU = nx.disjoint_union(UU, G) # union the nonisomorphic graphs
return UU
def lanl_graph():
""" Return the lanl internet view graph from lanl.edges
"""
import networkx as nx
try:
fh = open('lanl_routes.edgelist' , 'r')
except IOError:
print("lanl.edges not found")
raise
G = nx.Graph()
time = {}
time[0] = 0 # assign 0 to center node
for line in fh.readlines():
(head, tail, rtt) = line.split()
G.add_edge(int(head), int(tail))
time[int(head)] = float(rtt)
# get largest component and assign ping times to G0time dictionary
G0 = sorted(nx.connected_component_subgraphs(G), key = len, reverse=True)[0]
G0.rtt = {}
for n in G0:
G0.rtt[n] = time[n]
return G0
def atlas6():
""" Return the atlas of all connected graphs of 6 nodes or less.
Attempt to check for isomorphisms and remove.
"""
Atlas = graph_atlas_g()[0:208] # 208
# remove isolated nodes, only connected graphs are left
U = nx.Graph() # graph for union of all graphs in atlas
for G in Atlas:
zerodegree = [n for n in G if G.degree(n)==0]
for n in zerodegree:
G.remove_node(n)
U = nx.disjoint_union(U, G)
# list of graphs of all connected components
C = nx.connected_component_subgraphs(U)
UU = nx.Graph()
# do quick isomorphic-like check, not a true isomorphism checker
nlist = [] # list of nonisomorphic graphs
for G in C:
# check against all nonisomorphic graphs so far
if not iso(G, nlist):
nlist.append(G)
UU = nx.disjoint_union(UU, G) # union the nonisomorphic graphs
return UU
def sub_graph(edges,node_main,node_ip,node_cname):
"""
???????
:param edges:
:return:
"""
from collections import Counter
sub_graph_set = Counter() # ?????????????
sub_graph_count = 0 # ???????????
sub_graph_domain_count = Counter() # ??????????????
sub_graph_cname_count = Counter()
sub_graph_ip_count = Counter()
G = nx.Graph()
G.add_edges_from(edges)
C = nx.connected_component_subgraphs(G) # ??????
test = {'domain_count': [],
'cname_count': [],
'ip_count': []
}
for g in C:
sub_graph_count += 1
sub_graph_set[len(g.nodes())] += 1
# print list(set(g.nodes()).intersection(set(nodes_main)))
test['domain_count'].append(len(list(set(g.nodes()).intersection(set(node_main)))))
test['cname_count'].append(len(list(set(g.nodes()).intersection(set(node_cname)))))
test['ip_count'].append(len(list(set(g.nodes()).intersection(set(node_ip)))))
sub_graph_domain_count[len(list(set(g.nodes()).intersection(set(node_main))))] += 1
sub_graph_cname_count[len(list(set(g.nodes()).intersection(set(node_cname))))] += 1
sub_graph_ip_count[len(list(set(g.nodes()).intersection(set(node_ip))))] += 1
# print sub_graph_set
# print sub_graph_domain_count
# print sub_graph_cname_count
# print sub_graph_ip_count
draw_graph(test)
return sub_graph_count
def c_c(graph_file):
g = nx.read_edgelist(graph_file,create_using = nx.Graph(),nodetype = int)
graphs = nx.connected_component_subgraphs(g)
for graph in graphs:
print graph.number_of_nodes(),graph.number_of_edges()
print len(graphs)
def run(self, ips, imgs, para = None):
titles = ['PartID', 'Noeds', 'Edges', 'TotalLength', 'Density', 'AveConnect']
k, unit = ips.unit
gs = nx.connected_component_subgraphs(ips.data, False) if para['parts'] else [ips.data]
comid, datas = 0, []
for g in gs:
sl = 0
for (s, e) in g.edges():
sl += sum([i['weight'] for i in g[s][e].values()])
datas.append([comid, g.number_of_nodes(), g.number_of_edges(), round(sl*k, 2),
round(nx.density(g), 2), round(nx.average_node_connectivity(g),2)][1-para['parts']:])
comid += 1
print(titles, datas)
IPy.table(ips.title+'-graph', datas, titles[1-para['parts']:])
def run(self, ips, imgs, para = None):
titles = ['PartID', 'Noeds', 'Edges', 'TotalLength', 'Density', 'AveConnect']
k, unit = ips.unit
gs = nx.connected_component_subgraphs(ips.data, False) if para['parts'] else [ips.data]
comid, datas = 0, []
for g in gs:
sl = 0
for (s, e) in g.edges():
sl += sum([i['weight'] for i in g[s][e].values()])
datas.append([comid, g.number_of_nodes(), g.number_of_edges(), round(sl*k, 2),
round(nx.density(g), 2), round(nx.average_node_connectivity(g),2)][1-para['parts']:])
comid += 1
print(titles, datas)
IPy.table(ips.title+'-graph', datas, titles[1-para['parts']:])
def makeClusters(self, chrom):
"""
yield all non-solo clusters
returns all of the BreakPoints that comprise the events within the
cluster
"""
#PARAM
MAXSPAN = self.maxSpan
MAXOVLS = self.maxOvl
intervalTree = self.genome[chrom]
overlaps = []
graph = nx.Graph()
#Find clusters on the chromosome
#By building a graph
for interval in intervalTree:
if abs(interval.end - interval.begin) > MAXSPAN:
continue
ovls = intervalTree.search(interval)
if len(ovls) == 1 or len(ovls) > MAXOVLS:
continue
for a,b in itertools.combinations(ovls, 2):
if abs(a.end - a.begin) > MAXSPAN \
or abs(b.end - b.begin) > MAXSPAN:
continue
graph.add_edge(a, b)
#clusters are sub graphs
for subG in nx.connected_component_subgraphs(graph):
if len(subG.edges()) == 0:
continue
lookup = [x.data for x in subG.nodes()]
ret = [x for x in self.points[chrom] if x.id in lookup]
#I need to merge points that are too close
if len(ret) <= 10:
yield ret
def get_annotation_branch_lengths(annotation):
"""
Fragments an annotation into pieces at every branch point - remaining
lonely nodes are deleted. Branch nodes are simply deleted. The lengths of
the resulting fragments are then returned.
WARNING: THIS FUNCTION IS REALLY SLOW FOR SOME REASONS I DO NOT FULLY
UNDERSTAND AT THE MOMENT, PROBABLY OBJECT COPY RELATED
:param annotation:
:return: list of branch lengths in um
"""
# this is necessary to avoid trouble because of in place modifications
anno = copy.deepcopy(annotation)
nx_graph = su.annotation_to_nx_graph(anno)
branches = list({k for k, v in nx_graph.degree().iteritems() if v > 2})
nx_graph.remove_nodes_from(branches)
lonely = list({k for k, v in nx_graph.degree().iteritems() if v == 0})
nx_graph.remove_nodes_from(lonely)
ccs = list(nx.connected_component_subgraphs(nx_graph))
lengths = [cc.size(weight='weight') / 1000. for cc in ccs]
return lengths
def trim_unconnected_components(self):
"""
Remove all but the largest connected component.
"""
subgraphs = sorted(
nx.connected_component_subgraphs(self.graph),
key=len, reverse=True
)
self.graph = subgraphs[0]
def get_coiledcoil_region(self, cc_number=0, cutoff=7.0, min_kihs=2):
""" Assembly containing only assigned regions (i.e. regions with contiguous KnobsIntoHoles. """
g = self.filter_graph(self.graph, cutoff=cutoff, min_kihs=min_kihs)
ccs = sorted(networkx.connected_component_subgraphs(g, copy=True),
key=lambda x: len(x.nodes()), reverse=True)
cc = ccs[cc_number]
helices = [x for x in g.nodes() if x.number in cc.nodes()]
assigned_regions = self.get_assigned_regions(helices=helices, include_alt_states=False, complementary_only=True)
coiledcoil_monomers = [h.get_slice_from_res_id(*assigned_regions[h.number]) for h in helices]
return Assembly(coiledcoil_monomers)
def generate_bond_subgraphs_from_break(bond_graph, atom1, atom2):
"""Splits the bond graph between two atoms to producing subgraphs.
Notes
-----
This will not work if there are cycles in the bond graph.
Parameters
----------
bond_graph: networkx.Graph
Graph of covalent bond network
atom1: isambard.ampal.Atom
First atom in the bond.
atom2: isambard.ampal.Atom
Second atom in the bond.
Returns
-------
subgraphs: [networkx.Graph]
A list of subgraphs generated when a bond is broken in the covalent
bond network.
"""
bond_graph.remove_edge(atom1, atom2)
try:
subgraphs=list(networkx.connected_component_subgraphs(
bond_graph, copy=False))
finally:
# Add edge
bond_graph.add_edge(atom1, atom2)
return subgraphs
def GCC_Partition(UnclusteredGraph, scale):
# Make edges on Unclustered graph
# between all nodes separated by distance 'scale'
for node1 in UnclusteredGraph.nodes():
for node2 in UnclusteredGraph.nodes():
if node1 != node2:
dist = common.euclidean_dist(node1, node2)
if dist <= scale:
UnclusteredGraph.add_edge(*(node1, node2), weight=dist)
Clusters = []
subgraphs = nx.connected_component_subgraphs(UnclusteredGraph)
for i, sg in enumerate(subgraphs):
Clusters.append(sg.nodes(data=True))
return Clusters