def nx_plot_tree(server, node_size=200, **options):
"""Visualize the tree using the networkx package.
This plots to the current matplotlib figure.
Args:
server: A DataServer instance.
options: Options passed to networkx.draw().
"""
import networkx as nx
edges = server.estimate_tree()
perplexity = server.latent_perplexity()
feature_names = server.feature_names
V = 1 + len(edges)
G = nx.Graph()
G.add_nodes_from(range(V))
G.add_edges_from(edges)
H = nx.relabel_nodes(G, dict(enumerate(feature_names)))
node_size = node_size * perplexity / perplexity.max()
options.setdefault('alpha', 0.2)
options.setdefault('font_size', 8)
nx.draw(H, with_labels=True, node_size=node_size, **options)
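# A minimal, self-contained sketch of calling nx_plot_tree. The DataServer
# interface is assumed from the docstring above; _FakeServer is a hypothetical
# stand-in, not part of the original code.
import numpy as np

class _FakeServer(object):
    feature_names = ['a', 'b', 'c', 'd']

    def estimate_tree(self):
        return [(0, 1), (1, 2), (1, 3)]

    def latent_perplexity(self):
        return np.array([1.0, 2.0, 1.5, 0.5])

nx_plot_tree(_FakeServer(), node_size=300)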
def refine_to_chain(g, from_attr, to_attr):
    '''Can be used to refine basic blocks into blocks - the dual of contract_chains().

    Assumes g.nodes[n][from_attr] is a list.
    Returns a graph whose nodes are the refinement of the lists into paths;
    the list elements are held as to_attr, and the nodes become
    (node_index, list_index) tuples.
    '''
paths = []
    for n in g.nodes():  # nodes_iter() in NetworkX 1.x
        block = g.nodes[n][from_attr]
        size = len(block)
        path = nx.path_graph(size, create_using=nx.DiGraph())
        nx.relabel_nodes(path, mapping={x: (n, x) for x in path.nodes()}, copy=False)
        # connect the tail of this chain to the head of each successor's chain
        path.add_edges_from(((n, size - 1), (s, 0)) for s in g.successors(n))
        paths.append(path)
    values = {(n, x): item
              for n in g.nodes()
              for x, item in enumerate(g.nodes[n][from_attr])}
    res = nx.compose_all(paths)
    nx.set_node_attributes(res, values, to_attr)  # (graph, values, name) order in NetworkX 2.x
return res
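# Usage sketch for refine_to_chain: a two-block graph whose blocks hold
# instruction lists (the attribute names 'insns'/'insn' are illustrative).
import networkx as nx

g = nx.DiGraph()
g.add_node(0, insns=['a', 'b'])
g.add_node(1, insns=['c'])
g.add_edge(0, 1)
chain = refine_to_chain(g, 'insns', 'insn')
print(list(chain.edges()))  # [((0, 0), (0, 1)), ((0, 1), (1, 0))]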
def networkx_to_igraph(G):
    # Map arbitrary node labels to consecutive integers for igraph.
    mapping = dict(zip(G.nodes(), range(G.number_of_nodes())))
    reverse_mapping = {v: k for k, v in mapping.items()}
    G = nx.relabel_nodes(G, mapping)
    G_ig = ig.Graph(len(G), [(u, v) for u, v, _ in nx.to_edgelist(G)])
    return G_ig, reverse_mapping
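# Quick check of networkx_to_igraph (assumes python-igraph is installed and
# imported as ig, as the snippet above requires):
import networkx as nx

G_ig, back = networkx_to_igraph(nx.karate_club_graph())
print(G_ig.vcount(), back[0])  # 34 0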
def draw(self, filename):
"""draw graph to a file called filename"""
def mapping(address):
return address[2:6] if len(address) > 6 else address[2:]
    for u, v, d in self.graph.edges(data=True):
        self.graph.nodes[u]['width'] = 0.6  # .node[...] in NetworkX 1.x
        self.graph.nodes[u]['height'] = 0.4
        d['color'] = 'blue'
        d['len'] = 1.4
    g = nx.relabel_nodes(self.graph, mapping)
    a = nx.drawing.nx_agraph.to_agraph(g)  # requires pygraphviz
a.graph_attr['label'] = 'Trustlines Network'
a.layout()
a.draw(filename)
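# The address-shortening mapping above, applied standalone to a toy graph:
import networkx as nx

g = nx.Graph()
g.add_edge('0xabcdef1234', '0xbeef')
short = nx.relabel_nodes(g, lambda a: a[2:6] if len(a) > 6 else a[2:])
print(list(short.nodes()))  # ['abcd', 'beef']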
def setUp(self):
integer_graph = nx.balanced_tree(2, 2, nx.DiGraph())
package_mapping = {
i: 'm.' + ('X' if i % 2 == 0 else 'Y') + '.' + letter
for (i, letter) in enumerate(string.ascii_lowercase)
}
# Edges: [(X.a, Y.b), (X.a, X.c), (Y.b, Y.d), (Y.b, X.e), (X.c, Y.f), (X.c, X.g)]
self.package_graph = nx.relabel_nodes(integer_graph, package_mapping)
    for node in self.package_graph:
        self.package_graph.nodes[node]['fqn'] = node.split('.')[1:]  # .node[...] in NetworkX 1.x
self.project = self.get_project()
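# Sanity check: relabeling the balanced tree reproduces the package edges
# listed in the comment above.
import string
import networkx as nx

_tree = nx.balanced_tree(2, 2, nx.DiGraph())
_mapped = nx.relabel_nodes(_tree, {
    i: 'm.' + ('X' if i % 2 == 0 else 'Y') + '.' + letter
    for i, letter in enumerate(string.ascii_lowercase)})
assert ('m.X.a', 'm.Y.b') in _mapped.edges()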
def get_lcc(di_graph):
    # weakly_connected_component_subgraphs() was removed in NetworkX 2.4.
    di_graph = di_graph.subgraph(
        max(nx.weakly_connected_components(di_graph), key=len)).copy()
    tdl_nodes = list(di_graph.nodes())
    nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
    nx.relabel_nodes(di_graph, nodeListMap, copy=False)
return di_graph, nodeListMap
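# Usage sketch for get_lcc: of two weak components only the larger survives,
# with nodes renumbered from zero.
import networkx as nx

g = nx.DiGraph([(0, 1), (1, 2), (10, 11)])
lcc, node_map = get_lcc(g)
print(lcc.number_of_nodes(), node_map)  # 3 {0: 0, 1: 1, 2: 2}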
def relabel_nodes(self):
    # Prefix each node with its layer name so labels stay unique across layers.
    new_nets = {}
    for net_name, net in self.nets.items():
def mapping(x):
return '%s__%d' % (net_name, x)
new_nets[net_name] = nx.relabel_nodes(net, mapping, copy=False)
return new_nets
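# The same per-layer prefixing idiom as a standalone snippet (toy layers;
# n=name pins the loop variable so each lambda keeps its own layer name):
import networkx as nx

nets = {'social': nx.path_graph(3), 'email': nx.path_graph(2)}
renamed = {name: nx.relabel_nodes(g, lambda x, n=name: '%s__%d' % (n, x))
           for name, g in nets.items()}
print(sorted(renamed['social'].nodes()))  # ['social__0', 'social__1', 'social__2']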
def mapper_graph(df, lens_data=None, lens='pca', resolution=10, gain=0.5, equalize=True, clust='kmeans', stat='db',
max_K=5):
"""
input: N x n_dim image of of raw data under lens function, as a dataframe
output: (undirected graph, list of node contents, dictionary of patches)
"""
if lens_data is None:
lens_data = apply_lens(df, lens=lens)
patch_clusterings = {}
counter = 0
patches = covering_patches(lens_data, resolution=resolution, gain=gain, equalize=equalize)
for key, patch in patches.items():
if len(patch) > 0:
patch_clusterings[key] = optimal_clustering(df, patch, method=clust, statistic=stat, max_K=max_K)
counter += 1
    print('total of {} patches required clustering'.format(counter))
all_clusters = []
for key in patch_clusterings:
all_clusters += patch_clusterings[key]
num_nodes = len(all_clusters)
    print('this implies {} nodes in the mapper graph'.format(num_nodes))
A = np.zeros((num_nodes, num_nodes))
for i in range(num_nodes):
for j in range(i):
overlap = set(all_clusters[i]).intersection(set(all_clusters[j]))
if len(overlap) > 0:
A[i, j] = 1
A[j, i] = 1
    G = nx.from_numpy_array(A)  # from_numpy_matrix() before NetworkX 3.0
total = []
all_clusters_new = []
mapping = {}
cont = 0
for m in all_clusters:
total += m
    for n, m in enumerate(all_clusters):
        # Drop singleton clusters whose only point also appears in another
        # cluster; total holds points, so compare m[0] rather than the list m.
        if len(m) == 1 and total.count(m[0]) > 1:
            G.remove_node(n)
        else:
            all_clusters_new.append(m)
            mapping[n] = cont
            cont += 1
H = nx.relabel_nodes(G, mapping)
return H, all_clusters_new, patches
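# Usage sketch for mapper_graph; apply_lens, covering_patches and
# optimal_clustering are helpers from the same module, and df is a pandas
# DataFrame of raw data:
#
#   H, clusters, patches = mapper_graph(df, lens='pca', resolution=8, gain=0.4)
#   nx.draw(H, node_size=50)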
def _import_daggen(line_iter):
_NODE_TYPES = {"ROOT", "END", "COMPUTATION", "TRANSFER"}
result = nx.DiGraph()
node_mapper = lambda nid: "task_%d" % nid
nodes = {}
skip = True
for line in line_iter:
line = line.strip()
if line.startswith("NODE_COUNT"):
            skip = False
continue
if skip or not line:
continue
node_parts = line.split(" ")
assert len(node_parts) == 6
magic, nodeid, children, nodetype, cost, parallel_ratio = node_parts
assert magic == "NODE"
nodeid = int(nodeid)
children = list(map(int, children.split(","))) if children != "-" else []
assert nodetype in _NODE_TYPES
cost = float(cost)
# unused_for_now
parallel_ratio = float(parallel_ratio)
nodes[nodeid] = (nodetype, children, cost)
for nodeid, (nodetype, _, cost) in nodes.items():
if nodetype != "TRANSFER":
result.add_node(node_mapper(nodeid), weight=cost)
for nodeid, (nodetype, children, _) in nodes.items():
if nodetype == "TRANSFER":
continue
for childid in children:
childtype, grandchildren, transfercost = nodes[childid]
if childtype == "TRANSFER":
assert len(grandchildren) == 1
destination = grandchildren[0]
weight = transfercost
else:
                assert nodetype == "ROOT" or childtype == "END"
destination = childid
# TODO: Should be 0.
#
# Kludge to force order in 3rd-party HEFT implementation
# (nodes connected by edges with zero weight get mixed
# in HEFT priority list and violate precedence constraints)
#
# Can be removed as I can fix this BS in my HEFT
weight = 1.
result.add_edge(node_mapper(nodeid), node_mapper(destination), weight=weight)
    node_order = list(nx.topological_sort(result))  # returns a generator in NetworkX 2.x
return nx.relabel_nodes(result, {
node_order[0]: "root",
node_order[-1]: "end"
})
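# A minimal input accepted by _import_daggen (format inferred from the parser
# itself, not from daggen documentation):
_daggen_text = """NODE_COUNT 4
NODE 0 1 ROOT 0.0 0.0
NODE 1 2 COMPUTATION 10.0 0.5
NODE 2 3 TRANSFER 5.0 0.0
NODE 3 - END 0.0 0.0"""
dag = _import_daggen(iter(_daggen_text.splitlines()))
print(list(dag.edges(data=True)))  # root -> task_1 -> end, weight 1.0 each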
def evaluateStaticLinkPrediction(digraph, graph_embedding,
train_ratio=0.8,
n_sample_nodes=None,
sample_ratio_e=None,
no_python=False,
is_undirected=True):
node_num = digraph.number_of_nodes()
    # separate train and test graphs
train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest(
digraph,
train_ratio=train_ratio,
is_undirected=is_undirected
)
    if not nx.is_connected(train_digraph.to_undirected()):
        # weakly_connected_component_subgraphs() was removed in NetworkX 2.4.
        train_digraph = train_digraph.subgraph(
            max(nx.weakly_connected_components(train_digraph), key=len)
        ).copy()
    tdl_nodes = list(train_digraph.nodes())
    nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
    nx.relabel_nodes(train_digraph, nodeListMap, copy=False)
    test_digraph = test_digraph.subgraph(tdl_nodes).copy()  # subgraph views are frozen in NetworkX 2.x
    nx.relabel_nodes(test_digraph, nodeListMap, copy=False)
# learning graph embedding
X, _ = graph_embedding.learn_embedding(
graph=train_digraph,
no_python=no_python
)
node_l = None
if n_sample_nodes:
test_digraph, node_l = graph_util.sample_graph(
test_digraph,
n_sample_nodes
)
X = X[node_l]
# evaluation
if sample_ratio_e:
eval_edge_pairs = evaluation_util.getRandomEdgePairs(
node_num,
sample_ratio_e,
is_undirected
)
else:
eval_edge_pairs = None
estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
estimated_adj,
is_undirected=is_undirected,
edge_pairs=eval_edge_pairs
)
filtered_edge_list = [e for e in predicted_edge_list if not train_digraph.has_edge(e[0], e[1])]
MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
prec_curv, _ = metrics.computePrecisionCurve(
filtered_edge_list,
test_digraph
)
return (MAP, prec_curv)
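# Usage sketch; evaluation_util, graph_util and metrics are sibling modules of
# this function, and graph_embedding is any object exposing learn_embedding()
# and get_reconstructed_adj():
#
#   MAP, prec_curve = evaluateStaticLinkPrediction(G, embedding, train_ratio=0.8)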
def embed_multilayer(self):
"""Neural embedding of a multilayer network"""
self.nets = self.relabel_nodes()
# Return parameter p
# It controls the likelihood of immediately revisiting a node in the walk
# In-out parameter q
# If q > 1: the random walk is biased towards nodes close to node t
# If q < 1: the random walk is more inclined to visit nodes which
# are further away from node t
all_walks = self.simulate_walks()
all_nodes = self.get_all_nodes()
internal_vectors = self.init_internal_vectors(all_nodes)
tmp_fname = pjoin(self.out_dir, 'tmp.emb')
total_examples = len(all_walks) * self.n_iter
pushed_examples = 1000
for itr in range(self.n_iter):
# update leaf layers
self.log.info('Iteration: %d' % itr)
        if itr == 0:
            # gensim < 4.0 parameter names; newer gensim renamed
            # size -> vector_size and iter -> epochs.
            self.model = Word2Vec(
                sentences=all_walks, size=self.dimension,
                window=self.window_size, min_count=0, sg=1,
                workers=self.n_workers, iter=1, batch_words=pushed_examples)
else:
self.model.current_iteration = itr
self.model.load_parent_word2vec_format(fname=tmp_fname)
delta = (self.model.alpha - self.model.min_alpha) *\
pushed_examples / total_examples
next_alpha = self.model.alpha - delta
next_alpha = max(self.model.min_alpha, next_alpha)
self.model.alpha = next_alpha
self.log.info('Next alpha = %8.6f' % self.model.alpha)
        # Pre-1.0 gensim call signature; later versions require explicit
        # total_examples and epochs arguments.
        self.model.train(all_walks)
leaf_vectors = self.get_leaf_vectors(self.model)
internal_vectors = self.update_internal_vectors(
all_nodes, leaf_vectors, internal_vectors)
self.save_parent_word2vec_format(
all_nodes, internal_vectors, tmp_fname)
self.log.info('Done!')
fname = pjoin(self.out_dir, 'leaf_vectors.emb')
self.log.info('Saving leaf vectors: %s' % fname)
self.model.save_word2vec_format(fname)
fname = pjoin(self.out_dir, 'internal_vectors.emb')
self.log.info('Saving internal vectors: %s' % fname)
self.save_internal_word2vec_format(
all_nodes, internal_vectors, fname)
return self.model