def ConsensusCluster(self, data, subsamples, subsample_fraction, norm_var, kvalues):
    """
    Run k-means on the graph's adjacency matrix for every k in `kvalues`
    and collect one partition (node index -> cluster label) per k.
    """
    partitions = []
    distances = nx.to_numpy_matrix(data)
    for k in kvalues:
        clusterid, error, nfound = KMeans(distances, nclusters=k, npass=300)
        # Remap the arbitrary cluster ids to consecutive integers from 0.
        uniq_ids = list(set(clusterid))
        new_ids = [uniq_ids.index(val) for val in clusterid]
        # Build a fresh dict per k so earlier partitions are not overwritten.
        partitions.append(dict(enumerate(new_ids)))
    return partitions
def graphlet_kernel(graphs, num_samples):
    """Approximate the size-6 graphlet kernel: sample `num_samples` six-node
    windows per graph and histogram the induced graphlet types."""
    N = len(graphs)
    Phi = np.zeros((N, 2**15))
    P = generate_permutation_matrix()
    for i in range(N):
n = graphs[i].number_of_nodes()
if n >= 6:
A = nx.to_numpy_matrix(graphs[i])
A = np.asarray(A, dtype=np.uint8)
for j in range(num_samples):
r = np.random.permutation(n)
window = A[np.ix_(r[:6],r[:6])]
Phi[i, graphlet_type(window)] += 1
Phi[i,:] /= num_samples
K = np.dot(Phi,np.dot(P,np.transpose(Phi)))
return K
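# Hedged usage sketch for graphlet_kernel; it assumes the repo's helper
# functions generate_permutation_matrix() and graphlet_type() are in scope,
# and the random graphs below are illustrative placeholders, not source data.
import networkx as nx

sample_graphs = [nx.erdos_renyi_graph(20, 0.3) for _ in range(5)]
K = graphlet_kernel(sample_graphs, num_samples=100)
print(K.shape)  # (5, 5): one kernel row/column per input graph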
def Floyd_Warshall(G):
    """All-pairs shortest paths. Yields (D, (i, j, k)) snapshots whenever
    entry (i, j) is about to be relaxed through k, so a caller can animate
    the run; the final yield carries the completed distance matrix."""
    D = nx.to_numpy_matrix(G)
    m, n = D.shape
for i in range(0, n):
for j in range(0, n):
if i != j and D[i, j] == 0:
D[i, j] = float('inf')
yield np.array(D), (0, 0, 0)
for k in range(0, n):
for i in range(0, n):
for j in range(0, n):
if D[i, j] > D[i, k] + D[k, j]:
yield np.array(D), (i, j, k)
D[i, j] = D[i, k] + D[k, j]
yield np.array(D), (0, 0, 0)
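# A minimal usage sketch with a placeholder graph: Floyd_Warshall is a
# generator of animation frames, so exhausting it and keeping the last
# frame yields the final all-pairs shortest-path distances.
import networkx as nx

G = nx.gnp_random_graph(6, 0.5, seed=1)
for D, (i, j, k) in Floyd_Warshall(G):
    pass
print(D)  # completed distance matrix; unreachable pairs remain inf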
def Transitive_Closure(G):
    """Warshall's algorithm on the boolean adjacency matrix. Yields
    (reachability matrix, (i, j, k)) snapshots for animation."""
    D = nx.to_numpy_matrix(G).astype(bool)
    m, n = D.shape
for k in range(0, n):
for i in range(0, n):
for j in range(0, n):
if not D[i, j]:
yield np.array(D), (i, j, k)
D[i, j] = D[i, k] and D[k, j]
yield np.array(D), (0, 0, 0)
def draw_adjacency_matrix(G, node_order=None, partitions=[], colors=[]):
"""
- G is a networkx graph
- node_order (optional) is a list of nodes, where each node in G
appears exactly once
    - partitions (optional) is a list of partitions to highlight; each
      partition is itself a list of node lists (modules), and each node
      of G should appear in exactly one module
    - colors is a list of strings giving the box color drawn around each
      partition's modules
If partitions is specified, the same number of colors needs to be
specified.
"""
    adjacency_matrix = nx.to_numpy_matrix(G, dtype=bool, nodelist=node_order)
# Plot adjacency matrix in toned-down black and white
fig = plt.figure(figsize=(8, 8)) # in inches
plt.imshow(adjacency_matrix,
cmap="Paired",
interpolation="none")
# The rest is just if you have sorted nodes by a partition and want to
# highlight the module boundaries
assert len(partitions) == len(colors)
ax = plt.gca()
for partition, color in zip(partitions, colors):
current_idx = 0
for module in partition:
            ax.add_patch(patches.Rectangle((current_idx, current_idx),
                                           len(module),  # Width
                                           len(module),  # Height
                                           facecolor="none",
                                           edgecolor=color,
                                           linewidth=1))
current_idx += len(module)
plt.show()
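# Hedged usage sketch for draw_adjacency_matrix: a planted two-community
# graph with nodes ordered so each community is contiguous, and a red box
# drawn around each module. The graph and module lists are placeholders.
import networkx as nx

G = nx.planted_partition_graph(2, 10, p_in=0.8, p_out=0.05, seed=42)
modules = [list(range(0, 10)), list(range(10, 20))]
node_order = modules[0] + modules[1]
draw_adjacency_matrix(G, node_order=node_order,
                      partitions=[modules], colors=["red"])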
def draw(self):
"""
Draws the plot to screen.
Note to self: Do NOT call super(MatrixPlot, self).draw(); the
underlying logic for drawing here is completely different from other
plots, and as such necessitates a different implementation.
"""
matrix = nx.to_numpy_matrix(self.graph, nodelist=self.nodes)
self.ax.matshow(matrix, cmap=self.cmap)
def __init__(self, dfa): # type: (DFA) -> None
"""
Class for maintaining state path weights inside a dfa.
This is a renormalized version of l_{p,n} in section 2 of the Bernardi
& Giménez paper, computed using matrix powers. See:
https://en.wikipedia.org/wiki/Adjacency_matrix#Matrix_powers
Note that ``path_weights[state, n]`` is the proportion of paths of
length n from state to _some_ final/accepting state.
`dfa` MUST have consecutive integer states, with 0 as the start state,
though this is not validated.
"""
self.longest_path_length = 0
self.graph = dfa.as_multidigraph
self.sink = len(dfa.nodes())
for state in dfa.nodes():
if dfa.node[state]['accepting']:
self.graph.add_edge(state, self.sink)
self.matrix = nx.to_numpy_matrix(self.graph, nodelist=self.graph.nodes())
vect = np.zeros(self.matrix.shape[0])
vect[-1] = 1.0 # Grabs the neighborhood of the sink node (last column).
self.vects = [self.normalize_vector(self.matrix.dot(vect)).T]
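# Toy illustration (separate from the class) of the matrix-powers fact the
# docstring cites: entry (i, j) of A**n counts length-n walks from i to j,
# which is what the repeated matrix-vector products above accumulate.
import networkx as nx

A = nx.to_numpy_matrix(nx.cycle_graph(3))  # triangle graph
print((A ** 3)[0, 0])  # 2.0: the two length-3 closed walks around the triangle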
def computeKcliques(self, Number_of_clusters, data):
    # k-clique percolation needs a clique size k; this sketch reuses the
    # requested cluster count as k, which is an assumption about intent.
    # Communities may overlap; a node keeps the last label assigned.
    partition = dict()
    communities = nx.k_clique_communities(data, Number_of_clusters)
    for label, community in enumerate(communities):
        for node in community:
            partition[node] = label
    return partition
def computeKmeans(self,Number_of_clusters,data, iterations = 100):
partition = dict()
    nb_clusters = Number_of_clusters  # number of clusters to partition the dataset into
    distances = nx.to_numpy_matrix(data)
    clusterid, error, nfound = KMeans(distances, nclusters=nb_clusters, npass=300)
uniq_ids = list(set(clusterid))
new_ids = [ uniq_ids.index(val) for val in clusterid]
for i,value in enumerate(new_ids):
partition[i] = value
return partition
def HierarchicalClustering(self,data):
distances = nx.to_numpy_matrix(data)
hierarchy = linkage(distances)
    print hierarchy, "HIERARCHY"
Z = dendrogram(hierarchy)
print Z
return hierarchy
def computeKmeansCustom(self, Number_of_clusters, data, iterations=100):
    distances = nx.to_numpy_matrix(data)
    clusters, medoids = self.cluster(distances, Number_of_clusters)
    # Remap the arbitrary medoid ids to consecutive integers from 0.
    uniq_ids = list(set(clusters))
    new_ids = [uniq_ids.index(val) for val in clusters]
    return dict(enumerate(new_ids))
def create_adjacency(graph):
A = nx.to_numpy_matrix(graph, dtype=int)
return A
def modularity(graph):
# convert to numpy adjacency matrix
A = nx.to_numpy_matrix(graph)
    # node degrees (column sums of A) and total edge count m
    degree_centrality = np.sum(A, axis=0, dtype=int)
    m = np.sum(degree_centrality, dtype=int) / 2
# compute matrix B
B = np.zeros(A.shape, dtype=float)
for i in range(len(A)):
for j in range(len(A)):
B[i, j] = A[i, j] - (degree_centrality[0, i] * degree_centrality[0, j]) / float(2 * m)
    # leading eigenvector of B determines the spectral bisection
    w, v = LA.eig(B)
    wmax = np.argmax(w)
s = np.zeros((len(A), 1), dtype=float)
for i in range(len(A)):
if v[i, wmax] < 0:
s[i, 0] = -1
else:
s[i, 0] = 1
Q = s.T.dot(B.dot(s)) / float(4 * m)
return Q[0, 0], s
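# Hedged usage sketch: leading-eigenvector bisection on a built-in toy
# graph; Q is the modularity of the two-way split and s the +/-1
# membership vector. The graph choice is illustrative only.
import networkx as nx

G = nx.karate_club_graph()
Q, s = modularity(G)
print(Q)          # modularity score of the split
print(s.ravel())  # +1 / -1 community assignment per node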
def karate(path):
"""Load Zachary's Karate Club [@zachary1977information].
It is a social network of friendships between 34 members of a karate
club at a US university from 1970 to 1972. During the study a
conflict between instructor 'Mr. Hi' and administrator 'Officer' led
the club to split into two. Half of the members formed a new club
around 'Mr. Hi'; other members found a new instructor or quit karate.
Args:
path: str.
Path to directory which either stores file or otherwise file will
be downloaded and extracted there. Filename is `karate.gml`.
Returns:
        Tuple of adjacency matrix as an np.ndarray `x_train` with 34 rows
        and 34 columns and an np.ndarray `y_train` of class memberships (0 for
        'Mr. Hi' and 1 for 'Officer').
"""
import networkx as nx
path = os.path.expanduser(path)
filename = 'karate.gml'
if not os.path.exists(os.path.join(path, filename)):
url = 'http://www-personal.umich.edu/~mejn/netdata/karate.zip'
maybe_download_and_extract(path, url)
x_train = nx.read_gml(os.path.join(path, filename))
x_train = nx.to_numpy_matrix(x_train).astype(int)
labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 16, 17, 19, 21]
    y_train = np.array([0 if i in labels else 1
                        for i in range(x_train.shape[0])], dtype=int)
return x_train, y_train
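# Hedged usage sketch: on first call karate() downloads karate.gml into the
# given directory (a placeholder path here), then returns the 34x34
# adjacency matrix and binary faction labels.
x_train, y_train = karate('~/data')
print(x_train.shape)  # (34, 34)
print(y_train)        # 0 = 'Mr. Hi' faction, 1 = 'Officer' faction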
def edge_transform(self, g):
e = {}
for n1, n2, d in g.edges_iter(data=True):
e_t = []
e_t += [1]
e[(n1, n2)] = e_t
return nx.to_numpy_matrix(g), e
def edge_transform(self, g):
e = {}
for n1, n2, d in g.edges_iter(data=True):
e_t = []
e_t += [float(x) for x in list(d.values())]
e[(n1, n2)] = e_t
return nx.to_numpy_matrix(g), e
def edge_transform(self, g):
e = {}
for n1, n2, d in g.edges_iter(data=True):
e_t = []
e_t.append(d['label'])
e[(n1, n2)] = e_t
return nx.to_numpy_matrix(g), e
def evaluateStaticGraphReconstruction(digraph, graph_embedding,
X_stat, node_l=None, file_suffix=None,
sample_ratio_e=None, is_undirected=True,
is_weighted=False):
node_num = digraph.number_of_nodes()
# evaluation
if sample_ratio_e:
eval_edge_pairs = evaluation_util.getRandomEdgePairs(
node_num,
sample_ratio_e,
is_undirected
)
else:
eval_edge_pairs = None
if file_suffix is None:
estimated_adj = graph_embedding.get_reconstructed_adj(X_stat, node_l)
else:
estimated_adj = graph_embedding.get_reconstructed_adj(
X_stat,
file_suffix,
node_l
)
predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
estimated_adj,
is_undirected=is_undirected,
edge_pairs=eval_edge_pairs
)
MAP = metrics.computeMAP(predicted_edge_list, digraph)
prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list, digraph)
# If weighted, compute the error in reconstructed weights of observed edges
if is_weighted:
digraph_adj = nx.to_numpy_matrix(digraph)
estimated_adj[digraph_adj == 0] = 0
err = np.linalg.norm(digraph_adj - estimated_adj)
err_baseline = np.linalg.norm(digraph_adj)
else:
err = None
err_baseline = None
return (MAP, prec_curv, err, err_baseline)
def learn_embedding(self, graph=None, edge_f=None,
is_weighted=False, no_python=False):
if not graph and not edge_f:
raise Exception('graph/edge_f needed')
if not graph:
graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
t1 = time()
# A = nx.to_scipy_sparse_matrix(graph)
# I = sp.eye(graph.number_of_nodes())
# M_g = I - self._beta*A
# M_l = self._beta*A
A = nx.to_numpy_matrix(graph)
M_g = np.eye(graph.number_of_nodes()) - self._beta * A
M_l = self._beta * A
S = np.dot(np.linalg.inv(M_g), M_l)
u, s, vt = lg.svds(S, k=self._d // 2)
X1 = np.dot(u, np.diag(np.sqrt(s)))
X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
t2 = time()
self._X = np.concatenate((X1, X2), axis=1)
p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
eig_err = np.linalg.norm(p_d_p_t - S)
print('SVD error (low rank): %f' % eig_err)
return self._X, (t2 - t1)
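# Standalone sketch (outside the class) of the factorization learn_embedding
# performs: the Katz proximity S = (I - beta*A)^{-1} (beta*A) is formed
# densely, then a truncated SVD splits it into source/target embeddings.
# beta, d, and the random graph are illustrative assumptions.
import networkx as nx
import numpy as np
from scipy.sparse.linalg import svds

G = nx.gnp_random_graph(20, 0.2, seed=0, directed=True)
A = np.asarray(nx.to_numpy_matrix(G))
beta, d = 0.01, 4
S = np.linalg.inv(np.eye(20) - beta * A).dot(beta * A)
u, s, vt = svds(S, k=d // 2)
X = np.concatenate((u * np.sqrt(s), vt.T * np.sqrt(s)), axis=1)
print(X.shape)  # (20, 4): source and target embeddings concatenated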
def directed_modularity_matrix(G, nodelist=None):
""" INCLUDED FOR TESTING PURPOSES - Not implemented yet.
Return the directed modularity matrix of G.
The modularity matrix is the matrix B = A - <A>, where A is the adjacency
matrix and <A> is the expected adjacency matrix, assuming that the graph
is described by the configuration model.
More specifically, the element B_ij of B is defined as
    B_ij = A_ij - k_i(out) k_j(in) / m
    where k_i(out) is the out-degree of node i, k_j(in) is the in-degree
    of node j, and m is the number of edges in the graph.
Parameters
----------
G : DiGraph
A NetworkX DiGraph
nodelist : list, optional
The rows and columns are ordered according to the nodes in nodelist.
If nodelist is None, then the ordering is produced by G.nodes().
Returns
-------
B : Numpy matrix
The modularity matrix of G.
Notes
-----
NetworkX defines the element A_ij of the adjacency matrix as 1 if there
is a link going from node i to node j. Leicht and Newman use the opposite
definition. This explains the different expression for B_ij.
See Also
--------
to_numpy_matrix
adjacency_matrix
laplacian_matrix
modularity_matrix
References
----------
.. [1] E. A. Leicht, M. E. J. Newman,
"Community structure in directed networks",
Phys. Rev Lett., vol. 100, no. 11, p. 118703, 2008.
"""
if nodelist is None:
nodelist = G.nodes()
A = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, format='csr')
k_in = A.sum(axis=0)
k_out = A.sum(axis=1)
m = G.number_of_edges()
# Expected adjacency matrix
X = k_out * k_in / m
return A - X
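# Quick sanity check with a placeholder digraph: subtracting the expected
# adjacency makes every row and column of B sum to (approximately) zero.
import networkx as nx
import numpy as np

G = nx.DiGraph([(0, 1), (1, 2), (2, 0), (0, 2)])
B = directed_modularity_matrix(G)
print(np.allclose(B.sum(axis=1), 0))  # True
print(np.allclose(B.sum(axis=0), 0))  # True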
def Find_InterModular_Edge_correlativity(self):
# Induced graph is the data structure responsible for the adjacency matrix of the community
self.Matrix = nx.to_numpy_matrix(self.induced_graph)
    # Matrix before calculating the correlation strength
    # Keep only the strict lower triangle; the upper half duplicates it
    self.Matrix = np.tril(self.Matrix, -1)
i=0
Sum = 0
j=0
SumTemp = 0
Edges = 0
nodes1 = [item for item in self.Graphwidget.scene().items() if isinstance(item, Node)]
    # iterating over the community indices
for community in set(self.partition.values()):
i= i + 1
j=0
for community2 in set(self.partition.values()):
j= j + 1
            # skip comparing a community with itself
if community == community2:
continue
# Calculating the correlation strength only with the lower half of the adjacency matrix
if i <= j:
continue
            # list_nodes1 and list_nodes2 hold the nodes belonging to these communities
list_nodes1 = [nodes for nodes in self.partition.keys() if self.partition[nodes] == community]
list_nodes2 = [nodes for nodes in self.partition.keys() if self.partition[nodes] == community2]
            # Re-initialize the per-pair accumulators
SumTemp = 0
Edges = 0
for node1 in nodes1:
if node1.counter-1 in list_nodes1:
for node2 in nodes1:
if node2.counter-1 in list_nodes2:
if node1.counter-1 == node2.counter-1:
continue
if self.Graphwidget.Graph_data().ThresholdData[node1.counter-1][node2.counter-1] > 0:
Edges = Edges + 1
if Edges != 0:
Sum=float("{0:.2f}".format(self.Matrix[i-1,j-1]/Edges))
self.Matrix[i-1,j-1] = Sum
self.induced_graph = nx.from_numpy_matrix(self.Matrix)
def participation_coefficient(self,G, weighted_edges=False):
""""Compute participation coefficient for nodes.
Parameters
----------
G: graph
A networkx graph
weighted_edges : bool, optional
If True use edge weights
Returns
-------
node : dictionary
Dictionary of nodes with participation coefficient as the value
Notes
-----
The participation coefficient is calculated with respect to a community
affiliation vector. This function uses the community affiliations as determined
by the Louvain modularity algorithm (http://perso.crans.org/aynaud/communities/).
"""
partition = cm.best_partition(G)
partition_list = []
for count in range(len(partition)):
partition_list.append(partition[count])
n = G.number_of_nodes()
Ko = []
for node in range(n):
node_str = np.sum([G[node][x]['weight'] for x in G[node].keys()])
Ko.append(node_str)
Ko = np.array(Ko)
G_mat_weighted = np.array(nx.to_numpy_matrix(G))
G_mat = (G_mat_weighted != 0) * 1
D = np.diag(partition_list)
Gc = np.dot(G_mat, D)
Kc2 = np.zeros(n)
for i in range(np.max(partition_list) + 1):
Kc2 = Kc2 + (np.sum(G_mat_weighted * (Gc == i),1) ** 2)
P = np.ones(n) - (Kc2/(Ko **2))
D = dict()
for i in range(len(P)):
D[i]=P[i]
return D
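# Hedged usage sketch: the method reads edge 'weight' attributes, expects
# consecutive integer node labels, and needs python-louvain imported as
# `cm`. Its body never touches `self`, so the sketch passes None for it.
import networkx as nx

G = nx.karate_club_graph()
for u, v in G.edges():
    G[u][v]['weight'] = 1.0  # make the toy graph explicitly weighted
pc = participation_coefficient(None, G)
print(pc[0])  # participation coefficient of node 0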
def prepareClsuterData(self, graph, Timestep, syllable):
print "Cluster Time ",Timestep, "Syllable:",syllable
self.Timestep = Timestep
distances = copy.deepcopy(nx.to_numpy_matrix(graph))
samples = [x for x in xrange(len(distances))]
self.ParsedData = ConsensusClster.DataModel.ParseNormal(distances, samples)
idlist = [ x.sample_id for x in self.ParsedData.samples ]
if len(dict.fromkeys(idlist)) != len(idlist):
        raise ValueError, 'One or more Sample IDs are not unique! (%d unique of %d)' % (len(dict.fromkeys(idlist)), len(idlist))
if not self.pca_only:
self._postprocess()
kwds = []
self.GlobalCDF = dict()
self.partition1 = dict()
self.deltaArea = dict()
self.area = dict()
self.newKValues = [2,3,4,5,6,7]
for i in self.newKValues:
self.partition1[i] = self.run_cluster(i, self.subsamples, self.subsample_fraction, self.norm_var, kwds)
k = self.createDeltaArea(self.kvalues)
if Timestep == 62:
ChangeInDeltaArea = "ConsensusData/DeltaAreaChange"+str(self.graphWidget.Syllable)+str(4)+"Heatmap.tsv"
AllClusterData = "ConsensusData/AllClusterings"+str(self.graphWidget.Syllable)+str(4)+"Heatmap.tsv"
with open(ChangeInDeltaArea, 'w') as outfile:
            kValues = 'day'  # column header used for the K values
            timeSteps = 'hour'  # column header used for the timesteps
value = 'value'
outfile.write("{}\t{}\t{}\n".format(timeSteps,kValues,value))
for i,j in self.DeltaAreaTimestep.iteritems():
KDict = j
# print i, KDict
for k,value in KDict.iteritems():
outfile.write("{}\t{}\t{}\n".format(i,k,value))
with open(AllClusterData, 'w') as outfile:
pickle.dump(self.FinalPartiction, outfile)
        print AllClusterData, "is the file holding all clusterings; inspect the K-value data before choosing."
if k == 1:
partition = dict()
for i in range(len(distances)):
partition[i] = 0
self.partition1[1] = partition
self.FinalPartiction[self.Timestep] = self.partition1
return self.partition1[k]
def qm9_edges(g, e_representation='raw_distance'):
remove_edges = []
e={}
for n1, n2, d in g.edges_iter(data=True):
e_t = []
# Raw distance function
if e_representation == 'chem_graph':
if d['b_type'] is None:
remove_edges += [(n1, n2)]
else:
e_t += [i+1 for i, x in enumerate([rdkit.Chem.rdchem.BondType.SINGLE, rdkit.Chem.rdchem.BondType.DOUBLE,
rdkit.Chem.rdchem.BondType.TRIPLE, rdkit.Chem.rdchem.BondType.AROMATIC])
if x == d['b_type']]
elif e_representation == 'distance_bin':
if d['b_type'] is None:
step = (6-2)/8.0
start = 2
b = 9
for i in range(0, 9):
if d['distance'] < (start+i*step):
b = i
break
e_t.append(b+5)
else:
e_t += [i+1 for i, x in enumerate([rdkit.Chem.rdchem.BondType.SINGLE, rdkit.Chem.rdchem.BondType.DOUBLE,
rdkit.Chem.rdchem.BondType.TRIPLE, rdkit.Chem.rdchem.BondType.AROMATIC])
if x == d['b_type']]
elif e_representation == 'raw_distance':
if d['b_type'] is None:
remove_edges += [(n1, n2)]
else:
e_t.append(d['distance'])
e_t += [int(d['b_type'] == x) for x in [rdkit.Chem.rdchem.BondType.SINGLE, rdkit.Chem.rdchem.BondType.DOUBLE,
rdkit.Chem.rdchem.BondType.TRIPLE, rdkit.Chem.rdchem.BondType.AROMATIC]]
        else:
            raise ValueError('Unknown edge representation: {}'.format(e_representation))
if e_t:
e[(n1, n2)] = e_t
for edg in remove_edges:
g.remove_edge(*edg)
return nx.to_numpy_matrix(g), e