def sort_words(vertex_source, edge_source, window=2, pagerank_config=None):
    """Rank words by their PageRank score, highest score first.

    Keyword arguments:
    vertex_source   --  iterable of word lists; every distinct word found
                        here becomes a node of the graph
    edge_source     --  iterable of word lists; each pair yielded by
                        ``combine(word_list, window)`` becomes an undirected
                        edge, provided both words are known nodes
    window          --  forwarded to ``combine``; presumably the size of the
                        co-occurrence window (TODO confirm against ``combine``)
    pagerank_config --  keyword arguments forwarded to ``nx.pagerank``;
                        ``{'alpha': 0.85}`` is used when None

    Returns a list of ``AttrDict(word=..., weight=...)`` items ordered by
    descending weight.
    """
    # Build the default per call instead of sharing one dict object between
    # all calls through the signature.
    if pagerank_config is None:
        pagerank_config = {'alpha': 0.85}

    # Give every distinct word a dense integer id, in first-seen order.
    word_index = {}
    index_word = {}
    for word_list in vertex_source:
        for word in word_list:
            if word not in word_index:
                index = len(word_index)
                word_index[word] = index
                index_word[index] = word

    words_number = len(word_index)
    graph = np.zeros((words_number, words_number))

    # Symmetric 0/1 adjacency matrix: repeated pairs do not add weight.
    for word_list in edge_source:
        for w1, w2 in combine(word_list, window):
            if w1 in word_index and w2 in word_index:
                index1 = word_index[w1]
                index2 = word_index[w2]
                graph[index1][index2] = 1.0
                graph[index2][index1] = 1.0

    debug('graph:\n', graph)

    nx_graph = nx.from_numpy_matrix(graph)
    scores = nx.pagerank(nx_graph, **pagerank_config)  # dict: node id -> score
    sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)

    return [
        AttrDict(word=index_word[index], weight=score)
        for index, score in sorted_scores
    ]
# Example source code for Python's from_numpy_matrix()
def build_matrix():
    """Rank the words returned by ``handel_weibo_data()`` with PageRank.

    Builds a weighted, symmetric co-occurrence matrix over all distinct
    words, runs ``nx.pagerank`` (alpha=0.85) on it, filters the ranking and
    writes the first (up to) 100 surviving entries to
    ``result_textrank.txt``.

    Returns the full filtered list of ``(word, score)`` tuples, ordered by
    descending score.
    """
    weibo_data = handel_weibo_data()

    # Give every distinct word a dense integer id, in first-seen order.
    word_index = {}
    index_word = {}
    for sent in weibo_data:
        for word in sent:
            if word not in word_index:
                index = len(word_index)
                word_index[word] = index
                index_word[index] = word
    words_number = len(word_index)

    # Every word is paired with itself and with each later word of the same
    # sentence; both symmetric cells are incremented, so a word's own
    # diagonal cell grows by 2 per occurrence.
    graph = np.zeros((words_number, words_number))
    for word_list in weibo_data:
        indices = [word_index[w] for w in word_list]
        for i, index1 in enumerate(indices):
            for index2 in indices[i:]:
                graph[index1][index2] += 1
                graph[index2][index1] += 1

    nx_graph = nx.from_numpy_matrix(graph)
    scores = nx.pagerank(nx_graph, alpha=0.85)
    sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)

    # Drop single-character words and two explicitly excluded words.
    # NOTE(review): both excluded literals read u'??' in this copy, which
    # looks like an encoding loss -- restore the intended words.
    key_words = []
    for index, score in sorted_scores:
        word = index_word[index]
        if word == u'??' or word == u'??' or len(word) == 1:
            continue
        key_words.append((word, score))

    # Write at most the first 100 keywords; slicing avoids an IndexError
    # when fewer survive, and ``with`` closes the file even if a write fails.
    with open('f://emotion/mysite/Label_extract/result_textrank.txt', 'w+') as fp_textrank_result:
        for word, score in key_words[:100]:
            fp_textrank_result.write(word + ' ' + str(round(score, 10)))
            fp_textrank_result.write('\n')

    print("textrank key word calculate is success...")
    return key_words
def plot_ibp(model, target_dir=None, block=False, columns=None, separate=False, K=4):
    """Draw a set of diagnostic plots for ``model``.

    Plots, in order: the model's adjacency matrix, the CSV-backed curves
    drawn by ``plot_csv``, the adjacency matrix reordered by a k-means
    clustering of the feature matrix, a matrix produced by
    ``model.generate``, a RESCAL reconstruction, the feature and weight
    matrices, and four layouts of the graph itself.

    :param model: object exposing ``Y()``, ``leftordered()``, ``generate()``
        and the attributes ``_W``, ``_Y`` and ``_K``
    :param target_dir: forwarded to ``plot_csv``
    :param block: forwarded to ``display``
    :param columns: forwarded to ``plot_csv``; ``[0]`` is used when None
    :param separate: forwarded to ``plot_csv``
    :param K: number of clusters for ``kmeans`` and rank for ``rescal``
    """
    # Build the default per call so that plot_csv never sees (or mutates) a
    # list shared between calls.
    if columns is None:
        columns = [0]

    G = nx.from_numpy_matrix(model.Y(), nx.DiGraph())
    F = model.leftordered()
    W = model._W

    # Plot Adjacency Matrix
    draw_adjmat(model._Y)

    # Plot Log likelihood
    plot_csv(target_dir=target_dir, columns=columns, separate=separate)

    # Optional thresholding of W, kept disabled:
    #W[np.where(np.logical_and(W>-1.6, W<1.6))] = 0
    #W[W <= -1.6]= -1
    #W[W >= 1.6] = 1

    # KMeans test: order the nodes by cluster label (stable, so nodes keep
    # their index order inside each cluster).
    clusters = kmeans(F, K=K)
    nodelist_kmeans = sorted(range(len(clusters)), key=lambda node: clusters[node])
    adj_mat_kmeans = nx.adjacency_matrix(G, nodelist=nodelist_kmeans).A
    draw_adjmat(adj_mat_kmeans, title='KMeans on feature matrix')

    # Adjacency matrix generation
    draw_adjmat(model.generate(nodelist_kmeans), title='Generated Y from ILFRM')

    # training Rescal, reordered with the same node permutation
    R = rescal(model._Y, K)
    R = R[nodelist_kmeans, :][:, nodelist_kmeans]
    draw_adjmat(R, 'Rescal generated')

    # Feature matrix and weight matrix side by side
    f = plt.figure()
    ax = f.add_subplot(121)
    title = 'Features matrix, K = %d' % model._K
    ax.set_title(title)
    ColorMap(F, pixelspervalue=5, title=title, ax=ax)

    ax = f.add_subplot(122)
    ax.set_title('W')
    img = ax.imshow(W, interpolation='None')
    plt.colorbar(img)

    # Network plots: one subplot per layout
    f = plt.figure()
    layouts = (
        (221, 'Spectral', nx.draw_spectral),
        (222, 'Spring', nx.draw),
        (223, 'Random', nx.draw_random),
    )
    for position, name, draw in layouts:
        ax = f.add_subplot(position)
        ax.set_title(name)
        draw(G, axes=ax)

    ax = f.add_subplot(224)
    ax.set_title('graphviz')
    try:
        nx.draw_graphviz(G, axes=ax)
    except Exception:
        # Best effort: presumably the graphviz backend is optional, so any
        # failure here just leaves this subplot empty. Catching Exception
        # rather than everything lets Ctrl-C and SystemExit propagate.
        pass

    display(block=block)
def init_network(self, vertex_num=5, p=0.9, directed=False, file_name=None, adjMatrix=None):
    """Create the network from a file, an adjacency matrix, or at random.

    Sources are tried in this order: ``file_name``, then ``adjMatrix``,
    then a random ``nx.binomial_graph``.

    :param vertex_num: number of vertices of the random graph
    :param p: edge probability of the random graph
    :param directed: whether the random graph is directed
        NOTE(review): not applied on the file / adjMatrix paths, which
        always call ``nx.from_numpy_matrix`` with its default graph type.
    :param file_name: path of a whitespace-separated text file in which
        a line holding a single number gives the number of vertices, and
        every other line holds a start vertex followed by its end
        vertices (vertex numbers are 1-based)
    :param adjMatrix: adjacency matrix used when no file is given
    :return: the networkx graph
    """
    local_adjMatrix = adjMatrix

    if not file_name:
        if local_adjMatrix is None:
            # init by random; self.vertex_num is left untouched on this path
            return nx.binomial_graph(vertex_num, p, directed=directed)
        local_G = nx.from_numpy_matrix(local_adjMatrix)
        self.vertex_num = local_adjMatrix.shape[0]
        return local_G

    # init by file
    with open(file_name, 'r') as fd:
        for line in fd:
            # split() without an argument tolerates repeated spaces, tabs
            # and the trailing newline; blank lines yield no tokens.
            tt = line.split()
            if not tt:
                continue
            if len(tt) == 1:
                # A lone number (re)allocates the matrix for that many
                # vertices. Builtin int replaces the removed np.int alias.
                vv = int(tt[0])
                local_adjMatrix = np.zeros([vv, vv], dtype=int)
                self.vertex_num = vv
                continue
            start = int(tt[0]) - 1
            for end in tt[1:]:
                local_adjMatrix[start][int(end) - 1] = 1

    return nx.from_numpy_matrix(local_adjMatrix)
# mnist_subgraphs.py -- file source
# Project: TextAsGraphClassification
# Author: NightmareNyx
# Project source
# File source
# Views: 23
# Favorites: 0
# Likes: 0
# Comments: 0
def test():
    """Turn MNIST images into lists of labelled patch graphs and pickle them.

    Each image is reshaped to 28x28 and cut into 4x4 patches; every patch
    that is not all zeros becomes a graph on 16 nodes in which every pair
    of distinct nodes is connected, with a ``label`` node attribute.

    Files written to the working directory:
    ``sugbgraphs_labels.pickle`` (labels of the first 50 training images),
    ``sugbgraphs_train.pickle`` (graphs of the first 50 training images),
    ``sugbgraphs_test.pickle`` (graphs of the first 5 test images).
    """
    mnist = input_data.read_data_sets("MINST_data", one_hot=False)
    train_data = mnist.train.images.astype(np.float32)
    fraction = 50
    train_labels = mnist.train._labels[:fraction]
    with open('sugbgraphs_labels.pickle', 'wb') as f:
        pickle.dump(train_labels, f)

    test_data = mnist.test.images.astype(np.float32)
    print(train_data.shape)

    patch_size = 4
    n_nodes = patch_size * patch_size
    n_ids = range(n_nodes)
    # All-ones adjacency matrix with a zero diagonal: no self loops.
    A = np.ones((n_nodes, n_nodes))
    np.fill_diagonal(A, 0)
    bins = list(np.linspace(0.0, 1.0, 10))

    train = []
    for cc, sample in enumerate(train_data[:fraction], 1):
        sample = sample.reshape((28, 28))
        sugbg = []
        patches = image.extract_patches_2d(sample, (patch_size, patch_size))
        for p in patches:
            if np.sum(p) == 0:
                continue
            G1 = nx.from_numpy_matrix(A)
            # Node label = index of the intensity bin of the matching pixel.
            dictionary = dict(zip(n_ids, np.digitize(p.flatten(), bins)))
            # Keyword arguments work with both the networkx 1.x
            # (G, name, values) and 2.x (G, values, name) signatures.
            nx.set_node_attributes(G1, name='label', values=dictionary)
            sugbg.append(G1)
        train.append(sugbg)
        print(cc)

    with open('sugbgraphs_train.pickle', 'wb') as f:
        pickle.dump(train, f)
    del train  # free the training graphs before building the test ones

    test_graphs = []
    for sample in test_data[:5]:
        sample = sample.reshape((28, 28))
        sugbg = []
        patches = image.extract_patches_2d(sample, (patch_size, patch_size))
        for p in patches:
            if np.sum(p) == 0:
                continue
            G1 = nx.from_numpy_matrix(A)
            # NOTE(review): unlike the training loop, labels here are the 9
            # histogram bin counts of the patch, so zip() labels only the
            # first 9 nodes. Looks unintended -- confirm whether
            # np.digitize should be used here as well.
            counts = np.histogram(p.flatten(), bins=np.linspace(0.0, 1.0, 10))[0]
            dictionary = dict(zip(n_ids, counts))
            nx.set_node_attributes(G1, name='label', values=dictionary)
            sugbg.append(G1)
        test_graphs.append(sugbg)

    # Dump every test sample; the previous code wrote only the graphs of
    # the last sample (``sugbg``).
    with open('sugbgraphs_test.pickle', 'wb') as f:
        pickle.dump(test_graphs, f)