def create_web_network_graph(self):
    """Build, draw and save a directed NetworkX graph of the explored websites.

    Nodes are the start URLs in ``self.to_visit_urls[0]`` plus every entry of
    ``<main_directory>web_content/`` for which ``self.is_danish_company``
    returns a truthy value. For each such entry, the pickled collection of
    external base URLs (``external_urls_<redirect_count>_redirect.p``) is
    loaded, and an edge is added towards every external URL that is already
    a node of the graph; links to sites outside the graph are ignored.

    The graph is drawn with a random layout (start URLs are over-drawn
    larger and in blue, labels are not drawn), saved as
    ``<main_directory>DTU network.png`` and then shown with ``plt.show()``.

    Returns:
        None. The method only produces the figure / image file.

    Raises:
        OSError: if the ``web_content/`` directory or one of the per-site
            pickle files cannot be opened.

    For documentation about NetworkX, check: https://networkx.github.io/
    """
    # NOTE(review): main_directory is concatenated as-is, so it presumably
    # ends with a path separator -- confirm against where it is set.
    content_dir = self.main_directory + "web_content/"
    # NOTE(review): to_visit_urls[0] is used both as an iterable of nodes and
    # as a nodelist, so it is presumably a list of start URLs -- confirm.
    start_urls = self.to_visit_urls[0]

    web_graph = nx.DiGraph()
    # Add the start nodes first, as the center of the graph.
    web_graph.add_nodes_from(start_urls)

    # List and filter the explored sites once; the same selection is used
    # for both the nodes and the sources of the edges.
    danish_sites = [
        base_url
        for base_url in os.listdir(content_dir)
        if self.is_danish_company(base_url)
    ]
    web_graph.add_nodes_from(danish_sites)

    links_suffix = "/external_urls_" + str(self.redirect_count) + "_redirect.p"
    for base_url in danish_sites:
        # SECURITY: pickle.load executes arbitrary code from the file; this is
        # only safe as long as these files are written by this program itself.
        with open(content_dir + base_url + links_suffix, "rb") as links_file:
            external_base_urls = pickle.load(links_file)
        for external_link in external_base_urls:
            # Only connect to sites that are themselves part of the graph.
            if web_graph.has_node(external_link):
                web_graph.add_edge(base_url, external_link)

    # Finally draw the network.
    plt.figure(figsize=(40, 40))
    pos = nx.random_layout(web_graph)
    nx.draw_networkx_nodes(web_graph, pos, node_size=2500)
    # Draw the start URLs again on top, larger and in blue, to highlight them.
    nx.draw_networkx_nodes(
        web_graph,
        nodelist=start_urls,
        pos=pos,
        node_size=3000,
        node_color='b',
    )
    nx.draw_networkx_edges(web_graph, pos, alpha=0.5)
    plt.savefig(self.main_directory + "DTU network.png", dpi=40)
    plt.show()
评论列表
文章目录