web_explorer.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:web_explorer 作者: nobriot 项目源码 文件源码
def create_web_network_graph(self):
    """Draw the explored websites as a NetworkX directed graph and save it.

    Nodes are the start URLs in ``self.to_visit_urls[0]`` plus every entry
    of ``<main_directory>web_content/`` accepted by
    ``self.is_danish_company``. For each accepted entry, the pickled
    collection of external links stored in
    ``external_urls_<redirect_count>_redirect.p`` is loaded, and an edge is
    added towards every link that is already a node of the graph.

    The figure is written to ``<main_directory>DTU network.png`` and then
    displayed with ``plt.show()``.

    For documentation about NetworkX, check: https://networkx.github.io/

    Raises:
        OSError: if the content directory or one of the external-links
            pickle files cannot be opened.
    """
    content_directory = self.main_directory+"web_content/"
    start_urls = self.to_visit_urls[0]

    # Directed graph: an edge A -> B means site A links to site B.
    web_graph = nx.DiGraph()
    # Add the start nodes first.
    web_graph.add_nodes_from(start_urls)

    # List and filter the explored sites once; the same selection is used
    # for both the nodes and the edges below.
    danish_base_urls = [
        base_url
        for base_url in os.listdir(content_directory)
        if self.is_danish_company(base_url)
    ]

    # All nodes must exist before edges are added, because the edge filter
    # below only keeps links whose target is already in the graph.
    web_graph.add_nodes_from(danish_base_urls)

    for base_url in danish_base_urls:
        filename = content_directory+base_url+"/external_urls_"+str(self.redirect_count)+"_redirect.p"
        # NOTE(security): pickle.load executes arbitrary code from the file.
        # Presumably these files are produced by this crawler itself; do not
        # point this at a directory containing untrusted data.
        with open(filename, "rb") as links_file:
            external_base_urls = pickle.load(links_file)

        # Keep only the links that point to a site present in the graph.
        for external_link in external_base_urls:
            if web_graph.has_node(external_link):
                web_graph.add_edge(base_url, external_link)

    # Finally draw the network.
    plt.figure(figsize=(40, 40))
    pos = nx.random_layout(web_graph)
    nx.draw_networkx_nodes(web_graph, pos, node_size=2500)
    # Redraw the start URLs larger and in blue so they stand out.
    nx.draw_networkx_nodes(web_graph, nodelist=start_urls, pos=pos, node_size=3000, node_color='b')
    nx.draw_networkx_edges(web_graph, pos, alpha=0.5)
    plt.savefig(self.main_directory+"DTU network.png", dpi=40)
    plt.show()
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号