overlap.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:cc-crawl-statistics 作者: commoncrawl 项目源码 文件源码
def plot_similarity_graph(self, show_edges=False):
        '''(trial) visualize similarity using GraphViz'''
        g = pygraphviz.AGraph(directed=False, overlap='scale', splines=True)
        g.node_attr['shape'] = 'plaintext'
        g.node_attr['fontsize'] = '12'
        if show_edges:
            g.edge_attr['color'] = 'lightgrey'
            g.edge_attr['fontcolor'] = 'grey'
            g.edge_attr['fontsize'] = '8'
        else:
            g.edge_attr['style'] = 'invis'
        for crawl1 in sorted(self.similarity['url']):
            for crawl2 in sorted(self.similarity['url'][crawl1]):
                similarity = self.similarity['url'][crawl1][crawl2]
                distance = 1.0 - similarity
                g.add_edge(MonthlyCrawl.short_name(crawl1),
                           MonthlyCrawl.short_name(crawl2),
                           len=(distance),
                           label='{0:.2f}'.format(distance))
        g.write(os.path.join(PLOTDIR, 'crawlsimilarity_url.dot'))
        g.draw(os.path.join(PLOTDIR, 'crawlsimilarity_url.svg'), prog='fdp')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号