def create_component_sframe(g, baseid_name='page_id', layer_name='layer'):
    """Build a per-component SFrame summarising the vertices and edges of ``g``.

    Vertices are grouped by ``component_id``. Every other vertex column
    (except ``__id``) is concatenated into a list column named
    ``<column>s``; vertex ids are collected in ``nids`` and counted in
    ``node_count``. Two derived columns are then added:

    * ``width``  -- number of distinct values in the ``layer_name`` column
    * ``height`` -- number of distinct values in the ``baseid_name`` column

    Edges are grouped by ``component_id`` as well, collecting their
    endpoints in ``src`` and ``tgt``, and joined onto the vertex summary.

    Args:
        g: graph object exposing ``vertices`` and ``edges`` tables, both of
            which carry a ``component_id`` column.
        baseid_name: vertex column whose distinct values define ``height``.
        layer_name: vertex column whose distinct values define ``width``.

    Returns:
        The joined component table, sorted by ``node_count`` in descending
        order.

    Raises:
        ValueError: if the vertex table lacks an ``__id`` or
            ``component_id`` column.

    NOTE(review): the join relies on the library's default join type;
    components that have no edges may be dropped if that default is an
    inner join -- confirm this is intended.
    """
    vertex_columns = g.vertices.column_names()
    # The vertex id and the grouping key are handled explicitly below, so
    # they must not be aggregated generically.
    for reserved in ('__id', 'component_id'):
        vertex_columns.remove(reserved)

    # Output column names must be unique (required by graphlab), hence the
    # trailing 's' on every aggregated column.
    vertex_aggs = {}
    for name in vertex_columns:
        vertex_aggs[name + 's'] = gl.aggregate.CONCAT(name)
    vertex_aggs['nids'] = gl.aggregate.CONCAT('__id')
    vertex_aggs['node_count'] = gl.aggregate.COUNT('__id')
    components = g.vertices.groupby('component_id', vertex_aggs)

    def distinct_count(list_column):
        # Row-wise callback: number of unique entries in the given list column.
        return lambda row: len(np.unique(row[list_column]))

    components['width'] = components.apply(distinct_count(layer_name + 's'))
    components['height'] = components.apply(distinct_count(baseid_name + 's'))

    edge_aggs = {
        'src': gl.aggregate.CONCAT('__src_id'),
        'tgt': gl.aggregate.CONCAT('__dst_id'),
    }
    edges_by_component = g.edges.groupby('component_id', edge_aggs)

    components = components.join(edges_by_component, on='component_id')
    # Largest components first.
    return components.sort('node_count', ascending=False)
评论列表
文章目录