def build_bubblechains(g: AssemblyGraph,
min_nodes: int=1) -> Iterable[AssemblyGraph]:
# Build dictionary which maps the bubble source to the bubble sink
logger.info("Searching for non-nested superbubbles in the assembly "
"graph...")
bubbles = {b[0]: b[1] for b in find_superbubbles(g, report_nested=False)}
bubble_entrances = set(bubbles.keys())
bubble_exits = set(bubbles.values())
logger.debug("Found superbubbles: %s", bubbles)
logger.info("Graph has %d superbubbles", len(bubbles))
# Obtain start nodes
# Priority is given as follows:
# 1. Bubble entrances without incoming edges
# 2. Bubble entrances for which holds that it's not also an exit of an
# other bubble
# 3. Bubble entrances for which the entrance and corresponding exit have
# not been visited yet
start_points = [
n for n in bubble_entrances if g.in_degree(n) == 0]
start_points.extend((n for n in bubble_entrances if n not in bubble_exits))
# We'll check later if this bubble has been visited already
start_points.extend(bubble_entrances)
logger.info("Number of start points : %d", len(start_points))
visited = set()
for start in start_points:
subgraph_nodes = set()
logger.debug("New start point %s", start)
if start not in bubble_entrances:
raise AssemblyError("Unexpected start point: {}, this is not a "
"bubble entrance.".format(start))
num_bubbles = 0
while start in bubble_entrances:
bubble_exit = bubbles[start]
if start in visited and bubble_exit in visited:
logger.debug("<%s, %s> already visited, stopping.",
start, bubble_exit)
break
bubble_nodes = superbubble_nodes(g, start, bubble_exit)
subgraph_nodes.update(bubble_nodes)
visited.update(bubble_nodes)
start = bubble_exit
num_bubbles += 1
if num_bubbles > 0:
logger.info("Built bubblechain of %d bubbles", num_bubbles)
if len(subgraph_nodes) >= min_nodes:
yield networkx.subgraph(g, subgraph_nodes)
评论列表
文章目录