def get_effective_genome_length(genome, read_length):
    # type: (str, int) -> float
    """Return the effective genome size for a genome at a given read length.

    The effective genome fraction is taken from the packaged resource
    ``scripts/effective_sizes/<genome>_<read_length>.txt`` (the last
    whitespace-separated token of that file) and multiplied by the total
    genome length, i.e. the sum of the values returned by
    ``create_genome_size_dict(genome)``.

    Args:
        genome: Genome name. It is matched case-insensitively against the
            part of each packaged file name that precedes the first
            underscore.
        read_length: Read length; selects which packaged file is read.

    Returns:
        The effective genome fraction multiplied by the total genome length.

    Raises:
        KeyError: If no packaged effective-size file matches ``genome``.
            The available genomes are logged before the error propagates.
        AssertionError: If the fraction read from the file is not below 1.
    """
    genome_names = pkg_resources.resource_listdir("epic",
                                                  "scripts/effective_sizes")

    # Keys are lower-cased so that the ``genome.lower()`` lookup below can
    # find genomes whose file names contain upper-case letters; the values
    # keep the exact spelling needed to build the resource path.
    name_dict = {n.split("_")[0].lower(): "".join(n.split("_")[:-1])
                 for n in genome_names}

    try:
        genome_exact = name_dict[genome.lower()]
    except KeyError:
        genome_list = "\n".join(name_dict)
        logging.error(
            "Genome " + genome +
            " not found.\n These are the available genomes: " + genome_list +
            "\nIf yours is not there, please request it at github.com/endrebak/epic .")
        # Without re-raising, execution would continue with ``egf`` unbound
        # and fail later with an unrelated UnboundLocalError.
        raise

    egf = pkg_resources.resource_string(  # type: ignore
        "epic", "scripts/effective_sizes/{}_{}.txt".format(
            genome_exact, read_length)).split()[-1].decode()

    genome_length = sum(create_genome_size_dict(genome).values())

    logging.info("Using an effective genome fraction of {}.".format(egf))

    fraction = float(egf)
    assert fraction < 1, \
        "Something wrong happened, effective genome fraction over 1!"

    return fraction * genome_length
评论列表
文章目录