def extract_phenocode_from_filepath(phenolist, regex):
    """Set each phenotype's 'phenocode' from its association filepaths.

    For every phenotype in `phenolist`, `regex` is searched against each path
    in `pheno['assoc_files']`. The single captured group is the phenocode.
    All of a phenotype's filepaths must yield the same phenocode, and if the
    phenotype already has a 'phenocode' it must equal the extracted one.

    Args:
        phenolist: a sized iterable of dict-like phenotypes, each with a
            non-empty 'assoc_files' entry. The phenotypes are modified in place.
        regex: a pattern string or an already-compiled pattern containing
            exactly one capturing group.

    Returns:
        The same `phenolist` object, with 'phenocode' set on every phenotype.

    Raises:
        PheWebError: if a phenotype lacks 'assoc_files' or has it empty, if
            the regex fails to match a filepath or does not capture exactly
            one group, if one phenotype's filepaths give different
            phenocodes, or if the result conflicts with an existing
            'phenocode'.
    """
    print("NOTE: working with {} phenos".format(len(phenolist)))

    # The private alias `re._pattern_type` was removed in Python 3.7, so take
    # the type from a compiled pattern; this works on every Python version.
    pattern_type = type(re.compile(''))
    if not isinstance(regex, pattern_type):
        regex = re.compile(regex)

    for pheno in phenolist:
        if 'assoc_files' not in pheno:
            raise PheWebError("ERROR: At least one phenotype doesn't have the key 'assoc_files'.")
        if not pheno['assoc_files']:
            raise PheWebError("ERROR: At least one phenotype has an empty 'assoc_files' list.")

        phenocodes = []
        for assoc_filepath in pheno['assoc_files']:
            match = regex.search(assoc_filepath)
            if match is None:
                raise PheWebError("ERROR: The regex {!r} doesn't match the filepath {!r}".format(regex.pattern, assoc_filepath))
            groups = match.groups()
            if not groups:
                raise PheWebError("ERROR: The regex {!r} doesn't capture any groups on the filepath {!r}! You're using parentheses without backslashes, right?".format(regex.pattern, assoc_filepath))
            if len(groups) != 1:
                # More than one group is ambiguous: we can't know which one
                # is meant to be the phenocode.
                raise PheWebError("ERROR: The regex {!r} captures {} groups on the filepath {!r}, but it must capture exactly one.".format(regex.pattern, len(groups), assoc_filepath))
            phenocodes.append(groups[0])

        if len(set(phenocodes)) != 1:
            raise PheWebError("ERROR: At least one phenotype gets multiple different phenocodes from its several association filepaths. Here they are: {!r}".format(list(set(phenocodes))))

        phenocode = phenocodes[0]
        # A pre-existing phenocode is accepted only if it agrees with the
        # one extracted from the filepaths.
        if 'phenocode' in pheno and pheno['phenocode'] != phenocode:
            raise PheWebError(
                "ERROR: The regex {!r} matched the filepaths {!r} to produce the phenocode {!r}. But that phenotype already had a phenocode, {!r}.\n"
                .format(regex.pattern, pheno['assoc_files'], phenocode, pheno['phenocode']))
        pheno['phenocode'] = phenocode

    return phenolist
评论列表
文章目录