def _get_infos(self, limit=1000):
    """Yield the per-pheno info for each of the first `limit` variants.

    Reads the tab-delimited file at `self.filepath`.  The first line is
    treated as the header: each column name is stripped of surrounding
    quotes/spaces and lower-cased.  Each of the following lines (at most
    `limit` of them) is split on tabs and handed to `self._parse_variant`.

    Args:
        limit: maximum number of data lines (after the header) to read.

    Yields:
        Whatever `self._parse_variant` returns for each line (used here as
        a mapping).  When it contains all of 'num_cases', 'num_controls'
        and 'num_samples', the counts are validated and the 'num_samples'
        entry is removed before yielding.

    Raises:
        PheWebError: if the file has no header line, or if
            num_cases + num_controls != num_samples on some line.
    """
    fields_to_check = conf.parse.per_pheno_fields
    count_keys = ('num_cases', 'num_controls', 'num_samples')
    with read_maybe_gzip(self.filepath) as f:
        # Use a default here: a bare next(f) on an empty file would raise
        # StopIteration, which a generator re-raises as an opaque RuntimeError.
        header_line = next(f, None)
        if header_line is None:
            raise PheWebError(
                "One of your association files is empty, so its header could not be read.\n" +
                "- the filepath: {!r}\n".format(self.filepath))
        colnames = [colname.strip('"\' ').lower() for colname in header_line.rstrip('\n\r').split('\t')]
        colidx_for_field = self._parse_header(colnames, fields_to_check)
        self._assert_all_fields_mapped(colnames, fields_to_check, colidx_for_field)
        for linenum, line in enumerate(itertools.islice(f, 0, limit)):
            values = line.rstrip('\n\r').split('\t')
            variant = self._parse_variant(values, colnames, colidx_for_field)
            # Check that num_cases + num_controls == num_samples
            if all(key in variant for key in count_keys):
                if variant['num_cases'] + variant['num_controls'] != variant['num_samples']:
                    # NOTE: linenum+1 counts data lines from 1; the header line is not included.
                    raise PheWebError(
                        "The number of cases and controls don't add up to the number of samples on one line in one of your association files.\n" +
                        "- the filepath: {!r}\n".format(self.filepath) +
                        "- the line number: {}\n".format(linenum+1) +
                        "- parsed line: [{!r}]\n".format(line))
                del variant['num_samples']  # don't need it.
            yield variant
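# [stray web-page text, not part of the program] Comment list
# [stray web-page text, not part of the program] Article table of contents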