# read_input_file.py — Python source code snippet

def _get_infos(self, limit=1000):
        """Yield the parsed per-pheno info for each of the first `limit` variants.

        Reads the tab-separated file at ``self.filepath``: the first line is
        treated as the header, and each of the next `limit` lines is parsed
        with ``self._parse_variant`` and yielded.

        When a parsed variant contains all of ``num_cases``, ``num_controls``
        and ``num_samples``, the three are checked for consistency and
        ``num_samples`` is then removed from the variant before it is yielded.

        Args:
            limit: maximum number of data lines (after the header) to read.

        Yields:
            The object returned by ``self._parse_variant`` for each line
            (used here as a mutable mapping keyed by field name).

        Raises:
            PheWebError: if the file has no header line, or if
                ``num_cases + num_controls != num_samples`` on some line.
        """
        fields_to_check = conf.parse.per_pheno_fields
        count_fields = ('num_cases', 'num_controls', 'num_samples')
        with read_maybe_gzip(self.filepath) as f:
            # A bare `next(f)` on an empty file would raise StopIteration inside
            # this generator, which surfaces as an opaque RuntimeError (PEP 479).
            header = next(f, None)
            if header is None:
                raise PheWebError(
                    "One of your association files is empty, so it has no header line.\n" +
                    "- the filepath: {!r}\n".format(self.filepath))
            colnames = [colname.strip('"\' ').lower() for colname in header.rstrip('\n\r').split('\t')]
            colidx_for_field = self._parse_header(colnames, fields_to_check)
            self._assert_all_fields_mapped(colnames, fields_to_check, colidx_for_field)
            # `linenum` counts data lines starting at 1; the header line is not counted.
            for linenum, line in enumerate(itertools.islice(f, 0, limit), start=1):
                values = line.rstrip('\n\r').split('\t')
                variant = self._parse_variant(values, colnames, colidx_for_field)
                # Check that num_cases + num_controls == num_samples
                if all(key in variant for key in count_fields):
                    if variant['num_cases'] + variant['num_controls'] != variant['num_samples']:
                        raise PheWebError(
                            "The number of cases and controls don't add up to the number of samples on one line in one of your association files.\n" +
                            "- the filepath: {!r}\n".format(self.filepath) +
                            "- the line number: {}\n".format(linenum) +
                            "- parsed line: [{!r}]\n".format(line))
                    del variant['num_samples']  # redundant once validated against cases + controls
                yield variant