__init__.py 文件源码

python
阅读 17 收藏 0 点赞 0 评论 0

项目:lineage 作者: apriha 项目源码 文件源码
def _compute_snp_distances(self, df, build):
        if build == 36:
            hapmap = self._resources.get_hapmap_h36()
        else:
            hapmap = self._resources.get_hapmap_h37()

        for chrom in df['chrom'].unique():
            if chrom not in hapmap.keys():
                continue

            # create a new dataframe from the positions for the current chromosome
            temp = pd.DataFrame(df.loc[(df['chrom'] == chrom)]['pos'].values, columns=['pos'])

            # merge HapMap for this chrom
            temp = temp.append(hapmap[chrom], ignore_index=True)

            # sort based on pos
            temp = temp.sort_values('pos')

            # fill cM rates forward and backward
            temp['rate'] = temp['rate'].fillna(method='ffill')
            temp['rate'] = temp['rate'].fillna(method='bfill')

            # get difference between positions
            pos_diffs = np.ediff1d(temp['pos'])

            # compute cMs between each pos based on probabilistic recombination rate
            # https://www.biostars.org/p/123539/
            cMs_match_segment = (temp['rate'] * np.r_[pos_diffs, 0] / 1e6).values

            # add back into temp
            temp['cMs'] = np.r_[0, cMs_match_segment][:-1]

            temp = temp.reset_index()
            del temp['index']

            # use null `map` values to find locations of SNPs
            snp_indices = temp.loc[temp['map'].isnull()].index

            # use SNP indices to determine boundaries over which to sum cMs
            start_snp_ix = snp_indices + 1
            end_snp_ix = np.r_[snp_indices, snp_indices[-1]][1:] + 1
            snp_boundaries = np.c_[start_snp_ix, end_snp_ix]

            # sum cMs between SNPs to get total cM distance between SNPs
            # http://stackoverflow.com/a/7471967
            c = np.r_[0, temp['cMs'].cumsum()][snp_boundaries]
            cM_from_prev_snp = c[:, 1] - c[:, 0]

            # debug
            # temp.loc[snp_indices, 'cM_from_prev_snp'] = np.r_[0, cM_from_prev_snp][:-1]
            # temp.to_csv('debug.csv')

            # add back into df
            df.loc[(df['chrom'] == chrom), 'cM_from_prev_snp'] = np.r_[0, cM_from_prev_snp][:-1]

        return hapmap, df
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号