DartModules.py 文件源码

python
阅读 31 收藏 0 点赞 0 评论 0

项目:dartqc 作者: esteinig 项目源码 文件源码
def filter_data(self, mind=0.2, recalculate=True):
        """
        Remove samples whose missing-data value exceeds ``mind``.

        The per-sample missingness comes from ``self._calculate_mind()``;
        every sample whose value is strictly greater than ``mind`` is
        dropped from the ``"calls"`` list of each SNP in ``self.data``.

        NOTE: each SNP entry of ``self.data`` is updated in place (its
        ``"calls"`` list is replaced), so ``self.data`` itself reflects the
        filtering after this method returns.

        TODO: re-write with Pandas.

        :param mind: Missingness threshold. ``None`` disables filtering and
            returns ``self.data`` and ``self.attributes`` untouched.
        :param recalculate: When true, the filtered data is passed through
            ``SNPModule(...).get_data(threshold=None)`` and that result is
            returned instead.
        :return: Tuple ``(filtered_data, attributes)``.
        """

        if mind is None:
            stamp("Returning data without filtering.")
            return self.data, self.attributes

        stamp("Filtering samples with missing data >", mind)
        stamp("Missing data calculated over", len(self.data), "SNPs")

        # presumably a pandas Series indexed by sample position (it is
        # boolean-masked and exposes ``.index``) -- confirm in
        # ``_calculate_mind``.
        mind_prop = self._calculate_mind()

        to_remove = mind_prop[mind_prop > mind].index.tolist()

        # Build the lookup once: membership is tested for every call of every
        # SNP, and a set makes each test O(1) instead of a linear list scan.
        removal_lookup = set(to_remove)

        filtered_data = {}
        for snp, data in self.data.items():
            # Replaces the list on the original entry (in-place update).
            data["calls"] = [snp_call for i, snp_call in self._iterate_call_indices(data["calls"])
                             if i not in removal_lookup]
            filtered_data[snp] = data

        attributes = self._adjust_attributes(self.attributes, mind, to_remove)

        # NOTE(review): the denominator is read from the *adjusted* attributes;
        # verify whether ``sample_size`` there is the pre- or post-filter count.
        percent_removed = format((len(to_remove) / attributes["sample_size"])*100, ".2f")

        stamp("Removed {r} samples out of {t} samples ({p}%)"
              .format(r=len(to_remove), t=attributes["sample_size"], p=percent_removed))

        # Recalculating SNP parameters:

        if recalculate:
            stamp("Recalculating MAF, CALL RATE and HWE for SNPs")
            marker = SNPModule(filtered_data, attributes)
            filtered_data, attributes = marker.get_data(threshold=None)

        return filtered_data, attributes
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号