def qualification_filter(self):
"""
Providing information of those unqualified and qualified contigs from the orginal fasta file
with the criterion: >20Kb & >=5 restriction sites inside.
"""
unqualified = np.empty(shape=[0,3])
qualified = np.empty(shape=[0,4])
rm_dup = self.RcmapTable[['CMapId','ContigLength','NumSites']].drop_duplicates()
for i in self.ref_id.keys():
index = i
name = self.ref_id[i]
length = self.ref_inf[name]
if i not in self.RcmapTable['CMapId'].unique():
unqualified = np.append(unqualified,[[index,name, length]],axis=0)
else:
Id = rm_dup[rm_dup['CMapId']==i].index[0]
sites = rm_dup['NumSites'][Id]
qualified = np.append(qualified,[[index,name,length,sites]],axis=0)
self.unqualified = pd.DataFrame(unqualified, columns=['index','contig','length(bp)'])
self.qualified = pd.DataFrame(qualified, columns=['index','contig','length(bp)','numSites'])
评论列表
文章目录