def find(self, query, cutoff, limit=None):
    """Look up the fragments most similar to a query fragment.

    The query's row is read from ``self.h5file.root.scores``. Entries equal
    to zero are skipped, and the remaining raw scores are kept when they
    reach the cutoff scaled by ``self.score_precision``.

    Args:
        query (str): Query fragment identifier; it is translated to a row
            index through ``self.cache_l2i``.
        cutoff (float): Cutoff, hits whose stored score lies below the
            scaled cutoff are dropped.
        limit (int): Maximum number of hits to return. The default of None
            returns every hit.

    Returns:
        list[tuple[str,float]]: Hit fragment identifier and similarity
        score, ordered from highest to lowest score.
    """
    scale = float(self.score_precision)
    # Largest power of ten not exceeding the scale; reported scores are
    # rounded up to this many decimal steps.
    decimal_scale = float(10 ** floor(log10(scale)))
    # The cutoff is truncated into the same integer domain as stored scores.
    raw_cutoff = int(cutoff * scale)

    row_index = self.cache_l2i[query]
    row = self.h5file.root.scores[row_index, ...]

    hits = []
    for column in row.nonzero()[0]:
        raw_score = row[column]
        if raw_score >= raw_cutoff:
            score = ceil(decimal_scale * raw_score / scale) / decimal_scale
            hits.append((self.cache_i2l[column], score))

    # list.sort is stable, so hits with equal scores keep their column order.
    hits.sort(key=lambda hit: hit[1], reverse=True)
    return hits if limit is None else hits[:limit]
评论列表
文章目录