def pwdist_kld(self, seq1idx, seq2idx):
    """Return the Kullback-Leibler (KL) discrepancy between two vectors.

    The discrepancy between sequences X and Y is derived from their
    L-tuple (word) frequency vectors, which are looked up as
    ``self[seq1idx]`` and ``self[seq2idx]``.

    The measure is directional: swapping the two indices generally
    gives a different result.

    Args:
        seq1idx: Index/key of the first sequence (X) in ``self``.
        seq2idx: Index/key of the second sequence (Y) in ``self``.

    Returns:
        Sum over all words of ``(x + 1) * log2((x + 1) / (y + 1))``.

    References:
        1. Wu, Hsieh, Li (2001) Biometrics 57: 441-448.
           doi: 10.1111/j.0006-341X.2001.00441.x

    Notes:
        1. The KL discrepancy must be computed from relative
           frequencies (those that sum to 1). This method does not
           normalize; it uses the stored vectors as they are.
        2. To avoid an infinite d_KL(X, Y) when a frequency in Y is 0,
           the authors suggest modifying the original formulation
           by adding a unit to both terms of the frequency ratio.
           Here the unit is added to each vector before use, so it
           also enters the weighting factor outside the logarithm.
    """
    # Shift both vectors by one so the ratio below is always finite.
    shifted_x = self[seq1idx] + 1
    shifted_y = self[seq2idx] + 1
    log_ratio = np.log2(shifted_x / shifted_y)
    return np.sum(shifted_x * log_ratio)
评论列表
文章目录