def get_expected_subsample_variety(dictionary, subsample_size):
    """Return the expected variety of a subsample of a frequency dictionary.

    The result starts at the number of keys in ``dictionary`` and, for each
    frequency value, has the result of ``_prob_no_occurrence`` subtracted
    from it (presumably the probability that an item with that frequency is
    absent from the subsample -- confirm against that helper).

    Args:
        dictionary: mapping whose values are frequencies; the values are
            summed to obtain the size of the full sample.
        subsample_size: number of elements in the subsample.

    Returns:
        ``len(dictionary)`` minus the per-frequency helper results.

    Raises:
        ValueError: if ``subsample_size`` is larger than the sum of the
            frequencies in ``dictionary``.
    """
    total_count = sum(dictionary.values())

    # Guard first: a subsample cannot be larger than the sample itself.
    if total_count < subsample_size:
        raise ValueError(u'Not enough elements in dictionary')

    # Computed once and handed to the helper for every frequency.
    subsample_count = binom(total_count, subsample_size, exact=True)

    variety = len(dictionary)
    for count in dictionary.values():
        absent = _prob_no_occurrence(
            total_count, subsample_size, count, subsample_count
        )
        variety -= absent
    return variety
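# Comment list (web-page navigation residue; not part of the code)
# Article table of contents (web-page navigation residue; not part of the code)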