def test_paired_ttest_with_diff_sums(data):
    """Cross-check the streaming paired t-test against a per-feature reference.

    The streaming path feeds batched DeepLIFT contributions through
    ``diff_sums_from_generator`` and ``paired_ttest_with_diff_sums``.  The
    reference path pulls all contributions in one batch and calls
    ``ttest_rel`` feature by feature.  Both paths must agree on t-values and
    on (Bonferroni-corrected) p-values to within ``epsilon``, ignoring NaNs.

    Args:
        data: ``(model, X_test)`` pair (presumably a pytest fixture defined
            elsewhere -- confirm against the test module's conftest).
    """
    model, X_test = data
    expected_pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
    nb_pairs = len(expected_pairs)
    nb_features, nb_classes, nb_cases = 1717, 4, 20

    # Keyword arguments shared by the streaming and the single-batch generator.
    generator_kwargs = dict(
        process_X_data_func=process_X_data,
        nb_features=nb_features,
        nb_classes=nb_classes,
        process_X_data_func_args={'nb_features': nb_features},
    )

    # --- streaming implementation (small batches) ---
    streaming_gen = deeplift_contribs_generator(
        model, X_test, batch_size=5, **generator_kwargs)
    sums_D, sums_D2, sums_contribs, pairs = diff_sums_from_generator(
        streaming_gen, nb_features=nb_features, nb_classes=nb_classes)
    unadjusted_t_values, p_values = paired_ttest_with_diff_sums(
        sums_D, sums_D2, pairs=pairs, nb_cases=nb_cases)

    expected_shape = (nb_pairs, nb_features)
    assert unadjusted_t_values.shape == expected_shape
    assert p_values.shape == expected_shape

    # --- reference implementation (everything in a single batch) ---
    # The expression evaluates to 2073; an abnormally high batch size is used
    # so that the generator is expected to yield exactly one batch.
    single_batch_size = 109971161161043253 % 8085
    reference_gen = deeplift_contribs_generator(
        model, X_test, batch_size=single_batch_size, **generator_kwargs)

    # Non-streaming paired t-test: it materialises large matrices and would
    # not scale to big datasets, but it is a convenient independent check on
    # a tiny unit-testing dataset.
    alt_t_values, alt_p_values = [], []
    for batch_no, contribs in enumerate(reference_gen):
        assert not batch_no  # check only 1 batch (batch_no == 0)
        for lhs, rhs in pairs:
            t_row = np.zeros((nb_features, ))
            p_row = np.zeros((nb_features, ))
            for col in range(nb_features):
                t_row[col], p_row[col] = ttest_rel(
                    contribs[lhs][:, col], contribs[rhs][:, col])
            alt_t_values.append(t_row)
            alt_p_values.append(p_row)

    # --- compare both implementations pair by pair ---
    for row in range(len(pairs)):
        stream_t = unadjusted_t_values[row]
        ref_t = alt_t_values[row]
        stream_p = p_values[row]  # already bonferroni adjusted
        ref_p = bonferroni(alt_p_values[row], nb_pairs * nb_features)
        assert stream_t.shape == ref_t.shape
        assert stream_p.shape == ref_p.shape
        assert np.all(del_nans(np.abs(ref_t - stream_t)) < epsilon)
        assert np.all(del_nans(np.abs(ref_p - stream_p)) < epsilon)
# (web-page residue, not code: "comment list")
# (web-page residue, not code: "table of contents")