def ttest(filename1, filename2):
    """Run a paired t-test on the per-query values of two evaluation files.

    Both files are read with ``load_evaluation_file``, which is expected to
    return a ``(qids, values)`` pair of array-like objects supporting
    ``.shape`` and fancy indexing (assumed to be NumPy arrays -- confirm
    against the loader). The entries of each file are reordered by query id
    so that the values are compared pairwise for the same query.

    Args:
        filename1: Path of the first evaluation file.
        filename2: Path of the second evaluation file.

    Returns:
        A tuple ``(n, mean1, mean2, t_statistic, p_value)`` where ``n`` is
        the number of compared values, ``mean1`` / ``mean2`` are the means
        of the values of each file, and ``t_statistic`` / ``p_value`` are
        the results of ``ttest_rel`` (presumably ``scipy.stats.ttest_rel``).

    Raises:
        ValueError: If the files hold a different number of queries, or if
            their sorted query ids differ.
    """
    # Load from the function's own parameters; the previous code read
    # ``arguments.filename1`` / ``arguments.filename2`` from a global and
    # ignored whatever the caller passed in.
    qids1, values1 = load_evaluation_file(filename1)
    qids2, values2 = load_evaluation_file(filename2)

    if qids1.shape[0] != qids2.shape[0]:
        raise ValueError('number of queries in files do not match (%d != %d)'
                         % (qids1.shape[0], qids2.shape[0]))

    # Sort both files by query id so that position i refers to the same
    # query in both value arrays.
    qids1_sort_idxs = np.argsort(qids1)
    qids2_sort_idxs = np.argsort(qids2)

    qids1 = qids1[qids1_sort_idxs]
    qids2 = qids2[qids2_sort_idxs]

    if np.any(qids1 != qids2):
        raise ValueError('files do not contain the same queries')

    # Apply the same permutation to the values to keep them aligned with
    # their (now sorted) query ids.
    values1 = values1[qids1_sort_idxs]
    values2 = values2[qids2_sort_idxs]

    mean1 = np.mean(values1)
    mean2 = np.mean(values2)

    t_statistic, p_value = ttest_rel(values1, values2)

    return values1.shape[0], mean1, mean2, t_statistic, p_value
评论列表
文章目录