def concordance(series1, series2, method, nreps=1000):
    """
    Measure the concordance between two pandas Series.

    Dispatches to one of several statistics and returns a
    (measure, p value) pair. Missing values are left out of the
    calculation for the 'fisher', 'spearman', 'kendalltau' and 'cohen'
    methods; for 'empirical' the handling is whatever `empirical_pval`
    does (not visible here).

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        One of ['fisher', 'spearman', 'kendalltau', 'empirical', 'cohen'].
    nreps : int
        Number of repetitions used to build the null. Only needed if
        method is 'empirical'.

    Returns
    -------
    measure : float
        Some sort of measure of concordance (e.g. r for the correlation
        methods, n_observed - mean(n_expected) for empirical, etc).
    p : float
        p value of the observed concordance between series1 and series2.
        Always NaN for 'cohen', which computes no p value.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """
    if method == 'fisher':
        # Note: the contingency table only counts entries ("bugs") that
        # have a value in both series; rows with a missing value in
        # either one are left out by crosstab.
        contingency = pd.crosstab(series1, series2)
        return fisher_exact(contingency)

    if method == 'spearman':
        # Omit NaN pairs, as the other branches do. Without this a single
        # missing value turns both the correlation and the p value into NaN.
        return spearmanr(series1, series2, nan_policy='omit')

    if method == 'kendalltau':
        return kendalltau(series1, series2, nan_policy='omit')

    if method == 'empirical':
        return empirical_pval(series1, series2, nreps)

    if method == 'cohen':
        # Keep only the entries observed in both series before scoring.
        paired = pd.concat((series1, series2), axis=1).dropna()
        kappa = cohen_kappa_score(paired.iloc[:, 0], paired.iloc[:, 1])
        # No p value is computed for kappa; NaN keeps the return shape
        # consistent with the other methods.
        return kappa, np.nan

    raise ValueError('Unknown concordance method.')
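# NOTE: web-page navigation residue ("Comment list" / "Article table of
# contents") picked up when this snippet was copied; not part of the code.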