def kendalltau(rankA, rankB):
    """Return Kendall's tau between two rankings of the same items.

    Each ranking is a sequence of items, or of tuples whose first element
    is the item (for example ``(item, score)`` pairs). The items of
    ``rankA`` are mapped to their positions in ``rankB`` and the
    correlation between the two position lists is computed.

    Parameters
    ----------
    rankA, rankB : sequence
        Rankings of equal length. Every item of ``rankA`` must occur in
        ``rankB``.

    Returns
    -------
    The correlation coefficient (first element of SciPy's result).

    Raises
    ------
    TypeError
        If the two rankings differ in length.
    ValueError
        If an item of ``rankA`` is missing from ``rankB``.
    """
    if len(rankA) != len(rankB):
        raise TypeError("The two rank lists must be of the same length.")
    N = len(rankA)
    # Strip the payload from tuple entries so only the items are compared.
    if isinstance(rankA[0], tuple):
        rankA = [entry[0] for entry in rankA]
    if isinstance(rankB[0], tuple):
        rankB = [entry[0] for entry in rankB]
    listA = list(range(N))
    listB = [rankB.index(item) for item in rankA]
    # This function shadows SciPy's ``kendalltau``; calling the bare name
    # here would recurse forever, so SciPy is reached through an alias.
    from scipy import stats as _scipy_stats
    return _scipy_stats.kendalltau(listA, listB)[0]
python类kendalltau()的实例源码
nanops.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 38
收藏 0
点赞 0
评论 0
def get_corr_func(method):
    """Return the two-argument correlation function named by ``method``.

    ``method`` must be 'pearson', 'kendall' or 'spearman'; any other value
    raises ``KeyError``. SciPy is only imported for the two rank-based
    methods.
    """
    wants_rank_method = method in ('kendall', 'spearman')
    if wants_rank_method:
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        coef_matrix = np.corrcoef(a, b)
        return coef_matrix[0, 1]

    def _kendall(a, b):
        outcome = kendalltau(a, b)
        # Tuple results carry the coefficient first; anything else is
        # passed through unchanged.
        return outcome[0] if isinstance(outcome, tuple) else outcome

    def _spearman(a, b):
        outcome = spearmanr(a, b)
        return outcome[0]

    dispatch = dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)
    return dispatch[method]
def _heuristic_element_order(samples):
'''
Finds an order of elements that heuristically facilitates vine
modelling. For this purpose, Kendall's tau is calculated between
samples of pairs of elements and elements are scored according to the
sum of absolute Kendall's taus of pairs the elements appear in.
Parameters
----------
samples : array_like
n-by-d matrix of samples where n is the number of samples and d is
the number of marginals.
Returns
-------
order : array_like
Permutation of all element indices reflecting descending scores.
'''
dim = samples.shape[1]
# Score elements according to total absolute Kendall's tau
score = np.zeros(dim)
for i in range(1, dim):
for j in range(i):
tau, _ = kendalltau(samples[:, i], samples[:, j])
score[i] += np.abs(tau)
score[j] += np.abs(tau)
# Get order indices for descending score
order = score.argsort()[::-1]
return order
def kendall_tau(y_true, y_pred):
    """
    Compute Kendall's tau between ``y_true`` and ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: Kendall's tau, or 0.0 when the coefficient is NaN
    """
    tau = kendalltau(y_true, y_pred)[0]
    if np.isnan(tau):
        return 0.0
    return tau
def kendall_tau(y_true, y_score):
    """Return Kendall's tau of ``y_true`` vs ``y_score``, or 0.0 if it is NaN."""
    from scipy.stats import kendalltau

    tau = kendalltau(y_true, y_score)[0]
    if np.isnan(tau):
        return 0.0
    return tau
def calc_correl(self, dev_pred, test_pred):
    """Correlate dev/test predictions with ``self.dev_y_org`` / ``self.test_y_org``.

    Returns the 6-tuple ``(dev_pearson, test_pearson, dev_spearman,
    test_spearman, dev_kendall, test_kendall)``; only the coefficient of
    each result is kept.
    """
    collected = []
    for correlate in (pearsonr, spearmanr, kendalltau):
        for predicted, reference in ((dev_pred, self.dev_y_org),
                                     (test_pred, self.test_y_org)):
            coefficient, _ = correlate(predicted, reference)
            collected.append(coefficient)
    return tuple(collected)
def kendall(y, z, nb_sample=100000):
    """Compute Kendall's correlation coefficient.

    When ``y`` has more than ``nb_sample`` entries, ``nb_sample`` randomly
    chosen positions (the same ones in ``y`` and ``z``) are used instead
    of the full data.
    """
    n_total = len(y)
    if n_total > nb_sample:
        positions = np.arange(n_total)
        np.random.shuffle(positions)
        keep = positions[:nb_sample]
        y, z = y[keep], z[keep]
    return kendalltau(y, z)[0]
test_analytics.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def test_corr_rank(self):
    # Check Series.corr for the rank-based methods against SciPy, then
    # against reference values computed in R.
    tm._skip_if_no_scipy()

    import scipy
    import scipy.stats as stats

    # kendall and spearman on two series; the tail of the first is
    # overwritten with its head before comparing.
    left = tm.makeTimeSeries()
    right = tm.makeTimeSeries()
    left[-5:] = left[:5]
    for method_name, reference in (('kendall', stats.kendalltau),
                                   ('spearman', stats.spearmanr)):
        observed = left.corr(right, method=method_name)
        wanted = reference(left, right)[0]
        self.assertAlmostEqual(observed, wanted)

    # these methods got rewritten in 0.8
    scipy_too_old = scipy.__version__ < LooseVersion('0.9')
    if scipy_too_old:
        raise nose.SkipTest("skipping corr rank because of scipy version "
                            "{0}".format(scipy.__version__))

    # results from R
    left = Series(
        [-0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
         -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606])
    right = Series(
        [-1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
         1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375])
    for method_name, r_value in (('kendall', 0.4319297),
                                 ('spearman', 0.5853767)):
        self.assertAlmostEqual(left.corr(right, method=method_name), r_value)
test_nanops.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def test_nancorr_kendall(self):
    # nanops.nancorr with method='kendall' is checked against SciPy's
    # kendalltau on the 2-d and then the 1-d float fixtures, each both
    # as-is and flattened.
    tm.skip_if_no_package('scipy.stats')
    from scipy.stats import kendalltau

    def tau_of(first, second):
        return kendalltau(first, second)[0]

    expected_whole = tau_of(self.arr_float_2d, self.arr_float1_2d)
    expected_flat = tau_of(self.arr_float_2d.flat, self.arr_float1_2d.flat)
    self.check_nancorr_nancov_2d(nanops.nancorr, expected_whole,
                                 expected_flat, method='kendall')

    expected_whole = tau_of(self.arr_float_1d, self.arr_float1_1d)
    expected_flat = tau_of(self.arr_float_1d.flat, self.arr_float1_1d.flat)
    self.check_nancorr_nancov_1d(nanops.nancorr, expected_whole,
                                 expected_flat, method='kendall')
def compare_scores(byus, bydarpa):
    """byus = { team: score }, bydarpa = { team: score }

    Print a comparison of our team ranking with DARPA's: whether the
    first-choice groups match, both full rankings, and Kendall's tau (with
    its p-value) between the two score lists. Both dicts must have the
    same set of teams. Returns None.

    The body runs unchanged on Python 2 and Python 3 and prints the same
    text the former ``print`` statements produced.
    """
    assert frozenset(byus.keys()) == frozenset(bydarpa.keys())

    # NOTE(review): ordered_sets is defined elsewhere; it is used here as a
    # mapping of score -> set of teams whose first entry is the top
    # choice. Confirm against its definition.
    our_ranking = ordered_sets(byus)
    darpa_ranking = ordered_sets(bydarpa)
    # list(...) is required on Python 3, where values() is not indexable.
    our_picks = list(our_ranking.values())[0]
    darpa_picks = list(darpa_ranking.values())[0]

    from scipy import stats
    # scipy takes them as ordered lists
    teamorder = list(byus.keys())
    vals_us = [byus[t] for t in teamorder]
    vals_darpa = [bydarpa[t] for t in teamorder]
    tau, p_value = stats.kendalltau(vals_us, vals_darpa)

    def names(teams_set):
        return '[' + ' '.join(sorted(n.split()[0] for n in teams_set)) + ']'

    def emit(*parts):
        # A single pre-joined string prints identically under the Python 2
        # print statement and the Python 3 print function; joining with
        # one space mirrors ``print a, b, c``.
        print(' '.join(parts))

    if our_picks == darpa_picks:
        emit("[ ] All first choice(s)", names(our_picks), "match, excellent!")
    elif our_picks.isdisjoint(darpa_picks):
        emit("[XX] Our first choice(s)", names(our_picks),
             " completely different from DARPA's", names(darpa_picks))
    else:
        emit("[__] Partial match between our first choice(s) and DARPA's. Both have",
             names(darpa_picks & our_picks),
             "(we also have:", names(our_picks - darpa_picks),
             " -- darpa also has:", names(darpa_picks - our_picks), ")")

    emit(" FOR US:")
    for score, teams in our_ranking.items():
        emit(" ", "%+.4f" % score, names(teams))
    emit(" DARPA:")
    for score, teams in darpa_ranking.items():
        emit(" ", "%+.4f" % score, names(teams))

    # Marker column: "<7" below 0.7, "<8" below 0.8, otherwise blank.
    marker = ("<7" if tau < 0.7 else "<8") if tau < 0.8 else " "
    print(" %s Kendall tau: %.4f (p-value for being correlated: %.6f)" % (marker, tau, p_value))
def evaluate(self, data: List[ContextAndQuestion], true_len, **kwargs):
    """Score predicted spans and summarise them per question.

    Reads ``kwargs["span"]`` (predicted spans) and ``kwargs["score"]``
    (one score per data point). For every question (or question/document
    pair when ``self.per_doc`` is set) only the highest-scoring point is
    kept, and the text EM / F1 columns of the score matrix are averaged
    over those points. Optional extras: Kendall's tau between the span
    scores and the text metrics (``self.k_tau``) and averages over points
    that have an answer (``self.paragraph_level``).

    Returns an ``Evaluation`` whose keys are prefixed with
    ``"b<self.bound>/"``. Raises ``RuntimeError`` when ``self.eval`` is
    neither "triviaqa" nor "squad".
    """
    best_spans = kwargs["span"]
    span_logits = kwargs["score"]

    if self.eval not in ("triviaqa", "squad"):
        raise RuntimeError()
    scorer = trivia_span_scores if self.eval == "triviaqa" else squad_span_scores
    scores = scorer(data, best_spans)

    has_answer = np.array([len(x.answer.answer_spans) > 0 for x in data])

    # key -> index of the best-scoring point seen so far; the earlier
    # point wins ties because the comparison is strict.
    best_index = {}
    for idx, point in enumerate(data):
        key = (point.question_id, point.doc_id) if self.per_doc else point.question_id
        if key not in best_index or span_logits[idx] > span_logits[best_index[key]]:
            best_index[key] = idx
    chosen = list(best_index.values())

    out = {}
    out["question-text-em"] = scores[chosen, 2].mean()
    out["question-text-f1"] = scores[chosen, 3].mean()

    if self.k_tau:
        out["text-em-k-tau"] = kendalltau(span_logits, scores[:, 2])[0]
        out["text-f1-k-tau"] = kendalltau(span_logits, scores[:, 3])[0]

    if self.paragraph_level:
        out["paragraph-text-em"] = scores[has_answer, 2].mean()
        out["paragraph-text-f1"] = scores[has_answer, 3].mean()

    prefix = "b%d/" % self.bound
    return Evaluation({prefix + name: value for name, value in out.items()})
def evaluate(self, data: List[ContextAndQuestion], true_len, **kargs):
    """Score predicted spans and rank-correlate confidences with the scores.

    Reads ``kargs["spans"]`` and ``kargs["conf"]``, plus
    ``kargs["none_prob"]`` when present. The per-point score matrix is
    averaged over points that have an answer (columns 0-3 are reported as
    accuracy, f1, text-accuracy and text-f1). The confidences are then
    correlated with the text-f1 and span-accuracy columns using Spearman
    ("spr") or Kendall ("k-tau"), chosen by ``self.rank_metric``.

    Returns an ``Evaluation`` whose keys are prefixed with
    ``"b<self.bound>/"``. Raises ``RuntimeError`` for an unknown
    ``self.text_eval`` and ``ValueError`` for an unknown
    ``self.rank_metric``.
    """
    if self.text_eval not in ("triviaqa", "squad"):
        raise RuntimeError()
    scorer = trivia_span_scores if self.text_eval == "triviaqa" else squad_span_scores
    scores = scorer(data, kargs["spans"])

    has_answer = [len(x.answer.answer_spans) > 0 for x in data]
    aggregated_scores = scores[has_answer].mean(axis=0)

    prefix = "b%d/" % self.bound
    scalars = {}
    for column, label in enumerate(("accuracy", "f1", "text-accuracy", "text-f1")):
        scalars[prefix + label] = aggregated_scores[column]

    if self.rank_metric == "spr":
        metric = spearmanr
    elif self.rank_metric == "k-tau":
        metric = kendalltau
    else:
        raise ValueError()

    # (key fragment, score column) pairs that each confidence is correlated with.
    targets = (("text-f1", 3), ("span-accuracy", 0))

    if "none_prob" in kargs:
        none_conf = kargs["none_prob"]
        for label, column in targets:
            key = prefix + "none-" + label + "-" + self.rank_metric
            scalars[key] = metric(none_conf, scores[:, column])[0]

    conf = kargs["conf"]
    for label, column in targets:
        key = prefix + "score-" + label + "-" + self.rank_metric
        scalars[key] = metric(conf, scores[:, column])[0]
    return Evaluation(scalars)
def distance(a, b):
    """Return ``1 - tau`` where tau is Kendall's tau between ``a`` and ``b``.

    The correlation is subtracted from one so that lower is better.
    """
    # Alternatives tried earlier and left disabled:
    #   cosine:    1 - dot(norm(a), norm(b))
    #   euclidean: sum(pow(a[i] - b[i], 2) for i in range(len(b)))
    #   tanimoto:  1 - dot(a, b) / (dot(a, a) + dot(b, b) - dot(a, b))
    tau = sci.kendalltau(a, b)[0]
    return 1 - tau
# Load the benchmark
def distance(a, b):
    """Return the cosine distance between ``a`` and ``b``."""
    # Alternatives tried earlier and left disabled:
    #   euclidean: sum(pow(a[i] - b[i], 2) for i in range(len(b)))
    #   cosine:    1 - dot(norm(a), norm(b))
    #   tanimoto:  1 - dot(a, b) / (dot(a, a) + dot(b, b) - dot(a, b))
    #   kendall:   sci.kendalltau(a, b)
    # SciPy's cosine() already includes the ``1 - cos(a, b)`` step.
    cosine_distance = scipy.spatial.distance.cosine(a, b)
    return cosine_distance
# Load the benchmark
def distance(a, b):
    """Return one minus the dot product of ``norm(a)`` and ``norm(b)``.

    Used as a cosine-similarity based distance, so lower is better.
    """
    # NOTE(review): `norm` and `dot` are defined outside this block; the
    # cosine reading assumes `norm` rescales a vector to unit length --
    # confirm.
    # Alternatives tried earlier and left disabled:
    #   euclidean: sum(pow(a[i] - b[i], 2) for i in range(len(b)))
    #   tanimoto:  1 - dot(a, b) / (dot(a, a) + dot(b, b) - dot(a, b))
    #   kendall:   sci.kendalltau(a, b)
    similarity = dot(norm(a), norm(b))
    return 1 - similarity
# Load the benchmark
def select(self, X, Y, select_count=100):
    """Keep the columns of ``X`` most strongly rank-correlated with ``Y``.

    Every column is scored by the absolute value of its Kendall's tau
    with ``Y``. The ``select_count`` highest-scoring columns are kept,
    strongest first; equal scores keep their original column order.

    Parameters
    ----------
    X : 2-d array indexable as ``X[:, i]``
    Y : target values, one per row of ``X``
    select_count : int
        Maximum number of columns to keep.

    Returns
    -------
    (X[:, indices], indices) where ``indices`` lists the kept column
    indices in ranking order.
    """
    import math

    scored = []
    for column in range(X.shape[1]):
        strength = abs(kendalltau(X[:, column], Y)[0])
        # An undefined (NaN) tau is ranked as no association; a NaN sort
        # key would otherwise make the ordering unpredictable.
        if math.isnan(strength):
            strength = 0.0
        scored.append((column, strength))
    # Sort on the score field. The previous key, ``operator.itemgetter``
    # itself, built a non-comparable getter object per entry.
    ranked = sorted(scored, key=operator.itemgetter(1), reverse=True)[:select_count]
    indices = [column for column, _ in ranked]
    return X[:, indices], indices
def calc_correl(self, dev_pred, test_pred):
    """Return Pearson, Spearman and Kendall coefficients for dev and test.

    Predictions are correlated with ``self.dev_y_org`` and
    ``self.test_y_org``. The result is the tuple ``(dev_prs, test_prs,
    dev_spr, test_spr, dev_tau, test_tau)``; p-values are discarded.
    """
    coefficients = []
    for measure in (pearsonr, spearmanr, kendalltau):
        dev_value, _ = measure(dev_pred, self.dev_y_org)
        test_value, _ = measure(test_pred, self.test_y_org)
        coefficients.extend((dev_value, test_value))
    return tuple(coefficients)
def do_kendallt(list1, list2, alpha=0.05):
    """Return Kendall's tau of the two lists when its p-value is below ``alpha``.

    Otherwise the string 'n.s.' (not significant) is returned.
    """
    tau, p_value = kendalltau(list1, list2)
    return tau if p_value < alpha else 'n.s.'
Conf_Measure.py 文件源码
项目:Stock-Prediction-Time-Series-Analysis-Python
作者: Nekooeimehr
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def Conf_Measure(RegModel, Train_Data, True_Labels, ModelType):
    """Report how well ``RegModel`` reproduces ``True_Labels`` on ``Train_Data``.

    Predicts with the model, then prints and returns Kendall's tau with
    its p-value and the R-squared score between the true labels and the
    predictions. ``ModelType`` is only used as a label in the printout.

    Returns the tuple ``(tau, p_value, R2_Measure)``.
    """
    fitted_values = RegModel.predict(Train_Data)
    kendall_result = stats.kendalltau(True_Labels, fitted_values)
    tau, p_value = kendall_result
    r_squared = r2_score(True_Labels, fitted_values)
    print('The Kindell Coefficient of ', ModelType, ' model is ', tau, ' with a p-value of ', p_value)
    print('The R Square of ', ModelType, ' model is ', r_squared)
    print('')
    return tau, p_value, r_squared
def concordance(series1, series2, method, nreps=1000):
    """
    Measure the concordance between two pandas Series.

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        ['fisher', 'spearman', 'kendalltau', 'empirical', 'cohen']
    nreps : int
        number of repetitions to build the null. Only needed if method is
        'empirical'

    Returns
    -------
    measure : float
        some sort of measure of concordance (e.g. r for the correlation
        methods, n_observed - mean(n_expected) for empirical, etc)
    p : float
        p value of observed concordance between series1 and series2
        (NaN for 'cohen')

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if method == 'fisher':
        # Note: this automatically ignores any bugs which were not present
        # in both series.
        contingency = pd.crosstab(series1, series2)
        return fisher_exact(contingency)

    if method == 'spearman':
        return spearmanr(series1, series2)

    if method == 'kendalltau':
        return kendalltau(series1, series2, nan_policy='omit')

    if method == 'empirical':
        return empirical_pval(series1, series2, nreps)

    if method == 'cohen':
        # Only rows where both series have a value are compared.
        paired = pd.concat((series1, series2), axis=1).dropna()
        kappa = cohen_kappa_score(paired.iloc[:, 0], paired.iloc[:, 1])
        return kappa, np.nan

    raise ValueError('Unknown concordance method.')