python类kendalltau()的实例源码-面圈网

metrics.py 文件源码项目：reco 作者: mayukh18 项目源码文件源码阅读 44 收藏 0 点赞 0 评论 0

def kendalltau(rankA, rankB):
    """Return Kendall's tau between two rankings of the same items.

    Each ranking is a sequence of items, best first.  Entries may also be
    tuples, in which case only the first element of each tuple is used as
    the item.  ``rankA`` defines the reference order ``0..N-1``; every item
    of ``rankA`` is then looked up in ``rankB`` to obtain the position it
    holds there, and SciPy's Kendall tau is computed between the two
    position lists.

    Parameters
    ----------
    rankA, rankB : sequence
        Rankings of equal length.  ``rankB`` must support ``.index``.

    Returns
    -------
    float
        The tau statistic (first element of SciPy's result).

    Raises
    ------
    TypeError
        If the two rankings differ in length.
    ValueError
        If an item of ``rankA`` does not occur in ``rankB``.
    """
    # Imported under an alias on purpose: this function has the same name as
    # the SciPy routine, so a plain ``kendalltau(...)`` call in here would
    # resolve to this very function and recurse without end.
    from scipy.stats import kendalltau as _scipy_kendalltau

    if len(rankA) != len(rankB):
        raise TypeError("The two rank lists must be of the same length.")

    N = len(rankA)

    # Strip (item, ...) tuples down to the item; the ``N and`` guard keeps
    # empty rankings from failing on the ``[0]`` peek.
    if N and isinstance(rankA[0], tuple):
        rankA = [entry[0] for entry in rankA]

    if N and isinstance(rankB[0], tuple):
        rankB = [entry[0] for entry in rankB]

    listA = list(range(N))
    listB = [rankB.index(item) for item in rankA]

    return _scipy_kendalltau(listA, listB)[0]

nanops.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 46 收藏 0 点赞 0 评论 0

def get_corr_func(method):
    """Return the two-argument correlation function registered for *method*.

    Recognised names are ``'pearson'``, ``'kendall'`` and ``'spearman'``;
    any other name raises ``KeyError`` from the final lookup.  SciPy is only
    imported when one of the two rank-based methods is requested.
    """
    if method in ('kendall', 'spearman'):
        from scipy.stats import kendalltau, spearmanr

    def _kendall(a, b):
        result = kendalltau(a, b)
        # The result may be a tuple (statistic first) or a bare value;
        # unwrap the former.
        return result[0] if isinstance(result, tuple) else result

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    def _pearson(a, b):
        # Off-diagonal entry of the 2x2 correlation matrix.
        return np.corrcoef(a, b)[0, 1]

    dispatch = dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)
    return dispatch[method]

mixedvine.py 文件源码项目：mixedvines 作者: asnelt 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def _heuristic_element_order(samples):
    '''
    Orders the elements by how strongly each one is associated with the
    others.  Every pair of columns of `samples` contributes the absolute
    value of its Kendall's tau to the score of both columns; the returned
    indices list the columns from highest to lowest total score.

    Parameters
    ----------
    samples : array_like
        n-by-d matrix of samples where n is the number of samples and d is
        the number of marginals.

    Returns
    -------
    order : array_like
        Permutation of all element indices reflecting descending scores.
    '''
    n_elements = samples.shape[1]
    totals = np.zeros(n_elements)
    # Visit each unordered pair (second < first) exactly once.
    for first in range(1, n_elements):
        for second in range(first):
            abs_tau = np.abs(
                kendalltau(samples[:, first], samples[:, second])[0])
            totals[first] += abs_tau
            totals[second] += abs_tau
    # Ascending argsort, reversed, gives descending score order.
    return np.argsort(totals)[::-1]

metrics.py 文件源码项目：aes 作者: feidong1991 项目源码文件源码阅读 38 收藏 0 点赞 0 评论 0

def kendall_tau(y_true, y_pred):
    """
    Kendall's tau of ``y_true`` against ``y_pred``, with NaN mapped to 0.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: the tau statistic, or ``0.0`` when the statistic is NaN
    """
    tau = kendalltau(y_true, y_pred)[0]
    if np.isnan(tau):
        return 0.0
    return tau

vwoptimize.py 文件源码项目：vwoptimize 作者: denik 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def kendall_tau(y_true, y_score):
    """Return Kendall's tau of the two inputs, or 0.0 if the statistic is NaN.

    SciPy is imported inside the function, so it is only required when this
    metric is actually used.
    """
    from scipy.stats import kendalltau

    tau = kendalltau(y_true, y_score)[0]
    return 0.0 if np.isnan(tau) else tau

evaluator.py 文件源码项目：aes-gated-word-char 作者: unkn0wnxx 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def calc_correl(self, dev_pred, test_pred):
    """Correlate dev/test predictions with ``self.dev_y_org``/``self.test_y_org``.

    Returns a 6-tuple ``(dev_prs, test_prs, dev_spr, test_spr, dev_tau,
    test_tau)``: Pearson, Spearman and Kendall coefficients, each for the
    dev set and then the test set.  The p-values are discarded.
    """
    coefficients = []
    for measure in (pearsonr, spearmanr, kendalltau):
        for predicted, gold in ((dev_pred, self.dev_y_org),
                                (test_pred, self.test_y_org)):
            value, _ = measure(predicted, gold)
            coefficients.append(value)
    return tuple(coefficients)

evaluation.py 文件源码项目：deepcpg 作者: cangermueller 项目源码文件源码阅读 36 收藏 0 点赞 0 评论 0

def kendall(y, z, nb_sample=100000):
    """Compute Kendall's correlation coefficient.

    If ``y`` holds more than ``nb_sample`` entries, the coefficient is
    computed on ``nb_sample`` positions drawn by shuffling the index range
    with ``np.random``; the same positions are taken from ``y`` and ``z``.
    """
    total = len(y)
    if total > nb_sample:
        keep = np.arange(total)
        np.random.shuffle(keep)
        keep = keep[:nb_sample]
        y, z = y[keep], z[keep]
    return kendalltau(y, z)[0]

test_analytics.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def test_corr_rank(self):
    """Check ``corr`` with the 'kendall' and 'spearman' methods.

    First compares against SciPy on two generated time series, then (for
    SciPy versions that are not older than 0.9) against fixed reference
    values noted in the source as coming from R.
    """
    tm._skip_if_no_scipy()

    import scipy
    import scipy.stats as stats

    # kendall and spearman
    A = tm.makeTimeSeries()
    B = tm.makeTimeSeries()
    A[-5:] = A[:5]
    for method, reference in (('kendall', stats.kendalltau),
                              ('spearman', stats.spearmanr)):
        result = A.corr(B, method=method)
        expected = reference(A, B)[0]
        self.assertAlmostEqual(result, expected)

    # these methods got rewritten in 0.8
    if scipy.__version__ < LooseVersion('0.9'):
        message = "skipping corr rank because of scipy version {0}".format(
            scipy.__version__)
        raise nose.SkipTest(message)

    # results from R
    A = Series([
        -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
        -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
    ])
    B = Series([
        -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
        1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
    ])
    reference_values = (('kendall', 0.4319297), ('spearman', 0.5853767))
    for method, expected in reference_values:
        self.assertAlmostEqual(A.corr(B, method=method), expected)

test_nanops.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def test_nancorr_kendall(self):
    """Check ``nanops.nancorr`` with ``method='kendall'`` against SciPy.

    Reference values are SciPy's tau for the fixture arrays as given and
    for their ``.flat`` views, for both the 2-d and the 1-d fixtures.
    """
    tm.skip_if_no_package('scipy.stats')
    from scipy.stats import kendalltau

    def _tau(left, right):
        return kendalltau(left, right)[0]

    full_2d = _tau(self.arr_float_2d, self.arr_float1_2d)
    flat_2d = _tau(self.arr_float_2d.flat, self.arr_float1_2d.flat)
    self.check_nancorr_nancov_2d(nanops.nancorr, full_2d, flat_2d,
                                 method='kendall')

    full_1d = _tau(self.arr_float_1d, self.arr_float1_1d)
    flat_1d = _tau(self.arr_float_1d.flat, self.arr_float1_1d.flat)
    self.check_nancorr_nancov_1d(nanops.nancorr, full_1d, flat_1d,
                                 method='kendall')

reproduce_cqe_scores.py 文件源码项目：virtual-competition 作者: mechaphish 项目源码文件源码阅读 41 收藏 0 点赞 0 评论 0

def compare_scores(byus, bydarpa):
    """Print a comparison of our team scores against DARPA's.

    byus = { team: score }, bydarpa = { team: score }

    Both mappings must cover exactly the same teams.  The report shows
    whether the first group returned by ``ordered_sets`` agrees between the
    two scorings, lists both rankings, and ends with Kendall's tau (and its
    p-value) computed over the per-team scores.

    Written in the subset of syntax shared by Python 2 and Python 3: the
    original used print statements, ``dict.iteritems()`` and indexing into
    ``dict.values()``, none of which work on Python 3.  The printed text is
    unchanged.
    """
    assert frozenset(byus.keys()) == frozenset(bydarpa.keys())

    def say(*parts):
        # Mirrors a Python 2 ``print a, b, c`` statement: the string parts
        # are written separated by single spaces.
        print(' '.join(parts))

    our_ranking = ordered_sets(byus)
    darpa_ranking = ordered_sets(bydarpa)

    # list(...) because dict.values() is a non-indexable view on Python 3.
    # NOTE(review): presumably ordered_sets yields the best score first --
    # confirm against its definition.
    our_picks = list(our_ranking.values())[0]
    darpa_picks = list(darpa_ranking.values())[0]

    from scipy import stats
    # scipy takes them as ordered lists
    teamorder = list(byus.keys())
    vals_us = [byus[t] for t in teamorder]
    vals_darpa = [bydarpa[t] for t in teamorder]
    tau, p_value = stats.kendalltau(vals_us, vals_darpa)

    def names(teams_set):
        # First whitespace-separated word of every team name, sorted.
        return '[' + ' '.join(sorted(n.split()[0] for n in teams_set)) + ']'

    if our_picks == darpa_picks:
        say("[  ] All first choice(s)", names(our_picks), "match, excellent!")
    elif our_picks.isdisjoint(darpa_picks):
        say("[XX] Our first choice(s)", names(our_picks),
            " completely different from DARPA's", names(darpa_picks))
    else:
        say("[__] Partial match between our first choice(s) and DARPA's. Both have",
            names(darpa_picks & our_picks),
            "(we also have:", names(our_picks - darpa_picks),
            " -- darpa also has:", names(darpa_picks - our_picks), ")")
    say("     FOR US:")
    for score, teams in our_ranking.items():
        say("       ", "%+.4f" % score, names(teams))
    say("     DARPA:")
    for score, teams in darpa_ranking.items():
        say("       ", "%+.4f" % score, names(teams))
    # Marker column: "<7" below 0.7, "<8" below 0.8, blank otherwise.
    say("  %s Kendall tau: %.4f (p-value for being correlated: %.6f)" % (("<7" if tau < 0.7 else "<8") if tau < 0.8 else "  ", tau, p_value))

evaluator.py 文件源码项目：document-qa 作者: allenai 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def evaluate(self, data: List[ContextAndQuestion], true_len, **kwargs):
    """Score predicted spans and summarise them per question and per paragraph.

    Reads ``kwargs["span"]`` (predicted spans) and ``kwargs["score"]`` (one
    value per data point).  Span scores come from the scorer selected by
    ``self.eval``; columns 2 and 3 of the score matrix are reported under
    the ``em`` and ``f1`` keys.  For every question (or question/document
    pair when ``self.per_doc`` is set) only the data point with the largest
    ``kwargs["score"]`` value is kept for the question-level means.

    Raises ``RuntimeError`` when ``self.eval`` is neither ``"triviaqa"`` nor
    ``"squad"``.  ``true_len`` is not used.
    """
    best_spans = kwargs["span"]
    span_logits = kwargs["score"]

    eval_name = self.eval
    if eval_name == "triviaqa":
        scorer = trivia_span_scores
    elif eval_name == "squad":
        scorer = squad_span_scores
    else:
        raise RuntimeError()
    scores = scorer(data, best_spans)

    has_answer = np.array([len(x.answer.answer_spans) > 0 for x in data])

    # Index of the highest-scoring data point seen so far for each key;
    # on ties the earliest one stays.
    best_index = {}
    for idx, point in enumerate(data):
        if self.per_doc:
            key = (point.question_id, point.doc_id)
        else:
            key = point.question_id
        current = best_index.get(key)
        if current is None or span_logits[idx] > span_logits[current]:
            best_index[key] = idx
    chosen = list(best_index.values())

    out = {}
    out["question-text-em"] = scores[chosen, 2].mean()
    out["question-text-f1"] = scores[chosen, 3].mean()

    if self.k_tau:
        out["text-em-k-tau"] = kendalltau(span_logits, scores[:, 2])[0]
        out["text-f1-k-tau"] = kendalltau(span_logits, scores[:, 3])[0]

    if self.paragraph_level:
        out["paragraph-text-em"] = scores[has_answer, 2].mean()
        out["paragraph-text-f1"] = scores[has_answer, 3].mean()

    prefix = "b%d/" % self.bound
    return Evaluation({prefix + name: value for name, value in out.items()})

evaluator.py 文件源码项目：document-qa 作者: allenai 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def evaluate(self, data: List[ContextAndQuestion], true_len, **kargs):
    """Report mean span scores and rank correlations of confidences.

    Span scores are computed from ``kargs["spans"]`` by the scorer chosen
    via ``self.text_eval``.  The four score columns are averaged over the
    data points that have at least one answer span.  The rank metric
    selected by ``self.rank_metric`` (``"spr"`` or ``"k-tau"``) is then
    computed between ``kargs["conf"]`` -- and ``kargs["none_prob"]`` when
    given -- and score columns 3 and 0.

    Raises ``RuntimeError`` for an unknown ``self.text_eval`` and
    ``ValueError`` for an unknown ``self.rank_metric``.  ``true_len`` is not
    used.
    """
    text_eval = self.text_eval
    if text_eval == "triviaqa":
        scores = trivia_span_scores(data, kargs["spans"])
    elif text_eval == "squad":
        scores = squad_span_scores(data, kargs["spans"])
    else:
        raise RuntimeError()

    has_answer = [len(x.answer.answer_spans) > 0 for x in data]
    aggregated_scores = scores[has_answer].mean(axis=0)
    prefix = "b%d/" % self.bound

    column_names = ("accuracy", "f1", "text-accuracy", "text-f1")
    scalars = {prefix + name: aggregated_scores[col]
               for col, name in enumerate(column_names)}

    if self.rank_metric == "spr":
        metric = spearmanr
    elif self.rank_metric == "k-tau":
        metric = kendalltau
    else:
        raise ValueError()

    def rank_against(confidence, column):
        # Correlation statistic between a confidence vector and one column.
        return metric(confidence, scores[:, column])[0]

    if "none_prob" in kargs:
        none_conf = kargs["none_prob"]
        scalars[prefix + "none-text-f1-" + self.rank_metric] = rank_against(none_conf, 3)
        scalars[prefix + "none-span-accuracy-" + self.rank_metric] = rank_against(none_conf, 0)

    conf = kargs["conf"]
    scalars[prefix + "score-text-f1-" + self.rank_metric] = rank_against(conf, 3)
    scalars[prefix + "score-span-accuracy-" + self.rank_metric] = rank_against(conf, 0)
    return Evaluation(scalars)

hash2GloVe_test_stem2_kendall.py 文件源码项目：hash2vec 作者: Roj 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def distance(a,b):
    """Distance between two vectors as ``1 - tau`` (Kendall's tau).

    Identically ordered inputs give 0, and the value grows as the orderings
    disagree.  The original also carried disabled alternatives (cosine,
    Euclidean and Tanimoto distances) as commented-out code.
    """
    tau = sci.kendalltau(a, b)[0]
    return 1 - tau



# Load the benchmark

hash2GloVe_test.py 文件源码项目：hash2vec 作者: Roj 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def distance(a,b):
    """Cosine distance between two vectors, delegated to SciPy.

    The original also carried disabled alternatives (Euclidean, Tanimoto
    and Kendall-tau based distances) as commented-out code.
    """
    # SciPy's cosine() already returns 1 - cos(a, b), so no subtraction here.
    cosine_distance = scipy.spatial.distance.cosine(a, b)
    return cosine_distance



# Load the benchmark

hash2GloVe_test_log.py 文件源码项目：hash2vec 作者: Roj 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def distance(a,b):
    """Distance between two vectors as ``1 - dot(norm(a), norm(b))``.

    The original labels this "cosine similarity".  It also carried disabled
    alternatives (Euclidean, Tanimoto and Kendall-tau based distances) as
    commented-out code.
    """
    # NOTE(review): norm() is defined outside this block; presumably it
    # rescales a vector to unit length -- confirm.
    unit_a = norm(a)
    unit_b = norm(b)
    return 1 - dot(unit_a, unit_b)



# Load the benchmark

LeaveNOut.py 文件源码项目：AppsOfDataAnalysis 作者: nhanloukiala 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def select(self, X, Y, select_count=100):
    """Keep the columns of ``X`` most strongly rank-correlated with ``Y``.

    Each column is scored by the absolute value of its Kendall's tau with
    ``Y``; the ``select_count`` highest-scoring columns are kept, strongest
    first (ties keep their original column order).

    The original sorted with ``key=operator.itemgetter`` -- the class
    itself rather than ``itemgetter(1)`` -- so the sort keys were
    unorderable ``itemgetter`` objects and the sort never looked at the
    correlations.

    Parameters
    ----------
    X : 2-d array
        Indexed as ``X[:, i]``; one column per candidate feature.
    Y : sequence
        Target values, one per row of ``X``.
    select_count : int
        Maximum number of columns to keep.

    Returns
    -------
    (selected, indices)
        ``X`` restricted to the chosen columns, and the list of chosen
        column indices in the same order.
    """
    import math

    corr = []
    for i in range(X.shape[1]):
        # Index access picks the statistic without relying on the name of
        # the result's attribute.
        tau = kendalltau(X[:, i], Y)[0]
        # An undefined tau (NaN) counts as no correlation, so such columns
        # rank last instead of making the sort order unpredictable.
        corr.append((i, 0.0 if math.isnan(tau) else abs(tau)))

    corr = sorted(corr, key=operator.itemgetter(1), reverse=True)[:select_count]
    indices = [index for index, _ in corr]
    return X[:, indices], indices

asap_evaluator.py 文件源码项目：nea 作者: nusnlp 项目源码文件源码阅读 41 收藏 0 点赞 0 评论 0

def calc_correl(self, dev_pred, test_pred):
    """Return Pearson, Spearman and Kendall coefficients for dev and test.

    Predictions are compared with ``self.dev_y_org`` and
    ``self.test_y_org``.  The result is the tuple ``(dev_prs, test_prs,
    dev_spr, test_spr, dev_tau, test_tau)``; p-values are dropped.
    """
    def coefficient(measure, predicted, gold):
        # Every measure returns (statistic, p-value); keep the statistic.
        statistic, _ = measure(predicted, gold)
        return statistic

    return (
        coefficient(pearsonr, dev_pred, self.dev_y_org),
        coefficient(pearsonr, test_pred, self.test_y_org),
        coefficient(spearmanr, dev_pred, self.dev_y_org),
        coefficient(spearmanr, test_pred, self.test_y_org),
        coefficient(kendalltau, dev_pred, self.dev_y_org),
        coefficient(kendalltau, test_pred, self.test_y_org),
    )

experiment_corr_pca_ches.py 文件源码项目：cptm 作者: NLeSC 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def do_kendallt(list1, list2, alpha=0.05):
    """Return Kendall's tau of the two lists, or ``'n.s.'`` if not significant.

    The coefficient is returned only when its p-value is strictly below
    ``alpha``; otherwise the string ``'n.s.'`` is returned.
    """
    coefficient, p_value = kendalltau(list1, list2)
    return coefficient if p_value < alpha else 'n.s.'

Conf_Measure.py 文件源码项目：Stock-Prediction-Time-Series-Analysis-Python 作者: Nekooeimehr 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def Conf_Measure(RegModel, Train_Data, True_Labels, ModelType):
    """Print and return agreement measures for a model's predictions.

    Calls ``RegModel.predict(Train_Data)`` and compares the output with
    ``True_Labels`` using Kendall's tau (with its p-value) and
    ``r2_score``.  ``ModelType`` is only used to label the printed lines.

    Returns the tuple ``(tau, p_value, r2)``.
    """
    predicted = RegModel.predict(Train_Data)
    kendall_result = stats.kendalltau(True_Labels, predicted)
    tau, p_value = kendall_result
    r_squared = r2_score(True_Labels, predicted)
    print('The Kindell Coefficient of ', ModelType, ' model is ', tau,' with a p-value of ',p_value)
    print('The R Square of ', ModelType, ' model is ', r_squared)
    print('')
    return tau, p_value, r_squared

concordance_analysis.py 文件源码项目：microbiomeHD 作者: cduvallet 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def concordance(series1, series2, method, nreps=1000):
    """
    Compute a concordance measure and p-value for two pandas Series.

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        One of 'fisher', 'spearman', 'kendalltau', 'empirical', 'cohen'.
    nreps : int
        Number of repetitions used to build the null. Only used when
        method is 'empirical'.

    Returns
    -------
    measure : float
        The method's concordance measure (e.g. r for the correlation
        methods, n_observed - mean(n_expected) for empirical, etc).
    p : float
        p value of the observed concordance between series1 and series2.
        For 'cohen' no p value is computed and NaN is returned in its place.

    Raises
    ------
    ValueError
        If `method` is not one of the names listed above.
    """
    if method == 'fisher':
        # Note: the cross-tabulation automatically ignores any bugs which
        # were not present in both series.
        contingency = pd.crosstab(series1, series2)
        return fisher_exact(contingency)

    if method == 'spearman':
        return spearmanr(series1, series2)

    if method == 'kendalltau':
        return kendalltau(series1, series2, nan_policy='omit')

    if method == 'empirical':
        return empirical_pval(series1, series2, nreps)

    if method == 'cohen':
        # Keep only the rows where both series have a value.
        paired = pd.concat((series1, series2), axis=1).dropna()
        kappa = cohen_kappa_score(paired.iloc[:, 0], paired.iloc[:, 1])
        return kappa, np.nan

    raise ValueError('Unknown concordance method.')