python类kendalltau()的实例源码

metrics.py 文件源码 项目:reco 作者: mayukh18 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def kendalltau(rankA, rankB):
    """Return Kendall's tau between two rankings of the same items.

    Each ranking is a sequence of items in rank order.  Entries may also be
    tuples, in which case only the first element of each tuple (the item) is
    used.  The statistic is computed between the positions ``0..N-1`` of the
    items in ``rankA`` and the positions the same items occupy in ``rankB``.

    :param rankA: first ranking (list of items or list of tuples).
    :param rankB: second ranking (list of items or list of tuples).
    :returns: the tau statistic reported by ``scipy.stats.kendalltau``.
    :raises TypeError: if the two rankings differ in length.
    :raises ValueError: if an item of ``rankA`` does not occur in ``rankB``.
    """
    # This function shadows the name ``kendalltau``; calling that name here
    # would recurse forever, so the scipy implementation is imported under
    # a distinct alias.
    from scipy.stats import kendalltau as _scipy_kendalltau

    if len(rankA) != len(rankB):
        raise TypeError("The two rank lists must be of the same length.")

    N = len(rankA)

    # ``N`` is checked first so that empty rankings do not fail on ``[0]``.
    if N and isinstance(rankA[0], tuple):
        rankA = [entry[0] for entry in rankA]

    if N and isinstance(rankB[0], tuple):
        rankB = [entry[0] for entry in rankB]

    listA = list(range(N))
    # Position in rankB of the item that sits at each position of rankA.
    listB = [rankB.index(item) for item in rankA]

    return _scipy_kendalltau(listA, listB)[0]
nanops.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def get_corr_func(method):
    """Return a two-argument correlation function for ``method``.

    ``method`` must be one of ``'pearson'``, ``'kendall'`` or ``'spearman'``;
    any other value raises ``KeyError``.  scipy is imported only when a
    rank-based method is requested.
    """
    wants_rank_method = method in ('kendall', 'spearman')
    if wants_rank_method:
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        corr_matrix = np.corrcoef(a, b)
        return corr_matrix[0, 1]

    def _kendall(a, b):
        outcome = kendalltau(a, b)
        # Unwrap the statistic when the result is a tuple; otherwise the
        # result is passed through untouched.
        return outcome[0] if isinstance(outcome, tuple) else outcome

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    return {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman,
    }[method]
mixedvine.py 文件源码 项目:mixedvines 作者: asnelt 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _heuristic_element_order(samples):
        '''
        Finds an order of elements that heuristically facilitates vine
        modelling.  For this purpose, Kendall's tau is calculated between
        samples of pairs of elements and elements are scored according to the
        sum of absolute Kendall's taus of pairs the elements appear in.

        Parameters
        ----------
        samples : array_like
            n-by-d matrix of samples where n is the number of samples and d is
            the number of marginals.

        Returns
        -------
        order : array_like
            Permutation of all element indices reflecting descending scores.
        '''
        dim = samples.shape[1]
        # Score elements according to total absolute Kendall's tau
        score = np.zeros(dim)
        for i in range(1, dim):
            for j in range(i):
                tau, _ = kendalltau(samples[:, i], samples[:, j])
                score[i] += np.abs(tau)
                score[j] += np.abs(tau)
        # Get order indices for descending score
        order = score.argsort()[::-1]
        return order
metrics.py 文件源码 项目:aes 作者: feidong1991 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def kendall_tau(y_true, y_pred):
    """
    Compute Kendall's tau between ``y_true`` and ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: Kendall's tau, or 0.0 when the statistic is NaN
    """
    tau = kendalltau(y_true, y_pred)[0]
    # A NaN statistic is reported as 0.0 instead.
    return 0.0 if np.isnan(tau) else tau
vwoptimize.py 文件源码 项目:vwoptimize 作者: denik 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def kendall_tau(y_true, y_score):
    """Return Kendall's tau of ``y_true`` vs ``y_score``; 0.0 if it is NaN."""
    from scipy.stats import kendalltau

    statistic = kendalltau(y_true, y_score)[0]
    if np.isnan(statistic):
        return 0.0
    return statistic
evaluator.py 文件源码 项目:aes-gated-word-char 作者: unkn0wnxx 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def calc_correl(self, dev_pred, test_pred):
    """Correlate dev/test predictions with ``self.dev_y_org`` / ``self.test_y_org``.

    Returns a 6-tuple of coefficients in the order: Pearson (dev, test),
    Spearman (dev, test), Kendall tau (dev, test).  P-values are discarded.
    """
    coefficients = []
    for metric in (pearsonr, spearmanr, kendalltau):
        coefficients.append(metric(dev_pred, self.dev_y_org)[0])
        coefficients.append(metric(test_pred, self.test_y_org)[0])
    return tuple(coefficients)
evaluation.py 文件源码 项目:deepcpg 作者: cangermueller 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def kendall(y, z, nb_sample=100000):
    """Compute Kendall's correlation coefficient of `y` and `z`.

    When `y` has more than `nb_sample` entries, a random subset of
    `nb_sample` positions (drawn via `np.random.shuffle`) is used for both
    inputs before the coefficient is computed.
    """
    n_obs = len(y)
    if n_obs > nb_sample:
        picked = np.arange(n_obs)
        np.random.shuffle(picked)
        picked = picked[:nb_sample]
        # Same positions for both inputs so pairs stay aligned.
        y, z = y[picked], z[picked]
    tau = kendalltau(y, z)[0]
    return tau
test_analytics.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def test_corr_rank(self):
    """Check Series.corr for 'kendall' and 'spearman' against scipy and R."""
    tm._skip_if_no_scipy()

    import scipy
    import scipy.stats as stats

    # kendall and spearman, compared with scipy on series containing ties
    A = tm.makeTimeSeries()
    B = tm.makeTimeSeries()
    A[-5:] = A[:5]
    for method, reference in (('kendall', stats.kendalltau),
                              ('spearman', stats.spearmanr)):
        result = A.corr(B, method=method)
        expected = reference(A, B)[0]
        self.assertAlmostEqual(result, expected)

    # these methods got rewritten in 0.8
    if scipy.__version__ < LooseVersion('0.9'):
        raise nose.SkipTest("skipping corr rank because of scipy version "
                            "{0}".format(scipy.__version__))

    # results from R
    A = Series([
        -0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
        -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606,
    ])
    B = Series([
        -1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
        1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375,
    ])
    kexp = 0.4319297
    sexp = 0.5853767
    self.assertAlmostEqual(A.corr(B, method='kendall'), kexp)
    self.assertAlmostEqual(A.corr(B, method='spearman'), sexp)
test_nanops.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_nancorr_kendall(self):
    """Check nanops.nancorr(method='kendall') against scipy on 2-d and 1-d fixtures."""
    tm.skip_if_no_package('scipy.stats')
    from scipy.stats import kendalltau

    def _tau(left, right):
        # Only the statistic is needed; the p-value is dropped.
        return kendalltau(left, right)[0]

    targ0 = _tau(self.arr_float_2d, self.arr_float1_2d)
    targ1 = _tau(self.arr_float_2d.flat, self.arr_float1_2d.flat)
    self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1,
                                 method='kendall')

    targ0 = _tau(self.arr_float_1d, self.arr_float1_1d)
    targ1 = _tau(self.arr_float_1d.flat, self.arr_float1_1d.flat)
    self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1,
                                 method='kendall')
reproduce_cqe_scores.py 文件源码 项目:virtual-competition 作者: mechaphish 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def compare_scores(byus, bydarpa):
    """Print a comparison of our team ranking against DARPA's.

    byus = { team: score }, bydarpa = { team: score }

    Both dicts must cover exactly the same set of teams (asserted).  The
    function prints whether the first-choice sets of the two rankings match,
    then both rankings, then the Kendall tau (with its p-value) between the
    two score lists.  It returns nothing.

    NOTE(review): this is Python 2 code (print statement, ``iteritems``,
    indexing the result of ``values()``).
    """
    assert frozenset(byus.keys()) == frozenset(bydarpa.keys())

    # NOTE(review): ordered_sets is defined elsewhere; presumably it returns
    # an ordered mapping score -> set of teams, best score first -- confirm.
    our_ranking = ordered_sets(byus)
    darpa_ranking = ordered_sets(bydarpa)

    # The first value of each ranking is treated as the "first choice(s)".
    our_picks = our_ranking.values()[0]
    darpa_picks = darpa_ranking.values()[0]

    from scipy import stats
    # scipy takes them as ordered lists
    # One fixed team order is used for both lists so the scores stay paired.
    teamorder = list(byus.keys())  
    vals_us = [ byus[t] for t in teamorder ]
    vals_darpa = [ bydarpa[t] for t in teamorder ]
    tau, p_value = stats.kendalltau(vals_us, vals_darpa)

    def names(teams_set):
        # Sorted first whitespace-separated word of each team name, in brackets.
        return '[' + ' '.join(sorted(n.split()[0] for n in teams_set)) + ']'

    # Three cases: identical first choices, disjoint ones, or partial overlap.
    if our_picks == darpa_picks:
        print "[  ] All first choice(s)",names(our_picks),"match, excellent!"
    elif our_picks.isdisjoint(darpa_picks):
        print "[XX] Our first choice(s)",names(our_picks)," completely different from DARPA's",names(darpa_picks)
    else:
        print "[__] Partial match between our first choice(s) and DARPA's. Both have",names(darpa_picks&our_picks),"(we also have:",names(our_picks-darpa_picks)," -- darpa also has:",names(darpa_picks-our_picks),")"
    print "     FOR US:"
    for score,teams in our_ranking.iteritems():
        print "       ","%+.4f"%score,names(teams)
    print "     DARPA:"
    for score,teams in darpa_ranking.iteritems():
        print "       ","%+.4f"%score,names(teams)
    # Leading marker: "<7" when tau < 0.7, "<8" when 0.7 <= tau < 0.8, blank otherwise.
    print "  %s Kendall tau: %.4f (p-value for being correlated: %.6f)" % (("<7" if tau < 0.7 else "<8") if tau < 0.8 else "  ", tau, p_value)
evaluator.py 文件源码 项目:document-qa 作者: allenai 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def evaluate(self, data: List[ContextAndQuestion], true_len, **kwargs):
    """Score predicted spans and report the metrics under a ``b<bound>/`` prefix.

    Reads ``kwargs["span"]`` (predicted spans) and ``kwargs["score"]`` (one
    score per data point).  For each question (or question/document pair
    when ``self.per_doc`` is set) the data point with the highest score is
    selected, and columns 2 and 3 of the score matrix are averaged over the
    selected points as text EM and text F1.  Kendall's tau between the
    scores and those columns, and the averages over points that have an
    answer, are added when ``self.k_tau`` / ``self.paragraph_level`` are
    set.  ``true_len`` is not used.

    Raises ``RuntimeError`` if ``self.eval`` is neither "triviaqa" nor "squad".
    """
    best_spans = kwargs["span"]
    span_logits = kwargs["score"]

    if self.eval == "triviaqa":
        scorer = trivia_span_scores
    elif self.eval == "squad":
        scorer = squad_span_scores
    else:
        raise RuntimeError()
    scores = scorer(data, best_spans)

    has_answer = np.array([len(point.answer.answer_spans) > 0 for point in data])

    # Index of the highest-scoring data point per key; the first one seen
    # wins when scores are equal.
    best_index = {}
    for idx, point in enumerate(data):
        key = (point.question_id, point.doc_id) if self.per_doc else point.question_id
        incumbent = best_index.get(key)
        if incumbent is None or span_logits[idx] > span_logits[incumbent]:
            best_index[key] = idx
    selected_paragraphs = list(best_index.values())

    out = {}
    out["question-text-em"] = scores[selected_paragraphs, 2].mean()
    out["question-text-f1"] = scores[selected_paragraphs, 3].mean()

    if self.k_tau:
        out["text-em-k-tau"] = kendalltau(span_logits, scores[:, 2])[0]
        out["text-f1-k-tau"] = kendalltau(span_logits, scores[:, 3])[0]

    if self.paragraph_level:
        out["paragraph-text-em"] = scores[has_answer, 2].mean()
        out["paragraph-text-f1"] = scores[has_answer, 3].mean()

    prefix = "b%d/" % self.bound
    return Evaluation({prefix + name: value for name, value in out.items()})
evaluator.py 文件源码 项目:document-qa 作者: allenai 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def evaluate(self, data: List[ContextAndQuestion], true_len, **kargs):
    """Report span/text scores and rank correlations of confidences with them.

    The score matrix for ``kargs["spans"]`` is averaged over data points
    that have an answer; its first four columns are reported as accuracy,
    f1, text-accuracy and text-f1.  ``kargs["conf"]`` (and
    ``kargs["none_prob"]`` when present) is then correlated with columns 3
    and 0 using the metric named by ``self.rank_metric``.  ``true_len`` is
    not used.

    Raises ``RuntimeError`` for an unknown ``self.text_eval`` and
    ``ValueError`` for an unknown ``self.rank_metric``.
    """
    if self.text_eval == "triviaqa":
        scorer = trivia_span_scores
    elif self.text_eval == "squad":
        scorer = squad_span_scores
    else:
        raise RuntimeError()
    scores = scorer(data, kargs["spans"])

    has_answer = [len(point.answer.answer_spans) > 0 for point in data]
    answered_mean = scores[has_answer].mean(axis=0)

    prefix = "b%d/" % self.bound
    scalars = {}
    scalars[prefix + "accuracy"] = answered_mean[0]
    scalars[prefix + "f1"] = answered_mean[1]
    scalars[prefix + "text-accuracy"] = answered_mean[2]
    scalars[prefix + "text-f1"] = answered_mean[3]

    if self.rank_metric == "spr":
        metric = spearmanr
    elif self.rank_metric == "k-tau":
        metric = kendalltau
    else:
        raise ValueError()

    def record(label, confidence, column):
        # Store only the correlation statistic, keyed by label + metric name.
        scalars[prefix + label + self.rank_metric] = metric(confidence, scores[:, column])[0]

    if "none_prob" in kargs:
        none_conf = kargs["none_prob"]
        record("none-text-f1-", none_conf, 3)
        record("none-span-accuracy-", none_conf, 0)

    conf = kargs["conf"]
    record("score-text-f1-", conf, 3)
    record("score-span-accuracy-", conf, 0)
    return Evaluation(scalars)
hash2GloVe_test_stem2_kendall.py 文件源码 项目:hash2vec 作者: Roj 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def distance(a,b):
    """Return 1 minus Kendall's tau of `a` and `b`, so lower means more similar."""
    # Other measures tried here earlier: cosine, euclidean, pearson, tanimoto.
    tau = sci.kendalltau(a, b)[0]
    return 1 - tau



# Load the benchmark
hash2GloVe_test.py 文件源码 项目:hash2vec 作者: Roj 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def distance(a,b):
    """Return the cosine distance between `a` and `b`."""
    # scipy's cosine() already includes the "1 - cos(a, b)" step.
    # Other measures tried here earlier: euclidean, pearson, tanimoto,
    # Kendall tau.
    cosine_distance = scipy.spatial.distance.cosine(a, b)
    return cosine_distance



# Load the benchmark
hash2GloVe_test_log.py 文件源码 项目:hash2vec 作者: Roj 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def distance(a,b):
    """Return 1 minus the dot product of ``norm(a)`` and ``norm(b)``."""
    # Labelled "cosine similarity" by the original author; presumably norm()
    # scales each vector to unit length -- confirm against its definition.
    # Other measures tried here earlier: euclidean, pearson, tanimoto,
    # Kendall tau.
    similarity = dot(norm(a), norm(b))
    return 1 - similarity



# Load the benchmark
LeaveNOut.py 文件源码 项目:AppsOfDataAnalysis 作者: nhanloukiala 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def select(self, X, Y, select_count=100):
    """Select the columns of ``X`` most strongly rank-correlated with ``Y``.

    Kendall's tau is computed between every column of ``X`` and ``Y``; the
    columns are ranked by absolute tau, strongest first, and the top
    ``select_count`` are kept.

    :param X: 2-d array indexed as ``X[:, i]`` (one column per feature).
    :param Y: target values, one per row of ``X``.
    :param select_count: maximum number of columns to keep.
    :returns: ``(X[:, indices], indices)`` where ``indices`` lists the
        selected column indices, strongest correlation first.
    """
    scored = []
    for col in range(X.shape[1]):
        # Index 0 is the tau statistic.
        strength = abs(kendalltau(X[:, col], Y)[0])
        # A NaN tau (e.g. a constant column) would make the sort order
        # undefined, so such columns are ranked as uncorrelated.
        if strength != strength:
            strength = 0.0
        scored.append((col, strength))

    # The sort key must be itemgetter(1), the correlation strength.  Passing
    # the itemgetter class itself compared unrelated objects and fails on
    # Python 3.
    ranked = sorted(scored, key=operator.itemgetter(1), reverse=True)
    indices = [col for col, _ in ranked[:select_count]]
    return X[:, indices], indices
asap_evaluator.py 文件源码 项目:nea 作者: nusnlp 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def calc_correl(self, dev_pred, test_pred):
    """Return Pearson, Spearman and Kendall coefficients for dev and test.

    Predictions are compared with ``self.dev_y_org`` and ``self.test_y_org``.
    The result is ``(dev_prs, test_prs, dev_spr, test_spr, dev_tau,
    test_tau)``; p-values are dropped.
    """
    def _coefficient(metric, predicted, gold):
        # Each scipy function yields (coefficient, p-value).
        return metric(predicted, gold)[0]

    dev_prs = _coefficient(pearsonr, dev_pred, self.dev_y_org)
    test_prs = _coefficient(pearsonr, test_pred, self.test_y_org)
    dev_spr = _coefficient(spearmanr, dev_pred, self.dev_y_org)
    test_spr = _coefficient(spearmanr, test_pred, self.test_y_org)
    dev_tau = _coefficient(kendalltau, dev_pred, self.dev_y_org)
    test_tau = _coefficient(kendalltau, test_pred, self.test_y_org)
    return (dev_prs, test_prs, dev_spr, test_spr, dev_tau, test_tau)
experiment_corr_pca_ches.py 文件源码 项目:cptm 作者: NLeSC 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def do_kendallt(list1, list2, alpha=0.05):
    """Return Kendall's tau of the two lists, or 'n.s.' if not significant.

    The coefficient is returned only when its p-value is below ``alpha``.
    """
    coefficient, p_value = kendalltau(list1, list2)
    return coefficient if p_value < alpha else 'n.s.'
Conf_Measure.py 文件源码 项目:Stock-Prediction-Time-Series-Analysis-Python 作者: Nekooeimehr 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def Conf_Measure(RegModel, Train_Data, True_Labels, ModelType):
    """Print and return Kendall's tau and R squared of a model's predictions.

    ``RegModel.predict(Train_Data)`` is compared with ``True_Labels``;
    ``ModelType`` is only used as a label in the printed report.

    Returns ``(tau, p_value, R2_Measure)``.
    """
    fitted_values = RegModel.predict(Train_Data)
    kendall_result = stats.kendalltau(True_Labels, fitted_values)
    tau, p_value = kendall_result
    R2_Measure = r2_score(True_Labels, fitted_values)

    print('The Kindell Coefficient of ', ModelType, ' model is ', tau,' with a p-value of ',p_value)
    print('The R Square of ', ModelType, ' model is ', R2_Measure)
    print('')
    return tau, p_value, R2_Measure
concordance_analysis.py 文件源码 项目:microbiomeHD 作者: cduvallet 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def concordance(series1, series2, method, nreps=1000):
    """
    Compute a concordance measure and p-value for two pandas Series.

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        One of 'fisher', 'spearman', 'kendalltau', 'empirical', 'cohen'.
    nreps : int
        Number of repetitions used to build the null. Only used when method
        is 'empirical'.

    Returns
    -------
    measure : float
        Measure of concordance as returned by the chosen method (e.g. r for
        the correlation methods).
    p : float
        p value of the observed concordance; NaN for 'cohen'.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """
    if method == 'fisher':
        # Note from the original author: this automatically ignores any
        # bugs which were not present in both series.
        contingency = pd.crosstab(series1, series2)
        return fisher_exact(contingency)

    if method == 'spearman':
        return spearmanr(series1, series2)

    if method == 'kendalltau':
        return kendalltau(series1, series2, nan_policy='omit')

    if method == 'empirical':
        return empirical_pval(series1, series2, nreps)

    if method == 'cohen':
        # Keep only rows where both series have a value.
        paired = pd.concat((series1, series2), axis=1).dropna()
        kappa = cohen_kappa_score(paired.iloc[:, 0], paired.iloc[:, 1])
        return kappa, np.nan

    raise ValueError('Unknown concordance method.')


问题


面经


文章

微信
公众号

扫码关注公众号