python类spearmanr()的实例源码

eval_word_sim.py 文件源码 项目:KATE 作者: hugochan 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def calc_word_sim(model, eval_file):
    """Score a model against a word-similarity dataset with Spearman's rho.

    ``eval_file`` is read as a comma-separated file with a header row and
    exactly three columns (first word, second word, gold score). Only pairs
    whose two words are both keys of ``model.vocab`` are scored, using
    ``model.similarity``. The list of model scores is printed, and the pair
    count and correlation are logged.

    Returns:
        The ``(corr, p_val)`` pair produced by ``stats.spearmanr``.
    """
    df = pd.read_csv(eval_file, sep=',', header=0)  # eval dataset
    col1, col2, score = df.columns.values
    # Build the lookup once; a set keeps membership tests O(1) even where
    # ``keys()`` returns a plain list (Python 2).
    model_vocab = set(model.vocab.keys())
    ground = []
    predicted = []  # named ``sys`` originally, which shadowed the stdlib module
    for _, row in df.iterrows():
        word1, word2 = row[col1], row[col2]
        if word1 in model_vocab and word2 in model_vocab:
            ground.append(float(row[score]))
            predicted.append(model.similarity(word1, word2))

    # compute Spearman's rank correlation coefficient (https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(predicted)
    corr, p_val = stats.spearmanr(predicted, ground)
    logger.info("# of pairs found: %s / %s", len(ground), len(df))
    logger.info("correlation: %s", corr)
    return corr, p_val
similarity_test.py 文件源码 项目:kor2vec 作者: dongjun-Lee 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def word_sim_test(filename, pos_vectors):
    """Correlate inner-product word similarities with gold scores.

    Each line of ``filename`` is split on commas into exactly three fields:
    two words and a gold similarity. A pair whose vector lookup raises
    ``KeyError`` is counted as missed and left out of both score lists.

    Returns:
        ``(spearman, pearson, missed)``: the two correlation coefficients
        between gold and predicted scores, and the number of skipped pairs.
    """
    separator = ','
    gold_scores = []
    model_scores = []
    skipped = 0

    with open(filename, 'r') as handle:
        for line in handle:
            first, second, gold = line.strip().split(separator)

            try:
                vec_a = create_word_vector(first, pos_vectors)
                vec_b = create_word_vector(second, pos_vectors)
                score = float(np.inner(vec_a, vec_b))
                gold_scores.append(float(gold))
                model_scores.append(score)
            except KeyError:
                skipped = skipped + 1

    spearman_result = st.spearmanr(gold_scores, model_scores)
    spearman, _ = spearman_result
    pearson_result = st.pearsonr(gold_scores, model_scores)
    pearson, _ = pearson_result

    return spearman, pearson, skipped
eval.py 文件源码 项目:SIF 作者: PrincetonML 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def sim_getCorrelation(We,words,f, weight4ind, scoring_function, params):
    """Score the sentence pairs in file ``f`` and correlate with gold scores.

    Every line of ``f`` is split on tabs: fields 0 and 1 are the two texts
    and field 2 is parsed as the float gold score. The texts are converted
    with ``data_io.getSeqs`` / ``data_io.prepare_data``, the masks are
    replaced by ``data_io.seq2weight(..., weight4ind)``, and
    ``scoring_function(We, x1, x2, m1, m2, params)`` supplies predictions.

    Returns:
        ``(pearson, spearman)`` coefficients between predictions and golds.
    """
    golds = []
    seq1 = []
    seq2 = []
    # Context manager closes the handle; the original left it open.
    with open(f, 'r') as fin:
        for line in fin:
            fields = line.split("\t")
            p1, p2, score = fields[0], fields[1], float(fields[2])
            X1, X2 = data_io.getSeqs(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            golds.append(score)
    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    scores = scoring_function(We, x1, x2, m1, m2, params)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
eval.py 文件源码 项目:SIF 作者: PrincetonML 项目源码 文件源码 阅读 42 收藏 0 点赞 0 评论 0
def getCorrelation(model,words,f, params=[]):
    """Score the sentence pairs in file ``f`` with ``model`` and correlate.

    Every line of ``f`` is split on tabs: fields 0 and 1 are the two texts
    and field 2 is parsed as the float gold score. When ``params`` is truthy
    and ``params.weightfile`` is set, the masks are replaced by
    ``data_io.seq2weight(..., params.weight4ind)`` before scoring with
    ``model.scoring_function``.

    ``params`` keeps its original ``[]`` default for interface compatibility;
    it is only read, never mutated.

    Returns:
        ``(pearson, spearman)`` coefficients between predictions and golds.
    """
    golds = []
    seq1 = []
    seq2 = []
    # Context manager closes the handle; the original left it open.
    with open(f, 'r') as fin:
        for line in fin:
            fields = line.split("\t")
            p1, p2, score = fields[0], fields[1], float(fields[2])
            X1, X2 = data_io.getSeqs(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            golds.append(score)
    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    if params and params.weightfile:
        m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        m2 = data_io.seq2weight(x2, m2, params.weight4ind)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
analyze_predictions.py 文件源码 项目:CS-SMAF 作者: brian-cleary 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def correlations(A,B,pc_n=100):
    """Summarise how well two equally shaped 2-D arrays agree.

    Args:
        A, B: 2-D arrays of the same shape.
        pc_n: number of leading left singular vectors to compare. Values
            larger than the number of available vectors are clamped (the
            original raised ``IndexError``, including for the default of 100
            on arrays with fewer than 100 rows).

    Returns:
        A 5-tuple ``(p, spear, pg, ps, pc_dist)``:
        ``p`` is one minus the correlation distance of the flattened arrays,
        ``spear`` is Spearman's rho of the flattened arrays,
        ``pg`` is the mean of the finite row-wise values (``dist_genes``),
        ``ps`` is the mean of the finite column-wise values (``dist_sample``),
        ``pc_dist`` is an array of ``abs(1 - cosine distance)`` between
        matching left singular vectors, or an empty list when ``pc_n <= 0``.
    """
    flat_a, flat_b = A.flatten(), B.flatten()
    p = 1 - distance.correlation(flat_a, flat_b)
    spear = spearmanr(flat_a, flat_b)

    dist_genes = np.zeros(A.shape[0])
    for i in range(A.shape[0]):
        dist_genes[i] = 1 - distance.correlation(A[i], B[i])
    # Constant rows give a non-finite correlation; leave them out of the mean.
    pg = np.average(dist_genes[np.isfinite(dist_genes)])

    dist_sample = np.zeros(A.shape[1])
    for i in range(A.shape[1]):
        dist_sample[i] = 1 - distance.correlation(A[:, i], B[:, i])
    ps = np.average(dist_sample[np.isfinite(dist_sample)])

    pc_dist = []
    if pc_n > 0:
        u0, s0, vt0 = np.linalg.svd(A)
        u, s, vt = np.linalg.svd(B)
        # Never index past the singular vectors that actually exist.
        n_components = min(pc_n, u0.shape[1], u.shape[1])
        pc_dist = np.array([
            abs(1 - distance.cosine(u0[:, i], u[:, i]))
            for i in range(n_components)
        ])
    return p, spear[0], pg, ps, pc_dist
white.py 文件源码 项目:procgen 作者: juancroldan 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def white4D_functional():
    """Check that two randomly placed 4-D sample grids are weakly correlated.

    Draws a random origin and a randomly offset second origin, samples
    ``combined(white, ...)`` on a 20^4 grid at each, and asserts that the
    absolute Spearman rho of the flattened grids stays below 0.5.
    """
    print("Testing correlation for 4D white noise")
    N = 20

    def draw():
        return randrange(-1000, 1000, 1)

    def sample_grid(x0, y0, z0, w0):
        # NOTE(review): only x and y are passed to ``combined``; z and w just
        # repeat the same 2-D slice. Confirm whether that is intended.
        return [[[[combined(white, x/N, y/N)
                   for x in range(x0, x0 + N)]
                  for y in range(y0, y0 + N)]
                 for z in range(z0, z0 + N)]
                for w in range(w0, w0 + N)]

    # Same draw order as before: x1, y1, z1, w1, then the four offsets.
    origin1 = [draw() for _ in range(4)]
    origin2 = [start + draw() for start in origin1]
    values1 = sample_grid(*origin1)
    values2 = sample_grid(*origin2)
    rho = spearmanr(values1, values2, axis = None)
    coefficient = rho[0]
    assert abs(coefficient) < 0.5
    print("rho = %s" % coefficient)
    print("\tNot signifying correlation found")
test.py 文件源码 项目:wordsim 作者: recski 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def run(self):
        """Log the Spearman correlation of every embedding on every dataset.

        Iterates ``self.sim_datasets`` and ``self.e_models``; for each
        dataset/model combination, collects ``model.get_sim`` values for the
        dataset's word pairs and logs the ``spearmanr`` result together with
        the number of skipped pairs.
        """
        for d_type, datasets in self.sim_datasets.iteritems():
            for data, data_fn in datasets:
                logging.info(
                    'testing on data {0} of type {1} ({2} pairs)'.format(
                        data_fn, d_type, len(data.pairs)))
                for e_type, models in self.e_models.iteritems():
                    for model, model_fn in models:
                        logging.info(
                            '\ttesting embedding {0} of type {1}'.format(
                                model_fn, e_type))
                        answers = []
                        gold_sims = []
                        oovs = 0
                        for (w1, w2), gold in data.pairs.iteritems():
                            sim = model.get_sim(w1, w2)
                            # NOTE(review): any falsy similarity (including
                            # 0.0) is counted as out-of-vocabulary here.
                            if not sim:
                                oovs += 1
                                continue
                            answers.append(sim)
                            gold_sims.append(gold)
                        corr = spearmanr(answers, gold_sims)
                        logging.info('Spearman correlation: {0}'.format(corr))
                        logging.info('pairs skipped (OOVs): {0}'.format(oovs))
data_handler.py 文件源码 项目:pyktrader2 作者: harveywwu 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def MA_RIBBON(df, ma_series):
    """Build moving-average ribbon indicators for ``df``.

    For every length in ``ma_series`` an ``EMA`` of the ``'close'`` field is
    computed. From row ``max(ma_series) - 1`` onwards, each row's EMA values
    are rank-correlated (Spearman) against the descending sequence
    ``len(ma_series), ..., 1``, and the spread between the largest and
    smallest EMA is recorded. Earlier rows stay NaN.

    Returns:
        A DataFrame concatenating ``MARIBBON_CORR`` (rho * 100),
        ``MARIBBON_PVAL`` (p-value * 100), ``MARIBBON_DIST`` and the
        individual EMA series.
    """
    n_rows = len(df)
    ma_array = np.zeros([n_rows, len(ma_series)])
    ema_list = []
    for idx, ma_len in enumerate(ma_series):
        ema_i = EMA(df, n = ma_len, field = 'close')
        ma_array[:, idx] = ema_i
        ema_list.append(ema_i)
    # ``np.nan`` instead of the ``np.NAN`` alias, which NumPy 2.0 removed.
    corr = np.full(n_rows, np.nan)
    pval = np.full(n_rows, np.nan)
    dist = np.full(n_rows, np.nan)
    max_n = max(ma_series)
    # Reference ordering is loop-invariant, so build it once.
    descending = list(range(len(ma_series), 0, -1))
    for idy in range(max(max_n - 1, 0), n_rows):
        row = ma_array[idy, :]
        corr[idy], pval[idy] = stats.spearmanr(row, descending)
        dist[idy] = max(row) - min(row)
    corr_ts = pd.Series(corr*100, index = df.index, name = "MARIBBON_CORR")
    pval_ts = pd.Series(pval*100, index = df.index, name = "MARIBBON_PVAL")
    dist_ts = pd.Series(dist, index = df.index, name = "MARIBBON_DIST")
    return pd.concat([corr_ts, pval_ts, dist_ts] + ema_list, join='outer', axis=1)
eval.py 文件源码 项目:Hotpot 作者: Liang-Qiu 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def eval_sts(ycat, y, name, quiet=False):
    """Score STS predictions against gold values and optionally print them.

    One-dimensional inputs are used as they are; anything else is first
    converted with ``loader.sts_categorical2labels``. Pearson, Spearman and
    MSE are computed and returned wrapped in ``STSRes``; unless ``quiet`` is
    set, the three values are also printed prefixed with ``name``.
    """
    ypred = ycat if ycat.ndim == 1 else loader.sts_categorical2labels(ycat)
    ygold = y if y.ndim == 1 else loader.sts_categorical2labels(y)

    pearson_value = pearsonr(ypred, ygold)[0]
    spearman_value = spearmanr(ypred, ygold)[0]
    error = mse(ypred, ygold)

    if quiet:
        return STSRes(pearson_value, spearman_value, error)

    print('%s Pearson: %f' % (name, pearson_value,))
    print('%s Spearman: %f' % (name, spearman_value,))
    print('%s MSE: %f' % (name, error,))
    return STSRes(pearson_value, spearman_value, error)
eval.py 文件源码 项目:Hotpot 作者: Liang-Qiu 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def eval_sts(ycat, y, name, quiet=False):
    """Evaluate STS predictions and report Pearson, Spearman and MSE.

    Inputs with ``ndim == 1`` are taken as label vectors directly; other
    inputs go through ``loader.sts_categorical2labels`` first. The metrics
    are printed (prefixed by ``name``) unless ``quiet`` is true, and are
    returned as ``STSRes(pearson, spearman, mse)``.
    """
    def as_labels(values):
        if values.ndim == 1:
            return values
        return loader.sts_categorical2labels(values)

    ypred = as_labels(ycat)
    ygold = as_labels(y)
    pr, _ = pearsonr(ypred, ygold)[:2]
    sr, _ = spearmanr(ypred, ygold)[:2]
    e = mse(ypred, ygold)
    if not quiet:
        for line in ('%s Pearson: %f' % (name, pr,),
                     '%s Spearman: %f' % (name, sr,),
                     '%s MSE: %f' % (name, e,)):
            print(line)
    return STSRes(pr, sr, e)
evaluate.py 文件源码 项目:Learning-sentence-representation-with-guidance-of-human-attention 作者: wangshaonan 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def getCorrelation(model,words,f):
    """Score the sentence pairs in file ``f`` with ``model`` and correlate.

    Every line of ``f`` is split on tabs: field 0 is parsed as the float gold
    score and fields 1 and 2 are the two texts. When the first
    whitespace-separated token of the first text splits into exactly two
    parts on ``'_'``, the pair is converted with ``getSeqs2`` (its two extra
    outputs are discarded); otherwise ``getSeqs`` is used.

    The original block had inconsistent indentation and could not be parsed;
    the ``if``/``else`` is restored to the loop body, where its variables are
    defined.

    Returns:
        ``(pearson, spearman)`` coefficients between predictions and golds.
    """
    golds = []
    seq1 = []
    seq2 = []
    # Context manager closes the handle; the original left it open.
    with open(f, 'r') as fin:
        for line in fin:
            fields = line.split("\t")
            p1, p2, score = fields[1], fields[2], float(fields[0])
            if len(p1.split()[0].split('_')) == 2:
                X1, X2, _, _ = getSeqs2(p1, p2, words)
            else:
                X1, X2 = getSeqs(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            golds.append(score)
    x1, m1 = utils.prepare_data(seq1)
    x2, m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
evaluate.py 文件源码 项目:Learning-sentence-representation-with-guidance-of-human-attention 作者: wangshaonan 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def getCorrelation2(model,words,f):
    """Score the sentence pairs in ``f`` using ``model.scoring_function2``.

    Every line of ``f`` is split on tabs: field 0 is parsed as the float gold
    score and fields 1 and 2 are the two texts. ``getSeqs2`` yields two
    sequences plus two companion sequences per pair; all four are batched
    with ``utils.prepare_data2`` and passed to ``model.scoring_function2``.

    The original block had inconsistent indentation and could not be parsed;
    the two ``sseq`` appends are restored to the loop body, where ``SX1`` and
    ``SX2`` are defined.

    Returns:
        ``(pearson, spearman)`` coefficients between predictions and golds.
    """
    golds = []
    seq1 = []
    seq2 = []
    sseq1 = []
    sseq2 = []
    # Context manager closes the handle; the original left it open.
    with open(f, 'r') as fin:
        for line in fin:
            fields = line.split("\t")
            p1, p2, score = fields[1], fields[2], float(fields[0])
            X1, X2, SX1, SX2 = getSeqs2(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            sseq1.append(SX1)
            sseq2.append(SX2)
            golds.append(score)
    x1, m1, s1 = utils.prepare_data2(seq1, sseq1)
    x2, m2, s2 = utils.prepare_data2(seq2, sseq2)
    scores = model.scoring_function2(x1, x2, m1, m2, s1, s2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
base.py 文件源码 项目:mmfeat 作者: douwekiela 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def spearman(self, dataset):
        """Spearman correlation between gold scores and ``self.sim`` scores.

        ``dataset`` must be a non-empty list whose first entry has three
        items, the third being a float; otherwise ``TypeError`` is raised.
        Pairs for which ``self.sim`` raises ``KeyError`` are skipped, with a
        warning printed when ``self.reportMissing`` is truthy.

        Returns the ``spearmanr`` result for (gold scores, system scores).
        """
        well_formed = (isinstance(dataset, list)
                       and len(dataset) != 0
                       and len(dataset[0]) == 3
                       and isinstance(dataset[0][2], float))
        if not well_formed:
            raise TypeError('Dataset is not of correct type, list of [str, str, float] triples expected.')

        gold = []
        predicted = []
        for left, right, gold_value in dataset:
            try:
                predicted_value = self.sim(left, right)
            except KeyError:
                if self.reportMissing:
                    print('Warning: Missing pair %s-%s - skipping' % (left, right))
            else:
                gold.append(gold_value)
                predicted.append(predicted_value)
        return spearmanr(gold, predicted)
nanops.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def get_corr_func(method):
    """Return the two-argument correlation function registered for ``method``.

    Recognised names are ``'pearson'``, ``'kendall'`` and ``'spearman'``;
    any other name raises ``KeyError``. SciPy is imported only when one of
    the rank-based methods is requested.
    """
    rank_based = ['kendall', 'spearman']
    if method in rank_based:
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        matrix = np.corrcoef(a, b)
        return matrix[0, 1]

    def _kendall(a, b):
        outcome = kendalltau(a, b)
        # A tuple-like result carries the statistic in its first slot.
        return outcome[0] if isinstance(outcome, tuple) else outcome

    def _spearman(a, b):
        rho = spearmanr(a, b)[0]
        return rho

    registry = dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)
    return registry[method]
evalrank.py 文件源码 项目:BioNLP-2016 作者: cambridgeltl 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def evaluate1Word(wv, reference):
    """Evaluate wv against reference, return (rho, count) where rho is
    Spearman's rho and count is the number of reference word pairs
    that could be evaluated against.

    ``reference`` is iterated as ``(words, sim)`` entries, sorted by ``sim``.
    Entries where either word contains a space are skipped silently; entries
    with a word missing from ``wv`` are skipped and counted, and that count
    is printed.
    """
    missing = 0
    gold, predicted = [], []
    for words, sim in sorted(reference, key=lambda ws: ws[1]):
        if " " in words[0] or " " in words[1]:
            continue
        try:
            v1, v2 = wv[words[0]], wv[words[1]]
        except KeyError:
            missing += 1
            continue
        gold.append(sim)
        predicted.append(cosine(v1, v2))

    rho, p = spearmanr(gold, predicted)
    # Single pre-formatted argument: same output on Python 2 and Python 3
    # (the original used the Python 2-only print statement).
    print("Word not found in WordVector %s" % missing)
    return (rho, len(gold))
correlate_counts.py 文件源码 项目:wub 作者: nanoporetech 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def _corrfunc(x, y, **kws):
    """ Annotate grid with correlation coefficient.
    Solution from http://stackoverflow.com/a/30942817

    The statistic is selected by the module-level ``args.c`` ('spearman' or
    'pearson'); the coefficient is also appended to the module-level
    ``correlations`` list.
    """
    statistic = args.c
    if statistic not in ('spearman', 'pearson'):
        raise Exception('Invalid correlation statistic.')

    if statistic == 'spearman':
        corr_type = 'Rho'
        r = stats.spearmanr(x, y)[0]
    else:
        corr_type = 'r'
        r = stats.pearsonr(x, y)[0]

    correlations.append(r)
    label = "{} = {:.2f}".format(corr_type, r)
    ax = plotter.plt.gca()
    ax.annotate(label, xy=(.1, .9), xycoords=ax.transAxes)
split_data.py 文件源码 项目:kaggle-quora-solution-8th 作者: qqgeogor 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def get_feature_importance(feature):
    """Spearman rho between ``feature`` and the training labels.

    The labels are the ``is_duplicate`` column of ``../data/train.csv``,
    which is re-read on every call.
    """
    import scipy.stats as sps
    import pandas as pd
    train_frame = pd.read_csv('../data/train.csv')
    labels = train_frame['is_duplicate']
    result = sps.spearmanr(feature, labels)
    return result[0]

# import pickle
# pickle.dump(X_train,open("data_train.pkl", 'wb'), protocol=2)
#
# data_file=['test_deptree','test_glove_sim_dist','test_pca_glove',
#            'test_pca_pattern','test_w2w','test_pos','test_pca_char']
#
# path='../test/'
# for it in range(6):
#     tmp=[]
#     flist=[item+str(it) for item in data_file]
#     test=np.empty((400000,0))
#     if it==5:
#         test=np.empty((345796,0))
#     for f in flist:
#         test=np.hstack([test,pd.read_pickle(path+f+'.pkl')])
#     pickle.dump(test,open('data_test{0}.pkl'.format(it),'wb'),protocol=2)
evaluate.py 文件源码 项目:iclr2016 作者: jwieting 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def getCorrelation(model,words,f):
    """Score the sentence pairs in file ``f`` with ``model`` and correlate.

    Every line of ``f`` is split on tabs: fields 0 and 1 are the two texts
    and field 2 is parsed as the float gold score. Pairs are converted with
    ``getSeqs``, batched with ``utils.prepare_data`` and scored by
    ``model.scoring_function``.

    Returns:
        ``(pearson, spearman)`` coefficients between predictions and golds.
    """
    golds = []
    seq1 = []
    seq2 = []
    # Context manager closes the handle; the original left it open.
    with open(f, 'r') as fin:
        for line in fin:
            fields = line.split("\t")
            p1, p2, score = fields[0], fields[1], float(fields[2])
            X1, X2 = getSeqs(p1, p2, words)
            seq1.append(X1)
            seq2.append(X2)
            golds.append(score)
    x1, m1 = utils.prepare_data(seq1)
    x2, m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
validation_check.py 文件源码 项目:neurobind 作者: Kyubyong 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def validation_check():
    """Run the latest checkpoint on the validation data and log Spearman's rho.

    Builds the inference graph, restores the most recent checkpoint found in
    ``hp.logdir``, predicts in batches of ``hp.batch_size`` and appends one
    ``"<model name>\\t<score>"`` line to ``validation_results.txt`` inside
    ``hp.results``.
    """
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode="val")

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model: the name is the first double-quoted token of the
            # checkpoint file. The handle is closed now (it was left open).
            with open(hp.logdir + '/checkpoint', 'r') as ckpt:
                mname = ckpt.read().split('"')[1]  # model name

            # Inference
            if not os.path.exists(hp.results):
                os.mkdir(hp.results)
            with open(os.path.join(hp.results, "validation_results.txt"), 'a') as fout:
                expected, predicted = [], []
                # Only whole batches are evaluated; a trailing partial batch
                # is not visited by this loop.
                for step in range(len(X) // hp.batch_size):
                    start = step * hp.batch_size
                    stop = (step + 1) * hp.batch_size
                    x = X[start:stop]
                    y = Y[start:stop]

                    # predict intensities
                    logits = sess.run(g.logits, {g.x: x})

                    expected.extend(list(y))
                    predicted.extend(list(logits))

                # Get spearman coefficients
                score, _ = spearmanr(expected, predicted)
                fout.write("{}\t{}\n".format(mname, score))
spearman_correlation.py 文件源码 项目:systematic-metafeatures 作者: fhpinto 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _calculate(self, input):
        """Spearman rho between columns 0 and 1 of ``input``.

        Rows containing a NaN in any column are dropped before the
        correlation is computed.
        """
        rows_with_nan = np.isnan(input).any(axis=1)
        complete = input[~rows_with_nan]
        first_column = complete[:, 0]
        second_column = complete[:, 1]
        result = spearmanr(first_column, second_column)
        return result[0]
seriesanalysis.py 文件源码 项目:histwords 作者: williamleif 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def series_corr(word_year_series_1, word_year_series_2, i_year_words, start_year=1900, end_year=2000, series_1_norms=None, series_2_norms=None):
    """
    Gets the per-year correlation between the two word time series.
    Words are included even if they have values missing for a year, but their missing values are excluded from the year in question.

    Args:
        word_year_series_1, word_year_series_2: mappings ``word -> {year: value}``.
        i_year_words: either a mapping ``year -> words`` or a single
            collection of words, which is then reused for every year (decided
            by whether ``start_year`` is contained in it).
        start_year, end_year: inclusive range of years to evaluate.
        series_1_norms, series_2_norms: optional ``(offsets, scales)`` pairs
            indexed by year position; each value becomes
            ``(value - offset) / scale``. Defaults to offset 0 and scale 1.

    Returns:
        ``(year_corrs, year_ps)``: Spearman's rho and its p-value per year.
    """
    year_corrs = []
    year_ps = []
    years = range(start_year, end_year + 1)
    if start_year not in i_year_words:
        i_year_words = {year:i_year_words for year in years}
    # Identity checks: ``== None`` turns elementwise (and ambiguous in an
    # ``if``) when the norms are NumPy arrays.
    if series_1_norms is None:
        series_1_norms = ([0 for year in years], [1 for year in years])
    if series_2_norms is None:
        series_2_norms = ([0 for year in years], [1 for year in years])
    # ``enumerate`` replaces the Python 2-only ``xrange`` index loop.
    for i, year in enumerate(years):
        s1 = []
        s2 = []
        for word in i_year_words[year]:
            if word in word_year_series_1 and word in word_year_series_2:
                value_1 = word_year_series_1[word][year]
                value_2 = word_year_series_2[word][year]
                if not np.isnan(value_1) and not np.isnan(value_2):
                    s1.append((value_1 - series_1_norms[0][i]) / series_1_norms[1][i])
                    s2.append((value_2 - series_2_norms[0][i]) / series_2_norms[1][i])
        corr, p = spearmanr(s1, s2)
        year_corrs.append(corr)
        year_ps.append(p)
    return year_corrs, year_ps
sick_evaluator.py 文件源码 项目:MP-CNN-Variants 作者: tuzhucheng 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def get_scores(self):
        """Evaluate ``self.model`` over ``self.data_loader``.

        Returns a pair of lists: the metric values
        ``[pearson_r, spearman_r, test_kl_div_loss]`` and their names.
        """
        self.model.eval()
        num_classes = self.dataset_cls.NUM_CLASSES
        # Row of class values 1..num_classes, repeated once per batch item.
        predict_classes = torch.arange(1, num_classes + 1).expand(self.batch_size, num_classes)
        test_kl_div_loss = 0
        predictions = []
        true_labels = []

        for batch in self.data_loader:
            output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
            # Summed (not averaged) KL divergence; normalised after the loop.
            test_kl_div_loss += F.kl_div(output, batch.label, size_average=False).data[0]
            # handle last batch which might have smaller size
            if len(predict_classes) != len(batch.sentence_1):
                predict_classes = torch.arange(1, num_classes + 1).expand(len(batch.sentence_1), num_classes)

            # NOTE(review): the check reads ``self.data_loader.device`` but the
            # context uses ``self.device`` - confirm both refer to the same GPU.
            if self.data_loader.device != -1:
                with torch.cuda.device(self.device):
                    predict_classes = predict_classes.cuda()

            # Weighted sum of class values per item; ``output`` is presumably
            # log-probabilities, since it is exponentiated here - TODO confirm.
            true_labels.append((predict_classes * batch.label.data).sum(dim=1))
            predictions.append((predict_classes * output.data.exp()).sum(dim=1))

            del output

        predictions = torch.cat(predictions).cpu().numpy()
        true_labels = torch.cat(true_labels).cpu().numpy()
        # ``batch`` is the last batch of the loop; its dataset supplies the
        # example count used for normalisation.
        test_kl_div_loss /= len(batch.dataset.examples)
        pearson_r = pearsonr(predictions, true_labels)[0]
        spearman_r = spearmanr(predictions, true_labels)[0]

        return [pearson_r, spearman_r, test_kl_div_loss], ['pearson_r', 'spearman_r', 'KL-divergence loss']
metrics.py 文件源码 项目:aes 作者: feidong1991 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def spearman(y_true, y_pred):
    """
    Compute Spearman's rank correlation coefficient for ``y_true`` versus
    ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: the coefficient, or 0.0 when it evaluates to NaN
    """
    rho = spearmanr(y_true, y_pred)[0]
    if np.isnan(rho):
        return 0.0
    return rho
analyze_predictions.py 文件源码 项目:CS-SMAF 作者: brian-cleary 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def compare_distances(A,B,random_samples=[],s=200,pvalues=False):
    """Correlate pairwise column distances of ``A`` with those of ``B``.

    Args:
        A, B: 2-D arrays with the same number of columns.
        random_samples: column selector applied to both arrays. When empty,
            a random boolean mask selecting ``min(s, A.shape[1])`` columns is
            drawn. The ``[]`` default is only read, never mutated.
        s: number of columns to sample when no selector is given.
        pvalues: when true, return the full ``pearsonr`` / ``spearmanr``
            results; otherwise only the two coefficients.

    Returns:
        ``(pearson, spearman)`` for the Euclidean distances between the
        selected columns.
    """
    if len(random_samples) == 0:
        n_columns = A.shape[1]
        # Builtin ``bool``: the ``np.bool`` alias was removed in NumPy 1.24.
        random_samples = np.zeros(n_columns, dtype=bool)
        random_samples[:min(s, n_columns)] = True
        np.random.shuffle(random_samples)
    dist_x = distance.pdist(A[:, random_samples].T, 'euclidean')
    dist_y = distance.pdist(B[:, random_samples].T, 'euclidean')
    pear = pearsonr(dist_x, dist_y)
    spear = spearmanr(dist_x, dist_y)
    if pvalues:
        return pear, spear
    return pear[0], spear[0]
utils.py 文件源码 项目:MUSE 作者: MiuLab 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def calAvgSimC(test_score, senseVec1, senseScore1,senseVec2, senseScore2):
  """Spearman rho between ``test_score`` and weighted-average sense similarity.

  For item ``t``, every vector in ``senseVec1[t]`` is paired with every
  vector in ``senseVec2[t]``; each pair contributes ``1 - cosine(v1, v2)``
  weighted by ``senseScore1[t][i] * senseScore2[t][j]``, and the
  contributions are summed.
  """
  assert(len(senseVec1)==len(senseVec2))
  avgCos = []
  # ``enumerate`` replaces the Python 2-only ``xrange`` index loops.
  for t, senses1 in enumerate(senseVec1):
    senses2 = senseVec2[t]
    p1 = senseScore1[t]
    p2 = senseScore2[t]
    thisCos = [
      (1 - cosine(v1, v2)) * p1[i] * p2[j]
      for i, v1 in enumerate(senses1)
      for j, v2 in enumerate(senses2)
    ]
    avgCos.append(np.sum(thisCos))
  return spearmanr(test_score, avgCos)[0]
utils.py 文件源码 项目:MUSE 作者: MiuLab 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def calMaxSimC(test_score, senseVec1, senseScore1,senseVec2, senseScore2):
  """Spearman rho between ``test_score`` and top-sense cosine similarity.

  For item ``t``, the vector with the highest score on each side is chosen
  (``np.argmax`` of ``senseScore1[t]`` / ``senseScore2[t]``) and the pair's
  similarity is ``1 - cosine`` of those two vectors.
  """
  assert(len(senseVec1)==len(senseVec2))
  maxCos = []
  # ``enumerate`` replaces the Python 2-only ``xrange`` index loop.
  for t, senses1 in enumerate(senseVec1):
    i = np.argmax(senseScore1[t])
    j = np.argmax(senseScore2[t])
    maxCos.append(1 - cosine(senses1[i], senseVec2[t][j]))
  return spearmanr(test_score, maxCos)[0]
white.py 文件源码 项目:procgen 作者: juancroldan 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def white2D_functional():
    """Check that two randomly placed 2-D sample grids are weakly correlated.

    Draws a random origin and a randomly offset second origin, samples
    ``combined(white, ...)`` on a 100x100 grid at each, and asserts that the
    absolute Spearman rho of the flattened grids stays below 0.5.
    """
    print("Testing correlation for 2D white noise")
    N = 100

    def draw():
        return randrange(-1000, 1000, 1)

    def sample_grid(x0, y0):
        return [[combined(white, x/N, y/N)
                 for x in range(x0, x0 + N)]
                for y in range(y0, y0 + N)]

    # Same draw order as before: x1, y1, then the two offsets.
    origin1 = [draw() for _ in range(2)]
    origin2 = [start + draw() for start in origin1]
    values1 = sample_grid(*origin1)
    values2 = sample_grid(*origin2)
    rho = spearmanr(values1, values2, axis = None)
    coefficient = rho[0]
    assert abs(coefficient) < 0.5
    print("rho = %s" % coefficient)
    print("\tNot signifying correlation found")
white.py 文件源码 项目:procgen 作者: juancroldan 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def white3D_functional():
    """Check that two randomly placed 3-D sample grids are weakly correlated.

    Draws a random origin and a randomly offset second origin, samples
    ``combined(white, ...)`` on a 100^3 grid at each, and asserts that the
    absolute Spearman rho of the flattened grids stays below 0.5.
    """
    print("Testing correlation for 3D white noise")
    N = 100

    def draw():
        return randrange(-1000, 1000, 1)

    def sample_grid(x0, y0, z0):
        # NOTE(review): only x and y are passed to ``combined``; z just
        # repeats the same 2-D slice. Confirm whether that is intended.
        return [[[combined(white, x/N, y/N)
                  for x in range(x0, x0 + N)]
                 for y in range(y0, y0 + N)]
                for z in range(z0, z0 + N)]

    # Same draw order as before: x1, y1, z1, then the three offsets.
    origin1 = [draw() for _ in range(3)]
    origin2 = [start + draw() for start in origin1]
    values1 = sample_grid(*origin1)
    values2 = sample_grid(*origin2)
    rho = spearmanr(values1, values2, axis = None)
    coefficient = rho[0]
    assert abs(coefficient) < 0.5
    print("rho = %s" % coefficient)
    print("\tNot signifying correlation found")
regression.py 文件源码 项目:wordsim 作者: recski 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def spearman_scorer(estimator, X, y):
    """Spearman correlation between ``estimator``'s predictions for ``X`` and ``y``.

    The original called ``estimator.predict(y)``, i.e. it predicted from the
    target values and ignored the features ``X`` entirely.

    Returns:
        The full ``spearmanr`` result (coefficient and p-value), unchanged
        from the original return shape.
    """
    logging.info('predicting ...')
    predicted = estimator.predict(X)
    return spearmanr(list(predicted), y)
keras_test.py 文件源码 项目:wordsim 作者: recski 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def test():
    """Train a model on word-pair similarity data and report Spearman's R.

    Command-line arguments: ``sys.argv[1]`` is the data file (fields
    separated by ``|||``: two words and a float score), ``sys.argv[2]`` is
    passed to ``GloveEmbedding``, ``sys.argv[3]`` is the integer ``dim``
    passed to ``featurize`` and ``sys.argv[4]`` the number of training
    epochs. The model is fitted on the training split and evaluated on the
    development split.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s : " +
        "%(module)s (%(lineno)s) - %(levelname)s - %(message)s")

    # Context manager closes the input file; the original left it open.
    with open(sys.argv[1]) as pairs_file:
        records = [line.strip().split("|||") for line in pairs_file]
    data = [((f[0], f[1]), float(f[2])) for f in records]

    # Every print below passes one pre-formatted argument, so the output is
    # identical on Python 2 and Python 3 (the original used print statements).
    print("sample data: {0}".format(data[:3]))

    train_data, devel_data, _test_data = cut(data)

    logging.info('loading model...')
    glove_embedding = GloveEmbedding(sys.argv[2])
    logging.info('done!')
    dim = int(sys.argv[3])
    X_train = featurize(train_data, glove_embedding, dim)

    Y_train = np.array([e[1] for e in train_data])

    logging.info("Input shape: {0}".format(X_train.shape))
    print(X_train[:3])
    logging.info("Label shape: {0}".format(Y_train.shape))
    print(Y_train[:3])

    input_dim = X_train.shape[1]
    output_dim = 1
    model = create_model(input_dim, output_dim)
    model.fit(X_train, Y_train, nb_epoch=int(sys.argv[4]), batch_size=32)

    X_devel = featurize(devel_data, glove_embedding, dim)
    Y_devel = np.array([e[1] for e in devel_data])

    pred = model.predict_proba(X_devel, batch_size=32)
    corr = spearmanr(pred, Y_devel)
    print("Spearman's R: {0}".format(corr))


问题


面经


文章

微信
公众号

扫码关注公众号