Example source code for Python's argsort()

visualization.py (project: mitre, author: gerberlab)
def deviation_plot(rp, variable_name, slope_cutoff=1, average_cutoff=2.0):
    average_panel = rp.value_panel(variable_name, types=['average'])
    average_panel = (average_panel.T - np.median(average_panel, axis=1)).T
    average_panel.sort()
    average_ranges = np.max(average_panel, axis=1) - np.min(average_panel, axis=1)
    average_panel = average_panel[np.argsort(average_ranges)][::-1]

    slope_panel = rp.value_panel(variable_name, types=['slope'])
    slope_panel = (slope_panel.T - np.median(slope_panel, axis=1)).T
    slope_panel.sort()
    slope_ranges = np.max(slope_panel, axis=1) - np.min(slope_panel, axis=1)
    slope_panel = slope_panel[np.argsort(slope_ranges)][::-1]

    return _multiplot(rp.dataset, variable_name, slope_panel, average_panel,
                     left_vmin = -1.0*slope_cutoff, left_vmax = slope_cutoff,
                     right_vmin = -1.0*average_cutoff, right_vmax = average_cutoff)
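The function centers each subject's row at its median, sorts within rows, and uses argsort on the per-row range so the most variable rows come first. A minimal sketch of that idiom on a plain 2-D array (`rp` and `_multiplot` are project-specific and omitted; the data here is synthetic):

import numpy as np

panel = np.random.randn(5, 10)
panel = (panel.T - np.median(panel, axis=1)).T   # center each row at its median
panel.sort(axis=1)                               # sort values within each row
ranges = panel[:, -1] - panel[:, 0]              # per-row spread after sorting
panel = panel[np.argsort(ranges)[::-1]]          # widest-range rows first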
loc2lang.py (project: geomdn, author: afshinrahimi)
def get_local_words(preds, vocab, NEs=[], k=50):
    """
    given the word probabilities over many coordinates,
    first normalize the probability of each word in different
    locations to get a probability distribution, then compute
    the entropy of the word's distribution over all coordinates
    and return the words that are low entropy and are not
    named entities.
    """
    #normalize the probabilites of each vocab using entropy
    normalized_preds = normalize(preds, norm='l1', axis=0)
    entropies = stats.entropy(normalized_preds)
    sorted_indices = np.argsort(entropies)
    sorted_local_words = np.array(vocab)[sorted_indices].tolist()


    filtered_local_words = []
    NEset = set(NEs)
    for word in sorted_local_words:
        if word in NEset: continue
        filtered_local_words.append(word)
    return filtered_local_words[0:k]
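A toy illustration of the entropy ranking, assuming rows are locations and columns are words (the probabilities and vocabulary below are made up): a geographically concentrated word gets low entropy and sorts first.

import numpy as np
from scipy import stats
from sklearn.preprocessing import normalize

preds = np.array([[0.90, 0.1],
                  [0.05, 0.2],
                  [0.05, 0.7]])
vocab = ['pittsburgh', 'the']
col_dists = normalize(preds, norm='l1', axis=0)   # each column sums to 1
entropies = stats.entropy(col_dists)              # entropy per column (word)
print(np.array(vocab)[np.argsort(entropies)])     # ['pittsburgh' 'the']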
pylspm.py (project: pylspm, author: lseman)
def cr(self):
        # Composite Reliability
        composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

        for i in range(self.lenlatent):
            block = self.data_[self.Variables['measurement']
                               [self.Variables['latent'] == self.latent[i]]]
            p = len(block.columns)

            if(p != 1):
                cor_mat = np.cov(block.T)
                evals, evecs = np.linalg.eig(cor_mat)
                U, S, V = np.linalg.svd(cor_mat, full_matrices=False)

                indices = np.argsort(evals)
                indices = indices[::-1]
                evecs = evecs[:, indices]
                evals = evals[indices]

                loadings = V[0, :] * np.sqrt(evals[0])

                numerador = np.sum(abs(loadings))**2
                denominador = numerador + (p - np.sum(loadings ** 2))
                cr = numerador / denominador
                composite[self.latent[i]] = cr

            else:
                composite[self.latent[i]] = 1

        composite = composite.T
        return(composite)
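The loop implements the usual composite-reliability formula CR = (sum|l|)^2 / ((sum|l|)^2 + sum(1 - l^2)), with loadings l taken from the first singular vector scaled by the leading eigenvalue. A hand-check with illustrative loadings:

import numpy as np

loadings = np.array([0.8, 0.7, 0.6])   # made-up loadings for a 3-indicator block
num = np.sum(np.abs(loadings)) ** 2
den = num + (len(loadings) - np.sum(loadings ** 2))
print(num / den)   # ~0.745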
arf.py (project: spyking-circus, author: spyking-circus)
def _get_sorted_channels_(self, all_keys, pattern):
        sub_list     = [f for f in all_keys if pattern in f]
        all_channels = [int(f.split(pattern)[1]) for f in sub_list]
        idx          = numpy.argsort(all_channels)
        return sub_list, idx
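argsort on the parsed integer suffixes gives a numeric channel order, where a plain lexicographic sort would put 'channel_10' before 'channel_2'. A quick check with illustrative key names:

import numpy

keys = ['channel_10', 'channel_2', 'channel_1']
channels = [int(k.split('channel_')[1]) for k in keys]
idx = numpy.argsort(channels)
print([keys[i] for i in idx])   # ['channel_1', 'channel_2', 'channel_10']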
arf.py (project: spyking-circus, author: spyking-circus)
def set_streams(self, stream_mode):

        if stream_mode == 'single-file':

            sources     = []
            to_write    = []
            count       = 0
            params      = self.get_description()
            my_file     = h5py.File(self.file_name, 'r')
            all_matches = [re.findall(r'\d+', u) for u in my_file.keys()]
            all_streams = []
            for m in all_matches:
                if len(m) > 0:
                    all_streams += [int(m[0])]

            idx = numpy.argsort(all_streams)

            for i in range(len(all_streams)):
                params['h5_key']  = list(my_file.keys())[idx[i]]
                new_data          = type(self)(self.file_name, params)
                sources          += [new_data]
                to_write         += ['We found the datafile %s with t_start %d and duration %d' %(new_data.file_name, new_data.t_start, new_data.duration)]

            print_and_log(to_write, 'debug', logger)

            return sources

        elif stream_mode == 'multi-files':
            return H5File.set_streams(stream_mode)
kwd.py (project: spyking-circus, author: spyking-circus)
def set_streams(self, stream_mode):

        if stream_mode == 'single-file':

            sources     = []
            to_write    = []
            count       = 0
            params      = self.get_description()
            my_file     = h5py.File(self.file_name, 'r')
            all_matches = my_file.get('recordings').keys()
            all_streams = []
            for m in all_matches:
                all_streams += [int(m)]

            idx = numpy.argsort(all_streams)

            for count in range(len(all_streams)):
                params['recording_number'] = all_streams[idx[count]]
                new_data   = type(self)(self.file_name, params)
                sources   += [new_data]
                to_write  += ['We found the datafile %s with t_start %d and duration %d' %(new_data.file_name, new_data.t_start, new_data.duration)]

            print_and_log(to_write, 'debug', logger)

            return sources

        elif stream_mode == 'multi-files':
            return H5File.set_streams(stream_mode)
algorithms.py (project: spyking-circus, author: spyking-circus)
def rho_estimation(data, update=None, compute_rho=True, mratio=0.01):

    N     = len(data)
    rho   = numpy.zeros(N, dtype=numpy.float32)

    if update is None:
        dist = distancematrix(data)
        didx = lambda i,j: i*N + j - i*(i+1)//2 - i - 1
        nb_selec = max(5, int(mratio*N))
        sdist    = {}

        if compute_rho:
            for i in range(N):
                # indices of pairs (i, j) for j > i and (j, i) for j < i in the condensed matrix
                indices  = numpy.concatenate((didx(i, numpy.arange(i+1, N)), didx(numpy.arange(0, i), i)))
                tmp      = numpy.argsort(numpy.take(dist, indices))[:nb_selec]
                sdist[i] = numpy.take(dist, numpy.take(indices, tmp))
                rho[i]   = numpy.mean(sdist[i])

    else:
        M        = len(update[0])
        nb_selec = max(5, int(mratio*M))
        sdist    = {}

        for i in range(N):
            dist     = distancematrix(data[i].reshape(1, len(data[i])), update[0]).ravel()
            all_dist = numpy.concatenate((dist, update[1][i]))
            idx      = numpy.argsort(all_dist)[:nb_selec]
            sdist[i] = numpy.take(all_dist, idx)
            rho[i]   = numpy.mean(sdist[i])
    return rho, dist, sdist, nb_selec
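rho is a local density estimate: the mean distance to the nb_selec nearest neighbors, selected with argsort over each point's distances. A minimal sketch using a dense distance matrix in place of the project's condensed `distancematrix` (synthetic points):

import numpy

points = numpy.random.rand(100, 2)
dist = numpy.linalg.norm(points[:, None, :] - points[None, :, :], axis=-1)
nb_selec = 5
rho = numpy.empty(len(points))
for i in range(len(points)):
    nearest = numpy.argsort(dist[i])[1:nb_selec + 1]   # skip self (distance 0)
    rho[i] = dist[i, nearest].mean()                   # mean distance to neighbors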
gui.py (project: spyking-circus, author: spyking-circus)
def update_data_plot(self):
        reverse_sort = np.argsort(self.sort_idcs)

        if len(self.inspect_points):
            inspect = reverse_sort[np.array(sorted(self.inspect_points))]
            data = np.vstack((np.ones(len(inspect))*(2*self.raw_lags[-1]-self.raw_lags[-2]), inspect+0.5)).T
            self.inspect_markers.set_offsets(data)
            self.inspect_markers.set_color(self.inspect_colors)
        else:
            self.inspect_markers.set_offsets([])
            self.inspect_markers.set_color([])

        self.ui.data_overview.draw_idle()
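`np.argsort(self.sort_idcs)` relies on a handy fact: the argsort of a permutation is its inverse, mapping each item back to its sorted position. A two-line check:

import numpy as np

perm = np.array([2, 0, 3, 1])
inverse = np.argsort(perm)
print(inverse)         # [1 3 0 2]
print(perm[inverse])   # [0 1 2 3]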
regionsRanker.py (project: MKLMM, author: omerwe)
def eigenDecompose(self, X, K, normalize=True):
        if (X.shape[1] >= X.shape[0]):
            s,U = la.eigh(K)
        else:
            U, s, _ = la.svd(X, check_finite=False, full_matrices=False)
            if (s.shape[0] < U.shape[1]): s = np.concatenate((s, np.zeros(U.shape[1]-s.shape[0])))  # note: can use low-rank formulas here
            s = s**2
            if normalize: s /= float(X.shape[1])
        if (np.min(s) < -1e-10): raise Exception('Negative eigenvalues found')
        s[s < 0] = 0
        ind = np.argsort(s)[::-1]
        U = U[:, ind]
        s = s[ind]

        return s,U
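`np.argsort(s)[::-1]` is the standard idiom for reordering an eigendecomposition by decreasing eigenvalue, since `eigh` returns eigenvalues in ascending order. On a small PSD matrix:

import numpy as np
import scipy.linalg as la

K = np.array([[2.0, 1.0], [1.0, 2.0]])
s, U = la.eigh(K)             # ascending eigenvalues
ind = np.argsort(s)[::-1]     # reorder to descending
s, U = s[ind], U[:, ind]
print(s)                      # [3. 1.]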
classifier_tf.py (project: human-rl, author: gsastry)
def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determines a threshold for classifying examples as positive

    Args:
        y: labels
        y_pred: scores from the classifier
        recall: Threshold is set to classify at least this fraction of positive
            labelled examples as positive
        false_positive_margin: Threshold is set to achieve the desired recall, and
            then is extended to include an additional fraction of negative
            labelled examples equal to false_positive_margin (This allows adding
            a buffer to the threshold while maintaining a constant "cost")
    """
    n_positive = np.count_nonzero(y)

    n_negative = len(y) - n_positive
    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        return np.min(y_pred[y])
    ind = np.argsort(y_pred)
    y_pred_sorted = y_pred[ind]
    y_sorted = y[ind]
    so_far = [0, 0]
    j = 0
    for i in reversed(range(len(y_sorted))):
        so_far[y_sorted[i]] += 1
        if so_far[1] >= int(np.floor(recall * n_positive)):
            j = i
            break
    so_far = [0, 0]
    if false_positive_margin == 0:
        return y_pred_sorted[j]
    k = 0
    for i in reversed(range(j)):
        so_far[y_sorted[i]] += 1
        if so_far[0] >= false_positive_margin * n_negative:
            k = i
            break
    return y_pred_sorted[k]
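Toy usage, assuming `y` is a boolean label array (the scores below are illustrative):

import numpy as np

y = np.array([True, False, True, False, True])
y_pred = np.array([0.9, 0.8, 0.7, 0.3, 0.2])
print(threshold_from_predictions(y, y_pred))               # 0.2 (recall = 1)
print(threshold_from_predictions(y, y_pred, recall=0.67))  # 0.7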
clustering.py (project: cellranger, author: 10XGenomics)
def relabel_by_size(labels):
    """ Relabel clusters so they are sorted by number of members, descending.
    Args: labels (np.array(int)): 1-based cluster labels """
    order = np.argsort(np.argsort(-np.bincount(labels)))
    return 1 + order[labels]
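The double argsort converts values into ranks, so each cluster label is replaced by its size rank. For example:

import numpy as np

labels = np.array([1, 1, 2, 2, 2, 3])   # cluster 2 is largest, then 1, then 3
print(relabel_by_size(labels))          # [2 2 1 1 1 3]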
diffexp.py (project: cellranger, author: 10XGenomics)
def adjust_pvalue_bh(p):
    """ Multiple testing correction of p-values using the Benjamini-Hochberg procedure """
    descending = np.argsort(p)[::-1]
    # q = p * N / k where p = p-value, N = # tests, k = p-value rank
    scale = float(len(p)) / np.arange(len(p), 0, -1)
    q = np.minimum(1, np.minimum.accumulate(scale * p[descending]))

    # Return to original order
    return q[np.argsort(descending)]
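A quick check on illustrative p-values; the adjusted q-values come back in the original input order:

import numpy as np

p = np.array([0.01, 0.04, 0.03, 0.005])
print(adjust_pvalue_bh(p))   # [0.02 0.04 0.04 0.02]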
stats.py (project: cellranger, author: 10XGenomics)
def compute_readpairs_per_umi_threshold(reads, subsample_rate):
    ''' Compute a threshold above which the UMIs are unlikely to be PCR off-products.
        reads (np.array(int)) - Read pairs for each UMI
        subsample_rate (float) - Subsample reads to this fraction.
        Returns threshold (int) - The RPPU threshold in the subsampled space '''

    if len(np.unique(reads)) < 2:
        print('Skipping RPPU threshold calculation.')
        return 1

    print('RPPU subsample rate: %0.4f' % subsample_rate)

    reads = np.random.binomial(reads, subsample_rate)
    reads = reads[reads > 0]

    if len(np.unique(reads)) < 2:
        print('Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.')
        return 1

    new_n50 = tk_stats.NX(reads, 0.5)

    print('New N50: %d' % new_n50)

    # Log-transform counts
    log_reads = np.log(reads)

    # Run K-Means. Reshape necessary because kmeans takes a matrix.
    kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1,1)))
    kmeans.predict(log_reads.reshape((-1,1)))

    # Take the cluster with the smallest mean
    min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]

    print('RPPU component means: ' + str(list(np.exp(kmeans.cluster_centers_))))
    print('RPPU component members: ' + str(np.bincount(kmeans.labels_)))

    # Take the max element in the min-cluster
    threshold = np.max(reads[kmeans.labels_ == min_cluster])

    return threshold
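A sketch of the same 1-D two-component K-means split on synthetic counts (a low PCR-artifact mode mixed with a high real-UMI mode; the distribution parameters are made up):

import numpy as np
from sklearn import cluster as sk_cluster

reads = np.concatenate([np.random.poisson(2, 500), np.random.poisson(50, 500)])
reads = reads[reads > 0]
log_reads = np.log(reads)
kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape(-1, 1))
min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]
print(np.max(reads[kmeans.labels_ == min_cluster]))   # threshold estimate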
PriorityQueue.py (project: hip-mdp-public, author: dtak)
def rebalance(self):
        """
        Rebalances the binary heap.  Takes O(n log n) time to run.
        Avoid using, when possible.
        """
        # Sort array by priority
        sorted_indices_by_priority = np.argsort(-self.pq_array[:,0])
        self.pq_array = self.pq_array[sorted_indices_by_priority]
        pq_indices = range(self.size)
        # Create hash tables
        self.pq_hash = dict(zip(pq_indices,self.pq_array[:,1]))
        self.exp_hash = dict(zip(self.pq_array[:,1],pq_indices))
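`np.argsort(-priorities)` sorts descending without a separate reverse step. On an illustrative (priority, experience_id) array:

import numpy as np

pq_array = np.array([[0.3, 7.0], [0.9, 2.0], [0.5, 5.0]])
order = np.argsort(-pq_array[:, 0])
print(pq_array[order])   # rows ordered by priority: 0.9, 0.5, 0.3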
RankOrderedAutoencoder.py (project: rank-ordered-autoencoder, author: paulbertens)
def rank_output(self):
        # reversing the flattened argsort assumes output_raw is a single (1, N)
        # row; the result lists feature indices from strongest to weakest
        self.output_ranks = np.argsort(self.output_raw, axis=1, kind='mergesort').ravel()[::-1].astype(np.int32)
emojicam.py (project: emojidetect, author: stnmrshx)
def overlay_emojiface(probs):
    if max(probs) > 0.8:
        emotion = emotions[np.argmax(probs)]
        return 'emoji/{}-{}.png'.format(emotion, emotion)
    else:
        index1, index2 = np.argsort(probs)[::-1][:2]
        emotion1 = emotions[index1]
        emotion2 = emotions[index2]
        return 'emoji/{}-{}.png'.format(emotion1, emotion2)
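The else branch picks the two most probable emotions with a descending argsort. With made-up probabilities:

import numpy as np

probs = np.array([0.1, 0.5, 0.3, 0.1])
index1, index2 = np.argsort(probs)[::-1][:2]
print(index1, index2)   # 1 2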
__init__.py (project: wmd-relax, author: src-d)
def __call__(self, words, weights, vocabulary_max):
        if len(words) < vocabulary_max * self.trigger_ratio:
            return words, weights

        if not isinstance(words, numpy.ndarray):
            words = numpy.array(words)

        # Tail optimization does not help with very large vocabularies
        if len(words) > vocabulary_max * 2:
            indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
            indices = indices[-vocabulary_max:]
            words = words[indices]
            weights = weights[indices]
            return words, weights

        # Vocabulary typically consists of these three parts:
        # 1) the core - we found its border - `core_end` - 15%
        # 2) the body - 70%
        # 3) the minor tail - 15%
        # (1) and (3) are roughly the same size
        # (3) can be safely discarded, (2) can be discarded with care,
        # (1) shall never be discarded.

        sorter = numpy.argsort(weights)[::-1]
        weights = weights[sorter]
        trend_start = int(len(weights) * 0.2)
        trend_finish = int(len(weights) * 0.8)
        z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
                          numpy.log(weights[trend_start:trend_finish]),
                          1)
        exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
        avg_error = numpy.abs(weights[trend_start:trend_finish] -
                              exp_z[trend_start:trend_finish]).mean()
        tail_size = numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1])
        # guard against tail_size == 0, where a plain [:-tail_size] would empty the array
        keep = len(weights) - tail_size
        weights = weights[:keep][:vocabulary_max]
        words = words[sorter[:keep]][:vocabulary_max]

        return words, weights
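The method uses `argpartition` for very large vocabularies because it selects the k largest in O(n) without a full sort, while `argsort` costs O(n log n) but yields a total order; both pick the same set:

import numpy

weights = numpy.random.rand(1000)
k = 10
top_part = numpy.argpartition(weights, len(weights) - k)[-k:]
top_sort = numpy.argsort(weights)[-k:]
print(set(top_part) == set(top_sort))   # True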

