Python argpartition(): example source code from open-source projects
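
numpy.argpartition returns the indices that would partially sort an array: for each requested kth position, the index of the element that belongs there in a full sort lands at that position, with indices of smaller elements before it and larger elements after it, in otherwise arbitrary order. A minimal sketch of the basic call:

import numpy as np

a = np.array([9, 1, 7, 3, 5])
idx = np.argpartition(a, 2)     # position 2 holds the index of the 3rd-smallest value
print(a[idx])                   # e.g. [3 1 5 9 7]; the first three entries are the three smallest
print(np.sort(a[idx[:3]]))     # [1 3 5]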

test_multiarray.py (project: radar, author: amoose136)
def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
                           tgt)
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
                           tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
                        tgt[k])
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
                         tgt[k])

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])
inference-sample-error-analysis.py (project: youtube-8m, author: wangheda)
def format_lines(video_ids, predictions, labels, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    n_recall = max(int(numpy.sum(labels[video_index])), 1)
    # labels
    label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
    label_predictions = [(class_index, predictions[video_index][class_index]) 
                           for class_index in label_indices]
    label_predictions = sorted(label_predictions, key=lambda p: -p[1])
    label_str = "\t".join(["%d\t%f"%(x,y) for x,y in label_predictions])
    # predictions
    top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    top_k_predictions = [(class_index, predictions[video_index][class_index])
                         for class_index in top_k_indices]
    top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
    top_k_str = "\t".join(["%d\t%f"%(x,y) for x,y in top_k_predictions])
    # compute PERR
    top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
    positives = [labels[video_index][class_index] 
                 for class_index in top_n_indices]
    perr = sum(positives) / float(n_recall)
    # URL
    url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')
    yield url + "\t" + str(1-perr) + "\t" + top_k_str + "\t" + label_str + "\n"
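
PERR here is precision at equal recall rate: take as many top-scoring predictions as there are ground-truth labels, and measure the fraction that are correct (the line above reports 1-perr). A standalone sketch of that step on made-up data:

import numpy

labels = numpy.array([0, 1, 0, 1, 0])
preds = numpy.array([0.1, 0.9, 0.3, 0.2, 0.8])
n_recall = max(int(numpy.sum(labels)), 1)                 # two true labels
top_n = numpy.argpartition(preds, -n_recall)[-n_recall:]  # indices of the two highest scores
perr = sum(labels[top_n]) / float(n_recall)
print(perr)                                               # 0.5: of indices {1, 4}, only 1 is truly labeled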
sort.py (project: cupy, author: cupy)
def argpartition(a, kth, axis=-1):
    """Returns the indices that would partially sort an array.

    Args:
        a (cupy.ndarray): Array to be sorted.
        kth (int or sequence of ints): Element index to partition by. If
            supplied with a sequence of kth values, it partitions all the
            elements indexed by those values into their sorted positions at
            once.
        axis (int or None): Axis along which to sort. Default is -1, which
            means sort along the last axis. If None is supplied, the array is
            flattened before sorting.

    Returns:
        cupy.ndarray: Array of indices that partition ``a`` along the
            specified axis.

    .. note::
        Due to its implementation, ``cupy.argpartition`` fully sorts the
        given array, just as ``cupy.argsort`` does. It also does not support
        the ``kind`` and ``order`` parameters that ``numpy.argpartition``
        supports.

    .. seealso:: :func:`numpy.argpartition`

    """
    return a.argpartition(kth, axis=axis)
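
A minimal usage sketch, assuming CuPy is installed and a GPU is available:

import cupy

a = cupy.asarray([3.0, 1.0, 4.0, 1.5, 9.0])
idx = cupy.argpartition(a, 2)        # indices of the three smallest values come first
print(cupy.asnumpy(a[idx[:3]]))      # the three smallest values; order within them is not guaranteed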
CSMSSMTools.py (project: SlidingWindowVideoTDA, author: ctralie)
def CSMToBinary(D, Kappa):
    """
    Turn a cross-similarity matrix into a binary cross-similarity matrix.
    If Kappa = 0, take all neighbors.
    If Kappa < 1, it is the fraction of mutual neighbors to consider.
    Otherwise, Kappa is the number of mutual neighbors to consider.
    """
    N = D.shape[0]
    M = D.shape[1]
    if Kappa == 0:
        return np.ones((N, M))
    elif Kappa < 1:
        NNeighbs = int(np.round(Kappa*M))
    else:
        NNeighbs = Kappa
    J = np.argpartition(D, NNeighbs, 1)[:, 0:NNeighbs]
    I = np.tile(np.arange(N)[:, None], (1, NNeighbs))
    V = np.ones(I.size)
    [I, J] = [I.flatten(), J.flatten()]
    ret = sparse.coo_matrix((V, (I, J)), shape=(N, M))
    return ret.toarray()
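
The function assumes numpy is imported as np and scipy's sparse module is available. A small usage sketch on a toy matrix:

import numpy as np
from scipy import sparse  # used inside CSMToBinary

D = np.random.rand(5, 8)     # toy 5x8 cross-similarity matrix
B = CSMToBinary(D, Kappa=3)  # keep the 3 nearest neighbors of each row
print(B.sum(axis=1))         # every row has exactly 3 ones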
tfidf_doc_ranker.py (project: DrQA, author: facebookresearch)
def closest_docs(self, query, k=1):
        """Closest docs by dot product between query and documents
        in tfidf weighted word vector space.
        """
        spvec = self.text2spvec(query)
        res = spvec * self.doc_mat

        if len(res.data) <= k:
            o_sort = np.argsort(-res.data)
        else:
            o = np.argpartition(-res.data, k)[0:k]
            o_sort = o[np.argsort(-res.data[o])]

        doc_scores = res.data[o_sort]
        doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
        return doc_ids, doc_scores
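
The partition-then-sort pattern above is the standard way to get the top k scores in sorted order without sorting the whole array: argpartition isolates the k largest in O(n), and argsort then orders only those k. A standalone sketch:

import numpy as np

scores = np.random.rand(100000)
k = 5
top = np.argpartition(-scores, k)[:k]  # unordered indices of the 5 largest scores
top = top[np.argsort(-scores[top])]    # sort just those 5, descending
print(scores[top])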
my_utils.py (project: Y8M, author: mpekalski)
def bottom_top_k_along_row(arr, k, ordered=True):
    """ bottom and top k of a 2d np.array, along the rows    
    http://stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array/18691983
    """    
    assert k>0, "bottom_top_k_along_row/column() requires k>0."
    rows = arr.shape[0]
    if ordered:
        tmp = np.argsort(arr, axis=1)      
        idx_bot = tmp[:, :k]
        idx_top = tmp[:,-k:]
    else:
        idx_bot = np.argpartition(arr, k, axis=1)[:,:k]
        idx_top = np.argpartition(arr, -k, axis=1)[:,-k:]

    indices = np.concatenate((idx_bot, idx_top), axis=1)
    vals = arr[np.repeat(np.arange(rows), 2*k), indices.ravel()].reshape(rows,2*k)
    return vals, indices
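
A quick usage sketch on random data, assuming the function above is defined with numpy imported as np:

import numpy as np

arr = np.random.rand(4, 10)
vals, indices = bottom_top_k_along_row(arr, k=2)  # per row: 2 smallest, then 2 largest
print(vals.shape, indices.shape)                  # (4, 4) (4, 4)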
markov_model.py (project: sequence-based-recommendations, author: rdevooght)
def top_k_recommendations(self, sequence, k=10, exclude=None, **kwargs):
        if exclude is None:
            exclude = []

        last_item = int(sequence[-1][0])
        if last_item not in self.previous_recommendations:
            self.get_all_recommendations(last_item)

        all_recommendations = deepcopy(self.previous_recommendations[last_item])
        for s in sequence:
            all_recommendations[int(s[0])] = 0
        for i in exclude:
            all_recommendations[i] = 0

        ranking = np.zeros(self.n_items)
        for i, x in enumerate(all_recommendations.most_common(k)):
            ranking[x[0]] = k-i
        return np.argpartition(-ranking, range(k))[:k]
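
Passing range(k) as the kth argument puts each of the first k positions into its exact sorted place, so the slice [:k] is already ordered by descending ranking. A standalone illustration:

import numpy as np

scores = np.array([0.1, 0.9, 0.4, 0.7, 0.3])
k = 3
top = np.argpartition(-scores, range(k))[:k]
print(top)  # [1 3 2]: indices in descending score order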
fpmc.py (project: sequence-based-recommendations, author: rdevooght)
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
        ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
        '''

        if exclude is None:
            exclude = []

        last_item = sequence[-1][0]
        output = np.dot(self.V_user_item[user_id, :], self.V_item_user.T) + np.dot(self.V_prev_next[last_item, :], self.V_next_prev.T)

        # Put low similarity to viewed items to exclude them from recommendations
        output[[i[0] for i in sequence]] = -np.inf
        output[exclude] = -np.inf

        # find top k according to output
        return list(np.argpartition(-output, range(k))[:k])
fossil.py (project: sequence-based-recommendations, author: rdevooght)
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
        ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
        '''

        if exclude is None:
            exclude = []

        user_items = [i[0] for i in sequence]
        output = self.item_score(user_id, user_items)

        # Put low similarity to viewed items to exclude them from recommendations
        output[[i[0] for i in sequence]] = -np.inf
        output[exclude] = -np.inf

        # find top k according to output
        return list(np.argpartition(-output, range(k))[:k])
bprmf.py (project: sequence-based-recommendations, author: rdevooght)
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
        ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
        '''

        if exclude is None:
            exclude = []

        last_item = sequence[-1][0]
        output = self.bias + np.dot(self.V[user_id, :], self.H.T)

        # Put low similarity to viewed items to exclude them from recommendations
        output[[i[0] for i in sequence]] = -np.inf
        output[exclude] = -np.inf

        # find top k according to output
        return list(np.argpartition(-output, range(k))[:k])
stacked_denoising_autoencoder.py (project: sequence-based-recommendations, author: rdevooght)
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None, **kwargs):
        ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)
        '''

        # Compile network if needed
        if not hasattr(self, 'predict_function'):
            self._compile_predict_function()

        # Prepare RNN input
        X = np.zeros((1, self._input_size())) # input of the RNN
        X[0, :] = self._one_hot_encoding([i[0] for i in sequence])

        # Run RNN
        output = self.predict_function(X.astype(theano.config.floatX))[0]

        # Put low similarity to viewed items to exclude them from recommendations
        output[[i[0] for i in sequence]] = -np.inf
        output[exclude] = -np.inf

        # find top k according to output
        return list(np.argpartition(-output, range(k))[:k])
rnn_sampling.py (project: sequence-based-recommendations, author: rdevooght)
def _compile_test_function(self):
        ''' Differs from base test function because of the added softmax operation
        '''
        print("Compiling test...")
        deterministic_output = T.nnet.softmax(lasagne.layers.get_output(self.l_out, deterministic=True))
        if self.interactions_are_unique:
            deterministic_output *= (1 - self.exclude)

        theano_test_function = theano.function(self.theano_inputs, deterministic_output, allow_input_downcast=True, name="Test_function", on_unused_input='ignore')

        def precision_test_function(theano_inputs, k=10):
            output = theano_test_function(*theano_inputs)
            ids = np.argpartition(-output, range(k), axis=-1)[0, :k]

            return ids

        self.test_function = precision_test_function
        print("Compilation done.")
utils.py (project: sockeye, author: awslabs)
def smallest_k(matrix: np.ndarray, k: int,
               only_first_row: bool = False) -> Tuple[Tuple[np.ndarray, np.ndarray], np.ndarray]:
    """
    Find the k smallest elements in a numpy matrix.

    :param matrix: Any matrix.
    :param k: The number of smallest elements to return.
    :param only_first_row: If true the search is constrained to the first row of the matrix.
    :return: The row indices, column indices and values of the k smallest items in matrix.
    """
    if only_first_row:
        flatten = matrix[:1, :].flatten()
    else:
        flatten = matrix.flatten()

    # args are the indices in flatten of the k smallest elements
    args = np.argpartition(flatten, k)[:k]
    # args are the indices in flatten of the sorted k smallest elements
    args = args[np.argsort(flatten[args])]
    # flatten[args] are the values for args
    return np.unravel_index(args, matrix.shape), flatten[args]
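
A usage sketch on a toy matrix (the function itself additionally needs numpy imported as np and typing.Tuple):

import numpy as np

m = np.array([[4.0, 0.5, 2.0],
              [3.0, 1.0, 9.0]])
(rows, cols), vals = smallest_k(m, k=3)
print(vals)        # [0.5 1.  2. ]
print(rows, cols)  # [0 1 0] [1 1 2]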
knn.py (project: cebl, author: idfah)
def probs(self, x):
        dists = np.hstack([self.distFunc(x, cls) for cls in self.trainData])
        indices = np.argpartition(dists, self.k, axis=1)[:,:self.k]

        #start = 0
        #votes = list()
        #for cls in self.trainData:
        #    end = start + cls.shape[0]
        #    votes.append(np.sum(np.logical_and(start <= indices, indices < end), axis=1))
        #    start = end

        ends = np.cumsum([len(cls) for cls in self.trainData])
        starts = ends - np.array([len(cls) for cls in self.trainData])
        votes = [np.sum(np.logical_and(start <= indices, indices < end), axis=1)
                 for start, end in zip(starts, ends)]
        votes = np.vstack(votes).T

        #probs = np.zeros((x.shape[0], self.nCls))
        #probs[np.arange(probs.shape[0]), np.argmax(votes, axis=1)] = 1.0
        ##probs = util.softmax(votes / float(self.k))
        probs = votes / float(self.k)

        return probs
argmaxk.py (project: hyperstar, author: nlpub)
def argmaxk_rows_opt1(arr, k=10, sort=False):
    """
    Optimized implementation. When sort=False it is equal to argmaxk_rows_basic. When sort=True and
    k << arr.shape[1], it should be faster, because we argsort only the subarray of the k max elements
    from each row of arr (arr.shape[0] x k) instead of the whole array arr (arr.shape[0] x arr.shape[1]).
    """
    best_inds = np.argpartition(arr, kth=-k, axis=1)[:, -k:]  # column indices of k max elements in each row (m x k)
    if not sort:
        return best_inds
    # generate row indices corresponding to best_ids (just current row id in each row) (m x k)
    rows = np.arange(best_inds.shape[0], dtype=np.intp)[:, np.newaxis].repeat(best_inds.shape[1], axis=1)
    best_elems = arr[rows, best_inds]  # select k max elements from each row using advanced indexing (m x k)
    # indices which sort each row of best_elems in descending order (m x k)
    best_elems_inds = np.argsort(best_elems, axis=1)[:, ::-1]
    # reorder best_indices so that arr[i, sorted_best_inds[i,:]] will be sorted in descending order
    sorted_best_inds = best_inds[rows, best_elems_inds]
    return sorted_best_inds
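
A quick check of the sorted variant on a small array:

import numpy as np

arr = np.array([[1, 5, 3, 2],
                [9, 0, 4, 8]])
print(argmaxk_rows_opt1(arr, k=2, sort=True))
# [[1 2]
#  [0 3]]: per row, indices of the two largest values in descending order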
graphgenerator.py (project: semihin, author: HKUST-KnowComp)
def generateCosineNeighborGraph(hin,kNeighbors=10,tf_param={'word':True, 'entity':False, 'we_weight':1}):
        X, newIds, entIds = GraphGenerator.getTFVectorX(hin,param=tf_param)
        cosX = cosine_similarity(X)
        #return sparse.csc_matrix(X.dot(X.transpose())),newIds
        n = cosX.shape[0]
        graph = np.zeros((n,n))
        tic = time.time()
        for i in range(n):
            for j in np.argpartition(-cosX[i],kNeighbors)[:kNeighbors]:
                if j == i:
                    continue
                #graph[i, j] += cosX[i, j]
                #graph[j, i] += cosX[i, j]
                graph[i, j] += 1
                graph[j, i] += 1
        toc = time.time() - tic

        return sparse.csc_matrix(graph), newIds
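
These helpers additionally assume numpy as np, time, scipy.sparse as sparse, and sklearn's cosine_similarity are imported. The inner loop's np.argpartition(-cosX[i], kNeighbors)[:kNeighbors] picks each row's k most similar rows, which may include the row itself, hence the j == i check. A minimal sketch of that step:

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

X = np.random.rand(6, 4)
cosX = cosine_similarity(X)
nbrs = np.argpartition(-cosX[0], 3)[:3]  # row 0's three most similar rows, unordered
print(nbrs)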
graphgenerator.py (project: semihin, author: HKUST-KnowComp)
def generateCosineNeighborGraphfromX(X, kNeighbors=10):
        cosX = cosine_similarity(X)
        # return sparse.csc_matrix(X.dot(X.transpose())),newIds
        #print cosX.shape
        n = cosX.shape[0]
        graph = np.zeros((n, n))
        tic = time.time()
        for i in range(n):
            for j in np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]:
                if j == i:
                    continue
                # graph[i, j] += cosX[i, j]
                # graph[j, i] += cosX[i, j]
                graph[i, j] += 1
                graph[j, i] += 1
        toc = time.time() - tic
        #print 'graph generation done in %f seconds.' % toc
        return sparse.csc_matrix(graph)
feature_grid_search.py (project: semihin, author: HKUST-KnowComp)
def generate_laplacian_score_scalar(X_ent, X_word, kNeighbors):
    # Generate cosine similarity graph
    n = X_ent.shape[0]
    cosX = cosine_similarity(X_word)
    graph = np.zeros((n, n))
    for i in range(n):
        for j in np.argpartition(cosX[i], -kNeighbors)[-kNeighbors:]:
            if j == i:
                continue
            graph[i, j] = cosX[i, j]
            graph[j, i] = cosX[i, j]

    D = sparse.diags([graph.sum(axis=0)], [0])
    L = D - graph
    f_tilde = X_ent - (float(X_ent.transpose() * D * np.ones((n, 1))) / D.sum().sum()) * np.ones((n, 1))
    score = float(f_tilde.transpose() * L * f_tilde) / float(f_tilde.transpose() * D * f_tilde + 1e-10)
    laplacian_score = score
    return laplacian_score
representation.py (project: harpreif, author: harpribot)
def compute_nearest_neighbors(self, num_neighbors):
        result_list = []
        for key, value in self.im2index.items():
            neighbor_list = [key]
            similarity_scores = self.similarity_mat[value]
            # drop the best match, which is the key image itself
            ind = np.argpartition(similarity_scores, -(num_neighbors + 1))[-(num_neighbors + 1):-1]
            ind = ind[np.argsort(similarity_scores[ind])]
            neighbors = [self.index2im[x] for x in ind]
            neighbor_list.extend(neighbors)

            result_list.append(neighbor_list)

        # compute neighbor statistics
        NearestNeighbour.compute_neighbor_stats(result_list, num_neighbors)

        # plot the TSNE plot
        self.plot_tsne()

        return result_list
composite_feature.py (project: marvin, author: aikanor)
def _calculate_topk_ndces(self, k):
        """
        Calculate the indices of the k specialists with highest b-value, 
        including the base classifier regardless of its b-value. 

        Args:
            k: int >= 0, approximately specifying the number of derived specialists to select. 
                Precisely, the best k (by Wilson error bound) are taken, along with the 
                base classifier if it is not already one of the best k. 

        Returns:
            A list containing the indices of the top k classifiers. 
            The list always at least contains the base classifier's index (i.e. 0).
            Therefore, the list is of length k if the base classifier is one of the top k, 
            and length k+1 otherwise. If k is greater than the total number of derived 
            specialists, returns all of them.
        """
        assert self.label_corrs is not None , "Label correlations must be calculated before top k indices."
        if k < len(self.label_corrs):
            topk_ndces = set(np.argpartition(-self.label_corrs, k)[:k])  #Only does a partial sort of b!
        else:
            topk_ndces = set(range(len(self.label_corrs)))
        topk_ndces.add(0)
        return list(topk_ndces & set(self._relevant_ndces))
matutils.py (project: nonce2vec, author: minimalparts)
def argsort(x, topn=None, reverse=False):
    """
    Return indices of the `topn` smallest elements in array `x`, in ascending order.

    If reverse is True, return the greatest elements instead, in descending order.

    """
    x = np.asarray(x)  # unify code path for when `x` is not a np array (list, tuple...)
    if topn is None:
        topn = x.size
    if topn <= 0:
        return []
    if reverse:
        x = -x
    if topn >= x.size or not hasattr(np, 'argpartition'):
        return np.argsort(x)[:topn]
    # np >= 1.8 has a fast partial argsort, use that!
    most_extreme = np.argpartition(x, topn)[:topn]
    return most_extreme.take(np.argsort(x.take(most_extreme)))  # resort topn into order
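
A usage sketch, assuming the function above is defined with numpy imported as np:

import numpy as np

x = [0.2, 0.9, 0.1, 0.5]
print(argsort(x, topn=2))                # [2 0]: two smallest, ascending
print(argsort(x, topn=2, reverse=True))  # [1 3]: two largest, descending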
lmnn.py (project: pylmnn, author: johny-c)
def _select_target_neighbors(self):
        """Find the target neighbors of each sample, that stay fixed during training.

        Returns
        -------
        array_like
            An array of neighbors indices for each sample with shape (n_samples, n_neighbors).

        """

        self.logger.info('Finding target neighbors...')
        target_neighbors = np.empty((self.X_.shape[0], self.n_neighbors_), dtype=int)
        for class_ in self.classes_:
            class_ind, = np.where(np.equal(self.y_, class_))
            dist = euclidean_distances(self.X_[class_ind], squared=True)
            np.fill_diagonal(dist, np.inf)
            neigh_ind = np.argpartition(dist, self.n_neighbors_ - 1, axis=1)
            neigh_ind = neigh_ind[:, :self.n_neighbors_]
            # argpartition doesn't guarantee sorted order, so we sort again but only the k neighbors
            row_ind = np.arange(len(class_ind))[:, None]
            neigh_ind = neigh_ind[row_ind, np.argsort(dist[row_ind, neigh_ind])]
            target_neighbors[class_ind] = class_ind[neigh_ind]

        return target_neighbors
search.py (project: hred-latent-piecewise, author: julianser)
def select_next_words(self, next_costs, next_probs, step_num, how_many):
        # Pick only on the first line (for the beginning of sampling)
        # This will avoid duplicate <q> token.
        if step_num == 0:
            flat_next_costs = next_costs[:1, :].flatten()
        else:
            # Set the next cost to infinity for finished utterances (they will
            # be replaced by other utterances in the beam)
            flat_next_costs = next_costs.flatten()

        voc_size = next_costs.shape[1]

        args = numpy.argpartition(flat_next_costs, how_many)[:how_many]
        args = args[numpy.argsort(flat_next_costs[args])]

        return numpy.unravel_index(args, next_costs.shape), flat_next_costs[args]
beam.py (project: nmt, author: Playinf)
def find_nbest(score, n, threshold=None):
    num_vars = score.shape[1]

    score = score.flatten()
    nbest = np.argpartition(score, n)[:n]

    beam_indices = nbest // num_vars  # integer division: recover the beam (row) index
    var_indices = nbest % num_vars
    nbest_score = score[nbest]

    if threshold:
        best = np.max(nbest_score)
        cond = nbest_score > best + threshold
        nbest_score = nbest_score[cond]
        beam_indices = beam_indices[cond]
        var_indices = var_indices[cond]

    return nbest_score, beam_indices, var_indices
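
A toy run with costs (lower is better) and no threshold, assuming the function above is defined with numpy imported as np; note that argpartition leaves the order of the n best unspecified:

import numpy as np

score = np.array([[2.0, 0.5, 3.0],
                  [1.0, 4.0, 0.2]])
nbest_score, beams, variables = find_nbest(score, 2)
print(nbest_score)       # the two lowest costs, 0.2 and 0.5, in arbitrary order
print(beams, variables)  # their row (beam) and column indices in the original matrix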
tfidf_retrieval.py (project: ADEM, author: mike-n-7)
def tfidf_retrieval(tfidf_vec, train_contexts_txt, train_responses_txt, output_file):
    print(type(tfidf_vec))
    tfidf_vec = tfidf_vec.toarray()
    print(tfidf_vec.shape)
    prod_mat = np.dot(tfidf_vec, tfidf_vec.T)
    print(prod_mat.shape)
    prod_mat = prod_mat / mat_vector_2norm_squared(tfidf_vec)
    print(prod_mat.shape)

    response_list = []
    for i in range(len(prod_mat)):
        row = prod_mat[i]
        # Take the indices of the two largest entries (the largest is the row's
        # self-match), then keep the second largest as the retrieved response. See:
        # stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array
        ind = np.argpartition(row, -2)[-2:]
        ind = ind[np.argsort(row[ind])][0]
        response_list.append(train_responses_txt[ind])
        print(train_contexts_txt[i])
        print(response_list[i])

    with open(output_file, 'w') as f1:
        for response in response_list:
            f1.write(response)
visualize.py (project: workspace, author: nojima)
def visualize_frequent_words(vectors_2d: np.ndarray, dataset: DataSet, k: int, ax: plt.Axes = None) -> None:
    word_ids, counts = np.unique(dataset.data, return_counts=True)

    indices = np.argpartition(-counts, k)[:k]
    frequent_word_ids = word_ids[indices]

    if ax is None:
        fig, ax = plt.subplots(figsize=(13, 13))
    else:
        fig = None

    vectors_2d = vectors_2d[frequent_word_ids]

    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1], s=2, alpha=0.25)
    for i, id in enumerate(frequent_word_ids):
        ax.annotate(dataset.vocabulary.to_word(id), (vectors_2d[i, 0], vectors_2d[i, 1]))

    if fig is not None:
        fig.tight_layout()
        fig.show()

