# Python np.flatnonzero() usage examples (collected snippets).
# Common imports assumed by the snippets below (each snippet comes from a
# different project; extra per-snippet dependencies such as matplotlib are
# not repeated here):
import itertools
import numpy as np

def numpy_groupby(values, keys):
""" Group a collection of numpy arrays by key arrays.
Yields (key_tuple, view_tuple) where key_tuple is the key grouped on and view_tuple is a tuple of views into the value arrays.
values: tuple of arrays to group
keys: tuple of sorted, numeric arrays to group by """
if len(values) == 0:
return
if len(values[0]) == 0:
return
for key_array in keys:
assert len(key_array) == len(keys[0])
for value_array in values:
assert len(value_array) == len(keys[0])
# The indices where any of the keys differ from the previous key become group boundaries
key_change_indices = np.logical_or.reduce(tuple(np.concatenate(([1], np.diff(key))) != 0 for key in keys))
group_starts = np.flatnonzero(key_change_indices)
group_ends = np.roll(group_starts, -1)
group_ends[-1] = len(keys[0])
for group_start, group_end in itertools.izip(group_starts, group_ends):
yield tuple(key[group_start] for key in keys), tuple(value[group_start:group_end] for value in values)
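
# Minimal usage sketch (not from the original source). numpy_groupby expects
# pre-sorted key arrays and relies on itertools.izip, i.e. Python 2 (swap in
# zip on Python 3).
example_keys = (np.array([0, 0, 1, 1, 1, 2]),)
example_values = (np.array([10., 20., 30., 40., 50., 60.]),)
groups = list(numpy_groupby(example_values, example_keys))
# Three groups, one per distinct key value:
# ((0,), (array([10., 20.]),)), ((1,), (array([30., 40., 50.]),)), ...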
def compute_clustering_accuracy(label1, label2):
"""
    From clustering_on_transcript_compatibility_counts; see the GitHub repository for the MIT license.
"""
uniq1,uniq2 = np.unique(label1),np.unique(label2)
# Create two dictionaries. Each will store the indices of each label
entries1,entries2 = {},{}
for label in uniq1: entries1[label] = set(np.flatnonzero((label1==label)))
for label in uniq2: entries2[label] = set(np.flatnonzero((label2==label)))
# Create an intersection matrix which counts the number of entries that overlap for each label combination
W = np.zeros((len(uniq1),len(uniq2)))
for i,j in itertools.product(range(len(uniq1)),range(len(uniq2))):
W[i,j]=len(entries1[uniq1[i]].intersection(entries2[uniq2[j]]))
# find the max weight matching
match_val = get_max_wt_matching(uniq1,uniq2,W)
# return the error rate
return (1-match_val/float(len(label1)))*100
def visualize(X, Y, classes, samples_per_class=10):
nb_classes = len(classes)
for y, cls in enumerate(classes):
idxs = np.flatnonzero(Y == y)
idxs = np.random.choice(idxs, samples_per_class, replace=False)
for i, idx in enumerate(idxs):
plt_idx = i * nb_classes + y + 1
plt.subplot(samples_per_class, nb_classes, plt_idx)
plt.imshow(X[idx], cmap='gray')
plt.axis('off')
if i == 0:
plt.title(cls)
#plt.show()
plt.savefig('img/data.png')
plt.clf()
def brute_radius_search(self, v, radius2=None, batchsize=1024):
v = v.ravel()
norm2 = self.get_dataset('norm2')
v_norm2 = np.sum(v * v)
candidates = []
ids = self.ids[:]
for i in xrange(0, self.num_ids, batchsize):
blockdata = self.data[i:i+batchsize, :]
dists = norm2[i:i+batchsize] + v_norm2 - 2 * np.dot(blockdata, v)
assert dists.ndim == 1
if radius2:
for j in np.flatnonzero(dists < radius2):
candidates.append((dists[j], ids[i+j]))
else:
for j in xrange(dists.shape[0]):
candidates.append((dists[j], ids[i+j]))
candidates.sort()
return candidates
def points_in_front(self, points, inverted=False, ret_indices=False):
'''
Given an array of points, return the points which lie either on the
plane or in the half-space in front of it (i.e. in the direction of
the plane normal).
points: An array of points.
inverted: When `True`, invert the logic. Return the points that lie
behind the plane instead.
ret_indices: When `True`, return the indices instead of the points
themselves.
'''
sign = self.sign(points)
if inverted:
mask = np.less_equal(sign, 0)
else:
mask = np.greater_equal(sign, 0)
indices = np.flatnonzero(mask)
return indices if ret_indices else points[indices]
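
# Standalone sketch of the same masking idea (not from the original source;
# self.sign is not shown in this snippet). For a plane through the origin
# with unit normal n, the "sign" is just the signed distance points.dot(n).
n = np.array([0., 0., 1.])
pts = np.array([[0., 0., 1.], [0., 0., -1.], [1., 2., 0.]])
signs = pts.dot(n)
in_front = pts[np.flatnonzero(signs >= 0)]   # keeps [0, 0, 1] and [1, 2, 0]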
def has_approx_support(m, m_hat, prob=0.01):
"""Returns 1 if model selection error is less than or equal to prob rate,
0 else.
NOTE: why does np.nonzero/np.flatnonzero create so much problems?
"""
m_nz = np.flatnonzero(np.triu(m, 1))
m_hat_nz = np.flatnonzero(np.triu(m_hat, 1))
upper_diagonal_mask = np.flatnonzero(np.triu(np.ones(m.shape), 1))
not_m_nz = np.setdiff1d(upper_diagonal_mask, m_nz)
intersection = np.in1d(m_hat_nz, m_nz) # true positives
not_intersection = np.in1d(m_hat_nz, not_m_nz) # false positives
true_positive_rate = 0.0
if len(m_nz):
true_positive_rate = 1. * np.sum(intersection) / len(m_nz)
true_negative_rate = 1. - true_positive_rate
false_positive_rate = 0.0
if len(not_m_nz):
false_positive_rate = 1. * np.sum(not_intersection) / len(not_m_nz)
return int(np.less_equal(true_negative_rate + false_positive_rate, prob))
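
# Minimal usage sketch (not from the original source): identical off-diagonal
# sparsity patterns give zero selection error, so the check returns 1.
m_example = np.array([[1.0, 0.5, 0.0],
                      [0.5, 1.0, 0.0],
                      [0.0, 0.0, 1.0]])
print(has_approx_support(m_example, m_example, prob=0.01))   # -> 1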
def get_clusters(C):
nonassigned = list(range(len(C)))
clusters = []
while nonassigned:
queue = set([nonassigned[0]])
clusters.append([])
while queue:
node = queue.pop()
clusters[-1].append(node)
nonassigned.remove(node)
queue.update(n for n in np.flatnonzero(C[node]) if n in nonassigned)
C = np.zeros_like(C)
for cluster in clusters:
for i in cluster:
C[i, cluster] = True
return clusters, C
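
# Minimal usage sketch (not from the original source): a symmetric boolean
# connectivity matrix with two connected components.
C_example = np.array([[1, 1, 0],
                      [1, 1, 0],
                      [0, 0, 1]], dtype=bool)
clusters, C_filled = get_clusters(C_example)
# clusters == [[0, 1], [2]]; C_filled marks full within-cluster connectivity.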
def __init__(self, geom, allowed=None):
self._coords = []
geom = geom.supercell()
dist = geom.dist(geom)
radii = np.array([get_property(sp, 'covalent_radius') for sp in geom.species])
bondmatrix = dist < 1.3*(radii[None, :]+radii[:, None])
self.fragments, C = get_clusters(bondmatrix)
radii = np.array([get_property(sp, 'vdw_radius') for sp in geom.species])
shift = 0.
C_total = C.copy()
while not C_total.all():
bondmatrix |= ~C_total & (dist < radii[None, :]+radii[:, None]+shift)
C_total = get_clusters(bondmatrix)[1]
shift += 1.
for i, j in combinations(range(len(geom)), 2):
if bondmatrix[i, j]:
bond = Bond(i, j, C=C)
self.append(bond)
for j in range(len(geom)):
for i, k in combinations(np.flatnonzero(bondmatrix[j, :]), 2):
ang = Angle(i, j, k, C=C)
if ang.eval(geom.coords) > pi/4:
self.append(ang)
for bond in self.bonds:
self.extend(get_dihedrals([bond.i, bond.j], geom.coords, bondmatrix, C))
def pinv(A, log=lambda _: None):
U, D, V = np.linalg.svd(A)
thre = 1e3
thre_log = 1e8
gaps = D[:-1]/D[1:]
try:
n = np.flatnonzero(gaps > thre)[0]
except IndexError:
n = len(gaps)
else:
gap = gaps[n]
if gap < thre_log:
log('Pseudoinverse gap of only: {:.1e}'.format(gap))
D[n+1:] = 0
D[:n+1] = 1/D[:n+1]
return U.dot(np.diag(D)).dot(V)
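
# Minimal usage sketch (not from the original source): for a well-conditioned
# square matrix there is no large singular-value gap, nothing is truncated,
# and the result matches the ordinary inverse.
A_example = np.diag([4.0, 2.0, 1.0])
print(np.allclose(pinv(A_example), np.linalg.inv(A_example)))   # -> True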
def inverse(self, encoded, duration=None):
'''Inverse static tag transformation'''
ann = jams.Annotation(namespace=self.namespace, duration=duration)
if np.isrealobj(encoded):
detected = (encoded >= 0.5)
else:
detected = encoded
for vd in self.encoder.inverse_transform(np.atleast_2d(detected))[0]:
vid = np.flatnonzero(self.encoder.transform(np.atleast_2d(vd)))
ann.append(time=0,
duration=duration,
value=vd,
confidence=encoded[vid])
return ann
def quant2ind(U):
"""
Use quantization matrix to return slices of non-zero elements.
:param U: quantization matrix
:return: list of `slice`s for non-zero elements of U
.. note:: This function assumes that the pulsar TOAs were sorted by time.
"""
inds = []
for cc, col in enumerate(U.T):
epinds = np.flatnonzero(col)
if epinds[-1] - epinds[0] + 1 != len(epinds):
raise ValueError('ERROR: TOAs not sorted properly!')
inds.append(slice(epinds[0], epinds[-1]+1))
return inds
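
# Minimal usage sketch (not from the original source): each column of U flags
# one contiguous block of time-sorted TOAs.
U_example = np.array([[1, 0],
                      [1, 0],
                      [0, 1]])
print(quant2ind(U_example))   # -> [slice(0, 2, None), slice(2, 3, None)]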
def find(a,n=None,d=None,nargout=1):
if d:
raise NotImplementedError
    # There is no guarantee that nonzero or flatnonzero will index the
    # argument without first converting it to an array, so we use asarray
    # instead of asanyarray.
if nargout == 1:
i = np.flatnonzero(np.asarray(a)).reshape(1,-1)+1
if n is not None:
i = i.take(n)
return matlabarray(i)
if nargout == 2:
i,j = np.nonzero(np.asarray(a))
if n is not None:
i = i.take(n)
j = j.take(n)
return (matlabarray((i+1).reshape(-1,1)),
matlabarray((j+1).reshape(-1,1)))
raise NotImplementedError
def error_contour(self, phi, epsilon = 2.0, delta_phi = None):
"""Error between worm shape and contour from image given implicitly by phi
Arguments:
phi (array): implicit worm contour, if none calculated from actual shape
delta_phi (number): range of phi values to consider for error estimate
epsilon (number): refularization parameter for the step functions
curvature (bool): error due to curvature on phi
"""
phi2 = self.phi(size = phi.shape);
if delta_phi is None:
error = np.sum((np.tanh(phi / epsilon)-np.tanh(phi2 / epsilon))**2);
else:
idx = np.flatnonzero(np.logical_and(phi2 <= delta_phi, phi2 >= -delta_phi))
error =np.sum((np.tanh(phi[idx] / epsilon)-np.tanh(phi2[idx] / epsilon))**2);
return error;
def compute_EER(Pfa, Pmiss):
""" computes the equal error rate (EER) given
Pmiss or False Negative Rate
and
Pfa or False Positive Rate
calculated for a range of operating points on the DET curve
@Author: "Timothee Kheyrkhah, Omid Sadjadi"
"""
fpr = Pfa
fnr = Pmiss
diff_pm_fa = fnr - fpr
x1 = np.flatnonzero(diff_pm_fa >= 0)[0]
x2 = np.flatnonzero(diff_pm_fa < 0)[-1]
a = (fnr[x1] - fpr[x1]) / (fpr[x2] - fpr[x1] - (fnr[x2] - fnr[x1]))
return fnr[x1] + a * (fnr[x2] - fnr[x1])
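
# Minimal usage sketch (not from the original source): a toy DET curve whose
# false-positive and false-negative rates cross at 0.25.
Pfa_example = np.array([0.4, 0.3, 0.2, 0.1])
Pmiss_example = np.array([0.1, 0.2, 0.3, 0.4])
print(compute_EER(Pfa_example, Pmiss_example))   # -> 0.25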
def visual_data(self):
print "visualized some cifar-10 picture..."
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cl in enumerate(classes):
idxs = np.flatnonzero(self.y_train == y)
idxs = np.random.choice(idxs, samples_per_class, replace=False)
for i, idx in enumerate(idxs):
plt_idx = i * num_classes + y + 1
plt.subplot(samples_per_class, num_classes, plt_idx)
plt.imshow(self.X_train[idx].astype('uint8'))
plt.axis('off')
if i == 0:
plt.title(cl)
plt.show()
print "visualized data done...\n---------\n"
#
def imcrop_tosquare(img):
"""Make any image a square image.
Parameters
----------
img : np.ndarray
Input image to crop, assumed at least 2d.
Returns
-------
crop : np.ndarray
Cropped image.
"""
size = np.min(img.shape[:2])
extra = img.shape[:2] - size
crop = img
for i in np.flatnonzero(extra):
crop = np.take(crop, extra[i] // 2 + np.r_[:size], axis=i)
return crop
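
# Minimal usage sketch (not from the original source): a 4x6 image is cropped
# symmetrically along its longer axis down to 4x4.
img_example = np.arange(24).reshape(4, 6)
print(imcrop_tosquare(img_example).shape)   # -> (4, 4)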
def boundary_nodes(fs):
"""Find the list of boundary nodes in fs. This is a
unit-square-specific solution. A more elegant solution would employ
the mesh topology and numbering.
"""
eps = 1.e-10
f = Function(fs)
def on_boundary(x):
"""Return 1 if on the boundary, 0. otherwise."""
if x[0] < eps or x[0] > 1 - eps or x[1] < eps or x[1] > 1 - eps:
return 1.
else:
return 0.
f.interpolate(on_boundary)
return np.flatnonzero(f.values)
def computeEndToEnd(endtoendDistances, polWeights):
# Initiate variables
weightedEndtoendSq=np.zeros(c.nBeads)
weightedEndtoendSqStd=np.zeros(c.nBeads)
# Loop over all possible polymer lengths
for z in range(c.nBeads):
# Get data
t1 = np.squeeze(np.asarray(endtoendDistances)[:,z])
t2 = np.squeeze(np.asarray(polWeights)[:,z]);
        # Count non-zero weights
        dataLength = len( np.flatnonzero(t2!=0) );
        # Calculate mean and standard deviation of the mean
weightedEndtoendSq[z]=np.average(t1, weights=t2)
weightedEndtoendSqStd[z]=( (np.average((t1 - weightedEndtoendSq[z])**2, weights=t2)) / (dataLength))**(1/2)
return weightedEndtoendSq, weightedEndtoendSqStd
# Calculate gyradius and errors
def computeGyradiusStd(gyradiusSq, polWeights):
# Calc Weighted gyradius and standard deviation
# Initiate variables
weightedGyradiusSq=np.zeros(c.nBeads)
weightedGyradiusSqStd=np.zeros(c.nBeads)
# Loop over all possible polymer lengths
for z in range(c.nBeads):
# Get weight for bead number z in all polymers
w = np.squeeze(np.asarray(polWeights)[:,z])
# Count nonzero
dataLength = len( np.flatnonzero(w!=0) )
        # Calculate mean and standard deviation of the mean
weightedGyradiusSq[z]=np.average(gyradiusSq[:,z], weights=w)
weightedGyradiusSqStd[z]=( (np.average((gyradiusSq[:,z] - weightedGyradiusSq[z])**2, weights=w)) / (dataLength))**(1/2)
return weightedGyradiusSq, weightedGyradiusSqStd
def set_dropout(self):
n_nodes = self.n_nodes
dropout=np.random.binomial([np.ones(sum(n_nodes))],
self.DORATE)[0].astype(np.bool)
dropout[0:n_nodes[0]]=False # no dropout units in first layer
dropout[-n_nodes[-1]:]=False # no dropout units in last layer
self.dropout = dropout
    # ths_d: boolean mask marking which weight entries are disabled by dropout
self.ths_d = np.zeros(len(self.ths)).astype(np.bool)
for l in range(len(self.ths_l)):
if (l<len(self.ths_l)-1):
            # mark weights whose input node is dropped out
for i in np.flatnonzero(self.__get_dropout(l)):
self.ths_d[self.ths_l[l]+\
                # the +1 skips the bias weight at the start of each row
np.arange(n_nodes[l+1])*(n_nodes[l]+1)+(i+1)] = True
        # mark weights whose output node is dropped out
for i in np.flatnonzero(self.__get_dropout(l+1)):
self.ths_d[self.ths_l[l]+i*(n_nodes[l]+1):\
self.ths_l[l]+(i+1)*(n_nodes[l]+1)] = True
def mapToNewPos(curposIDs, bigtosmall):
''' Convert list of old ids to new positions after bigtosmall reordering.
Example
-------
>>> curposIDs = [0, 2, 4]
>>> N = [11, 9, 3, 1, 5]
>>> bigtosmall = argsort_bigtosmall_stable(N)
>>> print bigtosmall
[0 1 4 2 3]
>>> newposIDs = mapToNewPos(curposIDs, bigtosmall)
>>> print newposIDs
[0, 3, 2]
'''
newposIDs = np.zeros_like(curposIDs)
for posID in range(len(curposIDs)):
newposIDs[posID] = np.flatnonzero(bigtosmall == curposIDs[posID])[0]
return newposIDs.tolist()
def findMergePairByUID(self, uidA, uidB):
''' Find which currently tracked merge pair contains desired uids.
Returns
-------
rowID : int
index of tracked merge quantities related to specific uid pair.
'''
assert hasattr(self, 'mUIDPairs')
rowID = np.flatnonzero(
np.logical_and(uidA == self.mUIDPairs[:, 0],
uidB == self.mUIDPairs[:, 1]))
if not rowID.size == 1:
raise ValueError(
'Bad search for correct merge UID pair.\n' + str(rowID))
rowID = rowID[0]
return rowID
def _discardAnyTrackedPairsThatOverlapWithAorB(self, uidA, uidB):
''' Update to discard remaining pairs that overlap uidA/uidB.
Post Condition
--------------
Attributes mUIDPairs and _MergeTerms dont have any more info
about other pairs (uidj,uidk) where where uidA or uidB are involved.
'''
if hasattr(self, 'mUIDPairs'):
mUIDPairs = self.mUIDPairs
# Remove any other pairs associated with kA or kB
keepRowIDs = ((mUIDPairs[:, 0] != uidA) *
(mUIDPairs[:, 1] != uidA) *
(mUIDPairs[:, 0] != uidB) *
(mUIDPairs[:, 1] != uidB))
keepRowIDs = np.flatnonzero(keepRowIDs)
self.setMergeUIDPairs(mUIDPairs[keepRowIDs])
# Remove any other pairs related to kA, kB
if self.hasMergeTerms():
for key, dims in self._MergeTerms._FieldDims.items():
mArr = getattr(self._MergeTerms, key)
if dims[0] == 'M':
mArr = mArr[keepRowIDs]
self._MergeTerms.setField(key, mArr, dims=dims)
def makePlanForEmptyComps(curSS, dtargetMinCount=0.01, **kwargs):
""" Create a Plan dict for any empty states.
Returns
-------
Plan : dict with either no fields, or two fields named
* candidateIDs
* candidateUIDs
Any "empty" Plan dict indicates that no empty comps exist.
"""
Nvec = curSS.getCountVec()
emptyIDs = np.flatnonzero(Nvec < dtargetMinCount)
if len(emptyIDs) == 0:
return dict()
Plan = dict(candidateIDs=emptyIDs.tolist(),
candidateUIDs=curSS.uIDs[emptyIDs].tolist(),
)
return Plan
def argsortBigToSmallByTiers(tierAScores, tierBScores):
    ''' Perform a big-to-small argsort by the first array, breaking ties with the second.
Example
-------
>>> AScores = [1, 1, 1, 0, 0]
>>> BScores = [6, 5, 4, 8, 7]
>>> print argsortBigToSmallByTiers(AScores, BScores)
[0 1 2 3 4]
>>> AScores = [1, 1, 1, 0, 0, -1, -1, -1]
>>> BScores = [6, 5, 4, 8, 7, 11, 1.5, -1.5]
>>> print argsortBigToSmallByTiers(AScores, BScores)
[0 1 2 3 4 5 6 7]
>>> print argsortBigToSmallByTiers(BScores, AScores)
[5 3 4 0 1 2 6 7]
'''
tierAScores = np.asarray(tierAScores, dtype=np.float64)
sortids = argsort_bigtosmall_stable(tierAScores)
limits = np.flatnonzero(np.diff(tierAScores[sortids]) != 0) + 1
sortids = sortids[
argsort_bigtosmall_stable(tierBScores, limits=limits)]
return sortids
def plot_sequence(seqID, Data, dimID=0, maxT=200):
Xseq = Data.X[Data.doc_range[seqID]:Data.doc_range[seqID + 1]]
Zseq = Data.TrueParams['Z'][
Data.doc_range[seqID]:Data.doc_range[
seqID +
1]]
Xseq = Xseq[:maxT, dimID] # Xseq is 1D after this statement!
Zseq = Zseq[:maxT]
# Plot X, colored by segments Z
changePts = np.flatnonzero(np.abs(np.diff(Zseq)))
changePts = np.hstack([0, changePts + 1])
for ii, loc in enumerate(changePts[:-1]):
nextloc = changePts[ii + 1]
ts = np.arange(loc, nextloc)
xseg = Xseq[loc:nextloc]
kseg = int(Zseq[loc])
color = GaussViz.Colors[kseg % len(GaussViz.Colors)]
pylab.plot(ts, xseg, '.-', color=color, markersize=8)
pylab.plot(
[nextloc - 1, nextloc], [Xseq[nextloc - 1], Xseq[nextloc]], 'k:')
pylab.ylim([-2, 14])
def calc_underprediction_scores_per_word(model, Data, LP=None, **kwargs):
''' Find scalar score for each vocab word. Larger => worse prediction.
'''
if LP is None:
LP = model.calc_local_params(Data)
DocWordFreq_emp = calcWordFreqPerDoc_empirical(Data)
DocWordFreq_model = calcWordFreqPerDoc_model(model, LP)
uError = np.maximum(DocWordFreq_emp - DocWordFreq_model, 0)
# For each word, identify set of relevant documents
DocWordMat = Data.to_sparse_docword_matrix().toarray()
score = np.zeros(Data.vocab_size)
# TODO: only consider words with many docs overall
for vID in xrange(Data.vocab_size):
countPerDoc = DocWordMat[:, vID]
typicalWordCount = np.median(countPerDoc[countPerDoc > 0])
candidateDocs = np.flatnonzero(countPerDoc > typicalWordCount)
if len(candidateDocs) < 10:
continue
score[vID] = np.mean(uError[candidateDocs, vID])
# Only give positive probability to words with above average score
score = score - np.mean(score)
score = np.maximum(score, 0)
score = score * score # make more peaked!
score /= score.sum()
return score
def _sample_target_GroupXData(Data, model, LP, **kwargs):
''' Draw sample subset of provided GroupXData dataset
'''
randstate = kwargs['randstate']
if not hasValidKey('targetCompID', kwargs):
raise NotImplementedError('TODO')
ktarget = kwargs['targetCompID']
targetProbThr = kwargs['targetCompFrac']
mask = LP['resp'][:, ktarget] > targetProbThr
objIDs = np.flatnonzero(mask)
if len(objIDs) < 2:
return None, dict()
randstate.shuffle(objIDs)
targetObjIDs = objIDs[:kwargs['targetMaxSize']]
TargetData = Data.select_subset_by_mask(atomMask=targetObjIDs,
doTrackFullSize=False)
TargetInfo = dict(ktarget=ktarget)
return TargetData, TargetInfo
def removeEmptyComps_SSandLP(self, SS, LP):
''' Remove all parameters related to empty components from SS and LP
Returns
--------
SS : bnpy SuffStatBag
LP : dict for local params
'''
badks = np.flatnonzero(SS.N[:-1] < 1)
# Remove in order, from largest index to smallest
for k in badks[::-1]:
SS.removeComp(k)
mask = LP['Z'] > k
LP['Z'][mask] -= 1
if 'resp' in LP:
del LP['resp']
return SS, LP