Python unique() usage examples

DCWidget_Overburden_2_5D.py (project: em_examples, author: geoscixyz)
def get_Surface_Potentials(mtrue, survey, src, field_obj):

    phi = field_obj['phi']
    CCLoc = mesh.gridCC  # mesh is a module-level global in the source script
    XLoc = np.unique(mesh.gridCC[:, 0])
    surfaceInd, zsurfaceLoc = get_Surface(mtrue, XLoc)
    phiSurface = phi[surfaceInd]
    phiScale = 0.

    if survey in ("Pole-Dipole", "Pole-Pole"):
        refInd = Utils.closestPoints(mesh, [xmax+60., 0.], gridLoc='CC')  # xmax is a module-level global
        # refPoint =  CCLoc[refInd]
        # refSurfaceInd = np.where(xSurface == refPoint[0])
        # phiScale = np.median(phiSurface)
        phiScale = phi[refInd]
        phiSurface = phiSurface - phiScale

    return XLoc, phiSurface, phiScale
unique.py (project: lps-anchor-pos-estimator, author: bitcraze)
def unique(eq):
    eq = eqsize(eq)
    # hash each element so duplicates can be detected
    c1 = [None] * eq.size
    for i in range(0, eq.size):
        c1[i] = hash(eq[i])

    c1 = np.asarray(c1)

    if c1.ndim == 1:
        _, ia, ic = np.unique(c1, return_index=True, return_inverse=True)
        u = eq[ia]

    else:
        a = c1
        # view each row as a single opaque item so np.unique deduplicates rows
        b = np.ascontiguousarray(a).view(
            np.dtype((np.void, a.dtype.itemsize * a.shape[1])))
        _, ia, ic = np.unique(b, return_index=True, return_inverse=True)
        u = eq[ia]

    return u, ia, ic
data_manager.py (project: AutoML5, author: djajetic)
def getTypeProblem(self, solution_filename):
        ''' Get the type of problem directly from the solution file (in case we do not have an info file). '''
        if 'task' not in self.info.keys():
            solution = np.array(data_converter.file_to_array(solution_filename))
            target_num = solution.shape[1]
            self.info['target_num']=target_num
            if target_num == 1: # if we have only one column
                solution = np.ravel(solution) # flatten
                nbr_unique_values = len(np.unique(solution))
                if nbr_unique_values < len(solution)/8:
                    # Classification
                    self.info['label_num'] = nbr_unique_values
                    if nbr_unique_values == 2:
                        self.info['task'] = 'binary.classification'
                        self.info['target_type'] = 'Binary'
                    else:
                        self.info['task'] = 'multiclass.classification'
                        self.info['target_type'] = 'Categorical'
                else:
                    # Regression
                    self.info['label_num'] = 0
                    self.info['task'] = 'regression'
                    self.info['target_type'] = 'Numerical'     
            else:
                # Multilabel or multiclass       
                self.info['label_num'] = target_num
                self.info['target_type'] = 'Binary' 
                if any(item > 1 for item in map(np.sum,solution.astype(int))):
                    self.info['task'] = 'multilabel.classification'     
                else:
                    self.info['task'] = 'multiclass.classification'        
        return self.info['task']
libscores.py (project: AutoML5, author: djajetic)
def tiedrank(a):
    ''' Return the ranks (with base 1) of a list resolving ties by averaging.
     This works for numpy arrays.'''    
    m=len(a)
    # Sort a in ascending order (sa=sorted vals, i=indices)
    i=a.argsort()
    sa=a[i]
    # Find unique values
    uval=np.unique(a)     
    # Test whether there are ties 
    R=np.arange(m, dtype=float)+1 # Ranks with base 1
    if len(uval)!=m:
        # Average the ranks for the ties 
        oldval=sa[0]
        newval=sa[0]
        k0=0
        for k in range(1,m):
            newval=sa[k]
            if newval==oldval:
                # moving average
                R[k0:k+1]=R[k-1]*(k-k0)/(k-k0+1)+R[k]/(k-k0+1)
            else:
                k0=k
                oldval=newval
    # Invert the index
    S=np.empty(m)
    S[i]=R
    return S
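A quick sanity check on made-up data (assuming numpy is imported as np):

a = np.array([10., 20., 20., 30.])
print(tiedrank(a))  # [1.  2.5 2.5 4. ] -- the tied values share an averaged rank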
data_converter.py (project: AutoML5, author: djajetic)
def binarization (array):
    ''' Takes a binary-class datafile and turns the max value (positive class) into 1 and the min into 0. '''
    array = np.array(array, dtype=float) # conversion needed to use np.inf after
    if len(np.unique(array)) > 2:
        raise ValueError ("The argument must be a binary-class datafile. {} classes detected".format(len(np.unique(array))))

    # Manipulation that avoids errors in data with, for example, classes '1' and '2'.
    array[array == np.amax(array)] = np.inf
    array[array == np.amin(array)] = 0
    array[array == np.inf] = 1
    return np.array(array, dtype=int)
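A minimal demonstration with illustrative class labels:

print(binarization(['1', '2', '2', '1']))  # -> [0 1 1 0]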
input_data.py (project: IntroToDeepLearning, author: robb-brown)
def __init__(self, images, labels, fake_data=False):
    if fake_data:
      self._num_examples = 10000
    else:
      assert images.shape[0] == labels.shape[0], (
          "images.shape: %s labels.shape: %s" % (images.shape,
                                                 labels.shape))
      self._num_examples = images.shape[0]

      # Convert shape from [num examples, rows, columns, depth]
      # to [num examples, rows*columns] (assuming depth == 1)
      self.imageShape = images.shape[1:]
      self.imageChannels = self.imageShape[2]

      images = images.reshape(images.shape[0],
                              images.shape[1] * images.shape[2] * images.shape[3])
      # Convert from [0, 255] -> [0.0, 1.0].
      images = images.astype(numpy.float32)
      images = numpy.multiply(images, 1.0 / 255.0)
    self._images = images
    self._labels = labels
    try:
      if len(numpy.shape(self._labels)) == 1:
        self._labels = dense_to_one_hot(self._labels, len(numpy.unique(self._labels)))
    except Exception:
      traceback.print_exc()
    self._epochs_completed = 0
    self._index_in_epoch = 0
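A hypothetical construction with synthetic data, assuming this __init__ belongs to the DataSet class of the TensorFlow MNIST tutorial this module adapts, and that dense_to_one_hot is defined alongside it:

images = numpy.random.randint(0, 256, size=(10, 28, 28, 1)).astype(numpy.uint8)
labels = numpy.random.randint(0, 3, size=(10,))
dataset = DataSet(images, labels)  # 1-D labels are one-hot encoded over their unique values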
cluster.py (project: rca-evaluation, author: sieve-microservices)
def cluster_service(path, service, cluster_size, prev_metadata=None):

    filename = os.path.join(path, service["preprocessed_filename"])
    df = pd.read_csv(filename, sep="\t", index_col='time', parse_dates=True)

    initial_idx = None
    if prev_metadata:
        initial_idx = get_initial_clustering(service["name"], prev_metadata, df.columns)
        # adjust cluster_size if an initial assignment has been found
        if initial_idx is not None:
            cluster_size = len(np.unique(initial_idx))

    prefix = "%s/%s-cluster-%d" % (path, service["name"], cluster_size)
    if os.path.exists(prefix + "_1.png"):
        print("skip " + prefix)
        return (None, None)

    cluster_metrics, score, filenames = do_kshape(prefix, df, cluster_size, initial_idx)
    if cluster_size < 2:
        # no silhouette_score for cluster size 1
        return (None, None)
    print("silhouette_score: %f" % score)

    # protect the write access to the metadata file
    metadata_lock.acquire()
    with metadata.update(path) as data:
        for srv in data["services"]:
            if srv["name"] == service["name"]:
                if "clusters" not in srv:
                    srv["clusters"] = {}
                d = dict(silhouette_score=score, filenames=filenames, metrics=cluster_metrics)
                srv["clusters"][cluster_size] = d
    metadata_lock.release()

    return (service["name"], cluster_size)
plot.py (project: spyking-circus, author: spyking-circus)
def view_waveforms_clusters(data, halo, threshold, templates, amps_lim, n_curves=200, save=False):

    nb_templates = templates.shape[1]
    n_panels     = int(numpy.ceil(numpy.sqrt(nb_templates)))
    mask         = numpy.where(halo > -1)[0]
    clust_idx    = numpy.unique(halo[mask])
    fig          = pylab.figure()    
    square       = True
    center       = (len(data[0]) - 1)//2
    for count, i in enumerate(xrange(nb_templates)):
        if square:
            pylab.subplot(n_panels, n_panels, count + 1)
            if (numpy.mod(count, n_panels) != 0):
                pylab.setp(pylab.gca(), yticks=[])
            if (count < n_panels*(n_panels - 1)):
                pylab.setp(pylab.gca(), xticks=[])

        subcurves = numpy.where(halo == clust_idx[count])[0]
        for k in numpy.random.permutation(subcurves)[:n_curves]:
            pylab.plot(data[k], '0.5')

        pylab.plot(templates[:, count], 'r')        
        pylab.plot(amps_lim[count][0]*templates[:, count], 'b', alpha=0.5)
        pylab.plot(amps_lim[count][1]*templates[:, count], 'b', alpha=0.5)

        xmin, xmax = pylab.xlim()
        pylab.plot([xmin, xmax], [-threshold, -threshold], 'k--')
        pylab.plot([xmin, xmax], [threshold, threshold], 'k--')
        #pylab.ylim(-1.5*threshold, 1.5*threshold)
        ymin, ymax = pylab.ylim()
        pylab.plot([center, center], [ymin, ymax], 'k--')
        pylab.title('Cluster %d' %i)

    if nb_templates > 0:
        pylab.tight_layout()
    if save:
        pylab.savefig(os.path.join(save[0], 'waveforms_%s' %save[1]))
        pylab.close()
    else:
        pylab.show()
    del fig
utils.py (project: spyking-circus, author: spyking-circus)
def check_consistent_length(*arrays):
    """Check that all arrays have consistent first dimensions.
    Checks whether all objects in arrays have the same shape or length.
    Parameters
    ----------
    *arrays : list or tuple of input objects.
        Objects that will be checked for consistent length.
    """

    uniques = np.unique([_num_samples(X) for X in arrays if X is not None])
    if len(uniques) > 1:
        raise ValueError("Found arrays with inconsistent numbers of samples: "
                         "%s" % str(uniques))
mit_sceneparsing_benchmark_loader.py (project: pytorch-semseg, author: meetshah1995)
def transform(self, img, lbl):
        """transform

        :param img:
        :param lbl:
        """
        img = img[:, :, ::-1]
        img = img.astype(np.float64)
        img -= self.mean
        img = m.imresize(img, (self.img_size[0], self.img_size[1]))
        # Resize scales images from 0 to 255, thus we need
        # to divide by 255.0
        img = img.astype(float) / 255.0
        # HWC -> CHW
        img = img.transpose(2, 0, 1)

        classes = np.unique(lbl)
        lbl = lbl.astype(float)
        lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
        lbl = lbl.astype(int)

        if not np.all(classes == np.unique(lbl)):
            print("WARN: resizing labels yielded fewer classes")

        if not np.all(np.unique(lbl) < self.n_classes):
            raise ValueError("Segmentation map contained invalid class values")

        img = torch.from_numpy(img).float()
        lbl = torch.from_numpy(lbl).long()

        return img, lbl
cityscapes_loader.py (project: pytorch-semseg, author: meetshah1995)
def transform(self, img, lbl):
        """transform

        :param img:
        :param lbl:
        """
        img = img[:, :, ::-1]
        img = img.astype(np.float64)
        img -= self.mean
        img = m.imresize(img, (self.img_size[0], self.img_size[1]))
        # Resize scales images from 0 to 255, thus we need
        # to divide by 255.0
        img = img.astype(float) / 255.0
        # HWC -> CHW
        img = img.transpose(2, 0, 1)

        classes = np.unique(lbl)
        lbl = lbl.astype(float)
        lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
        lbl = lbl.astype(int)

        if not np.all(classes == np.unique(lbl)):
            print("WARN: resizing labels yielded fewer classes")

        if not np.all(np.unique(lbl) < self.n_classes):
            raise ValueError("Segmentation map contained invalid class values")

        img = torch.from_numpy(img).float()
        lbl = torch.from_numpy(lbl).long()

        return img, lbl
mklmm.py (project: MKLMM, author: omerwe)
def fit(self, X, C, y, regions, kernelType, reml=True, maxiter=100):

        #construct a list of kernel names (one for each region) 
        if (kernelType == 'adapt'): kernelNames = self.buildKernelAdapt(X, C, y, regions, reml, maxiter)
        else: kernelNames = [kernelType] * len(regions)         

        #perform optimization
        kernelObj, hyp_kernels, sig2e, fixedEffects = self.optimize(X, C, y, kernelNames, regions, reml, maxiter)

        #compute posterior distribution
        Ktraintrain = kernelObj.getTrainKernel(hyp_kernels)
        post = self.infExact_scipy_post(Ktraintrain, C, y, sig2e, fixedEffects)

        #fix intercept if phenotype is binary
        if (len(np.unique(y)) == 2):            
            controls = (y<y.mean())
            cases = ~controls
            meanVec = C.dot(fixedEffects)
            mu, var = self.getPosteriorMeanAndVar(np.diag(Ktraintrain), Ktraintrain, post, meanVec)                                     
            fixedEffects[0] -= optimize.minimize_scalar(self.getNegLL, args=(mu, np.sqrt(sig2e+var), controls, cases), method='brent').x                

        #construct trainObj
        trainObj = dict([])
        trainObj['sig2e'] = sig2e
        trainObj['hyp_kernels'] = hyp_kernels
        trainObj['fixedEffects'] = fixedEffects     
        trainObj['kernelNames'] = kernelNames

        return trainObj
unet_d8g_222f.py (project: kaggle_dsb2017, author: astoc)
def load_scan(path):
    slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
    #slices.sort(key = lambda x: int(x.InstanceNumber))

    acquisitions = [x.AcquisitionNumber for x in slices]

    vals, counts = np.unique(acquisitions, return_counts=True)
    vals = vals[::-1]    # reverse so later acquisitions come first (np.unique returns values in ascending order)
    counts = counts[::-1]

    ## take the acquisition that has more entries; if counts are identical, take the later entry
    acq_val_sel = vals[np.argmax(counts)]


    ##acquisitions = sorted(np.unique(acquisitions), reverse=True)

    if len(vals) > 1:
        print ("WARNING ##########: MULTIPLE acquisitions & counts, acq_val_sel, path: ", vals, counts, acq_val_sel, path)
    slices2= [x for x in slices if x.AcquisitionNumber == acq_val_sel]

    slices = slices2


    ## One path can include 2 acquisitions (2 sets); take the latter acquisition only, which typically is better than the first/previous ones.
    ## example:  '../input/stage1/b8bb02d229361a623a4dc57aa0e5c485'

    #slices.sort(key = lambda x: int(x.ImagePositionPatient[2]))  # from v 8, BUG should be float
    slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))  # from v 9
    try:
        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    except Exception:
        slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)

    for s in slices:
        s.SliceThickness = slice_thickness

    return slices
lungs_var3_d8g_222f.py (project: kaggle_dsb2017, author: astoc)
def largest_label_volume(im, bg=-1):
    vals, counts = np.unique(im, return_counts=True)

    counts = counts[vals != bg]
    vals = vals[vals != bg]

    if len(counts) > 0:
        return vals[np.argmax(counts)]
    else:
        return None
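For example, on a small hypothetical label image where 0 marks background:

im = np.array([[0, 1, 1],
               [2, 2, 2]])
print(largest_label_volume(im, bg=0))  # -> 2 (three voxels beat two)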

molecule_counter.py (project: cellranger, author: 10XGenomics)
def get_chunks_by_gem_group(self):
        """ Return exactly one chunk per gem group."""
        gem_group_arr = self.get_column('gem_group')
        # verify gem groups are sorted
        assert np.all(np.diff(gem_group_arr)>=0)
        unique_ggs = np.unique(gem_group_arr)
        gg_key = lambda i: gem_group_arr[i]
        chunk_iter = self.get_chunks_from_partition(unique_ggs, gg_key)
        for (gg, chunk) in zip(unique_ggs, chunk_iter):
            yield (gg, chunk[0], chunk[1])
stats.py (project: cellranger, author: 10XGenomics)
def compute_readpairs_per_umi_threshold(reads, subsample_rate):
    ''' Compute a threshold above which the UMIs are unlikely to be PCR off-products.
        reads (np.array(int)) - Read pairs for each UMI
        subsample_rate (float) - Subsample reads to this fraction.
        Returns threshold (int) - The RPPU threshold in the subsampled space '''

    if len(np.unique(reads)) < 2:
        print 'Skipping RPPU threshold calculation.'
        return 1

    print 'RPPU subsample rate: %0.4f' % subsample_rate

    reads = np.random.binomial(reads, subsample_rate)
    reads = reads[reads > 0]

    if len(np.unique(reads)) < 2:
        print 'Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.'
        return 1

    new_n50 = tk_stats.NX(reads, 0.5)

    print 'New N50: %d' % new_n50

    # Log-transform counts
    log_reads = np.log(reads)

    # Run K-Means. Reshape necessary because kmeans takes a matrix.
    kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1,1)))
    kmeans.predict(log_reads.reshape((-1,1)))

    # Take the cluster with the smallest mean
    min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]

    print 'RPPU component means: ' + str(list(np.exp(kmeans.cluster_centers_)))
    print 'RPPU component members: ' + str(np.bincount(kmeans.labels_))

    # Take the max element in the min-cluster
    threshold = np.max(reads[kmeans.labels_ == min_cluster])

    return threshold
hdf5.py (project: cellranger, author: 10XGenomics)
def append_data_column(ds, column):

    # Extend the dataset to fit the new data
    new_count = column.shape[0]
    existing_count = ds.shape[0]
    ds.resize((existing_count + new_count,))

    levels = get_levels(ds)

    if levels is not None:
        # update levels if we have new unique values
        if isinstance(column.values, p.Categorical):
            added_levels = set(column.values.categories) - set(levels)
        elif len(column) == 0:
            # Workaround for bug in pandas - get a crash in .unique() for an empty series
            added_levels = set([])
        else:
            added_levels = set(column.unique()) - set(levels)

        new_levels = list(levels)
        new_levels.extend(added_levels)

        # Check if the new categorical column has more levels
        # than the current bit width supports.
        # If so, rewrite the existing column data w/ more bits
        if len(new_levels) > np.iinfo(ds.dtype).max:
            new_dtype = pick_cat_dtype(len(new_levels))
            ds = widen_cat_column(ds, new_dtype)

        new_levels = np.array(new_levels, dtype=object)
        new_data = make_index_array(new_levels, column.values, ds.dtype)

        clear_levels(ds)
        create_levels(ds, new_levels)
    else:
        new_data = column

    # Append new data
    ds[existing_count:(existing_count + new_count)] = new_data
colorlabel.py (project: FCN_train, author: 315386775)
def _label2rgb_avg(label_field, image, bg_label=0, bg_color=(0, 0, 0)):
    """Visualise each segment in `label_field` with its mean color in `image`.

    Parameters
    ----------
    label_field : array of int
        A segmentation of an image.
    image : array, shape ``label_field.shape + (3,)``
        A color image of the same spatial shape as `label_field`.
    bg_label : int, optional
        A value in `label_field` to be treated as background.
    bg_color : 3-tuple of int, optional
        The color for the background label

    Returns
    -------
    out : array, same shape and type as `image`
        The output visualization.
    """
    out = np.zeros_like(image)
    labels = np.unique(label_field)
    bg = (labels == bg_label)
    if bg.any():
        labels = labels[labels != bg_label]
        out[label_field == bg_label] = bg_color  # paint the background pixels
    for label in labels:
        mask = (label_field == label).nonzero()
        color = image[mask].mean(axis=0)
        out[mask] = color
    return out
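A small usage sketch on synthetic data (shapes and colors are illustrative):

label_field = np.array([[0, 1], [1, 1]])
image = np.zeros((2, 2, 3), dtype=float)
image[label_field == 1] = (0.2, 0.4, 0.6)
out = _label2rgb_avg(label_field, image)  # background pixel gets bg_color, the rest their mean color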
soccerstan.py (project: soccerstan, author: Torvaney)
def stan_map(vector):
    """ Create a map of vector items : id. """
    unique_items = np.unique(vector)
    return {item: id_ for id_, item in enumerate(unique_items, start=1)}
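For instance, with illustrative team names:

teams = ['Arsenal', 'Chelsea', 'Arsenal', 'Leeds']
print(stan_map(teams))  # unique items in sorted order, mapped to ids 1..3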

