def discretize(self, ts, bins=None, global_min=None, global_max=None):
    if bins is None:
        bins = self._bins
    if np.isscalar(bins):
        num_bins = bins
        min_value = ts.min()
        max_value = ts.max()
        if min_value == max_value:
            min_value = global_min
            max_value = global_max
        step = (max_value - min_value) / num_bins
        ts_bins = np.arange(min_value, max_value, step)
    else:
        ts_bins = bins
    inds = np.digitize(ts, ts_bins)
    binned_ts = tuple(str(i - 1) for i in inds)
    return binned_ts
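For context, a minimal standalone sketch of the same equal-width binning with plain NumPy (the function name and sample values below are illustrative, not part of the original class):

import numpy as np

# Hypothetical standalone version of the method above: cut a series into
# equal-width bins and return each sample's bin label as a string.
def discretize_series(ts, num_bins=4):
    ts = np.asarray(ts, dtype=float)
    step = (ts.max() - ts.min()) / num_bins          # assumes ts is not constant
    edges = np.arange(ts.min(), ts.max(), step)
    return tuple(str(i - 1) for i in np.digitize(ts, edges))

print(discretize_series([0.0, 3.0, 7.0, 5.0]))  # ('0', '1', '3', '2')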
Python usage examples of numpy.digitize()
def makedists(pdata, binl):
    ##### This is called from within makeraindist.
    ##### Calculate distributions
    pds = pdata.shape; nlat = pds[1]; nlon = pds[0]; nd = pds[2]
    bins = np.append(0, binl)
    n = np.empty((nlon, nlat, len(binl)))
    binno = np.empty(pdata.shape)
    for ilon in range(nlon):
        for ilat in range(nlat):
            # this is the histogram - we'll get frequency from this
            thisn, thisbin = np.histogram(pdata[ilon, ilat, :], bins)
            n[ilon, ilat, :] = thisn
            # these are the bin locations. we'll use these for the amount dist
            binno[ilon, ilat, :] = np.digitize(pdata[ilon, ilat, :], bins)
    #### Calculate the number of days with non-missing data, for normalization
    ndmat = np.tile(np.expand_dims(np.nansum(n, axis=2), axis=2), (1, 1, len(bins) - 1))
    thisppdfmap = n / ndmat
    #### Iterate back over the bins and add up all the precip - this will be the rain amount distribution
    testpamtmap = np.empty(thisppdfmap.shape)
    for ibin in range(len(bins) - 1):
        testpamtmap[:, :, ibin] = (pdata * (ibin == binno)).sum(axis=2)
    thispamtmap = testpamtmap / ndmat
    return thisppdfmap, thispamtmap
def set_responsibilities(anchor_frames, iou_thresh=0.6):
    """
    Changes the IOU values for the anchor frames to binary values

    anchor_frames: list of frames where each frame contains all features for a specific anchor
    iou_thresh: threshold to decide which anchor is responsible
    """
    # set box with maximum IOU to 1
    anchor_frames = [frame.copy() for frame in anchor_frames]
    # find maximum IOU value over all frames
    helper_array = np.array([frame[frame.columns[0]] for frame in anchor_frames]).T
    max_indices = np.argmax(helper_array, axis=1)
    data_idx = np.arange(len(max_indices))
    for obj_idx, frame_idx in zip(data_idx, max_indices):
        temp_frame = anchor_frames[frame_idx]
        temp_frame.loc[obj_idx, temp_frame.columns[0]] = 1
    # applying the iou threshold on a copy of the dataframes
    for frame in anchor_frames:
        frame[frame.columns[0]] = np.digitize(frame[frame.columns[0]], [iou_thresh])
    return anchor_frames
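The thresholding step above is just np.digitize against a single cut point; a tiny sketch with made-up IOU values:

import numpy as np

# Values at or above the threshold map to 1, everything below to 0.
ious = np.array([0.12, 0.75, 0.60, 0.31])
print(np.digitize(ious, [0.6]))  # [0 1 1 0]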
def _init_classes(self, y):
    """Map all possible classes to the range [0,..,C-1]

    Parameters
    ----------
    y : list of arrays of int, each element has shape=[samples_i,]
        Labels of the samples for each subject

    Returns
    -------
    new_y : list of arrays of int, each element has shape=[samples_i,]
        Mapped labels of the samples for each subject

    Note
    ----
    The mapping of the classes is saved in the attribute classes_.
    """
    self.classes_ = unique_labels(utils.concatenate_not_none(y))
    new_y = [None] * len(y)
    for s in range(len(y)):
        new_y[s] = np.digitize(y[s], self.classes_) - 1
    return new_y
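Since classes_ is sorted, digitizing against it maps arbitrary label values onto 0..C-1; a small sketch of the idea without the sklearn/utils helpers:

import numpy as np

classes_ = np.unique([7, 3, 7, 11])     # sorted unique labels: [ 3  7 11]
y = np.array([3, 11, 7, 7])
print(np.digitize(y, classes_) - 1)     # [0 2 1 1]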
def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x,
                              unique_inverse_y, calc_DKL=False):
    bins = bins.astype(np.float32)
    num_of_bins = bins.shape[0]
    # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0, 1, num=num_of_bins))
    # hist, bin_edges = np.histogram(np.squeeze(data.reshape(1, -1)), normed=True)
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    if calc_DKL:
        pxy_given_T = np.array(
            [calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a) for i in range(0, len(unique_array))]
        )
        p_XgT = np.vstack(pxy_given_T[:, 0])
        p_YgT = pxy_given_T[:, 1]
        p_YgT = np.vstack(p_YgT).T
        DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0)
        H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1)
    local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y,
                                                     unique_array)
    return local_IXT, local_ITY
def _kl_hr(pha, amp, nbins, optimize):
    """Binarize the amplitude according to phase values.

    This function is shared by the Kullback-Leibler Distance and the
    Height Ratio.
    """
    vecbin = np.linspace(-np.pi, np.pi, nbins + 1)
    phad = np.digitize(pha, vecbin) - 1

    abin = []
    for i in np.unique(phad):
        # Find where the phase takes vecbin values :
        idx = phad == i
        # Take the sum of amplitude inside the bin :
        abin_pha = np.einsum('i...j, k...j->ik...', amp, idx,
                             optimize=optimize)
        abin.append(abin_pha)

    return np.array(abin)
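A 1-D sketch of the same binning (the real function uses einsum so that amp may carry extra dimensions); the array sizes below are illustrative:

import numpy as np

nbins = 18
pha = np.random.uniform(-np.pi, np.pi, 1000)   # phases in radians
amp = np.random.rand(1000)                     # matching amplitudes
vecbin = np.linspace(-np.pi, np.pi, nbins + 1)
phad = np.digitize(pha, vecbin) - 1
# Sum the amplitude samples falling inside each phase bin.
abin = np.array([amp[phad == i].sum() for i in range(nbins)])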
def _compute_ratemap(self, min_duration=None):
    if min_duration is None:
        min_duration = self._min_duration

    ext = self.trans_func(self._extern, at=self._bst.bin_centers)
    ext_bin_idx = np.digitize(ext, self.bins, True)

    # make sure that all the events fit between extmin and extmax:
    # TODO: this might rather be a warning, but it's a pretty serious warning...
    if ext_bin_idx.max() > self.n_bins:
        raise ValueError("ext values greater than 'ext_max'")
    if ext_bin_idx.min() == 0:
        raise ValueError("ext values less than 'ext_min'")

    ratemap = np.zeros((self.n_units, self.n_bins))

    for tt, bidx in enumerate(ext_bin_idx):
        ratemap[:, bidx - 1] += self._bst.data[:, tt]

    # apply minimum observation duration
    for uu in range(self.n_units):
        ratemap[uu][self.occupancy * self._bst.ds < min_duration] = 0

    return ratemap / self._bst.ds
def __call__(self, data_object):
    orig_shape = data_object[self.x_name].shape
    x_vals = data_object[self.x_name].ravel().astype('float64')
    y_vals = data_object[self.y_name].ravel().astype('float64')

    x_i = (np.digitize(x_vals, self.x_bins) - 1).astype('int32')
    y_i = (np.digitize(y_vals, self.y_bins) - 1).astype('int32')
    if np.any((x_i == -1) | (x_i == len(self.x_bins) - 1)) \
            or np.any((y_i == -1) | (y_i == len(self.y_bins) - 1)):
        if not self.truncate:
            mylog.error("Sorry, but your values are outside" + \
                        " the table! Dunno what to do, so dying.")
            mylog.error("Error was in: %s", data_object)
            raise ValueError
        else:
            x_i = np.minimum(np.maximum(x_i, 0), len(self.x_bins) - 2)
            y_i = np.minimum(np.maximum(y_i, 0), len(self.y_bins) - 2)

    my_vals = np.zeros(x_vals.shape, dtype='float64')
    lib.BilinearlyInterpolate(self.table,
                              x_vals, y_vals, self.x_bins, self.y_bins,
                              x_i, y_i, my_vals)
    my_vals.shape = orig_shape
    return my_vals
def interpolation_alphas(self, points, *args, **kwargs):
    '''
    Returns a pair of values. The 1st value is an array of the depth indices of all the particles.
    The 2nd value is an array of the interpolation alphas for the particles between their depth
    index and depth_index+1. If both values are None, then all particles are on the surface layer.
    '''
    points = np.asarray(points, dtype=np.float64)
    points = points.reshape(-1, 3)
    underwater = points[:, 2] > 0
    if len(np.where(underwater)[0]) == 0:
        return None, None
    indices = -np.ones((len(points)), dtype=np.int64)
    alphas = -np.ones((len(points)), dtype=np.float64)
    pts = points[underwater]
    und_ind = -np.ones((len(np.where(underwater)[0])))
    und_alph = und_ind.copy()

    und_ind = np.digitize(pts[:, 2], self.depth_levels) - 1
    for i, n in enumerate(und_ind):
        if n == len(self.depth_levels) - 1:
            und_ind[i] = -1
        if und_ind[i] != -1:
            und_alph[i] = (pts[i, 2] - self.depth_levels[und_ind[i]]) / (self.depth_levels[und_ind[i] + 1] - self.depth_levels[und_ind[i]])
    indices[underwater] = und_ind
    alphas[underwater] = und_alph
    return indices, alphas
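The per-particle loop reduces to a digitize plus a fractional offset; a sketch with hypothetical depth levels, assuming every depth lies strictly inside the levels:

import numpy as np

depth_levels = np.array([0.0, 10.0, 25.0, 50.0])
depths = np.array([4.0, 30.0])
idx = np.digitize(depths, depth_levels) - 1          # layer index below each particle
alphas = (depths - depth_levels[idx]) / (depth_levels[idx + 1] - depth_levels[idx])
print(idx, alphas)                                   # [0 2] [0.4 0.2]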
test_random_forest_classifier_numeric.py (project: coremltools, author: apple)
def setUpClass(self):
    from sklearn.datasets import load_boston
    from sklearn.tree import DecisionTreeClassifier

    # Load data and train model
    import numpy as np

    scikit_data = load_boston()
    self.X = scikit_data.data.astype('f').astype('d')  ## scikit-learn downcasts data
    t = scikit_data.target
    num_classes = 3
    target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
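The pattern shared by these coremltools tests is to discretize the continuous Boston target into class labels using histogram edges; a sketch on synthetic data (load_boston has since been removed from recent scikit-learn releases):

import numpy as np

t = np.random.uniform(5.0, 50.0, size=200)    # stand-in for scikit_data.target
num_classes = 3
edges = np.histogram(t, bins=num_classes - 1)[1]
target = np.digitize(t, edges) - 1            # labels in {0, 1, 2}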
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestClassifier
    import numpy as np

    scikit_data = load_boston()
    scikit_model = RandomForestClassifier(random_state=1)
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t)[1]) - 1

    scikit_model.fit(scikit_data.data, target)

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.scikit_model = scikit_model
test_boosted_trees_classifier_numeric.py (project: coremltools, author: apple)
def setUpClass(self):
    from sklearn.datasets import load_boston

    # Load data and train model
    import numpy as np

    scikit_data = load_boston()
    num_classes = 3
    self.X = scikit_data.data.astype('f').astype('d')  ## scikit-learn downcasts data
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.preprocessing import MultiLabelBinarizer
    import numpy as np

    scikit_data = load_boston()
    scikit_model = DecisionTreeClassifier(random_state=1)
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t)[1]) - 1
    scikit_model.fit(scikit_data.data, target)

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.scikit_model = scikit_model
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston
    from sklearn.ensemble import GradientBoostingClassifier
    import numpy as np

    scikit_data = load_boston()
    scikit_model = GradientBoostingClassifier(random_state=1)
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t)[1]) - 1
    scikit_model.fit(scikit_data.data, target)
    self.target = target

    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
def auto_classify_transmitters(detections):
    """Identify transmitter IDs based on carrier frequency."""
    # Split by receiver
    detections_by_rx = defaultdict(list)
    for detection in detections:
        detections_by_rx[detection.rxid].append(detection)

    edges = {}
    for rxid, rx_detections in detections_by_rx.items():
        freqs = np.array([d.carrier_info.bin for d in rx_detections])
        rx_edges = detect_transmitter_windows(freqs)
        summary = ("Detected {} transmitter(s) at RX {}:"
                   .format(len(rx_edges) - 1, rxid))
        for i in range(len(rx_edges) - 1):
            summary += " {}-{}".format(rx_edges[i], rx_edges[i + 1] - 1)
        print(summary)
        edges[rxid] = rx_edges[:-1]

    txids = [np.digitize(d.carrier_info.bin, edges[d.rxid]) - 1
             for d in detections]
    return txids
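The final assignment step is a digitize of each detection's carrier bin against its receiver's window edges; with made-up edges:

import numpy as np

rx_edges = np.array([0, 120, 260])              # hypothetical window start bins
carrier_bins = np.array([15, 130, 300, 60])
print(np.digitize(carrier_bins, rx_edges) - 1)  # [0 1 2 0]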
def assign_dope_items(self, selection):
    # Builds a list of all DOPE values of the residues in the selection.
    ldope = []
    for chain_element in selection:
        ldope.extend(chain_element.dope_scores)
    # Takes the min and max values among all the selected residues.
    min_value = min(ldope)
    max_value = max(ldope)
    # An array with the equally spaced limits generated with the list above.
    bins = numpy.array(numpy.linspace(min_value, max_value, num=10))
    for chain_element in selection:
        # An array with all the DOPE values of a single chain in the selection.
        adope = numpy.array(chain_element.dope_scores)
        # An array with the id of the bins where those values reside.
        inds = numpy.digitize(adope, bins)
        # Builds a list like:
        # [(-0.052, 4), (-0.03, 3), (-0.04, 5), (-0.04, 6), (-0.041, 7), (-0.042, 8), (-0.043, 10), ...]
        # which contains a tuple for every standard residue of a polypeptide chain. The
        # first value of the tuple is the DOPE score of that residue, the second is the id
        # (going from 1 to 10) of the bin where that value resides.
        chain_element.dope_items = []
        for dope_score, bin_id in zip(adope, inds):  # zip(ldope, inds):
            chain_element.dope_items.append({"dope-score": dope_score, "interval": bin_id})
def __update_state(self):
    """
    Updates the state space (self.gamestate) after the suggested action is taken
    :return: None
    """
    jigsaw_id, place_id = self.decode_action()
    self.__update_placed_pieces(jigsaw_id, place_id)
    if self.state_type == 'hog':
        self.__render_gamestate()
    elif self.state_type == 'image':
        resized_discrete_im = np.digitize(
            imresize(self.jigsaw_image, (self.state_height, self.state_width)),
            self.bins)
        self.gamestate = np.array([resized_discrete_im]).transpose().swapaxes(0, 1)
    else:
        raise ValueError('The state type is not valid, enter "hog" or "image"')
def vals2colors(vals, cmap='GnBu_d', res=100):
    """Maps values to colors

    Args:
        vals (list or list of lists) - list of values to map to colors
        cmap (str) - color map (default is 'GnBu_d')
        res (int) - resolution of the color map (default: 100)

    Returns:
        list of rgb tuples
    """
    # flatten if list of lists
    if any(isinstance(el, list) for el in vals):
        vals = list(itertools.chain(*vals))

    # get palette from seaborn
    palette = np.array(sns.color_palette(cmap, res))
    ranks = np.digitize(vals, np.linspace(np.min(vals), np.max(vals) + 1, res + 1)) - 1
    return [tuple(i) for i in palette[ranks, :]]
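The rank computation above without seaborn, just to show the mapping onto res equally spaced bins over [min, max + 1):

import numpy as np

vals = [0.0, 2.5, 5.0, 9.9]
res = 10
ranks = np.digitize(vals, np.linspace(min(vals), max(vals) + 1, res + 1)) - 1
print(ranks)  # [0 2 4 9]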
def reverseHistogram(data, bins=None):
    """
    Bins data using numpy.histogram and calculates the
    reverse indices for the entries like IDL.

    Parameters:
    data : data to pass to numpy.histogram
    bins : bins to pass to numpy.histogram

    Returns:
    hist : bin content output by numpy.histogram
    edges : edges output from numpy.histogram
    rev : reverse indices of entries in each bin

    Using Reverse Indices:
        h, e, rev = reverseHistogram(data, bins=bins)
        for i in range(h.size):
            if rev[i] != rev[i+1]:
                # data points were found in this bin, get their indices
                indices = rev[ rev[i]:rev[i+1] ]
                # do calculations with data[indices] ...
    """
    if bins is None: bins = numpy.arange(data.max() + 2)
    hist, edges = numpy.histogram(data, bins=bins)
    digi = numpy.digitize(data.flat, bins=numpy.unique(data)).argsort()
    rev = numpy.hstack((len(edges), len(edges) + numpy.cumsum(hist), digi))
    return hist, edges, rev
def run_semi_online(self, sess, inputs_clean, inputs_noisy, num_samples):
    dump = sess.run(self.init_ops,
                    feed_dict={self.history_clean: inputs_clean[:, 0:self.len_pad + 1]})
    skips_noisy_sum = sess.run(self.skips_noisy_sum,
                               feed_dict={self.inputs_noisy: inputs_noisy})
    indices = inputs_clean[:, self.len_pad:self.len_pad + 1]
    predictions_ = []
    for step in xrange(num_samples):
        # indices = inputs_clean[:, self.len_pad + step:self.len_pad + 1 + step]
        feed_dict = {self.inputs_clean: indices,
                     self.skips_noisy: skips_noisy_sum[:, :, step]}
        output_dist = sess.run(self.out_ops, feed_dict=feed_dict)[0]
        # indices = np.argmax(output_dist, axis=1)[:, None]
        # inputs = self.bins_center[indices[:, 0]].astype(np.float32)
        inputs = np.matmul(output_dist, self.bins_center).astype(np.float32)
        indices = np.digitize(inputs, self.bins_edge, right=False)[:, None]
        predictions_.append(indices)
    predictions = np.concatenate(predictions_, axis=1)
    dump = sess.run(self.dequ_ops)
    return predictions
def run_semi_online_v2(sess,
                       out_ops,
                       skips_noisy_batch,
                       indices,
                       inputs_noisy,
                       num_samples):
    skips_noisy_sum = sess.run(skips_noisy_batch)
    predictions_ = []
    for step in xrange(num_samples):
        feed_dict = {self.inputs_clean: indices,
                     self.skips_noisy: skips_noisy_sum[:, :, step]}
        output_dist = sess.run([out_ops], feed_dict=feed_dict)[0]
        # output dim = 1 x 256, it is 2D but we need 1D input to argmax
        indices = random_bins(NUM_CLASSES, output_dist)
        inputs = self.bins[indices]
        # inputs = np.array(np.matmul(output_dist, self.bins), dtype=np.float32)[:, None]
        # indices = np.digitize(inputs[:, 0], self.bins, right=False)[:, None]
        predictions_.append(inputs)
def compute_unnormalized_crosscorrelogram(a, b, nb_bins=101, width=100e-3, f=0.0, **kwargs):
    """Compute the un-normalized cross-correlogram"""
    bin_width = width / float(nb_bins)
    start = -width / 2.0
    stop = +width / 2.0
    bins = np.linspace(start, stop, nb_bins + 1)
    values = np.zeros(nb_bins, dtype=int)
    for v in a:
        d = b - v - f * bin_width
        is_selected = np.abs(d) < width / 2.0
        d = d[is_selected]
        indices = np.digitize(d, bins) - 1
        values[indices] += 1
    if 't_min' in kwargs and 't_max' in kwargs:
        t_min, t_max = [kwargs[key] for key in ['t_min', 't_max']]
        if t_min is not None and t_max is not None:
            values = values.astype(float) / (t_max - t_min)
    bins = bins * 1e+3
    values = np.append(values, [values[-1]])
    return bins, values
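A usage sketch of the function above, assuming a and b are spike times in seconds:

import numpy as np

a = np.array([0.010, 0.050, 0.120])
b = np.array([0.012, 0.055, 0.118, 0.300])
bins, values = compute_unnormalized_crosscorrelogram(a, b, nb_bins=51, width=50e-3)
# bins are returned in milliseconds; values counts coincidences per lag bin
# and repeats the last entry so the pair suits a step plot.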
def roundx(x, y, binstart=0.1):
    """Round off to try and grid-up nearly gridded data
    """
    bins = np.arange(x.min(), x.max() + binstart, binstart)
    counts, bin_edges = np.histogram(x, bins=bins)

    # merge together bins that are neighboring and have counts
    new_bin_edges = []
    new_bin_edges.append(bin_edges[0])
    for i, b in enumerate(bin_edges[1:]):
        if (counts[i] > 0) & (counts[i - 1] > 0):
            pass
        else:
            new_bin_edges.append(bin_edges[i])
    if bin_edges[-1] != new_bin_edges[-1]:
        new_bin_edges.append(bin_edges[-1])
    indx = np.digitize(x, new_bin_edges)
    new_bin_edges = np.array(new_bin_edges)
    bin_centers = (new_bin_edges[1:] - new_bin_edges[:-1]) / 2. + new_bin_edges[:-1]
    new_x = bin_centers[indx - 1]
    return new_x
def evaluate_model(model, generator, steps, metric, category_cutoffs=[0.]):
    y_true, y_pred = None, None
    count = 0
    while count < steps:
        x_batch, y_batch = next(generator)
        y_batch_pred = model.predict_on_batch(x_batch)
        y_batch_pred = y_batch_pred.ravel()
        y_true = np.concatenate((y_true, y_batch)) if y_true is not None else y_batch
        y_pred = np.concatenate((y_pred, y_batch_pred)) if y_pred is not None else y_batch_pred
        count += 1

    loss = evaluate_keras_metric(y_true.astype(np.float32), y_pred.astype(np.float32), metric)

    y_true_class = np.digitize(y_true, category_cutoffs)
    y_pred_class = np.digitize(y_pred, category_cutoffs)

    # theano does not like integer input
    acc = evaluate_keras_metric(y_true_class.astype(np.float32), y_pred_class.astype(np.float32), 'binary_accuracy')  # works for multiclass labels as well

    return loss, acc, y_true, y_pred, y_true_class, y_pred_class
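The class conversion used above in isolation: one cutoff gives binary labels, several cutoffs give ordinal classes (values below are illustrative):

import numpy as np

y_pred = np.array([-1.2, 0.3, 0.0, 2.1])
print(np.digitize(y_pred, [0.]))           # [0 1 1 1]
print(np.digitize(y_pred, [-1., 0., 1.]))  # [0 2 2 3]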
def put_in_buckets(data_array, labels, buckets, mode='pad'):
    """
    Given bucket edges and data, put the data in buckets according to their length
    :param data_array:
    :param labels:
    :param buckets:
    :return:
    """
    input_lengths = np.array([len(s) for s in data_array], dtype='int')
    input_bucket_index = np.array([i if i < len(buckets) else len(buckets) - 1
                                   for i in np.digitize(input_lengths, buckets, right=False)])  # during testing, longer sentences are just truncated
    if mode == 'truncate':
        input_bucket_index -= 1
    bucketed_data = {}
    reordering_indexes = {}
    for bucket in list(np.unique(input_bucket_index)):
        length_indexes = np.where(input_bucket_index == bucket)[0]
        reordering_indexes[bucket] = length_indexes
        maxlen = int(np.floor(buckets[bucket]))
        padded = pad_data(data_array[length_indexes], labels[length_indexes], max_len=maxlen)
        bucketed_data[bucket] = padded  # in final dict, start counting by zero
    return bucketed_data, reordering_indexes
def transform(self, X, y=None):
    """Binarize X based on the fitted cut points."""

    # scikit-learn checks
    X = check_array(X)

    if self.cut_points is None:
        raise NotFittedError('Estimator not fitted, call `fit` before exploiting the model.')

    if X.shape[1] != len(self.cut_points):
        raise ValueError("Provided array's dimensions do not match with the ones from the "
                         "array `fit` was called on.")

    binned = np.array([
        np.digitize(x, self.cut_points[i])
        if len(self.cut_points[i]) > 0
        else np.zeros(x.shape)
        for i, x in enumerate(X.T)
    ]).T

    return binned
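The core of the transform for a single feature column, with hypothetical cut points standing in for the ones learned by fit:

import numpy as np

cut_points = [0.5, 1.5]                 # assumed fitted cut points
x = np.array([0.1, 0.7, 2.0, 1.5])
print(np.digitize(x, cut_points))       # [0 1 2 2]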
def get_bg_mats(fragsx, fragsy, sv_region, window_size):
    bg_mats = {}
    selectors = {"+": "end_pos", "-": "start_pos"}

    binsx = numpy.arange(sv_region["startx"], sv_region["endx"] + window_size, window_size)
    binsy = numpy.arange(sv_region["starty"], sv_region["endy"] + window_size, window_size)

    for orientationx in "+-":
        binx = numpy.digitize(fragsx[selectors[orientationx]], binsx) - 1
        gx = fragsx.groupby(binx)
        bcsx = [set(gx.get_group(k)["bc"]) if k in gx.groups else set() for k in range(len(binsx))]

        for orientationy in "+-":
            biny = numpy.digitize(fragsy[selectors[orientationy]], binsy) - 1
            gy = fragsy.groupby(biny)
            bcsy = [set(gy.get_group(k)["bc"]) if k in gy.groups else set() for k in range(len(binsy))]

            bg_mats[orientationx + orientationy] = get_bg_mat(bcsx, bcsy)

    return bg_mats
def _digitize(x, bins, right=False):
    """Replacement for digitize with right kwarg (numpy < 1.7).

    Notes
    -----
    This fix is only meant for integer arrays. If ``right==True`` but either
    ``x`` or ``bins`` are of a different type, a NotImplementedError will be
    raised.
    """
    if right:
        x = np.asarray(x)
        bins = np.asarray(bins)
        if (x.dtype.kind not in 'ui') or (bins.dtype.kind not in 'ui'):
            raise NotImplementedError("Only implemented for integer input")
        return np.digitize(x - 1e-5, bins)
    else:
        return np.digitize(x, bins)
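A quick check of the behaviour the shim emulates for integer input: with right=True a sample equal to a bin edge falls into the lower bin.

import numpy as np

x = np.array([1, 5, 10])
bins = np.array([0, 5, 10])
print(np.digitize(x, bins))          # right=False: [1 2 3]
print(np.digitize(x - 1e-5, bins))   # the shim's right=True path: [1 1 2]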