Source code examples of Python's split()

stereo_utils.py (project: pybot, author: spillai)
def setup_ps3eye_dataset(filename, start_idx=0, max_length=None, every_k_frames=1, scale=1): 
    dataset = stereo_dataset(filename=filename, 
                             channel='CAMERA', start_idx=start_idx, max_length=max_length, 
                             every_k_frames=every_k_frames, scale=scale, split='horizontal')

    # Setup one-time calibration
    calib_params = setup_ps3eye(scale=scale)
    dataset.calib = calib_params
    dataset.scale = scale
    return dataset


# def bumblebee_stereo_calib_params_ming(scale=1.0): 
#     fx, fy = 809.53*scale, 809.53*scale
#     cx, cy = 321.819*scale, 244.555*scale
#     baseline = 0.119909
#     return get_calib_params(fx, fy, cx, cy, baseline=baseline)

# def bumblebee_stereo_calib_params(scale=1.0): 
#     fx, fy = 0.445057*640*scale, 0.59341*480*scale
#     cx, cy = 0.496427*640*scale, 0.519434*480*scale
#     baseline = 0.120018 
#     return get_calib_params(fx, fy, cx, cy, baseline=baseline)
utils.py (project: CausalGAN, author: mkocaoglu)
def save_figure_images(model_type, tensor, filename, size, padding=2, normalize=False, scale_each=False):

    print('[*] saving:',filename)

    #nrow=size[0]
    nrow=size[1]#Was this number per row and now number of rows?

    if model_type=='began':
        began_save_image(tensor,filename,nrow,padding,normalize,scale_each)
    elif model_type=='dcgan':
        #images = np.split(tensor,len(tensor))
        images=tensor
        dcgan_save_images(images,size,filename)


#Began originally
spikedetection.py (project: NeoAnalysis, author: neoanalysis)
def __detect_spike_peak(self,ang_data,Thr,peak_before,peak_after):
        if Thr < 0:
            dd_0 = np.where(ang_data<Thr)[0]
        elif Thr >=0:
            dd_0 = np.where(ang_data>=Thr)[0]
        dd_1 = np.diff(dd_0,n=1)
        dd_2 = np.where(dd_1 > 1)[0]+1
        dd_3 = np.split(dd_0,dd_2)
        spike_peak = []
        if Thr < 0:
            for ite in dd_3:
                if ite.size:
                    potent_peak = ite[ang_data[ite].argmin()]
                    if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                        spike_peak.append(potent_peak)
        elif Thr >=0:
            for ite in dd_3:
                if ite.size:
                    potent_peak = ite[ang_data[ite].argmax()]
                    if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                        spike_peak.append(potent_peak)
        return np.array(spike_peak)
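
The NeoAnalysis detector above relies on a common NumPy idiom: collect the indices of all samples beyond the threshold, then cut that index array into consecutive runs with np.diff and np.split, one run per candidate spike. A minimal sketch of the idiom with made-up data (the array values and threshold below are illustrative only, not from the project):

import numpy as np

ang_data = np.array([0.1, -0.2, -1.5, -2.0, -0.3, 0.4, -1.1, -1.8, -0.9, 0.2])
Thr = -1.0
idx = np.where(ang_data < Thr)[0]           # samples below the negative threshold -> [2 3 6 7]
gaps = np.where(np.diff(idx) > 1)[0] + 1    # positions where a run of consecutive indices breaks -> [2]
runs = np.split(idx, gaps)                  # [array([2, 3]), array([6, 7])], one run per candidate spike
peaks = [run[ang_data[run].argmin()] for run in runs if run.size]   # most extreme sample of each run
print(peaks)                                # [3, 7]
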
utils.py (project: SRGAN-tensorflow, author: zoharli)
def batch_ssim(dbatch):
    im1,im2=np.split(dbatch,2)
    imgsize=im1.shape[1]*im1.shape[2]
    avg1=im1.mean((1,2),keepdims=1)
    avg2=im2.mean((1,2),keepdims=1)
    std1=im1.std((1,2),ddof=1)
    std2=im2.std((1,2),ddof=1)
    cov=((im1-avg1)*(im2-avg2)).mean((1,2))*imgsize/(imgsize-1)
    avg1=np.squeeze(avg1)
    avg2=np.squeeze(avg2)
    k1=0.01
    k2=0.03
    c1=(k1*255)**2
    c2=(k2*255)**2
    c3=c2/2
    return np.mean((2*avg1*avg2+c1)*2*(cov+c3)/(avg1**2+avg2**2+c1)/(std1**2+std2**2+c2))
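
batch_ssim expects the two image batches stacked along the first axis, and np.split(dbatch, 2) recovers them as two equal halves. Just the splitting step, with hypothetical shapes:

import numpy as np

batch_a = np.random.rand(4, 8, 8) * 255     # hypothetical batch of four 8x8 images
batch_b = np.random.rand(4, 8, 8) * 255
dbatch = np.concatenate([batch_a, batch_b]) # shape (8, 8, 8)

im1, im2 = np.split(dbatch, 2)              # two equal halves along axis 0
assert im1.shape == im2.shape == (4, 8, 8)
assert np.array_equal(im1, batch_a) and np.array_equal(im2, batch_b)
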
Routines.py (project: structured-output-ae, author: sbelharbi)
def read_pts_file(self, pts_path):
        """Read a pts file that contains the coordinates of the landmarks.

        """
        with open(pts_path) as f:
            content = f.readlines()
        content = content[3:-1] # exclude the 4 cases and the last case.
        nbr = len(content)
        X = np.zeros((nbr,1))
        Y = np.zeros((nbr,1))
        for i in xrange(nbr):
            line = content[i].split(' ')
            X[i] = np.float(line[0])
            Y[i] = np.float(line[1].replace('\n', ''))

        # remove 1 to start counting from 0 (python)        
        X = X - 1
        Y = Y - 1

        return X,Y
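
read_pts_file uses the built-in str.split rather than np.split (it is Python 2 era code: note xrange and np.float, the latter removed from recent NumPy releases). On a typical pts coordinate line the parsing behaves like this (the line below is made up):

line = "269.693 326.329\n"
parts = line.split(' ')                   # ['269.693', '326.329\n']
x = float(parts[0])                       # 269.693
y = float(parts[1].replace('\n', ''))     # 326.329
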
utils.py (project: kor-char-rnn-tensorflow, author: insikk)
def create_batches(self):
        self.num_batches = int(self.tensor.size / (self.batch_size *
                                                   self.seq_length))

        # When the data (tensor) is too small,
        # let's give them a better error message
        if self.num_batches == 0:
            assert False, "Not enough data. Make seq_length and batch_size small."

        self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
        xdata = self.tensor
        ydata = np.copy(self.tensor)
        ydata[:-1] = xdata[1:]
        ydata[-1] = xdata[0]
        self.x_batches = np.split(xdata.reshape(self.batch_size, -1),
                                  self.num_batches, 1)
        self.y_batches = np.split(ydata.reshape(self.batch_size, -1),
                                  self.num_batches, 1)
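
create_batches reshapes the flat token stream to (batch_size, -1) and then applies np.split along axis 1, so each batch keeps rows that are contiguous in the original stream; the same pattern appears again in the Tree-LSTM-LM snippet below. A toy run with hypothetical sizes:

import numpy as np

batch_size, seq_length = 2, 3            # hypothetical sizes; 24 tokens -> 4 batches
tensor = np.arange(24)
num_batches = tensor.size // (batch_size * seq_length)

xdata = tensor[:num_batches * batch_size * seq_length]
ydata = np.copy(xdata)
ydata[:-1] = xdata[1:]                   # targets are inputs shifted by one
ydata[-1] = xdata[0]

x_batches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
print(x_batches[0])
# [[ 0  1  2]
#  [12 13 14]]
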
sketch_rnn.py (project: Pytorch-Sketch-RNN, author: alexis-jacq)
def forward(self, inputs, batch_size, hidden_cell=None):
        if hidden_cell is None:
            # then must init with zeros
            if use_cuda:
                hidden = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size).cuda())
                cell = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size).cuda())
            else:
                hidden = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size))
                cell = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size))
            hidden_cell = (hidden, cell)
        _, (hidden,cell) = self.lstm(inputs.float(), hidden_cell)
        # hidden is (2, batch_size, hidden_size), we want (batch_size, 2*hidden_size):
        hidden_forward, hidden_backward = torch.split(hidden,1,0)
        hidden_cat = torch.cat([hidden_forward.squeeze(0), hidden_backward.squeeze(0)],1)
        # mu and sigma:
        mu = self.fc_mu(hidden_cat)
        sigma_hat = self.fc_sigma(hidden_cat)
        sigma = torch.exp(sigma_hat/2.)
        # N ~ N(0,1)
        z_size = mu.size()
        if use_cuda:
            N = Variable(torch.normal(torch.zeros(z_size),torch.ones(z_size)).cuda())
        else:
            N = Variable(torch.normal(torch.zeros(z_size),torch.ones(z_size)))
        z = mu + sigma*N
        # mu and sigma_hat are needed for LKL loss
        return z, mu, sigma_hat
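
torch.split(hidden, 1, 0) (split size 1 along dim 0) separates the forward and backward hidden states of the bidirectional encoder before they are concatenated per sample. The shape bookkeeping, with hypothetical batch and hidden sizes:

import torch

hidden = torch.randn(2, 4, 8)                 # (num_directions, batch_size, hidden_size), sizes made up
hidden_forward, hidden_backward = torch.split(hidden, 1, 0)   # each (1, 4, 8)
hidden_cat = torch.cat([hidden_forward.squeeze(0), hidden_backward.squeeze(0)], 1)
print(hidden_cat.shape)                       # torch.Size([4, 16])
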
audioutils.py (project: gtzan.keras, author: Hguimaraes)
def splitsongs_melspect(self, X, y, cnn_type = '1D'):
    temp_X = []
    temp_y = []

    for i, song in enumerate(X):
      song_slipted = np.split(song, self.augment_factor)
      for s in song_slipted:
        temp_X.append(s)
        temp_y.append(y[i])

    temp_X = np.array(temp_X)
    temp_y = np.array(temp_y)

    if not cnn_type == '1D':
      temp_X = temp_X[:, np.newaxis]

    return temp_X, temp_y
base.py (project: scikit-kge, author: mnick)
def _optim(self, xys):
        idx = np.arange(len(xys))
        self.batch_size = np.ceil(len(xys) / self.nbatches)
        batch_idx = np.arange(self.batch_size, len(xys), self.batch_size)

        for self.epoch in range(1, self.max_epochs + 1):
            # shuffle training examples
            self._pre_epoch()
            shuffle(idx)

            # store epoch for callback
            self.epoch_start = timeit.default_timer()

            # process mini-batches
            for batch in np.split(idx, batch_idx):
                # select indices for current batch
                bxys = [xys[z] for z in batch]
                self._process_batch(bxys)

            # check callback function, if false return
            for f in self.post_epoch:
                if not f(self):
                    break
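
In _optim, np.split receives an explicit list of cut points (batch_idx) rather than a count, so the shuffled index array is divided into mini-batches and the last batch simply absorbs the remainder. A toy run with made-up sizes (the int cast below is only needed because np.ceil returns a float):

import numpy as np

idx = np.arange(10)                                       # 10 training examples, 3 mini-batches
np.random.shuffle(idx)
batch_size = int(np.ceil(len(idx) / 3))                   # 4
batch_idx = np.arange(batch_size, len(idx), batch_size)   # [4, 8]
batches = np.split(idx, batch_idx)
print([len(b) for b in batches])                          # [4, 4, 2]
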
SVBRDFNetTraining.py (project: self-augmented-net, author: msraig)
def RealUnlabelDataLoadProcess(pipe, datafile, params):
    path, file = os.path.split(datafile)
    batchSize = params['batchSize']
    dataset = RealDataLoaderSVBRDF(path, file)

    dataset.shuffle(params['randomSeed'])
    pipe.send(dataset.dataSize)
    counter = 0
    posInDataSet = 0
    epoch = 0

    while(True):
        imgbatch = dataset.GetBatch(posInDataSet, batchSize)
        for i in range(0, batchSize):
            imgbatch[i,:,:,:] = autoExposure(imgbatch[i,:,:,:])
        pipe.send(imgbatch)
        counter = counter + batchSize
        posInDataSet = (posInDataSet + batchSize) % dataset.dataSize
        newepoch = counter / dataset.dataSize
        if(newepoch != epoch):
            dataset.shuffle()
        epoch = newepoch
trainer.py (project: ANN-PONR-Python3, author: anon-42)
def chooseErrorData(self, game, lesson=None):
        ''' 
        Choose saved error function data by lesson and game name in 
        history database.
        '''
        self.history.setGame(game)
        self.load()
        if lesson is not None:
            self.error_data_training = np.split(self.data[0,:], 
                np.argwhere(self.data[0,:] == -1))[lesson][1:]
            self.error_data_test = np.split(self.data[1,:], 
                np.argwhere(self.data[1,:] == -1))[lesson][1:]
        else:
            self.error_data_training = np.delete(self.data[0,:], 
                np.argwhere(self.data[0,:]==-1))
            self.error_data_test = np.delete(self.data[1,:], 
                np.argwhere(self.data[1,:]==-1))

# ------------------- for test and show reasons only ----------------------
utils.py (project: Tree-LSTM-LM, author: vgene)
def create_batches(self):
        self.num_batches = int(self.tensor.size / (self.batch_size *
                                                   self.seq_length))

        # When the data (tensor) is too small,
        # let's give them a better error message
        if self.num_batches == 0:
            assert False, "Not enough data. Make seq_length and batch_size small."

        self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
        xdata = self.tensor
        ydata = np.copy(self.tensor) # maybe useless?
        ydata[:-1] = xdata[1:]
        ydata[-1] = xdata[0]
        self.x_batches = np.split(xdata.reshape(self.batch_size, -1),
                                  self.num_batches, 1)
        self.y_batches = np.split(ydata.reshape(self.batch_size, -1),
                                  self.num_batches, 1)
_python_core.py (project: jitcdde, author: neurophysik)
def orthonormalise(self, n_lyap, delay):
        """
        Orthonormalise separation functions (with Gram-Schmidt) and return their norms after orthogonalisation (but before normalisation).
        """

        vectors = np.split(np.arange(self.n, dtype=int), n_lyap+1)[1:]

        norms = []
        for i,vector in enumerate(vectors):
            for j in range(i):
                sp = self.scalar_product(delay, vector, vectors[j])
                self.subtract_from_past(vector, vectors[j], sp)
            norm = self.norm(delay, vector)
            if norm > NORM_THRESHOLD:
                self.scale_past(vector, 1./norm)
            norms.append(norm)

        return np.array(norms)
_lpso.py (project: brainpipe, author: EtienneCmb)
def _fit(x, y, train, test, self, n_jobs):
    """Sub fit function
    """
    nsuj, nfeat = x.shape
    iteract = product(range(nfeat), zip(train, test))
    ya = Parallel(n_jobs=n_jobs)(delayed(_subfit)(
            np.concatenate(tuple(x[i].iloc[k[0]])),
            np.concatenate(tuple(x[i].iloc[k[1]])),
            np.concatenate(tuple(y[0].iloc[k[0]])),
            np.concatenate(tuple(y[0].iloc[k[1]])),
            self) for i, k in iteract)
    # Re-arrange ypred and ytrue:
    ypred, ytrue = zip(*ya)
    ypred = [np.concatenate(tuple(k)) for k in np.split(np.array(ypred), nfeat)]
    ytrue = [np.concatenate(tuple(k)) for k in np.split(np.array(ytrue), nfeat)]
    da = np.ravel([100*accuracy_score(ytrue[k], ypred[k]) for k in range(nfeat)])
    return da, ytrue, ypred
model.py (project: densecap-tensorflow, author: rampage644)
def generate_batches(positive_batch, negative_batch, batch_size):
    positive_boxes, positive_scores, positive_labels = positive_batch
    negative_boxes, negative_scores, negative_labels = negative_batch

    half_batch = batch_size // 2

    pos_batch = np.concatenate([positive_boxes, positive_scores, positive_labels], axis=1)
    neg_batch = np.concatenate([negative_boxes, negative_scores, negative_labels], axis=1)

    np.random.shuffle(pos_batch)
    np.random.shuffle(neg_batch)

    pos_batch = pos_batch[:half_batch]
    pad_size = half_batch - len(pos_batch)
    pos_batch = np.concatenate([pos_batch, neg_batch[:pad_size]])
    neg_batch = neg_batch[pad_size:pad_size+half_batch]

    return (
        np.split(pos_batch, [4, 6], axis=1),
        np.split(neg_batch, [4, 6], axis=1)
    )
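
The final np.split calls undo the earlier column-wise concatenation: the cut points [4, 6] slice each row back into box coordinates, scores, and labels. A shape-only sketch (the exact column widths are inferred from the concatenation above and may differ in the real project):

import numpy as np

batch = np.arange(2 * 8).reshape(2, 8)                 # 2 rows, 8 hypothetical columns
boxes, scores, labels = np.split(batch, [4, 6], axis=1)
print(boxes.shape, scores.shape, labels.shape)         # (2, 4) (2, 2) (2, 2)
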
kaggle_titanic.py (project: stacked_generalization, author: fukatani)
def get_sample(self, N=600, scale=False):
        all_data = self.pre_process(self.file_name)
        #print('data_type: ' + str(all_data.dtypes))
        all_data = all_data.values
        xs = all_data[:, 2:]
        y = all_data[:, 1]
        if scale:
            xs = preprocessing.scale(xs)
        if N != -1:
            perm = np.random.permutation(xs.shape[0])
            xs = xs[perm]
            y = y[perm]
            xs_train, xs_test = np.split(xs, [N])
            y_train, y_test = np.split(y, [N])
            return xs_train, xs_test, y_train, y_test
        else:
            return xs, y
dmp_sequence.py (project: bolero, author: rock-learning)
def set_params(self, params):
        """Utility function: set currently optimizable parameters."""
        weights, goals, goal_vels = np.split(params, (self.n_weights,
            self.n_weights + (self.n_dmps - 1) * self.n_task_dims))
        G = np.split(goals, [i * self.n_task_dims
                             for i in range(1, self.n_dmps - 1)])
        self.weights = [w.reshape(self.n_weights_per_dmp[i], self.n_task_dims)
                        for i, w in enumerate(np.split(
                            weights, self.split_weights * self.n_task_dims)[
                                :self.n_dmps])]

        for i in range(self.n_dmps - 1):
            self.subgoals[i + 1] = G[i]
        if self.learn_goal_velocities:
            self.subgoal_velocities = np.split(
                goal_vels, [i * self.n_task_dims
                            for i in xrange(1, self.n_dmps)])
neural_network.py (project: dl4nlp, author: yohokuno)
def flatten_cost_gradient(cost_gradient_hetero, shapes):
    """
    Allow cost function to have heterogeneous parameters (which is not allowed in numpy array)
    :param cost_gradient_hetero: cost function that receives heterogeneous parameters
    :param shapes: list of shapes of parameter
    :return: cost function that receives concatenated parameters and returns concatenated gradients
    """
    def cost_gradient_wrapper(concatenated_parameters, input, output):
        all_parameters = []

        for shape in shapes:
            split_index = np.prod(shape)
            single_parameter, concatenated_parameters = np.split(concatenated_parameters, [split_index])
            single_parameter = single_parameter.reshape(shape)
            all_parameters.append(single_parameter)

        cost, gradients = cost_gradient_hetero(all_parameters, input, output)
        flatten_gradients = [gradient.flatten() for gradient in gradients]
        concatenated_gradients = np.concatenate(flatten_gradients)
        return cost, concatenated_gradients

    return cost_gradient_wrapper
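
The wrapper peels one parameter block at a time off the front of the flat vector: np.split with a single cut point returns the leading block and the remainder, and the block is then reshaped. A self-contained sketch with hypothetical shapes:

import numpy as np

shapes = [(2, 3), (3,)]                       # e.g. a 2x3 weight matrix followed by a length-3 bias
flat = np.arange(9, dtype=float)

params = []
for shape in shapes:
    split_index = int(np.prod(shape))
    head, flat = np.split(flat, [split_index])    # peel the leading block, keep the rest
    params.append(head.reshape(shape))

print(params[0].shape, params[1].shape)       # (2, 3) (3,)
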
model.py (project: hydrus, author: mark-r-g)
def ests_ll_quad(self, params):
        """
        Calculate the loglikelihood given model parameters `params`.

        This method uses Gaussian quadrature, and thus returns an *approximate*
        integral.
        """
        mu0, gamma0, err0 = np.split(params, 3)
        x = np.tile(self.z, (self.cfg.QCOUNT, 1, 1))  # (QCOUNTXnhospXnmeas)
        loc = mu0 + np.outer(QC1, gamma0)
        loc = np.tile(loc, (self.n, 1, 1))
        loc = np.transpose(loc, (1, 0, 2))
        scale = np.tile(err0, (self.cfg.QCOUNT, self.n, 1))
        zs = lpdf_3d(x=x, loc=loc, scale=scale)

        w2 = np.tile(self.w, (self.cfg.QCOUNT, 1, 1))
        wted = np.nansum(w2 * zs, axis=2).T  # (nhosp X QCOUNT)
        qh = np.tile(QC1, (self.n, 1))  # (nhosp X QCOUNT)
        combined = wted + norm.logpdf(qh)  # (nhosp X QCOUNT)

        return logsumexp(np.nan_to_num(combined), b=QC2, axis=1)  # (nhosp)
model.py (project: hydrus, author: mark-r-g)
def ests_ll_exact(self, params):
        """
        Calculate the loglikelihood given model parameters `params`.

        This method uses an exact integral and returns exact ll values, i.e.
        it does not use quadrature to approximate the integral.
        """
        mu, gamma, err = np.split(params, 3)
        d = self.num2 - mu
        q = self.w2 / err**2
        r = d * q

        f = self.w2 @ (2 * np.log(abs(err)) + LOG2PI)
        a = q @ gamma**2
        b = r @ gamma
        c = nsum_row(d * r)

        return .5 * (b * b / (a+1) - c - f - np.log1p(a))
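
Both hydrus methods unpack their parameter vector the same way: np.split with an integer second argument cuts the array into that many equal pieces (and raises ValueError if the length is not divisible). For example, with an arbitrary length-12 vector:

import numpy as np

params = np.arange(12.0)
mu, gamma, err = np.split(params, 3)     # three equal length-4 pieces
print(mu, gamma, err, sep='\n')
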
utils.py (project: tacotron, author: jinfagang)
def restore_shape(arry, step, r):
    '''Reduces and adjust the shape and content of `arry` according to r.

    Args:
      arry: A 2d array with shape of [T, C]
      step: An int. Overlapping span.
      r: Reduction factor

    Returns:
      A 2d array with shape of [-1, C*r]
    '''
    T, C = arry.shape
    sliced = np.split(arry, list(range(step, T, step)), axis=0)

    started = False
    for s in sliced:
        if not started:
            restored = np.vstack(np.split(s, r, axis=1))
            started = True
        else:
            restored = np.vstack((restored, np.vstack(np.split(s, r, axis=1))))

    # Trim zero paddings
    restored = restored[:np.count_nonzero(restored.sum(axis=1))]
    return restored
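
restore_shape combines both forms of np.split: a list of cut points along axis 0 to slice the [T, C] array into step-sized chunks, and an integer along axis 1 to fold each chunk's columns into r blocks that are stacked vertically. The shapes involved, with toy sizes:

import numpy as np

arry = np.arange(24).reshape(6, 4)        # T=6 frames, C=4 channels (toy sizes)
step, r = 2, 2
sliced = np.split(arry, list(range(step, arry.shape[0], step)), axis=0)
print([s.shape for s in sliced])          # [(2, 4), (2, 4), (2, 4)]
print(np.vstack(np.split(sliced[0], r, axis=1)).shape)   # (4, 2): r column blocks stacked vertically
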
genotype_filters.py (project: varapp-backend-py, author: varapp)
def parallel_apply_bitwise(genotypes, variant_ids, conditions, active_idx, is_and):
        """Run c_apply_bitwise in parallel. Takes the same arguments."""
        N = len(genotypes)
        nprocs = mp.cpu_count()
        pool = mp.Pool(processes=nprocs)
        B = round(N/nprocs + 0.5)  # batch size
        # Split variant_ids in batches (genotype batches are equally-sized, but not
        #   variant ids, in case a subset was given)
        split_at = variant_ids.searchsorted([(k+1)*B+1 for k in range(nprocs-1)])
        variant_ids_batches = np.split(variant_ids, split_at)
        assert len(variant_ids_batches) == nprocs
        # Run one job for each batch
        passing = [pool.apply(c_apply_bitwise,
            args=(genotypes[k*B:(k+1)*B,:],
                   variant_ids_batches[k],
                   conditions, active_idx, is_and, B))
            for k in range(nprocs)]
        passing = np.concatenate(passing)
        pool.close()
        return passing

    #@timer
learning.py (project: factorix, author: gbouchar)
def create_minibatch_indices(n, minibatch_size, shuffling=True):
    """
    :param n: total number of indices from which to pick from
    :param minibatch_size: size of the minibatches (must be lower than n)
    :return: (list of random indices, number of random duplicate indices in the last minibatch to complete it)
    """
    if shuffling:
        all_indices = np.random.permutation(n)  # shuffle order randomly
    else:
        all_indices = np.arange(n)
    n_steps = (n - 1) // minibatch_size + 1  # how many batches fit per epoch
    n_rem = n_steps * minibatch_size - n  # remainder
    if n_rem > 0:
        inds_to_add = np.random.randint(0, n_rem, size=n_rem)
        all_indices = np.concatenate((all_indices, inds_to_add))
    return np.split(all_indices, n_steps), n_rem
util.py (project: sciDT, author: edvisees)
def make_folds(train_X, train_Y, num_folds):
  num_points = train_X.shape[0]
  fol_len = num_points / num_folds
  rem = num_points % num_folds
  X_folds = numpy.split(train_X, num_folds) if rem == 0 else numpy.split(train_X[:-rem], num_folds)
  Y_folds = numpy.split(train_Y, num_folds) if rem == 0 else numpy.split(train_Y[:-rem], num_folds)
  cv_folds = []
  for i in range(num_folds):
    train_folds_X = []
    train_folds_Y = []
    for j in range(num_folds):
      if i != j:
        train_folds_X.append(X_folds[j])
        train_folds_Y.append(Y_folds[j])
    train_fold_X = numpy.concatenate(train_folds_X)
    train_fold_Y = numpy.concatenate(train_folds_Y)
    cv_folds.append(((train_fold_X, train_fold_Y), (X_folds[i], Y_folds[i])))
  return cv_folds
trajutil.py (project: rltools, author: sisl)
def __init__(self, arrays, lengths=None):
        if lengths is None:
            # Without provided lengths, `arrays` is interpreted as a list of arrays
            # and self.lengths is set to the list of lengths for those arrays
            self.arrays = arrays
            self.stacked = np.concatenate(arrays, axis=0)
            self.lengths = np.array([len(a) for a in arrays])
        else:
            # With provided lengths, `arrays` is interpreted as concatenated data
            # and self.lengths is set to the provided lengths.
            self.arrays = np.split(arrays, np.cumsum(lengths)[:-1])
            self.stacked = arrays
            self.lengths = np.asarray(lengths, dtype=int)
            assert all(len(a) == l for a, l in util.safezip(self.arrays, self.lengths))
            self.boundaries = np.concatenate([[0], np.cumsum(self.lengths)])
            assert self.boundaries[-1] == len(self.stacked)
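
When lengths are provided, np.split with np.cumsum(lengths)[:-1] cuts the concatenated data back into the original variable-length pieces. A minimal sketch with made-up lengths:

import numpy as np

lengths = [3, 2, 4]                                   # hypothetical trajectory lengths
stacked = np.arange(9)                                # concatenated data
pieces = np.split(stacked, np.cumsum(lengths)[:-1])   # cut points at 3 and 5
print([p.tolist() for p in pieces])                   # [[0, 1, 2], [3, 4], [5, 6, 7, 8]]
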
preprocess_fields_v3.py (project: the-magical-csv-merge-machine, author: entrepreneur-interet-general)
def __init__(self, t, lexicon, maxTokens = 0, scorer = tokenization_based_score, distinctCount = 0, stopWords = None):
        super(TokenizedMatcher, self).__init__(t)
        currentMax = maxTokens
        self.scorer = scorer
        self.phrasesMap = validated_lexical_map(lexicon)
        self.tokenIdx = dict()
        self.distinctCount = distinctCount
        self.stopWords = stop_words_as_normalized_list(stopWords)
        for np in self.phrasesMap.keys():
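            # note: `np` here is a phrase string iterated from phrasesMap (it shadows the usual numpy alias), so np.split(' ') is the built-in str.split, not numpy.split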
            tokens = list([t for t in np.split(' ') if t not in self.stopWords])
            if len(tokens) < 1: continue
            if maxTokens < 1 and len(tokens) > currentMax:
                currentMax = len(tokens)
                if currentMax > DTC:
                    logging.warning('Full tokenization of lexicon: encountered token of length {}, above DTC!'.format(currentMax))
            matchedRefPhrase = ' '.join(tokens[:currentMax])
            if matchedRefPhrase not in self.tokenIdx or len(self.tokenIdx[matchedRefPhrase]) < len(np):
                self.tokenIdx[matchedRefPhrase] = np
        self.maxTokens = currentMax
        logging.info('SET UP %d-token matcher (%s-defined length) for <%s> with lexicon of size %d, total variants %d',
            self.maxTokens, 'user' if maxTokens > 0 else 'data', self.t, len(self.phrasesMap), len(self.tokenIdx))
preprocess_fields_v3.py (project: the-magical-csv-merge-machine, author: entrepreneur-interet-general)
def __init__(self, variantsMapFile, targetType, keepContext, domainType = None, scorer = tokenization_based_score):
        super(VariantExpander, self).__init__(targetType)
        self.domainType = domainType
        self.keepContext = keepContext # if true, then the main variant will be surrounded by original context in the normalized value
        self.variantsMap = file_to_variant_map(variantsMapFile) # map from original alternative variant to original main variant
        self.scorer = scorer
        self.tokenIdx = defaultdict(set) # map from alternative variant as joined-normalized-token-list to original alternative variant
        self.minTokens = 3
        self.maxTokens = DTC
        # map of alternative variants (including main or not!), from normalized string to list of original strings:
        phrasesMap = validated_lexical_map(self.variantsMap.keys(), tokenize = True)
        for (phrase, altVariants) in phrasesMap.items():
            tokens = phrase.split()
            l = len(tokens)
            if l < 1 or l > DTC: continue
            self.minTokens = min(self.minTokens, l)
            self.maxTokens = max(self.maxTokens, l)
            matchedVariantPhrase = ' '.join(tokens[:self.maxTokens])
            for altVariant in altVariants:
                self.tokenIdx[matchedVariantPhrase].add(altVariant)
                if altVariant not in self.variantsMap:
                    raise RuntimeError('Alternative variant {} not found in variants map'.format(altVariant))
simulateUncertDependencyOnExpTime.py (project: imgProcessor, author: radjkarl)
def _capture(f, t, t0, factor):
    '''
    capture signal and return its standard deviation
    #TODO: more detail
    '''
    n_per_sec = len(t) / t[-1]

    # len of one split:
    n = int(t0 * factor * n_per_sec)
    s = len(f) // n
    m = s * n
    f = f[:m]
    ff = np.split(f, s)
    m = np.mean(ff, axis=1)

    return np.std(m)
train.py (project: dong_iccv_2017, author: woozzu)
def preprocess(img, desc, len_desc, txt_encoder):
    img = Variable(img.cuda() if not args.no_cuda else img)
    desc = Variable(desc.cuda() if not args.no_cuda else desc)

    len_desc = len_desc.numpy()
    sorted_indices = np.argsort(len_desc)[::-1]
    original_indices = np.argsort(sorted_indices)
    packed_desc = nn.utils.rnn.pack_padded_sequence(
        desc[sorted_indices, ...].transpose(0, 1),
        len_desc[sorted_indices]
    )
    _, txt_feat = txt_encoder(packed_desc)
    txt_feat = txt_feat.squeeze()
    txt_feat = txt_feat[original_indices, ...]

    txt_feat_np = txt_feat.data.cpu().numpy() if not args.no_cuda else txt_feat.data.numpy()
    txt_feat_mismatch = torch.Tensor(np.roll(txt_feat_np, 1, axis=0))
    txt_feat_mismatch = Variable(txt_feat_mismatch.cuda() if not args.no_cuda else txt_feat_mismatch)
    txt_feat_np_split = np.split(txt_feat_np, [txt_feat_np.shape[0] // 2])
    txt_feat_relevant = torch.Tensor(np.concatenate([
        np.roll(txt_feat_np_split[0], -1, axis=0),
        txt_feat_np_split[1]
    ]))
    txt_feat_relevant = Variable(txt_feat_relevant.cuda() if not args.no_cuda else txt_feat_relevant)
    return img, txt_feat, txt_feat_mismatch, txt_feat_relevant

