Example source code for Python's LongTensor() class

categorical.py (project: pyro, author: uber)
def sample(self):
        """
        Returns a sample which has the same shape as `ps` (or `vs`), except
        that if ``one_hot=True`` (and no `vs` is specified), the last dimension
        will have the same size as the number of events. The type of the sample
        is `numpy.ndarray` if `vs` is a list or a numpy array, else a tensor
        is returned.

        :return: sample from the Categorical distribution
        :rtype: numpy.ndarray or torch.LongTensor
        """
        sample = torch_multinomial(self.ps.data, 1, replacement=True).expand(*self.shape())
        sample_one_hot = torch_zeros_like(self.ps.data).scatter_(-1, sample, 1)

        if self.vs is not None:
            if isinstance(self.vs, np.ndarray):
                sample_bool_index = sample_one_hot.cpu().numpy().astype(bool)
                return self.vs[sample_bool_index].reshape(*self.shape())
            else:
                return self.vs.masked_select(sample_one_hot.byte())
        if self.one_hot:
            return Variable(sample_one_hot)
        return Variable(sample)
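
A minimal standalone sketch of the scatter_ trick this sample method relies on; the tensor names and sizes below are illustrative, not from pyro:

import torch

# Hypothetical probabilities over 4 categories for 3 independent draws.
ps = torch.Tensor([[0.1, 0.2, 0.3, 0.4]] * 3)

# multinomial returns LongTensor indices; scatter_ expands them to one-hot rows.
sample = torch.multinomial(ps, 1, replacement=True)        # shape [3, 1]
one_hot = torch.zeros(ps.size()).scatter_(-1, sample, 1)   # shape [3, 4]
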
embedding.py (project: jack, author: uclmr)
def forward(self, unique_word_chars, unique_word_lengths, sequences_as_uniqs):
        long_tensor = torch.cuda.LongTensor if torch.cuda.device_count() > 0 else torch.LongTensor
        embedded_chars = self._embeddings(unique_word_chars.type(long_tensor))
        # [N, S, L]
        conv_out = self._conv(embedded_chars.transpose(1, 2))
        # [N, L]
        conv_mask = misc.mask_for_lengths(unique_word_lengths)
        conv_out = conv_out + conv_mask.unsqueeze(1)
        embedded_words = conv_out.max(2)[0]

        if not isinstance(sequences_as_uniqs, list):
            sequences_as_uniqs = [sequences_as_uniqs]

        all_embedded = []
        for word_idxs in sequences_as_uniqs:
            all_embedded.append(functional.embedding(
                word_idxs.type(long_tensor), embedded_words))
        return all_embedded
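
The key step above is indexing a precomputed word-embedding matrix with LongTensor ids via functional.embedding. A self-contained sketch with made-up sizes:

import torch
import torch.nn.functional as functional

embedded_words = torch.randn(10, 5)                  # 10 "unique words", dim 5
word_idxs = torch.LongTensor([[0, 3, 3], [7, 1, 0]])
seq_embedded = functional.embedding(word_idxs, embedded_words)  # [2, 3, 5]
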
wordembed.py (project: pytorch-skipthoughts, author: kaniblu)
def generate(self):
        if self.shuffle:
            idx = np.random.permutation(np.arange(self.vocab_size))
            idx = torch.LongTensor(idx)
            src = self.src[idx]
            target = self.target[idx]
        else:
            src = self.src
            target = self.target

        for i in range(0, self.vocab_size, self.batch_size):
            b_idx = i
            e_idx = i + self.batch_size
            batch_x = src[b_idx:e_idx].contiguous()
            batch_y = target[b_idx:e_idx].contiguous()

            batch_x = Variable(batch_x)
            batch_y = Variable(batch_y)

            yield batch_x, batch_y
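
Shuffling here works by fancy indexing with a LongTensor permutation; a compact sketch on toy data:

import numpy as np
import torch

src = torch.arange(10).view(5, 2)                  # 5 toy rows
idx = torch.from_numpy(np.random.permutation(5))   # LongTensor of row ids
shuffled = src[idx]                                # rows reordered in one call
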
data_utils.py (project: pytorch-tutorial, author: yunjey)
def get_data(self, path, batch_size=20):
        # Add words to the dictionary
        with open(path, 'r') as f:
            tokens = 0
            for line in f:
                words = line.split() + ['<eos>']
                tokens += len(words)
                for word in words: 
                    self.dictionary.add_word(word)  

        # Tokenize the file content
        ids = torch.LongTensor(tokens)
        token = 0
        with open(path, 'r') as f:
            for line in f:
                words = line.split() + ['<eos>']
                for word in words:
                    ids[token] = self.dictionary.word2idx[word]
                    token += 1
        num_batches = ids.size(0) // batch_size
        ids = ids[:num_batches*batch_size]
        return ids.view(batch_size, -1)
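
The reshaping at the end is the standard language-modeling batching trick: truncate the id stream to a multiple of batch_size, then view it as a matrix. A toy run (numbers are illustrative):

import torch

ids = torch.LongTensor(list(range(23)))     # 23 token ids
batch_size = 4
num_batches = ids.size(0) // batch_size     # 5 full batches
ids = ids[:num_batches * batch_size]        # drop the 3-token tail
batches = ids.view(batch_size, -1)          # shape [4, 5]
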
storage.py (project: pytorch-a2c-ppo-acktr, author: ikostrikov)
def feed_forward_generator(self, advantages, num_mini_batch):
        num_steps, num_processes = self.rewards.size()[0:2]
        batch_size = num_processes * num_steps
        mini_batch_size = batch_size // num_mini_batch
        sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
        for indices in sampler:
            indices = torch.LongTensor(indices)

            if advantages.is_cuda:
                indices = indices.cuda()

            observations_batch = self.observations[:-1].view(-1,
                                        *self.observations.size()[2:])[indices]
            states_batch = self.states[:-1].view(-1, self.states.size(-1))[indices]
            actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
            return_batch = self.returns[:-1].view(-1, 1)[indices]
            masks_batch = self.masks[:-1].view(-1, 1)[indices]
            old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
            adv_targ = advantages.view(-1, 1)[indices]

            yield observations_batch, states_batch, actions_batch, \
                return_batch, masks_batch, old_action_log_probs_batch, adv_targ
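
The pattern to note: flatten the time and process dimensions, then gather random mini-batches with a LongTensor index. A stripped-down sketch with illustrative shapes:

import torch
from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler

observations = torch.randn(8, 2, 3)      # 8 steps, 2 processes, 3-dim obs
flat = observations.view(-1, 3)          # [16, 3]
sampler = BatchSampler(SubsetRandomSampler(range(16)), 4, drop_last=False)
for indices in sampler:
    batch = flat[torch.LongTensor(indices)]   # random mini-batch of rows
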
data.py (project: Tree-LSTM-LM, author: vgene)
def tokenize(self, path):
        """Tokenizes a text file."""
        assert os.path.exists(path)
        # Add words to the dictionary
        with open(path, 'r') as f:
            tokens = 0
            for line in f:
                words = line.split() + ['<eos>']
                tokens += len(words)
                for word in words:
                    self.dictionary.add_word(word)

        # Tokenize file content
        with open(path, 'r') as f:
            ids = torch.LongTensor(tokens)
            token = 0
            for line in f:
                words = line.split() + ['<eos>']
                for word in words:
                    ids[token] = self.dictionary.word2idx[word]
                    token += 1

        return ids
sentiment_trainer.py (project: treehopper, author: tomekkorbak)
def test(self, dataset):
        self.model.eval()
        self.embedding_model.eval()
        loss = 0
        accuracies = torch.zeros(len(dataset))

        output_trees = []
        outputs = []
        for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch)):
            tree, sent, label = dataset[idx]
            input = Var(sent, volatile=True)
            target = Var(torch.LongTensor([int(label)]), volatile=True)
            if self.args.cuda:
                input = input.cuda()
                target = target.cuda()
            emb = F.torch.unsqueeze(self.embedding_model(input),1)
            output, _, acc, tree = self.model(tree, emb)
            err = self.criterion(output, target)
            loss += err.data[0]
            accuracies[idx] = acc
            output_trees.append(tree)
            outputs.append(tree.output_softmax.data.numpy())
            # predictions[idx] = torch.dot(indices,torch.exp(output.data.cpu()))
        return loss/len(dataset), accuracies, outputs, output_trees
bnlstm.py (project: FewShotLearning, author: gitabcworld)
def forward(self, input_, length=None, hx=None):
        if self.batch_first:
            input_ = input_.transpose(0, 1)
        max_time, batch_size, _ = input_.size()
        if length is None:
            length = Variable(torch.LongTensor([max_time] * batch_size))
            if input_.is_cuda:
                length = length.cuda()
        if hx is None:
            hx = Variable(input_.data.new(batch_size, self.hidden_size).zero_())
            hx = (hx, hx)
        h_n = []
        c_n = []
        layer_output = None
        for layer in range(self.num_layers):
            layer_output, (layer_h_n, layer_c_n) = LSTM._forward_rnn(
                cell=self.cells[layer], input_=input_, length=length, hx=hx)
            input_ = self.dropout_layer(layer_output)
            h_n.append(layer_h_n)
            c_n.append(layer_c_n)
        output = layer_output
        h_n = torch.stack(h_n, 0)
        c_n = torch.stack(c_n, 0)
        return output, (h_n, c_n)
data.py (project: fairseq-py, author: facebookresearch)
def collate(samples, pad_idx, eos_idx):

        def merge(key, left_pad, move_eos_to_beginning=False):
            return LanguagePairDataset.collate_tokens(
                [s[key] for s in samples], pad_idx, eos_idx, left_pad, move_eos_to_beginning)

        return {
            'id': torch.LongTensor([s['id'].item() for s in samples]),
            'src_tokens': merge('source', left_pad=LanguagePairDataset.LEFT_PAD_SOURCE),
            # we create a shifted version of targets for feeding the previous
            # output token(s) into the next decoder step
            'input_tokens': merge('target', left_pad=LanguagePairDataset.LEFT_PAD_TARGET,
                                  move_eos_to_beginning=True),
            'target': merge('target', left_pad=LanguagePairDataset.LEFT_PAD_TARGET),
            'ntokens': sum(len(s['target']) for s in samples),
        }
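
LanguagePairDataset.collate_tokens is fairseq's padding helper; below is a simplified stand-in showing the left-pad idea with plain LongTensors. This is a sketch, not fairseq's actual implementation:

import torch

def merge_tokens(token_lists, pad_idx, left_pad=False):
    # Pad a list of 1-D LongTensors to a common length.
    size = max(len(t) for t in token_lists)
    out = torch.LongTensor(len(token_lists), size).fill_(pad_idx)
    for i, t in enumerate(token_lists):
        if left_pad:
            out[i, size - len(t):] = t
        else:
            out[i, :len(t)] = t
    return out

batch = merge_tokens([torch.LongTensor([5, 6, 2]), torch.LongTensor([7, 2])], pad_idx=1)
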
model.py (project: pytorch-nlp, author: endymecy)
def forward(self, pos_u, pos_v, neg_u, neg_v):
        losses = []
        emb_u = []
        for i in range(len(pos_u)):
            emb_ui = self.u_embeddings(Variable(torch.LongTensor(pos_u[i])))
            emb_u.append(np.sum(emb_ui.data.numpy(), axis=0).tolist())
        emb_u = Variable(torch.FloatTensor(emb_u))
        emb_v = self.v_embeddings(Variable(torch.LongTensor(pos_v)))
        score = torch.mul(emb_u, emb_v)
        score = torch.sum(score, dim=1)
        score = F.logsigmoid(score)
        losses.append(sum(score))

        neg_emb_u = []
        for i in range(len(neg_u)):
            neg_emb_ui = self.u_embeddings(Variable(torch.LongTensor(neg_u[i])))
            neg_emb_u.append(np.sum(neg_emb_ui.data.numpy(), axis=0).tolist())
        neg_emb_u = Variable(torch.FloatTensor(neg_emb_u))
        neg_emb_v = self.v_embeddings(Variable(torch.LongTensor(neg_v)))
        neg_score = torch.mul(neg_emb_u, neg_emb_v)
        neg_score = torch.sum(neg_score, dim=1)
        neg_score = F.logsigmoid(-1 * neg_score)
        losses.append(sum(neg_score))

        return -1 * sum(losses)
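
Assuming all contexts in pos_u share the same length, the per-row Python loop and the numpy round-trip above can be avoided with a single batched lookup. A hedged sketch of that alternative (not the project's code):

import torch
import torch.nn as nn

u_embeddings = nn.Embedding(100, 16)               # illustrative vocab/dim
pos_u = torch.LongTensor([[1, 4, 7], [2, 2, 9]])   # equal-length contexts
emb_u = u_embeddings(pos_u).sum(dim=1)             # [2, 16] in one call
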
dataset.py (project: R-net, author: matthew-z)
def padding(seqs, pad, batch_first=False):
    """

    :param seqs: tuple of seq_length x dim
    :return: seq_length x Batch x dim
    """
    lengths = [len(s) for s in seqs]

    seqs = [torch.Tensor(s) for s in seqs]
    batch_length = max(lengths)
    seq_tensor = torch.LongTensor(batch_length, len(seqs)).fill_(pad)
    for i, s in enumerate(seqs):
        end_seq = lengths[i]
        seq_tensor[:end_seq, i].copy_(s[:end_seq])
    if batch_first:
        seq_tensor = seq_tensor.t()
    return (seq_tensor, lengths)
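
For instance, padding two raw integer sequences with the function above yields a LongTensor in seq_length x Batch layout (values here are made up):

padded, lengths = padding([[3, 1, 4], [1, 5]], pad=0)
# padded has shape [3, 2]; its second column reads [1, 5, 0]
# lengths == [3, 2]
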
dataset.py (project: R-net, author: matthew-z)
def _create_collate_fn(self, batch_first=True):

        def collate(examples, this):
            if this.split == "train":
                items, question_ids, questions, questions_char, passages, passages_char, answers_positions, answer_texts, passage_tokenized = zip(
                    *examples)
            else:
                items, question_ids, questions, questions_char, passages, passages_char, passage_tokenized = zip(*examples)

            questions_tensor, question_lengths = padding(questions, this.PAD, batch_first=batch_first)
            passages_tensor, passage_lengths = padding(passages, this.PAD, batch_first=batch_first)

            # TODO: implement char level embedding

            question_document = Documents(questions_tensor, question_lengths)
            passages_document = Documents(passages_tensor, passage_lengths)

            if this.split == "train":
                return question_ids, question_document, passages_document, torch.LongTensor(answers_positions), answer_texts

            else:
                return question_ids, question_document, passages_document, passage_tokenized

        return partial(collate, this=self)
preprocess.py (project: AoAReader, author: kevinkwl)
def vectorize_stories(stories, vocab: Vocabulary):
    X = []
    Q = []
    C = []
    A = []

    for s, q, a, c in stories:
        x = vocab.convert2idx(s)
        xq = vocab.convert2idx(q)
        xc = vocab.convert2idx(c)
        X.append(x)
        Q.append(xq)
        C.append(xc)
        A.append(vocab.getIdx(a))

    A = torch.LongTensor(A)
    return X, Q, A, C
mps_base.py (project: MSDN, author: yikang-li)
def prepare_message(self, target_features, source_features, select_mat, gate_module):
        feature_data = []

        transfer_list = np.where(select_mat > 0)
        source_indices = Variable(torch.from_numpy(transfer_list[1]).type(torch.LongTensor)).cuda()
        target_indices = Variable(torch.from_numpy(transfer_list[0]).type(torch.LongTensor)).cuda()
        source_f = torch.index_select(source_features, 0, source_indices)
        target_f = torch.index_select(target_features, 0, target_indices)
        transferred_features = gate_module(target_f, source_f)

        for f_id in range(target_features.size()[0]):
            if len(np.where(select_mat[f_id, :] > 0)[0]) > 0:
                feature_indices = np.where(transfer_list[0] == f_id)[0]
                indices = Variable(torch.from_numpy(feature_indices).type(torch.LongTensor)).cuda()
                features = torch.index_select(transferred_features, 0, indices).mean(0).view(-1)
                feature_data.append(features)
            else:
                temp = Variable(torch.zeros(target_features.size()[1:]), requires_grad=True).type(torch.FloatTensor).cuda()
                feature_data.append(temp)
        return torch.stack(feature_data, 0)
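
The indexing backbone here is torch.index_select driven by LongTensors built from np.where; in isolation, with toy matrices:

import numpy as np
import torch

select_mat = np.array([[0, 1], [1, 0]])
transfer_list = np.where(select_mat > 0)           # (target rows, source cols)
source_indices = torch.from_numpy(transfer_list[1])         # LongTensor
features = torch.randn(2, 4)
source_f = torch.index_select(features, 0, source_indices)  # picked rows
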
eval.py (project: DREAM, author: LaceyChen17)
def eval_batch(dr_model, ub, up, batch_size, is_reordered = False):
    '''
        Use dr_model to predict (u, p) scores in batches.
        Parameters:
        - ub: users' baskets
        - up: users' history purchases
        - batch_size: number of users per batch
    '''
    # turn on evaluation mode
    dr_model.eval()
    is_cuda =  dr_model.config.cuda
    item_embedding = dr_model.encode.weight
    dr_hidden = dr_model.init_hidden(batch_size)

    id_u, item_u, score_u, dynam_u = [], [], [], []
    start_time = time()
    num_batchs = ceil(len(ub) / batch_size)
    for i, x in enumerate(batchify(ub, batch_size, is_reordered)):
        if is_reordered is True:
            baskets, lens, uids, r_baskets, h_baskets = x
        else:
            baskets, lens, uids = x
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        for uid, l, du in zip(uids, lens, dynamic_user):
            du_latest =  du[l - 1].unsqueeze(0)
            # calculating <u,p> score for all history <u,p> pair 
            history_item = [int(i) for i in up[up.user_id == uid]['product_id'].values[0]]
            history_item = torch.cuda.LongTensor(history_item) if is_cuda else torch.LongTensor(history_item)
            score_up = torch.mm(du_latest, item_embedding[history_item].t()).cpu().data.numpy()[0]
            id_u.append(uid), dynam_u.append(du_latest.cpu().data.numpy()[0]), item_u.append(history_item.cpu().numpy()),score_u.append(score_up)
        # Logging
        elapsed = time() - start_time; start_time = time()
        print('[Predicting]| Batch {:5d} / {:5d} | seconds/batch {:02.02f}'.format(i, num_batchs, elapsed))
    return id_u, item_u, score_u, dynam_u
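
The core scoring step, isolated: index the embedding matrix with a LongTensor of item ids and take an inner product. All sizes below are illustrative:

import torch

item_embedding = torch.randn(50, 8)            # 50 items, dim 8
du_latest = torch.randn(1, 8)                  # one user's latest state
history_item = torch.LongTensor([3, 17, 42])   # previously bought item ids
score_up = torch.mm(du_latest, item_embedding[history_item].t())  # [1, 3]
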
eval.py (project: DREAM, author: LaceyChen17)
def get_item_embedding(pid, dr_model):
    '''
        Get an item's embedding.
        pid can be an integer, a torch.LongTensor, or a torch.cuda.LongTensor.
    '''
    if isinstance(pid, torch.cuda.LongTensor) or isinstance(pid, torch.LongTensor):
        return dr_model.encode.weight[pid]
    elif isinstance(pid, int):
        return dr_model.encode.weight[pid].unsqueeze(0)
    else:
        print('Unsupported Index Type %s'%type(pid))
        return None
train.py (project: DREAM, author: LaceyChen17)
def bpr_loss(x, dynamic_user, item_embedding, config):
    '''
        Bayesian personalized ranking (BPR) loss for implicit feedback.
        Parameters:
        - x: batch of users' baskets
        - dynamic_user: batch of users' dynamic representations
        - item_embedding: item embedding matrix
        - config: model configuration
    '''
    nll = 0
    ub_seqs = []
    for u,du in zip(x, dynamic_user):
        du_p_product = torch.mm(du, item_embedding.t()) # shape: max_len, num_item
        nll_u = [] # nll for user
        for t, basket_t in enumerate(u):
            if basket_t[0] != 0 and t != 0:
                pos_idx = torch.cuda.LongTensor(basket_t) if config.cuda else torch.LongTensor(basket_t)
                # Sample negative products
                neg = [random.choice(range(1, config.num_product)) for _ in range(len(basket_t))] # replacement
                # neg = random.sample(range(1, config.num_product), len(basket_t)) # without replacement
                neg_idx = torch.cuda.LongTensor(neg) if config.cuda else torch.LongTensor(neg)
                # Score p(u, t, v > v')
                score = du_p_product[t - 1][pos_idx] - du_p_product[t - 1][neg_idx]
                #Average Negative log likelihood for basket_t
                nll_u.append(- torch.mean(torch.nn.LogSigmoid()(score)))
        nll += torch.mean(torch.cat(nll_u))
    return nll
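
A toy instance of the positive-minus-negative score that bpr_loss averages; item ids and scores are made up:

import torch

scores = torch.Tensor([2.0, 0.5, 1.0, -1.0, 0.0])   # one user's item scores
pos_idx = torch.LongTensor([0, 2])                   # items in the basket
neg_idx = torch.LongTensor([3, 4])                   # sampled negatives
score = scores[pos_idx] - scores[neg_idx]
nll = -torch.mean(torch.nn.LogSigmoid()(score))      # BPR: rank pos above neg
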
test_utils.py (project: crnn, author: wulivicte)
def checkOneHot(self):
        v = torch.LongTensor([1, 2, 1, 2, 0])
        v_length = torch.LongTensor([2, 3])
        v_onehot = utils.oneHot(v, v_length, 4)
        target = torch.FloatTensor([[[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]],
                                    [[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]])
        assert target.equal(v_onehot)
dataset.py (project: crnn, author: wulivicte)
def __iter__(self):
        n_batch = len(self) // self.batch_size
        tail = len(self) % self.batch_size
        index = torch.LongTensor(len(self)).fill_(0)
        for i in range(n_batch):
            random_start = random.randint(0, len(self) - self.batch_size)
            # torch.arange replaces the deprecated, end-inclusive torch.range
            batch_index = random_start + torch.arange(0, self.batch_size)
            index[i * self.batch_size:(i + 1) * self.batch_size] = batch_index
        # deal with the tail; slice from n_batch rather than the loop variable,
        # which would be undefined when n_batch == 0
        if tail:
            random_start = random.randint(0, len(self) - self.batch_size)
            tail_index = random_start + torch.arange(0, tail)
            index[n_batch * self.batch_size:] = tail_index

        return iter(index)

