Example source code for Python's tqdm()

col_generation.py (project: pdpipe, author: shaypal5)
def _op(self, df, verbose):
        inter_df = df
        colnames = list(self._bin_map.keys())
        if verbose:
            colnames = tqdm.tqdm(colnames)
        for colname in colnames:
            if verbose:
                colnames.set_description(colname)
            source_col = df[colname]
            loc = df.columns.get_loc(colname) + 1
            new_name = colname + "_bin"
            if self._drop:
                inter_df = inter_df.drop(colname, axis=1)
                new_name = colname
                loc -= 1
            inter_df = out_of_place_col_insert(
                df=inter_df,
                series=source_col.apply(
                    self._get_col_binner(self._bin_map[colname])),
                loc=loc,
                column_name=new_name)
        return inter_df
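
The pattern above is a common one: rebind the plain list to a tqdm wrapper only when verbose, then call set_description on the wrapper inside the loop so the bar is labeled with the current item. A minimal sketch of that pattern, with hypothetical column names:

import tqdm

colnames = ['age', 'height', 'weight']  # hypothetical column names
pbar = tqdm.tqdm(colnames)
for colname in pbar:
    pbar.set_description(colname)  # label the bar with the current column
    # ... process the column here ...
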
sklearn_stages.py (project: pdpipe, author: shaypal5)
def _op(self, df, verbose):
        columns_to_encode = self._columns
        if self._columns is None:
            columns_to_encode = list(set(df.select_dtypes(
                include=['object', 'category']).columns).difference(
                    self._exclude_columns))
        if verbose:
            columns_to_encode = tqdm.tqdm(columns_to_encode)
        inter_df = df
        for colname in columns_to_encode:
            lbl_enc = sklearn.preprocessing.LabelEncoder()
            source_col = df[colname]
            loc = df.columns.get_loc(colname) + 1
            new_name = colname + "_enc"
            if self._drop:
                inter_df = inter_df.drop(colname, axis=1)
                new_name = colname
                loc -= 1
            inter_df = out_of_place_col_insert(
                df=inter_df,
                series=lbl_enc.fit_transform(source_col),
                loc=loc,
                column_name=new_name)
            self.encoders[colname] = lbl_enc
        return inter_df
field.py (project: text, author: pytorch)
def segment(self, *args):
        """Segment one or more datasets with this subword field.

        Arguments:
            Positional arguments: Dataset objects or other indexable
                mutable sequences to segment. If a Dataset object is provided,
                all columns corresponding to this field are used; individual
                columns can also be provided directly.
        """
        sources = []
        for arg in args:
            if isinstance(arg, Dataset):
                sources += [getattr(arg, name) for name, field in
                            arg.fields.items() if field is self]
            else:
                sources.append(arg)
        for data in sources:
            for x in tqdm(data, 'segmenting'):
                x[:] = self.vocab.segment(x)
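
A side note on the call above: tqdm's second positional parameter is desc, so tqdm(data, 'segmenting') is equivalent to tqdm(data, desc='segmenting'). A minimal sketch:

from tqdm import tqdm

for x in tqdm(['token1', 'token2'], 'segmenting'):  # second positional arg is desc
    pass  # segment each item here
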
pytorch_word2vec.py (project: pytorch_word2vec, author: bamtercelboo)
def cbow_train(self):
        print("CBOW Training......")
        self.cbow_model.save_embedding(self.data.id2word, 'cbow_begin_embedding.txt')
        pos_all_pairs = self.data.get_cbow_batch_all_pairs(self.batch_size, self.context_size)
        pair_count = len(pos_all_pairs)
        process_bar = tqdm(range(int(pair_count / self.batch_size)))
        for _ in process_bar:
            pos_pairs = self.data.get_cbow_batch_pairs(self.batch_size, self.window_size)
            if self.using_hs:
                pos_pairs, neg_pairs = self.data.get_cbow_pairs_by_huffman(pos_pairs)
            else:
                pos_pairs, neg_pairs = self.data.get_cbow_pairs_by_neg_sampling(pos_pairs, self.context_size)

            pos_u = [pair[0] for pair in pos_pairs]
            pos_v = [int(pair[1]) for pair in pos_pairs]
            neg_u = [pair[0] for pair in neg_pairs]
            neg_v = [int(pair[1]) for pair in neg_pairs]

            self.optimizer.zero_grad()
            loss = self.cbow_model.forward(pos_u, pos_v, neg_u, neg_v)
            loss.backward()
            self.optimizer.step()
        print("CBOW Trained and Saving File......")
        self.cbow_model.save_embedding(self.data.id2word, self.output_file_name)
        print("CBOW Trained and Saved File.")
fasttext.py (project: embeddings, author: vzhong)
def load_word2emb(self, show_progress=True, batch_size=1000):
        fin_name = self.ensure_file(path.join('fasttext', '{}.zip'.format(self.lang)), url=self.url.format(self.lang))
        seen = set()

        with zipfile.ZipFile(fin_name) as fin:
            content = fin.read('wiki.{}.vec'.format(self.lang))
            lines = content.splitlines()
            if show_progress:
                lines = tqdm(lines)
            batch = []
            for line in lines:
                elems = line.decode().rstrip().split()
                vec = [float(n) for n in elems[-self.d_emb:]]
                word = ' '.join(elems[:-self.d_emb])
                if word in seen:
                    continue
                seen.add(word)
                batch.append((word, vec))
                if len(batch) == batch_size:
                    self.insert_batch(batch)
                    batch.clear()
            if batch:
                self.insert_batch(batch)
glove.py (project: embeddings, author: vzhong)
def load_word2emb(self, show_progress=True, batch_size=1000):
        fin_name = self.ensure_file(path.join('glove', '{}.zip'.format(self.name)), url=self.setting.url)
        seen = set()

        with zipfile.ZipFile(fin_name) as fin:
            fname_zipped = [fzipped.filename for fzipped in fin.filelist if str(self.d_emb) in fzipped.filename][0]
            content = fin.read(fname_zipped)
            lines = content.splitlines()
            if show_progress:
                lines = tqdm(lines, total=self.setting.size)
            batch = []
            for line in lines:
                elems = line.decode().rstrip().split()
                vec = [float(n) for n in elems[-self.d_emb:]]
                word = ' '.join(elems[:-self.d_emb])
                if word in seen:
                    continue
                seen.add(word)
                batch.append((word, vec))
                if len(batch) == batch_size:
                    self.insert_batch(batch)
                    batch.clear()
            if batch:
                self.insert_batch(batch)
kazuma.py (project: embeddings, author: vzhong)
def load_word2emb(self, show_progress=True, batch_size=1000):
        fin_name = self.ensure_file('kazuma.tar.gz', url=self.url)
        seen = set()

        with tarfile.open(fin_name, 'r:gz') as fzip:
            ftxt = fzip.extractfile('charNgram.txt')
            content = ftxt.read()
            ftxt.close()
            lines = content.splitlines()
            if show_progress:
                lines = tqdm(lines)
            batch = []
            for line in lines:
                elems = line.decode().rstrip().split()
                vec = [float(n) for n in elems[-self.d_emb:]]
                word = ' '.join(elems[:-self.d_emb])
                if word in seen:
                    continue
                seen.add(word)
                batch.append((word, vec))
                if len(batch) == batch_size:
                    self.insert_batch(batch)
                    batch.clear()
            if batch:
                self.insert_batch(batch)
SiteFab.py (project: SiteFab, author: ebursztein)
def render_posts(self):
        """Render posts using jinja2 templates."""

        for post in tqdm(self.posts, unit=' pages', miniters=1, desc="Posts"):
            template_name = "%s.html" % post.meta.template
            template = self.jinja2.get_template(template_name)
            html = post.html.decode("utf-8", 'ignore')
            rv = template.render(
                content=html, meta=post.meta, posts=self.posts,
                plugin_data=self.plugin_data, config=self.config,
                categories=self.posts_by_category.get_as_dict(),
                tags=self.posts_by_tag.get_as_dict(),
                templates=self.posts_by_template.get_as_dict(),
                microdata=self.posts_by_microdata.get_as_dict())

            # Linting
            linter_results = self.linter.lint(post, rv, self)
            # Are we stopping on linting errors?
            if linter_results.has_errors and self.config.linter.stop_on_error:
                print(post.filename)
                for error in linter_results.info:
                    print("\t-%s:%s" % (error[0], error[1]))
                sys.exit(-1)

            path = "%s%s/" % (self.get_output_dir(), post.meta.permanent_url)
            path = path.replace('//', '/')
            files.write_file(path, 'index.html', rv)

    ### Template functions ###
snli.py (project: allennlp, author: allenai)
def read(self, file_path: str):
        # if `file_path` is a URL, redirect to the cache
        file_path = cached_path(file_path)

        instances = []
        with open(file_path, 'r') as snli_file:
            logger.info("Reading SNLI instances from jsonl dataset at: %s", file_path)
            for line in tqdm.tqdm(snli_file):
                example = json.loads(line)

                label = example["gold_label"]
                if label == '-':
                    # These were cases where the annotators disagreed; we'll just skip them.  It's
                    # like 800 out of 500k examples in the training data.
                    continue

                premise = example["sentence1"]
                hypothesis = example["sentence2"]
                instances.append(self.text_to_instance(premise, hypothesis, label))
        if not instances:
            raise ConfigurationError("No instances were read from the given filepath {}. "
                                     "Is the path correct?".format(file_path))
        return Dataset(instances)
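
One detail worth noting: snli_file is a file handle, and tqdm cannot call len() on it, so the bar shows only a running count and rate with no percentage or ETA. If the line count is known or cheap to compute, passing total= restores the full bar. A minimal sketch, with a hypothetical path:

import tqdm

path = 'data.jsonl'  # hypothetical file
with open(path) as f:
    n_lines = sum(1 for _ in f)  # one cheap pass to count lines
with open(path) as f:
    for line in tqdm.tqdm(f, total=n_lines):
        pass  # parse each line here
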
seq2seq.py (project: allennlp, author: allenai)
def read(self, file_path):
        instances = []
        with open(file_path, "r") as data_file:
            logger.info("Reading instances from lines in file at: %s", file_path)
            for line_num, line in enumerate(tqdm.tqdm(data_file)):
                line = line.strip("\n")

                if not line:
                    continue

                line_parts = line.split('\t')
                if len(line_parts) != 2:
                    raise ConfigurationError("Invalid line format: %s (line number %d)" % (line, line_num + 1))
                source_sequence, target_sequence = line_parts
                instances.append(self.text_to_instance(source_sequence, target_sequence))
        if not instances:
            raise ConfigurationError("No instances read!")
        return Dataset(instances)
evaluate.py (project: allennlp, author: allenai)
def evaluate(model: Model,
             dataset: Dataset,
             iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    model.eval()

    generator = iterator(dataset, num_epochs=1, cuda_device=cuda_device, for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator, total=iterator.get_num_batches(dataset))
    for batch in generator_tqdm:
        model(**batch)
        metrics = model.get_metrics()
        description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description)

    return model.get_metrics()
train.py (project: torch_light, author: ne7ermore)
def train():
    rnn.train()
    total_loss = 0
    hidden = rnn.init_hidden(args.batch_size)
    for data, label in tqdm(training_data, mininterval=1,
                desc='Train Processing', leave=False):
        optimizer.zero_grad()
        hidden = repackage_hidden(hidden)
        target, hidden = rnn(data, hidden)
        loss = criterion(target, label)

        loss.backward()
        torch.nn.utils.clip_grad_norm(rnn.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.data
    return total_loss[0]/training_data.sents_size

# ##############################################################################
# Save Model
# ##############################################################################
main.py (project: torch_light, author: ne7ermore)
def train():
    rnn.train()
    total_loss = 0
    hidden = rnn.init_hidden()
    for data, label in tqdm(training_data, mininterval=1,
                desc='Train Processing', leave=False):
        optimizer.zero_grad()
        hidden = repackage_hidden(hidden)
        target, hidden = rnn(data, hidden)
        loss = criterion(target, label)

        loss.backward()
        optimizer.step()

        total_loss += loss.data
    return total_loss[0]/training_data.sents_size

# ##############################################################################
# Save Model
# ##############################################################################
autoencoder.py (project: AVSR-Deep-Speech, author: pandeydivesh15)
def fit(self, 
            data_x_train,
            data_x_dev=None,
            data_x_test=None,
            n_epochs=10,
            batch_size=10):
        assert n_epochs > 0
        assert batch_size < data_x_train.shape[0]

        size_x_train = data_x_train.shape[0]

        n_batches = size_x_train // batch_size  # integer division so range() gets an int

        for e in range(n_epochs):
            epoch_costs = np.zeros(n_batches)
            bar = tqdm(range(n_batches), desc='Epoch: {:d}'.format(e))

            for i in bar:
                batch_x = data_x_train[i*batch_size:(i+1)*batch_size]
                err = self.partial_fit(batch_x)
                epoch_costs[i] = err

            mean_cost = epoch_costs.mean()
            print('Train error: {:.4f}'.format(mean_cost))

            if data_x_dev is not None:
                random_indices = np.random.randint(0, data_x_dev.shape[0], batch_size)
                batch_x = data_x_dev[random_indices]
                err = self.get_cost(batch_x)
                print('Validation data error: {:.4f}'.format(err))

        if data_x_test is not None:
            err = self.get_cost(data_x_test)
            print('Test data error: {:.4f}'.format(err))
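
For loops over range() with a description, as in the snippet above, tqdm also offers trange as a shorthand for tqdm(range(...)). A minimal sketch:

from tqdm import trange

for i in trange(10, desc='Epoch: 0'):
    pass  # one mini-batch per iteration
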
utils.py (project: spyking-circus, author: spyking-circus)
def get_tqdm_progressbar(iterator):
    sys.stderr.flush()
    return tqdm.tqdm(iterator, ncols=72,
                     bar_format='{desc}{percentage:3.0f}%|{bar}|[{elapsed}<{remaining}, {rate_fmt}]')
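
bar_format here is an ordinary format string; tqdm substitutes named fields such as {desc}, {percentage}, {bar}, {n_fmt}, {total_fmt}, {elapsed}, {remaining} and {rate_fmt}. A minimal sketch of a similarly customized bar:

import tqdm

for _ in tqdm.tqdm(range(1000), desc='demo ', ncols=72,
                   bar_format='{desc}{percentage:3.0f}%|{bar}|[{elapsed}<{remaining}]'):
    pass
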
validate.py (project: pytorch-semseg, author: meetshah1995)
def validate(args):

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, split=args.split, is_transform=True, img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=args.batch_size, num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(args.model_path[:args.model_path.find('_')], n_classes)
    state = convert_state_dict(torch.load(args.model_path)['model_state'])
    model.load_state_dict(state)
    model.eval()

    for i, (images, labels) in tqdm(enumerate(valloader)):
        model.cuda()
        images = Variable(images.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True)

        outputs = model(images)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.data.cpu().numpy()

        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
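
A caveat on the loop above: tqdm(enumerate(valloader)) wraps a generator, which has no length, so the bar cannot show a total or percentage. Wrapping the loader first, as in enumerate(tqdm(valloader)), lets tqdm read len(valloader). A minimal sketch, with a list standing in for the DataLoader:

from tqdm import tqdm

valloader = [('img0', 'lbl0'), ('img1', 'lbl1')]  # stand-in for a DataLoader
for i, (images, labels) in enumerate(tqdm(valloader)):
    pass  # run the model on this batch
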
pascal_voc_loader.py (project: pytorch-semseg, author: meetshah1995)
def setup(self, pre_encode=False):
        sbd_path = get_data_path('sbd')
        voc_path = get_data_path('pascal')

        target_path = self.root + '/SegmentationClass/pre_encoded/'
        if not os.path.exists(target_path):
            os.makedirs(target_path)

        sbd_train_list = tuple(open(sbd_path + 'dataset/train.txt', 'r'))
        sbd_train_list = [id_.rstrip() for id_ in sbd_train_list]

        self.files['train_aug'] = self.files['train'] + sbd_train_list

        if pre_encode:
            print("Pre-encoding segmentation masks...")
            for i in tqdm(sbd_train_list):
                lbl_path = sbd_path + 'dataset/cls/' + i + '.mat'
                lbl = io.loadmat(lbl_path)['GTcls'][0]['Segmentation'][0].astype(np.int32)
                lbl = m.toimage(lbl, high=lbl.max(), low=lbl.min())
                m.imsave(target_path + i + '.png', lbl)

            for i in tqdm(self.files['trainval']):
                lbl_path = self.root + '/SegmentationClass/' + i + '.png'
                lbl = self.encode_segmap(m.imread(lbl_path))
                lbl = m.toimage(lbl, high=lbl.max(), low=lbl.min())
                m.imsave(target_path + i + '.png', lbl)
build_feature_files.py (project: human-rl, author: gsastry)
def build_feature_files(base_directory,
                        new_directory,
                        data_loader,
                        n=None,
                        negative_example_keep_prob=1.0):
    os.makedirs(new_directory, exist_ok=False)
    episode_paths = frame.episode_paths(base_directory)
    label_counts = [0, 0]
    if n is not None:
        np.random.shuffle(episode_paths)
        episode_paths = episode_paths[:n]
    for episode_path in tqdm.tqdm(episode_paths):
        try:
            features, labels = data_loader.load_features_and_labels([episode_path])
        except Exception:
            traceback.print_exc()
        else:
            keep = np.logical_or(labels, (np.less(
                np.random.rand(len(labels)), negative_example_keep_prob)))
            labels = labels[keep]

            for i in range(len(label_counts)):
                label_counts[i] += np.count_nonzero(labels == i)
            features = {k: v[keep] for k, v in features.items()}
            new_path = path_relative_to_new_directory(base_directory, new_directory, episode_path,
                                                      ".features")
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            with open(new_path, 'wb') as f:
                pickle.dump((features, labels), f)
    return label_counts
build_training_data_tfrecord.py (project: human-rl, author: gsastry)
def copy_episodes(indir, outdir, n):
    episode_paths = frame.episode_paths(indir)
    np.random.shuffle(episode_paths)
    episode_paths = episode_paths[:n]
    start = len(indir)
    for p in tqdm.tqdm(episode_paths):
        assert p.startswith(indir), p
        outfile = outdir + p[start:]
        os.makedirs(os.path.dirname(outfile), exist_ok=True)
        shutil.copyfile(p, outfile)
build_training_data_tfrecord.py (project: human-rl, author: gsastry)
def label_episodes(directory, classifier):
    episode_paths = frame.episode_paths(directory)
    data_loader = DataLoader(hparams=classifier.hparams)
    for episode_path in tqdm.tqdm(episode_paths):
        try:
            data_loader.predict_episodes(classifier, [episode_path], prefix="frame/classifier_")
        except EOFError:
            traceback.print_exc()
            print("Error reading {}".format(episode_path))
            os.remove(episode_path)

