Example source code for Python's savez_compressed()
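
np.savez_compressed() writes one or more NumPy arrays into a single compressed .npz archive; np.load() opens the archive as a dict-like object keyed by the names given at save time. A minimal round-trip sketch (file and array names are illustrative):

import numpy as np

x = np.arange(10)
y = np.random.rand(3, 3)

# keyword arguments become the keys inside the archive
np.savez_compressed('example.npz', x=x, y=y)

with np.load('example.npz') as data:  # NpzFile behaves like a read-only dict
    assert np.array_equal(data['x'], x)
    assert np.allclose(data['y'], y)

The snippets below, collected from various open-source projects, show the function in practice.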

util.py (project: sentence-classification, author: jind11)
def process_word2vec(word2vec_dir, vocab, save_path, random_init=True):

    # read pre-trained word embeddings from the binary file
    print('Loading google word2vec...')
    word2vec_path = word2vec_dir + '/GoogleNews-vectors-negative300.bin.gz'
    word_vectors = KeyedVectors.load_word2vec_format(word2vec_path, binary=True)
    print('Word2vec loaded!')

    if random_init:
        word2vec = np.random.uniform(-0.25, 0.25, (len(vocab), 300))
    else:
        word2vec = np.zeros((len(vocab), 300))
    found = 0
    for idx, token in enumerate(vocab):
        try:
            vec = word_vectors[token]
        except KeyError:
            # token absent from the pre-trained vocabulary; keep the initialized row
            pass
        else:
            word2vec[idx, :] = vec
            found += 1

    del word_vectors

    print("{}/{} of word vocab have corresponding vectors in {}".format(found, len(vocab), word2vec_path))
    np.savez_compressed(save_path, word2vec=word2vec)
    print("saved trimmed word2vec matrix at: {}".format(save_path))


# construct embedding vectors according to the GloVe word vectors and vocabulary
util.py (project: sentence-classification, author: jind11)
def process_glove(glove_dir, glove_dim, vocab_dir, save_path, random_init=True):
    """
    :param vocab_list: [vocab]
    :return:
    """
    save_path = save_path + '.{}'.format(glove_dim)
    if not os.path.isfile(save_path + ".npz"):
        # read the vocabulary; the first element of each entry is the word
        with open(vocab_dir + '/vocabulary.pickle', 'rb') as f:
            vocab_map = cPickle.load(f)
        vocab_list = [pair[0] for pair in vocab_map]

        glove_path = os.path.join(glove_dir, "glove.6B.{}d.txt".format(glove_dim))
        if random_init:
            glove = np.random.uniform(-0.25, 0.25, (len(vocab_list), glove_dim))
        else:
            glove = np.zeros((len(vocab_list), glove_dim))
        found = 0
        with open(glove_path, 'r') as fh:
            for line in fh:
                array = line.strip().split(" ")
                word = array[0]
                vector = list(map(float, array[1:]))
                if word in vocab_list:
                    idx = vocab_list.index(word)
                    glove[idx, :] = vector
                    found += 1
                if word.capitalize() in vocab_list:
                    idx = vocab_list.index(word.capitalize())
                    glove[idx, :] = vector
                    found += 1
                if word.upper() in vocab_list:
                    idx = vocab_list.index(word.upper())
                    glove[idx, :] = vector
                    found += 1

        print("{}/{} of word vocab have corresponding vectors in {}".format(found, len(vocab_list), glove_path))
        np.savez_compressed(save_path, glove=glove)
        print("saved trimmed glove matrix at: {}".format(save_path))
Jive.py (project: py_jive, author: idc9)
def save_estimates(self, fname='', notes='', force=False):
        """
        Saves the JIVE estimates

        U, D, V, full, rank for block secific joint/individual spaces
        U, D, V, rank for common joint space
        some metadata (when saved, some nots)

        Parameters
        ----------
        fname: name of the file
        notes: any notes you want to include
        force: whether or note to overwrite a file with the same name
        """

        if os.path.exists(fname) and (not force):
            raise ValueError('%s already exists' % fname)

        kwargs = {}
        svd_dat = ['scores', 'sing_vals', 'loadings', 'rank']
        kwargs['K'] = self.K

        block_estimates = self.get_block_specific_estimates()
        for k in range(self.K):
            for mode in ['joint', 'individual']:
                for dat in svd_dat + ['full']:
                    label = '%d_%s_%s' % (k, mode, dat)
                    kwargs[label] = block_estimates[k][mode][dat]

        common_joint = self.get_common_joint_space_estimate()
        for dat in svd_dat:
            kwargs['common_%s' % dat] = common_joint[dat]

        current_time = time.strftime("%m/%d/%Y %H:%M:%S")
        kwargs['metadata'] = [current_time, notes]

        np.savez_compressed(fname, **kwargs)
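
The archive can be reopened with np.load; key names follow the '%d_%s_%s' scheme built above. A hedged sketch (fname as saved; NumPy appends .npz if it is missing):

estimates = np.load(fname)
K = int(estimates['K'])
joint_scores_0 = estimates['0_joint_scores']  # block 0, joint mode, 'scores'
common_rank = estimates['common_rank']
saved_at, saved_notes = estimates['metadata']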
Jive.py (project: py_jive, author: idc9)
def save_init_svd(self, fname='', notes='', force=False):
        """
        Saves the initial SVD so it can be loaded later without recomputing

        Parameters
        ----------
        fname: name of the file
        notes: any notes you want to include
        force: whether or not to overwrite a file with the same name
        """

        if not hasattr(self.blocks[0], 'scores'):
            raise ValueError('initial svd has not yet been computed')

        if os.path.exists(fname) and (not force):
            raise ValueError('%s already exists' % fname)

        kwargs = {}
        svd_dat = ['scores', 'sing_vals', 'loadings', 'rank']
        kwargs['K'] = self.K

        for k in range(self.K):
            kwargs['%d_scores' % k] = self.blocks[k].scores
            kwargs['%d_sv' % k] = self.blocks[k].sv
            kwargs['%d_loadings' % k] = self.blocks[k].loadings
            kwargs['%d_init_svd_rank' % k] = self.blocks[k].init_svd_rank

        np.savez_compressed(fname, **kwargs)
agent.py (project: malmo-challenge, author: Kaixhin)
def save(self, out_file):
        """
        Save the current memory into a file in NumPy format
        :param out_file: File storage path
        :return:
        """
        np.savez_compressed(out_file, states=self._states, actions=self._actions,
                            rewards=self._rewards, terminals=self._terminals)
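
A matching restore is not part of the snippet; a hedged sketch of a hypothetical load() counterpart that reads the four arrays back:

def load(self, in_file):
    """Hypothetical inverse of save(): restore the memory from an .npz file."""
    with np.load(in_file) as data:
        self._states = data['states']
        self._actions = data['actions']
        self._rewards = data['rewards']
        self._terminals = data['terminals']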
cnn_embedding.py (project: painters, author: inejc)
def _save_np_compressed_data(file_name, *args):
    mkdirs_if_not_exist(dirname(file_name))
    np.savez_compressed(file_name, *args)
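
Arrays passed positionally are stored under automatic keys 'arr_0', 'arr_1', and so on. A hedged loading sketch (assumes file_name carries the .npz extension):

data = np.load(file_name)
arrays = [data['arr_%d' % i] for i in range(len(data.files))]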
agent.py (project: malmo-challenge, author: Microsoft)
def save(self, out_file):
        """
        Save the current memory into a file in NumPy format
        :param out_file: File storage path
        :return:
        """
        np.savez_compressed(out_file, states=self._states, actions=self._actions,
                            rewards=self._rewards, terminals=self._terminals)
io.py (project: cvcalib, author: Algomorph)
def save_frame_data(archive, path, videos, object_point_set, verbose=True):
    if verbose:
        print("Saving corners to {0:s}".format(path))
    for video in videos:
        archive[IMAGE_POINTS + str(video.name)] = video.image_points
        archive[FRAME_NUMBERS + str(video.name)] = list(video.usable_frames.keys())
        if len(video.poses) > 0:
            archive[POSES + str(video.name)] = np.array([pose.T for pose in video.poses])

    archive[OBJECT_POINT_SET] = object_point_set
    np.savez_compressed(path, **archive)
io.py (project: cvcalib, author: Algomorph)
def save_calibration_intervals(archive, path, videos, verbose=True):
    if verbose:
        print("Saving calibration intervals to {0:s}".format(path))
    ranges = []
    for video in videos:
        if video.calibration_interval is None:
            raise ValueError("Expecting all cameras to have valid calibration frame ranges. Got: None")
        ranges.append(video.calibration_interval)
    ranges = np.array(ranges)
    archive[CALIBRATION_INTERVALS] = ranges
    np.savez_compressed(path, **archive)
trainer.py (project: luna16, author: gzuidhof)
def save_model(self):
        logging.info("Saving model")
        save_filename = os.path.join(self.model_folder, '{}_epoch{}.npz'.format(self.model_name, self.epoch))
        np.savez_compressed(save_filename, *lasagne.layers.get_all_param_values(self.network))
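
The saved positional arrays can be pushed back into the network with lasagne.layers.set_all_param_values; a hedged sketch of a hypothetical load_model counterpart:

def load_model(self, save_filename):
    logging.info("Loading model")
    with np.load(save_filename) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(self.network, param_values)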
utils.py (project: Dave-Godot, author: finchMFG)
def _preprocess(self, input_file, tensor_file):
        if input_file.endswith(".bz2"):
            file_reference = BZ2File(input_file, "r")
        elif input_file.endswith(".txt"):
            file_reference = io.open(input_file, "r")
        else:
            raise ValueError("unsupported input file type: %s" % input_file)
        raw_data = file_reference.read()
        file_reference.close()
        data = raw_data.encode(encoding=self.encoding)
        # Convert the entire data file from characters to indices via the vocab dictionary.
        # map(function, iterable) applies the function to each member of the iterable,
        # and list() materializes the result, e.g.:
        # [14, 2, 9, 2, 0, 6, 7, 0, ...]
        # np.array converts the list into a numpy array.
        self.tensor = np.array(list(map(self.vocab.get, data)))
        # Compress and save the numpy tensor array to data.npz.
        np.savez_compressed(tensor_file, tensor_data=self.tensor)
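
Loading the tensor back is symmetric (a hedged sketch; per the comment above, tensor_file already ends in .npz):

self.tensor = np.load(tensor_file)['tensor_data']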
create_vocab.py (project: DeepMoji, author: bfelbo)
def save_vocab(self, path_count, path_vocab, word_limit=100000):
        """ Saves the master vocabulary into a file.
        """

        # reserve space for 10 special tokens
        words = OrderedDict()
        for token in SPECIAL_TOKENS:
            # store -1 instead of np.inf, which can overflow
            words[token] = -1

        # sort words by frequency
        desc_order = OrderedDict(sorted(self.master_vocab.items(),
                                        key=lambda kv: kv[1], reverse=True))
        words.update(desc_order)

        # use encoding of up to 30 characters (no token conversions)
        # use float to store large numbers (we don't care about precision loss)
        np_vocab = np.array(list(words.items()),
                            dtype=[('word', '|S30'), ('count', 'float')])

        # output count for debugging
        counts = np_vocab[:word_limit]
        np.savez_compressed(path_count, counts=counts)

        # output the index of each word for easy lookup
        final_words = OrderedDict()
        for i, w in enumerate(list(words.keys())[:word_limit]):
            final_words[w] = i
        with open(path_vocab, 'w') as f:
            f.write(json.dumps(final_words, indent=4, separators=(',', ': ')))
test_format.py (project: krpcScripts, author: jwvanderbeck)
def test_compressed_roundtrip():
    arr = np.random.rand(200, 200)
    npz_file = os.path.join(tempdir, 'compressed.npz')
    np.savez_compressed(npz_file, arr=arr)
    arr1 = np.load(npz_file)['arr']
    assert_array_equal(arr, arr1)
network.py (project: nuts-ml, author: maet3608)
def save_weights(self, weightspath=None):
        weightspath = super(LasagneNetwork, self)._weightspath(weightspath)
        weights = {name: p.get_value() for name, p in
                   LasagneNetwork._get_named_params(self.out_layer)}
        np.savez_compressed(weightspath, **weights)
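
Restoring is symmetric; a hedged sketch of a hypothetical load_weights counterpart (assumes the same architecture, so parameter names match):

def load_weights(self, weightspath=None):
    weightspath = super(LasagneNetwork, self)._weightspath(weightspath)
    with np.load(weightspath) as f:
        for name, p in LasagneNetwork._get_named_params(self.out_layer):
            p.set_value(f[name])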
utils.py (project: chatbot-rnn, author: zenixls2)
def _preprocess(self, input_file, tensor_file):
        if input_file.endswith(".bz2"):
            file_reference = BZ2File(input_file, "r")
        elif input_file.endswith(".txt"):
            file_reference = io.open(input_file, "r")
        else:
            raise ValueError("unsupported input file type: %s" % input_file)
        raw_data = file_reference.read()
        file_reference.close()
        data = raw_data.encode(encoding=self.encoding)
        # Convert the entire data file from characters to indices via the vocab dictionary.
        # map(function, iterable) applies the function to each member of the iterable,
        # and list() materializes the result, e.g.:
        # [14, 2, 9, 2, 0, 6, 7, 0, ...]
        # np.array converts the list into a numpy array.
        self.tensor = np.array(list(map(self.vocab.get, data)))
        # Compress and save the numpy tensor array to data.npz.
        np.savez_compressed(tensor_file, tensor_data=self.tensor)
embedding2matrix.py (project: PyTorchText, author: chenyuntc)
def main(em_file, em_result):
    '''
    Convert a word2vec embedding file to NumPy arrays.
    '''
    em = word2vec.load(em_file)
    vec = (em.vectors)
    word2id = em.vocab_hash
    # d = dict(vector = vec, word2id = word2id)
    # t.save(d,em_result)
    np.savez_compressed(em_result, vector=vec, word2id=word2id)
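
Because word2id is a Python dict, savez_compressed stores it as a 0-d object array, so reading it back requires allow_pickle=True and .item() to unwrap. A hedged sketch (assumes em_result includes the .npz extension):

data = np.load(em_result, allow_pickle=True)
vec = data['vector']
word2id = data['word2id'].item()  # unwrap the 0-d object array back into a dict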
models_learners.py (project: smp_base, author: x75)
def savelogs(self, ts=None, saveres=True, filename=None):
        # FIXME: consider HDF5
        if ts is None:
            ts = time.strftime("%Y%m%d-%H%M%S")

        # np.save("%s/log-x-%s" % (self.cfgprefix, ts), self.iosm.x_)
        # np.save("%s/log-x_raw-%s" % (self.cfgprefix, ts), self.iosm.x_raw_)
        # np.save("%s/log-z-%s" % (self.cfgprefix, ts), self.iosm.z_)
        # np.save("%s/log-zn-%s" % (self.cfgprefix, ts), self.iosm.zn_)
        # np.save("%s/log-zn_lp-%s" % (self.cfgprefix, ts), self.iosm.zn_lp_)
        # np.save("%s/log-r-%s" % (self.cfgprefix, ts), self.iosm.r_)
        # np.save("%s/log-w-%s" % (self.cfgprefix, ts), self.iosm.w_)
        # network data, pickling reservoir, input weights, output weights
        # self.res.save("%s/log-%s-res-%s.bin" % (self.cfgprefix, self.cfgprefix, ts))

        if filename is None:
            logfile = "%s/log-learner-%s" % (self.cfgprefix, ts)
        else:
            logfile = filename
        if saveres:
            np.savez_compressed(logfile, x=self.iosm.x_,
                                x_raw=self.iosm.x_raw_, z=self.iosm.z_, zn=self.iosm.zn_,
                                zn_lp=self.iosm.zn_lp_, r=self.iosm.r_, w=self.iosm.w_,
                                e=self.iosm.e_, t=self.iosm.t_, mse=self.iosm.mse_)
        else:
            np.savez_compressed(logfile, x=self.iosm.x_,
                                x_raw=self.iosm.x_raw_, z=self.iosm.z_, zn=self.iosm.zn_,
                                zn_lp=self.iosm.zn_lp_, w=self.iosm.w_, e=self.iosm.e_,
                                t=self.iosm.t_, mse=self.iosm.mse_)
        print "logs saved to %s" % logfile
        return logfile
matrix.py (project: ottertune, author: cmu-db)
    def save_matrix(self, path):
        # savez_compressed writes binary data, so open the file in 'wb' mode
        with open(path, 'wb') as f:
            np.savez_compressed(f,
                                data=self.__data,
                                rowlabels=self.__rowlabels,
                                columnlabels=self.__columnlabels)
evaluation.py (project: adascan_public, author: amlankar)
def save_scores(model_options, name_scores):
    # create the output directory once, if needed
    if not os.path.isdir('scores/'):
        os.makedirs('scores/')
    save_name = 'scores/scores_' + model_options['name'].split('/')[-1]
    print('Dumping scores to: ' + save_name)
    # saved positionally, so the array is stored under the key 'arr_0'
    np.savez_compressed(save_name, name_scores)
npz.py (project: chainer-deconv, author: germanRos)
def savez_compressed(file, *args, **kwds):
    """Saves one or more arrays into a file in compressed ``.npz`` format.

    It is equivalent to :func:`cupy.savez` function except the output file is
    compressed.

    .. seealso::
       :func:`cupy.savez` for more detail,
       :func:`numpy.savez_compressed`

    """
    args = map(cupy.asnumpy, args)
    for key in kwds:
        kwds[key] = cupy.asnumpy(kwds[key])
    numpy.savez_compressed(file, *args, **kwds)
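
Usage mirrors numpy.savez_compressed but accepts arrays living on the GPU; a hedged sketch:

import cupy

x = cupy.arange(6).reshape(2, 3)
savez_compressed('gpu_arrays.npz', x=x)  # copied to host via cupy.asnumpy first

host = numpy.load('gpu_arrays.npz')
assert (host['x'] == cupy.asnumpy(x)).all()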

