Example source code for Python's File() class (h5py.File)

add_attr_to_hdf5.py (project: higlass-server, author: hms-dbmi)
import argparse

import h5py


def main():
    parser = argparse.ArgumentParser(description="""

    python add_attr_to_hdf5.py file.hdf5 attr_name attr_value

    Add an attribute to an HDF5 file.
""")

    parser.add_argument('filepath')
    parser.add_argument('attr_name')
    parser.add_argument('attr_value')

    args = parser.parse_args()

    # Open in append mode: setting an attribute requires write access.
    with h5py.File(args.filepath, 'a') as f:
        f.attrs[args.attr_name] = args.attr_value
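
A quick way to check the result, assuming the script above was run as, e.g., python add_attr_to_hdf5.py data.h5 created_by me (hypothetical file and attribute names):

import h5py

with h5py.File('data.h5', 'r') as f:
    print(dict(f.attrs))  # should now include {'created_by': 'me'}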

mparray.py (project: mpnum, author: dseuss)
def dump(self, target):
        """Serializes MPArray to :code:`h5py.Group`. Recover using
        :func:`~load`.

        :param target: :code:`h5py.Group` the instance should be saved to or
            path to h5 file (it's then serialized to /)

        """
        if isinstance(target, str):
            import h5py
            with h5py.File(target, 'w') as outfile:
                return self.dump(outfile)

        for prop in ('ranks', 'shape'):
            # these are only saved for convenience
            target.attrs[prop] = str(getattr(self, prop))

        # these are actually used in MPArray.load
        target.attrs['len'] = len(self)
        target.attrs['canonical_form'] = self.canonical_form

        for site, lten in enumerate(self._lt):
            target[str(site)] = lten
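
The format written above is one dataset per site plus 'len' and 'canonical_form' attributes, so the raw pieces can be read back directly. A minimal inspection sketch, not mpnum's actual MPArray.load:

import h5py

def read_dumped_mpa(path):
    # Read back the raw local tensors written by dump() above.
    with h5py.File(path, 'r') as f:
        n_sites = int(f.attrs['len'])
        ltens = [f[str(site)][()] for site in range(n_sites)]
        return ltens, f.attrs['canonical_form']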

mparray_test.py (project: mpnum, author: dseuss)
def test_dump_and_load(tmpdir, dtype):
    mpa = factory.random_mpa(5, [(4,), (2, 3), (1,), (4,), (4, 3)],
                             (4, 7, 1, 3), dtype=dtype)
    mpa.canonicalize(left=1, right=3)

    with h5.File(str(tmpdir / 'dump_load_test.h5'), 'w') as buf:
        newgroup = buf.create_group('mpa')
        mpa.dump(newgroup)
    with h5.File(str(tmpdir / 'dump_load_test.h5'), 'r') as buf:
        mpa_loaded = mp.MPArray.load(buf['mpa'])
    assert_mpa_identical(mpa, mpa_loaded)

    mpa.dump(str(tmpdir / 'dump_load_test_str.h5'))
    mpa_loaded = mp.MPArray.load(str(tmpdir / 'dump_load_test_str.h5'))
    assert_mpa_identical(mpa, mpa_loaded)


tools.py (project: monogreedy, author: jinjunqi)
import os.path as osp

import h5py
import numpy as np


def average_models(best, L=6, model_dir='', model_name='ra.h5'):
    print('... merging')
    print('{} {:d}-{:d}'.format(model_dir, best - L // 2, best + L // 2))
    params = {}
    side_info = {}
    attrs = {}
    # Average the '#'-keyed parameter datasets over a window of L checkpoints
    # centred on `best`; everything else is copied through unchanged.
    for i in range(max(best - L // 2, 0), best + L // 2):
        with h5py.File(osp.join(model_dir, model_name + '.' + str(i)), 'r') as f:
            for k, v in f.attrs.items():
                attrs[k] = v
            for p in f.keys():
                if '#' not in p:
                    side_info[p] = f[p][...]
                elif p in params:
                    params[p] += np.array(f[p]).astype('float32') / L
                else:
                    params[p] = np.array(f[p]).astype('float32') / L
    with h5py.File(osp.join(model_dir, model_name + '.merge'), 'w') as f:
        for p in params.keys():
            f[p] = params[p]
        for s in side_info.keys():
            f[s] = side_info[s]
        for k, v in attrs.items():
            f.attrs[k] = v
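
For example, a hypothetical call like the one below (all names made up) would average the six checkpoints ra.h5.9 through ra.h5.14 and write the result to models/ra.h5.merge:

# Assumes 'models/' contains ra.h5.9 .. ra.h5.14
average_models(best=12, L=6, model_dir='models', model_name='ra.h5')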

utils.py (project: crema, author: bmcfee)
import h5py


def save_h5(filename, **kwargs):
    '''Save data to an hdf5 file.

    Parameters
    ----------
    filename : str
        Path to the file

    kwargs
        key-value pairs of data

    See Also
    --------
    load_h5
    '''
    with h5py.File(filename, 'w') as hf:
        hf.update(kwargs)
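
Since save_h5 writes each keyword argument as a flat top-level dataset, a matching loader only needs to walk the file's keys. A minimal sketch of the load_h5 counterpart referenced in the docstring, assuming flat files like those save_h5 produces (crema's own implementation may differ):

import h5py

def load_h5(filename):
    '''Load every top-level dataset of an hdf5 file into a dict.'''
    data = {}
    with h5py.File(filename, 'r') as hf:
        for key in hf:
            data[key] = hf[key][()]
    return data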

makedb.py (project: SNPmatch, author: Gregor-Mendel-Institute)
import logging

import h5py
import numpy as np

log = logging.getLogger(__name__)


def save_as_hdf5_acc(g, outHDF5):
    NumAcc = len(g.accessions)
    log.info("Writing into HDF5 file accession-wise")
    h5file = h5py.File(outHDF5, 'w')
    NumSNPs = len(g.snps)
    h5file.create_dataset('accessions', data=g.accessions, shape=(NumAcc,))
    h5file.create_dataset('positions', data=g.positions, shape=(NumSNPs,), dtype='i4')
    h5file['positions'].attrs['chrs'] = g.chrs
    h5file['positions'].attrs['chr_regions'] = g.chr_regions
    h5file.create_dataset('snps', shape=(NumSNPs, NumAcc), dtype='int8', compression="gzip", chunks=(NumSNPs, 1))
    snps = np.array(g.snps)  # convert once, outside the loop
    for i in range(NumAcc):
        h5file['snps'][:, i] = snps[:, i]
        if (i + 1) % 10 == 0:  # parentheses needed: '%' binds tighter than '+'
            log.info("written SNP info for %s accessions", i + 1)
    h5file['snps'].attrs['data_format'] = g.data_format
    h5file['snps'].attrs['num_snps'] = NumSNPs
    h5file['snps'].attrs['num_accessions'] = NumAcc
    h5file.close()
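
Storing 'snps' with chunks=(NumSNPs, 1) puts each accession's column in its own compressed chunk, so reading one accession touches exactly one chunk. A hypothetical read-back (the file name is made up):

import h5py

with h5py.File('snpmatch_acc.hdf5', 'r') as f:
    accessions = f['accessions'][:]
    first_acc = f['snps'][:, 0]  # all SNPs for accession 0: a single chunk read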

pre_sumstats.py (project: PleioPred, author: yiminghu)
import h5py
import numpy as np


def get_1000G_snps(sumstats, out_file):
    sf = np.loadtxt(sumstats, dtype=str, skiprows=1)
    h5f = h5py.File('ref/Misc/1000G_SNP_info.h5', 'r')
    rf = h5f['snp_chr'][:]
    h5f.close()
    # Keep only SNPs present in both the summary stats and the reference panel
    ind1 = np.in1d(sf[:, 1], rf[:, 2])
    ind2 = np.in1d(rf[:, 2], sf[:, 1])
    sf1 = sf[ind1]
    rf1 = rf[ind2]
    ### check order ###
    if sum(sf1[:, 1] == rf1[:, 2]) == len(rf1[:, 2]):
        print('SNP IDs already aligned')
    else:
        print('Order mismatch: sorting sf1 to match the order of rf1')
        O1 = np.argsort(sf1[:, 1])
        O2 = np.argsort(rf1[:, 2])
        O3 = np.argsort(O2)
        sf1 = sf1[O1][O3]
    out = ['hg19chrc snpid a1 a2 bp or p' + '\n']
    for i in range(len(sf1[:, 1])):
        out.append(sf1[:, 0][i] + ' ' + sf1[:, 1][i] + ' ' + sf1[:, 2][i] + ' ' + sf1[:, 3][i] + ' ' + rf1[:, 1][i] + ' ' + sf1[:, 5][i] + ' ' + sf1[:, 6][i] + '\n')
    ff = open(out_file, "w")
    ff.writelines(out)
    ff.close()

sharpen.py (project: text2image, author: emansim)
import h5py
import numpy as np
import theano


def load_weights(params, path, num_conv):
    print('Loading gan weights from ' + path)
    with h5py.File(path, 'r') as hdf5:
        params['skipthought2image'] = theano.shared(np.copy(hdf5['skipthought2image']))
        params['skipthought2image-bias'] = theano.shared(np.copy(hdf5['skipthought2image-bias']))

        for i in range(num_conv):
            params['W_conv{}'.format(i)] = theano.shared(np.copy(hdf5['W_conv{}'.format(i)]))
            params['b_conv{}'.format(i)] = theano.shared(np.copy(hdf5['b_conv{}'.format(i)]))

            # Flip w,h axes
            params['W_conv{}'.format(i)] = params['W_conv{}'.format(i)][:, :, ::-1, ::-1]

            w = np.abs(np.copy(hdf5['W_conv{}'.format(i)]))
            print('W_conv{}'.format(i), np.min(w), np.mean(w), np.max(w))
            b = np.abs(np.copy(hdf5['b_conv{}'.format(i)]))
            print('b_conv{}'.format(i), np.min(b), np.mean(b), np.max(b))

    return params

elmo_test.py (project: allennlp, author: allenai)
def _load_sentences_embeddings(self):
        # load the test sentences and the expected LM embeddings
        with open(os.path.join(FIXTURES, 'sentences.json')) as fin:
            sentences = json.load(fin)

        # the expected embeddings
        expected_lm_embeddings = []
        for k in range(len(sentences)):
            embed_fname = os.path.join(
                    FIXTURES, 'lm_embeddings_{}.hdf5'.format(k)
            )
            expected_lm_embeddings.append([])
            with h5py.File(embed_fname, 'r') as fin:
                for i in range(10):
                    sent_embeds = fin['%s' % i][...]
                    sent_embeds_concat = numpy.concatenate(
                            (sent_embeds[0, :, :], sent_embeds[1, :, :]),
                            axis=-1
                    )
                    expected_lm_embeddings[-1].append(sent_embeds_concat)

        return sentences, expected_lm_embeddings

embedding_test.py (project: allennlp, author: allenai)
def test_read_hdf5_format_file(self):
        vocab = Vocabulary()
        vocab.add_token_to_namespace("word")
        vocab.add_token_to_namespace("word2")
        embeddings_filename = self.TEST_DIR + "embeddings.hdf5"
        embeddings = numpy.random.rand(vocab.get_vocab_size(), 5)
        with h5py.File(embeddings_filename, 'w') as fout:
            _ = fout.create_dataset(
                    'embedding', embeddings.shape, dtype='float32', data=embeddings
            )

        params = Params({
                'pretrained_file': embeddings_filename,
                'embedding_dim': 5,
                })
        embedding_layer = Embedding.from_params(vocab, params)
        assert numpy.allclose(embedding_layer.weight.data.numpy(), embeddings)

embedding_test.py (project: allennlp, author: allenai)
def test_read_hdf5_raises_on_invalid_shape(self):
        vocab = Vocabulary()
        vocab.add_token_to_namespace("word")
        embeddings_filename = self.TEST_DIR + "embeddings.hdf5"
        embeddings = numpy.random.rand(vocab.get_vocab_size(), 10)
        with h5py.File(embeddings_filename, 'w') as fout:
            _ = fout.create_dataset(
                    'embedding', embeddings.shape, dtype='float32', data=embeddings
            )

        params = Params({
                'pretrained_file': embeddings_filename,
                'embedding_dim': 5,
                })
        with pytest.raises(ConfigurationError):
            _ = Embedding.from_params(vocab, params)

embedding.py (project: allennlp, author: allenai)
def _read_pretrained_hdf5_format_embedding_file(embeddings_filename: str, # pylint: disable=invalid-name
                                                embedding_dim: int,
                                                vocab: Vocabulary,
                                                namespace: str = "tokens") -> torch.FloatTensor:
    """
    Reads from a hdf5 formatted file.  The embedding matrix is assumed to
    be keyed by 'embedding' and of size ``(num_tokens, embedding_dim)``.
    """
    with h5py.File(embeddings_filename, 'r') as fin:
        embeddings = fin['embedding'][...]

    if list(embeddings.shape) != [vocab.get_vocab_size(namespace), embedding_dim]:
        raise ConfigurationError(
                "Read shape {0} embeddings from the file, but expected {1}".format(
                        list(embeddings.shape), [vocab.get_vocab_size(namespace), embedding_dim]))

    return torch.FloatTensor(embeddings)

utils.py (project: Learning-to-navigate-without-a-map, author: ToniRV)
def load_grid8(return_imsize=True):
    """Load grid 8x8.

    Parameters
    ----------
    return_imsize : bool
        return a tuple with grid size if True

    Returns
    -------
    db : h5py.File
        a HDF5 file object
    imsize : tuple
        (optional) grid size
    """
    file_path = os.path.join(rlvision.RLVISION_DATA,
                             "HDF5", "gridworld_8.hdf5")
    if not os.path.isfile(file_path):
        raise ValueError("The dataset %s does not exist!" % (file_path))

    if return_imsize:
        return h5py.File(file_path, mode="r"), (8, 8)
    else:
        return h5py.File(file_path, mode="r")
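
Because load_grid8 returns an open, read-only file handle, the caller is responsible for closing it. A minimal usage sketch:

db, imsize = load_grid8()
try:
    print(list(db.keys()), imsize)  # inspect the available datasets and the (8, 8) grid size
finally:
    db.close()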

sample.py (project: keras-molecules, author: maxhodak)
def encoder(args, model):
    latent_dim = args.latent_dim
    data, charset = load_dataset(args.data, split = False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if args.save_h5:
        h5f = h5py.File(args.save_h5, 'w')
        h5f.create_dataset('charset', data = charset)
        h5f.create_dataset('latent_vectors', data = x_latent)
        h5f.close()
    else:
        np.savetxt(sys.stdout, x_latent, delimiter = '\t')

sample_latent.py (project: keras-molecules, author: maxhodak)
def main():
    args = get_arguments()
    model = MoleculeVAE()

    data, data_test, charset = load_dataset(args.data)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if not args.visualize:
        if not args.save_h5:
            np.savetxt(sys.stdout, x_latent, delimiter = '\t')
        else:
            h5f = h5py.File(args.save_h5, 'w')
            h5f.create_dataset('charset', data = charset)
            h5f.create_dataset('latent_vectors', data = x_latent)
            h5f.close()
    else:
        visualize_latent_rep(args, model, x_latent)

h5T.py (project: NuGridPy, author: NuGrid)
def fetch_data_one(self,dataitem,cycle):
        self.h5 = mrT.File(self.filename,'r')

        try:
            data = self.h5[self.cycle_header+str(cycle)]['SE_DATASET'][dataitem]
        except ValueError:
            try:
                data = self.h5[self.cycle_header+str(cycle)].attrs.get(dataitem, None)
            except TypeError:
                data = self.h5[self.cycle_header+str(cycle)][dataitem]

        try:
            while data.shape[0] < 2:
                data = data[0]
        except (IndexError, AttributeError):
            pass

        self.h5.close()
        return data

io_utils.py (project: inferno, author: inferno-pytorch)
import os

import h5py as h5
import numpy as np


def fromh5(path, datapath=None, dataslice=None, asnumpy=True, preptrain=None):
    """
    Opens a hdf5 file at path, loads in the dataset at datapath, and returns the dataset
    as a numpy array.
    """
    # Check if path exists (thanks Lukas!)
    assert os.path.exists(path), "Path {} does not exist.".format(path)
    # Init file (read-only)
    h5file = h5.File(path, 'r')
    # Init dataset (default to the first dataset in the file)
    h5dataset = h5file[datapath] if datapath is not None else next(iter(h5file.values()))
    # Slice dataset
    h5dataset = h5dataset[dataslice] if dataslice is not None else h5dataset
    # Convert to numpy if required (with asnumpy=False, note that the handle
    # becomes invalid once the file is closed below)
    h5dataset = np.asarray(h5dataset) if asnumpy else h5dataset
    # Apply preptrain
    h5dataset = preptrain(h5dataset) if preptrain is not None else h5dataset
    # Close file
    h5file.close()
    # Return
    return h5dataset
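
A hypothetical call, showing the order in which the options apply (slice first, then numpy conversion, then preptrain); the file and dataset names are made up:

import numpy as np

vol = fromh5('volume.h5', datapath='raw',
             dataslice=np.s_[0:10],          # keep only the first ten slices
             preptrain=lambda x: x / 255.0)  # runs last, on the numpy array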

hdf5.py (project: spyking-circus, author: spyking-circus)
def __check_valid_key__(self, key):
    file       = h5py.File(self.file_name, mode='r')
    all_fields = []
    file.visit(all_fields.append)
    if key not in all_fields:
        print_and_log(['The key %s cannot be found in the dataset! Keys found are:' % key,
                       ", ".join(all_fields)], 'error', logger)
        sys.exit(1)
    file.close()

hdf5.py (project: spyking-circus, author: spyking-circus)
def _open(self, mode='r'):
        if mode in ['r+', 'w'] and self._parallel_write:
            self.my_file = h5py.File(self.file_name, mode=mode, driver='mpio', comm=comm)
        else:
            self.my_file = h5py.File(self.file_name, mode=mode)

        self.data = self.my_file.get(self.h5_key)

arf.py (project: spyking-circus, author: spyking-circus)
def set_streams(self, stream_mode):

    if stream_mode == 'single-file':

        sources     = []
        to_write    = []
        params      = self.get_description()
        my_file     = h5py.File(self.file_name, mode='r')
        all_keys    = list(my_file.keys())
        all_matches = [re.findall(r'\d+', u) for u in all_keys]
        all_streams = []
        for m in all_matches:
            if len(m) > 0:
                all_streams += [int(m[0])]

        idx = numpy.argsort(all_streams)

        for i in range(len(all_streams)):
            params['h5_key']  = all_keys[idx[i]]
            new_data          = type(self)(self.file_name, params)
            sources          += [new_data]
            to_write         += ['We found the datafile %s with t_start %d and duration %d' % (new_data.file_name, new_data.t_start, new_data.duration)]

        print_and_log(to_write, 'debug', logger)

        return sources

    elif stream_mode == 'multi-files':
        # Delegate to the parent class; note the explicit `self`.
        return H5File.set_streams(self, stream_mode)

