def _read_from_header(self):
    # Assumes `import re` and `import numpy` at module level.
    a, b, c = self._get_header()
    header = a
    header['data_offset'] = b
    header['nb_channels'] = c
    #header['dtype_offset'] = int(header['ADC zero'])
    # Raw string so the backslashes are not treated as escape sequences.
    header['gain'] = float(re.findall(r"\d+\.\d+", header['El'])[0])
    header['data_dtype'] = self.params['data_dtype']
    # Map the file once to measure its length, then drop the map.
    self.data = numpy.memmap(self.file_name, offset=header['data_offset'], dtype=header['data_dtype'], mode='r')
    self.size = len(self.data)
    self._shape = (self.size//header['nb_channels'], header['nb_channels'])
    del self.data
    return header
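The header only records the byte offset and channel count; the memmap itself is re-created on demand with the recovered shape. A minimal sketch of that second step (the function name read_block and the slicing API are assumptions built on the fields set above):

def read_block(file_name, header, t_start, t_stop):
    import numpy
    # Re-map the raw file read-only and slice a time window; only the
    # touched pages are actually read from disk.
    data = numpy.memmap(file_name, offset=header['data_offset'],
                        dtype=header['data_dtype'], mode='r')
    data = data.reshape(-1, header['nb_channels'])  # (samples, channels)
    return data[t_start:t_stop] * header['gain']    # scale to physical units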
def test_validating(self):
    #mpi_launch('fitting', self.file_name, 2, 0, 'False')
    a, b = os.path.splitext(os.path.basename(self.file_name))
    file_name, ext = os.path.splitext(self.file_name)
    file_out = os.path.join(os.path.abspath(file_name), a)
    result_name = os.path.join(file_name, 'injected')
    spikes = {}
    # Open read-only; recent h5py versions require an explicit mode.
    result = h5py.File(os.path.join(result_name, '%s.result.hdf5' % a), 'r')
    for key in result.get('spiketimes').keys():
        spikes[key] = result.get('spiketimes/%s' % key)[:]
    juxta_file = file_out + '.juxta.dat'
    # 'w+' creates (or truncates) the file and maps it read-write.
    f = numpy.memmap(juxta_file, shape=(self.length, 1), dtype=self.parser.get('validating', 'juxta_dtype'), mode='w+')
    f[spikes['temp_9']] = 100
    del f
    mpi_launch('validating', self.file_name, 2, 0, 'False')
def _readData1(self, fd, meta, mmap=False, **kwds):
    ## Read array data from the file descriptor for MetaArray v1 files
    ## read in axis values for any axis that specifies a length
    frameSize = 1
    for ax in meta['info']:
        if 'values_len' in ax:
            # np.fromstring is deprecated; np.frombuffer reads the same bytes.
            ax['values'] = np.frombuffer(fd.read(ax['values_len']), dtype=ax['values_type'])
            frameSize *= ax['values_len']
            del ax['values_len']
            del ax['values_type']
    self._info = meta['info']
    if not kwds.get("readAllData", True):
        return
    ## the remaining data is the actual array
    if mmap:
        subarr = np.memmap(fd, dtype=meta['type'], mode='r', shape=meta['shape'])
    else:
        subarr = np.frombuffer(fd.read(), dtype=meta['type'])
        subarr.shape = meta['shape']
    self._data = subarr
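The mmap branch trades a one-time full read for on-demand paging: np.memmap keeps the array on disk and only faults in the pages that are actually indexed. A self-contained comparison of the two paths (the file name 'demo.raw' is made up for illustration):

import numpy as np

arr = np.arange(12, dtype=np.float64).reshape(3, 4)
arr.tofile('demo.raw')                       # headerless raw bytes

# Eager path: the whole file is read into memory at once.
eager = np.fromfile('demo.raw', dtype=np.float64).reshape(3, 4)

# Lazy path: pages are loaded only when they are indexed.
lazy = np.memmap('demo.raw', dtype=np.float64, mode='r', shape=(3, 4))

assert (eager == lazy).all()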
def __mmap_ncs_packet_headers(self, filename):
    """
    Memory map of the Neuralynx .ncs file optimized for extraction of
    data packet headers
    Reading standard dtype improves speed, but timestamps need to be
    reconstructed
    """
    filesize = getsize(self.sessiondir + sep + filename)  # in byte
    if filesize > 16384:
        # Integer division: memmap shapes must be ints (plain '/'
        # yields a float under Python 3).
        data = np.memmap(self.sessiondir + sep + filename,
                         dtype='<u4',
                         shape=((filesize - 16384) // 4 // 261, 261),
                         mode='r', offset=16384)
        ts = data[:, 0:2]
        multi = np.repeat(np.array([1, 2 ** 32], ndmin=2), len(data),
                          axis=0)
        timestamps = np.sum(ts * multi, axis=1)
        # timestamps = data[:,0] + (data[:,1] *2**32)
        header_u4 = data[:, 2:5]
        return timestamps, header_u4
    else:
        return None
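Each 64-bit timestamp is stored as two little-endian 32-bit words; reading the whole file as a flat '<u4' grid is fast, and the value is rebuilt as low + high * 2**32. A worked example with made-up numbers:

import numpy as np

# Two little-endian u4 words per record: (low word, high word).
words = np.array([[0x0005B8D8, 0x00000002]], dtype='<u4')  # made-up packet

# low + high * 2**32 reconstructs the 64-bit timestamp
ts = words[:, 0].astype(np.uint64) + words[:, 1].astype(np.uint64) * 2**32
assert ts[0] == 0x0005B8D8 + (2 << 32)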
def __mmap_nev_file(self, filename):
    """ Memory map the Neuralynx .nev file """
    nev_dtype = np.dtype([
        ('reserved', '<i2'),
        ('system_id', '<i2'),
        ('data_size', '<i2'),
        ('timestamp', '<u8'),
        ('event_id', '<i2'),
        ('ttl_input', '<i2'),
        ('crc_check', '<i2'),
        ('dummy1', '<i2'),
        ('dummy2', '<i2'),
        ('extra', '<i4', (8,)),
        ('event_string', 'a128'),
    ])
    # Skip the 16 kiB text header; anything shorter holds no records.
    if getsize(self.sessiondir + sep + filename) > 16384:
        return np.memmap(self.sessiondir + sep + filename,
                         dtype=nev_dtype, mode='r', offset=16384)
    else:
        return None
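Once mapped, the structured array supports field access and boolean masking directly. A short usage sketch reusing the nev_dtype defined above (the file name and event id value are made up):

events = np.memmap('Events.nev', dtype=nev_dtype, mode='r', offset=16384)
# Boolean masks copy the selected records into memory.
ttl = events[events['event_id'] == 11]
for ev in ttl[:5]:
    print(ev['timestamp'], ev['ttl_input'], ev['event_string'].rstrip(b'\x00'))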
def __read_nsx_data_variant_b(self, nsx_nb):
    """
    Extract nsx data (blocks) from a 2.2 or 2.3 .nsx file. Blocks can arise
    if the recording was paused by the user.
    """
    filename = '.'.join([self._filenames['nsx'], 'ns%i' % nsx_nb])
    data = {}
    for data_bl in self.__nsx_data_header[nsx_nb].keys():
        # get shape and offset of data
        shape = (
            self.__nsx_data_header[nsx_nb][data_bl]['nb_data_points'],
            self.__nsx_basic_header[nsx_nb]['channel_count'])
        offset = \
            self.__nsx_data_header[nsx_nb][data_bl]['offset_to_data_block']
        # read data
        data[data_bl] = np.memmap(
            filename, dtype='int16', shape=shape, offset=offset)
    return data
def __read_nev_data(self, nev_data_masks, nev_data_types):
    """
    Extract nev data from a 2.1 or 2.2 .nev file
    """
    filename = '.'.join([self._filenames['nev'], 'nev'])
    data_size = self.__nev_basic_header['bytes_in_data_packets']
    header_size = self.__nev_basic_header['bytes_in_headers']
    # read all raw data packets and markers
    dt0 = [
        ('timestamp', 'uint32'),
        ('packet_id', 'uint16'),
        ('value', 'S{0}'.format(data_size - 6))]
    raw_data = np.memmap(filename, offset=header_size, dtype=dt0)
    masks = self.__nev_data_masks(raw_data['packet_id'])
    types = self.__nev_data_types(data_size)
    data = {}
    for k, v in nev_data_masks.items():
        data[k] = raw_data.view(types[k][nev_data_types[k]])[masks[k][v]]
    return data
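The packets are first mapped with a coarse dtype (timestamp, id, opaque payload) and then re-interpreted per packet type with .view(), which changes the field layout without copying any bytes. A self-contained sketch of that trick (the field layouts are made up; the two dtypes must have equal itemsize):

import numpy as np

coarse = np.dtype([('timestamp', '<u4'), ('packet_id', '<u2'),
                   ('value', 'S6')])                     # 12 bytes/record
fine = np.dtype([('timestamp', '<u4'), ('packet_id', '<u2'),
                 ('ttl', '<u2'), ('pad', 'S4')])         # also 12 bytes
assert coarse.itemsize == fine.itemsize

raw = np.zeros(4, dtype=coarse)
decoded = raw.view(fine)   # same bytes, new field layout, no copy
print(decoded['ttl'])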
def __get_nev_rec_times(self):
    """
    Extracts minimum and maximum time points from a nev file.
    """
    filename = '.'.join([self._filenames['nev'], 'nev'])
    dt = [('timestamp', 'uint32')]
    # Map only the very last packet: offset = file size minus one packet.
    offset = \
        self.__get_file_size(filename) - \
        self.__nev_params('bytes_in_data_packets')
    last_data_packet = np.memmap(filename, offset=offset, dtype=dt)[0]
    n_starts = [0 * self.__nev_params('event_unit')]
    n_stops = [
        last_data_packet['timestamp'] * self.__nev_params('event_unit')]
    return n_starts, n_stops
def __init__(self, path,
             x_width=0, x_type=np.float64,
             y_width=0, y_type=np.int_):
    # Concrete scalar types: np.float and types.int_ are not valid
    # in current NumPy / Python.
    if os.path.exists(path + "/dataset.json"):
        print("Using existing dataset in " + path)
        self.load(path)
    else:
        # Raising a bare string is a TypeError in Python 3.
        if x_width == 0:
            raise ValueError("X width must be specified for new dataset")
        self.X = np.memmap(path + "/X.npy", x_type, "w+", 0, (1, x_width))
        self.X.flush()
        if y_width > 0:
            self.Y = np.memmap(path + "/Y.npy", y_type, "w+", 0, (1, y_width))
            self.Y.flush()
        else:
            self.Y = None
        self.index = None
        self.nrows = 0
        self.running_mean = np.zeros((1, x_width), x_type)
        self.running_dev = np.zeros((1, x_width), x_type)
        self.running_max = np.zeros((1, x_width), x_type)
        self.running_min = np.zeros((1, x_width), x_type)
        self.path = path
def load(self, path):
    metadata = json.loads(open(path + "/dataset.json").read())
    self.index = np.array(metadata["index"])
    x_shape = tuple(metadata["x_shape"])
    x_type = metadata["x_type"]
    if "y_shape" in metadata:
        y_shape = tuple(metadata["y_shape"])
        y_type = metadata["y_type"]
        self.Y = np.memmap(path + "/Y.npy", y_type, shape=y_shape)
    else:
        self.Y = None
    self.nrows = x_shape[0]
    self.running_mean = np.asarray(metadata["running_mean"])
    self.running_dev = np.asarray(metadata["running_dev"])
    self.running_max = np.asarray(metadata["running_max"])
    self.running_min = np.asarray(metadata["running_min"])
    self.X = np.memmap(path + "/X.npy", x_type, shape=x_shape)
    self.path = path
def add(self, x, y=None):
    # Re-map with a larger shape; np.memmap grows the backing file as
    # needed when the mode allows writing.
    self.X = np.memmap(
        self.path + "/X.npy", self.X.dtype,
        shape=(self.nrows + x.shape[0], x.shape[1])
    )
    self.X[self.nrows:self.nrows + x.shape[0], :] = x
    if y is not None:
        # Compare row counts; X and Y may legitimately differ in width.
        if x.shape[0] != y.shape[0]:
            raise ValueError("x and y should have the same number of rows")
        self.Y = np.memmap(
            self.path + "/Y.npy", self.Y.dtype,
            shape=(self.nrows + y.shape[0], y.shape[1])
        )
        self.Y[self.nrows:self.nrows + y.shape[0], :] = y
    # Streaming statistics over all rows seen so far.
    delta = x - self.running_mean
    n = self.nrows + np.arange(x.shape[0]) + 1  # 1-based global row indices
    self.running_dev += np.sum(delta * (x - self.running_mean), 0)
    self.running_mean += np.sum(delta / n[:, np.newaxis], 0)
    self.running_max = np.amax(np.vstack((self.running_max, x)), 0)
    self.running_min = np.amin(np.vstack((self.running_min, x)), 0)
    self.nrows += x.shape[0]
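add() maintains running aggregates so mean, spread, and range are available without rescanning the memmap. Turning the accumulated running_dev into a per-column standard deviation follows the usual streaming-statistics recipe; a hedged sketch (the helper name finalize_std is an assumption, not part of the original class):

import numpy as np

def finalize_std(running_dev, nrows):
    # running_dev accumulates sums of squared deviations per column;
    # dividing by (n - 1) gives the sample variance.
    return np.sqrt(running_dev / max(nrows - 1, 1))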
def asarray(self, memmap=False, *args, **kwargs):
    """Read image data from all files and return as single numpy array.
    If memmap is True, return an array stored in a binary file on disk.
    The args and kwargs parameters are passed to the imread function.
    Raise IndexError or ValueError if image shapes don't match.
    """
    im = self.imread(self.files[0], *args, **kwargs)
    shape = self.shape + im.shape
    if memmap:
        with tempfile.NamedTemporaryFile() as fh:
            result = numpy.memmap(fh, dtype=im.dtype, shape=shape)
    else:
        result = numpy.zeros(shape, dtype=im.dtype)
    result = result.reshape(-1, *im.shape)
    for index, fname in zip(self._indices, self.files):
        index = [i - j for i, j in zip(index, self._start_index)]
        index = numpy.ravel_multi_index(index, self.shape)
        im = self.imread(fname, *args, **kwargs)
        result[index] = im
    result.shape = shape
    return result
def stack_pages(pages, memmap=False, *args, **kwargs):
    """Read data from sequence of TiffPage and stack them vertically.
    If memmap is True, return an array stored in a binary file on disk.
    Additional parameters are passed to the page asarray function.
    """
    if len(pages) == 0:
        raise ValueError("no pages")
    if len(pages) == 1:
        return pages[0].asarray(memmap=memmap, *args, **kwargs)
    result = pages[0].asarray(*args, **kwargs)
    shape = (len(pages),) + result.shape
    if memmap:
        with tempfile.NamedTemporaryFile() as fh:
            result = numpy.memmap(fh, dtype=result.dtype, shape=shape)
    else:
        result = numpy.empty(shape, dtype=result.dtype)
    for i, page in enumerate(pages):
        result[i] = page.asarray(*args, **kwargs)
    return result
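Both functions above use a NamedTemporaryFile as scratch space so the stacked array never has to fit in RAM; on POSIX the mapping stays valid after the temp file is unlinked because the mmap holds its own reference to the file. A minimal standalone sketch of the pattern:

import numpy
import tempfile

with tempfile.NamedTemporaryFile() as fh:
    # Disk-backed scratch array; pages spill to the temp file, not to RAM.
    big = numpy.memmap(fh, dtype=numpy.float32, shape=(1000, 1000))
big[:] = 1.0      # POSIX: the mapping outlives the unlinked file
print(big.sum())  # 1000000.0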
def flush(self):
    """
    Write any changes in the array to the file on disk.

    For further information, see `memmap`.

    Parameters
    ----------
    None

    See Also
    --------
    memmap
    """
    if self.base is not None and hasattr(self.base, 'flush'):
        self.base.flush()
def load_memory_map_dir(directory: str) -> Embeddings:
    """
    Loads embeddings from a memory map directory to allow lazy loading (and reduce the memory usage).
    Args:
        directory: a directory containing two files: a meta json file with shape information
            and the vocabulary, and the actual memory map file.
    Returns:
        Embeddings object with a lookup matrix that is backed by a memory map.
    """
    meta_file = os.path.join(directory, "meta.json")
    mem_map_file = os.path.join(directory, "memory_map")
    with open(meta_file, "r") as f:
        meta = json.load(f)
    shape = tuple(meta['shape'])
    vocab = meta['vocab']
    # 'r+' maps the existing file read-write without truncating it.
    mem_map = np.memmap(mem_map_file, dtype='float32', mode='r+', shape=shape)
    result = Embeddings(vocab, mem_map, filename=directory, emb_format="memory_map_dir")
    return result
def save_as_memory_map_dir(directory: str, emb: Embeddings):
    """
    Saves the given embeddings as memory map file and corresponding meta data in a directory.
    Args:
        directory: the directory to store the memory map file in (called `memory_map`) and the meta file
            (called `meta.json`) that stores the shape of the memory map and the actual vocabulary.
        emb: the embeddings to store.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)
    meta_file = os.path.join(directory, "meta.json")
    mem_map_file = os.path.join(directory, "memory_map")
    with open(meta_file, "w") as f:
        json.dump({
            "vocab": emb.vocabulary,
            "shape": emb.shape
        }, f)
    mem_map = np.memmap(mem_map_file, dtype='float32', mode='w+', shape=emb.shape)
    mem_map[:] = emb.lookup[:]
    mem_map.flush()
    del mem_map
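Together the two functions form a save/load round trip: raw float32 bytes plus a JSON side-car carrying shape and vocabulary, since a bare memory map file records neither shape nor dtype. A hedged usage sketch (the two-argument Embeddings constructor is an assumption inferred from the loader above):

emb = Embeddings(vocab, lookup_matrix)         # in-memory embeddings
save_as_memory_map_dir("embeddings_dir", emb)  # writes memory_map + meta.json
lazy = load_memory_map_dir("embeddings_dir")   # rows page in on access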
def write_sampled(datfile, data, sampling_rate, **params):
    """Writes a sampled dataset to disk as a raw binary file, plus a meta file.
    Args:
        datfile (str): path to file to write to. If the file exists, it is
            overwritten.
        data (sequence): time series data of at most 2 dimensions
        sampling_rate (int or float): sampling rate of `data`
        **params: all other keyword arguments are treated as dataset attributes,
            and added to the meta file
    Returns:
        SampledData: sampled dataset containing `data`
    """
    if 'columns' not in params:
        params['columns'] = sampled_columns(data)
    params["dtype"] = data.dtype.str
    shape = data.shape
    mdata = np.memmap(datfile, dtype=params["dtype"], mode="w+", shape=shape)
    mdata[:] = data[:]
    write_metadata(datfile, sampling_rate=sampling_rate, **params)
    params['sampling_rate'] = sampling_rate
    return SampledData(mdata, datfile, params)
def read_sampled(datfile, mode="r"):
"""Loads raw binary file and associated metadata into a sampled dataset.
Args:
datfile (str): path to raw binary file to read from
mode: may be "r" or "r+"; use "r+" for modifying the data
(not recommended)
Returns:
SampledData: sampled dataset containing `datfile`'s data
"""
path = os.path.abspath(datfile)
params = read_metadata(datfile)
try:
data = np.memmap(datfile, dtype=params["dtype"], mode=mode)
except ValueError:
data = np.array([])
data = data.reshape(-1, len(params['columns']))
return SampledData(data, path, params)
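As with the embeddings pair above, the .dat file holds raw bytes and all layout information lives in the metadata, so writer and reader must agree on dtype and column count. A hedged round-trip sketch (assumes the two helpers above are importable; the extra `units` attribute and the `.data` attribute on the result are illustrative assumptions):

import numpy as np

rec = np.random.randn(30000, 2).astype(np.float32)  # 2-channel signal
write_sampled("probe.dat", rec, sampling_rate=30000, units="mV")
ds = read_sampled("probe.dat")                      # memmap-backed
assert ds.data.shape == (30000, 2)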
def load_data(fname):
    n = 4543
    size = int(fname.split('_')[0])
    X_fname = 'cache/X_%s.npy' % fname
    y_fname = 'cache/pts_%s.npy' % fname
    X_shape = (n, 3, size, size)
    y_shape = (n, 4)
    # Note: np.memmap maps raw bytes and does not parse .npy headers,
    # so these cache files must be headerless raw arrays.
    X = np.memmap(X_fname, dtype=np.float32, mode='r', shape=X_shape)
    y = np.memmap(y_fname, dtype=np.int32, mode='r', shape=y_shape)
    y = y.astype(np.float32)
    y = y / size
    return X, y
create_test_cropped_image.py (project: kaggle-right-whale, author: felixlaumon)
def load_data(fname, data_grey=False):
    n = 6925
    size = int(fname.split('_')[0])
    if data_grey:
        X_fname = 'cache/X_test_grey_%s.npy' % fname
    else:
        X_fname = 'cache/X_test_%s.npy' % fname
    num_channels = 1 if data_grey else 3
    X_shape = (n, num_channels, size, size)
    print('Load test data from %s' % X_fname)
    X = np.memmap(X_fname, dtype=np.float32, mode='r', shape=X_shape)
    return X
train_localization_model.py (project: kaggle-right-whale, author: felixlaumon)
def load_data(fname):
    n = 4543
    size = int(fname.split('_')[0])
    X_fname = 'cache/X_%s.npy' % fname
    y_fname = 'cache/bbox_%s.npy' % fname
    X_shape = (n, 3, size, size)
    y_shape = (n, 4)
    X = np.memmap(X_fname, dtype=np.float32, mode='r', shape=X_shape)
    y = np.memmap(y_fname, dtype=np.int32, mode='r', shape=y_shape)
    y = y.astype(np.float32)
    y = y / size
    return X, y
def walk(self, size):
    if self.eof:
        return None
    end_point = self.offset + 4 * size  # 4 bytes per float32
    assert end_point <= self.size, \
        'Over-read {}'.format(self.path)
    # shape=() plus a subarray dtype yields a 1-D float32 array of
    # length `size`, mapped at the current byte offset.
    float32_1D_array = np.memmap(
        self.path, shape=(), mode='r',
        offset=self.offset,
        dtype='({})float32,'.format(size)
    )
    self.offset = end_point
    if end_point == self.size:
        self.eof = True
    return float32_1D_array
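walk() is a cursor: each call maps the next `size` float32 values at the current byte offset, so a large file can be consumed in fixed-size slabs without ever loading it whole. A hedged usage sketch (the Walker wrapper class and its constructor are assumptions around the method above):

w = Walker('weights.bin')  # hypothetical object holding path/size/offset/eof
chunks = []
while True:
    block = w.walk(1024)   # next 1024 float32 values, or None at EOF
    if block is None:
        break
    chunks.append(np.asarray(block))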
def load_vectors_mmaped(VECTORS_FILE, NOFTYPES, D, UPDATE_VECTORS):
    if (not os.path.exists('vectors.mymemmap')) or UPDATE_VECTORS == 1:
        v = np.memmap('vectors.mymemmap', dtype='float', mode='w+', shape=(NOFTYPES, D))
        i = 0
        # Showing percentage to user
        limit = 100000
        with open(VECTORS_FILE, 'r') as f:
            for line in f:
                if i >= limit:
                    print(limit / float(NOFTYPES) * 100)
                    limit += 100000
                line = line.strip()
                if len(line) > 0:
                    # Materialize the floats; a bare map() iterator cannot
                    # be assigned into a NumPy slice under Python 3.
                    v[i][:] = list(map(float, line.split(' ')))
                    i += 1
    else:
        v = np.memmap('vectors.mymemmap', dtype='float', mode='r', shape=(NOFTYPES, D))
    return v
def __init__(self, hash_name='md5', coerce_mmap=False):
    """
    Parameters
    ----------
    hash_name: string
        The hash algorithm to be used
    coerce_mmap: boolean
        Make no difference between np.memmap and np.ndarray
        objects.
    """
    self.coerce_mmap = coerce_mmap
    Hasher.__init__(self, hash_name=hash_name)
    # delayed import of numpy, to avoid tight coupling
    import numpy as np
    self.np = np
    if hasattr(np, 'getbuffer'):
        self._getbuffer = np.getbuffer
    else:
        self._getbuffer = memoryview
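coerce_mmap controls whether a memmap and a plain ndarray with identical bytes hash to the same digest: the hasher mixes the container type into the hash unless asked to coerce. A rough illustration of the idea (a simplified stand-in, not the hasher's actual implementation):

import hashlib
import numpy as np

def digest(arr, coerce_mmap=False):
    # Treat np.memmap as np.ndarray when coercion is requested, so the
    # container type no longer changes the hash.
    klass = np.ndarray if coerce_mmap and isinstance(arr, np.memmap) else type(arr)
    h = hashlib.md5(klass.__name__.encode())
    h.update(np.ascontiguousarray(arr).tobytes())
    return h.hexdigest()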