def save(self, filename):
"""Saves the collection to a file.
Parameters
----------
filename : :obj:`str`
The file to save the collection to.
Raises
------
ValueError
If the file extension is not .npy or .npz.
"""
file_root, file_ext = os.path.splitext(filename)
if file_ext == '.npy':
np.save(filename, self._data)
elif file_ext == '.npz':
np.savez_compressed(filename, self._data)
else:
raise ValueError('Extension %s not supported for point saves.' %(file_ext))
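# A minimal load counterpart for the save() above (a sketch, not part of the original
# class): .npy data comes straight back from np.load, while the .npz branch stored a
# single positional array, which numpy places under the default key 'arr_0'.
import os
import numpy as np

def load_collection_data(filename):
    file_root, file_ext = os.path.splitext(filename)
    if file_ext == '.npy':
        return np.load(filename)
    elif file_ext == '.npz':
        return np.load(filename)['arr_0']
    raise ValueError('Extension %s not supported for point loads.' % (file_ext))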
def save_weights(fname, params, metadata=None):
""" assumes all params have unique names.
"""
# Includes batchnorm params now
names = [par.name for par in params]
if len(names) != len(set(names)):
raise ValueError('need unique param names')
param_dict = { param.name : param.get_value(borrow=False)
for param in params }
if metadata is not None:
param_dict['metadata'] = pickle.dumps(metadata)
logging.info('saving {} parameters to {}'.format(len(params), fname))
# try to avoid half-written files
fname = Path(fname)
if fname.exists():
tmp_fname = Path(fname.stripext() + '.tmp.npz') # TODO yes, this is a hack
np.savez_compressed(str(tmp_fname), **param_dict)
tmp_fname.rename(fname)
else:
np.savez_compressed(str(fname), **param_dict)
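# Hypothetical load counterpart to save_weights() above (a sketch): it assumes the same
# Theano-style shared variables with unique .name attributes and a set_value() method,
# and that any metadata was pickled under the 'metadata' key as in the saver.
import pickle
import numpy as np

def load_weights(fname, params):
    archive = np.load(str(fname))
    for param in params:
        param.set_value(archive[param.name])
    if 'metadata' in archive.files:
        return pickle.loads(archive['metadata'].tobytes())
    return None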
def save(self, NumpyFile):
# open file
nfile = open(NumpyFile, "wb")
# save internals
numpy.savez_compressed(nfile,
_ImgNum = numpy.array([self._ImgNum]),
_MeasNum = numpy.array([self._MeasNum]),
_KeySizeBits = numpy.array([self._KeySizeBits]),
_KeySizeBytes = numpy.array([self._KeySizeBytes]),
_KeyData = self._KeyData,
_ImageInfo = self._ImageInfo,
*[self._ImageData[idx] for idx in range(0, self._ImgNum)])
# close file
nfile.close()
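# A sketch of the matching load (hypothetical, mirroring the attribute layout above):
# the keyword arguments come back under their own names, while the image arrays passed
# positionally via * are stored by numpy under 'arr_0', 'arr_1', ...
import numpy

def load_internals(NumpyFile):
    data = numpy.load(NumpyFile)
    img_num = int(data['_ImgNum'][0])
    image_data = [data['arr_%d' % idx] for idx in range(img_num)]
    return img_num, image_data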
def process_training_data(num_clips):
"""
Processes random training clips from the full training data. Saves to TRAIN_DIR_CLIPS by
default.
@param num_clips: The number of clips to process. Default = 5000000 (set in __main__).
@warning: This can take a couple of hours to complete with large numbers of clips.
"""
num_prev_clips = len(glob(c.TRAIN_DIR_CLIPS + '*'))
for clip_num in xrange(num_prev_clips, num_clips + num_prev_clips):
clip = process_clip()
np.savez_compressed(c.TRAIN_DIR_CLIPS + str(clip_num), clip)
if (clip_num + 1) % 100 == 0: print 'Processed %d clips' % (clip_num + 1)
def main():
"""
Commandline interface to extract parameters.
"""
log_sockeye_version(logger)
params = argparse.ArgumentParser(description="Extract specific parameters.")
arguments.add_extract_args(params)
args = params.parse_args()
if os.path.isdir(args.input):
param_path = os.path.join(args.input, C.PARAMS_BEST_NAME)
else:
param_path = args.input
ext_params = extract(param_path, args.names, args.list_all)
if len(ext_params) > 0:
        utils.check_condition(args.output is not None, "An output filename must be specified. (Use --output)")
        logger.info("Writing extracted parameters to '%s'", args.output)
np.savez_compressed(args.output, **ext_params)
def save_vocab(self, path=None):
""" Saves the vocabulary into a file.
# Arguments:
path: Where the vocabulary should be saved. If not specified, a
randomly generated filename is used instead.
"""
dtype = ([('word', '|S{}'.format(self.word_length_limit)), ('count', 'int')])
        np_dict = np.array(list(self.word_counts.items()), dtype=dtype)
# sort from highest to lowest frequency
np_dict[::-1].sort(order='count')
data = np_dict
if path is None:
path = str(uuid.uuid4())
np.savez_compressed(path, data=data)
print("Saved dict to {}".format(path))
def savetofile(self, outfile):
"""Save model parameters to file."""
# Pickle non-matrix params into bytestring, then convert to numpy byte array
pklbytes = pickle.dumps({'hyper': self.hyper, 'epoch': self.epoch, 'pos': self.pos},
protocol=pickle.HIGHEST_PROTOCOL)
        p = np.frombuffer(pklbytes, dtype=np.uint8)
# Gather parameter matrices and names
pvalues = { n:m.get_value() for n, m in self.params.items() }
# Now save params and matrices to file
try:
np.savez_compressed(outfile, p=p, **pvalues)
except OSError as e:
raise e
else:
if isinstance(outfile, str):
stdout.write("Saved model parameters to {0}\n".format(outfile))
def save_npz(filename, obj, compression=True):
"""Saves an object to the file in NPZ format.
This is a short-cut function to save only one object into an NPZ file.
Args:
filename (str): Target file name.
obj: Object to be serialized. It must support serialization protocol.
compression (bool): If ``True``, compression in the resulting zip file
is enabled.
"""
s = DictionarySerializer()
s.save(obj)
with open(filename, 'wb') as f:
if compression:
numpy.savez_compressed(f, **s.target)
else:
numpy.savez(f, **s.target)
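# Usage sketch for the helper above, assuming a Chainer-style object that supports the
# serialization protocol (the model name here is hypothetical):
#
#   model = MyChain()
#   save_npz('model.npz', model)                         # compressed archive
#   save_npz('model_raw.npz', model, compression=False)  # plain zip, faster to write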
def export_trimmed_glove_vectors(vocab, glove_filename, trimmed_filename, dim):
"""Saves glove vectors in numpy array
Args:
vocab: dictionary vocab[word] = index
glove_filename: a path to a glove file
trimmed_filename: a path where to store a matrix in npy
dim: (int) dimension of embeddings
"""
embeddings = np.zeros([len(vocab), dim])
with open(glove_filename) as f:
for line in f:
line = line.strip().split(' ')
word = line[0]
embedding = [float(x) for x in line[1:]]
if word in vocab:
word_idx = vocab[word]
embeddings[word_idx] = np.asarray(embedding)
np.savez_compressed(trimmed_filename, embeddings=embeddings)
def export_trimmed_glove_vectors(vocab, glove_filename, trimmed_filename, dim):
"""
    Saves GloVe vectors in a numpy array.
Args:
vocab: dictionary vocab[word] = index
glove_filename: a path to a glove file
trimmed_filename: a path where to store a matrix in npy
dim: (int) dimension of embeddings
"""
embeddings = np.zeros([len(vocab), dim])
with open(glove_filename,encoding="utf-8") as f:
for line in f:
line = line.strip().split()
word = line[0]
embedding = map(float, line[1:])
if word in vocab:
word_idx = vocab[word]
embeddings[word_idx] = np.asarray(list(embedding))
np.savez_compressed(trimmed_filename, embeddings=embeddings)
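# A matching reader for the two exporters above (a minimal sketch, assuming
# trimmed_filename includes the .npz extension): the trimmed matrix was stored under
# the 'embeddings' key, so a single np.load call recovers the |vocab| x dim array.
import numpy as np

def get_trimmed_glove_vectors(trimmed_filename):
    return np.load(trimmed_filename)['embeddings']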
def storeData(df, fileLoc='./tmp/', cv=0.30, rs=21):
"""
# Store the train and CV data in the tmp location for the classifiers.
# Input: df: Transformed DataFrame of the Adult Dataset.
# fileLoc: location of tmp where the binary data will be stored.
# cv: ratio of the cross_validation set in train-cv split
# rs: random_state used to the split.
# returns: None
# Note: data can be accessed using:
# Ex: data = np.load('./tmp/testTrainData.npz')
# and access the train/test using split using dictionary formatting.
# Ex: data['XTrain']
"""
if not os.path.exists('tmp'):
os.makedirs('tmp')
filename = fileLoc+'testTrainData'
XTrain, XTest, yTrain, yTest = trainCvSplit(df, cv, rs)
kwargs = {'XTrain': XTrain,
'XTest': XTest,
'yTrain': yTrain,
'yTest': yTest
}
np.savez_compressed(filename, **kwargs)
return None
def _make_npz(path, urls):
x_url, y_url = urls
x_path = download.cached_download(x_url)
y_path = download.cached_download(y_url)
with gzip.open(x_path, 'rb') as fx, gzip.open(y_path, 'rb') as fy:
fx.read(4)
fy.read(4)
N, = struct.unpack('>i', fx.read(4))
if N != struct.unpack('>i', fy.read(4))[0]:
raise RuntimeError('wrong pair of MNIST images and labels')
fx.read(8)
x = numpy.empty((N, 784), dtype=numpy.uint8)
y = numpy.empty(N, dtype=numpy.uint8)
for i in six.moves.range(N):
y[i] = ord(fy.read(1))
for j in six.moves.range(784):
x[i, j] = ord(fx.read(1))
numpy.savez_compressed(path, x=x, y=y)
return {'x': x, 'y': y}
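# Reading the archive back (a sketch; the path is whatever was passed to _make_npz):
# x is a flat (N, 784) uint8 array, so reshape to (N, 28, 28) for image-shaped input.
import numpy
raw = numpy.load('mnist.npz')          # hypothetical path
images = raw['x'].reshape(-1, 28, 28)
labels = raw['y']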
def load(filename, n_episodes_model=1):
""" Load model (T,R) from <filename>_model.npz. Update t, r, s0
if no model is available, generate and save from SASR_step file """
global t, r, s0
file_model = filename + ".npz"
if os.path.isfile(file_model):
print("Model file found")
with np.load(file_model) as fm:
t = fm['T']
r = fm['R']
s0 = fm['s0']
else:
print("Model file not found")
generate_t_and_r(filename, n_episodes_model) # create t, r, s0
""" Save model (T,R) to <filename>_model.npz """
np.savez_compressed(file_model, T=t, R=r, s0=s0)
return
def save_mean_representations(model, model_filename, X, labels, pred_file):
n_items, dv = X.shape
n_classes = model.n_classes
n_topics = model.d_t
# try normalizing input vectors
test_X = normalize(np.array(X, dtype='float32'), axis=1)
model.load_params(model_filename)
# evaluate bound on test set
item_mus = []
for item in range(n_items):
y = labels[item]
# save the mean document representation
r_mu = model.get_mean_doc_rep(test_X[item, :], y)
item_mus.append(np.array(r_mu))
# write all the test doc representations to file
if pred_file is not None and n_topics > 1:
np.savez_compressed(pred_file, X=np.array(item_mus), y=labels)
def save(self, filename):
"""Writes the image to a file.
Parameters
----------
filename : :obj:`str`
The file to save the image to. Must be one of .png, .jpg,
.npy, or .npz.
Raises
------
ValueError
If an unsupported file type is specified.
"""
file_root, file_ext = os.path.splitext(filename)
if file_ext in COLOR_IMAGE_EXTS:
im_data = self._image_data()
pil_image = PImage.fromarray(im_data.squeeze())
pil_image.save(filename)
elif file_ext == '.npy':
np.save(filename, self._data)
elif file_ext == '.npz':
np.savez_compressed(filename, self._data)
else:
raise ValueError('Extension %s not supported' % (file_ext))
def write_npz(windows, proximity_matrix, output_file):
"""Write a proximity matrix to an npz file.
npz files are a compressed numpy-specific format, meaning
they take up less disk space, but cannot be easily opened
by other programming languages (e.g. R). For more information
see :func:`numpy.savez_compressed`.
:param tuple windows: (list of x-axis windows, list of y-axis windows)
:param proximity_matrix: Input proximity matrix.
:type proximity_matrix: :class:`numpy array <numpy.ndarray>`
    :param str output_file: Path where the matrix file will be saved.
"""
    window_dict = {'windows_{}'.format(i): win
                   for i, win in enumerate(windows)}
np.savez_compressed(output_file, scores=proximity_matrix, **window_dict)
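# Hypothetical read counterpart to write_npz (a sketch): it recovers the score matrix
# plus the per-axis window lists that were stored as 'windows_0', 'windows_1', ...
import numpy as np

def read_npz(input_file):
    data = np.load(input_file)
    window_keys = sorted(k for k in data.files if k.startswith('windows_'))
    windows = tuple(data[k] for k in window_keys)
    return windows, data['scores']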
def SaveGeometryMatrix(self,filename='geo_matrix'):
if ((self.LMatrixColumns is not None) &
(self.LMatrixRows is not None) &
(self.LMatrixValues is not None)):
np.savez_compressed(filename, \
columns = self.LMatrixColumns,\
rows = self.LMatrixRows,\
values = self.LMatrixValues,\
shape = self.LMatrixShape, \
grid_rmin = self.Rmin,\
grid_rmax = self.Rmax,\
grid_nr = self.nR,\
grid_zmin = self.Zmin,\
grid_zmax = self.Zmax,\
grid_nz = self.nZ,\
gridtype = 'RectangularGeometryMatrix')
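# A sketch of reloading the geometry matrix, assuming the columns/rows/values/shape
# entries written above describe a COO-style sparse matrix (using scipy here is an
# assumption, not part of the original class):
import numpy as np
from scipy.sparse import coo_matrix

def load_geometry_matrix(filename='geo_matrix.npz'):
    data = np.load(filename)
    matrix = coo_matrix((data['values'], (data['rows'], data['columns'])),
                        shape=tuple(data['shape']))
    return matrix, str(data['gridtype'])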
def save_weights(self,filename): # save both weights and variables
with open(filename,'wb') as f:
# extract all weights in one go:
w = self.get_value_of(self.get_weights()+self.traverse('variables'))
print(len(w),'weights (and variables) obtained.')
# create an array object and put all the arrays into it.
# otherwise np.asanyarray() within np.savez_compressed()
# might make stupid mistakes
arrobj = np.empty([len(w)],dtype='object') # array object
for i in range(len(w)):
arrobj[i] = w[i]
np.savez_compressed(f,w=arrobj)
print('successfully saved to',filename)
return True
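# Loading the weights back (a sketch): because 'w' was written as a dtype=object array,
# recent NumPy versions require allow_pickle=True when reading it.
import numpy as np

def load_weights_list(filename):
    with open(filename, 'rb') as f:
        arrobj = np.load(f, allow_pickle=True)['w']
    return list(arrobj)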
def run(self):
while True:
name, data = self.queue.get()
if name is None:
break
if data.shape[2] == 1 or data.shape[2] == 3:
name += '.png'
                cv2.imwrite(os.path.join(self.path, name), data)
#imgOut = cv2.resize(imgOut, dsize=(img.shape[1],img.shape[0]))
#original[:,:,0] = np.repeat(np.mean(original, axis=2, keepdims=True), 3, axis=2)
#original[:,:,0] *= 1-imgOut* 1.3
#original[:,:,1] *= 1-imgOut* 1.3
#original[:,:,2] *= imgOut* 1.3
#cv2.imshow('OUT2', original /255)
#cv2.waitKey(1)
#cv2.imwrite('%s-shown.png' % fileName, original)
else:
name += '.npz'
                np.savez_compressed(os.path.join(self.path, name), data=data)
def install(
self, local_dst_dir_=None, local_src_dir_=None, clean_install_=False):
'''
Install the dataset into directly usable format,
requires downloading for public dataset.
Args:
local_dst_dir_: string or None
where to install the dataset, None -> "%(default_dir)s"
local_src_dir_: string or None
where to find the raw downloaded files, None -> "%(default_dir)s"
'''
local_dst_dir = self.DEFAULT_DIR if local_dst_dir_ is None else Path(local_dst_dir_)
local_src_dir = self.DEFAULT_DIR if local_src_dir_ is None else Path(local_src_dir_)
local_dst_dir.mkdir(parents=True, exist_ok=True)
assert local_src_dir.exists()
images = np.empty((60000,3,32,32), dtype=np.uint8)
labels = np.empty((60000,), dtype=np.uint8)
tarfile_name = str(local_src_dir / 'cifar-10-python.tar.gz')
with tarfile.open(tarfile_name, 'r:gz') as tf:
for i in range(5):
with tf.extractfile('cifar-10-batches-py/data_batch_%d'%(i+1)) as f:
data_di = pickle.load(f, encoding='bytes')
images[(10000*i):(10000*(i+1))] = data_di[b'data'].reshape((10000,3,32,32))
labels[(10000*i):(10000*(i+1))] = np.asarray(data_di[b'labels'], dtype=np.uint8)
with tf.extractfile('cifar-10-batches-py/test_batch') as f:
data_di = pickle.load(f, encoding='bytes')
images[50000:60000] = data_di[b'data'].reshape((10000,3,32,32))
labels[50000:60000] = data_di[b'labels']
np.savez_compressed(str(local_dst_dir / 'cifar10.npz'), images=images, labels=labels)
if clean_install_:
os.remove(tarfile_name)
def write_sar_log(sars: List, logdir: str, episode_reward: int, suffix: str=''):
"""Write state-action-rewards to a log file."""
    out_name = '%s_%s%s' % (str(time.time())[-5:], episode_reward, suffix)
    np.savez_compressed(os.path.join(logdir, out_name), np.vstack(sars))
def train_glove(infile, inputSize=20000, batchSize=100, dimensionSize=100, maxEpochs=1000, outfile='result', x_max=100, alpha=0.75):
options = locals().copy()
print 'initializing parameters'
params = init_params(options)
tparams = init_tparams(params)
print 'loading data'
I, J, Weight = load_data(infile)
n_batches = int(np.ceil(float(I.get_value(borrow=True).shape[0]) / float(batchSize)))
print 'building models'
weightVector, iVector, jVector, cost = build_model(tparams, options)
grads = T.grad(cost, wrt=tparams.values())
f_grad_shared, f_update = adadelta(tparams, grads, weightVector, iVector, jVector, cost)
logFile = outfile + '.log'
print 'training start'
for epoch in xrange(maxEpochs):
costVector = []
iteration = 0
for batchIndex in random.sample(range(n_batches), n_batches):
cost = f_grad_shared(Weight.get_value(borrow=True, return_internal_type=True)[batchIndex*batchSize:(batchIndex+1)*batchSize],
I.get_value(borrow=True, return_internal_type=True)[batchIndex*batchSize: (batchIndex+1)*batchSize],
J.get_value(borrow=True, return_internal_type=True)[batchIndex*batchSize: (batchIndex+1)*batchSize])
f_update()
costVector.append(cost)
if (iteration % 1000 == 0):
buf = 'epoch:%d, iteration:%d/%d, cost:%f' % (epoch, iteration, n_batches, cost)
print buf
print2file(buf, logFile)
iteration += 1
trainCost = np.mean(costVector)
buf = 'epoch:%d, cost:%f' % (epoch, trainCost)
print buf
print2file(buf, logFile)
tempParams = unzip(tparams)
np.savez_compressed(outfile + '.' + str(epoch), **tempParams)
def save_matrix(f, m):
np.savez_compressed(f, data=m.data, indices=m.indices, indptr=m.indptr, shape=m.shape)
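# Hypothetical load counterpart (a sketch), assuming m above was a scipy.sparse CSR
# matrix, as the data/indices/indptr/shape keys suggest:
import numpy as np
from scipy.sparse import csr_matrix

def load_matrix(f):
    loader = np.load(f)
    return csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                      shape=loader['shape'])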
def save_pkl_files(dsm_prefix, dsm, save_in_one_file=False):
"""
Save the space to separate pkl files.
:param dsm_prefix:
:param dsm:
"""
# Save in a single file (for small spaces)
if save_in_one_file:
io_utils.save(dsm, dsm_prefix + '.pkl')
# Save in multiple files: npz for the matrix and pkl for the other data members of Space
else:
mat = coo_matrix(dsm.cooccurrence_matrix.get_mat())
np.savez_compressed(dsm_prefix + 'cooc.npz', data=mat.data, row=mat.row, col=mat.col, shape=mat.shape)
with open(dsm_prefix + '_row2id.pkl', 'wb') as f_out:
pickle.dump(dsm._row2id, f_out, 2)
with open(dsm_prefix + '_id2row.pkl', 'wb') as f_out:
pickle.dump(dsm._id2row, f_out, 2)
with open(dsm_prefix + '_column2id.pkl', 'wb') as f_out:
pickle.dump(dsm._column2id, f_out, 2)
with open(dsm_prefix + '_id2column.pkl', 'wb') as f_out:
pickle.dump(dsm._id2column, f_out, 2)
def _close(self):
# Write everything
np.savez_compressed(self.request.get_file(), *self._images)
def test_compressed_roundtrip():
arr = np.random.rand(200, 200)
npz_file = os.path.join(tempdir, 'compressed.npz')
np.savez_compressed(npz_file, arr=arr)
arr1 = np.load(npz_file)['arr']
assert_array_equal(arr, arr1)
def savez_compressed(file, *args, **kwds):
"""Saves one or more arrays into a file in compressed ``.npz`` format.
    It is equivalent to :func:`cupy.savez` except that the output file is
    compressed.
.. seealso::
:func:`cupy.savez` for more detail,
:func:`numpy.savez_compressed`
"""
args = map(cupy.asnumpy, args)
for key in kwds:
kwds[key] = cupy.asnumpy(kwds[key])
numpy.savez_compressed(file, *args, **kwds)
def process_glove(args, vocab_list, save_path, size=4e5, random_init=True):
"""
:param vocab_list: [vocab]
:return:
"""
if not gfile.Exists(save_path + ".npz"):
glove_path = os.path.join(args.glove_dir, "glove.6B.{}d.txt".format(args.glove_dim))
if random_init:
glove = np.random.randn(len(vocab_list), args.glove_dim)
else:
glove = np.zeros((len(vocab_list), args.glove_dim))
found = 0
with open(glove_path, 'r') as fh:
for line in tqdm(fh, total=size):
array = line.lstrip().rstrip().split(" ")
word = array[0]
vector = list(map(float, array[1:]))
if word in vocab_list:
idx = vocab_list.index(word)
glove[idx, :] = vector
found += 1
if word.capitalize() in vocab_list:
idx = vocab_list.index(word.capitalize())
glove[idx, :] = vector
found += 1
if word.upper() in vocab_list:
idx = vocab_list.index(word.upper())
glove[idx, :] = vector
found += 1
print("{}/{} of word vocab have corresponding vectors in {}".format(found, len(vocab_list), glove_path))
np.savez_compressed(save_path, glove=glove)
print("saved trimmed glove matrix at: {}".format(save_path))
def write_values(self, tensors, compress=False):
"""
write dictionary of numpy.ndarray's with Op name as key to file
Arguments:
tensors (dict): A dictionary of numpy.ndarray's with Op name as key
compress: specify whether to compress tensors
"""
if compress:
np.savez_compressed(self.name, **tensors)
else:
np.savez(self.name, **tensors)