def maybe_pickle(data_dirs, force=False):
    dataset_names = []
    for dir in data_dirs:
        set_filename = dir + '.pickle'
        dataset_names.append(set_filename)
        if os.path.exists(set_filename) and not force:
            # You may overwrite by setting force=True.
            print('%s already present - Skipping pickling.' % set_filename)
        else:
            print('Pickling %s.' % set_filename)
            dataset = load_logo(dir)
            try:
                with open(set_filename, 'wb') as f:
                    pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
            except Exception as e:
                print('Unable to save data to', set_filename, ':', e)
    return dataset_names

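A minimal round-trip sketch of how maybe_pickle might be used; the ./logos/classA paths and the load_logo() helper it relies on are assumptions, not part of the snippet above.

import pickle

pickle_names = maybe_pickle(['./logos/classA', './logos/classB'])
with open(pickle_names[0], 'rb') as f:
    class_a = pickle.load(f)  # the array produced by load_logo() for that class
print(class_a.shape)
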
def save_pickle(train_dataset, train_labels, valid_dataset, valid_labels,
                test_dataset, test_labels):
    try:
        f = open(PICKLE_FILENAME, 'wb')
        save = {
            'train_dataset': train_dataset,
            'train_labels': train_labels,
            'valid_dataset': valid_dataset,
            'valid_labels': valid_labels,
            'test_dataset': test_dataset,
            'test_labels': test_labels,
        }
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
        f.close()
    except Exception as e:
        print('Unable to save data to', PICKLE_FILENAME, ':', e)
        raise

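Reading the combined pickle back is the mirror image of save_pickle; this sketch assumes the same PICKLE_FILENAME constant and the key names used above.

import pickle

with open(PICKLE_FILENAME, 'rb') as f:
    save = pickle.load(f)
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
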
def save_params(self, f_, format_=Default, filter_=None):
    '''
    Save all parameters in the current group using the pickle protocol.
    A dict mapping strings to shared variables will be dumped to f_.
    Args:
        f_: writable file or filename string
        format_: string, file format.
            Default is to interpret the format from the file name.
            Supported format: "pkl"
        filter_: string or None, regex pattern used to filter parameter names
    '''
    if isinstance(f_, str):
        f_ = open(f_, 'wb')
    if filter_ is None:
        pickle.dump(self._current_group_di, f_)
    else:
        pat = re.compile(filter_)
        pickle.dump({k: v for k, v in self._current_group_di.items()
                     if pat.fullmatch(k)}, f_)

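A hedged usage sketch for save_params; the model object and the parameter names matched by the regex are hypothetical.

# Dump every parameter in the current group:
model.save_params('all_params.pkl')
# Dump only parameters whose full name matches the regex:
model.save_params('encoder_params.pkl', filter_='encoder.*')
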
def save_snapshot(self, filename=None):
    """
    Save a snapshot of the current process to file
    Warning: this is not thread safe, do not use with multithreaded programs
    Args:
        - filename: target file to save snapshot
    Returns:
        - Bool
    """
    if not filename:
        filename = self.get_config_filename("snapshot")
    snapshot = self.take_snapshot()
    if not snapshot:
        return False
    # dump to file
    fd = open(filename, "wb")
    pickle.dump(snapshot, fd, pickle.HIGHEST_PROTOCOL)
    fd.close()
    return True

def dumpmem(self, start, end):
    """
    Dump process memory from start to end
    Args:
        - start: start address (Int)
        - end: end address (Int)
    Returns:
        - memory content (raw bytes)
    """
    mem = None
    logfd = tmpfile(is_binary_file=True)
    logname = logfd.name
    out = self.execute_redirect("dump memory %s 0x%x 0x%x" % (logname, start, end))
    if out is None:
        return None
    else:
        logfd.flush()
        mem = logfd.read()
        logfd.close()
    return mem

def readmem(self, address, size):
    """
    Read content of memory at an address
    Args:
        - address: start address to read (Int)
        - size: bytes to read (Int)
    Returns:
        - memory content (raw bytes)
    """
    # try fast dumpmem if it works
    mem = self.dumpmem(address, address + size)
    if mem is not None:
        return mem
    # failed to dump, use slow x/gx way
    mem = ""
    out = self.execute_redirect("x/%dbx 0x%x" % (size, address))
    if out:
        for line in out.splitlines():
            bytes = line.split(":\t")[-1].split()
            mem += "".join([chr(int(c, 0)) for c in bytes])
    return mem

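The byte-parsing step inside readmem() can be exercised on its own, outside gdb; this standalone sketch uses a hand-written sample of x/4bx-style output.

sample = "0x601000:\t0xde    0xad    0xbe    0xef"
parsed = "".join(chr(int(c, 0)) for c in sample.split(":\t")[-1].split())
print(parsed.encode('latin-1'))  # b'\xde\xad\xbe\xef'
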
def main():
    glove_dict = LoadGlove()
    imdb_dict = LoadImdb()
    out_path = '../../data/imdb.glove.emb.pkl'
    emb = np.zeros([len(imdb_dict) + 2, 300], dtype=np.float32)
    for i in range(len(imdb_dict) + 2):
        if i in imdb_dict:
            # Indices 0 and 1 are reserved and not present in imdb_dict.
            word = imdb_dict[i]
            if word in glove_dict:
                # If the word is in the GloVe dict, use its embedding.
                emb[i, :] = glove_dict[word]
    pickle.dump(emb, open(out_path, 'wb'))

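A quick sanity check on the dumped embedding matrix; the path comes from the snippet above, and running it assumes the pickle has already been written.

import pickle
import numpy as np

with open('../../data/imdb.glove.emb.pkl', 'rb') as f:
    emb = pickle.load(f)
print(emb.shape)                               # (len(imdb_dict) + 2, 300)
print(int(np.count_nonzero(emb.any(axis=1))))  # rows that received a GloVe vector
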
def main():
    embedding_size = 300
    path = '../../data/imdb.dict.pkl'
    dictionary = pickle.load(open(path, 'rb'))
    out_path = '../../data/imdb.emb.pkl'
    path = '../../data/output.txt'
    # Account for the reserved indices 0 and 1.
    emb = np.zeros([len(dictionary) + 2, embedding_size], dtype=np.float32)
    with open(path, 'r') as f:
        while True:
            word = f.readline()
            if not word:
                break
            word = word.rstrip()
            embeddings = f.readline().rstrip().split()
            embeddings = [float(e) for e in embeddings]
            if word in dictionary:
                emb[int(dictionary[word]), :] = np.array(embeddings, dtype=np.float32)
    pickle.dump(emb, open(out_path, 'wb'))

def maybe_pickle(data_folders, min_num_images_per_class, force=False):
    dataset_names = []
    folders_list = os.listdir(data_folders)
    for folder in folders_list:
        # print(os.path.join(data_folders, folder))
        curr_folder_path = os.path.join(data_folders, folder)
        if os.path.isdir(curr_folder_path):
            set_filename = curr_folder_path + '.pickle'
            dataset_names.append(set_filename)
            if os.path.exists(set_filename) and not force:
                # You may override by setting force=True.
                print('%s already present - Skipping pickling.' % set_filename)
            else:
                print('Pickling %s.' % set_filename)
                dataset = load_letter(curr_folder_path, min_num_images_per_class)  # load and normalize the data
                try:
                    with open(set_filename, 'wb') as f:
                        pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
                except Exception as e:
                    print('Unable to save data to', set_filename, ':', e)
    return dataset_names

def read_dataset(data_dir):
    pickle_filename = "PascalVoc.pickle"
    pickle_filepath = os.path.join(data_dir, pickle_filename)
    if not os.path.exists(pickle_filepath):
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_tarfile=True)
        PascalVoc_folder = "VOCdevkit"
        result = create_image_lists(os.path.join(data_dir, PascalVoc_folder))
        print("Pickling ...")
        with open(pickle_filepath, 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Found pickle file!")
    with open(pickle_filepath, 'rb') as f:
        result = pickle.load(f)
        training_records = result['training']
        validation_records = result['validation']
        del result
    return training_records, validation_records

def read_dataset(data_dir):
    pickle_filename = "MITSceneParsing.pickle"
    pickle_filepath = os.path.join(data_dir, pickle_filename)
    if not os.path.exists(pickle_filepath):
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
        SceneParsing_folder = os.path.splitext(DATA_URL.split("/")[-1])[0]
        result = create_image_lists(os.path.join(data_dir, SceneParsing_folder))
        print("Pickling ...")
        with open(pickle_filepath, 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Found pickle file!")
    with open(pickle_filepath, 'rb') as f:
        result = pickle.load(f)
        training_records = result['training']
        validation_records = result['validation']
        del result
    return training_records, validation_records

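A typical call; the directory name is an assumption about where the archive was extracted.

training_records, validation_records = read_dataset("Data_zoo/MIT_SceneParsing")
print(len(training_records), len(validation_records))
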
def preprocess(self, input_file, vocab_file, tensor_file):
    f = open(input_file, "r")
    data = f.read()
    f.close()
    # data = data.lower()
    # data = re.sub("[^a-z, ']+", " ", data)  # replace unknown symbols with space
    counter = collections.Counter(data)
    count_pairs = sorted(counter.items(), key=lambda x: -x[1])
    self.chars, _ = zip(*count_pairs)
    self.vocab_size = len(self.chars)
    self.vocab = dict(zip(self.chars, range(len(self.chars))))
    print(self.vocab)
    with open(vocab_file, 'wb') as f:
        cPickle.dump(self.chars, f)
    # print(map(self.vocab.get, data))
    self.tensor = np.array(list(map(self.vocab.get, data)))
    np.save(tensor_file, self.tensor)

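A round-trip sketch that reloads what preprocess() wrote; vocab.pkl and data.npy stand in for whatever vocab_file and tensor_file were passed in.

import numpy as np
from six.moves import cPickle

with open('vocab.pkl', 'rb') as f:
    chars = cPickle.load(f)
vocab = dict(zip(chars, range(len(chars))))
tensor = np.load('data.npy')
print(len(vocab), tensor.shape)
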
def read_dataset(data_dir):
    pickle_filename = "celebA.pickle"
    pickle_filepath = os.path.join(data_dir, pickle_filename)
    if not os.path.exists(pickle_filepath):
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
        celebA_folder = os.path.splitext(DATA_URL.split("/")[-1])[0]
        result = create_image_lists(os.path.join(data_dir, celebA_folder))
        print("Training set: %d" % len(result['train']))
        print("Test set: %d" % len(result['test']))
        print("Validation set: %d" % len(result['validation']))
        print("Pickling ...")
        with open(pickle_filepath, 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Found pickle file!")
    with open(pickle_filepath, 'rb') as f:
        result = pickle.load(f)
        training_images = result['train']
        testing_images = result['test']
        validation_images = result['validation']
        del result
    return training_images, testing_images, validation_images

def load(fname):
    """Load an embedding dump generated by `save`"""
    content = _open(fname).read()
    if PY2:
        state = pickle.loads(content)
    else:
        state = pickle.loads(content, encoding='latin1')
    voc, vec = state
    if len(voc) == 2:
        words, counts = voc
        word_count = dict(zip(words, counts))
        vocab = CountedVocabulary(word_count=word_count)
    else:
        vocab = OrderedVocabulary(voc)
    return Embedding(vocabulary=vocab, vectors=vec)

def update_default_setting(self, key_tree, value):
    """
    Update a default value in the local settings file.
    :param key_tree:
        A tuple containing a tree of dictionary keys.
    :param value:
        The value for the setting.
    """
    # Open the defaults.
    with open(self._default_settings_path, "rb") as fp:
        defaults = yaml.load(fp)
    branch = defaults
    for key in key_tree[:-1]:
        branch.setdefault(key, {})
        branch = branch[key]
    branch[key_tree[-1]] = value
    with open(self._default_settings_path, "w") as fp:
        fp.write(yaml.dump(defaults))
    return True

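A hypothetical call, assuming settings is an instance of the class this method belongs to; it creates the nested keys as needed and persists the new default to the YAML file.

settings.update_default_setting(("model", "learning_rate"), 1e-3)
# defaults["model"]["learning_rate"] is now 1e-3 in the settings file
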
def maybe_pickle(self, data_folders, min_num_images_per_class, force=False):
    dataset_names = []
    for folder in data_folders:
        set_filename = folder + '.pickle'
        dataset_names.append(set_filename)
        if os.path.exists(set_filename) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % set_filename)
        else:
            print('Pickling %s.' % set_filename)
            dataset = self.load_letter(folder, min_num_images_per_class, self.image_size, self.pixel_depth)
            try:
                with open(set_filename, 'wb') as f:
                    pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
            except Exception as e:
                print('Unable to save data to', set_filename, ':', e)
    return dataset_names

def empty_network(network):
    logger.debug("Storing pypsa timeseries to disk")
    from .components import all_components
    panels = {}
    for c in all_components:
        attr = network.components[c]["list_name"] + "_t"
        panels[attr] = getattr(network, attr)
        setattr(network, attr, None)
    fd, fn = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as f:
        pickle.dump(panels, f, -1)
    del panels
    gc.collect()
    yield
    logger.debug("Reloading pypsa timeseries from disk")
    with open(fn, 'rb') as f:
        panels = pickle.load(f)
    os.remove(fn)
    for attr, pnl in iteritems(panels):
        setattr(network, attr, pnl)

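Because empty_network uses yield, it is presumably wrapped with contextlib.contextmanager (the decorator is not shown in this excerpt); a sketch of the intended use:

import copy

with empty_network(network):
    # the timeseries are parked on disk here, so the deepcopy stays small
    network_copy = copy.deepcopy(network)
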
def save_train_and_test_set(dataset, labels, ratio, pickle_file):
    split = int(len(dataset) * ratio)
    train_dataset = dataset[:split]
    train_labels = labels[:split]
    test_dataset = dataset[split:]
    test_labels = labels[split:]
    try:
        f = open(pickle_file, 'wb')
        save = {
            'train_dataset': train_dataset,
            'train_labels': train_labels,
            'test_dataset': test_dataset,
            'test_labels': test_labels,
        }
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
        f.close()
    except Exception as e:
        print('Unable to save data to', pickle_file, ':', e)
        raise
    statinfo = os.stat(pickle_file)
    print('Compressed pickle size:', statinfo.st_size)

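An illustrative call with toy data; the 80/20 ratio and the file name are arbitrary.

import numpy as np

data = np.random.rand(100, 28, 28).astype(np.float32)
labels = np.random.randint(0, 10, size=100)
save_train_and_test_set(data, labels, 0.8, 'toy_split.pickle')
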
# Main
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
    dataset_names = []
    for folder in data_folders:
        set_filename = folder + '.pickle'
        dataset_names.append(set_filename)
        if os.path.exists(set_filename) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % set_filename)
        else:
            print('Pickling %s.' % set_filename)
            dataset = load_letter(folder, min_num_images_per_class)
            try:
                with open(set_filename, 'wb') as f:
                    pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
            except Exception as e:
                print('Unable to save data to', set_filename, ':', e)
    return dataset_names

def preprocess(self, input_file, vocab_file, tensor_file):
    with codecs.open(input_file, 'r', 'utf-8') as f:
        lines = f.readlines()
    if lines[0][:1] == codecs.BOM_UTF8:
        lines[0] = lines[0][1:]
    lines = [line.strip().split() for line in lines]
    self.vocab, self.words = self.build_vocab(lines)
    self.vocab_size = len(self.words)
    # print 'word num: ', self.vocab_size
    with open(vocab_file, 'wb') as f:
        cPickle.dump(self.words, f)
    raw_data = [[0] * self.seq_length +
                [self.vocab.get(w, 1) for w in line] +
                [2] * self.seq_length for line in lines]
    self.raw_data = raw_data
    # np.save(tensor_file, self.raw_data)

def save(self, key, data):
    try:
        with open(self.cache_path, 'wb') as fh:
            self.data[pickle.dumps(key)] = data
            pickle.dump(self.data, fh, protocol=2)
    except Exception as e:
        log.warning("Could not save cache %s err: %s" % (
            self.cache_path, e))
        if not os.path.exists(self.cache_path):
            directory = os.path.dirname(self.cache_path)
            log.info('Generating Cache directory: %s.' % directory)
            try:
                os.makedirs(directory)
            except Exception as e:
                log.warning("Could not create directory: %s err: %s" % (
                    directory, e))

def create():
    f = N.Sequence([
        N.Conv(8, (3, 3), strides=1, pad='same'),
        N.Dimshuffle(pattern=(0, 3, 1, 2)),
        N.FlattenLeft(outdim=2),
        N.Noise(level=0.3, noise_dims=None, noise_type='gaussian'),
        N.Dense(128, activation=K.relu),
        N.Dropout(level=0.3, noise_dims=None),
        N.Dense(10, activation=K.softmax)
    ], debug=True)
    y = f(X)
    yT = f.T(y)
    f1 = K.function(X, y)
    f2 = K.function(X, yT)
    cPickle.dump(f, open(U.get_modelpath('dummy.ai', override=True), 'w'))
    _ = f1(x)
    print(_.shape, _.sum())
    _ = f2(x)
    print(_.shape, _.sum())

def preprocess(self, input_file, vocab_file, tensor_file):
    with open(input_file, "r") as f:
        data = f.read()
    # Optional text cleaning or lower-casing, etc.
    # data = self.clean_str(data)
    x_text = data.split()
    self.vocab, self.words = self.build_vocab(x_text)
    self.vocab_size = len(self.words)
    with open(vocab_file, 'wb') as f:
        cPickle.dump(self.words, f)
    # Same operation as [self.vocab[word] for word in x_text]:
    # word indices are our basic data.
    self.tensor = np.array(list(map(self.vocab.get, x_text)))
    # Save the data to data.npy
    np.save(tensor_file, self.tensor)

def preprocess(self, input_file, vocab_file, tensor_file, encoding):
    with codecs.open(input_file, "r", encoding=encoding) as f:
        data = f.read()
    # Optional text cleaning or lower-casing, etc.
    # data = self.clean_str(data)
    x_text = data.split()
    self.vocab, self.words = self.build_vocab(x_text)
    self.vocab_size = len(self.words)
    with open(vocab_file, 'wb') as f:
        cPickle.dump(self.words, f)
    # Same operation as [self.vocab[word] for word in x_text]:
    # word indices are our basic data.
    self.tensor = np.array(list(map(self.vocab.get, x_text)))
    # Save the data to data.npy
    np.save(tensor_file, self.tensor)