def load(fname):
    """Load an embedding dump generated by `save`"""
    content = _open(fname).read()
    if PY2:
        state = pickle.loads(content)
    else:
        state = pickle.loads(content, encoding='latin1')
    voc, vec = state
    if len(voc) == 2:
        words, counts = voc
        word_count = dict(zip(words, counts))
        vocab = CountedVocabulary(word_count=word_count)
    else:
        vocab = OrderedVocabulary(voc)
    return Embedding(vocabulary=vocab, vectors=vec)
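# The `encoding='latin1'` branch above is what lets Python 3 read pickles that
# were written under Python 2 (latin1 maps every byte value, so old `str`
# payloads survive the trip). A minimal, self-contained sketch of the same
# pattern, independent of the embedding classes above:
import pickle
import sys

PY2 = sys.version_info[0] == 2

def loads_compat(payload):
    """Unpickle `payload` regardless of which Python major version wrote it."""
    if PY2:
        return pickle.loads(payload)
    return pickle.loads(payload, encoding='latin1')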
Python loads() example source code
def check_pickling(self, x_data):
    x = chainer.Variable(x_data)
    y = self.link(x)
    y_data1 = y.data
    del x, y
    pickled = pickle.dumps(self.link, -1)
    del self.link
    self.link = pickle.loads(pickled)
    x = chainer.Variable(x_data)
    y = self.link(x)
    y_data2 = y.data
    gradient_check.assert_allclose(y_data1, y_data2, atol=0, rtol=0)
def test_transform_then_prediction(self):
    with TemporaryDirectory() as temp:
        from sklearn.pipeline import Pipeline
        path = os.path.join(temp, 'audio.sph')
        urlretrieve(filename=path,
                    url='https://s3.amazonaws.com/ai-datasets/sw02001.sph')
        f = Pipeline([
            ('mspec', model.SpeechTransform('mspec', fs=8000, vad=False)),
            ('slice', model.Transform(lambda x: x[:, :40])),
            ('pred', model.SequentialModel(N.Dropout(0.3),
                                           N.Dense(20, activation=K.relu),
                                           N.Dense(10, activation=K.softmax))
             )
        ])
        x1 = f.predict(path)
        x2 = f.predict_proba(path)
        f = cPickle.loads(cPickle.dumps(f))
        y1 = f.predict(path)
        y2 = f.predict_proba(path)
        self.assertEqual(np.array_equal(x1, y1), True)
        self.assertEqual(np.array_equal(x2, y2), True)
def test_complex_transform(self):
    with TemporaryDirectory() as temp:
        from sklearn.pipeline import Pipeline
        path = os.path.join(temp, 'audio.sph')
        urlretrieve(filename=path,
                    url='https://s3.amazonaws.com/ai-datasets/sw02001.sph')
        f = Pipeline([
            ('step1', model.SpeechTransform('mspec', fs=8000, vad=True)),
            ('step2', model.Transform(lambda x: (x[0][:, :40],
                                                 x[1].astype(str)))),
            ('step3', model.Transform(lambda x: (np.sum(x[0]),
                                                 ''.join(x[1].tolist()))))
        ])
        x = f.transform(path)
        f = cPickle.loads(cPickle.dumps(f))
        y = f.transform(path)
        self.assertEqual(x[0], y[0])
        self.assertEqual(y[0], -3444229.0)
        self.assertEqual(x[1], y[1])
def test_load_save1(self):
    K.set_training(True)
    X = K.placeholder((None, 1, 28, 28))
    f = N.Dense(128, activation=K.relu)
    y = f(X)
    W, b = [K.get_value(p).sum() for p in K.ComputationGraph(y).parameters]
    num_units = f.num_units
    W_init = f.W_init
    b_init = f.b_init
    activation = f.activation
    f = cPickle.loads(cPickle.dumps(f))
    W1, b1 = [K.get_value(p).sum() for p in f.parameters]
    num_units1 = f.num_units
    W_init1 = f.W_init
    b_init1 = f.b_init
    activation1 = f.activation
    self.assertEqual(W1, W)
    self.assertEqual(b1, b)
    self.assertEqual(num_units1, num_units)
    self.assertEqual(W_init1.__name__, W_init.__name__)
    self.assertEqual(b_init.__name__, b_init1.__name__)
    self.assertEqual(activation1, activation)
def test_load_save2(self):
    K.set_training(True)
    X = K.placeholder((None, 1, 28, 28))
    f = N.Dense(128, activation=K.relu)
    y = f(X)
    yT = f.T(y)
    f1 = K.function(X, y)
    f2 = K.function(X, yT)
    f = cPickle.loads(cPickle.dumps(f))
    y = f(X)
    yT = f.T(y)
    f3 = K.function(X, y)
    f4 = K.function(X, yT)
    x = np.random.rand(12, 1, 28, 28)
    self.assertEqual(f1(x).sum(), f3(x).sum())
    self.assertEqual(f2(x).sum(), f4(x).sum())
def str_to_func(s, sandbox=None):
    if isinstance(s, (tuple, list)):
        code, closure, defaults = s
    elif isinstance(s, string_types):  # path to file
        if os.path.isfile(s):
            with open(s, 'rb') as f:
                code, closure, defaults = cPickle.load(f)
        else:  # pickled string
            code, closure, defaults = cPickle.loads(s)
    else:
        raise ValueError("Unsupported str_to_func for type: %s" % type(s))
    code = marshal.loads(cPickle.loads(code).tostring())
    func = types.FunctionType(code=code, name=code.co_name,
                              globals=sandbox if isinstance(sandbox, Mapping) else globals(),
                              closure=closure, argdefs=defaults)
    return func
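# Hypothetical counterpart to `str_to_func` above (not part of the original
# module): pack a plain function into the (code, closure, defaults) tuple the
# reader expects. The numpy round trip mirrors the `.tostring()` call in
# `str_to_func`; names here are illustrative only.
import marshal
import pickle as cPickle  # stand-in for six.moves.cPickle
import numpy as np

def func_to_str(func):
    code = cPickle.dumps(np.frombuffer(marshal.dumps(func.__code__), dtype='uint8'))
    return (code, func.__closure__, func.__defaults__)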
def test_pickling(self):
    so = ex.SomeObj(minlen=5)
    assert so._sav.entity.minlen == 5
    pstr = pickle.dumps(so)
    del so
    so2 = pickle.loads(pstr)
    assert so2._sav.entity.minlen == 5
    # make sure it's a weakref
    vh = so2._sav
    del so2
    gc.collect()
    try:
        vh.entity
        assert False, 'expected exception'
    except EntityRefMissing:
        pass
def _deserialise_args(shared_objects, local_objects,
                      serialised_args):  # pragma: no cover
    args = []
    for arg in serialised_args:
        if isinstance(arg, _SharedRef):
            key = arg.key
            if key in local_objects:
                x = local_objects[key]
            else:
                x = loads(shared_objects[key])
                local_objects[key] = x
        else:
            x = arg
        args.append(x)
    return tuple(args)
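# Hypothetical serialising counterpart (assumes `_SharedRef(key)` is a small
# wrapper that only carries a `key`): large payloads are pickled once into
# `shared_objects` and replaced by lightweight references, which
# `_deserialise_args` then resolves and caches in `local_objects`.
# Sketch only, not the library's actual code.
from pickle import dumps

def _serialise_args_sketch(shared_objects, args, shared_keys):
    serialised = []
    for key, arg in enumerate(args):
        if key in shared_keys:
            shared_objects[key] = dumps(arg)    # store the pickled payload once
            serialised.append(_SharedRef(key))  # pass only the reference
        else:
            serialised.append(arg)
    return tuple(serialised)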
def check_pickling(self, x_data):
    x = chainer.Variable(x_data)
    y = self.link(x)
    y_data1 = y.data
    del x, y
    pickled = pickle.dumps(self.link, -1)
    del self.link
    self.link = pickle.loads(pickled)
    x = chainer.Variable(x_data)
    y = self.link(x)
    y_data2 = y.data
    testing.assert_allclose(y_data1, y_data2, atol=0, rtol=0)
def test_map(self):
    def plus_one(x):
        return x + 1
    N = 10
    x = np.arange(N)
    futures_original = self.wrenexec.map(plus_one, x)
    futures_str = pickle.dumps(futures_original)
    futures = pickle.loads(futures_str)
    result_count = 0
    while result_count < N:
        fs_dones, fs_notdones = pywren.wait(futures)
        result_count = len(fs_dones)
    res = np.array([f.result() for f in futures])
    np.testing.assert_array_equal(res, x + 1)
def from_bytes(bytes_graph, check_version=True):
    """Read a graph from bytes (the result of pickling the graph).

    :param bytes bytes_graph: The pickled bytes of a BEL graph
    :param bool check_version: Checks if the graph was produced by this version of PyBEL
    :return: A BEL graph
    :rtype: BELGraph
    """
    graph = loads(bytes_graph)
    raise_for_not_bel(graph)
    if check_version:
        raise_for_old_graph(graph)
    return graph
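# Usage sketch for the round trip (assumes PyBEL's matching `to_bytes` helper
# and top-level imports; treat the exact import paths as an assumption):
# from pybel import BELGraph, to_bytes, from_bytes
# graph = BELGraph(name='example', version='1.0.0')
# graph_bytes = to_bytes(graph)         # pickled graph, e.g. for a cache or queue
# graph_copy = from_bytes(graph_bytes)  # re-checks type and PyBEL version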
def get_remote_messages(config, queue, fill=True, block=False):
    """
    Get all messages from the queue without removing them from it.

    :return: yields the raw deserialized messages
    """
    to_inject = []
    try:
        while 1:
            message = queue.get(block=False, timeout=1)
            # --------------------------------------------------------------------------
            # Try to deserialize
            # --------------------------------------------------------------------------
            # Is it pickled data?
            try:
                deserialized = loads(message.body)
            except SerializationError:
                # skip messages that cannot be deserialized
                continue
            yield deserialized
            to_inject.append(deserialized)
    except Empty:
        # When the queue is empty -> re-inject all removed messages
        if fill is True:
            for x in to_inject:
                queue.put(x, serializer="pickle")
# ----------------------------------------------------------------------
def _loads(s):
    return cPickle.loads(s)
def test_pickle_cpu(self):
    fs2_serialized = pickle.dumps(self.fs2)
    fs2_loaded = pickle.loads(fs2_serialized)
    self.assertTrue((self.fs2.b.p.data == fs2_loaded.b.p.data).all())
    self.assertTrue(
        (self.fs2.fs1.a.p.data == fs2_loaded.fs1.a.p.data).all())
def test_pickle_gpu(self):
    self.fs2.to_gpu()
    fs2_serialized = pickle.dumps(self.fs2)
    fs2_loaded = pickle.loads(fs2_serialized)
    fs2_loaded.to_cpu()
    self.fs2.to_cpu()
    self.assertTrue((self.fs2.b.p.data == fs2_loaded.b.p.data).all())
    self.assertTrue(
        (self.fs2.fs1.a.p.data == fs2_loaded.fs1.a.p.data).all())
def test_pickle_cpu(self):
    s = pickle.dumps(self.fs)
    fs2 = pickle.loads(s)
    self.check_equal_fs(self.fs, fs2)
def test_pickle_gpu(self):
    self.fs.to_gpu()
    s = pickle.dumps(self.fs)
    fs2 = pickle.loads(s)
    self.fs.to_cpu()
    fs2.to_cpu()
    self.check_equal_fs(self.fs, fs2)
def test_get(self):
    # mock the pickled data and assign it to the cache's data attribute
    test_pickle = pickle.dumps(
        {pickle.dumps(self.test_key): self.test_value}, protocol=2)
    self.test_cache.data = pickle.loads(test_pickle)
    # assert
    self.assertEquals(self.test_cache.get(self.test_key), self.test_value)
    self.assertEquals(self.test_cache.get(self.bad_key), None)
def _restore_dict(self, path, read_only, cache_size):
    # ====== already exist ====== #
    if os.path.exists(path):
        if os.path.getsize(path) == 0:
            if read_only:
                raise Exception('File at path:"%s" has zero size, no data '
                                'found in (read-only mode).' % path)
        file = open(str(path), mode='rb+')
        if file.read(len(MmapDict.HEADER)) != MmapDict.HEADER:
            raise Exception('Given file is not in the right format '
                            'for MmapDict.')
        # 48 bytes for the file size
        max_position = int(file.read(MmapDict.SIZE_BYTES))
        # length of pickled indices dictionary
        dict_size = int(file.read(MmapDict.SIZE_BYTES))
        # read dictionary
        file.seek(max_position)
        pickled_indices = file.read(dict_size)
        self._indices_dict = async(lambda: cPickle.loads(pickled_indices))()
    # ====== create new file from scratch ====== #
    else:
        file = open(str(path), mode='wb+')
        file.write(MmapDict.HEADER)
        # just write the header
        header = ('%' + str(MmapDict.SIZE_BYTES) + 'd') % \
            (len(MmapDict.HEADER) + MmapDict.SIZE_BYTES * 2)
        file.write(header.encode())
        # write the length of Pickled indices dictionary
        data_size = ('%' + str(MmapDict.SIZE_BYTES) + 'd') % 0
        file.write(data_size.encode())
        file.flush()
        # init indices dict
        self._indices_dict = {}
    # ====== create Mmap from offset file ====== #
    self._file = file
    self._mmap = mmap.mmap(file.fileno(), length=0, offset=0,
                           flags=mmap.MAP_SHARED)
    self._increased_indices_size = 0.  # in MB
    # store all the (key, value) recently added
    self._cache_dict = {}
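# Reading the branches above back out, the on-disk layout `_restore_dict`
# expects is (constants are MmapDict class attributes, shown schematically):
#
#   offset 0                          : MmapDict.HEADER (magic bytes)
#   offset len(HEADER)                : max_position, SIZE_BYTES ASCII digits
#   offset len(HEADER) + SIZE_BYTES   : dict_size,    SIZE_BYTES ASCII digits
#   ...                               : marshalled values (the mmap'd region)
#   offset max_position               : cPickle'd indices dict, dict_size bytes
#
# A fresh file therefore starts with max_position = len(HEADER) + 2 * SIZE_BYTES
# and dict_size = 0, which is exactly what the "create new file" branch writes.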
def __getitem__(self, key):
    if key in self._cache_dict:
        return self._cache_dict[key]
    # ====== load from mmap ====== #
    start, size = self.indices[key]
    self._mmap.seek(start)
    return marshal.loads(self._mmap.read(size))
def values(self):
    for name, (start, size) in self.indices.items():
        self._mmap.seek(start)
        yield marshal.loads(self._mmap.read(size))
    for val in self._cache_dict.values():
        yield val
def items(self):
    for name, (start, size) in self.indices.items():
        self._mmap.seek(start)
        yield name, marshal.loads(self._mmap.read(size))
    # iterate the not-yet-flushed cache as (key, value) pairs
    for key, val in self._cache_dict.items():
        yield key, val
# ===========================================================================
# SQLiteDict
# ===========================================================================
def __getitem__(self, key):
    # ====== multiple keys select ====== #
    if isinstance(key, (tuple, list, np.ndarray)):
        query = """SELECT value FROM {tb}
                   WHERE key IN {keyval};"""
        keyval = '(' + ', '.join(['"%s"' % str(k) for k in key]) + ')'
        self.cursor.execute(
            query.format(tb=self._current_table, keyval=keyval))
        results = self.cursor.fetchall()
        # check whether any keys were not found
        if len(results) != len(key):
            raise KeyError("Cannot find all `key`='%s' in the dictionary." % keyval)
        # load binary data
        results = [marshal.loads(r[0]) for r in results]
    # ====== single key select ====== #
    else:
        key = str(key)
        if key in self.current_cache:
            return self.current_cache[key]
        query = """SELECT value FROM {tb} WHERE key="{keyval}" LIMIT 1;"""
        results = self.connection.execute(
            query.format(tb=self._current_table, keyval=key)).fetchone()
        # results = self.cursor.fetchone()
        if results is None:
            raise KeyError("Cannot find `key`='%s' in the dictionary." % key)
        results = marshal.loads(results[0])
    return results
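# The lookup above interpolates keys straight into the SQL text. The same
# single-key fetch can be written with parameter binding; a sketch with an
# illustrative helper name (table names still have to be formatted in, since
# SQLite placeholders cannot name tables):
import marshal

def _get_one(connection, table, key):
    row = connection.execute(
        'SELECT value FROM {tb} WHERE key=? LIMIT 1;'.format(tb=table),
        (str(key),)).fetchone()
    if row is None:
        raise KeyError(key)
    return marshal.loads(row[0])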
def items(self):
    for item in self.cursor.execute(
            """SELECT key, value from {tb};""".format(tb=self._current_table)):
        yield (item[0], marshal.loads(item[1]))
    for k, v in self.current_cache.items():
        yield k, v
def test_seq(self):
    X = K.placeholder((None, 28, 28, 1))
    f = N.Sequence([
        N.Conv(8, (3, 3), strides=1, pad='same'),
        N.Dimshuffle(pattern=(0, 3, 1, 2)),
        N.Flatten(outdim=2),
        N.Noise(level=0.3, noise_dims=None, noise_type='gaussian'),
        N.Dense(128, activation=tf.nn.relu),
        N.Dropout(level=0.3, noise_dims=None),
        N.Dense(10, activation=tf.nn.softmax)
    ])
    y = f(X)
    yT = f.T(y)
    f1 = K.function(X, y, defaults={K.is_training(): True})
    f2 = K.function(X, yT, defaults={K.is_training(): False})
    f = cPickle.loads(cPickle.dumps(f))
    y = f(X)
    yT = f.T(y)
    f3 = K.function(X, y, defaults={K.is_training(): True})
    f4 = K.function(X, yT, defaults={K.is_training(): False})
    x = np.random.rand(12, 28, 28, 1)
    self.assertEquals(f1(x).shape, (2688, 10))
    self.assertEquals(f3(x).shape, (2688, 10))
    self.assertEqual(np.round(f1(x).sum(), 4),
                     np.round(f3(x).sum(), 4))
    self.assertEquals(y.get_shape().as_list(), (None, 10))
    self.assertEquals(f2(x).shape, (12, 28, 28, 1))
    self.assertEquals(f4(x).shape, (12, 28, 28, 1))
    self.assertEqual(str(f2(x).sum())[:4], str(f4(x).sum())[:4])
    self.assertEquals(yT.get_shape().as_list(), (None, 28, 28, 1))
def test_simple_rnn(self):
    np.random.seed(12082518)
    x = np.random.rand(128, 8, 32)
    #
    X = K.placeholder(shape=(None, 8, 32))
    X1 = K.placeholder(shape=(None, 8, 32))
    X2 = K.placeholder(shape=(None, 8, 32))
    X3 = K.placeholder(shape=(None, 8, 33))
    f = N.RNN(32, activation=K.relu, input_mode='skip')
    #
    y = f(X, mask=K.ones(shape=(128, 8)))
    graph = K.ComputationGraph(y)
    self.assertEqual(len(graph.inputs), 1)
    f1 = K.function([X], y)
    x1 = f1(x)
    # ====== different placeholder ====== #
    y = f(X1)
    f2 = K.function([X1], y)
    x2 = f2(x)
    self.assertEqual(np.sum(x1[0] == x2[0]), np.prod(x1[0].shape))
    # ====== pickle load ====== #
    f = cPickle.loads(cPickle.dumps(f))
    y = f(X2)
    f2 = K.function([X2], y)
    x3 = f2(x)
    self.assertEqual(np.sum(x2[0] == x3[0]), np.prod(x2[0].shape))
    # ====== other input shape ====== #
    error_happen = False
    try:
        y = f(X3)
        f3 = K.function([X3], y)
        x3 = f3(np.random.rand(128, 8, 33))
    except (ValueError, Exception):
        error_happen = True
    self.assertTrue(error_happen)
def __setstate__(self, states):
    (self._sandbox,
     self._source,
     self._argsmap) = states
    # ====== deserialize the function ====== #
    if isinstance(self._sandbox, string_types):
        self._function = cPickle.loads(self._sandbox)
    else:
        self._function, sandbox = _deserialize_function_sandbox(self._sandbox)
    if self._function is None:
        raise RuntimeError('[functionable] Cannot find function in sandbox.')
# ==================== properties ==================== #
def _read_meta_data(data_dir_path, metadata_file_path, max_number_length, rand_bbox_count):
    if metadata_file_path.endswith('.mat'):
        return parse_data(metadata_file_path, max_number_length,
                          data_dir_path, rand_bbox_count)
    elif metadata_file_path.endswith('.pickle'):
        metadata = pickle.loads(open(metadata_file_path, 'rb').read())
        return metadata['filenames'], metadata['labels'], metadata['bboxes'], metadata['sep_bboxes']
def test_combined_infer(self):
    from nsrec.nets import iclr_mnr, lenet_v2
    from six.moves import cPickle as pickle
    metadata = pickle.loads(open(test_helper.train_data_dir_path + '/metadata.pickle', 'rb').read())

    def test_img_data_generator(new_size, crop_bbox=False):
        for i in range(10):
            filename = '%s.png' % (i + 1)
            img_idx = metadata['filenames'].index(filename)
            bbox, label = metadata['bboxes'][img_idx], metadata['labels'][img_idx]
            input_data = inputs.read_img(os.path.join(test_helper.train_data_dir_path, filename))
            width, height = input_data.shape[1], input_data.shape[0]
            if crop_bbox:
                input_data = inputs.read_img(os.path.join(test_helper.train_data_dir_path, filename), bbox)
            input_data = inputs.normalize_img(input_data, [new_size[0], new_size[1]])
            yield (input_data, (width, height), bbox, label)

    bbox_model = Inferrable(test_helper.output_bbox_graph_file, 'initializer-bbox', 'input-bbox', 'output-bbox')
    for input_data, (width, height), bbox, _ in test_img_data_generator([lenet_v2.image_width, lenet_v2.image_height]):
        bbox_in_rate = bbox_model.infer(np.array([input_data]))
        print(width, height)
        print('label bbox: %s, bbox: %s' % (bbox, [bbox_in_rate[0] * width, bbox_in_rate[1] * height,
                                                   bbox_in_rate[2] * width, bbox_in_rate[3] * height]))

    nsr_model = Inferrable(test_helper.output_graph_file, 'initializer', 'input', 'output')
    for input_data, _, _, label in test_img_data_generator([iclr_mnr.image_width, iclr_mnr.image_height], True):
        pb = nsr_model.infer(np.array([input_data]))
        print('actual: %s, length pb: %s, numbers: %s' % (
            label, np.argmax(pb[:5]), np.argmax(pb[5:].reshape([5, 11]), axis=1)))