def dump_binary(self, f):
"""
    Dumps the model as a Jubatus binary model file to the binary stream ``f``.
"""
# Dump header
self.header.dump(f)
# Dump system_data
self.system.dump(f)
# Dump user_data
if self._user_raw is None:
printe('Warning: conversion from Python object to binary model format may generate corrupt model')
self.user.dump(f)
else:
f.write(self._user_raw)
def dump_json(self, f, without_raw=False):
"""
    Dumps the model as a JSON file to the text stream ``f``.
"""
record = {}
# Dump header
record['header'] = dict(self.header.get())
# Dump system_data
record['system'] = dict(self.system.get())
# Dump user_data
record['user'] = dict(self.user.get())
if not without_raw:
record['user_raw'] = base64.b64encode(self._user_raw).decode()
json.dump(record, f, indent=2)
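A minimal usage sketch of the two dump methods above, assuming `model` is an instance of the model class they belong to (for example jubakit's JubaModel); the file names are illustrative only.
with open('model.jubatus', 'wb') as f:
    model.dump_binary(f)                  # binary model file; the stream must be opened in 'wb'
with open('model.json', 'w') as f:
    model.dump_json(f, without_raw=True)  # JSON view, skipping the base64-encoded user_raw blob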
def load_object(path, build_fn, *args, **kwargs):
""" load from serialized form or build an object, saving the built
object; kwargs provided to `build_fn`.
"""
save = False
obj = None
if path is not None and os.path.isfile(path):
with open(path, 'rb') as obj_f:
obj = msgpack.load(obj_f, use_list=False, encoding='utf-8')
else:
save = True
if obj is None:
obj = build_fn(*args, **kwargs)
if save and path is not None:
with open(path, 'wb') as obj_f:
msgpack.dump(obj, obj_f)
return obj
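An illustrative call of `load_object` above, caching the result of a hypothetical build function; the path, the build function, and the corpus string are made up for this sketch.
def build_counts(corpus):
    # Hypothetical expensive build step: count whitespace-separated tokens.
    counts = {}
    for token in corpus.split():
        counts[token] = counts.get(token, 0) + 1
    return counts

counts = load_object('counts.msgpack', build_counts, 'the quick brown fox jumps over the dog')
# The first run builds and saves 'counts.msgpack'; later runs load it with msgpack instead.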
def dump(cls, data):
"""
Returns the dumped model data structure of the raw model data.
"""
with tempfile.NamedTemporaryFile(mode='wb', prefix='jubakit-jubadump-') as f:
f.write(data)
f.flush()
return cls.dump_file(f.name)
def dump(self, f, *args, **kwargs):
    # Must be implemented in subclasses.
raise NotImplementedError
def dumps(self, *args, **kwargs):
f = BytesIO()
self.dump(f, *args, **kwargs)
return f.getvalue()
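The `dumps` helper above simply routes `dump` through an in-memory buffer; a standalone sketch of the same pattern, with msgpack as the assumed serializer:
from io import BytesIO
import msgpack

def msgpack_dumps(obj):
    buf = BytesIO()
    msgpack.dump(obj, buf)   # dump() accepts any writable file-like object
    return buf.getvalue()    # return the accumulated bytes

payload = msgpack_dumps({'answer': 42})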
def dump(self, f):
    # Serialize only the values of the (key, value) pairs returned by get().
    values = list(map(lambda x: x[1], self.get()))
    msgpack.dump(values, f)
def dump(filepath, data, options=None):
"""Dump the output to disk (JSON, msgpack, etc)
:param filepath: output file path
:param data: serializable data to write to disk
:param options: (Default value = None)
:type options: dict
"""
options = options or {}
logger.debug("io.dump(%s, data, options=%s)", filepath, options)
compress = options.get(constants.COMPRESSION, constants.NONE)
if compress == constants.MSGPACK:
try:
import msgpack
except ImportError:
logger.error("msgpack module not found")
raise
logger.info("Dumping to msgpack")
func = lambda x, y: msgpack.dump(x, y)
mode = 'wb'
else:
round_off = options.get(constants.ENABLE_PRECISION)
if round_off:
_json.ROUND = options[constants.PRECISION]
else:
_json.ROUND = None
indent = options.get(constants.INDENT, True)
indent = 4 if indent else None
compact_separators = (',', ':')
logger.info("Dumping to JSON")
func = lambda x, y: _json.json.dump(x, y, indent=indent, separators=compact_separators)
mode = 'w'
logger.info("Writing to %s", filepath)
with open(filepath, mode=mode) as stream:
func(data, stream)
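An illustrative call of the `dump` function above; the `constants` module and its attribute names (COMPRESSION, MSGPACK, ...) belong to the surrounding project and are only assumed here.
scene = {'name': 'cube', 'vertices': [[0, 0, 0], [1, 0, 0], [1, 1, 0]]}
dump('scene.json', scene)  # default path: indented JSON output
dump('scene.pack', scene, options={constants.COMPRESSION: constants.MSGPACK})  # msgpack output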
def freqs_to_cBpack(input_file, output_file, cutoff=600):
"""
Convert a frequency list into the idiosyncratic 'cBpack' format that
will be loaded by wordfreq: a list in msgpack format of frequency
tiers, each tier being one centibel (a factor of 10^(1/100))
less frequent than the previous tier.
"""
cBpack = []
for line in input_file:
word, strfreq = line.rstrip().split('\t', 1)
if word == '__total__':
raise ValueError(
"This is a count file, not a frequency file"
)
freq = float(strfreq)
neg_cB = -(round(math.log10(freq) * 100))
if neg_cB >= cutoff:
break
while neg_cB >= len(cBpack):
cBpack.append([])
cBpack[neg_cB].append(word)
for sublist in cBpack:
sublist.sort()
cBpack_data = [{'format': 'cB', 'version': 1}] + cBpack
msgpack.dump(cBpack_data, output_file)
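A small illustration of `freqs_to_cBpack`; the tab-separated frequency lines and the output file name are made up for this sketch.
from io import StringIO

freq_lines = StringIO("the\t0.0539\nof\t0.0335\nzyzzyva\t0.0000000001\n")
with open('en.msgpack', 'wb') as out:
    freqs_to_cBpack(freq_lines, out, cutoff=600)
# The output is a msgpack list: a header dict followed by one alphabetically
# sorted word list per centibel tier of decreasing frequency.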
def to_msgpack(self, *args, **kwargs):
    return self.__dict__  # msgpack.dump(self.to_dict(*args, **kwargs))