def save_session(fname=None, session=None, pickleProto=4):
    import dill as pickle
    if fname is None:
        fname = conf.session
        if not fname:
            conf.session = fname = utils.get_temp_file(keep=True)
            log_interactive.info("Use [%s] as session file" % fname)
    if session is None:
        session = builtins.__dict__["scapy_session"]

    to_be_saved = session.copy()
    for k in list(to_be_saved.keys()):
        # Drop IPython internals, private names and objects that cannot be pickled
        if k in ["__builtins__", "In", "Out", "conf"] or k.startswith("_") or \
                (hasattr(to_be_saved[k], "__module__") and
                 str(to_be_saved[k].__module__).startswith('IPython')):
            del to_be_saved[k]
            continue
        if type(to_be_saved[k]) in [type, types.ModuleType, types.MethodType]:
            log_interactive.info("[%s] (%s) can't be saved." % (k, type(to_be_saved[k])))
            del to_be_saved[k]

    # Keep a backup of the previous session file, if any
    try:
        os.rename(fname, fname + ".bak")
    except OSError:
        pass

    # Dump the filtered session dict once (the previous per-key loop pickled
    # the full dict repeatedly into the same stream)
    f = gzip.open(fname, "wb")
    pickle.dump(to_be_saved, f, pickleProto)
    f.close()
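# A matching read path for the session file above, sketched for completeness.
# This is a hypothetical counterpart (not taken from the original source): it
# assumes the same gzip + dill pairing and the same conf/builtins globals.
def load_session(fname=None):
    import dill as pickle
    if fname is None:
        fname = conf.session
    with gzip.open(fname, "rb") as f:
        restored = pickle.load(f)
    builtins.__dict__["scapy_session"] = restored
    log_interactive.info("Loaded session [%s]" % fname)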
def save_object(fname, obj):
    import dill as pickle
    # Write the object as a gzip-compressed dill pickle
    with gzip.open(fname, "wb") as f:
        pickle.dump(obj, f)
def saga_score_struct_cache(*args):
    arghash = sha1(repr(("score_struct",) + args).encode('utf-8')).hexdigest()
    fn = "res/baseline_linear_{}.dill".format(arghash)
    try:
        with open(fn, 'rb') as f:
            out = dill.load(f)
        logging.info("Loaded cached version.")
    except FileNotFoundError:
        logging.info("Computing...")
        out = saga_score_struct(*args)
        with open(fn, 'wb') as f:
            dill.dump(out, f)
    return out
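# The hash-the-arguments, dump-to-dill caching idiom above generalizes to a
# small decorator. A minimal sketch, assuming only dill and an existing cache
# directory; the decorator name and directory are illustrative, not part of
# the original code.
import os
from hashlib import sha1

import dill


def dill_cached(prefix, cache_dir="res"):
    def decorator(func):
        def wrapper(*args):
            # Key the cache file on a SHA-1 hash of the call arguments
            arghash = sha1(repr((prefix,) + args).encode("utf-8")).hexdigest()
            fn = os.path.join(cache_dir, "{}_{}.dill".format(prefix, arghash))
            if os.path.exists(fn):
                with open(fn, "rb") as f:
                    return dill.load(f)
            out = func(*args)
            with open(fn, "wb") as f:
                dill.dump(out, f)
            return out
        return wrapper
    return decorator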
def saga_decision_function(dataset, k, link_alpha, prop_alpha, l1_ratio):
    fn = cache_fname("linear_val_df", (dataset, k, link_alpha, prop_alpha,
                                       l1_ratio))
    if os.path.exists(fn):
        logging.info("Loading {}".format(fn))
        with open(fn, "rb") as f:
            return dill.load(f)

    ds = 'erule' if dataset == 'cdcp' else 'ukp-essays'  # sorry
    path = os.path.join("data", "process", ds, "folds", "{}", "{}")

    # sorry again: get val docs
    n_folds = 5 if dataset == 'ukp' else 3
    load, ids = get_dataset_loader(dataset, "train")
    for k_, (_, val) in enumerate(KFold(n_folds).split(ids)):
        if k_ == k:
            break
    val_docs = list(load(ids[val]))

    X_tr_link, y_tr_link = load_csr(path.format(k, 'train.npz'),
                                    return_y=True)
    X_te_link, y_te_link = load_csr(path.format(k, 'val.npz'),
                                    return_y=True)
    X_tr_prop, y_tr_prop = load_csr(path.format(k, 'prop-train.npz'),
                                    return_y=True)
    X_te_prop, y_te_prop = load_csr(path.format(k, 'prop-val.npz'),
                                    return_y=True)

    baseline = BaselineStruct(link_alpha, prop_alpha, l1_ratio)
    baseline.fit(X_tr_link, y_tr_link, X_tr_prop, y_tr_prop)
    Y_marg = baseline.decision_function(X_te_link, X_te_prop, val_docs)

    with open(fn, "wb") as f:
        logging.info("Saving {}".format(fn))
        dill.dump((Y_marg, baseline), f)

    return Y_marg, baseline
def linear_cv_score(dataset, alpha, l1_ratio, constraints):
    fn = cache_fname("linear_cv_score", (dataset, alpha, l1_ratio,
                                         constraints))
    if os.path.exists(fn):
        logging.info("Loading {}".format(fn))
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")
    n_folds = 5 if dataset == 'ukp' else 3

    scores = []
    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        Y_marg, bl = saga_decision_function(dataset, k, alpha, alpha, l1_ratio)
        val_docs = list(load(ids[val]))
        Y_true = [doc.label for doc in val_docs]
        Y_pred = bl.fast_decode(Y_marg, val_docs, constraints)
        scores.append(bl._score(Y_true, Y_pred))

    with open(fn, "wb") as f:
        logging.info("Saving {}".format(fn))
        dill.dump(scores, f)

    return scores
def write_dill(self, file_):
    """
    Serialize a computation to a file or file-like object

    :param file_: If string, writes to a file
    :type file_: File-like object, or string
    """
    node_serialize = nx.get_node_attributes(self.dag, _AN_TAG)
    if all(serialize for name, serialize in six.iteritems(node_serialize)):
        obj = self
    else:
        # Work on a copy so the live computation keeps its values; nodes not
        # tagged for serialization are reset before dumping
        obj = self.copy()
        for name, tags in six.iteritems(node_serialize):
            if _T_SERIALIZE not in tags:
                obj._set_uninitialized(name)
    if isinstance(file_, six.string_types):
        with open(file_, 'wb') as f:
            dill.dump(obj, f)
    else:
        dill.dump(obj, file_)
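# Round-trip sketch for write_dill (illustrative names: the computation object
# `comp` and the file name are assumptions, not from the original code).
# Because the whole object is pickled, a plain dill.load restores it; nodes
# that were not tagged for serialization come back uninitialized.
import dill

comp.write_dill("computation.dill")
with open("computation.dill", "rb") as f:
    restored = dill.load(f)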
def save(self, folder):
    """Save object and return corresponding files."""
    if not os.path.exists(folder):
        os.makedirs(folder)
    files = []
    # annoy objects can't be pickled, so save these separately
    for k, v in self._annoy_objects.items():
        annoy_filepath = os.path.join(folder, '{}.ann'.format(k))
        v._ann_obj.save(annoy_filepath)
        files.append(annoy_filepath)
    pickle_filepath = os.path.join(folder, 'object.pickle')
    with open(pickle_filepath, 'wb') as handle:
        dill.dump(self, handle)
    files.append(pickle_filepath)
    # write entity types
    enttypes = self.get_entity_types()
    info_file = os.path.join(folder, 'entity_info.json')
    with open(info_file, 'w') as handle:
        json.dump(enttypes, handle)
    files.append(info_file)
    return files
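# Because the Annoy indexes live next to the pickle rather than inside it,
# loading has to recombine the two. A hedged sketch of that inverse step,
# assuming annoy's AnnoyIndex API; the vector dimensionality and metric are
# passed in because they are not stored by save() above (assumptions, not
# taken from the original code).
import os

import dill
from annoy import AnnoyIndex


def load(folder, dim, metric='angular'):
    with open(os.path.join(folder, 'object.pickle'), 'rb') as handle:
        obj = dill.load(handle)
    # re-attach each Annoy index from its .ann file
    for k, v in obj._annoy_objects.items():
        index = AnnoyIndex(dim, metric)
        index.load(os.path.join(folder, '{}.ann'.format(k)))
        v._ann_obj = index
    return obj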
def _write_args(self, input_filename):
    # serialize args to file
    if self._pass_op_args():
        with open(input_filename, 'wb') as f:
            arg_dict = {'args': self.op_args, 'kwargs': self.op_kwargs}
            if self.use_dill:
                dill.dump(arg_dict, f)
            else:
                pickle.dump(arg_dict, f)
def _generate_python_code(self):
    if self.use_dill:
        pickling_library = 'dill'
    else:
        pickling_library = 'pickle'
    fn = self.python_callable
    # don't try to read the pickle if we didn't pass anything
    if self._pass_op_args():
        load_args_line = 'with open(sys.argv[1], "rb") as f: arg_dict = {}.load(f)'.format(pickling_library)
    else:
        load_args_line = 'arg_dict = {"args": [], "kwargs": {}}'

    # no indents in the generated code so we can accept any type of indents
    # in the original function: deserialize the args, call the function,
    # then serialize the result if necessary
    return dedent("""\
    import {pickling_library}
    import sys
    {load_args_code}
    args = arg_dict["args"]
    kwargs = arg_dict["kwargs"]
    with open(sys.argv[3], 'r') as f: virtualenv_string_args = list(map(lambda x: x.strip(), list(f)))
    {python_callable_lines}
    res = {python_callable_name}(*args, **kwargs)
    with open(sys.argv[2], 'wb') as f: res is not None and {pickling_library}.dump(res, f)
    """).format(
        load_args_code=load_args_line,
        python_callable_lines=dedent(inspect.getsource(fn)),
        python_callable_name=fn.__name__,
        pickling_library=pickling_library)
def save(self, path):
    """ Save the model.

    Parameters
    ----------
    path : str
        a full path to a file where a model will be saved to
    """
    if self.estimator is not None:
        # pickle.dump expects a file object, so open the target path first
        with open(path, 'wb') as f:
            pickle.dump(self.estimator, f)
    else:
        raise ValueError("Scikit-learn estimator does not exist. Check your config for 'estimator'.")
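# A matching load sketch for the estimator saved above. Hedged: the original
# class may ship its own loader; this simply mirrors the dump call.
def load(self, path):
    """ Load a previously saved scikit-learn estimator from `path`. """
    with open(path, 'rb') as f:
        self.estimator = pickle.load(f)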
def main():
    '''
    Beginning on START_DATE, step forward hourly, training a 2-layer
    hierarchical ensemble of transformers on the previous hour's NLDAS FORA
    dataset and making out-of-training-sample predictions for the current
    hour. Writes a dill dump file for each hour. Runs for NSTEPS hourly steps.
    '''
    date = START_DATE
    add_hour = datetime.timedelta(hours=1)
    get_file_name = lambda date: date.isoformat(
        ).replace(':', '_').replace('-', '_') + '.dill'
    scalers = zip(('MinMaxScaler', 'RobustScaler', 'StandardScaler', 'None'),
                  (minmax, robust, standard, None))
    estimators = zip(('LinearRegression', ),
                     (linear, ))
    init_func = partial(ensemble_init_func,
                        pca=pca,
                        scalers=scalers,
                        n_components=n_components,
                        estimators=estimators,
                        preamble=preamble,
                        log=log,
                        minmax_bounds=minmax_bounds,
                        summary='Flatten, Subset, Drop NaN Rows, Get Y Data, Difference X in Time')
    for step in range(NSTEPS):
        last_hour_data = sampler(date, X_time_steps=X_TIME_STEPS)
        date += add_hour
        this_hour_data = sampler(date, X_time_steps=X_TIME_STEPS)
        current_file = get_file_name(date)
        out = train_model_on_models(last_hour_data, this_hour_data, init_func)
        with open(current_file, 'wb') as f:
            dill.dump(out, f)
        print('Dumped to:', current_file)
        l2, t2, models, preds, models2, preds2 = out
        layer_1_scores = [model._score for _, model in models]
        layer_2_scores = [model._score for _, model in models2]
        print('Scores in layer 1 models:', layer_1_scores)
        print('Scores in layer 2 models:', layer_2_scores)
    return last_hour_data, this_hour_data, models, preds, models2, preds2
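# Each hourly dump written by main() can be read back offline. A small sketch,
# assuming the same file-name convention, that START_DATE is in scope, and
# that dill plus the ensemble classes are importable when unpickling.
import datetime

import dill

ts = (START_DATE + datetime.timedelta(hours=1)).isoformat()
fname = ts.replace(':', '_').replace('-', '_') + '.dill'
with open(fname, 'rb') as f:
    l2, t2, models, preds, models2, preds2 = dill.load(f)
print('layer 1 scores:', [m._score for _, m in models])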
def dump(self, file, protocol=None, byref=None, fmode=None, recurse=None):
    '''pickle (dill) an object to a file'''
    getattr(self, '_close', lambda: [])()
    return dill.dump(self, file, protocol=protocol,
                     byref=byref, fmode=fmode, recurse=recurse)
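# Usage sketch for the dump helper above, assuming it is mixed into a class
# whose instances dill can pickle. The Cache class and file name are made up
# for illustration; dill.load reads the object back from the same file.
cache = Cache()
with open("cache.dill", "wb") as f:
    cache.dump(f, recurse=True)
with open("cache.dill", "rb") as f:
    restored = dill.load(f)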
def predict_to_pickle(prediction, fname_base):
    '''Dump a prediction's y data to fname_base + ".xr"'''
    mkdir_p(fname_base)
    fname = fname_base + '.xr'
    with open(fname, 'wb') as f:
        return dill.dump(prediction, f)
def gen_brown_dataset(output_folder, num=None):
    sentences = brown.sents()
    if num:
        if num > len(sentences):
            num = len(sentences)
        sentences = sentences[:num]
    (X_train, X_test), (y_train, y_test), (K_train, K_test), param_dict = \
        gen_dataset(sentences)
    if output_folder:
        np.save(os.path.join(output_folder, 'X_train.npy'), X_train)
        np.save(os.path.join(output_folder, 'X_test.npy'), X_test)
        np.save(os.path.join(output_folder, 'y_train.npy'), y_train)
        np.save(os.path.join(output_folder, 'y_test.npy'), y_test)
        np.save(os.path.join(output_folder, 'K_train.npy'), K_train)
        np.save(os.path.join(output_folder, 'K_test.npy'), K_test)
        # pickles should be written in binary mode
        with open(os.path.join(output_folder, 'gen_param_dict.pkl'), 'wb') as f:
            cPickle.dump(param_dict, f)
def train_brown_lemmatizer(output_folder):
    obs_set = np.load(os.path.join(output_folder, 'X_train.npy'))
    out_set = np.load(os.path.join(output_folder, 'y_train.npy'))
    count_set = np.load(os.path.join(output_folder, 'K_train.npy'))
    nn_param_set = train_lemmatizer(
        obs_set,
        out_set,
        count_set,
        window_size=[2, 2],
        positive_samples_only=True,
        batch_size=128,
        param_scale=0.01,
        num_epochs=4000,
        step_size=0.001,
        l2_lambda=0.1)
    if output_folder:
        # dill pickles should also be written in binary mode
        with open(os.path.join(output_folder, 'nn_param_dict.pkl'), 'wb') as f:
            dill.dump(nn_param_set, f)
def save(textdata, fname):
    with open(fname, 'wb') as fout:
        dill.dump(textdata, fout)
def write(obj, fn):
    import dill
    with open(fn, "wb") as f:
        dill.dump(obj, f)
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            # zip up everything written under the temporary directory,
            # except the archive itself
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        dill.dump(model_data, f)
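# The inverse of this save path unpacks the same layers in reverse: dill-load
# the zip bytes, write them to a temporary archive, extract, and restore the
# saved TensorFlow state. A hedged sketch; U.load_state mirrors the
# U.save_state call above, and the rest of the wiring is assumed.
def load(path):
    with open(path, "rb") as f:
        model_data = dill.load(f)
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)
        with zipfile.ZipFile(arc_path, 'r') as zipf:
            zipf.extractall(td)
        U.load_state(os.path.join(td, "model"))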