def dump(self, filepath):
    """Save the SMPS object to disk."""
    return joblib.dump(self, filepath)
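
A minimal round-trip sketch for the method above (hypothetical `smps` instance and file name; assumes `joblib` is importable):

import joblib

smps.dump("smps_session.pkl")               # writes the object via joblib.dump
restored = joblib.load("smps_session.pkl")  # joblib.load reads it back
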
Python dump() — example source code
def save_itr_params(itr, params):
    if _snapshot_dir:
        if _snapshot_mode == 'all':
            file_name = osp.join(_snapshot_dir, 'itr_%d.pkl' % itr)
            joblib.dump(params, file_name, compress=3)
        elif _snapshot_mode == 'last':
            # override previous params
            file_name = osp.join(_snapshot_dir, 'params.pkl')
            joblib.dump(params, file_name, compress=3)
        elif _snapshot_mode == "gap":
            if itr % _snapshot_gap == 0:
                file_name = osp.join(_snapshot_dir, 'itr_%d.pkl' % itr)
                joblib.dump(params, file_name, compress=3)
        elif _snapshot_mode == 'none':
            pass
        else:
            raise NotImplementedError
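
A hedged usage sketch: the `_snapshot_dir` / `_snapshot_mode` / `_snapshot_gap` globals are assumed to be configured elsewhere in the logger module; a snapshot written above can then be read back with `joblib.load` (path and iteration number are illustrative):

import os.path as osp
import joblib

snapshot_dir = "/tmp/experiment"  # hypothetical directory
params = joblib.load(osp.join(snapshot_dir, 'itr_%d.pkl' % 100))
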
def log_parameters(log_file, args, classes):
    log_params = {}
    for param_name, param_value in args.__dict__.items():
        if any([param_name.startswith(x) for x in list(classes.keys())]):
            continue
        log_params[param_name] = param_value
    for name, cls in classes.items():
        if isinstance(cls, type):
            params = get_all_parameters(cls, args)
            params["_name"] = getattr(args, name)
            log_params[name] = params
        else:
            log_params[name] = getattr(cls, "__kwargs", dict())
            log_params[name]["_name"] = cls.__module__ + "." + cls.__class__.__name__
    mkdir_p(os.path.dirname(log_file))
    with open(log_file, "w") as f:
        json.dump(log_params, f, indent=2, sort_keys=True)
def log_parameters_lite(log_file, args):
    log_params = {}
    for param_name, param_value in args.__dict__.items():
        log_params[param_name] = param_value
    if args.args_data is not None:
        stub_method = pickle.loads(base64.b64decode(args.args_data))
        method_args = stub_method.kwargs
        log_params["json_args"] = dict()
        for k, v in list(method_args.items()):
            log_params["json_args"][k] = stub_to_json(v)
        kwargs = stub_method.obj.kwargs
        for k in ["baseline", "env", "policy"]:
            if k in kwargs:
                log_params["json_args"][k] = stub_to_json(kwargs.pop(k))
        log_params["json_args"]["algo"] = stub_to_json(stub_method.obj)
    mkdir_p(os.path.dirname(log_file))
    with open(log_file, "w") as f:
        json.dump(log_params, f, indent=2, sort_keys=True, cls=MyEncoder)
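
`MyEncoder` is referenced but not defined in these snippets; a minimal stand-in (an assumption, not the original class) that falls back to a readable string for objects `json` cannot serialize:

import json

class MyEncoder(json.JSONEncoder):
    def default(self, o):
        # Anything json cannot handle natively is logged as its repr().
        return repr(o)
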
def train(filePath):
    try:
        if not filePath.lower().endswith('json'):
            return {'success': False, 'message': 'Training file should be in json format'}
        with open(filePath) as file:
            ent_data = json.load(file)
        dataset = [jsonToCrf(q, nlp) for q in ent_data['entity_examples']]
        X_train = [sent2features(s) for s in dataset]
        y_train = [sent2labels(s) for s in dataset]
        crf = sklearn_crfsuite.CRF(
            algorithm='lbfgs',
            c1=0.1,
            c2=0.1,
            max_iterations=100,
            all_possible_transitions=True
        )
        crf.fit(X_train, y_train)
        if not os.path.exists("crfModel"):
            os.mkdir("crfModel")
        if os.path.isfile("crfModel/classifier.pkl"):
            os.remove("crfModel/classifier.pkl")
        joblib.dump(crf, "crfModel/classifier.pkl")
        return {'success': True, 'message': 'Model Trained Successfully'}
    except Exception as ex:
        return {'success': False, 'message': 'Error while Training the model - ' + str(ex)}
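
A hedged usage sketch for the model trained above (`new_sentences` is a hypothetical list of pre-processed sentences; `sent2features` is the same helper used in `train()`):

import joblib

crf = joblib.load("crfModel/classifier.pkl")
X_new = [sent2features(s) for s in new_sentences]
predicted = crf.predict(X_new)  # one label sequence per sentence
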
def log_parameters_lite(log_file, args, json_kwargs=dict()):
    log_params = {}
    for param_name, param_value in args.__dict__.items():
        log_params[param_name] = param_value
    if args.args_data is not None:
        stub_method = pickle.loads(base64.b64decode(args.args_data))
        method_args = stub_method.kwargs
        log_params["json_args"] = dict()
        for k, v in list(json_kwargs.items()):
            log_params["json_args"][k] = v
        for k, v in list(method_args.items()):
            log_params["json_args"][k] = stub_to_json(v)
        kwargs = stub_method.obj.kwargs
        for k in ["baseline", "env", "policy"]:
            if k in kwargs:
                log_params["json_args"][k] = stub_to_json(kwargs.pop(k))
        log_params["json_args"]["algo"] = stub_to_json(stub_method.obj)
    mkdir_p(os.path.dirname(log_file))
    with open(log_file, "w") as f:
        json.dump(log_params, f, indent=2, sort_keys=True, cls=MyEncoder)
def save(self, checkpoint_dir=None):
    if checkpoint_dir is None:
        checkpoint_dir = logger.get_snapshot_dir()
    pool_file = os.path.join(checkpoint_dir, 'pool.chk')
    if self.save_format == 'pickle':
        pickle_dump(pool_file + '.tmp', self.pool)
    elif self.save_format == 'joblib':
        joblib.dump(self.pool, pool_file + '.tmp', compress=1, cache_size=1e9)
    else:
        raise NotImplementedError
    shutil.move(pool_file + '.tmp', pool_file)
    checkpoint_file = os.path.join(checkpoint_dir, 'params.chk')
    sess = tf.get_default_session()
    saver = tf.train.Saver()
    saver.save(sess, checkpoint_file)
    tabular_file = os.path.join(checkpoint_dir, 'progress.csv')
    if os.path.isfile(tabular_file):
        tabular_chk_file = os.path.join(checkpoint_dir, 'progress.csv.chk')
        shutil.copy(tabular_file, tabular_chk_file)
    logger.log('Saved to checkpoint %s' % checkpoint_file)
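
A hedged counterpart sketch for `save()` (not part of the original snippet); it assumes the same TF graph is already built, the default session is set, and the pool was saved in the joblib format:

def restore(self, checkpoint_dir=None):
    if checkpoint_dir is None:
        checkpoint_dir = logger.get_snapshot_dir()
    # Reload the replay pool written by save().
    self.pool = joblib.load(os.path.join(checkpoint_dir, 'pool.chk'))
    # Restore TF variables from the matching checkpoint.
    sess = tf.get_default_session()
    tf.train.Saver().restore(sess, os.path.join(checkpoint_dir, 'params.chk'))
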
def read_data_from_pkl(datafile):
    """
    Read a file previously written with joblib.dump.
    :param datafile: filename of the pkl
    :return: (inputs, labels) as float32 arrays
    """
    datas = joblib.load(datafile)
    for i in range(10):
        datas = np.random.permutation(datas)
    inputs, labels = [], []
    for data in datas:
        inputs.append(data["input"])
        labels.append(data["label"])
    inputs = np.array(inputs).reshape(-1, 15, 101, 101, 3).astype(np.float32)
    inputs -= np.mean(inputs, axis=(2, 3), keepdims=True)
    inputs /= np.std(inputs, axis=(2, 3), keepdims=True)
    labels = np.array(labels).reshape(-1, 1).astype(np.float32)
    return inputs, labels
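
For context, a hedged sketch of producing a file this reader can consume: a sequence of {"input", "label"} records written with joblib.dump (file name, record count, and shapes are illustrative):

import numpy as np
import joblib

records = [{"input": np.zeros((15, 101, 101, 3), dtype=np.float32), "label": 0.0}
           for _ in range(4)]
joblib.dump(records, "radar_batch.pkl")  # hypothetical file name
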
def add_file(model, create, value, *args, **kwargs):
    model_params = {
        "RandomForestClassifier": {
            "bootstrap": True, "criterion": "gini",
            "oob_score": False, "max_features": "auto",
            "n_estimators": 10, "random_state": 0},
        "RandomForestRegressor": {
            "bootstrap": True, "criterion": "mse",
            "oob_score": False, "max_features": "auto",
            "n_estimators": 10},
        "LinearSGDClassifier": {
            "loss": "hinge"},
        "LinearRegressor": {
            "fit_intercept": True}}
    fset_data, data = featurize.load_featureset(model.featureset.file_uri)
    model_data = MODELS_TYPE_DICT[model.type](**model_params[model.type])
    model_data.fit(fset_data, data['labels'])
    model.file_uri = pjoin('/tmp/', '{}.pkl'.format(str(uuid.uuid4())))
    joblib.dump(model_data, model.file_uri)
    DBSession().commit()
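
A hedged read-back sketch for the model file written above (reuses the same `featurize` loader and `model` record as `add_file()`):

model_data = joblib.load(model.file_uri)
fset_data, data = featurize.load_featureset(model.featureset.file_uri)
predictions = model_data.predict(fset_data)
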
def save_vector_cache(vectors, vector_out_file, filetype='', **kwargs):
    logging.info("Saving {} vectors to cache {}".format(len(vectors), vector_out_file))
    if vector_out_file.endswith('.dill') or filetype == 'dill':
        with open(vector_out_file, 'wb') as data_file:
            dill.dump(vectors, data_file, protocol=kwargs.get('dill_protocol', 3))
    elif vector_out_file.endswith('.joblib') or filetype == 'joblib':
        joblib.dump(vectors, vector_out_file, compress=kwargs.get('joblib_compression', 3),
                    protocol=kwargs.get('joblib_protocol', 3))
    elif vector_out_file.endswith('.sqlite') or filetype == 'sqlite':
        autocommit = kwargs.pop('autocommit', True)
        if isinstance(vectors, SqliteDict):
            vectors.commit()
        else:
            with SqliteDict(vector_out_file, autocommit=autocommit) as data_file:
                for key, value in vectors.items():
                    data_file[key] = value
                if not autocommit:
                    data_file.commit()
    else:
        raise NotImplementedError
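
A hedged counterpart sketch, `load_vector_cache` (not in the original), choosing a reader by the same extension conventions:

def load_vector_cache(vector_in_file, filetype=''):
    if vector_in_file.endswith('.dill') or filetype == 'dill':
        with open(vector_in_file, 'rb') as data_file:
            return dill.load(data_file)
    elif vector_in_file.endswith('.joblib') or filetype == 'joblib':
        return joblib.load(vector_in_file)
    elif vector_in_file.endswith('.sqlite') or filetype == 'sqlite':
        return SqliteDict(vector_in_file)  # caller is responsible for closing it
    else:
        raise NotImplementedError
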
def dump_caffemodel_weights():
    net = caffe.Net(args.prototxt_path, args.caffemodel_path, caffe.TEST)
    weights = {}
    n_layers = len(net.layers)
    for i in range(n_layers):
        layer_name = net._layer_names[i]
        layer = net.layers[i]
        layer_blobs = [o.data for o in layer.blobs]
        weights[layer_name] = layer_blobs
    joblib.dump(weights, args.caffe_weights_path)
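
Reading the dumped weights back is a plain joblib.load; a hedged sketch (assumes the same `args`):

weights = joblib.load(args.caffe_weights_path)
for layer_name, layer_blobs in weights.items():
    print(layer_name, [blob.shape for blob in layer_blobs])
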
def joblib_dump(item, path):
    import joblib
    create_path_if_not_exists(path)
    joblib.dump(item, path)
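
`create_path_if_not_exists` is not shown; a minimal stand-in, assuming it only has to create the parent directory, might be:

import os

def create_path_if_not_exists(path):
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
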
def create_model(clf_name, features, groundtruths, outdir, classifiers):
    begin = int(round(time.time() * 1000))
    utils.print_success("Starting " + clf_name)
    clf_dir = outdir + clf_name + "/"
    utils.create_dir(clf_dir)
    clf = classifiers[clf_name]
    clf.fit(features, groundtruths)
    joblib.dump(clf, clf_dir + clf_name + ".pkl")
    utils.print_info(clf_name + " done in " + str(int(round(time.time() * 1000)) - begin) + "ms")
def save(self, model_name):
    """Save model to file."""
    joblib.dump(self, model_name, compress=1, protocol=2)
def dump(obj, file_name):
    if file_name.endswith('.json'):
        with open(file_name, 'w') as f:
            f.write(jsonpickle.dumps(obj))
        return
    if isinstance(obj, np.ndarray):
        np.save(file_name, obj)
        return
    # Using joblib instead of pickle because of http://bugs.python.org/issue11564
    joblib.dump(obj, file_name, protocol=pickle.HIGHEST_PROTOCOL)
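
A hedged mirror of `dump()` above, choosing the reader by the same conventions (note that `np.save` appends '.npy' when the name lacks it); `os`, `np`, `jsonpickle`, and `joblib` are assumed to be imported as in the surrounding module:

def load(file_name):
    if file_name.endswith('.json'):
        with open(file_name) as f:
            return jsonpickle.loads(f.read())
    if os.path.exists(file_name + '.npy'):
        return np.load(file_name + '.npy')
    return joblib.load(file_name)
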
def log_variant(log_file, variant_data):
    mkdir_p(os.path.dirname(log_file))
    if hasattr(variant_data, "dump"):
        variant_data = variant_data.dump()
    variant_json = stub_to_json(variant_data)
    with open(log_file, "w") as f:
        json.dump(variant_json, f, indent=2, sort_keys=True, cls=MyEncoder)
def onEnd(self, agent):
    if not self._active:
        return
    bestIndex = np.argmax(self._validationScores)
    print("Best neural net obtained after {} epochs, with validation score {}".format(
        bestIndex + 1, self._validationScores[bestIndex]))
    if self._testID is not None:
        print("Test score of this neural net: {}".format(self._testScores[bestIndex]))
    try:
        os.mkdir("scores")
    except Exception:
        pass
    basename = "scores/" + self._filename
    joblib.dump({"vs": self._validationScores, "ts": self._testScores}, basename + "_scores.jldump")
def dumpNetwork(self, fname, nEpoch=-1):
    """Dump the network.

    Parameters
    ----------
    fname : string
        Name of the file where the network will be dumped
    nEpoch : int
        Epoch number (optional)
    """
    try:
        os.mkdir("nnets")
    except Exception:
        pass
    basename = "nnets/" + fname
    for f in os.listdir("nnets/"):
        if fname in f:
            os.remove("nnets/" + f)
    all_params = self._network.getAllParams()
    if nEpoch >= 0:
        joblib.dump(all_params, basename + ".epoch={}".format(nEpoch))
    else:
        joblib.dump(all_params, basename, compress=True)
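
A hedged counterpart sketch (an assumption, not necessarily the library's API), relying on the network exposing a `setAllParams()` that mirrors `getAllParams()`:

def setNetwork(self, fname, nEpoch=-1):
    basename = "nnets/" + fname
    if nEpoch >= 0:
        all_params = joblib.load(basename + ".epoch={}".format(nEpoch))
    else:
        all_params = joblib.load(basename)
    self._network.setAllParams(all_params)
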
def fit(self, train_data, eval_data, eval_metric='acc', **kargs):
    snapshot = kargs.pop('snapshot')
    self.clf.fit(*self._get_data_label(train_data))
    jb.dump(self.clf, snapshot + '-0001.params')
    if not isinstance(eval_metric, mx.metric.EvalMetric):
        eval_metric = mx.metric.create(eval_metric)
    data, label = self._get_data_label(eval_data)
    pred = self.clf.predict(data).astype(np.int64)
    prob = np.zeros((len(pred), pred.max() + 1))
    prob[np.arange(len(prob)), pred] = 1
    eval_metric.update([mx.nd.array(label)], [mx.nd.array(prob)])
    for name, val in eval_metric.get_name_value():
        logger.info('Epoch[0] Validation-{}={}'.format(name, val))