def persist(self, model_dir):
    # type: (Text) -> Dict[Text, Any]
    """Persist this model into the passed directory.
    Returns the metadata necessary to load the model again."""
    from sklearn.externals import joblib

    if self.ent_tagger:
        model_file_name = os.path.join(model_dir, "crf_model.pkl")
        joblib.dump(self.ent_tagger, model_file_name)
        return {"entity_extractor_crf": {"model_file": "crf_model.pkl",
                                         "crf_features": self.crf_features,
                                         "BILOU_flag": self.BILOU_flag,
                                         "version": 1}}
    else:
        return {"entity_extractor_crf": None}
Python dump() example source code
# Imports needed to run this snippet standalone
from gensim.models.doc2vec import Doc2Vec
from sklearn.cluster import KMeans
from sklearn.externals import joblib

def newKMeansModel(vectorFile, outputFile, numClusters):
    # https://stackoverflow.com/questions/43476869/doc2vec-sentence-clustering
    model = Doc2Vec.load("Models\\" + vectorFile)
    docVecs = model.docvecs.doctag_syn0
    km = KMeans(n_clusters=numClusters)
    print("Starting")
    km.fit(docVecs)
    print("Fitting Data")
    joblib.dump(km, outputFile)
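A hedged usage sketch for the model saved above: reload it and assign a vector to a cluster. The file name "kmeans_model.pkl" and the random vector are illustrative stand-ins.

# Hedged sketch: reload the saved KMeans model and assign a vector to a cluster.
import numpy as np
from sklearn.externals import joblib

km = joblib.load("kmeans_model.pkl")  # whatever path was passed as outputFile above
vec = np.random.rand(1, km.cluster_centers_.shape[1])  # stand-in for a Doc2Vec document vector
print(km.predict(vec))  # index of the nearest cluster centre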
# Imports needed to run this snippet standalone
from gensim.models.doc2vec import Doc2Vec
from sklearn.cluster import DBSCAN
from sklearn.externals import joblib
import numpy as np
import pandas as pd

def newDBSCANModel(vectorFile, outputFile):
    model = Doc2Vec.load("Models\\" + vectorFile)
    vecs = []
    for doc in range(0, len(model.docvecs)):
        doc_vec = model.docvecs[doc]
        # print doc_vec
        vecs.append(doc_vec.reshape((1, 300)))
    doc_vecs = np.array(vecs, dtype='float')  # TSNE expects float type values
    # print doc_vecs
    docs = []
    for i in doc_vecs:
        docs.append(i[0])

    db = DBSCAN(eps=0.03, algorithm="brute", metric='cosine').fit(docs)
    joblib.dump(db, outputFile)

    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    clusters = db.labels_.tolist()

    cluster_info = {'labels': model.docvecs.offset2doctag,
                    "index, wordcount and repeated words": [model.docvecs.doctags[x] for x in
                                                            model.docvecs.offset2doctag],
                    'clusters': clusters}
    sentenceDF = pd.DataFrame(cluster_info, index=[clusters],
                              columns=['labels', "index, wordcount and repeated words", 'clusters'])
    print(sentenceDF)
    sentenceDF.to_csv("DBSCAN.csv")
    print('Estimated number of clusters: %d' % n_clusters_)
def run_model(ms, i_fold):
    model = ModelVW(ms.name(), i_fold)
    prms = model_params_dict[ms.model_params]
    if "interaction" not in prms:
        prms["interaction"] = vw_inter_list[ms.feature_set]
    model.set_params(prms)
    model.set_data(ms.feature_set, i_fold)  # special
    model.train()
    pred = model.predict()
    train_pred = model.predict_train()
    model.dump()
    model.dump_pred(pred, "pred.pkl")
    return pred, train_pred
def create_model(self, training_articles):
    model = OneVsRestClassifier(svm.SVC(probability=True))
    features = []
    labels = []

    i = 0
    for article in training_articles:
        print("Generating features for article " + str(i) + "...")
        google_cloud_response = self.analyze_text_google_cloud(article["article"])
        relevant_entities = self.get_relevant_entities(google_cloud_response["entities"],
                                                       article["market"]["entities"],
                                                       article["market"]["wikipedia_urls"])
        # Only count this article if a relevant entity is present
        if relevant_entities:
            article_features = self.article_features(relevant_entities, article["market"],
                                                     google_cloud_response, article["article"])
            features.append(article_features)
            labels.append(article["label"])
        else:
            print("Skipping article " + str(i) + "...")
        i = i + 1

    print("Performing feature scaling...")
    scaler = preprocessing.StandardScaler().fit(features)
    features_scaled = scaler.transform(features)

    print("Fitting model...")
    model.fit(features_scaled, labels)

    print("Saving model...")
    joblib.dump(scaler, "data_analysis/scaler.pkl")
    joblib.dump(model, "data_analysis/model.pkl")

    print("Done!")
# For use in prod
def train():
    DataTrain = loadPybrainData()
    fnn = buildNet()
    trainer = BackpropTrainer(fnn, dataset=DataTrain, momentum=0.05,
                              verbose=True, weightdecay=0.005)
    trainer.trainUntilConvergence(maxEpochs=500)
    joblib.dump(fnn, PKL)
    return fnn
def printfile(X, filename):
    joblib.dump(X, filename)
load_feature.py (project: EmotiW-2017-Audio-video-Emotion-Recognition, author: xujinchang)
# Imports needed to run this snippet standalone
from sklearn.svm import SVC
from sklearn.externals import joblib

def use_SVM(X_data, y_data):
    p_gamma = 0.1
    p_C = 10
    svm = SVC(kernel='rbf', random_state=0, gamma=p_gamma, C=p_C, probability=True)
    svm.fit(X_data, y_data)
    joblib.dump(svm, "./sklearn_model/svm_trainval1_{param1}_{param2}".format(param1=p_gamma, param2=p_C))
    return svm
def save(self, path):
    """
    Persist the model itself and its classes with joblib and pickle.

    Parameters
    ----------
    path: string
        The location of the persistence directory where model and classes will be stored.

    Returns
    -------
    None
    """
    joblib.dump(self.model, path + 'tree.pkl')
    joblib.dump(self.classes, path + 'classes.pkl')
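A hedged counterpart for reading both artifacts back; the standalone load function and its name are assumptions for illustration, mirroring the save above.

# Hedged sketch: reload the persisted estimator and its class labels.
from sklearn.externals import joblib

def load(path):
    model = joblib.load(path + 'tree.pkl')
    classes = joblib.load(path + 'classes.pkl')
    return model, classes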
def save(self):
    home_dir = self.__home_dir(self.field_manager.app_id)
    if not os.path.isdir(home_dir):
        print("making directory for app {}...".format(self.field_manager.app_id))
        os.mkdir(home_dir)

    path_fieldm = os.path.join(home_dir, self.FIELD_MANAGER_FILE)
    with open(path_fieldm, mode="w", encoding="utf-8") as fm:
        serialized = self.field_manager.to_dict()
        json.dump(serialized, fm, indent=2)

    if self.model:
        joblib.dump(self.model, os.path.join(home_dir, self.MODEL_FILE))
def save(self, filebase):
    # re-train best model on full data set
    self.model_.fit(self.data, self.data[LABEL].values)
    ts = datetime.now().strftime('%Y%m%d_%H%M%S')
    # logging wrappers don't serialize
    del self.logger
    joblib.dump(self, '{0}/model_{1}.pkl'.format(filebase, ts))
def train(estimator, feats_train, labels_train, weights_train, model='model.pkl'):
    '''
    Train the machine learning model for severity classification
    @param estimator: the ML estimator to use
    @param feats_train: the training features
    @param labels_train: labels for training data
    @param weights_train: per-sample weights for the training data
    @param model: file name under cfg.PATH_RESOURCES to persist the trained model to (None to skip saving)
    @return estimator: trained estimator (model)
    '''
    estimator = estimator.fit(feats_train, labels_train, sample_weight=weights_train)
    if model is not None:
        joblib.dump(estimator, cfg.PATH_RESOURCES + model)
    return estimator
def save(self, location="brain"):
""" Pickle the brain """
if self._trained:
joblib.dump(self.lobe, location + ".pickle")
logger.info('Brain %s saved', location + '.pickle')
else:
return logger.error('Brain is not trained yet! Nothing to save...')
def getFeat(TrainData, TestData):
    for data in TestData:
        image = np.reshape(data[0].T, (32, 32, 3))
        gray = rgb2gray(image) / 255.0
        fd = hog(gray, 9, [8, 8], [2, 2], 'L2-Hys', False, True)
        fd = np.concatenate((fd, data[1]))
        filename = list(data[2])
        fd_name = filename[0].split('.')[0] + '.feat'
        fd_path = os.path.join('./data/features/test/', fd_name)
        joblib.dump(fd, fd_path)
    print("Test features are extracted and saved.")

    for data in TrainData:
        image = np.reshape(data[0].T, (32, 32, 3))
        gray = rgb2gray(image) / 255.0
        fd = hog(gray, 9, [8, 8], [2, 2], 'L2-Hys', False, True)
        fd = np.concatenate((fd, data[1]))
        filename = list(data[2])
        fd_name = filename[0].split('.')[0] + '.feat'
        fd_path = os.path.join('./data/features/train/', fd_name)
        joblib.dump(fd, fd_path)
    print("Train features are extracted and saved.")
def save_pkl(obj, path, log_description=None, logger=None,
             logging_level=logging.INFO, verbose_start=True,
             verbose_end=True, end_in_new_line=True, log_prefix="..."):
    if log_description is None:
        log_description = "Pickling to " + path

    with open(path, "wb") as fp, \
            SimpleTimer(log_description, logger, logging_level, verbose_start,
                        verbose_end, end_in_new_line, log_prefix):
        cPickle.dump(obj, fp, protocol=cPickle.HIGHEST_PROTOCOL)
def save_joblib_pkl(obj, path, log_description=None, logger=None,
                    logging_level=logging.INFO, verbose_start=True,
                    verbose_end=True, end_in_new_line=True, log_prefix="..."):
    try:
        from sklearn.externals import joblib
    except ImportError:
        raise ImportError("This function requires sklearn module. "
                          "You can install it via "
                          "\"pip install scikit-learn\".")

    if log_description is None:
        log_description = "Pickling to " + path

    with SimpleTimer(log_description, logger, logging_level, verbose_start,
                     verbose_end, end_in_new_line, log_prefix):
        joblib.dump(obj, path)
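A hedged load counterpart mirroring save_joblib_pkl; the function name is an assumption, and it relies on the same SimpleTimer helper and logging setup already used by the module above.

def load_joblib_pkl(path, log_description=None, logger=None,
                    logging_level=logging.INFO, verbose_start=True,
                    verbose_end=True, end_in_new_line=True, log_prefix="..."):
    # Hedged counterpart sketch: unpickle an object previously written by save_joblib_pkl.
    try:
        from sklearn.externals import joblib
    except ImportError:
        raise ImportError("This function requires sklearn module. "
                          "You can install it via "
                          "\"pip install scikit-learn\".")

    if log_description is None:
        log_description = "Unpickling from " + path

    with SimpleTimer(log_description, logger, logging_level, verbose_start,
                     verbose_end, end_in_new_line, log_prefix):
        return joblib.load(path)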
def save(self, filename):
    """
    Saves trained model to filename.

    :param filename: Name of file to save model as.
    """
    joblib.dump(self.svc, filename)
def save_corpus(out_prefix, X, terms, doc_ids, classes=None):
    """
    Save a pre-processed scikit-learn corpus and associated metadata using Joblib.
    """
    matrix_outpath = "%s.pkl" % out_prefix
    joblib.dump((X, terms, doc_ids, classes), matrix_outpath)
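A hedged counterpart for reading the corpus back; the load_corpus name is an assumption that simply mirrors save_corpus above.

def load_corpus(in_prefix):
    """
    Hedged sketch: load a corpus and its metadata previously written by save_corpus().
    """
    from sklearn.externals import joblib
    (X, terms, doc_ids, classes) = joblib.load("%s.pkl" % in_prefix)
    return (X, terms, doc_ids, classes)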
def save_term_rankings(out_path, term_rankings, labels=None):
    """
    Save a list of multiple term rankings using Joblib.
    """
    # no labels? generate some standard ones
    if labels is None:
        labels = []
        for i in range(len(term_rankings)):
            labels.append("C%02d" % (i + 1))
    joblib.dump((term_rankings, labels), out_path)