Python dump() example source code

crf_entity_extractor.py (project: rasa_nlu, author: RasaHQ)
def persist(self, model_dir):
        # type: (Text) -> Dict[Text, Any]
        """Persist this model into the passed directory.

        Returns the metadata necessary to load the model again."""

        from sklearn.externals import joblib

        if self.ent_tagger:
            model_file_name = os.path.join(model_dir, "crf_model.pkl")

            joblib.dump(self.ent_tagger, model_file_name)
            return {"entity_extractor_crf": {"model_file": "crf_model.pkl",
                                             "crf_features": self.crf_features,
                                             "BILOU_flag": self.BILOU_flag,
                                             "version": 1}}
        else:
            return {"entity_extractor_crf": None}
echoDoc0.1.py (project: EchoBurst, author: TyJK)
def newKMeansModel(vectorFile, outputFile, numClusters):
    # https://stackoverflow.com/questions/43476869/doc2vec-sentence-clustering

    model = Doc2Vec.load("Models\\" + vectorFile)
    docVecs = model.docvecs.doctag_syn0
    km = KMeans(n_clusters=numClusters)
    print("Starting")
    km.fit(docVecs)
    print("Fitting Data")
    joblib.dump(km, outputFile)
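A short usage sketch for the persisted clusterer; the path and the test vectors below are placeholders, but predict() and cluster_centers_ are standard attributes of a fitted scikit-learn KMeans:

import numpy as np
from sklearn.externals import joblib

km = joblib.load("km_model.pkl")  # hypothetical path; substitute the outputFile passed above
new_vecs = np.random.rand(5, km.cluster_centers_.shape[1])  # placeholder vectors of matching dimensionality
print(km.predict(new_vecs))  # nearest-centroid cluster id for each vector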
echoDoc0.1.py (project: EchoBurst, author: TyJK)
def newDBSCANModel(vectorFile, outputFile):
    model = Doc2Vec.load("Models\\" + vectorFile)
    vecs = []
    for doc in range(0, len(model.docvecs)):
        doc_vec = model.docvecs[doc]
        # print doc_vec
        vecs.append(doc_vec.reshape((1, 300)))

    doc_vecs = np.array(vecs, dtype='float')  # TSNE expects float type values

    # print doc_vecs
    docs = []
    for i in doc_vecs:
        docs.append(i[0])
    db = DBSCAN(eps=0.03, algorithm="brute", metric='cosine').fit(docs)
    joblib.dump(db, outputFile)


    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    clusters = db.labels_.tolist()
    cluster_info = {'labels': model.docvecs.offset2doctag,
                    "index, wordcount and repeated words": [model.docvecs.doctags[x] for x in
                                                            model.docvecs.offset2doctag],
                    'clusters': clusters}
    sentenceDF = pd.DataFrame(cluster_info, index=[clusters],
                              columns=['labels', "index, wordcount and repeated words", 'clusters'])
    print(sentenceDF)
    sentenceDF.to_csv("DBSCAN.csv")

    print('Estimated number of clusters: %d' % n_clusters_)
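Unlike KMeans, a fitted DBSCAN has no predict() method, so reloading it only gives access to the labels computed at fit time. A minimal sketch with a placeholder path:

from sklearn.externals import joblib

db = joblib.load("dbscan_model.pkl")  # hypothetical path; substitute the outputFile passed above
labels = db.labels_
n_noise = list(labels).count(-1)  # DBSCAN marks outliers with the label -1
print("Noise points: %d" % n_noise)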
model_vw.py (project: kaggle-prudential-sample, author: threecourse)
def run_model(ms, i_fold):

    model = ModelVW(ms.name(), i_fold)

    prms = model_params_dict[ms.model_params]

    if "interaction" not in prms:  # dict.has_key() was removed in Python 3
        prms["interaction"] = vw_inter_list[ms.feature_set]

    model.set_params(prms)
    model.set_data(ms.feature_set, i_fold)  # special

    model.train()

    pred = model.predict()
    train_pred = model.predict_train()

    model.dump()
    model.dump_pred(pred, "pred.pkl")

    return pred, train_pred
crf_entity_extractor.py (project: Rasa_NLU_Chi, author: crownpku)
def persist(self, model_dir):
        # type: (Text) -> Dict[Text, Any]
        """Persist this model into the passed directory.

        Returns the metadata necessary to load the model again."""

        from sklearn.externals import joblib

        if self.ent_tagger:
            model_file_name = os.path.join(model_dir, "crf_model.pkl")

            joblib.dump(self.ent_tagger, model_file_name)
            return {"entity_extractor_crf": {"model_file": "crf_model.pkl",
                                             "crf_features": self.crf_features,
                                             "BILOU_flag": self.BILOU_flag,
                                             "version": 1}}
        else:
            return {"entity_extractor_crf": None}
data_analysis.py (project: algo-trading-pipeline, author: NeuralKnot)
def create_model(self, training_articles):
        model = OneVsRestClassifier(svm.SVC(probability=True))

        features = []
        labels = []
        i = 0
        for article in training_articles:
            print("Generating features for article " + str(i) + "...")
            google_cloud_response = self.analyze_text_google_cloud(article["article"])
            relevant_entities = self.get_relevant_entities(google_cloud_response["entities"], article["market"]["entities"], article["market"]["wikipedia_urls"])

            # Only count this article if a relevant entity is present
            if relevant_entities:
                article_features = self.article_features(relevant_entities, article["market"], google_cloud_response, article["article"])
                features.append(article_features)
                labels.append(article["label"])
            else:
                print("Skipping article " + str(i) + "...")

            i = i + 1

        print("Performing feature scaling...")
        scaler = preprocessing.StandardScaler().fit(features)
        features_scaled = scaler.transform(features)

        print("Fitting model...")
        model.fit(features_scaled, labels)

        print("Saving model...")
        joblib.dump(scaler, "data_analysis/scaler.pkl")
        joblib.dump(model, "data_analysis/model.pkl")

        print("Done!")

    # For use in prod
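At prediction time both artifacts have to be restored and applied in the same order as during training: scale first, then classify. A minimal sketch, assuming a feature vector built by the same article_features() pipeline:

from sklearn.externals import joblib

scaler = joblib.load("data_analysis/scaler.pkl")
model = joblib.load("data_analysis/model.pkl")

def predict_article(article_features):
    # article_features: one feature vector produced as in training (assumption)
    scaled = scaler.transform([article_features])
    return model.predict_proba(scaled)  # available because the underlying SVC was built with probability=True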
pybrain_captcha.py (project: Verification-code-crack, author: weixianglin)
def train():
    DataTrain=loadPybrainData()
    fnn=buildNet()
    trainer=BackpropTrainer(fnn,dataset=DataTrain,momentum=0.05,verbose=True,weightdecay=0.005)
    trainer.trainUntilConvergence(maxEpochs=500)
    joblib.dump(fnn,PKL)
    return fnn
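The trained network can be restored with joblib.load and then queried one sample at a time with PyBrain's activate(). A sketch with a placeholder path:

from sklearn.externals import joblib

fnn = joblib.load("captcha_fnn.pkl")  # hypothetical path; substitute the module-level PKL used above
# prediction = fnn.activate(sample)  # sample: one flattened feature vector (assumption)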
amazon_stacking.py (project: ensemble_amazon, author: kaz-Anova)
def printfile(X, filename):

    joblib.dump(X, filename)
load_feature.py (project: EmotiW-2017-Audio-video-Emotion-Recognition, author: xujinchang)
def use_SVM(X_data,y_data):
    p_gamma = 0.1
    p_C = 10
    svm = SVC(kernel = 'rbf',random_state=0, gamma=p_gamma ,C=p_C, probability=True)
    svm.fit(X_data,y_data)
    joblib.dump(svm,"./sklearn_model/svm_trainval1_{param1}_{param2}".format(param1 = p_gamma,param2 = p_C))
    return svm
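Because probability=True was passed to SVC, the reloaded model exposes predict_proba in addition to predict. A sketch using the filename produced by the format() call above:

from sklearn.externals import joblib

svm = joblib.load("./sklearn_model/svm_trainval1_0.1_10")  # gamma=0.1, C=10, as above
# proba = svm.predict_proba(X_new)  # X_new: features shaped like X_data (assumption)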
DecisionTrees.py (project: a-cadmci, author: florez87)
def save(self, path):
        """
        Persist the model itself and its classes with joblib and pickle.

        Parameters
        ----------
        path: string
            The location of the persistence directory where model and classes will be stored.

        Return
        ----------
        None
        """
        joblib.dump(self.model, path + 'tree.pkl')
        joblib.dump(self.classes, path + 'classes.pkl')
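A hedged sketch of a matching loader; load() is a hypothetical companion, not part of the original class:

from sklearn.externals import joblib

def load(path):
    # path is concatenated directly, so it should end with a path
    # separator, mirroring the concatenation used in save() above
    model = joblib.load(path + 'tree.pkl')
    classes = joblib.load(path + 'classes.pkl')
    return model, classes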
model_manager.py (project: karura, author: icoxfog417)
def save(self):
        home_dir = self.__home_dir(self.field_manager.app_id)
        if not os.path.isdir(home_dir):
            print("making directory for app {}...".format(self.field_manager.app_id))
            os.mkdir(home_dir)

        path_fieldm = os.path.join(home_dir, self.FIELD_MANAGER_FILE)
        with open(path_fieldm, mode="w", encoding="utf-8") as fm:
            serialized = self.field_manager.to_dict()
            json.dump(serialized, fm, indent=2)

        if self.model:
            joblib.dump(self.model, os.path.join(home_dir, self.MODEL_FILE))
_model.py (project: probablyPOTUS, author: jjardel)
def save(self, filebase):

        # re-train best model on full data set
        self.model_.fit(self.data, self.data[LABEL].values)

        ts = datetime.now().strftime('%Y%m%d_%H%M%S')

        # logging wrappers don't serialize
        del self.logger

        joblib.dump(self, '{0}/model_{1}.pkl'.format(filebase, ts))
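Whoever reloads this object has to re-create the logger attribute that save() deleted, since logging wrappers do not survive pickling. A sketch with a placeholder filename:

import logging
from sklearn.externals import joblib

model = joblib.load("models/model_20170101_000000.pkl")  # hypothetical filebase and timestamp
model.logger = logging.getLogger(__name__)  # restore the attribute removed before serialization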
modelData.py (project: rdocChallenge, author: Elyne)
def train(estimator, feats_train, labels_train, weights_train, model='model.pkl'):
    '''
    Train the generated machine learning model for severity classification.
    @param estimator: the ML estimator to use
    @param feats_train: the training features
    @param labels_train: labels for the training data
    @param weights_train: per-sample weights for the training data
    @param model: filename to persist the trained model under cfg.PATH_RESOURCES (None to skip)
    @return estimator: trained estimator (model)
    '''
    estimator = estimator.fit(feats_train, labels_train, sample_weight=weights_train)
    if model is not None:
        joblib.dump(estimator, cfg.PATH_RESOURCES+model)
    return estimator
__init__.py (project: marconibot, author: s4w3d0ff)
def save(self, location="brain"):
        """ Pickle the brain """
        if self._trained:
            joblib.dump(self.lobe, location + ".pickle")
            logger.info('Brain %s saved', location + '.pickle')
        else:
            return logger.error('Brain is not trained yet! Nothing to save...')
extractFeat.py (project: -, author: YoPatapon)
def getFeat(TrainData, TestData):
    for data in TestData:
        image = np.reshape(data[0].T, (32, 32, 3))
        gray = rgb2gray(image)/255.0
        fd = hog(gray, 9, [8, 8], [2, 2], 'L2-Hys', False, True)
        fd = np.concatenate((fd, data[1]))
        filename = list(data[2])
        fd_name = filename[0].split('.')[0]+'.feat'
        fd_path = os.path.join('./data/features/test/', fd_name)
        joblib.dump(fd, fd_path)
    print "Test features are extracted and saved."
    for data in TrainData:
        image = np.reshape(data[0].T, (32, 32, 3))
        gray = rgb2gray(image)/255.0
        fd = hog(gray, 9, [8, 8], [2, 2], 'L2-Hys', False, True)
        fd = np.concatenate((fd, data[1]))
        filename = list(data[2])
        fd_name = filename[0].split('.')[0]+'.feat'
        fd_path = os.path.join('./data/features/train/', fd_name)
        joblib.dump(fd, fd_path)
    print "Train features are extracted and saved."
shortcuts.py (project: serialtime, author: ianlini)
def save_pkl(obj, path, log_description=None, logger=None,
             logging_level=logging.INFO, verbose_start=True,
             verbose_end=True, end_in_new_line=True, log_prefix="..."):
    if log_description is None:
        log_description = "Pickling to " + (path)
    with open(path, "wb") as fp, \
            SimpleTimer(log_description, logger, logging_level, verbose_start,
                        verbose_end, end_in_new_line, log_prefix):
        cPickle.dump(obj, fp, protocol=cPickle.HIGHEST_PROTOCOL)
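A matching loader in the same style; load_pkl() is a hypothetical companion not shown in the source, and it relies on the module's existing cPickle and SimpleTimer imports:

def load_pkl(path, log_description=None, logger=None,
             logging_level=logging.INFO, verbose_start=True,
             verbose_end=True, end_in_new_line=True, log_prefix="..."):
    if log_description is None:
        log_description = "Unpickling from " + path
    with open(path, "rb") as fp, \
            SimpleTimer(log_description, logger, logging_level, verbose_start,
                        verbose_end, end_in_new_line, log_prefix):
        return cPickle.load(fp)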
shortcuts.py (project: serialtime, author: ianlini)
def save_joblib_pkl(obj, path, log_description=None, logger=None,
                    logging_level=logging.INFO, verbose_start=True,
                    verbose_end=True, end_in_new_line=True, log_prefix="..."):
    try:
        from sklearn.externals import joblib
    except ImportError:
        raise ImportError("This function requires sklearn module. "
                          "You can install it via "
                          "\"pip install scikit-learn\".")
    if log_description is None:
        log_description = "Pickling to " + (path)
    with SimpleTimer(log_description, logger, logging_level, verbose_start,
                     verbose_end, end_in_new_line, log_prefix):
        joblib.dump(obj, path)
classify.py (project: eigenfish, author: sethdp)
def save(self, filename):
        """
        Saves trained model to filename.

        :param filename: Name of file to save model as.
        """
        joblib.dump(self.svc, filename)
util.py (project: topic-ensemble, author: derekgreene)
def save_corpus( out_prefix, X, terms, doc_ids, classes = None ):
    """
    Save a pre-processed scikit-learn corpus and associated metadata using Joblib.
    """
    matrix_outpath = "%s.pkl" % out_prefix 
    joblib.dump((X, terms, doc_ids, classes), matrix_outpath)
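A hedged sketch of the matching loader; load_corpus() is assumed rather than shown, and it relies on the module's existing joblib import:

def load_corpus(in_path):
    # joblib.load returns the (X, terms, doc_ids, classes) tuple dumped above
    (X, terms, doc_ids, classes) = joblib.load(in_path)
    return (X, terms, doc_ids, classes)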
rankings.py (project: topic-ensemble, author: derekgreene)
def save_term_rankings( out_path, term_rankings, labels = None ):
    """
    Save a list of multiple term rankings using Joblib.
    """
    # no labels? generate some standard ones
    if labels is None:
        labels = []
        for i in range( len(term_rankings) ):
            labels.append( "C%02d" % (i+1) )
    joblib.dump((term_rankings,labels), out_path )

