def trainModel(featureCount, imageCount, save):
    clf = RandomForestRegressor(n_estimators=1, n_jobs=-1)
    features = generateFeatures(featureCount)
    for image in range(imageCount):
        print("Image " + str(image))
        train(clf, features, image)
    # X and Y are assumed to be module-level arrays populated by train()
    clf = clf.fit(X, Y)
    model = (clf, features)
    if save:
        joblib.dump(model, "model.pkl")
    return model
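As a minimal sketch (not part of the original), the saved model.pkl can be restored with joblib.load, assuming the same (classifier, features) tuple layout written above:

import joblib  # older scikit-learn code imports this as sklearn.externals.joblib

def loadModel(path="model.pkl"):
    # joblib.load returns exactly the object that was dumped, here a (clf, features) tuple
    clf, features = joblib.load(path)
    return clf, features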
def load_trained_model(self, classifier):
filename = '{}.pkl'.format(classifier.__name__.lower())
path = os.path.join(self.data_path, filename)
# palliative: this outputs a model too large for joblib
if classifier.__name__ == 'MonthlySubquotaLimitClassifier':
model = classifier()
model.fit(self.dataset)
else:
if os.path.isfile(path):
model = joblib.load(path)
else:
model = classifier()
model.fit(self.dataset)
joblib.dump(model, path)
return model
def make_check_point(self):
num, last_checkpoints = self.load_current_checkpoints()
if self.best_val_acc > last_checkpoints['best_val_acc']:
best_val_acc = self.best_val_acc
best_params = self.best_params
else:
best_val_acc = last_checkpoints['best_val_acc']
best_params = last_checkpoints['best_params']
checkpoints = {
'model': self.model,
'epoch': self.epoch,
'best_params': best_params,
'best_val_acc': best_val_acc,
'loss_history': self.loss_history,
'train_acc_history': self.train_acc_history,
'val_acc_history': self.val_acc_history}
name = 'check_' + str(num + 1)
os.mkdir(os.path.join(self.path_checkpoints, name))
joblib.dump(checkpoints, os.path.join(
self.path_checkpoints, name, name + '.pkl'))
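make_check_point relies on self.load_current_checkpoints(), which is not shown in this snippet. The following is a hypothetical sketch of that counterpart, assuming the check_<n>/check_<n>.pkl layout written above and that os and joblib are imported in the surrounding module:

def load_current_checkpoints(self):
    # hypothetical helper: return (latest checkpoint number, its contents)
    names = [d for d in os.listdir(self.path_checkpoints) if d.startswith('check_')]
    if not names:
        # no checkpoint yet: start from 0 with a sentinel accuracy
        return 0, {'best_val_acc': float('-inf'), 'best_params': None}
    num = max(int(d.split('_')[1]) for d in names)
    name = 'check_%d' % num
    return num, joblib.load(os.path.join(self.path_checkpoints, name, name + '.pkl'))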
def __init__(self, clf, scaler, pf_df, data_folder=""):
model_file_name = "banana.pkl"
scaler_file_name = "banana_scaler.pkl"
list_file_name = "banana_list.txt"
def_file_path = "../../models/"
self.data_folder = data_folder
if not data_folder:
model_file = os.path.join(os.path.dirname(__file__), def_file_path) + model_file_name
scaler_file = os.path.join(os.path.dirname(__file__), def_file_path) + scaler_file_name
list_file = os.path.join(os.path.dirname(__file__), def_file_path) + list_file_name
else:
model_file = self.data_folder + model_file_name
scaler_file = self.data_folder + scaler_file_name
list_file = self.data_folder + list_file_name
joblib.dump(clf, model_file)
joblib.dump(scaler, scaler_file)
with open(list_file, "w") as f:
f.write(" ".join(pf_df.columns.tolist()))
def get_cache_file(model_id, index, cache_dir='', suffix='csv'):
    # Index fingerprint trick: the sum of (at most) the first 20 index values is
    # used as a cheap identifier, so inputs with the same leading index values
    # map to the same cache file.
if index is None:
raise IOError
if len(index) < 20:
sum_index = sum(index)
else:
sum_index = sum(index[:20])
return "{0}{1}_{2}.{3}".format(cache_dir,
model_id,
sum_index,
suffix)
##def saving_fit(learner, X, y, index):
## import os
## pkl_file = "{0}_{1}_{2}.pkl".format(learner.id, min(index), max(index))
## try:
## learner = joblib.load(pkl_file)
## print("**** learner is loaded from {0} ****".format(pkl_file))
## except IOError:
## learner.fit(X, y)
## joblib.dump(learner, pkl_file)
## return learner
def KmeansWrapper(true_k, data, load=False):
from sklearn.externals import joblib
    modelName = 'doc_cluster.%s.pkl' % true_k
if load:
km = joblib.load(modelName)
labels = km.labels_
else:
km = KMeans(n_clusters=true_k,
init='k-means++',
# max_iter=1000,
n_init=10,
n_jobs=-1,
random_state=0,
verbose=0)
km.fit_predict(data)
labels = km.labels_
joblib.dump(km, modelName)
return labels, km.cluster_centers_
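A usage sketch; the random data is only a stand-in for the real document vectors. The first call fits and persists the model, a later call with load=True reuses the pickled estimator:

import numpy as np

data = np.random.rand(100, 8)                                    # stand-in for real feature vectors
labels, centers = KmeansWrapper(true_k=5, data=data)             # fits and dumps doc_cluster.5.pkl
labels, centers = KmeansWrapper(true_k=5, data=data, load=True)  # reloads the persisted model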
def init_state(indata, test=False):
close = indata['close'].values
diff = np.diff(close)
diff = np.insert(diff, 0, 0)
sma15 = SMA(indata, timeperiod=15)
sma60 = SMA(indata, timeperiod=60)
rsi = RSI(indata, timeperiod=14)
atr = ATR(indata, timeperiod=14)
#--- Preprocess data
xdata = np.column_stack((close, diff, sma15, close-sma15, sma15-sma60, rsi, atr))
xdata = np.nan_to_num(xdata)
    if not test:
        scaler = preprocessing.StandardScaler()
        xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1)
        joblib.dump(scaler, 'data/scaler.pkl')
    else:
        scaler = joblib.load('data/scaler.pkl')
        # use transform (not fit_transform) so the scaling fitted on the training data is reused
        xdata = np.expand_dims(scaler.transform(xdata), axis=1)
state = xdata[0:1, 0:1, :]
return state, xdata, close
#Take Action
def persist_pipelines(pipelines):
Path('models').mkdir(exist_ok=True)
fp_fmt = 'models/{}-{:%y-%m-%d}.pkl'
now = dt.datetime.now()
for pipe in pipelines:
print(utils.pipeline_name(pipe))
fp_name = fp_fmt.format(utils.pipeline_name(pipe), now)
joblib.dump(pipe, fp_name)
# Pickle fails to work on RandomForestRegressor
# with open(fp_name, 'wb') as fp:
# pickle.dump(pipe, fp)
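A minimal loader counterpart; the function name is an assumption, and it only relies on the models/<name>-<date>.pkl layout written above:

from pathlib import Path
import joblib

def load_pipelines(models_dir='models'):
    # map each persisted file name to its unpickled pipeline
    return {fp.name: joblib.load(str(fp)) for fp in Path(models_dir).glob('*.pkl')}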
def _vectorize_chunk(dsid_dir, k, pars, pretend=False):
""" Extract features on a chunk of files """
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.externals import joblib
filenames = pars['filenames_abs']
chunk_size = pars['chunk_size']
n_samples = pars['n_samples']
mslice = slice(k*chunk_size, min((k+1)*chunk_size, n_samples))
hash_opts = {key: vals for key, vals in pars.items()
if key in ['stop_words', 'n_features',
'analyser', 'ngram_range']}
hash_opts['alternate_sign'] = False
fe = HashingVectorizer(input='content', norm=None, **hash_opts)
if pretend:
return fe
fset_new = fe.transform(_read_file(fname) for fname in filenames[mslice])
fset_new.eliminate_zeros()
joblib.dump(fset_new, str(dsid_dir / 'features-{:05}'.format(k)))
def dump_classifier(self):
"""
This function ...
:return:
"""
# Determine the path to the pickle file
classifier_path = os.path.join(self.classification_mode_path, "classifier.pkl")
# Inform the user
self.log.info("Writing the classifier to " + classifier_path)
# Serialize and dump the classifier
joblib.dump(self.vector_classifier, classifier_path)
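A hypothetical load_classifier counterpart, assuming the same attributes (classification_mode_path, log, vector_classifier) and the same os/joblib imports as the class above:

def load_classifier(self):
    # Determine the path to the pickle file written by dump_classifier
    classifier_path = os.path.join(self.classification_mode_path, "classifier.pkl")
    # Inform the user
    self.log.info("Reading the classifier from " + classifier_path)
    # Deserialize the classifier
    self.vector_classifier = joblib.load(classifier_path)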
# -----------------------------------------------------------------
def generate_LR_model(file_name):
train_df = read_from_file(file_name)
selected_train_df = train_df.filter(regex='label|connectionType_.*|telecomsOperator_.*|sitesetID_.*|positionType_.*|gender_.*|haveBaby_.*|age_scaled')
    train_np = selected_train_df.values  # as_matrix() is deprecated in newer pandas
    y = train_np[:, 0]
    X = train_np[:, 1:]
    print('Train Logistic Regression Model...')
    start_time = datetime.datetime.now()
    clf = linear_model.LogisticRegression(penalty='l2', C=1.0, solver='sag', n_jobs=-1, tol=1e-6, max_iter=200)  # class_weight='balanced'
    clf.fit(X, y)
    end_time = datetime.datetime.now()
    print('Training Done..., Time Cost: %d seconds' % (end_time - start_time).seconds)
    print('Save Model...')
    joblib.dump(clf, 'LR.model')
    return clf
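A hedged sketch of reusing the persisted model; the zero row is only a placeholder with the trained feature width, not real input data:

import numpy as np
import joblib

lr = joblib.load('LR.model')
X_new = np.zeros((1, lr.coef_.shape[1]))  # placeholder row; real input needs the same columns as training
print(lr.predict_proba(X_new)[:, 1])      # predicted conversion probability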
def test():
    iris = load_iris()
    # print(iris)
    # print(iris['target'].shape)
    gbdt = GradientBoostingRegressor(n_estimators=1000, max_depth=4)
    gbdt.fit(iris.data[:120], iris.target[:120])
    # Save the GBDT model
    joblib.dump(gbdt, 'GBDT.model')
    predict = gbdt.predict(iris.data[:120])
    total_err = 0
    for i in range(len(predict)):
        print(predict[i], iris.target[i])
        err = predict[i] - iris.target[i]
        total_err += err * err
    print('Training Error: %f' % (total_err / len(predict)))
    pred = gbdt.predict(iris.data[120:])
    error = 0
    for i in range(len(pred)):
        print(pred[i], iris.target[i + 120])
        err = pred[i] - iris.target[i + 120]
        error += err * err
    print('Test Error: %f' % (error / len(pred)))
def generate_GBDT_model(file_name):
train_df = read_from_file(file_name)
    # feature 18
    selected_train_df = train_df.filter(regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby|hometown|residence')
    train_np = selected_train_df.values  # as_matrix() is deprecated in newer pandas
    y = train_np[:, 0]
    X = train_np[:, 1:]
    print('Train Gradient Boosting Regression Model...')
    start_time = datetime.datetime.now()
    gbdt = GradientBoostingRegressor(n_estimators=120, max_depth=10)  # class_weight='balanced'
    gbdt.fit(X, y)
    end_time = datetime.datetime.now()
    print('Training Done..., Time Cost: %d seconds' % (end_time - start_time).seconds)
    print('Save Model...')
    joblib.dump(gbdt, 'GBDT.model')
    return gbdt
def generate_XGB_model(train_df):
train_df.drop(['conversionTime'], axis=1, inplace=True)
    print('Train And Fix Missing App Count Value...')
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')
    # print('Train And Fix Missing Age Value...')
    # train_df, xgb_age = train_model_for_age(train_df)
    # joblib.dump(xgb_age, 'XGB_age.model')
    train_df.drop(['marriageStatus', 'haveBaby', 'sitesetID', 'positionType'], axis=1, inplace=True)
    print('Done')
    print(train_df.info())
    print(train_df.describe())
    print(train_df.isnull().sum())
    train_np = train_df.values  # as_matrix() is deprecated in newer pandas
    y = train_np[:, 0]
    X = train_np[:, 1:]
    print('Train Xgboost Model...')
    start_time = datetime.datetime.now()
    xgb_clf = XGBRegressor(n_estimators=100, max_depth=6, objective="binary:logistic", silent=False)
    xgb_clf.fit(X, y)
    end_time = datetime.datetime.now()
    print('Training Done..., Time Cost: %d' % (end_time - start_time).seconds)
    model_df = pd.DataFrame({'columns': list(train_df.columns)[1:], 'values': xgb_clf.feature_importances_})
    print(model_df)
    return xgb_clf
def xgb_model_select(train_file_name):
train_df = merge_features_to_use(train_file_name)
train_df.drop(['conversionTime'], axis=1, inplace=True)
    print('Train And Fix Missing App Count Value...')
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')
    print(train_df.info())
    print(train_df.describe())
    print(train_df.isnull().sum())
    train_np = train_df.values  # as_matrix() is deprecated in newer pandas
    y = train_np[:, 0]
    X = train_np[:, 1:]
    print('Select Model...')
    start_time = datetime.datetime.now()
    xgb_clf = xgb.XGBRegressor()
    parameters = {'n_estimators': [120, 100, 140], 'max_depth': [3, 5, 7, 9], 'gamma': [0.1, 0.3, 0.5, 0.7], 'min_child_weight': [1, 3, 5, 7]}
    grid_search = GridSearchCV(estimator=xgb_clf, param_grid=parameters, cv=10, n_jobs=-1)
    print("parameters:")
    pprint.pprint(parameters)
    grid_search.fit(X, y)
    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
    end_time = datetime.datetime.now()
    print('Select Done..., Time Cost: %d' % (end_time - start_time).seconds)
def generate_RF_model(file_name):
train_df = read_from_file(file_name)
selected_train_df = train_df.filter(regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby|hometown|residence')
    train_np = selected_train_df.values  # as_matrix() is deprecated in newer pandas
    y = train_np[:, 0]
    X = train_np[:, 1:]
    print('Train Random Forest Regression Model...')
    start_time = datetime.datetime.now()
    rf = RandomForestRegressor(n_estimators=25, n_jobs=-1)  # class_weight='balanced'
    rf.fit(X, y)
    end_time = datetime.datetime.now()
    print('Training Done..., Time Cost: %d seconds' % (end_time - start_time).seconds)
    print('Save Model...')
    joblib.dump(rf, 'RF.model')
    return rf
def load_data(test=False):
fname = FTEST if test else FTRAIN
df = pd.read_csv(fname)
cols = df.columns[:-1]
df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' ') / 255.0)
df = df.dropna()
X = np.vstack(df['Image'])
X = X.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
if not test:
# y = (df[cols].values -48) / 48.0
y = df[cols].values / 96.0
X, y = shuffle(X, y)
joblib.dump(cols, 'data/cols.pkl', compress=3)
else:
y = None
return X, y
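The saved column names can be reloaded to label a prediction row; the helper below is a sketch (not in the original) and assumes the /96.0 target scaling used above:

import joblib

cols = joblib.load('data/cols.pkl')

def prediction_to_dict(pred_row):
    # undo the /96.0 scaling applied to the targets during training
    return dict(zip(cols, pred_row * 96.0))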
def Dump(model,fnameMODEL,fnameWeight):
if str(type(model)).find("sklearn.")==-1:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
json_string = model.to_json()
fm = open(fnameMODEL+".json","w")
fm.write(json_string)
fm.close()
model.save_weights(fnameWeight+".hdf5",overwrite=True)
else:
from sklearn.externals import joblib
def ensure_dir(f):
d = os.path.dirname(f)
if not os.path.exists(d):
os.makedirs(d)
ensure_dir('./skmodel/')
joblib.dump(model, "./skmodel/"+fnameMODEL+".pkl",compress=3)
def train_svms():
if not os.path.isfile('models/fine_tune.model.index'):
print('models/fine_tune.model doesn\'t exist.')
return
net = create_alexnet()
model = tflearn.DNN(net)
model.load('models/fine_tune.model')
train_file_dir = 'svm_train/'
flist = os.listdir(train_file_dir)
svms = []
for train_file in flist:
if "pkl" in train_file:
continue
X, Y = generate_single_svm_train_data(train_file_dir + train_file)
train_features = []
for i in X:
feats = model.predict([i])
train_features.append(feats[0])
print("feature dimension of fitting: {}".format(np.shape(train_features)))
clf = svm.LinearSVC()
clf.fit(train_features, Y)
svms.append(clf)
joblib.dump(svms, 'models/train_svm.model')
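The list of per-class SVMs can be restored in one call; a brief sketch, assuming the same models/train_svm.model path:

import joblib

svms = joblib.load('models/train_svm.model')  # list of fitted LinearSVC objects
# each element scores a CNN feature vector, e.g. svms[0].decision_function([feat])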
def train(self, training_data, trees=100,rf_out=None):
# Use CNN to extract features
self.cnn.set_intermediate(self.feature_layer)
features = self.extract_features(training_data)
# Create random forest
self.rf = RandomForestClassifier(n_estimators=trees, class_weight='balanced_subsample')
X_train = features['y_pred'] # inputs to train the random forest
y_train = np.asarray(features['y_true']) # ground truth for random forest
print "Training RF..."
self.rf.fit(X_train, y_train)
if rf_out:
joblib.dump(self.rf, rf_out)
return self.rf, X_train, y_train
def train_model(data, with_mac=True):
global without_mac_clf, mac_clf
df = pd.DataFrame.from_dict(data)
y = df.pop("location")
    features = [f for f in df.columns if f != 'mac']  # 'is not' compared object identity, not string equality
df = df.rename(columns=dict(zip(features, [POWER_SLAVE_PREFIX + f for f in features])))
model_name = MODEL_MAC_NAME if with_mac else MODEL_NAME
if with_mac:
df = df.apply(LabelEncoder().fit_transform)
else:
df.drop("mac", axis=1, inplace=True)
clf = DecisionTreeClassifier()
clf.fit(df, y)
joblib.dump(clf, model_name)
if with_mac and mac_clf is None:
mac_clf = clf
if not with_mac and without_mac_clf is None:
without_mac_clf = clf
export_graphviz(clf, feature_names=list(df.columns), class_names=y.unique(), filled=True, rounded=True, out_file='model.dot')
os.system("dot -Tpng model.dot -o model.png")
def trainClassifier(foldername,classifierName):
model = cv2.ml.KNearest_create()
features = []
labels = []
os.chdir(foldername)
for filename in glob.iglob('*.png'):
features.append(cv2.imread((filename),-1))
labels.append(filename[0])
list_hog_fd = []
for feature in features:
fd = hog(feature.reshape((27, 35)), orientations=9, pixels_per_cell=(9, 7), cells_per_block=(1, 1), visualise=False)
list_hog_fd.append(fd)
hog_features = np.array(list_hog_fd, 'float64')
os.chdir("..")
clf = LinearSVC()
clf.fit(hog_features, labels)
joblib.dump(clf,classifierName, compress=3)
os.chdir("..")
def learn(fName, features, nRows=-1):
    with open('bin/train.bin', 'rb') as f:  # np.load needs a binary-mode file handle
        train = np.load(f)
    x = np.mat(train[:nRows, timbreVector[features[0]]]).reshape(nRows, 1)
    y = np.mat(train[:nRows, timbreVector[features[1]]]).reshape(nRows, 1)
    z = np.mat(train[:nRows, timbreVector[features[2]]]).reshape(nRows, 1)
    X = np.concatenate((x, y, z), axis=1)
    Y = train[:nRows, 0] % minYear
    clf = svm.SVC(verbose=3)
    clf.fit(X, Y)
    print("[SUCCESS] Fitted training data to SVM (kernel: rbf).")
    print("[STARTED] Dumping classifier.")
    joblib.dump(clf, 'bin/%s' % fName)
    print("[SUCCESS] Dumped to", fName)
def train(self, training_set, training_target, fea_index):
clf = tree.DecisionTreeClassifier(criterion="entropy", min_samples_split=30, class_weight="balanced")
clf = clf.fit(training_set, training_target)
class_names = np.unique([str(i) for i in training_target])
feature_names = [attr_list[i] for i in fea_index]
dot_data = tree.export_graphviz(clf, out_file=None,
feature_names=feature_names,
class_names=class_names,
filled=True, rounded=True,
special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_pdf("output/tree-vis.pdf")
joblib.dump(clf, 'output/CART.pkl')
def saveDebugStateAtBatch(self, name, batchID, LPchunk=None, SS=None,
SSchunk=None, hmodel=None,
Dchunk=None):
if self.outputParams['debugBatch'] == batchID:
debugLap = self.outputParams['debugLap']
debugLapBuffer = self.outputParams['debugLapBuffer']
if self.lapFrac < 1:
joblib.dump(dict(Dchunk=Dchunk),
os.path.join(self.task_output_path, 'Debug-Data.dump'))
belowWindow = self.lapFrac < debugLap - debugLapBuffer
aboveWindow = self.lapFrac > debugLap + debugLapBuffer
if belowWindow or aboveWindow:
return
filename = 'DebugLap%04.0f-%s.dump' % (np.ceil(self.lapFrac), name)
SaveVars = dict(LP=LPchunk, SS=SS, hmodel=hmodel,
SSchunk=SSchunk,
lapFrac=self.lapFrac)
joblib.dump(SaveVars, os.path.join(self.task_output_path, filename))
if self.lapFrac < 1:
joblib.dump(dict(Dchunk=Dchunk),
os.path.join(self.task_output_path, 'Debug-Data.dump'))
Source file: 6_PSO+PCA.py, project: SVM-classification-localization, author: HandsomeHans
def pca(dataMat,n):
print "Start to do PCA..."
newData,meanVal=zeroMean(dataMat)
# covMat=np.cov(newData,rowvar=0)
# eigVals,eigVects=np.linalg.eig(np.mat(covMat))
# joblib.dump(eigVals,'./features/PCA/eigVals_train_%s.eig' %m,compress=3)
# joblib.dump(eigVects,'./features/PCA/eigVects_train_%s.eig' %m,compress=3)
eigVals = joblib.load('./features/PCA/eigVals_train_%s.eig' %m)
eigVects = joblib.load('./features/PCA/eigVects_train_%s.eig' %m)
eigValIndice=np.argsort(eigVals)
n_eigValIndice=eigValIndice[-1:-(n+1):-1]
n_eigVect=eigVects[:,n_eigValIndice]
# joblib.dump(n_eigVect,'./features/PCA/n_eigVects_train_%s_%s.eig' %(m,n))
lowDDataMat=newData*n_eigVect
return lowDDataMat
Source file: 4_Train_PCA+SVM.py, project: SVM-classification-localization, author: HandsomeHans
def pca(dataMat,n):
print "Start to do PCA..."
t1 = time.time()
newData,meanVal=zeroMean(dataMat)
covMat=np.cov(newData,rowvar=0)
eigVals,eigVects=np.linalg.eig(np.mat(covMat)) # calculate feature value and feature vector
joblib.dump(eigVals,'./features/PCA/%s/eigVals_train_%s.eig' %(m,m),compress=3)
joblib.dump(eigVects,'./features/PCA/%s/eigVects_train_%s.eig' %(m,m),compress=3)
# eigVals = joblib.load('./features/PCA/%s/eigVals_train_%s.eig' %(m,m))
# eigVects = joblib.load('./features/PCA/%s/eigVects_train_%s.eig' %(m,m))
eigValIndice=np.argsort(eigVals) # sort feature value
n_eigValIndice=eigValIndice[-1:-(n+1):-1] # take n feature value
n_eigVect=eigVects[:,n_eigValIndice] # take n feature vector
joblib.dump(n_eigVect,'./features/PCA/%s/n_eigVects_train_%s_%s.eig' %(m,m,n))
lowDDataMat=newData*n_eigVect # calculate low dimention data
# reconMat=(lowDDataMat*n_eigVect.T)+meanVal
t2 = time.time()
print "PCA takes %f seconds" %(t2-t1)
return lowDDataMat
Source file: 1_HoG_extract_feature.py, project: SVM-classification-localization, author: HandsomeHans
def getFeat(Data, mode):  # extract and save the HoG feature values
    num = 0
    for data in Data:
        image = np.reshape(data[0], (200, 200, 3))
        gray = rgb2gray(image) / 255.0  # convert the image to grayscale
        fd = hog(gray, orientations, pixels_per_cell, cells_per_block, block_norm, visualize, normalize)
        fd = np.concatenate((fd, data[1]))  # append the label at the end of the array
        filename = list(data[2])
        fd_name = filename[0].split('.')[0] + '.feat'  # set the file name
        if mode == 'train':
            fd_path = os.path.join('./features/train/', fd_name)
        else:
            fd_path = os.path.join('./features/test/', fd_name)
        joblib.dump(fd, fd_path, compress=3)  # save the feature vector locally
        num += 1
        print("%d saving: %s." % (num, fd_name))
Source file: train_novelty_detection.py, project: keras-transfer-learning-for-oxford102, author: Arsey
def train_logistic():
df = pd.read_csv(config.activations_path)
df, y, classes = encode(df)
X_train, X_test, y_train, y_test = train_test_split(df.values, y, test_size=0.2, random_state=17)
params = {'C': [10, 2, .9, .4, .1], 'tol': [0.0001, 0.001, 0.0005]}
log_reg = LogisticRegression(solver='lbfgs', multi_class='multinomial', class_weight='balanced')
clf = GridSearchCV(log_reg, params, scoring='neg_log_loss', refit=True, cv=3, n_jobs=-1)
clf.fit(X_train, y_train)
print("best params: " + str(clf.best_params_))
print("Accuracy: ", accuracy_score(y_test, clf.predict(X_test)))
setattr(clf, '__classes', classes)
# save results for further using
joblib.dump(clf, config.get_novelty_detection_model_path())
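A short, hedged sketch of reusing the dumped grid search; activations is a placeholder for a new feature matrix with the same columns as the training activations, and config is the same module referenced above:

import joblib

clf = joblib.load(config.get_novelty_detection_model_path())
classes = getattr(clf, '__classes')             # class labels attached before dumping
probabilities = clf.predict_proba(activations)  # activations: placeholder feature matrix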