# gaussianProcess.py -- Python source snippet

def trainGP(df, dstPath, featureset="facefeatures", train_on_PCA=True, generate_PCA=True, transformer_func=None):
    """Train and pickle one attractiveness regressor per gender.

    The rows of ``df`` are grouped by the ``"gender"`` column. For every
    group an ``sklearn.svm.SVR`` (RBF kernel) pipeline is tuned with
    ``GridSearchCV``, scored with ``cross_val_score`` and written to
    ``<dstPath>/GP_<gender>.p`` as the pickled tuple ``(pca, pipeline)``.

    NOTE: despite the function name, the estimator built here is an SVR,
    not a Gaussian process.

    Args:
        df: DataFrame providing the columns ``"gender"``,
            ``"attractiveness"`` and the column named by ``featureset``.
        dstPath: Directory the ``GP_<gender>.p`` files are written to.
        featureset: Name of the column holding the per-row feature vectors.
            For ``"facefeaturesCNN"`` only the first 99 features are used.
        train_on_PCA: If true, the PCA returned by ``fitPCA`` becomes a
            pipeline step and its ``n_components`` is part of the grid
            search.
        generate_PCA: If true (or if ``train_on_PCA`` is true), ``fitPCA(X)``
            is called and its result is stored in the pickle; otherwise
            ``None`` is stored in its place.
        transformer_func: Forwarded to ``CustomTransformer`` as the first
            pipeline step, but only when it compares equal to the string
            ``"facefeatures3D"``.

    Returns:
        None. Results are printed and pickled to disk.
    """
    # A separate model is trained for every gender present in the data.
    for gender, group in df.groupby("gender"):
        print("training regression for {}'s on {} features".format(gender,featureset))

        # Series.tolist() replaces the removed Series.as_matrix().tolist();
        # it yields the same list of per-row values on old and new pandas.
        X = np.array(group[featureset].tolist())
        Y = np.array(group["attractiveness"].tolist())

        if featureset == "facefeaturesCNN":
            # Keep only the first 99 CNN features.
            X = X[:, 0:99]

        steps = []

        # NOTE(review): this compares transformer_func (default None) with a
        # feature-set name; presumably `featureset == "facefeatures3D"` was
        # intended -- confirm against callers before changing.
        if transformer_func == "facefeatures3D":
            steps.append(('custom_transformer', CustomTransformer(transformer_func)))

        if generate_PCA or train_on_PCA:
            pca = fitPCA(X)
            if train_on_PCA:
                steps.append(('pca', pca))
        else:
            pca = None

        steps.append(('estimator', sklearn.svm.SVR(kernel='rbf')))
        pipeline = sklearn.pipeline.Pipeline(steps)

        parameters_to_search = {
            'estimator__C': np.logspace(0, 2, 3),
            "estimator__epsilon": np.logspace(-2, 2, 5),
            "estimator__gamma": np.logspace(-2, 2, 5),
        }
        if train_on_PCA:
            # Candidate component counts: 10, 12, ... below the feature count.
            parameters_to_search["pca__n_components"] = np.arange(10, int(X.shape[1]), step=2)

        gridsearch = sklearn.model_selection.GridSearchCV(pipeline, parameters_to_search)
        gridsearch.fit(X, Y)

        print("Best parameters set found on development set:")
        print(gridsearch.best_params_)

        # Continue with the pipeline refit on the best parameter combination.
        pipeline = gridsearch.best_estimator_

        score = sklearn.model_selection.cross_val_score(pipeline, X, Y).mean()
        print("Score with the entire dataset = %.2f" % score)

        # Context manager guarantees the pickle file is flushed and closed.
        with open(os.path.join(dstPath, "GP_%s.p" % gender), "wb") as out_file:
            pickle.dump((pca, pipeline), out_file)