python类Lasso()的实例源码

friedman_scores.py 文件源码 项目:mlens 作者: flennerhag 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def build_ensemble(**kwargs):
    """Assemble a SuperLearner with per-pipeline preprocessing.

    All keyword arguments are forwarded unchanged to ``SuperLearner``.
    The first ``add`` call registers three named pipelines, pairing each
    preprocessing list with the estimators that share its key; the second
    registers a ``GradientBoostingRegressor`` as the meta learner.

    Returns the configured (unfitted) ensemble instance.
    """
    ensemble = SuperLearner(**kwargs)

    # Keys must match between the two dicts: they tie a preprocessing
    # chain to the estimators that run on its output.
    preprocessing = {
        'Standard Scaling': [StandardScaler()],
        'Min Max Scaling': [MinMaxScaler()],
        'No Preprocessing': [],
    }
    estimators = {
        'Standard Scaling': [ElasticNet(), Lasso(), KNeighborsRegressor()],
        'Min Max Scaling': [SVR()],
        'No Preprocessing': [
            RandomForestRegressor(random_state=SEED),
            GradientBoostingRegressor(),
        ],
    }

    ensemble.add(estimators, preprocessing)
    ensemble.add(GradientBoostingRegressor(), meta=True)
    return ensemble
_jgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def _cv_r0(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True):
    """
    Return cross-validated predictions from a ``linear_model`` estimator.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_folds: number of shuffled folds (legacy ``cross_validation.KFold`` API,
        which takes the sample count as its first argument).
    graph: when true, the true and predicted values are handed to
        ``jutil.cv_show`` together with ``grid_std``.

    Every sample receives a prediction from the fold in which it was held out.
    """
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    clf = estimator_cls(alpha=alpha)
    folds = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict(
        clf, xM, yV, cv=folds, n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
modeltest.py 文件源码 项目:strategy 作者: kanghua309 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def model_cross_valid(X,Y):
    """Print the mean 10-fold cross-validation score of several regressors.

    Each model class in the candidate list is instantiated with its default
    arguments and scored with ``neg_mean_squared_error``; the class and the
    mean of its fold scores are printed. Nothing is returned.
    """
    # The folds are not shuffled, so the split is already deterministic.
    # Passing random_state without shuffle=True had no effect and is rejected
    # with ValueError by recent scikit-learn releases, so it is omitted.
    kfold = model_selection.KFold(n_splits=10)
    scoring = 'neg_mean_squared_error'

    # Other regressors (Ridge, Lasso, KNeighborsRegressor, tree/ensemble
    # models, SVR, ...) can be appended to this list to compare them too.
    for model_class in [LinearRegression, ElasticNet]:
        model = model_class()
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_class, results.mean())
regularize.py 文件源码 项目:DSI-personal-reference-kit 作者: teb311 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def fit_regression(X, y, regression_class=LinearRegression, regularization_const=.001):
    '''
        Fit a regressor on (X, y) and report its cross-validated RMSE.

        X (pandas DataFrame): The data.
        y (pandas DataFrame or Series): The answers.
        regression_class (class): One of sklearn.linear_model.[LinearRegression, Ridge, Lasso]
        regularization_const: regularization strength, passed as ``alpha``
                              (together with ``normalize=True``) to every class
                              other than LinearRegression; ignored for LinearRegression.

        Return:
            tuple, (the_fitted_regressor, mean of the per-fold RMSE values).
    '''
    init_kwargs = {}
    if regression_class is not LinearRegression:
        init_kwargs = {'alpha': regularization_const, 'normalize': True}

    predictor = regression_class(**init_kwargs)
    predictor.fit(X, y)

    # The scorer yields negated MSE per fold; flip the sign and take the
    # square root to obtain one RMSE per fold before averaging.
    neg_mse_per_fold = cross_val_score(predictor, X, y=y, scoring='neg_mean_squared_error')
    rmse_per_fold = np.sqrt(-1 * neg_mse_per_fold)

    return (predictor, np.mean(rmse_per_fold))
model.py 文件源码 项目:poormining 作者: bowenpay 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def run(self):
        """Train a model, evaluate it over a sweep of values, and plot the result.

        The classifier is built from the training data, then ``self.predict``
        is called on the test data with a fourth argument stepping from 0.0
        to 1.0 in increments of 0.1. Both the step and the returned ratio
        are scaled to percentages and plotted against each other.
        """
        # Training phase.
        train_X, train_Y = self._fetch_data()
        clf = self.get_classifier(train_X, train_Y)

        # Evaluation phase on the test set.
        X, Y = self._fetch_test_data()
        res = []
        for step in range(11):
            fraction = step * 0.1
            res.append([fraction * 100, self.predict(clf, X, Y, fraction) * 100])

        # Column 0 is the x axis (percent), column 1 the hit ratio (percent).
        arr = np.array(res)
        xs, ys = arr[:, 0], arr[:, 1]
        plt.plot(xs, ys)        # connecting line
        plt.plot(xs, ys, 'ro')  # red circle markers
        plt.xlabel('???(%)')
        plt.ylabel('???(%)')
        plt.title('??Lasso?????????????')
        plt.show()
linvpy_latest.py 文件源码 项目:linvpy 作者: LCAV 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def lasso_regularization(matrix_a, vector_y, lambda_parameter=0):
    """
    Lasso algorithm that solves min ||y - Ax||_2^2 + lambda ||x||_1

    :param matrix_a: design matrix A passed to ``Lasso.fit`` as the features
    :param vector_y: target vector y; its length is used as the sample count
    :param lambda_parameter: weight of the L1 penalty in the objective above
    :return: estimated x (the fitted ``coef_`` array)
    """

    # scikit-learn minimises (1 / (2 * m)) * ||y - Ax||_2^2 + alpha * ||x||_1,
    # so lambda is rescaled by 2 * m to match the objective in the docstring.
    # float() keeps this a true division even for integer lambda on Python 2.
    reg_parameter = float(lambda_parameter) / (2 * len(vector_y))

    # No intercept: the model is exactly y ~ Ax. The ``normalize`` argument is
    # not passed; False was its default and newer scikit-learn releases no
    # longer accept it.
    clf = linear_model.Lasso(alpha=reg_parameter, fit_intercept=False)
    clf.fit(matrix_a, vector_y)

    return clf.coef_
kgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def _cv_r0(method, xM, yV, alpha, n_splits=5, n_jobs=-1, grid_std=None, graph=True):
    """
    Return cross-validated predictions from a ``linear_model`` estimator.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_splits: number of shuffled K-fold splits.
    n_jobs: forwarded to ``cross_val_predict``.
    graph: when true, ``jutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.

    Every sample receives a prediction from the split in which it was held out.
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    # Split with the KFold configured here (not an external ``kf5_ext_c``
    # object) so that ``n_splits`` and shuffling actually take effect.
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
kgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def cv(method, xM, yV, alpha, n_splits=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Return cross-validated predictions from a ``linear_model`` estimator.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_splits: number of K-fold splits.
    shuffle: whether the K-fold splitter shuffles the samples.
    n_jobs: forwarded to ``cross_val_predict``.
    graph: when true, ``jutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.

    Every sample receives a prediction from the split in which it was held out.
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    # Split with the KFold configured here (not an external ``kf5_ext_c``
    # object) so that ``n_splits`` and ``shuffle`` actually take effect.
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
jutil.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def mlr_val_vseq_lasso(RM, yE, v_seq, alpha=.5, disp=True, graph=True):
    """
    Fit a Lasso on the rows outside ``v_seq`` and validate on the rows in it.

    Rows of ``RM`` and column 0 of ``yE`` are selected by index: indices
    listed in ``v_seq`` form the validation set, all remaining indices
    below ``len(yE)`` form the training set. ``mlr_show`` is called for
    both sets; the pair it returns for the validation set is returned.
    """
    t_seq = [idx for idx in range(len(yE)) if idx not in v_seq]

    RMt, yEt = RM[t_seq, :], yE[t_seq, 0]
    RMv, yEv = RM[v_seq, :], yE[v_seq, 0]

    clf = linear_model.Lasso(alpha=alpha)
    clf.fit(RMt, yEt)

    if disp:
        print('Training result')
    mlr_show(clf, RMt, yEt, disp=disp, graph=graph)

    if disp:
        print('Validation result')
    val_r_sqr, val_rmse = mlr_show(clf, RMv, yEv, disp=disp, graph=graph)

    return val_r_sqr, val_rmse
kgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def cv_SVR(xM, yV, svr_params, n_splits=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Return cross-validated predictions from a support vector regressor.

    svr_params: dict of keyword arguments used to construct ``svm.SVR``.
    n_splits: number of K-fold splits.
    shuffle: whether the K-fold splitter shuffles the samples.
    n_jobs: forwarded to ``cross_val_predict``.
    graph: when true, ``kutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.

    Every sample receives a prediction from the split in which it was held out.
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR(**svr_params)
    # Split with the KFold configured here (not an external ``kf5_ext_c``
    # object) so that ``n_splits`` and ``shuffle`` actually take effect.
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
kgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def _cv_r0(method, xM, yV, alpha, n_splits=5, n_jobs=-1, grid_std=None, graph=True):
    """
    Return cross-validated predictions from a ``linear_model`` estimator.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_splits: number of shuffled K-fold splits.
    n_jobs: forwarded to ``cross_val_predict``.
    graph: when true, ``kutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.

    Every sample receives a prediction from the split in which it was held out.
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    # Split with the KFold configured here (not an external ``kf5_ext_c``
    # object) so that ``n_splits`` and shuffling actually take effect.
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
kgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def cv(method, xM, yV, alpha, n_splits=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Return cross-validated predictions from a ``linear_model`` estimator.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_splits / shuffle: configuration of the K-fold splitter.
    graph: when true, ``kutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.
    """
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    clf = estimator_cls(alpha=alpha)
    splitter = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV, cv=splitter.split(xM), n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    kutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
kgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def cvLOO(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
    """
    Return leave-one-out cross-validated predictions from a linear model.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_jobs: forwarded to ``cross_val_predict``.
    graph: when true, ``kutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.

    Leave-one-out is obtained by using as many unshuffled K-fold splits as
    there are rows in ``xM``.
    """
    n_splits = xM.shape[0]

    clf = getattr(linear_model, method)(alpha=alpha)
    # ``model_selection.KFold`` takes the split count only; also passing the
    # sample count positionally (the legacy API) supplies ``n_splits`` twice
    # and raises TypeError.
    kf_n = model_selection.KFold(n_splits=n_splits)
    yV_pred = model_selection.cross_val_predict(clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
kutil.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def mlr_val_vseq_lasso(RM, yE, v_seq, alpha=.5, disp=True, graph=True):
    """
    Train a Lasso on the samples not listed in ``v_seq``, validate on the rest.

    ``v_seq`` holds the row indices of the validation samples; every other
    index below ``len(yE)`` is used for training. Targets are read from
    column 0 of ``yE``. ``mlr_show`` is invoked for the training set and
    then for the validation set, and its validation result pair is returned.
    """
    all_indices = range(len(yE))
    t_seq = [i for i in all_indices if i not in v_seq]

    RMt, yEt = RM[t_seq, :], yE[t_seq, 0]
    RMv, yEv = RM[v_seq, :], yE[v_seq, 0]

    clf = linear_model.Lasso(alpha=alpha)
    clf.fit(RMt, yEt)

    if disp:
        print('Training result')
    mlr_show(clf, RMt, yEt, disp=disp, graph=graph)

    if disp:
        print('Validation result')
    r_sqr_val, rmse_val = mlr_show(clf, RMv, yEv, disp=disp, graph=graph)

    return r_sqr_val, rmse_val
jpandas.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def pd_gscv(pdr, method, xM, yV, alphas_log, colname='Predicted-RP', fname='sheet/rafa36795_cxcalc_prp1000.csv'):
    """
    Grid-search alpha, cross-validate with the best value, and save results.

    Steps:
      1. ``jgrid.gs`` searches ``alphas_log`` for the best ``alpha`` of ``method``.
      2. ``jgrid.cv`` produces cross-validated predictions with that ``alpha``.
      3. A copy of ``pdr`` with the predictions in column ``colname`` is
         written to ``fname`` as CSV (without the index).
      4. The best estimator is dumped next to it with a ``pkl`` extension.

    method: estimator name passed to both the grid search and the
        cross-validation (e.g. 'Ridge', 'Lasso').
    fname: output CSV path; the pickle path is built by replacing its last
        three characters with ``pkl``, so a three-letter extension is assumed.
    """

    print("1. Searching the best hyper-parameter by a grid method.")
    gr = jgrid.gs(method, xM, yV, alphas_log)
    print(gr.grid_scores_)
    best_alpha = gr.best_params_['alpha']
    print("Best alpha:", best_alpha)

    print("2. Predicting the property using the best hyper-parameter and show a x-y plot")
    # Cross-validate the same estimator family that was grid-searched; a
    # hard-coded 'Lasso' here would apply another model's alpha to Lasso.
    yV_pred = jgrid.cv(method, xM, yV, alpha=best_alpha, grid_std=gr_beststd(gr))

    print("3. Saving the predicted results in crossvalidation into", fname)
    pdw = pdr.copy()
    pdw[colname] = yV_pred.tolist()
    pdw.to_csv(fname, index=False)

    print("4. Saving the best estimator as a pkl file")
    print(gr.best_estimator_)
    externals.joblib.dump(gr.best_estimator_, fname[:-3] + "pkl")
jgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def cv_SVR(xM, yV, svr_params, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Return cross-validated predictions from a support vector regressor.

    svr_params: dict of keyword arguments used to construct ``svm.SVR``.
    n_folds: number of K-fold splits.
    shuffle: whether the K-fold splitter shuffles the samples.
    n_jobs: forwarded to ``cross_val_predict``.
    graph: when true, ``jutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.

    ``yV`` must expose ``.A1`` (as ``numpy.matrix`` does); the flattened
    target is what the regressor is trained on.
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR(**svr_params)
    # Honour the caller's ``shuffle`` flag instead of always shuffling.
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)

    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV.A1, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
jgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def _cv_r0(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True):
    """
    Return cross-validated predictions from a ``linear_model`` estimator.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_folds: number of shuffled K-fold splits.
    graph: when true, ``jutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.
    """
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    clf = estimator_cls(alpha=alpha)
    splitter = model_selection.KFold(n_splits=n_folds, shuffle=True)
    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV, cv=splitter.split(xM), n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
jgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def cv(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Return cross-validated predictions from a ``linear_model`` estimator.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_folds: number of K-fold splits.
    shuffle: whether the K-fold splitter shuffles the samples.
    n_jobs: forwarded to ``cross_val_predict``.
    graph: when true, ``jutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.

    Return
    --------
    yV_pred
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    # Honour the caller's ``shuffle`` flag instead of always shuffling.
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
jgrid.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def _cv_LOO_r0(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
    """
    Return leave-one-out cross-validated predictions from a linear model.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    graph: when true, ``jutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.

    One unshuffled fold is created per row of ``xM``.
    """
    sample_count = xM.shape[0]

    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    clf = estimator_cls(alpha=alpha)
    # Shuffling is left off: with one sample per fold it would not matter.
    splitter = model_selection.KFold(n_splits=sample_count)
    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV, cv=splitter.split(xM), n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
jgrid (james-90X3A's conflicted copy 2016-04-21).py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def cv(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True):
    """
    Return cross-validated predictions from a ``linear_model`` estimator.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_folds: number of shuffled folds (legacy ``cross_validation.KFold`` API,
        which takes the sample count as its first argument).
    graph: when true, ``jutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.
    """
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    clf = estimator_cls(alpha=alpha)
    folds = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict(
        clf, xM, yV, cv=folds, n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
_jgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def cv(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Return cross-validated predictions from a ``linear_model`` estimator.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    n_folds / shuffle: configuration of the legacy
        ``cross_validation.KFold``, which also takes the sample count.
    graph: when true, ``jutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.
    """
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    clf = estimator_cls(alpha=alpha)
    folds = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=shuffle)
    yV_pred = cross_validation.cross_val_predict(
        clf, xM, yV, cv=folds, n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
_jgrid_r0.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def _cv_LOO_r0(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
    """
    Return leave-one-out cross-validated predictions from a linear model.

    method: name of the estimator class looked up on ``linear_model``
        (e.g. 'Ridge', 'Lasso'); it is instantiated with ``alpha``.
    graph: when true, ``jutil.cv_show`` is called with the true values,
        the predictions and ``grid_std``.

    Uses the legacy ``cross_validation.KFold`` with one unshuffled fold per
    row of ``xM``.
    """
    sample_count = xM.shape[0]

    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    clf = estimator_cls(alpha=alpha)
    folds = cross_validation.KFold(sample_count, n_folds=sample_count)
    yV_pred = cross_validation.cross_val_predict(
        clf, xM, yV, cv=folds, n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
11.7 feature_selection_embeded.py 文件源码 项目:ML-note 作者: JasonK93 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def test_Lasso(*data):
    '''
    Plot how many Lasso coefficients are (near) zero as alpha grows.

    :param data: exactly two positional values, the feature matrix X and the target y
    :return: None
    '''
    X, y = data
    alphas = np.logspace(-2, 2)

    zeros = []
    for alpha in alphas:
        coefficients = Lasso(alpha=alpha).fit(X, y).coef_
        # A coefficient counts as zero when its magnitude is below 1e-5.
        zeros.append(sum(1 for ele in coefficients if abs(ele) < 1e-5))

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, zeros)
    ax.set_xlabel(r"$\alpha$")
    ax.set_xscale("log")
    ax.set_ylim(0, X.shape[1] + 1)
    ax.set_ylabel("zeros in coef")
    ax.set_title("Sparsity In Lasso")
    plt.show()
3.Lasso regression.py 文件源码 项目:ML-note 作者: JasonK93 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_Lasso_alpha(*data):
    '''
    Plot the test-set score of Lasso for a range of alpha values.

    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    alphas = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]

    # One model per alpha: fit on the training split, score on the test split.
    scores = [
        linear_model.Lasso(alpha=alpha).fit(X_train, y_train).score(X_test, y_test)
        for alpha in alphas
    ]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, scores)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"score")
    ax.set_xscale('log')
    ax.set_title("Lasso")
    plt.show()
test_least_angle.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_rank_deficient_design():
    # Consistency check: on rank deficient designs, LARS Lasso must reach an
    # objective value no worse than coordinate descent Lasso (up to a tiny
    # relative tolerance).
    y = [5, 0, 5]
    designs = (
        [[5, 0],
         [0, 5],
         [10, 10]],

        [[10, 10, 0],
         [1e-32, 0, 0],
         [0, 0, 1]],
    )

    for X in designs:

        def objective(coef):
            # Lasso objective for alpha = .1 and the 3 samples used here.
            return (1. / (2. * 3.)
                    * linalg.norm(y - np.dot(X, coef)) ** 2
                    + .1 * linalg.norm(coef, 1))

        # Normalization is switched off so the raw coefficients can be
        # plugged straight into the objective function.
        lars = linear_model.LassoLars(.1, normalize=False)
        obj_lars = objective(lars.fit(X, y).coef_)

        coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
        obj_cd = objective(coord_descent.fit(X, y).coef_)

        assert_less(obj_lars, obj_cd * (1. + 1e-8))
test_least_angle.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False):
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when early stopping is used.
    # (test : before, in the middle, and in the last part of the path)
    # X and y are module-level fixtures of the test file.
    alphas_min = [10, 0.9, 1e-4]

    # The loop variable has its own name and is passed on to lars_path, so
    # all three stopping points are exercised rather than 0.9 three times,
    # and the list is not clobbered by the iteration.
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
        # Fit coordinate descent at the alpha where the LARS path stopped.
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01)

    # same test, with normalization
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True,
                                      tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01)
model.py 文件源码 项目:Tback 作者: ideaplat 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def run(self):
        """Train a model, evaluate it over a sweep of values, and plot the result.

        The classifier is built from the training data, then ``self.predict``
        is called on the test data with a fourth argument stepping from 0.0
        to 1.0 in increments of 0.1. Both the step and the returned ratio
        are scaled to percentages and plotted against each other.
        """
        # Training phase.
        train_X, train_Y = self._fetch_data()
        clf = self.get_classifier(train_X, train_Y)

        # Evaluation phase on the test set.
        X, Y = self._fetch_test_data()
        res = []
        for step in range(11):
            fraction = step * 0.1
            res.append([fraction * 100, self.predict(clf, X, Y, fraction) * 100])

        # Column 0 is the x axis (percent), column 1 the hit ratio (percent).
        arr = np.array(res)
        xs, ys = arr[:, 0], arr[:, 1]
        plt.plot(xs, ys)        # connecting line
        plt.plot(xs, ys, 'ro')  # red circle markers
        plt.xlabel('???(%)')
        plt.ylabel('???(%)')
        plt.title('??Lasso?????????????')
        plt.show()
model.py 文件源码 项目:Tback 作者: ideaplat 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def run(self):
        """Fit the classifier, sweep an evaluation parameter, and chart hit ratios.

        After training on ``_fetch_data``, the test set from
        ``_fetch_test_data`` is evaluated eleven times via ``self.predict``,
        with the last argument taking the values 0.0, 0.1, ..., 1.0. The
        results are shown as a line with red markers, both axes in percent.
        """
        # Build the model from the training set.
        features, targets = self._fetch_data()
        clf = self.get_classifier(features, targets)

        # Collect [x percent, hit ratio percent] pairs on the test set.
        X, Y = self._fetch_test_data()
        res = []
        for step in range(11):
            level = step * 0.1
            hit_ratio = self.predict(clf, X, Y, level)
            res.append([level * 100, hit_ratio * 100])

        arr = np.array(res)
        x_percent = arr[:, 0]
        hit_percent = arr[:, 1]
        plt.plot(x_percent, hit_percent)        # connecting line
        plt.plot(x_percent, hit_percent, 'ro')  # red circle markers
        plt.xlabel('???(%)')
        plt.ylabel('???(%)')
        plt.title('??Lasso?????????????')
        plt.show()
LassoRegression.py 文件源码 项目:kaggle 作者: RankingAI 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def train(self):
        """Fit a Lasso model on the outlier-trimmed training data and pickle it.

        Steps performed:
          * read extra training columns from ``<InputDir>/p21/eval_train.hdf``
            (key ``train``);
          * keep only rows whose ``logerror`` lies strictly between
            ``self._low`` and ``self._up``;
          * append the extra columns (without ``parcelid``) column-wise;
          * fit ``Lasso`` on all columns except ``self._l_drop_cols`` with
            ``logerror`` as the target;
          * pickle the fitted model to a timestamped file in ``OutputDir``.

        Sets ``self.TrainData``, ``self._l_train_columns``, ``self._model`` and
        ``self._f_eval_train_model``. Returns None.
        """
        start = time.time()

        extra_tr = pd.read_hdf(path_or_buf='%s/p21/eval_train.hdf' % self.InputDir, key='train')

        print('size before truncated outliers is %d ' % len(self.TrainData))
        in_range = (self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)
        self.TrainData = self.TrainData[in_range]
        # NOTE(review): concat with axis=1 aligns on the index; if extra_tr
        # still holds index labels that were filtered out above, those rows
        # may reappear with missing values - confirm the indexes match.
        self.TrainData = pd.concat([self.TrainData, extra_tr.drop('parcelid', axis= 1)], axis = 1)
        print('size after truncated outliers is %d ' % len(self.TrainData))

        X = self.TrainData.drop(self._l_drop_cols, axis=1)
        Y = self.TrainData['logerror']
        # Remember the feature column order used for fitting.
        self._l_train_columns = X.columns
        X = X.values.astype(np.float32, copy=False)

        lr = Lasso(alpha= self._lr_alpha, max_iter= self._lr_iter, tol= 1e-4, random_state= 2017, selection= self._lr_sel)
        self._model = lr.fit(X, Y)
        end = time.time()

        print('Training iterates %d, time consumed %d ' % (self._model.n_iter_, (end - start)))

        # NOTE(review): the timestamp contains ':' characters, which some
        # filesystems do not accept in file names.
        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                            datetime.now().strftime('%Y%m%d-%H:%M:%S'))
        # The with-block closes the file; no explicit close() is needed.
        with open(self._f_eval_train_model, 'wb') as o_file:
            pickle.dump(self._model, o_file, -1)

        return
models.py 文件源码 项目:sanergy-public 作者: dssg 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def define_model(self):
        """Instantiate the estimator selected by ``self.modeltype``.

        Supported values are "RandomForest", "LinearRegression", "Lasso",
        "ElasticNet" and "SVR"; the hyper-parameters each one needs are read
        from ``self.parameters`` only when that model is selected.

        Raises ConfigError for any other model type. (AR, StaticModel,
        AdvancedStaticModel and SGDRegressor are currently not enabled.)
        """
        kind = self.modeltype

        if kind == "RandomForest":
            return ensemble.RandomForestRegressor(
                n_estimators=self.parameters['n_estimators'])

        if kind == "LinearRegression":
            return linear_model.LinearRegression()

        if kind == "Lasso":
            return linear_model.Lasso(alpha=self.parameters['alpha'])

        if kind == "ElasticNet":
            return linear_model.ElasticNet(
                alpha=self.parameters['alpha'],
                l1_ratio=self.parameters['l1_ratio'])

        if kind == "SVR":
            return SVR(
                C=self.parameters['C'],
                epsilon=self.parameters['epsilon'],
                kernel=self.parameters['kernel'])

        raise ConfigError("Unsupported model {0}".format(kind))


问题


面经


文章

微信
公众号

扫码关注公众号