Example source code using Python's Ridge() class

RidgeRegression.py (project: kaggle, author: RankingAI)
def train(self):
        """"""
        start = time.time()

        print('size before truncating outliers is %d ' % len(self.TrainData))
        TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]
        print('size after truncating outliers is %d ' % len(TrainData))

        X = TrainData.drop(self._l_drop_cols, axis=1)
        Y = TrainData['logerror']
        self._l_train_columns = X.columns
        X = X.values.astype(np.float32, copy=False)

        rr = Ridge(alpha=self._alpha,
                   max_iter=self._iter,  # note: the 'svd' solver does not use max_iter
                   solver='svd')

        self._model = rr.fit(X, Y)
        end = time.time()

        print('time consumed %d ' % (end - start))

        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                            datetime.now().strftime('%Y%m%d-%H:%M:%S'))
        with open(self._f_eval_train_model, 'wb') as o_file:
            pickle.dump(self._model, o_file, -1)

        self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
                                   ignore_index=True)  ## ignore_index resets the index; otherwise indices would overlap

        return
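A minimal standalone sketch of the same pattern on synthetic data (the truncation bounds, alpha, and column names here are illustrative assumptions, not values from the project):

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge

# Synthetic stand-in for TrainData.
rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(1000, 4), columns=['f0', 'f1', 'f2', 'logerror'])
low, up = -0.4, 0.4
train = df[(df['logerror'] > low) & (df['logerror'] < up)]
X = train.drop(['logerror'], axis=1).values.astype(np.float32, copy=False)
y = train['logerror']
model = Ridge(alpha=1.0, solver='svd').fit(X, y)
print(model.coef_)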
valid.py (project: yt8m, author: forwchen)
def predict(self):
        """
        Train the regression model with predictions on validation set.
        Save the learned weights to apply to test set predictions.
        """
        pred_array = np.stack(self.pred_list, -1)
        reg = linear_model.Ridge(alpha=.5)
        pred = np.reshape(pred_array, [-1, len(self.pred_list)])
        y = np.reshape(self.labels_val, [-1,1])
        reg.fit(pred, y)

        self.weights = reg.coef_[0].tolist()
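The learned weights are then meant to blend test-set predictions stacked the same way. A hedged sketch of that follow-up step (shapes and weight values are purely illustrative):

import numpy as np

# Four models' predictions for 8 examples x 3 classes (synthetic).
test_preds = [np.random.rand(8, 3) for _ in range(4)]
stacked = np.reshape(np.stack(test_preds, -1), [-1, len(test_preds)])
weights = np.array([0.3, 0.2, 0.4, 0.1])   # e.g. the reg.coef_[0] learned above
blended = stacked.dot(weights).reshape(8, 3)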
main.py (project: OpenAPS, author: medicinexlab)
def ridge_regression_model(parameter_array):
    alpha_value = parameter_array[0]
    # ridge_solver = parameter_array[0]
    return linear_model.Ridge(alpha=alpha_value, fit_intercept=True, normalize=True, copy_X=True, max_iter=None, tol=0.001, solver='auto', random_state=None)
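A hypothetical call, assuming the caller packs the alpha value into slot 0 of parameter_array (note: the normalize argument was removed in scikit-learn 1.2, so this snippet targets older versions):

model = ridge_regression_model([0.5])   # Ridge with alpha=0.5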

linearregressionmodel.py (project: Supply-demand-forecasting, author: LevinJ)
def setClf(self):
#         self.clf = Ridge(alpha=0.0000001, tol=0.0000001)
        clf = LinearRegression()
        min_max_scaler = preprocessing.MinMaxScaler()
        self.clf = Pipeline([('scaler', min_max_scaler), ('estimator', clf)])
        return
models.py (project: AutoML4, author: djajetic)
def __init__(self, info, verbose=True, debug_mode=False):
        self.label_num=info['label_num']
        self.target_num=info['target_num']
        self.task = info['task']
        self.metric = info['metric']
        self.postprocessor = None
        #self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True) # To calibrate proba
        self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False) # To calibrate proba
        if debug_mode>=2:
            self.name = "RandomPredictor"
            self.model = RandomPredictor(self.target_num)
            self.predict_method = self.model.predict_proba 
            return
        if info['task']=='regression':
            if info['is_sparse']==True:
                self.name = "BaggingRidgeRegressor"
                self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingRegressor"
                self.model = GradientBoostingRegressor(n_estimators=1,  max_depth=4, min_samples_split=14, verbose=verbose, warm_start = True)
            self.predict_method = self.model.predict # regression: plain predictions, not probabilities
        else:
            if info['has_categorical']: # Out of laziness, we do not convert categorical variables...
                self.name = "RandomForestClassifier"
                self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            elif info['is_sparse']:                
                self.name = "BaggingNBClassifier"
                self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...                          
            else:
                self.name = "GradientBoostingClassifier"
                self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", random_state=1, warm_start = True)")
            if info['task']=='multilabel.classification':
                self.model = MultiLabelEnsemble(self.model)
            self.predict_method = self.model.predict_proba
dl_simulation.py (project: CS-SMAF, author: brian-cleary)
def update_sparse_predictions(Y,D,W,Psi,lda=0.0001):
    # For each column of W, refit a ridge model on the dictionary atoms that
    # column actually uses, then predict from Psi restricted to those atoms.
    X = np.zeros((Psi.shape[0],W.shape[1]))
    for i in range(W.shape[1]):
        used = (W[:,i] != 0)
        if used.sum() > 0:
            d = np.copy(D)
            d = d[:,used]
            model = Ridge(alpha=lda)
            model.fit(d,Y[:,i])
            X[:,i] = model.predict(Psi[:,used])
    return X
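A toy call with shapes consistent with the function above (all values synthetic):

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(1)
m, n, p, k = 20, 10, 50, 3          # measurements, atoms, outputs, columns
D, Psi = rng.randn(m, n), rng.randn(p, n)
W = rng.randn(n, k) * (rng.rand(n, k) > 0.5)   # sparse coefficients
Y = rng.randn(m, k)
X = update_sparse_predictions(Y, D, W, Psi)
print(X.shape)   # (50, 3)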
regression.py (project: l1l2py, author: slipguru)
def __init__(self, mu=.5, tau=1.0, lamda=1, use_gpu=False, threshold=1e-16,
                 alpha=None, l1_ratio=None, fit_intercept=True,
                 normalize=False, precompute=False, max_iter=10000,
                 copy_X=True, tol=1e-4, warm_start=False, positive=False,
                 random_state=None, selection='cyclic'):
        vs = L1L2(mu=mu, tau=tau, use_gpu=use_gpu, threshold=threshold,
                  alpha=alpha, l1_ratio=l1_ratio, fit_intercept=fit_intercept,
                  normalize=normalize, precompute=precompute,
                  max_iter=max_iter, copy_X=copy_X, tol=tol,
                  warm_start=warm_start, positive=positive,
                  random_state=random_state, selection=selection)
        mdl = Ridge(alpha=lamda, fit_intercept=fit_intercept,
                    normalize=normalize, copy_X=copy_X, max_iter=max_iter,
                    tol=tol, random_state=random_state)
        super(L1L2TwoStep, self).__init__(
            (('l1l2', vs), ('ridge', mdl)))

        self.mu = mu
        self.tau = tau
        self.lamda = lamda
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.use_gpu = use_gpu
        self.threshold = threshold

        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.precompute = precompute
        self.max_iter = max_iter
        self.copy_X = copy_X
        self.tol = tol
        self.warm_start = warm_start
        self.positive = positive
        self.intercept_ = 0.0
        self.random_state = random_state
        self.selection = selection
modeltest.py (project: strategy, author: kanghua309)
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    def build_model(model_name):
        model = model_name()
        return model
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR,RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX,TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print resid
        print("Residual sum of squares: %f"% np.mean(resid ** 2))
        #print model.predict(TestX)
        #print TestY
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal", pvalue)

        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = smd.het_breuschpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)

        #print("Lagrange Multiplier Statistics:", ljung_box[0])
        print("Test Autocorrelation P-values:", ljung_box[1])
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
lsClassifier.py (project: USTC_AILab2, author: overflocat)
def lsClassifier(trainData, trainLabel, testData, testLabel, lambdaS):
    reg = linear_model.Ridge(alpha=lambdaS)
    reg.fit(trainData, trainLabel.tolist())

    W = reg.coef_
    testResult = np.array(testData.dot(W))  # note: the fitted intercept is ignored here
    testResult = np.where(testResult > 0, 1, -1).astype(np.int32)
    accu = np.sum(np.where(testResult == testLabel, 1, 0)) / float(testLabel.shape[0])

    return testResult, accu
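A quick smoke test on synthetic, linearly separable ±1 labels (data and regularization strength are made up):

import numpy as np
from sklearn import linear_model

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
labels = np.where(X[:, 0] + X[:, 1] > 0, 1, -1).astype(np.int32)
result, accu = lsClassifier(X[:150], labels[:150], X[150:], labels[150:], 1.0)
print(accu)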
RegressionRidgeReg.py (project: AirTicketPredicting, author: junlulocky)
def __init__(self, isTrain):
        super(RegressionRidgeReg, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        # Create linear regression object
        self.model = linear_model.Ridge(alpha=24420.530945486549)
SparseLassoADMM.py (project: SparkADMM, author: yahoo)
def localupdate(b,A,z,u,rho,eps):
        ridge = Ridge(alpha=rho/2.0, fit_intercept=False, tol=eps)
        #print "b",b
        #print "z",z
        #print "u",u
        #print A * (z-u/rho)
        b_new = b - A * (z-u/rho)
        #print "bnew",b_new
        ret = ridge.fit(A,b_new)
        #print ret
        #print ret.coef_
        return (ret.coef_ + (z-u/rho))
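The ridge call above is the x-update of ADMM for the lasso: it minimizes ||Ax - b_new||^2 + (rho/2)||x||^2 and then shifts the solution back by (z - u/rho). A hedged toy call, assuming A is a scipy sparse matrix (the code uses A * v as a matrix-vector product):

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
A = csr_matrix(rng.randn(30, 5))
b = A * rng.randn(5)                 # synthetic right-hand side
z, u = np.zeros(5), np.zeros(5)      # consensus and scaled dual variables
x_new = localupdate(b, A, z, u, rho=1.0, eps=1e-6)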
lrcall.py (project: FLASH, author: yuyuz)
def get_next_by_EI(ni, alpha, lr, lr_time, X, y, ei_xi):
    '''
    Args:
        ni: number of units in each layer
        alpha: lambda for Ridge regression
        lr: fitted performance model in burning period
        lr_time: fitted time model in burning period
        X: all previous inputs x
        y: all previous observations corresponding to X
        ei_xi: parameter for EI exploitation-exploration trade-off

    Returns:
        x_next: a nested list [[0,1,0], [1,0,0,0], ...] as the next input x to run a specified pipeline
    '''
    var = np.var(lr.predict(X) - y)
    m = np.dot(X.T, X)
    inv = np.linalg.inv(m + alpha * np.eye(sum(ni)))
    maxEI = float('-inf')
    x_next = None
    for i in range(np.prod(ni)):
        x = [[0]*n for n in ni]
        x_flat = []
        pipeline = get_pipeline_by_flatten_index(ni, i)
        for layer in range(len(ni)):
            x[layer][pipeline[layer]] = 1
            x_flat += x[layer]
        x_flat = np.array(x_flat)
        mu_x = lr.predict([x_flat])
        var_x = var * (1 + np.dot(np.dot(x_flat, inv), x_flat.T))
        sigma_x = np.sqrt(var_x)
        u = (np.min(y) - ei_xi - mu_x) / sigma_x
        EI = sigma_x * (u*norm.cdf(u) + norm.pdf(u))
        estimated_time = lr_time.predict([x_flat])[0]
        EIPS = EI / estimated_time
        if EIPS > maxEI:
            maxEI = EIPS
            x_next = x

    return x_next
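The per-candidate score is the standard expected improvement for minimization, divided by the predicted runtime. A standalone check of the EI formula with made-up numbers:

import numpy as np
from scipy.stats import norm

mu_x, sigma_x = 0.20, 0.10     # predicted mean and std for a candidate (synthetic)
y_min, ei_xi = 0.25, 0.01      # best observation so far, exploration margin
u = (y_min - ei_xi - mu_x) / sigma_x
EI = sigma_x * (u * norm.cdf(u) + norm.pdf(u))
print(EI)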
regularize.py (project: DSI-personal-reference-kit, author: teb311)
def main(dataset_size, test_proportion):
    diabetes = load_diabetes()
    X = diabetes.data[:dataset_size]
    y = diabetes.target[:dataset_size]

    fig, ax_list = plt.subplots(3, 1, figsize=(8, 6))
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Ridge, ax=ax_list[0])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Lasso, ax=ax_list[1])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=LinearRegression, ax=ax_list[2])

    plt.tight_layout()
    plt.show()
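main depends on a plot_errors_by_lambda helper that is not shown in this excerpt. A plausible stand-in consistent with the call signature (an assumption, not the project's code):

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def plot_errors_by_lambda(X, y, test_proportion, regression_class, ax):
    # Sweep regularization strengths and plot held-out RMSE on the given axis.
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=test_proportion)
    alphas = np.logspace(-3, 2, 20)
    errors = []
    for a in alphas:
        try:
            model = regression_class(alpha=a)
        except TypeError:                 # LinearRegression takes no alpha
            model = regression_class()
        model.fit(X_tr, y_tr)
        errors.append(np.sqrt(mean_squared_error(y_te, model.predict(X_te))))
    ax.plot(alphas, errors)
    ax.set_xscale('log')
    ax.set_title(regression_class.__name__)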
__init__.py (project: mlprojects-py, author: srinathperera)
def get_models4ensamble(conf):
    models = []
    #models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    #models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html

    #0 was too big to run with depth set to 1, and 1 was overfitting a bit

    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":3, "eta":0.1, "min_child_weight":5,
            "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}
    else:
        xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":10, "eta":0.1, "min_child_weight":8,
            "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}

    #xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":10, "eta":0.1, "min_child_weight":8,
    #    "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}

    models = [
                #DLModel(conf),

                #LRModel(conf, model=linear_model.BayesianRidge()),
                #LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
                #LRModel(conf, model=linear_model.Lasso(alpha = 0.1)),
                #LRModel(conf, model=linear_model.Ridge(alpha=.5)),
                #LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
                #    ('linear', LinearRegression(fit_intercept=False))])),
                XGBoostModel(conf, xgb_params, use_cv=True),
                LRModel(conf, model=linear_model.Lasso(alpha = 0.3)),
                RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
                #LRModel(conf, model=linear_model.Lasso(alpha = 0.2)),
                ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
                #AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
              ]
    return models
    #return [XGBoostModel(conf, xgb_params, use_cv=True)]
decompose.py (project: channel-pruning, author: yihui-he)
def fc_kernel(X, Y, copy_X=True, W=None, B=None, ret_reg=False,fit_intercept=True):
    """
    return: n c
    """
    assert copy_X == True
    assert len(X.shape) == 2
    if dcfgs.ls == cfgs.solvers.gd:
        w = Worker()
        def wo():
            from .GDsolver import fc_GD
            a,b=fc_GD(X,Y, W, B, n_iters=1)
            return {'a':a, 'b':b}
        outputs = w.do(wo)
        return outputs['a'], outputs['b']
    elif dcfgs.ls == cfgs.solvers.tls:
        return tls(X,Y, debug=True)
    elif dcfgs.ls == cfgs.solvers.keras:
        _reg=keras_kernel()
        _reg.fit(X, Y, W, B)
        return _reg.coef_, _reg.intercept_
    elif dcfgs.ls == cfgs.solvers.lightning:
        #_reg = SGDRegressor(eta0=1e-8, intercept_decay=0, alpha=0, verbose=2)
        _reg = CDRegressor(n_jobs=-1,alpha=0, verbose=2)
        if 0:
            _reg.intercept_=B
            _reg.coef_=W
    elif dcfgs.fc_ridge > 0:
        _reg = Ridge(alpha=dcfgs.fc_ridge)
    else:
        _reg = LinearRegression(n_jobs=-1 , copy_X=copy_X, fit_intercept=fit_intercept)
    _reg.fit(X, Y)
    if ret_reg:
        return _reg
    return _reg.coef_, _reg.intercept_
lasso_regression.py (project: House-Pricing, author: playing-kaggle)
def ridge_train(X,y):
    alphas = [0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75]
    cv_ridge = [rmse_cv(Ridge(alpha = alpha),X,y).mean() for alpha in alphas]
    cv_ridge = pd.Series(cv_ridge, index = alphas)
    cv_ridge.plot(title = "Validation - Just Do It")

    print('min cv is:', cv_ridge.min())

    return alphas[cv_ridge.values.argmin()]

#%%
# ridge regression doesn't zero out any coefficients (unlike lasso, it keeps every feature)
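ridge_train relies on an rmse_cv helper that isn't shown in this excerpt. A common definition consistent with the call (an assumption, not the project's code):

import numpy as np
from sklearn.model_selection import cross_val_score

def rmse_cv(model, X, y):
    # 5-fold cross-validated RMSE; the fold count is a guess.
    mse = -cross_val_score(model, X, y,
                           scoring='neg_mean_squared_error', cv=5)
    return np.sqrt(mse)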
tbs_ml.py (project: eezzy, author: 3Blades)
def spot_check(X, y, type='regression'):
    # 'type' presumably came from an enclosing scope in the original source;
    # it is parameterized here so the regression branch is actually reachable.
    models = []
    if type == 'regression':
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),
            #(RandomForestRegressor(), 'Random Forest Regressor'),
            #(BaggingRegressor(), 'Bagging Regressor'),
            #(GradientBoostingRegressor(), 'Gradient Boosted Regression'),
            #(SVR(), 'Support Vector Regression')
        ]

    splits = 5
    scores = []

    for model, model_name in models:
        score = check_model(model, splits, X, y)
        # get average score
        scores.append(score)

    model_names = map(lambda x: x[1], models)
    for name, score in zip(model_names, scores):
        print('%s: %f' % (name, score))
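spot_check depends on a check_model helper that isn't shown. A plausible stand-in (an assumption) that returns the mean cross-validation score:

import numpy as np
from sklearn.model_selection import cross_val_score

def check_model(model, splits, X, y):
    return np.mean(cross_val_score(model, X, y, cv=splits))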
model.py (project: poormining, author: bowenpay)
def get_classifier(self, X, Y):
        """ ???????
        :param X: ????
        :param Y: ??????
        :return: ??
        """
        clf = Ridge()
        clf.fit(X, Y)
        return clf
predict.py (project: momoCrawler, author: njames741)
def ridge_regression(data, a):
    features = data.columns.tolist()
    features.remove('label')
    response = ['label']
    # Build the Ridge Regression model
    lr = Ridge(alpha=a)
    # Response variable: label (as a DataFrame)
    y = data[response]
    # Feature matrix (as a DataFrame)
    X = data[features]

    # _leave_one_out(lr, X.values, y.values)

    # fit regression model to the data
    model = lr.fit(X, y)
    # Predict with the fitted model
    predicted_y = model.predict(X) # predicted_y is a numpy array
    # Convert y from a DataFrame to a numpy array for the metrics below
    y = np.array(y)

    # Print evaluation metrics
    _print_y_and_predicted_y_and_corr(y, predicted_y)
    _print_r2_score(y, predicted_y)
    _print_coefficients(model, features, '~/Desktop/??_???_lt30.csv')
    _print_MSE(y, predicted_y)
    plot_true_and_pred_scatter(y, predicted_y)
    # std_error(y, predicted_y)
models.py (project: qml, author: quantum13)
def _load_model(self, model_id):
        _, conn = get_engine()

        #todo
        models = {
            'QXgb': QXgb,
            'QXgb2': QXgb2,
            'Ridge': Ridge,
            'RidgeClassifier': RidgeClassifier,
            'KNeighborsClassifier': KNeighborsClassifier,
            'QAvg': QAvg,
            'QRankedAvg': QRankedAvg,
            'QRankedByLineAvg': QRankedByLineAvg,
            'QStackModel': QStackModel,
            'LogisticRegression': LogisticRegression,
            'DecisionTreeClassifier': DecisionTreeClassifier,
            'QPostProcessingModel': QPostProcessingModel,
            'RandomForestClassifier': RandomForestClassifier,
            'ExtraTreesClassifier': ExtraTreesClassifier,
            'QAvgOneModelData': QAvgOneModelData,
            'QNN1': QNN1,
            'QNN2': QNN2,
        }

        res = conn.execute(
            """
                select cls, params, descr, predict_fn
                from qml_models 
                where 
                    model_id='{}'
            """.format(model_id)
        ).fetchone()

        if not res:
            raise Exception('Missing {} model'.format(model_id))

        model = models[res['cls']](**json.loads(res['params']))
        self.add(model_id, model, res['descr'], res['predict_fn'])
        return model

