python类add_constant()的实例源码

balance_preditctor.py 文件源码 项目:DSI-personal-reference-kit 作者: teb311 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def build_model(y, x, add_constant=True):
    '''
        Build a linear regression model from the provided data

        Provided:
        y: a series or single column dataframe holding our solution vector for linear regression
        x: a dataframe that runs parallel to y, with all the features for our linear regression
        add_constant: a boolean, if true it will add a constant row to our provided x data. Otherwise
                      this method assumes you've done-so already, or do not want one for some good reason

        Return: a linear regression model from StatsModels and the data which was used to train the model.
                If add_constant was true this will be a new dataframe, otherwise it will be x
    '''
    if add_constant:
        x = sm.add_constant(x)

    model = sm.OLS(y, x).fit()
    return (model, x)
smoothers.py 文件源码 项目:plotnine 作者: has2k1 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def lm(data, xseq, **params):
    """
    Fit OLS / WLS if data has weight
    """
    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)

    try:
        model = sm.WLS(data['y'], X, weights=data['weight'])
    except KeyError:
        model = sm.OLS(data['y'], X)

    results = model.fit(**params['method_args'])
    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        alpha = 1 - params['level']
        prstd, iv_l, iv_u = wls_prediction_std(
            results, Xseq, alpha=alpha)
        data['se'] = prstd
        data['ymin'] = iv_l
        data['ymax'] = iv_u

    return data
smoothers.py 文件源码 项目:plotnine 作者: has2k1 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def gls(data, xseq, **params):
    """
    Fit GLS
    """
    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)
    results = sm.GLS(data['y'], X).fit(**params['method_args'])
    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        alpha = 1 - params['level']
        prstd, iv_l, iv_u = wls_prediction_std(
            results, Xseq, alpha=alpha)
        data['se'] = prstd
        data['ymin'] = iv_l
        data['ymax'] = iv_u

    return data
MackChainladder.py 文件源码 项目:chainladder-python 作者: jbogaardt 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def __get_tail_weighted_time_period(self):
        """ Method to approximate the weighted-average development age assuming
        exponential tail fit.

        Returns: float32
        """
        #n = self.triangle.ncol-1
        #y = self.f[:n]
        #x = np.array([i + 1 for i in range(len(y))]) 
        #ldf_reg = stats.linregress(x, np.log(y - 1))
        #time_pd = (np.log(self.f[n] - 1) - ldf_reg[1]) / ldf_reg[0]

        n = self.triangle.ncol-1
        y = Series(self.f[:n])
        x = [num+1 for num, item in enumerate(y)]
        y.index = x
        x = sm.add_constant((y.index)[y>1])
        y = y[y>1]
        ldf_reg = sm.OLS(np.log(y-1),x).fit()
        time_pd = (np.log(self.f[n] - 1) - ldf_reg.params[0]) / ldf_reg.params[1]
        #tail_factor = np.exp(tail_model.params[0] + np.array([i+2 for i in range(n,n+100)]) * tail_model.params[1]).astype(float) + 1)

        return time_pd
MunichChainladder.py 文件源码 项目:chainladder-python 作者: jbogaardt 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __get_MCL_model(self): 
        modelsI=[]
        modelsP=[]
        for i in range(len(self.Incurred.data.columns)):
                modelsI.append(cl.WRTO(self.Incurred.data.iloc[:,i].dropna(), self.Paid.data.iloc[:,i].dropna(), w=1/self.Incurred.data.iloc[:,i].dropna()))
                modelsP.append(cl.WRTO(self.Paid.data.iloc[:,i].dropna(), self.Incurred.data.iloc[:,i].dropna(), w=1/self.Paid.data.iloc[:,i].dropna()))       
        q_f = np.array([item.coefficient for item in modelsI])
        qinverse_f = np.array([item.coefficient for item in modelsP])
        rhoI_sigma = np.array([item.sigma for item in modelsI])
        rhoP_sigma = np.array([item.sigma for item in modelsP])
        #y = np.log(rhoI_sigma[:-1])
        #x = np.array([i + 1 for i in range(len(y))])
        #x = sm.add_constant(x)
        #OLS = sm.OLS(y,x).fit()
        #tailsigma = np.exp((x[:,1][-1]+ 1) * OLS.params[1] + OLS.params[0])
        return rhoI_sigma, rhoP_sigma, q_f, qinverse_f
test_ols.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def _check_wls(self, x, y, weights):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x, weights=1 / weights)

        combined = x.copy()
        combined['__y__'] = y
        combined['__weights__'] = weights
        combined = combined.dropna()

        endog = combined.pop('__y__').values
        aweights = combined.pop('__weights__').values
        exog = sm.add_constant(combined.values, prepend=False)

        sm_result = sm.WLS(endog, exog, weights=1 / aweights).fit()

        assert_almost_equal(sm_result.params, result._beta_raw)
        assert_almost_equal(sm_result.resid, result._resid_raw)

        self.checkMovingOLS('rolling', x, y, weights=weights)
        self.checkMovingOLS('expanding', x, y, weights=weights)
test_ols.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def checkOLS(self, exog, endog, x, y):
        reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x)

        # check that sparse version is the same
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            sparse_result = ols(y=y.to_sparse(), x=x.to_sparse())
        _compare_ols_results(result, sparse_result)

        assert_almost_equal(reference.params, result._beta_raw)
        assert_almost_equal(reference.df_model, result._df_model_raw)
        assert_almost_equal(reference.df_resid, result._df_resid_raw)
        assert_almost_equal(reference.fvalue, result._f_stat_raw[0])
        assert_almost_equal(reference.pvalues, result._p_value_raw)
        assert_almost_equal(reference.rsquared, result._r2_raw)
        assert_almost_equal(reference.rsquared_adj, result._r2_adj_raw)
        assert_almost_equal(reference.resid, result._resid_raw)
        assert_almost_equal(reference.bse, result._std_err_raw)
        assert_almost_equal(reference.tvalues, result._t_stat_raw)
        assert_almost_equal(reference.cov_params(), result._var_beta_raw)
        assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)

        _check_non_raw_results(result)
test_against_stata.py 文件源码 项目:linearmodels 作者: bashtage 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def housing(request):
    result = HOUSING_RESULTS[request.param]
    keys = request.param.split('-')
    mod = MODELS[keys[0]]

    data = HOUSING_DATA
    endog = data.rent
    exog = sm.add_constant(data.pcturban)
    instd = data.hsngval
    instr = data[['faminc', 'region']]
    cov_opts = deepcopy(COV_OPTIONS[keys[1]])
    cov_opts['debiased'] = keys[2] == 'small'
    if keys[0] == 'gmm':
        weight_opts = deepcopy(COV_OPTIONS[keys[1]])
        weight_opts['weight_type'] = weight_opts['cov_type']
        del weight_opts['cov_type']
    else:
        weight_opts = {}

    model_result = mod(endog, exog, instd, instr, **weight_opts).fit(**cov_opts)
    return model_result, result
test_results.py 文件源码 项目:linearmodels 作者: bashtage 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def test_multiple(data):
    dependent = data.set_index(['nr', 'year']).lwage
    exog = sm.add_constant(data.set_index(['nr', 'year'])[['expersq', 'married', 'union']])
    res = PanelOLS(dependent, exog, entity_effects=True, time_effects=True).fit()
    res2 = PanelOLS(dependent, exog, entity_effects=True).fit(cov_type='clustered',
                                                              cluster_entity=True)
    exog = sm.add_constant(data.set_index(['nr', 'year'])[['married', 'union']])
    res3 = PooledOLS(dependent, exog).fit()
    exog = data.set_index(['nr', 'year'])[['exper']]
    res4 = RandomEffects(dependent, exog).fit()
    comp = compare([res, res2, res3, res4])
    assert len(comp.rsquared) == 4
    d = dir(comp)
    for value in d:
        if value.startswith('_'):
            continue
        getattr(comp, value)
test_results.py 文件源码 项目:linearmodels 作者: bashtage 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def test_multiple_no_effects(data):
    dependent = data.set_index(['nr', 'year']).lwage
    exog = sm.add_constant(data.set_index(['nr', 'year'])[['expersq', 'married', 'union']])
    res = PanelOLS(dependent, exog).fit()
    exog = sm.add_constant(data.set_index(['nr', 'year'])[['married', 'union']])
    res3 = PooledOLS(dependent, exog).fit()
    exog = data.set_index(['nr', 'year'])[['exper']]
    res4 = RandomEffects(dependent, exog).fit()
    comp = compare(dict(a=res, model2=res3, model3=res4))
    assert len(comp.rsquared) == 3
    d = dir(comp)
    for value in d:
        if value.startswith('_'):
            continue
        getattr(comp, value)
    compare(OrderedDict(a=res, model2=res3, model3=res4))
pscore.py 文件源码 项目:pscore_match 作者: kellieotto 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def compute(self, method='logistic'):
        """
        Compute propensity score and measures of goodness-of-fit

        Parameters
        ----------
        method : str
            Propensity score estimation method. Either 'logistic' or 'probit'
        """
        predictors = sm.add_constant(self.covariates, prepend=False)
        if method == 'logistic':
            model = sm.Logit(self.treatment, predictors).fit(disp=False, warn_convergence=True)
        elif method == 'probit':
            model = sm.Probit(self.treatment, predictors).fit(disp=False, warn_convergence=True)
        else:
            raise ValueError('Unrecognized method')
        return model.predict()
model.py 文件源码 项目:DriverPower 作者: smshuai 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def run_glm(X, y, model_name):
    """ Train the binomial/negative binomial GLM

    Args:
        X (np.array): scaled X. 
        y (pd.df): four columns response table.

    Returns:
        sm.model: trained GLM models.

    """
    # Add const manually. sm.add_constant cannot add 1 for shape (1, n)
    X = np.c_[X, np.ones(X.shape[0])]
    if model_name == 'Binomial':
        logger.info('Building binomial GLM')
        # make two columns response (# success, # failure)
        y_binom = np.zeros((y.shape[0], 2), dtype=np.int_)
        y_binom[:, 0] = y.nMut
        y_binom[:, 1] = y.length * y.N - y.nMut
        glm = sm.GLM(y_binom, X, family=sm.families.Binomial())
    elif model_name == 'NegativeBinomial':
        logger.info('Building negative binomial GLM')
        # use nMut as response and length*N as exposure
        glm = sm.GLM(y.nMut.values, X, family=sm.families.NegativeBinomial(), exposure=y.length.values*y.N.values+1)
    else:
        sys.stderr.write('Unknown GLM name {}. Must be Binomial or NegativeBinomial'.format(model_name))
        sys.exit(1)
    model = glm.fit()
    return model
smoothers.py 文件源码 项目:plotnine 作者: has2k1 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def rlm(data, xseq, **params):
    """
    Fit RLM
    """
    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)
    results = sm.RLM(data['y'], X).fit(**params['method_args'])
    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        warnings.warn("Confidence intervals are not yet implemented"
                      "for RLM smoothing.")

    return data
smoothers.py 文件源码 项目:plotnine 作者: has2k1 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def glm(data, xseq, **params):
    """
    Fit GLM
    """
    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)
    results = sm.GLM(data['y'], X).fit(**params['method_args'])
    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        # TODO: Depends on statsmodel > 0.7
        # https://github.com/statsmodels/statsmodels/pull/2151
        # https://github.com/statsmodels/statsmodels/pull/3406
        # Remove the try/except when a compatible version is released
        try:
            prediction = results.get_prediction(Xseq)
            ci = prediction.conf_int(1 - params['level'])
            data['ymin'] = ci[:, 0]
            data['ymax'] = ci[:, 1]
        except (AttributeError, TypeError):
            warnings.warn(
                "Cannot compute confidence intervals."
                "Install latest/development version of statmodels.")

    return data
Chainladder.py 文件源码 项目:chainladder-python 作者: jbogaardt 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def get_tail_factor(self, colname_sep='-'):
        """Estimate tail factor, idea from Thomas Mack:
        Returns a tail factor based off of an exponential fit to the LDFs.  This will
        fail if the product of 2nd and 3rd to last LDF < 1.0001.  This also fails if
        the estimated tail is larger than 2.0.  In other areas, exponential fit is
        rejected if the slope parameter p-value >0.5.  This is currently representative
        of the R implementation of this package, but may be enhanced in the future to be
        p-value based.

        Parameters:    
            colname_sep : str
                text to join the names of two adjacent columns representing the
                age-to-age factor column name.

        Returns:
            Pandas.Series of the tail factor.        
        """
        LDF = np.array(self.get_LDF()[:self.triangle.ncol-1])
        if self.tail==False:
            tail_factor=1
        elif len(LDF[LDF>1]) < 2:
            warn("Not enough factors larger than 1.0 to fit an exponential regression.")
            tail_factor = 1
        elif (LDF[-3] * LDF[-2] > 1.0001):
            y = Series(LDF)
            x = sm.add_constant((y.index+1)[y>1])
            y = LDF[LDF>1]
            n, = np.where(LDF==y[-1])[0]
            tail_model = sm.OLS(np.log(y-1),x).fit()
            tail_factor = np.product(np.exp(tail_model.params[0] + np.array([i+2 for i in range(n,n+100)]) * tail_model.params[1]).astype(float) + 1)
            if tail_factor > 2:
                warn("The estimate tail factor was bigger than 2 and has been reset to 1.")
                tail_factor = 1
            if tail_model.f_pvalue > 0.05:
                warn("The p-value of the exponential tail fit is insignificant and tail has been set to 1.")
                tail_factor = 1
        else:
            warn("LDF[-2] * LDF[-1] is not greater than 1.0001 and tail has been set to 1.")
            tail_factor = 1
        return Series(tail_factor, index = [self.triangle.data.columns[-1] + colname_sep + 'Ult'])
modeltest.py 文件源码 项目:strategy 作者: kanghua309 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    def bulid_model(model_name):
        model = model_name()
        return model
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR,RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = bulid_model(model_name)
        model.fit(TrainX,TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print resid
        print("Residual sum of squares: %f"% np.mean(resid ** 2))
        #print model.predict(TestX)
        #print TestY
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid);
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print ("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = stats.diagnostic.het_breushpagan(resid, xs_with_constant)
        print ("Test Heteroskedasticity", pvalue1)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)

        #print "Lagrange Multiplier Statistics:", ljung_box[0]
        print "Test Autocorrelation P-values:", ljung_box[1]
        if any(ljung_box[1] < 0.05):
            print "The residuals are autocorrelated."
        else:
            print "The residuals are not autocorrelated."
algo.py 文件源码 项目:Quantopian_Pairs_Trader 作者: bartchr808 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def apply_half_life(self, time_series):
        lag = np.roll(time_series, 1)
        lag[0] = 0
        ret = time_series - lag
        ret[0] = 0

        # adds intercept terms to X variable for regression
        lag2 = sm.add_constant(lag)

        model = sm.OLS(ret, lag2)
        res = model.fit()

        self.half_life = -np.log(2) / res.params[1]
algo.py 文件源码 项目:Quantopian_Pairs_Trader 作者: bartchr808 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def hedge_ratio(Y, X):
    # Look into using Kalman Filter to calculate the hedge ratio
    X = sm.add_constant(X)
    model = sm.OLS(Y, X).fit()
    return model.params[1]
business_case_solver_without_classes.py 文件源码 项目:themarketingtechnologist 作者: thomhopmans 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def run_logistic_regression(df):
    # Logistic regression
    X = df['pageviews_cumsum']
    X = sm.add_constant(X)
    y = df['is_conversion']
    logit = sm.Logit(y, X)
    logistic_regression_results = logit.fit()
    print(logistic_regression_results.summary())
    return logistic_regression_results
business_case_solver_without_classes.py 文件源码 项目:themarketingtechnologist 作者: thomhopmans 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def predict_probabilities(logistic_regression_results):
    # Predict the conversion probability for 0 up till 50 pageviews
    X = sm.add_constant(range(0, 50))
    y_hat = logistic_regression_results.predict(X)
    df_hat = pd.DataFrame(zip(X[:, 1], y_hat))
    df_hat.columns = ['X', 'y_hat']
    p_conversion_25_pageviews = df_hat.ix[25]['y_hat']
    print("")
    print("The probability of converting after 25 pageviews is {}".format(p_conversion_25_pageviews))
business_case_solver.py 文件源码 项目:themarketingtechnologist 作者: thomhopmans 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def run_logistic_regression(self):
        # Logistic regression
        X = self.df['pageviews_cumsum']
        X = sm.add_constant(X)
        y = self.df['is_conversion']
        logit = sm.Logit(y, X)
        self.logistic_regression_results = logit.fit()
        print self.logistic_regression_results.summary()
business_case_solver.py 文件源码 项目:themarketingtechnologist 作者: thomhopmans 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def predict_probabilities(self):
        # Predict the conversion probability for 0 up till 50 pageviews
        X = sm.add_constant(range(0, 50))
        y_hat = self.logistic_regression_results.predict(X)
        df_hat = pd.DataFrame(zip(X[:, 1], y_hat))
        df_hat.columns = ['X', 'y_hat']
        print ""
        print "For example, the probability of converting after 25 pageviews is {}".format(df_hat.ix[25]['y_hat'])
qap.py 文件源码 项目:mrqap-python 作者: lisette-espin 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def ols(self, x, y):
        xflatten = np.delete(x, [i*(x.shape[0]+1)for i in range(x.shape[0])])
        yflatten = np.delete(y, [i*(y.shape[0]+1)for i in range(y.shape[0])])
        xflatten = sm.add_constant(xflatten)
        model = sm.OLS(yflatten,xflatten)
        results = model.fit()
        print results.summary()
selection.py 文件源码 项目:Enrich2 作者: FowlerLab 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def regression_apply(row, timepoints, weighted):
    """
    :py:meth:`pandas.DataFrame.apply` apply function for calculating 
    enrichment using linear regression. If *weighted* is ``True`` perform
    weighted least squares; else perform ordinary least squares.

    Weights for weighted least squares are included in *row*.

    Returns a :py:class:`pandas.Series` containing regression coefficients,
    residuals, and statistics.
    """
    # retrieve log ratios from the row
    y = row[['L_{}'.format(t) for t in timepoints]]

    # re-scale the x's to fall within [0, 1]
    xvalues = [x / float(max(timepoints)) for x in timepoints]

    # perform the fit
    X = sm.add_constant(xvalues) # fit intercept
    if weighted:
        W = row[['W_{}'.format(t) for t in timepoints]]
        fit = sm.WLS(y, X, weights=W).fit()
    else:
        fit = sm.OLS(y, X).fit()

    # re-format as a data frame row
    values = np.concatenate([fit.params,  [fit.bse['x1'], fit.tvalues['x1'], 
                            fit.pvalues['x1']], fit.resid])
    index = ['intercept', 'slope', 'SE_slope', 't', 'pvalue_raw'] + \
            ['e_{}'.format(t) for t in timepoints]
    return pd.Series(data=values, index=index)
test_postestimation.py 文件源码 项目:linearmodels 作者: bashtage 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def data():
    return AttrDict(dep=SIMULATED_DATA.y_robust,
                    exog=add_constant(SIMULATED_DATA[['x3', 'x4', 'x5']]),
                    endog=SIMULATED_DATA[['x1', 'x2']],
                    instr=SIMULATED_DATA[['z1', 'z2']])
test_against_stata.py 文件源码 项目:linearmodels 作者: bashtage 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def construct_model(key):
    model, nendog, nexog, ninstr, weighted, var, other = key.split('-')
    var = var.replace('wmatrix', 'vce')
    mod = MODELS[model]
    data = SIMULATED_DATA
    endog = data[['x1', 'x2']] if '2' in nendog else data.x1
    exog = data[['x3', 'x4', 'x5']]
    instr = data[['z1', 'z2']] if '2' in ninstr else data.z1
    deps = {'vce(unadjusted)': data.y_unadjusted,
            'vce(robust)': data.y_robust,
            'vce(cluster cluster_id)': data.y_clustered,
            'vce(hac bartlett 12)': data.y_kernel}
    dep = deps[var]
    if 'noconstant' not in other:
        exog = sm.add_constant(data[['x3', 'x4', 'x5']])

    cov_opts = deepcopy(SIMULATED_COV_OPTIONS[var])
    cov_opts['debiased'] = 'small' in other
    mod_options = {}
    if 'True' in weighted:
        mod_options['weights'] = data.weights
    if model == 'gmm':
        mod_options.update(deepcopy(SIMULATED_COV_OPTIONS[var]))
        mod_options['weight_type'] = mod_options['cov_type']
        del mod_options['cov_type']
        mod_options['center'] = 'center' in other

    model_result = mod(dep, exog, endog, instr, **mod_options).fit(**cov_opts)
    if model == 'gmm' and 'True' in weighted:
        pytest.skip('Weighted GMM differs slightly')
    return model_result
test_model.py 文件源码 项目:linearmodels 作者: bashtage 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def test_2sls_direct(data):
    mod = IV2SLS(data.dep, add_constant(data.exog), data.endog, data.instr)
    res = mod.fit()
    x = np.c_[add_constant(data.exog), data.endog]
    z = np.c_[add_constant(data.exog), data.instr]
    y = data.y
    xhat = z @ pinv(z) @ x
    params = pinv(xhat) @ y
    assert_allclose(res.params, params.ravel())
    # This is just a quick smoke check of results
    get_all(res)
test_results.py 文件源码 项目:linearmodels 作者: bashtage 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_incorrect_type(data):
    dependent = data.set_index(['nr', 'year']).lwage
    exog = sm.add_constant(data.set_index(['nr', 'year'])[['expersq', 'married', 'union']])
    mod = PanelOLS(dependent, exog)
    res = mod.fit()
    mod2 = IV2SLS(mod.dependent.dataframe, mod.exog.dataframe, None, None)
    res2 = mod2.fit()
    with pytest.raises(TypeError):
        compare(dict(model1=res, model2=res2))
experiments.py 文件源码 项目:deep-iv 作者: allentran 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def fit_ols(y, x, idx=-1):
        ols = OLS(y, add_constant(x))
        results = ols.fit()
        return results.params.values[idx], results.cov_params().values[idx, idx]
experiments.py 文件源码 项目:deep-iv 作者: allentran 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def weak_instruments(self, n_sims=20):

        np.random.seed(1692)

        model = feedforward.FeedForwardModel(19, 1, dense_size=60, n_dense_layers=2)

        treatment_effects = []
        ols_betas, ols_ses = [], []
        old_corrs, new_corrs = [], []
        for _ in xrange(n_sims):
            df = self.treatment_gen.simulate_data(False)

            X = np.hstack((self.x, df['new_treat'].values[:, None]))
            Z = np.hstack((self.x, df['instrument'].values[:, None]))

            ols_beta, ols_se = self.fit_ols(df['treatment_effect'], X)
            ols_betas.append(ols_beta)
            ols_ses.append(ols_se)

            old_corr = df[['instrument', 'new_treat']].corr().values[0, 1]
            new_instrument, new_corr = model.fit_instruments(X, Z, df['treatment_effect'].values, batchsize=128)
            new_corrs.append(new_corr)
            old_corrs.append(old_corr)

            Z2 = Z.copy()
            Z2[:, -1] = new_instrument[:, 0]

            iv = IV2SLS(df['treatment_effect'].values.flatten(), add_constant(X), add_constant(Z2))

            model.reset_params()

        if new_corr:
            logger.info("Old corr: %.2f, New corr: %.2f", np.mean(old_corrs), np.mean(new_corrs))
        logger.info("Treatment effect (OLS): %.3f (%.4f)", np.mean(ols_betas), np.mean(ols_ses))
        logger.info("Treatment effect: %.3f (%.4f)", np.mean(treatment_effects), np.std(treatment_effects))


问题


面经


文章

微信
公众号

扫码关注公众号