functions.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:binet 作者: crisjf 项目源码 文件源码
def _residualNet(data, uselog=True, c=None, p=None, x=None, useaggregate=True,
                 numericalControls=None, categoricalControls=None):
    """
    Regress the flow of a bipartite network on control variables and return the residuals.

    The input is a table of the form source,target,flow. The flow column is
    regressed (ordinary least squares with an intercept) on the requested
    numerical controls, on 0/1 indicator columns built from the categorical
    controls and, optionally, on the total flow of each source and each target.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw data. It has source,target,volume (trade, number of people etc.).
        It is not modified.
    c,p,x : str (optional)
        Labels of the columns in data used for source,target,volume.
        If not provided it will use the first, second, and third.
    numericalControls : list (optional)
        List of columns to use as numerical controls. The list is not modified.
    categoricalControls : list (optional)
        List of columns to use as categorical controls. Each one is expanded
        into one 0/1 column per distinct value, leaving one value out as the
        reference level.
    uselog : boolean (True)
        If True it will use the base-10 logarithm of the provided weight
        (and of the aggregates when useaggregate is True). Rows where the
        weight is exactly zero are dropped first.
    useaggregate : boolean (True)
        If True it will calculate the aggregate of the volume on both sides
        (c and p) and use them as numerical controls.

    Returns
    -------
    net : pandas.DataFrame
        Table with c,p,x,x_res, where x_res is the residual of regressing x on
        the given control variables. When uselog is True, x is the
        log-transformed weight.
    """
    c = data.columns.values[0] if c is None else c
    p = data.columns.values[1] if p is None else p
    x = data.columns.values[2] if x is None else x

    # Work on private copies: the aggregate column names are appended below and
    # must not leak into the caller's list (or into a shared default) between calls.
    numericalControls = [] if numericalControls is None else list(numericalControls)
    categoricalControls = [] if categoricalControls is None else list(categoricalControls)

    # Explicit copy so the column assignments below never write through to `data`.
    data_ = data[[c, p, x] + numericalControls + categoricalControls].copy()

    if useaggregate:
        x_c = x + '_' + c
        x_p = x + '_' + p
        # Select x before summing so unrelated (possibly non-numeric) columns
        # are never aggregated.
        total_c = data.groupby(c)[[x]].sum().reset_index().rename(columns={x: x_c})
        total_p = data.groupby(p)[[x]].sum().reset_index().rename(columns={x: x_p})
        data_ = merge(data_, total_c)
        data_ = merge(data_, total_p)
        numericalControls += [x_c, x_p]

    if uselog:
        # log10(0) is -inf, so zero flows are removed before the transform.
        data_ = data_[data_[x] != 0].copy()
        data_[x] = np.log10(data_[x])
        if useaggregate:
            data_[x_c] = np.log10(data_[x_c])
            data_[x_p] = np.log10(data_[x_p])

    # Expand every categorical control into indicator columns. The first
    # observed value is the reference level and gets no column.
    _categoricalControls = []
    for var in dict.fromkeys(categoricalControls):
        levels = list(data_[var].unique())
        for v in levels[1:]:
            dummy = var + '_' + str(v)
            _categoricalControls.append(dummy)
            data_[dummy] = (data_[var] == v).astype(int)

    # Drop repeated column names while keeping a deterministic column order.
    regressors = list(dict.fromkeys(numericalControls)) + list(dict.fromkeys(_categoricalControls))

    Y = data_[x].values
    X = data_[regressors].values
    X = sm.add_constant(X)

    model = sm.OLS(Y, X).fit()
    data_[x + '_res'] = Y - model.predict(X)
    return data_[[c, p, x, x + '_res']]
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号