frisk.py 文件源码-python代码片段

frisk.py 文件源码

python

阅读 29 收藏 0 点赞 0 评论 0

项目：ReducedVarianceReparamGradients 作者: andymiller 项目源码文件源码

def process_dataset():
    """Load the frisk data set and tag each row with its precinct type.

    Reads ``data/frisk/frisk_with_noise.dat`` (relative to this module's
    directory), skipping the first 6 lines and splitting columns on
    whitespace.  The file must provide at least the columns ``pop``,
    ``precinct`` and ``eth``.

    For every precinct, the share of the population with ``eth == 1``
    (black) is computed and binned with ``pd.cut`` into the intervals
    ``(0, 0.1]``, ``(0.1, 0.4]`` and ``(0.4, 1.0]``.  The integer code of
    the bin (0, 1 or 2) is stored in a new ``precinct_type`` column; a
    share that falls outside every bin (e.g. exactly 0, or a precinct with
    no ``eth == 1`` rows) gets the code -1.

    Returns:
        pandas.DataFrame: the loaded table with the extra
        ``precinct_type`` column.
    """
    data_dir = os.path.dirname(__file__)
    data_path = os.path.join(data_dir, 'data/frisk/frisk_with_noise.dat')
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True option.
    df = pd.read_csv(data_path, skiprows=6, sep=r'\s+')

    # compute proportion black in precinct, black = 1
    # first aggregate by precinct/ethnicity, and sum over populations
    pop_by_group = df.groupby(['precinct', 'eth'])['pop'].sum()
    total_pop = pop_by_group.groupby(level='precinct').sum()
    black_pop = pop_by_group.xs(1, level='eth')

    # Series.div is always true division and aligns on the precinct label,
    # so nothing here depends on precincts being numbered 1..75.
    percent_black = black_pop.div(total_pop)

    # Bin the shares; .cat.codes keeps the precinct label as its index.
    type_codes = pd.cut(percent_black, [0, .1, .4, 1.]).cat.codes

    # Broadcast the per-precinct code back onto every row by label lookup.
    df['precinct_type'] = type_codes.loc[df['precinct'].values].values
    return df