def process_dataset():
    """Load the noisy frisk data set and tag every row with a precinct type.

    Reads ``data/frisk/frisk_with_noise.dat`` (located relative to this
    module), skipping the 6 lines that precede the column header. The
    population is summed per (precinct, eth) pair, and for each precinct the
    share of the population with ``eth == 1`` (black) is computed. That share
    is binned into ``(0, .1]``, ``(.1, .4]`` and ``(.4, 1.]``; the bin code
    (0, 1 or 2) is stored in a new ``precinct_type`` column.

    Notes:
        * Precinct ids are expected to be the integers 1..75; the lookup
          below indexes the per-precinct codes with ``precinct - 1``.
        * A precinct without any ``eth == 1`` rows raises ``KeyError``.
        * ``pd.cut`` bins are right-closed, so a share of exactly 0 falls
          outside every bin and receives the code -1.

    Returns:
        pandas.DataFrame: the loaded data with the added ``precinct_type``
        column.
    """
    data_dir = os.path.dirname(__file__)
    data_path = os.path.join(data_dir, 'data/frisk/frisk_with_noise.dat')
    # sep=r'\s+' is the supported spelling of the deprecated
    # delim_whitespace=True and is accepted by old and new pandas alike.
    df = pd.read_csv(data_path, skiprows=6, sep=r'\s+')

    # Aggregate by precinct/ethnicity and sum over populations.
    pop_by_group = df.groupby(['precinct', 'eth'])['pop'].sum()

    # Proportion black in each precinct (black: eth == 1). range() replaces
    # the Python-2-only xrange(); .loc makes the label-based lookups explicit.
    shares = []
    for precinct in range(1, 76):
        pop_by_eth = pop_by_group.loc[precinct]
        shares.append(pop_by_eth.loc[1] / float(pop_by_eth.sum()))
    percent_black = np.array(shares)

    # Bin the shares; passing an ndarray makes pd.cut return a Categorical
    # whose .codes array holds one integer bin code per precinct.
    precinct_type = pd.cut(percent_black, [0, .1, .4, 1.])

    # Broadcast the per-precinct code back onto every row (ids are 1-based).
    df['precinct_type'] = precinct_type.codes[df.precinct.values - 1]
    return df
frisk.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录