tests.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:datacleaner 作者: rhiever 项目源码 文件源码
def test_autoclean_real_data():
    """Test autoclean() with the adult data set.

    Loads the gzip-compressed, tab-separated ``adult.csv.gz`` file, blanks
    out a range of ``age`` and ``education`` values, and checks that the
    frame returned by ``autoclean()`` equals one cleaned by hand here:
    median imputation for ``age``, most-frequent-value imputation for
    ``education``, then label encoding of the listed string columns.
    """
    adult_data = pd.read_csv('adult.csv.gz', sep='\t', compression='gzip')
    # Inject missing values; .loc slices are label-based and inclusive.
    adult_data.loc[30:60, 'age'] = np.nan
    adult_data.loc[90:100, 'education'] = np.nan

    # Copy before calling autoclean() so the expected frame is built
    # independently of whatever autoclean() does with its argument.
    hand_cleaned_adult_data = adult_data.copy()

    # Assign the filled column back instead of calling
    # ``df[col].fillna(..., inplace=True)``: that chained form operates on a
    # temporary Series under pandas Copy-on-Write and would leave the NaNs
    # in the DataFrame untouched.
    age_median = hand_cleaned_adult_data['age'].median()
    hand_cleaned_adult_data['age'] = hand_cleaned_adult_data['age'].fillna(age_median)

    education_mode = hand_cleaned_adult_data['education'].mode()[0]
    hand_cleaned_adult_data['education'] = (
        hand_cleaned_adult_data['education'].fillna(education_mode)
    )

    # A fresh encoder per column, each fitted only on that column's values.
    for column in ['workclass', 'education', 'marital-status',
                   'occupation', 'relationship', 'race',
                   'sex', 'native-country', 'label']:
        hand_cleaned_adult_data[column] = LabelEncoder().fit_transform(hand_cleaned_adult_data[column].values)

    cleaned_adult_data = autoclean(adult_data)

    assert cleaned_adult_data.equals(hand_cleaned_adult_data)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号