def test_autoclean_no_nans_with_strings():
    """Check autoclean() on NaN-free data containing a string-encoded categorical column.

    Columns 'A' and 'B' hold random floats; column 'C' holds one of three
    fruit names.  The expected frame is built by hand by label-encoding
    column 'C' on a copy of the input, and the result of autoclean() must
    equal that frame.
    """
    fruit_by_code = {0: 'oranges', 1: 'apples', 2: 'bananas'}

    # Draw the columns in A, B, C order so the random stream is consumed
    # in a fixed sequence.
    data = pd.DataFrame({
        'A': np.random.rand(1000),
        'B': np.random.rand(1000),
        'C': np.random.randint(0, 3, 1000),
    })
    # Swap the integer codes in 'C' for their string labels.
    data['C'] = data['C'].apply(lambda code: fruit_by_code[code])

    # Build the reference result on a copy, taken before autoclean() sees
    # the original frame.
    expected = data.copy()
    encoder = LabelEncoder()
    expected['C'] = encoder.fit_transform(expected['C'].values)

    actual = autoclean(data)

    assert actual.equals(expected)
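# Page navigation residue (not code): "评论列表" (comment list)
# Page navigation residue (not code): "文章目录" (article table of contents)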