def fetch_data():
    """Download the train and test CSV files and load them as DataFrames.

    Both files are fetched over HTTP into temporary files, parsed with
    ``pd.read_csv`` using the module-level ``COLUMNS`` as the header, and
    given an extra integer column named by the module-level ``LABEL_COLUMN``:
    1 when the row's ``"income_bracket"`` string contains ``">50K"``,
    otherwise 0.

    The temporary files are closed (and thereby removed) before returning,
    even if a download or parse step raises.

    Returns:
        A ``(df_train, df_test)`` tuple of DataFrames.

    NOTE(review): assumes ``COLUMNS`` includes ``"income_bracket"`` and that
    every value in that column is a string -- confirm against the
    definitions elsewhere in this file.
    """
    base_url = "http://mlr.cs.umass.edu/ml/machine-learning-databases/adult/"

    # Context managers guarantee both temp files are closed and deleted;
    # previously the handles were left open for the garbage collector.
    with tempfile.NamedTemporaryFile() as train_file, \
            tempfile.NamedTemporaryFile() as test_file:
        req.urlretrieve(base_url + "adult.data", train_file.name)
        req.urlretrieve(base_url + "adult.test", test_file.name)

        # urlretrieve writes via the path, not via these handles, so each
        # handle is still positioned at the start of its file when read.
        df_train = pd.read_csv(train_file, names=COLUMNS,
                               skipinitialspace=True)
        # The first row of the test file is skipped rather than parsed.
        df_test = pd.read_csv(test_file, names=COLUMNS,
                              skipinitialspace=True, skiprows=1)

    # Derive the binary label with a substring test on the bracket text.
    for frame in (df_train, df_test):
        frame[LABEL_COLUMN] = (frame["income_bracket"]
                               .apply(lambda x: ">50K" in x)).astype(int)

    return df_train, df_test
评论列表
文章目录