def set_missing_ages(df):
    """Fill missing ``Age`` values in *df* using a random-forest regressor.

    The regressor is trained on the rows whose ``Age`` is present, using the
    ``Fare``, ``Parch``, ``SibSp`` and ``Pclass`` columns as features, and is
    then used to predict ``Age`` for the rows where it is null.

    Args:
        df: DataFrame containing at least the columns ``Age``, ``Fare``,
            ``Parch``, ``SibSp`` and ``Pclass``. It is modified in place.

    Returns:
        A ``(df, rfr)`` tuple: the same DataFrame object with its null
        ``Age`` entries filled in, and the fitted ``RandomForestRegressor``
        so callers can reuse it on other data.
    """
    # Target column first, feature columns after it; the slicing below
    # relies on this ordering.
    age_df = df[['Age', 'Fare', 'Parch', 'SibSp', 'Pclass']]
    missing_age = age_df.Age.isnull()

    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` gives
    # the same ndarray and is available on both old and new pandas.
    known_age = age_df[~missing_age].values
    unknown_age = age_df[missing_age].values

    y = known_age[:, 0]
    X = known_age[:, 1:]

    # Fit with RandomForestRegressor (fixed seed for reproducible results).
    rfr = RandomForestRegressor(random_state=0, n_estimators=2000, n_jobs=-1)
    rfr.fit(X, y)

    # Only predict when something is actually missing: scikit-learn rejects
    # a zero-row input, which would otherwise turn a no-op into an error.
    if missing_age.any():
        predicted_ages = rfr.predict(unknown_age[:, 1:])
        # Write the predictions back into the rows that had no age.
        df.loc[missing_age, 'Age'] = predicted_ages

    return df, rfr
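# 评论列表 (comment list) -- web-page navigation residue, not code
# 文章目录 (table of contents) -- web-page navigation residue, not code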