def drop_variable_before_preparation(self, df, *, max_nan_fraction=0.3):
    """Drop features whose share of missing values exceeds a limit.

    Candidate features are taken from the index of
    ``self.features_with_missing_values_in_dataframe(df)``. A candidate is
    dropped when its NaN count is strictly greater than
    ``max_nan_fraction * df.shape[0]``.

    Args:
        df: DataFrame to filter. It is not modified in place.
        max_nan_fraction: Largest tolerated fraction of missing values per
            feature, relative to the number of rows. Defaults to 0.3.

    Returns:
        ``df`` itself when no candidate exceeds the limit, otherwise a new
        DataFrame without the offending columns.
    """
    # Acceptable limit of NaN in features, expressed as a row count.
    limit_of_nans = max_nan_fraction * df.shape[0]
    candidates = self.features_with_missing_values_in_dataframe(df).index
    features_to_drop = [
        feature
        for feature in candidates
        if df[feature].isnull().sum() > limit_of_nans
    ]
    if not features_to_drop:
        # Nothing to remove: hand back the caller's object unchanged.
        return df
    # One drop call instead of re-copying the frame once per feature.
    return df.drop(features_to_drop, axis=1)
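# NOTE: web-page residue from the source article (navigation labels
# "Comment list" and "Table of contents"); not part of the code.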