def preprocess_data(path, is_test=False):
    """Load a passenger CSV and return its columns mean-normalised.

    Steps, in order:

    1. Read the CSV with ``PassengerId`` as the index and drop the
       ``Name``, ``Ticket`` and ``Cabin`` columns.
    2. Encode ``Sex`` (female=0, male=1) and ``Embarked`` (Q=0, C=1, S=2)
       as integers. Values outside those mappings are left untouched.
    3. Handle missing values: when ``is_test`` is true every missing cell
       becomes 0 so that no row is lost; otherwise rows lacking ``Age`` or
       ``Embarked`` are dropped.
    4. If a ``Survived`` column is present, drop rows where it is missing.
    5. Rescale every column as ``(x - mean) / (max - min)``.

    Args:
        path: Anything ``pd.read_csv`` accepts (file path or buffer).
        is_test: Fill missing values with 0 instead of dropping rows.

    Returns:
        A DataFrame indexed by ``PassengerId`` holding the rescaled columns.
        A column whose values are all equal comes out as 0 rather than NaN.

    Raises:
        KeyError: If ``Name``, ``Ticket`` or ``Cabin`` is absent, or, when
            ``is_test`` is false, if ``Age`` or ``Embarked`` is absent.
    """
    category_codes = {
        'Sex': {"female": 0, "male": 1},
        'Embarked': {"Q": 0, "C": 1, "S": 2},
    }

    data = pd.read_csv(path, index_col='PassengerId')
    data = data.drop(['Name', 'Ticket', 'Cabin'], axis=1)

    # Encode column by column instead of a whole-frame replace(): the result
    # gets a numeric dtype directly, without relying on pandas' implicit
    # object-to-int downcast. Unknown values (and NaN) pass through as-is.
    for column, codes in category_codes.items():
        if column in data.columns:
            data[column] = data[column].map(
                lambda value, codes=codes: codes.get(value, value)
            )

    if is_test:
        # Every row must be kept here, so gaps are filled rather than dropped.
        data = data.fillna(0)
    else:
        data = data.dropna(subset=['Age', 'Embarked'])

    if 'Survived' in data.columns:
        data = data.dropna(subset=['Survived'])

    # NOTE(review): 'Survived', when present, is rescaled together with the
    # other columns, and the statistics come from this file alone (so two
    # files are scaled independently) -- confirm callers expect both.
    value_range = data.max() - data.min()
    # A constant column has zero range; divide by 1 so it yields 0, not 0/0.
    value_range = value_range.where(value_range != 0, 1)
    return (data - data.mean()) / value_range
评论列表
文章目录