prepare.py 文件源码

python
阅读 41 收藏 0 点赞 0 评论 0

项目:Titanic 作者: dataventureutc 项目源码 文件源码
def load_data(path='data/train.csv'):
    """Load the training CSV and return model-ready features and labels.

    The CSV must contain the columns ``Cabin``, ``Name``, ``PassengerId``,
    ``Ticket``, ``Pclass``, ``Sex``, ``Embarked`` and ``Survived``; any other
    columns are passed through unchanged as features.

    Processing steps:
      1. Drop the columns that are never used as features.
      2. Drop every row that still has a missing value.
      3. One-hot encode ``Embarked`` (``embarked_*``) and ``Pclass``
         (``pclass_*``).
      4. Encode ``Sex`` as a single 0/1 column named ``sex``
         (1 for ``'female'``, 0 for anything else).

    Args:
        path: Location of the CSV file. Defaults to ``'data/train.csv'``.

    Returns:
        A ``(features, labels)`` tuple of DataFrames. ``features`` holds every
        remaining column except ``Survived``; ``labels`` is a one-column
        DataFrame containing ``Survived``.
    """
    data = pd.read_csv(path)

    # Remove unused columns *before* dropping incomplete rows: a gap in a
    # column that is discarded anyway (e.g. Cabin) must not cost us the row.
    data = data.drop(['Cabin', 'Name', 'PassengerId', 'Ticket'], axis=1)

    # Drop rows with gaps in any of the remaining columns.
    data = data.dropna()

    # Categorical values into numerical (one-hot encoding). get_dummies keeps
    # the row index, so the index-based join lines the rows up correctly.
    one_hot_embarked = pd.get_dummies(data['Embarked'], prefix='embarked')
    one_hot_pclass = pd.get_dummies(data['Pclass'], prefix='pclass')
    data = data.join(one_hot_embarked).join(one_hot_pclass)

    # The sex column has only two values, so a single 0/1 column is enough
    # instead of a two-column one-hot encoding. The vectorised comparison
    # also works on an empty frame, unlike a row-wise apply.
    data['sex'] = (data['Sex'] == 'female').astype(int)

    # Drop the raw categorical columns now that they are encoded.
    data = data.drop(['Pclass', 'Sex', 'Embarked'], axis=1)

    return data.drop(['Survived'], axis=1), data[['Survived']]
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号