def create_id_df(cls, df, is_train):
    """
    Build a table of numeric ids for the train and test rows of ``df``.

    Distinct ``id0`` values are numbered from 0 in sorted order, separately
    for the train rows (``id_tr``) and for the test rows (``id_te``).
    ``id`` equals ``id_tr`` on train rows; on test rows it is ``id_te``
    shifted by the number of distinct train ``id0`` values, so test ids
    start right after the train ids.

    :param cls: not used by this function.
    :param df: DataFrame containing at least the columns "id0" and "label";
        it is copied, not modified.
    :param is_train: boolean sequence with one entry per row of ``df``;
        truthy marks a train row, falsy a test row.
    :rtype: DataFrame
    :return: dataFrame, sorted by id,
        columns are ["label", "id0", "id", "id_tr", "id_te"].
        Every missing value is replaced by -1, which includes ``id_tr`` on
        test rows, ``id_te`` on train rows and any NaN in "label"/"id0".
        The three id columns are float-valued.
    """
    df = df[["id0", "label"]].copy()
    df = df.reset_index(drop=True)

    # Force a boolean mask: `~` on an integer 0/1 array is a bitwise NOT
    # (giving -1/-2), not a logical negation.
    is_train = np.asarray(is_train, dtype=bool)
    is_test = ~is_train

    # np.unique returns the sorted distinct values and, via return_inverse,
    # each row's position in that sorted list -- i.e. a label encoding.
    tr_classes, tr_codes = np.unique(
        np.asarray(df.loc[is_train, "id0"]), return_inverse=True
    )
    _, te_codes = np.unique(
        np.asarray(df.loc[is_test, "id0"]), return_inverse=True
    )

    # NaN marks "not applicable" until the final fillna(-1).
    df["id_tr"] = np.nan
    df["id_te"] = np.nan
    df.loc[is_train, "id_tr"] = tr_codes
    df.loc[is_test, "id_te"] = te_codes

    # Test ids are offset so they never collide with train ids.
    df["id"] = np.where(is_train, df["id_tr"], len(tr_classes) + df["id_te"])

    df = df.fillna(-1)
    # DataFrame.sort() was removed from pandas; sort_values is its
    # replacement. A stable sort keeps rows that share an id in their
    # original relative order.
    df = df.sort_values("id", kind="mergesort")
    df = df[["label", "id0", "id", "id_tr", "id_te"]]
    return df
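# Web-page residue from the original listing ("comment list", "article table of contents"); not part of the code.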