def create_categorical_features(df, label_list, random_state=None):
    """
    Convert randomly chosen numeric columns into categorical ones.

    For every entry of ``label_list`` one not-yet-used numeric column (never
    the column named ``'y'``) is picked at random and binned with ``pd.cut``
    into ``len(labels)`` bins that carry those labels. When there are more
    label sets than eligible numeric columns, the surplus label sets are
    silently ignored.

    :param df: data frame we're operating on; it is copied, not modified
    :param label_list: a list of lists, each inner list holds the labels for
        one categorical variable
    :param random_state: the numpy RandomState (resolved through
        ``get_random_state``) used to pick the columns
    :return: a modified copy of the data frame

    Example:
        create_categorical_features(df, [['a', 'b'], ['red', 'blue']])
    """
    random_state = get_random_state(random_state)
    df = df.copy()
    # Collect the eligible numeric columns ONCE, outside the loop; the target
    # column 'y' must never be converted.
    numer_cols = [col for col in df.select_dtypes(include=['number']).columns if col != 'y']
    for labels in label_list:
        # we might be out of numerical columns!
        if not numer_cols:
            break
        # Draw a *position* instead of a label: handing the label list itself
        # to ``choice`` makes numpy coerce the labels into one array dtype
        # (mixed int/str names become strings, tuple names become a 2-D
        # array), so the drawn value may no longer match any real column.
        position = int(random_state.choice(len(numer_cols)))
        # Popping removes the column from the pool so it is not picked twice.
        chosen_col = numer_cols.pop(position)
        # use cut to convert that column to categorical
        df[chosen_col] = pd.cut(df[chosen_col], bins=len(labels), labels=labels)
    return df
# Comment list
# Table of contents