def split_df_col2cols(dataframe, split_col, split_char, new_colnames, delete_old=False):
    """Split one string column of a DataFrame into several new columns.

    Every value of ``split_col`` is split on ``split_char`` and the pieces
    are placed, in order, into the columns named by ``new_colnames``.  Rows
    that produce fewer pieces than there are names (including rows whose
    value is missing, which produce no pieces at all) are padded with NaN.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to process.  It is never modified; a new frame is returned.
    split_col : str
        Name of the column to split.
    split_char : str
        Separator handed to ``Series.str.split``.
    new_colnames : list of str
        Names for the resulting columns, in order.
    delete_old : bool, default False
        When true, ``split_col`` is left out of the returned frame.

    Returns
    -------
    pandas.DataFrame
        ``dataframe`` (without ``split_col`` when ``delete_old`` is set)
        joined on its index with the newly created columns.

    Raises
    ------
    ValueError
        If any value splits into more pieces than ``new_colnames`` provides
        names for.
    """
    # Imports stay function-local, as in the original, so the helper remains
    # usable when pasted into a module that does not import these itself.
    import numpy as np
    import pandas as pd

    column_names = list(new_colnames)
    width = len(column_names)

    # Cast to object first so the .str accessor is available even when the
    # column is entirely NaN (which pandas would otherwise store as float).
    pieces = dataframe[split_col].astype(np.object_).str.split(split_char)

    # Build the rows explicitly instead of `.apply(pd.Series)`: that keeps the
    # result a DataFrame for empty input and pads short rows with NaN.
    rows = []
    for piece in pieces:
        # Missing values come back from str.split as NaN, not as a list.
        values = list(piece) if pd.api.types.is_list_like(piece) else []
        if len(values) > width:
            raise ValueError(
                "Splitting column {!r} on {!r} produced {} pieces but only {} "
                "new column names were given".format(
                    split_col, split_char, len(values), width
                )
            )
        rows.append(values + [np.nan] * (width - len(values)))

    new_cols = pd.DataFrame(rows, index=dataframe.index, columns=column_names)

    # drop() returns a copy, so the caller's frame keeps its original column.
    base = dataframe.drop(columns=[split_col]) if delete_old else dataframe

    # join() aligns on the index, which new_cols shares with the input frame.
    return base.join(new_cols)
# Comment list
# Table of contents