def sort_eg_attributes(df, attributes=None,
                       reverse_list=None,
                       add_columns=False):
    '''Sort master list attribute columns by employee group in preparation
    for list construction.

    The rows of df are first ordered in place by 'eg' and then by
    'eg_number' ('eg_order' is used when that lookup fails).  Each selected
    attribute column (normally a date-related column such as doh or ldate)
    is then sorted independently within every employee group, so the sorted
    values no longer travel with their original rows.  Columns which are
    not selected simply follow the row order.

    inputs

        df
            The master data dataframe (does not need to be sorted).  It
            must contain an 'eg' column and either 'eg_number' or
            'eg_order'.  The dataframe is modified in place and the same
            object is returned.
        attributes
            columns to sort by eg.  Defaults to ['doh', 'ldate'].
        reverse_list
            If an attribute is to be sorted in reverse order (descending),
            use a '1' in the list position corresponding to the position of
            the attribute within the attributes input.  Positions which are
            not supplied are treated as '0' (ascending), so the list may be
            shorter than attributes.  Extra entries are ignored.
        add_columns
            If True, the sorted values are written to an additional column
            for each attribute, named with the suffix '_sort', and the
            attribute column itself is left as is.  If False, the attribute
            column is overwritten.

    returns
        df, the same dataframe object that was passed in.
    '''
    if attributes is None:
        attributes = ['doh', 'ldate']
    attributes = list(attributes)

    # pad the reverse flags so that every attribute has one, a short (or
    # omitted) reverse_list previously raised an IndexError
    flags = [] if reverse_list is None else list(reverse_list)
    flags += [0] * (len(attributes) - len(flags))

    # record datetime columns before any values are replaced, these get
    # normalized to whole dates after sorting
    date_cols = set()
    for col in df:
        if (df[col]).dtype == 'datetime64[ns]':
            date_cols.add(col)

    try:
        df.sort_values(['eg', 'eg_number'], inplace=True)
    except LookupError:
        # a missing 'eg_number' column surfaces as a KeyError
        df.sort_values(['eg', 'eg_order'], inplace=True)

    egs = df.eg.values
    # row positions for each group depend only on eg, so find them once
    # and reuse them for every attribute
    group_positions = [np.where(egs == eg)[0] for eg in pd.unique(df.eg)]

    for measure, descending in zip(attributes, flags):
        data = df[measure].values
        measure_col = np.empty_like(data)

        for positions in group_positions:
            ordered = np.sort(data[positions], axis=0)
            if descending:
                ordered = ordered[::-1]
            np.put(measure_col, positions, ordered)

        col_name = measure + '_sort' if add_columns else measure
        df[col_name] = measure_col

        if measure in date_cols:
            # drop any time-of-day component, keep a datetime column
            df[col_name] = pd.to_datetime(df[col_name].dt.date)

    return df
评论列表
文章目录