def make_sessions(data, session_th=30 * 60, is_ordered=False, user_key='user_id', item_key='item_id', time_key='ts'):
    """Assign a session id to every event in ``data`` without grouping.

    A new session starts on the first row, whenever the user changes between
    two consecutive rows, or whenever the time gap between two consecutive
    rows is strictly greater than ``session_th``.

    Args:
        data: pandas DataFrame holding at least the ``user_key`` and
            ``time_key`` columns. It is modified in place: rows are sorted
            (unless ``is_ordered`` is true) and a ``'session_id'`` column is
            added or overwritten.
        session_th: maximum gap allowed between two consecutive events of the
            same session, in the same units as the ``time_key`` column
            (the default ``30 * 60`` is 30 minutes if timestamps are seconds).
        is_ordered: set to True when ``data`` is already sorted by user and
            then by time, to skip the sort.
        user_key: name of the column identifying the user.
        item_key: unused by this function; kept for interface compatibility.
        time_key: name of the column holding the event timestamp.

    Returns:
        The same ``data`` object, with the ``'session_id'`` column filled with
        consecutive integer ids starting at 1.
    """
    if not is_ordered:
        # Sessions are detected from adjacent rows, so the events of each
        # user must be contiguous and in chronological order.
        data.sort_values(by=[user_key, time_key], ascending=True, inplace=True)

    users = data[user_key].values
    times = data[time_key].values

    # One flag per row; sizing the mask from the data keeps empty input valid.
    # Every row defaults to "starts a session", which is what the first row
    # needs; the remaining rows are decided by comparing with their predecessor.
    new_session = np.ones(len(data), dtype=bool)
    user_changed = users[1:] != users[:-1]
    gap_too_long = np.diff(times) > session_th
    new_session[1:] = np.logical_or(user_changed, gap_too_long)

    # Running count of session starts yields ids 1, 2, 3, ...
    data['session_id'] = np.cumsum(new_session)
    return data
评论列表
文章目录