def member_to_badge_proximity(fileobject, time_bins_size='1min', tz='US/Eastern'):
    """Create a member-to-badge proximity DataFrame from a proximity data file.

    Each input line is parsed as a JSON object whose ``'data'`` entry holds a
    ``'timestamp'``, a ``'member'`` and an ``'rssi_distances'`` mapping from
    observed badge id to a dict with ``'rssi'`` and ``'count'`` entries. One
    row is produced per (line, observed badge id) pair.

    Parameters
    ----------
    fileobject : file or iterable list of str
        The proximity data, as an iterable of JSON strings.
    time_bins_size : str
        The size of the time bins used for resampling. Defaults to '1min'.
    tz : str
        The time zone the timestamps are converted to. Defaults to
        'US/Eastern'.

    Returns
    -------
    pd.DataFrame
        The member-to-badge proximity data, indexed by
        ``(datetime, member, observed_id)`` and sorted by that index, with
        ``rssi`` and ``count`` columns. When several records fall into the
        same time bin for the same member and observed id, the first one
        encountered is kept.
    """
    def _iter_records(lines):
        """Yield one flat tuple per observed badge found on each line."""
        for line in lines:
            data = json.loads(line)['data']
            for observed_id, distance in data['rssi_distances'].items():
                yield (
                    data['timestamp'],
                    str(data['member']),
                    int(observed_id),
                    float(distance['rssi']),
                    float(distance['count']),
                )

    df = pd.DataFrame(
        _iter_records(fileobject),
        columns=['timestamp', 'member', 'observed_id', 'rssi', 'count'],
    )

    # Timestamps are interpreted as seconds since the epoch. ``utc=True``
    # already yields a tz-aware (UTC) series, so it must only be converted:
    # calling ``tz_localize`` on tz-aware data raises a TypeError.
    df['datetime'] = pd.to_datetime(
        df['timestamp'], unit='s', utc=True
    ).dt.tz_convert(tz)
    del df['timestamp']

    # Group per time bin, member and observed_id, and take the first value,
    # arbitrarily. ``pd.Grouper`` replaces ``pd.TimeGrouper``, which was
    # removed from pandas.
    df = df.groupby([
        pd.Grouper(key='datetime', freq=time_bins_size),
        'member',
        'observed_id',
    ]).first()

    # Sort the data
    return df.sort_index()
评论列表
文章目录