-
Notifications
You must be signed in to change notification settings - Fork 1
/
pipeline.py
executable file
·97 lines (76 loc) · 4.15 KB
/
pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python3
import os
import sys
import pandas as pd
import numpy as np
import plotly.express as px
from datetime import datetime
from datetime import timedelta
import json
# Event names listed so that each name's position is its numeric event id
# (the value compared against the 'event_id' column of the events table).
_EVENT_NAMES_BY_ID = (
    "False Negative Reported",
    "Puff Detected",
    "Session Detected",
    "User Started Smoking Session",
    "AI Started Smoking Session",
    "User Stopped Smoking Session",
    "Timer Stopped Smoking",
)

# Lookup table: event name -> numeric event id.
event_id_mapping = {
    event_name: event_id
    for event_id, event_name in enumerate(_EVENT_NAMES_BY_ID)
}
# Length assumed for a smoking session whose end was never recorded (8 minutes),
# in milliseconds.
ASSUMED_SESSION_MILLIS = 8 * 60 * 1000

# Values written to the 'is smoking' column of the raw table.
NOT_SMOKING_LEVEL = 10
SMOKING_LEVEL = 15

# Format of the human-readable timestamps ('App Start Time Readable' in
# Info.json and 'time reported' in events.csv).
READABLE_TIME_FORMAT = "%Y-%m-%d_%H_%M_%S"


def load_recording(data_dir):
    """Load one recording directory.

    Reads ``events.csv`` and ``Info.json`` from ``data_dir`` and every file in
    ``data_dir/raw`` (each parsed as CSV with its first line skipped).

    Returns:
        A tuple ``(events, info, raw)`` where ``events`` is the events table,
        ``info`` is the parsed JSON object and ``raw`` is all raw files
        concatenated and sorted by their 'timestamp' column.

    Raises:
        FileNotFoundError: if ``events.csv``, ``Info.json`` or ``raw`` is missing.
        ValueError: if ``raw`` contains no files.
    """
    events = pd.read_csv(os.path.join(data_dir, 'events.csv'))

    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join(data_dir, 'Info.json'), 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    raw_dir = os.path.join(data_dir, 'raw')
    # Collect the frames first and concatenate once: concatenating inside the
    # loop copies the accumulated data on every iteration.
    frames = [
        pd.read_csv(os.path.join(raw_dir, name), skiprows=1)
        for name in sorted(os.listdir(raw_dir))
    ]
    if not frames:
        raise ValueError(f'No raw files found in {raw_dir}')

    raw = pd.concat(frames, ignore_index=True)
    raw = raw.sort_values(by=['timestamp'], ignore_index=True)
    return events, info, raw


def pair_sessions(start_times, stop_times, assumed_millis=ASSUMED_SESSION_MILLIS):
    """Build the table of smoking sessions from start and stop times.

    Starts and stops are paired by index label; callers pass both series with
    a fresh 0..n-1 index, so the i-th start is paired with the i-th stop.
    A start without a matching stop gets ``start + assumed_millis`` as its
    stop. Stops without a matching start are dropped.

    NOTE(review): pairing is purely positional, so a start in the *middle* of
    the recording that lacks a stop will shift every later pairing - confirm
    whether that can occur in real data.

    Returns:
        A DataFrame with columns 'starttime' and 'stoptime', one row per start.
    """
    sessions = pd.DataFrame({'starttime': start_times})
    sessions['stoptime'] = stop_times  # aligned on the index of start_times
    # Assign the result back instead of calling fillna(inplace=True) on a
    # column selection: the in-place form operates on a temporary and does not
    # reliably update the frame. The vectorised fallback also works when there
    # are no sessions at all.
    sessions['stoptime'] = sessions['stoptime'].fillna(
        sessions['starttime'] + assumed_millis
    )
    return sessions


def flag_smoking_rows(raw, sessions):
    """Add an 'is smoking' column to ``raw`` (modified in place and returned).

    Every row starts at ``NOT_SMOKING_LEVEL``; rows whose 'real time' lies
    within any session (start and stop both inclusive) are set to
    ``SMOKING_LEVEL``.
    """
    raw['is smoking'] = NOT_SMOKING_LEVEL
    real_time = raw['real time']
    for start, stop in zip(sessions['starttime'], sessions['stoptime']):
        # Series.between is inclusive on both ends: start <= real time <= stop.
        raw.loc[real_time.between(start, stop), 'is smoking'] = SMOKING_LEVEL
    return raw


def main(argv):
    """Plot accelerometer data together with the smoking-session indicator.

    ``argv`` is the full argument vector (``sys.argv``); ``argv[1]`` is the
    path to the data directory. Creates a ``processed`` directory in the
    current working directory, prints the total app run time, and opens the
    resulting plot with plotly's 'browser' renderer.

    Returns:
        The process exit status: 1 on a usage error, 0 otherwise.
    """
    if len(argv) != 2:
        print("Usage: python3 pipeline.py [path-to-data-directory]")
        return 1

    # Portable replacement for shelling out to `mkdir -p`.
    os.makedirs('processed', exist_ok=True)

    events, info, raw = load_recording(argv[1])
    app_start_millis = info['App Start Time']

    # Get app run length (delta between the last raw sample and the start time)
    app_run_time_millis = int(raw['real time'].iloc[-1] - app_start_millis)
    app_run_time_str = str(timedelta(milliseconds=app_run_time_millis))
    print(f'Total App Run Time: {app_run_time_str}')

    # Add readable times (time elapsed since app start) to the raw table
    raw['readable time'] = [
        str(timedelta(milliseconds=time_millis - app_start_millis))
        for time_millis in raw['real time']
    ]

    # Get each smoking session: explicit start events paired with stop events
    event_ids = events['event_id']
    start_ids = [
        event_id_mapping['User Started Smoking Session'],
        event_id_mapping['AI Started Smoking Session'],
    ]
    stop_ids = [
        event_id_mapping['User Stopped Smoking Session'],
        event_id_mapping['Timer Stopped Smoking'],
    ]
    start_times = events.loc[event_ids.isin(start_ids), 'time'].reset_index(drop=True)
    stop_times = events.loc[event_ids.isin(stop_ids), 'time'].reset_index(drop=True)
    smoking_sessions = pair_sessions(start_times, stop_times)

    # Get false negatives (missed smoking sessions). Their report time is a
    # readable timestamp, so convert it to milliseconds on the same clock as
    # 'App Start Time' via its offset from the readable app start time.
    app_start_datetime = datetime.strptime(info['App Start Time Readable'], READABLE_TIME_FORMAT)
    reported_times = events.loc[
        event_ids == event_id_mapping["False Negative Reported"], 'time reported'
    ]
    false_negative_starts = [
        int(
            (datetime.strptime(reported, READABLE_TIME_FORMAT) - app_start_datetime).total_seconds() * 1e3
            + app_start_millis
        )
        for reported in reported_times
    ]
    if false_negative_starts:
        # We don't know how long a missed session was - assume 8 minutes.
        false_negative_sessions = pd.DataFrame({
            'starttime': false_negative_starts,
            'stoptime': [start + ASSUMED_SESSION_MILLIS for start in false_negative_starts],
        })
        smoking_sessions = pd.concat(
            [smoking_sessions, false_negative_sessions], ignore_index=True
        )

    # Add smoking status to raw: any row whose 'real time' falls between a
    # session's start and stop (inclusive) is flagged as smoking.
    flag_smoking_rows(raw, smoking_sessions)

    fig = px.line(raw, x='readable time', y=['is smoking', 'acc_x', 'acc_y', 'acc_z'], title='Time delta',
                  labels={'readable time': 'Time Since start'})
    fig.show(renderer='browser')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))