From f95e7875e8f30f539b2853739c16a82f2700e8e3 Mon Sep 17 00:00:00 2001 From: Zachary Clement Date: Fri, 15 Sep 2023 11:21:54 -0400 Subject: [PATCH 1/8] add indegree and outdegree for calls OR texts --- forest/willow/log_stats.py | 69 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py index 1418871b..2ec4a502 100644 --- a/forest/willow/log_stats.py +++ b/forest/willow/log_stats.py @@ -140,6 +140,67 @@ def text_analysis( ) +def text_and_call_analysis(df_call: pd.DataFrame, df_text: pd.DataFrame, stamp: int, step_size: int) -> tuple: + """Calculate the summary statistics for the call data + in the given time interval. + + Args: + df_call: pd.DataFrame + dataframe of the call data + df_text: pd.DataFrame + dataframe of the text data + stamp: int + starting timestamp of the study + step_size: int + ending timestamp of the study + + Returns: + tuple of summary statistics containing: + num_uniq_in_call_or_text: int + number of people making incoming calls or texts to the Beiwe + user + num_uniq_out_call_or_text: int + number of people receiving outgoing calls or texts from the + Beiwe user + + """ + # filter the data based on the timestamp + if df_call.shape > 0 + temp_call = df_call[ + (df_call["timestamp"] / 1000 >= stamp) + & (df_call["timestamp"] / 1000 < stamp + step_size) + ] + index_in_call = np.array(temp_call["call type"]) == "Incoming Call" + index_out_call = np.array(temp_call["call type"]) == "Outgoing Call" + calls_in = np.array(temp_call["hashed phone number"])[index_in_call] + calls_out = np.array(temp_call["hashed phone number"])[index_out_call] + else: ## no calls were received, so no unique numbers will be used + calls_in = np.array([]) + calls_out = np.array([]) + + if df_text.shape > 0: + temp_text = df_text[ + (df_text["timestamp"] / 1000 >= stamp) + & (df_text["timestamp"] / 1000 < stamp + step_size) + ] + + index_s = np.array(temp_text["sent vs received"]) == 
"sent SMS" + index_r = np.array(temp_text["sent vs received"]) == "received SMS" + texts_in = np.array(temp_text["hashed phone number"])[index_r] + texts_out = np.array(temp_text["hashed phone number"])[index_s] + else: ## no texts were received, so no unique numbers will be used + texts_in = np.array([]) + texts_out = np.array([]) + + num_uniq_in_call_or_text = len(np.unique(np.hstack(calls_in, texts_in))) + num_uniq_out_call_or_text = len(np.unique(np.hstack(texts_out, calls_out))) + + return ( + num_uniq_in_call_or_text, + num_uniq_out_call_or_text + ) + + def call_analysis(df_call: pd.DataFrame, stamp: int, step_size: int) -> tuple: """Calculate the summary statistics for the call data in the given time interval. @@ -288,6 +349,11 @@ def comm_logs_summaries( newline += list(call_stats) else: newline += [pd.NA] * 8 + if df_text.shape[0] > 0 or df_call.shape[0] > 0: + text_and_call_stats = text_and_call_analysis(df_text, df_call, stamp, step_size, frequency) + newline += list(text_and_call_stats) + else: + newline += [pd.NA] * 2 if df_text.shape[0] > 0: text_stats = text_analysis(df_text, stamp, step_size, frequency) @@ -295,6 +361,7 @@ def comm_logs_summaries( else: newline += [pd.NA] * 10 + if frequency == Frequency.DAILY: newline = [year, month, day] + newline else: @@ -311,6 +378,8 @@ def comm_logs_summaries( "num_mis_caller", "total_mins_in_call", "total_mins_out_call", + "num_uniq_in_call_or_text", + "num_uniq_out_call_or_text", "num_s", "num_r", "num_mms_s", From e0b6cd8aeb5c548fa54fde752d561f8fdbbe755d Mon Sep 17 00:00:00 2001 From: Zachary Clement Date: Fri, 15 Sep 2023 11:22:33 -0400 Subject: [PATCH 2/8] add summary statistics for willow --- docs/source/index.md | 6 ++++++ docs/source/willow.md | 3 +++ 2 files changed, 9 insertions(+) diff --git a/docs/source/index.md b/docs/source/index.md index 2bc7c2f2..5ed87a46 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -216,6 +216,12 @@ The summary statistics that are generated are listed 
below: * - total_mins_out_call - float - The duration (minute) of all outgoing calls. + * - num_uniq_in_call_or_text + - int + - The total number of unique individuals who called or texted the subject. +* - num_uniq_out_call_or_text + - int + - The total number of unique individuals called or texted by the subject. * - num_s - int - The total number of sent SMS. diff --git a/docs/source/willow.md b/docs/source/willow.md index ccf79fcb..c2647148 100644 --- a/docs/source/willow.md +++ b/docs/source/willow.md @@ -41,6 +41,8 @@ ___ | num_uniq_mis_call | float | Total number of unique callers missed | | total_time_in_call | int | Total amount of minutes spent on incoming calls | | total_time_out_call | int | Total amount of minutes spent on outgoing calls | +| num_uniq_in_call_or_text | float | Total number of unique individuals who called or texted the Beiwe user | +| num_uniq_out_call_or_text | int | Total number of unique individuals who the Beiwe user called or texted | | num_s | float | Total number of sent SMS texts | | num_r | int | Total number of received SMS texts | | num_mms_s | int | Total number of sent MMS texts | @@ -52,6 +54,7 @@ ___ | text_reciprocity_incoming | int | The total number of times a text is sent to a unique person without response | | text_reciprocity_outgoing | int | The total number of times a text is received by a unique person without response | + ## References ## Contact information for questions: From 0f552f5047aff23c105dc9328b9342d02f9649c5 Mon Sep 17 00:00:00 2001 From: Zachary Clement Date: Fri, 15 Sep 2023 11:29:29 -0400 Subject: [PATCH 3/8] don't distinguish between incoming and outgoing calls/texts --- docs/source/index.md | 7 ++----- docs/source/willow.md | 9 ++++----- forest/willow/log_stats.py | 22 ++++++++++------------ 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/docs/source/index.md b/docs/source/index.md index 5ed87a46..872ae6b1 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -216,12 
+216,9 @@ The summary statistics that are generated are listed below: * - total_mins_out_call - float - The duration (minute) of all outgoing calls. - * - num_uniq_in_call_or_text + * - num_uniq_individuals_call_or_text - int - - The total number of unique individuals who called or texted the subject. -* - num_uniq_out_call_or_text - - int - - The total number of unique individuals called or texted by the subject. + - The total number of unique individuals who called or texted the subject, or who the subject called or texted. The total number of individuals who the subject had any kind of communication with. * - num_s - int - The total number of sent SMS. diff --git a/docs/source/willow.md b/docs/source/willow.md index c2647148..e0ce225c 100644 --- a/docs/source/willow.md +++ b/docs/source/willow.md @@ -36,13 +36,12 @@ ___ | num_in_call | int | Total number of incoming calls | | num_out_call | int | Total number of outgoing calls | | num_mis_call | int | Total number of missed calls -| num_uniq_in_call | float | Total number of unique incoming callers | -| num_uniq_out_call | int | Total number of unique outgoing calls | -| num_uniq_mis_call | float | Total number of unique callers missed | +| num_in_caller | float | Total number of unique incoming callers | +| num_out_caller | int | Total number of unique outgoing calls | +| num_mis_caller | float | Total number of unique callers missed | | total_time_in_call | int | Total amount of minutes spent on incoming calls | | total_time_out_call | int | Total amount of minutes spent on outgoing calls | -| num_uniq_in_call_or_text | float | Total number of unique individuals who called or texted the Beiwe user | -| num_uniq_out_call_or_text | int | Total number of unique individuals who the Beiwe user called or texted | +| num_uniq_individuals_call_or_text | float | Total number of unique individuals who called or texted the Beiwe user, or who the Beiwe user called or texted. 
The total number of individuals with any communication contact with the Beiwe user | | num_s | float | Total number of sent SMS texts | | num_r | int | Total number of received SMS texts | | num_mms_s | int | Total number of sent MMS texts | diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py index 2ec4a502..9c42a86a 100644 --- a/forest/willow/log_stats.py +++ b/forest/willow/log_stats.py @@ -156,12 +156,10 @@ def text_and_call_analysis(df_call: pd.DataFrame, df_text: pd.DataFrame, stamp: Returns: tuple of summary statistics containing: - num_uniq_in_call_or_text: int + num_uniq_individuals_call_or_text: int number of people making incoming calls or texts to the Beiwe - user - num_uniq_out_call_or_text: int - number of people receiving outgoing calls or texts from the - Beiwe user + user or who the Beiwe user made outgoing calls or texts to + """ # filter the data based on the timestamp @@ -192,12 +190,13 @@ def text_and_call_analysis(df_call: pd.DataFrame, df_text: pd.DataFrame, stamp: texts_in = np.array([]) texts_out = np.array([]) - num_uniq_in_call_or_text = len(np.unique(np.hstack(calls_in, texts_in))) - num_uniq_out_call_or_text = len(np.unique(np.hstack(texts_out, calls_out))) + num_uniq_individuals_call_or_text = len(np.unique(np.hstack( + calls_in, texts_in, texts_out, calls_out + ))) + return ( - num_uniq_in_call_or_text, - num_uniq_out_call_or_text + num_uniq_individuals_call_or_text, ) @@ -353,7 +352,7 @@ def comm_logs_summaries( text_and_call_stats = text_and_call_analysis(df_text, df_call, stamp, step_size, frequency) newline += list(text_and_call_stats) else: - newline += [pd.NA] * 2 + newline += [pd.NA] if df_text.shape[0] > 0: text_stats = text_analysis(df_text, stamp, step_size, frequency) @@ -378,8 +377,7 @@ def comm_logs_summaries( "num_mis_caller", "total_mins_in_call", "total_mins_out_call", - "num_uniq_in_call_or_text", - "num_uniq_out_call_or_text", + "num_uniq_individuals_call_or_text", "num_s", "num_r", "num_mms_s", From 
f86e20fa73fcce1b01a9bf011eee0a3fed43d284 Mon Sep 17 00:00:00 2001 From: Zachary Clement Date: Mon, 18 Sep 2023 20:27:59 -0400 Subject: [PATCH 4/8] mypy fixes, data validation checks --- forest/willow/log_stats.py | 67 +++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py index 9c42a86a..d931bf30 100644 --- a/forest/willow/log_stats.py +++ b/forest/willow/log_stats.py @@ -140,7 +140,9 @@ def text_analysis( ) -def text_and_call_analysis(df_call: pd.DataFrame, df_text: pd.DataFrame, stamp: int, step_size: int) -> tuple: +def text_and_call_analysis( + df_call: pd.DataFrame, df_text: pd.DataFrame, stamp: int, step_size: int +) -> tuple: """Calculate the summary statistics for the call data in the given time interval. @@ -163,20 +165,23 @@ def text_and_call_analysis(df_call: pd.DataFrame, df_text: pd.DataFrame, stamp: """ # filter the data based on the timestamp - if df_call.shape > 0 + if df_call.shape[0] > 0: temp_call = df_call[ (df_call["timestamp"] / 1000 >= stamp) & (df_call["timestamp"] / 1000 < stamp + step_size) ] index_in_call = np.array(temp_call["call type"]) == "Incoming Call" index_out_call = np.array(temp_call["call type"]) == "Outgoing Call" + index_mis_call = np.array(temp_call["call type"]) == "Missed Call" calls_in = np.array(temp_call["hashed phone number"])[index_in_call] calls_out = np.array(temp_call["hashed phone number"])[index_out_call] - else: ## no calls were received, so no unique numbers will be used + calls_mis = np.array(temp_call["hashed phone number"])[index_mis_call] + + else: # no calls were received, so no unique numbers will be used calls_in = np.array([]) calls_out = np.array([]) - if df_text.shape > 0: + if df_text.shape[0] > 0: temp_text = df_text[ (df_text["timestamp"] / 1000 >= stamp) & (df_text["timestamp"] / 1000 < stamp + step_size) @@ -186,15 +191,13 @@ def text_and_call_analysis(df_call: pd.DataFrame, df_text: 
pd.DataFrame, stamp: index_r = np.array(temp_text["sent vs received"]) == "received SMS" texts_in = np.array(temp_text["hashed phone number"])[index_r] texts_out = np.array(temp_text["hashed phone number"])[index_s] - else: ## no texts were received, so no unique numbers will be used + else: # no texts were received, so no unique numbers will be used texts_in = np.array([]) texts_out = np.array([]) num_uniq_individuals_call_or_text = len(np.unique(np.hstack( - calls_in, texts_in, texts_out, calls_out + [calls_in, texts_in, texts_out, calls_out, calls_mis] ))) - - return ( num_uniq_individuals_call_or_text, ) @@ -349,7 +352,9 @@ def comm_logs_summaries( else: newline += [pd.NA] * 8 if df_text.shape[0] > 0 or df_call.shape[0] > 0: - text_and_call_stats = text_and_call_analysis(df_text, df_call, stamp, step_size, frequency) + text_and_call_stats = text_and_call_analysis( + df_call, df_text, stamp, step_size + ) newline += list(text_and_call_stats) else: newline += [pd.NA] @@ -359,8 +364,6 @@ def comm_logs_summaries( newline += list(text_stats) else: newline += [pd.NA] * 10 - - if frequency == Frequency.DAILY: newline = [year, month, day] + newline else: @@ -492,6 +495,48 @@ def log_stats_main( tz_str, frequency, ) + # num_uniq_individuals_call_or_text is the cardinality + # of the union of several sets. It should should always + # be at least as large as the cardinality of any one of + # the sets, and it should never be larger than the sum + # of the cardinalities of all of the sets + # (it may be equal if all the sets are disjoint) + sum_all_set_cols = pd.Series( + [0]*stats_pdframe.shape[0] + ) + for col in [ + "num_s_tel", "num_r_tel", "num_in_caller", + "num_out_caller", "num_mis_caller" + ]: + sum_all_set_cols += stats_pdframe[col] + if ( + stats_pdframe[ + "num_uniq_individuals_call_or_text" + ] < stats_pdframe[col] + ).any(): + logger.error( + "Error: " + "num_uniq_individuals_call_or_text " + "was found to be less than %s for at " + "least one time interval. 
This error " + "comes from an issue with the code," + " not an issue with the input data", + col + ) + if ( + stats_pdframe[ + "num_uniq_individuals_call_or_text" + ] > sum_all_set_cols + ).any(): + logger.error( + "Error: " + "num_uniq_individuals_call_or_text " + "was found to be larger than the sum " + "of individual cardinalities for at " + "least one time interval. This error " + "comes from an issue with the code," + " not an issue with the input data" + ) write_all_summaries(bid, stats_pdframe, output_folder) From 800eb218ca8e23ec7c9da925b6c052b7a899be5d Mon Sep 17 00:00:00 2001 From: Zachary Clement Date: Tue, 19 Sep 2023 19:43:50 -0400 Subject: [PATCH 5/8] addition to jasmine to stop an error that happened, also updated docstring --- forest/jasmine/traj2stats.py | 10 ++++++++++ forest/willow/log_stats.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/forest/jasmine/traj2stats.py b/forest/jasmine/traj2stats.py index 0e7fdfd1..877d2477 100644 --- a/forest/jasmine/traj2stats.py +++ b/forest/jasmine/traj2stats.py @@ -388,6 +388,16 @@ def gps_summaries( res += [0] * (2 * len(places_of_interest) + 1) summary_stats.append(res) continue + elif sum(index_rows) == 0 and not split_day_night: + ## There is no data and it is daily data, so we need to add empty rows + res = [year, month, day] + [0] * 3 + [pd.NA] * 15 + + if places_of_interest is not None: + # add empty data for places of interest + # for daytime/nighttime + other + res += [0] * (2 * len(places_of_interest) + 1) + summary_stats.append(res) + continue temp = traj[index_rows, :] # take a subset which is exactly one hour/day, diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py index d931bf30..aae5fd82 100644 --- a/forest/willow/log_stats.py +++ b/forest/willow/log_stats.py @@ -143,7 +143,7 @@ def text_analysis( def text_and_call_analysis( df_call: pd.DataFrame, df_text: pd.DataFrame, stamp: int, step_size: int ) -> tuple: - """Calculate the summary statistics for the 
call data + """Calculate the summary statistics for the call and text data in the given time interval. Args: From 837ac83af13fbcbd8a2d1c11d8febd661f7cf965 Mon Sep 17 00:00:00 2001 From: Zachary Clement Date: Tue, 19 Sep 2023 19:47:52 -0400 Subject: [PATCH 6/8] more docstring updates --- forest/willow/log_stats.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py index aae5fd82..08b948a9 100644 --- a/forest/willow/log_stats.py +++ b/forest/willow/log_stats.py @@ -143,7 +143,7 @@ def text_analysis( def text_and_call_analysis( df_call: pd.DataFrame, df_text: pd.DataFrame, stamp: int, step_size: int ) -> tuple: - """Calculate the summary statistics for the call and text data + """Calculate the summary statistics for anything requiring both call and text data in the given time interval. Args: @@ -152,9 +152,9 @@ def text_and_call_analysis( df_text: pd.DataFrame dataframe of the text data stamp: int - starting timestamp of the study + starting timestamp of the interval step_size: int - ending timestamp of the study + ending timestamp of the interval Returns: tuple of summary statistics containing: @@ -211,9 +211,9 @@ def call_analysis(df_call: pd.DataFrame, stamp: int, step_size: int) -> tuple: df_call: pd.DataFrame dataframe of the call data stamp: int - starting timestamp of the study + starting timestamp of the interval step_size: int - ending timestamp of the study + ending timestamp of the interval Returns: tuple of summary statistics containing: @@ -295,9 +295,9 @@ def comm_logs_summaries( df_call: pd.DataFrame dataframe of the call data stamp_start: int - starting timestamp of the study + starting timestamp of the interval stamp_end: int - ending timestamp of the study + ending timestamp of the interval tz_str: str timezone where the study was/is conducted frequency: Frequency class, From 26bf3e31350dae38309bcf318e4153e1f1a02c66 Mon Sep 17 00:00:00 2001 From: Zachary Clement 
Date: Tue, 19 Sep 2023 19:53:18 -0400 Subject: [PATCH 7/8] pep8 fixes --- forest/jasmine/traj2stats.py | 3 ++- forest/willow/log_stats.py | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/forest/jasmine/traj2stats.py b/forest/jasmine/traj2stats.py index 877d2477..67c0b70c 100644 --- a/forest/jasmine/traj2stats.py +++ b/forest/jasmine/traj2stats.py @@ -389,7 +389,8 @@ def gps_summaries( summary_stats.append(res) continue elif sum(index_rows) == 0 and not split_day_night: - ## There is no data and it is daily data, so we need to add empty rows + # There is no data and it is daily data, so we need to add empty + # rows res = [year, month, day] + [0] * 3 + [pd.NA] * 15 if places_of_interest is not None: diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py index 08b948a9..a40e1be0 100644 --- a/forest/willow/log_stats.py +++ b/forest/willow/log_stats.py @@ -143,9 +143,8 @@ def text_analysis( def text_and_call_analysis( df_call: pd.DataFrame, df_text: pd.DataFrame, stamp: int, step_size: int ) -> tuple: - """Calculate the summary statistics for anything requiring both call and text data - in the given time interval. - + """Calculate the summary statistics for anything requiring both call and + text data in the given time interval. 
Args: df_call: pd.DataFrame dataframe of the call data From a3582747c1cf84070e69d8ac0b675ad1424c4e8b Mon Sep 17 00:00:00 2001 From: joannakennedyharvard <112563754+joannakennedyharvard@users.noreply.github.com> Date: Wed, 27 Sep 2023 21:28:33 -0400 Subject: [PATCH 8/8] sycamore.md documentation update (#202) Added list of summary statistics to the bottom of sycamore.md --- docs/source/sycamore.md | 88 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/docs/source/sycamore.md b/docs/source/sycamore.md index a9386d39..092ae1e9 100644 --- a/docs/source/sycamore.md +++ b/docs/source/sycamore.md @@ -201,3 +201,91 @@ If surveys are sent on a weekly schedule, Sycamore assumes that there is a surve **What does `surv_inst_flg` mean in the outputs?** `surv_inst_flg` is a unique identifying number to distinguish different times when the same individual took the same survey. This column is useful for joining outputs together. + + +## List of summary statistics + +The following variables are created in the “submits_summary.csv” file. This file will only be generated if the config file and intervention timings file are provided. The `submits_summary_daily.csv` and `submits_summary_hourly.csv` files contain the same columns, but with additional granularity at the day or hourly levels rather than at the user level. + + +| Variable | Type | Description of Variable | +|--------------------------------------- |-------------- |------------------------------------------------------------------------------------------------------------- | +| survey id | str | ID of the survey for which this row applies to. Note: If `submits_by_survey_id` is False, surveys will not be aggregated at the survey level (they will only be aggregated by user) so this column will not appear. | +| year | int | Year of the time period at which submits/deliveries are being aggregated. 
This is only included in `submits_summary_daily.csv` and `submits_summary_hourly.csv` | +| month | int | Month of the time period at which submits/deliveries are being aggregated. This is only included in `submits_summary_daily.csv` and `submits_summary_hourly.csv` | +| day | int | Day over which submits/deliveries are being aggregated. This is only included in `submits_summary_daily.csv` and `submits_summary_hourly.csv` | +| hour | int | Hour over which submits/deliveries are being aggregated. This is only included in `submits_summary_hourly.csv` | +| num_surveys | int | Number of surveys scheduled for delivery to the individual during the period | +| num_submitted_surveys | int | Number of surveys submitted during the period (i.e. the user hit submit on all surveys) | +| num_opened_surveys | int | Number of surveys opened by the individual during the time period (i.e. the user answered at least one question) | +| avg_time_to_submit | float | Average time between survey delivery and survey submission, in seconds, for complete surveys | +| avg_time_to_open | float | Average time between survey delivery and survey opening, in seconds. This is averaged over survey responses where a survey_timings file was available because we do not have information about survey opening in responses where a survey_timings file is missing. | +| avg_duration | float | Average time between survey opening and survey submission, in seconds. This is averaged over survey responses where a survey_timings file was available because we do not have information about survey opening in responses where a survey_timings file is missing. | +
+The following variables are created in the “submits_and_deliveries.csv” file. This file will only be generated if the config file and intervention timings file are provided. + +| Variable | Type | Description of Variable | +|--------------------------------------- |-------------- |------------------------------------------------------------------------------------------------------------- | +| survey id | str | ID of the survey | +| delivery_time | str | A scheduled delivery time. If surveys are weekly, delivery times will be generated for each week between start_date and end_date | +| submit_flg | str | Either the time when the user hit submit or the time when the individual stopped interacting with the survey for that session | +| time_to_submit | float | Time between survey delivery and survey submission, in seconds. If a survey was incomplete, this will be blank. | +| time_to_open | float | Time between survey delivery time and the first recorded survey answer, in seconds (for responses where a survey_timings file was available; if only a survey_answers file was available, this will be 0) | +| survey_duration | float | Time between the first recorded survey answer and the survey submission, in seconds (for responses where a survey_timings file was available; if only a survey_answers file was available, this will be NA)| + +
+The following variables are created in the “answers_data.csv” file. This file will be generated if a survey config file is available. + +| Variable | Type | Description of Variable | +|--------------------------------------- |-------------- |------------------------------------------------------------------------------------------------------------- | +| survey id | str | ID of the survey | +| beiwe_id | str | The participant’s Beiwe ID | +| question id | str | The ID of the question for this line | +| question text | str | The question text corresponding to the answer | +| question type | str | The type of question (radio button, free response, etc.) corresponding to the answer | +| question answer options | str | The answer options presented to the user (applicable for check box or radio button surveys) | +| timestamp | str | The Unix timestamp corresponding to the latest time the user was on the question | +| Local time | str | The local time corresponding to the latest time the user was on the question | +| last_answer | str | The last answer the user had selected before moving on to the next question or submitting | +| all_answers | str | A list of all answers the user selected | +| num_answers | int | The number of different answers selected by the user (the length of the list in all_answers) | +| first_time | str | The local time corresponding to the earliest time the user was on the question | +| last_time | str | The local time corresponding to the latest time the user was on the question | +| time_to_answer | float | The time that the user spent on the question | + +
+The following variables are created in the “answers_summary.csv” file. This file will only be generated if the config file and intervention timings file are provided. + +| Variable | Type | Description of Variable | +|--------------------------------------- |-------------- |------------------------------------------------------------------------------------------------------------- | +| survey id | str | ID of the survey | +| beiwe_id | str | The participant’s Beiwe ID | +| question id | str | The ID of the question for this line | +| num_answers | int | The number of times in the given data the question is answered | +| average_time_to_answer | float | The average number of seconds the user takes to answer the question | +| average_number_of_answers | float | Average number of answers selected for a question. This indicates whether a user changed an answer before submitting it. | +| most_common_answer | str | A user’s most common answer to a question | +
+The following variables are created in the “submits_only.csv” file. This file will always be generated. + +| Variable | Type | Description of Variable | +|--------------------------------------- |-------------- |------------------------------------------------------------------------------------------------------------- | +| survey id | str | ID of the survey | +| beiwe_id | str | The participant’s Beiwe ID | +| surv_inst_flg | int | A “submission flag” which distinguishes submissions that are done by the same individual on the same survey | +| max_time | str | Either the time when the user hit submit or the time when the individual stopped interacting with the survey for that session | +| min_time | str | The earliest time the individual was interacting with the survey that session | +| time_to_complete | float | Time between min_time and max_time, in seconds (for responses where a survey_timings file was available) | + +
+The following variables are created in a csv file for each survey. + +| Variable | Type | Description of Variable | +|--------------------------------------- |-------------- |------------------------------------------------------------------------------------------------------------- | +| start_time | str | Time this survey submission was started | +| end_time | str | Time this survey submission was ended | +| survey_duration | float | Difference between start and end time, in seconds (for surveys where a survey_timings file was available) | +| question_1, question_2, … | str | Responses to each question in the survey | +