From b6d9598667bbd454bd13be9e4ac7181b4a17e92c Mon Sep 17 00:00:00 2001
From: Ilya Sytchev <isytchev@hsph.harvard.edu>
Date: Wed, 1 Nov 2023 11:13:03 -0400
Subject: [PATCH] PEP 484 fixes in Sycamore: use explicit Optional for None defaults

---
 forest/sycamore/base.py       | 11 +++++----
 forest/sycamore/common.py     | 43 ++++++++++++++++++-----------------
 forest/sycamore/read_audio.py | 20 ++++++++--------
 forest/sycamore/submits.py    |  9 ++++----
 4 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/forest/sycamore/base.py b/forest/sycamore/base.py
index 6800c885..853ab2d8 100644
--- a/forest/sycamore/base.py
+++ b/forest/sycamore/base.py
@@ -24,10 +24,11 @@ def compute_survey_stats(
         study_folder: str, output_folder: str, tz_str: str = "UTC",
         users: Optional[List] = None,
         start_date: str = EARLIEST_DATE, end_date: Optional[str] = None,
-        config_path: Optional[str] = None, interventions_filepath: str = None,
+        config_path: Optional[str] = None,
+        interventions_filepath: Optional[str] = None,
         augment_with_answers: bool = True,
         submits_timeframe: Frequency = Frequency.HOURLY_AND_DAILY,
-        submits_by_survey_id: bool = True, history_path: str = None,
+        submits_by_survey_id: bool = True, history_path: Optional[str] = None,
         include_audio_surveys: bool = True
 ) -> bool:
     """Compute statistics on surveys
@@ -55,7 +56,7 @@ def compute_survey_stats(
             filepath where interventions json file is.
             The interventions json file
             can be downloaded by clicking "Edit this Study" on the Beiwe
-            website, then clicking clicking "Download Interventions" next to
+            website, then clicking "Download Interventions" next to
             "Intervention Data".
         augment_with_answers:
             Whether to use the survey_answers
@@ -219,9 +220,9 @@ def get_submits_for_tableau(
         study_folder: str, output_folder: str, config_path: str,
         tz_str: str = "UTC", start_date: str = EARLIEST_DATE,
         end_date: Optional[str] = None, users: Optional[List] = None,
-        interventions_filepath: str = None,
+        interventions_filepath: Optional[str] = None,
         submits_timeframe: Frequency = Frequency.DAILY,
-        history_path: str = None
+        history_path: Optional[str] = None
 ) -> None:
     """Get survey submissions per day for integration into Tableau WDC
 
diff --git a/forest/sycamore/common.py b/forest/sycamore/common.py
index d23d83ad..d372b84f 100644
--- a/forest/sycamore/common.py
+++ b/forest/sycamore/common.py
@@ -69,7 +69,7 @@ def standardize_question_type(
 def read_and_aggregate(
         study_dir: str, user: str, data_stream: str,
         time_start: str = EARLIEST_DATE,
-        time_end: str = None,
+        time_end: Optional[str] = None,
         tz_str: str = "UTC"
 ) -> pd.DataFrame:
     """Read and aggregate data for a user
@@ -132,9 +132,9 @@ def read_and_aggregate(
 
 
 def aggregate_surveys(
-        study_dir: str, users: list = None,
+        study_dir: str, users: Optional[list] = None,
         time_start: str = EARLIEST_DATE,
-        time_end: str = None, tz_str: str = "UTC"
+        time_end: Optional[str] = None, tz_str: str = "UTC"
 ) -> pd.DataFrame:
     """Aggregate Survey Data
 
@@ -347,9 +347,9 @@ def convert_timezone_df(df_merged: pd.DataFrame, tz_str: str = "UTC",
 
 def aggregate_surveys_config(
         study_dir: str, config_path: str, study_tz: str = "UTC",
-        users: list = None, time_start: str = EARLIEST_DATE,
-        time_end: str = None, augment_with_answers: bool = True,
-        history_path: str = None, include_audio_surveys: bool = True
+        users: Optional[list] = None, time_start: str = EARLIEST_DATE,
+        time_end: Optional[str] = None, augment_with_answers: bool = True,
+        history_path: Optional[str] = None, include_audio_surveys: bool = True
 ) -> pd.DataFrame:
     """Aggregate surveys when config is available
 
@@ -465,8 +465,8 @@ def aggregate_surveys_config(
 
 
 def aggregate_surveys_no_config(
-        study_dir: str, study_tz: str = "UTC", users: list = None,
-        time_start: str = EARLIEST_DATE, time_end: str = None,
+        study_dir: str, study_tz: str = "UTC", users: Optional[list] = None,
+        time_start: str = EARLIEST_DATE, time_end: Optional[str] = None,
         augment_with_answers: bool = True, include_audio_surveys: bool = True
 ) -> pd.DataFrame:
     """Clean aggregated data
@@ -532,9 +532,9 @@ def aggregate_surveys_no_config(
 
 def append_from_answers(
         agg_data: pd.DataFrame, download_folder: str,
-        users: list = None, tz_str: str = "UTC",
-        time_start: str = EARLIEST_DATE, time_end: str = None,
-        config_path: str = None, history_path: str = None
+        users: Optional[list] = None, tz_str: str = "UTC",
+        time_start: str = EARLIEST_DATE, time_end: Optional[str] = None,
+        config_path: Optional[str] = None, history_path: Optional[str] = None
 ) -> pd.DataFrame:
     """Append surveys included in survey_answers to data from survey_timings.
 
@@ -673,7 +673,7 @@ def find_missing_data(user: str, survey_id: str, agg_data: pd.DataFrame,
 
 def read_user_answers_stream(
         download_folder: str, user: str, tz_str: str = "UTC",
-        time_start: str = EARLIEST_DATE, time_end: str = None
+        time_start: str = EARLIEST_DATE, time_end: Optional[str] = None
 ) -> pd.DataFrame:
     """Reads in all survey_answers data for a user
 
@@ -777,10 +777,10 @@ def read_user_answers_stream(
 
 
 def read_aggregate_answers_stream(
-        download_folder: str, users: list = None,
-        tz_str: str = "UTC", config_path: str = None,
-        time_start: str = EARLIEST_DATE, time_end: str = None,
-        history_path: str = None
+        download_folder: str, users: Optional[list] = None,
+        tz_str: str = "UTC", config_path: Optional[str] = None,
+        time_start: str = EARLIEST_DATE, time_end: Optional[str] = None,
+        history_path: Optional[str] = None
 ) -> pd.DataFrame:
     """Reads in all answers data for many users and fixes Android users to have
     an answer instead of an integer
@@ -892,8 +892,8 @@ def read_aggregate_answers_stream(
 
 
 def fix_radio_answer_choices(
-        aggregated_data: pd.DataFrame, config_path: str = None,
-        history_path: str = None
+        aggregated_data: pd.DataFrame, config_path: Optional[str] = None,
+        history_path: Optional[str] = None
 ) -> pd.DataFrame:
     """
     Change the "question answer options" column into a list of question answer
@@ -1028,8 +1028,9 @@ def update_qs_with_seps(qs_with_seps: dict, survey_content: list) -> dict:
     return qs_with_seps
 
 
-def get_choices_with_sep_values(config_path: str = None,
-                                survey_history_path: str = None) -> dict:
+def get_choices_with_sep_values(config_path: Optional[str] = None,
+                                survey_history_path: Optional[str] = None
+                                ) -> dict:
     """
     Create a dict with a key for every question ID and a set of any responses
     for that ID that had a comma in them.
@@ -1085,7 +1086,7 @@ def get_choices_with_sep_values(config_path: str = None,
 
 
 def write_data_by_user(df_to_write: pd.DataFrame, output_folder: str,
-                       users: list = None):
+                       users: Optional[list] = None) -> None:
     """
     Write a dataframe to csv files, with a csv file corresponding to each user.
 
diff --git a/forest/sycamore/read_audio.py b/forest/sycamore/read_audio.py
index 79a11350..5249b31b 100644
--- a/forest/sycamore/read_audio.py
+++ b/forest/sycamore/read_audio.py
@@ -2,7 +2,7 @@
 
 import logging
 import os
-from typing import Dict
+from typing import Dict, Optional
 
 import librosa
 import numpy as np
@@ -16,7 +16,9 @@
 logger = logging.getLogger(__name__)
 
 
-def get_audio_survey_id_dict(history_path: str = None) -> Dict[str, str]:
+def get_audio_survey_id_dict(
+        history_path: Optional[str] = None
+) -> Dict[str, str]:
     """Create a dict that has most recent prompt corresponding to an audio
     survey as keys and the survey ID as the corresponding value.
 
@@ -47,7 +49,7 @@ def get_audio_survey_id_dict(history_path: str = None) -> Dict[str, str]:
     return output_dict
 
 
-def get_config_id_dict(config_path: str = None) -> Dict[str, int]:
+def get_config_id_dict(config_path: Optional[str] = None) -> Dict[str, int]:
     """Get a dict with question prompts as keys and the config IDs as values
 
     Args:
@@ -78,8 +80,8 @@ def get_config_id_dict(config_path: str = None) -> Dict[str, int]:
 
 def read_user_audio_recordings_stream(
         download_folder: str, user: str, tz_str: str = "UTC",
-        time_start: str = EARLIEST_DATE, time_end: str = None,
-        history_path: str = None
+        time_start: str = EARLIEST_DATE, time_end: Optional[str] = None,
+        history_path: Optional[str] = None
 ) -> pd.DataFrame:
     """Reads in all audio_recordings data for a user
 
@@ -200,10 +202,10 @@ def read_user_audio_recordings_stream(
 
 
 def read_aggregate_audio_recordings_stream(
-        download_folder: str, users: list = None,
-        tz_str: str = "UTC", config_path: str = None,
-        time_start: str = EARLIEST_DATE, time_end: str = None,
-        history_path: str = None
+        download_folder: str, users: Optional[list] = None,
+        tz_str: str = "UTC", config_path: Optional[str] = None,
+        time_start: str = EARLIEST_DATE, time_end: Optional[str] = None,
+        history_path: Optional[str] = None
 ) -> pd.DataFrame:
     """Reads in all answers data for many users and fixes Android users to have
     an answer instead of an integer
diff --git a/forest/sycamore/submits.py b/forest/sycamore/submits.py
index f3ce634f..79af48c4 100644
--- a/forest/sycamore/submits.py
+++ b/forest/sycamore/submits.py
@@ -176,7 +176,7 @@ def get_question_ids(survey_dict: dict, audio_survey_id_dict: dict) -> list:
 
 def gen_survey_schedule(
         config_path: str, time_start: str, time_end: str, users: list,
-        all_interventions_dict: dict, history_path: str = None
+        all_interventions_dict: dict, history_path: Optional[str] = None
 ) -> pd.DataFrame:
     """Get survey schedule for a number of users
 
@@ -278,8 +278,9 @@ def gen_survey_schedule(
 
 def survey_submits(
         config_path: str, time_start: str, time_end: str, users: list,
-        aggregated_data: pd.DataFrame, interventions_filepath: str = None,
-        history_path: str = None
+        aggregated_data: pd.DataFrame,
+        interventions_filepath: Optional[str] = None,
+        history_path: Optional[str] = None
 ) -> pd.DataFrame:
     """Get survey submits for users
 
@@ -481,7 +482,7 @@ def survey_submits(
 
 
 def summarize_submits(submits_df: pd.DataFrame,
-                      timeunit: Frequency = None,
+                      timeunit: Optional[Frequency] = None,
                       summarize_over_survey: bool = True) -> pd.DataFrame:
     """Summarize a survey submits df