From b6d9598667bbd454bd13be9e4ac7181b4a17e92c Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Wed, 1 Nov 2023 11:13:03 -0400 Subject: [PATCH] PEP484 fixes in Sycamore --- forest/sycamore/base.py | 11 +++++---- forest/sycamore/common.py | 43 ++++++++++++++++++----------------- forest/sycamore/read_audio.py | 20 ++++++++-------- forest/sycamore/submits.py | 9 ++++---- 4 files changed, 44 insertions(+), 39 deletions(-) diff --git a/forest/sycamore/base.py b/forest/sycamore/base.py index 6800c885..853ab2d8 100644 --- a/forest/sycamore/base.py +++ b/forest/sycamore/base.py @@ -24,10 +24,11 @@ def compute_survey_stats( study_folder: str, output_folder: str, tz_str: str = "UTC", users: Optional[List] = None, start_date: str = EARLIEST_DATE, end_date: Optional[str] = None, - config_path: Optional[str] = None, interventions_filepath: str = None, + config_path: Optional[str] = None, + interventions_filepath: Optional[str] = None, augment_with_answers: bool = True, submits_timeframe: Frequency = Frequency.HOURLY_AND_DAILY, - submits_by_survey_id: bool = True, history_path: str = None, + submits_by_survey_id: bool = True, history_path: Optional[str] = None, include_audio_surveys: bool = True ) -> bool: """Compute statistics on surveys @@ -55,7 +56,7 @@ def compute_survey_stats( filepath where interventions json file is. The interventions json file can be downloaded by clicking "Edit this Study" on the Beiwe - website, then clicking clicking "Download Interventions" next to + website, then clicking "Download Interventions" next to "Intervention Data". augment_with_answers: Whether to use the survey_answers @@ -219,9 +220,9 @@ def get_submits_for_tableau( study_folder: str, output_folder: str, config_path: str, tz_str: str = "UTC", start_date: str = EARLIEST_DATE, end_date: Optional[str] = None, users: Optional[List] = None, - interventions_filepath: str = None, + interventions_filepath: Optional[str] = None, submits_timeframe: Frequency = Frequency.DAILY, - history_path: str = None + history_path: Optional[str] = None ) -> None: """Get survey submissions per day for integration into Tableau WDC diff --git a/forest/sycamore/common.py b/forest/sycamore/common.py index d23d83ad..d372b84f 100644 --- a/forest/sycamore/common.py +++ b/forest/sycamore/common.py @@ -69,7 +69,7 @@ def standardize_question_type( def read_and_aggregate( study_dir: str, user: str, data_stream: str, time_start: str = EARLIEST_DATE, - time_end: str = None, + time_end: Optional[str] = None, tz_str: str = "UTC" ) -> pd.DataFrame: """Read and aggregate data for a user @@ -132,9 +132,9 @@ def read_and_aggregate( def aggregate_surveys( - study_dir: str, users: list = None, + study_dir: str, users: Optional[list] = None, time_start: str = EARLIEST_DATE, - time_end: str = None, tz_str: str = "UTC" + time_end: Optional[str] = None, tz_str: str = "UTC" ) -> pd.DataFrame: """Aggregate Survey Data @@ -347,9 +347,9 @@ def convert_timezone_df(df_merged: pd.DataFrame, tz_str: str = "UTC", def aggregate_surveys_config( study_dir: str, config_path: str, study_tz: str = "UTC", - users: list = None, time_start: str = EARLIEST_DATE, - time_end: str = None, augment_with_answers: bool = True, - history_path: str = None, include_audio_surveys: bool = True + users: Optional[list] = None, time_start: str = EARLIEST_DATE, + time_end: Optional[str] = None, augment_with_answers: bool = True, + history_path: Optional[str] = None, include_audio_surveys: bool = True ) -> pd.DataFrame: """Aggregate surveys when config is available @@ -465,8 +465,8 @@ def aggregate_surveys_config( def aggregate_surveys_no_config( - study_dir: str, study_tz: str = "UTC", users: list = None, - time_start: str = EARLIEST_DATE, time_end: str = None, + study_dir: str, study_tz: str = "UTC", users: Optional[list] = None, + time_start: str = EARLIEST_DATE, time_end: Optional[str] = None, augment_with_answers: bool = True, include_audio_surveys: bool = True ) -> pd.DataFrame: """Clean aggregated data @@ -532,9 +532,9 @@ def aggregate_surveys_no_config( def append_from_answers( agg_data: pd.DataFrame, download_folder: str, - users: list = None, tz_str: str = "UTC", - time_start: str = EARLIEST_DATE, time_end: str = None, - config_path: str = None, history_path: str = None + users: Optional[list] = None, tz_str: str = "UTC", + time_start: str = EARLIEST_DATE, time_end: Optional[str] = None, + config_path: Optional[str] = None, history_path: Optional[str] = None ) -> pd.DataFrame: """Append surveys included in survey_answers to data from survey_timings. @@ -673,7 +673,7 @@ def find_missing_data(user: str, survey_id: str, agg_data: pd.DataFrame, def read_user_answers_stream( download_folder: str, user: str, tz_str: str = "UTC", - time_start: str = EARLIEST_DATE, time_end: str = None + time_start: str = EARLIEST_DATE, time_end: Optional[str] = None ) -> pd.DataFrame: """Reads in all survey_answers data for a user @@ -777,10 +777,10 @@ def read_user_answers_stream( def read_aggregate_answers_stream( - download_folder: str, users: list = None, - tz_str: str = "UTC", config_path: str = None, - time_start: str = EARLIEST_DATE, time_end: str = None, - history_path: str = None + download_folder: str, users: Optional[list] = None, + tz_str: str = "UTC", config_path: Optional[str] = None, + time_start: str = EARLIEST_DATE, time_end: Optional[str] = None, + history_path: Optional[str] = None ) -> pd.DataFrame: """Reads in all answers data for many users and fixes Android users to have an answer instead of an integer @@ -892,8 +892,8 @@ def read_aggregate_answers_stream( def fix_radio_answer_choices( - aggregated_data: pd.DataFrame, config_path: str = None, - history_path: str = None + aggregated_data: pd.DataFrame, config_path: Optional[str] = None, + history_path: Optional[str] = None ) -> pd.DataFrame: """ Change the "question answer options" column into a list of question answer @@ -1028,8 +1028,9 @@ def update_qs_with_seps(qs_with_seps: dict, survey_content: list) -> dict: return qs_with_seps -def get_choices_with_sep_values(config_path: str = None, - survey_history_path: str = None) -> dict: +def get_choices_with_sep_values(config_path: Optional[str] = None, + survey_history_path: Optional[str] = None + ) -> dict: """ Create a dict with a key for every question ID and a set of any responses for that ID that had a comma in them. @@ -1085,7 +1086,7 @@ def get_choices_with_sep_values(config_path: str = None, def write_data_by_user(df_to_write: pd.DataFrame, output_folder: str, - users: list = None): + users: Optional[list] = None) -> None: """ Write a dataframe to csv files, with a csv file corresponding to each user. diff --git a/forest/sycamore/read_audio.py b/forest/sycamore/read_audio.py index 79a11350..5249b31b 100644 --- a/forest/sycamore/read_audio.py +++ b/forest/sycamore/read_audio.py @@ -2,7 +2,7 @@ import logging import os -from typing import Dict +from typing import Dict, Optional import librosa import numpy as np @@ -16,7 +16,9 @@ logger = logging.getLogger(__name__) -def get_audio_survey_id_dict(history_path: str = None) -> Dict[str, str]: +def get_audio_survey_id_dict( + history_path: Optional[str] = None +) -> Dict[str, str]: """Create a dict that has most recent prompt corresponding to an audio survey as keys and the survey ID as the corresponding value. @@ -47,7 +49,7 @@ def get_audio_survey_id_dict(history_path: str = None) -> Dict[str, str]: return output_dict -def get_config_id_dict(config_path: str = None) -> Dict[str, int]: +def get_config_id_dict(config_path: Optional[str] = None) -> Dict[str, int]: """Get a dict with question prompts as keys and the config IDs as values Args: @@ -78,8 +80,8 @@ def get_config_id_dict(config_path: str = None) -> Dict[str, int]: def read_user_audio_recordings_stream( download_folder: str, user: str, tz_str: str = "UTC", - time_start: str = EARLIEST_DATE, time_end: str = None, - history_path: str = None + time_start: str = EARLIEST_DATE, time_end: Optional[str] = None, + history_path: Optional[str] = None ) -> pd.DataFrame: """Reads in all audio_recordings data for a user @@ -200,10 +202,10 @@ def read_user_audio_recordings_stream( def read_aggregate_audio_recordings_stream( - download_folder: str, users: list = None, - tz_str: str = "UTC", config_path: str = None, - time_start: str = EARLIEST_DATE, time_end: str = None, - history_path: str = None + download_folder: str, users: Optional[list] = None, + tz_str: str = "UTC", config_path: Optional[str] = None, + time_start: str = EARLIEST_DATE, time_end: Optional[str] = None, + history_path: Optional[str] = None ) -> pd.DataFrame: """Reads in all answers data for many users and fixes Android users to have an answer instead of an integer diff --git a/forest/sycamore/submits.py b/forest/sycamore/submits.py index f3ce634f..79af48c4 100644 --- a/forest/sycamore/submits.py +++ b/forest/sycamore/submits.py @@ -176,7 +176,7 @@ def get_question_ids(survey_dict: dict, audio_survey_id_dict: dict) -> list: def gen_survey_schedule( config_path: str, time_start: str, time_end: str, users: list, - all_interventions_dict: dict, history_path: str = None + all_interventions_dict: dict, history_path: Optional[str] = None ) -> pd.DataFrame: """Get survey schedule for a number of users @@ -278,8 +278,9 @@ def gen_survey_schedule( def survey_submits( config_path: str, time_start: str, time_end: str, users: list, - aggregated_data: pd.DataFrame, interventions_filepath: str = None, - history_path: str = None + aggregated_data: pd.DataFrame, + interventions_filepath: Optional[str] = None, + history_path: Optional[str] = None ) -> pd.DataFrame: """Get survey submits for users @@ -481,7 +482,7 @@ def survey_submits( def summarize_submits(submits_df: pd.DataFrame, - timeunit: Frequency = None, + timeunit: Optional[Frequency] = None, summarize_over_survey: bool = True) -> pd.DataFrame: """Summarize a survey submits df