From 90de508964042b8229f5d7a3fcb82029fa4e8d65 Mon Sep 17 00:00:00 2001
From: GeorgiosEfstathiadis
 <54844705+GeorgeEfstathiadis@users.noreply.github.com>
Date: Thu, 16 Nov 2023 16:10:15 -0500
Subject: [PATCH 1/7] Jasmine - Add more tests for summary values (#221)

---
 forest/jasmine/tests/test_traj2stats.py | 81 +++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/forest/jasmine/tests/test_traj2stats.py b/forest/jasmine/tests/test_traj2stats.py
index 48988f13..2955e1e8 100644
--- a/forest/jasmine/tests/test_traj2stats.py
+++ b/forest/jasmine/tests/test_traj2stats.py
@@ -399,6 +399,87 @@ def test_gps_summaries_log_format(
     assert np.all(dates_stats == dates_log)
 
 
+def test_gps_summaries_summary_vals(
+    coords1, sample_trajectory, sample_nearby_locations, mocker
+):
+    """Testing gps summaries summary values are correct"""
+    mocker.patch(
+        "forest.jasmine.traj2stats.get_nearby_locations",
+        return_value=sample_nearby_locations,
+    )
+    mocker.patch("forest.jasmine.traj2stats.locate_home", return_value=coords1)
+
+    parameters = Hyperparameters()
+
+    summary, _ = gps_summaries(
+        traj=sample_trajectory,
+        tz_str="Europe/London",
+        frequency=Frequency.DAILY,
+        parameters=parameters,
+    )
+
+    assert np.all(summary["obs_duration"] == 24)
+    assert summary["obs_day"].iloc[0] == 10
+    assert summary["obs_night"].iloc[0] == 14
+    assert summary["obs_day"].iloc[1] == 24
+    assert summary["obs_night"].iloc[1] == 0
+    assert np.all(summary["home_time"] == 0)
+    assert summary["dist_traveled"].iloc[0] == 0.208
+    assert summary["dist_traveled"].iloc[1] == 0
+    assert np.round(summary["max_dist_home"].iloc[0], 3) == 0.915
+    assert np.round(summary["max_dist_home"].iloc[1], 3) == 0.915
+    assert np.round(summary["radius"].iloc[0], 3) == 0.013
+    assert summary["radius"].iloc[1] == 0
+    assert np.round(summary["diameter"].iloc[0], 3) == 0.064
+    assert summary["diameter"].iloc[1] == 0
+    assert summary["num_sig_places"].iloc[0] == 2
+    assert summary["num_sig_places"].iloc[1] == 1
+    assert np.round(summary["entropy"].iloc[0], 3) == 0.468
+    assert summary["entropy"].iloc[1] == 0
+    assert round(summary["total_flight_time"].iloc[0], 3) == 1.528
+    assert summary["total_flight_time"].iloc[1] == 0
+    assert round(summary["av_flight_length"].iloc[0], 3) == 0.052
+    assert summary["av_flight_length"].iloc[1] == 0
+    assert round(summary["sd_flight_length"].iloc[0], 3) == 0.012
+    assert summary["sd_flight_length"].iloc[1] == 0
+    assert round(summary["av_flight_duration"].iloc[0], 3) == 0.382
+    assert summary["av_flight_duration"].iloc[1] == 0
+    assert round(summary["sd_flight_duration"].iloc[0], 3) == 0.132
+    assert summary["sd_flight_duration"].iloc[1] == 0
+    assert round(summary["total_pause_time"].iloc[0], 3) == 22.472
+    assert summary["total_pause_time"].iloc[1] == 24
+    assert round(summary["av_pause_duration"].iloc[0], 3) == 4.494
+    assert summary["av_pause_duration"].iloc[1] == 24
+    assert round(summary["sd_pause_duration"].iloc[0], 3) == 3.496
+    assert summary["sd_pause_duration"].iloc[1] == 0
+
+
+def test_gps_summaries_pcr(
+    coords1, sample_trajectory, sample_nearby_locations, mocker
+):
+    """Testing gps summaries pcr"""
+    mocker.patch(
+        "forest.jasmine.traj2stats.get_nearby_locations",
+        return_value=sample_nearby_locations,
+    )
+    mocker.patch("forest.jasmine.traj2stats.locate_home", return_value=coords1)
+
+    parameters = Hyperparameters()
+    parameters.pcr_bool = True
+
+    summary, _ = gps_summaries(
+        traj=sample_trajectory,
+        tz_str="Europe/London",
+        frequency=Frequency.DAILY,
+        parameters=parameters,
+    )
+
+    assert summary["physical_circadian_rhythm"].iloc[0] == 0
+    assert summary["physical_circadian_rhythm"].iloc[1] == 1
+    assert summary["physical_circadian_rhythm_stratified"].iloc[0] == 0
+    assert summary["physical_circadian_rhythm_stratified"].iloc[1] == 0
+
+
 @pytest.fixture()
 def mobmat1():
     """mobility matrix 1"""

From 89e7aa8eb3a2295a10d6d464d95787c80be025ef Mon Sep 17 00:00:00 2001
From: Zachary Clement <clementzach@icloud.com>
Date: Wed, 22 Nov 2023 15:41:02 -0500
Subject: [PATCH 2/7] Update test_log_stats.py (#223)

* Update test_log_stats.py

Add a test to make sure that hourly frequency output has the right size

* Make the input data frame the correct width
---
 forest/willow/log_stats.py            | 2 +-
 forest/willow/tests/test_log_stats.py | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py
index a40e1be0..650f3ac0 100644
--- a/forest/willow/log_stats.py
+++ b/forest/willow/log_stats.py
@@ -366,7 +366,7 @@ def comm_logs_summaries(
         if frequency == Frequency.DAILY:
             newline = [year, month, day] + newline
         else:
-            newline = [year, month, day, hour] + newline[:16]
+            newline = [year, month, day, hour] + newline[:17]
 
         summary_stats.append(newline)
 
diff --git a/forest/willow/tests/test_log_stats.py b/forest/willow/tests/test_log_stats.py
index ddc40ed5..1a871fe5 100644
--- a/forest/willow/tests/test_log_stats.py
+++ b/forest/willow/tests/test_log_stats.py
@@ -17,6 +17,14 @@ def test_comm_log_summaries_with_empty_data():
     assert isinstance(stats_pdframe, pd.DataFrame)
 
 
+def test_comm_log_summaries_with_empty_data_hourly():
+    text_data = pd.DataFrame.from_dict({})
+    call_data = pd.DataFrame.from_dict({})
+    stats_pdframe = comm_logs_summaries(text_data, call_data, STAMP_START,
+                                        STAMP_END, TZ_STR, Frequency.HOURLY)
+    assert isinstance(stats_pdframe, pd.DataFrame)
+
+
 def test_comm_log_summaries_with_empty_text_data():
     text_data = pd.DataFrame.from_dict({})
     call_data = pd.DataFrame.from_dict(

From 285ad43410122c86e8858a899a1ee5e53d08c7bb Mon Sep 17 00:00:00 2001
From: Zachary Clement <clementzach@icloud.com>
Date: Mon, 27 Nov 2023 16:52:57 -0500
Subject: [PATCH 3/7] Remove librosa argument name deprecation warning in
 Sycamore (#222)

---
 forest/sycamore/read_audio.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/forest/sycamore/read_audio.py b/forest/sycamore/read_audio.py
index 5249b31b..84d91e55 100644
--- a/forest/sycamore/read_audio.py
+++ b/forest/sycamore/read_audio.py
@@ -136,7 +136,7 @@ def read_user_audio_recordings_stream(
             if valid_file:
                 all_files.append(filename)
                 all_durations.append(librosa.get_duration(
-                    filename=os.path.join(audio_dir, survey, filename)
+                    path=os.path.join(audio_dir, survey, filename)
                 ))
 
         if len(all_files) == 0:

From 43948b2dd009477272cb795ba1e14fb0e39f59ff Mon Sep 17 00:00:00 2001
From: GeorgiosEfstathiadis
 <54844705+GeorgeEfstathiadis@users.noreply.github.com>
Date: Tue, 28 Nov 2023 17:14:12 -0500
Subject: [PATCH 4/7] Oak - Minute level analysis (#220)

* add support for minute-level analysis
* vectorized pydate conversion
* optimize datetime calculations and list comprehension
* vectorize get_pp
* integer valued frequency
* compute_window_and_count argument in minutes
* timedelta minutes for freq

---------

Co-authored-by: Ilya Sytchev <isytchev@hsph.harvard.edu>
---
 forest/constants.py                     | 13 ++---
 forest/jasmine/tests/test_traj2stats.py |  8 ++--
 forest/jasmine/traj2stats.py            | 18 ++++---
 forest/oak/base.py                      | 64 ++++++++++++++++---------
 forest/oak/tests/test_run_hourly.py     |  6 +--
 forest/sycamore/base.py                 | 14 ++++++
 forest/willow/log_stats.py              | 18 +++++--
 7 files changed, 95 insertions(+), 46 deletions(-)

diff --git a/forest/constants.py b/forest/constants.py
index c71fe71b..8d95f4ce 100644
--- a/forest/constants.py
+++ b/forest/constants.py
@@ -20,12 +20,13 @@
 
 class Frequency(Enum):
     """This class enumerates possible frequencies for summary data."""
-    HOURLY = 1
-    DAILY = 24
-    HOURLY_AND_DAILY = "hourly_and_daily"
-    THREE_HOURLY = 3
-    SIX_HOURLY = 6
-    TWELVE_HOURLY = 12
+    MINUTELY = 1
+    HOURLY = 60
+    THREE_HOURLY = 3 * 60
+    SIX_HOURLY = 6 * 60
+    TWELVE_HOURLY = 12 * 60
+    DAILY = 24 * 60
+    HOURLY_AND_DAILY = -1
 
 
 class OSMTags(Enum):
diff --git a/forest/jasmine/tests/test_traj2stats.py b/forest/jasmine/tests/test_traj2stats.py
index 2955e1e8..156bac89 100644
--- a/forest/jasmine/tests/test_traj2stats.py
+++ b/forest/jasmine/tests/test_traj2stats.py
@@ -654,7 +654,7 @@ def test_compute_window_size(sample_trajectory):
     """Testing window size is correct"""
 
     window, _ = compute_window_and_count(
-        sample_trajectory[0, 3], sample_trajectory[-1, 6], 1
+        sample_trajectory[0, 3], sample_trajectory[-1, 6], 60
     )
 
     assert window == 3600
@@ -664,7 +664,7 @@ def test_compute_window_count(sample_trajectory):
     """Testing number of windows is correct"""
 
     _, num_windows = compute_window_and_count(
-        sample_trajectory[0, 3], sample_trajectory[-1, 6], 1
+        sample_trajectory[0, 3], sample_trajectory[-1, 6], 60
     )
 
     assert num_windows == 24
@@ -674,7 +674,7 @@ def test_compute_window_size_6_hour(sample_trajectory):
     """Testing window size is correct 6 hour window"""
 
     window, _ = compute_window_and_count(
-        sample_trajectory[0, 3], sample_trajectory[-1, 6], 6
+        sample_trajectory[0, 3], sample_trajectory[-1, 6], 360
     )
 
     assert window == 3600 * 6
@@ -684,7 +684,7 @@ def test_compute_window_count_6_hour(sample_trajectory):
     """Testing number of windows is correct 6 hour window"""
 
     _, num_windows = compute_window_and_count(
-        sample_trajectory[0, 3], sample_trajectory[-1, 6], 6
+        sample_trajectory[0, 3], sample_trajectory[-1, 6], 360
     )
 
     assert num_windows == 4
diff --git a/forest/jasmine/traj2stats.py b/forest/jasmine/traj2stats.py
index ed2d8a92..da6ca15c 100644
--- a/forest/jasmine/traj2stats.py
+++ b/forest/jasmine/traj2stats.py
@@ -1125,8 +1125,8 @@ def gps_summaries(
         ValueError: Frequency is not valid
     """
 
-    if frequency == Frequency.HOURLY_AND_DAILY:
-        raise ValueError("Frequency must be 'hourly' or 'daily'")
+    if frequency in [Frequency.HOURLY_AND_DAILY, Frequency.MINUTELY]:
+        raise ValueError(f"Frequency cannot be {frequency.name.lower()}.")
 
     if frequency != Frequency.DAILY:
         parameters.split_day_night = False
@@ -1161,7 +1161,7 @@ def gps_summaries(
             traj, [3, 4, 5], tz_str, 3600*24
         )
         window, num_windows = compute_window_and_count(
-            start_stamp, end_stamp, 24, parameters.split_day_night
+            start_stamp, end_stamp, 24*60, parameters.split_day_night
         )
 
     if num_windows <= 0:
@@ -1484,7 +1484,7 @@ def get_time_range(
 
 
 def compute_window_and_count(
-    start_stamp: int, end_stamp: int, window_hours: int,
+    start_stamp: int, end_stamp: int, window_minutes: int,
     split_day_night: bool = False
 ) -> Tuple[int, int]:
     """Computes the window and number of windows based on given time stamps.
@@ -1492,7 +1492,7 @@ def compute_window_and_count(
     Args:
         start_stamp: int, starting time stamp
         end_stamp: int, ending time stamp
-        window_hours: int, window in hours
+        window_minutes: int, window in minutes
         split_day_night: bool, True if split day and night
     Returns:
         A tuple of two integers (window, num_windows):
@@ -1500,7 +1500,7 @@ def compute_window_and_count(
             num_windows: int, number of windows
     """
 
-    window = window_hours * 60 * 60
+    window = window_minutes * 60
     num_windows = (end_stamp - start_stamp) // window
     if split_day_night:
         num_windows *= 2
@@ -1595,8 +1595,14 @@ def gps_stats_main(
             as pickle files for future use
         and a record csv file to show which users are processed
         and logger csv file to show warnings and bugs during the run
+    Raises:
+        ValueError: Frequency is not valid
     """
 
+    # no minutely analysis on GPS data
+    if frequency == Frequency.MINUTELY:
+        raise ValueError("Frequency cannot be minutely.")
+
     os.makedirs(output_folder, exist_ok=True)
 
     if parameters is None:
diff --git a/forest/oak/base.py b/forest/oak/base.py
index 14ac0e49..19d9e3e9 100644
--- a/forest/oak/base.py
+++ b/forest/oak/base.py
@@ -137,8 +137,7 @@ def get_pp(vm_bout: np.ndarray, fs: int = 10) -> npt.NDArray[np.float64]:
 
     """
     vm_res_sec = vm_bout.reshape((fs, -1), order="F")
-    pp = np.array([max(vm_res_sec[:, i])-min(vm_res_sec[:, i])
-                   for i in range(vm_res_sec.shape[1])])
+    pp = np.ptp(vm_res_sec, axis=0)
 
     return pp
 
@@ -478,7 +477,7 @@ def preprocess_dates(
 
 
 def run_hourly(
-    t_hours_pd: pd.Series, days_hourly: pd.DatetimeIndex,
+    t_hours_pd: pd.Series, t_ind_pydate: list,
     cadence_bout: np.ndarray, steps_hourly: np.ndarray,
     walkingtime_hourly: np.ndarray, cadence_hourly: np.ndarray,
     frequency: Frequency
@@ -489,7 +488,7 @@ def run_hourly(
     Args:
         t_hours_pd: pd.Series
             timestamp of each measurement
-        days_hourly: pd.DatetimeIndex
+        t_ind_pydate: list
             list of days with hourly resolution
         cadence_bout: np.ndarray
             cadence of each measurement
@@ -503,11 +502,10 @@ def run_hourly(
             summary statistics format, Frequency class at constants.py
     """
     for t_unique in t_hours_pd.unique():
-        t_ind_pydate = [t_ind.to_pydatetime() for t_ind in days_hourly]
         # get indexes of ranges of dates that contain t_unique
         ind_to_store = -1
         for ind_to_store, t_ind in enumerate(t_ind_pydate):
-            if t_ind <= t_unique < t_ind + timedelta(hours=frequency.value):
+            if t_ind <= t_unique < t_ind + timedelta(minutes=frequency.value):
                 break
         cadence_temp = cadence_bout[t_hours_pd == t_unique]
         cadence_temp = cadence_temp[cadence_temp > 0]
@@ -555,13 +553,15 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None,
     from_zone = tz.gettz('UTC')
     to_zone = tz.gettz(tz_str) if tz_str else from_zone
 
+    freq_str = frequency.name.lower()
+
     # create folders to store results
     if frequency == Frequency.HOURLY_AND_DAILY:
         os.makedirs(os.path.join(output_folder, "daily"), exist_ok=True)
         os.makedirs(os.path.join(output_folder, "hourly"), exist_ok=True)
     else:
         os.makedirs(
-            os.path.join(output_folder, frequency.name.lower()), exist_ok=True
+            os.path.join(output_folder, freq_str), exist_ok=True
         )
     if users is None:
         users = get_ids(study_folder)
@@ -578,21 +578,38 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None,
         )
 
         days = pd.date_range(date_start, date_end, freq='D')
-        if (frequency == Frequency.HOURLY_AND_DAILY
-                or frequency == Frequency.HOURLY):
-            freq = 'H'
-        else:
-            freq = str(frequency.value) + 'H'
-        days_hourly = pd.date_range(date_start, date_end+timedelta(days=1),
-                                    freq=freq)[:-1]
+
         # allocate memory
         steps_daily = np.full((len(days), 1), np.nan)
         cadence_daily = np.full((len(days), 1), np.nan)
         walkingtime_daily = np.full((len(days), 1), np.nan)
 
-        steps_hourly = np.full((len(days_hourly), 1), np.nan)
-        cadence_hourly = np.full((len(days_hourly), 1), np.nan)
-        walkingtime_hourly = np.full((len(days_hourly), 1), np.nan)
+        steps_hourly = np.full((1, 1), np.nan)
+        cadence_hourly = np.full((1, 1), np.nan)
+        walkingtime_hourly = np.full((1, 1), np.nan)
+        t_ind_pydate = pd.Series([])
+        t_ind_pydate_str = None
+
+        if frequency != Frequency.DAILY:
+            if (
+                frequency == Frequency.HOURLY_AND_DAILY
+                or frequency == Frequency.HOURLY
+            ):
+                freq = 'H'
+            elif frequency == Frequency.MINUTELY:
+                freq = 'T'
+            else:
+                freq = str(frequency.value/60) + 'H'
+
+            days_hourly = pd.date_range(date_start, date_end+timedelta(days=1),
+                                        freq=freq)[:-1]
+
+            steps_hourly = np.full((len(days_hourly), 1), np.nan)
+            cadence_hourly = np.full((len(days_hourly), 1), np.nan)
+            walkingtime_hourly = np.full((len(days_hourly), 1), np.nan)
+
+            t_ind_pydate = days_hourly.to_pydatetime()
+            t_ind_pydate_str = t_ind_pydate.astype(str)
 
         for d_ind, d_datetime in enumerate(days):
             logger.info("Day: %d", d_ind)
@@ -628,14 +645,18 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None,
                 ]
                 # transform t to full hours
                 t_series = pd.Series(t_datetime)
-                t_hours_pd = t_series.dt.floor('H')
+                if frequency == Frequency.MINUTELY:
+                    t_hours_pd = t_series.dt.floor('T')
+                else:
+                    t_hours_pd = t_series.dt.floor('H')
+
                 # convert t_hours to correct timezone
                 t_hours_pd = t_hours_pd.dt.tz_localize(
                     from_zone
                 ).dt.tz_convert(to_zone)
 
                 run_hourly(
-                    t_hours_pd, days_hourly, cadence_bout, steps_hourly,
+                    t_hours_pd, t_ind_pydate, cadence_bout, steps_hourly,
                     walkingtime_hourly, cadence_hourly, frequency
                 )
 
@@ -660,8 +681,7 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None,
                 summary_stats.to_csv(dest_path, index=False)
             if frequency != Frequency.DAILY:
                 summary_stats = pd.DataFrame({
-                    'date': [date.strftime('%Y-%m-%d %H:%M:%S')
-                             for date in days_hourly],
+                    'date': t_ind_pydate_str,
                     'walking_time': walkingtime_hourly[:, -1],
                     'steps': steps_hourly[:, -1],
                     'cadence': cadence_hourly[:, -1]})
@@ -669,6 +689,6 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None,
                 if frequency == Frequency.HOURLY_AND_DAILY:
                     freq_name = "hourly"
                 else:
-                    freq_name = frequency.name.lower()
+                    freq_name = freq_str
                 dest_path = os.path.join(output_folder, freq_name, output_file)
                 summary_stats.to_csv(dest_path, index=False)
diff --git a/forest/oak/tests/test_run_hourly.py b/forest/oak/tests/test_run_hourly.py
index cb6beb37..242b52bd 100644
--- a/forest/oak/tests/test_run_hourly.py
+++ b/forest/oak/tests/test_run_hourly.py
@@ -20,12 +20,12 @@ def sample_run_input(signal_bout):
         "2020-02-25 08:00:00-05:00",
         "2020-02-25 08:00:00-05:00"
     ], utc=True).tz_convert('US/Eastern'))
-    days_hourly = pd.date_range(
+    t_ind_pydate = pd.date_range(
         start='2020-02-24 00:00:00',
         end='2020-02-25 23:00:00',
         freq='H',
         tz='US/Eastern'
-    )
+    ).to_pydatetime()
     cadence_bout = np.array(
         [1.65, 1.6, 1.55, 1.6, 1.55, 1.85, 1.8, 1.75, 1.75, 1.7]
     )
@@ -35,7 +35,7 @@ def sample_run_input(signal_bout):
 
     return (
         t_hours_pd,
-        days_hourly,
+        t_ind_pydate,
         cadence_bout,
         steps_hourly,
         walkingtime_hourly,
diff --git a/forest/sycamore/base.py b/forest/sycamore/base.py
index 853ab2d8..67da6024 100644
--- a/forest/sycamore/base.py
+++ b/forest/sycamore/base.py
@@ -91,6 +91,13 @@ def compute_survey_stats(
     Returns:
         True if successful, False otherwise
     """
+
+    if submits_timeframe not in [
+        Frequency.HOURLY_AND_DAILY, Frequency.HOURLY, Frequency.DAILY
+    ]:
+        logger.error("Error: Invalid submits timeframe")
+        return False
+
     os.makedirs(output_folder, exist_ok=True)
     os.makedirs(os.path.join(output_folder, "summaries"), exist_ok=True)
     os.makedirs(os.path.join(output_folder, "by_survey"), exist_ok=True)
@@ -250,6 +257,13 @@ def get_submits_for_tableau(
         history_path: Filepath to the survey history file. If this is not
                 included, audio survey timings cannot be estimated.
     """
+
+    if submits_timeframe not in [
+        Frequency.HOURLY, Frequency.DAILY, Frequency.HOURLY_AND_DAILY
+    ]:
+        logger.error("Error: Invalid submits timeframe")
+        return
+
     os.makedirs(output_folder, exist_ok=True)
 
     if users is None:
diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py
index 650f3ac0..6bf12ae5 100644
--- a/forest/willow/log_stats.py
+++ b/forest/willow/log_stats.py
@@ -304,9 +304,6 @@ def comm_logs_summaries(
 
     Returns:
         pandas dataframe of summary stats
-
-    Raises:
-        ValueError: if frequency is not of correct type
     """
     summary_stats = []
     start_year, start_month, start_day, start_hour, _, _ = stamp2datetime(
@@ -318,7 +315,9 @@ def comm_logs_summaries(
 
     # determine the starting and ending timestamp again based on the frequency
     if frequency == Frequency.HOURLY_AND_DAILY:
-        raise ValueError("frequency not of correct type")
+        logger.error(
+            "Error: frequency cannot be HOURLY_AND_DAILY for this function"
+        )
 
     if frequency == Frequency.DAILY:
         table_start = datetime2stamp(
@@ -337,7 +336,7 @@ def comm_logs_summaries(
 
     # determine the step size based on the frequency
     # step_size is in seconds
-    step_size = 3600 * frequency.value
+    step_size = 60 * frequency.value
 
     # for each chunk, calculate the summary statistics (colmean or count)
     for stamp in np.arange(table_start, table_end + 1, step=step_size):
@@ -429,6 +428,15 @@ def log_stats_main(
         time_end: ending timestamp of the study
         beiwe_id: list of Beiwe IDs to be processed
     """
+
+    if frequency not in [
+        Frequency.HOURLY_AND_DAILY, Frequency.DAILY, Frequency.HOURLY
+    ]:
+        logger.error(
+            "Error: frequency must be one of the following: "
+            "HOURLY_AND_DAILY, DAILY, HOURLY"
+        )
+
     os.makedirs(output_folder, exist_ok=True)
 
     if frequency == Frequency.HOURLY_AND_DAILY:

From 6a4dba26a81b76a6e606d6d7dc108e34ea9cd925 Mon Sep 17 00:00:00 2001
From: Zachary Clement <clementzach@icloud.com>
Date: Tue, 28 Nov 2023 18:30:38 -0500
Subject: [PATCH 5/7] Oak fixes zc (#224)

* avoid runtime warning from mean of empty slice

* Don't try to process empty data

* pep8 fixes

---------

Co-authored-by: GeorgiosEfstathiadis <54844705+GeorgeEfstathiadis@users.noreply.github.com>
---
 forest/oak/base.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/forest/oak/base.py b/forest/oak/base.py
index 19d9e3e9..349a2364 100644
--- a/forest/oak/base.py
+++ b/forest/oak/base.py
@@ -85,7 +85,8 @@ def preprocess_bout(t_bout: np.ndarray, x_bout: np.ndarray, y_bout: np.ndarray,
                              z_bout_interp**2)
 
     # standardize measurement to gravity units (g) if its recorded in m/s**2
-    if np.mean(vm_bout_interp) > 5:
+    # Also avoid a runtime warning of taking the mean of an empty slice
+    if vm_bout_interp.shape[0] > 0 and np.mean(vm_bout_interp) > 5:
         x_bout_interp = x_bout_interp/9.80665
         y_bout_interp = y_bout_interp/9.80665
         z_bout_interp = z_bout_interp/9.80665
@@ -635,6 +636,8 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None,
             z = np.array(data["z"], dtype="float64")  # z-axis acc.
             # preprocess data fragment
             t_bout_interp, vm_bout = preprocess_bout(timestamp, x, y, z)
+            if len(t_bout_interp) == 0:  # no valid data to process here
+                continue
             # find walking and estimate cadence
             cadence_bout = find_walking(vm_bout)
             # distribute metrics across hours

From 5cf03af1f8a69c966e65c84f0f9e15364618f9c0 Mon Sep 17 00:00:00 2001
From: GeorgiosEfstathiadis
 <54844705+GeorgeEfstathiadis@users.noreply.github.com>
Date: Wed, 29 Nov 2023 11:17:53 -0500
Subject: [PATCH 6/7] Oak bug preprocess dates (#225)

* fix bug with files outside the analysis time frame
* suppress warning of missing dtype
* rename dates_original to dates_shifted
* improve readability
---
 forest/oak/base.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/forest/oak/base.py b/forest/oak/base.py
index 349a2364..2fbe1c8e 100644
--- a/forest/oak/base.py
+++ b/forest/oak/base.py
@@ -450,24 +450,29 @@ def preprocess_dates(
         date.replace(tzinfo=from_zone).astimezone(to_zone) for date in dates
     ]
     # trim dataset according to time_start and time_end
-    if time_start is not None and time_end is not None:
+    if time_start is None or time_end is None:
+        dates_filtered = dates
+    else:
         time_min = datetime.strptime(time_start, fmt)
         time_min = time_min.replace(tzinfo=from_zone).astimezone(to_zone)
         time_max = datetime.strptime(time_end, fmt)
         time_max = time_max.replace(tzinfo=from_zone).astimezone(to_zone)
-        dates = [date for date in dates if time_min <= date <= time_max]
+        dates_filtered = [
+            date for date in dates if time_min <= date <= time_max
+        ]
 
     dates_shifted = [date-timedelta(hours=date.hour) for date in dates]
+
     # create time vector with days for analysis
     if time_start is None:
-        date_start = dates_shifted[0]
+        date_start = dates_filtered[0]
         date_start = date_start - timedelta(hours=date_start.hour)
     else:
         date_start = datetime.strptime(time_start, fmt)
         date_start = date_start.replace(tzinfo=from_zone).astimezone(to_zone)
         date_start = date_start - timedelta(hours=date_start.hour)
     if time_end is None:
-        date_end = dates_shifted[-1]
+        date_end = dates_filtered[-1]
         date_end = date_end - timedelta(hours=date_end.hour)
     else:
         date_end = datetime.strptime(time_end, fmt)
@@ -588,7 +593,7 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None,
         steps_hourly = np.full((1, 1), np.nan)
         cadence_hourly = np.full((1, 1), np.nan)
         walkingtime_hourly = np.full((1, 1), np.nan)
-        t_ind_pydate = pd.Series([])
+        t_ind_pydate = pd.Series([], dtype='datetime64[ns]')
         t_ind_pydate_str = None
 
         if frequency != Frequency.DAILY:

From 3db2547d380768a74b8bf410de5f08ee19c8e418 Mon Sep 17 00:00:00 2001
From: Ilya Sytchev <isytchev@hsph.harvard.edu>
Date: Wed, 29 Nov 2023 12:40:54 -0500
Subject: [PATCH 7/7] Avoid using the root logger

---
 docs/source/logging.md             | 7 -------
 forest/bonsai/simulate_gps_data.py | 4 ++--
 forest/jasmine/data2mobmat.py      | 4 ++--
 forest/jasmine/mobmat2traj.py      | 4 ++--
 forest/jasmine/sogp_gps.py         | 4 ++--
 forest/jasmine/traj2stats.py       | 4 ++--
 forest/willow/log_stats.py         | 4 ++--
 7 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/docs/source/logging.md b/docs/source/logging.md
index 581f345b..a47d9e87 100644
--- a/docs/source/logging.md
+++ b/docs/source/logging.md
@@ -37,13 +37,6 @@ import logging
 logger = logging.getLogger(__name__)
 ```
 
-Or like this:
-
-```
-from logging import getLogger
-logger = getLogger(__name__)
-```
-
 ## 3. How to insert log messages into definitions
 
 Basic `logging` messages:
diff --git a/forest/bonsai/simulate_gps_data.py b/forest/bonsai/simulate_gps_data.py
index 1f0c6249..2e3b77a3 100644
--- a/forest/bonsai/simulate_gps_data.py
+++ b/forest/bonsai/simulate_gps_data.py
@@ -27,8 +27,8 @@
 TRAVELLING_STATUS_LIST = range(11)
 
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 
 class PossibleExits(Enum):
diff --git a/forest/jasmine/data2mobmat.py b/forest/jasmine/data2mobmat.py
index 2d8a78a5..da282397 100644
--- a/forest/jasmine/data2mobmat.py
+++ b/forest/jasmine/data2mobmat.py
@@ -15,8 +15,8 @@
 TOLERANCE = 1e-6
 
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 
 def cartesian(
diff --git a/forest/jasmine/mobmat2traj.py b/forest/jasmine/mobmat2traj.py
index 1527a982..e4579ed8 100644
--- a/forest/jasmine/mobmat2traj.py
+++ b/forest/jasmine/mobmat2traj.py
@@ -13,8 +13,8 @@
 from .data2mobmat import great_circle_dist, exist_knot
 
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 
 # the details of the functions are in paper [Liu and Onnela (2020)]
diff --git a/forest/jasmine/sogp_gps.py b/forest/jasmine/sogp_gps.py
index 86931e72..e5532e95 100644
--- a/forest/jasmine/sogp_gps.py
+++ b/forest/jasmine/sogp_gps.py
@@ -14,8 +14,8 @@
 import numpy as np
 
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 
 def calculate_k0(x1: np.ndarray, x2: np.ndarray, pars: list) -> float:
diff --git a/forest/jasmine/traj2stats.py b/forest/jasmine/traj2stats.py
index da6ca15c..ca13b384 100644
--- a/forest/jasmine/traj2stats.py
+++ b/forest/jasmine/traj2stats.py
@@ -32,8 +32,8 @@
 from forest.utils import get_ids
 
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 
 @dataclass
diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py
index 6bf12ae5..3eee8a25 100644
--- a/forest/willow/log_stats.py
+++ b/forest/willow/log_stats.py
@@ -17,8 +17,8 @@
 )
 
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 
 def text_analysis(