From 90de508964042b8229f5d7a3fcb82029fa4e8d65 Mon Sep 17 00:00:00 2001 From: GeorgiosEfstathiadis <54844705+GeorgeEfstathiadis@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:10:15 -0500 Subject: [PATCH 1/7] Jasmine - Add more tests for summary values (#221) --- forest/jasmine/tests/test_traj2stats.py | 81 +++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/forest/jasmine/tests/test_traj2stats.py b/forest/jasmine/tests/test_traj2stats.py index 48988f13..2955e1e8 100644 --- a/forest/jasmine/tests/test_traj2stats.py +++ b/forest/jasmine/tests/test_traj2stats.py @@ -399,6 +399,87 @@ def test_gps_summaries_log_format( assert np.all(dates_stats == dates_log) +def test_gps_summaries_summary_vals( + coords1, sample_trajectory, sample_nearby_locations, mocker +): + """Testing gps summaries summary values are correct""" + mocker.patch( + "forest.jasmine.traj2stats.get_nearby_locations", + return_value=sample_nearby_locations, + ) + mocker.patch("forest.jasmine.traj2stats.locate_home", return_value=coords1) + + parameters = Hyperparameters() + + summary, _ = gps_summaries( + traj=sample_trajectory, + tz_str="Europe/London", + frequency=Frequency.DAILY, + parameters=parameters, + ) + + assert np.all(summary["obs_duration"] == 24) + assert summary["obs_day"].iloc[0] == 10 + assert summary["obs_night"].iloc[0] == 14 + assert summary["obs_day"].iloc[1] == 24 + assert summary["obs_night"].iloc[1] == 0 + assert np.all(summary["home_time"] == 0) + assert summary["dist_traveled"].iloc[0] == 0.208 + assert summary["dist_traveled"].iloc[1] == 0 + assert np.round(summary["max_dist_home"].iloc[0], 3) == 0.915 + assert np.round(summary["max_dist_home"].iloc[1], 3) == 0.915 + assert np.round(summary["radius"].iloc[0], 3) == 0.013 + assert summary["radius"].iloc[1] == 0 + assert np.round(summary["diameter"].iloc[0], 3) == 0.064 + assert summary["diameter"].iloc[1] == 0 + assert summary["num_sig_places"].iloc[0] == 2 + assert summary["num_sig_places"].iloc[1] == 1 + assert np.round(summary["entropy"].iloc[0], 3) == 0.468 + assert summary["entropy"].iloc[1] == 0 + assert round(summary["total_flight_time"].iloc[0], 3) == 1.528 + assert summary["total_flight_time"].iloc[1] == 0 + assert round(summary["av_flight_length"].iloc[0], 3) == 0.052 + assert summary["av_flight_length"].iloc[1] == 0 + assert round(summary["sd_flight_length"].iloc[0], 3) == 0.012 + assert summary["sd_flight_length"].iloc[1] == 0 + assert round(summary["av_flight_duration"].iloc[0], 3) == 0.382 + assert summary["av_flight_duration"].iloc[1] == 0 + assert round(summary["sd_flight_duration"].iloc[0], 3) == 0.132 + assert summary["sd_flight_duration"].iloc[1] == 0 + assert round(summary["total_pause_time"].iloc[0], 3) == 22.472 + assert summary["total_pause_time"].iloc[1] == 24 + assert round(summary["av_pause_duration"].iloc[0], 3) == 4.494 + assert summary["av_pause_duration"].iloc[1] == 24 + assert round(summary["sd_pause_duration"].iloc[0], 3) == 3.496 + assert summary["sd_pause_duration"].iloc[1] == 0 + + +def test_gps_summaries_pcr( + coords1, sample_trajectory, sample_nearby_locations, mocker +): + """Testing gps summaries pcr""" + mocker.patch( + "forest.jasmine.traj2stats.get_nearby_locations", + return_value=sample_nearby_locations, + ) + mocker.patch("forest.jasmine.traj2stats.locate_home", return_value=coords1) + + parameters = Hyperparameters() + parameters.pcr_bool = True + + summary, _ = gps_summaries( + traj=sample_trajectory, + tz_str="Europe/London", + frequency=Frequency.DAILY, + parameters=parameters, + ) + + assert summary["physical_circadian_rhythm"].iloc[0] == 0 + assert summary["physical_circadian_rhythm"].iloc[1] == 1 + assert summary["physical_circadian_rhythm_stratified"].iloc[0] == 0 + assert summary["physical_circadian_rhythm_stratified"].iloc[1] == 0 + + @pytest.fixture() def mobmat1(): """mobility matrix 1""" From 89e7aa8eb3a2295a10d6d464d95787c80be025ef Mon Sep 17 00:00:00 2001 From: Zachary Clement Date: Wed, 22 Nov 2023 15:41:02 -0500 Subject: [PATCH 2/7] Update test_log_stats.py (#223) * Update test_log_stats.py Add a test to make sure that hourly frequency returns the right size * Make input data frame correct width --- forest/willow/log_stats.py | 2 +- forest/willow/tests/test_log_stats.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py index a40e1be0..650f3ac0 100644 --- a/forest/willow/log_stats.py +++ b/forest/willow/log_stats.py @@ -366,7 +366,7 @@ def comm_logs_summaries( if frequency == Frequency.DAILY: newline = [year, month, day] + newline else: - newline = [year, month, day, hour] + newline[:16] + newline = [year, month, day, hour] + newline[:17] summary_stats.append(newline) diff --git a/forest/willow/tests/test_log_stats.py b/forest/willow/tests/test_log_stats.py index ddc40ed5..1a871fe5 100644 --- a/forest/willow/tests/test_log_stats.py +++ b/forest/willow/tests/test_log_stats.py @@ -17,6 +17,14 @@ def test_comm_log_summaries_with_empty_data(): assert isinstance(stats_pdframe, pd.DataFrame) +def test_comm_log_summaries_with_empty_data_hourly(): + text_data = pd.DataFrame.from_dict({}) + call_data = pd.DataFrame.from_dict({}) + stats_pdframe = comm_logs_summaries(text_data, call_data, STAMP_START, + STAMP_END, TZ_STR, Frequency.HOURLY) + assert isinstance(stats_pdframe, pd.DataFrame) + + def test_comm_log_summaries_with_empty_text_data(): text_data = pd.DataFrame.from_dict({}) call_data = pd.DataFrame.from_dict( From 285ad43410122c86e8858a899a1ee5e53d08c7bb Mon Sep 17 00:00:00 2001 From: Zachary Clement Date: Mon, 27 Nov 2023 16:52:57 -0500 Subject: [PATCH 3/7] Remove librosa argument name deprecation warning in Sycamore (#222) --- forest/sycamore/read_audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/forest/sycamore/read_audio.py b/forest/sycamore/read_audio.py index 5249b31b..84d91e55 100644 --- a/forest/sycamore/read_audio.py +++ b/forest/sycamore/read_audio.py @@ -136,7 +136,7 @@ def read_user_audio_recordings_stream( if valid_file: all_files.append(filename) all_durations.append(librosa.get_duration( - filename=os.path.join(audio_dir, survey, filename) + path=os.path.join(audio_dir, survey, filename) )) if len(all_files) == 0: From 43948b2dd009477272cb795ba1e14fb0e39f59ff Mon Sep 17 00:00:00 2001 From: GeorgiosEfstathiadis <54844705+GeorgeEfstathiadis@users.noreply.github.com> Date: Tue, 28 Nov 2023 17:14:12 -0500 Subject: [PATCH 4/7] Oak - Minute level analysis (#220) * add support for minute-level analysis * vectorized pydate conversion * optimize datetime calculations and list comprehension * vectorize get_pp * integer valued frequency * compute_window_and_count argument in minutes * timedelta minutes for freq --------- Co-authored-by: Ilya Sytchev --- forest/constants.py | 13 ++--- forest/jasmine/tests/test_traj2stats.py | 8 ++-- forest/jasmine/traj2stats.py | 18 ++++--- forest/oak/base.py | 64 ++++++++++++++++--------- forest/oak/tests/test_run_hourly.py | 6 +-- forest/sycamore/base.py | 14 ++++++ forest/willow/log_stats.py | 18 +++++-- 7 files changed, 95 insertions(+), 46 deletions(-) diff --git a/forest/constants.py b/forest/constants.py index c71fe71b..8d95f4ce 100644 --- a/forest/constants.py +++ b/forest/constants.py @@ -20,12 +20,13 @@ class Frequency(Enum): """This class enumerates possible frequencies for summary data.""" - HOURLY = 1 - DAILY = 24 - HOURLY_AND_DAILY = "hourly_and_daily" - THREE_HOURLY = 3 - SIX_HOURLY = 6 - TWELVE_HOURLY = 12 + MINUTELY = 1 + HOURLY = 60 + THREE_HOURLY = 3 * 60 + SIX_HOURLY = 6 * 60 + TWELVE_HOURLY = 12 * 60 + DAILY = 24 * 60 + HOURLY_AND_DAILY = -1 class OSMTags(Enum): diff --git a/forest/jasmine/tests/test_traj2stats.py b/forest/jasmine/tests/test_traj2stats.py index 2955e1e8..156bac89 100644 --- a/forest/jasmine/tests/test_traj2stats.py +++ b/forest/jasmine/tests/test_traj2stats.py @@ -654,7 +654,7 @@ def test_compute_window_size(sample_trajectory): """Testing window size is correct""" window, _ = compute_window_and_count( - sample_trajectory[0, 3], sample_trajectory[-1, 6], 1 + sample_trajectory[0, 3], sample_trajectory[-1, 6], 60 ) assert window == 3600 @@ -664,7 +664,7 @@ def test_compute_window_count(sample_trajectory): """Testing number of windows is correct""" _, num_windows = compute_window_and_count( - sample_trajectory[0, 3], sample_trajectory[-1, 6], 1 + sample_trajectory[0, 3], sample_trajectory[-1, 6], 60 ) assert num_windows == 24 @@ -674,7 +674,7 @@ def test_compute_window_size_6_hour(sample_trajectory): """Testing window size is correct 6 hour window""" window, _ = compute_window_and_count( - sample_trajectory[0, 3], sample_trajectory[-1, 6], 6 + sample_trajectory[0, 3], sample_trajectory[-1, 6], 360 ) assert window == 3600 * 6 @@ -684,7 +684,7 @@ def test_compute_window_count_6_hour(sample_trajectory): """Testing number of windows is correct 6 hour window""" _, num_windows = compute_window_and_count( - sample_trajectory[0, 3], sample_trajectory[-1, 6], 6 + sample_trajectory[0, 3], sample_trajectory[-1, 6], 360 ) assert num_windows == 4 diff --git a/forest/jasmine/traj2stats.py b/forest/jasmine/traj2stats.py index ed2d8a92..da6ca15c 100644 --- a/forest/jasmine/traj2stats.py +++ b/forest/jasmine/traj2stats.py @@ -1125,8 +1125,8 @@ def gps_summaries( ValueError: Frequency is not valid """ - if frequency == Frequency.HOURLY_AND_DAILY: - raise ValueError("Frequency must be 'hourly' or 'daily'") + if frequency in [Frequency.HOURLY_AND_DAILY, Frequency.MINUTELY]: + raise ValueError(f"Frequency cannot be {frequency.name.lower()}.") if frequency != Frequency.DAILY: parameters.split_day_night = False @@ -1161,7 +1161,7 @@ def gps_summaries( traj, [3, 4, 5], tz_str, 3600*24 ) window, num_windows = compute_window_and_count( - start_stamp, end_stamp, 24, parameters.split_day_night + start_stamp, end_stamp, 24*60, parameters.split_day_night ) if num_windows <= 0: @@ -1484,7 +1484,7 @@ def get_time_range( def compute_window_and_count( - start_stamp: int, end_stamp: int, window_hours: int, + start_stamp: int, end_stamp: int, window_minutes: int, split_day_night: bool = False ) -> Tuple[int, int]: """Computes the window and number of windows based on given time stamps. @@ -1492,7 +1492,7 @@ def compute_window_and_count( Args: start_stamp: int, starting time stamp end_stamp: int, ending time stamp - window_hours: int, window in hours + window_minutes: int, window in minutes split_day_night: bool, True if split day and night Returns: A tuple of two integers (window, num_windows): @@ -1500,7 +1500,7 @@ def compute_window_and_count( num_windows: int, number of windows """ - window = window_hours * 60 * 60 + window = window_minutes * 60 num_windows = (end_stamp - start_stamp) // window if split_day_night: num_windows *= 2 @@ -1595,8 +1595,14 @@ def gps_stats_main( as pickle files for future use and a record csv file to show which users are processed and logger csv file to show warnings and bugs during the run + Raises: + ValueError: Frequency is not valid """ + # no minutely analysis on GPS data + if frequency == Frequency.MINUTELY: + raise ValueError("Frequency cannot be minutely.") + os.makedirs(output_folder, exist_ok=True) if parameters is None: diff --git a/forest/oak/base.py b/forest/oak/base.py index 14ac0e49..19d9e3e9 100644 --- a/forest/oak/base.py +++ b/forest/oak/base.py @@ -137,8 +137,7 @@ def get_pp(vm_bout: np.ndarray, fs: int = 10) -> npt.NDArray[np.float64]: """ vm_res_sec = vm_bout.reshape((fs, -1), order="F") - pp = np.array([max(vm_res_sec[:, i])-min(vm_res_sec[:, i]) - for i in range(vm_res_sec.shape[1])]) + pp = np.ptp(vm_res_sec, axis=0) return pp @@ -478,7 +477,7 @@ def preprocess_dates( def run_hourly( - t_hours_pd: pd.Series, days_hourly: pd.DatetimeIndex, + t_hours_pd: pd.Series, t_ind_pydate: list, cadence_bout: np.ndarray, steps_hourly: np.ndarray, walkingtime_hourly: np.ndarray, cadence_hourly: np.ndarray, frequency: Frequency @@ -489,7 +488,7 @@ def run_hourly( Args: t_hours_pd: pd.Series timestamp of each measurement - days_hourly: pd.DatetimeIndex + t_ind_pydate: list list of days with hourly resolution cadence_bout: np.ndarray cadence of each measurement @@ -503,11 +502,10 @@ def run_hourly( summary statistics format, Frequency class at constants.py """ for t_unique in t_hours_pd.unique(): - t_ind_pydate = [t_ind.to_pydatetime() for t_ind in days_hourly] # get indexes of ranges of dates that contain t_unique ind_to_store = -1 for ind_to_store, t_ind in enumerate(t_ind_pydate): - if t_ind <= t_unique < t_ind + timedelta(hours=frequency.value): + if t_ind <= t_unique < t_ind + timedelta(minutes=frequency.value): break cadence_temp = cadence_bout[t_hours_pd == t_unique] cadence_temp = cadence_temp[cadence_temp > 0] @@ -555,13 +553,15 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, from_zone = tz.gettz('UTC') to_zone = tz.gettz(tz_str) if tz_str else from_zone + freq_str = frequency.name.lower() + # create folders to store results if frequency == Frequency.HOURLY_AND_DAILY: os.makedirs(os.path.join(output_folder, "daily"), exist_ok=True) os.makedirs(os.path.join(output_folder, "hourly"), exist_ok=True) else: os.makedirs( - os.path.join(output_folder, frequency.name.lower()), exist_ok=True + os.path.join(output_folder, freq_str), exist_ok=True ) if users is None: users = get_ids(study_folder) @@ -578,21 +578,38 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, ) days = pd.date_range(date_start, date_end, freq='D') - if (frequency == Frequency.HOURLY_AND_DAILY - or frequency == Frequency.HOURLY): - freq = 'H' - else: - freq = str(frequency.value) + 'H' - days_hourly = pd.date_range(date_start, date_end+timedelta(days=1), - freq=freq)[:-1] + # allocate memory steps_daily = np.full((len(days), 1), np.nan) cadence_daily = np.full((len(days), 1), np.nan) walkingtime_daily = np.full((len(days), 1), np.nan) - steps_hourly = np.full((len(days_hourly), 1), np.nan) - cadence_hourly = np.full((len(days_hourly), 1), np.nan) - walkingtime_hourly = np.full((len(days_hourly), 1), np.nan) + steps_hourly = np.full((1, 1), np.nan) + cadence_hourly = np.full((1, 1), np.nan) + walkingtime_hourly = np.full((1, 1), np.nan) + t_ind_pydate = pd.Series([]) + t_ind_pydate_str = None + + if frequency != Frequency.DAILY: + if ( + frequency == Frequency.HOURLY_AND_DAILY + or frequency == Frequency.HOURLY + ): + freq = 'H' + elif frequency == Frequency.MINUTELY: + freq = 'T' + else: + freq = str(frequency.value/60) + 'H' + + days_hourly = pd.date_range(date_start, date_end+timedelta(days=1), + freq=freq)[:-1] + + steps_hourly = np.full((len(days_hourly), 1), np.nan) + cadence_hourly = np.full((len(days_hourly), 1), np.nan) + walkingtime_hourly = np.full((len(days_hourly), 1), np.nan) + + t_ind_pydate = days_hourly.to_pydatetime() + t_ind_pydate_str = t_ind_pydate.astype(str) for d_ind, d_datetime in enumerate(days): logger.info("Day: %d", d_ind) @@ -628,14 +645,18 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, ] # transform t to full hours t_series = pd.Series(t_datetime) - t_hours_pd = t_series.dt.floor('H') + if frequency == Frequency.MINUTELY: + t_hours_pd = t_series.dt.floor('T') + else: + t_hours_pd = t_series.dt.floor('H') + # convert t_hours to correct timezone t_hours_pd = t_hours_pd.dt.tz_localize( from_zone ).dt.tz_convert(to_zone) run_hourly( - t_hours_pd, days_hourly, cadence_bout, steps_hourly, + t_hours_pd, t_ind_pydate, cadence_bout, steps_hourly, walkingtime_hourly, cadence_hourly, frequency ) @@ -660,8 +681,7 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, summary_stats.to_csv(dest_path, index=False) if frequency != Frequency.DAILY: summary_stats = pd.DataFrame({ - 'date': [date.strftime('%Y-%m-%d %H:%M:%S') - for date in days_hourly], + 'date': t_ind_pydate_str, 'walking_time': walkingtime_hourly[:, -1], 'steps': steps_hourly[:, -1], 'cadence': cadence_hourly[:, -1]}) @@ -669,6 +689,6 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, if frequency == Frequency.HOURLY_AND_DAILY: freq_name = "hourly" else: - freq_name = frequency.name.lower() + freq_name = freq_str dest_path = os.path.join(output_folder, freq_name, output_file) summary_stats.to_csv(dest_path, index=False) diff --git a/forest/oak/tests/test_run_hourly.py b/forest/oak/tests/test_run_hourly.py index cb6beb37..242b52bd 100644 --- a/forest/oak/tests/test_run_hourly.py +++ b/forest/oak/tests/test_run_hourly.py @@ -20,12 +20,12 @@ def sample_run_input(signal_bout): "2020-02-25 08:00:00-05:00", "2020-02-25 08:00:00-05:00" ], utc=True).tz_convert('US/Eastern')) - days_hourly = pd.date_range( + t_ind_pydate = pd.date_range( start='2020-02-24 00:00:00', end='2020-02-25 23:00:00', freq='H', tz='US/Eastern' - ) + ).to_pydatetime() cadence_bout = np.array( [1.65, 1.6, 1.55, 1.6, 1.55, 1.85, 1.8, 1.75, 1.75, 1.7] ) @@ -35,7 +35,7 @@ def sample_run_input(signal_bout): return ( t_hours_pd, - days_hourly, + t_ind_pydate, cadence_bout, steps_hourly, walkingtime_hourly, diff --git a/forest/sycamore/base.py b/forest/sycamore/base.py index 853ab2d8..67da6024 100644 --- a/forest/sycamore/base.py +++ b/forest/sycamore/base.py @@ -91,6 +91,13 @@ def compute_survey_stats( Returns: True if successful, False otherwise """ + + if submits_timeframe not in [ + Frequency.HOURLY_AND_DAILY, Frequency.HOURLY, Frequency.DAILY + ]: + logger.error("Error: Invalid submits timeframe") + return False + os.makedirs(output_folder, exist_ok=True) os.makedirs(os.path.join(output_folder, "summaries"), exist_ok=True) os.makedirs(os.path.join(output_folder, "by_survey"), exist_ok=True) @@ -250,6 +257,13 @@ def get_submits_for_tableau( history_path: Filepath to the survey history file. If this is not included, audio survey timings cannot be estimated. """ + + if submits_timeframe not in [ + Frequency.HOURLY, Frequency.DAILY, Frequency.HOURLY_AND_DAILY + ]: + logger.error("Error: Invalid submits timeframe") + return + os.makedirs(output_folder, exist_ok=True) if users is None: diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py index 650f3ac0..6bf12ae5 100644 --- a/forest/willow/log_stats.py +++ b/forest/willow/log_stats.py @@ -304,9 +304,6 @@ def comm_logs_summaries( Returns: pandas dataframe of summary stats - - Raises: - ValueError: if frequency is not of correct type """ summary_stats = [] start_year, start_month, start_day, start_hour, _, _ = stamp2datetime( @@ -318,7 +315,9 @@ def comm_logs_summaries( # determine the starting and ending timestamp again based on the frequency if frequency == Frequency.HOURLY_AND_DAILY: - raise ValueError("frequency not of correct type") + logger.error( + "Error: frequency cannot be HOURLY_AND_DAILY for this function" + ) if frequency == Frequency.DAILY: table_start = datetime2stamp( @@ -337,7 +336,7 @@ def comm_logs_summaries( # determine the step size based on the frequency # step_size is in seconds - step_size = 3600 * frequency.value + step_size = 60 * frequency.value # for each chunk, calculate the summary statistics (colmean or count) for stamp in np.arange(table_start, table_end + 1, step=step_size): @@ -429,6 +428,15 @@ def log_stats_main( time_end: ending timestamp of the study beiwe_id: list of Beiwe IDs to be processed """ + + if frequency not in [ + Frequency.HOURLY_AND_DAILY, Frequency.DAILY, Frequency.HOURLY + ]: + logger.error( + "Error: frequency must be one of the following: " + "HOURLY_AND_DAILY, DAILY, HOURLY" + ) + os.makedirs(output_folder, exist_ok=True) if frequency == Frequency.HOURLY_AND_DAILY: From 6a4dba26a81b76a6e606d6d7dc108e34ea9cd925 Mon Sep 17 00:00:00 2001 From: Zachary Clement Date: Tue, 28 Nov 2023 18:30:38 -0500 Subject: [PATCH 5/7] Oak fixes zc (#224) * avoid runtime warning from mean of empty slice * Don't try to process empty data * pep8 fixes --------- Co-authored-by: GeorgiosEfstathiadis <54844705+GeorgeEfstathiadis@users.noreply.github.com> --- forest/oak/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/forest/oak/base.py b/forest/oak/base.py index 19d9e3e9..349a2364 100644 --- a/forest/oak/base.py +++ b/forest/oak/base.py @@ -85,7 +85,8 @@ def preprocess_bout(t_bout: np.ndarray, x_bout: np.ndarray, y_bout: np.ndarray, z_bout_interp**2) # standardize measurement to gravity units (g) if its recorded in m/s**2 - if np.mean(vm_bout_interp) > 5: + # Also avoid a runtime warning of taking the mean of an empty slice + if vm_bout_interp.shape[0] > 0 and np.mean(vm_bout_interp) > 5: x_bout_interp = x_bout_interp/9.80665 y_bout_interp = y_bout_interp/9.80665 z_bout_interp = z_bout_interp/9.80665 @@ -635,6 +636,8 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, z = np.array(data["z"], dtype="float64") # z-axis acc. # preprocess data fragment t_bout_interp, vm_bout = preprocess_bout(timestamp, x, y, z) + if len(t_bout_interp) == 0: # no valid data to process here + continue # find walking and estimate cadence cadence_bout = find_walking(vm_bout) # distribute metrics across hours From 5cf03af1f8a69c966e65c84f0f9e15364618f9c0 Mon Sep 17 00:00:00 2001 From: GeorgiosEfstathiadis <54844705+GeorgeEfstathiadis@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:17:53 -0500 Subject: [PATCH 6/7] Oak bug preprocess dates (#225) * fix bug in files outside the range of analysis timeframe * suppress warning of missing dtype * rename dates_original to dates_shifted * improve readability --- forest/oak/base.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/forest/oak/base.py b/forest/oak/base.py index 349a2364..2fbe1c8e 100644 --- a/forest/oak/base.py +++ b/forest/oak/base.py @@ -450,24 +450,29 @@ def preprocess_dates( date.replace(tzinfo=from_zone).astimezone(to_zone) for date in dates ] # trim dataset according to time_start and time_end - if time_start is not None and time_end is not None: + if time_start is None or time_end is None: + dates_filtered = dates + else: time_min = datetime.strptime(time_start, fmt) time_min = time_min.replace(tzinfo=from_zone).astimezone(to_zone) time_max = datetime.strptime(time_end, fmt) time_max = time_max.replace(tzinfo=from_zone).astimezone(to_zone) - dates = [date for date in dates if time_min <= date <= time_max] + dates_filtered = [ + date for date in dates if time_min <= date <= time_max + ] dates_shifted = [date-timedelta(hours=date.hour) for date in dates] + # create time vector with days for analysis if time_start is None: - date_start = dates_shifted[0] + date_start = dates_filtered[0] date_start = date_start - timedelta(hours=date_start.hour) else: date_start = datetime.strptime(time_start, fmt) date_start = date_start.replace(tzinfo=from_zone).astimezone(to_zone) date_start = date_start - timedelta(hours=date_start.hour) if time_end is None: - date_end = dates_shifted[-1] + date_end = dates_filtered[-1] date_end = date_end - timedelta(hours=date_end.hour) else: date_end = datetime.strptime(time_end, fmt) @@ -588,7 +593,7 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, steps_hourly = np.full((1, 1), np.nan) cadence_hourly = np.full((1, 1), np.nan) walkingtime_hourly = np.full((1, 1), np.nan) - t_ind_pydate = pd.Series([]) + t_ind_pydate = pd.Series([], dtype='datetime64[ns]') t_ind_pydate_str = None if frequency != Frequency.DAILY: From 3db2547d380768a74b8bf410de5f08ee19c8e418 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Wed, 29 Nov 2023 12:40:54 -0500 Subject: [PATCH 7/7] Avoid using the root logger --- docs/source/logging.md | 7 ------- forest/bonsai/simulate_gps_data.py | 4 ++-- forest/jasmine/data2mobmat.py | 4 ++-- forest/jasmine/mobmat2traj.py | 4 ++-- forest/jasmine/sogp_gps.py | 4 ++-- forest/jasmine/traj2stats.py | 4 ++-- forest/willow/log_stats.py | 4 ++-- 7 files changed, 12 insertions(+), 19 deletions(-) diff --git a/docs/source/logging.md b/docs/source/logging.md index 581f345b..a47d9e87 100644 --- a/docs/source/logging.md +++ b/docs/source/logging.md @@ -37,13 +37,6 @@ import logging logger = logging.getLogger(__name__) ``` -Or like this: - -``` -from logging import getLogger -logger = getLogger(__name__) -``` - ## 3. How to insert log messages into definitions Basic `logging` messages: diff --git a/forest/bonsai/simulate_gps_data.py b/forest/bonsai/simulate_gps_data.py index 1f0c6249..2e3b77a3 100644 --- a/forest/bonsai/simulate_gps_data.py +++ b/forest/bonsai/simulate_gps_data.py @@ -27,8 +27,8 @@ TRAVELLING_STATUS_LIST = range(11) -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) class PossibleExits(Enum): diff --git a/forest/jasmine/data2mobmat.py b/forest/jasmine/data2mobmat.py index 2d8a78a5..da282397 100644 --- a/forest/jasmine/data2mobmat.py +++ b/forest/jasmine/data2mobmat.py @@ -15,8 +15,8 @@ TOLERANCE = 1e-6 -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) def cartesian( diff --git a/forest/jasmine/mobmat2traj.py b/forest/jasmine/mobmat2traj.py index 1527a982..e4579ed8 100644 --- a/forest/jasmine/mobmat2traj.py +++ b/forest/jasmine/mobmat2traj.py @@ -13,8 +13,8 @@ from .data2mobmat import great_circle_dist, exist_knot -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) # the details of the functions are in paper [Liu and Onnela (2020)] diff --git a/forest/jasmine/sogp_gps.py b/forest/jasmine/sogp_gps.py index 86931e72..e5532e95 100644 --- a/forest/jasmine/sogp_gps.py +++ b/forest/jasmine/sogp_gps.py @@ -14,8 +14,8 @@ import numpy as np -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) def calculate_k0(x1: np.ndarray, x2: np.ndarray, pars: list) -> float: diff --git a/forest/jasmine/traj2stats.py b/forest/jasmine/traj2stats.py index da6ca15c..ca13b384 100644 --- a/forest/jasmine/traj2stats.py +++ b/forest/jasmine/traj2stats.py @@ -32,8 +32,8 @@ from forest.utils import get_ids -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) @dataclass diff --git a/forest/willow/log_stats.py b/forest/willow/log_stats.py index 6bf12ae5..3eee8a25 100644 --- a/forest/willow/log_stats.py +++ b/forest/willow/log_stats.py @@ -17,8 +17,8 @@ ) -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) def text_analysis(