Patch summary (free text before the first file header; patch tools skip it).
Renames the pytest fixtures test_file_list -> sample_file_list and
test_input -> sample_run_input, drops their session/module scope, and replaces
the computed fixture data with literal sample values.
NOTE(review): this patch was recovered from a copy whose newlines had been
collapsed. Indentation and blank-line placement were reconstructed from the
hunk line counts; run `git apply --check` against the target tree to confirm.
NOTE(review): relative to the recovered patch, sample_run_input no longer
requests the unused signal_bout fixture, keeps freq in lowercase ('h') and keeps
len(days_hourly) for the array sizes, so the index hash recorded for
test_run_hourly.py no longer describes the post-image exactly.

diff --git a/forest/oak/tests/test_preprocess_dates.py b/forest/oak/tests/test_preprocess_dates.py
index 8ee01107..05d1e879 100644
--- a/forest/oak/tests/test_preprocess_dates.py
+++ b/forest/oak/tests/test_preprocess_dates.py
@@ -5,17 +5,85 @@
 from forest.oak.base import preprocess_dates
 
 
-@pytest.fixture(scope="session")
-def test_file_list():
-    file_list = [
-        [f"2023-11-{day:02} {hr:02}_00_00+00_00.csv" for hr in range(24)]
-        for day in range(1, 7)
+@pytest.fixture
+def sample_file_list():
+    return [
+        '2023-11-01 00_00_00+00_00.csv', '2023-11-01 01_00_00+00_00.csv',
+        '2023-11-01 02_00_00+00_00.csv', '2023-11-01 03_00_00+00_00.csv',
+        '2023-11-01 04_00_00+00_00.csv', '2023-11-01 05_00_00+00_00.csv',
+        '2023-11-01 06_00_00+00_00.csv', '2023-11-01 07_00_00+00_00.csv',
+        '2023-11-01 08_00_00+00_00.csv', '2023-11-01 09_00_00+00_00.csv',
+        '2023-11-01 10_00_00+00_00.csv', '2023-11-01 11_00_00+00_00.csv',
+        '2023-11-01 12_00_00+00_00.csv', '2023-11-01 13_00_00+00_00.csv',
+        '2023-11-01 14_00_00+00_00.csv', '2023-11-01 15_00_00+00_00.csv',
+        '2023-11-01 16_00_00+00_00.csv', '2023-11-01 17_00_00+00_00.csv',
+        '2023-11-01 18_00_00+00_00.csv', '2023-11-01 19_00_00+00_00.csv',
+        '2023-11-01 20_00_00+00_00.csv', '2023-11-01 21_00_00+00_00.csv',
+        '2023-11-01 22_00_00+00_00.csv', '2023-11-01 23_00_00+00_00.csv',
+        '2023-11-02 00_00_00+00_00.csv', '2023-11-02 01_00_00+00_00.csv',
+        '2023-11-02 02_00_00+00_00.csv', '2023-11-02 03_00_00+00_00.csv',
+        '2023-11-02 04_00_00+00_00.csv', '2023-11-02 05_00_00+00_00.csv',
+        '2023-11-02 06_00_00+00_00.csv', '2023-11-02 07_00_00+00_00.csv',
+        '2023-11-02 08_00_00+00_00.csv', '2023-11-02 09_00_00+00_00.csv',
+        '2023-11-02 10_00_00+00_00.csv', '2023-11-02 11_00_00+00_00.csv',
+        '2023-11-02 12_00_00+00_00.csv', '2023-11-02 13_00_00+00_00.csv',
+        '2023-11-02 14_00_00+00_00.csv', '2023-11-02 15_00_00+00_00.csv',
+        '2023-11-02 16_00_00+00_00.csv', '2023-11-02 17_00_00+00_00.csv',
+        '2023-11-02 18_00_00+00_00.csv', '2023-11-02 19_00_00+00_00.csv',
+        '2023-11-02 20_00_00+00_00.csv', '2023-11-02 21_00_00+00_00.csv',
+        '2023-11-02 22_00_00+00_00.csv', '2023-11-02 23_00_00+00_00.csv',
+        '2023-11-03 00_00_00+00_00.csv', '2023-11-03 01_00_00+00_00.csv',
+        '2023-11-03 02_00_00+00_00.csv', '2023-11-03 03_00_00+00_00.csv',
+        '2023-11-03 04_00_00+00_00.csv', '2023-11-03 05_00_00+00_00.csv',
+        '2023-11-03 06_00_00+00_00.csv', '2023-11-03 07_00_00+00_00.csv',
+        '2023-11-03 08_00_00+00_00.csv', '2023-11-03 09_00_00+00_00.csv',
+        '2023-11-03 10_00_00+00_00.csv', '2023-11-03 11_00_00+00_00.csv',
+        '2023-11-03 12_00_00+00_00.csv', '2023-11-03 13_00_00+00_00.csv',
+        '2023-11-03 14_00_00+00_00.csv', '2023-11-03 15_00_00+00_00.csv',
+        '2023-11-03 16_00_00+00_00.csv', '2023-11-03 17_00_00+00_00.csv',
+        '2023-11-03 18_00_00+00_00.csv', '2023-11-03 19_00_00+00_00.csv',
+        '2023-11-03 20_00_00+00_00.csv', '2023-11-03 21_00_00+00_00.csv',
+        '2023-11-03 22_00_00+00_00.csv', '2023-11-03 23_00_00+00_00.csv',
+        '2023-11-04 00_00_00+00_00.csv', '2023-11-04 01_00_00+00_00.csv',
+        '2023-11-04 02_00_00+00_00.csv', '2023-11-04 03_00_00+00_00.csv',
+        '2023-11-04 04_00_00+00_00.csv', '2023-11-04 05_00_00+00_00.csv',
+        '2023-11-04 06_00_00+00_00.csv', '2023-11-04 07_00_00+00_00.csv',
+        '2023-11-04 08_00_00+00_00.csv', '2023-11-04 09_00_00+00_00.csv',
+        '2023-11-04 10_00_00+00_00.csv', '2023-11-04 11_00_00+00_00.csv',
+        '2023-11-04 12_00_00+00_00.csv', '2023-11-04 13_00_00+00_00.csv',
+        '2023-11-04 14_00_00+00_00.csv', '2023-11-04 15_00_00+00_00.csv',
+        '2023-11-04 16_00_00+00_00.csv', '2023-11-04 17_00_00+00_00.csv',
+        '2023-11-04 18_00_00+00_00.csv', '2023-11-04 19_00_00+00_00.csv',
+        '2023-11-04 20_00_00+00_00.csv', '2023-11-04 21_00_00+00_00.csv',
+        '2023-11-04 22_00_00+00_00.csv', '2023-11-04 23_00_00+00_00.csv',
+        '2023-11-05 00_00_00+00_00.csv', '2023-11-05 01_00_00+00_00.csv',
+        '2023-11-05 02_00_00+00_00.csv', '2023-11-05 03_00_00+00_00.csv',
+        '2023-11-05 04_00_00+00_00.csv', '2023-11-05 05_00_00+00_00.csv',
+        '2023-11-05 06_00_00+00_00.csv', '2023-11-05 07_00_00+00_00.csv',
+        '2023-11-05 08_00_00+00_00.csv', '2023-11-05 09_00_00+00_00.csv',
+        '2023-11-05 10_00_00+00_00.csv', '2023-11-05 11_00_00+00_00.csv',
+        '2023-11-05 12_00_00+00_00.csv', '2023-11-05 13_00_00+00_00.csv',
+        '2023-11-05 14_00_00+00_00.csv', '2023-11-05 15_00_00+00_00.csv',
+        '2023-11-05 16_00_00+00_00.csv', '2023-11-05 17_00_00+00_00.csv',
+        '2023-11-05 18_00_00+00_00.csv', '2023-11-05 19_00_00+00_00.csv',
+        '2023-11-05 20_00_00+00_00.csv', '2023-11-05 21_00_00+00_00.csv',
+        '2023-11-05 22_00_00+00_00.csv', '2023-11-05 23_00_00+00_00.csv',
+        '2023-11-06 00_00_00+00_00.csv', '2023-11-06 01_00_00+00_00.csv',
+        '2023-11-06 02_00_00+00_00.csv', '2023-11-06 03_00_00+00_00.csv',
+        '2023-11-06 04_00_00+00_00.csv', '2023-11-06 05_00_00+00_00.csv',
+        '2023-11-06 06_00_00+00_00.csv', '2023-11-06 07_00_00+00_00.csv',
+        '2023-11-06 08_00_00+00_00.csv', '2023-11-06 09_00_00+00_00.csv',
+        '2023-11-06 10_00_00+00_00.csv', '2023-11-06 11_00_00+00_00.csv',
+        '2023-11-06 12_00_00+00_00.csv', '2023-11-06 13_00_00+00_00.csv',
+        '2023-11-06 14_00_00+00_00.csv', '2023-11-06 15_00_00+00_00.csv',
+        '2023-11-06 16_00_00+00_00.csv', '2023-11-06 17_00_00+00_00.csv',
+        '2023-11-06 18_00_00+00_00.csv', '2023-11-06 19_00_00+00_00.csv',
+        '2023-11-06 20_00_00+00_00.csv', '2023-11-06 21_00_00+00_00.csv',
+        '2023-11-06 22_00_00+00_00.csv', '2023-11-06 23_00_00+00_00.csv'
     ]
-    file_list = [item for sublist in file_list for item in sublist]
-    return file_list
 
 
-def test_preprocess_dates_length(test_file_list):
+def test_preprocess_dates_length(sample_file_list):
     """Test preprocess_dates function for length of output list"""
 
     time_start = None
@@ -25,12 +93,12 @@ def test_preprocess_dates_length(test_file_list):
     to_zone = tz.gettz("America/New_York")
 
     dates_shifted, _, _ = preprocess_dates(
-        test_file_list, time_start, time_end, fmt, from_zone, to_zone
+        sample_file_list, time_start, time_end, fmt, from_zone, to_zone
     )
     assert len(dates_shifted) == 144
 
 
-def test_preprocess_dates_start_end_dates(test_file_list):
+def test_preprocess_dates_start_end_dates(sample_file_list):
     """Test preprocess_dates function for start/end date"""
 
     time_start = None
@@ -40,13 +108,13 @@ def test_preprocess_dates_start_end_dates(test_file_list):
     to_zone = tz.gettz("America/New_York")
 
     _, date_start, date_end = preprocess_dates(
-        test_file_list, time_start, time_end, fmt, from_zone, to_zone
+        sample_file_list, time_start, time_end, fmt, from_zone, to_zone
     )
     assert date_start == datetime.datetime(2023, 10, 31, 0, 0, tzinfo=to_zone)
     assert date_end == datetime.datetime(2023, 11, 6, 0, 0, tzinfo=to_zone)
 
 
-def test_preprocess_dates_start_end_dates_inputs(test_file_list):
+def test_preprocess_dates_start_end_dates_inputs(sample_file_list):
     """Test preprocess_dates function for start/end date, with inputs"""
 
     time_start = "2023-10-31 00_00_00"
@@ -56,7 +124,7 @@ def test_preprocess_dates_start_end_dates_inputs(test_file_list):
     to_zone = tz.gettz("America/New_York")
 
     _, date_start, date_end = preprocess_dates(
-        test_file_list, time_start, time_end, fmt, from_zone, to_zone
+        sample_file_list, time_start, time_end, fmt, from_zone, to_zone
     )
     assert date_start == datetime.datetime(2023, 10, 30, 0, 0, tzinfo=to_zone)
     assert date_end == datetime.datetime(2023, 11, 2, 0, 0, tzinfo=to_zone)
diff --git a/forest/oak/tests/test_run_hourly.py b/forest/oak/tests/test_run_hourly.py
index 266ac097..a969660f 100644
--- a/forest/oak/tests/test_run_hourly.py
+++ b/forest/oak/tests/test_run_hourly.py
@@ -4,38 +4,38 @@
 import pandas as pd
 import pytest
 
-from forest.oak.base import run_hourly, find_walking, preprocess_bout
+from forest.oak.base import run_hourly
 from forest.constants import Frequency
 
 
-@pytest.fixture(scope="module")
-def test_input(signal_bout):
-    timestamp, _, x, y, z = signal_bout
+@pytest.fixture
+def sample_run_input():
 
-    t_bout_interp, vm_bout = preprocess_bout(timestamp, x, y, z)
+    t_hours_pd = pd.Series(pd.to_datetime([
+        "2020-02-25 08:00:00-05:00",
+        "2020-02-25 08:00:00-05:00",
+        "2020-02-25 08:00:00-05:00",
+        "2020-02-25 08:00:00-05:00",
+        "2020-02-25 08:00:00-05:00",
+        "2020-02-25 08:00:00-05:00",
+        "2020-02-25 08:00:00-05:00",
+        "2020-02-25 08:00:00-05:00",
+        "2020-02-25 08:00:00-05:00",
+        "2020-02-25 08:00:00-05:00"
+    ], utc=True).tz_convert('US/Eastern'))
 
-    cadence_bout = find_walking(vm_bout)
-
-    t_datetime = [datetime.fromtimestamp(t_ind) for t_ind in t_bout_interp]
-
-    t_series = pd.Series(t_datetime)
-    t_hours_pd = t_series.dt.floor("H")
-
-    from_zone = tz.gettz("UTC")
-    to_zone = tz.gettz("America/New_York")
-
-    # convert t_hours to correct timezone
-    t_hours_pd = t_hours_pd.dt.tz_localize(from_zone).dt.tz_convert(to_zone)
-
-    date_start = datetime(2020, 2, 24, 0, 0, tzinfo=to_zone)
-    date_end = datetime(2020, 2, 25, 0, 0, tzinfo=to_zone)
     days_hourly = pd.date_range(
-        date_start, date_end + timedelta(days=1), freq="h"
-    )[:-1]
+        start='2020-02-24 00:00:00',
+        end='2020-02-25 23:00:00',
+        freq='h',
+        tz='US/Eastern'
+    )
+
+    cadence_bout = np.array([1.65, 1.6, 1.55, 1.6, 1.55, 1.85, 1.8, 1.75, 1.75, 1.7])
 
     steps_hourly = np.full((len(days_hourly), 1), np.nan)
     cadence_hourly = np.full((len(days_hourly), 1), np.nan)
     walkingtime_hourly = np.full((len(days_hourly), 1), np.nan)
 
     return (
         t_hours_pd,
@@ -47,24 +47,24 @@ def test_input(signal_bout):
     )
 
 
-def test_run_hourly_one_hour_data(test_input):
-    run_hourly(*test_input, Frequency.HOURLY)
+def test_run_hourly_one_hour_data(sample_run_input):
+    run_hourly(*sample_run_input, Frequency.HOURLY)
 
-    steps_hourly, cadence_hourly, walkingtime_hourly = test_input[3:]
+    steps_hourly, cadence_hourly, walkingtime_hourly = sample_run_input[3:]
 
     assert len(steps_hourly) - np.sum(np.isnan(steps_hourly)) == 1
     assert len(cadence_hourly) - np.sum(np.isnan(cadence_hourly)) == 1
     assert len(walkingtime_hourly) - np.sum(np.isnan(walkingtime_hourly)) == 1
 
 
-def test_run_hourly_accuracy(test_input):
-    run_hourly(*test_input, Frequency.HOURLY)
+def test_run_hourly_accuracy(sample_run_input):
+    run_hourly(*sample_run_input, Frequency.HOURLY)
 
-    steps_hourly, cadence_hourly, walkingtime_hourly = test_input[3:]
+    steps_hourly, cadence_hourly, walkingtime_hourly = sample_run_input[3:]
 
-    indice = np.where(~np.isnan(steps_hourly))[0]
+    index = np.where(~np.isnan(steps_hourly))[0]
     # get non-nan indices
 
-    assert steps_hourly[indice][0] == 32
-    assert cadence_hourly[indice][0] == 20
-    assert walkingtime_hourly[indice][0] == 1.6
+    assert steps_hourly[index][0] == 32
+    assert cadence_hourly[index][0] == 20
+    assert walkingtime_hourly[index][0] == 1.6