From cbe2f536a3c9d5d568290a59599f0b03bc501747 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Tue, 14 Nov 2023 17:05:39 -0500 Subject: [PATCH] Remove unnecessary line breaks --- forest/oak/base.py | 46 ++++------------------- forest/oak/tests/test_preprocess_dates.py | 3 -- forest/oak/tests/test_run_hourly.py | 8 ---- 3 files changed, 8 insertions(+), 49 deletions(-) diff --git a/forest/oak/base.py b/forest/oak/base.py index 452303ae..14ac0e49 100644 --- a/forest/oak/base.py +++ b/forest/oak/base.py @@ -442,17 +442,13 @@ def preprocess_dates( """ # transform all files in folder to datelike format file_dates = [ - file.replace(".csv", "").replace("+00_00", "") - for file in file_list + file.replace(".csv", "").replace("+00_00", "") for file in file_list ] - # process dates dates = [datetime.strptime(file, fmt) for file in file_dates] dates = [ - date.replace(tzinfo=from_zone).astimezone(to_zone) - for date in dates + date.replace(tzinfo=from_zone).astimezone(to_zone) for date in dates ] - # trim dataset according to time_start and time_end if time_start is not None and time_end is not None: time_min = datetime.strptime(time_start, fmt) @@ -462,7 +458,6 @@ def preprocess_dates( dates = [date for date in dates if time_min <= date <= time_max] dates_shifted = [date-timedelta(hours=date.hour) for date in dates] - # create time vector with days for analysis if time_start is None: date_start = dates_shifted[0] @@ -471,7 +466,6 @@ def preprocess_dates( date_start = datetime.strptime(time_start, fmt) date_start = date_start.replace(tzinfo=from_zone).astimezone(to_zone) date_start = date_start - timedelta(hours=date_start.hour) - if time_end is None: date_end = dates_shifted[-1] date_end = date_end - timedelta(hours=date_end.hour) @@ -509,19 +503,14 @@ def run_hourly( summary statistics format, Frequency class at constants.py """ for t_unique in t_hours_pd.unique(): - t_ind_pydate = [t_ind.to_pydatetime() for t_ind in - days_hourly] + t_ind_pydate = [t_ind.to_pydatetime() for t_ind in days_hourly] # get indexes of ranges of dates that contain t_unique ind_to_store = -1 for ind_to_store, t_ind in enumerate(t_ind_pydate): - if ( - t_ind <= t_unique < t_ind + timedelta(hours=frequency.value) - ): + if t_ind <= t_unique < t_ind + timedelta(hours=frequency.value): break - cadence_temp = cadence_bout[t_hours_pd == t_unique] cadence_temp = cadence_temp[cadence_temp > 0] - # store hourly metrics if math.isnan(steps_hourly[ind_to_store]): steps_hourly[ind_to_store] = int(np.sum(cadence_temp)) @@ -574,13 +563,11 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, os.makedirs( os.path.join(output_folder, frequency.name.lower()), exist_ok=True ) - if users is None: users = get_ids(study_folder) for user in users: logger.info("Beiwe ID: %s", user) - # get file list source_folder = os.path.join(study_folder, user, "accelerometer") file_list = os.listdir(source_folder) @@ -591,16 +578,13 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, ) days = pd.date_range(date_start, date_end, freq='D') - if ( - frequency == Frequency.HOURLY_AND_DAILY - or frequency == Frequency.HOURLY - ): + if (frequency == Frequency.HOURLY_AND_DAILY + or frequency == Frequency.HOURLY): freq = 'H' else: freq = str(frequency.value) + 'H' days_hourly = pd.date_range(date_start, date_end+timedelta(days=1), freq=freq)[:-1] - # allocate memory steps_daily = np.full((len(days), 1), np.nan) cadence_daily = np.full((len(days), 1), np.nan) @@ -612,22 +596,17 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, for d_ind, d_datetime in enumerate(days): logger.info("Day: %d", d_ind) - # find file indices for this d_ind file_ind = [i for i, x in enumerate(dates_shifted) if x == d_datetime] - # check if there is at least one file for a given day if len(file_ind) <= 0: continue - # initiate dataframe data = pd.DataFrame() - # load data for a given day for f in file_ind: logger.info("File: %d", f) - # read data file_path = os.path.join(source_folder, file_list[f]) data = pd.concat([data, pd.read_csv(file_path)], axis=0) @@ -637,24 +616,19 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, x = np.array(data["x"], dtype="float64") # x-axis acc. y = np.array(data["y"], dtype="float64") # y-axis acc. z = np.array(data["z"], dtype="float64") # z-axis acc. - # preprocess data fragment t_bout_interp, vm_bout = preprocess_bout(timestamp, x, y, z) - # find walking and estimate cadence cadence_bout = find_walking(vm_bout) - # distribute metrics across hours if frequency != Frequency.DAILY: # get t as datetimes t_datetime = [ datetime.fromtimestamp(t_ind) for t_ind in t_bout_interp ] - # transform t to full hours t_series = pd.Series(t_datetime) t_hours_pd = t_series.dt.floor('H') - # convert t_hours to correct timezone t_hours_pd = t_hours_pd.dt.tz_localize( from_zone @@ -666,7 +640,6 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, ) cadence_bout = cadence_bout[np.where(cadence_bout > 0)] - # store daily metrics steps_daily[d_ind] = int(np.sum(cadence_bout)) if len(cadence_bout) > 0: # control for empty slices @@ -674,12 +647,9 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, else: cadence_daily[d_ind] = np.nan walkingtime_daily[d_ind] = len(cadence_bout) - # save results depending on "frequency" - if ( - frequency == Frequency.DAILY - or frequency == Frequency.HOURLY_AND_DAILY - ): + if (frequency == Frequency.DAILY + or frequency == Frequency.HOURLY_AND_DAILY): summary_stats = pd.DataFrame({ 'date': days.strftime('%Y-%m-%d'), 'walking_time': walkingtime_daily[:, -1], diff --git a/forest/oak/tests/test_preprocess_dates.py b/forest/oak/tests/test_preprocess_dates.py index 84e99c32..f4017c0a 100644 --- a/forest/oak/tests/test_preprocess_dates.py +++ b/forest/oak/tests/test_preprocess_dates.py @@ -85,7 +85,6 @@ def sample_file_list(): def test_preprocess_dates_length(sample_file_list): """Test preprocess_dates function for length of output list""" - time_start = None time_end = None fmt = "%Y-%m-%d %H_%M_%S" @@ -100,7 +99,6 @@ def test_preprocess_dates_length(sample_file_list): def test_preprocess_dates_start_end_dates(sample_file_list): """Test preprocess_dates function for start/end date""" - time_start = None time_end = None fmt = "%Y-%m-%d %H_%M_%S" @@ -116,7 +114,6 @@ def test_preprocess_dates_start_end_dates(sample_file_list): def test_preprocess_dates_start_end_dates_inputs(sample_file_list): """Test preprocess_dates function for start/end date, with inputs""" - time_start = "2023-10-31 00_00_00" time_end = "2023-11-03 00_00_00" fmt = "%Y-%m-%d %H_%M_%S" diff --git a/forest/oak/tests/test_run_hourly.py b/forest/oak/tests/test_run_hourly.py index 16c70482..cb6beb37 100644 --- a/forest/oak/tests/test_run_hourly.py +++ b/forest/oak/tests/test_run_hourly.py @@ -8,7 +8,6 @@ @pytest.fixture() def sample_run_input(signal_bout): - t_hours_pd = pd.Series(pd.to_datetime([ "2020-02-25 08:00:00-05:00", "2020-02-25 08:00:00-05:00", @@ -21,18 +20,15 @@ def sample_run_input(signal_bout): "2020-02-25 08:00:00-05:00", "2020-02-25 08:00:00-05:00" ], utc=True).tz_convert('US/Eastern')) - days_hourly = pd.date_range( start='2020-02-24 00:00:00', end='2020-02-25 23:00:00', freq='H', tz='US/Eastern' ) - cadence_bout = np.array( [1.65, 1.6, 1.55, 1.6, 1.55, 1.85, 1.8, 1.75, 1.75, 1.7] ) - steps_hourly = np.full((48, 1), np.nan) cadence_hourly = np.full((48, 1), np.nan) walkingtime_hourly = np.full((48, 1), np.nan) @@ -49,7 +45,6 @@ def sample_run_input(signal_bout): def test_run_hourly_one_hour_data(sample_run_input): run_hourly(*sample_run_input, Frequency.HOURLY) - steps_hourly, cadence_hourly, walkingtime_hourly = sample_run_input[3:] assert len(steps_hourly) - np.sum(np.isnan(steps_hourly)) == 1 @@ -59,11 +54,8 @@ def test_run_hourly_one_hour_data(sample_run_input): def test_run_hourly_accuracy(sample_run_input): run_hourly(*sample_run_input, Frequency.HOURLY) - steps_hourly, cadence_hourly, walkingtime_hourly = sample_run_input[3:] - index = np.where(~np.isnan(steps_hourly))[0] - # get non-nan indices assert steps_hourly[index][0] == 16 assert cadence_hourly[index][0] == 10