From 8768ee3fb66b00d934177f70704b6c4dfc571762 Mon Sep 17 00:00:00 2001 From: GeorgiosEfstathiadis <54844705+GeorgeEfstathiadis@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:32:56 -0400 Subject: [PATCH 01/14] Jasmine physical cyrcadian rhythm (#205) * remove rare argument from gps_summaries and place in Hyperparameters * update documentation for refactoring of parameters and pcr * add new pcr parameters for bool to run and sampling rate * update gps_stats_main README * update tests * create new columns to reformat gps_summaries: split_day_night_cols, get_time_range, compute_window_and_windows_count * reformat gps_summaries * add unit test for traj2stats functions --------- Co-authored-by: Ilya Sytchev --- README.md | 10 +- docs/source/index.md | 6 +- docs/source/jasmine.md | 39 +- forest/jasmine/tests/test_traj2stats.py | 270 +++- forest/jasmine/traj2stats.py | 1553 ++++++++++++++++------- 5 files changed, 1331 insertions(+), 547 deletions(-) diff --git a/README.md b/README.md index 1dc9d610..1a2df8f7 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ sample_gps_data = sim_gps_data(n_persons, location, start_date, end_date, cycle, gps_to_csv(sample_gps_data, path_to_synthetic_gps_data, start_date, end_date) # 2. Specify parameters for imputation -# See https://github.com/onnela-lab/forest/wiki/Jasmine-documentation#input for details +# See https://forest.beiwe.org/en/latest/jasmine.html for details # time zone where the study took place (assumes that all participants were always in this time zone) tz_str = "Etc/GMT-1" # Generate summary metrics e.g. 
Frequency.HOURLY, Frequency.DAILY or Frequency.HOURLY_AND_DAILY (see Frequency class in constants.py) @@ -127,12 +127,8 @@ save_traj = False parameters = None # list of locations to track if visited, leave None if don't want these summary statistics places_of_interest = ['cafe', 'bar', 'hospital'] -# True if want to save a log of all locations and attributes of those locations visited -save_osm_log = True # list of OpenStreetMap tags to use for identifying locations, leave None to default to amenity and leisure tagged locations or if you don't want to use OSM (see OSMTags class in constants.py) osm_tags = None -# threshold of time spent in a location to count as being in that location, in minutes -threshold = 15 # 3. Impute location data and generate mobility summary metrics using the simulated data above gps_stats_main( @@ -143,9 +139,7 @@ gps_stats_main( save_traj = save_traj, parameters = parameters, places_of_interest = places_of_interest, - save_osm_log = save_osm_log, - osm_tags = None, - threshold = threshold, + osm_tags = osm_tags, ) # 4. Generate daily summary metrics for call/text logs diff --git a/docs/source/index.md b/docs/source/index.md index 0c8f29da..25d0c46b 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -170,15 +170,15 @@ The summary statistics that are generated are listed below: - Entropy measure based on the proportion of time spent at significant locations over the course of a day - Letting p_i be the proportion of the day spent at significant location I, significant location entropy is calculated as -\sum_{i} p_i*log(p_i), where the sum occurs over all non-zero p_i for that day. 
* - mis_duration - - Float + - Not Available - Number of hours of GPS data missing over the course of a day - * - Physical circadian rhythm - - Not Available + - Float - A continuous measurement of routine in the interval [0,1] that scores a day with 0 if there was a complete break from routine and 1 if the person followed the exact same routine as have in every other day of follow up - For a detailed description of how this measure is calculated, see Canzian and Musolesi's 2015 paper in the Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing, titled "Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis." Their procedure was followed using 30-min increments as a bin size. * - Physical circadian rhythm stratified - - Not Available + - Float - A continuous measurement of routine in the interval [0,1] that scores a day with 0 if there was a complete break from routine and 1 if the person followed the exact same routine as have in every other day of follow up - Calculated in the same way as Physical circadian rhythm, except the procedure is repeated separately for weekends and weekdays. 
``` diff --git a/docs/source/jasmine.md b/docs/source/jasmine.md index 4e3381cd..6e4f0517 100644 --- a/docs/source/jasmine.md +++ b/docs/source/jasmine.md @@ -11,7 +11,7 @@ For instructions on how to install forest, please visit [here](https://github.co ### Input -When using jasmine, you should call function `gps_stats_main(study_folder, output_folder, tz_str, frequency, save_traj, parameters = None, save_osm_log = None, osm_tags = None, threshold, split_day_night, person_point_radius = 2, place_point_radius = 7.5, time_start = None, time_end = None, participant_ids = None, all_memory_dict = None, all_BV_set = None, quality_threshold = 0.05)` in the `traj2stats` module and specify: +When using jasmine, you should call function `gps_stats_main(study_folder, output_folder, tz_str, frequency, save_traj, places_of_interest = None, osm_tags = None, time_start = None, time_end = None, participant_ids = None, parameters = None, all_memory_dict = None, all_bv_set = None)` in the `traj2stats` module and specify: - `study_folder`, string, the path of the study folder. The study folder should contain individual participant folder with a subfolder `gps` inside - `output_folder`, string, the path of the folder where you want to save results @@ -27,17 +27,13 @@ In addition, the main function takes four arguments that provide further flexibi - `tz_str`, string, the timezone where the study is/was conducted. Please use "`pytz.all_timezones`" to check all options. For example, "America/New_York". - `frequency`, Frequency class, the frequency of the summary stats (resolution for summary statistics) e.g. Frequency.HOURLY, Frequency.DAILY, etc. - `save_traj`, bool, True if you want to save the trajectories as a csv file, False if you don't (default: False). - - `parameters`, a list of parameters, by default it is set to None. The details are as below. - `places_of_interest`, a list of places of interest, by default it is set to None. 
The details are as used in openstreetmaps - - `save_osm_log`, bool, True if you want to output a log of locations visited and their tags(default: False). - `osm_tags`, list of OSMTags class, a list of tags to filter the places of interest, by default it is set to None. The details are as used in openstreetmaps. Avoid using a lot of them if large area is covered. - - `threshold`, int, time spent in a pause needs to exceed the threshold to be placed in the log - - `split_day_night`, bool, True if you want to split all metrics to datetime and nighttime patterns (only for Frequency.DAILY) - - person_point_radius, float, radius of the person's circle when discovering places near him in pauses (default: 2) - - `place_point_radius`, float, radius of place's circle when place is returned as centre coordinates from osm (default: 7.5) - - `all_memory_dict` and `all_BV_set` are dictionaries from previous run (none if it's the first time). + - `parameters`, a list of parameters, by default it is set to None. The details are as below. + - `all_memory_dict` and `all_bv_set` are dictionaries from previous run (none if it's the first time). + +You can also tweak the parameters that change the assumptions of the imputation and summary statistics. The parameters are -You can also tweak the parameters that change the assumptions of the imputation and summary statistics. The parameters are (1) `l1`: the scale parameter in the abs function in the daily kernel; (2) `l2`: the scale parameter in the abs function in the weekly kernel; (3) `l3`: the scale parameter in the geographical kernel if only latitude or longitude is used; @@ -58,7 +54,17 @@ You can also tweak the parameters that change the assumptions of the imputation (18) `accuracylim`: we filter out GPS record with accuracy higher than this threshold. 
(19) `r`: the maximum radius of a pause; (20) `w`: a threshold for distance, if the distance to the great circle is greater than this threshold, we consider there is a knot; -(21) `h`: a threshold of distance, if the movement between two timestamps is less than h, consider it as a pause and a knot +(21) `h`: a threshold of distance, if the movement between two timestamps is less than h, consider it as a pause and a knot +(22) `save_osm_log`: bool, True if you want to output a log of locations visited and their tags (default: False). +(23) `log_threshold`: int, time spent in a pause needs to exceed the threshold to be placed in the log +(24) `split_day_night`: bool, True if you want to split all metrics to daytime and nighttime patterns (only for Frequency.DAILY) +(25) `person_point_radius`: float, radius of the person's circle when discovering places near him in pauses (default: 2) +(26) `place_point_radius`: float, radius of place's circle when place is returned as centre coordinates from osm (default: 7.5) +(27) `pcr_bool`: bool, True if you want to calculate the physical circadian rhythm (default: False) +(28) `pcr_window`: int, number of days to look back and forward for calculating the physical circadian rhythm (default: 14) +(29) `pcr_sample_rate`: int, number of seconds between each sample for calculating the physical circadian rhythm (default: 30) + + ### Output (1) summary statistics for all specified participants (.csv) @@ -70,8 +76,8 @@ You can also tweak the parameters that change the assumptions of the imputation - Contains start date/time and end date/time for each participant.\ - Is useful for tracking whose data during which time range have been processed, especially for the online algorithm. -(4) all_BV_set (.pkl)\ - - It is a dictionary, with the key as user ID and the value as a numpy array with size, where each column represents [start_timestamp, start_latitude, start_longitude, end_timestamp, end_latitude, end_longitude]. 
If it is your first time run the code, it is set to NULL by default. If you want to continue your analysis from here in the future, all_BV_set is expected to be an input in your new analysis and it will be updated in that run. The size of the file should be fixed overtime. +(4) all_bv_set (.pkl)\ + - It is a dictionary, with the key as user ID and the value as a numpy array with size, where each column represents [start_timestamp, start_latitude, start_longitude, end_timestamp, end_latitude, end_longitude]. If it is your first time run the code, it is set to NULL by default. If you want to continue your analysis from here in the future, all_bv_set is expected to be an input in your new analysis and it will be updated in that run. The size of the file should be fixed overtime. (5) all_memory_dict (.pkl)\ - It is also a dictionary, with the key as user ID and the value as a numpy array of other parameters for the user. If it is your first time run the code, it is set to NULL by default. If you want to continue your analysis from here in the future, all_memory_dict is expected to be an input in your new analysis and it will be updated in that run. The size of the file should be fixed overtime. @@ -115,8 +121,7 @@ This file imputes the missing trajectories based on the observed trajectory matr `traj2stats.py` This file converts the imputed trajectory matrix to summary statistics. - -- `Hyperparameters`: @dataclass to store the hyperparameters for the imputation process. +- `Hyperparameters`: dataclass to store the hyperparameters for the imputation and summary statistics. - `transform_point_to_circle`: transform a transforms a set of cooordinates to a shapely circle with a provided radius. - `get_nearby_locations`: return a dictionary of nearby locations, a dictionary of nearby locations' names, and a dictionary of nearby locations' coordinates. - `gps_summaries`: converts the imputed trajectory matrix to summary statistics. 
@@ -147,9 +152,9 @@ The summary statistics that are generated are listed below: | Average pause duration | Float | Average of the duration of all pauses that took place over the course of a day (in hour) | We consider that a participant has a pause if the distance that he has moved during a 30-s period is less than `r` m. By default, `r`=10.| | Standard deviation of flight duration | Float | Standard deviation of the duration of all pauses that took place over the course of a day (in hour) | GPS is converted into a sequence of flights (straight line movement) and pauses (time spent stationary). The standard deviation of duration of pauses over the course of a day is reported. | | Significant location entropy | Float | Entropy measure based on the proportion of time spent at significant locations over the course of a day | Letting p_i be the proportion of the day spent at significant location I, significant location entropy is calculated as -\sum_{i} p_i*log(p_i), where the sum occurs over all non-zero p_i for that day. | -| Minutes of GPS data missing | Float | Number of minutes of GPS data missing over the course of a day | | -| Physical circadian rhythm | Not Available | A continuous measurement of routine in the interval [0,1] that scores a day with 0 if there was a complete break from routine and 1 if the person followed the exact same routine as have in every other day of follow up | For a detailed description of how this measure is calculated, see Canzian and Musolesi's 2015 paper in the Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing, titled "Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis." 
Their procedure was followed using 30-min increments as a bin size.| -| Physical circadian rhythm stratified | Not Available | A continuous measurement of routine in the interval [0,1] that scores a day with 0 if there was a complete break from routine and 1 if the person followed the exact same routine as have in every other day of follow up | Calculated in the same way as Physical circadian rhythm, except the procedure is repeated separately for weekends and weekdays. | +| Minutes of GPS data missing | Not Available | Number of minutes of GPS data missing over the course of a day | | +| Physical circadian rhythm | Float | A continuous measurement of routine in the interval [0,1] that scores a day with 0 if there was a complete break from routine and 1 if the person followed the exact same routine as have in every other day of follow up | For a detailed description of how this measure is calculated, see Canzian and Musolesi's 2015 paper in the Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing, titled "Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis." Their procedure was followed using 30-min increments as a bin size.| +| Physical circadian rhythm stratified | Float | A continuous measurement of routine in the interval [0,1] that scores a day with 0 if there was a complete break from routine and 1 if the person followed the exact same routine as have in every other day of follow up | Calculated in the same way as Physical circadian rhythm, except the procedure is repeated separately for weekends and weekdays. 
| ### Other technical details diff --git a/forest/jasmine/tests/test_traj2stats.py b/forest/jasmine/tests/test_traj2stats.py index 31925f13..48988f13 100644 --- a/forest/jasmine/tests/test_traj2stats.py +++ b/forest/jasmine/tests/test_traj2stats.py @@ -5,8 +5,11 @@ from shapely.geometry import Point from forest.jasmine.data2mobmat import great_circle_dist -from forest.jasmine.traj2stats import (Frequency, transform_point_to_circle, - gps_summaries) +from forest.jasmine.traj2stats import ( + Frequency, gps_summaries, Hyperparameters, transform_point_to_circle, + avg_mobility_trace_difference, create_mobility_trace, get_pause_array, + extract_pause_from_row, compute_window_and_count +) @pytest.fixture() @@ -48,7 +51,7 @@ def test_transform_point_to_circle_radius(coords1): ] distance = great_circle_dist(*coords1, *point_in_edge)[0] - assert distance >= 4 and distance <= 5 + assert 4 <= distance <= 5 @pytest.fixture() @@ -273,16 +276,16 @@ def test_gps_summaries_shape( return_value=sample_nearby_locations, ) mocker.patch("forest.jasmine.traj2stats.locate_home", return_value=coords1) + + parameters = Hyperparameters() + parameters.save_osm_log = True + summary, _ = gps_summaries( traj=sample_trajectory, tz_str="Europe/London", frequency=Frequency.HOURLY, + parameters=parameters, places_of_interest=["pub", "fast_food"], - save_osm_log=True, - threshold=None, - split_day_night=False, - person_point_radius=2, - place_point_radius=7.5, ) assert summary.shape == (24, 21) @@ -296,16 +299,16 @@ def test_gps_summaries_places_of_interest( return_value=sample_nearby_locations, ) mocker.patch("forest.jasmine.traj2stats.locate_home", return_value=coords1) + + parameters = Hyperparameters() + parameters.save_osm_log = True + summary, _ = gps_summaries( traj=sample_trajectory, tz_str="Europe/London", frequency=Frequency.HOURLY, + parameters=parameters, places_of_interest=["pub", "fast_food"], - save_osm_log=True, - threshold=None, - split_day_night=False, - person_point_radius=2, - 
place_point_radius=7.5, ) time_in_places_of_interest = ( summary["pub"] + summary["fast_food"] + summary["other"] @@ -324,16 +327,16 @@ def test_gps_summaries_obs_day_night( return_value=sample_nearby_locations, ) mocker.patch("forest.jasmine.traj2stats.locate_home", return_value=coords1) + + parameters = Hyperparameters() + parameters.save_osm_log = True + summary, _ = gps_summaries( traj=sample_trajectory, tz_str="Europe/London", frequency=Frequency.DAILY, + parameters=parameters, places_of_interest=["pub", "fast_food"], - save_osm_log=True, - threshold=None, - split_day_night=False, - person_point_radius=2, - place_point_radius=7.5, ) total_obs = summary["obs_day"] + summary["obs_night"] assert np.all(round(total_obs, 4) == round(summary["obs_duration"], 4)) @@ -348,16 +351,17 @@ def test_gps_summaries_datetime_nighttime_shape( return_value=sample_nearby_locations, ) mocker.patch("forest.jasmine.traj2stats.locate_home", return_value=coords1) + + parameters = Hyperparameters() + parameters.save_osm_log = True + parameters.split_day_night = True + summary, _ = gps_summaries( traj=sample_trajectory, tz_str="Europe/London", frequency=Frequency.DAILY, + parameters=parameters, places_of_interest=["pub", "fast_food"], - save_osm_log=True, - threshold=None, - split_day_night=True, - person_point_radius=2, - place_point_radius=7.5, ) assert summary.shape == (2, 46) @@ -373,16 +377,16 @@ def test_gps_summaries_log_format( return_value=sample_nearby_locations, ) mocker.patch("forest.jasmine.traj2stats.locate_home", return_value=coords1) + + parameters = Hyperparameters() + parameters.save_osm_log = True + summary, log = gps_summaries( traj=sample_trajectory, tz_str="Europe/London", frequency=Frequency.DAILY, + parameters=parameters, places_of_interest=["pub", "fast_food"], - save_osm_log=True, - threshold=None, - split_day_night=False, - person_point_radius=2, - place_point_radius=7.5, ) dates_stats = ( summary["day"].astype(int).astype(str) @@ -393,3 +397,213 @@ def 
test_gps_summaries_log_format( ) dates_log = np.array(list(log.keys())) assert np.all(dates_stats == dates_log) + + +@pytest.fixture() +def mobmat1(): + """mobility matrix 1""" + return np.array( + [ + [16.49835, -142.72462, 1], + [16.49521, -142.72461, 2], + [51.45435654, -2.58555554, 3], + [51.45435621, -2.58555524, 4], + [51.45435632, -2.58555544, 5] + ] + ) + + +@pytest.fixture() +def mobmat2(): + """mobility matrix 2""" + return np.array( + [ + [51.45435654, -2.58555554, 1], + [51.45435654, -2.58555554, 2], + [51.45435654, -2.58555554, 3], + [51.45435654, -2.58555554, 4], + [51.45435654, -2.58555554, 5] + ] + ) + + +@pytest.fixture() +def mobmat3(): + """mobility matrix 3""" + return np.array( + [ + [51.45435654, -2.58555554, 7], + [51.45435654, -2.58555554, 8], + [51.45435654, -2.58555554, 9], + [51.45435654, -2.58555554, 10], + [51.45435654, -2.58555554, 11] + ] + ) + + +def test_avg_mobility_trace_difference_common_timestamps( + mobmat1, mobmat2 +): + """Testing avg mobility trace difference + when there are common timestamps and all points are close + """ + + time_range = (3, 5) + res = avg_mobility_trace_difference( + time_range, mobmat1, mobmat2 + ) + + assert res == 1 + + +def test_avg_mobility_trace_difference_common_timestamps2( + mobmat1, mobmat2 +): + """Testing avg mobility trace difference + when there are common timestamps and some points are close + """ + + time_range = (1, 5) + res = avg_mobility_trace_difference( + time_range, mobmat1, mobmat2 + ) + + assert res == 0.6 + + +def test_avg_mobility_trace_difference_no_common_timestamps( + mobmat1, mobmat3 +): + """Testing avg mobility trace difference + when there are no common timestamps + """ + + time_range = (1, 5) + res = avg_mobility_trace_difference( + time_range, mobmat1, mobmat3 + ) + + assert res == 0 + + +def test_create_mobility_trace_shape(sample_trajectory): + """Testing shape of mobility trace""" + + res = create_mobility_trace(sample_trajectory) + + assert res.shape == (81200, 3) + 
+ +def test_create_mobility_trace_start_end_times(sample_trajectory): + """Testing start and end times of mobility trace""" + + res = create_mobility_trace(sample_trajectory) + + assert res[0, 2] == 1633042800.0 + assert res[-1, 2] == 1633129499.0 + + +def test_get_pause_array_shape(sample_trajectory, coords2): + """Testing shape of pause array""" + + parameters = Hyperparameters() + + pause_array = get_pause_array( + sample_trajectory[sample_trajectory[:, 0] == 2, :], + *coords2, + parameters + ) + + assert pause_array.shape == (3, 3) + + +def test_get_pause_array_times(sample_trajectory, coords2): + """Testing times spent in places of pause array""" + + parameters = Hyperparameters() + + pause_array = get_pause_array( + sample_trajectory[sample_trajectory[:, 0] == 2, :], + *coords2, + parameters + ) + + assert pause_array[0, 2] == 1113.3333333333333 + assert pause_array[-1, 2] == 180 + + +def test_get_pause_array_house(sample_trajectory): + """Testing case where house is in pause array""" + + house_coords = (51.45435654, -2.58555554) + parameters = Hyperparameters() + + pause_array = get_pause_array( + sample_trajectory[sample_trajectory[:, 0] == 2, :], + *house_coords, + parameters + ) + + assert pause_array.shape == (2, 3) + + +def test_extract_pause_from_row_shape(sample_trajectory): + """Testing shape of pause array""" + + pause_list = extract_pause_from_row( + sample_trajectory[0, :] + ) + + assert len(pause_list) == 3 + + +def test_extract_pause_from_row_time(sample_trajectory): + """Testing pause time of row""" + + pause_list = extract_pause_from_row( + sample_trajectory[0, :] + ) + + true_val = sample_trajectory[0, 6] - sample_trajectory[0, 3] + + assert pause_list[2] == true_val / 60 + + +def test_compute_window_size(sample_trajectory): + """Testing window size is correct""" + + window, _ = compute_window_and_count( + sample_trajectory[0, 3], sample_trajectory[-1, 6], 1 + ) + + assert window == 3600 + + +def test_compute_window_count(sample_trajectory): 
+ """Testing number of windows is correct""" + + _, num_windows = compute_window_and_count( + sample_trajectory[0, 3], sample_trajectory[-1, 6], 1 + ) + + assert num_windows == 24 + + +def test_compute_window_size_6_hour(sample_trajectory): + """Testing window size is correct 6 hour window""" + + window, _ = compute_window_and_count( + sample_trajectory[0, 3], sample_trajectory[-1, 6], 6 + ) + + assert window == 3600 * 6 + + +def test_compute_window_count_6_hour(sample_trajectory): + """Testing number of windows is correct 6 hour window""" + + _, num_windows = compute_window_and_count( + sample_trajectory[0, 3], sample_trajectory[-1, 6], 6 + ) + + assert num_windows == 4 diff --git a/forest/jasmine/traj2stats.py b/forest/jasmine/traj2stats.py index 67c0b70c..ed2d8a92 100644 --- a/forest/jasmine/traj2stats.py +++ b/forest/jasmine/traj2stats.py @@ -3,6 +3,7 @@ """ from dataclasses import dataclass +from datetime import datetime import json import logging import os @@ -37,7 +38,8 @@ @dataclass class Hyperparameters: - """Class containing hyperparemeters for imputation of trajectories. + """Class containing hyperparemeters for gps imputation and trajectory + summary statistics calculation. Args: itrvl, accuracylim, r, w, h: hyperparameters for the @@ -48,7 +50,28 @@ class Hyperparameters: l1, l2, a1, a2, b1, b2, b3, g, method, switch, num, linearity: hyperparameters for the impute_gps function. itrvl, r, w, h: hyperparameters for the imp_to_traj function. 
+ log_threshold: int, time spent in a pause needs to exceed the + log_threshold to be placed in the log + only if save_osm_log True, in minutes + split_day_night: bool, True if you want to split all metrics to + datetime and nighttime patterns + only for daily frequency + person_point_radius: float, radius of the person's circle when + discovering places near him in pauses + place_point_radius: float, radius of place's circle + when place is returned as centre coordinates from osm + save_osm_log: bool, True if you want to output a log of locations + visited and their tags + quality_threshold: float, a percentage value of the fraction of data + required for a summary to be created + pcr_bool: bool, True if you want to calculate the physical + circadian rhythm + pcr_window: int, number of days to look back and forward + for calculating the physical circadian rhythm + pcr_sample_rate: int, number of seconds between each sample + for calculating the physical circadian rhythm """ + # imputation hyperparameters l1: int = 60 * 60 * 24 * 10 l2: int = 60 * 60 * 24 * 30 l3: float = 0.002 @@ -71,6 +94,17 @@ class Hyperparameters: w: Optional[float] = None h: Optional[float] = None + # summary statistics hyperparameters + save_osm_log: bool = False + log_threshold: int = 60 + split_day_night: bool = False + person_point_radius: float = 2 + place_point_radius: float = 7.5 + quality_threshold: float = 0.05 + pcr_bool: bool = False + pcr_window: int = 14 + pcr_sample_rate: int = 30 + def transform_point_to_circle(lat: float, lon: float, radius: float ) -> Polygon: @@ -116,9 +150,10 @@ def get_nearby_locations( types of nearby locations supported by Overpass API defaults to [OSMTags.AMENITY, OSMTags.LEISURE] Returns: - ids: dictionary, contains nearby locations' ids - locations: dictionary, contains nearby locations' coordinates - tags: dictionary, contains nearby locations' tags + A tuple of: + dictionary, contains nearby locations' ids + dictionary, contains nearby locations' 
coordinates + dictionary, contains nearby locations' tags Raises: RuntimeError: if the query to Overpass API fails """ @@ -223,17 +258,835 @@ def get_nearby_locations( return ids, locations, tags +def avg_mobility_trace_difference( + time_range: Tuple[int, int], mobility_trace1: np.ndarray, + mobility_trace2: np.ndarray +) -> float: + """This function calculates the average mobility trace difference + + Args: + time_range: tuple of two ints, time range of mobility_trace + mobility_trace1: numpy array, mobility trace 1 + contains 3 columns: [x, y, t] + mobility_trace2: numpy array, mobility trace 2 + contains 3 columns: [x, y, t] + Returns: + float, average mobility trace difference + Raises: + ValueError: if the calculation fails + """ + + # Create masks for timestamps that lie within the specified time range + mask1 = ( + (mobility_trace1[:, 2] >= time_range[0]) + & (mobility_trace1[:, 2] <= time_range[1]) + ) + mask2 = ( + (mobility_trace2[:, 2] >= time_range[0]) + & (mobility_trace2[:, 2] <= time_range[1]) + ) + + # Create a set of common timestamps for efficient lookup + common_times = ( + set(mobility_trace1[mask1, 2]) & set(mobility_trace2[mask2, 2]) + ) + + # Create masks for the common timestamps + mask1_common = np.isin(mobility_trace1[:, 2], list(common_times)) + mask2_common = np.isin(mobility_trace2[:, 2], list(common_times)) + + if not any(mask1_common) or not any(mask2_common): + return 0 + + # Calculate distances using the common timestamp masks + dists = great_circle_dist( + mobility_trace1[mask1_common, 0], mobility_trace1[mask1_common, 1], + mobility_trace2[mask2_common, 0], mobility_trace2[mask2_common, 1] + ) + + dist_flag = dists <= 10 + res = np.mean(dist_flag) + if np.isnan(res): + raise ValueError("PCR calculation failed") + + return float(res) + + +def routine_index( + time_range: Tuple[int, int], mobility_trace: np.ndarray, + pcr_window: int = 14, pcr_sample_rate: int = 30, + stratified: bool = False, timezone: str = "US/Eastern", +) -> 
float: + """This function calculates the routine index of a trajectory + + Description of routine index can be found in the paper: + Canzian and Musolesi's 2015 paper in the Proceedings of the 2015 + ACM International Joint Conference on Pervasive and Ubiquitous Computing, + titled “Trajectories of depression: unobtrusive monitoring of depressive + states by means of smartphone mobility traces analysis.” + + Args: + time_range: tuple of two ints, time range of mobility_trace + mobility_trace: numpy array, trajectory + contains 3 columns: [x, y, t] + pcr_window: int, number of days to look back and forward + for calculating the physical circadian rhythm + pcr_sample_rate: int, number of seconds between each sample + for calculating the physical circadian rhythm + stratified: bool, True if you want to calculate the routine index + for weekdays and weekends separately + timezone: str, timezone of the mobility trace + Returns: + float, routine index + """ + + t_1, t_2 = time_range + + t_init = mobility_trace[:, 2].min() + t_fin = mobility_trace[:, 2].max() + + t_1 = max(t_1, t_init) + t_2 = min(t_2, t_fin) + + # n1, n2 are the number of days before and after the time range + n1 = int(round((t_1 - t_init) / (24 * 60 * 60))) + n2 = int(round((t_fin - t_2) / (24 * 60 * 60))) + + # to avoid long computational times + # only look at the last window days and next window days + n1 = min(n1, pcr_window) + n2 = min(n2, pcr_window) + + if max(n1, n2) == 0: + return 0 + + shifts = list(range(1, n1 + 1)) + list(range(-n2, 0)) + if stratified: + time_mid = int((t_1 + t_2) / 2) + weekend_today = datetime( + *stamp2datetime(time_mid, timezone) + ).weekday() >= 5 + if weekend_today: + shifts = [ + s for s in shifts + if datetime( + *stamp2datetime( + time_mid - s * 24 * 60 * 60, timezone + ) + ).weekday() >= 5 + ] + else: + shifts = [ + s for s in shifts + if datetime( + *stamp2datetime( + time_mid - s * 24 * 60 * 60, timezone + ) + ).weekday() < 5 + ] + + res = sum( + 
avg_mobility_trace_difference( + time_range, mobility_trace[::pcr_sample_rate], + np.column_stack( + [ + mobility_trace[:, :2], + mobility_trace[:, 2] + i * 24 * 60 * 60 + ] + ) + ) + for i in shifts + ) + + return res / (n1 + n2) + + +def create_mobility_trace(traj: np.ndarray) -> np.ndarray: + """This function creates a mobility trace from a trajectory + + Args: + traj: numpy array, trajectory + contains 8 columns: [s,x0,y0,t0,x1,y1,t1,obs] + Returns: + numpy array, mobility trace + contains 3 columns: [x, y, t] + """ + + pause_vec = traj[traj[:, 0] == 2] + + # Calculate the time ranges for all pauses + start_times = pause_vec[:, 3].astype(int) + end_times = pause_vec[:, 6].astype(int) + time_ranges = [np.arange(s, e) for s, e in zip(start_times, end_times)] + + # Flatten time_ranges and get the corresponding locations + flat_time_ranges = np.concatenate(time_ranges) + repeats = [len(r) for r in time_ranges] + locs = np.repeat(pause_vec[:, 1:3], repeats, axis=0) + + # Stack locations and time_ranges to get the mobility trace + mobility_trace = np.column_stack([locs, flat_time_ranges]) + + # check if duplicate timestamps exist + _, unique_indices = np.unique(mobility_trace[:, 2], return_index=True) + + return mobility_trace[unique_indices] + + +def get_day_night_indices( + traj: np.ndarray, tz_str: str, index: int, start_time: int, end_time: int, + current_time_list: List[int] +) -> Tuple[np.ndarray, int, int, int, int]: + """This function returns the indices of the rows in the trajectory + if the trajectory is split into day and night. 
+ + Args: + traj: numpy array, trajectory + contains 8 columns: [s,x0,y0,t0,x1,y1,t1,obs] + tz_str: str, timezone + index: int, index of the window + start_time: int, starting time of the window + end_time: int, ending time of the window + current_time_list: list of int, current time + Returns: + A tuple of: + numpy array, indices of the rows in the trajectory + if the trajectory is split into day and night + int, index of the row in the trajectory + where the first part of the trajectory ends + int, index of the row in the trajectory + where the second part of the trajectory starts + int, starting time of the second part of the trajectory + int, ending time of the second part of the trajectory + """ + + current_time_list2 = current_time_list.copy() + current_time_list3 = current_time_list.copy() + current_time_list2[3] = 8 + current_time_list3[3] = 20 + start_time2 = datetime2stamp(current_time_list2, tz_str) + end_time2 = datetime2stamp(current_time_list3, tz_str) + if index % 2 == 0: + # daytime + index_rows = (traj[:, 3] <= end_time2) * (traj[:, 6] >= start_time2) + + return index_rows, 0, 0, start_time2, end_time2 + + # nighttime + index1 = ( + (traj[:, 6] < start_time2) + * (traj[:, 3] < end_time) + * (traj[:, 6] > start_time) + ) + index2 = ( + (traj[:, 3] > end_time2) + * (traj[:, 3] < end_time) + * (traj[:, 6] > start_time) + ) + stop1 = sum(index1) - 1 + stop2 = sum(index1) + index_rows = index1 + index2 + + return index_rows, stop1, stop2, start_time2, end_time2 + + +def smooth_temp_ends( + temp: np.ndarray, index_rows: np.ndarray, t0_temp: float, + t1_temp: float, parameters: Hyperparameters, i: int, start_time: int, + end_time2: int, start_time2: int, end_time: int, stop1: int, stop2: int +) -> np.ndarray: + """This function smooths the starting and ending points of the + trajectory. 
+ + Args: + temp: numpy array, trajectory + contains 8 columns: [s,x0,y0,t0,x1,y1,t1,obs] + index_rows: numpy array, indices of the rows in the trajectory + if the trajectory is split into day and night + t0_temp: float, starting time of the trajectory + t1_temp: float, ending time of the trajectory + parameters: Hyperparameters, hyperparameters in functions + recommend to set it to default + i: int, index of the window + start_time: int, starting time of the window + end_time2: int, ending time of the second part of the trajectory + start_time2: int, starting time of the second part of the trajectory + end_time: int, ending time of the window + stop1: int, index of the row in the trajectory + where the first part of the trajectory ends + stop2: int, index of the row in the trajectory + where the second part of the trajectory starts + Returns: + temp: numpy array, trajectory + contains 8 columns: [s,x0,y0,t0,x1,y1,t1,obs] + """ + if sum(index_rows) == 1: + p0 = (t0_temp - temp[0, 3]) / (temp[0, 6] - temp[0, 3]) + p1 = (t1_temp - temp[0, 3]) / (temp[0, 6] - temp[0, 3]) + x0, y0 = temp[0, [1, 2]] + x1, y1 = temp[0, [4, 5]] + temp[0, 1] = (1 - p0) * x0 + p0 * x1 + temp[0, 2] = (1 - p0) * y0 + p0 * y1 + temp[0, 3] = t0_temp + temp[0, 4] = (1 - p1) * x0 + p1 * x1 + temp[0, 5] = (1 - p1) * y0 + p1 * y1 + temp[0, 6] = t1_temp + else: + if parameters.split_day_night and i % 2 != 0: + t0_temp_l = [start_time, end_time2] + t1_temp_l = [start_time2, end_time] + start_temp = [0, stop2] + end_temp = [stop1, -1] + for j in range(2): + p0 = (temp[start_temp[j], 6] - t0_temp_l[j]) / ( + temp[start_temp[j], 6] - temp[start_temp[j], 3] + ) + p1 = (t1_temp_l[j] - temp[end_temp[j], 3]) / ( + temp[end_temp[j], 6] - temp[end_temp[j], 3] + ) + temp[start_temp[j], 1] = (1 - p0) * temp[ + start_temp[j], 4 + ] + p0 * temp[start_temp[j], 1] + temp[start_temp[j], 2] = (1 - p0) * temp[ + start_temp[j], 5 + ] + p0 * temp[start_temp[j], 2] + temp[start_temp[j], 3] = t0_temp_l[j] + 
temp[end_temp[j], 4] = (1 - p1) * temp[ + end_temp[j], 1 + ] + p1 * temp[end_temp[j], 4] + temp[end_temp[j], 5] = (1 - p1) * temp[ + end_temp[j], 2 + ] + p1 * temp[end_temp[j], 5] + temp[end_temp[j], 6] = t1_temp_l[j] + else: + p0 = (temp[0, 6] - t0_temp) / (temp[0, 6] - temp[0, 3]) + p1 = ( + (t1_temp - temp[-1, 3]) + / (temp[-1, 6] - temp[-1, 3]) + ) + temp[0, 1] = (1 - p0) * temp[0, 4] + p0 * temp[0, 1] + temp[0, 2] = (1 - p0) * temp[0, 5] + p0 * temp[0, 2] + temp[0, 3] = t0_temp + temp[-1, 4] = (1 - p1) * temp[-1, 1] + p1 * temp[-1, 4] + temp[-1, 5] = (1 - p1) * temp[-1, 2] + p1 * temp[-1, 5] + temp[-1, 6] = t1_temp + + return temp + + +def get_pause_array(pause_vec: np.ndarray, home_lat: float, home_lon: float, + parameters: Hyperparameters) -> np.ndarray: + """This function returns a numpy array of pauses. + + Args: + pause_vec: numpy array, contains 8 columns: [s,x0,y0,t0,x1,y1,t1,obs] + home_lat: float, latitude of the home + home_lon: float, longitude of the home + parameters: Hyperparameters, hyperparameters in functions + Returns: + pause_array: numpy array, contains 3 columns: [x, y, t] + """ + pause_array: np.ndarray = np.array([]) + for row in pause_vec: + if ( + great_circle_dist(row[1], row[2], home_lat, home_lon)[0] + > 2*parameters.place_point_radius + ): + if len(pause_array) == 0: + pause_array = np.array( + [extract_pause_from_row(row)] + ) + elif ( + np.min( + great_circle_dist( + row[1], row[2], + pause_array[:, 0], pause_array[:, 1], + ) + ) + > 2*parameters.place_point_radius + ): + pause_array = np.append( + pause_array, + [extract_pause_from_row(row)], + axis=0, + ) + else: + pause_array[ + np.argmin( + great_circle_dist( + row[1], row[2], + pause_array[:, 0], pause_array[:, 1], + ) + ), + -1, + ] += (row[6] - row[3]) / 60 + + return pause_array + + +def extract_pause_from_row(row: np.ndarray) -> list: + """This function extracts the pause from a row in a trajectory. 
+ + Args: + row: numpy array, contains 8 columns: [s,x0,y0,t0,x1,y1,t1,obs] + Returns: + list, pause + """ + return [row[1], row[2], (row[6] - row[3]) / 60] + + +def get_polygon(saved_polygons: dict, lat: float, lon: float, label: str, + radius: float) -> Tuple[Polygon, dict]: + """This function returns a saved polygon if it exists, + otherwise it computes a polygon and saves it. + + Args: + saved_polygons: dict, contains saved polygons + lat: float, latitude of the center of the circle + lon: float, longitude of the center of the circle + label: str, label of the location + radius: float, radius of the circle + Returns: + A tuple with the following elements: + shapely polygon + dict, contains saved polygons + """ + loc_str = f"{lat}, {lon} - {label}" + if loc_str in saved_polygons.keys(): + return saved_polygons[loc_str], saved_polygons + + circle = transform_point_to_circle(lat, lon, radius) + saved_polygons[loc_str] = circle + return circle, saved_polygons + + +def intersect_with_places_of_interest( + pause: list, places_of_interest: list, saved_polygons: dict, + parameters: Hyperparameters, ids: dict, locations: dict, + ids_keys_list: list +) -> Tuple[list, bool]: + """This function computes the intersection between a pause and + places of interest. 
+ + Args: + pause: list, pause + places_of_interest: list of str, places of interest + saved_polygons: dict, contains saved polygons + parameters: Hyperparameters, hyperparameters in functions + ids: dict, contains nearby locations' ids + locations: dict, contains nearby locations' coordinates + ids_keys_list: list of str, keys of ids + Returns: + A tuple with the following elements: + list of float, intersection between a pause and + places of interest + bool, True if the pause is not intersected with + any place of interest + """ + all_place_probs = [0] * len(places_of_interest) + pause_circle, saved_polygons = get_polygon( + saved_polygons, pause[0], pause[1], "person", + parameters.person_point_radius + ) + add_to_other = True + for j, place in enumerate(places_of_interest): + if place not in ids_keys_list: + continue + for element_id in ids[place]: + intersection_area = 0 + + if len(locations[element_id]) == 1: + loc_lat, loc_lon = locations[element_id][0] + + loc_circle = get_polygon( + saved_polygons, loc_lat, loc_lon, "place", + parameters.place_point_radius + ) + + intersection_area = pause_circle.intersection( + loc_circle + ).area + elif len(locations[element_id]) >= 3: + polygon = Polygon(locations[element_id]) + + intersection_area = pause_circle.intersection( + polygon + ).area + + if intersection_area > 0: + all_place_probs[j] += intersection_area + add_to_other = False + + return all_place_probs, add_to_other + + +def compute_flight_pause_stats( + flight_d_vec: np.ndarray, flight_t_vec: np.ndarray, + pause_t_vec: np.ndarray, +) -> list: + """This function computes the flight and pause statistics. 
+ + Args: + flight_d_vec: numpy array, contains flight distances + flight_t_vec: numpy array, contains flight durations + pause_t_vec: numpy array, contains pause durations + Returns: + list with the following elements: + av_f_len: float, average flight length + sd_f_len: float, standard deviation of flight length + av_f_dur: float, average flight duration + sd_f_dur: float, standard deviation of flight duration + av_p_dur: float, average pause duration + sd_p_dur: float, standard deviation of pause duration + """ + if len(flight_d_vec) > 0: + av_f_len = np.mean(flight_d_vec) + sd_f_len = np.std(flight_d_vec) + av_f_dur = np.mean(flight_t_vec) + sd_f_dur = np.std(flight_t_vec) + else: + av_f_len = 0 + sd_f_len = 0 + av_f_dur = 0 + sd_f_dur = 0 + + if len(pause_t_vec) > 0: + av_p_dur = np.mean(pause_t_vec) + sd_p_dur = np.std(pause_t_vec) + else: + av_p_dur = 0 + sd_p_dur = 0 + + return [av_f_len, sd_f_len, av_f_dur, sd_f_dur, av_p_dur, sd_p_dur] + + +def final_hourly_prep( + obs_dur: float, time_at_home: float, dist_traveled: float, + max_dist_home: float, total_flight_time: float, total_pause_time: float, + flight_pause_stats: list, all_place_times: list, + all_place_times_adjusted: list, summary_stats: list, log_tags: dict, + log_tags_temp: list, datetime_list: List[int], + places_of_interest: Optional[List[str]] +) -> Tuple[list, dict]: + """This function prepares the final hourly summary statistics. 
+ + Args: + obs_dur: float, observed duration + time_at_home: float, time at home + dist_traveled: float, distance traveled + max_dist_home: float, maximum distance from home + total_flight_time: float, total flight time + total_pause_time: float, total pause time + flight_pause_stats: list, flight and pause statistics + all_place_times: list of float, time spent at places of interest + all_place_times_adjusted: list of float, adjusted time spent at + places of interest + summary_stats: list, summary statistics + log_tags: dict, contains log of tags of all locations visited + from openstreetmap + log_tags_temp: list, log of tags of all locations visited + from openstreetmap + datetime_list: list of int, current time + places_of_interest: list of str, places of interest + Returns: + A tuple of: + a list, summary statistics + a dict, contains log of tags of all locations visited + from openstreetmap + """ + + year, month, day, hour = datetime_list[:4] + ( + av_f_len, sd_f_len, av_f_dur, sd_f_dur, av_p_dur, sd_p_dur + ) = flight_pause_stats + + if obs_dur == 0: + res = [ + year, + month, + day, + hour, + 0, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + ] + if places_of_interest is not None: + for place_int in range(2 * len(places_of_interest) + 1): + res.append(pd.NA) + summary_stats.append(res) + log_tags[f"{day}/{month}/{year} {hour}:00"] = [] + else: + res = [ + year, + month, + day, + hour, + obs_dur / 60, + time_at_home / 60, + dist_traveled / 1000, + max_dist_home / 1000, + total_flight_time / 60, + av_f_len, + sd_f_len, + av_f_dur / 60, + sd_f_dur / 60, + total_pause_time / 60, + av_p_dur / 60, + sd_p_dur / 60, + ] + if places_of_interest is not None: + res += all_place_times + res += all_place_times_adjusted + log_tags[f"{day}/{month}/{year} {hour}:00"] = log_tags_temp + + summary_stats.append(res) + + return summary_stats, log_tags + + +def final_daily_prep( + obs_dur: float, obs_day: float, obs_night: 
float, time_at_home: float, + dist_traveled: float, max_dist_home: float, radius: float, + diameter: float, num_sig: int, entropy: float, total_flight_time: float, + total_pause_time: float, flight_pause_stats: list, + all_place_times: list, all_place_times_adjusted: list, + summary_stats: list, log_tags: dict, log_tags_temp: list, + datetime_list: List[int], places_of_interest: Optional[List[str]], + parameters: Hyperparameters, pcr: float, pcr_stratified: float, i: int +) -> Tuple[list, dict]: + """This function prepares the final daily summary statistics. + + Args: + obs_dur: float, observed duration + obs_day: float, observed duration during the day + obs_night: float, observed duration during the night + time_at_home: float, time at home + dist_traveled: float, distance traveled + max_dist_home: float, maximum distance from home + radius: float, radius of gyration + diameter: float, diameter of gyration + num_sig: int, number of significant places + entropy: float, entropy of the trajectory + total_flight_time: float, total flight time + total_pause_time: float, total pause time + flight_pause_stats: list, flight and pause statistics + all_place_times: list of float, time spent at places of interest + all_place_times_adjusted: list of float, adjusted time spent at + places of interest + summary_stats: list, summary statistics + log_tags: dict, contains log of tags of all locations visited + from openstreetmap + log_tags_temp: list, log of tags of all locations visited + from openstreetmap + datetime_list: list of int, current time + places_of_interest: list of str, places of interest + parameters: Hyperparameters, hyperparameters in functions + pcr: float, physical circadian rhythm + pcr_stratified: float, physical circadian rhythm stratified + i: int, index of the window + Returns: + A tuple of: + a list, summary statistics + a dict, contains log of tags of all locations visited + from openstreetmap + """ + + year, month, day = datetime_list[:3] + ( + 
av_f_len, sd_f_len, av_f_dur, sd_f_dur, av_p_dur, sd_p_dur + ) = flight_pause_stats + + if obs_dur == 0: + res = [ + year, + month, + day, + 0, + 0, + 0, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + pd.NA, + ] + if parameters.pcr_bool: + res += [pcr, pcr_stratified] + if places_of_interest is not None: + for place_int in range(2 * len(places_of_interest) + 1): + res.append(pd.NA) + summary_stats.append(res) + log_tags[f"{day}/{month}/{year}"] = [] + else: + res = [ + year, + month, + day, + obs_dur / 3600, + obs_day / 3600, + obs_night / 3600, + time_at_home / 3600, + dist_traveled / 1000, + max_dist_home / 1000, + radius / 1000, + diameter / 1000, + num_sig, + entropy, + total_flight_time / 3600, + av_f_len / 1000, + sd_f_len / 1000, + av_f_dur / 3600, + sd_f_dur / 3600, + total_pause_time / 3600, + av_p_dur / 3600, + sd_p_dur / 3600, + ] + if parameters.pcr_bool: + res += [pcr, pcr_stratified] + if places_of_interest is not None: + res += all_place_times + res += all_place_times_adjusted + summary_stats.append(res) + if parameters.split_day_night: + if i % 2 == 0: + time_cat = "daytime" + else: + time_cat = "nighttime" + log_tags[f"{day}/{month}/{year}, {time_cat}"] = ( + log_tags_temp + ) + else: + log_tags[f"{day}/{month}/{year}"] = log_tags_temp + + return summary_stats, log_tags + + +def format_summary_stats( + summary_stats: list, log_tags: dict, frequency: Frequency, + parameters: Hyperparameters, places_of_interest: Optional[List[str]] +) -> Tuple[pd.DataFrame, dict]: + """This function formats the summary statistics. 
+ + Args: + summary_stats: list, summary statistics + log_tags: dict, contains log of tags of all locations visited + from openstreetmap + frequency: Frequency, the time windows of the summary statistics + parameters: Hyperparameters, hyperparameters in functions + recommend to set it to default + places_of_interest: list of str, places of interest + Returns: + A tuple of: + a pd dataframe, summary statistics + a dict, contains log of tags of all locations visited + from openstreetmap + """ + + summary_stats_df = pd.DataFrame(summary_stats) + + if places_of_interest is None: + places_of_interest2 = [] + places_of_interest3 = [] + else: + places_of_interest2 = places_of_interest.copy() + places_of_interest2.append("other") + places_of_interest3 = [f"{pl}_adjusted" for pl in places_of_interest] + + if parameters.pcr_bool: + pcr_cols = [ + "physical_circadian_rhythm", + "physical_circadian_rhythm_stratified", + ] + else: + pcr_cols = [] + + if frequency != Frequency.DAILY: + summary_stats_df.columns = ( + [ + "year", + "month", + "day", + "hour", + "obs_duration", + "home_time", + "dist_traveled", + "max_dist_home", + "total_flight_time", + "av_flight_length", + "sd_flight_length", + "av_flight_duration", + "sd_flight_duration", + "total_pause_time", + "av_pause_duration", + "sd_pause_duration", + ] + + places_of_interest2 + + places_of_interest3 + ) + else: + summary_stats_df.columns = ( + [ + "year", + "month", + "day", + "obs_duration", + "obs_day", + "obs_night", + "home_time", + "dist_traveled", + "max_dist_home", + "radius", + "diameter", + "num_sig_places", + "entropy", + "total_flight_time", + "av_flight_length", + "sd_flight_length", + "av_flight_duration", + "sd_flight_duration", + "total_pause_time", + "av_pause_duration", + "sd_pause_duration", + ] + + pcr_cols + + places_of_interest2 + + places_of_interest3 + ) + + if parameters.split_day_night: + summary_stats_df2 = split_day_night_cols(summary_stats_df) + else: + summary_stats_df2 = summary_stats_df + + 
return summary_stats_df2, log_tags + + def gps_summaries( traj: np.ndarray, tz_str: str, frequency: Frequency, + parameters: Hyperparameters, places_of_interest: Optional[List[str]] = None, - save_osm_log: bool = False, osm_tags: Optional[List[OSMTags]] = None, - threshold: Optional[int] = None, - split_day_night: bool = False, - person_point_radius: float = 2, - place_point_radius: float = 7.5, ) -> Tuple[pd.DataFrame, dict]: """This function derives summary statistics from the imputed trajectories @@ -242,7 +1095,8 @@ def gps_summaries( "max_dist_home", "dist_traveled","av_flight_length","sd_flight_length", "av_flight_duration","sd_flight_duration"] if the frequency is daily, it additionally returns - ["obs_day","obs_night","radius","diameter","num_sig_places","entropy"] + ["obs_day","obs_night","radius","diameter""num_sig_places","entropy", + "physical_circadian_rhythm","physical_circadian_rhythm_stratified"] Args: traj: 2d array, output from imp_to_traj(), which is a n by 8 mat, @@ -253,27 +1107,18 @@ def gps_summaries( obs (1 as observed and 0 as imputed) tz_str: timezone frequency: Frequency, the time windows of the summary statistics + parameters: Hyperparameters, hyperparameters in functions + recommend to set it to default places_of_interest: list of "osm_tags" places to watch, keywords as used in openstreetmaps e.g. 
["cafe", "hospital", "restaurant"] - save_osm_log: bool, True if you want to output a log of locations - visited and their tags osm_tags: list of tags to search for in openstreetmaps avoid using a lot of them if large area is covered - threshold: int, time spent in a pause needs to exceed the threshold - to be placed in the log - only if save_osm_log True, in minutes - split_day_night: bool, True if you want to split all metrics to - daytime and nighttime patterns - only for daily frequency - person_point_radius: float, radius of the person's circle when - discovering places near him in pauses - place_point_radius: float, radius of place's circle - when place is returned as centre coordinates from osm Returns: - a pd dataframe, with each row as an hour/day, + A tuple of: + a pd dataframe, with each row as an hour/day, and each col as a feature/stat - a dictionary, contains log of tags of all locations visited + a dictionary, contains log of tags of all locations visited from openstreetmap Raises: RuntimeError: if the query to Overpass API fails @@ -284,14 +1129,16 @@ def gps_summaries( raise ValueError("Frequency must be 'hourly' or 'daily'") if frequency != Frequency.DAILY: - split_day_night = False + parameters.split_day_night = False ids: Dict[str, List[int]] = {} locations: Dict[int, List[List[float]]] = {} tags: Dict[int, Dict[str, str]] = {} - if places_of_interest is not None or save_osm_log: + if places_of_interest is not None or parameters.save_osm_log: ids, locations, tags = get_nearby_locations(traj, osm_tags) ids_keys_list = list(ids.keys()) + else: + ids_keys_list = [] obs_traj = traj[traj[:, 7] == 1, :] home_lat, home_lon = locate_home(obs_traj, tz_str) @@ -301,38 +1148,27 @@ def gps_summaries( if frequency != Frequency.DAILY: # find starting and ending time logger.info("Calculating the hourly summary stats...") - time_list = stamp2datetime(traj[0, 3], tz_str) - time_list[4:6] = [0, 0] - start_stamp = datetime2stamp(time_list, tz_str) - time_list = 
stamp2datetime(traj[-1, 6], tz_str) - time_list[4:6] = [0, 0] - end_stamp = datetime2stamp(time_list, tz_str) - # start_time, end_time are exact points - # (if it ends at 2019-3-8 11 o'clock, then 11 shouldn't be included) - window = frequency.value * 60 * 60 - no_windows = (end_stamp - start_stamp) // window + start_stamp, end_stamp = get_time_range( + traj, [4, 5], tz_str + ) + window, num_windows = compute_window_and_count( + start_stamp, end_stamp, frequency.value + ) else: # find starting and ending time logger.info("Calculating the daily summary stats...") - time_list = stamp2datetime(traj[0, 3], tz_str) - time_list[3:6] = [0, 0, 0] - start_stamp = datetime2stamp(time_list, tz_str) - time_list = stamp2datetime(traj[-1, 6], tz_str) - time_list[3:6] = [0, 0, 0] - end_stamp = datetime2stamp(time_list, tz_str) + 3600 * 24 - # if it starts from 2019-3-8 11 o'clock, - # then our daily summary starts from 2019-3-9) - window = 60 * 60 * 24 - no_windows = (end_stamp - start_stamp) // window - if split_day_night: - no_windows *= 2 - - if no_windows <= 0: + start_stamp, end_stamp = get_time_range( + traj, [3, 4, 5], tz_str, 3600*24 + ) + window, num_windows = compute_window_and_count( + start_stamp, end_stamp, 24, parameters.split_day_night + ) + + if num_windows <= 0: raise ValueError("start time and end time are not correct") - summary_stats_df = pd.DataFrame([]) - for i in range(no_windows): - if split_day_night: + for i in range(num_windows): + if parameters.split_day_night: i2 = i // 2 else: i2 = i @@ -349,36 +1185,14 @@ def gps_summaries( stop1 = 0 stop2 = 0 - if split_day_night: - current_time_list2 = current_time_list.copy() - current_time_list3 = current_time_list.copy() - current_time_list2[3] = 8 - current_time_list3[3] = 20 - start_time2 = datetime2stamp(current_time_list2, tz_str) - end_time2 = datetime2stamp(current_time_list3, tz_str) - if i % 2 == 0: - # daytime - index_rows = ( - (traj[:, 3] <= end_time2) - * (traj[:, 6] >= start_time2) - ) - else: - # 
nighttime - index1 = ( - (traj[:, 6] < start_time2) - * (traj[:, 3] < end_time) - * (traj[:, 6] > start_time) - ) - index2 = ( - (traj[:, 3] > end_time2) - * (traj[:, 3] < end_time) - * (traj[:, 6] > start_time) + if parameters.split_day_night: + index_rows, stop1, stop2, start_time2, end_time2 = ( + get_day_night_indices( + traj, tz_str, i, start_time, end_time, current_time_list ) - stop1 = sum(index1) - 1 - stop2 = sum(index1) - index_rows = index1 + index2 + ) - if sum(index_rows) == 0 and split_day_night: + if sum(index_rows) == 0 and parameters.split_day_night: # if there is no data in the day, then we need to # to add empty rows to the dataframe with 21 columns res = [year, month, day] + [0] * 18 @@ -388,7 +1202,7 @@ def gps_summaries( res += [0] * (2 * len(places_of_interest) + 1) summary_stats.append(res) continue - elif sum(index_rows) == 0 and not split_day_night: + elif sum(index_rows) == 0 and not parameters.split_day_night: # There is no data and it is daily data, so we need to add empty # rows res = [year, month, day] + [0] * 3 + [pd.NA] * 15 @@ -403,63 +1217,17 @@ def gps_summaries( temp = traj[index_rows, :] # take a subset which is exactly one hour/day, # cut the trajs at two ends proportionally - if split_day_night and i % 2 == 0: + if parameters.split_day_night and i % 2 == 0: t0_temp = start_time2 t1_temp = end_time2 else: t0_temp = start_time t1_temp = end_time - if sum(index_rows) == 1: - p0 = (t0_temp - temp[0, 3]) / (temp[0, 6] - temp[0, 3]) - p1 = (t1_temp - temp[0, 3]) / (temp[0, 6] - temp[0, 3]) - x0, y0 = temp[0, [1, 2]] - x1, y1 = temp[0, [4, 5]] - temp[0, 1] = (1 - p0) * x0 + p0 * x1 - temp[0, 2] = (1 - p0) * y0 + p0 * y1 - temp[0, 3] = t0_temp - temp[0, 4] = (1 - p1) * x0 + p1 * x1 - temp[0, 5] = (1 - p1) * y0 + p1 * y1 - temp[0, 6] = t1_temp - else: - if split_day_night and i % 2 != 0: - t0_temp_l = [start_time, end_time2] - t1_temp_l = [start_time2, end_time] - start_temp = [0, stop2] - end_temp = [stop1, -1] - for j in range(2): - 
p0 = (temp[start_temp[j], 6] - t0_temp_l[j]) / ( - temp[start_temp[j], 6] - temp[start_temp[j], 3] - ) - p1 = (t1_temp_l[j] - temp[end_temp[j], 3]) / ( - temp[end_temp[j], 6] - temp[end_temp[j], 3] - ) - temp[start_temp[j], 1] = (1 - p0) * temp[ - start_temp[j], 4 - ] + p0 * temp[start_temp[j], 1] - temp[start_temp[j], 2] = (1 - p0) * temp[ - start_temp[j], 5 - ] + p0 * temp[start_temp[j], 2] - temp[start_temp[j], 3] = t0_temp_l[j] - temp[end_temp[j], 4] = (1 - p1) * temp[ - end_temp[j], 1 - ] + p1 * temp[end_temp[j], 4] - temp[end_temp[j], 5] = (1 - p1) * temp[ - end_temp[j], 2 - ] + p1 * temp[end_temp[j], 5] - temp[end_temp[j], 6] = t1_temp_l[j] - else: - p0 = (temp[0, 6] - t0_temp) / (temp[0, 6] - temp[0, 3]) - p1 = ( - (t1_temp - temp[-1, 3]) - / (temp[-1, 6] - temp[-1, 3]) - ) - temp[0, 1] = (1 - p0) * temp[0, 4] + p0 * temp[0, 1] - temp[0, 2] = (1 - p0) * temp[0, 5] + p0 * temp[0, 2] - temp[0, 3] = t0_temp - temp[-1, 4] = (1 - p1) * temp[-1, 1] + p1 * temp[-1, 4] - temp[-1, 5] = (1 - p1) * temp[-1, 2] + p1 * temp[-1, 5] - temp[-1, 6] = t1_temp + temp = smooth_temp_ends( + temp, index_rows, t0_temp, t1_temp, parameters, i, start_time, + end_time2, start_time2, end_time, stop1, stop2 + ) obs_dur = sum((temp[:, 6] - temp[:, 3])[temp[:, 7] == 1]) d_home_1 = great_circle_dist( @@ -487,42 +1255,11 @@ def gps_summaries( all_place_times = [] all_place_times_adjusted = [] log_tags_temp = [] - if places_of_interest is not None or save_osm_log: + if places_of_interest is not None or parameters.save_osm_log: pause_vec = temp[temp[:, 0] == 2] - pause_array: np.ndarray = np.array([]) - for row in pause_vec: - if ( - great_circle_dist(row[1], row[2], home_lat, home_lon)[0] - > 2*place_point_radius - ): - if len(pause_array) == 0: - pause_array = np.array( - [[row[1], row[2], (row[6] - row[3]) / 60]] - ) - elif ( - np.min( - great_circle_dist( - row[1], row[2], - pause_array[:, 0], pause_array[:, 1], - ) - ) - > 2*place_point_radius - ): - pause_array = np.append( - 
pause_array, - [[row[1], row[2], (row[6] - row[3]) / 60]], - axis=0, - ) - else: - pause_array[ - np.argmin( - great_circle_dist( - row[1], row[2], - pause_array[:, 0], pause_array[:, 1], - ) - ), - -1, - ] += (row[6] - row[3]) / 60 + pause_array = get_pause_array( + pause_vec, home_lat, home_lon, parameters + ) if places_of_interest is not None: all_place_times = [0] * (len(places_of_interest) + 1) @@ -530,52 +1267,12 @@ def gps_summaries( for pause in pause_array: if places_of_interest is not None: - all_place_probs = [0] * len(places_of_interest) - pause_str = f"{pause[0]}, {pause[1]} - person" - if pause_str in saved_polygons.keys(): - pause_circle = saved_polygons[pause_str] - else: - pause_circle = transform_point_to_circle( - pause[0], pause[1], person_point_radius + all_place_probs, add_to_other = ( + intersect_with_places_of_interest( + pause, places_of_interest, saved_polygons, + parameters, ids, locations, ids_keys_list ) - saved_polygons[pause_str] = pause_circle - add_to_other = True - for j, place in enumerate(places_of_interest): - # if place of interest not in nearby locations of - # the current pause, skip - if place not in ids_keys_list: - continue - for element_id in ids[place]: - if len(locations[element_id]) == 1: - loc_lat = locations[element_id][0][0] - loc_lon = locations[element_id][0][1] - loc_str = f"{loc_lat}, {loc_lon} - place" - if loc_str in saved_polygons.keys(): - loc_circle = saved_polygons[loc_str] - else: - loc_circle = transform_point_to_circle( - loc_lat, - loc_lon, - place_point_radius, - ) - saved_polygons[loc_str] = loc_circle - - intersection_area = pause_circle.intersection( - loc_circle - ).area - if intersection_area > 0: - all_place_probs[j] += intersection_area - add_to_other = False - - elif len(locations[element_id]) >= 3: - polygon = Polygon(locations[element_id]) - - intersection_area = pause_circle.intersection( - polygon - ).area - if intersection_area > 0: - all_place_probs[j] += intersection_area - add_to_other 
= False + ) # in case of pause not in places of interest if add_to_other: @@ -591,14 +1288,8 @@ def gps_summaries( prob * pause[2] / 60 ) - if save_osm_log: - if threshold is None: - threshold = 60 - logger.info( - "threshold parameter set to None," - " automatically converted to 60min." - ) - if pause[2] >= threshold: + if parameters.save_osm_log: + if pause[2] >= parameters.log_threshold: for place_id, place_coordinates in locations.items(): if len(place_coordinates) == 1: if ( @@ -607,7 +1298,7 @@ def gps_summaries( place_coordinates[0][0], place_coordinates[0][1], )[0] - < place_point_radius + < parameters.place_point_radius ): log_tags_temp.append(tags[place_id]) elif len(place_coordinates) >= 3: @@ -616,72 +1307,18 @@ def gps_summaries( if polygon.contains(point): log_tags_temp.append(tags[place_id]) - if len(flight_d_vec) > 0: - av_f_len = np.mean(flight_d_vec) - sd_f_len = np.std(flight_d_vec) - av_f_dur = np.mean(flight_t_vec) - sd_f_dur = np.std(flight_t_vec) - else: - av_f_len = 0 - sd_f_len = 0 - av_f_dur = 0 - sd_f_dur = 0 - if len(pause_t_vec) > 0: - av_p_dur = np.mean(pause_t_vec) - sd_p_dur = np.std(pause_t_vec) - else: - av_p_dur = 0 - sd_p_dur = 0 - if frequency != Frequency.DAILY: - if obs_dur == 0: - res = [ - year, - month, - day, - hour, - 0, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - ] - if places_of_interest is not None: - for place_int in range(2 * len(places_of_interest) + 1): - res.append(pd.NA) - summary_stats.append(res) - log_tags[f"{day}/{month}/{year} {hour}:00"] = [] - else: - res = [ - year, - month, - day, - hour, - obs_dur / 60, - time_at_home / 60, - dist_traveled / 1000, - max_dist_home / 1000, - total_flight_time / 60, - av_f_len, - sd_f_len, - av_f_dur / 60, - sd_f_dur / 60, - total_pause_time / 60, - av_p_dur / 60, - sd_p_dur / 60, - ] - if places_of_interest is not None: - res += all_place_times - res += all_place_times_adjusted - log_tags[f"{day}/{month}/{year} 
{hour}:00"] = log_tags_temp + flight_pause_stats = compute_flight_pause_stats( + flight_d_vec, flight_t_vec, pause_t_vec + ) + datetime_list = [year, month, day, hour, 0, 0] - summary_stats.append(res) + if frequency != Frequency.DAILY: + summary_stats, log_tags = final_hourly_prep( + obs_dur, time_at_home, dist_traveled, max_dist_home, + total_flight_time, total_pause_time, flight_pause_stats, + all_place_times, all_place_times_adjusted, summary_stats, + log_tags, log_tags_temp, datetime_list, places_of_interest + ) else: hours = [] for j in range(temp.shape[0]): @@ -721,6 +1358,22 @@ def gps_summaries( t_sig = np.array(t_xy)[np.array(t_xy) / 60 > 15] p = t_sig / sum(t_sig) entropy = -sum(p * np.log(p + 0.00001)) + # physical circadian rhythm + if obs_dur != 0 and parameters.pcr_bool: + mobility_trace = create_mobility_trace(traj) + pcr = routine_index( + (start_time, end_time), mobility_trace, + parameters.pcr_window, parameters.pcr_sample_rate + ) + pcr_stratified = routine_index( + (start_time, end_time), mobility_trace, + parameters.pcr_window, parameters.pcr_sample_rate, + True, tz_str + ) + else: + pcr = pd.NA + pcr_stratified = pd.NA + # if there is only one significant place, the entropy is zero # but here it is -log(1.00001) < 0 # but the small value is added to avoid log(0) @@ -732,182 +1385,128 @@ def gps_summaries( else: diameters = pairwise_great_circle_dist(temp[:, [1, 2]]) diameter = max(diameters) - if obs_dur == 0: - res = [ - year, - month, - day, - 0, - 0, - 0, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - pd.NA, - ] - if places_of_interest is not None: - for place_int in range(2 * len(places_of_interest) + 1): - res.append(pd.NA) - summary_stats.append(res) - log_tags[f"{day}/{month}/{year}"] = [] - else: - res = [ - year, - month, - day, - obs_dur / 3600, - obs_day / 3600, - obs_night / 3600, - time_at_home / 3600, - dist_traveled / 1000, - max_dist_home / 1000, 
- radius / 1000, - diameter / 1000, - num_sig, - entropy, - total_flight_time / 3600, - av_f_len / 1000, - sd_f_len / 1000, - av_f_dur / 3600, - sd_f_dur / 3600, - total_pause_time / 3600, - av_p_dur / 3600, - sd_p_dur / 3600, - ] - if places_of_interest is not None: - res += all_place_times - res += all_place_times_adjusted - summary_stats.append(res) - if split_day_night: - if i % 2 == 0: - time_cat = "daytime" - else: - time_cat = "nighttime" - log_tags[f"{day}/{month}/{year}, {time_cat}"] = ( - log_tags_temp - ) - else: - log_tags[f"{day}/{month}/{year}"] = log_tags_temp - summary_stats_df = pd.DataFrame(summary_stats) - if places_of_interest is None: - places_of_interest2 = [] - places_of_interest3 = [] - else: - places_of_interest2 = places_of_interest.copy() - places_of_interest2.append("other") - places_of_interest3 = [ - f"{pl}_adjusted" for pl in places_of_interest - ] - if frequency != Frequency.DAILY: - summary_stats_df.columns = ( - [ - "year", - "month", - "day", - "hour", - "obs_duration", - "home_time", - "dist_traveled", - "max_dist_home", - "total_flight_time", - "av_flight_length", - "sd_flight_length", - "av_flight_duration", - "sd_flight_duration", - "total_pause_time", - "av_pause_duration", - "sd_pause_duration", - ] - + places_of_interest2 - + places_of_interest3 - ) - else: - summary_stats_df.columns = ( - [ - "year", - "month", - "day", - "obs_duration", - "obs_day", - "obs_night", - "home_time", - "dist_traveled", - "max_dist_home", - "radius", - "diameter", - "num_sig_places", - "entropy", - "total_flight_time", - "av_flight_length", - "sd_flight_length", - "av_flight_duration", - "sd_flight_duration", - "total_pause_time", - "av_pause_duration", - "sd_pause_duration", - ] - + places_of_interest2 - + places_of_interest3 - ) - if split_day_night: - summary_stats_df_daytime = summary_stats_df[::2].reset_index( - drop=True - ) - summary_stats_df_nighttime = summary_stats_df[1::2].reset_index( - drop=True + summary_stats, log_tags = 
final_daily_prep( + obs_dur, obs_day, obs_night, time_at_home, dist_traveled, + max_dist_home, radius, diameter, num_sig, entropy, + total_flight_time, total_pause_time, flight_pause_stats, + all_place_times, all_place_times_adjusted, summary_stats, + log_tags, log_tags_temp, datetime_list, places_of_interest, + parameters, pcr, pcr_stratified, i ) - summary_stats_df2 = pd.concat( - [ - summary_stats_df_daytime, - summary_stats_df_nighttime.iloc[:, 3:], - ], - axis=1, - ) - summary_stats_df2.columns = ( - list(summary_stats_df.columns)[:3] - + [ - f"{cname}_daytime" - for cname in list(summary_stats_df.columns)[3:] - ] - + [ - f"{cname}_nighttime" - for cname in list(summary_stats_df.columns)[3:] - ] - ) - summary_stats_df2 = summary_stats_df2.drop( - [ - "obs_day_daytime", - "obs_night_daytime", - "obs_day_nighttime", - "obs_night_nighttime", - ], - axis=1, - ) - summary_stats_df2.insert( - 3, - "obs_duration", - summary_stats_df2["obs_duration_daytime"] - + summary_stats_df2["obs_duration_nighttime"], - ) - else: - summary_stats_df2 = summary_stats_df + summary_stats_df2, log_tags = format_summary_stats( + summary_stats, log_tags, frequency, parameters, places_of_interest + ) return summary_stats_df2, log_tags +def split_day_night_cols(summary_stats_df: pd.DataFrame) -> pd.DataFrame: + """This function splits the summary statistics dataframe + into daytime and nighttime columns. 
+ + Args: + summary_stats_df: pandas dataframe with summary statistics + Returns: + pandas dataframe with summary statistics + split into daytime and nighttime columns + """ + + summary_stats_df_daytime = summary_stats_df[::2].reset_index(drop=True) + summary_stats_df_nighttime = summary_stats_df[1::2].reset_index(drop=True) + + summary_stats_df2 = pd.concat( + [ + summary_stats_df_daytime, + summary_stats_df_nighttime.iloc[:, 3:], + ], + axis=1, + ) + summary_stats_df2.columns = ( + list(summary_stats_df.columns)[:3] + + [ + f"{cname}_daytime" + for cname in list(summary_stats_df.columns)[3:] + ] + + [ + f"{cname}_nighttime" + for cname in list(summary_stats_df.columns)[3:] + ] + ) + summary_stats_df2 = summary_stats_df2.drop( + [ + "obs_day_daytime", + "obs_night_daytime", + "obs_day_nighttime", + "obs_night_nighttime", + ], + axis=1, + ) + summary_stats_df2.insert( + 3, + "obs_duration", + summary_stats_df2["obs_duration_daytime"] + + summary_stats_df2["obs_duration_nighttime"], + ) + + return summary_stats_df2 + + +def get_time_range( + traj: np.ndarray, time_reset_indices: list, + tz_str: str, offset_seconds: int = 0, +) -> Tuple[int, int]: + """Computes the starting and ending time stamps + based on given trajectory and indices. 
+ + Args: + traj: numpy array of trajectory + time_reset_indices: list of indices to reset time + offset_seconds: int, offset in seconds + tz_str: str, timezone + Returns: + A tuple of two integers (start_stamp, end_stamp): + start_stamp: int, starting time stamp + end_stamp: int, ending time stamp + """ + time_list = stamp2datetime(traj[0, 3], tz_str) + for idx in time_reset_indices: + time_list[idx] = 0 + start_stamp = datetime2stamp(time_list, tz_str) + + time_list = stamp2datetime(traj[-1, 6], tz_str) + for idx in time_reset_indices: + time_list[idx] = 0 + end_stamp = datetime2stamp(time_list, tz_str) + offset_seconds + + return start_stamp, end_stamp + + +def compute_window_and_count( + start_stamp: int, end_stamp: int, window_hours: int, + split_day_night: bool = False +) -> Tuple[int, int]: + """Computes the window and number of windows based on given time stamps. + + Args: + start_stamp: int, starting time stamp + end_stamp: int, ending time stamp + window_hours: int, window in hours + split_day_night: bool, True if split day and night + Returns: + A tuple of two integers (window, num_windows): + window: int, window in seconds + num_windows: int, number of windows + """ + + window = window_hours * 60 * 60 + num_windows = (end_stamp - start_stamp) // window + if split_day_night: + num_windows *= 2 + return window, num_windows + + def gps_quality_check(study_folder: str, study_id: str) -> float: """The function checks the gps data quality. 
@@ -947,20 +1546,14 @@ def gps_stats_main( tz_str: str, frequency: Frequency, save_traj: bool, - parameters: Optional[Hyperparameters] = None, places_of_interest: Optional[list] = None, - save_osm_log: bool = False, osm_tags: Optional[List[OSMTags]] = None, - threshold: Optional[int] = None, - split_day_night: bool = False, - person_point_radius: float = 2, - place_point_radius: float = 7.5, time_start: Optional[list] = None, time_end: Optional[list] = None, participant_ids: Optional[list] = None, + parameters: Optional[Hyperparameters] = None, all_memory_dict: Optional[dict] = None, all_bv_set: Optional[dict] = None, - quality_threshold: float = 0.05, ): """This the main function to do the GPS imputation. It calls every function defined before. @@ -976,20 +1569,8 @@ def gps_stats_main( csv file, False if you don't places_of_interest: list of places to watch, keywords as used in openstreetmaps - save_osm_log: bool, True if you want to output a log of locations - visited and their tags osm_tags: list of tags to search for in openstreetmaps avoid using a lot of them if large area is covered - threshold: int, time spent in a pause needs to exceed the - threshold to be placed in the log - only if save_osm_log True, in minutes - split_day_night: bool, True if you want to split all metrics to - datetime and nighttime patterns - only for daily frequency - person_point_radius: float, radius of the person's circle when - discovering places near him in pauses - place_point_radius: float, radius of place's circle - when place is returned as centre coordinates from osm time_start: list, starting time of window of interest time_end: list ending time of the window of interest time should be a list of integers with format @@ -1005,8 +1586,6 @@ def gps_stats_main( recommend to set it to default all_memory_dict: dict, from previous run (none if it's the first time) all_bv_set: dict, from previous run (none if it's the first time) - quality_threshold: float, a percentage value of 
the fraction of data - required for a summary to be created. Returns: write summary stats as csv for each user during the specified period @@ -1057,7 +1636,7 @@ def gps_stats_main( logger.info("User: %s", participant_id) # data quality check quality = gps_quality_check(study_folder, participant_id) - if quality > quality_threshold: + if quality > parameters.quality_threshold: # read data logger.info("Read in the csv files ...") data, _, _ = read_data( @@ -1141,11 +1720,9 @@ def gps_stats_main( traj, tz_str, Frequency.HOURLY, + parameters, places_of_interest, - save_osm_log, osm_tags, - threshold, - split_day_night, ) write_all_summaries(participant_id, summary_stats1, f"{output_folder}/hourly") @@ -1153,17 +1730,13 @@ def gps_stats_main( traj, tz_str, Frequency.DAILY, + parameters, places_of_interest, - save_osm_log, osm_tags, - threshold, - split_day_night, - person_point_radius, - place_point_radius, ) write_all_summaries(participant_id, summary_stats2, f"{output_folder}/daily") - if save_osm_log: + if parameters.save_osm_log: os.makedirs(f"{output_folder}/logs", exist_ok=True) with open( f"{output_folder}/logs/locations_logs_hourly.json", @@ -1180,16 +1753,14 @@ def gps_stats_main( traj, tz_str, frequency, + parameters, places_of_interest, - save_osm_log, osm_tags, - threshold, - split_day_night, ) write_all_summaries( participant_id, summary_stats, output_folder ) - if save_osm_log: + if parameters.save_osm_log: os.makedirs(f"{output_folder}/logs", exist_ok=True) with open( f"{output_folder}/logs/locations_logs.json", From fc6b338f9b18e929dc1b0abe9098bf5a3ac5da55 Mon Sep 17 00:00:00 2001 From: GeorgiosEfstathiadis <54844705+GeorgeEfstathiadis@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:42:10 -0400 Subject: [PATCH 02/14] typo flight instead of pause (#209) Co-authored-by: Ilya Sytchev --- docs/source/jasmine.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/jasmine.md b/docs/source/jasmine.md index 6e4f0517..c2bab7ec 100644 
--- a/docs/source/jasmine.md +++ b/docs/source/jasmine.md @@ -150,7 +150,7 @@ The summary statistics that are generated are listed below: | Standard deviation of flight duration | Float | Standard deviation of the duration of all flights (straight line movement) that took place over the course of a day (in hours) | GPS is converted into a sequence of flights (straight line movement) and pauses (time spent stationary). The standard deviation of the duration of flights of the day is reported. | | Total pause time | Float | Total time spent in pause over the course of a day (in hours) | A pause is defined to be a longest time spent stationary without a directional change or flight. | | Average pause duration | Float | Average of the duration of all pauses that took place over the course of a day (in hour) | We consider that a participant has a pause if the distance that he has moved during a 30-s period is less than `r` m. By default, `r`=10.| -| Standard deviation of flight duration | Float | Standard deviation of the duration of all pauses that took place over the course of a day (in hour) | GPS is converted into a sequence of flights (straight line movement) and pauses (time spent stationary). The standard deviation of duration of pauses over the course of a day is reported. | +| Standard deviation of pause duration | Float | Standard deviation of the duration of all pauses that took place over the course of a day (in hour) | GPS is converted into a sequence of flights (straight line movement) and pauses (time spent stationary). The standard deviation of duration of pauses over the course of a day is reported. | | Significant location entropy | Float | Entropy measure based on the proportion of time spent at significant locations over the course of a day | Letting p_i be the proportion of the day spent at significant location I, significant location entropy is calculated as -\sum_{i} p_i*log(p_i), where the sum occurs over all non-zero p_i for that day. 
| | Minutes of GPS data missing | Not Available | Number of minutes of GPS data missing over the course of a day | | | Physical circadian rhythm | Float | A continuous measurement of routine in the interval [0,1] that scores a day with 0 if there was a complete break from routine and 1 if the person followed the exact same routine as have in every other day of follow up | For a detailed description of how this measure is calculated, see Canzian and Musolesi's 2015 paper in the Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing, titled "Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis." Their procedure was followed using 30-min increments as a bin size.| From 9fb0322f89220d42e7f3ce5232143d7d16239d36 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Tue, 31 Oct 2023 15:34:47 -0400 Subject: [PATCH 03/14] Upgrade pytest and pytest-mock to the latest available versions --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3660f732..f4ce5678 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ flake8==4.0.1 mypy==0.950 -pytest==7.1.2 -pytest-mock==3.7.0 +pytest==7.4.3 +pytest-mock==3.12.0 types-python-dateutil==2.8.10 types-pytz==2021.3.7 types-requests==2.27.25 From 1a7f36789beca8c9daf189750a128203e61416e6 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Tue, 31 Oct 2023 15:51:52 -0400 Subject: [PATCH 04/14] Upgrade flake8 to the latest available version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f4ce5678..70bf6207 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -flake8==4.0.1 +flake8==6.1.0 mypy==0.950 pytest==7.4.3 pytest-mock==3.12.0 From 7308942e8e0484e9614d6590e374a72537c02369 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Tue, 31 Oct 2023 16:12:59 
-0400 Subject: [PATCH 05/14] Add flake8-pytest-style to requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 70bf6207..a8e70980 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ flake8==6.1.0 +flake8-pytest-style==1.7.2 mypy==0.950 pytest==7.4.3 pytest-mock==3.12.0 From e5d74550563985e794f93b6ae757d7fb47490d01 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Tue, 31 Oct 2023 16:16:43 -0400 Subject: [PATCH 06/14] Update Bonsai tests --- forest/bonsai/tests/test_simulate_gps_data.py | 115 +++++++++--------- 1 file changed, 55 insertions(+), 60 deletions(-) diff --git a/forest/bonsai/tests/test_simulate_gps_data.py b/forest/bonsai/tests/test_simulate_gps_data.py index 5a14b0b3..c95cc530 100644 --- a/forest/bonsai/tests/test_simulate_gps_data.py +++ b/forest/bonsai/tests/test_simulate_gps_data.py @@ -10,7 +10,7 @@ ActionType, Attributes, Person, gen_basic_traj, gen_basic_pause, gen_route_traj, gen_all_traj, remove_data, prepare_data, process_switches, load_attributes, sim_gps_data - ) +) from forest.jasmine.data2mobmat import great_circle_dist @@ -282,7 +282,7 @@ def test_get_path_close_locations(coords1, coords3): == 2) -@pytest.fixture +@pytest.fixture() def random_path(directions1, coords1, coords2): lat1, lon1 = coords1 lat2, lon2 = coords2 @@ -352,7 +352,8 @@ def test_bounding_box_simple_case(sample_coordinates): def test_zero_meters_bounding_box(sample_coordinates): bbox = bounding_box(sample_coordinates, 0) - assert bbox[0] == bbox[2] and bbox[1] == bbox[3] + assert bbox[0] == bbox[2] + assert bbox[1] == bbox[3] @pytest.fixture(scope="session") @@ -423,32 +424,27 @@ def sample_locations(): def sample_attributes(): """Sample attributes""" return { - "User 1": - { - "main_employment": "none", - "vehicle": "car", - "travelling_status": 10, - "active_status": 0 - }, - - "Users 2-4": - { - "main_employment": "university", - "vehicle": "bicycle", - "travelling_status": 8, 
- "active_status": 8, - "active_status-16": 2 - }, - - "User 5": - { - "main_employment": "office", - "vehicle": "foot", - "travelling_status": 9, - "travelling_status-20": 1, - "preferred_places": ["cafe", "bar", "cinema"] - } + "User 1": { + "main_employment": "none", + "vehicle": "car", + "travelling_status": 10, + "active_status": 0 + }, + "Users 2-4": { + "main_employment": "university", + "vehicle": "bicycle", + "travelling_status": 8, + "active_status": 8, + "active_status-16": 2 + }, + "User 5": { + "main_employment": "office", + "vehicle": "foot", + "travelling_status": 9, + "travelling_status-20": 1, + "preferred_places": ["cafe", "bar", "cinema"] } + } def test_attributes_user_missing_args(sample_attributes): @@ -462,12 +458,10 @@ def test_process_attributes_arguments_correct(sample_attributes): """Test that given arguments are processed correctly""" user_attrs = sample_attributes["User 5"] attrs = Attributes(**user_attrs) - assert ( - attrs.travelling_status == 9 - and attrs.preferred_places == [ - PossibleExits.CAFE, PossibleExits.BAR, PossibleExits.CINEMA - ] - ) + assert attrs.travelling_status == 9 + assert attrs.preferred_places == [ + PossibleExits.CAFE, PossibleExits.BAR, PossibleExits.CINEMA + ] def test_person_main_employment(sample_coordinates, sample_locations, @@ -535,7 +529,7 @@ def test_update_preferred_places_case_first_option(sample_person): sample_person.update_preferred_places(PossibleExits.CAFE) assert sample_person.preferred_places_today == [ PossibleExits.BAR, PossibleExits.CAFE, PossibleExits.CINEMA - ] + ] def test_update_preferred_places_case_last_option(sample_person): @@ -547,8 +541,8 @@ def test_update_preferred_places_case_last_option(sample_person): def test_choose_preferred_exit_morning_home(sample_person): """Test choosing preferred exit early in the morning""" preferred_exit, location = sample_person.choose_preferred_exit(0) - assert (preferred_exit == "home" - and location == sample_person.home_coordinates) + assert 
preferred_exit == "home" + assert location == sample_person.home_coordinates def test_choose_preferred_exit_night_home(sample_person): @@ -556,8 +550,8 @@ def test_choose_preferred_exit_night_home(sample_person): preferred_exit, location = sample_person.choose_preferred_exit( 24 * 3600 - 1 ) - assert (preferred_exit == "home_night" - and location == sample_person.home_coordinates) + assert preferred_exit == "home_night" + assert location == sample_person.home_coordinates def test_choose_preferred_exit_random_exit(sample_person): @@ -669,7 +663,7 @@ def test_gen_basic_traj_distance(random_path): """Test basic trajectory generation distance""" _, dist = gen_basic_traj( random_path[0], random_path[-1], Vehicle.FOOT, 100 - ) + ) assert dist == great_circle_dist(*random_path[0], *random_path[-1])[0] @@ -705,7 +699,8 @@ def test_gen_basic_pause_t_diff_range(random_path): def test_gen_route_traj_shape(random_path): """Test route generation shape is correct""" traj, _ = gen_route_traj(random_path, Vehicle.CAR, 0) - assert traj.shape[1] == 3 and traj.shape[0] >= len(random_path) + assert traj.shape[1] == 3 + assert traj.shape[0] >= len(random_path) def test_gen_route_traj_distance(random_path): @@ -735,7 +730,7 @@ def test_gen_all_traj_len(sample_person, mocker): start_date=datetime.date(2021, 10, 1), end_date=datetime.date(2021, 10, 5), api_key="mock_api_key", - ) + ) assert traj.shape[0] == 4 * 24 * 3600 @@ -749,7 +744,7 @@ def test_gen_all_traj_time(sample_person, mocker): start_date=datetime.date(2021, 10, 1), end_date=datetime.date(2021, 10, 5), api_key="mock_api_key", - ) + ) assert np.all(np.diff(traj[:, 0]) > 0) @@ -764,7 +759,7 @@ def test_gen_all_traj_consistent_values(sample_person, mocker): start_date=datetime.date(2021, 10, 1), end_date=datetime.date(2021, 10, 5), api_key="mock_api_key", - ) + ) distances = [] for i in range(len(traj) - 1): @@ -786,10 +781,11 @@ def test_gen_all_traj_time_at_home(sample_person, mocker): start_date=datetime.date(2021, 10, 1), 
end_date=datetime.date(2021, 10, 5), api_key="mock_api_key", - ) + ) home_time_list = np.array(home_time_list) - assert np.all(home_time_list >= 0) and np.all(home_time_list <= 24 * 3600) + assert np.all(home_time_list >= 0) + assert np.all(home_time_list <= 24 * 3600) def test_gen_all_traj_dist_travelled(sample_person, mocker): @@ -803,7 +799,7 @@ def test_gen_all_traj_dist_travelled(sample_person, mocker): start_date=datetime.date(2021, 10, 1), end_date=datetime.date(2021, 10, 5), api_key="mock_api_key", - ) + ) total_d_list = np.array(total_d_list) assert np.all(total_d_list >= 0) @@ -819,7 +815,7 @@ def generated_trajectory(sample_person, mocker): start_date=datetime.date(2021, 10, 1), end_date=datetime.date(2021, 10, 5), api_key="mock_api_key", - ) + ) return traj @@ -833,7 +829,8 @@ def test_prepare_data_shape(generated_trajectory): """Test shape of prepared dataset""" obs_data = remove_data(generated_trajectory, 15, .8, 4) final_data = prepare_data(obs_data, 0, "UTC") - assert final_data.shape[0] == len(obs_data) and final_data.shape[1] == 6 + assert final_data.shape[0] == len(obs_data) + assert final_data.shape[1] == 6 def test_prepare_data_timezones(generated_trajectory): @@ -842,7 +839,7 @@ def test_prepare_data_timezones(generated_trajectory): final_data = prepare_data(obs_data, 0, "Etc/GMT+1") boolean_series = ( final_data['timestamp'] == final_data['UTC time'] + 3600000 - ) + ) assert sum(boolean_series) == len(boolean_series) @@ -850,10 +847,8 @@ def test_process_switches(sample_attributes): """Test processing attributes with switch of behavior""" key = "User 5" switches = process_switches(sample_attributes, key) - assert ( - list(switches.keys())[0] == "travelling_status-20" - and list(switches.values())[0] == 1 - ) + assert list(switches.keys())[0] == "travelling_status-20" + assert list(switches.values())[0] == 1 def test_load_attributes_nusers(sample_attributes): @@ -912,13 +907,13 @@ def sample_addresses(): 'addr:street': 'Cambridge Crescent' } 
}, - ], + ], dtype=object ) def test_sim_gps_data_times( - sample_addresses, sample_locations, sample_attributes, mocker + sample_addresses, sample_locations, sample_attributes, mocker ): """Test timestamp of simulated trajectories""" mocker.patch("forest.bonsai.simulate_gps_data.get_path", @@ -936,17 +931,17 @@ def test_sim_gps_data_times( cycle=15, percentage=.8, attributes_dict=sample_attributes, - ) + ) list_of_time_differences = [] for i in range(1, data.shape[0]): list_of_time_differences.append( - data.timestamp[i] - data.timestamp[i-1] + data.timestamp[i] - data.timestamp[i - 1] ) assert np.all(np.array(list_of_time_differences) > 0) def test_sim_gps_data_multiple_people( - sample_addresses, sample_locations, sample_attributes, mocker + sample_addresses, sample_locations, sample_attributes, mocker ): """Test timestamp of simulated trajectories""" mocker.patch("forest.bonsai.simulate_gps_data.get_path", @@ -964,5 +959,5 @@ def test_sim_gps_data_multiple_people( cycle=15, percentage=.8, attributes_dict=sample_attributes, - ) + ) assert len(np.unique(data.user)) == 3 From 25861a8b9190cbff0942589e9af9c9a160493963 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Tue, 31 Oct 2023 16:28:14 -0400 Subject: [PATCH 07/14] Update Sycamore tests --- forest/sycamore/tests/test_functions.py | 26 +++++++++---------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/forest/sycamore/tests/test_functions.py b/forest/sycamore/tests/test_functions.py index 79c55266..cd879549 100644 --- a/forest/sycamore/tests/test_functions.py +++ b/forest/sycamore/tests/test_functions.py @@ -44,30 +44,26 @@ TEST_DATA_DIR, "history_file_with_commas_and_semicolons.json" ) -AUDIO_SURVEY_CONFIG = os.path.join( - TEST_DATA_DIR, "audio_survey_config.json" -) +AUDIO_SURVEY_CONFIG = os.path.join(TEST_DATA_DIR, "audio_survey_config.json") -AUDIO_SURVEY_HISTORY = os.path.join( - TEST_DATA_DIR, "audio_survey_history.json" -) +AUDIO_SURVEY_HISTORY = os.path.join(TEST_DATA_DIR, 
"audio_survey_history.json") SEP_QS_DIR = os.path.join(TEST_DATA_DIR, "dir_with_seps_in_qs") -@pytest.fixture +@pytest.fixture() def agg_data_config(): return aggregate_surveys_config(SAMPLE_DIR, SURVEY_SETTINGS_PATH, "UTC", users=["16au2moz", "idr8gqdh"]) -@pytest.fixture +@pytest.fixture() def agg_data_no_config(): return aggregate_surveys_no_config(SAMPLE_DIR, study_tz="UTC", users=["16au2moz", "idr8gqdh"]) -@pytest.fixture +@pytest.fixture() def submits_data(): agg_data = aggregate_surveys_config( SAMPLE_DIR, SURVEY_SETTINGS_PATH_FOR_SUBMITS, study_tz="UTC", @@ -100,7 +96,6 @@ def test_summarize_submits_day(submits_data): def test_get_empty_intervention(): empty_path = os.path.join(TEST_DATA_DIR, "empty_intervention_data.json") empty_dict = get_all_interventions_dict(empty_path) - assert empty_dict == {} @@ -112,7 +107,6 @@ def test_get_intervention(): def test_aggregate_surveys(): sample_agg_data = aggregate_surveys(SAMPLE_DIR, ["idr8gqdh"]) - assert pd.isnull(sample_agg_data.loc[0, "time_prev"]) assert "MALFORMED" not in sample_agg_data["question text"].values @@ -124,8 +118,8 @@ def test_gen_survey_schedule(): time_start=pd.to_datetime("2021-12-01"), time_end=pd.to_datetime("2021-12-30"), users=["idr8gqdh"], - all_interventions_dict=interventions_dict) - + all_interventions_dict=interventions_dict + ) assert sample_schedule.shape[0] == 6 assert np.mean( sample_schedule.columns == @@ -391,9 +385,7 @@ def test_read_user_audio_recordings_stream(): def test_read_user_audio_recordings_stream_no_history(): - df = read_user_audio_recordings_stream( - SAMPLE_DIR, "audioqdz" - ) + df = read_user_audio_recordings_stream(SAMPLE_DIR, "audioqdz") assert df.shape[0] == 24 # 8 surveys, 3 lines per survey assert df["UTC time"].nunique() == 16 # 8 surveys, 2 times per survey assert df["question text"].nunique() == 1 @@ -482,7 +474,7 @@ def test_gen_survey_schedule_with_audio(): users=["idr8gqdh"], all_interventions_dict=interventions_dict, history_path=AUDIO_SURVEY_HISTORY - 
) + ) assert sample_schedule.shape[0] == 687 assert sample_schedule["question_id"].str.contains( 'tO1GFjGJjMnaDRThUQK6l4dv' From 91279ce2aea6f1994638ef0fbb0cb93f33fa1a7a Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Tue, 31 Oct 2023 16:40:39 -0400 Subject: [PATCH 08/14] Update Poplar tests --- forest/poplar/tests/test_poplar_functions.py | 28 ++++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/forest/poplar/tests/test_poplar_functions.py b/forest/poplar/tests/test_poplar_functions.py index a699bf50..bb1f4e66 100644 --- a/forest/poplar/tests/test_poplar_functions.py +++ b/forest/poplar/tests/test_poplar_functions.py @@ -29,31 +29,31 @@ def test_datetime2stamp(): def test_datetime2stamp_bad_seconds(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="second must be in 0..59"): datetime2stamp(time_list=[2020, 11, 1, 12, 9, 150], tz_str="America/New_York") def test_datetime2stamp_bad_minutes(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="minute must be in 0..59"): datetime2stamp(time_list=[2020, 11, 1, 12, 209, 50], tz_str="America/New_York") def test_datetime2stamp_bad_hours(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="hour must be in 0..23"): datetime2stamp(time_list=[2020, 11, 1, 35, 20, 50], tz_str="America/New_York") def test_datetime2stamp_bad_days(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="day is out of range for month"): datetime2stamp(time_list=[2020, 11, 35, 5, 20, 50], tz_str="America/New_York") def test_datetime2stamp_bad_months(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="month must be in 1..12"): datetime2stamp(time_list=[2020, 15, 20, 5, 20, 50], tz_str="America/New_York") @@ -119,19 +119,19 @@ def test_get_files_timestamps(): file_list, timestamp_list = get_files_timestamps( os.path.join(TEST_DATA_DIR, "idr8gqdh", "gps") ) - - assert np.array_equal(file_list, np.array( 
- ['2021-12-15 01_00_00+00_00.csv', '2021-12-16 21_00_00+00_00.csv', - '2021-12-17 00_00_00+00_00.csv'] - )) - assert np.array_equal(timestamp_list, np.array( - [1639530000, 1639688400, 1639699200] - )) + assert np.array_equal( + file_list, np.array(['2021-12-15 01_00_00+00_00.csv', + '2021-12-16 21_00_00+00_00.csv', + '2021-12-17 00_00_00+00_00.csv']) + ) + assert np.array_equal( + timestamp_list, np.array([1639530000, 1639688400, 1639699200]) + ) # Testing functions in forest.poplar.functions.helpers -@pytest.fixture +@pytest.fixture() def gps_df(): return read_data("idr8gqdh", TEST_DATA_DIR, "gps", "America/New_York", time_start=[2021, 12, 15, 20, 9, 50], From 0cc5af29f1cd3a4a3f33cc37f3f8391a12d700d9 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Tue, 31 Oct 2023 17:22:32 -0400 Subject: [PATCH 09/14] Run all workflows on pull request opened, synchronize, or reopened --- .github/workflows/build.yml | 1 + .github/workflows/docs.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a57d163c..7e17e555 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,6 +2,7 @@ name: 'Build and test' on: + pull_request: push: paths-ignore: - 'docs/**' diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 8ff5fd07..affdc46d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,7 @@ name: Sphinx documentation on: + pull_request: push: paths: - 'docs/**' From c9ddad4e63d50131097272f442c5890527a3d493 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Wed, 1 Nov 2023 12:17:41 -0400 Subject: [PATCH 10/14] Avoid running actions twice on PR updates from the base branch --- .github/workflows/build.yml | 1 + .github/workflows/docs.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7e17e555..df98acc8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ 
-3,6 +3,7 @@ name: 'Build and test' on: pull_request: + types: [opened, reopened] push: paths-ignore: - 'docs/**' diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index affdc46d..2f188062 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -2,6 +2,7 @@ name: Sphinx documentation on: pull_request: + types: [opened, reopened] push: paths: - 'docs/**' From 6b41e29540cebe1ec0972945085ec5fdb9564be8 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Wed, 1 Nov 2023 12:29:38 -0400 Subject: [PATCH 11/14] Avoid running actions twice on PRs --- .github/workflows/build.yml | 2 -- .github/workflows/docs.yml | 2 -- 2 files changed, 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index df98acc8..a57d163c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,8 +2,6 @@ name: 'Build and test' on: - pull_request: - types: [opened, reopened] push: paths-ignore: - 'docs/**' diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2f188062..8ff5fd07 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,8 +1,6 @@ name: Sphinx documentation on: - pull_request: - types: [opened, reopened] push: paths: - 'docs/**' From 2a4e832c40c8dd989822a14573dd8664d57cb6d7 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Wed, 1 Nov 2023 12:46:05 -0400 Subject: [PATCH 12/14] Run workflow to satisfy branch protection rules --- .github/workflows/build.yml | 2 ++ .github/workflows/docs.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a57d163c..13872859 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,6 +2,8 @@ name: 'Build and test' on: + pull_request: + types: [ready_for_review, review_requested] push: paths-ignore: - 'docs/**' diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 8ff5fd07..c2cdd31f 100644 --- 
a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,8 @@ name: Sphinx documentation on: + pull_request: + types: [ready_for_review, review_requested] push: paths: - 'docs/**' From 1b1f9aead7887eb612f896bac99cf0cb603e9ea9 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Wed, 1 Nov 2023 12:50:14 -0400 Subject: [PATCH 13/14] Run workflow to satisfy branch protection rules --- .github/workflows/build.yml | 2 +- .github/workflows/docs.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 13872859..b9bc7543 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,7 +3,7 @@ name: 'Build and test' on: pull_request: - types: [ready_for_review, review_requested] + types: [ready_for_review, review_requested, synchronize] push: paths-ignore: - 'docs/**' diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c2cdd31f..f73aea8b 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -2,7 +2,7 @@ name: Sphinx documentation on: pull_request: - types: [ready_for_review, review_requested] + types: [ready_for_review, review_requested, synchronize] push: paths: - 'docs/**' From 5989fe807ca9abb7f2f9f44a5a8586223fde0ea1 Mon Sep 17 00:00:00 2001 From: Ilya Sytchev Date: Wed, 1 Nov 2023 13:00:46 -0400 Subject: [PATCH 14/14] Simplify workflow config --- .github/workflows/build.yml | 1 - .github/workflows/docs.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b9bc7543..7e17e555 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,7 +3,6 @@ name: 'Build and test' on: pull_request: - types: [ready_for_review, review_requested, synchronize] push: paths-ignore: - 'docs/**' diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f73aea8b..affdc46d 100644 --- a/.github/workflows/docs.yml +++ 
b/.github/workflows/docs.yml @@ -2,7 +2,6 @@ name: Sphinx documentation on: pull_request: - types: [ready_for_review, review_requested, synchronize] push: paths: - 'docs/**'