diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 90b8374b..343dd961 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,14 +11,14 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ['ubuntu-22.04', 'windows-2022', 'macos-12'] + os: ['ubuntu-24.04', 'windows-2022', 'macos-14'] steps: - name: Check out Forest code uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.11 - name: Install Forest dependencies for Linux # required by librosa if: ${{ startsWith(matrix.os, 'ubuntu') }} @@ -28,7 +28,7 @@ jobs: - name: Install Forest dependencies for Windows # required by librosa if: ${{ startsWith(matrix.os, 'windows') }} - uses: FedericoCarboni/setup-ffmpeg@v2 + uses: FedericoCarboni/setup-ffmpeg@v3 id: setup-ffmpeg - name: Install Forest run: pip install -e . diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 1ef9bf02..dd4dfb23 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -7,7 +7,7 @@ on: jobs: build-html-docs: name: 'Build HTML docs' - runs-on: 'ubuntu-22.04' + runs-on: 'ubuntu-24.04' defaults: run: working-directory: './docs' @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.11 - name: Install documentation build dependencies run: pip install -r requirements.txt - name: Build HTML docs diff --git a/.gitignore b/.gitignore index 904de1e2..294657dc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ __pycache__/ +.venv/ .DS_Store # IntelliJ, VsCode project files @@ -21,4 +22,4 @@ __pycache__/ docs/_build/ # any python environment files -.python-version \ No newline at end of file +.python-version diff --git a/.readthedocs.yaml b/.readthedocs.yaml index a3c319b8..5677da7d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,7 +9,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.8" + python: "3.11" # Optionally declare the Python requirements required to build your docs python: diff --git a/README.md b/README.md index 8d16f82a..9804f8a5 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,11 @@ Forest logo -# Forest (Python 3.8) - -The Onnela Lab at the Harvard T.H. Chan School of Public Health has developed the Forest library to analyze smartphone-based high-throughput digital phenotyping data. The main intellectual challenge in smartphone-based digital phenotyping has moved from data collection to data analysis. Our research focuses on the development of mathematical and statistical methods for analyzing intensive high-dimensional data. We are actively developing the Forest library for analyzing smartphone-based high-throughput digital phenotyping data collected with the [Beiwe](https://github.com/onnela-lab/beiwe-backend) platform. Forest will implement our methods for analyzing Beiwe data as a Python 3.8 package and is released under the BSD-3 open-source license. The Forest library will continue to grow over the coming years as we develop new analytical methods. +The Onnela Lab at the Harvard T.H. Chan School of Public Health has developed the Forest library to analyze smartphone-based high-throughput digital phenotyping data. The main intellectual challenge in smartphone-based digital phenotyping has moved from data collection to data analysis. Our research focuses on the development of mathematical and statistical methods for analyzing intensive high-dimensional data. 
We are actively developing the Forest library for analyzing smartphone-based high-throughput digital phenotyping data collected with the [Beiwe](https://github.com/onnela-lab/beiwe-backend) platform. Forest will implement our methods for analyzing Beiwe data as a Python package and is released under the BSD-3 open-source license. The Forest library will continue to grow over the coming years as we develop new analytical methods. Forest can be run locally but is also integrated into the Beiwe back-end on AWS, consistent with the preferred big-data computing paradigm of moving computation to the data. Integrated with Beiwe, Forest can be used to generate on-demand analytics, most importantly daily or hourly summary statistics of collected data, which are stored in a relational database on AWS. The system also implements an API for Tableau, which supports the creation of customizable workbooks and dashboards to view data summaries and troubleshoot any issues with data collection. Tableau is commercial software but is available under free viewer licenses and may be free to academic users for the first year (see Tableau for more information). -For more detailed info on specific subpackages, see our [Documentation](https://forest.beiwe.org). Please note that Forest uses Python 3.8. +For more detailed info on specific subpackages, see our [Documentation](https://forest.beiwe.org). Please note that Forest uses Python 3.11. # Description diff --git a/docs/requirements.txt b/docs/requirements.txt index af56edff..2b212fb8 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -myst-parser==0.17.2 -sphinx==4.5.0 +myst-parser==3.0.0 +sphinx==7.3.7 sphinx-copybutton==0.5.0 -sphinx_rtd_theme==1.0.0 +sphinx_rtd_theme==2.0.0 diff --git a/docs/source/aws.md b/docs/source/aws.md index e0a5eb6c..625fc5bc 100644 --- a/docs/source/aws.md +++ b/docs/source/aws.md @@ -593,10 +593,10 @@ to that user. Assume we are ssh-ed to the EC2 instance. To create a new environment named `forest_main` with Python version -`3.8` use +`3.11` use ``` sh -conda create --name forest_main python=3.8 +conda create --name forest_main python=3.11 ``` To activate an environment named `forest_main` use @@ -678,12 +678,12 @@ base * /opt/anaconda [Forest](https://github.com/onnela-lab/forest) is a Python library for analyzing smartphone-based high-throughput digital phenotyping data collected with the Beiwe platform. Forest implements methods as a Python -3.8 package. Forest is integrated into the Beiwe back-end on AWS but can +3.11 package. Forest is integrated into the Beiwe back-end on AWS but can also be run locally. Assume we are ssh-ed to the EC2 instance. Use the commands below to activate Anaconda environment of choice (here, `forest_main` that has -Python `3.8` installed) and install `git`, `pip`. +Python `3.11` installed) and install `git`, `pip`. ``` sh conda activate forest_main diff --git a/docs/source/index.md b/docs/source/index.md index cc2ae350..3179a783 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -32,7 +32,7 @@ passive-data.md # Home -Forest is a library for analyzing smartphone-based high-throughput digital phenotyping data collected with the [Beiwe platform](https://www.beiwe.org/). Forest implements methods as a Python 3.8 package. Forest is integrated into the Beiwe back-end on AWS but can also be run locally. +Forest is a library for analyzing smartphone-based high-throughput digital phenotyping data collected with the [Beiwe platform](https://www.beiwe.org/). 
Forest implements methods as a Python 3.11 package. Forest is integrated into the Beiwe back-end on AWS but can also be run locally. **Table of Contents** ```{contents} diff --git a/docs/source/logging.md b/docs/source/logging.md index a47d9e87..ddb895f3 100644 --- a/docs/source/logging.md +++ b/docs/source/logging.md @@ -358,4 +358,4 @@ a, b, c = wrapper(x, y, z, 'path/to/log/output/directory') * [The Python Standard Library's documentation for `logging`](https://docs.python.org/3/library/logging.html) * Vinay Sajip's [*Logging HOWTO*](https://docs.python.org/3/howto/logging.html) -* [`LogRecord` attributes](https://docs.python.org/3.8/library/logging.html?highlight=logging#logrecord-attributes) +* [`LogRecord` attributes](https://docs.python.org/3.11/library/logging.html#logrecord-attributes) diff --git a/forest/jasmine/traj2stats.py b/forest/jasmine/traj2stats.py index 6ffaf2e1..782c10a3 100644 --- a/forest/jasmine/traj2stats.py +++ b/forest/jasmine/traj2stats.py @@ -1821,6 +1821,6 @@ def gps_stats_generate_summary( if parameters.save_osm_log: with open( f"{logs_folder}/locations_logs_{frequency.name.lower()}.json", - "wa", + "a", ) as loc: json.dump(logs, loc, indent=4) diff --git a/forest/oak/base.py b/forest/oak/base.py index f2751833..d5925613 100644 --- a/forest/oak/base.py +++ b/forest/oak/base.py @@ -184,8 +184,12 @@ def compute_interpolate_cwt(tapered_bout: np.ndarray, fs: int = 10, # interpolate coefficients freqs = out[2] freqs_interp = np.arange(0.5, 4.5, 0.05) - ip = interpolate.interp2d(range(coefs.shape[1]), freqs, coefs) - coefs_interp = ip(range(coefs.shape[1]), freqs_interp) + interpolator = interpolate.RegularGridInterpolator( + (freqs, range(coefs.shape[1])), coefs + ) + grid_x, grid_y = np.meshgrid(freqs_interp, range(coefs.shape[1]), + indexing='ij') + coefs_interp = interpolator((grid_x, grid_y)) # trim spectrogram from the coi coefs_interp = coefs_interp[:, 5*fs:-5*fs] @@ -524,7 +528,7 @@ def run_hourly( cadence_temp = cadence_bout[t_hours_pd == t_unique] cadence_temp = cadence_temp[cadence_temp > 0] # store hourly metrics - if math.isnan(steps_hourly[ind_to_store]): + if math.isnan(steps_hourly[ind_to_store].item()): steps_hourly[ind_to_store] = int(np.sum(cadence_temp)) walkingtime_hourly[ind_to_store] = len(cadence_temp) else: @@ -609,11 +613,11 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None, frequency == Frequency.HOURLY_AND_DAILY or frequency == Frequency.HOURLY ): - freq = 'H' + freq = 'h' elif frequency == Frequency.MINUTE: - freq = 'T' + freq = 'min' else: - freq = str(frequency.value/60) + 'H' + freq = str(frequency.value/60) + 'h' days_hourly = pd.date_range(date_start, date_end+timedelta(days=1), freq=freq)[:-1] diff --git a/forest/oak/tests/test_run_hourly.py b/forest/oak/tests/test_run_hourly.py index 242b52bd..3c392655 100644 --- a/forest/oak/tests/test_run_hourly.py +++ b/forest/oak/tests/test_run_hourly.py @@ -23,7 +23,7 @@ def sample_run_input(signal_bout): t_ind_pydate = pd.date_range( start='2020-02-24 00:00:00', end='2020-02-25 23:00:00', - freq='H', + freq='h', tz='US/Eastern' ).to_pydatetime() cadence_bout = np.array( diff --git a/forest/poplar/functions/log.py b/forest/poplar/functions/log.py index c30a8277..33917770 100644 --- a/forest/poplar/functions/log.py +++ b/forest/poplar/functions/log.py @@ -15,7 +15,7 @@ # Dictionary of available log record attributes: # For details, see: -# https://docs.python.org/3.8/library/logging.html?highlight=logging#logrecord-attributes +# 
https://docs.python.org/3.11/library/logging.html#logrecord-attributes AVAILABLE_ATTRIBUTES = { "asctime,msecs": "%(asctime)s", # Human-readable time with milliseconds. "created": "%(created)f", # Unix timestamp (seconds since epoch). @@ -134,7 +134,7 @@ def log_to_csv( log_name (str): Name for the log file. log_format (str): The format argument for logging.basicConfig. For available attributes and formatting instructions, see: - https://docs.python.org/3.8/library/logging.html?highlight=logging#logrecord-attributes) + https://docs.python.org/3.11/library/logging.html#logrecord-attributes header (list): Header for the csv. Returns: diff --git a/forest/sycamore/common.py b/forest/sycamore/common.py index d372b84f..e8bb1aa5 100644 --- a/forest/sycamore/common.py +++ b/forest/sycamore/common.py @@ -628,6 +628,11 @@ def find_missing_data(user: str, survey_id: str, agg_data: pd.DataFrame, ].unique() missing_times = [] for time in known_answers_submits: + # If there were no timings submits recorded, every answers + # submit will be missing + if len(known_timings_submits) == 0: + missing_times.append(time) + continue hours_from_nearest = np.min( np.abs((pd.to_datetime(known_timings_submits) @@ -635,7 +640,7 @@ def find_missing_data(user: str, survey_id: str, agg_data: pd.DataFrame, ) / 60 / 60 # add on the data if there is more than 1/2 hour between an # answers submission and a timing submission. - if hours_from_nearest > .5 or len(known_timings_submits) == 0: + if hours_from_nearest > .5: missing_times.append(time) if len(missing_times) > 0: missing_data = answers_data.loc[ diff --git a/forest/sycamore/responses.py b/forest/sycamore/responses.py index 0c62cd3f..6401fdee 100644 --- a/forest/sycamore/responses.py +++ b/forest/sycamore/responses.py @@ -143,7 +143,7 @@ def agg_changed_answers_summary( detail["time_to_answer"] = np.where( detail["data_stream"] == "survey_timings", detail["time_to_answer"], - np.NaN + np.nan ) ##################################################################### @@ -251,7 +251,7 @@ def format_responses_by_submission(agg_data: pd.DataFrame) -> dict: survey_df["survey_duration"] = np.where( survey_df["data_stream"] == "survey_timings", survey_df["survey_duration"], - np.NaN + np.nan ) keep_cols = ["beiwe_id", "start_time", "end_time", diff --git a/forest/sycamore/submits.py b/forest/sycamore/submits.py index 79af48c4..72f5bf46 100644 --- a/forest/sycamore/submits.py +++ b/forest/sycamore/submits.py @@ -470,12 +470,12 @@ def survey_submits( submit_lines3["time_to_submit"] = np.where( submit_lines3["submit_flg"] == 1, submit_lines3["time_to_submit"], - np.NaN + np.nan ) submit_lines3["time_to_open"] = np.where( submit_lines3["opened_flg"] == 1, submit_lines3["time_to_open"], - np.NaN + np.nan ) return submit_lines3.sort_values(["survey id", "beiwe_id"] ).drop_duplicates() diff --git a/mypy.ini b/mypy.ini index 712a36bc..d06e36b7 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,5 +1,5 @@ [mypy] -python_version = 3.8 +python_version = 3.11 [mypy-holidays] ignore_missing_imports = True diff --git a/tutorials/forest_usage.ipynb b/tutorials/forest_usage.ipynb index b64466d5..b58e6366 100644 --- a/tutorials/forest_usage.ipynb +++ b/tutorials/forest_usage.ipynb @@ -30,7 +30,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Before we begin, we need to check the current distribution of Python. Note that forest is built using Python 3.8. " + "Before we begin, we need to check the current distribution of Python. Note that forest is built using Python 3.11. 
" ] }, { @@ -74,7 +74,7 @@ "source": [ "*The output should display two lines.* \n", "\n", - "1. The Python version installed- make sure you are not using a version of Python that is earlier than 3.8\n", + "1. The Python version installed: make sure you are not using a version of Python that is earlier than 3.11\n", "2. The path to where Python is currently installed" ] }, @@ -709,10 +709,10 @@ "evalue": "name 'response_data' is not defined", "output_type": "error", "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/var/folders/nl/92kzg8c56mn1872898r7rjr40000gn/T/ipykernel_96458/1316156180.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m## Make sure the data is sorted according to date\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mresponse_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Date'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mresponse_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdrop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtime_series_plot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvar_to_plot\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mylab\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxlab\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'Date'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_x_ticks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'response_data' is not defined" + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", + "\u001B[0;32m/var/folders/nl/92kzg8c56mn1872898r7rjr40000gn/T/ipykernel_96458/1316156180.py\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[0;31m## Make sure the data is sorted according to date\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 2\u001B[0;31m \u001B[0mresponse_data\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msort_values\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'Date'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minplace\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 3\u001B[0m \u001B[0mresponse_data\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mreset_index\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdrop\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minplace\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 
5\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mtime_series_plot\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvar_to_plot\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mylab\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m''\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mxlab\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m'Date'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mnum_x_ticks\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;36m4\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;31mNameError\u001B[0m: name 'response_data' is not defined" ] } ],
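On the `"wa"` → `"a"` fix in `forest/jasmine/traj2stats.py`: Python's `open()` accepts at most one of the create/write/append modes, so `"wa"` raises `ValueError` before anything is written, while `"a"` appends to the file and creates it if it does not exist. A minimal sketch with a throwaway path (the file name and payload below are illustrative only, not the real OSM log format):

```python
import json
import os
import tempfile

logs = {"example_user": ["example log entry"]}  # stand-in for the OSM log payload
logs_folder = tempfile.mkdtemp()
path = os.path.join(logs_folder, "locations_logs_daily.json")

# open(path, "wa") raises ValueError: the mode string may contain only one of
# the create ("x"), write ("w"), and append ("a") modes.
with open(path, "a") as loc:
    json.dump(logs, loc, indent=4)
```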
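On the interpolation change in `forest/oak/base.py`: `scipy.interpolate.interp2d` was deprecated in SciPy 1.10 and removed in SciPy 1.14, and `RegularGridInterpolator` is the usual replacement for data sampled on a regular grid. A minimal sketch of the same resampling pattern on toy data (array names and sizes here are illustrative, not the Forest CWT output); note that `RegularGridInterpolator` expects strictly ascending grid coordinates and, by default, raises on points outside the grid, so `bounds_error=False, fill_value=None` is the option pair that reproduces `interp2d`-style extrapolation if that is needed.

```python
import numpy as np
from scipy import interpolate

rng = np.random.default_rng(0)
freqs = np.linspace(0.5, 4.45, 80)        # ascending frequency axis
cols = np.arange(200)                     # column (time-sample) axis
coefs = rng.normal(size=(freqs.size, cols.size))

# Build the interpolator over the (frequency, column) grid.
interp = interpolate.RegularGridInterpolator(
    (freqs, cols), coefs, bounds_error=False, fill_value=None
)

# Evaluate on a denser frequency axis at every original column.
freqs_interp = np.arange(0.5, 4.5, 0.05)
grid_f, grid_c = np.meshgrid(freqs_interp, cols, indexing="ij")
coefs_interp = interp((grid_f, grid_c))   # shape (len(freqs_interp), len(cols))
```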
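On the frequency strings in `forest/oak/base.py` and the oak test: pandas 2.2 deprecated the legacy offset aliases `'H'` and `'T'` in favour of `'h'` and `'min'`, so the new spellings keep `pd.date_range` warning-free on current pandas. A standalone illustration (the dates are arbitrary):

```python
import pandas as pd

# Hourly, minutely, and multiple-hour ranges with the non-deprecated aliases.
hourly = pd.date_range("2020-02-24", "2020-02-26", freq="h", tz="US/Eastern")
minutely = pd.date_range("2020-02-24", periods=60, freq="min")
six_hourly = pd.date_range("2020-02-24", periods=8, freq="6h")
```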
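On the reordered check in `forest/sycamore/common.py`: `np.min` over a zero-size array raises `ValueError`, so testing `len(known_timings_submits) == 0` only in the final `or` clause came too late; the change above records the missing time and skips to the next loop iteration before the reduction runs. A minimal sketch of the guard with illustrative variables:

```python
import numpy as np

known_timings_submits = np.array([], dtype="datetime64[s]")  # no timings submits
answer_time = np.datetime64("2022-03-01T10:00:00")

if len(known_timings_submits) == 0:
    # Without this early exit, the reduction below raises ValueError
    # because the array has no elements to take a minimum over.
    hours_from_nearest = np.inf
else:
    hours_from_nearest = (
        np.abs(known_timings_submits - answer_time).min()
        / np.timedelta64(1, "h")
    )

missing = hours_from_nearest > 0.5   # with no timings data, always missing
```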
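On the `np.NaN` → `np.nan` edits in the sycamore modules: NumPy 2.0 removed the `np.NaN` alias, and `np.nan` is the surviving spelling, so the change keeps the `np.where` masking working on current NumPy. A toy example of the same pattern (not the real sycamore schema):

```python
import numpy as np
import pandas as pd

detail = pd.DataFrame({
    "data_stream": ["survey_timings", "survey_answers", "survey_timings"],
    "time_to_answer": [12.0, 7.0, 30.0],
})
# Keep durations only where they came from survey_timings; blank out the rest.
detail["time_to_answer"] = np.where(
    detail["data_stream"] == "survey_timings",
    detail["time_to_answer"],
    np.nan,
)
```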