diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 90b8374b..343dd961 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -11,14 +11,14 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
- os: ['ubuntu-22.04', 'windows-2022', 'macos-12']
+ os: ['ubuntu-24.04', 'windows-2022', 'macos-14']
steps:
- name: Check out Forest code
uses: actions/checkout@v4
- name: Set up Python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
- python-version: 3.8
+ python-version: 3.11
- name: Install Forest dependencies for Linux
# required by librosa
if: ${{ startsWith(matrix.os, 'ubuntu') }}
@@ -28,7 +28,7 @@ jobs:
- name: Install Forest dependencies for Windows
# required by librosa
if: ${{ startsWith(matrix.os, 'windows') }}
- uses: FedericoCarboni/setup-ffmpeg@v2
+ uses: FedericoCarboni/setup-ffmpeg@v3
id: setup-ffmpeg
- name: Install Forest
run: pip install -e .
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 1ef9bf02..dd4dfb23 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -7,7 +7,7 @@ on:
jobs:
build-html-docs:
name: 'Build HTML docs'
- runs-on: 'ubuntu-22.04'
+ runs-on: 'ubuntu-24.04'
defaults:
run:
working-directory: './docs'
@@ -17,7 +17,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
- python-version: 3.8
+ python-version: 3.11
- name: Install documentation build dependencies
run: pip install -r requirements.txt
- name: Build HTML docs
diff --git a/.gitignore b/.gitignore
index 904de1e2..294657dc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
__pycache__/
+.venv/
.DS_Store
# IntelliJ, VsCode project files
@@ -21,4 +22,4 @@ __pycache__/
docs/_build/
# any python environment files
-.python-version
\ No newline at end of file
+.python-version
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index a3c319b8..5677da7d 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -9,7 +9,7 @@ version: 2
build:
os: ubuntu-22.04
tools:
- python: "3.8"
+ python: "3.11"
# Optionally declare the Python requirements required to build your docs
python:
diff --git a/README.md b/README.md
index 8d16f82a..9804f8a5 100644
--- a/README.md
+++ b/README.md
@@ -4,13 +4,11 @@
-# Forest (Python 3.8)
-
-The Onnela Lab at the Harvard T.H. Chan School of Public Health has developed the Forest library to analyze smartphone-based high-throughput digital phenotyping data. The main intellectual challenge in smartphone-based digital phenotyping has moved from data collection to data analysis. Our research focuses on the development of mathematical and statistical methods for analyzing intensive high-dimensional data. We are actively developing the Forest library for analyzing smartphone-based high-throughput digital phenotyping data collected with the [Beiwe](https://github.com/onnela-lab/beiwe-backend) platform. Forest will implement our methods for analyzing Beiwe data as a Python 3.8 package and is released under the BSD-3 open-source license. The Forest library will continue to grow over the coming years as we develop new analytical methods.
+The Onnela Lab at the Harvard T.H. Chan School of Public Health has developed the Forest library to analyze smartphone-based high-throughput digital phenotyping data. The main intellectual challenge in smartphone-based digital phenotyping has moved from data collection to data analysis. Our research focuses on the development of mathematical and statistical methods for analyzing intensive high-dimensional data. We are actively developing the Forest library for analyzing smartphone-based high-throughput digital phenotyping data collected with the [Beiwe](https://github.com/onnela-lab/beiwe-backend) platform. Forest will implement our methods for analyzing Beiwe data as a Python package and is released under the BSD-3 open-source license. The Forest library will continue to grow over the coming years as we develop new analytical methods.
Forest can be run locally but is also integrated into the Beiwe back-end on AWS, consistent with the preferred big-data computing paradigm of moving computation to the data. Integrated with Beiwe, Forest can be used to generate on-demand analytics, most importantly daily or hourly summary statistics of collected data, which are stored in a relational database on AWS. The system also implements an API for Tableau, which supports the creation of customizable workbooks and dashboards to view data summaries and troubleshoot any issues with data collection. Tableau is commercial software but is available under free viewer licenses and may be free to academic users for the first year (see Tableau for more information).
-For more detailed info on specific subpackages, see our [Documentation](https://forest.beiwe.org). Please note that Forest uses Python 3.8.
+For more detailed info on specific subpackages, see our [Documentation](https://forest.beiwe.org). Please note that Forest uses Python 3.11.
# Description
diff --git a/docs/requirements.txt b/docs/requirements.txt
index af56edff..2b212fb8 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,4 +1,4 @@
-myst-parser==0.17.2
-sphinx==4.5.0
+myst-parser==3.0.0
+sphinx==7.3.7
sphinx-copybutton==0.5.0
-sphinx_rtd_theme==1.0.0
+sphinx_rtd_theme==2.0.0
diff --git a/docs/source/aws.md b/docs/source/aws.md
index e0a5eb6c..625fc5bc 100644
--- a/docs/source/aws.md
+++ b/docs/source/aws.md
@@ -593,10 +593,10 @@ to that user.
Assume we are ssh-ed to the EC2 instance.
To create a new environment named `forest_main` with Python version
-`3.8` use
+`3.11` use
``` sh
-conda create --name forest_main python=3.8
+conda create --name forest_main python=3.11
```
To activate an environment named `forest_main` use
@@ -678,12 +678,12 @@ base * /opt/anaconda
[Forest](https://github.com/onnela-lab/forest) is a Python library for
analyzing smartphone-based high-throughput digital phenotyping data
collected with the Beiwe platform. Forest implements methods as a Python
-3.8 package. Forest is integrated into the Beiwe back-end on AWS but can
+3.11 package. Forest is integrated into the Beiwe back-end on AWS but can
also be run locally.
Assume we are ssh-ed to the EC2 instance. Use the commands below to
activate Anaconda environment of choice (here, `forest_main` that has
-Python `3.8` installed) and install `git`, `pip`.
+Python `3.11` installed) and install `git`, `pip`.
``` sh
conda activate forest_main
diff --git a/docs/source/index.md b/docs/source/index.md
index cc2ae350..3179a783 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -32,7 +32,7 @@ passive-data.md
# Home
-Forest is a library for analyzing smartphone-based high-throughput digital phenotyping data collected with the [Beiwe platform](https://www.beiwe.org/). Forest implements methods as a Python 3.8 package. Forest is integrated into the Beiwe back-end on AWS but can also be run locally.
+Forest is a library for analyzing smartphone-based high-throughput digital phenotyping data collected with the [Beiwe platform](https://www.beiwe.org/). Forest implements methods as a Python 3.11 package. Forest is integrated into the Beiwe back-end on AWS but can also be run locally.
**Table of Contents**
```{contents}
diff --git a/docs/source/logging.md b/docs/source/logging.md
index a47d9e87..ddb895f3 100644
--- a/docs/source/logging.md
+++ b/docs/source/logging.md
@@ -358,4 +358,4 @@ a, b, c = wrapper(x, y, z, 'path/to/log/output/directory')
* [The Python Standard Library's documentation for `logging`](https://docs.python.org/3/library/logging.html)
* Vinay Sajip's [*Logging HOWTO*](https://docs.python.org/3/howto/logging.html)
-* [`LogRecord` attributes](https://docs.python.org/3.8/library/logging.html?highlight=logging#logrecord-attributes)
+* [`LogRecord` attributes](https://docs.python.org/3.11/library/logging.html#logrecord-attributes)
diff --git a/forest/jasmine/traj2stats.py b/forest/jasmine/traj2stats.py
index 6ffaf2e1..782c10a3 100644
--- a/forest/jasmine/traj2stats.py
+++ b/forest/jasmine/traj2stats.py
@@ -1821,6 +1821,6 @@ def gps_stats_generate_summary(
if parameters.save_osm_log:
with open(
f"{logs_folder}/locations_logs_{frequency.name.lower()}.json",
- "wa",
+ "a",
) as loc:
json.dump(logs, loc, indent=4)
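A quick aside on the mode-string fix above: Python's built-in open() rejects "wa" because exactly one of the create/read/write/append modes may be given, so plain append ("a") is the correct mode here. A minimal sketch, using a throwaway file name rather than the real logs path:

    import json

    try:
        open("locations_logs_example.json", "wa")
    except ValueError as err:
        # "must have exactly one of create/read/write/append mode"
        print(err)

    # Append mode alone is valid and creates the file if it does not exist.
    with open("locations_logs_example.json", "a") as loc:
        json.dump({"example": []}, loc, indent=4)
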
diff --git a/forest/oak/base.py b/forest/oak/base.py
index f2751833..d5925613 100644
--- a/forest/oak/base.py
+++ b/forest/oak/base.py
@@ -184,8 +184,12 @@ def compute_interpolate_cwt(tapered_bout: np.ndarray, fs: int = 10,
# interpolate coefficients
freqs = out[2]
freqs_interp = np.arange(0.5, 4.5, 0.05)
- ip = interpolate.interp2d(range(coefs.shape[1]), freqs, coefs)
- coefs_interp = ip(range(coefs.shape[1]), freqs_interp)
+ interpolator = interpolate.RegularGridInterpolator(
+ (freqs, range(coefs.shape[1])), coefs
+ )
+ grid_x, grid_y = np.meshgrid(freqs_interp, range(coefs.shape[1]),
+ indexing='ij')
+ coefs_interp = interpolator((grid_x, grid_y))
# trim spectrogram from the coi
coefs_interp = coefs_interp[:, 5*fs:-5*fs]
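SciPy deprecated scipy.interpolate.interp2d and removed it in release 1.14; RegularGridInterpolator is a documented replacement, which is what the hunk above switches to. A minimal standalone sketch of the same pattern on a synthetic grid (the shapes and values here are illustrative, not taken from compute_interpolate_cwt):

    import numpy as np
    from scipy import interpolate

    freqs = np.linspace(0.5, 4.5, 9)        # original frequency grid (rows)
    cols = np.arange(20)                    # original column grid
    coefs = np.random.rand(freqs.size, cols.size)

    # Interpolate onto a denser frequency grid while keeping the columns fixed.
    interpolator = interpolate.RegularGridInterpolator((freqs, cols), coefs)
    freqs_interp = np.arange(0.5, 4.5, 0.05)
    grid_x, grid_y = np.meshgrid(freqs_interp, cols, indexing="ij")
    coefs_interp = interpolator((grid_x, grid_y))   # shape: (len(freqs_interp), len(cols))
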
@@ -524,7 +528,7 @@ def run_hourly(
cadence_temp = cadence_bout[t_hours_pd == t_unique]
cadence_temp = cadence_temp[cadence_temp > 0]
# store hourly metrics
- if math.isnan(steps_hourly[ind_to_store]):
+ if math.isnan(steps_hourly[ind_to_store].item()):
steps_hourly[ind_to_store] = int(np.sum(cadence_temp))
walkingtime_hourly[ind_to_store] = len(cadence_temp)
else:
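The added .item() above makes the array-to-scalar conversion explicit: assuming ind_to_store selects a size-1 slice of steps_hourly, recent NumPy versions deprecate implicitly converting such an array to a Python float inside math.isnan. An illustrative sketch under that assumption:

    import math
    import numpy as np

    steps_hourly = np.full(3, np.nan)
    ind_to_store = np.array([False, True, False])   # hypothetical mask index
    selected = steps_hourly[ind_to_store]           # size-1 array, not a scalar
    print(math.isnan(selected.item()))              # True, via an explicit conversion
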
@@ -609,11 +613,11 @@ def run(study_folder: str, output_folder: str, tz_str: Optional[str] = None,
frequency == Frequency.HOURLY_AND_DAILY
or frequency == Frequency.HOURLY
):
- freq = 'H'
+ freq = 'h'
elif frequency == Frequency.MINUTE:
- freq = 'T'
+ freq = 'min'
else:
- freq = str(frequency.value/60) + 'H'
+ freq = str(frequency.value/60) + 'h'
days_hourly = pd.date_range(date_start, date_end+timedelta(days=1),
freq=freq)[:-1]
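The frequency-alias edits above follow pandas 2.2, which deprecated the uppercase offset strings 'H' and 'T' in favor of 'h' and 'min'. A quick illustration of the new spellings:

    import pandas as pd

    hours = pd.date_range("2020-02-24", periods=3, freq="h")      # formerly "H"
    minutes = pd.date_range("2020-02-24", periods=3, freq="min")  # formerly "T"
    every_3h = pd.date_range("2020-02-24", periods=3, freq="3h")  # formerly "3H"
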
diff --git a/forest/oak/tests/test_run_hourly.py b/forest/oak/tests/test_run_hourly.py
index 242b52bd..3c392655 100644
--- a/forest/oak/tests/test_run_hourly.py
+++ b/forest/oak/tests/test_run_hourly.py
@@ -23,7 +23,7 @@ def sample_run_input(signal_bout):
t_ind_pydate = pd.date_range(
start='2020-02-24 00:00:00',
end='2020-02-25 23:00:00',
- freq='H',
+ freq='h',
tz='US/Eastern'
).to_pydatetime()
cadence_bout = np.array(
diff --git a/forest/poplar/functions/log.py b/forest/poplar/functions/log.py
index c30a8277..33917770 100644
--- a/forest/poplar/functions/log.py
+++ b/forest/poplar/functions/log.py
@@ -15,7 +15,7 @@
# Dictionary of available log record attributes:
# For details, see:
-# https://docs.python.org/3.8/library/logging.html?highlight=logging#logrecord-attributes
+# https://docs.python.org/3.11/library/logging.html#logrecord-attributes
AVAILABLE_ATTRIBUTES = {
"asctime,msecs": "%(asctime)s", # Human-readable time with milliseconds.
"created": "%(created)f", # Unix timestamp (seconds since epoch).
@@ -134,7 +134,7 @@ def log_to_csv(
log_name (str): Name for the log file.
log_format (str): The format argument for logging.basicConfig.
For available attributes and formatting instructions, see:
- https://docs.python.org/3.8/library/logging.html?highlight=logging#logrecord-attributes)
+ https://docs.python.org/3.11/library/logging.html#logrecord-attributes
header (list): Header for the csv.
Returns:
diff --git a/forest/sycamore/common.py b/forest/sycamore/common.py
index d372b84f..e8bb1aa5 100644
--- a/forest/sycamore/common.py
+++ b/forest/sycamore/common.py
@@ -628,6 +628,11 @@ def find_missing_data(user: str, survey_id: str, agg_data: pd.DataFrame,
].unique()
missing_times = []
for time in known_answers_submits:
+            # If no timings submits were recorded, every answers submit
+            # will be missing
+ if len(known_timings_submits) == 0:
+ missing_times.append(time)
+ continue
hours_from_nearest = np.min(
np.abs((pd.to_datetime(known_timings_submits)
@@ -635,7 +640,7 @@ def find_missing_data(user: str, survey_id: str, agg_data: pd.DataFrame,
) / 60 / 60
# add on the data if there is more than 1/2 hour between an
# answers submission and a timing submission.
- if hours_from_nearest > .5 or len(known_timings_submits) == 0:
+ if hours_from_nearest > .5:
missing_times.append(time)
if len(missing_times) > 0:
missing_data = answers_data.loc[
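The early continue added above matters because np.min has no identity for an empty reduction: with zero recorded timings submits, computing hours_from_nearest would raise before the old `or len(known_timings_submits) == 0` clause was ever evaluated. A minimal sketch of that failure mode:

    import numpy as np

    known_timings_submits = np.array([])   # no survey_timings submissions recorded
    try:
        np.min(np.abs(known_timings_submits - 0.0))
    except ValueError as err:
        # "zero-size array to reduction operation minimum which has no identity"
        print(err)
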
diff --git a/forest/sycamore/responses.py b/forest/sycamore/responses.py
index 0c62cd3f..6401fdee 100644
--- a/forest/sycamore/responses.py
+++ b/forest/sycamore/responses.py
@@ -143,7 +143,7 @@ def agg_changed_answers_summary(
detail["time_to_answer"] = np.where(
detail["data_stream"] == "survey_timings",
detail["time_to_answer"],
- np.NaN
+ np.nan
)
#####################################################################
@@ -251,7 +251,7 @@ def format_responses_by_submission(agg_data: pd.DataFrame) -> dict:
survey_df["survey_duration"] = np.where(
survey_df["data_stream"] == "survey_timings",
survey_df["survey_duration"],
- np.NaN
+ np.nan
)
keep_cols = ["beiwe_id", "start_time", "end_time",
diff --git a/forest/sycamore/submits.py b/forest/sycamore/submits.py
index 79af48c4..72f5bf46 100644
--- a/forest/sycamore/submits.py
+++ b/forest/sycamore/submits.py
@@ -470,12 +470,12 @@ def survey_submits(
submit_lines3["time_to_submit"] = np.where(
submit_lines3["submit_flg"] == 1,
submit_lines3["time_to_submit"],
- np.NaN
+ np.nan
)
submit_lines3["time_to_open"] = np.where(
submit_lines3["opened_flg"] == 1,
submit_lines3["time_to_open"],
- np.NaN
+ np.nan
)
return submit_lines3.sort_values(["survey id", "beiwe_id"]
).drop_duplicates()
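The np.NaN → np.nan edits in the sycamore modules above track NumPy 2.0, which removed the np.NaN alias; np.nan is the only supported spelling and behaves identically on NumPy 1.x. For example:

    import numpy as np

    submit_flg = np.array([1, 0])
    time_to_submit = np.where(submit_flg == 1, [12.5, 30.0], np.nan)
    # Accessing np.NaN raises AttributeError on NumPy >= 2.0.
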
diff --git a/mypy.ini b/mypy.ini
index 712a36bc..d06e36b7 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,5 +1,5 @@
[mypy]
-python_version = 3.8
+python_version = 3.11
[mypy-holidays]
ignore_missing_imports = True
diff --git a/tutorials/forest_usage.ipynb b/tutorials/forest_usage.ipynb
index b64466d5..b58e6366 100644
--- a/tutorials/forest_usage.ipynb
+++ b/tutorials/forest_usage.ipynb
@@ -30,7 +30,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Before we begin, we need to check the current distribution of Python. Note that forest is built using Python 3.8. "
+ "Before we begin, we need to check the current distribution of Python. Note that forest is built using Python 3.11. "
]
},
{
@@ -74,7 +74,7 @@
"source": [
"*The output should display two lines.* \n",
"\n",
- "1. The Python version installed- make sure you are not using a version of Python that is earlier than 3.8\n",
+    "1. The Python version installed: make sure you are using Python 3.11 or later\n",
"2. The path to where Python is currently installed"
]
},
@@ -709,10 +709,10 @@
"evalue": "name 'response_data' is not defined",
"output_type": "error",
"traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m/var/folders/nl/92kzg8c56mn1872898r7rjr40000gn/T/ipykernel_96458/1316156180.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m## Make sure the data is sorted according to date\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mresponse_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Date'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mresponse_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdrop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtime_series_plot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvar_to_plot\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mylab\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxlab\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'Date'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_x_ticks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mNameError\u001b[0m: name 'response_data' is not defined"
+ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+ "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)",
+ "\u001B[0;32m/var/folders/nl/92kzg8c56mn1872898r7rjr40000gn/T/ipykernel_96458/1316156180.py\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[0;31m## Make sure the data is sorted according to date\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 2\u001B[0;31m \u001B[0mresponse_data\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msort_values\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'Date'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minplace\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 3\u001B[0m \u001B[0mresponse_data\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mreset_index\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdrop\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minplace\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 5\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mtime_series_plot\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvar_to_plot\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mylab\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m''\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mxlab\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m'Date'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mnum_x_ticks\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;36m4\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;31mNameError\u001B[0m: name 'response_data' is not defined"
]
}
],