Skip to content

Commit

Permalink
v2.1 (#118)
Browse files Browse the repository at this point in the history
- Add transfer learning
- Add gradient accumulation
- Build docker container on release action
- GPU memory cleanup on memory errors
  • Loading branch information
egillax authored Jul 9, 2024
1 parent 03fdb3d commit bb7c453
Show file tree
Hide file tree
Showing 47 changed files with 1,793 additions and 594 deletions.
6 changes: 5 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,8 @@
^extras$
^deploy.sh$
^compare_versions$
^.mypy_cache$
^.mypy_cache$
^inst/python/__pycache__
^.*\.pt$
^doc$
^Meta$
28 changes: 14 additions & 14 deletions .github/workflows/R_CDM_check_hades.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,22 @@ jobs:
while read -r cmd
do
eval sudo $cmd
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "22.04"))')
- uses: r-lib/actions/setup-r-dependencies@v2
with:
cache: always
extra-packages: any::rcmdcheck
needs: check

- name: setup r-reticulate venv
shell: Rscript {0}
run: |
python_packages <-
c("polars", "tqdm", "connectorx", "pyarrow", "scikit-learn")
c("polars", "tqdm", "connectorx", "pyarrow", "pynvml", "numpy==1.26.4")
library(reticulate)
virtualenv_create("r-reticulate", Sys.which("python"))
virtualenv_install("r-reticulate", python_packages)
virtualenv_create("r-reticulate", Sys.which("python"), packages=python_packages)
virtualenv_install("r-reticulate", "torch", pip_options = c("--index-url https://download.pytorch.org/whl/cpu"))
path_to_python <- virtualenv_python("r-reticulate")
Expand All @@ -95,24 +95,24 @@ jobs:
error-on: '"warning"'
check-dir: '"check"'

- name: Upload source package
if: success() && runner.os == 'macOS' && github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
uses: actions/upload-artifact@v2
with:
name: package_tarball
path: check/*.tar.gz

- name: Install covr
if: runner.os == 'Windows'
if: runner.os == 'ubuntu-22.04'
run: |
remotes::install_cran("covr")
shell: Rscript {0}

- name: Test coverage
if: runner.os == 'Windows'
run: covr::codecov()
if: runner.os == 'ubuntu-22.04'
run: covr::codecov(token = "${{ secrets.CODECOV_TOKEN }}")
shell: Rscript {0}

- name: Upload source package
if: success() && runner.os == 'macOS' && github.event_name != 'pull_request' && github.ref == 'refs/heads/main'
uses: actions/upload-artifact@v2
with:
name: package_tarball
path: check/*.tar.gz

Release:
needs: R-CMD-Check

Expand Down
84 changes: 84 additions & 0 deletions .github/workflows/release_docker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# When a new release is published (or main is pushed),
# build the image and upload it to Dockerhub.
#
# Requires the following repository secrets:
# - DOCKER_HUB_USERNAME
# - DOCKER_HUB_ACCESS_TOKEN
# - GH_TOKEN - GitHub token passed to the image build for downloading
#   GitHub-hosted dependencies. Needed to avoid "API rate limit exceeded"
#   from github.
# The image name is set in the DOCKER_IMAGE env var below; edit it per fork.
name: Release Docker

on:
  push:
    branches:
      - 'main'
    tags:
      - 'v*'
  workflow_dispatch:

jobs:
  docker:
    runs-on: ubuntu-latest
    env:
      DOCKER_IMAGE: 'ohdsi/deep_plp'
    steps:
      - uses: actions/checkout@v4

      # ------------------------------------
      # The pattern for the following steps is specified
      # in OHDSI/WebAPI.

      # Add Docker labels and tags
      - name: Docker meta
        id: docker_meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.DOCKER_IMAGE }}
          tags: |
            type=semver,pattern={{version}}

      # Setup docker build environment
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

      - name: Set build parameters
        id: build_params
        run: |
          echo "SHA8=${GITHUB_SHA::8}" >> $GITHUB_ENV

      - name: Build and push
        id: docker_build
        uses: docker/build-push-action@v6
        with:
          context: ./
          cache-from: type=gha
          # NOTE: no space after the comma — buildx parses this value as CSV
          # without trimming, so " mode" would be rejected as an unknown key.
          cache-to: type=gha,mode=max
          file: Dockerfile
          platforms: linux/amd64
          push: true
          secrets: |
            build_github_pat=${{ secrets.GH_TOKEN }}
          build-args: |
            GIT_BRANCH=${{ steps.docker_meta.outputs.version }}
            GIT_COMMIT_ID_ABBREV=${{ env.SHA8 }}
          tags: ${{ steps.docker_meta.outputs.tags }}
          # Use runtime labels from docker_meta as well as fixed labels
          labels: |
            ${{ steps.docker_meta.outputs.labels }}
            maintainer=Egill A. Fridgeirsson <[email protected]>
            org.opencontainers.image.authors=Egill A. Fridgeirsson <[email protected]>, Henrik John <[email protected]>
            org.opencontainers.image.vendor=OHDSI
            org.opencontainers.image.licenses=Apache-2.0

      - name: Inspect image
        run: |
          docker pull ${{ env.DOCKER_IMAGE }}:${{ steps.docker_meta.outputs.version }}
          docker image inspect ${{ env.DOCKER_IMAGE }}:${{ steps.docker_meta.outputs.version }}
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ extras/
.Renviron
inst/python/__pycache__
.mypy_cache
/doc/
/Meta/
*.pt
18 changes: 8 additions & 10 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: DeepPatientLevelPrediction
Type: Package
Title: Deep Learning For Patient Level Prediction Using Data In The OMOP Common Data Model
Version: 2.0.3
Date: 22-12-2023
Version: 2.1.0
Date: 08-07-2024
Authors@R: c(
person("Egill", "Fridgeirsson", email = "[email protected]", role = c("aut", "cre")),
person("Jenna", "Reps", email = "[email protected]", role = c("aut")),
Expand All @@ -20,38 +20,36 @@ Depends:
R (>= 4.0.0)
Imports:
dplyr,
FeatureExtraction (>= 3.0.0),
ParallelLogger (>= 2.0.0),
PatientLevelPrediction (>= 6.3.2),
rlang,
withr,
reticulate (>= 1.31)
Suggests:
devtools,
Eunomia,
knitr,
markdown,
plyr,
rmarkdown,
testthat,
PRROC,
FeatureExtraction (>= 3.0.0),
ResultModelManager (>= 0.2.0),
DatabaseConnector (>= 6.0.0),
Andromeda
Remotes:
ohdsi/PatientLevelPrediction,
ohdsi/FeatureExtraction,
ohdsi/Eunomia,
ohdsi/ResultModelManager
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Encoding: UTF-8
Config/testthat/edition: 3
Config/testthat/parallel: TRUE
Config/reticulate:
list(
packages = list(
list(package = "torch"),
list(package = "polars"),
list(package = "tqdm"),
list(package = "connectorx"),
list(package = "pyarrow")
list(package = "pyarrow"),
list(package = "pynvml")
)
)
64 changes: 64 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# ---- Build stage: install R/Python dependencies and the package ----
FROM docker.io/rocker/r-ver:4.4.1 AS build

ARG GIT_BRANCH='main'
ARG GIT_COMMIT_ID_ABBREV

# System dependencies: Java for DatabaseConnector, Python toolchain for reticulate
RUN apt-get -y update && apt-get install -y \
    default-jre \
    default-jdk \
    libssl-dev \
    python3-pip \
    python3-dev \
    --no-install-recommends \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
RUN R CMD javareconf

# The GitHub PAT must be exported inside each RUN that hits the GitHub API:
# an `export` in a RUN of its own does not persist to later layers, so a
# standalone export line would leave GITHUB_PAT unset where it is needed.
# Mounting the secret per-RUN also keeps the token out of the image layers.
RUN --mount=type=secret,id=build_github_pat \
    export GITHUB_PAT=$(cat /run/secrets/build_github_pat) \
    && install2.r -n -1 \
        remotes \
        CirceR \
        Eunomia \
        duckdb \
    && installGithub.r \
        OHDSI/CohortGenerator \
        OHDSI/ROhdsiWebApi \
        OHDSI/ResultModelManager

RUN Rscript -e "DatabaseConnector::downloadJdbcDrivers(dbms='all', pathToDriver='/database_drivers/')"
ENV DATABASECONNECTOR_JAR_FOLDER=/database_drivers/

# install Python packages (uv for faster resolution/installation)
RUN pip3 install uv \
    && uv pip install --system --no-cache-dir \
        connectorx \
        polars \
        pyarrow \
        torch \
        tqdm \
        pynvml \
    && rm -rf /root/.cache/pip

# Install the package itself; prefer the exact commit when provided, else the branch
RUN --mount=type=secret,id=build_github_pat \
    export GITHUB_PAT=$(cat /run/secrets/build_github_pat) \
    && Rscript -e "ref <- Sys.getenv('GIT_COMMIT_ID_ABBREV', unset = Sys.getenv('GIT_BRANCH')); remotes::install_github('ohdsi/DeepPatientLevelPrediction', ref=ref)"


# ---- Runtime stage ----
FROM docker.io/rocker/rstudio:4.4.1
#
COPY --from=build /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=build /database_drivers /database_drivers
COPY --from=build /usr/local/lib/R/site-library /usr/local/lib/R/site-library
COPY --from=build /usr/local/lib/R/library /usr/local/lib/R/library

ENV RETICULATE_PYTHON=/usr/bin/python3
# runtime dependencies
RUN apt-get -y update && apt-get install -y \
    default-jre \
    default-jdk \
    libssl3 \
    python3-dev \
    --no-install-recommends \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && R CMD javareconf

2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ export(predictDeepEstimator)
export(setDefaultResNet)
export(setDefaultTransformer)
export(setEstimator)
export(setFinetuner)
export(setMultiLayerPerceptron)
export(setResNet)
export(setTransformer)
export(torch)
export(trainingCache)
importFrom(dplyr,"%>%")
importFrom(reticulate,py_to_r)
Expand Down
12 changes: 12 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
DeepPatientLevelPrediction 2.1.0
======================
- Added basic transfer learning functionality. See vignette("TransferLearning")
- Added a GPU memory cleaner that frees cached memory after an out-of-memory error
- The python module torch is now accessed through an exported function instead of loading the module at package load
- Added gradient accumulation. Studies running at different sites on different hardware can now use the same effective batch size by accumulating gradients.
- Refactored out the cross validation from the hyperparameter tuning
- Remove predictions from non-optimal hyperparameter combinations to save space
- Only use html vignettes
- Rename MLP to MultiLayerPerceptron


DeepPatientLevelPrediction 2.0.3
======================
- Hotfix: Fix count for polars v0.20.x
Expand Down
11 changes: 9 additions & 2 deletions R/Dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,21 @@ createDataset <- function(data, labels, plpModel = NULL) {
# sqlite object
attributes(data)$path <- attributes(data)$dbname
}
if (is.null(plpModel)) {
if (is.null(plpModel) && is.null(data$numericalIndex)) {
data <- dataset(r_to_py(normalizePath(attributes(data)$path)),
r_to_py(labels$outcomeCount))
} else if (!is.null(data$numericalIndex)) {
numericalIndex <-
r_to_py(as.array(data$numericalIndex %>% dplyr::pull()))
data <- dataset(r_to_py(normalizePath(attributes(data)$path)),
r_to_py(labels$outcomeCount),
numericalIndex)
} else {
numericalFeatures <-
r_to_py(as.array(which(plpModel$covariateImportance$isNumeric)))
data <- dataset(r_to_py(normalizePath(attributes(data)$path)),
numerical_features = numericalFeatures)
numerical_features = numericalFeatures
)
}

return(data)
Expand Down
17 changes: 15 additions & 2 deletions R/DeepPatientLevelPrediction.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,25 @@
#' @description A package containing deep learning extensions for developing
#' prediction models using data in the OMOP CDM
#'
#' @docType package
#' @name DeepPatientLevelPrediction
#' @importFrom dplyr %>%
#' @importFrom reticulate r_to_py py_to_r
#' @importFrom rlang .data
NULL
"_PACKAGE"

# package level global state
.globals <- new.env(parent = emptyenv())

#' Pytorch module
#'
#' The `torch` module object is the equivalent of
#' `reticulate::import("torch")` and provided mainly as a convenience.
#'
#' @returns the torch Python module
#' @export
#' @usage NULL
#' @format An object of class `python.builtin.module`
torch <- NULL

.onLoad <- function(libname, pkgname) {
# use superassignment to update global reference
Expand Down
Loading

0 comments on commit bb7c453

Please sign in to comment.