Containerize and publish gaia-core and gaia-db #340

Open

wants to merge 25 commits into base: main

Commits (25)
7177338
Containerize gaia-core and gaia-db (https://github.com/OHDSI/GIS/issu…
jshoughtaling Jul 19, 2024
6337d54
Fix broken package install (https://github.com/OHDSI/GIS/pull/340#dis…
jshoughtaling Jul 25, 2024
7bcd89b
Fix documentation for `gaia-db` (https://github.com/OHDSI/GIS/pull/34…
jshoughtaling Jul 25, 2024
781a243
Fix bug with sf install
jshoughtaling Sep 19, 2024
ec24b77
Update build_gaia_core.yml
jshoughtaling Sep 19, 2024
bb9669a
Update build_gaia_db.yml
jshoughtaling Sep 19, 2024
6e98b6d
Merge pull request #1 from TuftsCTSI/containerize
jshoughtaling Sep 19, 2024
0eb66c7
Update EPA data source geometry records
kzollove Sep 24, 2024
176ba66
Update SQL and logic for loadVariable
kzollove Sep 24, 2024
565e934
Update docs
kzollove Sep 24, 2024
33f4972
Merge pull request #2 from TuftsCTSI/gaia-fix
kzollove Sep 24, 2024
1ec11f2
Add createExposure (#3)
kzollove Sep 25, 2024
b7e5683
Remove old miscellanea
kzollove Sep 25, 2024
fa4eb7c
Merge main
kzollove Sep 25, 2024
6bb5a62
Merge pull request #360 from TuftsCTSI/containerize
kzollove Sep 26, 2024
115e74b
Add Getting Started
kzollove Sep 27, 2024
33c513e
modification for Broadsea builds from github (#361)
tibbben Sep 27, 2024
c680b1a
Update Dockerfile
kzollove Sep 30, 2024
56ec283
Update Dockerfile
kzollove Sep 30, 2024
8dd2f04
Postgres driver and start api automatically
tibbben Oct 9, 2024
8bf6db7
manual gaia.R start
tibbben Oct 9, 2024
ec8bc1f
no need for db jars, already in hades
tibbben Oct 10, 2024
6bce617
Update Dockerfile
tibbben Oct 10, 2024
1abbf28
Update Dockerfile
tibbben Oct 10, 2024
8f620d6
add plumber API to supervisord
tibbben Oct 10, 2024
63 changes: 63 additions & 0 deletions .github/workflows/build_gaia_core.yml
@@ -0,0 +1,63 @@
name: gaia-core Docker image build

on:
  push:
    branches:
      - main

env:
  REGISTRY: ghcr.io
  ORG: ohdsi

jobs:
  build-and-push-images:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: true
      matrix:
        include:
          - dockerfile: ./docker/gaia-core/Dockerfile
            image: ghcr.io/TuftsCTSI/gaia-core
            context: .
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout the code
        uses: actions/checkout@v2

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to a container registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: jshoughtaling
          password: ${{ secrets.GH_TOKEN }}

      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v3
        with:
          images: ${{ matrix.image }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: ${{ matrix.context }}
          file: ${{ matrix.dockerfile }}
          push: true
          tags: |
            ${{ steps.meta.outputs.tags }}
          labels: |
            ${{ steps.meta.outputs.labels }}
          platforms: |
            linux/amd64
          cache-from: type=gha
          cache-to: type=gha,mode=max

67 changes: 67 additions & 0 deletions .github/workflows/build_gaia_db.yml
@@ -0,0 +1,67 @@
name: gaia-db Docker image build

on:
  push:
    branches:
      - main
    paths:
      - 'docker/gaia-db/**'
      - 'inst/csv/**'
      - 'vocabularies/**'

env:
  REGISTRY: ghcr.io
  ORG: ohdsi

jobs:
  build-and-push-images:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: true
      matrix:
        include:
          - dockerfile: ./docker/gaia-db/Dockerfile
            image: ghcr.io/TuftsCTSI/gaia-db
            context: .
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout the code
        uses: actions/checkout@v2

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to a container registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: jshoughtaling
          password: ${{ secrets.GH_TOKEN }}

      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v3
        with:
          images: ${{ matrix.image }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: ${{ matrix.context }}
          file: ${{ matrix.dockerfile }}
          push: true
          tags: |
            ${{ steps.meta.outputs.tags }}
          labels: |
            ${{ steps.meta.outputs.labels }}
          platforms: |
            linux/amd64
          cache-from: type=gha
          cache-to: type=gha,mode=max

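Both workflows publish images to GitHub Container Registry. As a hedged sketch of how an R client might connect to a running gaia-db container: the host, port, database name, and credentials below are placeholders that depend entirely on how the container is started (e.g. within Broadsea), not values defined in this PR.

```
library(DatabaseConnector)

# Placeholder values: adjust host/port/database/credentials to match
# how the gaia-db container is actually run.
connectionDetails <- createConnectionDetails(
  dbms     = "postgresql",
  server   = "localhost/gaia",  # host/database
  port     = 5432,
  user     = "postgres",
  password = Sys.getenv("GAIA_DB_PASSWORD")
)

conn <- connect(connectionDetails)
# Smoke test against the backbone schema used throughout this PR
querySql(conn, "SELECT COUNT(*) FROM backbone.variable_source;")
disconnect(conn)
```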
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@ Description: What the package does (one paragraph).
License: Apache License (>= 2)
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.1
+RoxygenNote: 7.3.2
Depends:
R (>= 2.10)
LazyData: true
1 change: 1 addition & 0 deletions NAMESPACE
@@ -4,6 +4,7 @@ export("%>%")
export(checkTableExists)
export(checkVariableExists)
export(createDdl)
+export(createExposure)
export(createForeignKeys)
export(createIndices)
export(createOccurrenceDdl)
165 changes: 165 additions & 0 deletions R/createExposure.R
@@ -0,0 +1,165 @@
#' Create an exposure_occurrence (exposure) table from a variable source id
#'
#' @param connectionDetails (list) An object of class connectionDetails as created by the createConnectionDetails function
#'
#' @param variableSourceId (integer) The variable source id of the variable to create an exposure table for
#'
#' @param locationImport (data.frame) A data frame with columns location_id and geometry. Represents the geocoded locations
#'
#' @return (data.frame) An OMOP CDM exposure_occurrence table for the specified variable source id and locations
#'
#' @examples
#' \dontrun{
#' # Create exposure_occurrence table for a given variable
#' variableSourceId <- 1 # Percentile Percentage of persons below poverty estimate
#' locationImport <- data.frame(location)
#' exposure_occurrence <- createExposure(connectionDetails, variableSourceId, locationImport)
#' }
#'
#' @details
#' This function creates an exposure_occurrence table for a given variable source id and a set of
#' geocoded locations. It joins the variable's attribute table to its geometry table, spatially
#' joins the result to the geocoded locations, and selects the columns required for an OMOP CDM
#' exposure_occurrence table.
#'
#' The locationImport data frame must have columns location_id and geometry: location_id is an
#' integer identifying the geocoded location, and geometry is a binary (EWKB hex) representation
#' of the location's geometry:
#' ```
#' locationImport <- read.csv('geocoded_location_snippet.csv', sep = "|", header = FALSE)
#' locationImport <- dplyr::rename(locationImport, location_id = 1, lat = 11, lon = 12)
#' locationImport <- dplyr::mutate(locationImport,
#'                                 location_id = as.integer(location_id),
#'                                 lat = as.numeric(lat),
#'                                 lon = as.numeric(gsub("[\\n]", "", lon)))
#' locationImport <- dplyr::filter(locationImport, !is.na(lat) & !is.na(lon))
#' locationImport <- sf::st_as_sf(locationImport, coords = c('lon', 'lat'), crs = 4326)
#' locationImport <- dplyr::select(locationImport, location_id, geometry)
#' locationImport <- data.frame(locationImport)
#' locationImport$geometry <-
#'   sf::st_as_binary(locationImport$geometry, EWKB = TRUE, hex = TRUE)
#'
#' #> head(locationImport)
#' #=> location_id geometry
#' #=> 1 1 0101000020e610000072230d5ff6c351c000023164d0284540
#' #=> 2 2 0101000020e61000007222df852d8a52c0978b9d95594e4440
#' #=> 3 3 0101000020e610000076319xaa4ae351c0ba0a73cc43124540
#' #=> 4 4 0101000020e61000001d90fdfc97bc51c08a05bea2dbdd4440
#' ```
#' @export
#'

createExposure <- function(connectionDetails, variableSourceId, locationImport) {

  # TODO verify locationImport

  # Check that specified variable (and geom) are both loaded to staging ---------------

  geomFullTableName <- getGeomNameFromVariableSourceId(connectionDetails = connectionDetails,
                                                       variableSourceId = variableSourceId)
  attrFullTableName <- getAttrNameFromVariableSourceId(connectionDetails = connectionDetails,
                                                       variableSourceId = variableSourceId)

  attrSchema <- strsplit(attrFullTableName, split = "\\.")[[1]][[1]]
  attrTableName <- strsplit(attrFullTableName, split = "\\.")[[1]][[2]]

  # TODO the following is a deconstruction of checkVariableExists.
  # Refactor checkVariableExists to handle this case without breaking the existing use case

  if (!checkTableExists(connectionDetails = connectionDetails,
                        databaseSchema = attrSchema,
                        tableName = attrTableName)) {
    loadVariable(connectionDetails, variableSourceId)
  }

  variableExistsQuery <- paste0("select count(*) from ", attrFullTableName,
                                " where variable_source_record_id = '", variableSourceId, "'")
  conn <- DatabaseConnector::connect(connectionDetails)
  on.exit(DatabaseConnector::disconnect(conn))
  variableExistsResult <- DatabaseConnector::querySql(conn, variableExistsQuery)
  if (variableExistsResult[[1]] == 0) {
    loadVariable(connectionDetails, variableSourceId)
  }

  # Join the variable to its geom, then join to geocoded addresses (create exp_occ in memory) ----

  # TODO this could be a function in dbUtils

  # TODO add temporal join condition:
  # <<<
  # join omop.geom_omop_location gol
  #   on public.st_within(gol.geometry, geo.geom_wgs84)
  #   and (gol.valid_start_date < att.attr_end_date
  #     or gol.valid_end_date > att.attr_start_date)
  # >>>

  # TODO better exposure_*_date logic, after the temporal join condition is added:
  # <<<
  # CASE WHEN att.attr_start_date >= gol.valid_start_date THEN att.attr_start_date
  #      ELSE gol.valid_start_date END AS exposure_start_date
  # CASE WHEN att.attr_end_date <= gol.valid_end_date THEN att.attr_end_date
  #      ELSE gol.valid_end_date END AS exposure_end_date
  # >>>

  # TODO how to get exposure_type_concept_id

  # Create the omop.geom_omop_location table to hold the geocoded locations
  DatabaseConnector::executeSql(conn, "CREATE SCHEMA IF NOT EXISTS omop;")
  DatabaseConnector::executeSql(conn, "DROP TABLE IF EXISTS omop.geom_omop_location;")
  DatabaseConnector::executeSql(conn, "CREATE TABLE omop.geom_omop_location (
    location_id integer,
    geometry public.geometry
  );")

  serv <- strsplit(connectionDetails$server(), "/")[[1]]

  postgisConnection <- RPostgreSQL::dbConnect("PostgreSQL",
                                              host = serv[1], dbname = serv[2],
                                              user = connectionDetails$user(),
                                              password = connectionDetails$password(),
                                              port = connectionDetails$port())
  on.exit(RPostgreSQL::dbDisconnect(postgisConnection), add = TRUE)
  rpostgis::pgInsert(postgisConnection,
                     name = c("omop", "geom_omop_location"),
                     geom = "geometry",
                     data.obj = locationImport)

  exposureOccurrence <- DatabaseConnector::dbGetQuery(conn, paste0(
    "select
       gol.location_id
     , CAST(NULL AS INTEGER) AS person_id
     , CASE WHEN att.attr_concept_id IS NOT NULL THEN att.attr_concept_id ELSE 0 END AS exposure_concept_id
     , att.attr_start_date AS exposure_start_date
     , att.attr_start_date AS exposure_start_datetime
     , att.attr_end_date AS exposure_end_date
     , att.attr_end_date AS exposure_end_datetime
     , 0 AS exposure_type_concept_id
     , 0 AS exposure_relationship_concept_id
     , att.attr_source_concept_id AS exposure_source_concept_id
     , att.attr_source_value AS exposure_source_value
     , CAST(NULL AS VARCHAR(50)) AS exposure_relationship_source_value
     , CAST(NULL AS VARCHAR(50)) AS dose_unit_source_value
     , CAST(NULL AS INTEGER) AS quantity
     , CAST(NULL AS VARCHAR(50)) AS modifier_source_value
     , CAST(NULL AS INTEGER) AS operator_concept_id
     , att.value_as_number AS value_as_number
     , att.value_as_concept_id AS value_as_concept_id
     , att.unit_concept_id AS unit_concept_id
     from ", attrFullTableName, " att
     inner join ", geomFullTableName, " geo
       on att.geom_record_id = geo.geom_record_id
       and att.variable_source_record_id = ", variableSourceId, "
     join omop.geom_omop_location gol
       on public.st_within(gol.geometry, geo.geom_wgs84)"
  ))

  # Connections are closed by the on.exit handlers registered above

  # Create exposure_occurrence_id column ------------------------------------

  exposure_occurrence_id <- seq_len(nrow(exposureOccurrence))
  exposureOccurrence <- cbind(exposure_occurrence_id, exposureOccurrence)
  exposureOccurrence
}
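Pulling the pieces together, a condensed, non-authoritative usage sketch of the new function, following the roxygen example above; the CSV path, column positions, and connection values are illustrative assumptions, not values fixed by this PR:

```
library(DatabaseConnector)

# Assumed gaia-db connection; all values are placeholders.
connectionDetails <- createConnectionDetails(dbms = "postgresql",
                                             server = "localhost/gaia",
                                             port = 5432,
                                             user = "postgres",
                                             password = Sys.getenv("GAIA_DB_PASSWORD"))

# Build locationImport as described in @details: an integer location_id
# plus an EWKB hex geometry column.
locationImport <- read.csv("geocoded_location_snippet.csv", sep = "|", header = FALSE)
locationImport <- dplyr::rename(locationImport, location_id = 1, lat = 11, lon = 12)
locationImport <- dplyr::mutate(locationImport,
                                location_id = as.integer(location_id),
                                lat = as.numeric(lat),
                                lon = as.numeric(lon))
locationImport <- dplyr::filter(locationImport, !is.na(lat) & !is.na(lon))
locationImport <- sf::st_as_sf(locationImport, coords = c("lon", "lat"), crs = 4326)
locationImport <- data.frame(dplyr::select(locationImport, location_id, geometry))
locationImport$geometry <- sf::st_as_binary(locationImport$geometry, EWKB = TRUE, hex = TRUE)

# variableSourceId 1 follows the roxygen example above.
exposureOccurrence <- createExposure(connectionDetails,
                                     variableSourceId = 1,
                                     locationImport = locationImport)
head(exposureOccurrence)
```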
21 changes: 7 additions & 14 deletions R/dbUtils.R
@@ -107,7 +107,7 @@ getAttrNameFromVariableSourceId <- function(connectionDetails, variableSourceId)
select data_source_uuid
from backbone.variable_source vs
where variable_source_id = ", variableSourceId,"
)"
) LIMIT 1"
)
)[[1]]
}
@@ -459,11 +459,8 @@ createGeomInstanceTable <- function(connectionDetails, schema, name) {
  if(!checkTableExists(connectionDetails, schema, paste0("geom_", name))) {
    DatabaseConnector::dbExecute(conn, paste0("CREATE TABLE IF NOT EXISTS ", schema,
                                              ".\"geom_", name, "\" (like backbone.geom_template);"))
-   DatabaseConnector::dbExecute(conn, paste0("drop sequence if exists ", schema, ".geom_", name, "_geom_record_id_seq;"))
-   DatabaseConnector::dbExecute(conn, paste0("create sequence ", schema, ".geom_", name, "_geom_record_id_seq;"))
-   DatabaseConnector::dbExecute(conn, paste0("ALTER TABLE ONLY ", schema, ".\"geom_", name,
-                                             "\" ALTER COLUMN geom_record_id SET DEFAULT ",
-                                             "nextval('", schema, ".geom_", name, "_geom_record_id_seq'::regclass);"))
+   DatabaseConnector::executeSql(conn, paste0("ALTER TABLE ", schema, ".\"geom_", name, "\" ",
+                                              "ALTER COLUMN geom_record_id ADD GENERATED BY DEFAULT AS IDENTITY;"))
  }
}

@@ -489,7 +486,7 @@ insertPostgisGeometry <- function(connectionDetails, staged, geomIndex) {
  on.exit(RPostgreSQL::dbDisconnect(postgisConnection))
  rpostgis::pgInsert(postgisConnection,
                     name = c(geomIndex$database_schema, paste0("geom_", geomIndex$table_name)),
-                    geom = "geom_local_value",
+                    geom = "geom_wgs84",
                     data.obj = staged)

}
@@ -519,8 +516,7 @@ getGeomTemplate <- function(connectionDetails){
#'
#' @return SRID set to 4326 for the geom_wgs84 column in the given table in gaiaDB

-setSridWgs84 <- function(connectionDetails, staged, geomIndex) {
-  geometryType <- as.character(unique(sf::st_geometry_type(staged$geometry)))
+setSridWgs84 <- function(connectionDetails, geometryType, geomIndex) {
  conn <- DatabaseConnector::connect(connectionDetails)
  on.exit(DatabaseConnector::disconnect(conn))
  DatabaseConnector::executeSql(conn, sql = paste0(
@@ -602,11 +598,8 @@ createAttrInstanceTable <- function(connectionDetails, schema, name) {
  if(!checkTableExists(connectionDetails, schema, paste0("attr_", name))) {
    DatabaseConnector::dbExecute(conn, paste0("CREATE TABLE IF NOT EXISTS ", schema,
                                              ".\"attr_", name, "\" (like backbone.attr_template);"))
-   DatabaseConnector::dbExecute(conn, paste0("drop sequence if exists ", schema, ".attr_", name, "_attr_record_id_seq;"))
-   DatabaseConnector::dbExecute(conn, paste0("create sequence ", schema, ".attr_", name, "_attr_record_id_seq;"))
-   DatabaseConnector::dbExecute(conn, paste0("ALTER TABLE ONLY ", schema, ".\"attr_", name,
-                                             "\" ALTER COLUMN attr_record_id SET DEFAULT ",
-                                             "nextval('", schema, ".attr_", name, "_attr_record_id_seq'::regclass);"))
+   DatabaseConnector::executeSql(conn, paste0("ALTER TABLE ", schema, ".\"attr_", name, "\" ",
+                                              "ALTER COLUMN attr_record_id ADD GENERATED BY DEFAULT AS IDENTITY;"))
  }
}

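The createGeomInstanceTable and createAttrInstanceTable hunks replace hand-managed sequences with PostgreSQL identity columns, so there is no longer a separate sequence object to create, drop, or wire up with nextval. A minimal sketch of the behavior the new DDL relies on; the scratch schema and demo table here are hypothetical, not part of the PR:

```
library(DatabaseConnector)

conn <- connect(connectionDetails)  # assumed gaia-db connection
executeSql(conn, "CREATE SCHEMA IF NOT EXISTS scratch;")
executeSql(conn, "CREATE TABLE scratch.identity_demo (
  record_id integer GENERATED BY DEFAULT AS IDENTITY,
  label varchar(10)
);")

# Inserts that omit record_id receive sequential values automatically.
executeSql(conn, "INSERT INTO scratch.identity_demo (label) VALUES ('a'), ('b');")
querySql(conn, "SELECT record_id, label FROM scratch.identity_demo;")
# expected: record_id values 1 and 2

disconnect(conn)
```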