diff --git a/.github/workflows/build_gaia_core.yml b/.github/workflows/build_gaia_core.yml new file mode 100644 index 00000000..4e6abff2 --- /dev/null +++ b/.github/workflows/build_gaia_core.yml @@ -0,0 +1,63 @@ +name: gaia-core Docker image build + +on: + push: + branches: + - main + +env: + REGISTRY: ghcr.io + ORG: ohdsi + +jobs: + build-and-push-images: + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + include: + - dockerfile: ./docker/gaia-core/Dockerfile + image: ghcr.io/TuftsCTSI/gaia-core + context: . + permissions: + contents: read + packages: write + + steps: + - name: Checkout the code + uses: actions/checkout@v2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to a container registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: jshoughtaling + password: ${{ secrets.GH_TOKEN }} + + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@v3 + with: + images: ${{ matrix.image }} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: ${{ matrix.context }} + file: ${{ matrix.dockerfile }} + push: true + tags: | + ${{ steps.meta.outputs.tags }} + labels: | + ${{ steps.meta.outputs.labels }} + platforms: | + linux/amd64 + cache-from: type=gha + cache-to: type=gha,mode=max + diff --git a/.github/workflows/build_gaia_db.yml b/.github/workflows/build_gaia_db.yml new file mode 100644 index 00000000..7e52db9d --- /dev/null +++ b/.github/workflows/build_gaia_db.yml @@ -0,0 +1,67 @@ +name: gaia-db Docker image build + +on: + push: + branches: + - main + paths: + - 'docker/gaia-db/**' + - 'inst/csv/**' + - 'vocabularies/**' + +env: + REGISTRY: ghcr.io + ORG: ohdsi + +jobs: + build-and-push-images: + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + include: + - dockerfile: ./docker/gaia-db/Dockerfile + image: ghcr.io/TuftsCTSI/gaia-db + context: . 
+ permissions: + contents: read + packages: write + + steps: + - name: Checkout the code + uses: actions/checkout@v2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to a container registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: jshoughtaling + password: ${{ secrets.GH_TOKEN }} + + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@v3 + with: + images: ${{ matrix.image }} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: ${{ matrix.context }} + file: ${{ matrix.dockerfile }} + push: true + tags: | + ${{ steps.meta.outputs.tags }} + labels: | + ${{ steps.meta.outputs.labels }} + platforms: | + linux/amd64 + cache-from: type=gha + cache-to: type=gha,mode=max + diff --git a/DESCRIPTION b/DESCRIPTION index f1549dd8..33f1585d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Description: What the package does (one paragraph). 
License: Apache License (>= 2) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.3.2 Depends: R (>= 2.10) LazyData: true diff --git a/NAMESPACE b/NAMESPACE index 9f830840..18cec922 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export("%>%") export(checkTableExists) export(checkVariableExists) export(createDdl) +export(createExposure) export(createForeignKeys) export(createIndices) export(createOccurrenceDdl) diff --git a/R/createExposure.R b/R/createExposure.R new file mode 100644 index 00000000..b1652f5b --- /dev/null +++ b/R/createExposure.R @@ -0,0 +1,165 @@ +#' Create an exposure_occurrence (exposure) table from a variable source id +#' +#' @param connectionDetails (list) An object of class connectionDetails as created by the createConnectionDetails function +#' +#' @param variableSourceId (integer) The variable source id of the variable to create an exposure table for +#' +#' @param locationImport (data.frame) A data frame with columns location_id and geometry. Represents the geocoded locations +#' +#' @return (data.frame) An OMOP CDM exposure_occurrence table for the specified variable source id and locations +#' +#' @examples +#' \dontrun{ +#' # Create exposure_occurrence table for a given variable +#' variableSourceId <- 1 # Percentile Percentage of persons below poverty estimate +#' locationImport <- data.frame(location) +#' exposure_occurrence <- createExposure(connectionDetails, variableSourceId, locationImport) +#' } +#' +#' @details +#' This function creates an exposure_occurrence table for a given variable source id and geocoded locations. +#' The exposure_occurrence table is created by joining the variable table to the geom table and then joining +#' the geom table to the geocoded locations. The exposure_occurrence table is then created by selecting the +#' relevant columns from the variable table and the geocoded locations. +#' +#' The locationImport data frame should have columns location_id and geometry. 
The location_id column should +#' be an integer representing the location_id of the geocoded location. The geometry column should be a binary +#' representation of the geometry of the geocoded location: +#' ``` +#' locationImport <- read.csv('geocoded_location_snippet.csv', sep="|", header=FALSE) +#' locationImport <- dplyr::rename(locationImport, location_id=1, lat=11, lon=12) +#' locationImport <- dplyr::mutate(locationImport, +#' location_id=as.integer(location_id), +#' lat=as.numeric(lat), +#' lon=as.numeric(gsub("[\\n]", "", lon))) +#' locationImport <- dplyr::filter(locationImport, !is.na(lat) & !is.na(lon)) +#' locationImport <- locationImport_sf <- sf::st_as_sf(locationImport, coords=c('lon', 'lat'), crs=4326) +#' locationImport <- dplyr::select(locationImport, location_id, geometry) +#' locationImport <- data.frame(locationImport) +#' locationImport$geometry <- +#' sf::st_as_binary(locationImport$geometry, EWKB = TRUE, hex = TRUE) +#' +#' #> head(locationImport) +#' #=> location_id geometry +#' #=> 1 1 0101000020e610000072230d5ff6c351c000023164d0284540 +#' #=> 2 2 0101000020e61000007222df852d8a52c0978b9d95594e4440 +#' #=> 3 3 0101000020e610000076319xaa4ae351c0ba0a73cc43124540 +#' #=> 4 4 0101000020e61000001d90fdfc97bc51c08a05bea2dbdd4440 +#' ``` +#' @export +#' + +createExposure <- function(connectionDetails, variableSourceId, locationImport) { + +# TODO verify locationImport + +# Check that specified variable (and geom) are both loaded to staging --------------- + + geomFullTableName <- getGeomNameFromVariableSourceId(connectionDetails = connectionDetails, + variableSourceId = variableSourceId) + attrFullTableName <- getAttrNameFromVariableSourceId(connectionDetails = connectionDetails, + variableSourceId = variableSourceId) + + + attrSchema <- strsplit(attrFullTableName, split="\\.")[[1]][[1]] + attrTableName <- strsplit(attrFullTableName, split="\\.")[[1]][[2]] + + # TODO the following is a deconstruction of checkVariableExists. 
+ # Refactor checkVariableExists to handle this case and not break the existing use case + + + if (!checkTableExists(connectionDetails = connectionDetails, + databaseSchema = attrSchema, + tableName = attrTableName)) { + loadVariable(connectionDetails, variableSourceId) + } + + variableExistsQuery <- paste0("select count(*) from ", attrFullTableName, " where variable_source_record_id = '", variableSourceId,"'") + conn <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(conn)) + variableExistsResult <- DatabaseConnector::querySql(conn, variableExistsQuery) + if (!variableExistsResult > 0){ + loadVariable(connectionDetails, variableSourceId) + } + +# Join all variable to geom, join all to geocoded addresses (create exp_occ in mem) -------------------------------------------- + + # TODO this could be a function in dbUtils + + #TODO add temporal join condition: + # <<< + # join omop.geom_omop_location gol + # on public.st_within(gol.geometry, geo.geom_wgs84)" + # and (gol.valid_start_date < att.attr_end_date + # or gol.valid_end_date >att.attr_start_date) + # >>> + + # TODO better exposure_*_date logic: + # After temporal join condition is added + # <<< + # CASE WHEN att.attr_start_date >= gol.valid_start_date THEN att.attr_start_date + # ELSE gol.valid_start_date END AS exposure_start_date + # CASE WHEN att.attr_end_date <= gol.valid_end_date THEN att.attr_end_date + # ELSE gol.valid_end_date END AS exposure_end_date + # >>> + + # TODO how to get exposure_type_concept_id + + # create table geom omop location + DatabaseConnector::executeSql(conn, "CREATE SCHEMA IF NOT EXISTS omop;") + DatabaseConnector::executeSql(conn, "DROP TABLE IF EXISTS omop.geom_omop_location") + DatabaseConnector::executeSql(conn, "CREATE TABLE IF NOT EXISTS omop.geom_omop_location ( + location_id integer, + geometry public.geometry + )") + + serv <- strsplit(connectionDetails$server(), "/")[[1]] + + postgisConnection <- RPostgreSQL::dbConnect("PostgreSQL", + 
host = serv[1], dbname = serv[2], + user = connectionDetails$user(), + password = connectionDetails$password(), + port = connectionDetails$port()) + on.exit(RPostgreSQL::dbDisconnect(postgisConnection)) + rpostgis::pgInsert(postgisConnection, + name = c("omop", "geom_omop_location"), + geom = "geometry", + data.obj = locationImport) + + exposureOccurrence <- DatabaseConnector::dbGetQuery(conn, paste0( + "select + gol.location_id + , CAST(NULL AS INTEGER) AS person_id + , CASE WHEN att.attr_concept_id IS NOT NULL THEN att.attr_concept_id ELSE 0 END AS exposure_concept_id + , att.attr_start_date AS exposure_start_date + , att.attr_start_date AS exposure_start_datetime + , att.attr_end_date AS exposure_end_date + , att.attr_end_date AS exposure_end_datetime + , 0 AS exposure_type_concept_id + , 0 AS exposure_relationship_concept_id + , att.attr_source_concept_id AS exposure_source_concept_id + , att.attr_source_value AS exposure_source_value + , CAST(NULL AS VARCHAR(50)) AS exposure_relationship_source_value + , CAST(NULL AS VARCHAR(50)) AS dose_unit_source_value + , CAST(NULL AS INTEGER) AS quantity + , CAST(NULL AS VARCHAR(50)) AS modifier_source_value + , CAST(NULL AS INTEGER) AS operator_concept_id + , att.value_as_number AS value_as_number + , att.value_as_concept_id AS value_as_concept_id + , att.unit_concept_id AS unit_concept_id + from ", getAttrNameFromVariableSourceId(connectionDetails, variableSourceId) ," att + inner join ", getGeomNameFromVariableSourceId(connectionDetails, variableSourceId)," geo + on att.geom_record_id = geo.geom_record_id + and att.variable_source_record_id = ", variableSourceId, " + join omop.geom_omop_location gol + on public.st_within(gol.geometry, geo.geom_wgs84)" + )) + + DatabaseConnector::disconnect(conn) + + # Create exposure_occurrence_id column ------------------------------------ + + exposure_occurrence_id <- seq.int(nrow(exposureOccurrence)) + exposureOccurrence <- cbind(exposure_occurrence_id, exposureOccurrence) + 
exposureOccurrence +} diff --git a/R/dbUtils.R b/R/dbUtils.R index 9a010e52..dff7a99d 100644 --- a/R/dbUtils.R +++ b/R/dbUtils.R @@ -107,7 +107,7 @@ getAttrNameFromVariableSourceId <- function(connectionDetails, variableSourceId) select data_source_uuid from backbone.variable_source vs where variable_source_id = ", variableSourceId," - )" + ) LIMIT 1" ) )[[1]] } @@ -459,11 +459,8 @@ createGeomInstanceTable <- function(connectionDetails, schema, name) { if(!checkTableExists(connectionDetails, schema, paste0("geom_", name))) { DatabaseConnector::dbExecute(conn, paste0("CREATE TABLE IF NOT EXISTS ", schema, ".\"geom_", name, "\" (like backbone.geom_template);")) - DatabaseConnector::dbExecute(conn, paste0("drop sequence if exists ", schema, ".geom_", name, "_geom_record_id_seq;")) - DatabaseConnector::dbExecute(conn, paste0("create sequence ", schema, ".geom_", name, "_geom_record_id_seq;")) - DatabaseConnector::dbExecute(conn, paste0("ALTER TABLE ONLY ", schema, ".\"geom_", name, - "\" ALTER COLUMN geom_record_id SET DEFAULT ", - "nextval('", schema, ".geom_", name, "_geom_record_id_seq'::regclass);")) + DatabaseConnector::executeSql(conn, paste0("ALTER TABLE ", schema, ".\"geom_", name, "\" ", + "ALTER COLUMN geom_record_id ADD GENERATED BY DEFAULT AS IDENTITY;")) } } @@ -489,7 +486,7 @@ insertPostgisGeometry <- function(connectionDetails, staged, geomIndex) { on.exit(RPostgreSQL::dbDisconnect(postgisConnection)) rpostgis::pgInsert(postgisConnection, name = c(geomIndex$database_schema, paste0("geom_", geomIndex$table_name)), - geom = "geom_local_value", + geom = "geom_wgs84", data.obj = staged) } @@ -519,8 +516,7 @@ getGeomTemplate <- function(connectionDetails){ #' #' @return SRID set to 4326 the geom_wgs84 column in the given table in gaiaDB -setSridWgs84 <- function(connectionDetails, staged, geomIndex) { - geometryType <- as.character(unique(sf::st_geometry_type(staged$geometry))) +setSridWgs84 <- function(connectionDetails, geometryType, geomIndex) { conn <- 
DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(conn)) DatabaseConnector::executeSql(conn, sql = paste0( @@ -602,11 +598,8 @@ createAttrInstanceTable <- function(connectionDetails, schema, name) { if(!checkTableExists(connectionDetails, schema, paste0("attr_", name))) { DatabaseConnector::dbExecute(conn, paste0("CREATE TABLE IF NOT EXISTS ", schema, ".\"attr_", name, "\" (like backbone.attr_template);")) - DatabaseConnector::dbExecute(conn, paste0("drop sequence if exists ", schema, ".attr_", name, "_attr_record_id_seq;")) - DatabaseConnector::dbExecute(conn, paste0("create sequence ", schema, ".attr_", name, "_attr_record_id_seq;")) - DatabaseConnector::dbExecute(conn, paste0("ALTER TABLE ONLY ", schema, ".\"attr_", name, - "\" ALTER COLUMN attr_record_id SET DEFAULT ", - "nextval('", schema, ".attr_", name, "_attr_record_id_seq'::regclass);")) + DatabaseConnector::executeSql(conn, paste0("ALTER TABLE ", schema, ".\"attr_", name, "\" ", + "ALTER COLUMN attr_record_id ADD GENERATED BY DEFAULT AS IDENTITY;")) } } diff --git a/R/loadExposure.R b/R/loadExposure.R deleted file mode 100644 index bef72297..00000000 --- a/R/loadExposure.R +++ /dev/null @@ -1,161 +0,0 @@ -# TODO The purpose of this function is to execute a spatial join between a -## staged place-related variable, a staged geometry, and then to a point-address -## for a patient. 
- -loadExposure <- function(gaiaConnectionDetails, cdmConnectionDetails, cdmDatabaseSchema, variableSourceId) { - -# Check that exposure_occurrence exists in CDM ---------------------------- - - - if (!checkTableExists(connectionDetails = cdmConnectionDetails, - databaseSchema = cdmDatabaseSchema, - tableName = "exposure_occurrence")) { - message(paste0("Creating exposure_occurrence table in ", cdmConnectionDetails$server(), ".", cdmDatabaseSchema, "...")) - - message ("TODO: Create function to execute exposure_occurrence DDL with connectionDetails; https://github.com/OHDSI/GIS/issues/240") - message("TOOD: For now, you will have to manually create exposure_occurrence using the scripts in inst/ddl/001/gaiaResults_*") - stop("Table not found") - } - -# Check that specified variable (and geom) are both loaded to staging --------------- - - geomFullTableName <- getGeomNameFromVariableSourceId(connectionDetails = gaiaConnectionDetails, - variableSourceId = variableSourceId) - attrFullTableName <- getAttrNameFromVariableSourceId(connectionDetails = gaiaConnectionDetails, - variableSourceId = variableSourceId) - - attrSchema <- strsplit(attrFullTableName, split="\\.")[[1]][[1]] - attrTableName <- strsplit(attrFullTableName, split="\\.")[[1]][[2]] - - # TODO the following is a deconstruction of checkVariableExists. - # Refactor checkVariableExists to handle this case and not break the existing use case - - - if (!checkTableExists(connectionDetails = gaiaConnectionDetails, - databaseSchema = attrSchema, - tableName = attrTableName)) { - message("# TODO: this should call loadVariable because the desired variable doesn't exist") - # TODO: this should call loadVariable because the desired variable doesn't exist (by virtue of the entire attr table not existing) - # NOTE: we shouldn't need to check for a geometry.. if a variable has been loaded it is assumed a geometry was loaded at the same time. 
- } - - variableExistsQuery <- paste0("select count(*) from ", attrFullTableName, - " where variable_source_record_id = '", variableSourceId,"'") - conn <- DatabaseConnector::connect(gaiaConnectionDetails) - variableExistsResult <- DatabaseConnector::querySql(conn, variableExistsQuery) - DatabaseConnector::disconnect(conn) - if (!variableExistsResult > 0){ - message("# TODO: this should call loadVariable because the desired variable doesn't exist") - # TODO: this should call loadVariable because the desired variable doesn't exist - # NOTE: we shouldn't need to check for a geometry.. if a variable has been loaded it is assumed a geometry was loaded at the same time. - } - - -# Check that there is a geocoded address table ---------------------------- - - if (!checkTableExists(connectionDetails = gaiaConnectionDetails, - databaseSchema = "omop", - tableName = "geom_omop_location")) { - message(paste0("No geocoded address table detected in ", gaiaConnectionDetails$server(), ".", cdmDatabaseSchema, ".", tableName)) - message("Please ensure that you have a geocoded address table named \"geom_omop_location\" in a schema named \"omop\" within your gaiaDB instance") - message("Full geocoding walkthrough at: https://ohdsi.github.io/GIS/geocodingFull.html") - } - - - -# Join all variable to geom, join all to geocoded addresses (create exp_occ in mem) -------------------------------------------- - - # TODO this could be a function in dbUtils - - conn <- DatabaseConnector::connect(gaiaConnectionDetails) - - #TODO add temporal join condition: - # <<< - # join omop.geom_omop_location gol - # on public.st_within(gol.geometry, geo.geom_wgs84)" - # and (gol.valid_start_date < att.attr_end_date - # or gol.valid_end_date >att.attr_start_date) - # >>> - - # TODO better exposure_*_date logic: - # After temporal join condition is added - # <<< - # CASE WHEN att.attr_start_date >= gol.valid_start_date THEN att.attr_start_date - # ELSE gol.valid_start_date END AS exposure_start_date - # 
CASE WHEN att.attr_end_date <= gol.valid_end_date THEN att.attr_end_date - # ELSE gol.valid_end_date END AS exposure_end_date - # >>> - - # TODO how to get exposure_type_concept_id - - - - - - exposureOccurrence <- DatabaseConnector::dbGetQuery(conn, paste0( - "select gol.location_id - , gol.person_id AS person_id - , CAST(NULL AS INTEGER) AS cohort_definition_id - , CASE WHEN att.attr_concept_id IS NOT NULL THEN att.attr_concept_id ELSE 0 END AS exposure_concept_id - , att.attr_start_date AS exposure_start_date - , att.attr_start_date AS exposure_start_datetime - , att.attr_end_date AS exposure_end_date - , att.attr_end_date AS exposure_end_datetime - , 0 AS exposure_type_concept_id - , 0 AS exposure_relationship_concept_id - , att.attr_source_concept_id AS exposure_source_concept_id - , att.attr_source_value AS exposure_source_value - , CAST(NULL AS VARCHAR(50)) AS exposure_relationship_source_value - , CAST(NULL AS VARCHAR(50)) AS dose_unit_source_value - , CAST(NULL AS INTEGER) AS quantity - , CAST(NULL AS VARCHAR(50)) AS modifier_source_value - , CAST(NULL AS INTEGER) AS operator_concept_id - , att.value_as_number AS value_as_number - , att.value_as_concept_id AS value_as_concept_id - , att.unit_concept_id AS unit_concept_id - from ", getAttrNameFromVariableSourceId(gaiaConnectionDetails, variableSourceId)," att - inner join ", getGeomNameFromVariableSourceId(gaiaConnectionDetails, variableSourceId)," geo - on att.geom_record_id = geo.geom_record_id - and att.variable_source_record_id = ", variableSourceId, " - join omop.geom_omop_location gol - on public.st_within(gol.geometry, geo.geom_wgs84)" - )) - - DatabaseConnector::disconnect(conn) - - - -# Create exposure_occurrence_id column ------------------------------------ - - conn <- DatabaseConnector::connect(cdmConnectionDetails) - - # get max existing exposure_occurrence_id and append the exposure_occurrence_id - maxExposureOccurrenceId <- DatabaseConnector::dbGetQuery(conn, paste0("SELECT 
max(exposure_occurrence_id) FROM ", cdmDatabaseSchema,".exposure_occurrence;"))[[1]] - - if (is.na(maxExposureOccurrenceId)) { - exposureOccurrence <- cbind(exposure_occurrence_id = seq(1, nrow(exposureOccurrence)), exposureOccurrence) - } else { - exposureOccurrence <- cbind(exposure_occurrence_id = seq(maxExposureOccurrenceId + 1, maxExposureOccurrenceId + nrow(exposureOccurrence)), exposureOccurrence) - } - - DatabaseConnector::disconnect(conn) - - -# Insert into CDM table --------------------------------------------------- - - conn <- DatabaseConnector::connect(cdmConnectionDetails) - - - DatabaseConnector::insertTable(connection = conn, - databaseSchema = cdmDatabaseSchema, - tableName = "exposure_occurrence", - data = exposureOccurrence, - dropTableIfExists = FALSE, - createTable = FALSE) - - DatabaseConnector::disconnect(conn) - - - - -} \ No newline at end of file diff --git a/R/loadGeometry.R b/R/loadGeometry.R index 801c9c43..d03504e6 100644 --- a/R/loadGeometry.R +++ b/R/loadGeometry.R @@ -44,12 +44,37 @@ loadGeometry <- function(connectionDetails, dataSourceUuid) { stagedResult <- standardizeStaged(staged = staged, spec = dataSourceRecord$geom_spec) + geometryType <- "" + if (!is.null(staged$geometry)) { + geometryType <- as.character(unique(sf::st_geometry_type(staged$geometry))) + } else { + geometryType <- as.character(unique(sf::st_geometry_type(sf::st_as_sf(stagedResult)$geom_local_value))) + } + # Transform local geometry to epsg:4326 if (!"geom_wgs84" %in% names(stagedResult)) { + stagedResult <- sf::st_as_sf(stagedResult) + if (is.na(sf::st_crs(stagedResult))){ + if (length(unique(stagedResult$geom_local_epsg)) == 0) { + stop("Error: No local EPSG set. CRS cannot be set. 
Geometry cannot be loaded.") + } else if (length(unique(stagedResult$geom_local_epsg)) == 1) { + stagedResult <- sf::st_set_crs(stagedResult, unique(stagedResult$geom_local_epsg)[[1]]) + } else { + epsg <- unique(stagedResult$geom_local_epsg) + epsg_df <- lapply(epsg, function(x) { + epsg_fragment <- dplyr::filter(stagedResult, geom_local_epsg==x) + epsg_fragment <- sf::st_set_crs(epsg_fragment, x) + epsg_fragment$geom_local_value <- sf::st_transform(epsg_fragment$geom_local_value, 4326) + epsg_fragment + }) + stagedResult <- dplyr::bind_rows(epsg_df, .id="column_label") + } + } stagedResult$geom_wgs84 <- sf::st_transform(stagedResult$geom_local_value, 4326) } # format for insert + stagedResult <- data.frame(stagedResult) if (!"character" %in% class(stagedResult$geom_local_value)) { stagedResult$geom_local_value <- sf::st_as_binary(stagedResult$geom_local_value, EWKB = TRUE, hex = TRUE) stagedResult$geom_wgs84 <- sf::st_as_binary(stagedResult$geom_wgs84, EWKB = TRUE, hex = TRUE) @@ -88,7 +113,7 @@ loadGeometry <- function(connectionDetails, dataSourceUuid) { # Set SRID on geom_wgs84 after table import setSridWgs84(connectionDetails = connectionDetails, - staged = staged, + geometryType = geometryType, geomIndex = geomIndexRecord) # Index the geometry column (geom_local_value, geom_wgs84) diff --git a/R/loadVariable.R b/R/loadVariable.R index a035f7ad..4f392940 100644 --- a/R/loadVariable.R +++ b/R/loadVariable.R @@ -20,7 +20,7 @@ #' #' if (!tableExists) { #' message("Loading attr table dependency") -#' loadVariable(conn = conn, connectionDetails = connectionDetails, variableSourceId) +#' loadVariable(connectionDetails = connectionDetails, variableSourceId) #' } #' } #' @@ -33,6 +33,9 @@ loadVariable <- function(connectionDetails, variableSourceId){ variableSourceRecord <- getVariableSourceRecord(connectionDetails = connectionDetails, variableSourceId = variableSourceId) + if(nrow(variableSourceRecord) == 0) { + return(message(paste0("Variable ", variableSourceId," not 
found in the database."))) + } # get attr_index attrIndexRecord <- getAttrIndexRecord(connectionDetails = connectionDetails, variableSourceId = variableSourceRecord$variable_source_id) diff --git a/README.md b/README.md index c980b5cb..251db70d 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,80 @@ For **project management**, see the [GIS Project Page](https://github.com/orgs/O [Click here](https://github.com/OHDSI/GIS/issues/new?assignees=&labels=Use+Case&projects=&template=use-case.yaml&title=%5BUse+Case%5D%3A+) to propose a new **Use Case**. -# Quick Start +# Getting Started + +Two docker services, gaia-db and gaia-core, can be used to link geospatial data to EHR location histories. Follow these steps to copy or build the images, start and connect the containers, load location, and output a table of "exposures" and a delta vocabulary that can be inserted into your CDM database. + +## Copy or Build Gaia Images +You can build the images locally or pull from GHCR (choose one of COPY or BUILD) +```sh +# COPY +# requires auth echo | docker login ghcr.io -u "" --password-stdin +docker pull ghcr.io/tuftsctsi/gaia-core:main +docker pull ghcr.io/tuftsctsi/gaia-db:main + +docker tag ghcr.io/tuftsctsi/gaia-core:main gaia-core +docker tag ghcr.io/tuftsctsi/gaia-db:main gaia-db + +# BUILD +# git clone https://github.com/OHDSI/GIS.git +# cd GIS +docker build -t gaia-core -f docker/gaia-core/Dockerfile . +docker build -t gaia-db -f docker/gaia-db/Dockerfile . 
+``` + +## Run and Connect Containers +Create a network to allow the gaia-core R container to interact with the gaia-db database container +```sh +docker network create gaia +docker run -itd --rm -e USER="ohdsi" -e PASSWORD="mypass" --network gaia -p 8787:8787 --name gaia-core gaia-core +docker run -itd --rm -e POSTGRES_PASSWORD=SuperSecret -e POSTGRES_USER=postgres --network gaia -p 5432:5432 --name gaia-db gaia-db +``` + +## Using gaiaCore +The gaia-core container provides an R and RStudio environment with the R Package `gaiaCore` alongside the OHDSI HADES R Packages. `gaiaCore` provides the functionality for loading cataloged geospatial datasets into gaia-db and generate "exposures" by linking geospatial data to patient addresses. + +You can access `gaiaCore` from an RStudio environment, simply navigate to `localhost:8787` in your browser. Login with the USER and PASSWORD assigned on container start (default: ohdsi, mypass) + +Alternatively, you can access `gaiaCore` from the R Shell: + +```sh +docker exec -it gaia-core R +``` + +In your R environment, create a connection to the gaia-db database, import and format a table with geocoded patient addresses, and create exposures by selecting the variable ID for the exposure of interest: +```R +# Connect to gaia-db +connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "postgresql", + server = "gaia-db/postgres", + user="postgres", + password = "SuperSecret", + pathToDriver = "/opt" +) + +# Import and format geocoded addresses +location_import <- read.csv('location_geocoded.csv', sep="|", header=FALSE) +location_import <- dplyr::rename(location_import, location_id=1, lat=11, lon=12) +location_import <- dplyr::mutate(location_import, + location_id=as.integer(location_id), + lat=as.numeric(lat), + lon=as.numeric(gsub("[\\n]", "", lon))) +location_import <- dplyr::filter(location_import, !is.na(lat) & !is.na(lon)) +location_import <- location_import_sf <- sf::st_as_sf(location_import, coords=c('lon', 
'lat'), crs=4326) +location_import <- dplyr::select(location_import, location_id, geometry) +location_import <- data.frame(location_import) +location_import$geometry <- + sf::st_as_binary(location_import$geometry, EWKB = TRUE, hex = TRUE) + +# Select exposure variable of interest +variableSourceId <- 1 # Percentile Percentage of persons below poverty estimate + +# Create exposure +createExposure(connectionDetails, variableSourceId, location_import) +``` + -Instructions to quickly install and start using Gaia are [here](https://ohdsi.github.io/GIS/get-started.html) # Support diff --git a/docker/gaia-core/Dockerfile b/docker/gaia-core/Dockerfile new file mode 100644 index 00000000..22291bdc --- /dev/null +++ b/docker/gaia-core/Dockerfile @@ -0,0 +1,35 @@ +FROM ohdsi/broadsea-hades:4.2.1 + +RUN apt-get update && \ + apt-get install --yes --no-install-recommends \ + libpq-dev \ + libudunits2-dev \ + libgdal-dev \ + libgeos-dev \ + libproj-dev + +# Temporary workaround for bug in sf -> https://forum.posit.co/t/sf-wont-install-for-anyone-on-posit-cloud/191242/2 +RUN Rscript -e 'remotes::install_github(repo = "r-spatial/sf", ref = "93a25fd8e2f5c6af7c080f92141cb2b765a04a84")' + +# Note that until the main package update is merged in the OHDSI org, we will need to reference the fixed code +# on the containerize branch +RUN Rscript -e 'remotes::install_github("OHDSI/GIS@containerize")' + +# Outdated Andromeda package from Hades causes error +# "Error: 'isAndromedaTable' is not an exported object from 'namespace:Andromeda'" +# Broadsea HADES dockerhub package should be updated to use Andromeda v0.6.7 +RUN Rscript -e 'install.packages("Andromeda")' + +# This provides an API to gaiaCore, could be moved to the package DESCRIPTION +RUN Rscript -e 'install.packages("plumber")' +COPY ./docker/gaia-core/R /ohdsi-gis +WORKDIR /ohdsi-gis + +# Add API service to supervisord +RUN echo "" >> /etc/supervisor/conf.d/supervisord.conf \ + && echo "[program:Rscript]" >> 
/etc/supervisor/conf.d/supervisord.conf \ + && echo "command=/usr/local/bin/Rscript /ohdsi-gis/gaia.R" >> /etc/supervisor/conf.d/supervisord.conf \ + && echo "" >> /etc/supervisor/conf.d/supervisord.conf \ + && echo "stdout_logfile=/var/log/supervisor/%(program_name)s.log" >> /etc/supervisor/conf.d/supervisord.conf + +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] diff --git a/docker/gaia-core/R/gaia.R b/docker/gaia-core/R/gaia.R new file mode 100644 index 00000000..d7fad388 --- /dev/null +++ b/docker/gaia-core/R/gaia.R @@ -0,0 +1,21 @@ +library(gaiaCore) +library(DatabaseConnector) +library(plumber) + +# set database connection +# from R docker container to gaiaDB docker container - network gaiadb_default +connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "postgresql", + server = paste0("gaia-db/",Sys.getenv(c("POSTGRES_DB"))), + port = Sys.getenv(c("POSTGRES_PORT")), + user=Sys.getenv(c("POSTGRES_USER")), + password = scan(Sys.getenv(c("POSTGRES_PASSWORD_FILE")),"character")) + +# initialize DB connection +gaiaCore::initializeDatabase(connectionDetails) + +# configure and run endpoint +root <- pr("plumber.R") +root %>% pr_run( + port=as.numeric(Sys.getenv(c("GAIA_CORE_API_PORT"))), + host='0.0.0.0') \ No newline at end of file diff --git a/docker/gaia-core/R/plumber.R b/docker/gaia-core/R/plumber.R new file mode 100644 index 00000000..6445e362 --- /dev/null +++ b/docker/gaia-core/R/plumber.R @@ -0,0 +1,14 @@ +#* Load the variable from the variable_source +#* @param variable_id The variable to load +#* @get /load +function(variable_id=-1){ + if (variable_id > 0) { + res <- capture.output( + gaiaCore::loadVariable(connectionDetails,variable_id), + type='message' + ) + } else { + res <- 'You must pass a variable_id' + } + list(res) +} \ No newline at end of file diff --git a/docker/gaia-db/Dockerfile b/docker/gaia-db/Dockerfile new file mode 100644 index 00000000..512d1edf --- /dev/null +++ 
b/docker/gaia-db/Dockerfile @@ -0,0 +1,12 @@ +FROM postgis/postgis:16-3.4-alpine + +RUN mkdir /csv +COPY inst/csv/data_source.csv /csv/data_source.csv +COPY inst/csv/variable_source.csv /csv/variable_source.csv +COPY vocabularies/gis_vocabs_vocabulary_stage_v1.csv /csv/gis_vocabulary_fragment.csv +COPY vocabularies/gis_vocabs_concept_class_stage_v1.csv /csv/gis_concept_class_fragment.csv +COPY vocabularies/gis_vocabs_domain_stage_v1.csv /csv/gis_domain_fragment.csv +COPY vocabularies/gis_vocabs_concept_stage_v1.csv /csv/gis_concept_fragment.csv +COPY vocabularies/gis_vocabs_relationship_stage_v1.csv /csv/gis_relationship_fragment.csv +COPY vocabularies/gis_vocabs_concept_relationship_stage_v1.csv /csv/gis_concept_relationship_fragment.csv +COPY docker/gaia-db/init.sql /docker-entrypoint-initdb.d/init.sql diff --git a/docker/gaia-db/README.md b/docker/gaia-db/README.md new file mode 100644 index 00000000..b4bfb1e1 --- /dev/null +++ b/docker/gaia-db/README.md @@ -0,0 +1,14 @@ +# Dockerized GAIA-DB + +You can run this container directly with the following command: + +```bash +sudo docker run --rm --env POSTGRES_PASSWORD= :latest +``` + +This image is based on the [alpine flavored postgis](https://hub.docker.com/layers/postgis/postgis/16-3.4-alpine/images/sha256-5c31b8b83d9ea726ed109d2db7c16a3febe994e4c2d9ef888d3fc77fff7fd2c2?context=explore) base image. The [initialization script](https://github.com/TuftsCTSI/GIS/blob/containerize/docker/gaia-db/init.sql) for the database combines and modifies existing sql scripts used in both the [catalog initialization](https://github.com/TuftsCTSI/GIS/blob/containerize/inst/initialize.sql) (via the backbone schema) and the [vocabulary integration](https://github.com/TuftsCTSI/GIS/blob/containerize/vocabularies/easyload.sql). 
+ +Once deployed and auto-initialized, the containerized Postgres database includes: +- GIS Catalog (`backbone` schema) +- Constrained GIS vocabulary tables (`vocabulary` schema) +- postgis tools (native to image, `tiger` schema) \ No newline at end of file diff --git a/docker/gaia-db/init.sql b/docker/gaia-db/init.sql new file mode 100644 index 00000000..18fa277d --- /dev/null +++ b/docker/gaia-db/init.sql @@ -0,0 +1,553 @@ +-- * - * - * - * - * - * - * - * - * - +-- BACKBONE SCHEMA CONSTRUCTION +-- * - * - * - * - * - * - * - * - * - + +CREATE EXTENSION IF NOT EXISTS postgis; + +CREATE SCHEMA IF NOT EXISTS backbone; + +SET search_path = backbone, public; + +CREATE TABLE data_source ( + data_source_uuid int4 NOT NULL, + org_id varchar(100) NOT NULL, + org_set_id varchar(100) NOT NULL, + dataset_name varchar(100) NOT NULL, + dataset_version varchar(100) NOT NULL, + geom_type varchar(100) NULL, + geom_spec text NULL, + boundary_type varchar(100) NULL, + has_attributes int4 NULL, + download_method varchar(100) NOT NULL, + download_subtype varchar(100) NOT NULL, + download_data_standard varchar(100) NOT NULL, + download_filename varchar(100) NOT NULL, + download_url varchar(100) NOT NULL, + download_auth varchar(100) NULL, + documentation_url varchar(100) NULL ); + + +CREATE TABLE variable_source ( + variable_source_id serial4 NOT NULL, + geom_dependency_uuid int4 NULL, + variable_name varchar NOT NULL, + variable_desc text NOT NULL, + data_source_uuid int4 NOT NULL, + attr_spec text NOT NULL ); +CREATE TABLE attr_index ( + attr_index_id numeric NOT NULL, + variable_source_id numeric NOT NULL, + attr_of_geom_index_id numeric NOT NULL, + database_schema varchar(255) NOT NULL, + table_name varchar(255) NOT NULL, + data_source_id numeric NOT NULL ); +CREATE TABLE geom_index ( + geom_index_id numeric NOT NULL, + data_type_id numeric NULL, + data_type_name varchar(255) NOT NULL, + geom_type_concept_id numeric NULL, + geom_type_source_value varchar(255) NULL, + 
database_schema varchar(255) NOT NULL, + table_name varchar(255) NOT NULL, + table_desc varchar(255) NOT NULL, + data_source_id numeric NOT NULL ); +CREATE TABLE attr_template ( + attr_record_id serial4 NOT NULL, + geom_record_id int4 NOT NULL, + variable_source_record_id int4 NOT NULL, + attr_concept_id int4 NULL, + attr_start_date date NOT NULL, + attr_end_date date NOT NULL, + value_as_number float8 NULL, + value_as_string varchar NULL, + value_as_concept_id int4 NULL, + unit_concept_id int4 NULL, + unit_source_value varchar NULL, + qualifier_concept_id int4 NULL, + qualifier_source_value varchar NULL, + attr_source_concept_id int4 NULL, + attr_source_value varchar NOT NULL, + value_source_value varchar NOT NULL ); +CREATE TABLE geom_template ( + geom_record_id serial4 NOT NULL, + geom_name varchar NOT NULL, + geom_source_coding varchar NOT NULL, + geom_source_value varchar NOT NULL, + geom_wgs84 geometry NULL, + geom_local_epsg int4 NOT NULL, + geom_local_value geometry NOT NULL ); + +CREATE SEQUENCE IF NOT EXISTS attr_index_attr_index_id_seq + INCREMENT BY 1 + MINVALUE 1 + MAXVALUE 2147483647 + START 1 + CACHE 1 + NO CYCLE; + +CREATE SEQUENCE IF NOT EXISTS attr_template_attr_record_id_seq + INCREMENT BY 1 + MINVALUE 1 + MAXVALUE 2147483647 + START 1 + CACHE 1 + NO CYCLE; + +CREATE SEQUENCE IF NOT EXISTS variable_source_variable_source_id_seq + INCREMENT BY 1 + MINVALUE 1 + MAXVALUE 2147483647 + START 1 + CACHE 1 + NO CYCLE; + +CREATE SEQUENCE IF NOT EXISTS geom_index_geom_index_id_seq + INCREMENT BY 1 + MINVALUE 1 + MAXVALUE 2147483647 + START 1 + CACHE 1 + NO CYCLE; + +CREATE SEQUENCE IF NOT EXISTS geom_template_geom_record_id_seq + INCREMENT BY 1 + MINVALUE 1 + MAXVALUE 2147483647 + START 1 + CACHE 1 + NO CYCLE;-- attr_index definition + +\COPY data_source FROM '/csv/data_source.csv' (FORMAT csv, HEADER); +\COPY variable_source FROM '/csv/variable_source.csv' (FORMAT csv, HEADER); + +truncate geom_index; +truncate attr_index; + +insert into geom_index 
+select row_number() over() as geom_index_id + , null as data_type_id + , geom_type as data_type_name + , null as geom_type_concept_id + , boundary_type as geom_type_source_value + , regexp_replace(regexp_replace(lower(concat(org_id, '_', org_set_id)), '\W','_', 'g'), '^_+|_+$|_(?=_)', '', 'g') as database_schema + , regexp_replace(regexp_replace(lower(concat(dataset_name)), '\W','_', 'g'), '^_+|_+$|_(?=_)', '', 'g') as table_name + , concat_ws(' ', org_id, org_set_id, dataset_name) as table_desc + , data_source_uuid as data_source_id +from data_source +where geom_type <> '' +and geom_type is not null +and data_source_uuid not in ( + select data_source_uuid + from geom_index +); + +insert into attr_index +select row_number() over() as attr_index_id + , vs.variable_source_id as variable_source_id + , gi.geom_index_id as attr_of_geom_index_id + , regexp_replace(regexp_replace(lower(concat(ds.org_id, '_', ds.org_set_id)), '\W','_', 'g'), '^_+|_+$|_(?=_)', '', 'g') as database_schema + , regexp_replace(regexp_replace(lower(concat(ds.dataset_name)), '\W','_', 'g'), '^_+|_+$|_(?=_)', '', 'g') as table_name + , ds.data_source_uuid as data_source_id +from data_source ds +inner join variable_source vs +on ds.data_source_uuid = vs.data_source_uuid +and ds.has_attributes=1 +inner join geom_index gi +on gi.data_source_id = vs.geom_dependency_uuid; + +-- * - * - * - * - * - * - * - * - * - +-- VOCABULARY SCHEMA CONSTRUCTION +-- * - * - * - * - * - * - * - * - * - + +-- ADD VOCABULARIES + +CREATE SCHEMA IF NOT EXISTS vocabulary; + +CREATE TABLE vocabulary.concept ( + concept_id integer NOT NULL, + concept_name varchar(255) NOT NULL, + domain_id varchar(20) NOT NULL, + vocabulary_id varchar(20) NOT NULL, + concept_class_id varchar(20) NOT NULL, + standard_concept varchar(1) NULL, + concept_code varchar(50) NOT NULL, + valid_start_date date NOT NULL, + valid_end_date date NOT NULL, + invalid_reason varchar(1) NULL ); + +CREATE TABLE vocabulary.vocabulary ( + vocabulary_id 
varchar(20) NOT NULL, + vocabulary_name varchar(255) NOT NULL, + vocabulary_reference varchar(255) NULL, + vocabulary_version varchar(255) NULL, + vocabulary_concept_id integer NOT NULL ); + +CREATE TABLE vocabulary.domain ( + domain_id varchar(20) NOT NULL, + domain_name varchar(255) NOT NULL, + domain_concept_id integer NOT NULL ); + +CREATE TABLE vocabulary.concept_class ( + concept_class_id varchar(20) NOT NULL, + concept_class_name varchar(255) NOT NULL, + concept_class_concept_id integer NOT NULL ); + +CREATE TABLE vocabulary.concept_relationship ( + concept_id_1 integer NOT NULL, + concept_id_2 integer NOT NULL, + relationship_id varchar(20) NOT NULL, + valid_start_date date NOT NULL, + valid_end_date date NOT NULL, + invalid_reason varchar(1) NULL ); + +CREATE TABLE vocabulary.relationship ( + relationship_id varchar(20) NOT NULL, + relationship_name varchar(255) NOT NULL, + is_hierarchical varchar(1) NOT NULL, + defines_ancestry varchar(1) NOT NULL, + reverse_relationship_id varchar(20) NOT NULL, + relationship_concept_id integer NOT NULL ); + +CREATE TABLE vocabulary.concept_synonym ( + concept_id integer NOT NULL, + concept_synonym_name varchar(1000) NOT NULL, + language_concept_id integer NOT NULL ); + +CREATE TABLE vocabulary.concept_ancestor ( + ancestor_concept_id integer NOT NULL, + descendant_concept_id integer NOT NULL, + min_levels_of_separation integer NOT NULL, + max_levels_of_separation integer NOT NULL ); + +CREATE TABLE vocabulary.source_to_concept_map ( + source_code varchar(50) NOT NULL, + source_concept_id integer NOT NULL, + source_vocabulary_id varchar(20) NOT NULL, + source_code_description varchar(255) NULL, + target_concept_id integer NOT NULL, + target_vocabulary_id varchar(20) NOT NULL, + valid_start_date date NOT NULL, + valid_end_date date NOT NULL, + invalid_reason varchar(1) NULL ); + +CREATE TABLE vocabulary.drug_strength ( + drug_concept_id integer NOT NULL, + ingredient_concept_id integer NOT NULL, + amount_value NUMERIC 
NULL, + amount_unit_concept_id integer NULL, + numerator_value NUMERIC NULL, + numerator_unit_concept_id integer NULL, + denominator_value NUMERIC NULL, + denominator_unit_concept_id integer NULL, + box_size integer NULL, + valid_start_date date NOT NULL, + valid_end_date date NOT NULL, + invalid_reason varchar(1) NULL ); + +CREATE TABLE vocabulary.temp_vocabulary_data ( + vocabulary_id varchar(20) NOT NULL, + vocabulary_name varchar(255) NULL, + vocabulary_reference varchar(255) NULL, + vocabulary_version varchar(255) NULL, + vocabulary_concept_id int4 NULL +); + +-- ADD GENERAL VOCABULARY CONCEPTS FOR GIS & SDOH + + +\COPY vocabulary.temp_vocabulary_data FROM '/csv/gis_vocabulary_fragment.csv' DELIMITER ',' CSV HEADER; +-- Insert new vocabulary concept_ids (that are not in vocabulary) into concept table +INSERT INTO vocabulary.concept +SELECT vocabulary_concept_id AS concept_id + , vocabulary_name AS concept_name + , 'Metadata' AS domain_id + , 'Vocabulary' AS vocabulary_id + , 'Vocabulary' AS concept_class_id + , NULL AS standard_concept + , 'OMOP generated' AS concept_code + , '1970-01-01' AS valid_start_date + , '2099-12-31' AS valid_end_date + , NULL AS invalid_reason +FROM vocabulary.temp_vocabulary_data +WHERE vocabulary_id NOT IN ( + SELECT vocabulary_id + FROM vocabulary.vocabulary +); +INSERT INTO vocabulary.vocabulary SELECT * FROM vocabulary.temp_vocabulary_data WHERE vocabulary_id NOT IN (SELECT vocabulary_id FROM vocabulary.vocabulary); + +-- ADD CONCEPT_CLASSES +CREATE TABLE vocabulary.temp_concept_class_data ( + concept_class_id varchar(20) NOT NULL, + concept_class_name varchar(255) NULL, + concept_class_concept_id int4 NULL +); +\COPY vocabulary.temp_concept_class_data FROM '/csv/gis_concept_class_fragment.csv' DELIMITER ',' CSV HEADER; +-- Insert new concept_class concept_ids (that are not in concept_class) into concept table +INSERT INTO vocabulary.concept +SELECT concept_class_concept_id AS concept_id + , concept_class_name AS concept_name + , 
'Metadata' AS domain_id + , 'Concept Class' AS vocabulary_id + , 'Concept Class' AS concept_class_id + , NULL AS standard_concept + , 'OMOP generated' AS concept_code + , '1970-01-01' AS valid_start_date + , '2099-12-31' AS valid_end_date + , NULL AS invalid_reason +FROM vocabulary.temp_concept_class_data +WHERE concept_class_id NOT IN ( + SELECT concept_class_id + FROM vocabulary.concept_class +); + +INSERT INTO vocabulary.concept_class SELECT * FROM vocabulary.temp_concept_class_data WHERE concept_class_id NOT IN (SELECT concept_class_id FROM vocabulary.concept_class); + +-- ADD DOMAINS +CREATE TABLE vocabulary.temp_domain_data ( + domain_id varchar(20) NOT NULL, + domain_name varchar(255) NULL, + domain_concept_id int4 NULL +); +\COPY vocabulary.temp_domain_data FROM '/csv/gis_domain_fragment.csv' DELIMITER ',' CSV HEADER; +-- Insert new domain concept_ids (that are not in domain) into concept table +INSERT INTO vocabulary.concept +SELECT domain_concept_id AS concept_id + , domain_name AS concept_name + , 'Metadata' AS domain_id + , 'Domain' AS vocabulary_id + , 'Domain' AS concept_class_id + , NULL AS standard_concept + , 'OMOP generated' AS concept_code + , '1970-01-01' AS valid_start_date + , '2099-12-31' AS valid_end_date + , NULL AS invalid_reason +FROM vocabulary.temp_domain_data +WHERE domain_id NOT IN ( + SELECT domain_id + FROM vocabulary.domain +); +INSERT INTO vocabulary.domain SELECT * FROM vocabulary.temp_domain_data WHERE domain_id NOT IN (SELECT domain_id FROM vocabulary.domain); + +-- ADD CONCEPTS +CREATE TABLE vocabulary.temp_concept_data ( + concept_id integer NULL, + concept_name text NULL, + domain_id text NULL, + vocabulary_id text NULL, + concept_class_id text NULL, + standard_concept text NULL, + concept_code text NULL, + valid_start_date date NULL, + valid_end_date date NULL, + invalid_reason text NULL +); +\COPY vocabulary.temp_concept_data FROM '/csv/gis_concept_fragment.csv' DELIMITER ',' CSV HEADER; + +INSERT INTO vocabulary.concept 
+SELECT concept_id + , LEFT(concept_name, 255) + , domain_id + , vocabulary_id + , concept_class_id + , standard_concept + , concept_code + , valid_start_date + , valid_end_date + , invalid_reason +FROM vocabulary.temp_concept_data +; +-- INSERT INTO vocabulary.concept SELECT * FROM vocabulary.temp_concept_data; + +-- ADD RELATIONSHIPS +CREATE TABLE vocabulary.temp_relationship_data ( + relationship_id varchar(20) NOT NULL, + relationship_name varchar(255) NULL, + is_hierarchical varchar(1) NULL, + defines_ancestry varchar(1) NULL, + reverse_relationship_id varchar(20) NULL, + relationship_concept_id int4 NULL +); +\COPY vocabulary.temp_relationship_data FROM '/csv/gis_relationship_fragment.csv' DELIMITER ',' CSV HEADER; +-- Insert new relationship concept_ids (that are not in relationship) into concept table +INSERT INTO vocabulary.concept +SELECT relationship_concept_id AS concept_id + , relationship_name AS concept_name + , 'Metadata' AS domain_id + , 'Relationship' AS vocabulary_id + , 'Relationship' AS concept_class_id + , NULL AS standard_concept + , 'OMOP generated' AS concept_code + , '1970-01-01' AS valid_start_date + , '2099-12-31' AS valid_end_date + , NULL AS invalid_reason +FROM vocabulary.temp_relationship_data; +INSERT INTO vocabulary.relationship SELECT * FROM vocabulary.temp_relationship_data; + +-- ADD CONCEPT_RELATIONSHIPS +CREATE TABLE vocabulary.temp_concept_relationship_data ( + concept_id_1 int4 NULL, + concept_id_2 int4 NULL, + concept_code_1 text NULL, + concept_code_2 text NULL, + vocabulary_id_1 text NULL, + vocabulary_id_2 text NULL, + relationship_id text NULL, + valid_start_date date NULL, + valid_end_date date NULL, + invalid_reason text NULL +); + + +\COPY vocabulary.temp_concept_relationship_data FROM '/csv/gis_concept_relationship_fragment.csv' DELIMITER ',' CSV HEADER; + +INSERT INTO vocabulary.concept_relationship +SELECT concept_id_1 + , concept_id_2 + , relationship_id + , valid_start_date + , valid_end_date + , invalid_reason 
+FROM vocabulary.temp_concept_relationship_data; + +-- ADD REVERSE CONCEPT_RELATIONSHIPS (WHERE MISSING) +INSERT INTO vocabulary.concept_relationship +select rev.* +from ( + select cr.concept_id_2 as concept_id_1 + , cr.concept_id_1 as concept_id_2 + , r.reverse_relationship_id as relationship_id + , cr.valid_start_date + , cr.valid_end_date + , cr.invalid_reason + from vocabulary.concept_relationship cr + inner join vocabulary.relationship r + on cr.relationship_id = r.relationship_id + and cr.concept_id_1 > 2000000000 +) rev +left join ( + select * + from vocabulary.concept_relationship + where concept_id_1 > 2000000000 +) orig +on rev.concept_id_1 = orig.concept_id_1 +and rev.concept_id_2 = orig.concept_id_2 +and rev.relationship_id = orig.relationship_id +where orig.concept_id_1 is NULL; + + +-- Drop all temporary tables +DROP TABLE vocabulary.temp_concept_data; +DROP TABLE vocabulary.temp_concept_relationship_data; +DROP TABLE vocabulary.temp_concept_class_data; +DROP TABLE vocabulary.temp_domain_data; +DROP TABLE vocabulary.temp_relationship_data; +DROP TABLE vocabulary.temp_vocabulary_data; + +ALTER TABLE vocabulary.concept + ADD CONSTRAINT xpk_concept PRIMARY KEY (concept_id); +ALTER TABLE vocabulary.vocabulary + ADD CONSTRAINT xpk_vocabulary PRIMARY KEY (vocabulary_id); +ALTER TABLE vocabulary.domain + ADD CONSTRAINT xpk_domain PRIMARY KEY (domain_id); +ALTER TABLE vocabulary.concept_class + ADD CONSTRAINT xpk_concept_class PRIMARY KEY (concept_class_id); +ALTER TABLE vocabulary.concept_relationship + ADD CONSTRAINT xpk_concept_relationship PRIMARY KEY (concept_id_1, concept_id_2, relationship_id); +ALTER TABLE vocabulary.relationship + ADD CONSTRAINT xpk_relationship PRIMARY KEY (relationship_id); +ALTER TABLE vocabulary.concept_ancestor + ADD CONSTRAINT xpk_concept_ancestor PRIMARY KEY (ancestor_concept_id, descendant_concept_id); +ALTER TABLE vocabulary.source_to_concept_map + ADD CONSTRAINT xpk_source_to_concept_map PRIMARY KEY (source_vocabulary_id, 
target_concept_id, source_code, valid_end_date); +ALTER TABLE vocabulary.drug_strength + ADD CONSTRAINT xpk_drug_strength PRIMARY KEY (drug_concept_id, ingredient_concept_id); + +-- constraints +CREATE UNIQUE INDEX idx_concept_concept_id ON vocabulary.concept (concept_id ASC); +CLUSTER +vocabulary.concept USING idx_concept_concept_id; +CREATE INDEX idx_concept_code ON vocabulary.concept (concept_code ASC); +CREATE INDEX idx_concept_vocabluary_id ON vocabulary.concept (vocabulary_id ASC); +CREATE INDEX idx_concept_domain_id ON vocabulary.concept (domain_id ASC); +CREATE INDEX idx_concept_class_id ON vocabulary.concept (concept_class_id ASC); +CREATE INDEX idx_concept_id_varchar ON vocabulary.concept (cast(concept_id AS VARCHAR)); +CREATE UNIQUE INDEX idx_vocabulary_vocabulary_id ON vocabulary.vocabulary (vocabulary_id ASC); +CLUSTER +vocabulary.vocabulary USING idx_vocabulary_vocabulary_id; +CREATE UNIQUE INDEX idx_domain_domain_id ON vocabulary.domain (domain_id ASC); +CLUSTER +vocabulary.domain USING idx_domain_domain_id; +CREATE UNIQUE INDEX idx_concept_class_class_id ON vocabulary.concept_class (concept_class_id ASC); +CLUSTER +vocabulary.concept_class USING idx_concept_class_class_id; +CREATE INDEX idx_concept_relationship_id_1 ON vocabulary.concept_relationship (concept_id_1 ASC); +CREATE INDEX idx_concept_relationship_id_2 ON vocabulary.concept_relationship (concept_id_2 ASC); +CREATE INDEX idx_concept_relationship_id_3 ON vocabulary.concept_relationship (relationship_id ASC); +CREATE UNIQUE INDEX idx_relationship_rel_id ON vocabulary.relationship (relationship_id ASC); +CLUSTER +vocabulary.relationship USING idx_relationship_rel_id; +CREATE INDEX idx_concept_synonym_id ON vocabulary.concept_synonym (concept_id ASC); +CLUSTER +vocabulary.concept_synonym USING idx_concept_synonym_id; +CREATE INDEX idx_concept_ancestor_id_1 ON vocabulary.concept_ancestor (ancestor_concept_id ASC); +CLUSTER +vocabulary.concept_ancestor USING idx_concept_ancestor_id_1; +CREATE 
INDEX idx_concept_ancestor_id_2 ON vocabulary.concept_ancestor (descendant_concept_id ASC); +CREATE INDEX idx_source_to_concept_map_id_3 ON vocabulary.source_to_concept_map (target_concept_id ASC); +CLUSTER +vocabulary.source_to_concept_map USING idx_source_to_concept_map_id_3; +CREATE INDEX idx_source_to_concept_map_id_1 ON vocabulary.source_to_concept_map (source_vocabulary_id ASC); +CREATE INDEX idx_source_to_concept_map_id_2 ON vocabulary.source_to_concept_map (target_vocabulary_id ASC); +CREATE INDEX idx_source_to_concept_map_code ON vocabulary.source_to_concept_map (source_code ASC); +CREATE INDEX idx_drug_strength_id_1 ON vocabulary.drug_strength (drug_concept_id ASC); +CLUSTER +vocabulary.drug_strength USING idx_drug_strength_id_1; +CREATE INDEX idx_drug_strength_id_2 ON vocabulary.drug_strength (ingredient_concept_id ASC); + +-- foreign key constraints +ALTER TABLE vocabulary.concept + ADD CONSTRAINT fpk_concept_domain FOREIGN KEY (domain_id) REFERENCES vocabulary.domain (domain_id); +ALTER TABLE vocabulary.concept + ADD CONSTRAINT fpk_concept_class FOREIGN KEY (concept_class_id) REFERENCES vocabulary.concept_class (concept_class_id); +ALTER TABLE vocabulary.concept + ADD CONSTRAINT fpk_concept_vocabulary FOREIGN KEY (vocabulary_id) REFERENCES vocabulary.vocabulary (vocabulary_id); +ALTER TABLE vocabulary.vocabulary + ADD CONSTRAINT fpk_vocabulary_concept FOREIGN KEY (vocabulary_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.domain + ADD CONSTRAINT fpk_domain_concept FOREIGN KEY (domain_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.concept_class + ADD CONSTRAINT fpk_concept_class_concept FOREIGN KEY (concept_class_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.concept_relationship + ADD CONSTRAINT fpk_concept_relationship_c_1 FOREIGN KEY (concept_id_1) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.concept_relationship + ADD CONSTRAINT 
fpk_concept_relationship_c_2 FOREIGN KEY (concept_id_2) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.concept_relationship + ADD CONSTRAINT fpk_concept_relationship_id FOREIGN KEY (relationship_id) REFERENCES vocabulary.relationship (relationship_id); +ALTER TABLE vocabulary.relationship + ADD CONSTRAINT fpk_relationship_concept FOREIGN KEY (relationship_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.relationship + ADD CONSTRAINT fpk_relationship_reverse FOREIGN KEY (reverse_relationship_id) REFERENCES vocabulary.relationship (relationship_id); +ALTER TABLE vocabulary.concept_synonym + ADD CONSTRAINT fpk_concept_synonym_concept FOREIGN KEY (concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.concept_synonym + ADD CONSTRAINT fpk_concept_synonym_language_concept FOREIGN KEY (language_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.concept_ancestor + ADD CONSTRAINT fpk_concept_ancestor_concept_1 FOREIGN KEY (ancestor_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.concept_ancestor + ADD CONSTRAINT fpk_concept_ancestor_concept_2 FOREIGN KEY (descendant_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.source_to_concept_map + ADD CONSTRAINT fpk_source_to_concept_map_v_1 FOREIGN KEY (source_vocabulary_id) REFERENCES vocabulary.vocabulary (vocabulary_id); +ALTER TABLE vocabulary.source_to_concept_map + ADD CONSTRAINT fpk_source_to_concept_map_v_2 FOREIGN KEY (target_vocabulary_id) REFERENCES vocabulary.vocabulary (vocabulary_id); +ALTER TABLE vocabulary.source_to_concept_map + ADD CONSTRAINT fpk_source_to_concept_map_c_1 FOREIGN KEY (target_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.drug_strength + ADD CONSTRAINT fpk_drug_strength_concept_1 FOREIGN KEY (drug_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.drug_strength + ADD 
CONSTRAINT fpk_drug_strength_concept_2 FOREIGN KEY (ingredient_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.drug_strength + ADD CONSTRAINT fpk_drug_strength_unit_1 FOREIGN KEY (amount_unit_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.drug_strength + ADD CONSTRAINT fpk_drug_strength_unit_2 FOREIGN KEY (numerator_unit_concept_id) REFERENCES vocabulary.concept (concept_id); +ALTER TABLE vocabulary.drug_strength + ADD CONSTRAINT fpk_drug_strength_unit_3 FOREIGN KEY (denominator_unit_concept_id) REFERENCES vocabulary.concept (concept_id); diff --git a/docs/data/data_source_example.csv b/docs/data/data_source_example.csv index 5ffb55fb..113b7bec 100644 --- a/docs/data/data_source_example.csv +++ b/docs/data/data_source_example.csv @@ -3,4 +3,4 @@ 911,Census,TIGER/Line,25-tract-2018,2018,polygon,"""{""stage_transform"":[""dplyr::mutate(staged,geom_source_coding='GEOID',geom_source_value=GEOID,geom_name=dplyr::select(sf::st_drop_geometry(staged), n = if('NAME' %in% colnames(staged)) 'NAME' else 'NAMELSAD')$n,geom_local_value=geometry,geom_local_epsg=sf::st_crs(staged, paramaters=TRUE)$epsg)""]}""",tract,0,file,zip,shp,tl_2018_25_tract.shp,https://www2.census.gov/geo/tiger/TIGER2018/TRACT/tl_2018_25_tract.zip,,https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html 1234,Census,TIGER/Line,us-county-2018,2018,polygon,"{""stage_transform"":[""dplyr::mutate(staged,geom_source_coding='GEOID',geom_source_value=GEOID,geom_name=dplyr::select(sf::st_drop_geometry(staged), n = if('NAME' %in% colnames(staged)) 'NAME' else 'NAMELSAD')$n,geom_local_value=geometry,geom_local_epsg=sf::st_crs(staged, paramaters=TRUE)$epsg)""]}",county,0,file,zip,shp,tl_2018_us_county.shp,https://www2.census.gov/geo/tiger/TIGER2018/COUNTY/tl_2018_us_county.zip,,https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html 
7777,EPA,AQS,annual_conc_by_monitor_2018,2018,point,,,1,file,zip,csv,annual_conc_by_monitor_2018.csv,https://aqs.epa.gov/aqsweb/airdata/annual_conc_by_monitor_2018.zip,,https://www.epa.gov/aqs -7778,EPA,AQS,annual_conc_by_monitor_2018,2018,point,"{""stage_transform"":[""dplyr::mutate(staged,geom_source_coding='state_count_site',geom_source_value=paste0(staged$State.Code, '_', staged$County.Code, '_', staged$Site.Num),geom_name=Local.Site.Name,geom_local_value=sf::st_as_binary(sf::st_as_sf(staged, coords=c('Latitude', 'Longitude'))$geometry,EWKB=TRUE, hex=TRUE),geom_local_epsg=dplyr::left_join(staged['Datum'], tibble(Datum = c('NAD83', 'WGS84'), epsg = c(4326, 4269)))[['epsg']])""]}",,1,file,zip,csv,annual_conc_by_monitor_2018.csv,https://aqs.epa.gov/aqsweb/airdata/annual_conc_by_monitor_2018.zip,,https://www.epa.gov/aqs +7778,EPA,AQS,annual_conc_by_monitor_2018,2018,point,"{""stage_transform"":[""dplyr::mutate(staged,geom_source_coding='state_count_site',geom_source_value=paste0(staged$State.Code, '_', staged$County.Code, '_', staged$Site.Num),geom_name=Local.Site.Name,geom_local_value=sf::st_as_sf(staged, coords=c('Longitude', 'Latitude'))$geometry,geom_local_epsg=dplyr::left_join(staged['Datum'], tibble(Datum = c('NAD83', 'WGS84'), epsg = c(4326, 4269)))[['epsg']])""]}",,1,file,zip,csv,annual_conc_by_monitor_2018.csv,https://aqs.epa.gov/aqsweb/airdata/annual_conc_by_monitor_2018.zip,,https://www.epa.gov/aqs diff --git a/docs/fw-gaia-catalog.html b/docs/fw-gaia-catalog.html index e82ce1b0..26e2f989 100644 --- a/docs/fw-gaia-catalog.html +++ b/docs/fw-gaia-catalog.html @@ -11,9 +11,9 @@ -fw-gaia-catalog.knit + OHDSI GIS WG - + @@ -375,9 +375,8 @@ -

- OHDSI GIS WG -

+

OHDSI GIS +WG

diff --git a/docs/fw-gaia-core.html b/docs/fw-gaia-core.html index 9363dbb3..0d218fb1 100644 --- a/docs/fw-gaia-core.html +++ b/docs/fw-gaia-core.html @@ -11,9 +11,9 @@ -fw-gaia-core.knit + OHDSI GIS WG - + @@ -375,9 +375,8 @@ -

- OHDSI GIS WG -

+

OHDSI GIS +WG

diff --git a/docs/fw-gaia-extensions-ohdsi.html b/docs/fw-gaia-extensions-ohdsi.html index 0f2ff783..df9a454c 100644 --- a/docs/fw-gaia-extensions-ohdsi.html +++ b/docs/fw-gaia-extensions-ohdsi.html @@ -11,9 +11,9 @@ -fw-gaia-extensions-ohdsi.knit + OHDSI GIS WG - + @@ -457,9 +457,8 @@ -

- OHDSI GIS WG -

+

OHDSI GIS +WG

diff --git a/docs/fw-gaia-extensions.html b/docs/fw-gaia-extensions.html index f5c6fbe5..79221d3b 100644 --- a/docs/fw-gaia-extensions.html +++ b/docs/fw-gaia-extensions.html @@ -11,9 +11,9 @@ -fw-gaia-extensions.knit + OHDSI GIS WG - + @@ -375,9 +375,8 @@ -

- OHDSI GIS WG -

+

OHDSI GIS +WG

diff --git a/docs/fw-overview.html b/docs/fw-overview.html index 90284bb5..fb851bdb 100644 --- a/docs/fw-overview.html +++ b/docs/fw-overview.html @@ -11,9 +11,9 @@ -fw-overview.knit + OHDSI GIS WG - + @@ -375,9 +375,8 @@ -

- OHDSI GIS WG -

+

OHDSI GIS +WG

diff --git a/docs/gaia-datamodel-information-flow.html b/docs/gaia-datamodel-information-flow.html index 1199d59a..6f252d34 100644 --- a/docs/gaia-datamodel-information-flow.html +++ b/docs/gaia-datamodel-information-flow.html @@ -11,9 +11,9 @@ -gaia-datamodel-information-flow.knit + OHDSI GIS WG - + @@ -457,9 +457,8 @@ -

- OHDSI GIS WG -

+

OHDSI GIS +WG

@@ -467,7 +466,7 @@

- + @@ -451,9 +451,8 @@ -

- OHDSI GIS WG -

+

OHDSI GIS +WG

diff --git a/docs/gaia-goals.html b/docs/gaia-goals.html index e0f18a25..b2bfb141 100644 --- a/docs/gaia-goals.html +++ b/docs/gaia-goals.html @@ -11,9 +11,9 @@ -gaia-goals.knit + OHDSI GIS WG - + @@ -457,9 +457,8 @@ -

- OHDSI GIS WG -

+

OHDSI GIS +WG

diff --git a/docs/gaia-intro.html b/docs/gaia-intro.html index 12ac4832..41962394 100644 --- a/docs/gaia-intro.html +++ b/docs/gaia-intro.html @@ -11,9 +11,9 @@ -gaia-intro.knit + OHDSI GIS WG - + @@ -457,9 +457,8 @@ -

- OHDSI GIS WG -

+

OHDSI GIS +WG

diff --git a/docs/gaia-principles.html b/docs/gaia-principles.html index 041721db..0afa36c7 100644 --- a/docs/gaia-principles.html +++ b/docs/gaia-principles.html @@ -11,9 +11,9 @@ -gaia-principles.knit + Gaia Design - + @@ -457,9 +457,8 @@ -

- Gaia Design -

+

Gaia +Design

diff --git a/docs/gaia-terminology.html b/docs/gaia-terminology.html index 53c636d8..10aaed6e 100644 --- a/docs/gaia-terminology.html +++ b/docs/gaia-terminology.html @@ -11,9 +11,9 @@ -gaia-terminology.knit + OHDSI GIS WG Working Terminology - + @@ -457,9 +457,8 @@ -

- OHDSI GIS WG Working Terminology -

+

OHDSI GIS +WG Working Terminology

diff --git a/docs/gaiaCore/404.html b/docs/gaiaCore/404.html index a9019179..23e4772d 100644 --- a/docs/gaiaCore/404.html +++ b/docs/gaiaCore/404.html @@ -7,53 +7,46 @@ Page not found (404) • gaiaCore - - - - - + + + + Skip to contents - -