From 546f3ee486035eadc44cce2d6883533a41e798f9 Mon Sep 17 00:00:00 2001
From: Alyssa Dai
Date: Thu, 11 Apr 2024 11:03:13 -0400
Subject: [PATCH] [ENH] Add single docker compose recipe for all deployment flavours (#32)

* create full-stack Docker Compose file

Co-authored-by: Alyssa Dai
Co-authored-by: Sebastian Urchs

* Added `template.env` file
* Wait specifically for the graphDB process ID
* Use Docker service name for local node
* Add profiles
  - "dev" replaces the old behaviour
  - other profile names replace the files in the other subdirectories
  - start with `docker compose --profile <profile> up`
* Make the local data directory configurable
* upload vocab from separate directory + more progress messages
* add data-config.ttl to gitignore
* fix default value and mounting of data/
* replace hardcoded params inside setup.sh with env vars
* set default profile in .env
* write output of setup.sh to log
* rename dev profile and update README
* write log to script dir rather than working dir

---------

Co-authored-by: rmanaem
Co-authored-by: Sebastian Urchs
Co-authored-by: Sebastian Urchs
---
 .gitignore | 5 +
 README.md | 50 +++++
 dev/add_data_to_graph.sh | 244 ++++++++++++++++++++++
 dev/data-config_template.ttl | 49 +++++
 dev/data/example_synthetic.jsonld | 331 ++++++++++++++++++++++++++++++
 dev/docker-compose.yml | 61 ++++++
 dev/graphdb_setup.sh | 200 ++++++++++++++++++
 dev/local_nb_nodes.template.json | 6 +
 dev/setup.sh | 37 ++++
 dev/template.env | 45 ++++
 dev/vocab/nb_vocab.ttl | 37 ++++
 11 files changed, 1065 insertions(+)
 create mode 100755 dev/add_data_to_graph.sh
 create mode 100644 dev/data-config_template.ttl
 create mode 100644 dev/data/example_synthetic.jsonld
 create mode 100644 dev/docker-compose.yml
 create mode 100755 dev/graphdb_setup.sh
 create mode 100644 dev/local_nb_nodes.template.json
 create mode 100755 dev/setup.sh
 create mode 100644 dev/template.env
 create mode 100644 dev/vocab/nb_vocab.ttl

diff --git a/.gitignore b/.gitignore
index 0afcf87..1c3aece 100644
--- a/.gitignore
+++ b/.gitignore
@@ -143,6 +143,11 @@ venv.bak/
 .dmypy.json
 dmypy.json
 
+# Neurobagel
+# RDF data configuration file, created automatically during setup from data-config_template.ttl based on .env values
+data-config.ttl
+local_nb_nodes.json
+
 # Pyre type checker
 .pyre/
 
diff --git a/README.md b/README.md
index 08ab50f..29446eb 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,56 @@ Configuration files for a Neurobagel deployment.
 ## How to use
 For detailed instructions on the deployment options for Neurobagel, see the official Neurobagel documentation on [setting up a local knowledge graph (node)](https://neurobagel.org/infrastructure/) and [local query federation](https://neurobagel.org/federate/).
 
+### Using the full-stack Docker Compose file
+
+1. Clone the repository
+```bash
+git clone https://github.com/neurobagel/recipes.git
+```
+
+2. `cd` into the directory containing the Neurobagel deployment recipe
+
+```bash
+cd recipes/dev
+```
+
+3. Copy and rename the template files in the directory
+```bash
+cp template.env .env
+
+# if also setting up local federation
+cp local_nb_nodes.template.json local_nb_nodes.json
+```
+Be sure to edit the copied file(s) to match your deployment.
+
+4. Start the Docker Compose stack and specify your desired deployment profile
+
+    **To set up only a local node:**
+    ```bash
+    docker compose up -d
+    ```
+    or
+    ```bash
+    docker compose --profile local_node up -d
+    ```
+
+    **To set up a local node with a graphical query tool:**
+    ```bash
+    docker compose --profile local_node_query up -d
+    ```
+
+    **To set up a local node and local federation (including a graphical query tool):**
+    ```bash
+    docker compose --profile full_stack up -d
+    ```
+A log file `DEPLOY.log` will be automatically created in the current directory, containing a copy of the STDOUT from the automatic deployment process.
+
+### Using deployment type-specific Docker Compose files
+
+**Note: These instructions will soon be superseded by the full-stack Docker Compose file instructions above.**
+
 1. Clone the repository
 ```bash
 git clone https://github.com/neurobagel/recipes.git
diff --git a/dev/add_data_to_graph.sh b/dev/add_data_to_graph.sh
new file mode 100755
index 0000000..869d13b
--- /dev/null
+++ b/dev/add_data_to_graph.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+#
+# ARG_HELP([Upload JSONLD and Turtle data to a Neurobagel graph])
+# ARG_POSITIONAL_SINGLE([dir],[Path to directory containing .jsonld and/or .ttl files. ALL .jsonld and .ttl files in this directory will be uploaded.])
+# ARG_POSITIONAL_SINGLE([graph-url],[Host and port at which to access the graph database to add data to (e.g., localhost:7200)])
+# ARG_POSITIONAL_SINGLE([graph-database],[Name of graph database to add data to])
+# ARG_POSITIONAL_SINGLE([user],[Username for graph database access])
+# ARG_POSITIONAL_SINGLE([password],[Password for graph database user])
+# ARG_OPTIONAL_BOOLEAN([clear-data],[],[Whether or not to first clear all existing data from the graph database],[off])
+# ARG_OPTIONAL_BOOLEAN([use-stardog-syntax],[],[Set to use Stardog API endpoints to update the specified graph database. If unset, assumes the graph database is a GraphDB database.],[off])
+# ARG_OPTIONAL_BOOLEAN([log-output],[],[Whether or not to write the output to a log file],[off])
+# ARG_OPTIONAL_SINGLE([log-file],[],[Path to the log file],[LOG.txt])
+# ARGBASH_GO()
+# needed because of Argbash --> m4_ignore([
+### START OF CODE GENERATED BY Argbash v2.9.0 one line above ###
+# Argbash is a bash code generator used to get arguments parsing right.
+# Argbash is FREE SOFTWARE, see https://argbash.io for more info
+# Generated online by https://argbash.io/generate
+
+
+die()
+{
+	local _ret="${2:-1}"
+	test "${_PRINT_HELP:-no}" = yes && print_help >&2
+	echo "$1" >&2
+	exit "${_ret}"
+}
+
+
+begins_with_short_option()
+{
+	local first_option all_short_options='h'
+	first_option="${1:0:1}"
+	test "$all_short_options" = "${all_short_options/$first_option/}" && return 1 || return 0
+}
+
+# THE DEFAULTS INITIALIZATION - POSITIONALS
+_positionals=()
+# THE DEFAULTS INITIALIZATION - OPTIONALS
+_arg_clear_data="off"
+_arg_use_stardog_syntax="off"
+
+_arg_log_output="off"
+_arg_log_file="LOG.txt"
+
+print_help()
+{
+	printf '%s\n' "Upload JSONLD and Turtle data to a Neurobagel graph"
+	printf 'Usage: %s [-h|--help] [--(no-)clear-data] [--(no-)use-stardog-syntax] [--(no-)log-output] [--log-file <arg>] <dir> <graph-url> <graph-database> <user> <password>\n' "$0"
+	printf '\t%s\n' "<dir>: Path to directory containing .jsonld and/or .ttl files. ALL .jsonld and .ttl files in this directory will be uploaded."
+ printf '\t%s\n' ": Host and port at which to access the graph database to add data to (e.g., localhost:7200)" + printf '\t%s\n' ": Name of graph database to add data to" + printf '\t%s\n' ": Username for graph database access" + printf '\t%s\n' ": Password for graph database user" + printf '\t%s\n' "-h, --help: Prints help" + printf '\t%s\n' "--clear-data, --no-clear-data: Whether or not to first clear all existing data from the graph database (off by default)" + printf '\t%s\n' "--use-stardog-syntax, --no-use-stardog-syntax: Set to use Stardog API endpoints to update the specified graph database. If unset, assumes the graph database is a GraphDB database. (off by default)" + printf '\t%s\n' "--log-output, --no-log-output: Whether or not to write the output to a log file (off by default)" + printf '\t%s\n' "--log-file: Path to the log file (default: 'LOG.txt')" +} + + +parse_commandline() +{ + _positionals_count=0 + while test $# -gt 0 + do + _key="$1" + case "$_key" in + -h|--help) + print_help + exit 0 + ;; + -h*) + print_help + exit 0 + ;; + --no-clear-data|--clear-data) + _arg_clear_data="on" + test "${1:0:5}" = "--no-" && _arg_clear_data="off" + ;; + --no-use-stardog-syntax|--use-stardog-syntax) + _arg_use_stardog_syntax="on" + test "${1:0:5}" = "--no-" && _arg_use_stardog_syntax="off" + ;; + --no-log-output|--log-output) + _arg_log_output="on" + test "${1:0:5}" = "--no-" && _arg_log_output="off" + ;; + --log-file) + test $# -lt 2 && die "Missing value for the optional argument '$_key'." 1 + _arg_log_file="$2" + shift + ;; + --log-file=*) + _arg_log_file="${_key##--log-file=}" + ;; + *) + _last_positional="$1" + _positionals+=("$_last_positional") + _positionals_count=$((_positionals_count + 1)) + ;; + esac + shift + done +} + + +handle_passed_args_count() +{ + local _required_args_string="'dir', 'graph-url', 'graph-database', 'user' and 'password'" + test "${_positionals_count}" -ge 5 || _PRINT_HELP=yes die "FATAL ERROR: Not enough positional arguments - we require exactly 5 (namely: $_required_args_string), but got only ${_positionals_count}." 1 + test "${_positionals_count}" -le 5 || _PRINT_HELP=yes die "FATAL ERROR: There were spurious positional arguments --- we expect exactly 5 (namely: $_required_args_string), but got ${_positionals_count} (the last one was: '${_last_positional}')." 1 +} + + +assign_positional_args() +{ + local _positional_name _shift_for=$1 + _positional_names="_arg_dir _arg_graph_url _arg_graph_database _arg_user _arg_password " + + shift "$_shift_for" + for _positional_name in ${_positional_names} + do + test $# -gt 0 || break + eval "$_positional_name=\${1}" || die "Error during argument parsing, possibly an Argbash bug." 1 + shift + done +} + +parse_commandline "$@" +handle_passed_args_count +assign_positional_args 1 "${_positionals[@]}" + +# OTHER STUFF GENERATED BY Argbash + +### END OF CODE GENERATED BY Argbash (sortof) ### ]) +# [ <-- needed because of Argbash + + +# Reassign positional args to more readable named variables (https://argbash.readthedocs.io/en/latest/guide.html#using-parsing-results) +jsonld_dir=$_arg_dir +user=$_arg_user +password=$_arg_password +graph_db=$_arg_graph_database +graph_url=$_arg_graph_url +clear_data=$_arg_clear_data # value is either on or off (https://argbash.readthedocs.io/en/stable/guide.html#optional-arguments) +use_stardog_syntax=$_arg_use_stardog_syntax + +log_output=$_arg_log_output +log_file=$_arg_log_file + +DELETE_TRIPLES_QUERY=" +DELETE { + ?s ?p ?o . +} WHERE { + ?s ?p ?o . 
+}" + +# Depending on the graph backend used, set URLs for uploading data to and clearing data in graph database +base_url="http://${graph_url}/${graph_db}" +if [ "$use_stardog_syntax" = "on" ]; then + upload_data_url=$base_url + clear_data_url="${base_url}/update" +else + upload_data_url="${base_url}/statements" + clear_data_url=$upload_data_url +fi + +# Main logic +main() { + # Clear existing data in graph database if requested + if [ "$clear_data" = "on" ]; then + echo -e "\nCLEARING EXISTING DATA FROM ${graph_db}..." + + response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \ + -X POST $clear_data_url \ + -H "Content-Type: application/sparql-update" \ + --data-binary "${DELETE_TRIPLES_QUERY}") + + # Extract and check status code outputted as final line of response + httpcode=$(tail -n1 <<< "$response") + if (( $httpcode < 200 || $httpcode >= 300 )); then + echo -e "\nERROR: Failed to clear ${graph_db}:" + echo "$(sed '$d' <<< "$response")" + echo -e "\nEXITING..." + exit 1 + fi + fi + + # Add data to specified graph database + echo -e "\nUPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}...\n" + + upload_failed=() + + for db in ${jsonld_dir}/*.jsonld; do + # Prevent edge case where no matching files are present in directory and so loop executes once with glob pattern string itself + [ -e "$db" ] || continue + + echo "$(basename ${db}):" + response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \ + -X POST $upload_data_url \ + -H "Content-Type: application/ld+json" \ + --data-binary @${db}) + + httpcode=$(tail -n1 <<< "$response") + if (( $httpcode < 200 || $httpcode >= 300 )); then + upload_failed+=("${db}") + fi + # Print rest of response to stdout + echo -e "$(sed '$d' <<< "$response")\n" + done + + for file in ${jsonld_dir}/*.ttl; do + [ -e "$file" ] || continue + + echo "$(basename ${file}):" + response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \ + -X POST $upload_data_url \ + -H "Content-Type: text/turtle" \ + --data-binary @${file}) + + httpcode=$(tail -n1 <<< "$response") + if (( $httpcode < 200 || $httpcode >= 300 )); then + upload_failed+=("${file}") + fi + echo -e "$(sed '$d' <<< "$response")\n" + done + + echo "FINISHED UPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}." + + if (( ${#upload_failed[@]} != 0 )); then + echo -e "\nERROR: Upload failed for these files:" + printf '%s\n' "${upload_failed[@]}" + fi +} + +# Call the main logic function with or without output redirection +if [ "$log_output" = "on" ]; then + main > "$log_file" +else + main +fi +# ] <-- needed because of Argbash \ No newline at end of file diff --git a/dev/data-config_template.ttl b/dev/data-config_template.ttl new file mode 100644 index 0000000..386b706 --- /dev/null +++ b/dev/data-config_template.ttl @@ -0,0 +1,49 @@ +# +# RDF4J configuration template for a GraphDB repository +# +@prefix rdfs: . +@prefix rep: . +@prefix sr: . +@prefix sail: . +@prefix graphdb: . 
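+# NOTE: during setup, graphdb_setup.sh generates data-config.ttl from this template based on
+# the values in your .env file; the rep:repositoryID below ("my_db") is presumably replaced
+# with the database name taken from NB_GRAPH_DB.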
+ +[] a rep:Repository ; + rep:repositoryID "my_db" ; + rdfs:label "" ; + rep:repositoryImpl [ + rep:repositoryType "graphdb:SailRepository" ; + sr:sailImpl [ + sail:sailType "graphdb:Sail" ; + + graphdb:read-only "false" ; + + # Inference and Validation + graphdb:ruleset "rdfsplus-optimized" ; + graphdb:disable-sameAs "true" ; + graphdb:check-for-inconsistencies "false" ; + + # Indexing + graphdb:entity-id-size "32" ; + graphdb:enable-context-index "false" ; + graphdb:enablePredicateList "true" ; + graphdb:enable-fts-index "false" ; + graphdb:fts-indexes ("default" "iri") ; + graphdb:fts-string-literals-index "default" ; + graphdb:fts-iris-index "none" ; + + # Queries and Updates + graphdb:query-timeout "0" ; + graphdb:throw-QueryEvaluationException-on-timeout "false" ; + graphdb:query-limit-results "0" ; + + # Settable in the file but otherwise hidden in the UI and in the RDF4J console + graphdb:base-URL "http://example.org/owlim#" ; + graphdb:defaultNS "" ; + graphdb:imports "" ; + graphdb:repository-type "file-repository" ; + graphdb:storage-folder "storage" ; + graphdb:entity-index-size "10000000" ; + graphdb:in-memory-literal-properties "true" ; + graphdb:enable-literal-index "true" ; + ] + ]. \ No newline at end of file diff --git a/dev/data/example_synthetic.jsonld b/dev/data/example_synthetic.jsonld new file mode 100644 index 0000000..937951b --- /dev/null +++ b/dev/data/example_synthetic.jsonld @@ -0,0 +1,331 @@ +{ + "@context": { + "nb": "http://neurobagel.org/vocab/", + "snomed": "http://purl.bioontology.org/ontology/SNOMEDCT/", + "nidm": "http://purl.org/nidash/nidm#", + "cogatlas": "https://www.cognitiveatlas.org/task/id/", + "ncit": "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#", + "Acquisition": "nb:Acquisition", + "identifier": "@id", + "hasContrastType": { + "@id": "nb:hasContrastType" + }, + "schemaKey": "@type", + "Assessment": "nb:Assessment", + "Bagel": "nb:Bagel", + "BaseModel": "nb:BaseModel", + "ControlledTerm": "nb:ControlledTerm", + "Dataset": "nb:Dataset", + "hasLabel": { + "@id": "nb:hasLabel" + }, + "hasPortalURI": { + "@id": "nb:hasPortalURI" + }, + "hasSamples": { + "@id": "nb:hasSamples" + }, + "Diagnosis": "nb:Diagnosis", + "Image": "nb:Image", + "ImagingSession": "nb:ImagingSession", + "hasFilePath": { + "@id": "nb:hasFilePath" + }, + "hasAcquisition": { + "@id": "nb:hasAcquisition" + }, + "PhenotypicSession": "nb:PhenotypicSession", + "hasAge": { + "@id": "nb:hasAge" + }, + "hasSex": { + "@id": "nb:hasSex" + }, + "isSubjectGroup": { + "@id": "nb:isSubjectGroup" + }, + "hasDiagnosis": { + "@id": "nb:hasDiagnosis" + }, + "hasAssessment": { + "@id": "nb:hasAssessment" + }, + "Session": "nb:Session", + "Sex": "nb:Sex", + "Subject": "nb:Subject", + "hasSession": { + "@id": "nb:hasSession" + }, + "SubjectGroup": "nb:SubjectGroup" + }, + "identifier": "nb:e0c7d08c-edcc-4c2a-816a-306878ed7be2", + "hasLabel": "BIDS synthetic", + "hasPortalURI": "https://github.com/bids-standard/bids-examples", + "hasSamples": [ + { + "identifier": "nb:34ec1e2d-9a81-4a50-bcd0-eb22c88d11e1", + "hasLabel": "sub-01", + "hasSession": [ + { + "identifier": "nb:85c7473c-6122-4999-ad3b-5cd57a883c87", + "hasLabel": "ses-01", + "hasAge": 34.1, + "hasSex": { + "identifier": "snomed:248152002", + "schemaKey": "Sex" + }, + "isSubjectGroup": { + "identifier": "ncit:C94342", + "schemaKey": "SubjectGroup" + }, + "hasAssessment": [ + { + "identifier": "cogatlas:trm_57964b8a66aed", + "schemaKey": "Assessment" + }, + { + "identifier": "cogatlas:tsk_4a57abb949ece", + "schemaKey": "Assessment" 
+ } + ], + "schemaKey": "PhenotypicSession" + }, + { + "identifier": "nb:eb57d0c1-fb96-4c04-8c16-1f29f7f40db4", + "hasLabel": "ses-02", + "hasAge": 35.3, + "hasSex": { + "identifier": "snomed:248152002", + "schemaKey": "Sex" + }, + "isSubjectGroup": { + "identifier": "ncit:C94342", + "schemaKey": "SubjectGroup" + }, + "hasAssessment": [ + { + "identifier": "cogatlas:trm_57964b8a66aed", + "schemaKey": "Assessment" + }, + { + "identifier": "cogatlas:tsk_4a57abb949ece", + "schemaKey": "Assessment" + } + ], + "schemaKey": "PhenotypicSession" + } + ], + "schemaKey": "Subject" + }, + { + "identifier": "nb:2de41397-805f-4059-b464-388ef501d6e5", + "hasLabel": "sub-02", + "hasSession": [ + { + "identifier": "nb:0ce182f3-b47c-4b50-8b76-8f257e490cf3", + "hasLabel": "ses-01", + "hasSex": { + "identifier": "snomed:248153007", + "schemaKey": "Sex" + }, + "hasDiagnosis": [ + { + "identifier": "snomed:406506008", + "schemaKey": "Diagnosis" + } + ], + "hasAssessment": [ + { + "identifier": "cogatlas:trm_57964b8a66aed", + "schemaKey": "Assessment" + }, + { + "identifier": "cogatlas:tsk_4a57abb949ece", + "schemaKey": "Assessment" + } + ], + "schemaKey": "PhenotypicSession" + }, + { + "identifier": "nb:4871d5c2-963f-4825-ab43-85a87edac4ce", + "hasLabel": "ses-02", + "hasAge": 39.0, + "hasSex": { + "identifier": "snomed:248153007", + "schemaKey": "Sex" + }, + "hasDiagnosis": [ + { + "identifier": "snomed:406506008", + "schemaKey": "Diagnosis" + } + ], + "hasAssessment": [ + { + "identifier": "cogatlas:trm_57964b8a66aed", + "schemaKey": "Assessment" + }, + { + "identifier": "cogatlas:tsk_4a57abb949ece", + "schemaKey": "Assessment" + } + ], + "schemaKey": "PhenotypicSession" + } + ], + "schemaKey": "Subject" + }, + { + "identifier": "nb:d4258426-2971-43ff-92fa-4b20bd21c31f", + "hasLabel": "sub-03", + "hasSession": [ + { + "identifier": "nb:3f9c82d5-6648-4b9b-8f86-195ee9283875", + "hasLabel": "ses-01", + "hasAge": 22.1, + "hasAssessment": [ + { + "identifier": "cogatlas:trm_57964b8a66aed", + "schemaKey": "Assessment" + } + ], + "schemaKey": "PhenotypicSession" + }, + { + "identifier": "nb:0f7fe93b-13b1-4ca7-9e58-b997a18ec9b9", + "hasLabel": "ses-02", + "hasAge": 23.2, + "hasDiagnosis": [ + { + "identifier": "snomed:406506008", + "schemaKey": "Diagnosis" + } + ], + "hasAssessment": [ + { + "identifier": "cogatlas:trm_57964b8a66aed", + "schemaKey": "Assessment" + }, + { + "identifier": "cogatlas:tsk_4a57abb949ece", + "schemaKey": "Assessment" + } + ], + "schemaKey": "PhenotypicSession" + } + ], + "schemaKey": "Subject" + }, + { + "identifier": "nb:73193f71-cd19-4b8c-ae73-abc882e9b688", + "hasLabel": "sub-04", + "hasSession": [ + { + "identifier": "nb:962aa986-dbd7-4412-a180-49c0a24c9f87", + "hasLabel": "ses-01", + "hasAge": 21.1, + "hasSex": { + "identifier": "snomed:248152002", + "schemaKey": "Sex" + }, + "isSubjectGroup": { + "identifier": "ncit:C94342", + "schemaKey": "SubjectGroup" + }, + "hasAssessment": [ + { + "identifier": "cogatlas:tsk_4a57abb949ece", + "schemaKey": "Assessment" + } + ], + "schemaKey": "PhenotypicSession" + }, + { + "identifier": "nb:8622fc66-013d-4675-a328-fdcccd4616b0", + "hasLabel": "ses-02", + "hasAge": 22.3, + "hasSex": { + "identifier": "snomed:248152002", + "schemaKey": "Sex" + }, + "isSubjectGroup": { + "identifier": "ncit:C94342", + "schemaKey": "SubjectGroup" + }, + "hasAssessment": [ + { + "identifier": "cogatlas:trm_57964b8a66aed", + "schemaKey": "Assessment" + }, + { + "identifier": "cogatlas:tsk_4a57abb949ece", + "schemaKey": "Assessment" + } + ], + "schemaKey": 
"PhenotypicSession" + } + ], + "schemaKey": "Subject" + }, + { + "identifier": "nb:cb8d7a96-fdec-425c-b0b5-9e9ac3c70e8c", + "hasLabel": "sub-05", + "hasSession": [ + { + "identifier": "nb:1d7c30f5-cae9-43c9-a154-26dbe0029594", + "hasLabel": "ses-01", + "hasAge": 42.5, + "hasSex": { + "identifier": "snomed:248153007", + "schemaKey": "Sex" + }, + "hasDiagnosis": [ + { + "identifier": "snomed:406506008", + "schemaKey": "Diagnosis" + } + ], + "hasAssessment": [ + { + "identifier": "cogatlas:trm_57964b8a66aed", + "schemaKey": "Assessment" + }, + { + "identifier": "cogatlas:tsk_4a57abb949ece", + "schemaKey": "Assessment" + } + ], + "schemaKey": "PhenotypicSession" + }, + { + "identifier": "nb:2401e418-6f9b-4113-9511-b1b9a332a5d0", + "hasLabel": "ses-02", + "hasAge": 43.2, + "hasSex": { + "identifier": "snomed:248153007", + "schemaKey": "Sex" + }, + "hasDiagnosis": [ + { + "identifier": "snomed:406506008", + "schemaKey": "Diagnosis" + } + ], + "hasAssessment": [ + { + "identifier": "cogatlas:trm_57964b8a66aed", + "schemaKey": "Assessment" + }, + { + "identifier": "cogatlas:tsk_4a57abb949ece", + "schemaKey": "Assessment" + } + ], + "schemaKey": "PhenotypicSession" + } + ], + "schemaKey": "Subject" + } + ], + "schemaKey": "Dataset" +} \ No newline at end of file diff --git a/dev/docker-compose.yml b/dev/docker-compose.yml new file mode 100644 index 0000000..394e282 --- /dev/null +++ b/dev/docker-compose.yml @@ -0,0 +1,61 @@ +version: "3.8" + +services: + + api: + image: "neurobagel/api:${NB_NAPI_TAG:-latest}" + profiles: + - "local_node" + - "local_node_query" + - "full_stack" + ports: + - "${NB_NAPI_PORT_HOST:-8000}:${NB_NAPI_PORT:-8000}" + environment: + NB_GRAPH_USERNAME: ${NB_GRAPH_USERNAME} + NB_GRAPH_PASSWORD: ${NB_GRAPH_PASSWORD} + NB_GRAPH_ADDRESS: ${NB_GRAPH_ADDRESS:-graph} + NB_GRAPH_PORT: ${NB_GRAPH_PORT:-7200} + NB_GRAPH_DB: ${NB_GRAPH_DB:-repositories/my_db} + NB_RETURN_AGG: ${NB_RETURN_AGG:-true} + NB_API_PORT: ${NB_NAPI_PORT:-8000} + NB_API_ALLOWED_ORIGINS: ${NB_NAPI_ALLOWED_ORIGINS} + + graph: + image: "${NB_GRAPH_IMG:-ontotext/graphdb:10.3.1}" + profiles: + - "local_node" + - "local_node_query" + - "full_stack" + volumes: + - "${NB_GRAPH_ROOT_HOST:-~/graphdb-home}:${NB_GRAPH_ROOT_CONT:-/opt/graphdb/home}" + - "${PWD}:/usr/src" + - "${LOCAL_GRAPH_DATA:-./data}:/usr/src/data" + ports: + - "${NB_GRAPH_PORT_HOST:-7200}:${NB_GRAPH_PORT:-7200}" + env_file: .env + entrypoint: + - "/usr/src/setup.sh" + working_dir: "/usr/src" + + federation: + image: "neurobagel/federation_api:${NB_FAPI_TAG:-latest}" + profiles: + - "local_federation" + - "full_stack" + ports: + - "${NB_FAPI_PORT_HOST:-8080}:${NB_FAPI_PORT:-8000}" + volumes: + - "${PWD}/local_nb_nodes.json:/usr/src/local_nb_nodes.json:ro" + environment: + NB_API_PORT: ${NB_FAPI_PORT:-8000} + + query: + image: "neurobagel/query_tool:${NB_QUERY_TAG:-latest}" + profiles: + - "local_federation" + - "local_node_query" + - "full_stack" + ports: + - "${NB_QUERY_PORT_HOST:-3000}:5173" + environment: + NB_API_QUERY_URL: ${NB_API_QUERY_URL:-http://localhost:8000/} \ No newline at end of file diff --git a/dev/graphdb_setup.sh b/dev/graphdb_setup.sh new file mode 100755 index 0000000..62a16f5 --- /dev/null +++ b/dev/graphdb_setup.sh @@ -0,0 +1,200 @@ +#!/bin/bash +# +# ARG_POSITIONAL_SINGLE([admin-pass],[Password for the "admin" superuser that GraphDB creates. If running the first-time user setup, this will be the password set for the admin. 
The admin user will only be used to create and modify permissions of other database users.]) +# ARG_OPTIONAL_SINGLE([env-file-path],[],[Path to a .env file containing environment variables for Neurobagel node configuration.],[.env]) +# ARG_OPTIONAL_BOOLEAN([run-user-setup],[],[Whether or not to run the first-time GraphDB setup steps, including changing the admin password and creating a new database user.],[on]) +# ARG_HELP([Run first-time user setup for a new GraphDB instance and/or set up a new GraphDB graph database. This script will automatically create a GraphDB configuration file (data-config.ttl) for your newly created database in your current directory. For more information, see https://neurobagel.org/infrastructure/.]) +# ARGBASH_GO() +# needed because of Argbash --> m4_ignore([ +### START OF CODE GENERATED BY Argbash v2.9.0 one line above ### +# Argbash is a bash code generator used to get arguments parsing right. +# Argbash is FREE SOFTWARE, see https://argbash.io for more info +# Generated online by https://argbash.io/generate + + +die() +{ + local _ret="${2:-1}" + test "${_PRINT_HELP:-no}" = yes && print_help >&2 + echo "$1" >&2 + exit "${_ret}" +} + + +begins_with_short_option() +{ + local first_option all_short_options='h' + first_option="${1:0:1}" + test "$all_short_options" = "${all_short_options/$first_option/}" && return 1 || return 0 +} + +# THE DEFAULTS INITIALIZATION - POSITIONALS +_positionals=() +# THE DEFAULTS INITIALIZATION - OPTIONALS +_arg_env_file_path=".env" +_arg_run_user_setup="on" + + +print_help() +{ + printf '%s\n' "Run first-time user setup for a new GraphDB instance and/or set up a new GraphDB graph database. This script will automatically create a GraphDB configuration file (data-config.ttl) for your newly created database in your current directory. For more information, see https://neurobagel.org/infrastructure/." + printf 'Usage: %s [--env-file-path ] [--(no-)run-user-setup] [-h|--help] \n' "$0" + printf '\t%s\n' ": Password for the \"admin\" superuser that GraphDB creates. If running the first-time user setup, this will be the password set for the admin. The admin user will only be used to create and modify permissions of other database users." + printf '\t%s\n' "--env-file-path: Path to a .env file containing environment variables for Neurobagel node configuration. (default: '.env')" + printf '\t%s\n' "--run-user-setup, --no-run-user-setup: Whether or not to run the first-time GraphDB setup steps, including changing the admin password and creating a new database user. (on by default)" + printf '\t%s\n' "-h, --help: Prints help" +} + + +parse_commandline() +{ + _positionals_count=0 + while test $# -gt 0 + do + _key="$1" + case "$_key" in + --env-file-path) + test $# -lt 2 && die "Missing value for the optional argument '$_key'." 
1 + _arg_env_file_path="$2" + shift + ;; + --env-file-path=*) + _arg_env_file_path="${_key##--env-file-path=}" + ;; + --no-run-user-setup|--run-user-setup) + _arg_run_user_setup="on" + test "${1:0:5}" = "--no-" && _arg_run_user_setup="off" + ;; + -h|--help) + print_help + exit 0 + ;; + -h*) + print_help + exit 0 + ;; + *) + _last_positional="$1" + _positionals+=("$_last_positional") + _positionals_count=$((_positionals_count + 1)) + ;; + esac + shift + done +} + + +handle_passed_args_count() +{ + local _required_args_string="'admin-pass'" + test "${_positionals_count}" -ge 1 || _PRINT_HELP=yes die "FATAL ERROR: Not enough positional arguments - we require exactly 1 (namely: $_required_args_string), but got only ${_positionals_count}." 1 + test "${_positionals_count}" -le 1 || _PRINT_HELP=yes die "FATAL ERROR: There were spurious positional arguments --- we expect exactly 1 (namely: $_required_args_string), but got ${_positionals_count} (the last one was: '${_last_positional}')." 1 +} + + +assign_positional_args() +{ + local _positional_name _shift_for=$1 + _positional_names="_arg_admin_pass " + + shift "$_shift_for" + for _positional_name in ${_positional_names} + do + test $# -gt 0 || break + eval "$_positional_name=\${1}" || die "Error during argument parsing, possibly an Argbash bug." 1 + shift + done +} + +parse_commandline "$@" +handle_passed_args_count +assign_positional_args 1 "${_positionals[@]}" + +# OTHER STUFF GENERATED BY Argbash + +### END OF CODE GENERATED BY Argbash (sortof) ### ]) +# [ <-- needed because of Argbash + +set -euo pipefail + +# Reassign command line args to more readable named variables +ENV_FILE_PATH=$_arg_env_file_path +ADMIN_PASS=$_arg_admin_pass +RUN_USER_SETUP=$_arg_run_user_setup + +# Set the environment variables in the shell, to use in the script +source "${ENV_FILE_PATH}" +echo "Environment variables have been set from ${ENV_FILE_PATH}." + +# Extract just the database name +DB_NAME="${NB_GRAPH_DB#repositories/}" +NB_GRAPH_PORT_HOST=${NB_GRAPH_PORT_HOST:-7200} + +# Get the directory of this script to be able to find the data-config_template.ttl file +SCRIPT_DIR=$(dirname "$0") + +echo "The GraphDB server is being accessed at http://localhost:${NB_GRAPH_PORT_HOST}." + +##### First time GraphDB setup ##### + +if [ "${RUN_USER_SETUP}" = "on" ]; then + echo "First time GraphDB user setup enabled." + + # 1. Change database admin password + echo "Changing the admin password (note: if you have previously set the admin password, this has no effect)..." + # TODO: To change a *previously set* admin password, we need to also provide the current password via -u + curl -X PATCH --header 'Content-Type: application/json' http://localhost:${NB_GRAPH_PORT_HOST}/rest/security/users/admin -d "{\"password\": \""${ADMIN_PASS}"\"}" + + # 2. If security is not enabled, enable it (i.e. allow only authenticated users access) + is_security_enabled=$(curl -s -X GET http://localhost:${NB_GRAPH_PORT_HOST}/rest/security) + if [ "${is_security_enabled}" = "false" ]; then + echo "Enabling password-based access control to all databases ..." + # NOTE: This command fails without credentials once security is enabled + curl -X POST --header 'Content-Type: application/json' -d true http://localhost:${NB_GRAPH_PORT_HOST}/rest/security + else + echo "Password-based access control has already been enabled." + fi + + # 3. Create a new database user + # TODO: Separate this out from the first-time setup? As this can technically be run at any time to create additional users. 
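+    # The curl call below uses GraphDB's REST security API (POST /rest/security/users/<username>)
+    # to create the database user named in NB_GRAPH_USERNAME, authenticating as the admin user and
+    # sending the new user's details as a JSON payload read from standard input (-d @-).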
+ # NOTE: If user already exists, response will be "An account with the given username already exists." OK for script. + echo "Creating a new database user ${NB_GRAPH_USERNAME}..." + curl -X POST --header 'Content-Type: application/json' -u "admin:${ADMIN_PASS}" -d @- http://localhost:${NB_GRAPH_PORT_HOST}/rest/security/users/${NB_GRAPH_USERNAME} < data-config.ttl + +# 5. Create a new database +# Assumes data-config.ttl is in the same directory as this script! +echo "Creating the GraphDB database ${DB_NAME}..." +curl -X PUT -u "admin:${ADMIN_PASS}" http://localhost:${NB_GRAPH_PORT_HOST}/${NB_GRAPH_DB} --data-binary "@data-config.ttl" -H "Content-Type: application/x-turtle" + +# 6. Grant newly created user access permission to the database +# Confirm user wants to proceed with changing user permissions +# while true; do +# read -p "WARNING: We will now give ${NB_GRAPH_USERNAME} read/write access to ${NB_GRAPH_DB}. This operation will REPLACE any existing permissions you have granted to user ${NB_GRAPH_USERNAME}, including any access to other databases. ${NB_GRAPH_USERNAME} may lose access to other databases as a result. Proceed? (y/n) " yn +# case $yn in +# [Yy]* ) break;; +# [Nn]* ) echo "Exiting..."; exit;; +# * ) echo "Please answer y or n.";; +# esac +# done + +echo "Granting user ${NB_GRAPH_USERNAME} read/write permissions to database ${DB_NAME}..." +curl -X PUT --header 'Content-Type: application/json' -d " +{\"grantedAuthorities\": [\"WRITE_REPO_${DB_NAME}\",\"READ_REPO_${DB_NAME}\"]}" http://localhost:${NB_GRAPH_PORT_HOST}/rest/security/users/${NB_GRAPH_USERNAME} -u "admin:${ADMIN_PASS}" + +echo "Done." + +# ] <-- needed because of Argbash diff --git a/dev/local_nb_nodes.template.json b/dev/local_nb_nodes.template.json new file mode 100644 index 0000000..bc5fea7 --- /dev/null +++ b/dev/local_nb_nodes.template.json @@ -0,0 +1,6 @@ +[ + { + "NodeName": "Local graph 1", + "ApiURL": "http://api:8000" + } +] diff --git a/dev/setup.sh b/dev/setup.sh new file mode 100755 index 0000000..80e467d --- /dev/null +++ b/dev/setup.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +/opt/graphdb/dist/bin/graphdb -Dgraphdb.home=/opt/graphdb/ & +GRAPHDB_PID=$! + +# Waiting for GraphDB to start +while ! curl --silent "localhost:${NB_GRAPH_PORT_HOST}/rest/repositories" | grep '\[\]'; do + : +done + +# TODO: Do we also want to use this elsewhere in the script or stick to ./? +SCRIPT_DIR=$(dirname "$0") + +# Logic for main setup +main() { + echo -e "Setting up a Neurobagel graph backend...\n" + + echo "Setting up GraphDB server..." + ./graphdb_setup.sh "${NB_GRAPH_ADMIN_PASSWORD}" + echo "Finished server setup." + + echo "Adding datasets to the database..." + ./add_data_to_graph.sh ./data localhost:${NB_GRAPH_PORT_HOST} ${NB_GRAPH_DB} "${NB_GRAPH_USERNAME}" "${NB_GRAPH_PASSWORD}" + echo "Finished adding datasets to databases." + + echo "Adding Neurobagel vocabulary to the database" + ./add_data_to_graph.sh ./vocab localhost:${NB_GRAPH_PORT_HOST} ${NB_GRAPH_DB} "${NB_GRAPH_USERNAME}" "${NB_GRAPH_PASSWORD}" + echo "Finished adding the Neurobagel vocabulary to the database." + + echo "Finished setting up the Neurobagel graph backend." +} + +main 2>&1 | tee -a ${SCRIPT_DIR}/DEPLOY.log + +# We don't have jobcontrol here, so can't bring GraphDB back to foreground +# instead we'll wait +wait $GRAPHDB_PID diff --git a/dev/template.env b/dev/template.env new file mode 100644 index 0000000..47070ad --- /dev/null +++ b/dev/template.env @@ -0,0 +1,45 @@ +# THIS IS A TEMPLATE .env FILE. 
MAKE A COPY OF THIS FILE NAMED .env
+# AND MODIFY THE VALUES AS DESCRIBED IN THE DOCUMENTATION.
+
+# ---- CONFIGURATION FOR docker compose ----
+# Default service profile (deployment "flavour") when none are provided to the `docker compose` command
+COMPOSE_PROFILES=local_node
+
+# ---- CONFIGURATION FOR graph ----
+NB_GRAPH_ADMIN_PASSWORD=ADMINPASSWORD
+NB_GRAPH_USERNAME=DBUSER # REPLACE DBUSER WITH YOUR GRAPH DATABASE USERNAME
+NB_GRAPH_PASSWORD=DBPASSWORD # REPLACE DBPASSWORD WITH YOUR GRAPH DATABASE PASSWORD
+NB_GRAPH_DB=repositories/my_db
+NB_RETURN_AGG=true
+NB_NAPI_TAG=latest
+NB_GRAPH_IMG=ontotext/graphdb:10.3.1
+LOCAL_GRAPH_DATA=./data # REPLACE WITH PATH TO YOUR JSONLD FILES
+
+# ---- CONFIGURATION FOR n-API ----
+## ADDITIONAL CONFIGURABLE PARAMETERS: Uncomment and modify the values of the variables below as needed to use non-default values.
+NB_NAPI_ALLOWED_ORIGINS="*" # Origins allowed to make requests to the API. e.g., for a query tool deployed locally using default ports, use: NB_NAPI_ALLOWED_ORIGINS="http://localhost:3000 http://127.0.0.1:3000"
+NB_NAPI_PORT_HOST=8000
+NB_NAPI_PORT=8000
+NB_GRAPH_ROOT_HOST=~/graphdb-home
+NB_GRAPH_ROOT_CONT=/opt/graphdb/home
+NB_GRAPH_ADDRESS=graph
+NB_GRAPH_PORT_HOST=7200
+NB_GRAPH_PORT=7200
+NB_QUERY_TAG=latest
+NB_QUERY_PORT_HOST=3000
+
+# ---- CONFIGURATION FOR f-API ----
+# Port that the f-API will run on INSIDE the docker container (default 8000)
+NB_FAPI_PORT=8000
+# Port that the f-API will be exposed on to the host computer (and likely the outside network)
+NB_FAPI_PORT_HOST=8080
+# Docker image tag of the f-API (default latest)
+NB_FAPI_TAG=latest
+
+# ---- CONFIGURATION FOR QUERY TOOL ----
+# URL of the f-API as it will appear to a user
+NB_API_QUERY_URL=http://localhost:8080
+# Docker image tag of the query tool (default latest)
+NB_QUERY_TAG=latest
+# Port that the query tool will be exposed on the host and likely the network (default 3000)
+NB_QUERY_PORT_HOST=3000
\ No newline at end of file
diff --git a/dev/vocab/nb_vocab.ttl b/dev/vocab/nb_vocab.ttl
new file mode 100644
index 0000000..2dd308b
--- /dev/null
+++ b/dev/vocab/nb_vocab.ttl
@@ -0,0 +1,37 @@
+@prefix nb: <http://neurobagel.org/vocab/> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+
+nb:Acquisition
+    a rdfs:Class.
+
+nb:Age
+    a rdfs:Class.
+
+nb:Assessment
+    a rdfs:Class ;
+    rdfs:subClassOf nb:ControlledTerm .
+
+nb:ControlledTerm
+    a rdfs:Class.
+
+nb:Diagnosis
+    a rdfs:Class ;
+    rdfs:subClassOf nb:ControlledTerm .
+
+nb:Image
+    a rdfs:Class ;
+    rdfs:subClassOf nb:ControlledTerm .
+
+nb:Session
+    a rdfs:Class.
+
+nb:Sex
+    a rdfs:Class ;
+    rdfs:subClassOf nb:ControlledTerm .
+
+nb:Subject
+    a rdfs:Class.
+
+nb:SubjectGroup
+    a rdfs:Class ;
+    rdfs:subClassOf nb:ControlledTerm .