From 0993d31a3b8b4e4f3526edda00000cb75a5e23f5 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 14 Nov 2023 17:05:38 -0500 Subject: [PATCH 1/8] feat!: implement new workflow system --- src/api/workflows/main.go | 39 ++++++++++++++++-------------------- src/api/workflows/vcf_gz.wdl | 25 +++++++++++++++++------ 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/src/api/workflows/main.go b/src/api/workflows/main.go index 24903d8..de50089 100644 --- a/src/api/workflows/main.go +++ b/src/api/workflows/main.go @@ -13,14 +13,20 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "name": "Compressed-VCF Elasticsearch Indexing", "description": "This ingestion workflow will validate and ingest a BGZip-Compressed-VCF into Elasticsearch.", "data_type": "variant", + "tags": []string{"variant"}, "file": "vcf_gz.wdl", - "action": "ingestion", + "type": "ingestion", "inputs": []map[string]interface{}{ { - "id": "vcf_gz_file_names", - "type": "file[]", - "required": true, - "extensions": []string{".vcf.gz"}, + "id": "project_dataset", + "type": "project:dataset", + "required": true, + }, + { + "id": "vcf_gz_file_names", + "type": "file[]", + "required": true, + "pattern": "^.*\\.vcf\\.gz$", }, { "id": "assembly_id", @@ -37,26 +43,15 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "default": "false", }, { - "id": "gohan_url", - "type": "string", - "required": true, - "value": "FROM_CONFIG", - "hidden": true, - }, - }, - "outputs": []map[string]interface{}{ - { - "id": "txt_output", - "type": "file", - "value": "{txt_output}", - }, - { - "id": "err_output", - "type": "file", - "value": "{err_output}", + "id": "gohan_url", + "type": "service-kind", + "required": true, + "injected": true, + "service_kind": "gohan", }, }, }, }, "analysis": map[string]interface{}{}, + "export": map[string]interface{}{}, } diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 343f3de..af51b7b 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -2,24 +2,37 @@ workflow vcf_gz { String gohan_url Array[File] vcf_gz_file_names String assembly_id - String project_id - String dataset_id + String project_dataset String filter_out_references - String secret__access_token + String access_token + + call project_and_dataset_id { + input: project_dataset = project_dataset + } scatter(file_name in vcf_gz_file_names) { call vcf_gz_gohan { input: gohan_url = gohan_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, - project = project_id, - dataset = dataset_id, + project = project_and_dataset_id.out[0], + dataset = project_and_dataset_id.out[1], filter_out_references = filter_out_references, - access_token = secret__access_token, + access_token = access_token, } } } +task project_and_dataset_id { + input { + String project_dataset + } + command <<< python3 -c 'import json; print(json.dumps("~{project_dataset}".split(":")))' >>> + output { + Array[String] out = read_json(stdout()) + } +} + task vcf_gz_gohan { String gohan_url String vcf_gz_file_name From 9b7a1ce2c8c38aa9b64cd595c641a3b6a5cf3368 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 22 Nov 2023 14:18:22 -0500 Subject: [PATCH 2/8] fix: workflow inputs --- src/api/workflows/main.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/api/workflows/main.go b/src/api/workflows/main.go index de50089..e11e5f3 100644 --- a/src/api/workflows/main.go +++ b/src/api/workflows/main.go @@ -33,18 +33,16 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "type": "enum", "required": true, "values": []c.AssemblyId{a.GRCh38, a.GRCh37}, - "default": "GRCh38", }, { "id": "filter_out_references", - "type": "enum", + "type": "boolean", "required": true, "values": []string{"true", "false"}, // simulate boolean type - "default": "false", }, { "id": "gohan_url", - "type": "service-kind", + "type": "service-url", "required": true, "injected": true, "service_kind": "gohan", From d35865bb9bc4eb2ec9acb8b8505ba2b7f89fab60 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 22 Nov 2023 14:34:01 -0500 Subject: [PATCH 3/8] chore: more work on new gohan ingest workflow --- src/api/workflows/main.go | 8 ++- src/api/workflows/vcf_gz.wdl | 109 ++++++++++++++++++----------------- 2 files changed, 62 insertions(+), 55 deletions(-) diff --git a/src/api/workflows/main.go b/src/api/workflows/main.go index e11e5f3..1ba2321 100644 --- a/src/api/workflows/main.go +++ b/src/api/workflows/main.go @@ -38,7 +38,6 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "id": "filter_out_references", "type": "boolean", "required": true, - "values": []string{"true", "false"}, // simulate boolean type }, { "id": "gohan_url", @@ -47,6 +46,13 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "injected": true, "service_kind": "gohan", }, + { + "id": "validate_ssl", + "type": "config", + "required": true, + "injected": true, + "key": "validate_ssl", + }, }, }, }, diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index af51b7b..e03d0bc 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -1,10 +1,15 @@ +version 1.0 + workflow vcf_gz { - String gohan_url - Array[File] vcf_gz_file_names - String assembly_id - String project_dataset - String filter_out_references - String access_token + input { + String gohan_url + Array[File] vcf_gz_file_names + String assembly_id + String project_dataset + Boolean filter_out_references + String access_token + Boolean validate_ssl + } call project_and_dataset_id { input: project_dataset = project_dataset @@ -19,6 +24,7 @@ workflow vcf_gz { dataset = project_and_dataset_id.out[1], filter_out_references = filter_out_references, access_token = access_token, + validate_ssl = validate_ssl } } } @@ -34,38 +40,36 @@ task project_and_dataset_id { } task vcf_gz_gohan { - String gohan_url - String vcf_gz_file_name - String assembly_id - String project - String dataset - String filter_out_references - String access_token - - command { - echo "Using temporary-token : ${access_token}" - - QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&project=${project}&filterOutReferences=${filter_out_references}" - AUTH_HEADER="Authorization: Bearer ${access_token}" + input { + String gohan_url + String vcf_gz_file_name + String assembly_id + String project + String dataset + Boolean filter_out_references + String access_token + Boolean validate_ssl + } + + command <<< + QUERY='fileNames=~{vcf_gz_file_name}&assemblyId=~{assembly_id}&dataset=~{dataset}&project=~{project}&filterOutReferences=~{true="true" false="false" filter_out_references}' + + AUTH_HEADER='Authorization: Bearer ~{access_token}' - # TODO: refactor - # append temporary-token header if present - if [ "${access_token}" == "" ] - then - RUN_RESPONSE=$(curl -vvv "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') - else - RUN_RESPONSE=$(curl -vvv -H "$AUTH_HEADER" "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') - fi + RUN_RESPONSE=$(curl -vvv \ + -H "${AUTH_HEADER}" \ + ~{true="" false="-k" validate_ssl} \ + "~{gohan_url}/private/variants/ingestion/run?${QUERY}" | sed 's/"/\"/g') - echo $RUN_RESPONSE + echo "${RUN_RESPONSE}" # reformat response string to include double quotes in the json object RUN_RESPONSE_WITH_QUOTES=$(echo $RUN_RESPONSE | sed 's/"/\"/g') - echo $RUN_RESPONSE_WITH_QUOTES + echo "${RUN_RESPONSE_WITH_QUOTES}" # obtain request id from the response for this one file just requested to process REQUEST_ID=$(echo $RUN_RESPONSE_WITH_QUOTES | jq -r '.[] |"\(.id)"') - echo $REQUEST_ID + echo "${REQUEST_ID}" # give it a second.. sleep 1s @@ -73,53 +77,50 @@ task vcf_gz_gohan { # "while loop to ping '/variants/ingestion/requests' and wait for this file ingestion to complete or display an error..." while : do - # TODO: refactor # fetch run requests - # append temporary-token header if present - if [ "${access_token}" == "" ] - then - REQUESTS=$(curl -vvv "${gohan_url}/private/variants/ingestion/requests" -k) - else - REQUESTS=$(curl -vvv -H "$AUTH_HEADER" "${gohan_url}/private/variants/ingestion/requests" -k) - fi + REQUESTS=$(curl -vvv \ + -H "${AUTH_HEADER}" \ + ~{true="" false="-k" validate_ssl} \ + "~{gohan_url}/private/variants/ingestion/requests" -k) - echo $REQUESTS + echo "${REQUESTS}" # reformat response string to include double quotes in the json object REQ_WITH_QUOTES=$(echo $REQUESTS | sed 's/"/\"/g') - echo $REQ_WITH_QUOTES + echo "${REQ_WITH_QUOTES}" # organize json objects as individual lines per response object (file being processed) JQ_RES=$(echo $REQ_WITH_QUOTES | jq -r '.[] | "\(.id) \(.filename) \(.state)"') - echo "$JQ_RES" - + echo "${JQ_RES}" # determine the state of the run request by filename THIS_FILE_RESULT=$(echo "$JQ_RES" | grep $REQUEST_ID | tr ' ' '\n' | grep . | tail -n1) - echo $THIS_FILE_RESULT + echo "${THIS_FILE_RESULT}" - if [ "$THIS_FILE_RESULT" == "Done" ] || [ "$THIS_FILE_RESULT" == "Error" ] - then - WITH_ERROR_MESSAGE= + if [ "${THIS_FILE_RESULT}" == "Done" ] || [ "${THIS_FILE_RESULT}" == "Error" ]; then + WITH_ERROR_MESSAGE='' - if [ "$THIS_FILE_RESULT" == "Error" ] - then + if [ "${THIS_FILE_RESULT}" == "Error" ]; then WITH_ERROR_MESSAGE=" in error!" echo "This is what we found from the /variants/ingestion/requests :" - echo "$THIS_FILE_RESULT" + echo "${THIS_FILE_RESULT}" fi - echo "File ${vcf_gz_file_name} with assembly id ${assembly_id} done processing $WITH_ERROR_MESSAGE" + echo "File ~{vcf_gz_file_name} with assembly id ~{assembly_id} done processing ${WITH_ERROR_MESSAGE}" break - elif [ "$THIS_FILE_RESULT" == "" ] - then - echo "Something went wrong. Got invalid response from Gohan API : $REQUESTS" + elif [ "${THIS_FILE_RESULT}" == "" ]; then + echo "Something went wrong. Got invalid response from Gohan API: ${REQUESTS}" break else - echo "Waiting 5 seconds.." + echo '~{vcf_gz_file_name}: Waiting 5 seconds...' sleep 5s fi done + >>> + + output { + String out = stdout() + String err = stderr() } } From cee17542c0d3bac86ec5fff6866324edd43d1e06 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 22 Nov 2023 15:38:24 -0500 Subject: [PATCH 4/8] fix: add missing access_token injected input --- src/api/workflows/main.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/api/workflows/main.go b/src/api/workflows/main.go index 1ba2321..44fb75f 100644 --- a/src/api/workflows/main.go +++ b/src/api/workflows/main.go @@ -17,6 +17,7 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "file": "vcf_gz.wdl", "type": "ingestion", "inputs": []map[string]interface{}{ + // User inputs: { "id": "project_dataset", "type": "project:dataset", @@ -39,6 +40,7 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "type": "boolean", "required": true, }, + // Injected inputs: { "id": "gohan_url", "type": "service-url", @@ -46,6 +48,13 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "injected": true, "service_kind": "gohan", }, + { + "id": "access_token", + "type": "secret", + "required": true, + "injected": true, + "key": "access_token", + }, { "id": "validate_ssl", "type": "config", From dba9602e5fd148c1af7f4e1d23c8a13436bbe6e6 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 22 Nov 2023 15:41:34 -0500 Subject: [PATCH 5/8] fix: grammar in workflow name/desc --- src/api/workflows/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/api/workflows/main.go b/src/api/workflows/main.go index 44fb75f..6923b81 100644 --- a/src/api/workflows/main.go +++ b/src/api/workflows/main.go @@ -10,8 +10,8 @@ type WorkflowSchema map[string]interface{} var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "ingestion": map[string]interface{}{ "vcf_gz": map[string]interface{}{ - "name": "Compressed-VCF Elasticsearch Indexing", - "description": "This ingestion workflow will validate and ingest a BGZip-Compressed-VCF into Elasticsearch.", + "name": "Compressed VCF Elasticsearch Indexing", + "description": "This ingestion workflow will validate and ingest a BGZip-compressed VCF into Elasticsearch.", "data_type": "variant", "tags": []string{"variant"}, "file": "vcf_gz.wdl", From bb261c06488ce856a66b73875c9a8e9971262d87 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Thu, 30 Nov 2023 16:00:56 -0500 Subject: [PATCH 6/8] fix(workflows): remove duplicate -k curl flag --- src/api/workflows/vcf_gz.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index e03d0bc..48b0720 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -81,7 +81,7 @@ task vcf_gz_gohan { REQUESTS=$(curl -vvv \ -H "${AUTH_HEADER}" \ ~{true="" false="-k" validate_ssl} \ - "~{gohan_url}/private/variants/ingestion/requests" -k) + "~{gohan_url}/private/variants/ingestion/requests") echo "${REQUESTS}" From 3c85d4b773c2c021a821baca3224ca933fb18cfb Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Thu, 30 Nov 2023 16:06:08 -0500 Subject: [PATCH 7/8] chore: update base image versions --- etc/example.env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etc/example.env b/etc/example.env index 4f521de..2c0e19f 100644 --- a/etc/example.env +++ b/etc/example.env @@ -39,8 +39,8 @@ GOHAN_API_IMAGE=gohan-api GOHAN_API_VERSION=latest GOHAN_API_BUILDER_BASE_IMAGE=golang:1.21-bookworm -GOHAN_API_DEV_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.10.20 -GOHAN_API_PROD_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.10.20 +GOHAN_API_DEV_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.11.10 +GOHAN_API_PROD_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.11.10 GOHAN_API_CONTAINER_NAME=gohan-api GOHAN_API_SERVICE_HOST=0.0.0.0 From 4881d86e0b7c6ebdd9ca4662d746f2c6a47dd137 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Thu, 30 Nov 2023 16:06:18 -0500 Subject: [PATCH 8/8] chore: bump version to 5.0.0 --- etc/example.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/example.env b/etc/example.env index 2c0e19f..57c2a1e 100644 --- a/etc/example.env +++ b/etc/example.env @@ -2,7 +2,7 @@ GOHAN_DEBUG=false GOHAN_SERVICE_CONTACT=someone@somewhere.ca -GOHAN_SEMVER=4.0.1 +GOHAN_SEMVER=5.0.0 GOHAN_SERVICES="gateway api elasticsearch kibana drs authorization" # GOOS=linux