From e57abd25cfabfc74af83d6f990900e89e3633372 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Mon, 15 Apr 2024 14:16:44 +1200 Subject: [PATCH 1/3] refactor: templating stac-validate workflow TDE-1136 --- templates/argo-tasks/README.md | 26 ++++++++- templates/argo-tasks/stac-validate.yml | 57 +++++++++++++++++++ workflows/raster/publish-odr.yaml | 2 +- workflows/raster/standardising.yaml | 23 +++----- workflows/stac/README.md | 23 +------- ...idate.yaml => stac-validate-parallel.yaml} | 38 ++++--------- 6 files changed, 102 insertions(+), 67 deletions(-) create mode 100644 templates/argo-tasks/stac-validate.yml rename workflows/stac/{stac-validate.yaml => stac-validate-parallel.yaml} (69%) diff --git a/templates/argo-tasks/README.md b/templates/argo-tasks/README.md index ad11000d9..cc44fd635 100644 --- a/templates/argo-tasks/README.md +++ b/templates/argo-tasks/README.md @@ -178,7 +178,7 @@ See https://github.com/linz/argo-tasks#stac-github-import Template to build ODR target paths using collection metadata. See https://github.com/linz/argo-tasks#generate-paths -## Template Usage +### Template Usage ```yaml name: generate-path @@ -194,3 +194,27 @@ arguments: - name: source value: '{{inputs.parameters.source}}' ``` + +## argo-tasks/stac-validate + +Template to validate STAC Collections and Items against [STAC](https://stacspec.org/) schemas and STAC Extension schemas. 
+See https://github.com/linz/argo-tasks#stac-validate + +### Template Usage + +```yaml +- name: stac-validate + templateRef: + name: tpl-at-stac-validate + template: main + arguments: + parameters: + - name: uri + value: 's3://my-bucket/path/collection.json' + - name: checksum + value: '{{workflow.parameters.checksum}}' + - name: recursive + value: '{{workflow.parameters.recursive}}' + - name: concurrency + value: '20' +``` diff --git a/templates/argo-tasks/stac-validate.yml b/templates/argo-tasks/stac-validate.yml new file mode 100644 index 000000000..670b36e6a --- /dev/null +++ b/templates/argo-tasks/stac-validate.yml @@ -0,0 +1,57 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/argoproj/argo-workflows/v3.4.13/api/jsonschema/schema.json + +apiVersion: argoproj.io/v1alpha1 +kind: WorkflowTemplate +metadata: + # Template from linz/argo-tasks + # see https://github.com/linz/argo-tasks?tab=readme-ov-file#stac-validate + name: tpl-at-stac-validate +spec: + templateDefaults: + container: + imagePullPolicy: Always + image: '' + entrypoint: main + templates: + - name: main + inputs: + parameters: + - name: uri + description: STAC file uri to validate + default: '' + + - name: recursive + description: Follow and validate STAC links + default: 'true' + + - name: concurrency + description: Number of requests to run concurrently + default: '50' + + - name: checksum + description: Validate the file:checksum if it exists + default: 'false' + + - name: version + description: container version to use + default: 'v3' + + container: + image: '019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/argo-tasks:{{=sprig.trim(inputs.parameters.version)}}' + resources: + requests: + cpu: 15000m + memory: 7.8Gi + command: [node, /app/index.js] + env: + - name: AWS_ROLE_CONFIG_PATH + value: s3://linz-bucket-config/config.json + args: + [ + 'stac', + 'validate', + '--concurrency={{inputs.parameters.concurrency}}', + '--recursive={{inputs.parameters.recursive}}', 
'--checksum={{inputs.parameters.checksum}}', + '{{inputs.parameters.uri}}', + ] diff --git a/workflows/raster/publish-odr.yaml b/workflows/raster/publish-odr.yaml index c8188e9b5..de20989f6 100644 --- a/workflows/raster/publish-odr.yaml +++ b/workflows/raster/publish-odr.yaml @@ -3,7 +3,7 @@ apiVersion: argoproj.io/v1alpha1 kind: WorkflowTemplate metadata: - name: publish-odr + name: test-publish-odr namespace: argo labels: linz.govt.nz/category: raster diff --git a/workflows/raster/standardising.yaml b/workflows/raster/standardising.yaml index 2ac87be0c..affe8bc50 100644 --- a/workflows/raster/standardising.yaml +++ b/workflows/raster/standardising.yaml @@ -3,7 +3,7 @@ apiVersion: argoproj.io/v1alpha1 kind: WorkflowTemplate metadata: - name: imagery-standardising + name: test-stac-validate-imagery-standardising namespace: argo labels: linz.govt.nz/category: raster @@ -277,6 +277,7 @@ spec: - name: target_bucket_name value: '' enum: + - '' - 'nz-imagery' - 'nz-elevation' - name: copy_option @@ -362,11 +363,13 @@ spec: depends: 'standardise-validate' - name: stac-validate - template: stac-validate + templateRef: + name: tpl-at-stac-validate + template: main arguments: parameters: - - name: location - value: '{{tasks.get-location.outputs.parameters.location}}' + - name: uri + value: '{{tasks.get-location.outputs.parameters.location}}flat/collection.json' artifacts: - name: stac-result raw: @@ -542,18 +545,6 @@ spec: - '--concurrency' - '25' - - name: stac-validate - inputs: - parameters: - - name: location - container: - image: '019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/argo-tasks:{{=sprig.trim(workflow.parameters.version_argo_tasks)}}' - command: [node, /app/index.js] - env: - - name: AWS_ROLE_CONFIG_PATH - value: s3://linz-bucket-config/config.json - args: ['stac', 'validate', '--recursive', '{{inputs.parameters.location}}flat/collection.json'] - - name: get-location script: image: 
'019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/argo-tasks:{{=sprig.trim(workflow.parameters.version_argo_tasks)}}' diff --git a/workflows/stac/README.md b/workflows/stac/README.md index 464c78da0..ddb980333 100644 --- a/workflows/stac/README.md +++ b/workflows/stac/README.md @@ -1,25 +1,6 @@ -# Contents +# stac-validate-parallel -- [stac-validate](#stac-validate) - -# stac-validate - -Validate STAC Collections and Items against [STAC](https://stacspec.org/) schemas and STAC Extension schemas. -Uses the [argo-tasks](https://github.com/linz/argo-tasks#stac-validate) container `stac-validate` command. - -## Workflow Input Parameters - -| Parameter | Type | Default | Description | -| --------- | ----- | --------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | -| uri | str | s3://linz-imagery-staging/test/stac-validate/ | The full AWS S3 URI (path) to the STAC file(s) | -| include | regex | `collection.json$` | Regular expression to match object path(s) or name(s) from within the source path to include in STAC validation. | -| checksum | enum | false | Set to "true" to validate the checksums of linked asset files. | - -The `--recursive` flag is specified inside the STAC Validate WorkflowTemplate. Linked STAC items linked to from a STAC collection will also be validated. - -The STAC Validate Workflow will validate each collection (and linked items/assets) in a separate pod so that multiple collections can be processed in parallel. - -Access permissions are controlled by the [Bucket Sharing Config](https://github.com/linz/topo-aws-infrastructure/blob/master/src/stacks/bucket.sharing.ts) which gives Argo Workflows access to the S3 buckets we use. +This Workflow will validate each collection (and linked items/assets) in a separate pod so that multiple collections can be processed in parallel, using the `tpl-at-stac-validate` template. 
## Workflow Outputs diff --git a/workflows/stac/stac-validate.yaml b/workflows/stac/stac-validate-parallel.yaml similarity index 69% rename from workflows/stac/stac-validate.yaml rename to workflows/stac/stac-validate-parallel.yaml index 23c46b88d..76300f797 100644 --- a/workflows/stac/stac-validate.yaml +++ b/workflows/stac/stac-validate-parallel.yaml @@ -1,8 +1,9 @@ ---- +# yaml-language-server: $schema=https://raw.githubusercontent.com/argoproj/argo-workflows/v3.4.13/api/jsonschema/schema.json + apiVersion: argoproj.io/v1alpha1 kind: WorkflowTemplate metadata: - name: stac-validate + name: stac-validate-parallel namespace: argo labels: linz.govt.nz/category: stac @@ -29,6 +30,7 @@ spec: templateDefaults: container: imagePullPolicy: Always + image: '' templates: - name: main dag: @@ -36,11 +38,15 @@ spec: - name: aws-list-collections template: aws-list-collections - name: stac-validate-collections - template: stac-validate-collections + templateRef: + name: tpl-at-stac-validate + template: main arguments: parameters: - - name: file + - name: uri value: '{{item}}' + - name: checksum + value: '{{workflow.parameters.checksum}}' depends: aws-list-collections withParam: '{{tasks.aws-list-collections.outputs.parameters.files}}' - name: aws-list-collections @@ -67,27 +73,3 @@ spec: - name: files valueFrom: path: /tmp/file_list.json - - name: stac-validate-collections - inputs: - parameters: - - name: file - container: - image: '019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/argo-tasks:{{=sprig.trim(workflow.parameters.version_argo_tasks)}}' - resources: - requests: - cpu: 15000m - memory: 7.8Gi - command: [node, /app/index.js] - env: - - name: AWS_ROLE_CONFIG_PATH - value: s3://linz-bucket-config/config.json - args: - [ - 'stac', - 'validate', - '--concurrency', - '50', - '--recursive', - '--checksum={{workflow.parameters.checksum}}', - '{{inputs.parameters.file}}', - ] From 71ba7a283c1d9f36b363d033ff9232d4c73d5980 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: 
Wed, 17 Apr 2024 11:01:04 +1200 Subject: [PATCH 2/3] refactor: use - - - style for args arrays --- templates/argo-tasks/stac-validate.yml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/templates/argo-tasks/stac-validate.yml b/templates/argo-tasks/stac-validate.yml index 670b36e6a..bf1274997 100644 --- a/templates/argo-tasks/stac-validate.yml +++ b/templates/argo-tasks/stac-validate.yml @@ -47,11 +47,9 @@ spec: - name: AWS_ROLE_CONFIG_PATH value: s3://linz-bucket-config/config.json args: - [ - 'stac', - 'validate', - '--concurrency={{inputs.parameters.concurrency}}', - '--recursive={{inputs.parameters.recursive}}', - '--checksum={{inputs.parameters.checksum}}', - '{{inputs.parameters.uri}}', - ] + - 'stac' + - 'validate' + - '--concurrency={{inputs.parameters.concurrency}}' + - '--recursive={{inputs.parameters.recursive}}' + - '--checksum={{inputs.parameters.checksum}}' + - '{{inputs.parameters.uri}}' From b14d2735a5a203ce328c2a86baa42953dc6d6650 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Thu, 18 Apr 2024 15:10:23 +1200 Subject: [PATCH 3/3] chore: upgrade argo jsonschema version --- templates/argo-tasks/stac-validate.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/argo-tasks/stac-validate.yml b/templates/argo-tasks/stac-validate.yml index bf1274997..77b8d6ba4 100644 --- a/templates/argo-tasks/stac-validate.yml +++ b/templates/argo-tasks/stac-validate.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/argoproj/argo-workflows/v3.4.13/api/jsonschema/schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/argoproj/argo-workflows/v3.5.5/api/jsonschema/schema.json apiVersion: argoproj.io/v1alpha1 kind: WorkflowTemplate