Extract STAC validation into a shared WorkflowTemplate (tpl-at-stac-validate)

What this patch does:
  * templates/argo-tasks/stac-validate.yml: adds the `tpl-at-stac-validate`
    WorkflowTemplate, which runs `stac validate` from the argo-tasks image.
    Inputs and defaults: uri (''), recursive ('true'), concurrency ('50'),
    checksum ('false'), version ('v3', used as the container image tag).
  * templates/argo-tasks/README.md: documents the new template.
  * workflows/raster/standardising.yaml: the `stac-validate` task calls the
    shared template; the inline `stac-validate` template is removed.
  * workflows/stac/stac-validate.yaml: renamed to stac-validate-parallel.yaml;
    each listed collection is validated through the shared template and the
    inline `stac-validate-collections` template is removed.
  * workflows/stac/README.md: trimmed to describe the parallel workflow.

Review fixes folded into this revision:
  * Both callers now pass `version` from `workflow.parameters.version_argo_tasks`.
    The inline templates they replace tagged the image with that workflow
    parameter; without forwarding it, the shared template always runs its
    default tag ('v3') and the workflow parameter is silently ignored.
  * standardising.yaml: the hunk that added a leading '' entry to the
    `target_bucket_name` enum is dropped. The enum already ends with a ''
    entry, so the result listed the same value twice.
  * templates/argo-tasks/README.md: the stac-validate "See" line is a bare URL,
    matching the other "See https://..." lines in that file (it was wrapped
    in parentheses, which is not valid link markup).

Notes for whoever applies this:
  * Hunk headers (line counts and new-file offsets) are recomputed for the
    edits above.
  * The `index` blob ids and the rename similarity score are carried over
    unchanged, so they no longer describe the post-image of the three edited
    files (argo-tasks README, standardising.yaml, stac-validate-parallel.yaml).
  * Leading indentation and blank lines were rebuilt from the hunk line counts
    and the YAML structure; check them against the target tree before applying.

diff --git a/templates/argo-tasks/README.md b/templates/argo-tasks/README.md
index ad11000d9..cc44fd635 100644
--- a/templates/argo-tasks/README.md
+++ b/templates/argo-tasks/README.md
@@ -178,7 +178,7 @@ See https://github.com/linz/argo-tasks#stac-github-import
 Template to build ODR target paths using collection metadata.
 See https://github.com/linz/argo-tasks#generate-paths
 
-## Template Usage
+### Template Usage
 
 ```yaml
 name: generate-path
@@ -194,3 +194,27 @@ arguments:
     - name: source
       value: '{{inputs.parameters.source}}'
 ```
+
+## argo-tasks/stac-validate
+
+Template to validate STAC Collections and Items against [STAC](https://stacspec.org/) schemas and STAC Extension schemas.
+See https://github.com/linz/argo-tasks#stac-validate
+
+### Template Usage
+
+```yaml
+- name: stac-validate
+  templateRef:
+    name: tpl-at-stac-validate
+    template: main
+  arguments:
+    parameters:
+      - name: uri
+        value: 's3://my-bucket/path/collection.json'
+      - name: checksum
+        value: '{{workflow.parameters.checksum}}'
+      - name: recursive
+        value: '{{workflow.parameters.recursive}}'
+      - name: concurrency
+        value: '20'
+```
diff --git a/templates/argo-tasks/stac-validate.yml b/templates/argo-tasks/stac-validate.yml
new file mode 100644
index 000000000..77b8d6ba4
--- /dev/null
+++ b/templates/argo-tasks/stac-validate.yml
@@ -0,0 +1,55 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/argoproj/argo-workflows/v3.5.5/api/jsonschema/schema.json
+
+apiVersion: argoproj.io/v1alpha1
+kind: WorkflowTemplate
+metadata:
+  # Template from linz/argo-tasks
+  # see https://github.com/linz/argo-tasks?tab=readme-ov-file#stac-validate
+  name: tpl-at-stac-validate
+spec:
+  templateDefaults:
+    container:
+      imagePullPolicy: Always
+      image: ''
+  entrypoint: main
+  templates:
+    - name: main
+      inputs:
+        parameters:
+          - name: uri
+            description: STAC file uri to validate
+            default: ''
+
+          - name: recursive
+            description: Follow and validate STAC links
+            default: 'true'
+
+          - name: concurrency
+            description: Number of requests to run concurrently
+            default: '50'
+
+          - name: checksum
+            description: Validate the file:checksum if it exists
+            default: 'false'
+
+          - name: version
+            description: container version to use
+            default: 'v3'
+
+      container:
+        image: '019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/argo-tasks:{{=sprig.trim(inputs.parameters.version)}}'
+        resources:
+          requests:
+            cpu: 15000m
+            memory: 7.8Gi
+        command: [node, /app/index.js]
+        env:
+          - name: AWS_ROLE_CONFIG_PATH
+            value: s3://linz-bucket-config/config.json
+        args:
+          - 'stac'
+          - 'validate'
+          - '--concurrency={{inputs.parameters.concurrency}}'
+          - '--recursive={{inputs.parameters.recursive}}'
+          - '--checksum={{inputs.parameters.checksum}}'
+          - '{{inputs.parameters.uri}}'
diff --git a/workflows/raster/standardising.yaml b/workflows/raster/standardising.yaml
index 15d34d359..0cb4b4ad6 100644
--- a/workflows/raster/standardising.yaml
+++ b/workflows/raster/standardising.yaml
@@ -362,11 +362,15 @@ spec:
             depends: 'standardise-validate'
 
           - name: stac-validate
-            template: stac-validate
+            templateRef:
+              name: tpl-at-stac-validate
+              template: main
             arguments:
               parameters:
-                - name: location
-                  value: '{{tasks.get-location.outputs.parameters.location}}'
+                - name: uri
+                  value: '{{tasks.get-location.outputs.parameters.location}}flat/collection.json'
+                - name: version
+                  value: '{{workflow.parameters.version_argo_tasks}}'
               artifacts:
                 - name: stac-result
                   raw:
@@ -542,18 +546,6 @@ spec:
           - '--concurrency'
           - '25'
 
-    - name: stac-validate
-      inputs:
-        parameters:
-          - name: location
-      container:
-        image: '019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/argo-tasks:{{=sprig.trim(workflow.parameters.version_argo_tasks)}}'
-        command: [node, /app/index.js]
-        env:
-          - name: AWS_ROLE_CONFIG_PATH
-            value: s3://linz-bucket-config/config.json
-        args: ['stac', 'validate', '--recursive', '{{inputs.parameters.location}}flat/collection.json']
-
     - name: get-location
       script:
         image: '019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/argo-tasks:{{=sprig.trim(workflow.parameters.version_argo_tasks)}}'
diff --git a/workflows/stac/README.md b/workflows/stac/README.md
index 464c78da0..ddb980333 100644
--- a/workflows/stac/README.md
+++ b/workflows/stac/README.md
@@ -1,25 +1,6 @@
-# Contents
+# stac-validate-parallel
 
-- [stac-validate](#stac-validate)
-
-# stac-validate
-
-Validate STAC Collections and Items against [STAC](https://stacspec.org/) schemas and STAC Extension schemas.
-Uses the [argo-tasks](https://github.com/linz/argo-tasks#stac-validate) container `stac-validate` command.
-
-## Workflow Input Parameters
-
-| Parameter | Type | Default | Description |
-| --------- | ----- | --------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
-| uri | str | s3://linz-imagery-staging/test/stac-validate/ | The full AWS S3 URI (path) to the STAC file(s) |
-| include | regex | `collection.json$` | Regular expression to match object path(s) or name(s) from within the source path to include in STAC validation. |
-| checksum | enum | false | Set to "true" to validate the checksums of linked asset files. |
-
-The `--recursive` flag is specified inside the STAC Validate WorkflowTemplate. Linked STAC items linked to from a STAC collection will also be validated.
-
-The STAC Validate Workflow will validate each collection (and linked items/assets) in a separate pod so that multiple collections can be processed in parallel.
-
-Access permissions are controlled by the [Bucket Sharing Config](https://github.com/linz/topo-aws-infrastructure/blob/master/src/stacks/bucket.sharing.ts) which gives Argo Workflows access to the S3 buckets we use.
+This Workflow will validate each collection (and linked items/assets) in a separate pod so that multiple collections can be processed in parallel, using the `tpl-at-stac-validate` template.
 
 ## Workflow Outputs
 
diff --git a/workflows/stac/stac-validate.yaml b/workflows/stac/stac-validate-parallel.yaml
similarity index 70%
rename from workflows/stac/stac-validate.yaml
rename to workflows/stac/stac-validate-parallel.yaml
index 36171db1a..7895d390b 100644
--- a/workflows/stac/stac-validate.yaml
+++ b/workflows/stac/stac-validate-parallel.yaml
@@ -3,7 +3,7 @@
 apiVersion: argoproj.io/v1alpha1
 kind: WorkflowTemplate
 metadata:
-  name: stac-validate
+  name: stac-validate-parallel
   labels:
     linz.govt.nz/category: stac
 spec:
@@ -29,6 +29,7 @@ spec:
   templateDefaults:
     container:
       imagePullPolicy: Always
+      image: ''
   templates:
     - name: main
       dag:
@@ -36,11 +37,17 @@ spec:
           - name: aws-list-collections
             template: aws-list-collections
           - name: stac-validate-collections
-            template: stac-validate-collections
+            templateRef:
+              name: tpl-at-stac-validate
+              template: main
             arguments:
               parameters:
-                - name: file
+                - name: uri
                   value: '{{item}}'
+                - name: checksum
+                  value: '{{workflow.parameters.checksum}}'
+                - name: version
+                  value: '{{workflow.parameters.version_argo_tasks}}'
             depends: aws-list-collections
             withParam: '{{tasks.aws-list-collections.outputs.parameters.files}}'
     - name: aws-list-collections
@@ -67,27 +74,3 @@ spec:
           - name: files
             valueFrom:
               path: /tmp/file_list.json
-    - name: stac-validate-collections
-      inputs:
-        parameters:
-          - name: file
-      container:
-        image: '019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/argo-tasks:{{=sprig.trim(workflow.parameters.version_argo_tasks)}}'
-        resources:
-          requests:
-            cpu: 15000m
-            memory: 7.8Gi
-        command: [node, /app/index.js]
-        env:
-          - name: AWS_ROLE_CONFIG_PATH
-            value: s3://linz-bucket-config/config.json
-        args:
-          [
-            'stac',
-            'validate',
-            '--concurrency',
-            '50',
-            '--recursive',
-            '--checksum={{workflow.parameters.checksum}}',
-            '{{inputs.parameters.file}}',
-          ]