diff --git a/workflows/cron/README.md b/workflows/cron/README.md index 6d56196a4..cf0d69fee 100644 --- a/workflows/cron/README.md +++ b/workflows/cron/README.md @@ -1,9 +1,20 @@ # Contents: -- [cron STAC validate](#cron-STAC-validate) +- [cron-stac-validate](#cron-stac-validate) +- [cron-stac-validate-all-data](#cron-stac-validate-all-data) -# cron STAC validate +# STAC validation + +## cron-stac-validate Workflow that validates the STAC metadata using [`stac-validate`](https://teams.microsoft.com/v2/?meetingjoin=true#/l/meetup-join/19:meeting_MDc1MWEzNzYtYTI4Yy00OWZmLWJhMzUtYjA1ZmU1ODBmNTg5@thread.v2/0?context=%7b%22Tid%22%3a%222134e961-7e38-4c34-a22b-10da5466b725%22%2c%22Oid%22%3a%2263d2d811-1d35-49f7-b9a3-c60e9b9a9ed1%22%7d&anon=true&deeplinkId=1c8a1674-d597-4c2d-ab7e-6ed968f086b7) and verify that the [STAC links](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#link-object) are valid (using their checksums). -- schedule: every day at 5am +- schedule: **every day at 5am** + +## cron-stac-validate-all-data + +In addition to the checks performed by [cron-stac-validate](#cron-stac-validate), this workflow also validates that the data (the assets) is valid (using their checksums). Verifying all data checksums is costly, so this workflow is run less often than [cron-stac-validate](#cron-stac-validate). + +> **_NOTE:_** Due to the parallelism design, this workflow does not validate the root parent `catalog.json` in order to validate each `collection.json` separately. This is not an issue as the `catalog.json` does not contain any `asset` and is already validated by the [cron-stac-validate](#cron-stac-validate) job. 
+ +- schedule: **every 1st of the month at 5am** diff --git a/workflows/cron/cron-stac-validate-checksums.yaml b/workflows/cron/cron-stac-validate-all-data.yaml similarity index 89% rename from workflows/cron/cron-stac-validate-checksums.yaml rename to workflows/cron/cron-stac-validate-all-data.yaml index 0e97e7157..7b4a786a1 100644 --- a/workflows/cron/cron-stac-validate-checksums.yaml +++ b/workflows/cron/cron-stac-validate-all-data.yaml @@ -2,11 +2,11 @@ apiVersion: argoproj.io/v1alpha1 kind: CronWorkflow metadata: - name: cron-stac-validate-checksums + name: cron-stac-validate-all-data labels: linz.govt.nz/category: stac spec: - schedule: '0 07 1 * *' # 7 AM every 1st of the month + schedule: '0 05 1 * *' # 5 AM every 1st of the month timezone: 'NZ' startingDeadlineSeconds: 3600 # Allow 1 hour delay if the workflow-controller clashes during the starting time. concurrencyPolicy: 'Allow' diff --git a/workflows/cron/cron-stac-validate.yaml b/workflows/cron/cron-stac-validate.yaml index 020185558..1b430eb56 100644 --- a/workflows/cron/cron-stac-validate.yaml +++ b/workflows/cron/cron-stac-validate.yaml @@ -2,11 +2,11 @@ apiVersion: argoproj.io/v1alpha1 kind: CronWorkflow metadata: - name: test-cron-stac-validate + name: cron-stac-validate labels: linz.govt.nz/category: stac spec: - schedule: 56 13 * * * # 7 AM every day + schedule: '0 05 * * *' # 5 AM every day timezone: 'NZ' startingDeadlineSeconds: 3600 # Allow 1 hour delay if the workflow-controller clashes during the starting time. concurrencyPolicy: 'Allow'