From 83a5f5779ad49610166639b872f4d7bfccea8ee8 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 3 Mar 2023 14:24:07 +1300 Subject: [PATCH 01/17] feat: example of running a random bash script --- workflows/test/earthscanner.yaml | 563 +++++++++++++++++++++++++++++++ 1 file changed, 563 insertions(+) create mode 100644 workflows/test/earthscanner.yaml diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml new file mode 100644 index 000000000..c66071dd2 --- /dev/null +++ b/workflows/test/earthscanner.yaml @@ -0,0 +1,563 @@ +--- +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: test-earthscanner- + namespace: argo +spec: + parallelism: 50 + nodeSelector: + karpenter.sh/capacity-type: "spot" + entrypoint: main + synchronization: + semaphore: + configMapKeyRef: + name: semaphores + key: standardising + arguments: + parameters: + - name: version-argo-tasks + value: "v2" + - name: version-basemaps-cli + value: "v6.39.0-15-g3e982390" + - name: version-topo-imagery + value: "v1" + - name: source + value: "s3://linz-topographic-upload/earthscanner/2023-02-JL1KF01B-sample-zip/JL1KF01B_PMSL3_20230221054314_200135719_101_0035_001_L3C" + - name: include + value: ".zip$" + - name: scale + value: "None" + enum: + - "500" + - "1000" + - "2000" + - "5000" + - "10000" + - "50000" + - "None" + - name: source-epsg + value: "32760" + - name: target-epsg + value: "2193" + - name: group + value: "1" + - name: compression + value: "webp" + enum: + - "webp" + - "lzw" + - name: cutline + description: "(Optional) location of a cutline file to cut the imagery to .fgb or .geojson" + value: "" + - name: title + value: "*Region/District/City* *GSD* *Urban/Rural* Aerial Photos (*Year-Year*)" + - name: description + value: "Orthophotography within the *Region Name* region captured in the *Year*-*Year* flying season." + - name: producer + value: "Unknown" + enum: + [ + "Unknown", + "AAM NZ", + "Aerial Surveys", + "Beca", + "Chang Guang Satellite Technology", + "European Space Agency", + "GeoSmart", + "Landpro", + "Maxar", + "NZ Aerial Mapping", + "Recon", + "SkyCan", + "Terralink International", + "UAV Mapping NZ", + ] + - name: licensor + value: "Unknown" + enum: + [ + "Unknown", + "Ashburton District Council", + "Auckland Council", + "BOPLASS", + "Bay of Plenty Regional Council", + "Buller District Council", + "Canterbury Aerial Imagery Consortium (CAI)", + "Carterton District Council", + "Central Hawke's Bay District Council", + "Central Otago District Council", + "Chang Guang Satellite Technology", + "Chatham Islands Council", + "Christchurch City Council", + "Clutha District Council", + "CoLAB", + "Department of Conservation", + "Dunedin City Council", + "Environment Canterbury", + "Environment Southland", + "Far North District Council", + "Gisborne District Council", + "Gore District Council", + "Greater Wellington Regional Council", + "Grey District Council", + "Hamilton City Council", + "Hastings District Council", + "Hauraki District Council", + "Hawke's Bay Local Authority Shared Services (HB LASS)", + "Hawke's Bay Regional Council", + "Horizons Regional Council", + "Horowhenua District Council", + "Hurunui District Council", + "Hutt City Council", + "Invercargill City Council", + "Kaikōura District Council", + "Kaipara District Council", + "Kawerau District Council", + "Kāpiti Coast District Council", + "Mackenzie District Council", + "Manawatū District Council", + "Manawatū-Whanganui LASS", + "Marlborough District Council", + "Masterton District Council", + "Matamata-Piako District Council", + "Maxar Technologies", + "Ministry of Primary Industries", + "NZ Aerial Mapping", + "Napier City Council", + "Nelson City Council", + "New Plymouth District Council", + "Northland Regional Council", + "Ōpōtiki District Council", + "Ōtorohanga District Council", + "Otago Regional Council", + "Palmerston North City Council", + "Porirua City Council", + "Queenstown-Lakes District Council", + "Rangitīkei District Council", + "Rotorua District Council", + "Ruapehu District Council", + "Selwyn District Council", + "Sinergise", + "South Taranaki District Council", + "South Waikato District Council", + "South Wairarapa District Council", + "Southland District Council", + "Stratford District Council", + "Taranaki Regional Council", + "Tararua District Council", + "Tasman District Council", + "Taupō District Council", + "Tauranga City Council", + "Terralink International", + "Thames-Coromandel District Council", + "Timaru District Council", + "Toitū Te Whenua Land Information New Zealand", + "Upper Hutt City Council", + "Waikato District Council", + "Waikato Regional Aerial Photography Service (WRAPS)", + "Waikato Regional Council", + "Waimakariri District Council", + "Waimate District Council", + "Waipā District Council", + "Wairoa District Council", + "Waitaki District Council", + "Waitomo District Council", + "Waka Kotahi", + "Wellington City Council", + "West Coast Regional Council", + "Western Bay of Plenty District Council", + "Westland District Council", + "Whakatāne District Council", + "Whanganui District Council", + "Whangārei District Council", + ] + - name: licensor-list + description: "(Optional) Separate the licensor names by a semi-colon (;). It has no effect unless a semicolon-delimited list is entered." + value: "" + - name: start-datetime + value: "1990-02-20" + - name: end-datetime + value: "1990-02-20" + - name: copy-option + value: "--no-clobber" + enum: + - "--no-clobber" + - "--force" + - "--force-no-clobber" + templateDefaults: + container: + imagePullPolicy: Always + templates: + - name: main + dag: + tasks: + - name: aws-list + template: aws-list + - name: generate-ulid + template: generate-ulid + - name: standardise-validate + template: standardise-validate + arguments: + parameters: + - name: file + value: "{{item}}" + - name: collection-id + value: "{{tasks.generate-ulid.outputs.parameters.ulid}}" + depends: "aws-list && generate-ulid" + withParam: "{{tasks.aws-list.outputs.parameters.files}}" + # - name: flatten + # template: flatten + # arguments: + # parameters: + # - name: location + # value: "{{tasks.get-location.outputs.parameters.location}}" + # depends: "get-location && standardise-validate" + # - name: flatten-copy + # template: flatten-copy + # arguments: + # parameters: + # - name: file + # value: "{{item}}" + # depends: "flatten" + # withParam: "{{tasks.flatten.outputs.parameters.files}}" + # - name: create-collection + # template: create-collection + # arguments: + # parameters: + # - name: collection-id + # value: "{{tasks.generate-ulid.outputs.parameters.ulid}}" + # - name: location + # value: "{{tasks.get-location.outputs.parameters.location}}" + # depends: "flatten-copy" + # - name: stac-validate + # template: stac-validate + # arguments: + # parameters: + # - name: location + # value: "{{tasks.get-location.outputs.parameters.location}}" + # artifacts: + # - name: stac-result + # raw: + # data: "{{tasks.stac-validate.outputs.result}}" + # depends: "create-collection" + # - name: get-location + # template: get-location + # - name: create-overview + # arguments: + # parameters: + # - name: location + # value: "{{tasks.get-location.outputs.parameters.location}}" + # template: create-overview + # depends: "get-location && flatten-copy" + # - name: create-config + # arguments: + # parameters: + # - name: location + # value: "{{tasks.get-location.outputs.parameters.location}}" + # template: create-config + # depends: "get-location && create-overview" + # outputs: + # parameters: + # - name: target + # valueFrom: + # parameter: "{{tasks.get-location.outputs.parameters.location}}" + - name: aws-list + container: + image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" + command: [node, /app/index.js] + env: + - name: AWS_ROLE_CONFIG_PATH + value: s3://linz-bucket-config/config.json + args: + [ + "list", + "--verbose", + "--include", + "{{=sprig.trim(workflow.parameters.include)}}", + "--group", + "{{=sprig.trim(workflow.parameters.group)}}", + "--output", + "/tmp/file_list.json", + "{{=sprig.trim(workflow.parameters.source)}}", + ] + outputs: + parameters: + - name: files + valueFrom: + path: /tmp/file_list.json + - name: generate-ulid + script: + image: "ghcr.io/linz/topo-imagery:{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" + command: [python] + source: | + import ulid + with open("/tmp/ulid", "w") as f: + f.write(str(ulid.ULID())) + outputs: + parameters: + - name: ulid + valueFrom: + path: "/tmp/ulid" + - name: standardise-validate + # retryStrategy: + # limit: "0" + nodeSelector: + karpenter.sh/capacity-type: "spot" + inputs: + parameters: + - name: file + - name: collection-id + script: + image: "ghcr.io/linz/topo-imagery:{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" + resources: + requests: + memory: 7.8Gi + cpu: 15000m + ephemeral-storage: 3Gi + volumeMounts: + - name: ephemeral + mountPath: "/tmp" + command: + - "bash" + source: | + # ensure the script dies if something goes wrong + set -e + apt install jq wget -y + + # grab s5cmd so we can get files from s3 + wget https://github.com/peak/s5cmd/releases/download/v2.0.0/s5cmd_2.0.0_Linux-64bit.tar.gz + tar xvf *.tar.gz + + # parameters are a list of tiffs, this could be expanded into a loop if needed + SOURCE_FILE=$(echo '{{inputs.parameters.file}}' | jq '.[0]' -r) + echo $SOURCE_FILE + ./s5cmd cp $SOURCE_FILE . + + ZIP_NAME=$(basename $SOURCE_FILE) + + ls *.zip + unzip $ZIP_NAME + # no need to keep the zip after its extracted + rm $ZIP_NAME + + INPUT_TIFF=$(ls -1 JL1KF01B*/*.tif) + echo $INPUT_TIFF + + # Grab the metadata with the cutlines + ./s5cmd cp s3://linz-topographic-upload/earthscanner/EarthScanner_Tiles_Metadata_FullCapture.zip . + unzip EarthScanner_Tiles_Metadata_FullCapture.zip + + SENSOR_NAME=$(echo $ZIP_NAME | cut -f2 -d'_') # Get the "PMSR6" from the file name + SCENE_NAME=$(echo $ZIP_NAME | cut -f5-7 -d'_') # get the scene id 101_0034_001 + echo "Sensor:" $SENSOR_NAME "Scene:" $SCENE_NAME + + PRODUCT_ID=$(ogrinfo -ro -al *.dbf | grep productId | grep $SENSOR_NAME | grep $SCENE_NAME | cut -f6- -d' ' ) + echo "Product:" $PRODUCT_ID + + # Conver the tiff into a byte re-ordering the bands that are wrong + gdal_translate \ + -of GTiff \ + -co TILED=YES \ + -co NUM_THREADS=all_cpus \ + -co COMPRESS=zstd \ + -co BIGTIFF=YES \ + -ot byte \ + -scale 0 1500 0 255 \ + -b 3 -b 2 -b 1 \ + -colorinterp red,green,blue \ + $INPUT_TIFF $INPUT_TIFF.cog.tiff + + # tiff from the zip is no longer needed, clear it up to save space + rm $INPUT_TIFF + + ogr2ogr cutline.fgb *.dbf -where "productId = '${PRODUCT_ID}'" + + python /app/scripts/standardise_validate.py \ + --source $INPUT_TIFF.cog.tiff \ + --scale {{workflow.parameters.scale}} \ + --preset {{workflow.parameters.compression}} \ + --start-datetime "{{=sprig.trim(workflow.parameters['start-datetime'])}}" \ + --end-datetime "{{=sprig.trim(workflow.parameters['end-datetime'])}}" \ + --collection-id "{{inputs.parameters.collection-id}}" \ + --source-epsg "{{=sprig.trim(workflow.parameters['source-epsg'])}}" \ + --target-epsg "{{=sprig.trim(workflow.parameters['target-epsg'])}}" \ + --cutline cutline.fgb + + outputs: + artifacts: + - name: standardised_tiffs + path: /tmp/ + archive: + none: {} + - name: flatten + inputs: + parameters: + - name: location + container: + image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" + command: [node, /app/index.js] + env: + - name: AWS_ROLE_CONFIG_PATH + value: s3://linz-bucket-config/config.json + args: + [ + "create-manifest", + "--flatten", + "--verbose", + "--include", + ".tiff?$|.json$", + "--group", + "1000", + "--group-size", + "100Gi", + "--output", + "/tmp/file_list.json", + "--target", + "{{inputs.parameters.location}}flat/", + "{{inputs.parameters.location}}", + ] + outputs: + parameters: + - name: files + valueFrom: + path: /tmp/file_list.json + - name: flatten-copy + retryStrategy: + limit: "2" + inputs: + parameters: + - name: file + container: + image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" + resources: + requests: + memory: 7.8Gi + cpu: 2000m + command: [node, /app/index.js] + args: + [ + "copy", + "{{workflow.parameters.copy-option}}", + "{{inputs.parameters.file}}", + ] + - name: create-collection + retryStrategy: + limit: "2" + nodeSelector: + karpenter.sh/capacity-type: "spot" + inputs: + parameters: + - name: collection-id + - name: location + container: + image: "ghcr.io/linz/topo-imagery:{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" + resources: + requests: + memory: 7.8Gi + cpu: 2000m + command: + - python + - "/app/scripts/collection_from_items.py" + args: + - "--uri" + - "{{inputs.parameters.location}}flat/" + - "--collection-id" + - "{{inputs.parameters.collection-id}}" + - "--title" + - "{{=sprig.trim(workflow.parameters.title)}}" + - "--description" + - "{{=sprig.trim(workflow.parameters.description)}}" + - "--producer" + - "{{workflow.parameters.producer}}" + - "--licensor" + - "{{workflow.parameters.licensor}}" + - "--licensor-list" + - "{{=sprig.trim(workflow.parameters['licensor-list'])}}" + - "--concurrency" + - "25" + - name: stac-validate + inputs: + parameters: + - name: location + container: + image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" + command: [node, /app/index.js] + env: + - name: AWS_ROLE_CONFIG_PATH + value: s3://linz-bucket-config/config.json + args: + [ + "stac-validate", + "--recursive", + "{{inputs.parameters.location}}flat/collection.json", + ] + - name: get-location + script: + image: node:alpine + command: [node] + source: | + const fs = require('fs'); + const loc = JSON.parse(process.env['ARGO_TEMPLATE']).archiveLocation.s3; + const key = loc.key.replace('{{pod.name}}',''); + fs.writeFileSync('/tmp/location', `s3://${loc.bucket}/${key}`); + outputs: + parameters: + - name: location + valueFrom: + path: "/tmp/location" + - name: create-overview + retryStrategy: + limit: "2" + inputs: + parameters: + - name: location + container: + image: "ghcr.io/linz/basemaps/cli:{{=sprig.trim(workflow.parameters['version-basemaps-cli'])}}" + resources: + requests: + cpu: 3000m + memory: 7.8Gi + command: [node, index.cjs] + env: + - name: AWS_ROLE_CONFIG_PATH + value: s3://linz-bucket-config/config.json + args: + [ + "-V", + "create-overview", + "--source", + "{{inputs.parameters.location}}flat/", + "--output", + "{{inputs.parameters.location}}flat/", + ] + - name: create-config + inputs: + parameters: + - name: location + container: + image: "ghcr.io/linz/basemaps/cli:{{=sprig.trim(workflow.parameters['version-basemaps-cli'])}}" + command: [node, index.cjs] + env: + - name: AWS_ROLE_CONFIG_PATH + value: s3://linz-bucket-config/config.json + args: + [ + "-V", + "create-config", + "--path", + "{{inputs.parameters.location}}flat/", + "--output", + "/tmp/url", + "--title", + "{{=sprig.trim(workflow.parameters.title)}}", + "--commit", + ] + outputs: + parameters: + - name: url + valueFrom: + path: "/tmp/url" + volumes: + - name: ephemeral + emptyDir: {} From c0e4992bb2f932b82ec1625c5a8eed28a70d855b Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 3 Mar 2023 15:00:10 +1300 Subject: [PATCH 02/17] feat: run nearblack to remove black edges --- workflows/test/earthscanner.yaml | 122 ++++++++++++++++--------------- 1 file changed, 63 insertions(+), 59 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index c66071dd2..3f8f4c3e8 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -23,7 +23,7 @@ spec: - name: version-topo-imagery value: "v1" - name: source - value: "s3://linz-topographic-upload/earthscanner/2023-02-JL1KF01B-sample-zip/JL1KF01B_PMSL3_20230221054314_200135719_101_0035_001_L3C" + value: "s3://linz-topographic-upload/earthscanner/2023-02-JL1KF01B-sample-zip/" - name: include value: ".zip$" - name: scale @@ -206,62 +206,62 @@ spec: value: "{{tasks.generate-ulid.outputs.parameters.ulid}}" depends: "aws-list && generate-ulid" withParam: "{{tasks.aws-list.outputs.parameters.files}}" - # - name: flatten - # template: flatten - # arguments: - # parameters: - # - name: location - # value: "{{tasks.get-location.outputs.parameters.location}}" - # depends: "get-location && standardise-validate" - # - name: flatten-copy - # template: flatten-copy - # arguments: - # parameters: - # - name: file - # value: "{{item}}" - # depends: "flatten" - # withParam: "{{tasks.flatten.outputs.parameters.files}}" - # - name: create-collection - # template: create-collection - # arguments: - # parameters: - # - name: collection-id - # value: "{{tasks.generate-ulid.outputs.parameters.ulid}}" - # - name: location - # value: "{{tasks.get-location.outputs.parameters.location}}" - # depends: "flatten-copy" - # - name: stac-validate - # template: stac-validate - # arguments: - # parameters: - # - name: location - # value: "{{tasks.get-location.outputs.parameters.location}}" - # artifacts: - # - name: stac-result - # raw: - # data: "{{tasks.stac-validate.outputs.result}}" - # depends: "create-collection" - # - name: get-location - # template: get-location - # - name: create-overview - # arguments: - # parameters: - # - name: location - # value: "{{tasks.get-location.outputs.parameters.location}}" - # template: create-overview - # depends: "get-location && flatten-copy" - # - name: create-config - # arguments: - # parameters: - # - name: location - # value: "{{tasks.get-location.outputs.parameters.location}}" - # template: create-config - # depends: "get-location && create-overview" - # outputs: - # parameters: - # - name: target - # valueFrom: - # parameter: "{{tasks.get-location.outputs.parameters.location}}" + - name: flatten + template: flatten + arguments: + parameters: + - name: location + value: "{{tasks.get-location.outputs.parameters.location}}" + depends: "get-location && standardise-validate" + - name: flatten-copy + template: flatten-copy + arguments: + parameters: + - name: file + value: "{{item}}" + depends: "flatten" + withParam: "{{tasks.flatten.outputs.parameters.files}}" + - name: create-collection + template: create-collection + arguments: + parameters: + - name: collection-id + value: "{{tasks.generate-ulid.outputs.parameters.ulid}}" + - name: location + value: "{{tasks.get-location.outputs.parameters.location}}" + depends: "flatten-copy" + - name: stac-validate + template: stac-validate + arguments: + parameters: + - name: location + value: "{{tasks.get-location.outputs.parameters.location}}" + artifacts: + - name: stac-result + raw: + data: "{{tasks.stac-validate.outputs.result}}" + depends: "create-collection" + - name: get-location + template: get-location + - name: create-overview + arguments: + parameters: + - name: location + value: "{{tasks.get-location.outputs.parameters.location}}" + template: create-overview + depends: "get-location && flatten-copy" + - name: create-config + arguments: + parameters: + - name: location + value: "{{tasks.get-location.outputs.parameters.location}}" + template: create-config + depends: "get-location && create-overview" + outputs: + parameters: + - name: target + valueFrom: + parameter: "{{tasks.get-location.outputs.parameters.location}}" - name: aws-list container: image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" @@ -366,15 +366,19 @@ spec: -scale 0 1500 0 255 \ -b 3 -b 2 -b 1 \ -colorinterp red,green,blue \ - $INPUT_TIFF $INPUT_TIFF.cog.tiff + $INPUT_TIFF $INPUT_TIFF.color.tiff # tiff from the zip is no longer needed, clear it up to save space rm $INPUT_TIFF + # remove the black edges + nearblack -of gtiff $INPUT_TIFF.color.tiff -setalpha -o $INPUT_TIFF.alpha.tiff + rm $INPUT_TIFF.color.tiff + ogr2ogr cutline.fgb *.dbf -where "productId = '${PRODUCT_ID}'" python /app/scripts/standardise_validate.py \ - --source $INPUT_TIFF.cog.tiff \ + --source $INPUT_TIFF.alpha.tiff \ --scale {{workflow.parameters.scale}} \ --preset {{workflow.parameters.compression}} \ --start-datetime "{{=sprig.trim(workflow.parameters['start-datetime'])}}" \ From 9b97d14ac416345dbeb4a447e43f8ad734f32c67 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 3 Mar 2023 16:55:48 +1300 Subject: [PATCH 03/17] refactor: l3d doesnt need color settings --- workflows/test/earthscanner.yaml | 34 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index 3f8f4c3e8..d793d4bdb 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -23,7 +23,7 @@ spec: - name: version-topo-imagery value: "v1" - name: source - value: "s3://linz-topographic-upload/earthscanner/2023-02-JL1KF01B-sample-zip/" + value: "s3://linz-topographic-upload/earthscanner/2023-03-JL1KF01B-raw/" - name: include value: ".zip$" - name: scale @@ -51,7 +51,7 @@ spec: description: "(Optional) location of a cutline file to cut the imagery to .fgb or .geojson" value: "" - name: title - value: "*Region/District/City* *GSD* *Urban/Rural* Aerial Photos (*Year-Year*)" + value: "Earthscanner 0.5m (2023)" - name: description value: "Orthophotography within the *Region Name* region captured in the *Year*-*Year* flying season." - name: producer @@ -355,24 +355,24 @@ spec: PRODUCT_ID=$(ogrinfo -ro -al *.dbf | grep productId | grep $SENSOR_NAME | grep $SCENE_NAME | cut -f6- -d' ' ) echo "Product:" $PRODUCT_ID - # Conver the tiff into a byte re-ordering the bands that are wrong - gdal_translate \ - -of GTiff \ - -co TILED=YES \ - -co NUM_THREADS=all_cpus \ - -co COMPRESS=zstd \ - -co BIGTIFF=YES \ - -ot byte \ - -scale 0 1500 0 255 \ - -b 3 -b 2 -b 1 \ - -colorinterp red,green,blue \ - $INPUT_TIFF $INPUT_TIFF.color.tiff + # # Conver the tiff into a byte re-ordering the bands that are wrong + # gdal_translate \ + # -of GTiff \ + # -co TILED=YES \ + # -co NUM_THREADS=all_cpus \ + # -co COMPRESS=zstd \ + # -co BIGTIFF=YES \ + # -ot byte \ + # -scale 0 1500 0 255 \ + # -b 3 -b 2 -b 1 \ + # -colorinterp red,green,blue \ + # $INPUT_TIFF $INPUT_TIFF.color.tiff - # tiff from the zip is no longer needed, clear it up to save space - rm $INPUT_TIFF + # # tiff from the zip is no longer needed, clear it up to save space + # rm $INPUT_TIFF # remove the black edges - nearblack -of gtiff $INPUT_TIFF.color.tiff -setalpha -o $INPUT_TIFF.alpha.tiff + nearblack -of gtiff $INPUT_TIFF.tiff -setalpha -o $INPUT_TIFF.alpha.tiff rm $INPUT_TIFF.color.tiff ogr2ogr cutline.fgb *.dbf -where "productId = '${PRODUCT_ID}'" From ca9e26597e4cd60421684eea1af4ad60970c20cc Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 3 Mar 2023 17:21:51 +1300 Subject: [PATCH 04/17] refactor: fixup issues based off file names from new delivery names --- workflows/test/earthscanner.yaml | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index d793d4bdb..380f62e35 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -323,6 +323,8 @@ spec: source: | # ensure the script dies if something goes wrong set -e + set -o xtrace + apt install jq wget -y # grab s5cmd so we can get files from s3 @@ -331,25 +333,29 @@ spec: # parameters are a list of tiffs, this could be expanded into a loop if needed SOURCE_FILE=$(echo '{{inputs.parameters.file}}' | jq '.[0]' -r) - echo $SOURCE_FILE - ./s5cmd cp $SOURCE_FILE . + echo "$SOURCE_FILE" + ./s5cmd cp "$SOURCE_FILE" . - ZIP_NAME=$(basename $SOURCE_FILE) + ZIP_NAME=$(basename "$SOURCE_FILE") ls *.zip - unzip $ZIP_NAME + unzip "$ZIP_NAME" # no need to keep the zip after its extracted - rm $ZIP_NAME + rm "$ZIP_NAME" + + ls -R JL1K* - INPUT_TIFF=$(ls -1 JL1KF01B*/*.tif) + INPUT_TIFF=$(ls -1 JL1K*/*.tif) + TIFF_NAME=$(basename $INPUT_TIFF) echo $INPUT_TIFF + # Grab the metadata with the cutlines ./s5cmd cp s3://linz-topographic-upload/earthscanner/EarthScanner_Tiles_Metadata_FullCapture.zip . unzip EarthScanner_Tiles_Metadata_FullCapture.zip - SENSOR_NAME=$(echo $ZIP_NAME | cut -f2 -d'_') # Get the "PMSR6" from the file name - SCENE_NAME=$(echo $ZIP_NAME | cut -f5-7 -d'_') # get the scene id 101_0034_001 + SENSOR_NAME=$(echo $TIFF_NAME | cut -f2 -d'_') # Get the "PMSR6" from the file name + SCENE_NAME=$(echo $TIFF_NAME | cut -f5-7 -d'_') # get the scene id 101_0034_001 echo "Sensor:" $SENSOR_NAME "Scene:" $SCENE_NAME PRODUCT_ID=$(ogrinfo -ro -al *.dbf | grep productId | grep $SENSOR_NAME | grep $SCENE_NAME | cut -f6- -d' ' ) @@ -372,13 +378,13 @@ spec: # rm $INPUT_TIFF # remove the black edges - nearblack -of gtiff $INPUT_TIFF.tiff -setalpha -o $INPUT_TIFF.alpha.tiff - rm $INPUT_TIFF.color.tiff + nearblack -of gtiff $INPUT_TIFF -setalpha -o $TIFF_NAME + # rm $INPUT_TIFF.color.tiff ogr2ogr cutline.fgb *.dbf -where "productId = '${PRODUCT_ID}'" python /app/scripts/standardise_validate.py \ - --source $INPUT_TIFF.alpha.tiff \ + --source $TIFF_NAME \ --scale {{workflow.parameters.scale}} \ --preset {{workflow.parameters.compression}} \ --start-datetime "{{=sprig.trim(workflow.parameters['start-datetime'])}}" \ From 99e629c05bbbaada606b1ad85d86a6680eb552ed Mon Sep 17 00:00:00 2001 From: paul Date: Mon, 6 Mar 2023 14:27:43 +1300 Subject: [PATCH 05/17] wip: update node container to not use docker.io due to rate limit issue --- workflows/test/earthscanner.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index 380f62e35..f9c36ff8c 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -23,7 +23,7 @@ spec: - name: version-topo-imagery value: "v1" - name: source - value: "s3://linz-topographic-upload/earthscanner/2023-03-JL1KF01B-raw/" + value: "s3://linz-satellite-imagery-staging/earthscanner/2023-02-21/L3D/" - name: include value: ".zip$" - name: scale @@ -176,9 +176,9 @@ spec: description: "(Optional) Separate the licensor names by a semi-colon (;). It has no effect unless a semicolon-delimited list is entered." value: "" - name: start-datetime - value: "1990-02-20" + value: "2023-02-21" - name: end-datetime - value: "1990-02-20" + value: "2023-02-21" - name: copy-option value: "--no-clobber" enum: @@ -504,7 +504,7 @@ spec: ] - name: get-location script: - image: node:alpine + image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" command: [node] source: | const fs = require('fs'); From c4930d595c15f5b6fe34ae5cc54356911d3401e7 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 6 Mar 2023 15:56:53 +1300 Subject: [PATCH 06/17] wip: run more nodes at once --- workflows/test/earthscanner.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index f9c36ff8c..dbff789b6 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -5,7 +5,7 @@ metadata: generateName: test-earthscanner- namespace: argo spec: - parallelism: 50 + parallelism: 100 nodeSelector: karpenter.sh/capacity-type: "spot" entrypoint: main From d9d0f824fd545770fac68c94e0418a535c378370 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 6 Mar 2023 16:00:37 +1300 Subject: [PATCH 07/17] feat: set producer --- workflows/test/earthscanner.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index dbff789b6..7de3dbd4d 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -55,7 +55,7 @@ spec: - name: description value: "Orthophotography within the *Region Name* region captured in the *Year*-*Year* flying season." - name: producer - value: "Unknown" + value: "Chang Guang Satellite Technology" enum: [ "Unknown", From cccb3bbf56d4405cd05b57ddf1f24bf9f529d799 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 6 Mar 2023 16:01:10 +1300 Subject: [PATCH 08/17] wip: try without cutline --- workflows/test/earthscanner.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index 7de3dbd4d..da718d608 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -391,8 +391,7 @@ spec: --end-datetime "{{=sprig.trim(workflow.parameters['end-datetime'])}}" \ --collection-id "{{inputs.parameters.collection-id}}" \ --source-epsg "{{=sprig.trim(workflow.parameters['source-epsg'])}}" \ - --target-epsg "{{=sprig.trim(workflow.parameters['target-epsg'])}}" \ - --cutline cutline.fgb + --target-epsg "{{=sprig.trim(workflow.parameters['target-epsg'])}}" outputs: artifacts: @@ -420,7 +419,7 @@ spec: "--group", "1000", "--group-size", - "100Gi", + "50Gi", "--output", "/tmp/file_list.json", "--target", From 0b51086052f280198531933bd28c9c8a24078ca0 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 6 Mar 2023 16:36:00 +1300 Subject: [PATCH 09/17] fix: add retries back in --- workflows/test/earthscanner.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index da718d608..58969542d 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -300,8 +300,8 @@ spec: valueFrom: path: "/tmp/ulid" - name: standardise-validate - # retryStrategy: - # limit: "0" + retryStrategy: + limit: "2" nodeSelector: karpenter.sh/capacity-type: "spot" inputs: From 869f41de384404c06bc9792b469a661ab05019cf Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Tue, 7 Mar 2023 17:17:21 +1300 Subject: [PATCH 10/17] feat: use better colors for l3c --- workflows/test/earthscanner.yaml | 38 +++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index 58969542d..4971d01b5 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -2,10 +2,10 @@ apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: - generateName: test-earthscanner- + generateName: test-earthscanner-l3c- namespace: argo spec: - parallelism: 100 + parallelism: 50 nodeSelector: karpenter.sh/capacity-type: "spot" entrypoint: main @@ -23,7 +23,7 @@ spec: - name: version-topo-imagery value: "v1" - name: source - value: "s3://linz-satellite-imagery-staging/earthscanner/2023-02-21/L3D/" + value: "s3://linz-satellite-imagery-staging/earthscanner/2023-02-21/L3C/" - name: include value: ".zip$" - name: scale @@ -51,7 +51,7 @@ spec: description: "(Optional) location of a cutline file to cut the imagery to .fgb or .geojson" value: "" - name: title - value: "Earthscanner 0.5m (2023)" + value: "Earthscanner L3C 0.5m (2023)" - name: description value: "Orthophotography within the *Region Name* region captured in the *Year*-*Year* flying season." - name: producer @@ -362,24 +362,26 @@ spec: echo "Product:" $PRODUCT_ID # # Conver the tiff into a byte re-ordering the bands that are wrong - # gdal_translate \ - # -of GTiff \ - # -co TILED=YES \ - # -co NUM_THREADS=all_cpus \ - # -co COMPRESS=zstd \ - # -co BIGTIFF=YES \ - # -ot byte \ - # -scale 0 1500 0 255 \ - # -b 3 -b 2 -b 1 \ - # -colorinterp red,green,blue \ - # $INPUT_TIFF $INPUT_TIFF.color.tiff + gdal_translate \ + -of GTiff \ + -co TILED=YES \ + -co NUM_THREADS=all_cpus \ + -co COMPRESS=zstd \ + -co BIGTIFF=YES \ + -ot byte \ + -scale 200 2500 0 255 \ + -scale 300 2100 0 255 \ + -scale 300 1700 0 255 \ + -b 3 -b 2 -b 1 \ + -colorinterp red,green,blue \ + $INPUT_TIFF $INPUT_TIFF.color.tiff # # tiff from the zip is no longer needed, clear it up to save space - # rm $INPUT_TIFF + rm $INPUT_TIFF # remove the black edges - nearblack -of gtiff $INPUT_TIFF -setalpha -o $TIFF_NAME - # rm $INPUT_TIFF.color.tiff + nearblack -of gtiff $INPUT_TIFF.color.tiff -setalpha -o $TIFF_NAME -co NUM_THREADS=ALL_CPUS -co COMPRESS=zstd -co TILED=yes + rm $INPUT_TIFF.color.tiff ogr2ogr cutline.fgb *.dbf -where "productId = '${PRODUCT_ID}'" From d7ae445039cf247d46e5e9c8a1c7a428c48b4297 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Tue, 7 Mar 2023 17:22:22 +1300 Subject: [PATCH 11/17] feat: better color scaling for l3c --- workflows/test/earthscanner.yaml | 20 ++++++++++---------- workflows/test/sleep.yml | 12 ++++++------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index 4971d01b5..68f63f0d0 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -264,7 +264,7 @@ spec: parameter: "{{tasks.get-location.outputs.parameters.location}}" - name: aws-list container: - image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:argo-tasks-{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" command: [node, /app/index.js] env: - name: AWS_ROLE_CONFIG_PATH @@ -288,7 +288,7 @@ spec: path: /tmp/file_list.json - name: generate-ulid script: - image: "ghcr.io/linz/topo-imagery:{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:topo-imagery-{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" command: [python] source: | import ulid @@ -309,7 +309,7 @@ spec: - name: file - name: collection-id script: - image: "ghcr.io/linz/topo-imagery:{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:topo-imagery-{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" resources: requests: memory: 7.8Gi @@ -406,7 +406,7 @@ spec: parameters: - name: location container: - image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:argo-tasks-{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" command: [node, /app/index.js] env: - name: AWS_ROLE_CONFIG_PATH @@ -440,7 +440,7 @@ spec: parameters: - name: file container: - image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:argo-tasks-{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" resources: requests: memory: 7.8Gi @@ -462,7 +462,7 @@ spec: - name: collection-id - name: location container: - image: "ghcr.io/linz/topo-imagery:{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:topo-imagery-{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" resources: requests: memory: 7.8Gi @@ -492,7 +492,7 @@ spec: parameters: - name: location container: - image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:argo-tasks-{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" command: [node, /app/index.js] env: - name: AWS_ROLE_CONFIG_PATH @@ -505,7 +505,7 @@ spec: ] - name: get-location script: - image: "ghcr.io/linz/argo-tasks:{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:argo-tasks-{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" command: [node] source: | const fs = require('fs'); @@ -524,7 +524,7 @@ spec: parameters: - name: location container: - image: "ghcr.io/linz/basemaps/cli:{{=sprig.trim(workflow.parameters['version-basemaps-cli'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:basemaps-cli-{{=sprig.trim(workflow.parameters['version-basemaps-cli'])}}" resources: requests: cpu: 3000m @@ -547,7 +547,7 @@ spec: parameters: - name: location container: - image: "ghcr.io/linz/basemaps/cli:{{=sprig.trim(workflow.parameters['version-basemaps-cli'])}}" + image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:basemaps-cli-{{=sprig.trim(workflow.parameters['version-basemaps-cli'])}}" command: [node, index.cjs] env: - name: AWS_ROLE_CONFIG_PATH diff --git a/workflows/test/sleep.yml b/workflows/test/sleep.yml index d04cda788..1c804e63e 100644 --- a/workflows/test/sleep.yml +++ b/workflows/test/sleep.yml @@ -6,12 +6,12 @@ spec: entrypoint: sleep templates: - name: sleep - nodeSelector: - karpenter.sh/capacity-type: "spot" + # nodeSelector: + # karpenter.sh/capacity-type: "spot" container: - resources: - requests: - memory: 3.9Gi - cpu: 2000m + # resources: + # requests: + # memory: 3.9Gi + # cpu: 2000m image: ubuntu:22.04 command: ["sleep", "3600"] From f24919b3be8ce661af56ecbe4c76cad7f0c00c03 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Tue, 7 Mar 2023 20:17:14 +1300 Subject: [PATCH 12/17] feat: remove nearblack params as it was taking far too long to nearblack --- workflows/test/earthscanner.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index 68f63f0d0..c69841f78 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -351,17 +351,17 @@ spec: # Grab the metadata with the cutlines - ./s5cmd cp s3://linz-topographic-upload/earthscanner/EarthScanner_Tiles_Metadata_FullCapture.zip . - unzip EarthScanner_Tiles_Metadata_FullCapture.zip + # ./s5cmd cp s3://linz-topographic-upload/earthscanner/EarthScanner_Tiles_Metadata_FullCapture.zip . + # unzip EarthScanner_Tiles_Metadata_FullCapture.zip - SENSOR_NAME=$(echo $TIFF_NAME | cut -f2 -d'_') # Get the "PMSR6" from the file name - SCENE_NAME=$(echo $TIFF_NAME | cut -f5-7 -d'_') # get the scene id 101_0034_001 - echo "Sensor:" $SENSOR_NAME "Scene:" $SCENE_NAME + # SENSOR_NAME=$(echo $TIFF_NAME | cut -f2 -d'_') # Get the "PMSR6" from the file name + # SCENE_NAME=$(echo $TIFF_NAME | cut -f5-7 -d'_') # get the scene id 101_0034_001 + # echo "Sensor:" $SENSOR_NAME "Scene:" $SCENE_NAME - PRODUCT_ID=$(ogrinfo -ro -al *.dbf | grep productId | grep $SENSOR_NAME | grep $SCENE_NAME | cut -f6- -d' ' ) - echo "Product:" $PRODUCT_ID + # PRODUCT_ID=$(ogrinfo -ro -al *.dbf | grep productId | grep $SENSOR_NAME | grep $SCENE_NAME | cut -f6- -d' ' ) + # echo "Product:" $PRODUCT_ID - # # Conver the tiff into a byte re-ordering the bands that are wrong + # Convert the tiff into a byte re-ordering the bands that are wrong gdal_translate \ -of GTiff \ -co TILED=YES \ @@ -380,10 +380,10 @@ spec: rm $INPUT_TIFF # remove the black edges - nearblack -of gtiff $INPUT_TIFF.color.tiff -setalpha -o $TIFF_NAME -co NUM_THREADS=ALL_CPUS -co COMPRESS=zstd -co TILED=yes + nearblack -of gtiff $INPUT_TIFF.color.tiff -setalpha -o $TIFF_NAME rm $INPUT_TIFF.color.tiff - ogr2ogr cutline.fgb *.dbf -where "productId = '${PRODUCT_ID}'" + # ogr2ogr cutline.fgb *.dbf -where "productId = '${PRODUCT_ID}'" python /app/scripts/standardise_validate.py \ --source $TIFF_NAME \ From 85ef80c2cbe358a0953b99a5fd75798d1f04090d Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Wed, 8 Mar 2023 11:18:38 +1300 Subject: [PATCH 13/17] refactor: remove commented code] --- workflows/test/earthscanner.yaml | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index c69841f78..ec6fa1a5a 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -349,18 +349,6 @@ spec: TIFF_NAME=$(basename $INPUT_TIFF) echo $INPUT_TIFF - - # Grab the metadata with the cutlines - # ./s5cmd cp s3://linz-topographic-upload/earthscanner/EarthScanner_Tiles_Metadata_FullCapture.zip . - # unzip EarthScanner_Tiles_Metadata_FullCapture.zip - - # SENSOR_NAME=$(echo $TIFF_NAME | cut -f2 -d'_') # Get the "PMSR6" from the file name - # SCENE_NAME=$(echo $TIFF_NAME | cut -f5-7 -d'_') # get the scene id 101_0034_001 - # echo "Sensor:" $SENSOR_NAME "Scene:" $SCENE_NAME - - # PRODUCT_ID=$(ogrinfo -ro -al *.dbf | grep productId | grep $SENSOR_NAME | grep $SCENE_NAME | cut -f6- -d' ' ) - # echo "Product:" $PRODUCT_ID - # Convert the tiff into a byte re-ordering the bands that are wrong gdal_translate \ -of GTiff \ @@ -376,14 +364,12 @@ spec: -colorinterp red,green,blue \ $INPUT_TIFF $INPUT_TIFF.color.tiff - # # tiff from the zip is no longer needed, clear it up to save space + # tiff from the zip is no longer needed, clear it up to save space rm $INPUT_TIFF # remove the black edges nearblack -of gtiff $INPUT_TIFF.color.tiff -setalpha -o $TIFF_NAME rm $INPUT_TIFF.color.tiff - - # ogr2ogr cutline.fgb *.dbf -where "productId = '${PRODUCT_ID}'" python /app/scripts/standardise_validate.py \ --source $TIFF_NAME \ From fc6213facf5567beb399d852917b671782d57211 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 10 Mar 2023 14:00:41 +1300 Subject: [PATCH 14/17] feat: recompress l3c imagery into lzw predictor=yes COGS --- workflows/test/earthscanner.yaml | 102 +++++++++++++------------------ 1 file changed, 41 insertions(+), 61 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index ec6fa1a5a..0a6282608 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -221,42 +221,42 @@ spec: value: "{{item}}" depends: "flatten" withParam: "{{tasks.flatten.outputs.parameters.files}}" - - name: create-collection - template: create-collection - arguments: - parameters: - - name: collection-id - value: "{{tasks.generate-ulid.outputs.parameters.ulid}}" - - name: location - value: "{{tasks.get-location.outputs.parameters.location}}" - depends: "flatten-copy" - - name: stac-validate - template: stac-validate - arguments: - parameters: - - name: location - value: "{{tasks.get-location.outputs.parameters.location}}" - artifacts: - - name: stac-result - raw: - data: "{{tasks.stac-validate.outputs.result}}" - depends: "create-collection" + # - name: create-collection + # template: create-collection + # arguments: + # parameters: + # - name: collection-id + # value: "{{tasks.generate-ulid.outputs.parameters.ulid}}" + # - name: location + # value: "{{tasks.get-location.outputs.parameters.location}}" + # depends: "flatten-copy" + # - name: stac-validate + # template: stac-validate + # arguments: + # parameters: + # - name: location + # value: "{{tasks.get-location.outputs.parameters.location}}" + # artifacts: + # - name: stac-result + # raw: + # data: "{{tasks.stac-validate.outputs.result}}" + # depends: "create-collection" - name: get-location template: get-location - - name: create-overview - arguments: - parameters: - - name: location - value: "{{tasks.get-location.outputs.parameters.location}}" - template: create-overview - depends: "get-location && flatten-copy" - - name: create-config - arguments: - parameters: - - name: location - value: "{{tasks.get-location.outputs.parameters.location}}" - template: create-config - depends: "get-location && create-overview" + # - name: create-overview + # arguments: + # parameters: + # - name: location + # value: "{{tasks.get-location.outputs.parameters.location}}" + # template: create-overview + # depends: "get-location && flatten-copy" + # - name: create-config + # arguments: + # parameters: + # - name: location + # value: "{{tasks.get-location.outputs.parameters.location}}" + # template: create-config + # depends: "get-location && create-overview" outputs: parameters: - name: target @@ -272,6 +272,8 @@ spec: args: [ "list", + "--limit", + "1", "--verbose", "--include", "{{=sprig.trim(workflow.parameters.include)}}", @@ -351,35 +353,13 @@ spec: # Convert the tiff into a byte re-ordering the bands that are wrong gdal_translate \ - -of GTiff \ - -co TILED=YES \ + -of COG \ + -co COMPRESS=lzw \ + -co PREDICTOR=yes \ + -co BLOCKSIZE=512 \ -co NUM_THREADS=all_cpus \ - -co COMPRESS=zstd \ -co BIGTIFF=YES \ - -ot byte \ - -scale 200 2500 0 255 \ - -scale 300 2100 0 255 \ - -scale 300 1700 0 255 \ - -b 3 -b 2 -b 1 \ - -colorinterp red,green,blue \ - $INPUT_TIFF $INPUT_TIFF.color.tiff - - # tiff from the zip is no longer needed, clear it up to save space - rm $INPUT_TIFF - - # remove the black edges - nearblack -of gtiff $INPUT_TIFF.color.tiff -setalpha -o $TIFF_NAME - rm $INPUT_TIFF.color.tiff - - python /app/scripts/standardise_validate.py \ - --source $TIFF_NAME \ - --scale {{workflow.parameters.scale}} \ - --preset {{workflow.parameters.compression}} \ - --start-datetime "{{=sprig.trim(workflow.parameters['start-datetime'])}}" \ - --end-datetime "{{=sprig.trim(workflow.parameters['end-datetime'])}}" \ - --collection-id "{{inputs.parameters.collection-id}}" \ - --source-epsg "{{=sprig.trim(workflow.parameters['source-epsg'])}}" \ - --target-epsg "{{=sprig.trim(workflow.parameters['target-epsg'])}}" + $INPUT_TIFF /tmp/${TIFF_NAME}f outputs: artifacts: From 2bc47f2e769df9cfdf706853b310e97924b2109d Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 10 Mar 2023 14:32:34 +1300 Subject: [PATCH 15/17] feat: copy xml and jpg files --- workflows/test/earthscanner.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index 0a6282608..8650517d8 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -347,6 +347,10 @@ spec: ls -R JL1K* + # copy xml and jpg files to output in /tmp + mv JL1K*/*.xml /tmp/ + mv JL1K*/*.jpg /tmp/ + INPUT_TIFF=$(ls -1 JL1K*/*.tif) TIFF_NAME=$(basename $INPUT_TIFF) echo $INPUT_TIFF From 8ca9c95c7832c765cac112368fc2ebb83a29a693 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 10 Mar 2023 14:45:54 +1300 Subject: [PATCH 16/17] fix: copy xml and jpeg too --- workflows/test/earthscanner.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index 8650517d8..ec96947a4 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -387,7 +387,7 @@ spec: "--flatten", "--verbose", "--include", - ".tiff?$|.json$", + ".tiff?$|.json$|.xml$|.jpeg$", "--group", "1000", "--group-size", From 40312813af566addc48a2e16eb464b7f8afcbc50 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Sun, 12 Mar 2023 12:06:33 +1300 Subject: [PATCH 17/17] wip --- workflows/test/earthscanner.yaml | 337 +------------------------------ 1 file changed, 7 insertions(+), 330 deletions(-) diff --git a/workflows/test/earthscanner.yaml b/workflows/test/earthscanner.yaml index ec96947a4..8845b60d1 100644 --- a/workflows/test/earthscanner.yaml +++ b/workflows/test/earthscanner.yaml @@ -2,7 +2,7 @@ apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: - generateName: test-earthscanner-l3c- + generateName: test-earthscanner-cog- namespace: argo spec: parallelism: 50 @@ -23,162 +23,11 @@ spec: - name: version-topo-imagery value: "v1" - name: source - value: "s3://linz-satellite-imagery-staging/earthscanner/2023-02-21/L3C/" + value: "s3://linz-workflow-artifacts/2023-03/07-test-earthscanner-l3c-wbqs6/flat/" - name: include - value: ".zip$" - - name: scale - value: "None" - enum: - - "500" - - "1000" - - "2000" - - "5000" - - "10000" - - "50000" - - "None" - - name: source-epsg - value: "32760" - - name: target-epsg - value: "2193" + value: ".tiff$" - name: group value: "1" - - name: compression - value: "webp" - enum: - - "webp" - - "lzw" - - name: cutline - description: "(Optional) location of a cutline file to cut the imagery to .fgb or .geojson" - value: "" - - name: title - value: "Earthscanner L3C 0.5m (2023)" - - name: description - value: "Orthophotography within the *Region Name* region captured in the *Year*-*Year* flying season." - - name: producer - value: "Chang Guang Satellite Technology" - enum: - [ - "Unknown", - "AAM NZ", - "Aerial Surveys", - "Beca", - "Chang Guang Satellite Technology", - "European Space Agency", - "GeoSmart", - "Landpro", - "Maxar", - "NZ Aerial Mapping", - "Recon", - "SkyCan", - "Terralink International", - "UAV Mapping NZ", - ] - - name: licensor - value: "Unknown" - enum: - [ - "Unknown", - "Ashburton District Council", - "Auckland Council", - "BOPLASS", - "Bay of Plenty Regional Council", - "Buller District Council", - "Canterbury Aerial Imagery Consortium (CAI)", - "Carterton District Council", - "Central Hawke's Bay District Council", - "Central Otago District Council", - "Chang Guang Satellite Technology", - "Chatham Islands Council", - "Christchurch City Council", - "Clutha District Council", - "CoLAB", - "Department of Conservation", - "Dunedin City Council", - "Environment Canterbury", - "Environment Southland", - "Far North District Council", - "Gisborne District Council", - "Gore District Council", - "Greater Wellington Regional Council", - "Grey District Council", - "Hamilton City Council", - "Hastings District Council", - "Hauraki District Council", - "Hawke's Bay Local Authority Shared Services (HB LASS)", - "Hawke's Bay Regional Council", - "Horizons Regional Council", - "Horowhenua District Council", - "Hurunui District Council", - "Hutt City Council", - "Invercargill City Council", - "Kaikōura District Council", - "Kaipara District Council", - "Kawerau District Council", - "Kāpiti Coast District Council", - "Mackenzie District Council", - "Manawatū District Council", - "Manawatū-Whanganui LASS", - "Marlborough District Council", - "Masterton District Council", - "Matamata-Piako District Council", - "Maxar Technologies", - "Ministry of Primary Industries", - "NZ Aerial Mapping", - "Napier City Council", - "Nelson City Council", - "New Plymouth District Council", - "Northland Regional Council", - "Ōpōtiki District Council", - "Ōtorohanga District Council", - "Otago Regional Council", - "Palmerston North City Council", - "Porirua City Council", - "Queenstown-Lakes District Council", - "Rangitīkei District Council", - "Rotorua District Council", - "Ruapehu District Council", - "Selwyn District Council", - "Sinergise", - "South Taranaki District Council", - "South Waikato District Council", - "South Wairarapa District Council", - "Southland District Council", - "Stratford District Council", - "Taranaki Regional Council", - "Tararua District Council", - "Tasman District Council", - "Taupō District Council", - "Tauranga City Council", - "Terralink International", - "Thames-Coromandel District Council", - "Timaru District Council", - "Toitū Te Whenua Land Information New Zealand", - "Upper Hutt City Council", - "Waikato District Council", - "Waikato Regional Aerial Photography Service (WRAPS)", - "Waikato Regional Council", - "Waimakariri District Council", - "Waimate District Council", - "Waipā District Council", - "Wairoa District Council", - "Waitaki District Council", - "Waitomo District Council", - "Waka Kotahi", - "Wellington City Council", - "West Coast Regional Council", - "Western Bay of Plenty District Council", - "Westland District Council", - "Whakatāne District Council", - "Whanganui District Council", - "Whangārei District Council", - ] - - name: licensor-list - description: "(Optional) Separate the licensor names by a semi-colon (;). It has no effect unless a semicolon-delimited list is entered." - value: "" - - name: start-datetime - value: "2023-02-21" - - name: end-datetime - value: "2023-02-21" - name: copy-option value: "--no-clobber" enum: @@ -194,17 +43,13 @@ spec: tasks: - name: aws-list template: aws-list - - name: generate-ulid - template: generate-ulid - name: standardise-validate template: standardise-validate arguments: parameters: - name: file value: "{{item}}" - - name: collection-id - value: "{{tasks.generate-ulid.outputs.parameters.ulid}}" - depends: "aws-list && generate-ulid" + depends: "aws-list" withParam: "{{tasks.aws-list.outputs.parameters.files}}" - name: flatten template: flatten @@ -221,42 +66,8 @@ spec: value: "{{item}}" depends: "flatten" withParam: "{{tasks.flatten.outputs.parameters.files}}" - # - name: create-collection - # template: create-collection - # arguments: - # parameters: - # - name: collection-id - # value: "{{tasks.generate-ulid.outputs.parameters.ulid}}" - # - name: location - # value: "{{tasks.get-location.outputs.parameters.location}}" - # depends: "flatten-copy" - # - name: stac-validate - # template: stac-validate - # arguments: - # parameters: - # - name: location - # value: "{{tasks.get-location.outputs.parameters.location}}" - # artifacts: - # - name: stac-result - # raw: - # data: "{{tasks.stac-validate.outputs.result}}" - # depends: "create-collection" - name: get-location template: get-location - # - name: create-overview - # arguments: - # parameters: - # - name: location - # value: "{{tasks.get-location.outputs.parameters.location}}" - # template: create-overview - # depends: "get-location && flatten-copy" - # - name: create-config - # arguments: - # parameters: - # - name: location - # value: "{{tasks.get-location.outputs.parameters.location}}" - # template: create-config - # depends: "get-location && create-overview" outputs: parameters: - name: target @@ -288,19 +99,6 @@ spec: - name: files valueFrom: path: /tmp/file_list.json - - name: generate-ulid - script: - image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:topo-imagery-{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" - command: [python] - source: | - import ulid - with open("/tmp/ulid", "w") as f: - f.write(str(ulid.ULID())) - outputs: - parameters: - - name: ulid - valueFrom: - path: "/tmp/ulid" - name: standardise-validate retryStrategy: limit: "2" @@ -309,7 +107,6 @@ spec: inputs: parameters: - name: file - - name: collection-id script: image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:topo-imagery-{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" resources: @@ -338,32 +135,15 @@ spec: echo "$SOURCE_FILE" ./s5cmd cp "$SOURCE_FILE" . - ZIP_NAME=$(basename "$SOURCE_FILE") - - ls *.zip - unzip "$ZIP_NAME" - # no need to keep the zip after its extracted - rm "$ZIP_NAME" + TIFF_NAME=$(basename "$SOURCE_FILE") - ls -R JL1K* - - # copy xml and jpg files to output in /tmp - mv JL1K*/*.xml /tmp/ - mv JL1K*/*.jpg /tmp/ - - INPUT_TIFF=$(ls -1 JL1K*/*.tif) - TIFF_NAME=$(basename $INPUT_TIFF) - echo $INPUT_TIFF - - # Convert the tiff into a byte re-ordering the bands that are wrong gdal_translate \ -of COG \ -co COMPRESS=lzw \ -co PREDICTOR=yes \ -co BLOCKSIZE=512 \ -co NUM_THREADS=all_cpus \ - -co BIGTIFF=YES \ - $INPUT_TIFF /tmp/${TIFF_NAME}f + $TIFF_NAME /tmp/${TIFF_NAME} outputs: artifacts: @@ -387,7 +167,7 @@ spec: "--flatten", "--verbose", "--include", - ".tiff?$|.json$|.xml$|.jpeg$", + ".tiff?$|.json$", "--group", "1000", "--group-size", @@ -422,57 +202,6 @@ spec: "{{workflow.parameters.copy-option}}", "{{inputs.parameters.file}}", ] - - name: create-collection - retryStrategy: - limit: "2" - nodeSelector: - karpenter.sh/capacity-type: "spot" - inputs: - parameters: - - name: collection-id - - name: location - container: - image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:topo-imagery-{{=sprig.trim(workflow.parameters['version-topo-imagery'])}}" - resources: - requests: - memory: 7.8Gi - cpu: 2000m - command: - - python - - "/app/scripts/collection_from_items.py" - args: - - "--uri" - - "{{inputs.parameters.location}}flat/" - - "--collection-id" - - "{{inputs.parameters.collection-id}}" - - "--title" - - "{{=sprig.trim(workflow.parameters.title)}}" - - "--description" - - "{{=sprig.trim(workflow.parameters.description)}}" - - "--producer" - - "{{workflow.parameters.producer}}" - - "--licensor" - - "{{workflow.parameters.licensor}}" - - "--licensor-list" - - "{{=sprig.trim(workflow.parameters['licensor-list'])}}" - - "--concurrency" - - "25" - - name: stac-validate - inputs: - parameters: - - name: location - container: - image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:argo-tasks-{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" - command: [node, /app/index.js] - env: - - name: AWS_ROLE_CONFIG_PATH - value: s3://linz-bucket-config/config.json - args: - [ - "stac-validate", - "--recursive", - "{{inputs.parameters.location}}flat/collection.json", - ] - name: get-location script: image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:argo-tasks-{{=sprig.trim(workflow.parameters['version-argo-tasks'])}}" @@ -487,58 +216,6 @@ spec: - name: location valueFrom: path: "/tmp/location" - - name: create-overview - retryStrategy: - limit: "2" - inputs: - parameters: - - name: location - container: - image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:basemaps-cli-{{=sprig.trim(workflow.parameters['version-basemaps-cli'])}}" - resources: - requests: - cpu: 3000m - memory: 7.8Gi - command: [node, index.cjs] - env: - - name: AWS_ROLE_CONFIG_PATH - value: s3://linz-bucket-config/config.json - args: - [ - "-V", - "create-overview", - "--source", - "{{inputs.parameters.location}}flat/", - "--output", - "{{inputs.parameters.location}}flat/", - ] - - name: create-config - inputs: - parameters: - - name: location - container: - image: "019359803926.dkr.ecr.ap-southeast-2.amazonaws.com/eks:basemaps-cli-{{=sprig.trim(workflow.parameters['version-basemaps-cli'])}}" - command: [node, index.cjs] - env: - - name: AWS_ROLE_CONFIG_PATH - value: s3://linz-bucket-config/config.json - args: - [ - "-V", - "create-config", - "--path", - "{{inputs.parameters.location}}flat/", - "--output", - "/tmp/url", - "--title", - "{{=sprig.trim(workflow.parameters.title)}}", - "--commit", - ] - outputs: - parameters: - - name: url - valueFrom: - path: "/tmp/url" volumes: - name: ephemeral emptyDir: {}