Skip to content

Commit

Permalink
feat: generate-argo-commands
Browse files Browse the repository at this point in the history
  • Loading branch information
MDavidson17 committed Apr 17, 2023
1 parent 3a153ac commit 857cc93
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 0 deletions.
16 changes: 16 additions & 0 deletions tools/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Tools

This folder contains single-use scripts that have been used to assist in running Argo workflows.
The scripts are kept in this folder in case they become useful again in the future.

## generate-argo-cli-commands.py

**Date:** 14/02/2023

**Related Jira Tickets:** [TDE-632](https://toitutewhenua.atlassian.net/jira/software/c/projects/TDE/boards/768/backlog?atlOrigin=eyJpIjoiNjVkNmMyNmNmNGJlNDIzOGI2YmIyMzViNzVkNDUwZjEiLCJwIjoiaiJ9); [TDE-631](https://toitutewhenua.atlassian.net/browse/TDE-631?atlOrigin=eyJpIjoiNDI5OGE5MGY5ZmUxNGUyNzkwZjdlYTcxOTg5ZmQ0MGUiLCJwIjoiaiJ9)

**Description:** This script was written to allow numerous imagery datasets to be processed using the Argo CLI.

**Additional Resources/links:**

- [CSV](https://linzsrm.sharepoint.com/:x:/r/sites/Topography/_layouts/15/Doc.aspx?sourcedoc=%7B508567E2-EF88-458B-9115-0FC719CAA540%7D&file=imagery-standardising-parameters-bulk-process.xlsx&action=default&mobileredirect=true)
200 changes: 200 additions & 0 deletions tools/generate-argo-cli-commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import csv
from typing import List, Optional

import yaml
from linz_logger import get_log

# nb: CHANGE if working from a different source
# SOURCE = "s3://linz-data-lake-raster-prod/"
SOURCE = "s3://linz-raster-data-store/"

# CSV of per-dataset parameters; resolved relative to the current working directory.
PARAMETERS_CSV = "./imagery-standardising-parameters-bulk-process.csv"

# Read the allowed producer / licensor / scale values from the "enum" lists of the
# standardising workflow's parameters. The path is relative to the current working
# directory, so the script has to be run from inside the tools folder.
# The encoding is given explicitly so the file decodes the same way on every platform:
# names handled by this script contain non-ASCII characters (e.g. "Kāpiti Coast
# District Council") and the generated parameter files are written as UTF-8 too.
with open("../workflows/imagery/standardising.yaml", "r", encoding="utf-8") as f:
    workflow = yaml.load(f, Loader=yaml.loader.SafeLoader)

# NOTE: if the workflow has no parameter with one of these names, the matching
# constant is never assigned and the validator that reads it raises NameError.
for parameter in workflow["spec"]["arguments"]["parameters"]:
    if parameter["name"] == "producer":
        PRODUCERS = parameter["enum"]
    if parameter["name"] == "licensor":
        LICENSORS = parameter["enum"]
    if parameter["name"] == "scale":
        SCALES = parameter["enum"]

# "argo submit" command lines collected by main():
# spi_list -> written to standardise-publish-import.sh (rows with no basemaps path)
# sp_list  -> written to standardise-publish.sh (rows that already have a basemaps path)
spi_list: List[str] = []
sp_list: List[str] = []


def _format_date(date: str) -> str:
fd_lst = date.split("/")
year = fd_lst[2]
day = f"{int(fd_lst[0]):02}"
month = f"{int(fd_lst[1]):02}"
return f"{year}-{month}-{day}"


def _validate_licensor(licensor: str) -> Optional[str]:
    """Map a licensor name from the CSV onto the value passed to the workflow.

    Resolution order:
      1. a name already present in ``LICENSORS`` is returned unchanged;
      2. a known alternative spelling is replaced by its fixed substitute
         (the substitute itself is not re-checked against ``LICENSORS``);
      3. a name containing ``" and "`` is treated as several licensors and
         returned with every ``" and "`` replaced by ``";"``.

    Args:
        licensor: licensor name as written in the CSV.

    Returns:
        The resolved licensor string, or ``None`` when no rule applies.
    """
    if licensor in LICENSORS:
        return licensor

    kapiti = "Kāpiti Coast District Council"
    substitutes = {
        "BOPLASS Limited": "BOPLASS",
        "Kapiti Coast District Council": kapiti,
        "Kapiti District Council": kapiti,
        "The Canterbury Aerial Imagery (CAI) Consortium": "Canterbury Aerial Imagery Consortium (CAI)",
        "Hawke's Bay Local Authority Shared Services (HBLASS)": "Hawke's Bay Local Authority Shared Services (HB LASS)",
        "Central Hawkes Bay District Council": "Central Hawke's Bay District Council",
        "Thames Coromandel District Council": "Thames-Coromandel District Council",
        "Waikato Regional Aerial Photography Service (WRAPS) 2017-2019": "Waikato Regional Aerial Photography Service (WRAPS)",
        "Northland Aerial Imagery Consortium (NAIC)": "Northland Aerial Imagery Consortium (NAIC)",
        "AAM NZ Limited": "AAM NZ",
    }
    substitute = substitutes.get(licensor)
    if substitute is not None:
        return substitute

    separator = " and "
    return licensor.replace(separator, ";") if separator in licensor else None


def _validate_producer(producer: str) -> Optional[str]:
    """Map a producer name from the CSV onto the value passed to the workflow.

    A name already present in ``PRODUCERS`` is returned unchanged. Otherwise a
    small table of known alternative spellings (company-suffix variants such
    as "Ltd" / "Limited") is consulted.

    Args:
        producer: producer name as written in the CSV.

    Returns:
        The resolved producer name, or ``None`` when it is not recognised.
    """
    if producer in PRODUCERS:
        return producer

    substitutes = {
        "NZ Aerial Mapping Ltd": "NZ Aerial Mapping",
        "Aerial Surveys Ltd": "Aerial Surveys",
        "Aerial Surveys Limited": "Aerial Surveys",
        "AAM NZ Limited": "AAM NZ",
        "Landpro Ltd": "Landpro",
        "UAV Mapping NZ Ltd": "UAV Mapping NZ",
    }
    # dict.get yields None for an unknown producer, matching the "not recognised" result.
    return substitutes.get(producer)


def _validate_scale(scale: str) -> Optional[str]:
    """Return ``scale`` unchanged when it is listed in ``SCALES``, otherwise ``None``."""
    return scale if scale in SCALES else None

def _dataset_name(target: str) -> str:
    """Return the dataset folder name from a target path.

    Removes trailing slashes and one trailing ``/rgb/2193`` suffix, then
    returns the last remaining path component.

    ``str.rstrip("/rgb/2193/")`` must not be used for this: it strips any
    trailing run of the characters ``/ r g b 1 2 3 9`` rather than the literal
    suffix, so a name such as ``gisborne_2012-2013`` would lose its final
    characters.
    """
    suffix = "/rgb/2193"
    path = target.rstrip("/")
    if path.endswith(suffix):
        path = path[: -len(suffix)].rstrip("/")
    return path.split("/")[-1]


def main() -> None:
    """Generate argo parameter files and submit scripts from the parameters CSV.

    For every CSV row whose source starts with ``SOURCE``:
      * rows with a non-empty "Comment" column are skipped with a warning;
      * rows whose licensor, producer or scale cannot be validated are skipped
        with a warning;
      * otherwise a ``<dataset-name>.yaml`` parameter file is written to the
        current directory and an ``argo submit`` command line is queued.

    Rows without a basemaps path also get the basemaps import parameters and
    are queued in ``spi_list``; all other rows are queued in ``sp_list``.
    After the CSV has been read the queued commands are written to
    ``standardise-publish.sh`` and ``standardise-publish-import.sh``.

    Raises:
        ValueError: if an expected column is missing from the CSV header.
        StopIteration: if the CSV is empty (there is no header row).
    """
    command = "argo submit ~/dev/topo-workflows/workflows/imagery/standardising-publish-import.yaml -n argo -f ./{0}.yaml --generate-name ispi-{1}-\n"

    # newline="" lets the csv module handle line endings and quoted newlines itself,
    # as recommended by the csv module documentation.
    with open(PARAMETERS_CSV, "r", newline="") as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader)

        # Resolve column positions by name so the column order in the CSV does not matter.
        ind_comment = header.index("Comment")
        ind_source = header.index("source")
        ind_target = header.index("target")
        ind_scale = header.index("scale")
        ind_title = header.index("Title")
        ind_licensor = header.index("licensor(s)")
        ind_producer = header.index("producer(s)")
        ind_description = header.index("description")
        ind_startdate = header.index("start_datetime")
        ind_enddate = header.index("end_datetime")
        ind_basemaps = header.index("basemaps s3 path")

        for row in reader:
            # Rows from other source locations are ignored silently.
            if not row[ind_source].startswith(SOURCE):
                continue

            if row[ind_comment] != "":
                get_log().warning(
                    "skipped: comment",
                    comment=row[ind_comment],
                    source=row[ind_source],
                    title=row[ind_title],
                )
                continue

            params = {
                # Normalise the source to end with exactly one "/".
                "source": row[ind_source].rstrip("/") + "/",
                "target": row[ind_target],
                "scale": _validate_scale(row[ind_scale]),
                "title": row[ind_title],
                "description": row[ind_description],
                "producer": _validate_producer(row[ind_producer]),
                "start-datetime": _format_date(row[ind_startdate]),
                "end-datetime": _format_date(row[ind_enddate]),
            }

            # A ";" in the validated value means several licensors: those go in
            # "licensor-list" and "licensor" is left empty (and vice versa).
            licensor = _validate_licensor(row[ind_licensor])
            if licensor and ";" in licensor:
                params["licensor-list"] = licensor
                params["licensor"] = ""
            else:
                params["licensor"] = licensor
                params["licensor-list"] = ""

            if not params["licensor"] and params["licensor-list"] == "":
                get_log().warning(
                    "skipped: invalid licensor",
                    licensor=row[ind_licensor],
                    source=row[ind_source],
                    title=row[ind_title],
                )
                continue

            if not params["producer"]:
                get_log().warning(
                    "skipped: invalid producer",
                    producer=row[ind_producer],
                    source=row[ind_source],
                    title=row[ind_title],
                )
                continue

            if not params["scale"]:
                get_log().warning(
                    "skipped: invalid scale",
                    scale=f"{row[ind_scale]}",
                    source=row[ind_source],
                    title=row[ind_title],
                )
                continue

            file_name = _dataset_name(row[ind_target])
            # "_" and "." are replaced with "-" for the parameter file name and
            # the --generate-name prefix.
            formatted_file_name = file_name.replace("_", "-").replace(".", "-")

            if row[ind_basemaps] == "":
                get_log().info(
                    "basemaps import required",
                    source=row[ind_source],
                    title=row[ind_title],
                )
                bm_params = {
                    "category": "Urban Aerial Photos",
                    "name": file_name,
                    "tile-matrix": "NZTM2000Quad/WebMercatorQuad",
                    "blend": "20",
                    "aligned-level": "6",
                    "create-pull-request": "true",
                }
                params = {**params, **bm_params}
                spi_list.append(command.format(formatted_file_name, formatted_file_name))
            else:
                sp_list.append(command.format(formatted_file_name, formatted_file_name))

            with open(f"./{formatted_file_name}.yaml", "w", encoding="utf-8") as output:
                yaml.dump(
                    params,
                    output,
                    default_flow_style=False,
                    default_style='"',
                    sort_keys=False,
                    allow_unicode=True,
                    width=1000,
                )

    with open("standardise-publish.sh", "w") as script:
        script.write("#!/bin/bash\n\n")
        script.writelines(sp_list)

    with open("standardise-publish-import.sh", "w") as script:
        script.write("#!/bin/bash\n\n")
        script.writelines(spi_list)

# Run only when the file is executed as a script, so that loading it as a
# module does not start reading the CSV and writing output files.
if __name__ == "__main__":
    main()

0 comments on commit 857cc93

Please sign in to comment.