feat: generate-argo-commands

fix: format python file with black fix: format README fix: move csv path to an easy to find location fix: move filename declaration fix: extend yaml.dump line length, rstrip /, add empty licensor-list fix: add lincensors, unknown provider, fix "and" bug fix: format feat: use latest version of basemaps-cli feat: TO DELETE if merged test files already run fix: issue with tasman deleted in linz-imagery WIP: workflow update will need to be reversed if merged test: hurunui and cantabury checked feat: generate-name based on filename chore: update progress fix: add / to end of source to ensure only loops source folder feat: update to run for s3://linz-raster-data-store feat: update for raster data store feat: revert versions
linz · Mar 30, 2023 · 2df6a7e · 2df6a7e
1 parent 71b6876
commit 2df6a7e
Show file tree

Hide file tree

Showing 2 changed files with 216 additions and 0 deletions.
diff --git a/tools/README.md b/tools/README.md
@@ -0,0 +1,16 @@
+# Tools
+
+This folder contains single use scripts which have been used to assist in running argo workflows.
+The scripts are stored in this folder if it is thought they may become useful again in the future.
+
+## generate-argo-commands-imagery.py
+
+**Date:** 14/02/2023
+
+**Related Jira Tickets:** [TDE-632](https://toitutewhenua.atlassian.net/jira/software/c/projects/TDE/boards/768/backlog?atlOrigin=eyJpIjoiNjVkNmMyNmNmNGJlNDIzOGI2YmIyMzViNzVkNDUwZjEiLCJwIjoiaiJ9); [TDE-631](https://toitutewhenua.atlassian.net/browse/TDE-631?atlOrigin=eyJpIjoiNDI5OGE5MGY5ZmUxNGUyNzkwZjdlYTcxOTg5ZmQ0MGUiLCJwIjoiaiJ9)
+
+**Description:** This script was generated to allow for the processing of numerous imagery datasets using the argo cli.
+
+**Additional Resources/links:**
+
+- [CSV](https://linzsrm.sharepoint.com/:x:/r/sites/Topography/_layouts/15/Doc.aspx?sourcedoc=%7B508567E2-EF88-458B-9115-0FC719CAA540%7D&file=imagery-standardising-parameters-bulk-process.xlsx&action=default&mobileredirect=true)
diff --git a/tools/generate-argo-cli-commands.py b/tools/generate-argo-cli-commands.py
@@ -0,0 +1,200 @@
+import csv
+from typing import List, Optional
+
+import yaml
+from linz_logger import get_log
+
+# nb: CHANGE if working from a different source
+# SOURCE = "s3://linz-data-lake-raster-prod/"
+SOURCE = "s3://linz-raster-data-store/"
+
+PARAMETERS_CSV = "./imagery-standardising-parameters-bulk-process.csv"
+with open("../workflows/imagery/standardising.yaml", "r") as f:
+    workflow = yaml.load(f, Loader=yaml.loader.SafeLoader)
+    for parameter in workflow["spec"]["arguments"]["parameters"]:
+        if parameter["name"] == "producer":
+            PRODUCERS = parameter["enum"]
+        if parameter["name"] == "licensor":
+            LICENSORS = parameter["enum"]
+        if parameter["name"] == "scale":
+            SCALES = parameter["enum"]
+
+spi_list = []
+sp_list = []
+
+
+def _format_date(date: str) -> str:
+    fd_lst = date.split("/")
+    year = fd_lst[2]
+    day = f"{int(fd_lst[0]):02}"
+    month = f"{int(fd_lst[1]):02}"
+    return f"{year}-{month}-{day}"
+
+
+def _validate_licensor(licensor: str) -> Optional[str]:
+    if licensor in LICENSORS:
+        return licensor
+    if licensor == "BOPLASS Limited":
+        return "BOPLASS"
+    if licensor == "Kapiti Coast District Council" or licensor == "Kapiti District Council":
+        return "Kāpiti Coast District Council"
+    if licensor == "The Canterbury Aerial Imagery (CAI) Consortium":
+        return "Canterbury Aerial Imagery Consortium (CAI)"
+    if licensor == "Hawke's Bay Local Authority Shared Services (HBLASS)":
+        return "Hawke's Bay Local Authority Shared Services (HB LASS)"
+    if licensor == "Central Hawkes Bay District Council":
+        return "Central Hawke's Bay District Council"
+    if licensor == "Thames Coromandel District Council":
+        return "Thames-Coromandel District Council"
+    if licensor == "Waikato Regional Aerial Photography Service (WRAPS) 2017-2019":
+        return "Waikato Regional Aerial Photography Service (WRAPS)"
+    if licensor == "Northland Aerial Imagery Consortium (NAIC)":
+        return "Northland Aerial Imagery Consortium (NAIC)"
+    if licensor == "AAM NZ Limited":
+        return "AAM NZ"
+    if " and " in licensor:
+        return licensor.replace(" and ", ";")
+    return None
+
+
+def _validate_producer(producer: str) -> Optional[str]:
+    if producer in PRODUCERS:
+        return producer
+    elif producer == "NZ Aerial Mapping Ltd":
+        return "NZ Aerial Mapping"
+    elif producer == "Aerial Surveys Ltd" or producer == "Aerial Surveys Limited":
+        return "Aerial Surveys"
+    elif producer == "AAM NZ Limited":
+        return "AAM NZ"
+    elif producer == "Landpro Ltd":
+        return "Landpro"
+    elif producer == "UAV Mapping NZ Ltd":
+        return "UAV Mapping NZ"
+    return None
+
+
+def _validate_scale(scale: str) -> Optional[str]:
+    if scale in SCALES:
+        return scale
+    return None
+
+def main() -> None:
+    with open(PARAMETERS_CSV, "r") as csv_file:
+        reader = csv.reader(csv_file)
+        header = next(reader)
+
+        ind_comment = header.index("Comment")
+        ind_source = header.index("source")
+        ind_target = header.index("target")
+        ind_scale = header.index("scale")
+        ind_title = header.index("Title")
+        ind_licensor = header.index("licensor(s)")
+        ind_producer = header.index("producer(s)")
+        ind_description = header.index("description")
+        ind_startdate = header.index("start_datetime")
+        ind_enddate = header.index("end_datetime")
+        ind_basemaps = header.index("basemaps s3 path")
+
+        command = "argo submit ~/dev/topo-workflows/workflows/imagery/standardising-publish-import.yaml -n argo -f ./{0}.yaml --generate-name ispi-{1}-\n"
+
+        for row in reader:
+            if not row[ind_source].startswith(SOURCE):
+                continue
+
+            if row[ind_comment] != "":
+                get_log().warning(
+                    "skipped: comment",
+                    comment=row[ind_comment],
+                    source=row[ind_source],
+                    title=row[ind_title],
+                )
+                continue
+
+            params = {
+                "source": row[ind_source].rstrip("/") + "/",
+                "target": row[ind_target],
+                "scale": _validate_scale(row[ind_scale]),
+                "title": row[ind_title],
+                "description": row[ind_description],
+                "producer": _validate_producer(row[ind_producer]),
+                "start-datetime": _format_date(row[ind_startdate]),
+                "end-datetime": _format_date(row[ind_enddate]),
+            }
+
+            licensor = _validate_licensor(row[ind_licensor])
+            if licensor and ";" in licensor:
+                params["licensor-list"] = licensor
+                params["licensor"] = ""
+            else:
+                params["licensor"] = licensor
+                params["licensor-list"] = ""
+
+            if not params["licensor"] and params["licensor-list"] == "":
+                get_log().warning(
+                    "skipped: invalid licensor",
+                    licensor=row[ind_licensor],
+                    source=row[ind_source],
+                    title=row[ind_title],
+                )
+                continue
+
+            if not params["producer"]:
+                get_log().warning(
+                    "skipped: invalid producer",
+                    producer=row[ind_producer],
+                    source=row[ind_source],
+                    title=row[ind_title],
+                )
+                continue
+
+            if not params["scale"]:
+                get_log().warning(
+                    "skipped: invalid scale",
+                    scale=f"{row[ind_scale]}",
+                    source=row[ind_source],
+                    title=row[ind_title],
+                )
+                continue
+
+            file_name = row[ind_target].rstrip("/rgb/2193/").split("/")[-1]
+            formatted_file_name = file_name.replace("_", "-").replace(".", "-")
+
+            if row[ind_basemaps] == "":
+                get_log().info(
+                    "basemaps import required",
+                    source=row[ind_source],
+                    title=row[ind_title],
+                )
+                bm_params = {
+                    "category": "Urban Aerial Photos",
+                    "name": params["target"].rstrip("/rgb/2193/").split("/")[-1],
+                    "tile-matrix": "NZTM2000Quad/WebMercatorQuad",
+                    "blend": "20",
+                    "aligned-level": "6",
+                    "create-pull-request": "true"
+                }
+                params = {**params, **bm_params}
+                spi_list.append(command.format(formatted_file_name, formatted_file_name))
+            else:
+                sp_list.append(command.format(formatted_file_name, formatted_file_name))
+
+            with open(f"./{formatted_file_name}.yaml", "w", encoding="utf-8") as output:
+                yaml.dump(
+                    params,
+                    output,
+                    default_flow_style=False,
+                    default_style='"',
+                    sort_keys=False,
+                    allow_unicode=True,
+                    width=1000,
+                )
+
+    with open("standardise-publish.sh", "w") as script:
+        script.write("#!/bin/bash\n\n")
+        script.writelines(sp_list)
+
+    with open("standardise-publish-import.sh", "w") as script:
+        script.write("#!/bin/bash\n\n")
+        script.writelines(spi_list)
+
+main()