diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 000000000..a595c7f86
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,48 @@
+# Tools
+
+This folder contains single-use scripts that have been used to assist in various Argo tasks.
+Scripts should be stored in this folder if they may become useful again in the future.
+
+## generate-argo-cli-commands.py
+
+**Date:** 14/02/2023
+
+**Related Jira Tickets:** [TDE-632](https://toitutewhenua.atlassian.net/jira/software/c/projects/TDE/boards/768/backlog?atlOrigin=eyJpIjoiNjVkNmMyNmNmNGJlNDIzOGI2YmIyMzViNzVkNDUwZjEiLCJwIjoiaiJ9); [TDE-631](https://toitutewhenua.atlassian.net/browse/TDE-631?atlOrigin=eyJpIjoiNDI5OGE5MGY5ZmUxNGUyNzkwZjdlYTcxOTg5ZmQ0MGUiLCJwIjoiaiJ9)
+
+**Description:**
+This script generates the workflow parameter files and submission scripts needed to bulk-process numerous imagery datasets with the Argo CLI.
+
+**Setup:**
+
+Download the [parameters csv](https://linzsrm.sharepoint.com/:x:/r/sites/Topography/_layouts/15/Doc.aspx?sourcedoc=%7B508567E2-EF88-458B-9115-0FC719CAA540%7D&file=imagery-standardising-parameters-bulk-process.xlsx&action=default&mobileredirect=true) from SharePoint and store it as `imagery-standardising-parameters-bulk-process.csv` in `./tools/`.
+_NB: you will need to convert the file from xlsx to csv first; this can be done with many [online](https://cloudconvert.com/xlsx-to-csv) converters._
+
+**Instructions:**
+
+1. If necessary, update the `SOURCE` variable in `generate-argo-cli-commands.py`.
+2. Run:
+
+```bash
+cd ./tools
+python3 generate-argo-cli-commands.py > log.txt
+```
+
+**Output:**
+
+- **region-year-scale.yaml:** workflow parameters, one file per dataset
+- **standardise-publish.sh:** bash script to submit the Argo workflows
+- **standardise-publish-import.sh:** bash script to submit the Argo workflows that also require a basemaps import
+- **log.txt:** contains important logs about skipped datasets
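+
+As an illustration, a generated parameters file looks like the following (the values here are hypothetical; every scalar is written double-quoted by the script):
+
+```yaml
+"source": "s3://linz-raster-data-store/example-region/example-dataset/"
+"target": "s3://linz-imagery/example-region/example-dataset/rgb/2193/"
+"title": "Example Region 0.1m Urban Aerial Photos (2023)"
+"description": "Orthophotography within the Example Region captured in 2023."
+"start-datetime": "2023-01-14"
+"end-datetime": "2023-02-14"
+"licensor": "Example District Council"
+"licensor-list": ""
+"producer": "Aerial Surveys"
+"scale": "10000"
+```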
+
+**Submitting:**
+`standardise-publish.sh` is set up and ready to go; just run:
+
+```bash
+sh standardise-publish.sh
+```
+
+If created, `standardise-publish-import.sh` requires you to first uncomment some lines in `standardising-publish-import.yaml`, then run:
+
+```bash
+sh standardise-publish-import.sh
+```
diff --git a/tools/generate-argo-cli-commands.py b/tools/generate-argo-cli-commands.py
new file mode 100644
index 000000000..8288cecaa
--- /dev/null
+++ b/tools/generate-argo-cli-commands.py
@@ -0,0 +1,228 @@
+import csv
+from typing import Dict, List, Optional
+
+import yaml
+from linz_logger import get_log
+
+# #######################################
+# USER PARAMETERS:
+SOURCE = "s3://linz-raster-data-store/"
+PARAMETERS_CSV = "./imagery-standardising-parameters-bulk-process.csv"
+# #######################################
+
+# read the allowed enum values (producer, licensor, scale) from the workflow template
+with open("../workflows/imagery/standardising.yaml", "r") as f:
+    workflow = yaml.safe_load(f)
+    for parameter in workflow["spec"]["arguments"]["parameters"]:
+        if parameter["name"] == "producer":
+            PRODUCERS = parameter["enum"]
+        if parameter["name"] == "licensor":
+            LICENSORS = parameter["enum"]
+        if parameter["name"] == "scale":
+            SCALES = parameter["enum"]
+
+
+def _format_date(date: str) -> str:
+    """Convert a dd/mm/yyyy spreadsheet date to YYYY-MM-DD."""
+    fd_lst = date.split("/")
+    year = fd_lst[2]
+    day = f"{int(fd_lst[0]):02}"
+    month = f"{int(fd_lst[1]):02}"
+    return f"{year}-{month}-{day}"
+
+
+def _validate_licensor(licensor: str) -> Optional[str]:
+    """Map known licensor aliases onto the workflow enum values.
+
+    Multiple licensors are joined with ";"; returns None if no match is found.
+    """
+    if licensor in LICENSORS:
+        return licensor
+    if licensor == "BOPLASS Limited":
+        return "BOPLASS"
+    if licensor == "Kapiti Coast District Council" or licensor == "Kapiti District Council":
+        return "Kāpiti Coast District Council"
+    if licensor == "The Canterbury Aerial Imagery (CAI) Consortium":
+        return "Canterbury Aerial Imagery Consortium (CAI)"
+    if licensor == "Hawke's Bay Local Authority Shared Services (HBLASS)":
+        return "Hawke's Bay Local Authority Shared Services (HB LASS)"
+    if licensor == "Central Hawkes Bay District Council":
+        return "Central Hawke's Bay District Council"
+    if licensor == "Thames Coromandel District Council":
+        return "Thames-Coromandel District Council"
+    if licensor == "Waikato Regional Aerial Photography Service (WRAPS) 2017-2019":
+        return "Waikato Regional Aerial Photography Service (WRAPS)"
+    if licensor == "Northland Aerial Imagery Consortium (NAIC)":
+        return "Northland Aerial Imagery Consortium (NAIC)"
+    if licensor == "AAM NZ Limited":
+        return "AAM NZ"
+    if (
+        licensor == "Manawatū-Whanganui LASS Ltd-Whanganui LASS Ltd"
+        or licensor == "Manawatū-Whanganui LASS Ltd"
+        or licensor == "Manawatū-Whanganui LASS Ltd District Council"
+    ):
+        return "Manawatū-Whanganui LASS"
+    if " and " in licensor:
+        return licensor.replace(" and ", ";")
+    return None
+
+
+def _add_licensor(row: List[str], index: Dict[str, int]) -> Dict[str, str]:
+    licensor = _validate_licensor(row[index["licensor"]])
+    if not licensor:
+        get_log().warning(
+            "skipped: invalid licensor",
+            licensor=row[index["licensor"]],
+            source=row[index["source"]],
+            title=row[index["title"]],
+        )
+        return {}
+    if ";" in licensor:
+        return {"licensor-list": licensor, "licensor": ""}
+    return {"licensor": licensor, "licensor-list": ""}
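+
+
+# For illustration (hypothetical spreadsheet values), _add_licensor returns either
+# a single "licensor" or a ";"-separated "licensor-list":
+#   "BOPLASS Limited"         -> {"licensor": "BOPLASS", "licensor-list": ""}
+#   "Council A and Council B" -> {"licensor-list": "Council A;Council B", "licensor": ""}
+#   "Unknown Licensor"        -> {} (a warning is logged and the row is later skipped)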
producer == "Aerial Surveys Limited": + return {"producer": "Aerial Surveys"} + elif producer == "AAM NZ Limited": + return {"producer": "AAM NZ"} + elif producer == "Landpro Ltd": + return {"producer": "Landpro"} + elif producer == "UAV Mapping NZ Ltd": + return {"producer": "UAV Mapping NZ"} + return {} + + +def _get_valid_scale(scale: str) -> Dict[str, str]: + if scale in SCALES: + return {"scale": scale} + return {} + + +def _index_csv(header: List[str]) -> Dict[str, int]: + ind = {} + ind["comment"] = header.index("Comment") + ind["source"] = header.index("source") + ind["target"] = header.index("target") + ind["scale"] = header.index("scale") + ind["title"] = header.index("Title") + ind["licensor"] = header.index("licensor(s)") + ind["producer"] = header.index("producer(s)") + ind["description"] = header.index("description") + ind["startdate"] = header.index("start_datetime") + ind["enddate"] = header.index("end_datetime") + ind["basemaps"] = header.index("basemaps s3 path") + return ind + + +def _add_bm_params(target: str, row: List[str], index: Dict[str, int]) -> Dict[str, str]: + get_log().info( + "basemaps import required", + source=row[index["source"]], + title=row[index["title"]], + ) + return { + "category": "Urban Aerial Photos", + "name": "target".rstrip("/rgb/2193/").split("/")[-1], + "tile-matrix": "NZTM2000Quad/WebMercatorQuad", + "blend": "20", + "aligned-level": "6", + "create-pull-request": "true", + } + + +def _validate_params(params: Dict[str, str], row: List[str], index: Dict[str, int]) -> bool: + if not params["scale"]: + get_log().warning( + "skipped: invalid scale", + scale=row[index["scale"]], + source=row[index["source"]], + title=row[index["title"]], + ) + return False + if not params["producer"]: + get_log().warning( + "skipped: invalid producer", + producer=row[index["producer"]], + source=row[index["source"]], + title=row[index["title"]], + ) + return False + return True + + +def _write_params(params: Dict[str, str], file: str) -> None: + with open(f"./{file}.yaml", "w", encoding="utf-8") as output: + yaml.dump( + params, + output, + default_flow_style=False, + default_style='"', + sort_keys=False, + allow_unicode=True, + width=1000, + ) + + +def main() -> None: + spi_list = [] + sp_list = [] + + command = "argo submit ~/dev/topo-workflows/workflows/imagery/standardising-publish-import.yaml -n argo -f ./{0}.yaml --generate-name ispi-{1}-\n" + + with open(PARAMETERS_CSV, "r") as csv_file: + reader = csv.reader(csv_file) + header = next(reader) + index = _index_csv(header) + + for row in reader: + if not row[index["source"]].startswith(SOURCE): + continue + + if row[index["comment"]] != "": + get_log().warning( + "skipped: comment", + comment=row[index["comment"]], + source=row[index["source"]], + title=row[index["title"]], + ) + continue + + file_name = row[index["target"]].rstrip("/rgb/2193/").split("/")[-1] + formatted_file_name = file_name.replace("_", "-").replace(".", "-") + + params = { + "source": row[index["source"]].rstrip("/") + "/", + "target": row[index["target"]], + "title": row[index["title"]], + "description": row[index["description"]], + "start-datetime": _format_date(row[index["startdate"]]), + "end-datetime": _format_date(row[index["enddate"]]), + } + + params = {**params, **_add_licensor(row, index)} + params = {**params, **_get_valid_producer(row[index["producer"]])} + params = {**params, **_get_valid_scale(row[index["scale"]])} + + if not _validate_params(params, row, index): + continue + + if row[index["basemaps"]] == "": + params = 
+
+
+def main() -> None:
+    spi_list = []
+    sp_list = []
+
+    # both generated scripts submit the same workflow template; only the
+    # parameter file and the generated workflow name differ per dataset
+    command = "argo submit ~/dev/topo-workflows/workflows/imagery/standardising-publish-import.yaml -n argo -f ./{0}.yaml --generate-name ispi-{1}-\n"
+
+    with open(PARAMETERS_CSV, "r") as csv_file:
+        reader = csv.reader(csv_file)
+        header = next(reader)
+        index = _index_csv(header)
+
+        for row in reader:
+            # only process datasets that live in the configured source bucket
+            if not row[index["source"]].startswith(SOURCE):
+                continue
+
+            # a non-empty comment flags the row as not ready for processing
+            if row[index["comment"]] != "":
+                get_log().warning(
+                    "skipped: comment",
+                    comment=row[index["comment"]],
+                    source=row[index["source"]],
+                    title=row[index["title"]],
+                )
+                continue
+
+            # derive the parameter file name from the dataset name in the target path
+            file_name = row[index["target"]].rstrip("/rgb/2193/").split("/")[-1]
+            formatted_file_name = file_name.replace("_", "-").replace(".", "-")
+
+            params = {
+                "source": row[index["source"]].rstrip("/") + "/",
+                "target": row[index["target"]],
+                "title": row[index["title"]],
+                "description": row[index["description"]],
+                "start-datetime": _format_date(row[index["startdate"]]),
+                "end-datetime": _format_date(row[index["enddate"]]),
+            }
+
+            params = {**params, **_add_licensor(row, index)}
+            params = {**params, **_get_valid_producer(row[index["producer"]])}
+            params = {**params, **_get_valid_scale(row[index["scale"]])}
+
+            if not _validate_params(params, row, index):
+                continue
+
+            # an empty "basemaps s3 path" means the dataset still needs a basemaps import
+            if row[index["basemaps"]] == "":
+                params = {**params, **_add_bm_params(params["target"], row, index)}
+                spi_list.append(command.format(formatted_file_name, formatted_file_name))
+            else:
+                sp_list.append(command.format(formatted_file_name, formatted_file_name))
+
+            _write_params(params, formatted_file_name)
+
+    with open("standardise-publish.sh", "w") as script:
+        script.write("#!/bin/bash\n\n")
+        script.writelines(sp_list)
+
+    with open("standardise-publish-import.sh", "w") as script:
+        script.write("#!/bin/bash\n\n")
+        script.writelines(spi_list)
+
+
+main()
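+
+# For illustration, each command main() writes into the generated .sh scripts
+# looks like (hypothetical dataset name):
+#   argo submit ~/dev/topo-workflows/workflows/imagery/standardising-publish-import.yaml -n argo -f ./bay-of-plenty-2019-0-1m.yaml --generate-name ispi-bay-of-plenty-2019-0-1m-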