Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: tool generate params TDE-632 #102

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions tools/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Tools

This folder contains single-use scripts which have been used to assist in various Argo tasks.
The scripts should be stored in this folder if they may become useful again in the future.

## generate-argo-cli-commands.py

**Date:** 14/02/2023

**Related Jira Tickets:** [TDE-632](https://toitutewhenua.atlassian.net/jira/software/c/projects/TDE/boards/768/backlog?atlOrigin=eyJpIjoiNjVkNmMyNmNmNGJlNDIzOGI2YmIyMzViNzVkNDUwZjEiLCJwIjoiaiJ9); [TDE-631](https://toitutewhenua.atlassian.net/browse/TDE-631?atlOrigin=eyJpIjoiNDI5OGE5MGY5ZmUxNGUyNzkwZjdlYTcxOTg5ZmQ0MGUiLCJwIjoiaiJ9)

**Description:**
This script sets up for the automated processing of numerous imagery datasets using the argo cli.

**Setup:**

Download the [parameters csv](https://linzsrm.sharepoint.com/:x:/r/sites/Topography/_layouts/15/Doc.aspx?sourcedoc=%7B508567E2-EF88-458B-9115-0FC719CAA540%7D&file=imagery-standardising-parameters-bulk-process.xlsx&action=default&mobileredirect=true) from sharepoint, store as `imagery-standardising-parameters-bulk-process.csv` in `./tools/`
_nb: you will have to convert this from xlsx to csv, this can be done many places [online](https://cloudconvert.com/xlsx-to-csv)._

**Instructions:**

1. If necessary, update the `SOURCE` variable in generate-argo-cli-commands.py
2. Run:

```bash
cd ./tools
python3 generate-argo-cli-commands.py > log.txt
```

**Output:**

- **region-year-scale.yaml:** workflow parameters for this dataset
- **standardise-publish.sh:** bash script to 'deploy' argo workflows
- **standardise-publish-import.sh:** bash script to 'deploy' argo workflows that also require basemaps import
- **log.txt:** Contains important logs about skipped datasets.

**Submitting:**
`standardise-publish.sh` is set up and ready to go, just run:

```bash
sh standardise-publish.sh
```

If created, `standardise-publish-import.sh` will require you to uncomment some lines in `standardising-publish-import.yaml`, then run:

```bash
sh standardise-publish-import.sh
```
228 changes: 228 additions & 0 deletions tools/generate-argo-cli-commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
import csv
from typing import Dict, List, Optional

import yaml
from linz_logger import get_log

# #######################################
# USER PARAMETERS:
# Only rows whose "source" column starts with this prefix are processed.
SOURCE = "s3://linz-raster-data-store/"
# CSV export of the bulk-processing spreadsheet (see tools/README.md).
PARAMETERS_CSV = "./imagery-standardising-parameters-bulk-process.csv"
# #######################################

# read in enums from workflow template
# Pull the allowed producer/licensor/scale values from the standardising
# workflow template so validation below matches what the workflow accepts.
# NOTE(review): if any of these parameters is missing from the template,
# the corresponding global is never bound and later use raises NameError.
with open("../workflows/imagery/standardising.yaml", "r") as f:
    workflow = yaml.load(f, Loader=yaml.loader.SafeLoader)
    for parameter in workflow["spec"]["arguments"]["parameters"]:
        if parameter["name"] == "producer":
            PRODUCERS = parameter["enum"]
        if parameter["name"] == "licensor":
            LICENSORS = parameter["enum"]
        if parameter["name"] == "scale":
            SCALES = parameter["enum"]


def _format_date(date: str) -> str:
fd_lst = date.split("/")
year = fd_lst[2]
day = f"{int(fd_lst[0]):02}"
month = f"{int(fd_lst[1]):02}"
return f"{year}-{month}-{day}"


def _validate_licensor(licensor: str) -> Optional[str]:
    """Return the canonical licensor name for a raw spreadsheet value.

    Resolution order: an exact match against the workflow's LICENSORS enum is
    returned unchanged; known misspellings/variants are mapped to their
    canonical form; values joined with " and " become a ';'-separated list;
    anything else is invalid and yields None.
    """
    if licensor in LICENSORS:
        return licensor

    # Known spreadsheet variants -> canonical enum values.
    corrections = {
        "BOPLASS Limited": "BOPLASS",
        "Kapiti Coast District Council": "Kāpiti Coast District Council",
        "Kapiti District Council": "Kāpiti Coast District Council",
        "The Canterbury Aerial Imagery (CAI) Consortium": "Canterbury Aerial Imagery Consortium (CAI)",
        "Hawke's Bay Local Authority Shared Services (HBLASS)": "Hawke's Bay Local Authority Shared Services (HB LASS)",
        "Central Hawkes Bay District Council": "Central Hawke's Bay District Council",
        "Thames Coromandel District Council": "Thames-Coromandel District Council",
        "Waikato Regional Aerial Photography Service (WRAPS) 2017-2019": "Waikato Regional Aerial Photography Service (WRAPS)",
        "Northland Aerial Imagery Consortium (NAIC)": "Northland Aerial Imagery Consortium (NAIC)",
        "AAM NZ Limited": "AAM NZ",
        "Manawatū-Whanganui LASS Ltd-Whanganui LASS Ltd": "Manawatū-Whanganui LASS",
        "Manawatū-Whanganui LASS Ltd": "Manawatū-Whanganui LASS",
        "Manawatū-Whanganui LASS Ltd District Council": "Manawatū-Whanganui LASS",
    }
    if licensor in corrections:
        return corrections[licensor]

    # Multiple licensors joined by " and " become a semicolon-separated list.
    if " and " in licensor:
        return licensor.replace(" and ", ";")
    return None


def _add_licensor(row: List[str], index: Dict[str, int]) -> Dict[str, str]:
    """Build the licensor workflow parameters for one CSV row.

    Returns {} (and logs a warning) when the licensor is invalid. A validated
    value containing ';' goes into "licensor-list"; otherwise into "licensor".
    The unused key is always present but empty.
    """
    validated = _validate_licensor(row[index["licensor"]])
    if not validated:
        get_log().warning(
            "skipped: invalid licensor",
            licensor=row[index["licensor"]],
            source=row[index["source"]],
            title=row[index["title"]],
        )
        return {}
    if ";" in validated:
        return {"licensor-list": validated, "licensor": ""}
    return {"licensor": validated, "licensor-list": ""}


def _get_valid_producer(producer: str) -> Dict[str, str]:
    """Return {"producer": <canonical name>} for a raw producer value, or {} if invalid.

    Values already in the workflow's PRODUCERS enum pass through; known
    company-suffix variants are mapped to their canonical form.
    """
    if producer in PRODUCERS:
        return {"producer": producer}

    # Known spreadsheet variants -> canonical enum values.
    aliases = {
        "NZ Aerial Mapping Ltd": "NZ Aerial Mapping",
        "Aerial Surveys Ltd": "Aerial Surveys",
        "Aerial Surveys Limited": "Aerial Surveys",
        "AAM NZ Limited": "AAM NZ",
        "Landpro Ltd": "Landpro",
        "UAV Mapping NZ Ltd": "UAV Mapping NZ",
    }
    if producer in aliases:
        return {"producer": aliases[producer]}
    return {}


def _get_valid_scale(scale: str) -> Dict[str, str]:
    """Return {"scale": scale} when scale is in the workflow's SCALES enum, else {}."""
    return {"scale": scale} if scale in SCALES else {}


def _index_csv(header: List[str]) -> Dict[str, int]:
ind = {}
ind["comment"] = header.index("Comment")
ind["source"] = header.index("source")
ind["target"] = header.index("target")
ind["scale"] = header.index("scale")
ind["title"] = header.index("Title")
ind["licensor"] = header.index("licensor(s)")
ind["producer"] = header.index("producer(s)")
ind["description"] = header.index("description")
ind["startdate"] = header.index("start_datetime")
ind["enddate"] = header.index("end_datetime")
ind["basemaps"] = header.index("basemaps s3 path")
return ind


def _add_bm_params(target: str, row: List[str], index: Dict[str, int]) -> Dict[str, str]:
    """Build the extra Basemaps-import parameters for a dataset and log that an import is needed.

    The dataset name is derived from the target path, e.g.
    "s3://bucket/region_2023_0.1m/rgb/2193/" -> "region_2023_0.1m".

    FIX: the original computed `"target".rstrip(...)` — operating on the
    literal string "target" instead of the argument — so "name" was always
    "tar". Also, str.rstrip strips a *character set*, not a suffix, which
    could truncate names ending in any of "/rgb2193." characters; the
    "/rgb/2193" suffix is now removed explicitly.
    """
    get_log().info(
        "basemaps import required",
        source=row[index["source"]],
        title=row[index["title"]],
    )
    path = target.rstrip("/")
    for suffix in ("/2193", "/rgb"):
        if path.endswith(suffix):
            path = path[: -len(suffix)]
    return {
        "category": "Urban Aerial Photos",
        "name": path.split("/")[-1],
        "tile-matrix": "NZTM2000Quad/WebMercatorQuad",
        "blend": "20",
        "aligned-level": "6",
        "create-pull-request": "true",
    }


def _validate_params(params: Dict[str, str], row: List[str], index: Dict[str, int]) -> bool:
if not params["scale"]:
get_log().warning(
"skipped: invalid scale",
scale=row[index["scale"]],
source=row[index["source"]],
title=row[index["title"]],
)
return False
if not params["producer"]:
get_log().warning(
"skipped: invalid producer",
producer=row[index["producer"]],
source=row[index["source"]],
title=row[index["title"]],
)
return False
return True


def _write_params(params: Dict[str, str], file: str) -> None:
    """Dump params to ./<file>.yaml as double-quoted, unsorted, unicode YAML."""
    dump_options = {
        "default_flow_style": False,
        "default_style": '"',
        "sort_keys": False,
        "allow_unicode": True,
        "width": 1000,
    }
    with open(f"./{file}.yaml", "w", encoding="utf-8") as output:
        yaml.dump(params, output, **dump_options)


def main() -> None:
    """Generate per-dataset workflow parameter files and argo submission scripts.

    Reads PARAMETERS_CSV, skipping rows whose source is outside SOURCE, rows
    carrying a comment, and rows with an invalid licensor/producer/scale. For
    each remaining dataset it writes ./<dataset>.yaml (workflow parameters)
    and appends an `argo submit` command to one of two bash scripts:
    standardise-publish.sh (already in basemaps) or
    standardise-publish-import.sh (also needs a basemaps import).
    """
    spi_list = []  # submit commands for datasets that need a basemaps import
    sp_list = []  # submit commands for standardise + publish only

    command = "argo submit ~/dev/topo-workflows/workflows/imagery/standardising-publish-import.yaml -n argo -f ./{0}.yaml --generate-name ispi-{1}-\n"

    with open(PARAMETERS_CSV, "r") as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader)
        index = _index_csv(header)

        for row in reader:
            # only process datasets held in the raster data store
            if not row[index["source"]].startswith(SOURCE):
                continue

            # a non-empty comment flags the row as not ready for processing
            if row[index["comment"]] != "":
                get_log().warning(
                    "skipped: comment",
                    comment=row[index["comment"]],
                    source=row[index["source"]],
                    title=row[index["title"]],
                )
                continue

            # Derive the dataset name from the target path, e.g.
            # "s3://bucket/region_2023_0.1m/rgb/2193/" -> "region_2023_0.1m".
            # FIX: the original rstrip("/rgb/2193/") stripped a *character set*
            # ("/", "r", "g", "b", "2", "1", "9", "3", "."), which could eat
            # trailing characters of the dataset name itself (e.g. a name
            # ending in "2023"); remove the literal suffix components instead.
            target_path = row[index["target"]].rstrip("/")
            for suffix in ("/2193", "/rgb"):
                if target_path.endswith(suffix):
                    target_path = target_path[: -len(suffix)]
            file_name = target_path.split("/")[-1]
            # argo generate-name tokens must not contain "_" or "."
            formatted_file_name = file_name.replace("_", "-").replace(".", "-")

            params = {
                "source": row[index["source"]].rstrip("/") + "/",
                "target": row[index["target"]],
                "title": row[index["title"]],
                "description": row[index["description"]],
                "start-datetime": _format_date(row[index["startdate"]]),
                "end-datetime": _format_date(row[index["enddate"]]),
            }

            params = {**params, **_add_licensor(row, index)}
            params = {**params, **_get_valid_producer(row[index["producer"]])}
            params = {**params, **_get_valid_scale(row[index["scale"]])}

            if not _validate_params(params, row, index):
                continue

            # an empty "basemaps s3 path" means the dataset still needs importing
            if row[index["basemaps"]] == "":
                params = {**params, **_add_bm_params(params["target"], row, index)}
                spi_list.append(command.format(formatted_file_name, formatted_file_name))
            else:
                sp_list.append(command.format(formatted_file_name, formatted_file_name))

            _write_params(params, formatted_file_name)

    with open("standardise-publish.sh", "w") as script:
        script.write("#!/bin/bash\n\n")
        script.writelines(sp_list)

    with open("standardise-publish-import.sh", "w") as script:
        script.write("#!/bin/bash\n\n")
        script.writelines(spi_list)


# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()