feat: refactor and complete readme
MDavidson17 committed Apr 3, 2023
1 parent 51e9386 commit 60421b2
Showing 2 changed files with 164 additions and 110 deletions.
42 changes: 37 additions & 5 deletions tools/README.md
@@ -1,16 +1,48 @@
# Tools

-This folder contains single use scripts which have been used to assist in running argo workflows.
-The scripts are stored in this folder if it is thought they may become useful again in the future.
This folder contains single-use scripts which have been used to assist with various Argo tasks.
The scripts should be stored in this folder if they may become useful again in the future.

## generate-argo-commands-imagery.py

**Date:** 14/02/2023

**Related Jira Tickets:** [TDE-632](https://toitutewhenua.atlassian.net/jira/software/c/projects/TDE/boards/768/backlog?atlOrigin=eyJpIjoiNjVkNmMyNmNmNGJlNDIzOGI2YmIyMzViNzVkNDUwZjEiLCJwIjoiaiJ9); [TDE-631](https://toitutewhenua.atlassian.net/browse/TDE-631?atlOrigin=eyJpIjoiNDI5OGE5MGY5ZmUxNGUyNzkwZjdlYTcxOTg5ZmQ0MGUiLCJwIjoiaiJ9)

-**Description:** This script was generated to allow for the processing of numerous imagery datasets using the argo cli.
**Description:**
This script sets up the automated processing of numerous imagery datasets using the Argo CLI.

-**Additional Resources/links:**
**Setup:**

-- [CSV](https://linzsrm.sharepoint.com/:x:/r/sites/Topography/_layouts/15/Doc.aspx?sourcedoc=%7B508567E2-EF88-458B-9115-0FC719CAA540%7D&file=imagery-standardising-parameters-bulk-process.xlsx&action=default&mobileredirect=true)
Download the [parameters csv](https://linzsrm.sharepoint.com/:x:/r/sites/Topography/_layouts/15/Doc.aspx?sourcedoc=%7B508567E2-EF88-458B-9115-0FC719CAA540%7D&file=imagery-standardising-parameters-bulk-process.xlsx&action=default&mobileredirect=true) from SharePoint and store it as `imagery-standardising-parameters-bulk-process.csv` in `./tools/`.
_nb: you may have to convert this from xlsx to csv; this can be done in many places [online](https://cloudconvert.com/xlsx-to-csv), or locally as sketched below._
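
For example, a minimal local conversion sketch (assuming `pandas` and `openpyxl` are installed, and that you run it from the repository root):

```python
import pandas as pd

# read the SharePoint export and write the csv where the script expects it
pd.read_excel("imagery-standardising-parameters-bulk-process.xlsx").to_csv(
    "./tools/imagery-standardising-parameters-bulk-process.csv", index=False
)
```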

**Instructions:**

1. Update the `SOURCE` variable in `generate-argo-cli-commands.py`
2. Run:

```bash
cd ./tools
python3 generate-argo-cli-commands.py > log.txt
```
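
The log output is worth checking for skipped datasets; for example (assuming the structured logs were redirected to `log.txt` as above):

```bash
grep "skipped" log.txt
```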

**Output:**

- **region-year-scale.yaml:** workflow parameters for each dataset
- **standardise-publish.sh:** bash script to 'deploy' argo workflows
- **standardise-publish-import.sh:** bash script to 'deploy' argo workflows that also require a basemaps import
- **log.txt:** contains important logs about skipped datasets
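
For illustration, a generated parameters file might look like the following (hypothetical values; keys and quoting follow `_write_params` in the script below):

```yaml
"source": "s3://linz-raster-data-store/otago/otago_2017-2019_0.3m/"
"target": "s3://linz-imagery/otago/otago_2017-2019_0.3m/rgb/2193/"
"title": "Otago 0.3m Rural Aerial Photos (2017-2019)"
"description": "Orthophotography within the Otago region captured in the 2017-2019 flying season."
"start-datetime": "2017-11-01"
"end-datetime": "2019-04-30"
"licensor": "Toitū Te Whenua Land Information New Zealand"
"licensor-list": ""
"producer": "Aerial Surveys"
"scale": "300"
```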

**Submitting:**
`standardise-publish.sh` is set up and ready to go; just run:

```bash
sh standardise-publish.sh
```
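
To check what was submitted, you can list the workflows (assuming the same cluster and namespace as the submit commands):

```bash
argo list -n argo
```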

If created, `standardise-publish-import.sh` will require you to uncomment some lines in `standardising-publish-import.yaml`, then run:

```bash
sh standardise-publish-import.sh
```
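
For reference, each generated script is a series of `argo submit` calls built from the template in `generate-argo-cli-commands.py`; a sketch with a hypothetical dataset name:

```bash
#!/bin/bash

argo submit ~/dev/topo-workflows/workflows/imagery/standardising-publish-import.yaml -n argo -f ./otago-2017-2019-0-3m.yaml --generate-name ispi-otago-2017-2019-0-3m-
```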
232 changes: 127 additions & 105 deletions tools/generate-argo-cli-commands.py
@@ -1,14 +1,16 @@
import csv
-from typing import List, Optional
from typing import Dict, List, Optional

import yaml
from linz_logger import get_log

-# nb: CHANGE if working from a different source
-# SOURCE = "s3://linz-data-lake-raster-prod/"
# #######################################
# USER PARAMETERS:
SOURCE = "s3://linz-raster-data-store/"

PARAMETERS_CSV = "./imagery-standardising-parameters-bulk-process.csv"
# #######################################

# read in enums from workflow template
with open("../workflows/imagery/standardising.yaml", "r") as f:
    workflow = yaml.load(f, Loader=yaml.loader.SafeLoader)
    for parameter in workflow["spec"]["arguments"]["parameters"]:
@@ -19,9 +21,6 @@
        if parameter["name"] == "scale":
            SCALES = parameter["enum"]

-spi_list = []
-sp_list = []


def _format_date(date: str) -> str:
    fd_lst = date.split("/")
@@ -57,137 +56,159 @@ def _validate_licensor(licensor: str) -> Optional[str]:
    return None


-def _validate_producer(producer: str) -> Optional[str]:
# map the licensor CSV field onto the "licensor"/"licensor-list" workflow
# parameters; returns {} (and logs a warning) when the licensor is invalid
def _add_licensor(row: List[str], index: Dict[str, int]) -> Dict[str, str]:
    licensor = _validate_licensor(row[index["licensor"]])
    if not licensor:
        get_log().warning(
            "skipped: invalid licensor",
            licensor=row[index["licensor"]],
            source=row[index["source"]],
            title=row[index["title"]],
        )
        return {}
    elif licensor and ";" in licensor:
        return {"licensor-list": licensor, "licensor": ""}
    else:
        return {"licensor": licensor, "licensor-list": ""}


def _get_valid_producer(producer: str) -> Dict[str, str]:
    if producer in PRODUCERS:
-        return producer
        return {"producer": producer}
    elif producer == "NZ Aerial Mapping Ltd":
-        return "NZ Aerial Mapping"
        return {"producer": "NZ Aerial Mapping"}
    elif producer == "Aerial Surveys Ltd" or producer == "Aerial Surveys Limited":
-        return "Aerial Surveys"
        return {"producer": "Aerial Surveys"}
    elif producer == "AAM NZ Limited":
-        return "AAM NZ"
        return {"producer": "AAM NZ"}
    elif producer == "Landpro Ltd":
-        return "Landpro"
        return {"producer": "Landpro"}
    elif producer == "UAV Mapping NZ Ltd":
-        return "UAV Mapping NZ"
-    return None
        return {"producer": "UAV Mapping NZ"}
    return {}


-def _validate_scale(scale: str) -> Optional[str]:
def _get_valid_scale(scale: str) -> Dict[str, str]:
    if scale in SCALES:
-        return scale
-    return None
        return {"scale": scale}
    return {}


# map CSV column names to positional indices so rows can be read by field name
def _index_csv(header: List[str]) -> Dict[str, int]:
    ind = {}
    ind["comment"] = header.index("Comment")
    ind["source"] = header.index("source")
    ind["target"] = header.index("target")
    ind["scale"] = header.index("scale")
    ind["title"] = header.index("Title")
    ind["licensor"] = header.index("licensor(s)")
    ind["producer"] = header.index("producer(s)")
    ind["description"] = header.index("description")
    ind["startdate"] = header.index("start_datetime")
    ind["enddate"] = header.index("end_datetime")
    ind["basemaps"] = header.index("basemaps s3 path")
    return ind


# extra workflow parameters for datasets that also need a basemaps import
def _add_bm_params(target: str, row: List[str], index: Dict[str, int]) -> Dict[str, str]:
    get_log().info(
        "basemaps import required",
        source=row[index["source"]],
        title=row[index["title"]],
    )
    return {
        "category": "Urban Aerial Photos",
        "name": target.removesuffix("/rgb/2193/").split("/")[-1],
        "tile-matrix": "NZTM2000Quad/WebMercatorQuad",
        "blend": "20",
        "aligned-level": "6",
        "create-pull-request": "true",
    }


# a row is usable only if licensor, producer and scale all validated; the
# licensor warning is logged in _add_licensor, the others here (the keys may
# be absent entirely when a validator returned {}, hence .get())
def _validate_params(params: Dict[str, str], row: List[str], index: Dict[str, int]) -> bool:
    if not params.get("licensor") and not params.get("licensor-list"):
        return False
    if not params.get("scale"):
        get_log().warning(
            "skipped: invalid scale",
            scale=row[index["scale"]],
            source=row[index["source"]],
            title=row[index["title"]],
        )
        return False
    if not params.get("producer"):
        get_log().warning(
            "skipped: invalid producer",
            producer=row[index["producer"]],
            source=row[index["source"]],
            title=row[index["title"]],
        )
        return False
    return True


# write the workflow parameters to ./<file>.yaml; every value is quoted so
# argo treats them all as strings
def _write_params(params: Dict[str, str], file: str) -> None:
    with open(f"./{file}.yaml", "w", encoding="utf-8") as output:
        yaml.dump(
            params,
            output,
            default_flow_style=False,
            default_style='"',
            sort_keys=False,
            allow_unicode=True,
            width=1000,
        )


def main() -> None:
    spi_list = []
    sp_list = []

    # {0} is the parameter file passed via -f; {1} suffixes --generate-name
    command = "argo submit ~/dev/topo-workflows/workflows/imagery/standardising-publish-import.yaml -n argo -f ./{0}.yaml --generate-name ispi-{1}-\n"

    with open(PARAMETERS_CSV, "r") as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader)

-        ind_comment = header.index("Comment")
-        ind_source = header.index("source")
-        ind_target = header.index("target")
-        ind_scale = header.index("scale")
-        ind_title = header.index("Title")
-        ind_licensor = header.index("licensor(s)")
-        ind_producer = header.index("producer(s)")
-        ind_description = header.index("description")
-        ind_startdate = header.index("start_datetime")
-        ind_enddate = header.index("end_datetime")
-        ind_basemaps = header.index("basemaps s3 path")

-        command = "argo submit ~/dev/topo-workflows/workflows/imagery/standardising-publish-import.yaml -n argo -f ./{0}.yaml --generate-name ispi-{1}-\n"
        index = _index_csv(header)

        for row in reader:
-            if not row[ind_source].startswith(SOURCE):
            if not row[index["source"]].startswith(SOURCE):
                continue

-            if row[ind_comment] != "":
            if row[index["comment"]] != "":
                get_log().warning(
                    "skipped: comment",
-                    comment=row[ind_comment],
-                    source=row[ind_source],
-                    title=row[ind_title],
                    comment=row[index["comment"]],
                    source=row[index["source"]],
                    title=row[index["title"]],
                )
                continue

            # strip the "/rgb/2193/" suffix, then take the last path segment as the dataset name
            file_name = row[index["target"]].removesuffix("/rgb/2193/").split("/")[-1]
            formatted_file_name = file_name.replace("_", "-").replace(".", "-")

            params = {
-                "source": row[ind_source].rstrip("/") + "/",
-                "target": row[ind_target],
-                "scale": _validate_scale(row[ind_scale]),
-                "title": row[ind_title],
-                "description": row[ind_description],
-                "producer": _validate_producer(row[ind_producer]),
-                "start-datetime": _format_date(row[ind_startdate]),
-                "end-datetime": _format_date(row[ind_enddate]),
                "source": row[index["source"]].rstrip("/") + "/",
                "target": row[index["target"]],
                "title": row[index["title"]],
                "description": row[index["description"]],
                "start-datetime": _format_date(row[index["startdate"]]),
                "end-datetime": _format_date(row[index["enddate"]]),
            }

-            licensor = _validate_licensor(row[ind_licensor])
-            if licensor and ";" in licensor:
-                params["licensor-list"] = licensor
-                params["licensor"] = ""
-            else:
-                params["licensor"] = licensor
-                params["licensor-list"] = ""
            # merge in the validated fields; a validator that returns {} leaves
            # its key out of params, which _validate_params picks up below
            params = {**params, **_add_licensor(row, index)}
            params = {**params, **_get_valid_producer(row[index["producer"]])}
            params = {**params, **_get_valid_scale(row[index["scale"]])}

-            if not params["licensor"] and params["licensor-list"] == "":
-                get_log().warning(
-                    "skipped: invalid licensor",
-                    licensor=row[ind_licensor],
-                    source=row[ind_source],
-                    title=row[ind_title],
-                )
-                continue

-            if not params["producer"]:
-                get_log().warning(
-                    "skipped: invalid producer",
-                    producer=row[ind_producer],
-                    source=row[ind_source],
-                    title=row[ind_title],
-                )
            if not _validate_params(params, row, index):
                continue

-            if not params["scale"]:
-                get_log().warning(
-                    "skipped: invalid scale",
-                    scale=f"{row[ind_scale]}",
-                    source=row[ind_source],
-                    title=row[ind_title],
-                )
-                continue

-            file_name = row[ind_target].rstrip("/rgb/2193/").split("/")[-1]
-            formatted_file_name = file_name.replace("_", "-").replace(".", "-")

-            if row[ind_basemaps] == "":
-                get_log().info(
-                    "basemaps import required",
-                    source=row[ind_source],
-                    title=row[ind_title],
-                )
-                bm_params = {
-                    "category": "Urban Aerial Photos",
-                    "name": params["target"].rstrip("/rgb/2193/").split("/")[-1],
-                    "tile-matrix": "NZTM2000Quad/WebMercatorQuad",
-                    "blend": "20",
-                    "aligned-level": "6",
-                    "create-pull-request": "true"
-                }
-                params = {**params, **bm_params}
            if row[index["basemaps"]] == "":
                params = {**params, **_add_bm_params(params["target"], row, index)}
                spi_list.append(command.format(formatted_file_name, formatted_file_name))
            else:
                sp_list.append(command.format(formatted_file_name, formatted_file_name))

-            with open(f"./{formatted_file_name}.yaml", "w", encoding="utf-8") as output:
-                yaml.dump(
-                    params,
-                    output,
-                    default_flow_style=False,
-                    default_style='"',
-                    sort_keys=False,
-                    allow_unicode=True,
-                    width=1000,
-                )
            _write_params(params, formatted_file_name)

with open("standardise-publish.sh", "w") as script:
script.write("#!/bin/bash\n\n")
@@ -197,4 +218,5 @@ def main() -> None:
        script.write("#!/bin/bash\n\n")
        script.writelines(spi_list)


main()
