-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(create-cart-diagram): add create-cart-diagram pipeline
- Loading branch information
1 parent
73deb2e
commit 1ee941e
Showing
3 changed files
with
183 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Pushes the `create-cart-diagram` pipeline to the OpenHEXA workspace
# `pathways-meghalaya-558459` whenever the pipeline sources or this workflow
# file change. No branch filter is set, so a push to any branch triggers it.
name: Push create-cart-diagram (meghalaya)

on:
  push:
    paths:
      - ".github/workflows/push-create-cart-diagram-meg.yml"
      - "create-cart-diagram/**"

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      # v2 of the checkout/setup-python actions targets a retired Node runtime;
      # use the currently maintained major versions.
      - name: Checkout
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Configure OpenHEXA CLI
        uses: blsq/openhexa-cli-action@v1
        with:
          workspace: "pathways-meghalaya-558459"
          token: ${{ secrets.OH_TOKEN_MEG }}

      # The commit SHA is used both as the pipeline version name (-n) and in
      # the link (-l) pointing back to the commit that produced this version.
      - name: Push pipeline to OpenHEXA
        run: |
          openhexa pipelines push create-cart-diagram \
            -n ${{ github.sha }} \
            -l "https://github.com/BLSQ/pathways-typing-pipelines/commit/${{ github.sha }}" \
            --yes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
import json | ||
from datetime import datetime | ||
from pathlib import Path | ||
from typing import Optional | ||
|
||
from openhexa.sdk import Dataset, current_run, parameter, pipeline, workspace | ||
from pathways.typing.mermaid import cart_diagram | ||
from pathways.typing.tree import build_binary_tree, merge_trees | ||
|
||
|
||
@pipeline("create-cart-diagram", name="Create CART diagram")
@parameter(
    "cart_outputs",
    name="CART outputs",
    help="OpenHEXA dataset containing JSON CART outputs",
    type=Dataset,
    required=True,
)
@parameter(
    "version_name",
    name="Dataset version",
    help="You can optionally specify the dataset version to use. If not specified, the latest version will be used.",
    type=str,
    required=False,
)
@parameter(
    "output_dir",
    name="Output directory",
    help="If not specified, outputs will be saved into `workspace/typing/data/output/cart_diagram`",
    type=str,
    required=False,
)
def create_cart_diagram(
    cart_outputs: Dataset, version_name: Optional[str], output_dir: Optional[str]
):
    """Pipeline entry point: load CART outputs and render them as a diagram.

    Parameters
    ----------
    cart_outputs : Dataset
        Dataset holding the urban and rural CART JSON files.
    version_name : str, optional
        Dataset version to read. When empty, the latest version is used.
    output_dir : str, optional
        Directory (joined onto the workspace files path) where the diagram is
        written. When empty, a directory under
        `typing/data/output/cart_diagram/<version>/<timestamp>` is used.
    """
    # NOTE(review): `data` is subscripted below. If calling a task inside the
    # pipeline function returns a deferred task handle rather than the resolved
    # dict, these lookups would fail -- confirm against the OpenHEXA SDK.
    data = load_dataset(dataset=cart_outputs, version_name=version_name)

    files_root = workspace.files_path
    if output_dir:
        target_dir = Path(files_root) / output_dir
    else:
        # Default location is keyed by dataset version and the current local
        # time (presumably so that repeated runs do not overwrite each other).
        version = data["version"]
        stamp = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
        target_dir = Path(
            files_root, "typing", "data", "output", "cart_diagram", version, stamp
        )

    generate_diagram(
        urban_cart=data["urban"],
        rural_cart=data["rural"],
        output_dir=target_dir,
        version_name=data["version"],
    )
|
||
|
||
@create_cart_diagram.task
def load_dataset(dataset: Dataset, version_name: str | None = None) -> dict:
    """Load urban and rural JSON files from dataset.

    Parameters
    ----------
    dataset : Dataset
        The dataset containing the urban and rural JSON files.
    version_name : str, optional
        The name of the dataset version to use. If not specified, the latest
        version is used.

    Returns
    -------
    dict
        A dictionary with the parsed urban and rural JSON documents (`urban`
        and `rural` keys) and the name of the dataset version they were read
        from (`version` key).

    Raises
    ------
    FileNotFoundError
        If the requested version does not exist, if no latest version is
        available, or if `urban_frame.json` / `rural_frame.json` is missing
        from the selected version.
    """
    if version_name:
        # An explicit version was requested: pick the first one with that name.
        ds = next((v for v in dataset.versions if v.name == version_name), None)
        if ds is None:
            msg = f"Dataset version `{version_name}` not found"
            current_run.log_error(msg)
            raise FileNotFoundError(msg)
    else:
        # Use the latest dataset version by default.
        ds = dataset.latest_version
        # NOTE(review): guards against a dataset without any version, assuming
        # `latest_version` is None in that case -- confirm against the SDK.
        if ds is None:
            msg = "Dataset does not contain any version"
            current_run.log_error(msg)
            raise FileNotFoundError(msg)

    # Map the expected file names to the keys of the returned dictionary.
    wanted = {"urban_frame.json": "urban", "rural_frame.json": "rural"}
    frames = {}
    for f in ds.files:
        key = wanted.get(f.filename)
        if key is not None:
            frames[key] = json.loads(f.read().decode())

    # Report the urban file first, then the rural one, when missing.
    for key, label in (("urban", "Urban"), ("rural", "Rural")):
        if frames.get(key) is None:
            msg = f"{label} JSON file not found in dataset"
            current_run.log_error(msg)
            raise FileNotFoundError(msg)

    return {"urban": frames["urban"], "rural": frames["rural"], "version": ds.name}
|
||
|
||
@create_cart_diagram.task
def generate_diagram(
    urban_cart: list[dict], rural_cart: list[dict], output_dir: Path, version_name: str
):
    """Generate a mermaid diagram from urban and rural CART outputs.

    Both trees are merged into a single tree before generating the diagram.
    The diagram is written to `<output_dir>/<version_name>_diagram.txt` and
    registered as a file output of the current run.

    Parameters
    ----------
    urban_cart : list[dict]
        The urban CART output (nodes as list of dicts)
    rural_cart : list[dict]
        The rural CART output (nodes as list of dicts)
    output_dir : Path
        The output directory to save the diagram. It is created (including
        missing parents) if it does not exist yet.
    version_name : str
        The name of the dataset version
    """
    urban = build_binary_tree(urban_cart, strata="urban")
    current_run.log_info(f"Loaded urban CART ({len(urban)} nodes)")
    rural = build_binary_tree(rural_cart, strata="rural")
    current_run.log_info(f"Loaded rural CART ({len(rural)} nodes)")
    root = merge_trees(urban, rural)
    current_run.log_info("Merged urban and rural CARTs")

    mermaid = cart_diagram(root)
    # Computed outside the f-string: a backslash inside an f-string replacement
    # field is a SyntaxError on Python versions older than 3.12.
    n_lines = len(mermaid.split("\n"))
    current_run.log_info(f"Generated CART mermaid diagram ({n_lines} lines)")

    # The default output directory is timestamped and therefore never exists
    # beforehand; create it so that opening the file for writing cannot fail.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    fp = output_dir / f"{version_name}_diagram.txt"
    # Explicit encoding so the output does not depend on the platform locale.
    with open(fp, "w", encoding="utf-8") as f:
        f.write(mermaid)

    current_run.add_file_output(fp.absolute().as_posix())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pathways-typing @ git+https://github.com/BLSQ/pathways-typing@main |