From 228aa4e92fa5ee4714be8c1a99bfd4525b1071c8 Mon Sep 17 00:00:00 2001 From: Fabrice Brito Date: Fri, 20 Oct 2023 22:10:27 +0200 Subject: [PATCH] adds cwl-wrapper and stage --- .github/workflows/build.yaml | 41 +++++- containers/cwl-wrapper/Dockerfile | 7 + containers/cwl-wrapper/README.md | 4 + containers/cwl-wrapper/default.conf | 2 + containers/cwl-wrapper/templates/main.yaml | 11 ++ containers/cwl-wrapper/templates/rules.yaml | 79 ++++++++++ .../cwl-wrapper/templates/stage-in.yaml_ | 52 +++++++ containers/stage/Dockerfile | 3 + stage/stage-in.yaml | 65 ++++++++ stage/stage-out.yaml | 139 ++++++++++++++++++ 10 files changed, 401 insertions(+), 2 deletions(-) create mode 100644 containers/cwl-wrapper/Dockerfile create mode 100644 containers/cwl-wrapper/README.md create mode 100644 containers/cwl-wrapper/default.conf create mode 100644 containers/cwl-wrapper/templates/main.yaml create mode 100644 containers/cwl-wrapper/templates/rules.yaml create mode 100644 containers/cwl-wrapper/templates/stage-in.yaml_ create mode 100644 containers/stage/Dockerfile create mode 100644 stage/stage-in.yaml create mode 100644 stage/stage-out.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f20bbb4..6632e3e 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -44,7 +44,7 @@ jobs: run: echo "::set-output name=version::$APP_VERSION" container-crop: - + needs: version runs-on: ubuntu-latest @@ -95,7 +95,7 @@ jobs: docker push $IMAGE_ID:${{needs.version.outputs.app-version}} container-stac: - + needs: version runs-on: ubuntu-latest @@ -110,7 +110,40 @@ jobs: docker build water-bodies/command-line-tools/stac --file water-bodies/command-line-tools/stac/Dockerfile --tag stac docker tag stac $IMAGE_ID:${{needs.version.outputs.app-version}} docker push $IMAGE_ID:${{needs.version.outputs.app-version}} + + container-stage: + + needs: version + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: echo version ${{needs.version.outputs.app-version}} + - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin + - name: build & push stage image + run: | + IMAGE_ID=ghcr.io/terradue/ogc-eo-application-package-hands-on/stage + docker build . --file containers/stage/Dockerfile --tag stage + docker tag stage $IMAGE_ID:${{needs.version.outputs.app-version}} + docker push $IMAGE_ID:${{needs.version.outputs.app-version}} + + container-cwl-wrapper: + + needs: version + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - run: echo version ${{needs.version.outputs.app-version}} + - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin + - name: build & push cwl-runner image + run: | + IMAGE_ID=ghcr.io/terradue/ogc-eo-application-package-hands-on/cwl-wrapper + docker build . --file containers/cwl-wrapper/Dockerfile --tag cwl-wrapper + docker tag cwl-wrapper $IMAGE_ID:${{needs.version.outputs.app-version}} + docker push $IMAGE_ID:${{needs.version.outputs.app-version}} publish: @@ -133,9 +166,13 @@ jobs: - run: yq -i eval '(.$graph[] | select (.id == "norm_diff") ).hints.DockerRequirement.dockerPull = "ghcr.io/terradue/ogc-eo-application-package-hands-on/norm_diff:${{needs.version.outputs.app-version}}"' water-bodies/app-package-cloud-native.cwl - run: yq -i eval '(.$graph[] | select (.id == "otsu") ).hints.DockerRequirement.dockerPull = "ghcr.io/terradue/ogc-eo-application-package-hands-on/otsu:${{needs.version.outputs.app-version}}"' water-bodies/app-package-cloud-native.cwl - run: yq -i eval '(.$graph[] | select (.id == "stac") ).hints.DockerRequirement.dockerPull = "ghcr.io/terradue/ogc-eo-application-package-hands-on/stac:${{needs.version.outputs.app-version}}"' water-bodies/app-package-cloud-native.cwl + - run: yq -i eval '(.requirements.DockerRequirement.dockerPull = "ghcr.io/terradue/ogc-eo-application-package-hands-on/stage:${{needs.version.outputs.app-version}}"' stage/stage-in.yaml + - run: yq -i eval '(.requirements.DockerRequirement.dockerPull = "ghcr.io/terradue/ogc-eo-application-package-hands-on/stage:${{needs.version.outputs.app-version}}"' stage/stage-out.yaml - run: mkdir downloads - run: cp water-bodies/app-package.cwl downloads/app-water-bodies.${{needs.version.outputs.app-version}}.cwl - run: cp water-bodies/app-package-cloud-native.cwl downloads/app-water-bodies-cloud-native.${{needs.version.outputs.app-version}}.cwl + - run: cp stage/stage-in.yaml downloads/stage-in.yaml + - run: cp stage/stage-out.yaml downloads/stage-out.yaml - run: ls downloads/app-water-bodies.${{needs.version.outputs.app-version}}.cwl - uses: actions/upload-artifact@v2 with: diff --git a/containers/cwl-wrapper/Dockerfile b/containers/cwl-wrapper/Dockerfile new file mode 100644 index 0000000..c4ab2e2 --- /dev/null +++ b/containers/cwl-wrapper/Dockerfile @@ -0,0 +1,7 @@ +FROM docker.io/eoepca/cwl-wrapper@sha256:066566ab6dcee685bb95d4eeab20ef07eefc5aaaf320857a251a3a608c81e694 + +COPY cwl-wrapper/templates /templates + +COPY cwl-wrapper/default.conf /home/jovyan/.cwlwrapper/default.conf + +ENTRYPOINT [ "cwl-wrapper" ] \ No newline at end of file diff --git a/containers/cwl-wrapper/README.md b/containers/cwl-wrapper/README.md new file mode 100644 index 0000000..00fecaf --- /dev/null +++ b/containers/cwl-wrapper/README.md @@ -0,0 +1,4 @@ + +docker run --rm -i cwl-wrapper:latest $PWD:/app-package eoepca/ + +docker run --rm -i -v $PWD:/app cwl-wrapper:latest --stagein /app/stage/stage-in.yaml --stageout /app/stage/stage-out.yaml app/app-water-bodies.1.3.1.cwl \ No newline at end of file diff --git a/containers/cwl-wrapper/default.conf b/containers/cwl-wrapper/default.conf new file mode 100644 index 0000000..c02ede7 --- /dev/null +++ b/containers/cwl-wrapper/default.conf @@ -0,0 +1,2 @@ +maincwl="/templates/main.yaml" +rulez="/templates/rules.yaml" \ No newline at end of file diff --git a/containers/cwl-wrapper/templates/main.yaml b/containers/cwl-wrapper/templates/main.yaml new file mode 100644 index 0000000..27371a2 --- /dev/null +++ b/containers/cwl-wrapper/templates/main.yaml @@ -0,0 +1,11 @@ +class: Workflow +doc: Main stage manager +id: main +label: macro-cwl +inputs: {} +outputs: {} + +requirements: + SubworkflowFeatureRequirement: {} + ScatterFeatureRequirement: {} + InlineJavascriptRequirement: {} diff --git a/containers/cwl-wrapper/templates/rules.yaml b/containers/cwl-wrapper/templates/rules.yaml new file mode 100644 index 0000000..4dff9f2 --- /dev/null +++ b/containers/cwl-wrapper/templates/rules.yaml @@ -0,0 +1,79 @@ +rulez: + version: 1 + +parser: + type: $graph + driver: cwl + +onstage: + driver: cwl + + stage_in: + connection_node: node_stage_in + if_scatter: + scatterMethod: dotproduct + input: + template: + overwrite: True + + on_stage: + connection_node: on_stage + + stage_out: + connection_node: node_stage_out + scatter: False + if_scatter: + scatterMethod: dotproduct + follow_node: node_metrics_out + + +output: + driver: cwl + name: '-' + type: $graph + + +cwl: + GlobalInput: + Directory: string + Directory[]: string[] + + OptionalInput: + Directory: string? + Directory[]: string[]? + + stage_in: + Directory?: + type: string? + + Directory: + type: string? + + Directory[]: + type: string[] + + stage_out: + Directory: + type: Directory + + Directory[]: + type: Directory[] + + outputBindingResult: + command: + Directory: + outputBinding: + glob: . + type: Directory + Directory[]: + outputBinding: + glob: . + type: Directory[] + Directory?: + outputBinding: + glob: ${ if (inputs.input == null) {return null } else {return ".";} } + type: Directory? + stepOut: + type: + items: Directory + type: array diff --git a/containers/cwl-wrapper/templates/stage-in.yaml_ b/containers/cwl-wrapper/templates/stage-in.yaml_ new file mode 100644 index 0000000..9be8f4b --- /dev/null +++ b/containers/cwl-wrapper/templates/stage-in.yaml_ @@ -0,0 +1,52 @@ +cwlVersion: v1.0 +doc: "Run Stars for staging input data" +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: ghcr.io/terradue/stars:2.14.5 + "cwltool:Secrets": + secrets: [] + +id: stars +requirements: + EnvVarRequirement: + envDef: + PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + ResourceRequirement: {} + InlineJavascriptRequirement: {} + InitialWorkDirRequirement: + listing: + - entryname: stagein.sh + entry: |- + #!/bin/bash + set -x + res=0 + input='$( inputs.input )' + usersettings='$( inputs.usersettings.path )' + + [ "\${input}" != "null" ] && { + + IFS='#' read -r -a reference <<< '$( inputs.input )' + input_len=\${#reference[@]} + + [[ \${input_len} == 2 ]] && { + IFS=',' read -r -a assets <<< \${reference[1]} + af=" " + for asset in \${assets[@]} + do + af="\${af} -af \${asset}" + done + } || { + af="--empty" + } + Stars copy -conf \${usersettings} -v -rel -r '4' \${af} -o ./ \${reference[0]} + res=$? + } + rm -fr stagein.sh + exit 0 + +baseCommand: ['/bin/bash', 'stagein.sh'] + +inputs: {} + +outputs: {} \ No newline at end of file diff --git a/containers/stage/Dockerfile b/containers/stage/Dockerfile new file mode 100644 index 0000000..9e4db79 --- /dev/null +++ b/containers/stage/Dockerfile @@ -0,0 +1,3 @@ +FROM docker.io/python:3.9.18-slim-bullseye + +RUN pip install --no-cache-dir --upgrade pip stac-asset boto3 \ No newline at end of file diff --git a/stage/stage-in.yaml b/stage/stage-in.yaml new file mode 100644 index 0000000..90544a7 --- /dev/null +++ b/stage/stage-in.yaml @@ -0,0 +1,65 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 + +class: CommandLineTool +id: stage + +inputs: {} + +outputs: {} + +baseCommand: +- python +- stage.py +arguments: +- $( inputs.input ) + +requirements: + DockerRequirement: + dockerPull: stac-asset:latest + EnvVarRequirement: + envDef: + A: "2" + InlineJavascriptRequirement: {} + InitialWorkDirRequirement: + listing: + - entryname: stage.py + entry: |- + import pystac + import stac_asset + import asyncio + import os + import sys + + config = stac_asset.Config(warn=True) + + async def main(href: str): + + item = pystac.read_file(href) + + os.makedirs(item.id, exist_ok=True) + cwd = os.getcwd() + + os.chdir(item.id) + item = await stac_asset.download_item(item=item, directory=".", config=config) + os.chdir(cwd) + + cat = pystac.Catalog( + id="catalog", + description=f"catalog with staged {item.id}", + title=f"catalog with staged {item.id}", + ) + cat.add_item(item) + + cat.normalize_hrefs("./") + cat.save(catalog_type=pystac.CatalogType.SELF_CONTAINED) + + return cat + + href = sys.argv[1] + + cat = asyncio.run(main(href)) + + + diff --git a/stage/stage-out.yaml b/stage/stage-out.yaml new file mode 100644 index 0000000..2da2e8b --- /dev/null +++ b/stage/stage-out.yaml @@ -0,0 +1,139 @@ +cwlVersion: v1.0 + +class: CommandLineTool +id: stage-out + +doc: "Stage-out the results to S3" + +baseCommand: +- python +- stage.py + +hints: + DockerRequirement: + dockerPull: stac-asset:latest + "cwltool:Secrets": + secrets: + - AWS_SERVICEURL + - AWS_REGION + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + +inputs: + s3_bucket: + type: string + sub_path: + type: string + aws_access_key_id: + type: string + aws_secret_access_key: + type: string + region_name: + type: string + endpoint_url: + type: string +outputs: + s3_catalog_output: + outputBinding: + outputEval: ${ return "s3://" + inputs.s3_bucket + "/" + inputs.sub_path + "/catalog.json"; } + type: string + +arguments: + - $( inputs.wf_outputs.path ) + - $( inputs.s3_bucket ) + - $( inputs.sub_path ) + - $( inputs.aws_access_key_id ) + - $( inputs.aws_secret_access_key ) + - $( inputs.region_name ) + - $( inputs.endpoint_url ) + +requirements: + InitialWorkDirRequirement: + listing: + - entryname: stage.py + entry: |- + import os + import sys + import pystac + import botocore + import boto3 + import shutil + from pystac.stac_io import DefaultStacIO, StacIO + from urllib.parse import urlparse + + cat_url = sys.argv[1] + bucket = sys.argv[2] + subfolder = sys.argv[3] + aws_access_key_id = sys.argv[4] + aws_secret_access_key = sys.argv[5] + region_name = sys.argv[6] + endpoint_url = sys.argv[7] + + shutil.copytree(cat_url, "/tmp/catalog") + cat = pystac.read_file(os.path.join("/tmp/catalog", "catalog.json")) + + class CustomStacIO(DefaultStacIO): + """Custom STAC IO class that uses boto3 to read from S3.""" + + def __init__(self): + self.session = botocore.session.Session() + self.s3_client = self.session.create_client( + service_name="s3", + use_ssl=True, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + endpoint_url=endpoint_url, + region_name=region_name, + ) + + def write_text(self, dest, txt, *args, **kwargs): + parsed = urlparse(dest) + if parsed.scheme == "s3": + self.s3_client.put_object( + Body=txt.encode("UTF-8"), + Bucket=parsed.netloc, + Key=parsed.path[1:], + ContentType="application/geo+json", + ) + else: + super().write_text(dest, txt, *args, **kwargs) + + + client = boto3.client( + "s3", + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + endpoint_url=endpoint_url, + region_name=region_name, + ) + + StacIO.set_default(CustomStacIO) + + for item in cat.get_items(): + for key, asset in item.get_assets().items(): + s3_path = os.path.normpath( + os.path.join(os.path.join(subfolder, item.id, asset.href)) + ) + print(f"upload {asset.href} to s3://{bucket}/{s3_path}",file=sys.stderr) + client.upload_file( + asset.get_absolute_href(), + bucket, + s3_path, + ) + # upload item to S3 + print(f"upload {item.id} to s3://{bucket}/{subfolder}", file=sys.stderr) + pystac.write_file(item, item.get_self_href()) + + cat.normalize_hrefs(f"s3://{bucket}/{subfolder}") + + # upload catalog to S3 + print(f"upload catalog.json to s3://{bucket}/{subfolder}", file=sys.stderr) + pystac.write_file(cat, cat.get_self_href()) + + print(f"s3://{bucket}/{subfolder}/catalog.json", file=sys.stdout) + + InlineJavascriptRequirement: {} + EnvVarRequirement: + envDef: + A: "2" + ResourceRequirement: {} \ No newline at end of file