From b80ab7677c8209291212da1129f7e73217643012 Mon Sep 17 00:00:00 2001
From: Thomas Grandjean
Date: Thu, 19 Dec 2024 21:52:48 +0100
Subject: [PATCH] test pipeline with catalog

---
 argo-pipeline/pipeline.yaml                    | 24 +++++++-
 argo-pipeline/src/catalog.py                   | 56 +++++++++++++++++++
 cartiflette/s3/__init__.py                     |  2 +
 .../cartiflette/cartiflette/constants.py       |  2 +-
 4 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 argo-pipeline/src/catalog.py

diff --git a/argo-pipeline/pipeline.yaml b/argo-pipeline/pipeline.yaml
index 0d99187..279842d 100644
--- a/argo-pipeline/pipeline.yaml
+++ b/argo-pipeline/pipeline.yaml
@@ -87,6 +87,11 @@ spec:
           - name: year
             value: "{{item}}"
         withParam: "{{tasks.operationnal-selection-of-vintages-to-generate.outputs.parameters.years}}"
+
+      # TASK 6 : GENERATE CATALOG
+      - name: make-catalog
+        template: make-catalog
+        dependencies: [generate-downstream-datasets]

   # --------------------------
   # TEMPLATES DEFINITION
@@ -132,8 +137,8 @@ spec:
         - name: PATH_WRITING_S3
          value: "test"
        - name: ENVIRONMENT
-          # set value to "dev" to simplify pipeline execution (2 years, only topojson, etc.)
-          value: preprod
+          # set value to "dev" to simplify pipeline execution (2 years, only topojson, etc.); otherwise use "preprod" or "prod"
+          value: test

   - name: download-all-sources
     outputs:
@@ -299,3 +304,18 @@ spec:
         - name: volume-workflow-tmp
           mountPath: /mnt
       env: *env_parameters
+
+  - name: make-catalog
+    outputs:
+      parameters:
+        - name: result
+          valueFrom:
+            path: "catalog/result.json"
+    container:
+      image: inseefrlab/cartiflette:latest
+      command: ["sh", "-c"]
+      args: ["python /mnt/bin/src/catalog.py"]
+      volumeMounts:
+        - name: volume-workflow-tmp
+          mountPath: /mnt
+      env: *env_parameters
diff --git a/argo-pipeline/src/catalog.py b/argo-pipeline/src/catalog.py
new file mode 100644
index 0000000..e98aa44
--- /dev/null
+++ b/argo-pipeline/src/catalog.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Create cartiflette's catalog
+"""
+
+import json
+import logging
+
+from s3fs import S3FileSystem
+
+from cartiflette.config import (
+    BUCKET,
+    PATH_WITHIN_BUCKET,
+    FS,
+)
+from cartiflette.s3 import make_s3_inventory
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+logger.info("=" * 50)
+logger.info("\n%s", __doc__)
+logger.info("=" * 50)
+
+# Nota: no argument parser needed for this command
+
+
+def main(
+    bucket: str = BUCKET,
+    path_within_bucket: str = PATH_WITHIN_BUCKET,
+    fs: S3FileSystem = FS,
+):
+
+    success = True
+    try:
+        make_s3_inventory(
+            fs=fs, bucket=bucket, path_within_bucket=path_within_bucket
+        )
+    except Exception:
+        success = False
+
+    out_path = "catalog/result.json"
+    with open(out_path, "w", encoding="utf8") as out:
+        json.dump(success, out)
+
+    logger.info("Done (success=%s)", success)
+
+
+if __name__ == "__main__":
+    main(
+        bucket=BUCKET,
+        path_within_bucket=PATH_WITHIN_BUCKET,
+        fs=FS,
+    )
diff --git a/cartiflette/s3/__init__.py b/cartiflette/s3/__init__.py
index 374a9c8..6a12633 100644
--- a/cartiflette/s3/__init__.py
+++ b/cartiflette/s3/__init__.py
@@ -1,10 +1,12 @@
 from .download_vectorfile import download_vectorfile_url_all
 from .geodataset import S3GeoDataset, concat_s3geodataset
 from .dataset import S3Dataset
+from .inventory import make_s3_inventory

 __all__ = [
     "download_vectorfile_url_all",
     "S3GeoDataset",
     "S3Dataset",
     "concat_s3geodataset",
+    "make_s3_inventory",
 ]
diff --git a/python-package/cartiflette/cartiflette/constants.py b/python-package/cartiflette/cartiflette/constants.py
index a4d3f1a..b0d889f 100644
--- a/python-package/cartiflette/cartiflette/constants.py
+++ b/python-package/cartiflette/cartiflette/constants.py
@@ -9,7 +9,7 @@
 DIR_CACHE = platformdirs.user_cache_dir(APP_NAME, ensure_exists=True)
 CACHE_NAME = "cartiflette_http_cache.sqlite"
 BUCKET = "projet-cartiflette"
-PATH_WITHIN_BUCKET = "production"
+PATH_WITHIN_BUCKET = "test"
 CATALOG = url = (
     "https://minio.lab.sspcloud.fr/"