From 0fe830985b14549ecc7ee01b5c24126f22ba4161 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Sun, 24 Dec 2023 10:43:32 +0100 Subject: [PATCH 01/12] Update Trevas version --- Dockerfile | 2 +- pom.xml | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0c11dcb..b99b3b9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM inseefrlab/onyxia-jupyter-pyspark:py3.10.9-spark3.3.1 +FROM inseefrlab/onyxia-jupyter-pyspark:py3.11.6-spark3.5.0 # Allows the kernel to load the Spark and Hadoop config. ENV CLASSPATH_PREFIX "/opt/hadoop/etc/hadoop:/opt/spark/conf" diff --git a/pom.xml b/pom.xml index 6e49a2e..6f5e888 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ Jupyter notebook providing VTL support through Trevas engine trevas-jupyter fr.insee - 0.4.2 + 0.5.0 @@ -18,9 +18,9 @@ - 11 + 17 UTF-8 - 1.1.1 + 1.2.0 @@ -98,7 +98,7 @@ org.apache.spark spark-kubernetes_2.12 - 3.3.1 + 3.5.0 @@ -182,12 +182,12 @@ org.apache.maven.plugins maven-compiler-plugin - 3.10.1 + 3.12.0 - ${jdk.version} - ${jdk.version} - ${jdk.version} - ${jdk.version} + ${java.version} + ${java.version} + ${java.version} + ${java.version} From df58e028ce43fb93b9c184523a3f13b009e83db6 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Sun, 24 Dec 2023 10:51:24 +0100 Subject: [PATCH 02/12] Update ci.yml --- .github/workflows/ci.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3a693eb..a41e419 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,10 +13,11 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up JDK 11 - uses: actions/setup-java@v1 + - name: Set up Maven Central Repository + uses: actions/setup-java@v4 with: - java-version: 11 + java-version: 17 + distribution: "adopt" - name: Set current version run: mvn -B versions:set -DnewVersion=${GITHUB_REF##*/} -DprocessAllModules -DgenerateBackupPoms=false - uses: s4u/maven-settings-action@v2.8.0 From 1bf086496b89f206e6fad9cda641ff8f131ad11c Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Sun, 24 Dec 2023 11:46:35 +0100 Subject: [PATCH 03/12] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index b99b3b9..dec1d65 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM inseefrlab/onyxia-jupyter-pyspark:py3.11.6-spark3.5.0 +FROM inseefrlab/onyxia-jupyter-pyspark:py3.10.13-spark3.5.0 # Allows the kernel to load the Spark and Hadoop config. ENV CLASSPATH_PREFIX "/opt/hadoop/etc/hadoop:/opt/spark/conf" From 97647eab01f787a64857552fa2bb6a866b95d12b Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Sun, 24 Dec 2023 11:57:47 +0100 Subject: [PATCH 04/12] Fix elyra issue --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index dec1d65..2be6289 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,6 @@ COPY target/appassembler/repo/fr/insee/trevas/vtl-model/*/vtl-model-*.jar /vtl-m COPY target/appassembler/repo/fr/insee/trevas/vtl-engine/*/vtl-engine-*.jar /vtl-engine.jar COPY target/appassembler/repo/fr/insee/trevas/vtl-parser/*/vtl-parser-*.jar /vtl-parser.jar -RUN mamba install -y -c conda-forge "elyra[all]" +RUN conda install -c conda-forge elyra-pipeline-editor-extension CMD ["jupyter", "lab", "--no-browser", "--ip", "0.0.0.0"] From 10d5385c95df74688457a439f1f8077889f121d2 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Sun, 24 Dec 2023 12:25:36 +0100 Subject: [PATCH 05/12] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 2be6289..0961334 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,6 @@ COPY target/appassembler/repo/fr/insee/trevas/vtl-model/*/vtl-model-*.jar /vtl-m COPY target/appassembler/repo/fr/insee/trevas/vtl-engine/*/vtl-engine-*.jar /vtl-engine.jar COPY target/appassembler/repo/fr/insee/trevas/vtl-parser/*/vtl-parser-*.jar /vtl-parser.jar -RUN conda install -c conda-forge elyra-pipeline-editor-extension +RUN mamba install -c conda-forge elyra-pipeline-editor-extension CMD ["jupyter", "lab", "--no-browser", "--ip", "0.0.0.0"] From a415f380b513e09b6c8ef627863cefa6d6d6039f Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Tue, 26 Dec 2023 10:22:39 +0100 Subject: [PATCH 06/12] Update Dockerfile --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0961334..73ce9de 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM inseefrlab/onyxia-jupyter-pyspark:py3.10.13-spark3.5.0 +FROM --platform=linux/amd64 inseefrlab/onyxia-jupyter-pyspark:py3.10.13-spark3.5.0 # Allows the kernel to load the Spark and Hadoop config. ENV CLASSPATH_PREFIX "/opt/hadoop/etc/hadoop:/opt/spark/conf" @@ -11,6 +11,6 @@ COPY target/appassembler/repo/fr/insee/trevas/vtl-model/*/vtl-model-*.jar /vtl-m COPY target/appassembler/repo/fr/insee/trevas/vtl-engine/*/vtl-engine-*.jar /vtl-engine.jar COPY target/appassembler/repo/fr/insee/trevas/vtl-parser/*/vtl-parser-*.jar /vtl-parser.jar -RUN mamba install -c conda-forge elyra-pipeline-editor-extension +RUN pip3 install --upgrade elyra-pipeline-editor-extension CMD ["jupyter", "lab", "--no-browser", "--ip", "0.0.0.0"] From e1c7ae06fdd1eb380c804eac7525a7f1dbdaf659 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Wed, 24 Jan 2024 17:18:12 +0100 Subject: [PATCH 07/12] Update Trevas version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f1055da..e060728 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ 17 UTF-8 - 1.2.0 + 1.3.0 From 2eeefe1afa6d650d5cf4f509ea70c8559e8835e1 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Wed, 24 Jan 2024 17:18:19 +0100 Subject: [PATCH 08/12] Handle PersistentDataset --- src/main/java/fr/insee/trevas/jupyter/VtlKernel.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/fr/insee/trevas/jupyter/VtlKernel.java b/src/main/java/fr/insee/trevas/jupyter/VtlKernel.java index 0371844..588a0cd 100644 --- a/src/main/java/fr/insee/trevas/jupyter/VtlKernel.java +++ b/src/main/java/fr/insee/trevas/jupyter/VtlKernel.java @@ -2,6 +2,7 @@ import fr.insee.vtl.engine.VtlScriptEngine; import fr.insee.vtl.model.Dataset; +import fr.insee.vtl.model.PersistentDataset; import fr.insee.vtl.model.Structured; import fr.insee.vtl.spark.SparkDataset; import io.github.spencerpark.jupyter.channels.JupyterConnection; @@ -58,9 +59,16 @@ private static Map getRoleMap(fr.insee.vtl.model.Dataset d private static SparkDataset asSparkDataset(Dataset dataset) { if (dataset instanceof SparkDataset) { return (SparkDataset) dataset; - } else { - return new SparkDataset(dataset, getRoleMap(dataset), spark); } + if (dataset instanceof PersistentDataset) { + fr.insee.vtl.model.Dataset ds = ((PersistentDataset) dataset).getDelegate(); + if (ds instanceof SparkDataset) { + return (SparkDataset) ds; + } else { + return new SparkDataset(ds, getRoleMap(dataset), spark); + } + } + throw new IllegalArgumentException("Unknow dataset type"); } public static SparkDataset loadParquet(String path) throws Exception { From 0e6e04565a208e361d7459ecd68262080f014ef8 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Thu, 25 Jan 2024 16:37:47 +0100 Subject: [PATCH 09/12] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 73ce9de..89d2a1b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM --platform=linux/amd64 inseefrlab/onyxia-jupyter-pyspark:py3.10.13-spark3.5.0 +FROM --platform=linux/amd64 inseefrlab/onyxia-jupyter-pyspark:py3.11.6-spark3.5.0 # Allows the kernel to load the Spark and Hadoop config. ENV CLASSPATH_PREFIX "/opt/hadoop/etc/hadoop:/opt/spark/conf" From 6e209544b636d7dfd01b7184ccbed37c5778cd4f Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Mon, 12 Feb 2024 07:18:58 +0100 Subject: [PATCH 10/12] Add Add-Opens configuration to maven-jar-plugin --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e060728..55c06da 100644 --- a/pom.xml +++ b/pom.xml @@ -175,6 +175,7 @@ fr.insee.trevas.jupyter + java.base/sun.nio.ch @@ -182,7 +183,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.12.0 + 3.12.1 ${java.version} ${java.version} From 43fd9a0cc83370dc3c3e550fe20906364ab224f6 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Mon, 12 Feb 2024 07:33:50 +0100 Subject: [PATCH 11/12] Fix docker user --- Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile b/Dockerfile index 89d2a1b..6ece7be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,10 @@ COPY target/appassembler/repo/fr/insee/trevas/vtl-model/*/vtl-model-*.jar /vtl-m COPY target/appassembler/repo/fr/insee/trevas/vtl-engine/*/vtl-engine-*.jar /vtl-engine.jar COPY target/appassembler/repo/fr/insee/trevas/vtl-parser/*/vtl-parser-*.jar /vtl-parser.jar +USER root + RUN pip3 install --upgrade elyra-pipeline-editor-extension +USER 1000 + CMD ["jupyter", "lab", "--no-browser", "--ip", "0.0.0.0"] From 84cefd3e7fb51411548f1b3f6c88ef9ad3586bd5 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Mon, 12 Feb 2024 09:45:06 +0100 Subject: [PATCH 12/12] Add extraJvmArguments for spark module issue --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 55c06da..3c1db75 100644 --- a/pom.xml +++ b/pom.xml @@ -153,6 +153,7 @@ + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED fr.insee.trevas.jupyter.VtlKernel @@ -175,7 +176,6 @@ fr.insee.trevas.jupyter - java.base/sun.nio.ch