diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a9c2ed5..f9f24ce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,9 +14,10 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up JDK 11 - uses: actions/setup-java@v1 + uses: actions/setup-java@v4 with: - java-version: 11 + java-version: 17 + distribution: "adopt" - name: Set current version run: mvn -B versions:set -DnewVersion=${GITHUB_REF##*/} -DprocessAllModules -DgenerateBackupPoms=false - uses: s4u/maven-settings-action@v3.0.0 diff --git a/Dockerfile b/Dockerfile index 0c11dcb..6ece7be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM inseefrlab/onyxia-jupyter-pyspark:py3.10.9-spark3.3.1 +FROM --platform=linux/amd64 inseefrlab/onyxia-jupyter-pyspark:py3.11.6-spark3.5.0 # Allows the kernel to load the Spark and Hadoop config. ENV CLASSPATH_PREFIX "/opt/hadoop/etc/hadoop:/opt/spark/conf" @@ -11,6 +11,10 @@ COPY target/appassembler/repo/fr/insee/trevas/vtl-model/*/vtl-model-*.jar /vtl-m COPY target/appassembler/repo/fr/insee/trevas/vtl-engine/*/vtl-engine-*.jar /vtl-engine.jar COPY target/appassembler/repo/fr/insee/trevas/vtl-parser/*/vtl-parser-*.jar /vtl-parser.jar -RUN mamba install -y -c conda-forge "elyra[all]" +USER root + +RUN pip3 install --upgrade elyra-pipeline-editor-extension + +USER 1000 CMD ["jupyter", "lab", "--no-browser", "--ip", "0.0.0.0"] diff --git a/pom.xml b/pom.xml index 26040bd..c66ef23 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ Jupyter notebook providing VTL support through Trevas engine trevas-jupyter fr.insee - 0.4.2 + 0.5.0 @@ -18,9 +18,9 @@ - 11 + 17 UTF-8 - 1.1.1 + 1.3.0 @@ -98,7 +98,7 @@ org.apache.spark spark-kubernetes_2.12 - 3.3.1 + 3.5.0 @@ -153,6 +153,7 @@ + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED fr.insee.trevas.jupyter.VtlKernel @@ -184,10 +185,10 @@ maven-compiler-plugin 3.12.1 - ${jdk.version} - ${jdk.version} - ${jdk.version} - ${jdk.version} + ${java.version} + ${java.version} + ${java.version} + ${java.version} diff --git a/src/main/java/fr/insee/trevas/jupyter/VtlKernel.java b/src/main/java/fr/insee/trevas/jupyter/VtlKernel.java index 0371844..588a0cd 100644 --- a/src/main/java/fr/insee/trevas/jupyter/VtlKernel.java +++ b/src/main/java/fr/insee/trevas/jupyter/VtlKernel.java @@ -2,6 +2,7 @@ import fr.insee.vtl.engine.VtlScriptEngine; import fr.insee.vtl.model.Dataset; +import fr.insee.vtl.model.PersistentDataset; import fr.insee.vtl.model.Structured; import fr.insee.vtl.spark.SparkDataset; import io.github.spencerpark.jupyter.channels.JupyterConnection; @@ -58,9 +59,16 @@ private static Map getRoleMap(fr.insee.vtl.model.Dataset d private static SparkDataset asSparkDataset(Dataset dataset) { if (dataset instanceof SparkDataset) { return (SparkDataset) dataset; - } else { - return new SparkDataset(dataset, getRoleMap(dataset), spark); } + if (dataset instanceof PersistentDataset) { + fr.insee.vtl.model.Dataset ds = ((PersistentDataset) dataset).getDelegate(); + if (ds instanceof SparkDataset) { + return (SparkDataset) ds; + } else { + return new SparkDataset(ds, getRoleMap(dataset), spark); + } + } + throw new IllegalArgumentException("Unknow dataset type"); } public static SparkDataset loadParquet(String path) throws Exception {