Skip to content

Commit

Permalink
Init loadCSV improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
NicoLaval committed Mar 11, 2024
1 parent 2ee6133 commit f7fe185
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 5 deletions.
8 changes: 8 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,14 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.1.2</version>
<configuration>
<argLine>--add-exports java.base/sun.nio.ch=ALL-UNNAMED</argLine>
</configuration>
</plugin>
</plugins>
</build>
</project>
19 changes: 14 additions & 5 deletions src/main/java/fr/insee/trevas/jupyter/SparkUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@

import fr.insee.vtl.engine.VtlScriptEngine;
import fr.insee.vtl.spark.SparkDataset;
import java.nio.file.Files;
import java.nio.file.Path;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;

public class SparkUtils {

public static VtlScriptEngine buildSparkEngine(SparkSession spark) {
Expand Down Expand Up @@ -70,8 +72,15 @@ public static SparkDataset readSasDataset(SparkSession spark, String path) throw

public static SparkDataset readCSVDataset(SparkSession spark, String path) throws Exception {
Dataset<Row> dataset;

String url = Utils.getURL(path);
Map<String, String> options = Utils.getQueryMap(path);

try {
dataset = spark.read().option("sep", ";").option("header", "true").csv(path);
dataset = spark.read().option("sep", ";")
.option("header", "true")
.options(options)
.csv(url);
} catch (Exception e) {
throw new Exception(e);
}
Expand Down
26 changes: 26 additions & 0 deletions src/main/java/fr/insee/trevas/jupyter/Utils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package fr.insee.trevas.jupyter;

import java.util.HashMap;
import java.util.Map;

public class Utils {

public static String getURL(String path) {
return path.split("\\?")[0];
}

public static Map<String, String> getQueryMap(String path) {
if (path == null || !path.contains("\\?")) {
return Map.of();
}
String[] params = path.split("\\?")[1].split("&");
Map<String, String> map = new HashMap<>();

for (String param : params) {
String name = param.split("=")[0];
String value = param.split("=")[1];
map.put(name, value);
}
return map;
}
}
28 changes: 28 additions & 0 deletions src/test/java/CSVTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import fr.insee.trevas.jupyter.SparkUtils;
import fr.insee.vtl.model.Dataset;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThat;

public class CSVTest {

private SparkSession spark;

@BeforeEach
public void setUp() {
spark = SparkSession.builder()
.appName("test")
.master("local")
.getOrCreate();
}

@Test
public void readCSVDatasetTest() throws Exception {
Dataset ds1 = SparkUtils.readCSVDataset(spark, "src/test/resources/ds1.csv");
assertThat(ds1.getDataPoints().get(1).get("name")).isEqualTo("B");
Dataset ds2 = SparkUtils.readCSVDataset(spark, "src/test/resources/ds2.csv?sep=\"|\"&delimiter=\"\"");
assertThat(ds2.getDataPoints().get(1).get("name")).isEqualTo("B");
}
}
6 changes: 6 additions & 0 deletions src/test/resources/ds1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"id";"name"
"1";"A"
"2";"B"
"3";"C"
"4";"D"
"5";"E"
6 changes: 6 additions & 0 deletions src/test/resources/ds2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id|name
6|F
7|G
8|H
9|I
10|J

0 comments on commit f7fe185

Please sign in to comment.