Skip to content

Commit

Permalink
Merge pull request #40 from InseeFr/develop
Browse files Browse the repository at this point in the history
Add Pairwise variable, reporting table and parquet export
  • Loading branch information
loichenninger authored Nov 27, 2023
2 parents 80b0beb + 8190575 commit ab102ef
Show file tree
Hide file tree
Showing 109 changed files with 341,550 additions and 241 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ RemoteSystemsTempFiles
# Test generated files
src/test/resources/functional_tests/out/*
!src/test/resources/functional_tests/out/.gitkeep
src/test/resources/functional_tests/temp/*
!src/test/resources/functional_tests/temp/.gitkeep
src/test/resources/unit_tests/out/*
!src/test/resources/unit_tests/out/.gitkeep
kraftwerk-core/src/test/resources/unit_tests/out/*
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Changelog


## 1.5.0 - [2023-11-24]

### Added
- New output format: Parquet
- Add new reporting data in output
- Add possibility to calculate variables from reporting data with VTL script
- Export of reporting data variables into a separate table
- Export of last survey validation date from paradata into root
- Functional tests module
- Add Pairwise variable

## 1.4.2 - [2023-09-26]

### Changed
Expand Down
Binary file added kraftwerk-api/LOG_RACINE.parquet
Binary file not shown.
12 changes: 9 additions & 3 deletions kraftwerk-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
<parent>
<groupId>fr.insee.kraftwerk</groupId>
<artifactId>kraftwerk</artifactId>
<version>1.4.2</version>
<version>1.5.0</version>
</parent>
<artifactId>kraftwerk-api</artifactId>
<name>kraftwerk-api</name>
<packaging>jar</packaging>

<properties>
<kraftwerk.version>1.4.2</kraftwerk.version>
<kraftwerk.version>1.5.0</kraftwerk.version>
</properties>

<dependencies>
Expand All @@ -21,6 +21,12 @@
<groupId>fr.insee.kraftwerk</groupId>
<artifactId>kraftwerk-core</artifactId>
<version>${kraftwerk.version}</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- Spring -->
Expand All @@ -32,7 +38,7 @@
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
<version>2.1.0</version>
<version>2.2.0</version>
</dependency>
</dependencies>
<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ private void multimodalProcess() {
}

/* Step 4 : Write output files */
private void outputFileWriter() {
private void outputFileWriter() throws KraftwerkException {
WriterSequence writerSequence = new WriterSequence();
writerSequence.writeOutputFiles(inDirectory, vtlBindings, userInputs.getModeInputsMap(),
userInputs.getMultimodeDatasetName(), metadataVariables, errors);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public ResponseEntity<String> buildVtlBindings(
boolean withDDI = true;
MainProcessing mp = new MainProcessing(inDirectoryParam, fileByFile,withAllReportingData,withDDI, defaultDirectory);
try {
mp.runMain();
mp.init();
} catch (KraftwerkException e) {
return ResponseEntity.status(e.getStatus()).body(e.getMessage());
}
Expand Down Expand Up @@ -87,7 +87,7 @@ public ResponseEntity<String> buildVtlBindingsByDataMode(
boolean withDDI = true;
MainProcessing mp = new MainProcessing(inDirectoryParam, fileByFile,withAllReportingData,withDDI, defaultDirectory);
try {
mp.runMain();
mp.init();
} catch (KraftwerkException e) {
return ResponseEntity.status(e.getStatus()).body(e.getMessage());
}
Expand Down Expand Up @@ -201,7 +201,7 @@ public ResponseEntity<String> multimodalProcessing(
@Operation(operationId = "writeOutputFiles", summary = "${summary.writeOutputFiles}", description = "${description.writeOutputFiles}")
public ResponseEntity<String> writeOutputFiles(
@Parameter(description = "${param.inDirectory}", required = true, example = INDIRECTORY_EXAMPLE) @RequestBody String inDirectoryParam
) {
) throws KraftwerkException {
Path inDirectory;
try {
inDirectory = controlInputSequence.getInDirectory(inDirectoryParam);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ summary.main=Service principal : ce service appelle toutes les
description.main=Ce service enchaîne les traitements : \n - buildVtlBindings : construction des jeux de données au format VTL \n - unimodalProcessing : traitement unimodal pour chaque mode de collecte disponible \n - multimodalProcessing : Réconciliation des différents modes et traitements multimodal \n - Séparation en tables par niveau d'information (boucles) et export CSV

summary.mainLunaticOnly=Service principal qui n'utilise pas le DDI
description.mainLunaticOnly=Ce service enchaîne les traitements comme le service main, mais sans utiliser le DDI. Seule la spécification Lunatic est utilisée. Ce service est un mode dégradé à n'utiliser que lorsque la spécification DDI n'existe pas.
description.mainLunaticOnly=Ce service enchaîne les traitements comme le service main, mais sans utiliser le DDI. Seule la spécification Lunatic est utilisée. Ce service est un mode dégradé à n'utiliser que lorsque la spécification DDI n'existe pas. :warning: **ATTENTION** l'identifiant du questionnaire dans Lunatic ("id") doit être égal à l'identifiant "QuestionnaireModelId" mentionné dans les fichiers de données

summary.fileByFile=Traitement fichier par fichier (ne gère pas les paradonnées pour le moment)
description.fileByFile=Ce service réalise tous les traitements jusqu'à l'écriture dans le csv sur un seul fichier à la fois. Il liste les fichiers à traiter. Le service s'arrête une fois tous les fichiers traités.
Expand Down
76 changes: 65 additions & 11 deletions kraftwerk-core/pom.xml
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>fr.insee.kraftwerk</groupId>
<artifactId>kraftwerk</artifactId>
<version>1.4.2</version>
<version>1.5.0</version>
</parent>

<artifactId>kraftwerk-core</artifactId>
<packaging>jar</packaging>
<name>kraftwerk-core</name>

<properties>
<trevas.version>0.4.8</trevas.version>
<trevas.version>1.1.1</trevas.version>
<parquet.version>1.13.1</parquet.version>
</properties>


<dependencies>

Expand All @@ -24,7 +27,7 @@
<dependency>
<groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId>
<version>10.3</version>
<version>12.3</version>
</dependency>
<!-- XML oriented test dependency -->
<!-- https://www.xmlunit.org/ -->
Expand All @@ -38,19 +41,21 @@
<dependency>
<groupId>xom</groupId>
<artifactId>xom</artifactId>
<version>1.3.4</version>
<version>1.3.9</version>
<exclusions>
<exclusion>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- TODO: maybe replace XOM (see: https://www.baeldung.com/java-xml-libraries) -->
<!-- TODO: maybe replace XOM (see:
https://www.baeldung.com/java-xml-libraries) -->

<!-- JSON libraries -->
<!-- json-simple -->
<!-- https://mvnrepository.com/artifact/com.googlecode.json-simple/json-simple -->
<!--
https://mvnrepository.com/artifact/com.googlecode.json-simple/json-simple -->
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
Expand All @@ -61,7 +66,8 @@

<!-- CSV -->
<!-- OpenCSV -->
<!-- https://www.geeksforgeeks.org/writing-a-csv-file-in-java-using-opencsv/ -->
<!--
https://www.geeksforgeeks.org/writing-a-csv-file-in-java-using-opencsv/ -->
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
Expand All @@ -81,6 +87,54 @@
<artifactId>vtl-jackson</artifactId>
<version>${trevas.version}</version>
</dependency>


<!-- PARQUET -->
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<version>${parquet.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.3.6</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-reload4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>3.3.6</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-reload4j</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>

</project>
54 changes: 38 additions & 16 deletions kraftwerk-core/src/main/java/fr/insee/kraftwerk/core/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ private Constants() {}
// ----- Fixed parameters
public static final String ROOT_GROUP_NAME = "RACINE";
public static final String ROOT_IDENTIFIER_NAME = "IdUE";
public static final String REPORTING_DATA_GROUP_NAME = "REPORTINGDATA";

// ----- Explicit Variables Names
public static final String MODE_VARIABLE_NAME = "MODE_KRAFTWERK";
Expand All @@ -63,30 +64,51 @@ private Constants() {}
public static final String NUMBER_ORCHESTRATORS_NAME = "NB_ORCHESTRATORS";
public static final String NUMBER_SESSIONS_NAME = "NB_SESSIONS";
public static final String PARADATA_VARIABLES_PREFIX = "CHANGES_";
public static final String PARADATA_TIMESTAMP_SUFFIX = "_LONG";
public static final String SURVEY_VALIDATION_DATE_NAME = "DATE_COLLECTE";

// ----- Paradata event name
public static final String PARADATA_SURVEY_VALIDATION_EVENT_NAME = "agree-sending-modal-button-orchestrator-collect";



// ----- Reporting Variables Names
public static final String REPORTING_DATA_PREFIX_NAME = "Report_";
public static final String STATE_SUFFIX_NAME = "STATE";
public static final String LAST_STATE_NAME = "LAST_" + Constants.STATE_SUFFIX_NAME;

public static final String INTERVIEWER_ID_NAME = "IDENQ";
public static final String ORGANIZATION_UNIT_ID_NAME = "ORGANIZATION_UNIT_ID";
public static final String ADRESS_RGES_NAME = "RGES";
public static final String ADRESS_NUMFA_NAME = "NUMFA";
public static final String ADRESS_SSECH_NAME = "SSECH";
public static final String ADRESS_LE_NAME = "LE";
public static final String ADRESS_EC_NAME = "EC";
public static final String ADRESS_BS_NAME = "BS";
public static final String ADRESS_NOI_NAME = "NOI";
public static final String SURVEY_DATE_DAY_NAME = "JOURENQ";
public static final String SURVEY_DATE_MONTH_NAME = "MOISENQ";
public static final String SURVEY_DATE_YEAR_NAME = "ANNEENQ";
public static final String OUTCOME_NAME = "OUTCOME";
public static final String NUMBER_ATTEMPTS_NAME = "NUMBER_CONTACT_ATTEMPTS";
public static final String OUTCOME_ATTEMPT_SUFFIX_NAME = "ATTEMPT";
public static final String ADRESS_ID_STAT_INSEE = "IDSTATINSEE";
public static final String INTERVIEWER_ID_NAME = "IDENQ";
public static final String ORGANIZATION_UNIT_ID_NAME = "ORGANIZATION_UNIT_ID";
public static final String ADRESS_RGES_NAME = "RGES";
public static final String ADRESS_NUMFA_NAME = "NUMFA";
public static final String ADRESS_SSECH_NAME = "SSECH";
public static final String ADRESS_LE_NAME = "LE";
public static final String ADRESS_EC_NAME = "EC";
public static final String ADRESS_BS_NAME = "BS";
public static final String ADRESS_NOI_NAME = "NOI";
public static final String SURVEY_DATE_DAY_NAME = "JOURENQ";
public static final String SURVEY_DATE_MONTH_NAME = "MOISENQ";
public static final String SURVEY_DATE_YEAR_NAME = "ANNEENQ";
public static final String OUTCOME_NAME = "OUTCOME";
public static final String NUMBER_ATTEMPTS_NAME = "NUMBER_CONTACT_ATTEMPTS";
public static final String OUTCOME_ATTEMPT_SUFFIX_NAME = "ATTEMPT";
public static final String ADRESS_ID_STAT_INSEE = "IDSTATINSEE";
public static final String LAST_ATTEMPT_DATE = "LAST_ATTEMPT_DATE";
public static final String OUTCOME_DATE = "OUTCOME_DATE";
public static final String IDENTIFICATION_NAME = "identification";
public static final String ACCESS_NAME = "access";
public static final String SITUATION_NAME = "situation";
public static final String CATEGORY_NAME = "category";
public static final String OCCUPANT_NAME = "occupant";

// ------ Pairwise variables

public static final int MAX_LINKS_ALLOWED = 21;
public static final String BOUCLE_PRENOMS = "BOUCLE_PRENOMS";
public static final String LIEN = "LIEN_";
public static final String LIENS = "LIENS";
public static final String SAME_AXIS_VALUE = "0";
public static final String NO_PAIRWISE_VALUE = "99";

// ---------- Functions
// ---------- Get a file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import fr.insee.kraftwerk.core.vtl.VtlBindings;
import fr.insee.kraftwerk.core.vtl.VtlScript;
import lombok.extern.log4j.Log4j2;
;

@Log4j2
public class CalculatedProcessing extends DataProcessing {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringJoiner;
import java.util.stream.Collectors;

import fr.insee.kraftwerk.core.KraftwerkError;
import fr.insee.kraftwerk.core.metadata.PaperUcq;
Expand Down Expand Up @@ -67,7 +66,7 @@ protected VtlScript generateVtlInstructions(String bindingName) {
paperUcqVtlNames.addAll(
variablesMap.getPaperUcq().stream()
.map(variable -> variablesMap.getFullyQualifiedName(variable.getName()))
.collect(Collectors.toList())
.toList()
);
}
if (!paperUcqVtlNames.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.Set;

import fr.insee.kraftwerk.core.Constants;
import fr.insee.kraftwerk.core.metadata.VariablesMap;
Expand Down Expand Up @@ -39,15 +39,17 @@ protected VtlScript generateVtlInstructions(String bindingName) {

// Root dataset
StringBuilder rootInstructions = new StringBuilder();
Set<String> rootVariableNames = multimodeVariablesMap.getGroupVariableNames(Constants.ROOT_GROUP_NAME);

String rootMeasures = VtlMacros.toVtlSyntax(multimodeVariablesMap.getGroupVariableNames(Constants.ROOT_GROUP_NAME));
String rootMeasures = VtlMacros.toVtlSyntax(rootVariableNames);
rootInstructions.append(String.format("%s := %s [keep %s, %s, %s];",
Constants.ROOT_GROUP_NAME, bindingName, Constants.ROOT_IDENTIFIER_NAME, rootMeasures, Constants.MODE_VARIABLE_NAME));

vtlScript.add(rootInstructions.toString());

// Removes duplicates; to be revisited if a better VTL solution becomes available
vtlScript.add(Constants.ROOT_GROUP_NAME + " := union(" + Constants.ROOT_GROUP_NAME + ", " + Constants.ROOT_GROUP_NAME +");");


// Group datasets
for (String groupName : multimodeVariablesMap.getSubGroupNames()) {
Expand All @@ -56,12 +58,12 @@ protected VtlScript generateVtlInstructions(String bindingName) {
// First init the dataset using measure names, that are fully qualified name
List<String> groupVariableNames = new ArrayList<>(multimodeVariablesMap.getGroupVariableNames(groupName));
List<String> groupMeasureNames = groupVariableNames.stream()
.map(multimodeVariablesMap::getFullyQualifiedName).collect(Collectors.toList());
.map(multimodeVariablesMap::getFullyQualifiedName).toList();

String groupMeasures = VtlMacros.toVtlSyntax(groupMeasureNames);
groupInstructions.append(String.format("%s := %s [keep %s, %s, %s, %s];",
groupName, bindingName, Constants.ROOT_IDENTIFIER_NAME, groupName, groupMeasures, Constants.MODE_VARIABLE_NAME));
// Epmpty lines are created to produce group level tables and need to be removed
// Empty lines are created to produce group level tables and need to be removed
groupInstructions.append(String.format("%s := %s [filter %s<>\"\"];",
groupName, groupName, groupName));

Expand Down
Loading

0 comments on commit ab102ef

Please sign in to comment.