Skip to content

Commit

Permalink
Merge pull request #60 from OP-TED/feature/check-schematron-entry-file
Browse files Browse the repository at this point in the history
Add more checks on schematron files
  • Loading branch information
bertrand-lorentz authored Aug 12, 2024
2 parents 9713066 + dc1ae16 commit b5926c4
Show file tree
Hide file tree
Showing 34 changed files with 2,614 additions and 26 deletions.
15 changes: 8 additions & 7 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,18 @@

<java.version>11</java.version>

<maven.compiler.release>${java.version}</maven.compiler.release>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>

<version.eforms-core-java>1.4.0-SNAPSHOT</version.eforms-core-java>
<version.efx-toolkit>2.0.0-SNAPSHOT</version.efx-toolkit>
<version.eforms-core-java>1.4.0</version.eforms-core-java>
<version.efx-toolkit>2.0.0-alpha.4</version.efx-toolkit>

<!-- Version - Third-party libraries -->
<version.commons-collections4>4.4</version.commons-collections4>
<version.commons-lang3>3.12.0</version.commons-lang3>
<version.cucumber>7.11.1</version.cucumber>
<version.drools>8.44.0.Final</version.drools>
<version.drools>8.44.2.Final</version.drools>
<version.istack>3.0.11</version.istack>
<version.jackson>2.14.1</version.jackson>
<version.jaxb-bind-api>4.0.0</version.jaxb-bind-api>
Expand All @@ -58,8 +59,8 @@
<version.xmlschema>2.3.0</version.xmlschema>

<!-- Versions - Plugins -->
<version.compiler.version>3.10.1</version.compiler.version>
<version.compiler.plugin>3.3.0</version.compiler.plugin>
<version.compiler.plugin>3.13.0</version.compiler.plugin>
<version.shade.plugin>3.6.0</version.shade.plugin>
</properties>

<dependencyManagement>
Expand Down Expand Up @@ -384,12 +385,12 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>${version.compiler.version}</version>
<version>${version.compiler.plugin}</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${version.compiler.plugin}</version>
<version>${version.shade.plugin}</version>
</plugin>
<plugin>
<groupId>org.kie</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

public class SchematronAssert {
private final String id;
private String diagnostics;

public SchematronAssert(String id) {
this.id = id;
Expand All @@ -10,4 +11,12 @@ public SchematronAssert(String id) {
/**
 * Returns the identifier given to this assert at construction time.
 *
 * @return The assert identifier, exactly as passed to the constructor.
 */
public String getId() {
  return this.id;
}

/**
 * Returns the diagnostics value stored on this assert.
 *
 * @return The value last passed to {@link #setDiagnostics(String)}, or {@code null} if it was
 *         never set.
 */
public String getDiagnostics() {
  return this.diagnostics;
}

/**
 * Stores the diagnostics value for this assert, replacing any previous value.
 *
 * @param diagnostics The diagnostics value to keep; stored as-is, without validation.
 */
public void setDiagnostics(String diagnostics) {
  this.diagnostics = diagnostics;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package eu.europa.ted.eforms.sdk.analysis.domain.schematron;

/**
 * A diagnostic definition, identified by its id.
 */
public class SchematronDiagnostic {
  /** Identifier of the diagnostic; fixed at construction. */
  private final String id;

  /**
   * Creates a diagnostic with the given identifier.
   *
   * @param id Identifier of the diagnostic; stored as-is.
   */
  public SchematronDiagnostic(String id) {
    this.id = id;
  }

  /**
   * Returns the identifier of this diagnostic.
   *
   * @return The identifier passed to the constructor.
   */
  public String getId() {
    return this.id;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,48 @@

/**
 * Holds the content extracted from one Schematron file: its phases, patterns, diagnostics and
 * asserts, together with the path the file was loaded from.
 *
 * <p>The path is fixed at construction. Each list is {@code null} until it is provided, either
 * through the constructor (asserts only) or through the corresponding setter.
 */
public class SchematronFile {
  private final Path path;
  private List<SchematronPhase> phases;
  private List<SchematronPattern> patterns;
  private List<SchematronDiagnostic> diagnostics;
  // Declared once and non-final: it can be given at construction and replaced by setAsserts.
  private List<SchematronAssert> asserts;

  /**
   * Creates a file with no asserts set yet.
   *
   * @param path Path of the Schematron file.
   */
  public SchematronFile(Path path) {
    this(path, null);
  }

  /**
   * Creates a file with an initial list of asserts.
   *
   * @param path Path of the Schematron file.
   * @param asserts Asserts found in the file; may be {@code null}.
   */
  public SchematronFile(Path path, List<SchematronAssert> asserts) {
    this.path = path;
    this.asserts = asserts;
  }

  /**
   * @return The path given at construction.
   */
  public Path getPath() {
    return path;
  }

  /**
   * @return The phases, or {@code null} if they were never set.
   */
  public List<SchematronPhase> getPhases() {
    return phases;
  }

  /**
   * @param phases The phases to store; the list is kept by reference, not copied.
   */
  public void setPhases(List<SchematronPhase> phases) {
    this.phases = phases;
  }

  /**
   * @return The patterns, or {@code null} if they were never set.
   */
  public List<SchematronPattern> getPatterns() {
    return patterns;
  }

  /**
   * @param patterns The patterns to store; the list is kept by reference, not copied.
   */
  public void setPatterns(List<SchematronPattern> patterns) {
    this.patterns = patterns;
  }

  /**
   * @return The diagnostics, or {@code null} if they were never set.
   */
  public List<SchematronDiagnostic> getDiagnostics() {
    return diagnostics;
  }

  /**
   * @param diagnostics The diagnostics to store; the list is kept by reference, not copied.
   */
  public void setDiagnostics(List<SchematronDiagnostic> diagnostics) {
    this.diagnostics = diagnostics;
  }

  /**
   * @return The asserts, or {@code null} if they were never provided.
   */
  public List<SchematronAssert> getAsserts() {
    return asserts;
  }

  /**
   * @param asserts The asserts to store; the list is kept by reference, not copied.
   */
  public void setAsserts(List<SchematronAssert> asserts) {
    this.asserts = asserts;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package eu.europa.ted.eforms.sdk.analysis.domain.schematron;

/**
 * A pattern, identified by its id.
 */
public class SchematronPattern {
  /** Identifier of the pattern; fixed at construction. */
  private final String id;

  /**
   * Creates a pattern with the given identifier.
   *
   * @param id Identifier of the pattern; stored as-is.
   */
  public SchematronPattern(String id) {
    this.id = id;
  }

  /**
   * Returns the identifier of this pattern.
   *
   * @return The identifier passed to the constructor.
   */
  public String getId() {
    return this.id;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package eu.europa.ted.eforms.sdk.analysis.domain.schematron;

import java.util.ArrayList;
import java.util.List;

/**
 * A phase, identified by its id, together with the ids of the patterns it activates.
 */
public class SchematronPhase {
  /** Identifier of the phase; fixed at construction. */
  private final String id;
  /** Ids of the active patterns, in the order they were added. */
  private final List<String> activePatterns;

  /**
   * Creates a phase with the given identifier and no active pattern.
   *
   * @param id Identifier of the phase; stored as-is.
   */
  public SchematronPhase(String id) {
    this.id = id;
    this.activePatterns = new ArrayList<>();
  }

  /**
   * Returns the identifier of this phase.
   *
   * @return The identifier passed to the constructor.
   */
  public String getId() {
    return this.id;
  }

  /**
   * Returns the ids of the patterns active in this phase.
   *
   * @return The internal list itself (not a copy), so later additions are visible through it.
   */
  public List<String> getActivePatterns() {
    return this.activePatterns;
  }

  /**
   * Appends a pattern id to the list of active patterns.
   *
   * @param patternId Id of the pattern to add; stored as-is, duplicates are not filtered.
   */
  public void addActivePattern(String patternId) {
    this.activePatterns.add(patternId);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
import java.util.stream.Collectors;

import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronAssert;
import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronDiagnostic;
import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronFile;
import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronPattern;
import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronPhase;

/**
* Represents a complete set of schematron rules.
Expand All @@ -25,12 +28,41 @@ public List<SchematronAssert> getAsserts() {
return schematronFile.getAsserts();
}

/**
 * Returns the diagnostics of the underlying schematron file.
 *
 * @return Whatever the schematron file returns for its diagnostics, unchanged.
 */
public List<SchematronDiagnostic> getDiagnostics() {
  final List<SchematronDiagnostic> fileDiagnostics = schematronFile.getDiagnostics();
  return fileDiagnostics;
}

/**
 * Returns the phases of the underlying schematron file.
 *
 * @return Whatever the schematron file returns for its phases, unchanged.
 */
public List<SchematronPhase> getPhases() {
  final List<SchematronPhase> filePhases = schematronFile.getPhases();
  return filePhases;
}

/**
 * Returns the patterns of the underlying schematron file.
 *
 * @return Whatever the schematron file returns for its patterns, unchanged.
 */
public List<SchematronPattern> getPatterns() {
  final List<SchematronPattern> filePatterns = schematronFile.getPatterns();
  return filePatterns;
}

/**
 * Returns the assert identifiers that occur more than once.
 *
 * <p>Every occurrence after the first is reported, so an identifier used three times appears
 * twice in the result.
 *
 * @return List of repeated assert identifiers, in encounter order.
 */
public List<String> getDuplicateAssertIds() {
  final Set<String> seenIds = new HashSet<>();
  return getAsserts().stream()
      .map(SchematronAssert::getId)
      .filter(assertId -> {
        // Set.add returns false when the id was already recorded, i.e. it is a repeat.
        boolean firstOccurrence = seenIds.add(assertId);
        return !firstOccurrence;
      })
      .collect(Collectors.toList());
}

/**
 * Returns the list of diagnostic identifiers that are referenced in an assert but not defined by
 * a diagnostic element.
 *
 * <p>The diagnostics value of an assert is treated as a whitespace-separated list of
 * identifiers, and each identifier is checked on its own. Asserts with no diagnostics value (or
 * a blank one) reference nothing and are ignored. An identifier referenced by several asserts
 * is reported once per reference.
 *
 * @return List of diagnostic identifiers missing a definition; empty if there are no asserts.
 */
public List<String> getMissingDiagnostics() {
  List<SchematronAssert> asserts = getAsserts();
  if (asserts == null) {
    return List.of();
  }

  // A file without any diagnostic definition is handled as an empty set of known ids.
  Set<String> definedDiagnosticIds = new HashSet<>();
  List<SchematronDiagnostic> definedDiagnostics = getDiagnostics();
  if (definedDiagnostics != null) {
    for (SchematronDiagnostic diagnostic : definedDiagnostics) {
      definedDiagnosticIds.add(diagnostic.getId());
    }
  }

  return asserts.stream()
      .map(SchematronAssert::getDiagnostics)
      // No diagnostics value means nothing is referenced, so nothing can be missing.
      .filter(refs -> refs != null && !refs.trim().isEmpty())
      // Trim first so that leading whitespace does not produce an empty identifier.
      .flatMap(refs -> java.util.Arrays.stream(refs.trim().split("\\s+")))
      .filter(id -> !definedDiagnosticIds.contains(id))
      .collect(Collectors.toList());
}

@Override
public String getId() {
return schematronFile.getPath().toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -15,9 +16,13 @@
import com.helger.xml.microdom.IMicroDocument;
import com.helger.xml.microdom.IMicroElement;
import com.helger.xml.microdom.IMicroNode;
import com.helger.xml.microdom.serialize.MicroReader;

import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronAssert;
import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronDiagnostic;
import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronFile;
import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronPattern;
import eu.europa.ted.eforms.sdk.analysis.domain.schematron.SchematronPhase;

public class SchematronParser {
private static final Logger logger = LoggerFactory.getLogger(SchematronParser.class);
Expand All @@ -27,32 +32,92 @@ private SchematronParser() {}
public static SchematronFile loadSchematronFile(Path schematronFilePath) {
logger.debug("Loading Schematron file " + schematronFilePath.toString());

SchematronFile schematronFile = new SchematronFile(schematronFilePath);

IReadableResource schematron = new FileSystemResource(schematronFilePath);

final IMicroDocument doc = MicroReader.readMicroXML(schematron);
if (doc == null || doc.getAllChildren() == null) {
logger.error("Schematron file {} could not be loaded as XML", schematronFilePath);
return null;
}
List<IMicroNode> children = doc.getAllChildrenRecursive();
if (children == null) {
logger.error("Schematron file {} has unexpected structure", schematronFilePath);
return null;
}

// Parse phase definitions
List<SchematronPhase> phases = new ArrayList<>();
children.stream()
.filter(node -> node.isElement() && "phase".equals(node.getNodeName()))
.forEach(node -> {
IMicroElement element = (IMicroElement)node;
String id = element.getAttributeValue("id");
SchematronPhase phase = new SchematronPhase(id);
List<IMicroNode> phaseRefs = element.getAllChildren();
if (phaseRefs == null) {
logger.error("Incorrect definition for phase {}", id);
return;
}
phaseRefs.stream()
.filter(n -> n.isElement() && "active".equals(n.getNodeName()))
.map(n -> ((IMicroElement)n).getAttributeValue("pattern"))
.forEach(s -> phase.addActivePattern(s));

phases.add(phase);
});

schematronFile.setPhases(phases);

// Parse list diagnostic definitions
List<SchematronDiagnostic> diagnostics = children.stream()
.filter(node -> node.isElement() && "diagnostic".equals(node.getNodeName()))
.map(n -> ((IMicroElement)n).getAttributeValue("id"))
.map(s -> new SchematronDiagnostic(s))
.collect(Collectors.toList());

schematronFile.setDiagnostics(diagnostics);

// Resolve all included files, so that they also get loaded.
final IMicroDocument doc = SchematronHelper.getWithResolvedSchematronIncludes(schematron,
final IMicroDocument docResolved = SchematronHelper.getWithResolvedSchematronIncludes(schematron,
e -> handleError(e, schematronFilePath));

if (doc == null) {
logger.error("Schematron file {} could not be loaded as XML", schematronFilePath);
if (docResolved == null) {
logger.error("Schematron file {} with resolved includes could not be loaded as XML",
schematronFilePath);
return null;
}

List<SchematronAssert> asserts = new ArrayList<>();
List<IMicroNode> allChildren = doc.getAllChildrenRecursive();
List<IMicroNode> allChildren = docResolved.getAllChildrenRecursive();
if (allChildren == null) {
logger.error("Schematron file {} does not have the expected content", schematronFilePath);
return null;
}

// Parse all patterns
List<SchematronPattern> patterns = allChildren.stream()
.filter(node -> node.isElement() && "pattern".equals(node.getNodeName()))
.map(n -> ((IMicroElement)n).getAttributeValue("id"))
.map(s -> new SchematronPattern(s))
.collect(Collectors.toList());

for (IMicroNode node : allChildren) {
if (node != null && node.isElement() && "assert".equals(node.getNodeName())) {
IMicroElement element = (IMicroElement)node;
SchematronAssert schAssert = new SchematronAssert(element.getAttributeValue("id"));
asserts.add(schAssert);
}
}
schematronFile.setPatterns(patterns);

// Parse all asserts
List<SchematronAssert> asserts = new ArrayList<>();
allChildren.stream()
.filter(node -> node.isElement() && "assert".equals(node.getNodeName()))
.forEach(n -> {
IMicroElement element = (IMicroElement)n;
String id = element.getAttributeValue("id");
SchematronAssert schematronAssert = new SchematronAssert(id);
String diag = element.getAttributeValue("diagnostics");
schematronAssert.setDiagnostics(diag);
asserts.add(schematronAssert);
});

SchematronFile schematronFile = new SchematronFile(schematronFilePath, asserts);
schematronFile.setAsserts(asserts);

return schematronFile;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,27 @@ when
then
results.add(new ValidationResult($sch, "Schematron contains duplicate assert ids: " + $duplicates, ValidationStatusEnum.ERROR));
end

// For every schematron and every notice type id, reports an ERROR when the schematron
// has no phase whose id is "eforms-" followed by that notice type id.
rule "All expected phases are present"
when
/schematrons[ $sch: this, $schId: id ];
/noticeTypes[ $expected: id ]
// The schematron is looked up again by id so that only its own phases are searched.
not (exists /schematrons[ id == $schId ]/phases[ id == "eforms-" + $expected ])
then
results.add(new ValidationResult($sch, "Schematron is missing phase for: " + $expected, ValidationStatusEnum.ERROR));
end

// For every pattern of a schematron, reports an ERROR when no phase of that same
// schematron has the pattern id in its activePatterns.
rule "Every pattern is part of at least one phase"
when
/schematrons[ $sch: this, $schId: id ]/patterns[ $patternId: id ]
// The schematron is looked up again by id so that only its own phases are searched.
not (exists /schematrons[ id == $schId ]/phases[ activePatterns contains $patternId ])
then
results.add(new ValidationResult($sch, "Schematron has pattern that is not part of any phase: " + $patternId, ValidationStatusEnum.ERROR));
end

// Reports an ERROR for diagnostics found under a schematron's missingDiagnostics property.
// NOTE(review): presumably this fires once per entry of that list, with $missing bound to a
// single identifier - confirm against the OOPath semantics for collection properties.
rule "Every assert diagnostics is defined in the schematron file"
when
$missing : /schematrons[ $sch: this]/missingDiagnostics
then
results.add(new ValidationResult($sch, "Schematron is missing diagnostic definitions for: " + $missing, ValidationStatusEnum.ERROR));
end
Loading

0 comments on commit b5926c4

Please sign in to comment.