diff --git a/.gitignore b/.gitignore
index 7b8a872..401ce68 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
/target/
+.antlr
.settings
.project
.classpath
diff --git a/README.md b/README.md
index 5707890..41a9229 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,13 @@ Execute the following on the root folder of this project:
mvn clean install
+## Testing
+
+Unit tests are available under `src/test/java/`.
+
+After running the unit tests with `mvn test`, you can generate a coverage report with `mvn jacoco:report`.
+The report is available under `target/site/jacoco/`, in HTML, CSV, and XML formats.
+
[^1]: _Copyright 2022 European Union_
_Licensed under the EUPL, Version 1.2 or – as soon they will be approved by the European Commission –
diff --git a/pom.xml b/pom.xml
index 21decd2..4f04e10 100644
--- a/pom.xml
+++ b/pom.xml
@@ -55,6 +55,7 @@
${java.version}
+ 4.9.3
3.2.2
2.11.0
3.12.0
@@ -72,9 +73,11 @@
2.0.3
+ 3.3.0
3.10.1
1.5
2.5.2
+ 0.8.10
3.4.0
3.2.1
3.3.0
@@ -211,6 +214,11 @@
jsr305
${version.jsr305}
+
+ org.antlr
+ antlr4-runtime
+ ${version.antlr4}
+
org.jooq
jool
@@ -338,6 +346,10 @@
jsr305
provided
+
+ org.antlr
+ antlr4-runtime
+
org.jooq
jool
@@ -367,6 +379,16 @@
true
+
+ org.antlr
+ antlr4-maven-plugin
+ ${version.antlr4}
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+ ${version.build-helper.plugin}
+
org.apache.maven.plugins
maven-jar-plugin
@@ -387,6 +409,11 @@
maven-gpg-plugin
${version.gpg.plugin}
+
+ org.jacoco
+ jacoco-maven-plugin
+ ${version.jacoco.plugin}
+
org.sonatype.plugins
nexus-staging-maven-plugin
@@ -394,6 +421,64 @@
+
+
+
+ org.antlr
+ antlr4-maven-plugin
+
+
+ antlr
+
+ antlr4
+
+
+
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-source
+ generate-sources
+
+ add-source
+
+
+
+
+
+
+
+
+
+
+
+ org.jacoco
+ jacoco-maven-plugin
+
+
+ **/XPath20*.class
+
+
+
+
+
+ prepare-agent
+
+
+
+ report
+ prepare-package
+
+ report
+
+
+
+
+
diff --git a/src/main/antlr4/eu/europa/ted/eforms/xpath/XPath20.g4 b/src/main/antlr4/eu/europa/ted/eforms/xpath/XPath20.g4
new file mode 100644
index 0000000..893ef34
--- /dev/null
+++ b/src/main/antlr4/eu/europa/ted/eforms/xpath/XPath20.g4
@@ -0,0 +1,343 @@
+// XPath v2.0
+// Author--Ken Domino
+// Date--2 Jan 2022
+//
+// This is a faithful implementation of the XPath version 2.0 grammar
+// from the spec at https://www.w3.org/TR/xpath20/
+
+grammar XPath20;
+
+// [1]
+xpath : expr EOF ;
+expr : exprsingle ( COMMA exprsingle)* ;
+exprsingle : forexpr | quantifiedexpr | ifexpr | orexpr ;
+forexpr : simpleforclause KW_RETURN exprsingle ;
+// [5]
+simpleforclause : KW_FOR DOLLAR varname KW_IN exprsingle ( COMMA DOLLAR varname KW_IN exprsingle )* ;
+quantifiedexpr : ( KW_SOME | KW_EVERY) DOLLAR varname KW_IN exprsingle ( COMMA DOLLAR varname KW_IN exprsingle)* KW_SATISFIES exprsingle ;
+ifexpr : KW_IF OP expr CP KW_THEN exprsingle KW_ELSE exprsingle ;
+orexpr : andexpr ( KW_OR andexpr )* ;
+andexpr : comparisonexpr ( KW_AND comparisonexpr )* ;
+// [10]
+comparisonexpr : rangeexpr ( (valuecomp | generalcomp | nodecomp) rangeexpr )? ;
+rangeexpr : additiveexpr ( KW_TO additiveexpr )? ;
+additiveexpr : multiplicativeexpr ( (PLUS | MINUS) multiplicativeexpr )* ;
+multiplicativeexpr : unionexpr ( (STAR | KW_DIV | KW_IDIV | KW_MOD) unionexpr )* ;
+unionexpr : intersectexceptexpr ( (KW_UNION | P) intersectexceptexpr )* ;
+// [15]
+intersectexceptexpr : instanceofexpr ( ( KW_INTERSECT | KW_EXCEPT) instanceofexpr )* ;
+instanceofexpr : treatexpr ( KW_INSTANCE KW_OF sequencetype )? ;
+treatexpr : castableexpr ( KW_TREAT KW_AS sequencetype )? ;
+castableexpr : castexpr ( KW_CASTABLE KW_AS singletype )? ;
+castexpr : unaryexpr ( KW_CAST KW_AS singletype )? ;
+// [20]
+unaryexpr : ( MINUS | PLUS)* valueexpr ;
+valueexpr : pathexpr ;
+generalcomp : EQ | NE | LT | LE | GT | GE ;
+valuecomp : KW_EQ | KW_NE | KW_LT | KW_LE | KW_GT | KW_GE ;
+nodecomp : KW_IS | LL | GG ;
+// [25]
+pathexpr : ( SLASH relativepathexpr?) | ( SS relativepathexpr) | relativepathexpr ;
+relativepathexpr : stepexpr (( SLASH | SS) stepexpr)* ;
+stepexpr : filterexpr | axisstep ;
+axisstep : (reversestep | forwardstep) predicatelist ;
+forwardstep : (forwardaxis nodetest) | abbrevforwardstep ;
+// [30]
+forwardaxis : ( KW_CHILD COLONCOLON) | ( KW_DESCENDANT COLONCOLON) | ( KW_ATTRIBUTE COLONCOLON) | ( KW_SELF COLONCOLON) | ( KW_DESCENDANT_OR_SELF COLONCOLON) | ( KW_FOLLOWING_SIBLING COLONCOLON) | ( KW_FOLLOWING COLONCOLON) | ( KW_NAMESPACE COLONCOLON) ;
+abbrevforwardstep : AT? nodetest ;
+reversestep : (reverseaxis nodetest) | abbrevreversestep ;
+reverseaxis : ( KW_PARENT COLONCOLON) | ( KW_ANCESTOR COLONCOLON) | ( KW_PRECEDING_SIBLING COLONCOLON) | ( KW_PRECEDING COLONCOLON) | ( KW_ANCESTOR_OR_SELF COLONCOLON) ;
+abbrevreversestep : DD ;
+// [35]
+nodetest : kindtest | nametest ;
+nametest : qname | wildcard ;
+wildcard : STAR | (NCName CS) | ( SC NCName) ;
+filterexpr : primaryexpr predicatelist ;
+predicatelist : predicate* ;
+// [40]
+predicate : OB expr CB ;
+primaryexpr : literal | varref | parenthesizedexpr | contextitemexpr | functioncall ;
+literal : numericliteral | StringLiteral ;
+numericliteral : IntegerLiteral | DecimalLiteral | DoubleLiteral ;
+varref : DOLLAR varname ;
+// [45]
+varname : qname ;
+parenthesizedexpr : OP expr? CP ;
+contextitemexpr : D ;
+functioncall : // the predicate fails when the next token is one of the keywords below, so such a keyword followed by '(' is not parsed as a function call
+ { !(
+ getInputStream().LA(1)==KW_ARRAY
+ || getInputStream().LA(1)==KW_ATTRIBUTE
+ || getInputStream().LA(1)==KW_COMMENT
+ || getInputStream().LA(1)==KW_DOCUMENT_NODE
+ || getInputStream().LA(1)==KW_ELEMENT
+ || getInputStream().LA(1)==KW_EMPTY_SEQUENCE
+ || getInputStream().LA(1)==KW_FUNCTION
+ || getInputStream().LA(1)==KW_IF
+ || getInputStream().LA(1)==KW_ITEM
+ || getInputStream().LA(1)==KW_MAP
+ || getInputStream().LA(1)==KW_NAMESPACE_NODE
+ || getInputStream().LA(1)==KW_NODE
+ || getInputStream().LA(1)==KW_PROCESSING_INSTRUCTION
+ || getInputStream().LA(1)==KW_SCHEMA_ATTRIBUTE
+ || getInputStream().LA(1)==KW_SCHEMA_ELEMENT
+ || getInputStream().LA(1)==KW_TEXT
+ ) }?
+ qname OP (exprsingle ( COMMA exprsingle)*)? CP ; // function name, then a parenthesised, comma-separated argument list (possibly empty)
+singletype : atomictype QM? ;
+// [50]
+sequencetype : ( KW_EMPTY_SEQUENCE OP CP) | (itemtype occurrenceindicator?) ;
+occurrenceindicator : QM | STAR | PLUS ;
+itemtype : kindtest | ( KW_ITEM OP CP) | atomictype ;
+atomictype : qname ;
+kindtest : documenttest | elementtest | attributetest | schemaelementtest | schemaattributetest | pitest | commenttest | texttest | anykindtest ;
+// [55]
+anykindtest : KW_NODE OP CP ;
+documenttest : KW_DOCUMENT_NODE OP (elementtest | schemaelementtest)? CP ;
+texttest : KW_TEXT OP CP ;
+commenttest : KW_COMMENT OP CP ;
+pitest : KW_PROCESSING_INSTRUCTION OP (NCName | StringLiteral)? CP ;
+// [60]
+attributetest : KW_ATTRIBUTE OP (attribnameorwildcard ( COMMA typename_)?)? CP ;
+attribnameorwildcard : attributename | STAR ;
+schemaattributetest : KW_SCHEMA_ATTRIBUTE OP attributedeclaration CP ;
+attributedeclaration : attributename ;
+elementtest : KW_ELEMENT OP (elementnameorwildcard ( COMMA typename_ QM?)?)? CP ;
+// [65]
+elementnameorwildcard : elementname | STAR ;
+schemaelementtest : KW_SCHEMA_ELEMENT OP elementdeclaration CP ;
+elementdeclaration : elementname ;
+attributename : qname ;
+elementname : qname ;
+// [70]
+typename_ : qname ;
+
+// Error in the spec. EQName also includes acceptable keywords.
+qname : QName | URIQualifiedName
+    | KW_ANCESTOR
+    | KW_ANCESTOR_OR_SELF
+    | KW_AND
+    | KW_ARRAY
+    | KW_AS
+    | KW_ATTRIBUTE
+    | KW_CAST
+    | KW_CASTABLE
+    | KW_CHILD
+    | KW_COMMENT
+    | KW_DESCENDANT
+    | KW_DESCENDANT_OR_SELF
+    | KW_DIV
+    | KW_DOCUMENT_NODE
+    | KW_ELEMENT
+    | KW_ELSE
+    | KW_EMPTY_SEQUENCE
+    | KW_EQ
+    | KW_EVERY
+    | KW_EXCEPT
+    | KW_FOLLOWING
+    | KW_FOLLOWING_SIBLING
+    | KW_FOR
+    | KW_FUNCTION
+    | KW_GE
+    | KW_GT
+    | KW_IDIV
+    | KW_IF
+    | KW_IN
+    | KW_INSTANCE
+    | KW_INTERSECT
+    | KW_IS
+    | KW_ITEM
+    | KW_LE
+    | KW_LET
+    | KW_LT
+    | KW_MAP
+    | KW_MOD
+    | KW_NAMESPACE
+    | KW_NAMESPACE_NODE
+    | KW_NE
+    | KW_NODE
+    | KW_OF
+    | KW_OR
+    | KW_PARENT
+    | KW_PRECEDING
+    | KW_PRECEDING_SIBLING
+    | KW_PROCESSING_INSTRUCTION
+    | KW_RETURN
+    | KW_SATISFIES
+    | KW_SCHEMA_ATTRIBUTE
+    | KW_SCHEMA_ELEMENT
+    | KW_SELF
+    | KW_SOME
+    | KW_TEXT
+    | KW_THEN
+    | KW_TO // the lexer emits KW_TO for the name "to", so it must be accepted here like the other keywords (e.g. /a/to)
+    | KW_TREAT
+    | KW_UNION
+    ;
+
+// Not per spec. Specified for testing.
+auxilary : (expr SEMI )+ EOF;
+
+
+AT : '@' ;
+BANG : '!' ;
+CB : ']' ;
+CC : '}' ;
+CEQ : ':=' ;
+COLON : ':' ;
+COLONCOLON : '::' ;
+COMMA : ',' ;
+CP : ')' ;
+CS : ':*' ;
+D : '.' ;
+DD : '..' ;
+DOLLAR : '$' ;
+EG : '=>' ;
+EQ : '=' ;
+GE : '>=' ;
+GG : '>>' ;
+GT : '>' ;
+LE : '<=' ;
+LL : '<<' ;
+LT : '<' ;
+MINUS : '-' ;
+NE : '!=' ;
+OB : '[' ;
+OC : '{' ;
+OP : '(' ;
+P : '|' ;
+PLUS : '+' ;
+POUND : '#' ;
+PP : '||' ;
+QM : '?' ;
+SC : '*:' ;
+SLASH : '/' ;
+SS : '//' ;
+STAR : '*' ;
+
+// KEYWORDS
+
+KW_ANCESTOR : 'ancestor' ;
+KW_ANCESTOR_OR_SELF : 'ancestor-or-self' ;
+KW_AND : 'and' ;
+KW_ARRAY : 'array' ;
+KW_AS : 'as' ;
+KW_ATTRIBUTE : 'attribute' ;
+KW_CAST : 'cast' ;
+KW_CASTABLE : 'castable' ;
+KW_CHILD : 'child' ;
+KW_COMMENT : 'comment' ;
+KW_DESCENDANT : 'descendant' ;
+KW_DESCENDANT_OR_SELF : 'descendant-or-self' ;
+KW_DIV : 'div' ;
+KW_DOCUMENT_NODE : 'document-node' ;
+KW_ELEMENT : 'element' ;
+KW_ELSE : 'else' ;
+KW_EMPTY_SEQUENCE : 'empty-sequence' ;
+KW_EQ : 'eq' ;
+KW_EVERY : 'every' ;
+KW_EXCEPT : 'except' ;
+KW_FOLLOWING : 'following' ;
+KW_FOLLOWING_SIBLING : 'following-sibling' ;
+KW_FOR : 'for' ;
+KW_FUNCTION : 'function' ;
+KW_GE : 'ge' ;
+KW_GT : 'gt' ;
+KW_IDIV : 'idiv' ;
+KW_IF : 'if' ;
+KW_IN : 'in' ;
+KW_INSTANCE : 'instance' ;
+KW_INTERSECT : 'intersect' ;
+KW_IS : 'is' ;
+KW_ITEM : 'item' ;
+KW_LE : 'le' ;
+KW_LET : 'let' ;
+KW_LT : 'lt' ;
+KW_MAP : 'map' ;
+KW_MOD : 'mod' ;
+KW_NAMESPACE : 'namespace' ;
+KW_NAMESPACE_NODE : 'namespace-node' ;
+KW_NE : 'ne' ;
+KW_NODE : 'node' ;
+KW_OF : 'of' ;
+KW_OR : 'or' ;
+KW_PARENT : 'parent' ;
+KW_PRECEDING : 'preceding' ;
+KW_PRECEDING_SIBLING : 'preceding-sibling' ;
+KW_PROCESSING_INSTRUCTION : 'processing-instruction' ;
+KW_RETURN : 'return' ;
+KW_SATISFIES : 'satisfies' ;
+KW_SCHEMA_ATTRIBUTE : 'schema-attribute' ;
+KW_SCHEMA_ELEMENT : 'schema-element' ;
+KW_SELF : 'self' ;
+KW_SOME : 'some' ;
+KW_TEXT : 'text' ;
+KW_THEN : 'then' ;
+KW_TO : 'to' ;
+KW_TREAT : 'treat' ;
+KW_UNION : 'union' ;
+
+// A.2.1. TERMINAL SYMBOLS
+// This isn't a complete list of tokens in the language.
+// Keywords and symbols are terminals.
+
+IntegerLiteral : FragDigits ;
+DecimalLiteral : ('.' FragDigits) | (FragDigits '.' [0-9]*) ;
+DoubleLiteral : (('.' FragDigits) | (FragDigits ('.' [0-9]*)?)) [eE] [+-]? FragDigits ;
+StringLiteral : ('"' (FragEscapeQuot | ~["])* '"') | ('\'' (FragEscapeApos | ~['])* '\'') ; // greedy loops, so that a doubled quote is consumed as an escape instead of ending the literal
+URIQualifiedName : BracedURILiteral NCName ;
+BracedURILiteral : 'Q' '{' ~[{}]* '}' ; // any characters other than braces; inside a set '^' is a literal character, not a negation
+// Error in spec: EscapeQuot and EscapeApos are not terminals! Each escape is the quote character written twice.
+fragment FragEscapeQuot : '""' ;
+fragment FragEscapeApos : '\'\'' ;
+// Error in spec: Comment isn't really a terminal, but an off-channel object.
+Comment : '(:' (Comment | CommentContents)*? ':)' -> skip ;
+QName : FragQName ;
+NCName : FragmentNCName ;
+// Error in spec: Char is not a terminal!
+fragment Char : FragChar ;
+fragment FragDigits : [0-9]+ ;
+fragment CommentContents : Char ;
+// https://www.w3.org/TR/REC-xml-names/#NT-QName
+fragment FragQName : FragPrefixedName | FragUnprefixedName ;
+fragment FragPrefixedName : FragPrefix ':' FragLocalPart ;
+fragment FragUnprefixedName : FragLocalPart ;
+fragment FragPrefix : FragmentNCName ;
+fragment FragLocalPart : FragmentNCName ;
+fragment FragNCNameStartChar
+ : 'A'..'Z'
+ | '_'
+ | 'a'..'z'
+ | '\u00C0'..'\u00D6'
+ | '\u00D8'..'\u00F6'
+ | '\u00F8'..'\u02FF'
+ | '\u0370'..'\u037D'
+ | '\u037F'..'\u1FFF'
+ | '\u200C'..'\u200D'
+ | '\u2070'..'\u218F'
+ | '\u2C00'..'\u2FEF'
+ | '\u3001'..'\uD7FF'
+ | '\uF900'..'\uFDCF'
+ | '\uFDF0'..'\uFFFD'
+ | '\u{10000}'..'\u{EFFFF}'
+ ;
+fragment FragNCNameChar
+ : FragNCNameStartChar | '-' | '.' | '0'..'9'
+ | '\u00B7' | '\u0300'..'\u036F'
+ | '\u203F'..'\u2040'
+ ;
+fragment FragmentNCName : FragNCNameStartChar FragNCNameChar* ;
+
+// https://www.w3.org/TR/REC-xml/#NT-Char
+
+fragment FragChar : '\u0009' | '\u000a' | '\u000d'
+ | '\u0020'..'\ud7ff'
+ | '\ue000'..'\ufffd'
+ | '\u{10000}'..'\u{10ffff}'
+ ;
+
+// https://github.com/antlr/grammars-v4/blob/17d3db3fd6a8fc319a12176e0bb735b066ec0616/xpath/xpath31/XPath31.g4#L389
+Whitespace : ('\u000d' | '\u000a' | '\u0020' | '\u0009')+ -> skip ;
+
+// Not per spec. Specified for testing.
+SEMI : ';' ;
\ No newline at end of file
diff --git a/src/main/java/eu/europa/ted/eforms/xpath/XPathInfo.java b/src/main/java/eu/europa/ted/eforms/xpath/XPathInfo.java
new file mode 100644
index 0000000..9c9550e
--- /dev/null
+++ b/src/main/java/eu/europa/ted/eforms/xpath/XPathInfo.java
@@ -0,0 +1,77 @@
+package eu.europa.ted.eforms.xpath;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Information collected about an XPath expression: its steps, the path to its last element
+ * and, if one was set, the name of the attribute it points to.
+ */
+public class XPathInfo {
+  private final LinkedList<XPathStep> steps = new LinkedList<>();
+  private String pathToLastElement;
+  private String attributeName;
+
+  /**
+   * Returns the steps in the order in which they were added.
+   *
+   * @return the internal list of steps (not a copy)
+   */
+  public List<XPathStep> getSteps() {
+    return steps;
+  }
+
+  /**
+   * Returns the step that was added last.
+   *
+   * @return the last step
+   * @throws java.util.NoSuchElementException if no step has been added
+   */
+  public XPathStep getLastStep() {
+    return steps.getLast();
+  }
+
+  /**
+   * Appends a step after the steps added so far.
+   *
+   * @param step the step to append
+   */
+  public void addStep(XPathStep step) {
+    steps.addLast(step);
+  }
+
+  public String getPathToLastElement() {
+    return pathToLastElement;
+  }
+
+  public void setPathToLastElement(String pathToLastElement) {
+    this.pathToLastElement = pathToLastElement;
+  }
+
+  /**
+   * Tells whether an attribute name has been set.
+   *
+   * @return true if {@link #getAttributeName()} is not null
+   */
+  public boolean isAttribute() {
+    return attributeName != null;
+  }
+
+  public String getAttributeName() {
+    return attributeName;
+  }
+
+  public void setAttributeName(String attributeName) {
+    this.attributeName = attributeName;
+  }
+
+  /**
+   * Returns true if the XPath has any predicate that contains the specified string.
+   *
+   * @param match The string to search for
+   * @return true if the XPath has any predicate that contains the specified string, false otherwise
+   */
+  public boolean hasPredicate(String match) {
+    return steps.stream().anyMatch(step -> step.getPredicateText().contains(match));
+  }
+}
diff --git a/src/main/java/eu/europa/ted/eforms/xpath/XPathListenerImpl.java b/src/main/java/eu/europa/ted/eforms/xpath/XPathListenerImpl.java
new file mode 100644
index 0000000..93db210
--- /dev/null
+++ b/src/main/java/eu/europa/ted/eforms/xpath/XPathListenerImpl.java
@@ -0,0 +1,168 @@
+package eu.europa.ted.eforms.xpath;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.ParserRuleContext;
+import org.antlr.v4.runtime.misc.Interval;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.ParseTreeWalker;
+
+import eu.europa.ted.eforms.xpath.XPath20Parser.AbbrevforwardstepContext;
+import eu.europa.ted.eforms.xpath.XPath20Parser.AxisstepContext;
+import eu.europa.ted.eforms.xpath.XPath20Parser.FilterexprContext;
+import eu.europa.ted.eforms.xpath.XPath20Parser.PredicateContext;
+
+class XPathListenerImpl extends XPath20BaseListener {
+ private XPathInfo xpathInfo;
+
+ private String inputText;
+ private CharStream inputStream;
+ private LinkedList steps;
+ private int inPredicate = 0;
+
+ public XPathInfo parse(String xpathInput) {
+ steps = new LinkedList<>();
+ xpathInfo = new XPathInfo();
+ inPredicate = 0;
+
+ this.inputText = xpathInput;
+ this.inputStream = CharStreams.fromString(xpathInput);
+ final XPath20Lexer lexer = new XPath20Lexer(inputStream);
+ final CommonTokenStream tokens = new CommonTokenStream(lexer);
+ final XPath20Parser parser = new XPath20Parser(tokens);
+ final ParseTree tree = parser.xpath();
+
+ final ParseTreeWalker walker = new ParseTreeWalker();
+ walker.walk(this, tree);
+
+ steps.stream().forEach(s -> {
+ XPathStep step = new XPathStep(s.stepText, s.predicates);
+ xpathInfo.addStep(step);
+ });
+
+ if (!xpathInfo.isAttribute()) {
+ // The XPath does not point to an attribute, so it is the path to the last element
+ xpathInfo.setPathToLastElement(xpathInput);
+ }
+
+ return xpathInfo;
+ }
+
+ @Override
+ public void exitAxisstep(AxisstepContext ctx) {
+ if (inPredicateMode()) {
+ return;
+ }
+
+ // When we recognize a step, we add it to the queue if is is empty.
+ // If the queue is not empty, and the depth of the new step is not smaller than
+ // the depth of the last step in the queue, then this step needs to be added to
+ // the queue too.
+ // Otherwise, the last step in the queue is a sub-expression of the new step,
+ // and we need to
+ // replace it in the queue with the new step.
+ if (this.steps.isEmpty() || !this.steps.getLast().isPartOf(ctx.getSourceInterval())) {
+ this.steps.offer(new StepInfo(ctx, this::getInputText));
+ } else {
+ Interval removedInterval = ctx.getSourceInterval();
+ while(!this.steps.isEmpty() && this.steps.getLast().isPartOf(removedInterval)) {
+ this.steps.removeLast();
+ }
+ this.steps.offer(new StepInfo(ctx, this::getInputText));
+ }
+ }
+
+ @Override
+ public void exitFilterexpr(FilterexprContext ctx) {
+ if (inPredicateMode()) {
+ return;
+ }
+
+ // Same logic as for axis steps here (sse exitAxisstep).
+ if (this.steps.isEmpty() || !this.steps.getLast().isPartOf(ctx.getSourceInterval())) {
+ this.steps.offer(new StepInfo(ctx, this::getInputText));
+ } else {
+ Interval removedInterval = ctx.getSourceInterval();
+ while(!this.steps.isEmpty() && this.steps.getLast().isPartOf(removedInterval)) {
+ this.steps.removeLast();
+ }
+ this.steps.offer(new StepInfo(ctx, this::getInputText));
+ }
+ }
+
+ @Override
+ public void enterPredicate(PredicateContext ctx) {
+ this.inPredicate++;
+ }
+
+ @Override
+ public void exitPredicate(PredicateContext ctx) {
+ this.inPredicate--;
+ }
+
+ @Override
+ public void exitAbbrevforwardstep(AbbrevforwardstepContext ctx) {
+ if (!inPredicateMode() && ctx.AT() != null) {
+ xpathInfo.setAttributeName(ctx.nodetest().getText());
+
+ int splitPosition = ctx.AT().getSymbol().getCharPositionInLine();
+ String path = inputText.substring(0, splitPosition);
+ while (path.endsWith("/")) {
+ path = path.substring(0, path.length() - 1);
+ }
+ xpathInfo.setPathToLastElement(path);
+ }
+ }
+
+ /**
+ * Helper method that returns the input text that matched a parser rule context. It is useful
+ * because {@link ParserRuleContext#getText()} omits whitespace and other lexer tokens in the
+ * HIDDEN channel.
+ *
+ * @param context Information on a rule that matched
+ * @return The input text that matched the rule corresponding to the specified context
+ */
+ private String getInputText(ParserRuleContext context) {
+ return this.inputStream
+ .getText(new Interval(context.start.getStartIndex(), context.stop.getStopIndex()));
+ }
+
+ private Boolean inPredicateMode() {
+ return inPredicate > 0;
+ }
+
+ private class StepInfo {
+ String stepText;
+ List predicates;
+ int a;
+ int b;
+
+ private StepInfo(AxisstepContext ctx, Function getInputText) {
+ this(ctx.reversestep() != null ? getInputText.apply(ctx.reversestep()) : getInputText.apply(ctx.forwardstep()),
+ ctx.predicatelist().predicate().stream().map(getInputText).collect(Collectors.toList()),
+ ctx.getSourceInterval());
+ }
+ private StepInfo(FilterexprContext ctx, Function getInputText) {
+ this(getInputText.apply(ctx.primaryexpr()),
+ ctx.predicatelist().predicate().stream().map(getInputText).collect(Collectors.toList()),
+ ctx.getSourceInterval());
+ }
+
+ private StepInfo(String stepText, List predicates, Interval interval) {
+ this.stepText = stepText;
+ this.predicates = predicates;
+ this.a = interval.a;
+ this.b = interval.b;
+ }
+
+ private Boolean isPartOf(Interval interval) {
+ return this.a >= interval.a && this.b <= interval.b;
+ }
+ }
+}
diff --git a/src/main/java/eu/europa/ted/eforms/xpath/XPathProcessor.java b/src/main/java/eu/europa/ted/eforms/xpath/XPathProcessor.java
new file mode 100644
index 0000000..1667f2e
--- /dev/null
+++ b/src/main/java/eu/europa/ted/eforms/xpath/XPathProcessor.java
@@ -0,0 +1,220 @@
+package eu.europa.ted.eforms.xpath;
+
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.Queue;
+import java.util.stream.Collectors;
+
+/**
+ * Static helpers that parse XPath expressions into steps and that combine XPath expressions by
+ * working on those steps.
+ */
+public class XPathProcessor {
+
+  /**
+   * Parses an XPath expression.
+   *
+   * @param xpathInput the XPath expression to parse
+   * @return the information collected by {@link XPathListenerImpl#parse(String)}
+   */
+  public static XPathInfo parse(String xpathInput) {
+    return new XPathListenerImpl().parse(xpathInput);
+  }
+
+  /**
+   * Prefixes a path with an axis. Leading back-steps ({@code ..}) are dropped. Only the step
+   * texts are used to build the result, so the predicates of the steps are not part of it.
+   *
+   * @param axis the name of the axis, without the {@code ::} separator
+   * @param path the XPath expression to prefix
+   * @return the axis, followed by {@code ::} and the remaining step texts joined with {@code /}
+   * @throws NoSuchElementException if no step is left after dropping the leading back-steps
+   */
+  public static String addAxis(String axis, String path) {
+    LinkedList<XPathStep> steps = new LinkedList<>(parse(path).getSteps());
+
+    // The emptiness check avoids calling getFirst() on an exhausted list, which would throw
+    // the same exception but without any indication of the offending path.
+    while (!steps.isEmpty() && "..".equals(steps.getFirst().getStepText())) {
+      steps.removeFirst();
+    }
+    if (steps.isEmpty()) {
+      throw new NoSuchElementException(
+          "No step left to apply axis '" + axis + "' to in XPath: " + path);
+    }
+
+    return axis + "::" + joinStepTexts(steps);
+  }
+
+  /**
+   * Joins two XPath expressions. Leading back-steps ({@code ..}) of the second expression are
+   * resolved against the trailing steps of the first one where possible. Only the step texts
+   * are used to build the result, so the predicates of the steps are not part of it.
+   *
+   * @param first the first XPath expression; if null or blank, {@code second} is returned
+   * @param second the second XPath expression; if null or blank, {@code first} is returned
+   * @return the joined XPath expression
+   */
+  public static String join(final String first, final String second) {
+
+    if (first == null || first.trim().isEmpty()) {
+      return second;
+    }
+
+    if (second == null || second.trim().isEmpty()) {
+      return first;
+    }
+
+    LinkedList<XPathStep> firstPartSteps = new LinkedList<>(parse(first).getSteps());
+    LinkedList<XPathStep> secondPartSteps = new LinkedList<>(parse(second).getSteps());
+
+    return getJoinedXPath(firstPartSteps, secondPartSteps);
+  }
+
+  /**
+   * Makes an XPath expression relative to a context.
+   *
+   * @param contextXpath the XPath expression of the context
+   * @param xpath the XPath expression to contextualise
+   * @return the XPath expression relative to the context, or {@code xpath} itself if the context
+   *         is null or empty
+   */
+  public static String contextualize(final String contextXpath, final String xpath) {
+
+    // If we are asked to contextualise against a null or empty context
+    // then we must return the original xpath (instead of throwing an exception).
+    if (contextXpath == null || contextXpath.isEmpty()) {
+      return xpath;
+    }
+
+    LinkedList<XPathStep> contextSteps = new LinkedList<>(parse(contextXpath).getSteps());
+    LinkedList<XPathStep> pathSteps = new LinkedList<>(parse(xpath).getSteps());
+
+    return getContextualizedXpath(contextSteps, pathSteps);
+  }
+
+  /**
+   * Builds the relative XPath that leads from the context to the path. Both queues are consumed.
+   *
+   * @param contextQueue the steps of the context
+   * @param pathQueue the steps of the path
+   * @return the relative XPath, {@code .} if the result would otherwise be empty, or an empty
+   *         string if the context queue is null
+   */
+  private static String getContextualizedXpath(Queue<XPathStep> contextQueue,
+      final Queue<XPathStep> pathQueue) {
+
+    // We will store the relative xPath here as we build it.
+    String relativeXpath = "";
+
+    if (contextQueue != null) {
+
+      // First we will "consume" all nodes that are the same in both xPaths.
+      while (!contextQueue.isEmpty() && !pathQueue.isEmpty()
+          && pathQueue.peek().isTheSameAs(contextQueue.peek())) {
+        contextQueue.poll();
+        pathQueue.poll();
+      }
+
+      // At this point there are no more matching nodes in the two queues.
+
+      // We look at the first of the remaining steps in both queues and look if
+      // the context is more restrictive than the path. In this case we want to use a dot step
+      // with the predicate of the path.
+      if (!contextQueue.isEmpty() && !pathQueue.isEmpty()
+          && pathQueue.peek().isSimilarTo(contextQueue.peek())) {
+        contextQueue.poll(); // consume the same step from the contextQueue
+        if (contextQueue.isEmpty()) {
+          // Since there are no more steps in the contextQueue, the relative xpath should
+          // start with a dot step to provide a context for the predicate.
+          relativeXpath += "." + pathQueue.poll().getPredicateText();
+        } else {
+          // Since there are more steps in the contextQueue which we will need to navigate back
+          // to, using back-steps, we will use a back-step to provide context of the predicate.
+          // This avoids an output that looks like ../.[predicate] which is valid but silly.
+          contextQueue.poll(); // consume the step from the contextQueue
+          relativeXpath += ".." + pathQueue.poll().getPredicateText();
+        }
+      }
+
+      // We start building the resulting relativeXpath by appending any nodes
+      // remaining in the pathQueue.
+      while (!pathQueue.isEmpty()) {
+        final XPathStep step = pathQueue.poll();
+        relativeXpath += "/" + step.getStepText() + step.getPredicateText();
+      }
+
+      // We remove any leading forward slashes from the resulting xPath.
+      while (relativeXpath.startsWith("/")) {
+        relativeXpath = relativeXpath.substring(1);
+      }
+
+      // For each step remaining in the contextQueue we prepend a back-step (..) in
+      // the resulting relativeXpath.
+      while (!contextQueue.isEmpty()) {
+        contextQueue.poll(); // consume the step
+        relativeXpath = "../" + relativeXpath; // prepend a back-step
+      }
+
+      // We remove any trailing forward slashes from the resulting xPath.
+      while (relativeXpath.endsWith("/")) {
+        relativeXpath = relativeXpath.substring(0, relativeXpath.length() - 1);
+      }
+
+      // The relativeXpath will be empty if the path was identical to the context.
+      // In this case we return a dot.
+      if (relativeXpath.isEmpty()) {
+        relativeXpath = ".";
+      }
+    }
+
+    return relativeXpath;
+  }
+
+  /**
+   * Joins the steps of two XPath expressions. While the second list starts with a back-step
+   * ({@code ..}) and the first list ends with a step that is neither a dot step nor a variable,
+   * the two steps cancel each other out and are removed. Both lists are modified.
+   *
+   * @param first the steps of the first XPath expression
+   * @param second the steps of the second XPath expression
+   * @return the remaining step texts of both lists, joined with {@code /}
+   */
+  private static String getJoinedXPath(LinkedList<XPathStep> first,
+      final LinkedList<XPathStep> second) {
+    List<String> dotSteps = Arrays.asList("..", ".");
+    // Stop as soon as either list runs out of steps: getFirst()/getLast() throw on an empty
+    // list, which used to happen for inputs as simple as join("a/b", "..").
+    while (!first.isEmpty() && !second.isEmpty()
+        && second.getFirst().getStepText().equals("..")
+        && !dotSteps.contains(first.getLast().getStepText())
+        && !first.getLast().isVariableStep()) {
+      second.removeFirst();
+      first.removeLast();
+    }
+
+    final String firstPath = joinStepTexts(first);
+    final String secondPath = joinStepTexts(second);
+
+    if (second.isEmpty()) {
+      // Nothing is left of the second path, so there is nothing to append to the first one.
+      return first.isEmpty() ? "." : firstPath;
+    }
+    if (first.isEmpty() && second.getFirst().getStepText().equals("..")) {
+      // The second path climbs above the first step of the first path: keep the back-steps
+      // that could not be resolved.
+      return secondPath;
+    }
+
+    // NOTE(review): when "first" has no steps left the result starts with "/", as it always
+    // did. Confirm whether callers rely on this before changing it.
+    return firstPath + "/" + secondPath;
+  }
+
+  // Returns the step texts (without predicates) of the given steps, joined with "/".
+  private static String joinStepTexts(List<XPathStep> steps) {
+    return steps.stream().map(XPathStep::getStepText).collect(Collectors.joining("/"));
+  }
+}
diff --git a/src/main/java/eu/europa/ted/eforms/xpath/XPathStep.java b/src/main/java/eu/europa/ted/eforms/xpath/XPathStep.java
new file mode 100644
index 0000000..c016f74
--- /dev/null
+++ b/src/main/java/eu/europa/ted/eforms/xpath/XPathStep.java
@@ -0,0 +1,148 @@
+package eu.europa.ted.eforms.xpath;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.commons.lang3.StringUtils;
+
+public class XPathStep implements Comparable {
+ private final String stepText;
+ private final List predicates;
+
+ public XPathStep(String stepText, List predicates) {
+ this.stepText = StringUtils.strip(stepText);
+ this.predicates = predicates;
+ }
+
+ public String getStepText() {
+ return stepText;
+ }
+
+ public List getPredicates() {
+ return predicates;
+ }
+
+ public String getPredicateText() {
+ return String.join("", predicates);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(stepText, predicates);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+
+ XPathStep other = (XPathStep) obj;
+ // First check the step texts are the different.
+ if (!Objects.equals(getStepText(), other.getStepText())) {
+ return false;
+ }
+
+ if (getPredicates().size() != other.getPredicates().size()) {
+ // If one of the two steps has more predicates that the other,
+ // then the steps are not equal.
+ return false;
+ }
+
+ // Both steps have the same number of predicates
+ // If there are no predicates then the steps are the same.
+ if (getPredicates().isEmpty()) {
+ return true;
+ }
+
+ // If there is only one predicate in each step, then we can do a quick comparison.
+ if (getPredicates().size() == 1) {
+ return Objects.equals(getPredicates().get(0), other.getPredicates().get(0));
+ }
+
+ // Both steps contain multiple predicates.
+ // We need to compare them one by one.
+ // First we make a copy so that we can sort them without affecting the original lists.
+ List pathPredicates = new ArrayList<>(getPredicates());
+ List contextPredicates = new ArrayList<>(other.getPredicates());
+ Collections.sort(pathPredicates);
+ Collections.sort(contextPredicates);
+
+ return pathPredicates.equals(contextPredicates);
+ }
+
+ public boolean isTheSameAs(final XPathStep other) {
+
+ // First check the step texts are the different.
+ if (!Objects.equals(getStepText(), other.getStepText())) {
+ return false;
+ }
+
+ // If one of the two steps has more predicates that the other,
+ if (this.getPredicates().size() != other.getPredicates().size()) {
+ // then the steps are the same if the path has no predicates
+ // or all the predicates of the path are also found in the context.
+ return this.getPredicates().isEmpty() || other.getPredicates().containsAll(this.getPredicates());
+ }
+
+ // Both steps have the same number of predicates
+ // If there are no predicates then the steps are the same.
+ if (this.getPredicates().isEmpty()) {
+ return true;
+ }
+
+ // If there is only one predicate in each step, then we can do a quick comparison.
+ if (this.getPredicates().size() == 1) {
+ return Objects.equals(getPredicates().get(0), other.getPredicates().get(0));
+ }
+
+ // Both steps contain multiple predicates.
+ // We need to compare them one by one.
+ // First we make a copy so that we can sort them without affecting the original lists.
+ List pathPredicates = new ArrayList<>(this.getPredicates());
+ List contextPredicates = new ArrayList<>(other.getPredicates());
+ Collections.sort(pathPredicates);
+ Collections.sort(contextPredicates);
+
+ return pathPredicates.equals(contextPredicates);
+ }
+
+ public boolean isSimilarTo(final XPathStep other) {
+
+ // First check the step texts are different.
+ if (!Objects.equals(other.stepText, this.stepText)) {
+ return false;
+ }
+
+ // If one of the two steps has more predicates that the other,
+ if (this.predicates.size() != other.predicates.size()) {
+ // then the steps are similar if either of them has no predicates
+ // or all the predicates of this step are also found in the specific step.
+ return this.predicates.isEmpty() || other.predicates.isEmpty()
+ || other.predicates.containsAll(this.predicates);
+ }
+
+ assert !this.isTheSameAs(other) : "You should not be calling isSimilarTo() without first checking isTheSameAs()";
+ return false;
+ }
+
+ @Override
+ public int compareTo(XPathStep other) {
+ return Comparator.comparing(XPathStep::getStepText)
+ .thenComparing(XPathStep::getPredicateText)
+ .compare(this, other);
+ }
+
+ public boolean isVariableStep() {
+ return stepText.startsWith("$");
+ }
+}
diff --git a/src/test/java/eu/europa/ted/eforms/xpath/XPathProcessorTest.java b/src/test/java/eu/europa/ted/eforms/xpath/XPathProcessorTest.java
new file mode 100644
index 0000000..456e241
--- /dev/null
+++ b/src/test/java/eu/europa/ted/eforms/xpath/XPathProcessorTest.java
@@ -0,0 +1,214 @@
+package eu.europa.ted.eforms.xpath;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for {@link XPathProcessor}: parsing, contextualizing, adding an axis and joining.
+ */
+class XPathProcessorTest {
+  /** Parses {@code fullPath} and checks its path to the last element and its attribute name. */
+  private void assertAttribute(final String fullPath, final String expectedPath,
+      final String expectedAttribute) {
+    final XPathInfo parsed = XPathProcessor.parse(fullPath);
+
+    assertEquals(expectedPath, parsed.getPathToLastElement());
+    assertEquals(expectedAttribute, parsed.getAttributeName());
+  }
+
+  /** Parses {@code xpath} and checks the step text of every parsed step, in order. */
+  private void assertSteps(final String xpath, String... steps) {
+    final String[] stepTexts = XPathProcessor.parse(xpath)
+        .getSteps()
+        .stream()
+        .map(XPathStep::getStepText)
+        .toArray(String[]::new);
+
+    assertArrayEquals(steps, stepTexts);
+  }
+
+  /** Checks that {@code xpath}, made relative to {@code context}, is {@code expected}. */
+  private void assertRelative(final String expected, final String context, final String xpath) {
+    assertEquals(expected, XPathProcessor.contextualize(context, xpath));
+  }
+
+  @Test
+  void testXPathAttribute_WithAttribute() {
+    assertAttribute("/a/b/@attribute", "/a/b", "attribute");
+  }
+
+  @Test
+  void testXPathAttribute_WithMultipleAttributes() {
+    assertAttribute("/a/b[@otherAttribute = 'text']/@attribute",
+        "/a/b[@otherAttribute = 'text']", "attribute");
+  }
+
+  @Test
+  void testXPathAttribute_WithoutAttribute() {
+    assertAttribute("/a/b[@otherAttribute = 'text']",
+        "/a/b[@otherAttribute = 'text']", null);
+  }
+
+  @Test
+  void testXPathAttribute_WithoutPath() {
+    assertAttribute("@attribute", "", "attribute");
+  }
+
+  @Test
+  void testXPathSteps() {
+    assertSteps("/a/b/c", "a", "b", "c");
+    assertSteps("/a/b[u/v='z']/c[x][y]", "a", "b", "c");
+  }
+
+  // START tests for contextualize
+  @Test
+  void testIdentical() {
+    assertRelative(".", "/a/b/c", "/a/b/c");
+  }
+
+  @Test
+  void testContextEmpty() {
+    assertRelative("/a/b/c", "", "/a/b/c");
+  }
+
+  @Test
+  void testUnderContext() {
+    assertRelative("c", "/a/b", "/a/b/c");
+  }
+
+  @Test
+  void testAboveContext() {
+    assertRelative("..", "/a/b/c", "/a/b");
+  }
+
+  @Test
+  void testSibling() {
+    assertRelative("../d", "/a/b/c", "/a/b/d");
+  }
+
+  @Test
+  void testTwoLevelsDifferent() {
+    assertRelative("../../x/y", "/a/b/c/d", "/a/b/x/y");
+  }
+
+  @Test
+  void testAllDifferent() {
+    assertRelative("../../../x/y/z", "/a/b/c/d", "/a/x/y/z");
+  }
+
+  @Test
+  void testDifferentRoot() {
+    // Not realistic, as XML has a single root, but a valid result
+    assertRelative("../../../x/y/z", "/a/b/c", "/x/y/z");
+  }
+
+  @Test
+  void testAttributeInXpath() {
+    assertRelative("../c/@attribute", "/a/b", "/a/c/@attribute");
+  }
+
+  @Test
+  void testAttributeInContext() {
+    assertRelative("../c/d", "/a/b/@attribute", "/a/b/c/d");
+  }
+
+  @Test
+  void testAttributeInBoth() {
+    assertRelative("../@x", "/a/b/c/@d", "/a/b/c/@x");
+  }
+
+  @Test
+  void testAttributeInBothSame() {
+    assertRelative(".", "/a/b/c/@d", "/a/b/c/@d");
+  }
+
+  @Test
+  void testPredicateInXpathLeaf() {
+    assertRelative("../d[x/y = 'z']", "/a/b/c", "/a/b/d[x/y = 'z']");
+  }
+
+  @Test
+  void testPredicateBeingTheOnlyDifference() {
+    assertRelative(".[x/y = 'z']", "/a/b/c", "/a/b/c[x/y = 'z']");
+  }
+
+  @Test
+  void testPredicateInContextBeingTheOnlyDifference() {
+    assertRelative(".", "/a/b/c[e/f = 'z']", "/a/b/c");
+  }
+
+  @Test
+  void testPredicatesBeingTheOnlyDifferences() {
+    assertRelative("..[u/v = 'w']/c[x/y = 'z']", "/a/b/c", "/a/b[u/v = 'w']/c[x/y = 'z']");
+  }
+
+  @Test
+  void testPredicateInContextLeaf() {
+    assertRelative("../d", "/a/b/c[e/f = 'z']", "/a/b/d");
+  }
+
+  @Test
+  void testPredicateInBothLeaf() {
+    assertRelative("../d[x = 'y']", "/a/b/c[e = 'f']", "/a/b/d[x = 'y']");
+  }
+
+  @Test
+  void testPredicateInXpathMiddle() {
+    assertRelative("..[x/y = 'z']/d", "/a/b/c", "/a/b[x/y = 'z']/d");
+  }
+
+  @Test
+  void testPredicateInContextMiddle() {
+    assertRelative("../d", "/a/b[e/f = 'z']/c", "/a/b/d");
+  }
+
+  @Test
+  void testPredicateSameInBoth() {
+    assertRelative("../d", "/a/b[e/f = 'z']/c", "/a/b[e/f = 'z']/d");
+  }
+
+  @Test
+  void testPredicateDifferentOnSameElement() {
+    assertRelative("../../b[x = 'y']/d", "/a/b[e = 'f']/c", "/a/b[x = 'y']/d");
+  }
+
+  @Test
+  void testPredicateDifferent() {
+    assertRelative(".[x = 'y']/d", "/a/b[e = 'f']/c", "/a/b/c[x = 'y']/d");
+  }
+
+  @Test
+  void testPredicateMoreInXpath() {
+    assertRelative("../../b[e][f]/c/d", "/a/b[e]/c", "/a/b[e][f]/c/d");
+  }
+
+  @Test
+  void testPredicateMoreInContext() {
+    assertRelative("d", "/a/b[e][f]/c", "/a/b[e]/c/d");
+  }
+
+  @Test
+  void testSeveralPredicatesIdentical() {
+    assertRelative("d", "/a/b[e][f]/c", "/a/b[e][f]/c/d");
+  }
+
+  @Test
+  void testSeveralPredicatesOneDifferent() {
+    assertRelative("../../b[e][x]/c/d", "/a/b[e][f]/c", "/a/b[e][x]/c/d");
+  }
+  // END tests for contextualize
+
+  @Test
+  void testAddAxis() {
+    assertEquals("preceding::b/c", XPathProcessor.addAxis("preceding", "b/c"));
+    assertEquals("descendant::b/c", XPathProcessor.addAxis("descendant", "../../b/c"));
+  }
+
+  @Test
+  void testJoin() {
+    assertEquals("a/b/c/d", XPathProcessor.join("a/b", "c/d"));
+    assertEquals("a/x/y", XPathProcessor.join("a/b/c", "../../x/y"));
+  }
+}
diff --git a/src/test/java/eu/europa/ted/eforms/xpath/XPathStepTest.java b/src/test/java/eu/europa/ted/eforms/xpath/XPathStepTest.java
new file mode 100644
index 0000000..60a9385
--- /dev/null
+++ b/src/test/java/eu/europa/ted/eforms/xpath/XPathStepTest.java
@@ -0,0 +1,99 @@
+package eu.europa.ted.eforms.xpath;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Arrays;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for the comparisons offered by {@link XPathStep}: {@code equals()},
+ * {@code isTheSameAs()} and {@code isSimilarTo()}.
+ */
+public class XPathStepTest {
+  @Test
+  void testEquals() {
+    // Same predicates, listed in a different order.
+    final XPathStep step = stepOf("foo", "x=1", "y=2");
+    final XPathStep other = stepOf("foo", "y=2", "x=1");
+
+    assertTrue(step.equals(other));
+    assertTrue(other.equals(step));
+  }
+
+  @Test
+  void testComparison_Equals() {
+    // Same predicates, listed in a different order.
+    final XPathStep step = stepOf("foo", "x=1", "y=2");
+    final XPathStep other = stepOf("foo", "y=2", "x=1");
+
+    assertTrue(step.isTheSameAs(other));
+    assertTrue(other.isTheSameAs(step));
+  }
+
+  @Test
+  void testComparison_DifferentElement() {
+    // Same predicate, different element.
+    final XPathStep step = stepOf("foo", "a");
+    final XPathStep other = stepOf("bar", "a");
+
+    assertFalse(step.isTheSameAs(other));
+    assertFalse(step.isSimilarTo(other));
+  }
+
+  @Test
+  void testComparison_MorePredicates() {
+    // The other step has one predicate more.
+    final XPathStep step = stepOf("foo", "x=1");
+    final XPathStep other = stepOf("foo", "x=1", "y=2");
+
+    assertTrue(step.isTheSameAs(other));
+    assertTrue(step.isSimilarTo(other));
+  }
+
+  @Test
+  void testComparison_LessPredicates() {
+    // The other step has one predicate less.
+    final XPathStep step = stepOf("foo", "x=1", "y=2");
+    final XPathStep other = stepOf("foo", "x=1");
+
+    assertFalse(step.isTheSameAs(other));
+    assertFalse(step.isSimilarTo(other));
+  }
+
+  @Test
+  void testComparison_DifferentPredicate() {
+    // Same number of predicates, but one of them differs.
+    final XPathStep step = stepOf("foo", "x=1", "y=2");
+    final XPathStep other = stepOf("foo", "x=1", "zzz");
+
+    assertFalse(step.isTheSameAs(other));
+    assertFalse(step.isSimilarTo(other));
+  }
+
+  @Test
+  void testComparison_NoPredicates() {
+    // The other step has no predicates at all.
+    final XPathStep step = stepOf("foo", "x=1", "y=2");
+    final XPathStep other = stepOf("foo");
+
+    assertFalse(step.isTheSameAs(other));
+    assertTrue(step.isSimilarTo(other));
+  }
+
+  @Test
+  void testComparison_AddPredicates() {
+    // Only the other step has predicates.
+    final XPathStep step = stepOf("foo");
+    final XPathStep other = stepOf("foo", "x=1", "y=2");
+
+    assertTrue(step.isTheSameAs(other));
+    assertTrue(step.isSimilarTo(other));
+  }
+
+  /** Builds a step for the given element and predicates, keeping the predicates in order. */
+  private static XPathStep stepOf(final String element, final String... predicates) {
+    return new XPathStep(element, Arrays.asList(predicates));
+  }
+}