From 90848534a1a49061ee7648719bbf5e371aaff886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sun, 11 Aug 2024 13:50:13 -0300 Subject: [PATCH 1/5] Change behavior to match previously removed XHTML serializer --- .../validator/html/scan/ASHTMLSerializer.java | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java index 104f66a..e3e14d5 100644 --- a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java +++ b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java @@ -6,6 +6,7 @@ import org.apache.xml.serialize.HTMLdtd; import org.apache.xml.serialize.OutputFormat; import org.owasp.validator.html.InternalPolicy; +import org.owasp.validator.html.TagMatcher; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,9 +15,13 @@ public class ASHTMLSerializer extends org.apache.xml.serialize.HTMLSerializer { private static final Logger logger = LoggerFactory.getLogger(ASHTMLSerializer.class); private boolean encodeAllPossibleEntities; + private final TagMatcher allowedEmptyTags; + private final TagMatcher requireClosingTags; public ASHTMLSerializer(Writer w, OutputFormat format, InternalPolicy policy) { super(w, format); + this.allowedEmptyTags = policy.getAllowedEmptyTags(); + this.requireClosingTags = policy.getRequiresClosingTags(); this.encodeAllPossibleEntities = policy.isEntityEncodeIntlCharacters(); } @@ -38,17 +43,21 @@ public void endElementIO(String namespaceURI, String localName, String rawName) _printer.unindent(); state = getElementState(); - if (state.empty) _printer.printText('>'); - // This element is not empty and that last content was another element, so print a line break - // before that last element and this element's closing tag. [keith] Provided this is not an - // anchor. HTML: some elements do not print closing tag (e.g. 
LI) - if (rawName == null || !HTMLdtd.isOnlyOpening(rawName) || HTMLdtd.isOptionalClosing(rawName)) { - if (_indenting && !state.preserveSpace && state.afterElement) _printer.breakLine(); - // Must leave CData section first (Illegal in HTML, but still) - if (state.inCData) _printer.printText("]]>"); - _printer.printText("'); + if (state.empty && isAllowedEmptyTag(rawName) && !requiresClosingTag(rawName)) { // + _printer.printText("/>"); + } else { + if(state.empty) _printer.printText('>'); + // This element is not empty and that last content was another element, so print a line break + // before that last element and this element's closing tag. [keith] Provided this is not an + // anchor. HTML: some elements do not print closing tag (e.g. LI) + if (rawName == null || !HTMLdtd.isOnlyOpening(rawName) || HTMLdtd.isOptionalClosing(rawName)) { + if (_indenting && !state.preserveSpace && state.afterElement) _printer.breakLine(); + // Must leave CData section first (Illegal in HTML, but still) + if (state.inCData) _printer.printText("]]>"); + _printer.printText("'); + } } // Leave the element state and update that of the parent (if we're not root) to not empty and @@ -76,4 +85,12 @@ protected String escapeURI(String uri) { } return ""; } + + private boolean requiresClosingTag(String tagName) { + return requireClosingTags.matches(tagName); + } + + private boolean isAllowedEmptyTag(String tagName) { + return "head".equals(tagName) || allowedEmptyTags.matches( tagName); + } } From a769a9d05a50883dedd42a097d56a2825c0f635b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sun, 11 Aug 2024 13:51:39 -0300 Subject: [PATCH 2/5] Add test for XHTML behavior --- .../owasp/validator/html/test/AntiSamyTest.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index 9669343..3640410 100644 
a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -2702,4 +2702,19 @@ public void testGithubIssue453() throws ScanException, PolicyException { + "