sparklemotion · flavorjones · Feb 13, 2017 · Jul 19, 2015 · Feb 11, 2017 · Feb 11, 2017
diff --git a/ext/java/nokogiri/XmlDocumentFragment.java b/ext/java/nokogiri/XmlDocumentFragment.java
@@ -114,9 +114,6 @@ private static boolean isTag(final RubyString str) {
         return str.getByteList().startsWith(TAG_BEG) && str.getByteList().endsWith(TAG_END);
     }
 
-    private static Pattern qname_pattern = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
-    private static Pattern starttag_pattern = Pattern.compile("<[^</>]+>");
-
     private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
         if (isNamespace(qName.intern())) return true;
         for (int i=0; i < nodeMap.getLength(); i++) {
@@ -130,39 +127,48 @@ private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
         }
         return false;
     }
-
+
+    private static final Pattern QNAME_RE = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
+    private static final Pattern START_TAG_RE = Pattern.compile("<[^</>]+>");
+
     private static String addNamespaceDeclIfNeeded(XmlDocument doc, String tags) {
         if (doc.getDocument() == null) return tags;
         if (doc.getDocument().getDocumentElement() == null) return tags;
-        Matcher matcher = starttag_pattern.matcher(tags);
-        Map<String, String> rewriteTable = new HashMap<String, String>();
-        while(matcher.find()) {
+        Matcher matcher = START_TAG_RE.matcher(tags);
+        Map<CharSequence, CharSequence> rewriteTable = null;
+        while (matcher.find()) {
             String start_tag = matcher.group();
-            Matcher matcher2 = qname_pattern.matcher(start_tag);
-            while(matcher2.find()) {
+            Matcher matcher2 = QNAME_RE.matcher(start_tag);
+            while (matcher2.find()) {
                 String qName = matcher2.group();
                 NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
                 if (isNamespaceDefined(qName, nodeMap)) {
-                    String namespaceDecl = getNamespceDecl(getPrefix(qName), nodeMap);
+                    CharSequence namespaceDecl = getNamespaceDecl(getPrefix(qName), nodeMap);
                     if (namespaceDecl != null) {
-                        rewriteTable.put("<"+qName+">", "<"+qName + " " + namespaceDecl+">");
+                        if (rewriteTable == null) rewriteTable = new HashMap(8, 1);
+                        StringBuilder str = new StringBuilder(qName.length() + namespaceDecl.length() + 3);
+                        String key = str.append('<').append(qName).append('>').toString();
+                        str.setCharAt(key.length() - 1, ' '); // (last) '>' -> ' '
+                        rewriteTable.put(key, str.append(namespaceDecl).append('>'));
                     }
                 }
             }
         }
-        Set<String> keys = rewriteTable.keySet();
-        for (String key : keys) {
-            tags = tags.replace(key, rewriteTable.get(key));
+        if (rewriteTable != null) {
+            for (Map.Entry<CharSequence, CharSequence> e : rewriteTable.entrySet()) {
+                tags = tags.replace(e.getKey(), e.getValue());
+            }
         }
 
         return tags;
     }
 
-    private static String getNamespceDecl(String prefix, NamedNodeMap nodeMap) {
+    private static CharSequence getNamespaceDecl(final String prefix, NamedNodeMap nodeMap) {
         for (int i=0; i < nodeMap.getLength(); i++) {
-            Attr attr = (Attr)nodeMap.item(i);
+            Attr attr = (Attr) nodeMap.item(i);
             if (prefix.equals(attr.getLocalName())) {
-                return attr.getName() + "=\"" + attr.getValue() + "\"";
+                return new StringBuilder().
+                    append(attr.getName()).append('=').append('"').append(attr.getValue()).append('"');
             }
         }
         return null;

diff --git a/ext/java/nokogiri/XmlElement.java b/ext/java/nokogiri/XmlElement.java
@@ -68,21 +68,20 @@ public void setNode(ThreadContext context, Node node) {
 
     @Override
     public void accept(ThreadContext context, SaveContextVisitor visitor) {
-        visitor.enter((Element)node);
+        visitor.enter((Element) node);
         XmlNodeSet xmlNodeSet = (XmlNodeSet) children(context);
         if (xmlNodeSet.length() > 0) {
-            RubyArray array = (RubyArray) xmlNodeSet.to_a(context);
-            for(int i = 0; i < array.getLength(); i++) {
-                Object item = array.get(i);
+            RubyArray nodes = xmlNodeSet.nodes;
+            for( int i = 0; i < nodes.size(); i++ ) {
+                Object item = nodes.eltInternal(i);
                 if (item instanceof XmlNode) {
-                  XmlNode cur = (XmlNode) item;
-                  cur.accept(context, visitor);
-                } else if (item instanceof XmlNamespace) {
-                    XmlNamespace cur = (XmlNamespace)item;
-                    cur.accept(context, visitor);
+                    ((XmlNode) item).accept(context, visitor);
+                }
+                else if (item instanceof XmlNamespace) {
+                    ((XmlNamespace) item).accept(context, visitor);
                 }
             }
         }
-        visitor.leave((Element)node);
+        visitor.leave((Element) node);
     }
 }
diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java
@@ -227,7 +227,7 @@ protected void decorate(final ThreadContext context) {
             if (node.getNodeType() != Node.DOCUMENT_NODE) {
                 doc = document(context.runtime);
 
-                if (doc != null && doc.isTrue()) {
+                if (doc != null && ! doc.isNil()) {
                     RuntimeHelpers.invoke(context, doc, "decorate", this);
                 }
             }
@@ -700,10 +700,9 @@ public IRubyObject blank_p(ThreadContext context) {
         // a node is blank if if it is a Text or CDATA node consisting of whitespace only
         if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) {
             String data = node.getTextContent();
-            if (data == null) return context.getRuntime().getTrue();
-            if ("".equals(data.trim())) return context.getRuntime().getTrue();
+            return context.runtime.newBoolean(data == null || data.isEmpty() || data.trim().isEmpty());
         }
-        return context.getRuntime().getFalse();
+        return context.runtime.getFalse();
     }
 
     @JRubyMethod
@@ -713,8 +712,16 @@ public IRubyObject child(ThreadContext context) {
 
     @JRubyMethod
     public IRubyObject children(ThreadContext context) {
-        XmlNodeSet xmlNodeSet = (XmlNodeSet) NokogiriService.XML_NODESET_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::NodeSet"));
-        xmlNodeSet.setNodeList(node.getChildNodes());
+        XmlNodeSet xmlNodeSet = XmlNodeSet.create(context.runtime);
+
+        NodeList nodeList = node.getChildNodes();
+        if (nodeList.getLength() > 0) {
+            xmlNodeSet.setNodeList(nodeList); // initializes @document from first node
+        }
+        else { // TODO this is very ripe for refactoring
+            setDocumentAndDecorate(context, xmlNodeSet, doc);
+        }
+
         return xmlNodeSet;
     }
 
@@ -1430,9 +1437,10 @@ public IRubyObject set_namespace(ThreadContext context, IRubyObject namespace) {
 
     @JRubyMethod(name = {"unlink", "remove"})
     public IRubyObject unlink(ThreadContext context) {
-        if (node.getParentNode() != null) {
-            clearXpathContext(node.getParentNode());
-            node.getParentNode().removeChild(node);
+        final Node parent = node.getParentNode();
+        if (parent != null) {
+            parent.removeChild(node);
+            clearXpathContext(parent);
         }
         return this;
     }

diff --git a/ext/java/nokogiri/XmlNodeSet.java b/ext/java/nokogiri/XmlNodeSet.java
@@ -44,7 +44,6 @@
 import org.jruby.RubyObject;
 import org.jruby.anno.JRubyClass;
 import org.jruby.anno.JRubyMethod;
-import org.jruby.javasupport.util.RuntimeHelpers;
 import org.jruby.runtime.Block;
 import org.jruby.runtime.ThreadContext;
 import org.jruby.runtime.builtin.IRubyObject;
@@ -60,7 +59,7 @@
 @JRubyClass(name="Nokogiri::XML::NodeSet")
 public class XmlNodeSet extends RubyObject implements NodeList {
 
-    private RubyArray nodes;
+    RubyArray nodes;
 
     public XmlNodeSet(Ruby ruby, RubyClass klazz) {
         super(ruby, klazz);
@@ -118,9 +117,9 @@ final void initialize(Ruby runtime, IRubyObject refNode) {
         }
     }
 
-    public long length() {
-        if (nodes == null) return 0L;
-        return nodes.length().getLongValue();
+    public int length() {
+        if (nodes == null) return 0;
+        return nodes.size();
     }
 
     public void relink_namespace(ThreadContext context) {

diff --git a/ext/java/nokogiri/XmlXpathContext.java b/ext/java/nokogiri/XmlXpathContext.java
@@ -48,6 +48,7 @@
 import org.jruby.exceptions.RaiseException;
 import org.jruby.runtime.ThreadContext;
 import org.jruby.runtime.builtin.IRubyObject;
+import org.jruby.util.SafePropertyAccessor;
 import org.w3c.dom.Node;
 
 import org.apache.xml.dtm.DTM;
@@ -68,6 +69,16 @@
 @JRubyClass(name="Nokogiri::XML::XPathContext")
 public class XmlXpathContext extends RubyObject {
 
+    static {
+        final String DTMManager = "org.apache.xml.dtm.DTMManager";
+        if (SafePropertyAccessor.getProperty(DTMManager) == null) {
+            try { // use patched "org.apache.xml.dtm.ref.DTMManagerDefault"
+                System.setProperty(DTMManager, nokogiri.internals.XalanDTMManagerPatch.class.getName());
+            }
+            catch (SecurityException ex) { /* no-op - will work although might be slower */ }
+        }
+    }
+
     /**
      * user-data key for (cached) {@link XPathContext}
      */

diff --git a/ext/java/nokogiri/internals/HtmlDomParserContext.java b/ext/java/nokogiri/internals/HtmlDomParserContext.java
@@ -53,6 +53,7 @@
 import org.jruby.runtime.builtin.IRubyObject;
 import org.w3c.dom.Document;
 import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
 /**
@@ -118,14 +119,12 @@ public void enableDocumentFragment() {
 
     @Override
     protected XmlDocument getNewEmptyDocument(ThreadContext context) {
-        IRubyObject[] args = new IRubyObject[0];
+        IRubyObject[] args = IRubyObject.NULL_ARRAY;
         return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document"), args);
     }
 
     @Override
-    protected XmlDocument wrapDocument(ThreadContext context,
-                                       RubyClass klazz,
-                                       Document document) {
+    protected XmlDocument wrapDocument(ThreadContext context, RubyClass klazz, Document document) {
         HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
         htmlDocument.setDocumentNode(context, document);
         if (ruby_encoding.isNil()) {
@@ -146,18 +145,18 @@ protected XmlDocument wrapDocument(ThreadContext context,
     // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
     // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
     // so, this method attempts to find the charset.
-    private String tryGetCharsetFromHtml5MetaTag(Document document) {
+    private static String tryGetCharsetFromHtml5MetaTag(Document document) {
         if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null;
-        NodeList list = document.getDocumentElement().getChildNodes();
+        NodeList list = document.getDocumentElement().getChildNodes(); Node item;
         for (int i = 0; i < list.getLength(); i++) {
-            if ("head".equalsIgnoreCase(list.item(i).getNodeName())) {
-                NodeList headers = list.item(i).getChildNodes();
+            if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
+                NodeList headers = item.getChildNodes();
                 for (int j = 0; j < headers.getLength(); j++) {
-                    if ("meta".equalsIgnoreCase(headers.item(j).getNodeName())) {
-                        NamedNodeMap nodeMap = headers.item(j).getAttributes();
+                    if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
+                        NamedNodeMap nodeMap = item.getAttributes();
                         for (int k = 0; k < nodeMap.getLength(); k++) {
-                            if ("charset".equalsIgnoreCase(nodeMap.item(k).getNodeName())) {
-                                return nodeMap.item(k).getNodeValue();
+                            if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
+                                return item.getNodeValue();
                             }
                         }
                     }