Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

this isit #1604

Merged
merged 17 commits into from
Feb 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
b7b7316
failing test (on jruby): css search problem involving whitespace and …
jkraemer Jul 19, 2015
08460fc
avoid multiple param extractions -> split out xpath method
kares Feb 11, 2017
91cb46d
refactor `implied_xpath_contexts' internal into a (frozen) constant
kares Feb 11, 2017
98b6be8
hook-up a (custom) DTMManager implementation
kares Feb 11, 2017
97d3b7b
DOM2DTM :scissors: for (Nokogiri) internal adjustments
kares Feb 12, 2017
3bf3d29
DOM2DTM adapt xalan skipping text nodes (to handle #1320)
kares Feb 12, 2017
c6c9148
DOM2DTM - cleanup unused (static JJK) pieces ... a bit
kares Feb 12, 2017
214145c
DOM2DTM - use an ArrayList for nodes instead of (synchronized) Vector
kares Feb 12, 2017
3648e36
DOM2DTM keep the white-space skipping logic (for now)
kares Feb 12, 2017
e8fa198
use DOM2DTM customization from "patched" xalan DTMManager
kares Feb 12, 2017
cb7ae6b
review XmlElement#accept to not use RubyArray's List interface
kares Feb 12, 2017
4a3fc13
introduce css_rules_to_xpath helpers -> to convert once from node-set
kares Feb 12, 2017
af2873d
cleanup + less garbage generated on add-ns-declaration-if-needed
kares Feb 12, 2017
099a950
rewrite (RubyArray) nodes.size retrieval - return type is an int actu…
kares Feb 12, 2017
5165f24
make sure NodeSet's @document is set even if children empty
kares Feb 12, 2017
c659b10
code cleanup - avoid trim() until necessary; check doc for not being nil
kares Feb 12, 2017
5fc24ba
code cleanup - less index-ing + use null array instead of alloc
kares Feb 12, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 23 additions & 17 deletions ext/java/nokogiri/XmlDocumentFragment.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,6 @@ private static boolean isTag(final RubyString str) {
return str.getByteList().startsWith(TAG_BEG) && str.getByteList().endsWith(TAG_END);
}

private static Pattern qname_pattern = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
private static Pattern starttag_pattern = Pattern.compile("<[^</>]+>");

private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
if (isNamespace(qName.intern())) return true;
for (int i=0; i < nodeMap.getLength(); i++) {
Expand All @@ -130,39 +127,48 @@ private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
}
return false;
}


private static final Pattern QNAME_RE = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
private static final Pattern START_TAG_RE = Pattern.compile("<[^</>]+>");

private static String addNamespaceDeclIfNeeded(XmlDocument doc, String tags) {
if (doc.getDocument() == null) return tags;
if (doc.getDocument().getDocumentElement() == null) return tags;
Matcher matcher = starttag_pattern.matcher(tags);
Map<String, String> rewriteTable = new HashMap<String, String>();
while(matcher.find()) {
Matcher matcher = START_TAG_RE.matcher(tags);
Map<CharSequence, CharSequence> rewriteTable = null;
while (matcher.find()) {
String start_tag = matcher.group();
Matcher matcher2 = qname_pattern.matcher(start_tag);
while(matcher2.find()) {
Matcher matcher2 = QNAME_RE.matcher(start_tag);
while (matcher2.find()) {
String qName = matcher2.group();
NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
if (isNamespaceDefined(qName, nodeMap)) {
String namespaceDecl = getNamespceDecl(getPrefix(qName), nodeMap);
CharSequence namespaceDecl = getNamespaceDecl(getPrefix(qName), nodeMap);
if (namespaceDecl != null) {
rewriteTable.put("<"+qName+">", "<"+qName + " " + namespaceDecl+">");
if (rewriteTable == null) rewriteTable = new HashMap(8, 1);
StringBuilder str = new StringBuilder(qName.length() + namespaceDecl.length() + 3);
String key = str.append('<').append(qName).append('>').toString();
str.setCharAt(key.length() - 1, ' '); // (last) '>' -> ' '
rewriteTable.put(key, str.append(namespaceDecl).append('>'));
}
}
}
}
Set<String> keys = rewriteTable.keySet();
for (String key : keys) {
tags = tags.replace(key, rewriteTable.get(key));
if (rewriteTable != null) {
for (Map.Entry<CharSequence, CharSequence> e : rewriteTable.entrySet()) {
tags = tags.replace(e.getKey(), e.getValue());
}
}

return tags;
}

private static String getNamespceDecl(String prefix, NamedNodeMap nodeMap) {
private static CharSequence getNamespaceDecl(final String prefix, NamedNodeMap nodeMap) {
for (int i=0; i < nodeMap.getLength(); i++) {
Attr attr = (Attr)nodeMap.item(i);
Attr attr = (Attr) nodeMap.item(i);
if (prefix.equals(attr.getLocalName())) {
return attr.getName() + "=\"" + attr.getValue() + "\"";
return new StringBuilder().
append(attr.getName()).append('=').append('"').append(attr.getValue()).append('"');
}
}
return null;
Expand Down
19 changes: 9 additions & 10 deletions ext/java/nokogiri/XmlElement.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,21 +68,20 @@ public void setNode(ThreadContext context, Node node) {

@Override
public void accept(ThreadContext context, SaveContextVisitor visitor) {
visitor.enter((Element)node);
visitor.enter((Element) node);
XmlNodeSet xmlNodeSet = (XmlNodeSet) children(context);
if (xmlNodeSet.length() > 0) {
RubyArray array = (RubyArray) xmlNodeSet.to_a(context);
for(int i = 0; i < array.getLength(); i++) {
Object item = array.get(i);
RubyArray nodes = xmlNodeSet.nodes;
for( int i = 0; i < nodes.size(); i++ ) {
Object item = nodes.eltInternal(i);
if (item instanceof XmlNode) {
XmlNode cur = (XmlNode) item;
cur.accept(context, visitor);
} else if (item instanceof XmlNamespace) {
XmlNamespace cur = (XmlNamespace)item;
cur.accept(context, visitor);
((XmlNode) item).accept(context, visitor);
}
else if (item instanceof XmlNamespace) {
((XmlNamespace) item).accept(context, visitor);
}
}
}
visitor.leave((Element)node);
visitor.leave((Element) node);
}
}
26 changes: 17 additions & 9 deletions ext/java/nokogiri/XmlNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ protected void decorate(final ThreadContext context) {
if (node.getNodeType() != Node.DOCUMENT_NODE) {
doc = document(context.runtime);

if (doc != null && doc.isTrue()) {
if (doc != null && ! doc.isNil()) {
RuntimeHelpers.invoke(context, doc, "decorate", this);
}
}
Expand Down Expand Up @@ -700,10 +700,9 @@ public IRubyObject blank_p(ThreadContext context) {
// a node is blank if it is a Text or CDATA node consisting of whitespace only
if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) {
String data = node.getTextContent();
if (data == null) return context.getRuntime().getTrue();
if ("".equals(data.trim())) return context.getRuntime().getTrue();
return context.runtime.newBoolean(data == null || data.isEmpty() || data.trim().isEmpty());
}
return context.getRuntime().getFalse();
return context.runtime.getFalse();
}

@JRubyMethod
Expand All @@ -713,8 +712,16 @@ public IRubyObject child(ThreadContext context) {

@JRubyMethod
public IRubyObject children(ThreadContext context) {
XmlNodeSet xmlNodeSet = (XmlNodeSet) NokogiriService.XML_NODESET_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::NodeSet"));
xmlNodeSet.setNodeList(node.getChildNodes());
XmlNodeSet xmlNodeSet = XmlNodeSet.create(context.runtime);

NodeList nodeList = node.getChildNodes();
if (nodeList.getLength() > 0) {
xmlNodeSet.setNodeList(nodeList); // initializes @document from first node
}
else { // TODO this is very ripe for refactoring
setDocumentAndDecorate(context, xmlNodeSet, doc);
}

return xmlNodeSet;
}

Expand Down Expand Up @@ -1430,9 +1437,10 @@ public IRubyObject set_namespace(ThreadContext context, IRubyObject namespace) {

@JRubyMethod(name = {"unlink", "remove"})
public IRubyObject unlink(ThreadContext context) {
if (node.getParentNode() != null) {
clearXpathContext(node.getParentNode());
node.getParentNode().removeChild(node);
final Node parent = node.getParentNode();
if (parent != null) {
parent.removeChild(node);
clearXpathContext(parent);
}
return this;
}
Expand Down
9 changes: 4 additions & 5 deletions ext/java/nokogiri/XmlNodeSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
import org.jruby.RubyObject;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.javasupport.util.RuntimeHelpers;
import org.jruby.runtime.Block;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
Expand All @@ -60,7 +59,7 @@
@JRubyClass(name="Nokogiri::XML::NodeSet")
public class XmlNodeSet extends RubyObject implements NodeList {

private RubyArray nodes;
RubyArray nodes;

public XmlNodeSet(Ruby ruby, RubyClass klazz) {
super(ruby, klazz);
Expand Down Expand Up @@ -118,9 +117,9 @@ final void initialize(Ruby runtime, IRubyObject refNode) {
}
}

public long length() {
if (nodes == null) return 0L;
return nodes.length().getLongValue();
public int length() {
if (nodes == null) return 0;
return nodes.size();
}

public void relink_namespace(ThreadContext context) {
Expand Down
11 changes: 11 additions & 0 deletions ext/java/nokogiri/XmlXpathContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.SafePropertyAccessor;
import org.w3c.dom.Node;

import org.apache.xml.dtm.DTM;
Expand All @@ -68,6 +69,16 @@
@JRubyClass(name="Nokogiri::XML::XPathContext")
public class XmlXpathContext extends RubyObject {

// Static initializer: when the "org.apache.xml.dtm.DTMManager" property is not
// already set, register Nokogiri's XalanDTMManagerPatch class name under it.
// NOTE(review): presumably Xalan consults this system property when it creates
// its DTMManager - confirm against the Xalan version that is bundled.
static {
final String DTMManager = "org.apache.xml.dtm.DTMManager";
// leave an existing (e.g. user-provided) value untouched
if (SafePropertyAccessor.getProperty(DTMManager) == null) {
try { // use patched "org.apache.xml.dtm.ref.DTMManagerDefault"
System.setProperty(DTMManager, nokogiri.internals.XalanDTMManagerPatch.class.getName());
}
// deliberately ignored: if setting the property is not permitted, the patch is simply not installed
catch (SecurityException ex) { /* no-op - will work although might be slower */ }
}
}

/**
* user-data key for (cached) {@link XPathContext}
*/
Expand Down
23 changes: 11 additions & 12 deletions ext/java/nokogiri/internals/HtmlDomParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
Expand Down Expand Up @@ -118,14 +119,12 @@ public void enableDocumentFragment() {

@Override
protected XmlDocument getNewEmptyDocument(ThreadContext context) {
IRubyObject[] args = new IRubyObject[0];
IRubyObject[] args = IRubyObject.NULL_ARRAY;
return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document"), args);
}

@Override
protected XmlDocument wrapDocument(ThreadContext context,
RubyClass klazz,
Document document) {
protected XmlDocument wrapDocument(ThreadContext context, RubyClass klazz, Document document) {
HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
htmlDocument.setDocumentNode(context, document);
if (ruby_encoding.isNil()) {
Expand All @@ -146,18 +145,18 @@ protected XmlDocument wrapDocument(ThreadContext context,
// NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset
// from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree
// so, this method attempts to find the charset.
private String tryGetCharsetFromHtml5MetaTag(Document document) {
private static String tryGetCharsetFromHtml5MetaTag(Document document) {
if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null;
NodeList list = document.getDocumentElement().getChildNodes();
NodeList list = document.getDocumentElement().getChildNodes(); Node item;
for (int i = 0; i < list.getLength(); i++) {
if ("head".equalsIgnoreCase(list.item(i).getNodeName())) {
NodeList headers = list.item(i).getChildNodes();
if ("head".equalsIgnoreCase((item = list.item(i)).getNodeName())) {
NodeList headers = item.getChildNodes();
for (int j = 0; j < headers.getLength(); j++) {
if ("meta".equalsIgnoreCase(headers.item(j).getNodeName())) {
NamedNodeMap nodeMap = headers.item(j).getAttributes();
if ("meta".equalsIgnoreCase((item = headers.item(j)).getNodeName())) {
NamedNodeMap nodeMap = item.getAttributes();
for (int k = 0; k < nodeMap.getLength(); k++) {
if ("charset".equalsIgnoreCase(nodeMap.item(k).getNodeName())) {
return nodeMap.item(k).getNodeValue();
if ("charset".equalsIgnoreCase((item = nodeMap.item(k)).getNodeName())) {
return item.getNodeValue();
}
}
}
Expand Down
Loading