diff --git a/src/document/epub/mod.rs b/src/document/epub/mod.rs
index cbcc5506..fda27877 100644
--- a/src/document/epub/mod.rs
+++ b/src/document/epub/mod.rs
@@ -63,7 +63,7 @@ impl EpubDocument {
let mut zf = archive.by_name("META-INF/container.xml")?;
let mut text = String::new();
zf.read_to_string(&mut text)?;
- let root = XmlParser::new(&text).parse();
+ let root = XmlParser::new(&text, false).parse();
root.find("rootfile")
.and_then(|e| e.attr("full-path"))
.map(String::from)
@@ -79,7 +79,7 @@ impl EpubDocument {
text
};
- let info = XmlParser::new(&text).parse();
+ let info = XmlParser::new(&text, false).parse();
let mut spine = Vec::new();
{
@@ -269,7 +269,7 @@ impl EpubDocument {
let mut zf = self.archive.by_name(name).ok()?;
zf.read_to_string(&mut text).ok()?;
}
- let root = XmlParser::new(&text).parse();
+ let root = XmlParser::new(&text, false).parse();
self.cache_uris(&root, name, start_offset, cache);
cache.get(uri).cloned()
} else {
@@ -328,7 +328,7 @@ impl EpubDocument {
}
}
- let mut root = XmlParser::new(&text).parse();
+ let mut root = XmlParser::new(&text, false).parse();
root.wrap_lost_inlines();
let mut stylesheet = Vec::new();
@@ -636,7 +636,7 @@ impl Document for EpubDocument {
return None;
}
- let root = XmlParser::new(&text).parse();
+ let root = XmlParser::new(&text, false).parse();
root.find("navMap").map(|map| {
let mut cache = FxHashMap::default();
let mut index = 0;
diff --git a/src/document/html/mod.rs b/src/document/html/mod.rs
index 5cdfe3d2..a81b2508 100644
--- a/src/document/html/mod.rs
+++ b/src/document/html/mod.rs
@@ -58,8 +58,8 @@ impl HtmlDocument {
let size = file.metadata()?.len() as usize;
let mut text = String::new();
file.read_to_string(&mut text)?;
- let mut content = XmlParser::new(&text).parse();
+ let mut content = XmlParser::new(&text, true).parse();
content.wrap_lost_inlines();
let parent = path.as_ref().parent().unwrap_or_else(|| Path::new(""));
Ok(HtmlDocument {
@@ -77,8 +79,8 @@ impl HtmlDocument {
pub fn new_from_memory(text: &str) -> HtmlDocument {
let size = text.len();
- let mut content = XmlParser::new(text).parse();
+ let mut content = XmlParser::new(text, true).parse();
content.wrap_lost_inlines();
HtmlDocument {
text: text.to_string(),
@@ -95,7 +99,7 @@ impl HtmlDocument {
pub fn update(&mut self, text: &str) {
self.size = text.len();
- self.content = XmlParser::new(text).parse();
+ self.content = XmlParser::new(text, true).parse();
self.content.wrap_lost_inlines();
self.text = text.to_string();
self.pages.clear();
diff --git a/src/document/html/style.rs b/src/document/html/style.rs
index a5d1e5a2..8b45a137 100644
--- a/src/document/html/style.rs
+++ b/src/document/html/style.rs
@@ -14,8 +14,8 @@ mod tests {
#[test]
fn simple_style() {
- let xml1 = XmlParser::new("").parse();
- let xml2 = XmlParser::new("").parse();
+ let xml1 = XmlParser::new("", false).parse();
+ let xml2 = XmlParser::new("", false).parse();
let (mut css1, _) = CssParser::new("a { b: 23 }").parse(RuleKind::Viewer);
let (mut css2, _) = CssParser::new(".c.x.y { b: 6 }").parse(RuleKind::Document);
let (mut css3, _) = CssParser::new(".y { b: 2 }").parse(RuleKind::Document);
diff --git a/src/document/html/xml.rs b/src/document/html/xml.rs
index 87a91039..b6fd8adb 100644
--- a/src/document/html/xml.rs
+++ b/src/document/html/xml.rs
@@ -5,13 +5,21 @@ use super::dom::{Node, Attributes, text, element, whitespace};
pub struct XmlParser<'a> {
pub input: &'a str,
pub offset: usize,
+ /// When `true`, HTML void elements (`<br>`, `<img>`, …) opened with a
+ /// plain `>` are parsed as childless instead of having children parsed.
+ html: bool,
}
impl<'a> XmlParser<'a> {
- pub fn new(input: &str) -> XmlParser {
+ /// Creates a parser positioned at the start of `input`.
+ ///
+ /// Pass `html = true` to enable HTML void-element handling; pass `false`
+ /// to parse children for every element opened with `>`.
+ pub fn new(input: &str, html: bool) -> XmlParser {
XmlParser {
input,
offset: 0,
+ html,
}
}
@@ -88,9 +90,18 @@ impl<'a> XmlParser<'a> {
nodes.push(element(name, offset - 1, attributes, Vec::new()));
},
Some('>') => {
self.advance(1);
- let children = self.parse_nodes();
+ // In HTML mode, void elements have no closing tag, so they must
+ // not swallow the nodes that follow them as children. HTML tag
+ // names are case-insensitive, hence the ASCII-insensitive match.
+ const VOID_ELEMENTS: [&str; 16] = [
+ "area", "base", "br", "col", "command", "embed", "hr", "img",
+ "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
+ ];
+ let is_void = self.html
+ && VOID_ELEMENTS.iter().any(|tag| name.eq_ignore_ascii_case(tag));
+ let children = if is_void { Vec::new() } else { self.parse_nodes() };
nodes.push(element(name, offset - 1, attributes, children));
}
_ => (),
}
@@ -167,7 +184,7 @@ mod tests {
#[test]
fn test_simple_element() {
let text = "";
- let xml = XmlParser::new(text).parse();
+ let xml = XmlParser::new(text, false).parse();
assert_eq!(xml.offset(), 0);
assert_eq!(xml.tag_name(), Some("a"));
}
@@ -175,7 +192,7 @@ mod tests {
#[test]
fn test_attributes() {
let text = r#""#;
- let xml = XmlParser::new(text).parse();
+ let xml = XmlParser::new(text, false).parse();
assert_eq!(xml.attr("b"), Some("c"));
assert_eq!(xml.attr("d"), Some("e\""));
}
@@ -183,7 +200,7 @@ mod tests {
#[test]
fn test_text() {
let text = "bcd";
- let xml = XmlParser::new(text).parse();
+ let xml = XmlParser::new(text, false).parse();
let child = xml.child(0);
assert_eq!(child.map(|c| c.offset()), Some(3));
assert_eq!(child.and_then(|c| c.text()), Some("bcd"));
@@ -192,7 +209,7 @@ mod tests {
#[test]
fn test_inbetween_space() {
let text = "x y";
- let xml = XmlParser::new(text).parse();
+ let xml = XmlParser::new(text, false).parse();
let child = xml.child(1);
assert_eq!(child.and_then(|c| c.text()), Some(" "));
}
@@ -200,7 +217,7 @@ mod tests {
#[test]
fn test_central_space() {
let text = " ";
- let xml = XmlParser::new(text).parse();
+ let xml = XmlParser::new(text, false).parse();
assert_eq!(xml.text(), Some(" "));
}
}