diff --git a/src/document/epub/mod.rs b/src/document/epub/mod.rs index cbcc5506..fda27877 100644 --- a/src/document/epub/mod.rs +++ b/src/document/epub/mod.rs @@ -63,7 +63,7 @@ impl EpubDocument { let mut zf = archive.by_name("META-INF/container.xml")?; let mut text = String::new(); zf.read_to_string(&mut text)?; - let root = XmlParser::new(&text).parse(); + let root = XmlParser::new(&text, false).parse(); root.find("rootfile") .and_then(|e| e.attr("full-path")) .map(String::from) @@ -79,7 +79,7 @@ impl EpubDocument { text }; - let info = XmlParser::new(&text).parse(); + let info = XmlParser::new(&text, false).parse(); let mut spine = Vec::new(); { @@ -269,7 +269,7 @@ impl EpubDocument { let mut zf = self.archive.by_name(name).ok()?; zf.read_to_string(&mut text).ok()?; } - let root = XmlParser::new(&text).parse(); + let root = XmlParser::new(&text, false).parse(); self.cache_uris(&root, name, start_offset, cache); cache.get(uri).cloned() } else { @@ -328,7 +328,7 @@ impl EpubDocument { } } - let mut root = XmlParser::new(&text).parse(); + let mut root = XmlParser::new(&text, false).parse(); root.wrap_lost_inlines(); let mut stylesheet = Vec::new(); @@ -636,7 +636,7 @@ impl Document for EpubDocument { return None; } - let root = XmlParser::new(&text).parse(); + let root = XmlParser::new(&text, false).parse(); root.find("navMap").map(|map| { let mut cache = FxHashMap::default(); let mut index = 0; diff --git a/src/document/html/mod.rs b/src/document/html/mod.rs index 5cdfe3d2..a81b2508 100644 --- a/src/document/html/mod.rs +++ b/src/document/html/mod.rs @@ -58,8 +58,10 @@ impl HtmlDocument { let size = file.metadata()?.len() as usize; let mut text = String::new(); file.read_to_string(&mut text)?; - let mut content = XmlParser::new(&text).parse(); + let mut content = XmlParser::new(&text, true).parse(); + println!("Parsed content is {:#?}", content); content.wrap_lost_inlines(); + println!("Wrapped content is {:#?}", content); let parent = path.as_ref().parent().unwrap_or_else(|| Path::new("")); Ok(HtmlDocument { @@ -77,8 +79,10 @@ impl HtmlDocument { pub fn new_from_memory(text: &str) -> HtmlDocument { let size = text.len(); - let mut content = XmlParser::new(text).parse(); + let mut content = XmlParser::new(text, true).parse(); + println!("Parsed content is {:#?}", content); content.wrap_lost_inlines(); + println!("Wrapped content is {:#?}", content); HtmlDocument { text: text.to_string(), @@ -95,7 +99,7 @@ impl HtmlDocument { pub fn update(&mut self, text: &str) { self.size = text.len(); - self.content = XmlParser::new(text).parse(); + self.content = XmlParser::new(text, true).parse(); self.content.wrap_lost_inlines(); self.text = text.to_string(); self.pages.clear(); diff --git a/src/document/html/style.rs b/src/document/html/style.rs index a5d1e5a2..8b45a137 100644 --- a/src/document/html/style.rs +++ b/src/document/html/style.rs @@ -14,8 +14,8 @@ mod tests { #[test] fn simple_style() { - let xml1 = XmlParser::new("").parse(); - let xml2 = XmlParser::new("").parse(); + let xml1 = XmlParser::new("", false).parse(); + let xml2 = XmlParser::new("", false).parse(); let (mut css1, _) = CssParser::new("a { b: 23 }").parse(RuleKind::Viewer); let (mut css2, _) = CssParser::new(".c.x.y { b: 6 }").parse(RuleKind::Document); let (mut css3, _) = CssParser::new(".y { b: 2 }").parse(RuleKind::Document); diff --git a/src/document/html/xml.rs b/src/document/html/xml.rs index 87a91039..b6fd8adb 100644 --- a/src/document/html/xml.rs +++ b/src/document/html/xml.rs @@ -5,13 +5,15 @@ use super::dom::{Node, Attributes, text, element, whitespace}; pub struct XmlParser<'a> { pub input: &'a str, pub offset: usize, + html: bool } impl<'a> XmlParser<'a> { - pub fn new(input: &str) -> XmlParser { + pub fn new(input: &str, html: bool) -> XmlParser { XmlParser { input, offset: 0, + html } } @@ -88,9 +90,24 @@ impl<'a> XmlParser<'a> { nodes.push(element(name, offset - 1, attributes, Vec::new())); }, Some('>') => { - self.advance(1); - let children = self.parse_nodes(); - nodes.push(element(name, offset - 1, attributes, children)); + if self.html { + match name { + "area"|"base"|"br"|"col"|"command"|"embed"|"hr"|"img"|"input"|"keygen"|"link"|"meta"|"param"|"source"|"track"|"wbr" => { + self.advance(1); + nodes.push(element(name, offset - 1, attributes, Vec::new())); + }, + _ => { + self.advance(1); + let children = self.parse_nodes(); + nodes.push(element(name, offset - 1, attributes, children)); + } + + } + } else { + self.advance(1); + let children = self.parse_nodes(); + nodes.push(element(name, offset - 1, attributes, children)); + } } _ => (), } @@ -167,7 +184,7 @@ mod tests { #[test] fn test_simple_element() { let text = ""; - let xml = XmlParser::new(text).parse(); + let xml = XmlParser::new(text, false).parse(); assert_eq!(xml.offset(), 0); assert_eq!(xml.tag_name(), Some("a")); } @@ -175,7 +192,7 @@ mod tests { #[test] fn test_attributes() { let text = r#""#; - let xml = XmlParser::new(text).parse(); + let xml = XmlParser::new(text, false).parse(); assert_eq!(xml.attr("b"), Some("c")); assert_eq!(xml.attr("d"), Some("e\"")); } @@ -183,7 +200,7 @@ mod tests { #[test] fn test_text() { let text = "bcd"; - let xml = XmlParser::new(text).parse(); + let xml = XmlParser::new(text, false).parse(); let child = xml.child(0); assert_eq!(child.map(|c| c.offset()), Some(3)); assert_eq!(child.and_then(|c| c.text()), Some("bcd")); @@ -192,7 +209,7 @@ mod tests { #[test] fn test_inbetween_space() { let text = "x y"; - let xml = XmlParser::new(text).parse(); + let xml = XmlParser::new(text, false).parse(); let child = xml.child(1); assert_eq!(child.and_then(|c| c.text()), Some(" ")); } @@ -200,7 +217,7 @@ mod tests { #[test] fn test_central_space() { let text = " "; - let xml = XmlParser::new(text).parse(); + let xml = XmlParser::new(text, false).parse(); assert_eq!(xml.text(), Some(" ")); } }