From e7c7ebec22d094226ee7ec139b4f3bc39bb75d7e Mon Sep 17 00:00:00 2001 From: Kornel Date: Fri, 25 Oct 2024 18:39:09 +0100 Subject: [PATCH] Store mutations as text chunks --- src/lib.rs | 2 +- src/rewritable_units/element.rs | 44 +++++++-- src/rewritable_units/mod.rs | 3 +- src/rewritable_units/mutations.rs | 103 +++++++++++++++------- src/rewritable_units/tokens/comment.rs | 10 ++- src/rewritable_units/tokens/end_tag.rs | 12 ++- src/rewritable_units/tokens/mod.rs | 37 ++++---- src/rewritable_units/tokens/start_tag.rs | 12 ++- src/rewritable_units/tokens/text_chunk.rs | 17 ++-- 9 files changed, 159 insertions(+), 81 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index efb23b6b..302a03da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -201,7 +201,7 @@ cfg_if! { }; pub use self::rewritable_units::{ - EndTag, Serialize, StartTag, Token, TokenCaptureFlags, Mutations + EndTag, Serialize, StartTag, Token, TokenCaptureFlags, }; pub use self::memory::SharedMemoryLimiter; diff --git a/src/rewritable_units/element.rs b/src/rewritable_units/element.rs index 153d1b1c..131921f6 100644 --- a/src/rewritable_units/element.rs +++ b/src/rewritable_units/element.rs @@ -1,4 +1,4 @@ -use super::{Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag}; +use super::{Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag, StringChunk}; use crate::base::Bytes; use crate::rewriter::{HandlerTypes, LocalHandlerTypes}; use encoding_rs::Encoding; @@ -87,7 +87,9 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { #[inline] fn remove_content(&mut self) { self.start_tag.mutations.content_after.clear(); - self.end_tag_mutations_mut().content_before.clear(); + if let Some(end) = &mut self.end_tag_mutations { + end.content_before.clear(); + } self.should_remove_content = true; } @@ -222,7 +224,10 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// ``` #[inline] pub fn before(&mut self, content: &str, content_type: ContentType) { - self.start_tag.mutations.before(content, content_type); + self.start_tag + .mutations + .content_before + .push_back((content, content_type).into()); } /// Inserts `content` after the element. @@ -255,11 +260,16 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// ``` #[inline] pub fn after(&mut self, content: &str, content_type: ContentType) { + self.after_chunk((content, content_type).into()); + } + + fn after_chunk(&mut self, chunk: StringChunk) { if self.can_have_content { - self.end_tag_mutations_mut().after(content, content_type); + &mut self.end_tag_mutations_mut().content_after } else { - self.start_tag.mutations.after(content, content_type); + &mut self.start_tag.mutations.content_after } + .push_front(chunk); } /// Prepends `content` to the element's inner content, i.e. inserts content right after @@ -299,8 +309,12 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// ``` #[inline] pub fn prepend(&mut self, content: &str, content_type: ContentType) { + self.prepend_chunk((content, content_type).into()); + } + + fn prepend_chunk(&mut self, chunk: StringChunk) { if self.can_have_content { - self.start_tag.mutations.after(content, content_type); + self.start_tag.mutations.content_after.push_front(chunk); } } @@ -341,8 +355,12 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// ``` #[inline] pub fn append(&mut self, content: &str, content_type: ContentType) { + self.append_chunk((content, content_type).into()); + } + + fn append_chunk(&mut self, chunk: StringChunk) { if self.can_have_content { - self.end_tag_mutations_mut().before(content, content_type); + self.end_tag_mutations_mut().content_before.push_back(chunk); } } @@ -382,9 +400,13 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// ``` #[inline] pub fn set_inner_content(&mut self, content: &str, content_type: ContentType) { + self.set_inner_content_chunk((content, content_type).into()); + } + + fn set_inner_content_chunk(&mut self, chunk: StringChunk) { if self.can_have_content { self.remove_content(); - self.start_tag.mutations.after(content, content_type); + self.start_tag.mutations.content_after.push_front(chunk); } } @@ -417,7 +439,11 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// ``` #[inline] pub fn replace(&mut self, content: &str, content_type: ContentType) { - self.start_tag.mutations.replace(content, content_type); + self.replace_chunk((content, content_type).into()); + } + + fn replace_chunk(&mut self, chunk: StringChunk) { + self.start_tag.mutations.replace(chunk); if self.can_have_content { self.remove_content(); diff --git a/src/rewritable_units/mod.rs b/src/rewritable_units/mod.rs index 922e964f..f1b411a3 100644 --- a/src/rewritable_units/mod.rs +++ b/src/rewritable_units/mod.rs @@ -2,7 +2,8 @@ use std::any::Any; pub use self::document_end::*; pub use self::element::*; -pub use self::mutations::{ContentType, Mutations}; +pub use self::mutations::ContentType; +pub(crate) use self::mutations::{Mutations, StringChunk}; pub use self::tokens::*; /// Data that can be attached to a rewritable unit by a user and shared between content handler diff --git a/src/rewritable_units/mutations.rs b/src/rewritable_units/mutations.rs index a8118bcc..82cdec43 100644 --- a/src/rewritable_units/mutations.rs +++ b/src/rewritable_units/mutations.rs @@ -1,7 +1,11 @@ use crate::base::Bytes; use encoding_rs::Encoding; +use std::error::Error as StdError; + +type BoxResult = Result<(), Box>; /// The type of inserted content. +#[derive(Copy, Clone)] pub enum ContentType { /// HTML content type. The rewriter will insert the content as is. Html, @@ -17,7 +21,7 @@ pub(super) fn content_to_bytes( content: &str, content_type: ContentType, encoding: &'static Encoding, - mut output_handler: &mut dyn FnMut(&[u8]), + output_handler: &mut dyn FnMut(&[u8]), ) { let bytes = Bytes::from_str(content, encoding); @@ -27,68 +31,99 @@ pub(super) fn content_to_bytes( (b'<', b"<"), (b'>', b">"), (b'&', b"&"), - &mut output_handler, + &mut *output_handler, ), } } -pub struct Mutations { - pub content_before: Vec, - pub replacement: Vec, - pub content_after: Vec, +pub(crate) struct Mutations { + pub content_before: DynamicString, + pub replacement: DynamicString, + pub content_after: DynamicString, pub removed: bool, - encoding: &'static Encoding, + pub encoding: &'static Encoding, } impl Mutations { #[inline] - pub fn new(encoding: &'static Encoding) -> Self { - Mutations { - content_before: Vec::default(), - replacement: Vec::default(), - content_after: Vec::default(), + #[must_use] + pub const fn new(encoding: &'static Encoding) -> Self { + Self { + content_before: DynamicString::new(), + replacement: DynamicString::new(), + content_after: DynamicString::new(), removed: false, encoding, } } #[inline] - pub fn before(&mut self, content: &str, content_type: ContentType) { - content_to_bytes(content, content_type, self.encoding, &mut |c| { - self.content_before.extend_from_slice(c); - }); + pub fn replace(&mut self, chunk: StringChunk) { + self.remove(); + self.replacement.clear(); + self.replacement.push_back(chunk); } #[inline] - pub fn after(&mut self, content: &str, content_type: ContentType) { - let mut pos = 0; - - content_to_bytes(content, content_type, self.encoding, &mut |c| { - self.content_after.splice(pos..pos, c.iter().cloned()); + pub fn remove(&mut self) { + self.removed = true; + } - pos += c.len(); - }); + #[inline] + pub const fn removed(&self) -> bool { + self.removed } +} +impl From<(&str, ContentType)> for StringChunk { #[inline] - pub fn replace(&mut self, content: &str, content_type: ContentType) { - let mut replacement = Vec::default(); + fn from((content, content_type): (&str, ContentType)) -> Self { + Self::Buffer(Box::from(content), content_type) + } +} + +pub(crate) enum StringChunk { + Buffer(Box, ContentType), +} - content_to_bytes(content, content_type, self.encoding, &mut |c| { - replacement.extend_from_slice(c); - }); +#[derive(Default)] +pub(crate) struct DynamicString { + chunks: Vec, +} - self.replacement = replacement; - self.remove(); +impl DynamicString { + #[inline] + pub const fn new() -> Self { + Self { chunks: vec![] } } #[inline] - pub fn remove(&mut self) { - self.removed = true; + pub fn clear(&mut self) { + self.chunks.clear(); } #[inline] - pub fn removed(&self) -> bool { - self.removed + pub fn push_front(&mut self, chunk: StringChunk) { + self.chunks.insert(0, chunk); + } + + #[inline] + pub fn push_back(&mut self, chunk: StringChunk) { + self.chunks.push(chunk); + } + + pub fn into_bytes( + self, + encoding: &'static Encoding, + output_handler: &mut dyn FnMut(&[u8]), + ) -> BoxResult { + for chunk in self.chunks { + match chunk { + StringChunk::Buffer(content, content_type) => { + content_to_bytes(&content, content_type, encoding, output_handler); + } + }; + } + Ok(()) } } diff --git a/src/rewritable_units/tokens/comment.rs b/src/rewritable_units/tokens/comment.rs index 1a533c26..1440cbcb 100644 --- a/src/rewritable_units/tokens/comment.rs +++ b/src/rewritable_units/tokens/comment.rs @@ -103,7 +103,9 @@ impl<'i> Comment<'i> { /// ``` #[inline] pub fn before(&mut self, content: &str, content_type: crate::rewritable_units::ContentType) { - self.mutations.before(content, content_type); + self.mutations + .content_before + .push_back((content, content_type).into()); } /// Inserts `content` after the comment. @@ -135,7 +137,9 @@ impl<'i> Comment<'i> { /// ``` #[inline] pub fn after(&mut self, content: &str, content_type: crate::rewritable_units::ContentType) { - self.mutations.after(content, content_type); + self.mutations + .content_after + .push_front((content, content_type).into()); } /// Replaces the comment with the `content`. @@ -167,7 +171,7 @@ impl<'i> Comment<'i> { /// ``` #[inline] pub fn replace(&mut self, content: &str, content_type: crate::rewritable_units::ContentType) { - self.mutations.replace(content, content_type); + self.mutations.replace((content, content_type).into()); } /// Removes the comment. diff --git a/src/rewritable_units/tokens/end_tag.rs b/src/rewritable_units/tokens/end_tag.rs index 234e0701..aa0b7722 100644 --- a/src/rewritable_units/tokens/end_tag.rs +++ b/src/rewritable_units/tokens/end_tag.rs @@ -1,7 +1,7 @@ use super::{Mutations, Token}; use crate::base::Bytes; use crate::errors::RewritingError; -use crate::rewritable_units::ContentType; +use crate::html_content::ContentType; use encoding_rs::Encoding; use std::fmt::{self, Debug}; @@ -59,7 +59,9 @@ impl<'i> EndTag<'i> { /// Consequent calls to the method append `content` to the previously inserted content. #[inline] pub fn before(&mut self, content: &str, content_type: ContentType) { - self.mutations.before(content, content_type); + self.mutations + .content_before + .push_back((content, content_type).into()); } /// Inserts `content` after the end tag. @@ -67,7 +69,9 @@ impl<'i> EndTag<'i> { /// Consequent calls to the method prepend `content` to the previously inserted content. #[inline] pub fn after(&mut self, content: &str, content_type: ContentType) { - self.mutations.after(content, content_type); + self.mutations + .content_after + .push_front((content, content_type).into()); } /// Replaces the end tag with `content`. @@ -75,7 +79,7 @@ impl<'i> EndTag<'i> { /// Consequent calls to the method overwrite previous replacement content. #[inline] pub fn replace(&mut self, content: &str, content_type: ContentType) { - self.mutations.replace(content, content_type); + self.mutations.replace((content, content_type).into()); } /// Removes the end tag. diff --git a/src/rewritable_units/tokens/mod.rs b/src/rewritable_units/tokens/mod.rs index 4951b517..91956cf9 100644 --- a/src/rewritable_units/tokens/mod.rs +++ b/src/rewritable_units/tokens/mod.rs @@ -16,32 +16,31 @@ macro_rules! impl_serialize { ($Token:ident) => { impl crate::rewritable_units::Serialize for $Token<'_> { #[inline] - fn into_bytes(self, output_handler: &mut dyn FnMut(&[u8])) -> Result<(), RewritingError> { - let Mutations { - content_before, - replacement, - content_after, - removed, - .. - } = &self.mutations; + fn into_bytes( + mut self, + output_handler: &mut dyn FnMut(&[u8]), + ) -> Result<(), crate::errors::RewritingError> { + let content_before = ::std::mem::take(&mut self.mutations.content_before); + content_before + .into_bytes(self.mutations.encoding, output_handler) + .map_err(crate::errors::RewritingError::ContentHandlerError)?; - if !content_before.is_empty() { - output_handler(content_before); - } - - if !removed { + if !self.mutations.removed { match self.raw() { Some(raw) => output_handler(raw), None => self.serialize_from_parts(output_handler)?, } - } else if !replacement.is_empty() { - output_handler(replacement); + } else { + self.mutations + .replacement + .into_bytes(self.mutations.encoding, output_handler) + .map_err(crate::errors::RewritingError::ContentHandlerError)?; } - if !content_after.is_empty() { - output_handler(content_after); - } - Ok(()) + self.mutations + .content_after + .into_bytes(self.mutations.encoding, output_handler) + .map_err(crate::errors::RewritingError::ContentHandlerError) } } }; diff --git a/src/rewritable_units/tokens/start_tag.rs b/src/rewritable_units/tokens/start_tag.rs index fe7b5f5a..98a90e3f 100644 --- a/src/rewritable_units/tokens/start_tag.rs +++ b/src/rewritable_units/tokens/start_tag.rs @@ -3,7 +3,7 @@ use super::{Mutations, Serialize, Token}; use crate::base::Bytes; use crate::errors::RewritingError; use crate::html::Namespace; -use crate::rewritable_units::ContentType; +use crate::html_content::ContentType; use encoding_rs::Encoding; use std::fmt::{self, Debug}; @@ -110,7 +110,9 @@ impl<'i> StartTag<'i> { /// Consequent calls to the method append `content` to the previously inserted content. #[inline] pub fn before(&mut self, content: &str, content_type: ContentType) { - self.mutations.before(content, content_type); + self.mutations + .content_before + .push_back((content, content_type).into()); } /// Inserts `content` after the start tag. @@ -118,7 +120,9 @@ impl<'i> StartTag<'i> { /// Consequent calls to the method prepend `content` to the previously inserted content. #[inline] pub fn after(&mut self, content: &str, content_type: ContentType) { - self.mutations.after(content, content_type); + self.mutations + .content_after + .push_front((content, content_type).into()); } /// Replaces the start tag with `content`. @@ -126,7 +130,7 @@ impl<'i> StartTag<'i> { /// Consequent calls to the method overwrite previous replacement content. #[inline] pub fn replace(&mut self, content: &str, content_type: ContentType) { - self.mutations.replace(content, content_type); + self.mutations.replace((content, content_type).into()); } /// Removes the start tag. diff --git a/src/rewritable_units/tokens/text_chunk.rs b/src/rewritable_units/tokens/text_chunk.rs index cae8994a..a116ad1a 100644 --- a/src/rewritable_units/tokens/text_chunk.rs +++ b/src/rewritable_units/tokens/text_chunk.rs @@ -2,6 +2,7 @@ use super::{Mutations, Token}; use crate::base::Bytes; use crate::errors::RewritingError; use crate::html::TextType; +use crate::html_content::ContentType; use encoding_rs::Encoding; use std::any::Any; use std::borrow::Cow; @@ -179,8 +180,10 @@ impl<'i> TextChunk<'i> { /// assert_eq!(html, r#"
Hello world
"#); /// ``` #[inline] - pub fn before(&mut self, content: &str, content_type: crate::rewritable_units::ContentType) { - self.mutations.before(content, content_type); + pub fn before(&mut self, content: &str, content_type: ContentType) { + self.mutations + .content_before + .push_back((content, content_type).into()); } /// Inserts `content` after the text chunk. @@ -213,8 +216,10 @@ impl<'i> TextChunk<'i> { /// assert_eq!(html, r#"
FooQuxBar
"#); /// ``` #[inline] - pub fn after(&mut self, content: &str, content_type: crate::rewritable_units::ContentType) { - self.mutations.after(content, content_type); + pub fn after(&mut self, content: &str, content_type: ContentType) { + self.mutations + .content_after + .push_front((content, content_type).into()); } /// Replaces the text chunk with the `content`. @@ -247,8 +252,8 @@ impl<'i> TextChunk<'i> { /// assert_eq!(html, r#"
Qux
"#); /// ``` #[inline] - pub fn replace(&mut self, content: &str, content_type: crate::rewritable_units::ContentType) { - self.mutations.replace(content, content_type); + pub fn replace(&mut self, content: &str, content_type: ContentType) { + self.mutations.replace((content, content_type).into()); } /// Removes the text chunk.