From d5c08f9036212e2e65b6695bca723c3d873817ad Mon Sep 17 00:00:00 2001 From: Kornel Date: Sat, 26 Oct 2024 12:28:56 +0100 Subject: [PATCH] Streaming content mutations --- Cargo.toml | 1 + fuzz/test_case/src/lib.rs | 14 ++-- src/lib.rs | 2 +- src/rewritable_units/element.rs | 84 ++++++++++++++++++++++- src/rewritable_units/mod.rs | 2 +- src/rewritable_units/mutations.rs | 78 ++++++++++++++++++++- src/rewritable_units/tokens/comment.rs | 37 +++++++++- src/rewritable_units/tokens/end_tag.rs | 30 +++++++- src/rewritable_units/tokens/start_tag.rs | 27 +++++++- src/rewritable_units/tokens/text_chunk.rs | 27 +++++++- src/rewriter/settings.rs | 43 ++++++++++++ 11 files changed, 329 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6b7731bd..8cda84ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ bench = false [features] debug_trace = [] +# Unstable: for internal use only integration_test = [] [[test]] diff --git a/fuzz/test_case/src/lib.rs b/fuzz/test_case/src/lib.rs index 9e781a54..fb80007e 100644 --- a/fuzz/test_case/src/lib.rs +++ b/fuzz/test_case/src/lib.rs @@ -11,7 +11,10 @@ use std::ffi::{CStr, CString}; use encoding_rs::*; use lol_html::html_content::ContentType; -use lol_html::{comments, doc_comments, doc_text, element, text, HtmlRewriter, MemorySettings, Settings}; +use lol_html::{ + comments, doc_comments, doc_text, element, streaming, text, HtmlRewriter, MemorySettings, + Settings, +}; include!(concat!(env!("OUT_DIR"), "/bindings.rs")); @@ -111,10 +114,11 @@ fn run_rewriter_iter(data: &[u8], selector: &str, encoding: &'static Encoding) - &format!("", selector), ContentType::Html, ); - el.set_inner_content( - &format!("", selector), - ContentType::Html, - ); + + let replaced = format!("", selector); + el.streaming_set_inner_content(streaming!(move |sink| { + Ok(sink.write_str_chunk(&replaced, ContentType::Html)) + })); Ok(()) }), diff --git a/src/lib.rs b/src/lib.rs index ec1e89c7..f8ed8ddd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -93,7 +93,7 @@ pub mod errors { pub mod html_content { pub use super::rewritable_units::{ Attribute, Comment, ContentType, Doctype, DocumentEnd, Element, EndTag, StartTag, - TextChunk, UserData, + StreamingHandler, StreamingHandlerSink, TextChunk, UserData, }; pub use super::html::TextType; diff --git a/src/rewritable_units/element.rs b/src/rewritable_units/element.rs index c3370c87..df4f16d3 100644 --- a/src/rewritable_units/element.rs +++ b/src/rewritable_units/element.rs @@ -1,4 +1,7 @@ -use super::{Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag, StringChunk}; +use super::{ + Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag, StreamingHandler, + StringChunk, +}; use crate::base::Bytes; use crate::rewriter::{HandlerTypes, LocalHandlerTypes}; use encoding_rs::Encoding; @@ -240,6 +243,16 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { .push_back((content, content_type).into()); } + /// Inserts content from a [`StreamingHandler`] before the element. + /// + /// Consequent calls to the method append to the previously inserted content. + pub fn streaming_before(&mut self, string_writer: Box) { + self.start_tag + .mutations + .content_before + .push_back(string_writer.into()); + } + /// Inserts `content` after the element. /// /// Consequent calls to the method prepend `content` to the previously inserted content. @@ -282,6 +295,14 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { .push_front(chunk); } + /// Inserts content from a [`StreamingHandler`] after the element. + /// + /// Consequent calls to the method prepend to the previously inserted content. + /// + pub fn streaming_after(&mut self, string_writer: Box) { + self.after_chunk(string_writer.into()); + } + /// Prepends `content` to the element's inner content, i.e. inserts content right after /// the element's start tag. /// @@ -328,6 +349,18 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } } + /// Prepends content from a [`StreamingHandler`] to the element's inner content, + /// i.e. inserts content right after the element's start tag. + /// + /// Consequent calls to the method prepend to the previously inserted content. + /// A call to the method doesn't make any effect if the element is an [empty element]. + /// + /// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element + /// + pub fn streaming_prepend(&mut self, string_writer: Box) { + self.prepend_chunk(string_writer.into()); + } + /// Appends `content` to the element's inner content, i.e. inserts content right before /// the element's end tag. /// @@ -374,6 +407,17 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } } + /// Appends content from a [`StreamingHandler`] to the element's inner content, + /// i.e. inserts content right before the element's end tag. + /// + /// Consequent calls to the method append to the previously inserted content. + /// A call to the method doesn't make any effect if the element is an [empty element]. + /// + /// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element + pub fn streaming_append(&mut self, string_writer: Box) { + self.append_chunk(string_writer.into()); + } + /// Replaces inner content of the element with `content`. /// /// Consequent calls to the method overwrite previously inserted content. @@ -420,6 +464,17 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } } + /// Replaces inner content of the element with content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previously inserted content. + /// A call to the method doesn't make any effect if the element is an [empty element]. + /// + /// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element + /// + pub fn streaming_set_inner_content(&mut self, string_writer: Box) { + self.set_inner_content_chunk(string_writer.into()); + } + /// Replaces the element and its inner content with `content`. /// /// Consequent calls to the method overwrite previously inserted content. @@ -461,6 +516,14 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } } + /// Replaces the element and its inner content with content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previously inserted content. + /// + pub fn streaming_replace(&mut self, string_writer: Box) { + self.replace_chunk(string_writer.into()); + } + /// Removes the element and its inner content. #[inline] pub fn remove(&mut self) { @@ -629,6 +692,7 @@ mod tests { use crate::rewritable_units::test_utils::*; use crate::*; use encoding_rs::{Encoding, EUC_JP, UTF_8}; + use rewritable_units::mutations::StreamingHandlerSink; fn rewrite_element( html: &[u8], @@ -651,7 +715,11 @@ mod tests { el.before("[before: should be removed]", ContentType::Text); el.after("[after: should be removed]", ContentType::Text); el.append("[append: should be removed]", ContentType::Text); - el.before("[before: should be removed]", ContentType::Text); + el.streaming_before(Box::new(|sink: &mut StreamingHandlerSink| { + sink.write_str_chunk("[before:", ContentType::Text); + sink.write_str_chunk(" should be removed]", ContentType::Text); + Ok(()) + })); Ok(()) }), ], @@ -1087,7 +1155,17 @@ mod tests { #[test] fn self_closing_element() { let output = rewrite_element(b"Hi", UTF_8, "foo", |el| { - el.after("", ContentType::Html); + el.after("->", ContentType::Html); + el.streaming_after(streaming!(|sink| { + sink.write_str_chunk("er-", ContentType::Html); + Ok(()) + })); + el.after("t", ContentType::Html); + el.streaming_after(streaming!(|sink| { + sink.write_str_chunk("af", ContentType::Html); + Ok(()) + })); + el.after("", ContentType::Html); - c.replace("", ContentType::Text); + c.streaming_replace(streaming!(|h| { + h.write_str_chunk("", ContentType::Text); + Ok(()) + })); assert!(c.removed()); }, diff --git a/src/rewritable_units/tokens/end_tag.rs b/src/rewritable_units/tokens/end_tag.rs index c39f64c6..b90c9356 100644 --- a/src/rewritable_units/tokens/end_tag.rs +++ b/src/rewritable_units/tokens/end_tag.rs @@ -1,7 +1,7 @@ use super::{Mutations, Token}; use crate::base::Bytes; use crate::errors::RewritingError; -use crate::html_content::ContentType; +use crate::html_content::{ContentType, StreamingHandler}; use encoding_rs::Encoding; use std::fmt::{self, Debug}; @@ -86,6 +86,34 @@ impl<'i> EndTag<'i> { self.mutations.replace((content, content_type).into()); } + /// Inserts content from a [`StreamingHandler`] before the end tag. + /// + /// Consequent calls to the method append to the previously inserted content. + #[inline] + pub fn streaming_before(&mut self, string_writer: Box) { + self.mutations + .content_before + .push_back(string_writer.into()); + } + + /// Inserts content from a [`StreamingHandler`] after the end tag. + /// + /// Consequent calls to the method prepend to the previously inserted content. + #[inline] + pub fn streaming_after(&mut self, string_writer: Box) { + self.mutations + .content_after + .push_front(string_writer.into()); + } + + /// Replaces the end tag with content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previous replacement content. + #[inline] + pub fn streaming_replace(&mut self, string_writer: Box) { + self.mutations.replace(string_writer.into()); + } + /// Removes the end tag. #[inline] pub fn remove(&mut self) { diff --git a/src/rewritable_units/tokens/start_tag.rs b/src/rewritable_units/tokens/start_tag.rs index 216c7844..78e3995d 100644 --- a/src/rewritable_units/tokens/start_tag.rs +++ b/src/rewritable_units/tokens/start_tag.rs @@ -3,7 +3,7 @@ use super::{Mutations, Serialize, Token}; use crate::base::Bytes; use crate::errors::RewritingError; use crate::html::Namespace; -use crate::html_content::ContentType; +use crate::html_content::{ContentType, StreamingHandler}; use encoding_rs::Encoding; use std::fmt::{self, Debug}; @@ -135,6 +135,31 @@ impl<'i> StartTag<'i> { self.mutations.replace((content, content_type).into()); } + /// Inserts content from a [`StreamingHandler`] before the start tag. + /// + /// Consequent calls to the method append to the previously inserted content. + pub fn streaming_before(&mut self, string_writer: Box) { + self.mutations + .content_before + .push_back(string_writer.into()); + } + + /// Inserts content from a [`StreamingHandler`] after the start tag. + /// + /// Consequent calls to the method prepend to the previously inserted content. + pub fn streaming_after(&mut self, string_writer: Box) { + self.mutations + .content_after + .push_front(string_writer.into()); + } + + /// Replaces the start tag with the content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previous replacement content. + pub fn streaming_replace(&mut self, string_writer: Box) { + self.mutations.replace(string_writer.into()); + } + /// Removes the start tag. #[inline] pub fn remove(&mut self) { diff --git a/src/rewritable_units/tokens/text_chunk.rs b/src/rewritable_units/tokens/text_chunk.rs index f0867391..781b1caa 100644 --- a/src/rewritable_units/tokens/text_chunk.rs +++ b/src/rewritable_units/tokens/text_chunk.rs @@ -2,7 +2,7 @@ use super::{Mutations, Token}; use crate::base::Bytes; use crate::errors::RewritingError; use crate::html::TextType; -use crate::html_content::ContentType; +use crate::html_content::{ContentType, StreamingHandler}; use encoding_rs::Encoding; use std::any::Any; use std::borrow::Cow; @@ -261,6 +261,31 @@ impl<'i> TextChunk<'i> { self.mutations.replace((content, content_type).into()); } + /// Inserts content from a [`StreamingHandler`] before the text chunk. + /// + /// Consequent calls to the method append `content` to the previously inserted content. + pub fn streaming_before(&mut self, string_writer: Box) { + self.mutations + .content_before + .push_back(string_writer.into()); + } + + /// Inserts content from a [`StreamingHandler`] after the text chunk. + /// + /// Consequent calls to the method prepend to the previously inserted content. + pub fn streaming_after(&mut self, string_writer: Box) { + self.mutations + .content_after + .push_front(string_writer.into()); + } + + /// Replaces the text chunk with the content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previous replacement content. + pub fn streaming_replace(&mut self, string_writer: Box) { + self.mutations.replace(string_writer.into()); + } + /// Removes the text chunk. #[inline] pub fn remove(&mut self) { diff --git a/src/rewriter/settings.rs b/src/rewriter/settings.rs index 514e4cd7..4408ac0f 100644 --- a/src/rewriter/settings.rs +++ b/src/rewriter/settings.rs @@ -499,6 +499,49 @@ macro_rules! comments { }}; } +/// A convenience macro to construct a `StreamingHandler` from a closure. +/// +/// ```rust +/// use lol_html::{element, streaming, RewriteStrSettings}; +/// use lol_html::html_content::ContentType; +/// +/// RewriteStrSettings { +/// element_content_handlers: vec![ +/// element!("div", |element| { +/// element.streaming_replace(streaming!(|sink| { +/// sink.write_str_chunk("…", ContentType::Html); +/// sink.write_str_chunk("…", ContentType::Html); +/// Ok(()) +/// })); +/// Ok(()) +/// }) +/// ], +/// ..RewriteStrSettings::default() +/// }; +/// ``` + +#[macro_export(local_inner_macros)] +macro_rules! streaming { + ($closure:expr) => {{ + use ::std::error::Error; + use $crate::html_content::StreamingHandlerSink; + // Without this rust won't be able to always infer the type of the handler. + #[inline(always)] + const fn streaming_macro_type_hint( + handler_closure: StreamingHandler, + ) -> StreamingHandler + where + StreamingHandler: + FnOnce(&mut StreamingHandlerSink<'_>) -> Result<(), Box> + 'static + Send, + { + handler_closure + } + + Box::new(streaming_macro_type_hint($closure)) + as Box + }}; +} + #[doc(hidden)] #[macro_export] macro_rules! __document_content_handler {