Skip to content

Commit

Permalink
Streaming content mutations
Browse files Browse the repository at this point in the history
  • Loading branch information
kornelski committed Oct 28, 2024
1 parent 896464f commit d5c08f9
Show file tree
Hide file tree
Showing 11 changed files with 329 additions and 16 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ bench = false

[features]
debug_trace = []
# Unstable: for internal use only
integration_test = []

[[test]]
Expand Down
14 changes: 9 additions & 5 deletions fuzz/test_case/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ use std::ffi::{CStr, CString};

use encoding_rs::*;
use lol_html::html_content::ContentType;
use lol_html::{comments, doc_comments, doc_text, element, text, HtmlRewriter, MemorySettings, Settings};
use lol_html::{
comments, doc_comments, doc_text, element, streaming, text, HtmlRewriter, MemorySettings,
Settings,
};

include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

Expand Down Expand Up @@ -111,10 +114,11 @@ fn run_rewriter_iter(data: &[u8], selector: &str, encoding: &'static Encoding) -
&format!("<!--[/ELEMENT('{}')]-->", selector),
ContentType::Html,
);
el.set_inner_content(
&format!("<!--Replaced ({}) -->", selector),
ContentType::Html,
);

let replaced = format!("<!--Replaced ({}) -->", selector);
el.streaming_set_inner_content(streaming!(move |sink| {
Ok(sink.write_str_chunk(&replaced, ContentType::Html))
}));

Ok(())
}),
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ pub mod errors {
pub mod html_content {
pub use super::rewritable_units::{
Attribute, Comment, ContentType, Doctype, DocumentEnd, Element, EndTag, StartTag,
TextChunk, UserData,
StreamingHandler, StreamingHandlerSink, TextChunk, UserData,
};

pub use super::html::TextType;
Expand Down
84 changes: 81 additions & 3 deletions src/rewritable_units/element.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use super::{Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag, StringChunk};
use super::{
Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag, StreamingHandler,
StringChunk,
};
use crate::base::Bytes;
use crate::rewriter::{HandlerTypes, LocalHandlerTypes};
use encoding_rs::Encoding;
Expand Down Expand Up @@ -240,6 +243,16 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
.push_back((content, content_type).into());
}

/// Inserts content from a [`StreamingHandler`] before the element.
///
/// Consequent calls to the method append to the previously inserted content.
pub fn streaming_before(&mut self, string_writer: Box<dyn StreamingHandler>) {
self.start_tag
.mutations
.content_before
.push_back(string_writer.into());
}

/// Inserts `content` after the element.
///
/// Consequent calls to the method prepend `content` to the previously inserted content.
Expand Down Expand Up @@ -282,6 +295,14 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
.push_front(chunk);
}

/// Inserts content from a [`StreamingHandler`] after the element.
///
/// Consequent calls to the method prepend to the previously inserted content.
///
pub fn streaming_after(&mut self, string_writer: Box<dyn StreamingHandler>) {
self.after_chunk(string_writer.into());
}

/// Prepends `content` to the element's inner content, i.e. inserts content right after
/// the element's start tag.
///
Expand Down Expand Up @@ -328,6 +349,18 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
}
}

/// Prepends content from a [`StreamingHandler`] to the element's inner content,
/// i.e. inserts content right after the element's start tag.
///
/// Consequent calls to the method prepend to the previously inserted content.
/// A call to the method doesn't make any effect if the element is an [empty element].
///
/// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element
///
pub fn streaming_prepend(&mut self, string_writer: Box<dyn StreamingHandler>) {
self.prepend_chunk(string_writer.into());
}

/// Appends `content` to the element's inner content, i.e. inserts content right before
/// the element's end tag.
///
Expand Down Expand Up @@ -374,6 +407,17 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
}
}

/// Appends content from a [`StreamingHandler`] to the element's inner content,
/// i.e. inserts content right before the element's end tag.
///
/// Consequent calls to the method append to the previously inserted content.
/// A call to the method doesn't make any effect if the element is an [empty element].
///
/// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element
pub fn streaming_append(&mut self, string_writer: Box<dyn StreamingHandler>) {
self.append_chunk(string_writer.into());
}

/// Replaces inner content of the element with `content`.
///
/// Consequent calls to the method overwrite previously inserted content.
Expand Down Expand Up @@ -420,6 +464,17 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
}
}

/// Replaces inner content of the element with content from a [`StreamingHandler`].
///
/// Consequent calls to the method overwrite previously inserted content.
/// A call to the method doesn't make any effect if the element is an [empty element].
///
/// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element
///
pub fn streaming_set_inner_content(&mut self, string_writer: Box<dyn StreamingHandler>) {
self.set_inner_content_chunk(string_writer.into());
}

/// Replaces the element and its inner content with `content`.
///
/// Consequent calls to the method overwrite previously inserted content.
Expand Down Expand Up @@ -461,6 +516,14 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
}
}

/// Replaces the element and its inner content with content from a [`StreamingHandler`].
///
/// Consequent calls to the method overwrite previously inserted content.
///
pub fn streaming_replace(&mut self, string_writer: Box<dyn StreamingHandler>) {
self.replace_chunk(string_writer.into());
}

/// Removes the element and its inner content.
#[inline]
pub fn remove(&mut self) {
Expand Down Expand Up @@ -629,6 +692,7 @@ mod tests {
use crate::rewritable_units::test_utils::*;
use crate::*;
use encoding_rs::{Encoding, EUC_JP, UTF_8};
use rewritable_units::mutations::StreamingHandlerSink;

fn rewrite_element(
html: &[u8],
Expand All @@ -651,7 +715,11 @@ mod tests {
el.before("[before: should be removed]", ContentType::Text);
el.after("[after: should be removed]", ContentType::Text);
el.append("[append: should be removed]", ContentType::Text);
el.before("[before: should be removed]", ContentType::Text);
el.streaming_before(Box::new(|sink: &mut StreamingHandlerSink| {
sink.write_str_chunk("[before:", ContentType::Text);
sink.write_str_chunk(" should be removed]", ContentType::Text);
Ok(())
}));
Ok(())
}),
],
Expand Down Expand Up @@ -1087,7 +1155,17 @@ mod tests {
#[test]
fn self_closing_element() {
let output = rewrite_element(b"<svg><foo/>Hi</foo></svg>", UTF_8, "foo", |el| {
el.after("<!--after-->", ContentType::Html);
el.after("->", ContentType::Html);
el.streaming_after(streaming!(|sink| {
sink.write_str_chunk("er-", ContentType::Html);
Ok(())
}));
el.after("t", ContentType::Html);
el.streaming_after(streaming!(|sink| {
sink.write_str_chunk("af", ContentType::Html);
Ok(())
}));
el.after("<!--", ContentType::Html);
el.set_tag_name("bar").unwrap();
});

Expand Down
2 changes: 1 addition & 1 deletion src/rewritable_units/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::any::Any;

pub use self::document_end::*;
pub use self::element::*;
pub use self::mutations::ContentType;
pub use self::mutations::{ContentType, StreamingHandler, StreamingHandlerSink};
pub(crate) use self::mutations::{Mutations, StringChunk};
pub use self::tokens::*;

Expand Down
78 changes: 77 additions & 1 deletion src/rewritable_units/mutations.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::base::Bytes;
use encoding_rs::Encoding;
use std::error::Error as StdError;
use std::marker::PhantomData;
use std::panic::{RefUnwindSafe, UnwindSafe};

type BoxResult = Result<(), Box<dyn StdError + Send + Sync>>;

Expand All @@ -16,7 +18,6 @@ pub enum ContentType {
Text,
}

#[inline]
pub(super) fn content_to_bytes(
content: &str,
content_type: ContentType,
Expand Down Expand Up @@ -84,6 +85,7 @@ impl From<(&str, ContentType)> for StringChunk {

/// A single piece of content queued by a mutation.
///
/// A chunk is either already available as a string, or is produced later by
/// a [`StreamingHandler`] when the chunk is written out.
pub(crate) enum StringChunk {
/// Content that is already in memory, together with the [`ContentType`]
/// that is passed along when the content is converted to bytes.
Buffer(Box<str>, ContentType),
/// Content that a boxed [`StreamingHandler`] writes into a
/// [`StreamingHandlerSink`] at output time.
Stream(Box<dyn StreamingHandler>),
}

#[derive(Default)]
Expand Down Expand Up @@ -122,8 +124,82 @@ impl DynamicString {
StringChunk::Buffer(content, content_type) => {
content_to_bytes(&content, content_type, encoding, output_handler);
}
StringChunk::Stream(handler) => {
let output_handler = &mut *output_handler;
handler.write_all(&mut StreamingHandlerSink {
encoding,
output_handler,
_marker: PhantomData,
})?;
}
};
}
Ok(())
}
}

/// Used to write chunks of text or markup in streaming mutation handlers.
///
/// Argument to [`StreamingHandler::write_all`]. A sink only borrows the output
/// callback (lifetime `'tmp`), so it cannot outlive the call it was created for.
pub struct StreamingHandlerSink<'tmp> {
/// Encoding handed to `content_to_bytes` for every written chunk.
encoding: &'static Encoding,
/// Callback that receives the bytes produced for every written chunk.
output_handler: &'tmp mut dyn FnMut(&[u8]),
/// Proactively marks the type as `!Send` and `!Sync` (a raw pointer is neither),
/// so that a future implementation may rely on that without a breaking change.
_marker: PhantomData<*const ()>,
}

impl<'tmp> StreamingHandlerSink<'tmp> {
    /// Writes the given UTF-8 string to the output, converting the encoding and
    /// [escaping](ContentType) it if necessary.
    ///
    /// This method may be called any number of times; the written strings are
    /// concatenated in call order.
    #[inline]
    pub fn write_str_chunk(&mut self, content: &str, content_type: ContentType) {
        let encoding = self.encoding;
        // Reborrow the callback for the duration of this call only.
        let output_handler = &mut *self.output_handler;
        content_to_bytes(content, content_type, encoding, output_handler);
    }
}

/// A callback used to write content lazily.
///
/// The handler is stored when a streaming mutation is registered and is invoked
/// later, when the queued content is written to the output. Note that
/// [`write_all`](StreamingHandler::write_all) itself is a regular, non-`async` method.
///
/// The trait requires [`Send`]. Closures of type
/// `FnOnce(&mut StreamingHandlerSink<'_>) -> BoxResult + Send + 'static`
/// implement it automatically.
pub trait StreamingHandler: Send {
/// This method is called only once, and is expected to write content
/// by calling the [`sink.write_str_chunk()`](StreamingHandlerSink::write_str_chunk) one or more times.
///
/// Multiple calls to `sink.write_str_chunk()` append more content to the output.
///
/// The handler is consumed by the call (`self: Box<Self>`). Returning an `Err`
/// reports a failure to the caller that is writing the content.
///
/// See [`StreamingHandlerSink`].
fn write_all(self: Box<Self>, sink: &mut StreamingHandlerSink<'_>) -> BoxResult;

// SAFETY: `StringChunk` is unsafely marked `Sync` even though implementors of this
// trait are not required to be `Sync`. For that to stay sound, this trait must not
// gain any `&self` methods.
}

/// Avoid requiring `StreamingHandler` to be `Sync`.
/// It only has a method taking exclusive ownership, so there's no sharing possible.
// SAFETY: the only method of `StreamingHandler` is `write_all(self: Box<Self>, ..)`,
// which consumes the handler, so a shared `&StringChunk` offers no way to call into
// the boxed handler. This must be revisited if the trait ever gains a `&self` method.
unsafe impl Sync for StringChunk {}
// `Box<dyn StreamingHandler>` carries no unwind-safety bounds, so these are asserted
// manually. NOTE(review): presumably needed so that types holding chunks stay
// unwind-safe for callers; confirm against the users of `StringChunk`.
impl RefUnwindSafe for StringChunk {}
impl UnwindSafe for StringChunk {}

/// Boxes a suitable closure so it can be used wherever a
/// `Box<dyn StreamingHandler>` is expected.
impl<F> From<F> for Box<dyn StreamingHandler>
where
    F: FnOnce(&mut StreamingHandlerSink<'_>) -> BoxResult + Send + 'static,
{
    #[inline]
    fn from(handler: F) -> Self {
        // The closure implements `StreamingHandler` through the blanket impl,
        // so the box can be unsized into a trait object.
        Box::new(handler) as Box<dyn StreamingHandler>
    }
}

/// Lets any `Send + 'static` closure taking a sink act as a [`StreamingHandler`].
impl<F> StreamingHandler for F
where
    F: FnOnce(&mut StreamingHandlerSink<'_>) -> BoxResult + Send + 'static,
{
    #[inline]
    fn write_all(self: Box<F>, sink: &mut StreamingHandlerSink<'_>) -> BoxResult {
        // Take the closure out of its box and run it exactly once.
        let callback: F = *self;
        callback(sink)
    }
}

impl From<Box<dyn StreamingHandler>> for StringChunk {
#[inline]
fn from(writer: Box<dyn StreamingHandler>) -> Self {
Self::Stream(writer)
}
}
37 changes: 35 additions & 2 deletions src/rewritable_units/tokens/comment.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::{Mutations, Token};
use crate::base::Bytes;
use crate::errors::RewritingError;
use crate::html_content::StreamingHandler;
use encoding_rs::Encoding;
use std::any::Any;
use std::fmt::{self, Debug};
Expand Down Expand Up @@ -111,6 +112,14 @@ impl<'i> Comment<'i> {
.push_back((content, content_type).into());
}

/// Inserts content produced by a [`StreamingHandler`] before the comment.
///
/// Subsequent calls to the method append to the previously inserted content.
#[inline]
pub fn streaming_before(&mut self, handler: Box<dyn StreamingHandler>) {
    // Queued at the back so earlier `before` insertions keep preceding it.
    let chunk = handler.into();
    self.mutations.content_before.push_back(chunk);
}

/// Inserts `content` after the comment.
///
/// Consequent calls to the method prepend `content` to the previously inserted content.
Expand Down Expand Up @@ -145,6 +154,14 @@ impl<'i> Comment<'i> {
.push_front((content, content_type).into());
}

/// Inserts content produced by a [`StreamingHandler`] after the comment.
///
/// Subsequent calls to the method prepend to the previously inserted content.
#[inline]
pub fn streaming_after(&mut self, handler: Box<dyn StreamingHandler>) {
    // Queued at the front so the newest insertion sits closest to the comment.
    let chunk = handler.into();
    self.mutations.content_after.push_front(chunk);
}

/// Replaces the comment with the `content`.
///
/// Consequent calls to the method overwrite previous replacement content.
Expand Down Expand Up @@ -177,6 +194,14 @@ impl<'i> Comment<'i> {
self.mutations.replace((content, content_type).into());
}

/// Replaces the comment with content produced by a [`StreamingHandler`].
///
/// Subsequent calls to the method overwrite the previous replacement content.
#[inline]
pub fn streaming_replace(&mut self, handler: Box<dyn StreamingHandler>) {
    let replacement = handler.into();
    self.mutations.replace(replacement);
}

/// Removes the comment.
#[inline]
pub fn remove(&mut self) {
Expand Down Expand Up @@ -338,7 +363,11 @@ mod tests {
assert!(c.removed());

c.before("<before>", ContentType::Html);
c.after("<after>", ContentType::Html);
c.streaming_after(Box::new(|s: &mut StreamingHandlerSink| {
s.write_str_chunk("<af", ContentType::Html);
s.write_str_chunk("ter>", ContentType::Html);
Ok(())
}));
},
"<before><after>"
);
Expand All @@ -355,7 +384,11 @@ mod tests {

c.replace("<div></div>", ContentType::Html);
c.replace("<!--42-->", ContentType::Html);
c.replace("<foo & bar>", ContentType::Text);
c.streaming_replace(streaming!(|h| {
h.write_str_chunk("<foo &", ContentType::Text);
h.write_str_chunk(" bar>", ContentType::Text);
Ok(())
}));

assert!(c.removed());
},
Expand Down
Loading

0 comments on commit d5c08f9

Please sign in to comment.