diff --git a/fuzz/fuzz_targets/bench/lib.rs b/fuzz/fuzz_targets/bench/lib.rs index ec4d8bacf..7841da508 100644 --- a/fuzz/fuzz_targets/bench/lib.rs +++ b/fuzz/fuzz_targets/bench/lib.rs @@ -1386,10 +1386,15 @@ impl<'a, 'de> DeserializeSeed<'de> for BorrowedTypedSerdeData<'a> { // even though it's disguised as a newtype if self.name == RAW_VALUE_TOKEN { if let SerdeDataValue::String(ron) = &self.value { - // pretty serialising can add whitespace and comments - // before and after the raw value - if v.contains(ron) { - return Ok(()); + if let (Ok(v_ron), Ok(ron)) = ( + ron::value::RawValue::from_ron(v), + ron::value::RawValue::from_ron(ron), + ) { + // pretty serialising can add whitespace and comments + // before and after the raw value + if v_ron.trim().get_ron() == ron.trim().get_ron() { + return Ok(()); + } } } } diff --git a/src/de/mod.rs b/src/de/mod.rs index 67342c57b..9707a7a1e 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -31,7 +31,7 @@ const SERDE_TAG_KEY_CANARY: &str = "serde::__private::de::content::TagOrContent" /// If you just want to simply deserialize a value, /// you can use the [`from_str`] convenience function. pub struct Deserializer<'de> { - parser: Parser<'de>, + pub(crate) parser: Parser<'de>, newtype_variant: bool, serde_content_newtype: bool, last_identifier: Option<&'de str>, diff --git a/src/parse.rs b/src/parse.rs index 22eba3624..cf3523210 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -68,6 +68,8 @@ pub struct ParserCursor { last_ws_len: usize, } +const WS_CURSOR_UNCLOSED_LINE: usize = usize::MAX; + impl PartialEq for ParserCursor { fn eq(&self, other: &Self) -> bool { self.cursor == other.cursor @@ -110,7 +112,7 @@ impl<'a> Parser<'a> { Ok(parser) } - pub fn set_cursor(&mut self, cursor: ParserCursor) { + fn set_cursor(&mut self, cursor: ParserCursor) { self.cursor = cursor; } @@ -959,22 +961,24 @@ impl<'a> Parser<'a> { } pub fn skip_ws(&mut self) -> Result<()> { - if self.src().is_empty() { - return Ok(()); - } - - if (self.cursor.pre_ws_cursor + self.cursor.last_ws_len) < self.cursor.cursor { + if (self.cursor.last_ws_len == WS_CURSOR_UNCLOSED_LINE) + || ((self.cursor.pre_ws_cursor + self.cursor.last_ws_len) < self.cursor.cursor) + { // the last whitespace is disjoint from this one, we need to track a new one self.cursor.pre_ws_cursor = self.cursor.cursor; } + if self.src().is_empty() { + return Ok(()); + } + loop { self.advance_bytes(self.next_chars_while_len(is_whitespace_char)); match self.skip_comment()? { None => break, Some(Comment::UnclosedLine) => { - self.cursor.last_ws_len = usize::MAX; + self.cursor.last_ws_len = WS_CURSOR_UNCLOSED_LINE; return Ok(()); } Some(Comment::ClosedLine | Comment::Block) => continue, @@ -987,7 +991,7 @@ impl<'a> Parser<'a> { } pub fn has_unclosed_line_comment(&self) -> bool { - self.src().is_empty() && self.cursor.last_ws_len == usize::MAX + self.src().is_empty() && self.cursor.last_ws_len == WS_CURSOR_UNCLOSED_LINE } pub fn byte_string(&mut self) -> Result> { diff --git a/src/value/raw.rs b/src/value/raw.rs index 20b0b19e4..1dabe6e76 100644 --- a/src/value/raw.rs +++ b/src/value/raw.rs @@ -2,7 +2,7 @@ // https://github.com/serde-rs/json/blob/master/src/raw.rs // Licensed under either of Apache License, Version 2.0 or MIT license at your option. -use std::fmt; +use std::{fmt, ops::Range}; use serde::{de, ser, Deserialize, Serialize}; @@ -37,6 +37,25 @@ impl RawValue { // Safety: RawValue is a transparent newtype around str unsafe { std::mem::transmute::, Box>(raw_value) } } + + #[allow(clippy::expect_used)] + fn trim_range(ron: &str) -> Range { + fn trim_range_inner(ron: &str) -> Result, Error> { + let mut deserializer = crate::Deserializer::from_str(ron).map_err(Error::from)?; + + deserializer.parser.skip_ws()?; + let start_offset = ron.len() - deserializer.parser.src().len(); + + let _ = serde::de::IgnoredAny::deserialize(&mut deserializer)?; + + deserializer.parser.skip_ws()?; + let end_offset = deserializer.parser.pre_ws_src().len(); + + Ok(start_offset..(ron.len() - end_offset)) + } + + trim_range_inner(ron).expect("RawValue must contain valid ron") + } } impl Clone for Box { @@ -115,6 +134,31 @@ impl RawValue { } } +impl RawValue { + #[must_use] + /// Trims any leadning and trailing whitespace off the raw RON string, + /// including whitespace characters and comments. + pub fn trim(&self) -> &Self { + Self::from_borrowed_str(&self.ron[RawValue::trim_range(&self.ron)]) + } + + #[must_use] + #[allow(unsafe_code)] + /// Trims any leadning and trailing whitespace off the boxed raw RON string, + /// including whitespace characters and comments. + pub fn trim_boxed(self: Box) -> Box { + let trim_range = RawValue::trim_range(&self.ron); + let mut boxed_ron = RawValue::into_boxed_str(self).into_string(); + // SAFETY: ron[trim_range] is a valid str, so draining everything + // before and after leaves a valid str + unsafe { + boxed_ron.as_mut_vec().drain(trim_range.end..); + boxed_ron.as_mut_vec().drain(0..trim_range.start); + } + RawValue::from_boxed_str(boxed_ron.into_boxed_str()) + } +} + impl From> for Box { fn from(raw_value: Box) -> Self { RawValue::into_boxed_str(raw_value) diff --git a/tests/407_raw_value.rs b/tests/407_raw_value.rs index a5f5efa90..f6a0cdea4 100644 --- a/tests/407_raw_value.rs +++ b/tests/407_raw_value.rs @@ -219,6 +219,54 @@ fn test_boxed_raw_value_deserialise_from_string() { ); } +#[test] +fn test_whitespace_rountrip_issues_and_trim() { + let mut v = WithRawValue { + a: true, + b: RawValue::from_ron("42 ").unwrap().to_owned(), + }; + v = ron::from_str(&ron::to_string(&v).unwrap()).unwrap(); + assert_eq!(v.b.get_ron(), "42 "); + assert_eq!(v.b.trim().get_ron(), "42"); + assert_eq!(v.b.to_owned().trim_boxed().get_ron(), "42"); + + for i in 1..=10 { + v = ron::from_str( + &ron::ser::to_string_pretty(&v, ron::ser::PrettyConfig::default()).unwrap(), + ) + .unwrap(); + assert_eq!(v.b.get_ron(), &format!("{: <1$}42 ", "", i)); + assert_eq!(v.b.trim().get_ron(), "42"); + assert_eq!(v.b.to_owned().trim_boxed().get_ron(), "42"); + } + + assert_eq!(RawValue::from_ron("42").unwrap().trim().get_ron(), "42"); + assert_eq!( + RawValue::from_ron(" /* start */ 42 // end\n ") + .unwrap() + .trim() + .get_ron(), + "42" + ); + + assert_eq!( + RawValue::from_ron("42") + .unwrap() + .to_owned() + .trim_boxed() + .get_ron(), + "42" + ); + assert_eq!( + RawValue::from_ron(" /* start */ 42 // end\n ") + .unwrap() + .to_owned() + .trim_boxed() + .get_ron(), + "42" + ); +} + #[test] fn test_fuzzer_found_issue() { assert_eq!(