Skip to content

Commit

Permalink
Add test for RawValue whitespace eating + add strip method to normali…
Browse files Browse the repository at this point in the history
…se them
  • Loading branch information
juntyr committed Oct 5, 2023
1 parent 9557e02 commit d6c5289
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 14 deletions.
13 changes: 9 additions & 4 deletions fuzz/fuzz_targets/bench/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1386,10 +1386,15 @@ impl<'a, 'de> DeserializeSeed<'de> for BorrowedTypedSerdeData<'a> {
// even though it's disguised as a newtype
if self.name == RAW_VALUE_TOKEN {
if let SerdeDataValue::String(ron) = &self.value {
// pretty serialising can add whitespace and comments
// before and after the raw value
if v.contains(ron) {
return Ok(());
if let (Ok(v_ron), Ok(ron)) = (
ron::value::RawValue::from_ron(v),
ron::value::RawValue::from_ron(ron),
) {
// pretty serialising can add whitespace and comments
// before and after the raw value
if v_ron.trim().get_ron() == ron.trim().get_ron() {
return Ok(());
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const SERDE_TAG_KEY_CANARY: &str = "serde::__private::de::content::TagOrContent"
/// If you just want to simply deserialize a value,
/// you can use the [`from_str`] convenience function.
pub struct Deserializer<'de> {
parser: Parser<'de>,
pub(crate) parser: Parser<'de>,
newtype_variant: bool,
serde_content_newtype: bool,
last_identifier: Option<&'de str>,
Expand Down
20 changes: 12 additions & 8 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ pub struct ParserCursor {
last_ws_len: usize,
}

const WS_CURSOR_UNCLOSED_LINE: usize = usize::MAX;

impl PartialEq for ParserCursor {
fn eq(&self, other: &Self) -> bool {
self.cursor == other.cursor
Expand Down Expand Up @@ -110,7 +112,7 @@ impl<'a> Parser<'a> {
Ok(parser)
}

pub fn set_cursor(&mut self, cursor: ParserCursor) {
fn set_cursor(&mut self, cursor: ParserCursor) {
self.cursor = cursor;
}

Expand Down Expand Up @@ -959,22 +961,24 @@ impl<'a> Parser<'a> {
}

pub fn skip_ws(&mut self) -> Result<()> {
if self.src().is_empty() {
return Ok(());
}

if (self.cursor.pre_ws_cursor + self.cursor.last_ws_len) < self.cursor.cursor {
if (self.cursor.last_ws_len == WS_CURSOR_UNCLOSED_LINE)
|| ((self.cursor.pre_ws_cursor + self.cursor.last_ws_len) < self.cursor.cursor)
{
// the last whitespace is disjoint from this one, we need to track a new one
self.cursor.pre_ws_cursor = self.cursor.cursor;
}

if self.src().is_empty() {
return Ok(());
}

loop {
self.advance_bytes(self.next_chars_while_len(is_whitespace_char));

match self.skip_comment()? {
None => break,
Some(Comment::UnclosedLine) => {
self.cursor.last_ws_len = usize::MAX;
self.cursor.last_ws_len = WS_CURSOR_UNCLOSED_LINE;
return Ok(());
}
Some(Comment::ClosedLine | Comment::Block) => continue,
Expand All @@ -987,7 +991,7 @@ impl<'a> Parser<'a> {
}

pub fn has_unclosed_line_comment(&self) -> bool {
self.src().is_empty() && self.cursor.last_ws_len == usize::MAX
self.src().is_empty() && self.cursor.last_ws_len == WS_CURSOR_UNCLOSED_LINE
}

pub fn byte_string(&mut self) -> Result<ParsedByteStr<'a>> {
Expand Down
46 changes: 45 additions & 1 deletion src/value/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// https://github.com/serde-rs/json/blob/master/src/raw.rs
// Licensed under either of Apache License, Version 2.0 or MIT license at your option.

use std::fmt;
use std::{fmt, ops::Range};

use serde::{de, ser, Deserialize, Serialize};

Expand Down Expand Up @@ -37,6 +37,25 @@ impl RawValue {
// Safety: RawValue is a transparent newtype around str
unsafe { std::mem::transmute::<Box<RawValue>, Box<str>>(raw_value) }
}

#[allow(clippy::expect_used)]
fn trim_range(ron: &str) -> Range<usize> {
fn trim_range_inner(ron: &str) -> Result<Range<usize>, Error> {
let mut deserializer = crate::Deserializer::from_str(ron).map_err(Error::from)?;

deserializer.parser.skip_ws()?;
let start_offset = ron.len() - deserializer.parser.src().len();

let _ = serde::de::IgnoredAny::deserialize(&mut deserializer)?;

deserializer.parser.skip_ws()?;
let end_offset = deserializer.parser.pre_ws_src().len();

Ok(start_offset..(ron.len() - end_offset))
}

trim_range_inner(ron).expect("RawValue must contain valid ron")
}
}

impl Clone for Box<RawValue> {
Expand Down Expand Up @@ -115,6 +134,31 @@ impl RawValue {
}
}

impl RawValue {
#[must_use]
/// Trims any leadning and trailing whitespace off the raw RON string,
/// including whitespace characters and comments.
pub fn trim(&self) -> &Self {
Self::from_borrowed_str(&self.ron[RawValue::trim_range(&self.ron)])
}

#[must_use]
#[allow(unsafe_code)]
/// Trims any leadning and trailing whitespace off the boxed raw RON string,
/// including whitespace characters and comments.
pub fn trim_boxed(self: Box<Self>) -> Box<Self> {
let trim_range = RawValue::trim_range(&self.ron);
let mut boxed_ron = RawValue::into_boxed_str(self).into_string();
// SAFETY: ron[trim_range] is a valid str, so draining everything
// before and after leaves a valid str
unsafe {
boxed_ron.as_mut_vec().drain(trim_range.end..);
boxed_ron.as_mut_vec().drain(0..trim_range.start);
}
RawValue::from_boxed_str(boxed_ron.into_boxed_str())
}
}

impl From<Box<RawValue>> for Box<str> {
fn from(raw_value: Box<RawValue>) -> Self {
RawValue::into_boxed_str(raw_value)
Expand Down
48 changes: 48 additions & 0 deletions tests/407_raw_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,54 @@ fn test_boxed_raw_value_deserialise_from_string() {
);
}

#[test]
fn test_whitespace_rountrip_issues_and_trim() {
let mut v = WithRawValue {
a: true,
b: RawValue::from_ron("42 ").unwrap().to_owned(),
};
v = ron::from_str(&ron::to_string(&v).unwrap()).unwrap();
assert_eq!(v.b.get_ron(), "42 ");
assert_eq!(v.b.trim().get_ron(), "42");
assert_eq!(v.b.to_owned().trim_boxed().get_ron(), "42");

for i in 1..=10 {
v = ron::from_str(
&ron::ser::to_string_pretty(&v, ron::ser::PrettyConfig::default()).unwrap(),
)
.unwrap();
assert_eq!(v.b.get_ron(), &format!("{: <1$}42 ", "", i));
assert_eq!(v.b.trim().get_ron(), "42");
assert_eq!(v.b.to_owned().trim_boxed().get_ron(), "42");
}

assert_eq!(RawValue::from_ron("42").unwrap().trim().get_ron(), "42");
assert_eq!(
RawValue::from_ron(" /* start */ 42 // end\n ")
.unwrap()
.trim()
.get_ron(),
"42"
);

assert_eq!(
RawValue::from_ron("42")
.unwrap()
.to_owned()
.trim_boxed()
.get_ron(),
"42"
);
assert_eq!(
RawValue::from_ron(" /* start */ 42 // end\n ")
.unwrap()
.to_owned()
.trim_boxed()
.get_ron(),
"42"
);
}

#[test]
fn test_fuzzer_found_issue() {
assert_eq!(
Expand Down

0 comments on commit d6c5289

Please sign in to comment.