Skip to content

Commit

Permalink
Speed up escaped byte buf parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
juntyr committed Apr 9, 2024
1 parent e8ca8c5 commit 1bdb8de
Showing 1 changed file with 85 additions and 25 deletions.
110 changes: 85 additions & 25 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,6 @@ impl<'a> Parser<'a> {
.find(|c| !condition(c))
.unwrap_or(self.src().len() - from)
}

/// Scans the text returned by `self.src()` from its start and returns the
/// first character for which `condition` yields `true`, paired with the
/// offset reported for it by `char_indices`.
///
/// Returns `None` when no character satisfies `condition`.
#[must_use]
pub fn find_char_index(&self, condition: fn(char) -> bool) -> Option<(usize, char)> {
    for (offset, ch) in self.src().char_indices() {
        if condition(ch) {
            // First match wins; later characters are never inspected.
            return Some((offset, ch));
        }
    }

    None
}
}

/// actual parsing of ron tokens
Expand Down Expand Up @@ -619,7 +614,8 @@ impl<'a> Parser<'a> {
Err(_) => parser.set_cursor(cursor_backup),
}
let cursor_backup = parser.cursor;
match parser.byte_string() {
// we have already checked for strings, which subsume base64 byte strings
match parser.byte_string_no_base64() {
Ok(_) => (),
// prevent quadratic complexity backtracking for unterminated byte string
Err(err @ (Error::ExpectedStringEnd | Error::Eof)) => return Err(err),
Expand Down Expand Up @@ -1058,7 +1054,13 @@ impl<'a> Parser<'a> {
Err(_) => Err(Error::ExpectedByteString),
}
}
} else if self.consume_str("b\"") {
} else {
self.byte_string_no_base64()
}
}

pub fn byte_string_no_base64(&mut self) -> Result<ParsedByteStr<'a>> {
if self.consume_str("b\"") {
self.escaped_byte_string()
} else if self.consume_str("br") {
self.raw_byte_string()
Expand Down Expand Up @@ -1121,17 +1123,13 @@ impl<'a> Parser<'a> {
}

fn escaped_byte_buf(&mut self, encoding: EscapeEncoding) -> Result<(ParsedByteStr<'a>, usize)> {
let (i, end_or_escape) = self
.find_char_index(|c| matches!(c, '\\' | '"'))
.ok_or(Error::ExpectedStringEnd)?;

if end_or_escape == '"' {
let s = &self.src().as_bytes()[..i];
// Checking for '"' and '\\' separately is faster than searching for both at the same time
let str_end = self.src().find('"').ok_or(Error::ExpectedStringEnd)?;
let escape = self.src()[..str_end].find('\\');

// Advance by the number of bytes of the string + 1 for the `"`.
Ok((ParsedByteStr::Slice(s), i + 1))
} else {
let mut i = i;
if let Some(escape) = escape {
// Now check if escaping is used inside the string
let mut i = escape;
let mut s = self.src().as_bytes()[..i].to_vec();

loop {
Expand All @@ -1149,19 +1147,81 @@ impl<'a> Parser<'a> {
},
}

let (new_i, end_or_escape) = self
.find_char_index(|c| matches!(c, '\\' | '"'))
.ok_or(Error::ExpectedStringEnd)?;
// Checking for '"' and '\\' separately is faster than searching for both at the same time
let new_str_end = self.src().find('"').ok_or(Error::ExpectedStringEnd)?;
let new_escape = self.src()[..new_str_end].find('\\');

i = new_i;
s.extend_from_slice(&self.src().as_bytes()[..i]);

if end_or_escape == '"' {
if let Some(new_escape) = new_escape {
s.extend_from_slice(&self.src().as_bytes()[..new_escape]);
i = new_escape;
} else {
s.extend_from_slice(&self.src().as_bytes()[..new_str_end]);
// Advance to the end of the string + 1 for the `"`.
break Ok((ParsedByteStr::Allocated(s), i + 1));
break Ok((ParsedByteStr::Allocated(s), new_str_end + 1));
}

// let new_i = self.src().find(['\\', '"']).ok_or(Error::ExpectedStringEnd)?;
// let end_or_escape = self.src()[new_i..].chars().next().unwrap();

// let (new_i, end_or_escape) = self
// .find_char_index(|c| matches!(c, '\\' | '"'))
// .ok_or(Error::ExpectedStringEnd)?;

// i = new_i;
// s.extend_from_slice(&self.src().as_bytes()[..i]);

// // if end_or_escape == '"' {
// if self.src()[new_i..].starts_with('"') {
// // Advance to the end of the string + 1 for the `"`.
// break Ok((ParsedByteStr::Allocated(s), i + 1));
// }

// i = if let Some(new_i) = self.src().find('"') {
// s.extend_from_slice(&self.src().as_bytes()[..new_i]);
// // Advance to the end of the string + 1 for the `"`.
// break Ok((ParsedByteStr::Allocated(s), new_i + 1));
// } else if let Some(new_i) = self.src().find('\\') {
// s.extend_from_slice(&self.src().as_bytes()[..new_i]);
// new_i
// } else {
// return Err(Error::ExpectedStringEnd);
// };

// let new_i = self.src().find(['\\', '"']).ok_or(Error::ExpectedStringEnd)?;
// let end_or_escape = self.src()[new_i..].chars().next().unwrap();

// let (new_i, end_or_escape) = self
// .find_char_index(|c| matches!(c, '\\' | '"'))
// .ok_or(Error::ExpectedStringEnd)?;

// i = new_i;
// s.extend_from_slice(&self.src().as_bytes()[..i]);

// if end_or_escape == '"' {
// if self.src()[new_i..].starts_with('"') {
// // Advance to the end of the string + 1 for the `"`.
// break Ok((ParsedByteStr::Allocated(s), i + 1));
// }
}
} else {
let s = &self.src().as_bytes()[..str_end];

// Advance by the number of bytes of the string + 1 for the `"`.
Ok((ParsedByteStr::Slice(s), str_end + 1))
}

// if let Some(i) = self.src().find('"') {
// // First check the happy case of an unescaped string
// // (also finding based on a single char twice is faster)
// let s = &self.src().as_bytes()[..i];

// // Advance by the number of bytes of the string + 1 for the `"`.
// Ok((ParsedByteStr::Slice(s), i + 1))
// } else if let Some(i) = self.src().find('\\') {

// } else {
// Err(Error::ExpectedStringEnd)
// }
}

fn raw_byte_buf(&mut self) -> Result<(ParsedByteStr<'a>, usize)> {
Expand Down

0 comments on commit 1bdb8de

Please sign in to comment.