Skip to content

Commit

Permalink
format_escaped_str() fast and slow paths depending on page boundary
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Feb 14, 2024
1 parent b32a6da commit 7bc104b
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 48 deletions.
33 changes: 33 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ encoding_rs = { version = "0.8", default_features = false }
itoa = { version = "1", default_features = false }
itoap = { version = "1", features = ["std", "simd"] }
once_cell = { version = "1", default_features = false, features = ["race"] }
page_size = { version = "0.6" }
pyo3-ffi = { version = "^0.20.2", default_features = false, features = ["extension-module"]}
ryu = { version = "1", default_features = false }
serde = { version = "1", default_features = false }
Expand Down
123 changes: 75 additions & 48 deletions src/serialize/writer/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,67 +2,37 @@
// Copyright 2023-2024 liuq19, ijl
// adapted from sonic-rs' src/util/string.rs

use crate::typeref::PAGE_SIZE;
use core::simd::cmp::{SimdPartialEq, SimdPartialOrd};

macro_rules! impl_escape_unchecked {
($src:expr, $dst:expr, $nb:expr, $omask:expr, $cn:expr) => {
$nb -= $cn;
$dst = $dst.add($cn);
$src = $src.add($cn);
let mut mask = $omask << $cn;
$omask >>= $cn;
loop {
$nb -= 1;
mask = mask << 1;
let replacement = if *($src) == b'"' {
(*b"\\\"\0\0\0\0\0\0", 2)
$omask = $omask >> 1;

if *($src) == b'"' {
core::ptr::copy_nonoverlapping(b"\\\"".as_ptr(), $dst, 2);
$dst = $dst.add(2);
} else if *($src) == b'\\' {
(*b"\\\\\0\0\0\0\0\0", 2)
core::ptr::copy_nonoverlapping(b"\\\\".as_ptr(), $dst, 2);
$dst = $dst.add(2);
} else {
match *($src) {
0 => (*b"\\u0000\0\0", 6),
1 => (*b"\\u0001\0\0", 6),
2 => (*b"\\u0002\0\0", 6),
3 => (*b"\\u0003\0\0", 6),
4 => (*b"\\u0004\0\0", 6),
5 => (*b"\\u0005\0\0", 6),
6 => (*b"\\u0006\0\0", 6),
7 => (*b"\\u0007\0\0", 6),
8 => (*b"\\b\0\0\0\0\0\0", 2),
9 => (*b"\\t\0\0\0\0\0\0", 2),
10 => (*b"\\n\0\0\0\0\0\0", 2),
11 => (*b"\\u000b\0\0", 6),
12 => (*b"\\f\0\0\0\0\0\0", 2),
13 => (*b"\\r\0\0\0\0\0\0", 2),
14 => (*b"\\u000e\0\0", 6),
15 => (*b"\\u000f\0\0", 6),
16 => (*b"\\u0010\0\0", 6),
17 => (*b"\\u0011\0\0", 6),
18 => (*b"\\u0012\0\0", 6),
19 => (*b"\\u0013\0\0", 6),
20 => (*b"\\u0014\0\0", 6),
21 => (*b"\\u0015\0\0", 6),
22 => (*b"\\u0016\0\0", 6),
23 => (*b"\\u0017\0\0", 6),
24 => (*b"\\u0018\0\0", 6),
25 => (*b"\\u0019\0\0", 6),
26 => (*b"\\u001a\0\0", 6),
27 => (*b"\\u001b\0\0", 6),
28 => (*b"\\u001c\0\0", 6),
29 => (*b"\\u001d\0\0", 6),
30 => (*b"\\u001e\0\0", 6),
31 => (*b"\\u001f\0\0", 6),
_ => unreachable!(),
}
$dst = write_unusual_escape($src, $dst);
};
core::ptr::copy_nonoverlapping(replacement.0.as_ptr(), $dst, 8);
$dst = $dst.add(replacement.1 as usize);

$src = $src.add(1);
if likely!(mask & (1 << (STRIDE - 1)) != 1) {
if likely!($omask & 1 != 1) {
break;
}
}
};
}

macro_rules! impl_format_simd {
($odptr:expr, $value_ptr:expr, $value_len:expr) => {
let mut dptr = $odptr;
Expand All @@ -81,7 +51,7 @@ macro_rules! impl_format_simd {
while nb >= STRIDE {
let v = StrVector::from_slice(core::slice::from_raw_parts(sptr, STRIDE));
v.copy_to_slice(core::slice::from_raw_parts_mut(dptr, STRIDE));
let mask =
let mut mask =
(v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() as u32;

if likely!(mask == 0) {
Expand All @@ -95,12 +65,18 @@ macro_rules! impl_format_simd {
}

while nb > 0 {
let mut v = StrVector::default();
v.as_mut_array()[..nb].copy_from_slice(core::slice::from_raw_parts(sptr, nb));
let v = if unlikely!(is_cross_page!(sptr)) {
let mut v = StrVector::default();
v.as_mut_array()[..nb].copy_from_slice(core::slice::from_raw_parts(sptr, nb));
v
} else {
StrVector::from_slice(core::slice::from_raw_parts(sptr, STRIDE))
};
v.copy_to_slice(core::slice::from_raw_parts_mut(dptr, STRIDE));
let mask = (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask()
let mut mask = (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask()
as u32
& (STRIDE_SATURATION >> (STRIDE - nb));
& (STRIDE_SATURATION >> (32 - STRIDE - nb));

if likely!(mask == 0) {
dptr = dptr.add(nb);
break;
Expand All @@ -118,6 +94,57 @@ macro_rules! impl_format_simd {
};
}

/// Evaluates to `true` when a `STRIDE`-byte span starting at address `$src`
/// would extend past the end of the page containing `$src`.
///
/// The page offset is taken by masking with `PAGE_SIZE - 1`, so the result is
/// only meaningful when `PAGE_SIZE` is a nonzero power of two.
/// `PAGE_SIZE` is a `static mut`, hence the `unsafe` block around the read.
macro_rules! is_cross_page {
    ($src:expr) => {
        unsafe {
            // Byte offset of the address within its page.
            let offset_in_page = $src as usize & (PAGE_SIZE - 1);
            offset_in_page + STRIDE > PAGE_SIZE
        }
    };
}

/// Writes the JSON escape sequence for the control byte at `sptr` (a value
/// below 32) to `dptr` and returns the pointer just past the escape.
///
/// Exactly 8 bytes are always copied to `dptr` (the escape followed by NUL
/// padding), so `dptr` must have at least 8 writable bytes even though the
/// returned pointer advances by only 2 or 6.
///
/// Panics if the byte at `sptr` is 32 or greater.
#[cold]
#[inline(never)]
fn write_unusual_escape(sptr: *const u8, dptr: *mut u8) -> *mut u8 {
    // Indexed by byte value: the escape NUL-padded to 8 bytes, and its length.
    const ESCAPES: [([u8; 8], usize); 32] = [
        (*b"\\u0000\0\0", 6),
        (*b"\\u0001\0\0", 6),
        (*b"\\u0002\0\0", 6),
        (*b"\\u0003\0\0", 6),
        (*b"\\u0004\0\0", 6),
        (*b"\\u0005\0\0", 6),
        (*b"\\u0006\0\0", 6),
        (*b"\\u0007\0\0", 6),
        (*b"\\b\0\0\0\0\0\0", 2),
        (*b"\\t\0\0\0\0\0\0", 2),
        (*b"\\n\0\0\0\0\0\0", 2),
        (*b"\\u000b\0\0", 6),
        (*b"\\f\0\0\0\0\0\0", 2),
        (*b"\\r\0\0\0\0\0\0", 2),
        (*b"\\u000e\0\0", 6),
        (*b"\\u000f\0\0", 6),
        (*b"\\u0010\0\0", 6),
        (*b"\\u0011\0\0", 6),
        (*b"\\u0012\0\0", 6),
        (*b"\\u0013\0\0", 6),
        (*b"\\u0014\0\0", 6),
        (*b"\\u0015\0\0", 6),
        (*b"\\u0016\0\0", 6),
        (*b"\\u0017\0\0", 6),
        (*b"\\u0018\0\0", 6),
        (*b"\\u0019\0\0", 6),
        (*b"\\u001a\0\0", 6),
        (*b"\\u001b\0\0", 6),
        (*b"\\u001c\0\0", 6),
        (*b"\\u001d\0\0", 6),
        (*b"\\u001e\0\0", 6),
        (*b"\\u001f\0\0", 6),
    ];
    unsafe {
        debug_assert!(*sptr < 32);
        match ESCAPES.get(*sptr as usize) {
            Some((padded, len)) => {
                // Fixed 8-byte copy; only the first `len` bytes are kept by the caller.
                core::ptr::copy_nonoverlapping(padded.as_ptr(), dptr, 8);
                dptr.add(*len)
            }
            None => unreachable!(),
        }
    }
}

#[inline(never)]
pub unsafe fn format_escaped_str_impl_128(
odptr: *mut u8,
Expand Down
3 changes: 3 additions & 0 deletions src/typeref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ pub static mut DESCR_STR: *mut PyObject = null_mut();
pub static mut VALUE_STR: *mut PyObject = null_mut();
pub static mut INT_ATTR_STR: *mut PyObject = null_mut();

pub static mut PAGE_SIZE: usize = 0;

#[cfg(feature = "yyjson")]
pub const YYJSON_BUFFER_SIZE: usize = 1024 * 1024 * 8;

Expand Down Expand Up @@ -134,6 +136,7 @@ pub fn init_typerefs() {
#[cfg_attr(feature = "optimize", optimize(size))]
fn _init_typerefs_impl() -> bool {
unsafe {
PAGE_SIZE = page_size::get();
debug_assert!(crate::opt::MAX_OPT < u16::MAX as i32);
assert!(crate::deserialize::KEY_MAP
.set(crate::deserialize::KeyMap::default())
Expand Down

0 comments on commit 7bc104b

Please sign in to comment.