diff --git a/Cargo.lock b/Cargo.lock index 26fcbe00..2d2fb8a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -204,7 +204,6 @@ dependencies = [ "itoa", "itoap", "once_cell", - "page_size", "pyo3-build-config", "pyo3-ffi", "ryu", @@ -215,16 +214,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "page_size" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "proc-macro2" version = "1.0.78" @@ -371,28 +360,6 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "zerocopy" version = "0.7.32" diff --git a/Cargo.toml b/Cargo.toml index d9626d02..42b8ba47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,7 +56,6 @@ encoding_rs = { version = "0.8", default_features = false } itoa = { version = "1", default_features = false } itoap = { version = "1", features = ["std", "simd"] } once_cell = { version = "1", default_features = false, features = ["race"] } -page_size = { version = "0.6" } pyo3-ffi = { version = "^0.20.2", default_features = false, features = ["extension-module"]} ryu = { version = "1", default_features = false } serde = { version = "1", default_features = false } diff --git a/src/serialize/writer/json.rs b/src/serialize/writer/json.rs index 147a4875..c603f351 100644 --- a/src/serialize/writer/json.rs +++ b/src/serialize/writer/json.rs @@ -572,7 +572,7 @@ where W: ?Sized + io::Write + WriteExt, { unsafe { - let num_reserved_bytes = value.len() * 8 + 32 + 3; + let num_reserved_bytes = value.len() * 8 + 32; writer.reserve(num_reserved_bytes); let written = crate::serialize::writer::simd::format_escaped_str_impl_128( diff --git a/src/serialize/writer/simd.rs b/src/serialize/writer/simd.rs index 82a12187..dc6c500e 100644 --- a/src/serialize/writer/simd.rs +++ b/src/serialize/writer/simd.rs @@ -2,19 +2,25 @@ // Copyright 2023-2024 liuq19, ijl // adapted from sonic-rs' src/util/string.rs -use crate::typeref::PAGE_SIZE; use core::simd::cmp::{SimdPartialEq, SimdPartialOrd}; macro_rules! impl_escape_unchecked { - ($src:expr, $dst:expr, $nb:expr, $omask:expr, $cn:expr) => { + ($src:expr, $dst:expr, $nb:expr, $omask:expr, $cn:expr, $v:expr, $rotate:expr) => { $nb -= $cn; + if $rotate == true { + for _ in 0..$cn { + $v = $v.rotate_elements_left::<1>(); + } + } $dst = $dst.add($cn); $src = $src.add($cn); $omask >>= $cn; loop { $nb -= 1; + if $rotate == true { + $v = $v.rotate_elements_left::<1>(); + } $omask = $omask >> 1; - if *($src) == b'"' { core::ptr::copy_nonoverlapping(b"\\\"".as_ptr(), $dst, 2); $dst = $dst.add(2); @@ -48,46 +54,48 @@ macro_rules! impl_format_simd { *dptr = b'"'; dptr = dptr.add(1); - while nb >= STRIDE { - let v = StrVector::from_slice(core::slice::from_raw_parts(sptr, STRIDE)); - v.copy_to_slice(core::slice::from_raw_parts_mut(dptr, STRIDE)); - let mut mask = - (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() as u32; - - if likely!(mask == 0) { - nb -= STRIDE; - dptr = dptr.add(STRIDE); - sptr = sptr.add(STRIDE); - } else { - let cn = mask.trailing_zeros() as usize; - impl_escape_unchecked!(sptr, dptr, nb, mask, cn); + { + const ROTATE: bool = false; + while nb >= STRIDE { + let mut v = StrVector::from_slice(core::slice::from_raw_parts(sptr, STRIDE)); + let mut mask = + (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() as u32; + v.copy_to_slice(core::slice::from_raw_parts_mut(dptr, STRIDE)); + + if likely!(mask == 0) { + nb -= STRIDE; + dptr = dptr.add(STRIDE); + sptr = sptr.add(STRIDE); + } else { + let cn = mask.trailing_zeros() as usize; + impl_escape_unchecked!(sptr, dptr, nb, mask, cn, v, ROTATE); + } } } - let mut v = if unlikely!(is_cross_page!(sptr)) { + { + const ROTATE: bool = true; let mut v = StrVector::default(); - v.as_mut_array()[..nb].copy_from_slice(core::slice::from_raw_parts(sptr, nb)); - v - } else { - StrVector::from_slice(core::slice::from_raw_parts(sptr, STRIDE)) - }; - while nb > 0 { - v.copy_to_slice(core::slice::from_raw_parts_mut(dptr, STRIDE)); + { + let vec_ptr = v.as_mut_array().as_mut_ptr(); + for idx in 0..nb { + core::ptr::write(vec_ptr.add(idx), *sptr.add(idx)); + } + } + let mut mask = (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() as u32 & (STRIDE_SATURATION >> (32 - STRIDE - nb)); - if likely!(mask == 0) { - dptr = dptr.add(nb); - break; - } else { - let cn = mask.trailing_zeros() as usize; - let nb_start = nb; - impl_escape_unchecked!(sptr, dptr, nb, mask, cn); - let mut consumed = nb_start - nb; - while consumed != 0 { - v = v.rotate_elements_left::<1>(); - consumed -= 1; + while nb > 0 { + v.copy_to_slice(core::slice::from_raw_parts_mut(dptr, STRIDE)); + + if likely!(mask == 0) { + dptr = dptr.add(nb); + break; + } else { + let cn = mask.trailing_zeros() as usize; + impl_escape_unchecked!(sptr, dptr, nb, mask, cn, v, ROTATE); } } } @@ -100,12 +108,6 @@ macro_rules! impl_format_simd { }; } -macro_rules! is_cross_page { - ($src:expr) => { - unsafe { (($src as usize & (PAGE_SIZE - 1)) + STRIDE) > PAGE_SIZE } - }; -} - #[cold] #[inline(never)] fn write_unusual_escape(sptr: *const u8, dptr: *mut u8) -> *mut u8 { diff --git a/src/typeref.rs b/src/typeref.rs index 3689bbb7..eb54306c 100644 --- a/src/typeref.rs +++ b/src/typeref.rs @@ -75,9 +75,6 @@ pub static mut DESCR_STR: *mut PyObject = null_mut(); pub static mut VALUE_STR: *mut PyObject = null_mut(); pub static mut INT_ATTR_STR: *mut PyObject = null_mut(); -#[cfg(feature = "unstable-simd")] -pub static mut PAGE_SIZE: usize = 0; - #[cfg(feature = "yyjson")] pub const YYJSON_BUFFER_SIZE: usize = 1024 * 1024 * 8; @@ -139,10 +136,6 @@ fn _init_typerefs_impl() -> bool { unsafe { debug_assert!(crate::opt::MAX_OPT < u16::MAX as i32); - #[cfg(feature = "unstable-simd")] - { - PAGE_SIZE = page_size::get(); - } assert!(crate::deserialize::KEY_MAP .set(crate::deserialize::KeyMap::default()) .is_ok());