From 76ee9834571e0add7f5a96ca49c84369e966b621 Mon Sep 17 00:00:00 2001 From: ijl Date: Tue, 9 Jan 2024 02:56:56 +0000 Subject: [PATCH] Escape str using SIMD --- .github/workflows/linux.yaml | 18 +++-- ci/azure-macos.yml | 2 +- src/lib.rs | 1 + src/serialize/writer/mod.rs | 3 + src/serialize/writer/simd.rs | 149 +++++++++++++++++++++++++++++++++++ src/util.rs | 16 ++++ 6 files changed, 181 insertions(+), 8 deletions(-) create mode 100644 src/serialize/writer/simd.rs diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index 1ed989cb..03919ffb 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -84,7 +84,7 @@ jobs: - run: | maturin build --release --strip \ --out=dist \ - --features=no-panic,yyjson \ + --features=no-panic,unstable-simd,yyjson \ --compatibility manylinux_2_17 \ --interpreter python${{ matrix.python.version }} \ --target=x86_64-unknown-linux-gnu @@ -147,7 +147,7 @@ jobs: rustup-components: rust-src target: ${{ matrix.platform.target }} manylinux: musllinux_1_1 - args: --release --strip --out=dist --features=no-panic,yyjson -i python${{ matrix.python.version }} + args: --release --strip --out=dist --features=no-panic,unstable-simd,yyjson -i python${{ matrix.python.version }} - name: Set up QEMU if: matrix.platform.arch != 'x86_64' @@ -194,27 +194,31 @@ jobs: target: [ { arch: 'aarch64', - target: 'aarch64-unknown-linux-gnu', cflags: '-O2 -flto', + features: 'no-panic,unstable-simd,yyjson', rustflags: '-Z mir-opt-level=4 -D warnings', + target: 'aarch64-unknown-linux-gnu', }, { arch: 'armv7', - target: 'armv7-unknown-linux-gnueabihf', cflags: '-Os -flto -fstrict-aliasing', + features: 'no-panic,yyjson', # no SIMD rustflags: '-C opt-level=s -Z mir-opt-level=4 -D warnings', + target: 'armv7-unknown-linux-gnueabihf', }, { arch: 'ppc64le', - target: 'powerpc64le-unknown-linux-gnu', cflags: '-O2 -flto', + features: 'no-panic,unstable-simd,yyjson', rustflags: '-Z mir-opt-level=4 -D warnings', + target: 'powerpc64le-unknown-linux-gnu', }, { arch: 's390x', - target: 's390x-unknown-linux-gnu', cflags: '-O2 -flto -march=z10', + features: 'no-panic,unstable-simd,yyjson', rustflags: '-Z mir-opt-level=4 -C target-cpu=z10 -D warnings', + target: 's390x-unknown-linux-gnu', }, ] steps: @@ -237,7 +241,7 @@ jobs: rust-toolchain: nightly-2024-01-08 rustup-components: rust-src manylinux: auto - args: --release --strip --out=dist --features=no-panic,yyjson -i python${{ matrix.python.version }} + args: --release --strip --out=dist --features=${{ matrix.target.features }} -i python${{ matrix.python.version }} - uses: uraimo/run-on-arch-action@v2 name: Test diff --git a/ci/azure-macos.yml b/ci/azure-macos.yml index e8ea91f0..3e629d88 100644 --- a/ci/azure-macos.yml +++ b/ci/azure-macos.yml @@ -23,7 +23,7 @@ steps: PATH=$HOME/.cargo/bin:$PATH \ MACOSX_DEPLOYMENT_TARGET=$(macosx_deployment_target) \ PYO3_CROSS_LIB_DIR=$(python -c "import sysconfig;print(sysconfig.get_config_var('LIBDIR'))") \ - maturin build --release --strip --features=no-panic,yyjson --interpreter $(interpreter) --target=universal2-apple-darwin + maturin build --release --strip --features=no-panic,unstable-simd,yyjson --interpreter $(interpreter) --target=universal2-apple-darwin env: CC: "clang" CFLAGS: "-O2 -fno-plt -flto=thin -fstrict-aliasing" diff --git a/src/lib.rs b/src/lib.rs index 2e0a86c0..bc512174 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ #![cfg_attr(feature = "optimize", feature(optimize_attribute))] #![cfg_attr(feature = "strict_provenance", feature(strict_provenance))] #![cfg_attr(feature = "strict_provenance", warn(fuzzy_provenance_casts))] +#![cfg_attr(feature = "unstable-simd", feature(portable_simd))] #![allow(unknown_lints)] // internal_features #![allow(internal_features)] // core_intrinsics #![allow(unused_unsafe)] diff --git a/src/serialize/writer/mod.rs b/src/serialize/writer/mod.rs index 47fdf503..fc489426 100644 --- a/src/serialize/writer/mod.rs +++ b/src/serialize/writer/mod.rs @@ -1,9 +1,12 @@ // SPDX-License-Identifier: Apache-2.0 mod byteswriter; +#[cfg(not(feature = "unstable-simd"))] mod escape; mod formatter; mod json; +#[cfg(feature = "unstable-simd")] +mod simd; pub use byteswriter::{BytesWriter, WriteExt}; pub use json::{to_writer, to_writer_pretty}; diff --git a/src/serialize/writer/simd.rs b/src/serialize/writer/simd.rs new file mode 100644 index 00000000..9d0a9e13 --- /dev/null +++ b/src/serialize/writer/simd.rs @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright 2023-2024 liuq19, ijl +// adapted from sonic-rs' src/util/string.rs + +use std::simd::cmp::{SimdPartialEq, SimdPartialOrd}; + +macro_rules! impl_escape_unchecked { + ($src:expr, $dst:expr, $nb:expr, $omask:expr, $cn:expr) => { + $nb -= $cn; + $dst = $dst.add($cn); + $src = $src.add($cn); + let mut mask = $omask << $cn; + loop { + $nb -= 1; + mask = mask << 1; + let replacement = if *($src) == b'"' { + (*b"\\\"\0\0\0\0\0\0", 2) + } else if *($src) == b'\\' { + (*b"\\\\\0\0\0\0\0\0", 2) + } else { + match *($src) { + 0 => (*b"\\u0000\0\0", 6), + 1 => (*b"\\u0001\0\0", 6), + 2 => (*b"\\u0002\0\0", 6), + 3 => (*b"\\u0003\0\0", 6), + 4 => (*b"\\u0004\0\0", 6), + 5 => (*b"\\u0005\0\0", 6), + 6 => (*b"\\u0006\0\0", 6), + 7 => (*b"\\u0007\0\0", 6), + 8 => (*b"\\b\0\0\0\0\0\0", 2), + 9 => (*b"\\t\0\0\0\0\0\0", 2), + 10 => (*b"\\n\0\0\0\0\0\0", 2), + 11 => (*b"\\u000b\0\0", 6), + 12 => (*b"\\f\0\0\0\0\0\0", 2), + 13 => (*b"\\r\0\0\0\0\0\0", 2), + 14 => (*b"\\u000e\0\0", 6), + 15 => (*b"\\u000f\0\0", 6), + 16 => (*b"\\u0010\0\0", 6), + 17 => (*b"\\u0011\0\0", 6), + 18 => (*b"\\u0012\0\0", 6), + 19 => (*b"\\u0013\0\0", 6), + 20 => (*b"\\u0014\0\0", 6), + 21 => (*b"\\u0015\0\0", 6), + 22 => (*b"\\u0016\0\0", 6), + 23 => (*b"\\u0017\0\0", 6), + 24 => (*b"\\u0018\0\0", 6), + 25 => (*b"\\u0019\0\0", 6), + 26 => (*b"\\u001a\0\0", 6), + 27 => (*b"\\u001b\0\0", 6), + 28 => (*b"\\u001c\0\0", 6), + 29 => (*b"\\u001d\0\0", 6), + 30 => (*b"\\u001e\0\0", 6), + 31 => (*b"\\u001f\0\0", 6), + _ => unreachable!(), + } + }; + std::ptr::copy_nonoverlapping(replacement.0.as_ptr(), $dst, 8); + $dst = $dst.add(replacement.1 as usize); + $src = $src.add(1); + if likely!(mask & (1 << (STRIDE - 1)) != 1) { + break; + } + } + }; +} +macro_rules! impl_format_simd { + ($odptr:expr, $value_ptr:expr, $value_len:expr) => { + let mut dptr = $odptr; + let dstart = $odptr; + let mut sptr = $value_ptr; + let mut nb: usize = $value_len; + + let blash = StrVector::from_array([b'\\'; STRIDE]); + let quote = StrVector::from_array([b'"'; STRIDE]); + let x20 = StrVector::from_array([32; STRIDE]); + + unsafe { + *dptr = b'"'; + dptr = dptr.add(1); + + while nb >= STRIDE { + let v = StrVector::from_slice(std::slice::from_raw_parts(sptr, STRIDE)); + v.copy_to_slice(std::slice::from_raw_parts_mut(dptr, STRIDE)); + let mask = + (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() as u32; + + if likely!(mask == 0) { + nb -= STRIDE; + dptr = dptr.add(STRIDE); + sptr = sptr.add(STRIDE); + } else { + let cn = mask.trailing_zeros() as usize; + impl_escape_unchecked!(sptr, dptr, nb, mask, cn); + } + } + + while nb > 0 { + let v = StrVector::from_slice(std::slice::from_raw_parts(sptr, STRIDE)); + v.copy_to_slice(std::slice::from_raw_parts_mut(dptr, STRIDE)); + let mask = (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() + as u32 + & (STRIDE_SATURATION >> (STRIDE - nb)); + if likely!(mask == 0) { + dptr = dptr.add(nb); + break; + } else { + let cn = mask.trailing_zeros() as usize; + impl_escape_unchecked!(sptr, dptr, nb, mask, cn); + } + } + + *dptr = b'"'; + dptr = dptr.add(1); + } + + return dptr as usize - dstart as usize; + }; +} + +#[inline(never)] +#[cfg(not(target_feature = "avx2"))] +#[cfg_attr(target_arch = "x86_64", cold)] +pub unsafe fn format_escaped_str_impl_128( + odptr: *mut u8, + value_ptr: *const u8, + value_len: usize, +) -> usize { + const STRIDE: usize = 16; + const STRIDE_SATURATION: u32 = u16::MAX as u32; + type StrVector = std::simd::u8x16; + + impl_format_simd!(odptr, value_ptr, value_len); +} + +#[cfg(target_arch = "x86_64")] +#[inline(never)] +#[cfg_attr(not(target_feature = "avx2"), target_feature(enable = "avx2"))] +#[cfg_attr(not(target_feature = "avx2"), target_feature(enable = "bmi2"))] +pub unsafe fn format_escaped_str_impl_256( + odptr: *mut u8, + value_ptr: *const u8, + value_len: usize, +) -> usize { + const STRIDE: usize = 32; + const STRIDE_SATURATION: u32 = u32::MAX; + type StrVector = std::simd::u8x32; + + impl_format_simd!(odptr, value_ptr, value_len); +} diff --git a/src/util.rs b/src/util.rs index 95ed6b33..a51dc633 100644 --- a/src/util.rs +++ b/src/util.rs @@ -70,6 +70,22 @@ macro_rules! unlikely { }; } +#[allow(unused_macros)] +#[cfg(feature = "intrinsics")] +macro_rules! likely { + ($exp:expr) => { + std::intrinsics::likely($exp) + }; +} + +#[allow(unused_macros)] +#[cfg(not(feature = "intrinsics"))] +macro_rules! likely { + ($exp:expr) => { + $exp + }; +} + macro_rules! nonnull { ($exp:expr) => { unsafe { std::ptr::NonNull::new_unchecked($exp) }