Skip to content

Commit

Permalink
Escape str using SIMD
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Dec 22, 2023
1 parent 2b78849 commit cfe313a
Show file tree
Hide file tree
Showing 12 changed files with 474 additions and 18 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/debug.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@ jobs:
]
python: [
{ version: '3.12', abi: 'cp312-cp312' },
{ version: '3.11', abi: 'cp311-cp311' },
{ version: '3.8', abi: 'cp38-cp38' },
]
features: [
"",
"--features=yyjson",
]
env:
CC: "gcc"
CFLAGS: "-O2 -fno-plt"
Expand All @@ -36,9 +39,8 @@ jobs:
PATH="$HOME/.cargo/bin:$PATH" maturin build --release \
--out=dist \
--profile=dev \
--features=yyjson \
--interpreter python${{ matrix.python.version }} \
--target=x86_64-unknown-linux-gnu
--target=x86_64-unknown-linux-gnu ${{ matrix.features }}
- run: python -m pip install --user dist/orjson*.whl
- run: python -m pip install --user -r test/requirements.txt -r integration/requirements.txt
Expand Down
18 changes: 11 additions & 7 deletions .github/workflows/linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
- run: |
maturin build --release --strip \
--out=dist \
--features=no-panic,yyjson \
--features=no-panic,simd-write,yyjson \
--compatibility manylinux_2_17 \
--interpreter python${{ matrix.python.version }} \
--target=x86_64-unknown-linux-gnu
Expand Down Expand Up @@ -147,7 +147,7 @@ jobs:
rustup-components: rust-src
target: ${{ matrix.platform.target }}
manylinux: musllinux_1_1
args: --release --strip --out=dist --features=no-panic,yyjson -i python${{ matrix.python.version }}
args: --release --strip --out=dist --features=no-panic,simd-write,yyjson -i python${{ matrix.python.version }}

- name: Set up QEMU
if: matrix.platform.arch != 'x86_64'
Expand Down Expand Up @@ -194,27 +194,31 @@ jobs:
target: [
{
arch: 'aarch64',
target: 'aarch64-unknown-linux-gnu',
cflags: '-O2 -flto',
features: 'no-panic,simd-write,yyjson', # NEON
rustflags: '-Z mir-opt-level=4 -D warnings',
target: 'aarch64-unknown-linux-gnu',
},
{
arch: 'armv7',
target: 'armv7-unknown-linux-gnueabihf',
cflags: '-Os -flto -fstrict-aliasing',
features: 'no-panic,yyjson', # no SIMD
rustflags: '-C opt-level=s -Z mir-opt-level=4 -D warnings',
target: 'armv7-unknown-linux-gnueabihf',
},
{
arch: 'ppc64le',
target: 'powerpc64le-unknown-linux-gnu',
cflags: '-O2 -flto',
features: 'no-panic,yyjson', # unknown SIMD baseline
rustflags: '-Z mir-opt-level=4 -D warnings',
target: 'powerpc64le-unknown-linux-gnu',
},
{
arch: 's390x',
target: 's390x-unknown-linux-gnu',
cflags: '-O2 -flto -march=z10',
features: 'no-panic,yyjson', # unknown SIMD baseline
rustflags: '-Z mir-opt-level=4 -C target-cpu=z10 -D warnings',
target: 's390x-unknown-linux-gnu',
},
]
steps:
Expand All @@ -237,7 +241,7 @@ jobs:
rust-toolchain: nightly-2023-12-10
rustup-components: rust-src
manylinux: auto
args: --release --strip --out=dist --features=no-panic,yyjson -i python${{ matrix.python.version }}
args: --release --strip --out=dist --features=${{ matrix.target.features }} -i python${{ matrix.python.version }}

- uses: uraimo/run-on-arch-action@v2
name: Test
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ no-panic = [
"ryu/no-panic",
]

# Escape strings using SIMD when serializing. Enabling this feature pulls in
# the optional `packed_simd` and `libc` dependencies.
simd-write = [
"packed_simd",
"libc",
]

# Build yyjson as a backend and panic if it fails. The default is to attempt
# to build and on failure fall back to another backend.
yyjson = []
Expand All @@ -58,7 +63,9 @@ compact_str = { version = "0.7", default_features = false, features = ["serde"]
encoding_rs = { version = "0.8", default_features = false }
itoa = { version = "1", default_features = false }
itoap = { version = "1", features = ["std", "simd"] }
libc = { version = "0.2", default_features = false, optional = true }
once_cell = { version = "1", default_features = false, features = ["race"] }
packed_simd = { version = "0.3", default_features = false, optional = true }
pyo3-ffi = { version = "^0.20", default_features = false, features = ["extension-module"]}
ryu = { version = "1", default_features = false }
serde = { version = "1", default_features = false }
Expand Down
2 changes: 1 addition & 1 deletion ci/azure-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ steps:
PATH=$HOME/.cargo/bin:$PATH \
MACOSX_DEPLOYMENT_TARGET=$(macosx_deployment_target) \
PYO3_CROSS_LIB_DIR=$(python -c "import sysconfig;print(sysconfig.get_config_var('LIBDIR'))") \
maturin build --release --strip --features=no-panic,yyjson --interpreter $(interpreter) --target=universal2-apple-darwin
maturin build --release --strip --features=no-panic,simd-write,yyjson --interpreter $(interpreter) --target=universal2-apple-darwin
env:
CC: "clang"
CFLAGS: "-O2 -fno-plt -flto=thin -fstrict-aliasing"
Expand Down
28 changes: 26 additions & 2 deletions src/serialize/json.rs → src/serialize/backend/json.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
// This is an adaptation of `src/value/ser.rs` from serde-json.

use crate::serialize::writer::WriteExt;
use crate::serialize::backend::WriteExt;
use serde::ser::{self, Impossible, Serialize};
use serde_json::error::{Error, Result};
use std::io;
Expand Down Expand Up @@ -622,6 +622,7 @@ where
}
}

#[cfg(not(feature = "simd-write"))]
pub enum CharEscape {
/// An escaped quote `"`
Quote,
Expand All @@ -642,6 +643,7 @@ pub enum CharEscape {
AsciiControl(u8),
}

#[cfg(not(feature = "simd-write"))]
impl CharEscape {
#[inline]
fn from_escape_table(escape: u8, byte: u8) -> CharEscape {
Expand Down Expand Up @@ -863,6 +865,7 @@ pub trait Formatter {
}

#[inline]
#[cfg(not(feature = "simd-write"))]
fn write_char_escape<W>(&mut self, writer: &mut W, char_escape: CharEscape) -> io::Result<()>
where
W: ?Sized + io::Write + WriteExt,
Expand Down Expand Up @@ -1137,6 +1140,16 @@ impl Formatter for PrettyFormatter {
}
}

/// Writes `value` to `writer` by delegating to the `sonic` backend's
/// `format_escaped_str`.
///
/// This variant is compiled only when the `simd-write` feature is enabled.
/// The formatter argument is accepted so the signature matches the variant
/// built without the feature, but it is not used here.
#[cfg(feature = "simd-write")]
fn format_escaped_str<W: ?Sized + io::Write + WriteExt, F: ?Sized + Formatter>(
    writer: &mut W,
    _formatter: &mut F,
    value: &str,
) -> io::Result<()> {
    use crate::serialize::backend::sonic;

    sonic::format_escaped_str(writer, value)
}

#[cfg(not(feature = "simd-write"))]
fn format_escaped_str<W, F>(writer: &mut W, formatter: &mut F, value: &str) -> io::Result<()>
where
W: ?Sized + io::Write + WriteExt,
Expand All @@ -1148,7 +1161,6 @@ where
reserve_minimum!(writer);
return unsafe { writer.write_reserved_fragment(b"\"\"") };
}

unsafe {
let mut escapes: u8 = __;
let mut idx = 0;
Expand Down Expand Up @@ -1179,6 +1191,7 @@ where
writer.write_str(value)
}

#[cfg(not(feature = "simd-write"))]
fn format_escaped_str_with_escapes<W, F>(
writer: &mut W,
formatter: &mut F,
Expand All @@ -1203,6 +1216,7 @@ where
Ok(())
}

#[cfg(not(feature = "simd-write"))]
fn format_escaped_str_contents<W, F>(
writer: &mut W,
formatter: &mut F,
Expand Down Expand Up @@ -1271,18 +1285,28 @@ where
Ok(())
}

// Short names for the entries of the `ESCAPE` lookup table below. Each
// constant is compiled only when the `simd-write` feature is disabled; the
// trailing comment on each line gives the input byte (or byte range) it
// stands for.
#[cfg(not(feature = "simd-write"))]
const BB: u8 = b'b'; // \x08
#[cfg(not(feature = "simd-write"))]
const TT: u8 = b't'; // \x09
#[cfg(not(feature = "simd-write"))]
const NN: u8 = b'n'; // \x0A
#[cfg(not(feature = "simd-write"))]
const FF: u8 = b'f'; // \x0C
#[cfg(not(feature = "simd-write"))]
const RR: u8 = b'r'; // \x0D
#[cfg(not(feature = "simd-write"))]
const QU: u8 = b'"'; // \x22
#[cfg(not(feature = "simd-write"))]
const BS: u8 = b'\\'; // \x5C
#[cfg(not(feature = "simd-write"))]
const UU: u8 = b'u'; // \x00...\x1F except the ones above
#[cfg(not(feature = "simd-write"))]
const __: u8 = 0;

// Lookup table of escape sequences. A value of b'x' at index i means that byte
// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
#[cfg(not(feature = "simd-write"))]
const ESCAPE: [u8; 256] = [
// 1 2 3 4 5 6 7 8 9 A B C D E F
UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0
Expand Down
9 changes: 9 additions & 0 deletions src/serialize/backend/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// SPDX-License-Identifier: Apache-2.0

mod json;
#[cfg(feature = "simd-write")]
mod sonic;
mod writer;

pub use json::{to_writer, to_writer_pretty};
pub use writer::{BytesWriter, WriteExt};
Loading

0 comments on commit cfe313a

Please sign in to comment.