From cb0808dc78ddcfc28e2f6fe6877d3bed50d9a6f8 Mon Sep 17 00:00:00 2001 From: ijl Date: Tue, 30 Apr 2024 23:39:21 +0000 Subject: [PATCH] cargo update, profile, clippy, bloat misc --- .github/workflows/artifact.yaml | 10 +- .github/workflows/debug.yaml | 2 +- Cargo.lock | 40 ++-- Cargo.toml | 34 +-- README.md | 2 +- build.rs | 2 +- ci/azure-pipelines.yml | 2 +- script/pynumpy | 2 +- script/valgrind | 5 + src/deserialize/cache.rs | 1 + src/deserialize/mod.rs | 3 +- src/deserialize/pyobject.rs | 48 ++-- src/serialize/per_type/dataclass.rs | 12 +- src/serialize/per_type/datetime.rs | 7 +- src/serialize/per_type/datetimelike.rs | 4 +- src/serialize/per_type/dict.rs | 294 +++++++++++++------------ src/serialize/per_type/int.rs | 1 + src/serialize/per_type/list.rs | 1 + src/serialize/per_type/numpy.rs | 28 ++- src/serialize/per_type/uuid.rs | 4 +- src/serialize/writer/byteswriter.rs | 1 + src/serialize/writer/formatter.rs | 59 ++--- src/serialize/writer/json.rs | 54 ++--- src/util.rs | 6 + 24 files changed, 310 insertions(+), 312 deletions(-) create mode 100755 script/valgrind diff --git a/.github/workflows/artifact.yaml b/.github/workflows/artifact.yaml index 8ce41474..acb33042 100644 --- a/.github/workflows/artifact.yaml +++ b/.github/workflows/artifact.yaml @@ -1,7 +1,7 @@ name: artifact on: push env: - RUST_TOOLCHAIN: "nightly-2024-04-15" + RUST_TOOLCHAIN: "nightly-2024-04-30" CARGO_UNSTABLE_SPARSE_REGISTRY: "true" UNSAFE_PYO3_SKIP_VERSION_CHECK: "1" jobs: @@ -156,7 +156,7 @@ jobs: LDFLAGS: "-Wl,--as-needed" RUSTFLAGS: "-Z mir-opt-level=4 -Z threads=4 -D warnings -C target-feature=-crt-static" with: - rust-toolchain: nightly-2024-04-15 + rust-toolchain: nightly-2024-04-30 rustup-components: rust-src target: ${{ matrix.platform.target }} manylinux: musllinux_1_2 @@ -245,7 +245,7 @@ jobs: RUSTFLAGS: "${{ matrix.target.rustflags }}" with: target: ${{ matrix.target.target }} - rust-toolchain: nightly-2024-04-15 + rust-toolchain: nightly-2024-04-30 rustup-components: rust-src manylinux: auto args: --release --strip --out=dist --features=${{ matrix.target.features }} -i python${{ matrix.python.version }} @@ -308,7 +308,7 @@ jobs: - uses: dtolnay/rust-toolchain@master with: - toolchain: "nightly-2024-04-15" + toolchain: "nightly-2024-04-30" targets: "aarch64-apple-darwin, x86_64-apple-darwin" components: "rust-src" @@ -378,7 +378,7 @@ jobs: - uses: dtolnay/rust-toolchain@master with: - toolchain: "nightly-2024-04-15" + toolchain: "nightly-2024-04-30" targets: "aarch64-apple-darwin, x86_64-apple-darwin" components: "rust-src" diff --git a/.github/workflows/debug.yaml b/.github/workflows/debug.yaml index d42079ca..adf2ca7d 100644 --- a/.github/workflows/debug.yaml +++ b/.github/workflows/debug.yaml @@ -10,7 +10,7 @@ jobs: profile: [ { rust: "1.72", features: "" }, { rust: "1.72", features: "--features=yyjson" }, - { rust: "nightly-2024-04-15", features: "--features=yyjson,unstable-simd" }, + { rust: "nightly-2024-04-30", features: "--features=yyjson,unstable-simd"}, ] python: [ { version: '3.13' }, diff --git a/Cargo.lock b/Cargo.lock index fd3f57f0..06c7baf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,9 +47,9 @@ dependencies = [ [[package]] name = "bytecount" -version = "0.6.7" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" [[package]] name = "castaway" @@ -62,9 +62,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.94" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7" +checksum = "065a29261d53ba54260972629f9ca6bffa69bac13cd1fed61420f7fa68b9f8bd" [[package]] name = "cfg-if" @@ -168,9 +168,9 @@ checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.154" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" [[package]] name = "no-panic" @@ -227,18 +227,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.80" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56dea16b0a29e94408b9aa5e2940a4eedbd128a1ba20e8f7ae60fd3d465af0e" +checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3-build-config" -version = "0.21.1" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "650dca34d463b6cdbdb02b1d71bfd6eb6b6816afc708faebb3bac1380ff4aef7" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" dependencies = [ "once_cell", "target-lexicon", @@ -246,9 +246,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.21.1" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a7da8fc04a8a2084909b59f29e1b8474decac98b951d77b80b26dc45f046ad" +checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" dependencies = [ "libc", "pyo3-build-config", @@ -280,18 +280,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.197" +version = "1.0.199" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "0c9f6e76df036c77cd94996771fb40db98187f096dd0b9af39c6c6e452ba966a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.199" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "11bd257a6541e141e42ca6d24ae26f7714887b47e89aa739099104c7e4d3b7fc" dependencies = [ "proc-macro2", "quote", @@ -300,9 +300,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.115" +version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" dependencies = [ "itoa", "ryu", @@ -329,9 +329,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "2.0.59" +version = "2.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a6531ffc7b071655e4ce2e04bd464c4830bb585a61cabb96cf808f05172615a" +checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index c8cf056e..a0382fd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,24 +45,24 @@ no-panic = [ yyjson = [] [dependencies] -ahash = { version = "^0.8.9", default_features = false, features = ["compile-time-rng"] } -arrayvec = { version = "0.7", default_features = false, features = ["std", "serde"] } -associative-cache = { version = "2", default_features = false } -beef = { version = "0.5", default_features = false, features = ["impl_serde"] } -bytecount = { version = "^0.6.7", default_features = false, features = ["runtime-dispatch-simd"] } -chrono = { version = "=0.4.34", default_features = false } -compact_str = { version = "0.7", default_features = false, features = ["serde"] } -encoding_rs = { version = "0.8", default_features = false } -half = { version = "2", default_features = false, features = ["std"] } -itoa = { version = "1", default_features = false } +ahash = { version = "^0.8.9", default-features = false, features = ["compile-time-rng"] } +arrayvec = { version = "0.7", default-features = false, features = ["std", "serde"] } +associative-cache = { version = "2", default-features = false } +beef = { version = "0.5", default-features = false, features = ["impl_serde"] } +bytecount = { version = "^0.6.7", default-features = false, features = ["runtime-dispatch-simd"] } +chrono = { version = "=0.4.34", default-features = false } +compact_str = { version = "0.7", default-features = false, features = ["serde"] } +encoding_rs = { version = "0.8", default-features = false } +half = { version = "2", default-features = false, features = ["std"] } +itoa = { version = "1", default-features = false } itoap = { version = "1", features = ["std", "simd"] } -once_cell = { version = "1", default_features = false, features = ["race"] } -pyo3-ffi = { version = "^0.21", default_features = false, features = ["extension-module"]} -ryu = { version = "1", default_features = false } -serde = { version = "1", default_features = false } -serde_json = { version = "1", default_features = false, features = ["std", "float_roundtrip"] } -simdutf8 = { version = "0.1", default_features = false, features = ["std", "aarch64_neon"] } -smallvec = { version = "^1.11", default_features = false, features = ["union", "write"] } +once_cell = { version = "1", default-features = false, features = ["race"] } +pyo3-ffi = { version = "^0.21", default-features = false, features = ["extension-module"]} +ryu = { version = "1", default-features = false } +serde = { version = "1", default-features = false } +serde_json = { version = "1", default-features = false, features = ["std", "float_roundtrip"] } +simdutf8 = { version = "0.1", default-features = false, features = ["std", "aarch64_neon"] } +smallvec = { version = "^1.11", default-features = false, features = ["union", "write"] } [build-dependencies] cc = { version = "1" } diff --git a/README.md b/README.md index 3863b295..7ceaa6fe 100644 --- a/README.md +++ b/README.md @@ -1203,7 +1203,7 @@ It benefits from also having a C build environment to compile a faster deserialization backend. See this project's `manylinux_2_28` builds for an example using clang and LTO. -The project's own CI tests against `nightly-2024-04-15` and stable 1.72. It +The project's own CI tests against `nightly-2024-04-30` and stable 1.72. It is prudent to pin the nightly version because that channel can introduce breaking changes. diff --git a/build.rs b/build.rs index ee42945e..0d8477d5 100644 --- a/build.rs +++ b/build.rs @@ -29,7 +29,7 @@ fn main() { } #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] - if !env::var("ORJSON_DISABLE_SIMD").is_ok() { + if env::var("ORJSON_DISABLE_SIMD").is_err() { if let Some(true) = version_check::supports_feature("portable_simd") { println!("cargo:rustc-cfg=feature=\"unstable-simd\""); } diff --git a/ci/azure-pipelines.yml b/ci/azure-pipelines.yml index 3ba78fe9..cdc04f8d 100644 --- a/ci/azure-pipelines.yml +++ b/ci/azure-pipelines.yml @@ -1,5 +1,5 @@ variables: - toolchain: nightly-2024-04-15 + toolchain: nightly-2024-04-30 jobs: diff --git a/script/pynumpy b/script/pynumpy index bfaeaa31..2ad6d982 100755 --- a/script/pynumpy +++ b/script/pynumpy @@ -35,7 +35,7 @@ elif kind == "float64": array = numpy.random.random(size=(50000, 100)) assert array.dtype == numpy.float64 elif kind == "bool": - dtype = numpy.bool + dtype = numpy.bool_ array = numpy.random.choice((True, False), size=(100000, 200)) elif kind == "int8": dtype = numpy.int8 diff --git a/script/valgrind b/script/valgrind new file mode 100755 index 00000000..cbf2a304 --- /dev/null +++ b/script/valgrind @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +set -eou pipefail + +valgrind pytest -v --ignore=test/test_memory.py test diff --git a/src/deserialize/cache.rs b/src/deserialize/cache.rs index 610377d3..0e3111dd 100644 --- a/src/deserialize/cache.rs +++ b/src/deserialize/cache.rs @@ -38,6 +38,7 @@ pub type KeyMap = pub static mut KEY_MAP: OnceCell = OnceCell::new(); +#[inline(always)] pub fn cache_hash(key: &[u8]) -> u64 { // try to omit code for >64 path in ahash assume!(key.len() <= 64); diff --git a/src/deserialize/mod.rs b/src/deserialize/mod.rs index 21c682e4..9ec01cfa 100644 --- a/src/deserialize/mod.rs +++ b/src/deserialize/mod.rs @@ -12,7 +12,6 @@ mod json; #[cfg(feature = "yyjson")] mod yyjson; -pub use cache::KeyMap; -pub use cache::KEY_MAP; +pub use cache::{KeyMap, KEY_MAP}; pub use deserializer::deserialize; pub use error::DeserializeError; diff --git a/src/deserialize/pyobject.rs b/src/deserialize/pyobject.rs index af4f1985..9ad9b090 100644 --- a/src/deserialize/pyobject.rs +++ b/src/deserialize/pyobject.rs @@ -5,34 +5,30 @@ use crate::str::{hash_str, unicode_from_str}; use crate::typeref::{FALSE, NONE, TRUE}; use core::ptr::NonNull; +#[inline(always)] pub fn get_unicode_key(key_str: &str) -> *mut pyo3_ffi::PyObject { - let pykey = if unlikely!(key_str.len() > 64) { - create_str_impl(key_str) + if unlikely!(key_str.len() > 64) { + let pyob = unicode_from_str(key_str); + hash_str(pyob); + pyob } else { - get_unicode_key_impl(key_str) - }; - debug_assert!(ffi!(Py_REFCNT(pykey)) >= 1); - debug_assert!(unsafe { (*pykey.cast::()).hash != -1 }); - pykey -} - -fn get_unicode_key_impl(key_str: &str) -> *mut pyo3_ffi::PyObject { - let hash = cache_hash(key_str.as_bytes()); - let map = unsafe { KEY_MAP.get_mut().unwrap_or_else(|| unreachable!()) }; - let entry = map.entry(&hash).or_insert_with( - || hash, - || { - let pyob = create_str_impl(key_str); - CachedKey::new(pyob) - }, - ); - entry.get() -} - -fn create_str_impl(key_str: &str) -> *mut pyo3_ffi::PyObject { - let pyob = unicode_from_str(key_str); - hash_str(pyob); - pyob + let hash = cache_hash(key_str.as_bytes()); + unsafe { + let entry = KEY_MAP + .get_mut() + .unwrap_or_else(|| unreachable!()) + .entry(&hash) + .or_insert_with( + || hash, + || { + let pyob = unicode_from_str(key_str); + hash_str(pyob); + CachedKey::new(pyob) + }, + ); + entry.get() + } + } } #[allow(dead_code)] diff --git a/src/serialize/per_type/dataclass.rs b/src/serialize/per_type/dataclass.rs index f3c434b9..96592e91 100644 --- a/src/serialize/per_type/dataclass.rs +++ b/src/serialize/per_type/dataclass.rs @@ -83,6 +83,7 @@ impl DataclassFastSerializer { } impl Serialize for DataclassFastSerializer { + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -92,12 +93,13 @@ impl Serialize for DataclassFastSerializer { return ZeroDictSerializer::new().serialize(serializer); } let mut map = serializer.serialize_map(None).unwrap(); - let mut next_key: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); - let mut next_value: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); let mut pos = 0; + let mut next_key: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); + let mut next_value: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); + for _ in 0..ffi!(Py_SIZE(self.ptr)) as usize { let key = next_key; let value = next_value; @@ -144,6 +146,8 @@ impl DataclassFallbackSerializer { } impl Serialize for DataclassFallbackSerializer { + #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -157,12 +161,12 @@ impl Serialize for DataclassFallbackSerializer { } let mut map = serializer.serialize_map(None).unwrap(); + let mut pos = 0; let mut next_key: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); let mut next_value: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); - let mut pos = 0; - pydict_next!(fields, &mut pos, &mut next_key, &mut next_value); + for _ in 0..ffi!(Py_SIZE(fields)) as usize { let attr = next_key; let field = next_value; diff --git a/src/serialize/per_type/datetime.rs b/src/serialize/per_type/datetime.rs index 859c92d5..ae5fb35b 100644 --- a/src/serialize/per_type/datetime.rs +++ b/src/serialize/per_type/datetime.rs @@ -42,6 +42,7 @@ impl Date { Date { ptr: ptr } } + #[inline(never)] pub fn write_buf(&self, buf: &mut DateTimeBuffer) { { let year = ffi!(PyDateTime_GET_YEAR(self.ptr)); @@ -66,8 +67,6 @@ impl Date { } } impl Serialize for Date { - #[cold] - #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -95,6 +94,7 @@ impl Time { } } + #[inline(never)] pub fn write_buf(&self, buf: &mut DateTimeBuffer) -> Result<(), TimeError> { if unsafe { (*(self.ptr as *mut pyo3_ffi::PyDateTime_Time)).hastzinfo == 1 } { return Err(TimeError::HasTimezone); @@ -116,8 +116,6 @@ impl Time { } impl Serialize for Time { - #[cold] - #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -237,7 +235,6 @@ impl DateTimeLike for DateTime { } impl Serialize for DateTime { - #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, diff --git a/src/serialize/per_type/datetimelike.rs b/src/serialize/per_type/datetimelike.rs index 97a3d270..eca98898 100644 --- a/src/serialize/per_type/datetimelike.rs +++ b/src/serialize/per_type/datetimelike.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + use crate::opt::*; pub enum DateTimeError { @@ -92,7 +94,7 @@ pub trait DateTimeLike { /// Write `self` to a buffer in RFC3339 format, using `opts` to /// customise if desired. - #[cfg_attr(feature = "optimize", optimize(size))] + #[inline(never)] fn write_buf(&self, buf: &mut DateTimeBuffer, opts: Opt) -> Result<(), DateTimeError> { { let year = self.year(); diff --git a/src/serialize/per_type/dict.rs b/src/serialize/per_type/dict.rs index 56a54d9b..6d185ac5 100644 --- a/src/serialize/per_type/dict.rs +++ b/src/serialize/per_type/dict.rs @@ -29,6 +29,7 @@ impl ZeroDictSerializer { } impl Serialize for ZeroDictSerializer { + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -40,6 +41,7 @@ impl Serialize for ZeroDictSerializer { pub struct DictGenericSerializer { ptr: *mut pyo3_ffi::PyObject, state: SerializerState, + #[allow(dead_code)] default: Option>, } @@ -58,7 +60,7 @@ impl DictGenericSerializer { } impl Serialize for DictGenericSerializer { - #[inline] + #[inline(always)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -66,14 +68,23 @@ impl Serialize for DictGenericSerializer { if unlikely!(self.state.recursion_limit()) { err!(SerializeError::RecursionLimit) } - if ffi!(Py_SIZE(self.ptr)) == 0 { + + if unlikely!(ffi!(Py_SIZE(self.ptr)) == 0) { ZeroDictSerializer::new().serialize(serializer) - } else if opt_disabled!(self.state.opts(), SORT_OR_NON_STR_KEYS) { - Dict::new(self.ptr, self.state, self.default).serialize(serializer) + } else if likely!(opt_disabled!(self.state.opts(), SORT_OR_NON_STR_KEYS)) { + unsafe { + core::mem::transmute::<&DictGenericSerializer, &Dict>(self).serialize(serializer) + } } else if opt_enabled!(self.state.opts(), NON_STR_KEYS) { - DictNonStrKey::new(self.ptr, self.state, self.default).serialize(serializer) + unsafe { + core::mem::transmute::<&DictGenericSerializer, &DictNonStrKey>(self) + .serialize(serializer) + } } else { - DictSortedKey::new(self.ptr, self.state, self.default).serialize(serializer) + unsafe { + core::mem::transmute::<&DictGenericSerializer, &DictSortedKey>(self) + .serialize(serializer) + } } } } @@ -83,36 +94,25 @@ pub struct Dict { state: SerializerState, default: Option>, } -impl Dict { - pub fn new( - ptr: *mut pyo3_ffi::PyObject, - state: SerializerState, - default: Option>, - ) -> Self { - Dict { - ptr: ptr, - state: state, - default: default, - } - } -} + impl Serialize for Dict { #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, { - debug_assert!(ffi!(Py_SIZE(self.ptr)) > 0); - - let mut map = serializer.serialize_map(None).unwrap(); - + let mut pos = 0; let mut next_key: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); let mut next_value: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); - let mut pos = 0; - pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); - for _ in 0..ffi!(Py_SIZE(self.ptr)) as usize { + + let mut map = serializer.serialize_map(None).unwrap(); + + let len = ffi!(Py_SIZE(self.ptr)) as usize; + assume!(len > 0); + + for _ in 0..len { let key = next_key; let value = next_value; @@ -185,37 +185,24 @@ pub struct DictSortedKey { default: Option>, } -impl DictSortedKey { - pub fn new( - ptr: *mut pyo3_ffi::PyObject, - state: SerializerState, - default: Option>, - ) -> Self { - DictSortedKey { - ptr: ptr, - state: state, - default: default, - } - } -} - impl Serialize for DictSortedKey { #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, { - let len = ffi!(Py_SIZE(self.ptr)) as usize; - debug_assert!(len > 0); - let mut items: SmallVec<[(&str, *mut pyo3_ffi::PyObject); 8]> = - SmallVec::with_capacity(len); - + let mut pos = 0; let mut next_key: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); let mut next_value: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); - let mut pos = 0; - pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); + + let len = ffi!(Py_SIZE(self.ptr)) as usize; + assume!(len > 0); + + let mut items: SmallVec<[(&str, *mut pyo3_ffi::PyObject); 8]> = + SmallVec::with_capacity(len); + for _ in 0..len as usize { let key = next_key; let value = next_value; @@ -244,6 +231,104 @@ impl Serialize for DictSortedKey { } } +#[inline(never)] +fn non_str_str(key: *mut pyo3_ffi::PyObject) -> Result { + // because of ObType::Enum + let uni = unicode_to_str(key); + if unlikely!(uni.is_none()) { + Err(SerializeError::InvalidStr) + } else { + Ok(CompactString::from(uni.unwrap())) + } +} + +#[cold] +#[inline(never)] +fn non_str_str_subclass(key: *mut pyo3_ffi::PyObject) -> Result { + let uni = unicode_to_str_via_ffi(key); + if unlikely!(uni.is_none()) { + Err(SerializeError::InvalidStr) + } else { + Ok(CompactString::from(uni.unwrap())) + } +} + +#[inline(never)] +fn non_str_date(key: *mut pyo3_ffi::PyObject) -> Result { + let mut buf = DateTimeBuffer::new(); + Date::new(key).write_buf(&mut buf); + let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); + Ok(CompactString::from(key_as_str)) +} + +#[inline(never)] +fn non_str_datetime( + key: *mut pyo3_ffi::PyObject, + opts: crate::opt::Opt, +) -> Result { + let mut buf = DateTimeBuffer::new(); + let dt = DateTime::new(key, opts); + if dt.write_buf(&mut buf, opts).is_err() { + return Err(SerializeError::DatetimeLibraryUnsupported); + } + let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); + Ok(CompactString::from(key_as_str)) +} + +#[cold] +#[inline(never)] +fn non_str_time( + key: *mut pyo3_ffi::PyObject, + opts: crate::opt::Opt, +) -> Result { + let mut buf = DateTimeBuffer::new(); + let time = Time::new(key, opts); + if time.write_buf(&mut buf).is_err() { + return Err(SerializeError::TimeHasTzinfo); + } + let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); + Ok(CompactString::from(key_as_str)) +} + +#[inline(never)] +fn non_str_uuid(key: *mut pyo3_ffi::PyObject) -> Result { + let mut buf = arrayvec::ArrayVec::::new(); + UUID::new(key).write_buf(&mut buf); + let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); + Ok(CompactString::from(key_as_str)) +} + +#[cold] +#[inline(never)] +fn non_str_float(key: *mut pyo3_ffi::PyObject) -> Result { + let val = ffi!(PyFloat_AS_DOUBLE(key)); + if !val.is_finite() { + Ok(CompactString::new_inline("null")) + } else { + Ok(CompactString::from(ryu::Buffer::new().format_finite(val))) + } +} + +#[inline(never)] +fn non_str_int(key: *mut pyo3_ffi::PyObject) -> Result { + let ival = ffi!(PyLong_AsLongLong(key)); + if unlikely!(ival == -1 && !ffi!(PyErr_Occurred()).is_null()) { + ffi!(PyErr_Clear()); + let uval = ffi!(PyLong_AsUnsignedLongLong(key)); + if unlikely!(uval == u64::MAX && !ffi!(PyErr_Occurred()).is_null()) { + return Err(SerializeError::DictIntegerKey64Bit); + } + Ok(CompactString::from(itoa::Buffer::new().format(uval))) + } else { + Ok(CompactString::from(itoa::Buffer::new().format(ival))) + } +} + +#[inline(never)] +fn sort_non_str_dict_items(items: &mut SmallVec<[(CompactString, *mut pyo3_ffi::PyObject); 8]>) { + items.sort_unstable_by(|a, b| a.0.cmp(&b.0)); +} + pub struct DictNonStrKey { ptr: *mut pyo3_ffi::PyObject, state: SerializerState, @@ -251,19 +336,6 @@ pub struct DictNonStrKey { } impl DictNonStrKey { - pub fn new( - ptr: *mut pyo3_ffi::PyObject, - state: SerializerState, - default: Option>, - ) -> Self { - DictNonStrKey { - ptr: ptr, - state: state, - default: default, - } - } - - #[inline(never)] fn pyobject_to_string( key: *mut pyo3_ffi::PyObject, opts: crate::opt::Opt, @@ -271,64 +343,18 @@ impl DictNonStrKey { match pyobject_to_obtype(key, opts) { ObType::None => Ok(CompactString::new_inline("null")), ObType::Bool => { - let key_as_str = if unsafe { key == TRUE } { - "true" - } else { - "false" - }; - Ok(CompactString::from(key_as_str)) - } - ObType::Int => { - let ival = ffi!(PyLong_AsLongLong(key)); - if unlikely!(ival == -1 && !ffi!(PyErr_Occurred()).is_null()) { - ffi!(PyErr_Clear()); - let uval = ffi!(PyLong_AsUnsignedLongLong(key)); - if unlikely!(uval == u64::MAX && !ffi!(PyErr_Occurred()).is_null()) { - return Err(SerializeError::DictIntegerKey64Bit); - } - Ok(CompactString::from(itoa::Buffer::new().format(uval))) - } else { - Ok(CompactString::from(itoa::Buffer::new().format(ival))) - } - } - ObType::Float => { - let val = ffi!(PyFloat_AS_DOUBLE(key)); - if !val.is_finite() { - Ok(CompactString::new_inline("null")) + if unsafe { key == TRUE } { + Ok(CompactString::new_inline("true")) } else { - Ok(CompactString::from(ryu::Buffer::new().format_finite(val))) + Ok(CompactString::new_inline("false")) } } - ObType::Datetime => { - let mut buf = DateTimeBuffer::new(); - let dt = DateTime::new(key, opts); - if dt.write_buf(&mut buf, opts).is_err() { - return Err(SerializeError::DatetimeLibraryUnsupported); - } - let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); - Ok(CompactString::from(key_as_str)) - } - ObType::Date => { - let mut buf = DateTimeBuffer::new(); - Date::new(key).write_buf(&mut buf); - let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); - Ok(CompactString::from(key_as_str)) - } - ObType::Time => { - let mut buf = DateTimeBuffer::new(); - let time = Time::new(key, opts); - if time.write_buf(&mut buf).is_err() { - return Err(SerializeError::TimeHasTzinfo); - } - let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); - Ok(CompactString::from(key_as_str)) - } - ObType::Uuid => { - let mut buf = arrayvec::ArrayVec::::new(); - UUID::new(key).write_buf(&mut buf); - let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); - Ok(CompactString::from(key_as_str)) - } + ObType::Int => non_str_int(key), + ObType::Float => non_str_float(key), + ObType::Datetime => non_str_datetime(key, opts), + ObType::Date => non_str_date(key), + ObType::Time => non_str_time(key, opts), + ObType::Uuid => non_str_uuid(key), ObType::Enum => { let value = ffi!(PyObject_GetAttr(key, VALUE_STR)); debug_assert!(ffi!(Py_REFCNT(value)) >= 2); @@ -336,23 +362,8 @@ impl DictNonStrKey { ffi!(Py_DECREF(value)); ret } - ObType::Str => { - // because of ObType::Enum - let uni = unicode_to_str(key); - if unlikely!(uni.is_none()) { - Err(SerializeError::InvalidStr) - } else { - Ok(CompactString::from(uni.unwrap())) - } - } - ObType::StrSubclass => { - let uni = unicode_to_str_via_ffi(key); - if unlikely!(uni.is_none()) { - Err(SerializeError::InvalidStr) - } else { - Ok(CompactString::from(uni.unwrap())) - } - } + ObType::Str => non_str_str(key), + ObType::StrSubclass => non_str_str_subclass(key), ObType::Tuple | ObType::NumpyScalar | ObType::NumpyArray @@ -371,19 +382,20 @@ impl Serialize for DictNonStrKey { where S: Serializer, { - let len = ffi!(Py_SIZE(self.ptr)) as usize; - debug_assert!(len > 0); - let mut items: SmallVec<[(CompactString, *mut pyo3_ffi::PyObject); 8]> = - SmallVec::with_capacity(len); + let mut pos = 0; + let mut next_key: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); + let mut next_value: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); + + pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); let opts = self.state.opts() & NOT_PASSTHROUGH; - let mut next_key: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); - let mut next_value: *mut pyo3_ffi::PyObject = core::ptr::null_mut(); + let len = ffi!(Py_SIZE(self.ptr)) as usize; + assume!(len > 0); - let mut pos = 0; + let mut items: SmallVec<[(CompactString, *mut pyo3_ffi::PyObject); 8]> = + SmallVec::with_capacity(len); - pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); for _ in 0..len { let key = next_key; let value = next_value; @@ -405,7 +417,7 @@ impl Serialize for DictNonStrKey { } if opt_enabled!(opts, SORT_KEYS) { - items.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + sort_non_str_dict_items(&mut items); } let mut map = serializer.serialize_map(None).unwrap(); diff --git a/src/serialize/per_type/int.rs b/src/serialize/per_type/int.rs index dc5fe36a..09760885 100644 --- a/src/serialize/per_type/int.rs +++ b/src/serialize/per_type/int.rs @@ -21,6 +21,7 @@ impl IntSerializer { } impl Serialize for IntSerializer { + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, diff --git a/src/serialize/per_type/list.rs b/src/serialize/per_type/list.rs index ae244b61..e31267ab 100644 --- a/src/serialize/per_type/list.rs +++ b/src/serialize/per_type/list.rs @@ -22,6 +22,7 @@ impl ZeroListSerializer { } impl Serialize for ZeroListSerializer { + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, diff --git a/src/serialize/per_type/numpy.rs b/src/serialize/per_type/numpy.rs index 045f9553..c7ea47fa 100644 --- a/src/serialize/per_type/numpy.rs +++ b/src/serialize/per_type/numpy.rs @@ -263,6 +263,7 @@ impl NumpyArray { } } + #[inline(always)] fn data(&self) -> *const c_void { let offset = self .strides() @@ -390,6 +391,7 @@ impl<'a> NumpyF64Array<'a> { impl<'a> Serialize for NumpyF64Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -430,6 +432,7 @@ impl<'a> NumpyF32Array<'a> { impl<'a> Serialize for NumpyF32Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -470,6 +473,7 @@ impl<'a> NumpyF16Array<'a> { impl<'a> Serialize for NumpyF16Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -512,6 +516,7 @@ impl<'a> NumpyU64Array<'a> { impl<'a> Serialize for NumpyU64Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -552,6 +557,7 @@ impl<'a> NumpyU32Array<'a> { impl<'a> Serialize for NumpyU32Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -592,6 +598,7 @@ impl<'a> NumpyU16Array<'a> { impl<'a> Serialize for NumpyU16Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -615,7 +622,7 @@ impl Serialize for DataTypeU16 { where S: Serializer, { - serializer.serialize_u16(self.obj) + serializer.serialize_u32(self.obj as u32) } } @@ -632,6 +639,7 @@ impl<'a> NumpyI64Array<'a> { impl<'a> Serialize for NumpyI64Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -672,6 +680,7 @@ impl<'a> NumpyI32Array<'a> { impl<'a> Serialize for NumpyI32Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -712,6 +721,7 @@ impl<'a> NumpyI16Array<'a> { impl<'a> Serialize for NumpyI16Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -735,7 +745,7 @@ impl Serialize for DataTypeI16 { where S: Serializer, { - serializer.serialize_i16(self.obj) + serializer.serialize_i32(self.obj as i32) } } @@ -752,6 +762,7 @@ impl<'a> NumpyI8Array<'a> { impl<'a> Serialize for NumpyI8Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -775,7 +786,7 @@ impl Serialize for DataTypeI8 { where S: Serializer, { - serializer.serialize_i8(self.obj) + serializer.serialize_i32(self.obj as i32) } } @@ -792,6 +803,7 @@ impl<'a> NumpyU8Array<'a> { impl<'a> Serialize for NumpyU8Array<'a> { #[cold] + #[inline(never)] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -815,7 +827,7 @@ impl Serialize for DataTypeU8 { where S: Serializer, { - serializer.serialize_u8(self.obj) + serializer.serialize_u32(self.obj as u32) } } @@ -933,7 +945,7 @@ impl Serialize for NumpyInt8 { where S: Serializer, { - serializer.serialize_i8(self.value) + serializer.serialize_i32(self.value as i32) } } @@ -950,7 +962,7 @@ impl Serialize for NumpyInt16 { where S: Serializer, { - serializer.serialize_i16(self.value) + serializer.serialize_i32(self.value as i32) } } @@ -1001,7 +1013,7 @@ impl Serialize for NumpyUint8 { where S: Serializer, { - serializer.serialize_u8(self.value) + serializer.serialize_u32(self.value as u32) } } @@ -1018,7 +1030,7 @@ impl Serialize for NumpyUint16 { where S: Serializer, { - serializer.serialize_u16(self.value) + serializer.serialize_u32(self.value as u32) } } diff --git a/src/serialize/per_type/uuid.rs b/src/serialize/per_type/uuid.rs index 15688419..ff02fc10 100644 --- a/src/serialize/per_type/uuid.rs +++ b/src/serialize/per_type/uuid.rs @@ -17,6 +17,7 @@ impl UUID { UUID { ptr: ptr } } + #[inline(never)] pub fn write_buf(&self, buf: &mut UUIDBuffer) { let value: u128; { @@ -52,8 +53,7 @@ impl UUID { } } impl Serialize for UUID { - #[cold] - #[inline(never)] + #[inline(always)] fn serialize(&self, serializer: S) -> Result where S: Serializer, diff --git a/src/serialize/writer/byteswriter.rs b/src/serialize/writer/byteswriter.rs index 97370fd7..72510456 100644 --- a/src/serialize/writer/byteswriter.rs +++ b/src/serialize/writer/byteswriter.rs @@ -53,6 +53,7 @@ impl BytesWriter { pub fn resize(&mut self, len: usize) { self.cap = len; unsafe { + #[allow(clippy::unnecessary_cast)] _PyBytes_Resize( core::ptr::addr_of_mut!(self.bytes) as *mut *mut PyBytesObject as *mut *mut PyObject, diff --git a/src/serialize/writer/formatter.rs b/src/serialize/writer/formatter.rs index fd13fd82..cc020a30 100644 --- a/src/serialize/writer/formatter.rs +++ b/src/serialize/writer/formatter.rs @@ -30,30 +30,18 @@ pub trait Formatter { unsafe { writer.write_reserved_fragment(s) } } - #[inline] - fn write_i8(&mut self, writer: &mut W, value: i8) -> io::Result<()> + fn write_i8(&mut self, _writer: &mut W, _value: i8) -> io::Result<()> where W: ?Sized + io::Write + WriteExt, { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) + unreachable!(); } - #[inline] - fn write_i16(&mut self, writer: &mut W, value: i16) -> io::Result<()> + fn write_i16(&mut self, _writer: &mut W, _value: i16) -> io::Result<()> where W: ?Sized + io::Write + WriteExt, { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) + unreachable!(); } #[inline] @@ -82,7 +70,6 @@ pub trait Formatter { Ok(()) } - #[inline] fn write_i128(&mut self, _writer: &mut W, _value: i128) -> io::Result<()> where W: ?Sized + io::Write, @@ -90,30 +77,18 @@ pub trait Formatter { unreachable!(); } - #[inline] - fn write_u8(&mut self, writer: &mut W, value: u8) -> io::Result<()> + fn write_u8(&mut self, _writer: &mut W, _value: u8) -> io::Result<()> where W: ?Sized + io::Write + WriteExt, { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) + unreachable!(); } - #[inline] - fn write_u16(&mut self, writer: &mut W, value: u16) -> io::Result<()> + fn write_u16(&mut self, _writer: &mut W, _value: u16) -> io::Result<()> where W: ?Sized + io::Write + WriteExt, { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) + unreachable!(); } #[inline] @@ -142,7 +117,6 @@ pub trait Formatter { Ok(()) } - #[inline] fn write_u128(&mut self, _writer: &mut W, _value: u128) -> io::Result<()> where W: ?Sized + io::Write, @@ -232,11 +206,9 @@ pub trait Formatter { where W: ?Sized + io::Write + WriteExt, { + reserve_minimum!(writer); if !first { - unsafe { - reserve_minimum!(writer); - writer.write_reserved_punctuation(b',').unwrap() - } + unsafe { writer.write_reserved_punctuation(b',').unwrap() } } Ok(()) } @@ -278,9 +250,9 @@ pub trait Formatter { where W: ?Sized + io::Write + WriteExt, { + reserve_minimum!(writer); if !first { unsafe { - reserve_minimum!(writer); writer.write_reserved_punctuation(b',').unwrap(); } } @@ -323,6 +295,7 @@ pub struct PrettyFormatter { } impl PrettyFormatter { + #[allow(clippy::new_without_default)] pub const fn new() -> Self { PrettyFormatter { current_indent: 0, @@ -350,7 +323,7 @@ impl Formatter for PrettyFormatter { { self.current_indent -= 1; let num_spaces = self.current_indent * 2; - writer.reserve(num_spaces + 2); + reserve_pretty!(writer, num_spaces); unsafe { if self.has_value { @@ -367,7 +340,7 @@ impl Formatter for PrettyFormatter { W: ?Sized + io::Write + WriteExt, { let num_spaces = self.current_indent * 2; - writer.reserve(num_spaces + 2); + reserve_pretty!(writer, num_spaces); unsafe { writer.write_reserved_fragment(if first { b"\n" } else { b",\n" })?; @@ -404,7 +377,7 @@ impl Formatter for PrettyFormatter { { self.current_indent -= 1; let num_spaces = self.current_indent * 2; - writer.reserve(num_spaces + 2); + reserve_pretty!(writer, num_spaces); unsafe { if self.has_value { @@ -422,7 +395,7 @@ impl Formatter for PrettyFormatter { W: ?Sized + io::Write + WriteExt, { let num_spaces = self.current_indent * 2; - writer.reserve(num_spaces + 2); + reserve_pretty!(writer, num_spaces); unsafe { writer.write_reserved_fragment(if first { b"\n" } else { b",\n" })?; writer.write_reserved_indent(num_spaces)?; diff --git a/src/serialize/writer/json.rs b/src/serialize/writer/json.rs index 31942b52..c6a51056 100644 --- a/src/serialize/writer/json.rs +++ b/src/serialize/writer/json.rs @@ -71,18 +71,12 @@ where .map_err(Error::io) } - #[cold] - fn serialize_i8(self, value: i8) -> Result<()> { - self.formatter - .write_i8(&mut self.writer, value) - .map_err(Error::io) + fn serialize_i8(self, _value: i8) -> Result<()> { + unreachable!(); } - #[cold] - fn serialize_i16(self, value: i16) -> Result<()> { - self.formatter - .write_i16(&mut self.writer, value) - .map_err(Error::io) + fn serialize_i16(self, _value: i16) -> Result<()> { + unreachable!(); } #[inline] @@ -103,18 +97,12 @@ where unreachable!(); } - #[cold] - fn serialize_u8(self, value: u8) -> Result<()> { - self.formatter - .write_u8(&mut self.writer, value) - .map_err(Error::io) + fn serialize_u8(self, _value: u8) -> Result<()> { + unreachable!(); } - #[cold] - fn serialize_u16(self, value: u16) -> Result<()> { - self.formatter - .write_u16(&mut self.writer, value) - .map_err(Error::io) + fn serialize_u16(self, _value: u16) -> Result<()> { + unreachable!(); } #[inline] @@ -310,21 +298,21 @@ where self.ser .formatter .begin_array_value(&mut self.ser.writer, self.state == State::First) - .map_err(Error::io)?; + .unwrap(); self.state = State::Rest; value.serialize(&mut *self.ser)?; self.ser .formatter .end_array_value(&mut self.ser.writer) .map_err(Error::io) + .unwrap(); + Ok(()) } #[inline] fn end(self) -> Result<()> { - self.ser - .formatter - .end_array(&mut self.ser.writer) - .map_err(Error::io) + self.ser.formatter.end_array(&mut self.ser.writer).unwrap(); + Ok(()) } } @@ -352,7 +340,7 @@ where self.ser .formatter .begin_object_key(&mut self.ser.writer, self.state == State::First) - .map_err(Error::io)?; + .unwrap(); self.state = State::Rest; key.serialize(MapKeySerializer { ser: self.ser })?; @@ -360,7 +348,8 @@ where self.ser .formatter .end_object_key(&mut self.ser.writer) - .map_err(Error::io) + .unwrap(); + Ok(()) } #[inline] @@ -371,20 +360,19 @@ where self.ser .formatter .begin_object_value(&mut self.ser.writer) - .map_err(Error::io)?; + .unwrap(); value.serialize(&mut *self.ser)?; self.ser .formatter .end_object_value(&mut self.ser.writer) - .map_err(Error::io) + .unwrap(); + Ok(()) } #[inline] fn end(self) -> Result<()> { - self.ser - .formatter - .end_object(&mut self.ser.writer) - .map_err(Error::io) + self.ser.formatter.end_object(&mut self.ser.writer).unwrap(); + Ok(()) } } diff --git a/src/util.rs b/src/util.rs index 787aa8aa..db6e8a8c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -258,6 +258,12 @@ macro_rules! reserve_minimum { }; } +macro_rules! reserve_pretty { + ($writer:expr, $val:expr) => { + $writer.reserve($val + 16); + }; +} + macro_rules! assume { ($expr:expr) => { debug_assert!($expr);