From 1d5f3e16fd897ab6739a5cc692138ce9426a5b13 Mon Sep 17 00:00:00 2001 From: ijl Date: Mon, 15 Jan 2024 16:32:37 +0000 Subject: [PATCH] cargo update, build and tooling misc --- .github/workflows/debug.yaml | 17 +- .github/workflows/lint.yaml | 2 +- .github/workflows/linux.yaml | 48 +- Cargo.lock | 71 +- Cargo.toml | 11 +- README.md | 8 +- build.rs | 9 +- ci/azure-macos.yml | 11 +- ci/azure-pipelines.yml | 2 +- ci/azure-win.yml | 2 +- ci/config.toml | 7 - integration/wsgi.py | 4 +- pyproject.toml | 13 +- requirements.txt | 3 - script/develop | 6 +- script/graph | 4 +- script/lint | 4 +- script/pynumpy | 4 +- src/deserialize/cache.rs | 4 +- src/deserialize/deserializer.rs | 2 +- src/deserialize/json.rs | 2 +- src/deserialize/pyobject.rs | 10 +- src/deserialize/utf8.rs | 4 +- src/deserialize/yyjson.rs | 9 +- src/ffi/list.rs | 44 - src/ffi/mod.rs | 2 - src/lib.rs | 15 +- src/serialize/json.rs | 1330 ----------------- src/serialize/mod.rs | 1 - src/serialize/per_type/dataclass.rs | 34 +- src/serialize/per_type/datetime.rs | 6 +- src/serialize/per_type/default.rs | 4 +- src/serialize/per_type/dict.rs | 51 +- src/serialize/per_type/fragment.rs | 2 +- src/serialize/per_type/int.rs | 2 +- src/serialize/per_type/list.rs | 11 +- src/serialize/per_type/none.rs | 4 +- src/serialize/per_type/numpy.rs | 69 +- src/serialize/per_type/pyenum.rs | 2 +- src/serialize/per_type/tuple.rs | 4 +- src/serialize/per_type/unicode.rs | 4 +- src/serialize/per_type/uuid.rs | 3 +- src/serialize/serializer.rs | 4 +- .../{writer.rs => writer/byteswriter.rs} | 0 src/serialize/writer/escape.rs | 232 +++ src/serialize/writer/formatter.rs | 451 ++++++ src/serialize/writer/json.rs | 594 ++++++++ src/serialize/writer/mod.rs | 9 + src/typeref.rs | 41 +- src/util.rs | 12 + 50 files changed, 1526 insertions(+), 1662 deletions(-) delete mode 100644 src/ffi/list.rs delete mode 100644 src/serialize/json.rs rename src/serialize/{writer.rs => writer/byteswriter.rs} (100%) create mode 100644 
src/serialize/writer/escape.rs create mode 100644 src/serialize/writer/formatter.rs create mode 100644 src/serialize/writer/json.rs create mode 100644 src/serialize/writer/mod.rs diff --git a/.github/workflows/debug.yaml b/.github/workflows/debug.yaml index 06b6d8fe..0adb6dfe 100644 --- a/.github/workflows/debug.yaml +++ b/.github/workflows/debug.yaml @@ -7,24 +7,24 @@ jobs: strategy: fail-fast: false matrix: - rust: [ - { version: "1.65" }, # MSRV - { version: "nightly-2023-10-10" }, + profile: [ + { rust: "1.65", features: "" }, + { rust: "1.65", features: "--features=yyjson" }, + { rust: "nightly-2024-01-10", features: "--features=yyjson,unstable-simd" }, ] python: [ { version: '3.12', abi: 'cp312-cp312' }, - { version: '3.11', abi: 'cp311-cp311' }, { version: '3.8', abi: 'cp38-cp38' }, ] env: CC: "gcc" - CFLAGS: "-O2 -fno-plt" + CFLAGS: "-O2" LDFLAGS: "-Wl,--as-needed" CARGO_UNSTABLE_SPARSE_REGISTRY: "true" steps: - - run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain ${{ matrix.rust.version }} --profile minimal -y + - run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain ${{ matrix.profile.rust }} --profile minimal -y - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '${{ matrix.python.version }}' - run: python -m pip install --user --upgrade pip "maturin>=1,<2" wheel @@ -36,9 +36,8 @@ jobs: PATH="$HOME/.cargo/bin:$PATH" maturin build --release \ --out=dist \ --profile=dev \ - --features=yyjson \ --interpreter python${{ matrix.python.version }} \ - --target=x86_64-unknown-linux-gnu + --target=x86_64-unknown-linux-gnu ${{ matrix.profile.features }} - run: python -m pip install --user dist/orjson*.whl - run: python -m pip install --user -r test/requirements.txt -r integration/requirements.txt diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index e9b6dc63..d7bbae4a 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -4,7 +4,7 @@ jobs: lint: 
runs-on: ubuntu-22.04 steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.11' - run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain=stable --profile=default -y diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index d60d0c1e..62f4e781 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -62,17 +62,17 @@ jobs: env: PATH: /github/home/.local/bin:/github/home/.cargo/bin:/opt/python/${{ matrix.python.abi }}/bin:/opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin CC: "clang" - CFLAGS: "-O2 -fno-plt -flto=thin" - LDFLAGS: "-O2 -flto=thin -fuse-ld=lld -Wl,--as-needed" - RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=lld" + CFLAGS: "-O2 -fstrict-aliasing -emit-llvm -flto=full" + LDFLAGS: "-O2 -fstrict-aliasing -emit-llvm -flto=full -fuse-ld=lld -Wl,--as-needed -Wl,-plugin-opt=also-emit-llvm" + RUSTFLAGS: "-C linker=clang -C linker-plugin-lto -C lto=fat -C link-arg=-fuse-ld=lld -Z mir-opt-level=4 -Z virtual-function-elimination -D warnings" CARGO_UNSTABLE_SPARSE_REGISTRY: "true" container: image: quay.io/pypa/manylinux_2_28_x86_64:latest options: --user 0 steps: - run: yum install -y clang lld - - run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly-2023-10-10 --profile minimal -y - - run: rustup component add rust-src --toolchain nightly-2023-10-10-x86_64-unknown-linux-gnu + - run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly-2024-01-10 --profile minimal -y + - run: rustup component add rust-src --toolchain nightly-2024-01-10-x86_64-unknown-linux-gnu - uses: actions/checkout@v4 - name: build-std @@ -84,7 +84,7 @@ jobs: - run: | maturin build --release --strip \ --out=dist \ - --features=no-panic,yyjson \ + --features=encoding_rs/simd-accel,no-panic,unstable-simd,yyjson \ --compatibility manylinux_2_17 \ --interpreter python${{ matrix.python.version }} \ 
--target=x86_64-unknown-linux-gnu @@ -138,22 +138,22 @@ jobs: uses: PyO3/maturin-action@v1 env: CC: "gcc" - CFLAGS: "-O2 -fno-plt" + CFLAGS: "-O2" LDFLAGS: "-O2 -flto -Wl,--as-needed" - RUSTFLAGS: "-C target-feature=-crt-static" + RUSTFLAGS: "-C target-feature=-crt-static -Z mir-opt-level=4" CARGO_UNSTABLE_SPARSE_REGISTRY: "true" with: - rust-toolchain: nightly-2023-10-10 + rust-toolchain: nightly-2024-01-10 rustup-components: rust-src target: ${{ matrix.platform.target }} manylinux: musllinux_1_1 - args: --release --strip --out=dist --features=no-panic,yyjson -i python${{ matrix.python.version }} + args: --release --strip --out=dist --features=encoding_rs/simd-accel,no-panic,unstable-simd,yyjson -i python${{ matrix.python.version }} - name: Set up QEMU if: matrix.platform.arch != 'x86_64' uses: docker/setup-qemu-action@v3 with: - image: tonistiigi/binfmt:qemu-v7.0.0 + image: tonistiigi/binfmt:qemu-v8.1.4 platforms: ${{ matrix.platform.platform }} - name: Test @@ -194,28 +194,35 @@ jobs: target: [ { arch: 'aarch64', + cflags: '-O2 -flto', + features: 'encoding_rs/simd-accel,no-panic,unstable-simd,yyjson', + rustflags: '-Z mir-opt-level=4 -D warnings', target: 'aarch64-unknown-linux-gnu', - cflags: '-O2', }, { arch: 'armv7', + cflags: '-Os -flto -fstrict-aliasing', + features: 'no-panic,yyjson', # no SIMD + rustflags: '-C opt-level=s -Z mir-opt-level=4 -D warnings', target: 'armv7-unknown-linux-gnueabihf', - cflags: '-Os -fstrict-aliasing', }, { arch: 'ppc64le', + cflags: '-O2 -flto', + features: 'no-panic,unstable-simd,yyjson', + rustflags: '-Z mir-opt-level=4 -D warnings', target: 'powerpc64le-unknown-linux-gnu', - cflags: '-O2', }, { arch: 's390x', + cflags: '-O2 -flto -march=z10', + features: 'no-panic,unstable-simd,yyjson', + rustflags: '-Z mir-opt-level=4 -C target-cpu=z10 -D warnings', target: 's390x-unknown-linux-gnu', - cflags: '-O2 -march=z10', }, ] steps: - uses: actions/checkout@v4 - - name: build-std run: | mkdir .cargo @@ -226,14 +233,15 @@ jobs: env: 
PYO3_CROSS_LIB_DIR: "/opt/python/${{ matrix.python.abi }}" CFLAGS: "${{ matrix.target.cflags }}" - LDFLAGS: "${{ matrix.target.cflags }} -flto -Wl,--as-needed" + LDFLAGS: "${{ matrix.target.cflags }} -Wl,--as-needed" CARGO_UNSTABLE_SPARSE_REGISTRY: "true" + RUSTFLAGS: "${{ matrix.target.rustflags }}" with: target: ${{ matrix.target.target }} - rust-toolchain: nightly-2023-10-10 + rust-toolchain: nightly-2024-01-10 rustup-components: rust-src manylinux: auto - args: --release --strip --out=dist --features=no-panic,yyjson -i python${{ matrix.python.version }} + args: --release --strip --out=dist --features=${{ matrix.target.features }} -i python${{ matrix.python.version }} - uses: uraimo/run-on-arch-action@v2 name: Test @@ -277,7 +285,7 @@ jobs: - uses: actions/download-artifact@v3 with: name: wheels - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: "3.11" - run: pip install pip "maturin>=1,<2" diff --git a/Cargo.lock b/Cargo.lock index 557acf5a..da703834 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -49,9 +49,6 @@ name = "bytecount" version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" -dependencies = [ - "packed_simd", -] [[package]] name = "castaway" @@ -112,9 +109,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" dependencies = [ "no-panic", ] @@ -127,9 +124,9 @@ checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "libc" -version = "0.2.149" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = 
"13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "libm" @@ -139,9 +136,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "no-panic" -version = "0.1.26" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71a6d126424f5ce0bb4587ff4561421d44aeede520541cc66f1bb912506ae46a" +checksum = "fc56831a2ae584dc43a8b0b33f496e71fb4d43cf8c1c0a3fd932e6340bea1f81" dependencies = [ "proc-macro2", "quote", @@ -160,9 +157,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "orjson" @@ -202,18 +199,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.69" +version = "1.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3-build-config" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96fe70b176a89cff78f2fa7b3c930081e163d5379b4dcdf993e3ae29ca662e5" +checksum = "07426f0d8fe5a601f26293f300afd1a7b1ed5e78b2a705870c5f30893c5163be" dependencies = [ "once_cell", "target-lexicon", @@ -221,9 +218,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "214929900fd25e6604661ed9cf349727c8920d47deff196c4e28165a6ef2a96b" +checksum = "dbb7dec17e17766b46bca4f1a4215a85006b4c2ecde122076c562dd058da6cf1" dependencies = [ "libc", "pyo3-build-config", @@ -231,9 +228,9 
@@ dependencies = [ [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -246,27 +243,27 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" dependencies = [ "no-panic", ] [[package]] name = "serde" -version = "1.0.190" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.190" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" dependencies = [ "proc-macro2", "quote", @@ -275,9 +272,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.107" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" dependencies = [ "itoa", "ryu", @@ -292,9 +289,9 @@ checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" [[package]] name = "smallvec" -version = "1.11.1" +version = "1.12.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" +checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" [[package]] name = "static_assertions" @@ -304,9 +301,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "2.0.38" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", @@ -315,9 +312,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.12" +version = "0.12.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" +checksum = "69758bda2e78f098e4ccb393021a0963bb3442eac05f135c30f61b7370bbafae" [[package]] name = "unicode-ident" @@ -333,18 +330,18 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "zerocopy" -version = "0.7.15" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81ba595b9f2772fbee2312de30eeb80ec773b4cb2f1e8098db024afadda6c06f" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.15" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "772666c41fb6dceaf520b564b962d738a8e1a83b41bd48945f50837aed78bb1d" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 5a0dab31..2c247052 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,10 +33,7 @@ crate-type = ["cdylib"] default = [] # Use SIMD 
intrinsics. This requires Rust on the nightly channel. -unstable-simd = [ - "bytecount/generic-simd", - "encoding_rs/simd-accel", -] +unstable-simd = [] no-panic = [ "itoa/no-panic", @@ -48,7 +45,7 @@ no-panic = [ yyjson = [] [dependencies] -ahash = { version = "^0.8.6", default_features = false } +ahash = { version = "=0.8.6", default_features = false } arrayvec = { version = "0.7", default_features = false, features = ["std", "serde"] } associative-cache = { version = "2", default_features = false } beef = { version = "0.5", default_features = false, features = ["impl_serde"] } @@ -59,7 +56,7 @@ encoding_rs = { version = "0.8", default_features = false } itoa = { version = "1", default_features = false } itoap = { version = "1", features = ["std", "simd"] } once_cell = { version = "1", default_features = false, features = ["race"] } -pyo3-ffi = { version = "^0.20", default_features = false, features = ["extension-module"]} +pyo3-ffi = { version = "^0.20.2", default_features = false, features = ["extension-module"]} ryu = { version = "1", default_features = false } serde = { version = "1", default_features = false } serde_json = { version = "1", default_features = false, features = ["std", "float_roundtrip"] } @@ -68,7 +65,7 @@ smallvec = { version = "^1.11", default_features = false, features = ["union", " [build-dependencies] cc = { version = "1" } -pyo3-build-config = { version = "^0.20" } +pyo3-build-config = { version = "^0.20.2" } version_check = { version = "0.9" } [profile.dev] diff --git a/README.md b/README.md index 532ca6d5..5de6f247 100644 --- a/README.md +++ b/README.md @@ -985,9 +985,9 @@ library handles a combined 342 JSON fixtures from the | Library | Invalid JSON documents not rejected | Valid JSON documents not deserialized | |------------|---------------------------------------|-----------------------------------------| | orjson | 0 | 0 | -| ujson | 38 | 0 | +| ujson | 31 | 0 | | rapidjson | 6 | 0 | -| simplejson | 13 | 0 | +| simplejson | 10 | 
0 | | json | 17 | 0 | This shows that all libraries deserialize valid JSON but only orjson @@ -1192,7 +1192,7 @@ It benefits from also having a C build environment to compile a faster deserialization backend. See this project's `manylinux_2_28` builds for an example using clang and LTO. -The project's own CI tests against `nightly-2023-10-10` and stable 1.65. It +The project's own CI tests against `nightly-2024-01-10` and stable 1.65. It is prudent to pin the nightly version because that channel can introduce breaking changes. @@ -1213,5 +1213,5 @@ tests should be run as part of the build. It can be run with ## License -orjson was written by ijl <>, copyright 2018 - 2023, licensed +orjson was written by ijl <>, copyright 2018 - 2024, licensed under both the Apache 2 and MIT licenses. diff --git a/build.rs b/build.rs index 86894e0a..329e2fbb 100644 --- a/build.rs +++ b/build.rs @@ -11,8 +11,9 @@ fn main() { println!("cargo:rerun-if-env-changed=RUSTFLAGS"); println!("cargo:rerun-if-env-changed=ORJSON_DISABLE_YYJSON"); - let py_cfg = pyo3_build_config::get(); - py_cfg.emit_pyo3_cfgs(); + for cfg in pyo3_build_config::get().build_script_outputs() { + println!("{cfg}"); + } if let Some(true) = version_check::supports_feature("core_intrinsics") { println!("cargo:rustc-cfg=feature=\"intrinsics\""); @@ -26,10 +27,6 @@ fn main() { println!("cargo:rustc-cfg=feature=\"strict_provenance\""); } - if let Some(true) = version_check::supports_feature("trusted_len") { - println!("cargo:rustc-cfg=feature=\"trusted_len\""); - } - if env::var("ORJSON_DISABLE_YYJSON").is_ok() { if env::var("CARGO_FEATURE_YYJSON").is_ok() { panic!("ORJSON_DISABLE_YYJSON and --features=yyjson both enabled.") diff --git a/ci/azure-macos.yml b/ci/azure-macos.yml index 7ada3141..9f9ebc6d 100644 --- a/ci/azure-macos.yml +++ b/ci/azure-macos.yml @@ -23,13 +23,14 @@ steps: PATH=$HOME/.cargo/bin:$PATH \ MACOSX_DEPLOYMENT_TARGET=$(macosx_deployment_target) \ PYO3_CROSS_LIB_DIR=$(python -c "import 
sysconfig;print(sysconfig.get_config_var('LIBDIR'))") \ - maturin build --release --strip --features=no-panic,yyjson --interpreter $(interpreter) --target=universal2-apple-darwin + maturin build --release --strip --features=encoding_rs/simd-accel,no-panic,yyjson --interpreter $(interpreter) --target=universal2-apple-darwin env: CC: "clang" - LDFLAGS: "-O2 -flto=thin -fuse-ld=lld -Wl,--as-needed" - CFLAGS: "-O2 -fno-plt -flto=thin" - CFLAGS_x86_64_apple_darwin: "-O2 -fno-plt -flto=thin -march=x86-64-v2 -mtune=generic" - CFLAGS_aarch64_apple_darwin: "-O2 -fno-plt -flto=thin -mcpu=apple-m1 -mtune=generic" + CFLAGS: "-O2 -fstrict-aliasing -flto=full" + LDFLAGS: "-O2 -fstrict-aliasing -flto=full -Wl,--as-needed" + CFLAGS_x86_64_apple_darwin: "-O2 -fstrict-aliasing -flto=full -march=x86-64-v2 -mtune=generic" + CFLAGS_aarch64_apple_darwin: "-O2 -fstrict-aliasing -flto=full -mcpu=apple-m1 -mtune=generic" + RUSTFLAGS: "-C lto=fat -Z mir-opt-level=4 -Z virtual-function-elimination -D warnings" CARGO_UNSTABLE_SPARSE_REGISTRY: "true" displayName: build universal2 diff --git a/ci/azure-pipelines.yml b/ci/azure-pipelines.yml index c78da7a4..63b48ca9 100644 --- a/ci/azure-pipelines.yml +++ b/ci/azure-pipelines.yml @@ -1,5 +1,5 @@ variables: - toolchain: nightly-2023-10-10 + toolchain: nightly-2024-01-10 jobs: diff --git a/ci/azure-win.yml b/ci/azure-win.yml index 6e65c241..15af67e1 100644 --- a/ci/azure-win.yml +++ b/ci/azure-win.yml @@ -19,7 +19,7 @@ steps: displayName: build dependencies - script: python.exe -m pip install -r test\requirements.txt -r integration\requirements.txt displayName: test dependencies -- script: maturin.exe build --release --features=no-panic,yyjson --strip --interpreter $(interpreter) --target $(target) +- script: maturin.exe build --release --features=encoding_rs/simd-accel,no-panic,yyjson --strip --interpreter $(interpreter) --target $(target) displayName: build - script: python.exe -m pip install orjson --no-index --find-links=D:\a\1\s\target\wheels 
displayName: install diff --git a/ci/config.toml b/ci/config.toml index a386c2ff..0823eb5a 100644 --- a/ci/config.toml +++ b/ci/config.toml @@ -9,10 +9,3 @@ rustflags = ["-C", "target-cpu=x86-64-v2", "-Z", "tune-cpu=generic"] [target.aarch64-apple-darwin] linker = "clang" rustflags = ["-C", "target-cpu=apple-m1"] - -[target.armv7-unknown-linux-gnueabihf] -rustflags = ["-C", "opt-level=s"] - -[target.s390x-unknown-linux-gnu] -linker = "clang" -rustflags = ["-C", "target-cpu=z10", "-Z", "tune-cpu=generic"] diff --git a/integration/wsgi.py b/integration/wsgi.py index 7e20e9b0..b5d4d4c4 100644 --- a/integration/wsgi.py +++ b/integration/wsgi.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) -from datetime import datetime +from datetime import datetime, timezone from uuid import uuid4 from flask import Flask @@ -9,7 +9,7 @@ app = Flask(__name__) -NOW = datetime.utcnow() +NOW = datetime.now(timezone.utc) @app.route("/") diff --git a/pyproject.toml b/pyproject.toml index 276703ed..58007e5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,20 +39,21 @@ include = [ { format = "sdist", path = "include/**/*" }, ] - -[tool.black] -line-length = 88 -target-version = ["py38"] -include = ".pyi?$" - [tool.ruff] line-length = 88 target-version = "py38" +select = [ + "I", +] + ignore = [ "E501", # line too long "F601", # Dictionary key literal ... 
repeated ] +[tool.ruff.lint.isort] +known-first-party = ["orjson"] + [tool.mypy] python_version = "3.8" diff --git a/requirements.txt b/requirements.txt index c8d3e01a..9ef0c1fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,6 @@ -r bench/requirements.txt -r integration/requirements.txt -r test/requirements.txt -autoflake -black -isort maturin mypy ruff diff --git a/script/develop b/script/develop index 6bad5db7..50568f63 100755 --- a/script/develop +++ b/script/develop @@ -3,9 +3,9 @@ rm -f target/wheels/* export CC="clang" -export CFLAGS="-O2 -fno-plt -flto=thin" -export LDFLAGS="${CFLAGS} -fuse-ld=lld -Wl,--as-needed" -export RUSTFLAGS="-C linker=clang -C link-arg=-fuse-ld=lld" +export CFLAGS="-O2 -fstrict-aliasing -emit-llvm -flto=full" +export LDFLAGS="${CFLAGS} -fuse-ld=lld -Wl,--as-needed -Wl,-plugin-opt=also-emit-llvm" +export RUSTFLAGS="-C linker=clang -C linker-plugin-lto -C lto=fat -C link-arg=-fuse-ld=lld -Z mir-opt-level=4 -Z virtual-function-elimination" maturin build "$@" diff --git a/script/graph b/script/graph index 89c86303..4255a127 100755 --- a/script/graph +++ b/script/graph @@ -46,14 +46,14 @@ def tab(obj): [ lib, val[lib]["median"] if correct else None, - "%.1f" % val[lib]["ops"] if correct else None, + str(int(val[lib]["ops"])) if correct else None, 0, ] ) baseline = table[0][1] for each in table: each[3] = ( - "%.2f" % (each[1] / baseline) if isinstance(each[1], float) else None + "%.1f" % (each[1] / baseline) if isinstance(each[1], float) else None ) each[1] = "%.2f" % each[1] if isinstance(each[1], float) else None buf.write(tabulate(table, headers, tablefmt="github") + "\n") diff --git a/script/lint b/script/lint index c08a928f..15eed6ec 100755 --- a/script/lint +++ b/script/lint @@ -6,8 +6,6 @@ to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py script/pydataclass script/pysort script/pynumpy script/pynonstr script/pycorrectness script/graph integration/init integration/wsgi.py integration/typestubs.py 
integration/thread" -autoflake --in-place --recursive --remove-all-unused-imports --ignore-init-module-imports . -isort ${to_lint} ruff ${to_lint} --fix -black ${to_lint} +ruff format ${to_lint} mypy --ignore-missing-imports --check-untyped-defs ./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py diff --git a/script/pynumpy b/script/pynumpy index df7a1194..102a395d 100755 --- a/script/pynumpy +++ b/script/pynumpy @@ -39,9 +39,7 @@ elif kind == "int32": elif kind == "uint8": array = numpy.random.randint(((2**8) - 1), size=(100000, 100), dtype=numpy.uint8) elif kind == "uint16": - array = numpy.random.randint( - ((2**16) - 1), size=(100000, 100), dtype=numpy.uint16 - ) + array = numpy.random.randint(((2**16) - 1), size=(100000, 100), dtype=numpy.uint16) else: print("usage: pynumpy (bool|int16|int32|float64|int8|uint8|uint16)") sys.exit(1) diff --git a/src/deserialize/cache.rs b/src/deserialize/cache.rs index 62142f77..4ba30fc8 100644 --- a/src/deserialize/cache.rs +++ b/src/deserialize/cache.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::typeref::*; +use crate::typeref::HASH_BUILDER; use associative_cache::replacement::RoundRobinReplacement; use associative_cache::*; use once_cell::unsync::OnceCell; @@ -44,7 +44,7 @@ pub static mut KEY_MAP: OnceCell = OnceCell::new(); pub fn cache_hash(key: &[u8]) -> u64 { #[cfg(feature = "intrinsics")] unsafe { - std::intrinsics::assume(key.len() > 0); + std::intrinsics::assume(!key.is_empty()); std::intrinsics::assume(key.len() <= 64); } let mut hasher = unsafe { HASH_BUILDER.get().unwrap().build_hasher() }; diff --git a/src/deserialize/deserializer.rs b/src/deserialize/deserializer.rs index 0675cce0..cd0d09bc 100644 --- a/src/deserialize/deserializer.rs +++ b/src/deserialize/deserializer.rs @@ -2,7 +2,7 @@ use crate::deserialize::utf8::read_input_to_buf; use crate::deserialize::DeserializeError; -use crate::typeref::*; +use crate::typeref::EMPTY_UNICODE; use std::ptr::NonNull; pub fn deserialize( 
diff --git a/src/deserialize/json.rs b/src/deserialize/json.rs index 9a6d7561..a9d36424 100644 --- a/src/deserialize/json.rs +++ b/src/deserialize/json.rs @@ -2,7 +2,7 @@ use crate::deserialize::pyobject::*; use crate::deserialize::DeserializeError; -use crate::str::*; +use crate::str::unicode_from_str; use serde::de::{self, DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; use smallvec::SmallVec; use std::borrow::Cow; diff --git a/src/deserialize/pyobject.rs b/src/deserialize/pyobject.rs index 49766b14..0da2f9fd 100644 --- a/src/deserialize/pyobject.rs +++ b/src/deserialize/pyobject.rs @@ -1,8 +1,8 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::deserialize::cache::*; -use crate::str::*; -use crate::typeref::*; +use crate::str::{hash_str, unicode_from_str}; +use crate::typeref::{EMPTY_UNICODE, FALSE, NONE, TRUE}; use std::ptr::NonNull; pub fn get_unicode_key(key_str: &str) -> *mut pyo3_ffi::PyObject { @@ -14,11 +14,7 @@ pub fn get_unicode_key(key_str: &str) -> *mut pyo3_ffi::PyObject { pykey = use_immortal!(EMPTY_UNICODE); } else { let hash = cache_hash(key_str.as_bytes()); - let map = unsafe { - KEY_MAP - .get_mut() - .unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }) - }; + let map = unsafe { KEY_MAP.get_mut().unwrap_or_else(|| unreachable!()) }; let entry = map.entry(&hash).or_insert_with( || hash, || { diff --git a/src/deserialize/utf8.rs b/src/deserialize/utf8.rs index ddb03563..d1cbb474 100644 --- a/src/deserialize/utf8.rs +++ b/src/deserialize/utf8.rs @@ -2,8 +2,8 @@ use crate::deserialize::DeserializeError; use crate::ffi::*; -use crate::str::*; -use crate::typeref::*; +use crate::str::unicode_to_str; +use crate::typeref::{BYTEARRAY_TYPE, BYTES_TYPE, MEMORYVIEW_TYPE, STR_TYPE}; use crate::util::INVALID_STR; use std::borrow::Cow; use std::os::raw::c_char; diff --git a/src/deserialize/yyjson.rs b/src/deserialize/yyjson.rs index c8c5b774..968f7c39 100644 --- a/src/deserialize/yyjson.rs +++ b/src/deserialize/yyjson.rs 
@@ -3,8 +3,8 @@ use crate::deserialize::pyobject::*; use crate::deserialize::DeserializeError; use crate::ffi::yyjson::*; -use crate::str::*; -use crate::typeref::*; +use crate::str::unicode_from_str; +use crate::typeref::{yyjson_init, YYJSON_ALLOC, YYJSON_BUFFER_SIZE}; use std::borrow::Cow; use std::os::raw::c_char; use std::ptr::{null, null_mut, NonNull}; @@ -125,7 +125,7 @@ impl ElementType { TAG_FALSE => Self::False, TAG_ARRAY => Self::Array, TAG_OBJECT => Self::Object, - _ => unsafe { std::hint::unreachable_unchecked() }, + _ => unreachable!(), } } } @@ -164,10 +164,7 @@ fn parse_yy_object(elem: *mut yyjson_val) -> NonNull { return nonnull!(ffi!(PyDict_New())); } let mut key = unsafe_yyjson_get_first(elem); - #[cfg(not(Py_3_13))] let dict = ffi!(_PyDict_NewPresized(len as isize)); - #[cfg(Py_3_13)] - let dict = ffi!(PyDict_New()); for _ in 0..=len - 1 { let val = key.add(1); let key_str = str_from_slice!((*key).uni.str_ as *const u8, unsafe_yyjson_get_len(key)); diff --git a/src/ffi/list.rs b/src/ffi/list.rs deleted file mode 100644 index 44b386c4..00000000 --- a/src/ffi/list.rs +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use std::ptr::NonNull; - -pub struct PyListIter { - obj: *mut pyo3_ffi::PyListObject, - len: usize, - pos: usize, -} - -impl PyListIter { - #[inline] - pub fn from_pyobject(obj: *mut pyo3_ffi::PyObject) -> Self { - unsafe { - PyListIter { - obj: obj as *mut pyo3_ffi::PyListObject, - len: ffi!(Py_SIZE(obj)) as usize, - pos: 0, - } - } - } -} - -impl Iterator for PyListIter { - type Item = NonNull; - - #[inline(always)] - fn next(&mut self) -> Option { - if self.pos == self.len { - None - } else { - let elem = unsafe { *((*self.obj).ob_item).add(self.pos) }; - self.pos += 1; - Some(nonnull!(elem)) - } - } - - fn size_hint(&self) -> (usize, Option) { - (self.len, Some(self.len)) - } -} - -#[cfg(feature = "trusted_len")] -unsafe impl std::iter::TrustedLen for PyListIter {} diff --git a/src/ffi/mod.rs 
b/src/ffi/mod.rs index afc7ef50..ba6dedcd 100644 --- a/src/ffi/mod.rs +++ b/src/ffi/mod.rs @@ -3,7 +3,6 @@ mod buffer; mod bytes; mod fragment; -mod list; mod long; #[cfg(feature = "yyjson")] pub mod yyjson; @@ -11,5 +10,4 @@ pub mod yyjson; pub use buffer::*; pub use bytes::*; pub use fragment::{orjson_fragmenttype_new, Fragment}; -pub use list::PyListIter; pub use long::{pylong_is_unsigned, pylong_is_zero}; diff --git a/src/lib.rs b/src/lib.rs index 63b9a59a..2e0a86c0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,14 +4,13 @@ #![cfg_attr(feature = "optimize", feature(optimize_attribute))] #![cfg_attr(feature = "strict_provenance", feature(strict_provenance))] #![cfg_attr(feature = "strict_provenance", warn(fuzzy_provenance_casts))] -#![cfg_attr(feature = "trusted_len", feature(trusted_len))] +#![allow(unknown_lints)] // internal_features +#![allow(internal_features)] // core_intrinsics #![allow(unused_unsafe)] #![allow(non_camel_case_types)] -#![allow(clippy::explicit_auto_deref)] #![allow(clippy::missing_safety_doc)] #![allow(clippy::redundant_field_names)] #![allow(clippy::uninlined_format_args)] // MSRV 1.66 -#![allow(clippy::unnecessary_unwrap)] #![allow(clippy::upper_case_acronyms)] #![allow(clippy::zero_prefixed_literal)] @@ -26,9 +25,7 @@ mod str; mod typeref; use pyo3_ffi::*; -use std::os::raw::c_char; -use std::os::raw::c_int; -use std::os::raw::c_void; +use std::os::raw::{c_char, c_int, c_void}; #[allow(unused_imports)] use std::ptr::{null, null_mut, NonNull}; @@ -232,7 +229,7 @@ fn raise_dumps_exception_fixed(msg: &str) -> *mut PyObject { #[inline(never)] #[cfg_attr(feature = "optimize", optimize(size))] #[cfg(Py_3_12)] -fn raise_dumps_exception_dynamic(err: &String) -> *mut PyObject { +fn raise_dumps_exception_dynamic(err: &str) -> *mut PyObject { unsafe { let cause_exc: *mut PyObject = PyErr_GetRaisedException(); @@ -255,7 +252,7 @@ fn raise_dumps_exception_dynamic(err: &String) -> *mut PyObject { #[inline(never)] #[cfg_attr(feature = "optimize", 
optimize(size))] #[cfg(not(Py_3_12))] -fn raise_dumps_exception_dynamic(err: &String) -> *mut PyObject { +fn raise_dumps_exception_dynamic(err: &str) -> *mut PyObject { unsafe { let mut cause_tp: *mut PyObject = null_mut(); let mut cause_val: *mut PyObject = null_mut(); @@ -355,6 +352,6 @@ pub unsafe extern "C" fn dumps( match crate::serialize::serialize(*args, default, optsbits as opt::Opt) { Ok(val) => val.as_ptr(), - Err(err) => raise_dumps_exception_dynamic(&err), + Err(err) => raise_dumps_exception_dynamic(err.as_str()), } } diff --git a/src/serialize/json.rs b/src/serialize/json.rs deleted file mode 100644 index f4147c09..00000000 --- a/src/serialize/json.rs +++ /dev/null @@ -1,1330 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) -// This is an adaptation of `src/value/ser.rs` from serde-json. - -use crate::serialize::writer::WriteExt; -use serde::ser::{self, Impossible, Serialize}; -use serde_json::error::{Error, Result}; -use std::io; - -macro_rules! reserve_minimum { - ($writer:expr) => { - $writer.reserve(64); - }; -} - -pub struct Serializer { - writer: W, - formatter: F, -} - -impl Serializer -where - W: io::Write + WriteExt, -{ - #[inline] - pub fn new(writer: W) -> Self { - Serializer::with_formatter(writer, CompactFormatter) - } -} - -impl Serializer -where - W: io::Write + WriteExt, -{ - #[inline] - pub fn pretty(writer: W) -> Self { - Serializer::with_formatter(writer, PrettyFormatter::new()) - } -} - -impl Serializer -where - W: io::Write + WriteExt, - F: Formatter, -{ - #[inline] - pub fn with_formatter(writer: W, formatter: F) -> Self { - Serializer { writer, formatter } - } - - #[inline] - pub fn into_inner(self) -> W { - self.writer - } -} - -impl<'a, W, F> ser::Serializer for &'a mut Serializer -where - W: io::Write + WriteExt, - F: Formatter, -{ - type Ok = (); - type Error = Error; - - type SerializeSeq = Compound<'a, W, F>; - type SerializeTuple = Impossible<(), Error>; - type SerializeTupleStruct = Impossible<(), Error>; - type 
SerializeTupleVariant = Impossible<(), Error>; - type SerializeMap = Compound<'a, W, F>; - type SerializeStruct = Impossible<(), Error>; - type SerializeStructVariant = Impossible<(), Error>; - - #[inline] - fn serialize_bool(self, value: bool) -> Result<()> { - self.formatter - .write_bool(&mut self.writer, value) - .map_err(Error::io) - } - - #[cold] - fn serialize_i8(self, value: i8) -> Result<()> { - self.formatter - .write_i8(&mut self.writer, value) - .map_err(Error::io) - } - - #[cold] - fn serialize_i16(self, value: i16) -> Result<()> { - self.formatter - .write_i16(&mut self.writer, value) - .map_err(Error::io) - } - - #[inline] - fn serialize_i32(self, value: i32) -> Result<()> { - self.formatter - .write_i32(&mut self.writer, value) - .map_err(Error::io) - } - - #[inline] - fn serialize_i64(self, value: i64) -> Result<()> { - self.formatter - .write_i64(&mut self.writer, value) - .map_err(Error::io) - } - - fn serialize_i128(self, _value: i128) -> Result<()> { - unreachable!(); - } - - #[cold] - fn serialize_u8(self, value: u8) -> Result<()> { - self.formatter - .write_u8(&mut self.writer, value) - .map_err(Error::io) - } - - #[cold] - fn serialize_u16(self, value: u16) -> Result<()> { - self.formatter - .write_u16(&mut self.writer, value) - .map_err(Error::io) - } - - #[inline] - fn serialize_u32(self, value: u32) -> Result<()> { - self.formatter - .write_u32(&mut self.writer, value) - .map_err(Error::io) - } - - #[inline] - fn serialize_u64(self, value: u64) -> Result<()> { - self.formatter - .write_u64(&mut self.writer, value) - .map_err(Error::io) - } - - fn serialize_u128(self, _value: u128) -> Result<()> { - unreachable!(); - } - - #[inline] - fn serialize_f32(self, value: f32) -> Result<()> { - if unlikely!(value.is_infinite() || value.is_nan()) { - self.serialize_unit() - } else { - self.formatter - .write_f32(&mut self.writer, value) - .map_err(Error::io) - } - } - #[inline] - fn serialize_f64(self, value: f64) -> Result<()> { - if 
unlikely!(value.is_infinite() || value.is_nan()) { - self.serialize_unit() - } else { - self.formatter - .write_f64(&mut self.writer, value) - .map_err(Error::io) - } - } - - fn serialize_char(self, _value: char) -> Result<()> { - unreachable!(); - } - - #[inline] - fn serialize_str(self, value: &str) -> Result<()> { - format_escaped_str(&mut self.writer, &mut self.formatter, value).map_err(Error::io) - } - - fn serialize_bytes(self, value: &[u8]) -> Result<()> { - self.writer.reserve(value.len()); - unsafe { self.writer.write_reserved_fragment(value).unwrap() }; - Ok(()) - } - - #[inline] - fn serialize_unit(self) -> Result<()> { - self.formatter - .write_null(&mut self.writer) - .map_err(Error::io) - } - - fn serialize_unit_struct(self, _name: &'static str) -> Result<()> { - unreachable!(); - } - - fn serialize_unit_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - ) -> Result<()> { - unreachable!(); - } - - fn serialize_newtype_struct(self, _name: &'static str, _value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - unreachable!(); - } - - fn serialize_newtype_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _value: &T, - ) -> Result<()> - where - T: ?Sized + Serialize, - { - unreachable!(); - } - - #[inline] - fn serialize_none(self) -> Result<()> { - self.serialize_unit() - } - - #[inline] - fn serialize_some(self, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - value.serialize(self) - } - - #[inline(always)] - fn serialize_seq(self, len: Option) -> Result { - if len == Some(0) { - unsafe { - reserve_minimum!(self.writer); - self.writer.write_reserved_fragment(b"[]").unwrap(); - } - Ok(Compound { - ser: self, - state: State::Empty, - }) - } else { - self.formatter - .begin_array(&mut self.writer) - .map_err(Error::io)?; - Ok(Compound { - ser: self, - state: State::First, - }) - } - } - - fn serialize_tuple(self, _len: usize) -> Result { - unreachable!(); 
- } - - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - unreachable!(); - } - - fn serialize_tuple_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - unreachable!(); - } - - #[inline(always)] - fn serialize_map(self, len: Option) -> Result { - if len == Some(0) { - unsafe { - reserve_minimum!(self.writer); - self.writer.write_reserved_fragment(b"{}").unwrap(); - } - - Ok(Compound { - ser: self, - state: State::Empty, - }) - } else { - self.formatter - .begin_object(&mut self.writer) - .map_err(Error::io)?; - Ok(Compound { - ser: self, - state: State::First, - }) - } - } - - fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { - unreachable!(); - } - - fn serialize_struct_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - unreachable!(); - } -} - -#[derive(Eq, PartialEq)] -pub enum State { - Empty, - First, - Rest, -} - -pub struct Compound<'a, W: 'a, F: 'a> { - ser: &'a mut Serializer, - state: State, -} - -impl<'a, W, F> ser::SerializeSeq for Compound<'a, W, F> -where - W: io::Write + WriteExt, - F: Formatter, -{ - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_element(&mut self, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - self.ser - .formatter - .begin_array_value(&mut self.ser.writer, self.state == State::First) - .map_err(Error::io)?; - self.state = State::Rest; - value.serialize(&mut *self.ser)?; - self.ser - .formatter - .end_array_value(&mut self.ser.writer) - .map_err(Error::io) - } - - #[inline] - fn end(self) -> Result<()> { - match self.state { - State::Empty => Ok(()), - _ => self - .ser - .formatter - .end_array(&mut self.ser.writer) - .map_err(Error::io), - } - } -} - -impl<'a, W, F> ser::SerializeTuple for Compound<'a, W, F> -where - W: io::Write + WriteExt, - F: Formatter, -{ - type Ok = (); - type Error = 
Error; - - fn serialize_element(&mut self, _value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - unreachable!(); - } - - fn end(self) -> Result<()> { - unreachable!(); - } -} - -impl<'a, W, F> ser::SerializeMap for Compound<'a, W, F> -where - W: io::Write + WriteExt, - F: Formatter, -{ - type Ok = (); - type Error = Error; - - fn serialize_entry(&mut self, _key: &K, _value: &V) -> Result<()> - where - K: ?Sized + Serialize, - V: ?Sized + Serialize, - { - unreachable!() - } - - #[inline] - fn serialize_key(&mut self, key: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - self.ser - .formatter - .begin_object_key(&mut self.ser.writer, self.state == State::First) - .map_err(Error::io)?; - self.state = State::Rest; - - key.serialize(MapKeySerializer { ser: self.ser })?; - - self.ser - .formatter - .end_object_key(&mut self.ser.writer) - .map_err(Error::io) - } - - #[inline] - fn serialize_value(&mut self, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - self.ser - .formatter - .begin_object_value(&mut self.ser.writer) - .map_err(Error::io)?; - value.serialize(&mut *self.ser)?; - self.ser - .formatter - .end_object_value(&mut self.ser.writer) - .map_err(Error::io) - } - - #[inline] - fn end(self) -> Result<()> { - match self.state { - State::Empty => Ok(()), - _ => self - .ser - .formatter - .end_object(&mut self.ser.writer) - .map_err(Error::io), - } - } -} - -#[repr(transparent)] -struct MapKeySerializer<'a, W: 'a, F: 'a> { - ser: &'a mut Serializer, -} - -impl<'a, W, F> ser::Serializer for MapKeySerializer<'a, W, F> -where - W: io::Write + WriteExt, - F: Formatter, -{ - type Ok = (); - type Error = Error; - type SerializeSeq = Impossible<(), Error>; - type SerializeTuple = Impossible<(), Error>; - type SerializeTupleStruct = Impossible<(), Error>; - type SerializeTupleVariant = Impossible<(), Error>; - type SerializeMap = Impossible<(), Error>; - type SerializeStruct = Impossible<(), Error>; - type SerializeStructVariant = 
Impossible<(), Error>; - - #[inline] - fn serialize_str(self, value: &str) -> Result<()> { - self.ser.serialize_str(value) - } - - fn serialize_unit_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - ) -> Result<()> { - unreachable!(); - } - - fn serialize_newtype_struct(self, _name: &'static str, _value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - unreachable!(); - } - fn serialize_bool(self, _value: bool) -> Result<()> { - unreachable!(); - } - - fn serialize_i8(self, _value: i8) -> Result<()> { - unreachable!(); - } - - fn serialize_i16(self, _value: i16) -> Result<()> { - unreachable!(); - } - - fn serialize_i32(self, _value: i32) -> Result<()> { - unreachable!(); - } - - fn serialize_i64(self, _value: i64) -> Result<()> { - unreachable!(); - } - - fn serialize_i128(self, _value: i128) -> Result<()> { - unreachable!(); - } - - fn serialize_u8(self, _value: u8) -> Result<()> { - unreachable!(); - } - - fn serialize_u16(self, _value: u16) -> Result<()> { - unreachable!(); - } - - fn serialize_u32(self, _value: u32) -> Result<()> { - unreachable!(); - } - - fn serialize_u64(self, _value: u64) -> Result<()> { - unreachable!(); - } - - fn serialize_u128(self, _value: u128) -> Result<()> { - unreachable!(); - } - - fn serialize_f32(self, _value: f32) -> Result<()> { - unreachable!(); - } - - fn serialize_f64(self, _value: f64) -> Result<()> { - unreachable!(); - } - - fn serialize_char(self, _value: char) -> Result<()> { - unreachable!(); - } - - fn serialize_bytes(self, _value: &[u8]) -> Result<()> { - unreachable!(); - } - - fn serialize_unit(self) -> Result<()> { - unreachable!(); - } - - fn serialize_unit_struct(self, _name: &'static str) -> Result<()> { - unreachable!(); - } - - fn serialize_newtype_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _value: &T, - ) -> Result<()> - where - T: ?Sized + Serialize, - { - unreachable!(); - } - - fn serialize_none(self) -> 
Result<()> { - unreachable!(); - } - - fn serialize_some(self, _value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - unreachable!(); - } - - fn serialize_seq(self, _len: Option) -> Result { - unreachable!(); - } - - fn serialize_tuple(self, _len: usize) -> Result { - unreachable!(); - } - - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - unreachable!(); - } - - fn serialize_tuple_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - unreachable!(); - } - - fn serialize_map(self, _len: Option) -> Result { - unreachable!(); - } - - fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { - unreachable!(); - } - - fn serialize_struct_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - unreachable!(); - } -} - -pub enum CharEscape { - /// An escaped quote `"` - Quote, - /// An escaped reverse solidus `\` - ReverseSolidus, - /// An escaped backspace character (usually escaped as `\b`) - Backspace, - /// An escaped form feed character (usually escaped as `\f`) - FormFeed, - /// An escaped line feed character (usually escaped as `\n`) - LineFeed, - /// An escaped carriage return character (usually escaped as `\r`) - CarriageReturn, - /// An escaped tab character (usually escaped as `\t`) - Tab, - /// An escaped ASCII plane control character (usually escaped as - /// `\u00XX` where `XX` are two hex characters) - AsciiControl(u8), -} - -impl CharEscape { - #[inline] - fn from_escape_table(escape: u8, byte: u8) -> CharEscape { - match escape { - self::BB => CharEscape::Backspace, - self::TT => CharEscape::Tab, - self::NN => CharEscape::LineFeed, - self::FF => CharEscape::FormFeed, - self::RR => CharEscape::CarriageReturn, - self::QU => CharEscape::Quote, - self::BS => CharEscape::ReverseSolidus, - self::UU => CharEscape::AsciiControl(byte), - _ => unreachable!(), - } - } 
-} - -pub trait Formatter { - #[inline] - fn write_null(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - writer.write_reserved_fragment(b"null") - } - } - - #[inline] - fn write_bool(&mut self, writer: &mut W, value: bool) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - let s = if value { - b"true" as &[u8] - } else { - b"false" as &[u8] - }; - reserve_minimum!(writer); - unsafe { writer.write_reserved_fragment(s) } - } - - #[inline] - fn write_i8(&mut self, writer: &mut W, value: i8) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) - } - - #[inline] - fn write_i16(&mut self, writer: &mut W, value: i16) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) - } - - #[inline] - fn write_i32(&mut self, writer: &mut W, value: i32) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) - } - - #[inline] - fn write_i64(&mut self, writer: &mut W, value: i64) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) - } - - #[inline] - fn write_i128(&mut self, _writer: &mut W, _value: i128) -> io::Result<()> - where - W: ?Sized + io::Write, - { - unreachable!(); - } - - #[inline] - fn write_u8(&mut self, writer: &mut W, value: u8) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = 
itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) - } - - #[inline] - fn write_u16(&mut self, writer: &mut W, value: u16) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) - } - - #[inline] - fn write_u32(&mut self, writer: &mut W, value: u32) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) - } - - #[inline] - fn write_u64(&mut self, writer: &mut W, value: u64) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); - writer.set_written(len); - } - Ok(()) - } - - #[inline] - fn write_u128(&mut self, _writer: &mut W, _value: u128) -> io::Result<()> - where - W: ?Sized + io::Write, - { - unreachable!(); - } - - #[inline] - fn write_f32(&mut self, writer: &mut W, value: f32) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = ryu::raw::format32(value, writer.as_mut_buffer_ptr()); - writer.set_written(len); - } - Ok(()) - } - - #[inline] - fn write_f64(&mut self, writer: &mut W, value: f64) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unsafe { - reserve_minimum!(writer); - let len = ryu::raw::format64(value, writer.as_mut_buffer_ptr()); - writer.set_written(len); - } - Ok(()) - } - - fn write_number_str(&mut self, _writer: &mut W, _value: &str) -> io::Result<()> - where - W: ?Sized + io::Write, - { - unreachable!(); - } - - #[inline] - fn begin_string(&mut self, _writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unreachable!(); - } - - #[inline] - fn end_string(&mut 
self, _writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unreachable!(); - } - - #[inline] - fn write_string_fragment(&mut self, _writer: &mut W, _fragment: &str) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - unreachable!(); - } - - #[inline] - fn write_char_escape(&mut self, writer: &mut W, char_escape: CharEscape) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - use self::CharEscape::*; - - let s = match char_escape { - Quote => b"\\\"", - ReverseSolidus => b"\\\\", - Backspace => b"\\b", - FormFeed => b"\\f", - LineFeed => b"\\n", - CarriageReturn => b"\\r", - Tab => b"\\t", - AsciiControl(byte) => { - static HEX_DIGITS: [u8; 16] = *b"0123456789abcdef"; - let bytes = &[ - b'\\', - b'u', - b'0', - b'0', - HEX_DIGITS[(byte >> 4) as usize], - HEX_DIGITS[(byte & 0xF) as usize], - ]; - return unsafe { writer.write_reserved_fragment(bytes) }; - } - }; - - unsafe { writer.write_reserved_fragment(s) } - } - - #[inline] - fn begin_array(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - reserve_minimum!(writer); - unsafe { writer.write_reserved_punctuation(b'[').unwrap() }; - Ok(()) - } - - #[inline] - fn end_array(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - reserve_minimum!(writer); - unsafe { writer.write_reserved_punctuation(b']').unwrap() }; - Ok(()) - } - - #[inline] - fn begin_array_value(&mut self, writer: &mut W, first: bool) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - if !first { - unsafe { - reserve_minimum!(writer); - writer.write_reserved_punctuation(b',').unwrap() - } - } - Ok(()) - } - - #[inline] - fn end_array_value(&mut self, _writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write, - { - Ok(()) - } - - #[inline] - fn begin_object(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - reserve_minimum!(writer); - 
unsafe { - writer.write_reserved_punctuation(b'{').unwrap(); - } - Ok(()) - } - - #[inline] - fn end_object(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - reserve_minimum!(writer); - unsafe { - writer.write_reserved_punctuation(b'}').unwrap(); - } - Ok(()) - } - - #[inline] - fn begin_object_key(&mut self, writer: &mut W, first: bool) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - if !first { - unsafe { - reserve_minimum!(writer); - writer.write_reserved_punctuation(b',').unwrap(); - } - } - Ok(()) - } - - #[inline] - fn end_object_key(&mut self, _writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write, - { - Ok(()) - } - - #[inline] - fn begin_object_value(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - reserve_minimum!(writer); - unsafe { writer.write_reserved_punctuation(b':') } - } - - #[inline] - fn end_object_value(&mut self, _writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write, - { - Ok(()) - } -} - -pub struct CompactFormatter; - -impl Formatter for CompactFormatter {} - -pub struct PrettyFormatter { - current_indent: usize, - has_value: bool, -} - -impl PrettyFormatter { - pub fn new() -> Self { - PrettyFormatter { - current_indent: 0, - has_value: false, - } - } -} - -impl Default for PrettyFormatter { - fn default() -> Self { - PrettyFormatter::new() - } -} - -impl Formatter for PrettyFormatter { - #[inline] - fn begin_array(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - self.current_indent += 1; - self.has_value = false; - reserve_minimum!(writer); - unsafe { writer.write_reserved_punctuation(b'[') } - } - - #[inline] - fn end_array(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - self.current_indent -= 1; - let num_spaces = self.current_indent * 2; - writer.reserve(num_spaces + 2); - - unsafe { - if self.has_value { - 
writer.write_reserved_punctuation(b'\n')?; - writer.write_reserved_indent(num_spaces)?; - } - writer.write_reserved_punctuation(b']') - } - } - - #[inline] - fn begin_array_value(&mut self, writer: &mut W, first: bool) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - let num_spaces = self.current_indent * 2; - writer.reserve(num_spaces + 2); - - unsafe { - writer.write_reserved_fragment(if first { b"\n" } else { b",\n" })?; - writer.write_reserved_indent(num_spaces)?; - }; - Ok(()) - } - - #[inline] - fn end_array_value(&mut self, _writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write, - { - self.has_value = true; - Ok(()) - } - - #[inline] - fn begin_object(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - self.current_indent += 1; - self.has_value = false; - - reserve_minimum!(writer); - unsafe { writer.write_reserved_punctuation(b'{') } - } - - #[inline] - fn end_object(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - self.current_indent -= 1; - let num_spaces = self.current_indent * 2; - writer.reserve(num_spaces + 2); - - unsafe { - if self.has_value { - writer.write_reserved_punctuation(b'\n')?; - writer.write_reserved_indent(num_spaces)?; - } - - writer.write_reserved_punctuation(b'}') - } - } - - #[inline] - fn begin_object_key(&mut self, writer: &mut W, first: bool) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - let num_spaces = self.current_indent * 2; - writer.reserve(num_spaces + 2); - unsafe { - writer.write_reserved_fragment(if first { b"\n" } else { b",\n" })?; - writer.write_reserved_indent(num_spaces)?; - } - Ok(()) - } - - #[inline] - fn begin_object_value(&mut self, writer: &mut W) -> io::Result<()> - where - W: ?Sized + io::Write + WriteExt, - { - reserve_minimum!(writer); - unsafe { writer.write_reserved_fragment(b": ").unwrap() }; - Ok(()) - } - - #[inline] - fn end_object_value(&mut self, _writer: &mut 
W) -> io::Result<()> - where - W: ?Sized + io::Write, - { - self.has_value = true; - Ok(()) - } -} - -fn format_escaped_str(writer: &mut W, formatter: &mut F, value: &str) -> io::Result<()> -where - W: ?Sized + io::Write + WriteExt, - F: ?Sized + Formatter, -{ - let len = value.len(); - - if len == 0 { - reserve_minimum!(writer); - return unsafe { writer.write_reserved_fragment(b"\"\"") }; - } - - unsafe { - let mut escapes: u8 = __; - let mut idx = 0; - let as_bytes = value.as_bytes(); - while idx < len.saturating_sub(8) { - escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx) as usize); - escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 1) as usize); - escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 2) as usize); - escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 3) as usize); - escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 4) as usize); - escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 5) as usize); - escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 6) as usize); - escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 7) as usize); - if unlikely!(escapes != __) { - return format_escaped_str_with_escapes(writer, formatter, as_bytes, idx); - } - idx += 8; - } - while idx < len { - escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx) as usize); - if unlikely!(escapes != __) { - return format_escaped_str_with_escapes(writer, formatter, as_bytes, idx); - } - idx += 1; - } - } - - writer.write_str(value) -} - -fn format_escaped_str_with_escapes( - writer: &mut W, - formatter: &mut F, - value: &[u8], - initial: usize, -) -> io::Result<()> -where - W: ?Sized + io::Write + WriteExt, - F: ?Sized + Formatter, -{ - writer.reserve((value.len() * 8) + 2); - unsafe { - writer.write_reserved_punctuation(b'"').unwrap(); - if initial > 0 { - writer - .write_reserved_fragment(value.get_unchecked(0..initial)) - .unwrap(); - } - format_escaped_str_contents(writer, 
formatter, value.get_unchecked(initial..)).unwrap(); - writer.write_reserved_punctuation(b'"').unwrap(); - }; - Ok(()) -} - -fn format_escaped_str_contents( - writer: &mut W, - formatter: &mut F, - bytes: &[u8], -) -> io::Result<()> -where - W: ?Sized + io::Write + WriteExt, - F: ?Sized + Formatter, -{ - let len = bytes.len(); - let mut start = 0; - let mut idx = 0; - - let mut escape: u8; - loop { - if idx < len.saturating_sub(4) { - escape = 0; - unsafe { - escape |= *ESCAPE.get_unchecked(*bytes.get_unchecked(idx) as usize); - escape |= *ESCAPE.get_unchecked(*bytes.get_unchecked(idx + 1) as usize); - escape |= *ESCAPE.get_unchecked(*bytes.get_unchecked(idx + 2) as usize); - escape |= *ESCAPE.get_unchecked(*bytes.get_unchecked(idx + 3) as usize); - } - if escape == 0 { - idx += 4; - continue; - } - } - - let byte = unsafe { *bytes.get_unchecked(idx) }; - escape = unsafe { *ESCAPE.get_unchecked(byte as usize) }; - if escape == 0 { - idx += 1; - if idx == len { - break; - } else { - continue; - } - } - - if start < idx { - unsafe { - writer - .write_reserved_fragment(bytes.get_unchecked(start..idx)) - .unwrap() - }; - } - - let char_escape = CharEscape::from_escape_table(escape, byte); - formatter.write_char_escape(writer, char_escape)?; - - idx += 1; - start = idx; - if idx == len { - break; - } - } - - if start != len { - unsafe { - writer - .write_reserved_fragment(bytes.get_unchecked(start..len)) - .unwrap() - }; - } - Ok(()) -} - -const BB: u8 = b'b'; // \x08 -const TT: u8 = b't'; // \x09 -const NN: u8 = b'n'; // \x0A -const FF: u8 = b'f'; // \x0C -const RR: u8 = b'r'; // \x0D -const QU: u8 = b'"'; // \x22 -const BS: u8 = b'\\'; // \x5C -const UU: u8 = b'u'; // \x00...\x1F except the ones above -const __: u8 = 0; - -// Lookup table of escape sequences. A value of b'x' at index i means that byte -// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped. 
-const ESCAPE: [u8; 256] = [ - // 1 2 3 4 5 6 7 8 9 A B C D E F - UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0 - UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1 - __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4 - __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F -]; - -#[inline] -pub fn to_writer(writer: W, value: &T) -> Result<()> -where - W: io::Write + WriteExt, - T: ?Sized + Serialize, -{ - let mut ser = Serializer::new(writer); - value.serialize(&mut ser) -} - -#[inline] -pub fn to_writer_pretty(writer: W, value: &T) -> Result<()> -where - W: io::Write + WriteExt, - T: ?Sized + Serialize, -{ - let mut ser = Serializer::pretty(writer); - value.serialize(&mut ser) -} diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs index 53301410..4b0e7d80 100644 --- a/src/serialize/mod.rs +++ b/src/serialize/mod.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) mod error; -mod json; mod per_type; mod serializer; mod writer; diff --git a/src/serialize/per_type/dataclass.rs b/src/serialize/per_type/dataclass.rs index 6541d398..04f2878b 
100644 --- a/src/serialize/per_type/dataclass.rs +++ b/src/serialize/per_type/dataclass.rs @@ -1,9 +1,9 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::opt::*; -use crate::serialize::error::*; -use crate::serialize::serializer::*; -use crate::str::*; +use crate::serialize::error::SerializeError; +use crate::serialize::serializer::{PyObjectSerializer, RECURSION_LIMIT}; +use crate::str::unicode_to_str; use crate::typeref::*; use serde::ser::{Serialize, SerializeMap, Serializer}; @@ -124,22 +124,12 @@ impl Serialize for DataclassFastSerializer { let mut pos = 0; - ffi!(PyDict_Next( - self.ptr, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); for _ in 0..=ffi!(Py_SIZE(self.ptr)) as usize - 1 { let key = next_key; let value = next_value; - ffi!(PyDict_Next( - self.ptr, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) { err!(SerializeError::KeyMustBeStr) @@ -211,22 +201,12 @@ impl Serialize for DataclassFallbackSerializer { let mut pos = 0; - ffi!(PyDict_Next( - fields, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(fields, &mut pos, &mut next_key, &mut next_value); for _ in 0..=ffi!(Py_SIZE(fields)) as usize - 1 { let attr = next_key; let field = next_value; - ffi!(PyDict_Next( - fields, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(fields, &mut pos, &mut next_key, &mut next_value); let field_type = ffi!(PyObject_GetAttr(field, FIELD_TYPE_STR)); debug_assert!(ffi!(Py_REFCNT(field_type)) >= 2); diff --git a/src/serialize/per_type/datetime.rs b/src/serialize/per_type/datetime.rs index 47d0c20b..2ead1ef5 100644 --- a/src/serialize/per_type/datetime.rs +++ b/src/serialize/per_type/datetime.rs @@ -1,11 +1,13 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::opt::*; -use crate::serialize::error::*; +use 
crate::serialize::error::SerializeError; use crate::serialize::per_type::datetimelike::{ DateTimeBuffer, DateTimeError, DateTimeLike, Offset, }; -use crate::typeref::*; +#[cfg(Py_3_9)] +use crate::typeref::ZONEINFO_TYPE; +use crate::typeref::{CONVERT_METHOD_STR, DST_STR, NORMALIZE_METHOD_STR, UTCOFFSET_METHOD_STR}; use serde::ser::{Serialize, Serializer}; macro_rules! write_double_digit { diff --git a/src/serialize/per_type/default.rs b/src/serialize/per_type/default.rs index 6f2d0982..3c143b1b 100644 --- a/src/serialize/per_type/default.rs +++ b/src/serialize/per_type/default.rs @@ -1,8 +1,8 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::opt::*; -use crate::serialize::error::*; -use crate::serialize::serializer::*; +use crate::serialize::error::SerializeError; +use crate::serialize::serializer::{PyObjectSerializer, RECURSION_LIMIT}; use serde::ser::{Serialize, Serializer}; diff --git a/src/serialize/per_type/dict.rs b/src/serialize/per_type/dict.rs index a20eaf06..229f8b58 100644 --- a/src/serialize/per_type/dict.rs +++ b/src/serialize/per_type/dict.rs @@ -1,13 +1,13 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::opt::*; -use crate::serialize::error::*; +use crate::serialize::error::SerializeError; use crate::serialize::per_type::datetimelike::{DateTimeBuffer, DateTimeLike}; use crate::serialize::per_type::*; use crate::serialize::serializer::{ pyobject_to_obtype, ObType, PyObjectSerializer, RECURSION_LIMIT, }; -use crate::str::*; +use crate::str::{unicode_to_str, unicode_to_str_via_ffi}; use crate::typeref::*; use compact_str::CompactString; use serde::ser::{Serialize, SerializeMap, Serializer}; @@ -119,22 +119,12 @@ impl Serialize for Dict { let mut pos = 0; - ffi!(PyDict_Next( - self.ptr, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); for _ in 0..=ffi!(Py_SIZE(self.ptr)) as usize - 1 { let key = next_key; let value = next_value; - ffi!(PyDict_Next( - 
self.ptr, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) { err!(SerializeError::KeyMustBeStr) @@ -198,22 +188,12 @@ impl Serialize for DictSortedKey { let mut pos = 0; - ffi!(PyDict_Next( - self.ptr, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); for _ in 0..=len as usize - 1 { let key = next_key; let value = next_value; - ffi!(PyDict_Next( - self.ptr, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) { err!(SerializeError::KeyMustBeStr) @@ -268,13 +248,12 @@ impl DictNonStrKey { } } - #[cfg_attr(feature = "optimize", optimize(size))] fn pyobject_to_string( key: *mut pyo3_ffi::PyObject, opts: crate::opt::Opt, ) -> Result { match pyobject_to_obtype(key, opts) { - ObType::None => Ok(CompactString::from("null")), + ObType::None => Ok(CompactString::new_inline("null")), ObType::Bool => { let key_as_str = if unsafe { key == TRUE } { "true" @@ -299,7 +278,7 @@ impl DictNonStrKey { ObType::Float => { let val = ffi!(PyFloat_AS_DOUBLE(key)); if !val.is_finite() { - Ok(CompactString::from("null")) + Ok(CompactString::new_inline("null")) } else { Ok(CompactString::from(ryu::Buffer::new().format_finite(val))) } @@ -386,22 +365,12 @@ impl Serialize for DictNonStrKey { let mut pos = 0; - ffi!(PyDict_Next( - self.ptr, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); for _ in 0..=ffi!(Py_SIZE(self.ptr)) as usize - 1 { let key = next_key; let value = next_value; - ffi!(PyDict_Next( - self.ptr, - &mut pos, - &mut next_key, - &mut next_value - )); + pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); if is_type!(ob_type!(key), STR_TYPE) { let uni = unicode_to_str(key); diff --git 
a/src/serialize/per_type/fragment.rs b/src/serialize/per_type/fragment.rs index 1b201bd9..969ad43f 100644 --- a/src/serialize/per_type/fragment.rs +++ b/src/serialize/per_type/fragment.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::ffi::{Fragment, PyBytes_AS_STRING, PyBytes_GET_SIZE}; -use crate::serialize::error::*; +use crate::serialize::error::SerializeError; use crate::str::unicode_to_str; use crate::typeref::{BYTES_TYPE, STR_TYPE}; diff --git a/src/serialize/per_type/int.rs b/src/serialize/per_type/int.rs index e92049ca..53a90e86 100644 --- a/src/serialize/per_type/int.rs +++ b/src/serialize/per_type/int.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::ffi::{pylong_is_unsigned, pylong_is_zero}; -use crate::serialize::error::*; +use crate::serialize::error::SerializeError; use serde::ser::{Serialize, Serializer}; // https://tools.ietf.org/html/rfc7159#section-6 diff --git a/src/serialize/per_type/list.rs b/src/serialize/per_type/list.rs index 9e51bfa9..327aea23 100644 --- a/src/serialize/per_type/list.rs +++ b/src/serialize/per_type/list.rs @@ -1,8 +1,7 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::ffi::PyListIter; -use crate::opt::*; -use crate::serialize::error::*; +use crate::opt::Opt; +use crate::serialize::error::SerializeError; use crate::serialize::serializer::{PyObjectSerializer, RECURSION_LIMIT}; use serde::ser::{Serialize, SerializeSeq, Serializer}; @@ -46,9 +45,11 @@ impl Serialize for ListSerializer { serializer.serialize_seq(Some(0)).unwrap().end() } else { let mut seq = serializer.serialize_seq(None).unwrap(); - for elem in PyListIter::from_pyobject(self.ptr) { + for idx in 0..=ffi!(Py_SIZE(self.ptr)) - 1 { + let elem = + unsafe { *((*(self.ptr as *mut pyo3_ffi::PyListObject)).ob_item).offset(idx) }; let value = PyObjectSerializer::new( - elem.as_ptr(), + elem, self.opts, self.default_calls, self.recursion, diff --git a/src/serialize/per_type/none.rs 
b/src/serialize/per_type/none.rs index 173ce9f5..10240826 100644 --- a/src/serialize/per_type/none.rs +++ b/src/serialize/per_type/none.rs @@ -5,8 +5,8 @@ use serde::ser::{Serialize, Serializer}; pub struct NoneSerializer; impl NoneSerializer { - pub fn new() -> Self { - NoneSerializer {} + pub const fn new() -> Self { + Self {} } } diff --git a/src/serialize/per_type/numpy.rs b/src/serialize/per_type/numpy.rs index 0bfd60eb..b83b6421 100644 --- a/src/serialize/per_type/numpy.rs +++ b/src/serialize/per_type/numpy.rs @@ -1,6 +1,6 @@ use crate::opt::*; -use crate::serialize::error::*; +use crate::serialize::error::SerializeError; use crate::serialize::per_type::datetimelike::{ DateTimeBuffer, DateTimeError, DateTimeLike, Offset, }; @@ -79,7 +79,7 @@ macro_rules! slice { }; } -#[cfg_attr(feature = "optimize", optimize(size))] +#[cold] pub fn is_numpy_scalar(ob_type: *mut PyTypeObject) -> bool { let numpy_types = unsafe { NUMPY_TYPES.get_or_init(load_numpy_types) }; if numpy_types.is_none() { @@ -101,7 +101,7 @@ pub fn is_numpy_scalar(ob_type: *mut PyTypeObject) -> bool { } } -#[cfg_attr(feature = "optimize", optimize(size))] +#[cold] pub fn is_numpy_array(ob_type: *mut PyTypeObject) -> bool { let numpy_types = unsafe { NUMPY_TYPES.get_or_init(load_numpy_types) }; if numpy_types.is_none() { @@ -395,7 +395,7 @@ impl<'a> NumpyF64Array<'a> { } impl<'a> Serialize for NumpyF64Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -414,6 +414,7 @@ pub struct DataTypeF64 { } impl Serialize for DataTypeF64 { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -434,7 +435,7 @@ impl<'a> NumpyF32Array<'a> { } impl<'a> Serialize for NumpyF32Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -453,6 +454,7 @@ struct DataTypeF32 { } impl Serialize for DataTypeF32 { + #[cold] fn 
serialize(&self, serializer: S) -> Result where S: Serializer, @@ -473,7 +475,7 @@ impl<'a> NumpyU64Array<'a> { } impl<'a> Serialize for NumpyU64Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -492,6 +494,7 @@ pub struct DataTypeU64 { } impl Serialize for DataTypeU64 { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -512,7 +515,7 @@ impl<'a> NumpyU32Array<'a> { } impl<'a> Serialize for NumpyU32Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -531,6 +534,7 @@ pub struct DataTypeU32 { } impl Serialize for DataTypeU32 { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -551,7 +555,7 @@ impl<'a> NumpyU16Array<'a> { } impl<'a> Serialize for NumpyU16Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -570,6 +574,7 @@ pub struct DataTypeU16 { } impl Serialize for DataTypeU16 { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -590,7 +595,7 @@ impl<'a> NumpyI64Array<'a> { } impl<'a> Serialize for NumpyI64Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -609,6 +614,7 @@ pub struct DataTypeI64 { } impl Serialize for DataTypeI64 { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -629,7 +635,7 @@ impl<'a> NumpyI32Array<'a> { } impl<'a> Serialize for NumpyI32Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -648,6 +654,7 @@ pub struct DataTypeI32 { } impl Serialize for DataTypeI32 { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -668,7 +675,7 @@ impl<'a> NumpyI16Array<'a> 
{ } impl<'a> Serialize for NumpyI16Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -687,6 +694,7 @@ pub struct DataTypeI16 { } impl Serialize for DataTypeI16 { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -707,7 +715,7 @@ impl<'a> NumpyI8Array<'a> { } impl<'a> Serialize for NumpyI8Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -726,6 +734,7 @@ pub struct DataTypeI8 { } impl Serialize for DataTypeI8 { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -746,7 +755,7 @@ impl<'a> NumpyU8Array<'a> { } impl<'a> Serialize for NumpyU8Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -765,6 +774,7 @@ pub struct DataTypeU8 { } impl Serialize for DataTypeU8 { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -785,7 +795,7 @@ impl<'a> NumpyBoolArray<'a> { } impl<'a> Serialize for NumpyBoolArray<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -804,6 +814,7 @@ pub struct DataTypeBool { } impl Serialize for DataTypeBool { + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -879,7 +890,7 @@ pub struct NumpyInt8 { } impl Serialize for NumpyInt8 { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -896,7 +907,7 @@ pub struct NumpyInt16 { } impl Serialize for NumpyInt16 { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -913,7 +924,7 @@ pub struct NumpyInt32 { } impl Serialize for NumpyInt32 { - #[cfg_attr(feature = "optimize", 
optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -930,7 +941,7 @@ pub struct NumpyInt64 { } impl Serialize for NumpyInt64 { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -947,7 +958,7 @@ pub struct NumpyUint8 { } impl Serialize for NumpyUint8 { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -964,7 +975,7 @@ pub struct NumpyUint16 { } impl Serialize for NumpyUint16 { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -981,7 +992,7 @@ pub struct NumpyUint32 { } impl Serialize for NumpyUint32 { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -998,7 +1009,7 @@ pub struct NumpyUint64 { } impl Serialize for NumpyUint64 { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1015,7 +1026,7 @@ pub struct NumpyFloat32 { } impl Serialize for NumpyFloat32 { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1032,7 +1043,7 @@ pub struct NumpyFloat64 { } impl Serialize for NumpyFloat64 { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1049,7 +1060,7 @@ pub struct NumpyBool { } impl Serialize for NumpyBool { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1083,7 +1094,6 @@ pub enum NumpyDatetimeUnit { impl fmt::Display for NumpyDatetimeUnit { #[cold] - #[cfg_attr(feature = "optimize", optimize(size))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let unit = match self { 
Self::NaT => "NaT", @@ -1114,7 +1124,6 @@ enum NumpyDateTimeError { impl NumpyDateTimeError { #[cold] - #[cfg_attr(feature = "optimize", optimize(size))] fn into_serde_err(self) -> T { let err = match self { Self::UnsupportedUnit(unit) => format!("unsupported numpy.datetime64 unit: {}", unit), @@ -1136,6 +1145,7 @@ impl NumpyDatetimeUnit { /// object rather than using the `descr` field of the `__array_struct__` /// because that field isn't populated for datetime64 arrays; see /// https://github.com/numpy/numpy/issues/5350. + #[cold] #[cfg_attr(feature = "optimize", optimize(size))] fn from_pyobject(ptr: *mut PyObject) -> Self { let dtype = ffi!(PyObject_GetAttr(ptr, DTYPE_STR)); @@ -1173,6 +1183,7 @@ impl NumpyDatetimeUnit { /// Return a `NumpyDatetime64Repr` for a value in array with this unit. /// /// Returns an `Err(NumpyDateTimeError)` if the value is invalid for this unit. + #[cold] #[cfg_attr(feature = "optimize", optimize(size))] fn datetime(&self, val: i64, opts: Opt) -> Result { match self { @@ -1245,7 +1256,7 @@ impl<'a> NumpyDatetime64Array<'a> { } impl<'a> Serialize for NumpyDatetime64Array<'a> { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1313,7 +1324,7 @@ impl DateTimeLike for NumpyDatetime64Repr { } impl Serialize for NumpyDatetime64Repr { - #[cfg_attr(feature = "optimize", optimize(size))] + #[cold] fn serialize(&self, serializer: S) -> Result where S: Serializer, diff --git a/src/serialize/per_type/pyenum.rs b/src/serialize/per_type/pyenum.rs index 1972f8a2..06baaffd 100644 --- a/src/serialize/per_type/pyenum.rs +++ b/src/serialize/per_type/pyenum.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::opt::*; -use crate::serialize::serializer::*; +use crate::serialize::serializer::PyObjectSerializer; use crate::typeref::*; use serde::ser::{Serialize, Serializer}; use std::ptr::NonNull; diff --git a/src/serialize/per_type/tuple.rs 
b/src/serialize/per_type/tuple.rs index 7ad76e42..4f23abb6 100644 --- a/src/serialize/per_type/tuple.rs +++ b/src/serialize/per_type/tuple.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::opt::*; -use crate::serialize::serializer::*; +use crate::opt::Opt; +use crate::serialize::serializer::PyObjectSerializer; use serde::ser::{Serialize, SerializeSeq, Serializer}; use std::ptr::NonNull; diff --git a/src/serialize/per_type/unicode.rs b/src/serialize/per_type/unicode.rs index 76994fd7..a44cda09 100644 --- a/src/serialize/per_type/unicode.rs +++ b/src/serialize/per_type/unicode.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::serialize::error::*; -use crate::str::*; +use crate::serialize::error::SerializeError; +use crate::str::{unicode_to_str, unicode_to_str_via_ffi}; use serde::ser::{Serialize, Serializer}; diff --git a/src/serialize/per_type/uuid.rs b/src/serialize/per_type/uuid.rs index e4079475..badb759e 100644 --- a/src/serialize/per_type/uuid.rs +++ b/src/serialize/per_type/uuid.rs @@ -1,12 +1,13 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::typeref::*; +use crate::typeref::INT_ATTR_STR; use serde::ser::{Serialize, Serializer}; use std::io::Write; use std::os::raw::c_uchar; pub type UUIDBuffer = arrayvec::ArrayVec; +#[repr(transparent)] pub struct UUID { ptr: *mut pyo3_ffi::PyObject, } diff --git a/src/serialize/serializer.rs b/src/serialize/serializer.rs index 24f0464e..8af9b4f8 100644 --- a/src/serialize/serializer.rs +++ b/src/serialize/serializer.rs @@ -2,14 +2,12 @@ use crate::opt::*; use crate::serialize::per_type::*; -use crate::serialize::writer::*; +use crate::serialize::writer::{to_writer, to_writer_pretty, BytesWriter}; use crate::typeref::*; use serde::ser::{Serialize, Serializer}; use std::io::Write; use std::ptr::NonNull; -use crate::serialize::json::{to_writer, to_writer_pretty}; - pub const RECURSION_LIMIT: u8 = 255; pub fn serialize( diff --git a/src/serialize/writer.rs 
b/src/serialize/writer/byteswriter.rs similarity index 100% rename from src/serialize/writer.rs rename to src/serialize/writer/byteswriter.rs diff --git a/src/serialize/writer/escape.rs b/src/serialize/writer/escape.rs new file mode 100644 index 00000000..7653c7c8 --- /dev/null +++ b/src/serialize/writer/escape.rs @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +// This is an adaptation of `src/value/ser.rs` from serde-json. + +use crate::serialize::writer::WriteExt; +use std::io; + +pub enum CharEscape { + /// An escaped quote `"` + Quote, + /// An escaped reverse solidus `\` + ReverseSolidus, + /// An escaped backspace character (usually escaped as `\b`) + Backspace, + /// An escaped form feed character (usually escaped as `\f`) + FormFeed, + /// An escaped line feed character (usually escaped as `\n`) + LineFeed, + /// An escaped carriage return character (usually escaped as `\r`) + CarriageReturn, + /// An escaped tab character (usually escaped as `\t`) + Tab, + /// An escaped ASCII plane control character (usually escaped as + /// `\u00XX` where `XX` are two hex characters) + AsciiControl(u8), +} + +impl CharEscape { + #[inline] + fn from_escape_table(escape: u8, byte: u8) -> CharEscape { + match escape { + self::BB => CharEscape::Backspace, + self::TT => CharEscape::Tab, + self::NN => CharEscape::LineFeed, + self::FF => CharEscape::FormFeed, + self::RR => CharEscape::CarriageReturn, + self::QU => CharEscape::Quote, + self::BS => CharEscape::ReverseSolidus, + self::UU => CharEscape::AsciiControl(byte), + _ => unreachable!(), + } + } +} + +#[inline] +fn write_char_escape(writer: &mut W, char_escape: CharEscape) -> io::Result<()> +where + W: ?Sized + io::Write + WriteExt, +{ + use CharEscape::*; + + let s = match char_escape { + Quote => b"\\\"", + ReverseSolidus => b"\\\\", + Backspace => b"\\b", + FormFeed => b"\\f", + LineFeed => b"\\n", + CarriageReturn => b"\\r", + Tab => b"\\t", + AsciiControl(byte) => { + static HEX_DIGITS: [u8; 16] = 
*b"0123456789abcdef"; + let bytes = &[ + b'\\', + b'u', + b'0', + b'0', + HEX_DIGITS[(byte >> 4) as usize], + HEX_DIGITS[(byte & 0xF) as usize], + ]; + return unsafe { writer.write_reserved_fragment(bytes) }; + } + }; + + unsafe { writer.write_reserved_fragment(s) } +} + +#[inline(never)] +pub fn format_escaped_str(writer: &mut W, value: &str) -> io::Result<()> +where + W: ?Sized + io::Write + WriteExt, +{ + let len = value.len(); + + if len == 0 { + reserve_minimum!(writer); + return unsafe { writer.write_reserved_fragment(b"\"\"") }; + } + unsafe { + let mut escapes: u8 = __; + let mut idx = 0; + let as_bytes = value.as_bytes(); + while idx < len.saturating_sub(8) { + escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx) as usize); + escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 1) as usize); + escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 2) as usize); + escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 3) as usize); + escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 4) as usize); + escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 5) as usize); + escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 6) as usize); + escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx + 7) as usize); + if unlikely!(escapes != __) { + return format_escaped_str_with_escapes(writer, as_bytes, idx); + } + idx += 8; + } + while idx < len { + escapes |= *ESCAPE.get_unchecked(*as_bytes.get_unchecked(idx) as usize); + if unlikely!(escapes != __) { + return format_escaped_str_with_escapes(writer, as_bytes, idx); + } + idx += 1; + } + } + + writer.write_str(value) +} + +fn format_escaped_str_with_escapes( + writer: &mut W, + value: &[u8], + initial: usize, +) -> io::Result<()> +where + W: ?Sized + io::Write + WriteExt, +{ + writer.reserve((value.len() * 8) + 2); + unsafe { + writer.write_reserved_punctuation(b'"').unwrap(); + if initial > 0 { + writer + 
.write_reserved_fragment(value.get_unchecked(0..initial)) + .unwrap(); + } + format_escaped_str_contents(writer, value.get_unchecked(initial..)).unwrap(); + writer.write_reserved_punctuation(b'"').unwrap(); + }; + Ok(()) +} + +fn format_escaped_str_contents(writer: &mut W, bytes: &[u8]) -> io::Result<()> +where + W: ?Sized + io::Write + WriteExt, +{ + let len = bytes.len(); + let mut start = 0; + let mut idx = 0; + + let mut escape: u8; + loop { + if idx < len.saturating_sub(4) { + escape = 0; + unsafe { + escape |= *ESCAPE.get_unchecked(*bytes.get_unchecked(idx) as usize); + escape |= *ESCAPE.get_unchecked(*bytes.get_unchecked(idx + 1) as usize); + escape |= *ESCAPE.get_unchecked(*bytes.get_unchecked(idx + 2) as usize); + escape |= *ESCAPE.get_unchecked(*bytes.get_unchecked(idx + 3) as usize); + } + if escape == 0 { + idx += 4; + continue; + } + } + + let byte = unsafe { *bytes.get_unchecked(idx) }; + escape = unsafe { *ESCAPE.get_unchecked(byte as usize) }; + if escape == 0 { + idx += 1; + if idx == len { + break; + } else { + continue; + } + } + + if start < idx { + unsafe { + writer + .write_reserved_fragment(bytes.get_unchecked(start..idx)) + .unwrap() + }; + } + + let char_escape = CharEscape::from_escape_table(escape, byte); + write_char_escape(writer, char_escape)?; + + idx += 1; + start = idx; + if idx == len { + break; + } + } + + if start != len { + unsafe { + writer + .write_reserved_fragment(bytes.get_unchecked(start..len)) + .unwrap() + }; + } + Ok(()) +} + +const BB: u8 = b'b'; // \x08 +const TT: u8 = b't'; // \x09 +const NN: u8 = b'n'; // \x0A +const FF: u8 = b'f'; // \x0C +const RR: u8 = b'r'; // \x0D +const QU: u8 = b'"'; // \x22 +const BS: u8 = b'\\'; // \x5C +const UU: u8 = b'u'; // \x00...\x1F except the ones above +const __: u8 = 0; + +// Lookup table of escape sequences. A value of b'x' at index i means that byte +// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped. 
+const ESCAPE: [u8; 256] = [ + // 1 2 3 4 5 6 7 8 9 A B C D E F + UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0 + UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1 + __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4 + __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F +]; diff --git a/src/serialize/writer/formatter.rs b/src/serialize/writer/formatter.rs new file mode 100644 index 00000000..fd13fd82 --- /dev/null +++ b/src/serialize/writer/formatter.rs @@ -0,0 +1,451 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +// This is an adaptation of `src/value/ser.rs` from serde-json. 
+ +use crate::serialize::writer::WriteExt; +use std::io; + +pub trait Formatter { + #[inline] + fn write_null(&mut self, writer: &mut W) -> io::Result<()> + where + W: ?Sized + io::Write + WriteExt, + { + unsafe { + reserve_minimum!(writer); + writer.write_reserved_fragment(b"null") + } + } + + #[inline] + fn write_bool(&mut self, writer: &mut W, value: bool) -> io::Result<()> + where + W: ?Sized + io::Write + WriteExt, + { + let s = if value { + b"true" as &[u8] + } else { + b"false" as &[u8] + }; + reserve_minimum!(writer); + unsafe { writer.write_reserved_fragment(s) } + } + + #[inline] + fn write_i8(&mut self, writer: &mut W, value: i8) -> io::Result<()> + where + W: ?Sized + io::Write + WriteExt, + { + unsafe { + reserve_minimum!(writer); + let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); + writer.set_written(len); + } + Ok(()) + } + + #[inline] + fn write_i16(&mut self, writer: &mut W, value: i16) -> io::Result<()> + where + W: ?Sized + io::Write + WriteExt, + { + unsafe { + reserve_minimum!(writer); + let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); + writer.set_written(len); + } + Ok(()) + } + + #[inline] + fn write_i32(&mut self, writer: &mut W, value: i32) -> io::Result<()> + where + W: ?Sized + io::Write + WriteExt, + { + unsafe { + reserve_minimum!(writer); + let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); + writer.set_written(len); + } + Ok(()) + } + + #[inline] + fn write_i64(&mut self, writer: &mut W, value: i64) -> io::Result<()> + where + W: ?Sized + io::Write + WriteExt, + { + unsafe { + reserve_minimum!(writer); + let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value); + writer.set_written(len); + } + Ok(()) + } + + #[inline] + fn write_i128(&mut self, _writer: &mut W, _value: i128) -> io::Result<()> + where + W: ?Sized + io::Write, + { + unreachable!(); + } + + #[inline] + fn write_u8(&mut self, writer: &mut W, value: u8) -> io::Result<()> + where + W: ?Sized + io::Write + WriteExt, + 
{
+        unsafe {
+            reserve_minimum!(writer);
+            let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value);
+            writer.set_written(len);
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn write_u16<W>(&mut self, writer: &mut W, value: u16) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        unsafe {
+            reserve_minimum!(writer);
+            let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value);
+            writer.set_written(len);
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn write_u32<W>(&mut self, writer: &mut W, value: u32) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        unsafe {
+            reserve_minimum!(writer);
+            let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value);
+            writer.set_written(len);
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn write_u64<W>(&mut self, writer: &mut W, value: u64) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        unsafe {
+            reserve_minimum!(writer);
+            let len = itoap::write_to_ptr(writer.as_mut_buffer_ptr(), value);
+            writer.set_written(len);
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn write_u128<W>(&mut self, _writer: &mut W, _value: u128) -> io::Result<()>
+    where
+        W: ?Sized + io::Write,
+    {
+        unreachable!();
+    }
+
+    #[inline]
+    fn write_f32<W>(&mut self, writer: &mut W, value: f32) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        unsafe {
+            reserve_minimum!(writer);
+            let len = ryu::raw::format32(value, writer.as_mut_buffer_ptr());
+            writer.set_written(len);
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn write_f64<W>(&mut self, writer: &mut W, value: f64) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        unsafe {
+            reserve_minimum!(writer);
+            let len = ryu::raw::format64(value, writer.as_mut_buffer_ptr());
+            writer.set_written(len);
+        }
+        Ok(())
+    }
+
+    fn write_number_str<W>(&mut self, _writer: &mut W, _value: &str) -> io::Result<()>
+    where
+        W: ?Sized + io::Write,
+    {
+        unreachable!();
+    }
+
+    #[inline]
+    fn begin_string<W>(&mut self, _writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        unreachable!();
+    }
+
+    #[inline]
+    fn end_string<W>(&mut self, _writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        unreachable!();
+    }
+
+    #[inline]
+    fn write_string_fragment<W>(&mut self, _writer: &mut W, _fragment: &str) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        unreachable!();
+    }
+
+    #[inline]
+    fn begin_array<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        reserve_minimum!(writer);
+        unsafe { writer.write_reserved_punctuation(b'[').unwrap() };
+        Ok(())
+    }
+
+    #[inline]
+    fn end_array<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        reserve_minimum!(writer);
+        unsafe { writer.write_reserved_punctuation(b']').unwrap() };
+        Ok(())
+    }
+
+    #[inline]
+    fn begin_array_value<W>(&mut self, writer: &mut W, first: bool) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        if !first {
+            unsafe {
+                reserve_minimum!(writer);
+                writer.write_reserved_punctuation(b',').unwrap()
+            }
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn end_array_value<W>(&mut self, _writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write,
+    {
+        Ok(())
+    }
+
+    #[inline]
+    fn begin_object<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        reserve_minimum!(writer);
+        unsafe {
+            writer.write_reserved_punctuation(b'{').unwrap();
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn end_object<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        reserve_minimum!(writer);
+        unsafe {
+            writer.write_reserved_punctuation(b'}').unwrap();
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn begin_object_key<W>(&mut self, writer: &mut W, first: bool) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        if !first {
+            unsafe {
+                reserve_minimum!(writer);
+                writer.write_reserved_punctuation(b',').unwrap();
+            }
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn end_object_key<W>(&mut self, _writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write,
+    {
+        Ok(())
+    }
+
+    #[inline]
+    fn begin_object_value<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        reserve_minimum!(writer);
+        unsafe { writer.write_reserved_punctuation(b':') }
+    }
+
+    #[inline]
+    fn end_object_value<W>(&mut self, _writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write,
+    {
+        Ok(())
+    }
+}
+
+pub struct CompactFormatter;
+
+impl Formatter for CompactFormatter {}
+
+pub struct PrettyFormatter {
+    current_indent: usize,
+    has_value: bool,
+}
+
+impl PrettyFormatter {
+    pub const fn new() -> Self {
+        PrettyFormatter {
+            current_indent: 0,
+            has_value: false,
+        }
+    }
+}
+
+impl Formatter for PrettyFormatter {
+    #[inline]
+    fn begin_array<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        self.current_indent += 1;
+        self.has_value = false;
+        reserve_minimum!(writer);
+        unsafe { writer.write_reserved_punctuation(b'[') }
+    }
+
+    #[inline]
+    fn end_array<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        self.current_indent -= 1;
+        let num_spaces = self.current_indent * 2;
+        writer.reserve(num_spaces + 2);
+
+        unsafe {
+            if self.has_value {
+                writer.write_reserved_punctuation(b'\n')?;
+                writer.write_reserved_indent(num_spaces)?;
+            }
+            writer.write_reserved_punctuation(b']')
+        }
+    }
+
+    #[inline]
+    fn begin_array_value<W>(&mut self, writer: &mut W, first: bool) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        let num_spaces = self.current_indent * 2;
+        writer.reserve(num_spaces + 2);
+
+        unsafe {
+            writer.write_reserved_fragment(if first { b"\n" } else { b",\n" })?;
+            writer.write_reserved_indent(num_spaces)?;
+        };
+        Ok(())
+    }
+
+    #[inline]
+    fn end_array_value<W>(&mut self, _writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write,
+    {
+        self.has_value = true;
+        Ok(())
+    }
+
+    #[inline]
+    fn begin_object<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        self.current_indent += 1;
+        self.has_value = false;
+
+        reserve_minimum!(writer);
+        unsafe { writer.write_reserved_punctuation(b'{') }
+    }
+
+    #[inline]
+    fn end_object<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        self.current_indent -= 1;
+        let num_spaces = self.current_indent * 2;
+        writer.reserve(num_spaces + 2);
+
+        unsafe {
+            if self.has_value {
+                writer.write_reserved_punctuation(b'\n')?;
+                writer.write_reserved_indent(num_spaces)?;
+            }
+
+            writer.write_reserved_punctuation(b'}')
+        }
+    }
+
+    #[inline]
+    fn begin_object_key<W>(&mut self, writer: &mut W, first: bool) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        let num_spaces = self.current_indent * 2;
+        writer.reserve(num_spaces + 2);
+        unsafe {
+            writer.write_reserved_fragment(if first { b"\n" } else { b",\n" })?;
+            writer.write_reserved_indent(num_spaces)?;
+        }
+        Ok(())
+    }
+
+    #[inline]
+    fn begin_object_value<W>(&mut self, writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write + WriteExt,
+    {
+        reserve_minimum!(writer);
+        unsafe { writer.write_reserved_fragment(b": ").unwrap() };
+        Ok(())
+    }
+
+    #[inline]
+    fn end_object_value<W>(&mut self, _writer: &mut W) -> io::Result<()>
+    where
+        W: ?Sized + io::Write,
+    {
+        self.has_value = true;
+        Ok(())
+    }
+}
diff --git a/src/serialize/writer/json.rs b/src/serialize/writer/json.rs
new file mode 100644
index 00000000..eec755a3
--- /dev/null
+++ b/src/serialize/writer/json.rs
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: (Apache-2.0 OR MIT)
+// This is an adaptation of `src/value/ser.rs` from serde-json.
+
+use crate::serialize::writer::formatter::{CompactFormatter, Formatter, PrettyFormatter};
+use crate::serialize::writer::WriteExt;
+use serde::ser::{self, Impossible, Serialize};
+use serde_json::error::{Error, Result};
+use std::io;
+
+pub struct Serializer<W, F = CompactFormatter> {
+    writer: W,
+    formatter: F,
+}
+
+impl<W> Serializer<W>
+where
+    W: io::Write + WriteExt,
+{
+    #[inline]
+    pub fn new(writer: W) -> Self {
+        Serializer::with_formatter(writer, CompactFormatter)
+    }
+}
+
+impl<W> Serializer<W, PrettyFormatter>
+where
+    W: io::Write + WriteExt,
+{
+    #[inline]
+    pub fn pretty(writer: W) -> Self {
+        Serializer::with_formatter(writer, PrettyFormatter::new())
+    }
+}
+
+impl<W, F> Serializer<W, F>
+where
+    W: io::Write + WriteExt,
+    F: Formatter,
+{
+    #[inline]
+    pub fn with_formatter(writer: W, formatter: F) -> Self {
+        Serializer { writer, formatter }
+    }
+
+    #[inline]
+    pub fn into_inner(self) -> W {
+        self.writer
+    }
+}
+
+impl<'a, W, F> ser::Serializer for &'a mut Serializer<W, F>
+where
+    W: io::Write + WriteExt,
+    F: Formatter,
+{
+    type Ok = ();
+    type Error = Error;
+
+    type SerializeSeq = Compound<'a, W, F>;
+    type SerializeTuple = Impossible<(), Error>;
+    type SerializeTupleStruct = Impossible<(), Error>;
+    type SerializeTupleVariant = Impossible<(), Error>;
+    type SerializeMap = Compound<'a, W, F>;
+    type SerializeStruct = Impossible<(), Error>;
+    type SerializeStructVariant = Impossible<(), Error>;
+
+    #[inline]
+    fn serialize_bool(self, value: bool) -> Result<()> {
+        self.formatter
+            .write_bool(&mut self.writer, value)
+            .map_err(Error::io)
+    }
+
+    #[cold]
+    fn serialize_i8(self, value: i8) -> Result<()> {
+        self.formatter
+            .write_i8(&mut self.writer, value)
+            .map_err(Error::io)
+    }
+
+    #[cold]
+    fn serialize_i16(self, value: i16) -> Result<()> {
+        self.formatter
+            .write_i16(&mut self.writer, value)
+            .map_err(Error::io)
+    }
+
+    #[inline]
+    fn serialize_i32(self, value: i32) -> Result<()> {
+        self.formatter
+            .write_i32(&mut self.writer, value)
+            .map_err(Error::io)
+    }
+
+    #[inline]
+    fn serialize_i64(self, value: i64) -> Result<()> {
+        self.formatter
+            .write_i64(&mut self.writer, value)
+            .map_err(Error::io)
+    }
+
+    fn serialize_i128(self, _value: i128) -> Result<()> {
+        unreachable!();
+    }
+
+    #[cold]
+    fn serialize_u8(self, value: u8) -> Result<()> {
+        self.formatter
+            .write_u8(&mut self.writer, value)
+            .map_err(Error::io)
+    }
+
+    #[cold]
+    fn serialize_u16(self, value: u16) -> Result<()> {
+        self.formatter
+            .write_u16(&mut self.writer, value)
+            .map_err(Error::io)
+    }
+
+    #[inline]
+    fn serialize_u32(self, value: u32) -> Result<()> {
+        self.formatter
+            .write_u32(&mut self.writer, value)
+            .map_err(Error::io)
+    }
+
+    #[inline]
+    fn serialize_u64(self, value: u64) -> Result<()> {
+        self.formatter
+            .write_u64(&mut self.writer, value)
+            .map_err(Error::io)
+    }
+
+    fn serialize_u128(self, _value: u128) -> Result<()> {
+        unreachable!();
+    }
+
+    #[inline]
+    fn serialize_f32(self, value: f32) -> Result<()> {
+        if unlikely!(value.is_infinite() || value.is_nan()) {
+            self.serialize_unit()
+        } else {
+            self.formatter
+                .write_f32(&mut self.writer, value)
+                .map_err(Error::io)
+        }
+    }
+    #[inline]
+    fn serialize_f64(self, value: f64) -> Result<()> {
+        if unlikely!(value.is_infinite() || value.is_nan()) {
+            self.serialize_unit()
+        } else {
+            self.formatter
+                .write_f64(&mut self.writer, value)
+                .map_err(Error::io)
+        }
+    }
+
+    fn serialize_char(self, _value: char) -> Result<()> {
+        unreachable!();
+    }
+
+    #[inline]
+    fn serialize_str(self, value: &str) -> Result<()> {
+        format_escaped_str(&mut self.writer, value).map_err(Error::io)
+    }
+
+    fn serialize_bytes(self, value: &[u8]) -> Result<()> {
+        self.writer.reserve(value.len());
+        unsafe { self.writer.write_reserved_fragment(value).unwrap() };
+        Ok(())
+    }
+
+    #[inline]
+    fn serialize_unit(self) -> Result<()> {
+        self.formatter
+            .write_null(&mut self.writer)
+            .map_err(Error::io)
+    }
+
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+    ) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_newtype_struct<T>(self, _name: &'static str, _value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        unreachable!();
+    }
+
+    fn serialize_newtype_variant<T>(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _value: &T,
+    ) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        unreachable!();
+    }
+
+    #[inline]
+    fn serialize_none(self) -> Result<()> {
+        self.serialize_unit()
+    }
+
+    #[inline]
+    fn serialize_some<T>(self, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        value.serialize(self)
+    }
+
+    #[inline(always)]
+    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq> {
+        self.formatter
+            .begin_array(&mut self.writer)
+            .map_err(Error::io)?;
+        Ok(Compound {
+            ser: self,
+            state: State::First,
+        })
+    }
+
+    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple> {
+        unreachable!();
+    }
+
+    fn serialize_tuple_struct(
+        self,
+        _name: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeTupleStruct> {
+        unreachable!();
+    }
+
+    fn serialize_tuple_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeTupleVariant> {
+        unreachable!();
+    }
+
+    #[inline(always)]
+    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> {
+        self.formatter
+            .begin_object(&mut self.writer)
+            .map_err(Error::io)?;
+        Ok(Compound {
+            ser: self,
+            state: State::First,
+        })
+    }
+
+    fn serialize_struct(self, _name: &'static str, _len: usize) -> Result<Self::SerializeStruct> {
+        unreachable!();
+    }
+
+    fn serialize_struct_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeStructVariant> {
+        unreachable!();
+    }
+}
+
+#[derive(Eq, PartialEq)]
+pub enum State {
+    First,
+    Rest,
+}
+
+pub struct Compound<'a, W: 'a, F: 'a> {
+    ser: &'a mut Serializer<W, F>,
+    state: State,
+}
+
+impl<'a, W, F> ser::SerializeSeq for Compound<'a, W, F>
+where
+    W: io::Write + WriteExt,
+    F: Formatter,
+{
+    type Ok = ();
+    type Error = Error;
+
+    #[inline]
+    fn serialize_element<T>(&mut self, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        self.ser
+            .formatter
+            .begin_array_value(&mut self.ser.writer, self.state == State::First)
+            .map_err(Error::io)?;
+        self.state = State::Rest;
+        value.serialize(&mut *self.ser)?;
+        self.ser
+            .formatter
+            .end_array_value(&mut self.ser.writer)
+            .map_err(Error::io)
+    }
+
+    #[inline]
+    fn end(self) -> Result<()> {
+        self.ser
+            .formatter
+            .end_array(&mut self.ser.writer)
+            .map_err(Error::io)
+    }
+}
+
+impl<'a, W, F> ser::SerializeMap for Compound<'a, W, F>
+where
+    W: io::Write + WriteExt,
+    F: Formatter,
+{
+    type Ok = ();
+    type Error = Error;
+
+    fn serialize_entry<K, V>(&mut self, _key: &K, _value: &V) -> Result<()>
+    where
+        K: ?Sized + Serialize,
+        V: ?Sized + Serialize,
+    {
+        unreachable!()
+    }
+
+    #[inline]
+    fn serialize_key<T>(&mut self, key: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        self.ser
+            .formatter
+            .begin_object_key(&mut self.ser.writer, self.state == State::First)
+            .map_err(Error::io)?;
+        self.state = State::Rest;
+
+        key.serialize(MapKeySerializer { ser: self.ser })?;
+
+        self.ser
+            .formatter
+            .end_object_key(&mut self.ser.writer)
+            .map_err(Error::io)
+    }
+
+    #[inline]
+    fn serialize_value<T>(&mut self, value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        self.ser
+            .formatter
+            .begin_object_value(&mut self.ser.writer)
+            .map_err(Error::io)?;
+        value.serialize(&mut *self.ser)?;
+        self.ser
+            .formatter
+            .end_object_value(&mut self.ser.writer)
+            .map_err(Error::io)
+    }
+
+    #[inline]
+    fn end(self) -> Result<()> {
+        self.ser
+            .formatter
+            .end_object(&mut self.ser.writer)
+            .map_err(Error::io)
+    }
+}
+
+#[repr(transparent)]
+struct MapKeySerializer<'a, W: 'a, F: 'a> {
+    ser: &'a mut Serializer<W, F>,
+}
+
+impl<'a, W, F> ser::Serializer for MapKeySerializer<'a, W, F>
+where
+    W: io::Write + WriteExt,
+    F: Formatter,
+{
+    type Ok = ();
+    type Error = Error;
+    type SerializeSeq = Impossible<(), Error>;
+    type SerializeTuple = Impossible<(), Error>;
+    type SerializeTupleStruct = Impossible<(), Error>;
+    type SerializeTupleVariant = Impossible<(), Error>;
+    type SerializeMap = Impossible<(), Error>;
+    type SerializeStruct = Impossible<(), Error>;
+    type SerializeStructVariant = Impossible<(), Error>;
+
+    #[inline]
+    fn serialize_str(self, value: &str) -> Result<()> {
+        self.ser.serialize_str(value)
+    }
+
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+    ) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_newtype_struct<T>(self, _name: &'static str, _value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        unreachable!();
+    }
+    fn serialize_bool(self, _value: bool) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_i8(self, _value: i8) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_i16(self, _value: i16) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_i32(self, _value: i32) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_i64(self, _value: i64) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_i128(self, _value: i128) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_u8(self, _value: u8) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_u16(self, _value: u16) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_u32(self, _value: u32) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_u64(self, _value: u64) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_u128(self, _value: u128) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_f32(self, _value: f32) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_f64(self, _value: f64) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_char(self, _value: char) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_bytes(self, _value: &[u8]) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_unit(self) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_newtype_variant<T>(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _value: &T,
+    ) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        unreachable!();
+    }
+
+    fn serialize_none(self) -> Result<()> {
+        unreachable!();
+    }
+
+    fn serialize_some<T>(self, _value: &T) -> Result<()>
+    where
+        T: ?Sized + Serialize,
+    {
+        unreachable!();
+    }
+
+    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq> {
+        unreachable!();
+    }
+
+    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple> {
+        unreachable!();
+    }
+
+    fn serialize_tuple_struct(
+        self,
+        _name: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeTupleStruct> {
+        unreachable!();
+    }
+
+    fn serialize_tuple_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeTupleVariant> {
+        unreachable!();
+    }
+
+    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> {
+        unreachable!();
+    }
+
+    fn serialize_struct(self, _name: &'static str, _len: usize) -> Result<Self::SerializeStruct> {
+        unreachable!();
+    }
+
+    fn serialize_struct_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeStructVariant> {
+        unreachable!();
+    }
+}
+
+#[inline(always)]
+fn format_escaped_str<W>(writer: &mut W, value: &str) -> io::Result<()>
+where
+    W: ?Sized + io::Write + WriteExt,
+{
+    crate::serialize::writer::escape::format_escaped_str(writer, value)
+}
+
+#[inline]
+pub fn to_writer<W, T>(writer: W, value: &T) -> Result<()>
+where
+    W: io::Write + WriteExt,
+    T: ?Sized + Serialize,
+{
+    let mut ser = Serializer::new(writer);
+    value.serialize(&mut ser)
+}
+
+#[inline]
+pub fn to_writer_pretty<W, T>(writer: W, value: &T) -> Result<()>
+where
+    W: io::Write + WriteExt,
+    T: ?Sized + Serialize,
+{
+    let mut ser = Serializer::pretty(writer);
+    value.serialize(&mut ser)
+}
diff --git a/src/serialize/writer/mod.rs b/src/serialize/writer/mod.rs
new file mode 100644
index 00000000..47fdf503
--- /dev/null
+++ b/src/serialize/writer/mod.rs
@@ -0,0 +1,9
@@
+// SPDX-License-Identifier: Apache-2.0
+
+mod byteswriter;
+mod escape;
+mod formatter;
+mod json;
+
+pub use byteswriter::{BytesWriter, WriteExt};
+pub use json::{to_writer, to_writer_pretty};
diff --git a/src/typeref.rs b/src/typeref.rs
index 55dad15c..dd1c2dbb 100644
--- a/src/typeref.rs
+++ b/src/typeref.rs
@@ -4,9 +4,13 @@ use crate::ffi::orjson_fragmenttype_new;
 use ahash::RandomState;
 use once_cell::race::{OnceBool, OnceBox};
 use pyo3_ffi::*;
+#[cfg(feature = "yyjson")]
 use std::cell::UnsafeCell;
+#[cfg(feature = "yyjson")]
 use std::mem::MaybeUninit;
-use std::os::raw::{c_char, c_void};
+use std::os::raw::c_char;
+#[cfg(feature = "yyjson")]
+use std::os::raw::c_void;
 use std::ptr::{null_mut, NonNull};
 
 pub struct NumpyTypes {
@@ -148,6 +152,7 @@ pub fn init_typerefs() {
 #[cfg_attr(feature = "optimize", optimize(size))]
 fn _init_typerefs_impl() -> bool {
     unsafe {
+        debug_assert!(crate::opt::MAX_OPT < u16::MAX as i32);
         assert!(crate::deserialize::KEY_MAP
             .set(crate::deserialize::KeyMap::default())
             .is_ok());
@@ -236,11 +241,9 @@ unsafe fn look_up_json_exc() -> *mut PyObject {
 
 #[cold]
 #[cfg_attr(feature = "optimize", optimize(size))]
-unsafe fn look_up_numpy_type(numpy_module: *mut PyObject, np_type: &str) -> *mut PyTypeObject {
-    let mod_dict = PyObject_GenericGetDict(numpy_module, null_mut());
-    let ptr = PyMapping_GetItemString(mod_dict, np_type.as_ptr() as *const c_char);
+unsafe fn look_up_numpy_type(numpy_module_dict: *mut PyObject, np_type: &str) -> *mut PyTypeObject {
+    let ptr = PyMapping_GetItemString(numpy_module_dict, np_type.as_ptr() as *const c_char);
     Py_XDECREF(ptr);
-    Py_XDECREF(mod_dict);
     ptr as *mut PyTypeObject
 }
 
@@ -253,21 +256,23 @@ pub fn load_numpy_types() -> Box<Option<NonNull<NumpyTypes>>> {
             PyErr_Clear();
             return Box::new(None);
         }
+        let numpy_module_dict = PyObject_GenericGetDict(numpy, null_mut());
         let types = Box::new(NumpyTypes {
-            array: look_up_numpy_type(numpy, "ndarray\0"),
-            float32: look_up_numpy_type(numpy, "float32\0"),
-            float64: look_up_numpy_type(numpy, "float64\0"),
-            int8: look_up_numpy_type(numpy, "int8\0"),
-            int16: look_up_numpy_type(numpy, "int16\0"),
-            int32: look_up_numpy_type(numpy, "int32\0"),
-            int64: look_up_numpy_type(numpy, "int64\0"),
-            uint16: look_up_numpy_type(numpy, "uint16\0"),
-            uint32: look_up_numpy_type(numpy, "uint32\0"),
-            uint64: look_up_numpy_type(numpy, "uint64\0"),
-            uint8: look_up_numpy_type(numpy, "uint8\0"),
-            bool_: look_up_numpy_type(numpy, "bool_\0"),
-            datetime64: look_up_numpy_type(numpy, "datetime64\0"),
+            array: look_up_numpy_type(numpy_module_dict, "ndarray\0"),
+            float32: look_up_numpy_type(numpy_module_dict, "float32\0"),
+            float64: look_up_numpy_type(numpy_module_dict, "float64\0"),
+            int8: look_up_numpy_type(numpy_module_dict, "int8\0"),
+            int16: look_up_numpy_type(numpy_module_dict, "int16\0"),
+            int32: look_up_numpy_type(numpy_module_dict, "int32\0"),
+            int64: look_up_numpy_type(numpy_module_dict, "int64\0"),
+            uint16: look_up_numpy_type(numpy_module_dict, "uint16\0"),
+            uint32: look_up_numpy_type(numpy_module_dict, "uint32\0"),
+            uint64: look_up_numpy_type(numpy_module_dict, "uint64\0"),
+            uint8: look_up_numpy_type(numpy_module_dict, "uint8\0"),
+            bool_: look_up_numpy_type(numpy_module_dict, "bool_\0"),
+            datetime64: look_up_numpy_type(numpy_module_dict, "datetime64\0"),
         });
+        Py_XDECREF(numpy_module_dict);
         Py_XDECREF(numpy);
         Box::new(Some(nonnull!(Box::<NumpyTypes>::into_raw(types))))
     }
diff --git a/src/util.rs b/src/util.rs
index dd3b25bc..95ed6b33 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -221,3 +221,15 @@ macro_rules! use_immortal {
         }
     };
 }
+
+macro_rules! pydict_next {
+    ($obj1:expr, $obj2:expr, $obj3:expr, $obj4:expr) => {
+        unsafe { pyo3_ffi::_PyDict_Next($obj1, $obj2, $obj3, $obj4, std::ptr::null_mut()) }
+    };
+}
+
+macro_rules! reserve_minimum {
+    ($writer:expr) => {
+        $writer.reserve(64);
+    };
+}