From 0cb5efb53e54e91fe8837780f54db1ee1b44949d Mon Sep 17 00:00:00 2001 From: ijl Date: Tue, 29 Oct 2024 14:28:13 +0000 Subject: [PATCH] 3.13t free-threading compatibility --- .github/workflows/artifact.yaml | 47 +++++----- .github/workflows/debug.yaml | 2 +- Cargo.toml | 1 + README.md | 35 ++++++-- bench/requirements.txt | 2 +- build.rs | 8 ++ ci/azure-win.yml | 6 +- integration/concurrent | 131 ++++++++++++++++++++++++++++ integration/requirements.txt | 2 +- integration/run | 6 +- integration/thread | 55 ------------ script/is_freethreading | 10 +++ script/lint | 2 +- script/pytest | 2 +- src/deserialize/backend/json.rs | 1 + src/deserialize/backend/yyjson.rs | 133 ++++++++++++++++++++++------- src/deserialize/cache.rs | 44 ---------- src/deserialize/key/associative.rs | 60 +++++++++++++ src/deserialize/key/mod.rs | 15 ++++ src/deserialize/key/util.rs | 15 ++++ src/deserialize/mod.rs | 4 +- src/deserialize/pyobject.rs | 29 ------- src/ffi/fragment.rs | 66 ++++++++++++-- src/lib.rs | 3 + src/serialize/obtype.rs | 10 ++- src/serialize/per_type/list.rs | 26 +++--- src/typeref.rs | 53 +----------- src/util.rs | 33 ++++++- test/requirements.txt | 2 - 29 files changed, 523 insertions(+), 280 deletions(-) create mode 100755 integration/concurrent delete mode 100755 integration/thread create mode 100755 script/is_freethreading delete mode 100644 src/deserialize/cache.rs create mode 100644 src/deserialize/key/associative.rs create mode 100644 src/deserialize/key/mod.rs create mode 100644 src/deserialize/key/util.rs diff --git a/.github/workflows/artifact.yaml b/.github/workflows/artifact.yaml index 436fe5eb..ef346e35 100644 --- a/.github/workflows/artifact.yaml +++ b/.github/workflows/artifact.yaml @@ -40,11 +40,11 @@ jobs: - run: python3 -m pip install --user -r test/requirements.txt -r integration/requirements.txt mypy - - run: pytest -s -rxX -v -n 4 test + - run: pytest -v test env: PYTHONMALLOC: "debug" - - run: ./integration/run thread + - run: ./integration/run concurrent - run: ./integration/run http - run: ./integration/run init - run: ./integration/run typestubs @@ -64,6 +64,14 @@ jobs: strategy: fail-fast: false matrix: + python: [ + { interpreter: 'python3.13t', package: 'python3.13-freethreading', compatibility: "manylinux_2_34" }, + { interpreter: 'python3.13', package: 'python3.13', compatibility: "manylinux_2_17" }, + { interpreter: 'python3.12', package: 'python3.12', compatibility: "manylinux_2_17" }, + { interpreter: 'python3.11', package: 'python3.11', compatibility: "manylinux_2_17" }, + { interpreter: 'python3.10', package: 'python3.10', compatibility: "manylinux_2_17" }, + { interpreter: 'python3.9', package: 'python3.9', compatibility: "manylinux_2_17" }, + ] arch: [ { cc: "clang", @@ -75,13 +83,6 @@ jobs: target: "x86_64-unknown-linux-gnu", }, ] - python: [ - { interpreter: 'python3.13', package: 'python3.13', compatibility: "manylinux_2_17" }, - { interpreter: 'python3.12', package: 'python3.12', compatibility: "manylinux_2_17" }, - { interpreter: 'python3.11', package: 'python3.11', compatibility: "manylinux_2_17" }, - { interpreter: 'python3.10', package: 'python3.10', compatibility: "manylinux_2_17" }, - { interpreter: 'python3.9', package: 'python3.9', compatibility: "manylinux_2_17" }, - ] env: PYTHON: "${{ matrix.python.interpreter }}" PYTHON_PACKAGE: "${{ matrix.python.package }}" @@ -126,6 +127,8 @@ jobs: ./script/install-fedora source "${VENV}/bin/activate" + export ORJSON_ENABLE_FREETHREADING="$(script/is_freethreading)" + if [[ $ORJSON_ENABLE_FREETHREADING -eq 1 
]]; then export PYTHON_GIL=0; fi maturin build --release --strip \ --features="${FEATURES}" \ @@ -135,8 +138,8 @@ jobs: uv pip install ${CARGO_TARGET_DIR}/wheels/orjson*.whl - pytest -s -rxX -v -n 4 test - ./integration/run thread + pytest -v test + ./integration/run concurrent ./integration/run http ./integration/run init @@ -158,6 +161,10 @@ jobs: strategy: fail-fast: false matrix: + python: [ + { interpreter: 'python3.13t', package: 'python3.13-freethreading', compatibility: "manylinux_2_34" }, + { interpreter: 'python3.13', package: 'python3.13', compatibility: "manylinux_2_17" }, + ] arch: [ { cc: "clang", @@ -169,9 +176,6 @@ jobs: target: "aarch64-unknown-linux-gnu", }, ] - python: [ - { interpreter: 'python3.13', package: 'python3.13', compatibility: "manylinux_2_17" }, - ] env: PYTHON: "${{ matrix.python.interpreter }}" PYTHON_PACKAGE: "${{ matrix.python.package }}" @@ -214,8 +218,9 @@ jobs: ./script/install-fedora - source "${HOME}/.cargo/env" source "${VENV}/bin/activate" + export ORJSON_ENABLE_FREETHREADING="$(script/is_freethreading)" + if [[ $ORJSON_ENABLE_FREETHREADING -eq 1 ]]; then export PYTHON_GIL=0; fi maturin build --release --strip \ --features="${FEATURES}" \ @@ -225,7 +230,7 @@ jobs: uv pip install ${CARGO_TARGET_DIR}/wheels/orjson*.whl - pytest -s -rxX -v -n 2 test + pytest -v test cp ${CARGO_TARGET_DIR}/wheels/orjson*.whl dist @@ -302,7 +307,7 @@ jobs: venv/bin/pip install -U pip wheel venv/bin/pip install -r test/requirements.txt venv/bin/pip install orjson --no-index --find-links dist/ --force-reinstall - venv/bin/python -m pytest -s -rxX -v -n 2 test + venv/bin/python -m pytest -v test - name: Store wheels if: "startsWith(github.ref, 'refs/tags/')" @@ -441,11 +446,11 @@ jobs: --target=universal2-apple-darwin uv pip install target/wheels/orjson*.whl - - run: pytest -s -rxX -v -n 3 test + - run: pytest -v test env: PYTHONMALLOC: "debug" - - run: source .venv/bin/activate && ./integration/run thread + - run: source .venv/bin/activate && ./integration/run concurrent - run: source .venv/bin/activate && ./integration/run http - run: source .venv/bin/activate && ./integration/run init @@ -510,11 +515,11 @@ jobs: --target=universal2-apple-darwin uv pip install target/wheels/orjson*.whl - - run: pytest -s -rxX -v -n 3 test + - run: pytest -v test env: PYTHONMALLOC: "debug" - - run: source .venv/bin/activate && ./integration/run thread + - run: source .venv/bin/activate && ./integration/run concurrent - run: source .venv/bin/activate && ./integration/run http - run: source .venv/bin/activate && ./integration/run init diff --git a/.github/workflows/debug.yaml b/.github/workflows/debug.yaml index 0707801a..8029d6bf 100644 --- a/.github/workflows/debug.yaml +++ b/.github/workflows/debug.yaml @@ -51,7 +51,7 @@ jobs: env: PYTHONMALLOC: "debug" - - run: ./integration/run thread + - run: ./integration/run concurrent timeout-minutes: 2 - run: ./integration/run http diff --git a/Cargo.toml b/Cargo.toml index 0c08cd0a..2fad5310 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ unwind = ["unwinding"] yyjson = [] # Features detected by build.rs. Do not specify. +freethreading = [] inline_int = [] intrinsics = [] optimize = [] diff --git a/README.md b/README.md index db11e886..1094837e 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,8 @@ and i686/x86 wheels for Windows. orjson does not and will not support PyPy, embedded Python builds for Android/iOS, or PEP 554 subinterpreters. +orjson has experimental support for PEP 703 free-threading CPython. 
+
 
 Releases follow semantic versioning and serializing a new object type
 without an opt-in flag is considered a breaking change.
 
@@ -74,9 +76,10 @@ available in the repository.
 1. [Latency](https://github.com/ijl/orjson?tab=readme-ov-file#latency)
 2. [Memory](https://github.com/ijl/orjson?tab=readme-ov-file#memory)
 3. [Reproducing](https://github.com/ijl/orjson?tab=readme-ov-file#reproducing)
-5. [Questions](https://github.com/ijl/orjson?tab=readme-ov-file#questions)
-6. [Packaging](https://github.com/ijl/orjson?tab=readme-ov-file#packaging)
-7. [License](https://github.com/ijl/orjson?tab=readme-ov-file#license)
+6. [Free-threading](https://github.com/ijl/orjson?tab=readme-ov-file#free-threading)
+7. [Questions](https://github.com/ijl/orjson?tab=readme-ov-file#questions)
+8. [Packaging](https://github.com/ijl/orjson?tab=readme-ov-file#packaging)
+9. [License](https://github.com/ijl/orjson?tab=readme-ov-file#license)
 
 ## Usage
 
@@ -161,7 +164,8 @@ serializing subclasses, specify the option
 `orjson.OPT_PASSTHROUGH_SUBCLASS`.
 
 The output is a `bytes` object containing UTF-8.
 
-The global interpreter lock (GIL) is held for the duration of the call.
+In non-free-threading Python, the global interpreter lock (GIL) is held for
+the duration of the call.
 
 It raises `JSONEncodeError` on an unsupported type. This exception message
 describes the invalid object with the error message
@@ -630,7 +634,8 @@ orjson maintains a cache of map keys for the duration of the process. This
 causes a net reduction in memory usage by avoiding duplicate strings. The
 keys must be at most 64 bytes to be cached and 2048 entries are stored.
 
-The global interpreter lock (GIL) is held for the duration of the call.
+In non-free-threading Python, the global interpreter lock (GIL) is held for
+the duration of the call.
 
 It raises `JSONDecodeError` if given an invalid type or invalid JSON. This
 includes if the input contains `NaN`, `Infinity`, or `-Infinity`,
@@ -1179,6 +1184,22 @@ orjson 3.10.6, ujson 5.10.0, python-rapidjson 1.18, and simplejson 3.19.2.
 The latency results can be reproduced using the `pybench` and `graph`
 scripts. The memory results can be reproduced using the `pymem` script.
 
+## Free-threading
+
+orjson 3.11.0 introduces experimental support for PEP 703 free-threading CPython.
+
+orjson supports an arbitrary number of Python threads concurrently calling the
+library. There are no threads or queues internal to the library. There are
+no Python critical sections.
+
+PyPI wheels are provided for manylinux amd64 and aarch64.
+
+To build a wheel with free-threading support, see
+[packaging](https://github.com/ijl/orjson?tab=readme-ov-file#packaging).
+
+The free-threading implementation does not respect semantic versioning and may
+be removed entirely in the future.
+
 ## Questions
 
 ### Why can't I install it from PyPI?
 
@@ -1216,6 +1237,10 @@ It benefits from also having a C build environment to compile a faster
 deserialization backend. See this project's `manylinux_2_28` builds for
 an example using clang and LTO.
 
+Building a wheel with free-threading support requires the environment
+variable `ORJSON_ENABLE_FREETHREADING` set during the build, or the
+`--features=freethreading` argument passed to maturin.
+
 The project's own CI tests against `nightly-2024-10-25` and stable 1.72.
 It is prudent to pin the nightly version because that channel can
 introduce breaking changes.
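A minimal sketch of the usage the Free-threading section above describes, assuming a CPython 3.13t interpreter and a wheel built with the `freethreading` feature (the document and sizes here are illustrative, not part of the patch's test suite):

```python
import concurrent.futures

import orjson

# Each worker round-trips a document through dumps()/loads(). Under
# free-threading these calls run in parallel instead of being serialized
# by the global interpreter lock.
DOC = {"id": 1, "tags": ["a", "b"], "nested": {"ok": True, "score": 1.5}}


def roundtrip(_: int) -> bool:
    return orjson.loads(orjson.dumps(DOC)) == DOC


with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
    assert all(pool.map(roundtrip, range(10_000)))
```

The same code is also correct without free-threading; the GIL simply serializes the calls.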
diff --git a/bench/requirements.txt b/bench/requirements.txt
index 40d524ca..108543e6 100644
--- a/bench/requirements.txt
+++ b/bench/requirements.txt
@@ -1,4 +1,4 @@
-memory-profiler
+memory-profiler; python_version<"3.13"
 pandas; python_version<"3.13"
 pytest-benchmark
 pytest-random-order
diff --git a/build.rs b/build.rs
index 184c94b9..1475e71e 100644
--- a/build.rs
+++ b/build.rs
@@ -11,6 +11,7 @@ fn main() {
     println!("cargo:rerun-if-env-changed=ORJSON_DISABLE_AVX512");
     println!("cargo:rerun-if-env-changed=ORJSON_DISABLE_SIMD");
     println!("cargo:rerun-if-env-changed=ORJSON_DISABLE_YYJSON");
+    println!("cargo:rerun-if-env-changed=ORJSON_ENABLE_FREETHREADING");
     println!("cargo:rerun-if-env-changed=RUSTFLAGS");
     println!("cargo:rustc-check-cfg=cfg(intrinsics)");
     println!("cargo:rustc-check-cfg=cfg(optimize)");
@@ -31,6 +32,13 @@ fn main() {
         println!("cargo:rustc-cfg=feature=\"setitem_knownhash\"");
     }
 
+    let freethreading_env = env::var("ORJSON_ENABLE_FREETHREADING");
+    if matches!(freethreading_env.as_deref(), Ok("1"))
+        || env::var("CARGO_FEATURE_FREETHREADING").is_ok()
+    {
+        println!("cargo:rustc-cfg=feature=\"freethreading\"");
+    }
+
     if let Some(true) = version_check::supports_feature("core_intrinsics") {
         println!("cargo:rustc-cfg=feature=\"intrinsics\"");
     }
diff --git a/ci/azure-win.yml b/ci/azure-win.yml
index 655eab96..a0937f82 100644
--- a/ci/azure-win.yml
+++ b/ci/azure-win.yml
@@ -29,12 +29,12 @@ steps:
     UNSAFE_PYO3_SKIP_VERSION_CHECK: "1"
 - script: python.exe -m pip install orjson --no-index --find-links=D:\a\1\s\target\wheels
   displayName: install
-- script: python.exe -m pytest -s -rxX -v test
+- script: python.exe -m pytest -v test
   env:
     PYTHONMALLOC: "debug"
   displayName: pytest
-- script: python.exe integration\thread
-  displayName: thread
+- script: python.exe integration\concurrent
+  displayName: concurrent
 - script: python.exe integration\init
   displayName: init
 - bash: ./ci/deploy /d/a/1/s/target/wheels/*.whl
diff --git a/integration/concurrent b/integration/concurrent
new file mode 100755
index 00000000..449ab724
--- /dev/null
+++ b/integration/concurrent
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+
+import asyncio
+import multiprocessing
+import random
+import string
+import sys
+from concurrent import futures
+
+import orjson
+
+try:
+    IS_FREETHREADING = not sys._is_gil_enabled()  # type: ignore
+except Exception:
+    IS_FREETHREADING = False
+
+CHARS = string.ascii_lowercase + string.ascii_uppercase
+
+NUM_THREADS = min(multiprocessing.cpu_count(), 16)
+
+MULTIPLIER = int(sys.argv[1]) if len(sys.argv) == 2 else 1
+
+
+def random_string():
+    return "".join(random.choice(CHARS) for _ in range(32))
+
+
+def per_thread_func(data):
+    serialized = orjson.dumps(data)
+    deserialized = orjson.loads(serialized)
+    assert deserialized == data
+
+
+async def loads_test():
+    TEST_MESSAGE = "concurrent serialization test running ..."
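+    # per_thread_func round-trips each dictionary below through dumps() and
+    # loads() on worker threads from the executor configured in main()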
+
+    sys.stdout.write(TEST_MESSAGE)
+    sys.stdout.flush()
+
+    sys.stdout.write(f"\r{TEST_MESSAGE} creating tasks\n")
+
+    unique_items = 1000
+
+    keys_per_dictionary = 16
+
+    # must force key map cache eviction
+    assert keys_per_dictionary * unique_items > 5000
+
+    num = 250 * MULTIPLIER
+
+    data = []
+    for _ in range(unique_items):
+        prefix = random_string()
+        data.append(
+            {
+                f"{prefix}_{i}": [True, False, None, "", "🐈", []]
+                for i in range(keys_per_dictionary)
+            }
+        )
+
+    tasks = []
+    for _ in range(num):
+        tasks.extend(
+            asyncio.create_task(asyncio.to_thread(per_thread_func, each))
+            for each in data
+        )
+
+    sys.stdout.write(f"\r{TEST_MESSAGE} running {len(tasks):,} tasks\n")
+    await asyncio.gather(*tasks)
+
+    sys.stdout.write(f"\r{TEST_MESSAGE} ok\n")
+
+
+async def list_mutation_test():
+    TEST_MESSAGE = "concurrent list mutation test running ..."
+
+    sys.stdout.write(TEST_MESSAGE)
+    sys.stdout.flush()
+    num = 1000 * MULTIPLIER
+    fixture = [None] * num
+
+    tasks = []
+    for _ in range(num):
+        tasks.append(asyncio.create_task(asyncio.to_thread(orjson.dumps, fixture)))
+        tasks.append(asyncio.create_task(asyncio.to_thread(fixture.pop)))
+
+    await asyncio.gather(*tasks)
+
+    assert len(fixture) == 0
+
+    sys.stdout.write(f"\r{TEST_MESSAGE} ok\n")
+
+
+async def dict_mutation_test():
+    TEST_MESSAGE = "concurrent dict mutation test running ..."
+
+    sys.stdout.write(TEST_MESSAGE)
+    sys.stdout.flush()
+    num = 1000 * MULTIPLIER
+    fixture = {f"key_{i}": None for i in range(num)}
+
+    tasks = []
+    for i in reversed(range(num)):
+        tasks.append(asyncio.create_task(asyncio.to_thread(orjson.dumps, fixture)))
+        tasks.append(asyncio.create_task(asyncio.to_thread(fixture.pop, f"key_{i}")))
+
+    await asyncio.gather(*tasks)
+
+    assert len(fixture) == 0
+
+    sys.stdout.write(f"\r{TEST_MESSAGE} ok\n")
+
+
+async def main():
+    asyncio.get_running_loop().set_default_executor(
+        futures.ThreadPoolExecutor(max_workers=NUM_THREADS)
+    )
+    sys.stdout.write(
+        f"concurrent tests running with free-threading {str(IS_FREETHREADING).lower()} on {NUM_THREADS} threads ...\n"
+    )
+
+    await list_mutation_test()
+    await dict_mutation_test()
+    await loads_test()
+
+
+asyncio.run(main())
diff --git a/integration/requirements.txt b/integration/requirements.txt
index e5b290de..2f47d3a7 100644
--- a/integration/requirements.txt
+++ b/integration/requirements.txt
@@ -1,3 +1,3 @@
 flask;sys_platform!="win"
 gunicorn;sys_platform!="win"
-httpx==0.24.1;sys_platform!="win"
+httpx==0.27.2;sys_platform!="win"
diff --git a/integration/run b/integration/run
index 477f7076..99b6bbb8 100755
--- a/integration/run
+++ b/integration/run
@@ -4,12 +4,12 @@ set -eou pipefail
 
 _dir="$(dirname "${BASH_SOURCE[0]}")"
 
-to_run="${@:-thread http init}"
+to_run="${@:-concurrent http init}"
 
 export PYTHONMALLOC="debug"
 
-if [[ $to_run == *"thread"* ]]; then
-    "${_dir}"/thread
+if [[ $to_run == *"concurrent"* ]]; then
+    "${_dir}"/concurrent
 fi
 
 if [[ $to_run == *"http"* ]]; then
diff --git a/integration/thread b/integration/thread
deleted file mode 100755
index be67a972..00000000
--- a/integration/thread
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import traceback
-from concurrent.futures import ThreadPoolExecutor
-from operator import itemgetter
-from threading import get_ident
-
-import orjson
-
-DATA = sorted(
-    [
-        {
-            "id": i,
-            "name": "90as90ji0123ioj2390as90as90",
-            "body": "哈哈89asu89as😊89as9as90jas-😋0apjzxiojzx89hq23n",
-            "score": 901290129.1,
-            "bool": True,
-            "int": 9832,
-            "none": None,
-        }
-        for i in range(10)
-    ],
-    key=itemgetter("id"),
-)
-
-
-STATUS = 0
-
-TEST_MESSAGE = "thread test running..."
-
-sys.stdout.write(TEST_MESSAGE)
-sys.stdout.flush()
-
-
-def test_func(n):
-    try:
-        assert sorted(orjson.loads(orjson.dumps(DATA)), key=itemgetter("id")) == DATA
-    except Exception:
-        traceback.print_exc()
-        print("thread %s: %s dumps, loads ERROR" % (get_ident(), n))
-
-
-with ThreadPoolExecutor(max_workers=4) as executor:
-    executor.map(test_func, range(50000), chunksize=1000)
-    executor.shutdown(wait=True)
-
-
-if STATUS == 0:
-    sys.stdout.write(f"\r{TEST_MESSAGE} ok\n")
-else:
-    sys.stdout.write(f"\r{TEST_MESSAGE} error\n")
-
-
-sys.exit(STATUS)
diff --git a/script/is_freethreading b/script/is_freethreading
new file mode 100755
index 00000000..a4abb363
--- /dev/null
+++ b/script/is_freethreading
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+
+import sys
+
+try:
+    IS_FREETHREADING = not sys._is_gil_enabled()  # type: ignore
+except Exception:
+    IS_FREETHREADING = False
+
+print(1 if IS_FREETHREADING else 0)
diff --git a/script/lint b/script/lint
index 22f5742e..7e60b812 100755
--- a/script/lint
+++ b/script/lint
@@ -4,7 +4,7 @@ set -eou pipefail
 
 to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py script/pydataclass
 script/pymem script/pysort script/pynumpy script/pynonstr script/pycorrectness script/graph integration/init
-integration/wsgi.py integration/typestubs.py integration/thread"
+integration/wsgi.py integration/typestubs.py integration/concurrent"
 
 ruff check ${to_lint} --fix
 ruff format ${to_lint}
diff --git a/script/pytest b/script/pytest
index 6b95014c..e8e7e51d 100755
--- a/script/pytest
+++ b/script/pytest
@@ -1,3 +1,3 @@
 #!/bin/sh -e
 
-PYTHONMALLOC="debug" pytest -s -rxX --random-order -n 4 test
+PYTHONMALLOC="debug" pytest -s test
diff --git a/src/deserialize/backend/json.rs b/src/deserialize/backend/json.rs
index bb1b2bc2..ad9c0dd8 100644
--- a/src/deserialize/backend/json.rs
+++ b/src/deserialize/backend/json.rs
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+use crate::deserialize::key::get_unicode_key;
 use crate::deserialize::pyobject::*;
 use crate::deserialize::DeserializeError;
 use crate::str::unicode_from_str;
diff --git a/src/deserialize/backend/yyjson.rs b/src/deserialize/backend/yyjson.rs
index 509ecdec..dd039e7d 100644
--- a/src/deserialize/backend/yyjson.rs
+++ b/src/deserialize/backend/yyjson.rs
@@ -1,14 +1,60 @@
 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+use crate::deserialize::key::get_unicode_key;
 use crate::deserialize::pyobject::*;
 use crate::deserialize::DeserializeError;
 use crate::ffi::yyjson::*;
 use crate::str::unicode_from_str;
-use crate::typeref::{yyjson_init, YYJSON_ALLOC, YYJSON_BUFFER_SIZE};
 use core::ffi::c_char;
 use core::ptr::{null, null_mut, NonNull};
 use std::borrow::Cow;
 
+#[cfg(not(feature = "freethreading"))]
+pub const YYJSON_BUFFER_SIZE: usize = 1024 * 1024 * 8;
+
+#[cfg(not(feature = "freethreading"))]
+#[repr(align(64))]
+struct YYJSONBuffer(std::cell::UnsafeCell<core::mem::MaybeUninit<[u8; YYJSON_BUFFER_SIZE]>>);
+
+#[cfg(not(feature = "freethreading"))]
+const YYJSONBUFFER_LAYOUT: core::alloc::Layout = core::alloc::Layout::new::<YYJSONBuffer>();
+
+#[cfg(not(feature = "freethreading"))]
+pub struct YYJSONAlloc {
+    pub alloc: crate::ffi::yyjson::yyjson_alc,
+    _buffer: Box<YYJSONBuffer>,
+}
+
+#[cfg(not(feature = "freethreading"))]
+pub static mut YYJSON_ALLOC: once_cell::race::OnceBox<YYJSONAlloc> =
+    once_cell::race::OnceBox::new();
+
+#[cfg(not(feature = "freethreading"))]
+pub fn yyjson_init() -> Box<YYJSONAlloc> {
+    // Using unsafe to ensure allocation happens on the heap without going through the stack
+    // so we don't stack overflow in debug mode. Once rust-lang/rust#63291 is stable (Box::new_uninit)
+    // we can use that instead.
+    let buffer =
+        unsafe { Box::from_raw(std::alloc::alloc(YYJSONBUFFER_LAYOUT).cast::<YYJSONBuffer>()) };
+    let mut alloc = crate::ffi::yyjson::yyjson_alc {
+        malloc: None,
+        realloc: None,
+        free: None,
+        ctx: null_mut(),
+    };
+    unsafe {
+        crate::ffi::yyjson::yyjson_alc_pool_init(
+            &mut alloc,
+            buffer.0.get().cast::<u8>(),
+            YYJSON_BUFFER_SIZE,
+        );
+    }
+    Box::new(YYJSONAlloc {
+        alloc,
+        _buffer: buffer,
+    })
+}
+
 const YYJSON_TAG_BIT: u8 = 8;
 
 const YYJSON_VAL_SIZE: usize = core::mem::size_of::<yyjson_val>();
 
@@ -57,25 +103,63 @@ fn unsafe_yyjson_get_next_non_container(val: *mut yyjson_val) -> *mut yyjson_val
     unsafe { ((val as *mut u8).add(YYJSON_VAL_SIZE)) as *mut yyjson_val }
 }
 
+#[cfg(feature = "freethreading")]
+type Buffer = Vec<core::mem::MaybeUninit<u8>>;
+
 pub(crate) fn deserialize(
     data: &'static str,
 ) -> Result<NonNull<pyo3_ffi::PyObject>, DeserializeError<'static>> {
+    #[cfg(feature = "freethreading")]
+    let buffer_capacity = usize::max(
+        4096 - core::mem::size_of::<usize>(),
+        yyjson_read_max_memory_usage(data.len()),
+    );
+    #[cfg(feature = "freethreading")]
+    let mut buffer: Buffer = Vec::with_capacity(buffer_capacity);
+    #[cfg(feature = "freethreading")]
+    let mut alloc = crate::ffi::yyjson::yyjson_alc {
+        malloc: None,
+        realloc: None,
+        free: None,
+        ctx: null_mut(),
+    };
+    #[cfg(feature = "freethreading")]
+    unsafe {
+        crate::ffi::yyjson::yyjson_alc_pool_init(
+            core::ptr::addr_of_mut!(alloc),
+            buffer.as_mut_ptr().cast::<u8>(),
+            buffer_capacity,
+        );
+    }
+    #[cfg(feature = "freethreading")]
+    let alloc_ptr = core::ptr::addr_of!(alloc);
+
+    #[cfg(not(feature = "freethreading"))]
+    let alloc_ptr = if yyjson_read_max_memory_usage(data.len()) < YYJSON_BUFFER_SIZE {
+        unsafe { &YYJSON_ALLOC.get_or_init(yyjson_init).alloc }
+    } else {
+        null::<crate::ffi::yyjson::yyjson_alc>()
+    };
+
     let mut err = yyjson_read_err {
         code: YYJSON_READ_SUCCESS,
         msg: null(),
         pos: 0,
     };
-    let doc = if yyjson_read_max_memory_usage(data.len()) < YYJSON_BUFFER_SIZE {
-        read_doc_with_buffer(data, &mut err)
-    } else {
-        read_doc_default(data, &mut err)
+
+    let doc = unsafe {
+        yyjson_read_opts(
+            data.as_ptr() as *mut c_char,
+            data.len(),
+            alloc_ptr,
+            &mut err,
+        )
     };
     if unlikely!(doc.is_null()) {
         let msg: Cow<str> = unsafe { core::ffi::CStr::from_ptr(err.msg).to_string_lossy() };
         Err(DeserializeError::from_yyjson(msg, err.pos as i64, data))
     } else {
         let val = yyjson_doc_get_root(doc);
-        if unlikely!(!unsafe_yyjson_is_ctn(val)) {
         let pyval = match ElementType::from_tag(val) {
             ElementType::String => parse_yy_string(val),
@@ -110,21 +194,6 @@ pub(crate) fn deserialize(
     }
 }
 
-fn read_doc_default(data: &'static str, err: &mut yyjson_read_err) -> *mut yyjson_doc {
-    unsafe { yyjson_read_opts(data.as_ptr() as *mut c_char, data.len(), null_mut(), err) }
-}
-
-fn read_doc_with_buffer(data: &'static str, err: &mut yyjson_read_err) -> *mut yyjson_doc {
-    unsafe {
-        yyjson_read_opts(
-            data.as_ptr() as *mut c_char,
-            data.len(),
-            &YYJSON_ALLOC.get_or_init(yyjson_init).alloc,
-            err,
-        )
-    }
-}
-
 enum ElementType {
     String,
     Uint64,
@@ -178,10 +247,9 @@ fn parse_yy_f64(elem: *mut yyjson_val) -> NonNull<pyo3_ffi::PyObject> {
 }
 
 macro_rules! append_to_list {
-    ($dptr:expr, $pyval:expr) => {
+    ($dptr:expr, $pyval:expr, $idx:expr) => {
         unsafe {
-            core::ptr::write($dptr, $pyval);
-            $dptr = $dptr.add(1);
+            core::ptr::write($dptr.add($idx), $pyval);
         }
     };
 }
 
@@ -191,22 +259,23 @@ fn populate_yy_array(list: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) {
     unsafe {
         let len = unsafe_yyjson_get_len(elem);
         assume!(len >= 1);
+
+        let dptr = (*(list as *mut pyo3_ffi::PyListObject)).ob_item;
         let mut next = unsafe_yyjson_get_first(elem);
-        let mut dptr = (*(list as *mut pyo3_ffi::PyListObject)).ob_item;
 
-        for _ in 0..len {
+        for idx in 0..len {
             let val = next;
             if unlikely!(unsafe_yyjson_is_ctn(val)) {
                 next = unsafe_yyjson_get_next_container(val);
                 if is_yyjson_tag!(val, TAG_ARRAY) {
                     let pyval = ffi!(PyList_New(unsafe_yyjson_get_len(val) as isize));
-                    append_to_list!(dptr, pyval);
+                    append_to_list!(dptr, pyval, idx);
                     if unsafe_yyjson_get_len(val) > 0 {
                         populate_yy_array(pyval, val);
                     }
                 } else {
                     let pyval = ffi!(_PyDict_NewPresized(unsafe_yyjson_get_len(val) as isize));
-                    append_to_list!(dptr, pyval);
+                    append_to_list!(dptr, pyval, idx);
                     if unsafe_yyjson_get_len(val) > 0 {
                         populate_yy_object(pyval, val);
                     }
@@ -224,7 +293,7 @@ fn populate_yy_array(list: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) {
                     ElementType::Array => unreachable!(),
                     ElementType::Object => unreachable!(),
                 };
-                append_to_list!(dptr, pyval.as_ptr());
+                append_to_list!(dptr, pyval.as_ptr(), idx);
             }
         }
     }
@@ -235,8 +304,10 @@ fn populate_yy_object(dict: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) {
     unsafe {
         let len = unsafe_yyjson_get_len(elem);
         assume!(len >= 1);
+
         let mut next_key = unsafe_yyjson_get_first(elem);
         let mut next_val = next_key.add(1);
+
         for _ in 0..len {
             let val = next_val;
             let pykey = {
@@ -251,7 +322,7 @@ fn populate_yy_object(dict: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) {
             next_val = next_key.add(1);
             if is_yyjson_tag!(val, TAG_ARRAY) {
                 let pyval = ffi!(PyList_New(unsafe_yyjson_get_len(val) as isize));
-                pydict_setitem!(dict, pykey, pyval.as_ptr());
+                pydict_setitem!(dict, pykey, pyval);
                 reverse_pydict_incref!(pykey);
                 reverse_pydict_incref!(pyval);
                 if unsafe_yyjson_get_len(val) > 0 {
@@ -259,7 +330,7 @@ fn populate_yy_object(dict: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) {
             } else {
                 let pyval = ffi!(_PyDict_NewPresized(unsafe_yyjson_get_len(val) as isize));
-                pydict_setitem!(dict, pykey, pyval.as_ptr());
+                pydict_setitem!(dict, pykey, pyval);
                 reverse_pydict_incref!(pykey);
                 reverse_pydict_incref!(pyval);
                 if unsafe_yyjson_get_len(val) > 0 {
diff --git a/src/deserialize/cache.rs b/src/deserialize/cache.rs
deleted file mode 100644
index f5357893..00000000
--- a/src/deserialize/cache.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: (Apache-2.0 OR MIT)
-
-use associative_cache::{AssociativeCache, Capacity2048, HashDirectMapped, RoundRobinReplacement};
-use core::ffi::c_void;
-use once_cell::unsync::OnceCell;
-
-#[repr(transparent)]
-pub struct CachedKey {
-    ptr: *mut c_void,
-}
-
-unsafe impl Send for CachedKey {}
-unsafe impl Sync for CachedKey {}
-
-impl CachedKey {
-    pub fn new(ptr: *mut pyo3_ffi::PyObject) -> CachedKey {
-        CachedKey {
-            ptr: ptr as *mut c_void,
-        }
-    }
-    pub fn get(&mut self) -> *mut pyo3_ffi::PyObject {
-        let ptr = self.ptr as *mut pyo3_ffi::PyObject;
-        debug_assert!(ffi!(Py_REFCNT(ptr)) >= 1);
-        ffi!(Py_INCREF(ptr));
-        ptr
-    }
-}
-
-impl Drop for CachedKey {
-    fn drop(&mut self) {
-        ffi!(Py_DECREF(self.ptr as *mut pyo3_ffi::PyObject));
-    }
-}
-
-pub type KeyMap =
-    AssociativeCache<u64, CachedKey, Capacity2048, HashDirectMapped, RoundRobinReplacement>;
-
-pub static mut KEY_MAP: OnceCell<KeyMap> = OnceCell::new();
-
-#[inline(always)]
-pub fn cache_hash(key: &[u8]) -> u64 {
-    assume!(key.len() <= 64);
-    xxhash_rust::xxh3::xxh3_64(key)
-}
diff --git a/src/deserialize/key/associative.rs b/src/deserialize/key/associative.rs
new file mode 100644
index 00000000..226ab1da
--- /dev/null
+++ b/src/deserialize/key/associative.rs
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+use associative_cache::{AssociativeCache, Capacity2048, HashDirectMapped, RoundRobinReplacement};
+use once_cell::unsync::OnceCell;
+
+#[repr(transparent)]
+struct CachedKey {
+    ptr: *mut core::ffi::c_void,
+}
+
+unsafe impl Send for CachedKey {}
+unsafe impl Sync for CachedKey {}
+
+impl CachedKey {
+    pub fn new(ptr: *mut pyo3_ffi::PyObject) -> CachedKey {
+        CachedKey {
+            ptr: ptr as *mut core::ffi::c_void,
+        }
+    }
+    pub fn get(&self) -> *mut pyo3_ffi::PyObject {
+        let ptr = self.ptr as *mut pyo3_ffi::PyObject;
+        debug_assert!(ffi!(Py_REFCNT(ptr)) >= 1);
+        ffi!(Py_INCREF(ptr));
+        ptr
+    }
+}
+
+impl Drop for CachedKey {
+    fn drop(&mut self) {
+        ffi!(Py_DECREF(self.ptr as *mut pyo3_ffi::PyObject));
+    }
+}
+
+type KeyMap =
+    AssociativeCache<u64, CachedKey, Capacity2048, HashDirectMapped, RoundRobinReplacement>;
+
+static mut KEY_MAP: OnceCell<KeyMap> = OnceCell::new();
+
+#[cfg_attr(feature = "optimize", optimize(size))]
+pub fn init_key_cache() {
+    unsafe { assert!(KEY_MAP.set(KeyMap::default()).is_ok()) };
+}
+
+#[inline(always)]
+fn get_key_cache() -> &'static mut KeyMap {
+    unsafe { KEY_MAP.get_mut().unwrap_or_else(|| unreachable!()) }
+}
+
+#[inline(always)]
+pub fn get_unicode_key(key_str: &str) -> *mut pyo3_ffi::PyObject {
+    if unlikely!(key_str.len() > 64) {
+        super::util::create_key(key_str)
+    } else {
+        let hash = super::util::cache_hash(key_str.as_bytes());
+        let entry = get_key_cache()
+            .entry(&hash)
+            .or_insert_with(|| hash, || CachedKey::new(super::util::create_key(key_str)));
+        entry.get()
+    }
+}
diff --git a/src/deserialize/key/mod.rs b/src/deserialize/key/mod.rs
new file mode 100644
index 00000000..d0f57ce0
--- /dev/null
+++ b/src/deserialize/key/mod.rs
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+mod util;
+
+#[cfg(not(feature = "freethreading"))]
+mod associative;
+
+#[cfg(not(feature = "freethreading"))]
+pub use associative::{get_unicode_key, init_key_cache};
+
+#[cfg(feature = "freethreading")]
+pub use util::create_key as get_unicode_key;
+
+#[cfg(feature = "freethreading")]
+pub fn init_key_cache() {}
diff --git a/src/deserialize/key/util.rs b/src/deserialize/key/util.rs
new file mode 100644
index 00000000..8d760975
--- /dev/null
+++ b/src/deserialize/key/util.rs
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+pub fn create_key(key_str: &str) -> *mut pyo3_ffi::PyObject {
+    let pyob = crate::str::unicode_from_str(key_str);
+    #[cfg(feature = "setitem_knownhash")]
+    crate::str::hash_str(pyob);
+    pyob
+}
+
+#[inline(always)]
+#[cfg(not(feature = "freethreading"))]
+pub fn cache_hash(key: &[u8]) -> u64 {
+    assume!(key.len() <= 64);
+    xxhash_rust::xxh3::xxh3_64(key)
+}
diff --git a/src/deserialize/mod.rs b/src/deserialize/mod.rs
index 0f1484d5..c6e3b78c 100644
--- a/src/deserialize/mod.rs
+++ b/src/deserialize/mod.rs
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 mod backend;
-mod cache;
 mod deserializer;
 mod error;
+mod key;
 mod pyobject;
 mod utf8;
 
-pub use cache::{KeyMap, KEY_MAP};
 pub use deserializer::deserialize;
 pub use error::DeserializeError;
+pub use key::init_key_cache;
diff --git a/src/deserialize/pyobject.rs b/src/deserialize/pyobject.rs
index 303c4028..f9344e46 100644
--- a/src/deserialize/pyobject.rs
+++ b/src/deserialize/pyobject.rs
@@ -1,37 +1,8 @@
 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
-use crate::deserialize::cache::*;
-use crate::str::{hash_str, unicode_from_str};
 use crate::typeref::{FALSE, NONE, TRUE};
 use core::ptr::NonNull;
 
-#[inline(always)]
-pub fn get_unicode_key(key_str: &str) -> *mut pyo3_ffi::PyObject {
-    if unlikely!(key_str.len() > 64) {
-        let pyob = unicode_from_str(key_str);
-        hash_str(pyob);
-        pyob
-    } else {
-        let hash = cache_hash(key_str.as_bytes());
-        unsafe {
-            let entry = KEY_MAP
-                .get_mut()
-                .unwrap_or_else(|| unreachable!())
-                .entry(&hash)
-                .or_insert_with(
-                    || hash,
-                    || {
-                        let pyob = unicode_from_str(key_str);
-                        #[cfg(feature = "setitem_knownhash")]
-                        hash_str(pyob);
-                        CachedKey::new(pyob)
-                    },
-                );
-            entry.get()
-        }
-    }
-}
-
 #[allow(dead_code)]
 #[inline(always)]
 pub fn parse_bool(val: bool) -> NonNull<pyo3_ffi::PyObject> {
diff --git a/src/ffi/fragment.rs b/src/ffi/fragment.rs
index ae0fe664..c88f3d36 100644
--- a/src/ffi/fragment.rs
+++ b/src/ffi/fragment.rs
@@ -1,13 +1,34 @@
 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
-use core::ffi::{c_char, c_ulong};
+use core::ffi::c_char;
+
+#[cfg(Py_GIL_DISABLED)]
+use std::sync::atomic::{AtomicIsize, AtomicU32, AtomicU64};
+
 use core::ptr::null_mut;
 use pyo3_ffi::*;
 
 // https://docs.python.org/3/c-api/typeobj.html#typedef-examples
 
+#[cfg(Py_GIL_DISABLED)]
+#[allow(non_upper_case_globals)]
+const _Py_IMMORTAL_REFCNT_LOCAL: u32 = u32::MAX;
+
 #[repr(C)]
 pub struct Fragment {
+    #[cfg(Py_GIL_DISABLED)]
+    pub ob_tid: usize,
+    #[cfg(Py_GIL_DISABLED)]
+    pub _padding: u16,
+    #[cfg(Py_GIL_DISABLED)]
+    pub ob_mutex: PyMutex,
+    #[cfg(Py_GIL_DISABLED)]
+    pub ob_gc_bits: u8,
+    #[cfg(Py_GIL_DISABLED)]
+    pub ob_ref_local: AtomicU32,
+    #[cfg(Py_GIL_DISABLED)]
+    pub ob_ref_shared: AtomicIsize,
+    #[cfg(not(Py_GIL_DISABLED))]
     pub ob_refcnt: pyo3_ffi::Py_ssize_t,
     pub ob_type: *mut pyo3_ffi::PyTypeObject,
     pub contents: *mut pyo3_ffi::PyObject,
@@ -36,12 +57,24 @@ pub unsafe extern "C" fn orjson_fragment_tp_new(
     kwds: *mut PyObject,
 ) -> *mut PyObject {
     if Py_SIZE(args) != 1 || !kwds.is_null() {
-        raise_args_exception();
-        null_mut()
+        raise_args_exception()
     } else {
         let contents = PyTuple_GET_ITEM(args, 0);
         Py_INCREF(contents);
         let obj = Box::new(Fragment {
+            #[cfg(Py_GIL_DISABLED)]
+            ob_tid: 0,
+            #[cfg(Py_GIL_DISABLED)]
+            _padding: 0,
+            #[cfg(Py_GIL_DISABLED)]
+            ob_mutex: PyMutex::new(),
+            #[cfg(Py_GIL_DISABLED)]
+            ob_gc_bits: 0,
+            #[cfg(Py_GIL_DISABLED)]
+            ob_ref_local: AtomicU32::new(0),
+            #[cfg(Py_GIL_DISABLED)]
+            ob_ref_shared: AtomicIsize::new(0),
+            #[cfg(not(Py_GIL_DISABLED))]
             ob_refcnt: 1,
             ob_type: crate::typeref::FRAGMENT_TYPE,
             contents: contents,
@@ -50,19 +83,24 @@ pub unsafe extern "C" fn orjson_fragment_tp_new(
     }
 }
 
+const FRAGMENT_LAYOUT: core::alloc::Layout = core::alloc::Layout::new::<Fragment>();
+
 #[no_mangle]
 #[cold]
 #[cfg_attr(feature = "optimize", optimize(size))]
 pub unsafe extern "C" fn orjson_fragment_dealloc(object: *mut PyObject) {
     Py_DECREF((*(object as *mut Fragment)).contents);
-    std::alloc::dealloc(object as *mut u8, std::alloc::Layout::new::<Fragment>());
+    std::alloc::dealloc(object as *mut u8, FRAGMENT_LAYOUT);
 }
 
-#[cfg(Py_3_10)]
-const FRAGMENT_TP_FLAGS: c_ulong = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE;
+#[cfg(Py_GIL_DISABLED)]
+const FRAGMENT_TP_FLAGS: AtomicU64 = AtomicU64::new(Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE);
+
+#[cfg(all(Py_3_10, not(Py_GIL_DISABLED)))]
+const FRAGMENT_TP_FLAGS: core::ffi::c_ulong = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE;
 
 #[cfg(not(Py_3_10))]
-const FRAGMENT_TP_FLAGS: c_ulong = Py_TPFLAGS_DEFAULT;
+const FRAGMENT_TP_FLAGS: core::ffi::c_ulong = Py_TPFLAGS_DEFAULT;
 
 #[no_mangle]
 #[cold]
@@ -71,7 +109,19 @@ pub unsafe extern "C" fn orjson_fragmenttype_new() -> *mut PyTypeObject {
     let ob = Box::new(PyTypeObject {
         ob_base: PyVarObject {
             ob_base: PyObject {
-                #[cfg(Py_3_12)]
+                #[cfg(Py_GIL_DISABLED)]
+                ob_tid: 0,
+                #[cfg(Py_GIL_DISABLED)]
+                _padding: 0,
+                #[cfg(Py_GIL_DISABLED)]
+                ob_mutex: PyMutex::new(),
+                #[cfg(Py_GIL_DISABLED)]
+                ob_gc_bits: 0,
+                #[cfg(Py_GIL_DISABLED)]
+                ob_ref_local: AtomicU32::new(_Py_IMMORTAL_REFCNT_LOCAL),
+                #[cfg(Py_GIL_DISABLED)]
+                ob_ref_shared: AtomicIsize::new(0),
+                #[cfg(all(Py_3_12, not(Py_GIL_DISABLED)))]
                 ob_refcnt: pyo3_ffi::PyObjectObRefcnt { ob_refcnt: 0 },
                 #[cfg(not(Py_3_12))]
                 ob_refcnt: 0,
diff --git a/src/lib.rs b/src/lib.rs
index 64ace9e1..584adefc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -189,6 +189,9 @@ pub unsafe extern "C" fn PyInit_orjson() -> *mut PyModuleDef {
         #[cfg(Py_3_13)]
         PyModuleDef_Slot {
             slot: Py_mod_gil,
+            #[cfg(feature = "freethreading")]
+            value: Py_MOD_GIL_NOT_USED,
+            #[cfg(not(feature = "freethreading"))]
             value: Py_MOD_GIL_USED,
         },
         PyModuleDef_Slot {
diff --git a/src/serialize/obtype.rs b/src/serialize/obtype.rs
index e526855e..e197a200 100644
--- a/src/serialize/obtype.rs
+++ b/src/serialize/obtype.rs
@@ -75,14 +75,16 @@ pub fn pyobject_to_obtype_unlikely(ob_type: *mut pyo3_ffi::PyTypeObject, opts: O
         }
     }
 
+    let tp_flags = tp_flags!(ob_type);
+
     if opt_disabled!(opts, PASSTHROUGH_SUBCLASS) {
-        if is_subclass_by_flag!(ob_type, Py_TPFLAGS_UNICODE_SUBCLASS) {
+        if is_subclass_by_flag!(tp_flags, Py_TPFLAGS_UNICODE_SUBCLASS) {
             return ObType::StrSubclass;
-        } else if is_subclass_by_flag!(ob_type, Py_TPFLAGS_LONG_SUBCLASS) {
+        } else if is_subclass_by_flag!(tp_flags, Py_TPFLAGS_LONG_SUBCLASS) {
             return ObType::Int;
-        } else if is_subclass_by_flag!(ob_type, Py_TPFLAGS_LIST_SUBCLASS) {
+        } else if is_subclass_by_flag!(tp_flags, Py_TPFLAGS_LIST_SUBCLASS) {
             return ObType::List;
-        } else if is_subclass_by_flag!(ob_type, Py_TPFLAGS_DICT_SUBCLASS) {
+        } else if is_subclass_by_flag!(tp_flags, Py_TPFLAGS_DICT_SUBCLASS) {
             return ObType::Dict;
         }
     }
diff --git a/src/serialize/per_type/list.rs b/src/serialize/per_type/list.rs
index 3339cf30..90484231 100644
--- a/src/serialize/per_type/list.rs
+++ b/src/serialize/per_type/list.rs
@@ -32,14 +32,13 @@ impl Serialize for ZeroListSerializer {
     }
 }
 
-pub struct ListTupleSerializer {
-    data_ptr: *const *mut pyo3_ffi::PyObject,
+pub struct ListTupleSerializer<'a> {
+    data: &'a [*mut pyo3_ffi::PyObject],
     state: SerializerState,
     default: Option<NonNull<pyo3_ffi::PyObject>>,
-    len: usize,
 }
 
-impl ListTupleSerializer {
+impl<'a> ListTupleSerializer<'a> {
     pub fn from_list(
         ptr: *mut pyo3_ffi::PyObject,
         state: SerializerState,
         default: Option<NonNull<pyo3_ffi::PyObject>>,
     ) -> Self {
         debug_assert!(
             is_type!(ob_type!(ptr), LIST_TYPE)
-                || is_subclass_by_flag!(ob_type!(ptr), Py_TPFLAGS_LIST_SUBCLASS)
+                || is_subclass_by_flag!(tp_flags!(ob_type!(ptr)), Py_TPFLAGS_LIST_SUBCLASS)
         );
         let data_ptr = unsafe { (*(ptr as *mut pyo3_ffi::PyListObject)).ob_item };
         let len = ffi!(Py_SIZE(ptr)) as usize;
+        let data = unsafe { core::slice::from_raw_parts(data_ptr, len) };
         Self {
-            data_ptr: data_ptr,
-            len: len,
+            data: data,
             state: state.copy_for_recursive_call(),
             default: default,
         }
     }
 
@@ -66,20 +65,20 @@ impl ListTupleSerializer {
     ) -> Self {
         debug_assert!(
             is_type!(ob_type!(ptr), TUPLE_TYPE)
-                || is_subclass_by_flag!(ob_type!(ptr), Py_TPFLAGS_TUPLE_SUBCLASS)
+                || is_subclass_by_flag!(tp_flags!(ob_type!(ptr)), Py_TPFLAGS_TUPLE_SUBCLASS)
         );
         let data_ptr = unsafe { (*(ptr as *mut pyo3_ffi::PyTupleObject)).ob_item.as_ptr() };
         let len = ffi!(Py_SIZE(ptr)) as usize;
+        let data = unsafe { core::slice::from_raw_parts(data_ptr, len) };
         Self {
-            data_ptr: data_ptr,
-            len: len,
+            data: data,
             state: state.copy_for_recursive_call(),
             default: default,
         }
     }
 }
 
-impl Serialize for ListTupleSerializer {
+impl<'a> Serialize for ListTupleSerializer<'a> {
     #[inline(never)]
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     where
@@ -88,10 +87,9 @@ impl Serialize for ListTupleSerializer {
         if unlikely!(self.state.recursion_limit()) {
             err!(SerializeError::RecursionLimit)
         }
-        debug_assert!(self.len >= 1);
+        debug_assert!(self.data.len() >= 1);
         let mut seq = serializer.serialize_seq(None).unwrap();
-        for idx in 0..self.len {
-            let value = unsafe { *((self.data_ptr).add(idx)) };
+        for &value in self.data {
             match pyobject_to_obtype(value, self.state.opts()) {
                 ObType::Str => {
                     seq.serialize_element(&StrSerializer::new(value))?;
diff --git a/src/typeref.rs b/src/typeref.rs
index 68c24213..6944eb9c 100644
--- a/src/typeref.rs
+++ b/src/typeref.rs
@@ -2,15 +2,9 @@
 
 use crate::ffi::orjson_fragmenttype_new;
 use core::ffi::c_char;
-#[cfg(feature = "yyjson")]
-use core::ffi::c_void;
-#[cfg(feature = "yyjson")]
-use core::mem::MaybeUninit;
 use core::ptr::{null_mut, NonNull};
 use once_cell::race::{OnceBool, OnceBox};
 use pyo3_ffi::*;
-#[cfg(feature = "yyjson")]
-use std::cell::UnsafeCell;
 
 pub struct NumpyTypes {
     pub array: *mut PyTypeObject,
@@ -75,48 +69,6 @@ pub static mut DESCR_STR: *mut PyObject = null_mut();
 pub static mut VALUE_STR: *mut PyObject = null_mut();
 pub static mut INT_ATTR_STR: *mut PyObject = null_mut();
 
-#[cfg(feature = "yyjson")]
-pub const YYJSON_BUFFER_SIZE: usize = 1024 * 1024 * 8;
-
-#[cfg(feature = "yyjson")]
-#[repr(align(64))]
-struct YYJSONBuffer(UnsafeCell<MaybeUninit<[u8; YYJSON_BUFFER_SIZE]>>);
-
-#[cfg(feature = "yyjson")]
-pub struct YYJSONAlloc {
-    pub alloc: crate::ffi::yyjson::yyjson_alc,
-    _buffer: Box<YYJSONBuffer>,
-}
-
-#[cfg(feature = "yyjson")]
-pub static mut YYJSON_ALLOC: OnceBox<YYJSONAlloc> = OnceBox::new();
-
-#[cfg(feature = "yyjson")]
-pub fn yyjson_init() -> Box<YYJSONAlloc> {
-    // Using unsafe to ensure allocation happens on the heap without going through the stack
-    // so we don't stack overflow in debug mode. Once rust-lang/rust#63291 is stable (Box::new_uninit)
-    // we can use that instead.
-    let layout = std::alloc::Layout::new::<YYJSONBuffer>();
-    let buffer = unsafe { Box::from_raw(std::alloc::alloc(layout).cast::<YYJSONBuffer>()) };
-    let mut alloc = crate::ffi::yyjson::yyjson_alc {
-        malloc: None,
-        realloc: None,
-        free: None,
-        ctx: null_mut(),
-    };
-    unsafe {
-        crate::ffi::yyjson::yyjson_alc_pool_init(
-            &mut alloc,
-            buffer.0.get().cast::<u8>(),
-            YYJSON_BUFFER_SIZE,
-        );
-    }
-    Box::new(YYJSONAlloc {
-        alloc,
-        _buffer: buffer,
-    })
-}
-
 #[allow(non_upper_case_globals)]
 pub static mut JsonEncodeError: *mut PyObject = null_mut();
 #[allow(non_upper_case_globals)]
@@ -136,9 +88,8 @@ fn _init_typerefs_impl() -> bool {
     unsafe {
         debug_assert!(crate::opt::MAX_OPT < u16::MAX as i32);
 
-        assert!(crate::deserialize::KEY_MAP
-            .set(crate::deserialize::KeyMap::default())
-            .is_ok());
+        crate::deserialize::init_key_cache();
+
         FRAGMENT_TYPE = orjson_fragmenttype_new();
         PyDateTime_IMPORT();
         NONE = Py_None();
diff --git a/src/util.rs b/src/util.rs
index 592ee1e8..9ea25ae7 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -20,9 +20,27 @@ macro_rules! is_class_by_type {
     };
 }
 
+#[cfg(not(Py_GIL_DISABLED))]
+macro_rules! tp_flags {
+    ($ob_type:expr) => {
+        unsafe { (*$ob_type).tp_flags }
+    };
+}
+
+#[cfg(Py_GIL_DISABLED)]
+macro_rules! tp_flags {
+    ($ob_type:expr) => {
+        unsafe {
+            (*$ob_type)
+                .tp_flags
+                .load(std::sync::atomic::Ordering::Relaxed)
+        }
+    };
+}
+
 macro_rules! is_subclass_by_flag {
-    ($ob_type:expr, $flag:ident) => {
-        unsafe { (((*$ob_type).tp_flags & pyo3_ffi::$flag) != 0) }
+    ($tp_flags:expr, $flag:ident) => {
+        unsafe { (($tp_flags & pyo3_ffi::$flag) != 0) }
     };
 }
 
@@ -98,7 +116,7 @@ macro_rules! str_from_slice {
     };
 }
 
-#[cfg(Py_3_12)]
+#[cfg(all(Py_3_12, not(Py_GIL_DISABLED)))]
 macro_rules! reverse_pydict_incref {
     ($op:expr) => {
         unsafe {
@@ -110,6 +128,14 @@ macro_rules! reverse_pydict_incref {
     };
 }
 
+#[cfg(Py_GIL_DISABLED)]
+macro_rules! reverse_pydict_incref {
+    ($op:expr) => {
+        debug_assert!(ffi!(Py_REFCNT($op)) >= 2);
+        ffi!(Py_DECREF($op))
+    };
+}
+
 #[cfg(not(Py_3_12))]
 macro_rules! reverse_pydict_incref {
     ($op:expr) => {
@@ -151,6 +177,7 @@ macro_rules! call_method {
     };
 }
 
+#[cfg(feature = "setitem_knownhash")]
 macro_rules! str_hash {
     ($op:expr) => {
         unsafe { (*$op.cast::<pyo3_ffi::PyASCIIObject>()).hash }
     };
diff --git a/test/requirements.txt b/test/requirements.txt
index c25db86e..97bee4ca 100644
--- a/test/requirements.txt
+++ b/test/requirements.txt
@@ -5,6 +5,4 @@ pendulum;sys_platform=="linux" and platform_machine=="x86_64" and python_version
 time-machine < 2.15;sys_platform=="linux" and platform_machine=="x86_64" and python_version<"3.12"
 psutil;(sys_platform=="linux" or sys_platform == "macos") and platform_machine=="x86_64" and python_version<"3.13"
 pytest
-pytest-random-order
-pytest-xdist
 pytz
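A quick runtime check of which mode an installed build is using, mirroring `script/is_freethreading` above (illustrative sketch; `sys._is_gil_enabled()` exists only on CPython 3.13 and later):

```python
import sys

import orjson

# With a CPython 3.13t interpreter and a wheel built with the freethreading
# feature, the GIL stays disabled after importing orjson; an extension built
# without free-threading support would cause CPython to re-enable it.
gil = getattr(sys, "_is_gil_enabled", lambda: True)()
print(f"orjson {orjson.__version__}: GIL {'enabled' if gil else 'disabled'}")
```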