From 4d3ff2ce83748585fed590e7e94e909f0314f2fe Mon Sep 17 00:00:00 2001 From: ijl Date: Mon, 7 Oct 2024 14:02:02 +0000 Subject: [PATCH] int uses _PyLong_AsByteArray() --- src/ffi/long.rs | 76 ++++++++++++------------- src/ffi/mod.rs | 2 +- src/serialize/per_type/dict.rs | 15 ++--- src/serialize/per_type/int.rs | 101 +++++++++++++++++---------------- src/serialize/per_type/list.rs | 12 +--- src/serialize/per_type/mod.rs | 2 +- src/serialize/serializer.rs | 16 ++---- src/serialize/writer/json.rs | 2 +- test/test_error.py | 1 - test/test_type.py | 8 +++ 10 files changed, 112 insertions(+), 123 deletions(-) diff --git a/src/ffi/long.rs b/src/ffi/long.rs index 9052a7ef..c97d3af0 100644 --- a/src/ffi/long.rs +++ b/src/ffi/long.rs @@ -3,47 +3,42 @@ // longintrepr.h, _longobject, _PyLongValue #[cfg(Py_3_12)] +#[allow(non_upper_case_globals)] const SIGN_MASK: usize = 3; + #[cfg(Py_3_12)] +#[allow(non_upper_case_globals)] const SIGN_ZERO: usize = 1; #[cfg(Py_3_12)] #[allow(non_upper_case_globals)] -const _PyLong_NON_SIZE_BITS: usize = 3; +const NON_SIZE_BITS: usize = 3; #[cfg(Py_3_12)] #[repr(C)] -struct _PyLongValue { +pub struct _PyLongValue { pub lv_tag: usize, pub ob_digit: u32, } #[cfg(Py_3_12)] #[repr(C)] -struct PyLongObject { - pub ob_refcnt: pyo3_ffi::Py_ssize_t, - pub ob_type: *mut pyo3_ffi::PyTypeObject, +pub struct PyLongObject { + pub ob_base: pyo3_ffi::PyObject, pub long_value: _PyLongValue, } -#[cfg(Py_3_12)] -#[inline(always)] -pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag & SIGN_MASK == SIGN_ZERO } -} - #[cfg(not(Py_3_12))] -#[inline(always)] -pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { (*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size == 0 } +#[repr(C)] +pub struct PyLongObject { + pub ob_base: pyo3_ffi::PyVarObject, + pub ob_digit: u32, } #[cfg(Py_3_12)] #[inline(always)] pub fn pylong_is_unsigned(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { - 1 - (((*(ptr as *mut PyLongObject)).long_value.lv_tag & _PyLong_NON_SIZE_BITS) as isize) > 0 - } + unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag & SIGN_MASK == 0 } } #[cfg(not(Py_3_12))] @@ -54,41 +49,44 @@ pub fn pylong_is_unsigned(ptr: *mut pyo3_ffi::PyObject) -> bool { #[cfg(Py_3_12)] #[inline(always)] -fn pylong_is_compact(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS) } +pub fn pylong_fits_in_i32(ptr: *mut pyo3_ffi::PyObject) -> bool { + unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag < (2 << NON_SIZE_BITS) } } -#[cfg(Py_3_12)] +#[cfg(not(Py_3_12))] #[inline(always)] -pub fn pylong_value_unsigned(ptr: *mut pyo3_ffi::PyObject) -> u64 { - if pylong_is_compact(ptr) == true { - unsafe { (*(ptr as *mut PyLongObject)).long_value.ob_digit as u64 } - } else { - ffi!(PyLong_AsUnsignedLongLong(ptr)) - } +pub fn pylong_fits_in_i32(ptr: *mut pyo3_ffi::PyObject) -> bool { + unsafe { isize::abs((*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size) == 1 } } -#[cfg(not(Py_3_12))] +#[cfg(Py_3_12)] #[inline(always)] -pub fn pylong_value_unsigned(ptr: *mut pyo3_ffi::PyObject) -> u64 { - ffi!(PyLong_AsUnsignedLongLong(ptr)) +pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool { + unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag & SIGN_MASK == SIGN_ZERO } } - #[cfg(not(Py_3_12))] #[inline(always)] -pub fn pylong_value_signed(ptr: *mut pyo3_ffi::PyObject) -> i64 { - ffi!(PyLong_AsLongLong(ptr)) +pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool { + unsafe { (*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size == 0 } } #[cfg(Py_3_12)] #[inline(always)] -pub fn pylong_value_signed(ptr: *mut pyo3_ffi::PyObject) -> i64 { - if pylong_is_compact(ptr) == true { - unsafe { - let sign = 1 - ((*(ptr as *mut PyLongObject)).long_value.lv_tag & SIGN_MASK) as i64; - sign * (*(ptr as *mut PyLongObject)).long_value.ob_digit as i64 +pub fn pylong_get_inline_value(ptr: *mut pyo3_ffi::PyObject) -> i64 { + unsafe { + if pylong_is_unsigned(ptr) { + (*(ptr as *mut PyLongObject)).long_value.ob_digit as i64 + } else { + -1 * (*(ptr as *mut PyLongObject)).long_value.ob_digit as i64 } - } else { - ffi!(PyLong_AsLongLong(ptr)) + } +} + +#[cfg(not(Py_3_12))] +#[inline(always)] +pub fn pylong_get_inline_value(ptr: *mut pyo3_ffi::PyObject) -> i64 { + unsafe { + (*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size as i64 + * (*(ptr as *mut PyLongObject)).ob_digit as i64 } } diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs index f2b8198f..b3ae0065 100644 --- a/src/ffi/mod.rs +++ b/src/ffi/mod.rs @@ -10,4 +10,4 @@ pub mod yyjson; pub use buffer::*; pub use bytes::*; pub use fragment::{orjson_fragmenttype_new, Fragment}; -pub use long::{pylong_is_unsigned, pylong_is_zero, pylong_value_signed, pylong_value_unsigned}; +pub use long::{pylong_fits_in_i32, pylong_get_inline_value, pylong_is_unsigned, pylong_is_zero}; diff --git a/src/serialize/per_type/dict.rs b/src/serialize/per_type/dict.rs index 20b8a064..a6460eca 100644 --- a/src/serialize/per_type/dict.rs +++ b/src/serialize/per_type/dict.rs @@ -6,9 +6,9 @@ use crate::serialize::obtype::{pyobject_to_obtype, ObType}; use crate::serialize::per_type::datetimelike::DateTimeLike; use crate::serialize::per_type::{ BoolSerializer, DataclassGenericSerializer, Date, DateTime, DateTimeBuffer, DefaultSerializer, - EnumSerializer, FloatSerializer, FragmentSerializer, Int53Serializer, IntSerializer, - ListTupleSerializer, NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer, - StrSubclassSerializer, Time, ZeroListSerializer, UUID, + EnumSerializer, FloatSerializer, FragmentSerializer, IntSerializer, ListTupleSerializer, + NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer, StrSubclassSerializer, Time, + ZeroListSerializer, UUID, }; use crate::serialize::serializer::PyObjectSerializer; use crate::serialize::state::SerializerState; @@ -100,13 +100,8 @@ macro_rules! impl_serialize_entry { $map.serialize_value(&StrSubclassSerializer::new($value))?; } ObType::Int => { - if unlikely!(opt_enabled!($self.state.opts(), STRICT_INTEGER)) { - $map.serialize_key($key).unwrap(); - $map.serialize_value(&Int53Serializer::new($value))?; - } else { - $map.serialize_key($key).unwrap(); - $map.serialize_value(&IntSerializer::new($value))?; - } + $map.serialize_key($key).unwrap(); + $map.serialize_value(&IntSerializer::new($value, $self.state.opts()))?; } ObType::None => { $map.serialize_key($key).unwrap(); diff --git a/src/serialize/per_type/int.rs b/src/serialize/per_type/int.rs index 09760885..ded77e86 100644 --- a/src/serialize/per_type/int.rs +++ b/src/serialize/per_type/int.rs @@ -1,79 +1,80 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::ffi::{pylong_is_unsigned, pylong_is_zero, pylong_value_signed, pylong_value_unsigned}; +use crate::ffi::{pylong_fits_in_i32, pylong_get_inline_value, pylong_is_unsigned, pylong_is_zero}; +use crate::opt::{Opt, STRICT_INTEGER}; use crate::serialize::error::SerializeError; use serde::ser::{Serialize, Serializer}; +use core::ffi::c_uchar; +use core::mem::transmute; + // https://tools.ietf.org/html/rfc7159#section-6 // "[-(2**53)+1, (2**53)-1]" const STRICT_INT_MIN: i64 = -9007199254740991; const STRICT_INT_MAX: i64 = 9007199254740991; -#[repr(transparent)] pub struct IntSerializer { ptr: *mut pyo3_ffi::PyObject, + opts: Opt, } impl IntSerializer { - pub fn new(ptr: *mut pyo3_ffi::PyObject) -> Self { - IntSerializer { ptr: ptr } + pub fn new(ptr: *mut pyo3_ffi::PyObject, opts: Opt) -> Self { + IntSerializer { + ptr: ptr, + opts: opts, + } } } impl Serialize for IntSerializer { - #[inline(never)] + #[inline(always)] fn serialize(&self, serializer: S) -> Result where S: Serializer, { - if pylong_is_zero(self.ptr) { - serializer.serialize_u64(0) - } else if pylong_is_unsigned(self.ptr) { - let val = pylong_value_unsigned(self.ptr); - if unlikely!(val == u64::MAX) && !ffi!(PyErr_Occurred()).is_null() { - err!(SerializeError::Integer64Bits) - } else { - serializer.serialize_u64(val) + unsafe { + if pylong_is_zero(self.ptr) { + return serializer.serialize_bytes(b"0"); } - } else { - let val = pylong_value_signed(self.ptr); - if unlikely!(val == -1) && !ffi!(PyErr_Occurred()).is_null() { - err!(SerializeError::Integer64Bits) - } - serializer.serialize_i64(val) - } - } -} - -#[repr(transparent)] -pub struct Int53Serializer { - ptr: *mut pyo3_ffi::PyObject, -} - -impl Int53Serializer { - pub fn new(ptr: *mut pyo3_ffi::PyObject) -> Self { - Int53Serializer { ptr: ptr } - } -} - -impl Serialize for Int53Serializer { - #[cold] - #[inline(never)] - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let val = pylong_value_signed(self.ptr); - if unlikely!(val == -1) { - if ffi!(PyErr_Occurred()).is_null() { - serializer.serialize_i64(val) + let is_signed = !pylong_is_unsigned(self.ptr) as i32; + if pylong_fits_in_i32(self.ptr) { + if is_signed == 0 { + serializer.serialize_u64(pylong_get_inline_value(self.ptr) as u64) + } else { + serializer.serialize_i64(pylong_get_inline_value(self.ptr) as i64) + } } else { - err!(SerializeError::Integer53Bits) + let mut buffer: [u8; 8] = [0; 8]; + let ret = pyo3_ffi::_PyLong_AsByteArray( + self.ptr as *mut pyo3_ffi::PyLongObject, + buffer.as_mut_ptr() as *mut c_uchar, + 8, + 1, + is_signed, + ); + if unlikely!(ret == -1) { + ffi!(PyErr_Clear()); + err!(SerializeError::Integer64Bits) + } + if is_signed == 0 { + let val = transmute::<[u8; 8], u64>(buffer); + if unlikely!(opt_enabled!(self.opts, STRICT_INTEGER)) + && val > STRICT_INT_MAX as u64 + { + err!(SerializeError::Integer53Bits) + } + serializer.serialize_u64(val) + } else { + let val = transmute::<[u8; 8], i64>(buffer); + if unlikely!(opt_enabled!(self.opts, STRICT_INTEGER)) + && !(STRICT_INT_MIN..=STRICT_INT_MAX).contains(&val) + { + err!(SerializeError::Integer53Bits) + } + serializer.serialize_i64(val) + } } - } else if !(STRICT_INT_MIN..=STRICT_INT_MAX).contains(&val) { - err!(SerializeError::Integer53Bits) - } else { - serializer.serialize_i64(val) } } } diff --git a/src/serialize/per_type/list.rs b/src/serialize/per_type/list.rs index d9d75efa..3339cf30 100644 --- a/src/serialize/per_type/list.rs +++ b/src/serialize/per_type/list.rs @@ -1,13 +1,11 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::opt::STRICT_INTEGER; use crate::serialize::error::SerializeError; use crate::serialize::obtype::{pyobject_to_obtype, ObType}; use crate::serialize::per_type::{ BoolSerializer, DataclassGenericSerializer, Date, DateTime, DefaultSerializer, - DictGenericSerializer, EnumSerializer, FloatSerializer, FragmentSerializer, Int53Serializer, - IntSerializer, NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer, - StrSubclassSerializer, Time, UUID, + DictGenericSerializer, EnumSerializer, FloatSerializer, FragmentSerializer, IntSerializer, + NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer, StrSubclassSerializer, Time, UUID, }; use crate::serialize::serializer::PyObjectSerializer; use crate::serialize::state::SerializerState; @@ -102,11 +100,7 @@ impl Serialize for ListTupleSerializer { seq.serialize_element(&StrSubclassSerializer::new(value))?; } ObType::Int => { - if unlikely!(opt_enabled!(self.state.opts(), STRICT_INTEGER)) { - seq.serialize_element(&Int53Serializer::new(value))?; - } else { - seq.serialize_element(&IntSerializer::new(value))?; - } + seq.serialize_element(&IntSerializer::new(value, self.state.opts()))?; } ObType::None => { seq.serialize_element(&NoneSerializer::new()).unwrap(); diff --git a/src/serialize/per_type/mod.rs b/src/serialize/per_type/mod.rs index b0ec5b59..01531c09 100644 --- a/src/serialize/per_type/mod.rs +++ b/src/serialize/per_type/mod.rs @@ -24,7 +24,7 @@ pub use default::DefaultSerializer; pub use dict::DictGenericSerializer; pub use float::FloatSerializer; pub use fragment::FragmentSerializer; -pub use int::{Int53Serializer, IntSerializer}; +pub use int::IntSerializer; pub use list::{ListTupleSerializer, ZeroListSerializer}; pub use none::NoneSerializer; pub use numpy::{is_numpy_array, is_numpy_scalar, NumpyScalar, NumpySerializer}; diff --git a/src/serialize/serializer.rs b/src/serialize/serializer.rs index bfbf92b2..852d31e0 100644 --- a/src/serialize/serializer.rs +++ b/src/serialize/serializer.rs @@ -1,12 +1,12 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::opt::{Opt, APPEND_NEWLINE, INDENT_2, STRICT_INTEGER}; +use crate::opt::{Opt, APPEND_NEWLINE, INDENT_2}; use crate::serialize::obtype::{pyobject_to_obtype, ObType}; use crate::serialize::per_type::{ BoolSerializer, DataclassGenericSerializer, Date, DateTime, DefaultSerializer, - DictGenericSerializer, EnumSerializer, FloatSerializer, FragmentSerializer, Int53Serializer, - IntSerializer, ListTupleSerializer, NoneSerializer, NumpyScalar, NumpySerializer, - StrSerializer, StrSubclassSerializer, Time, ZeroListSerializer, UUID, + DictGenericSerializer, EnumSerializer, FloatSerializer, FragmentSerializer, IntSerializer, + ListTupleSerializer, NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer, + StrSubclassSerializer, Time, ZeroListSerializer, UUID, }; use crate::serialize::state::SerializerState; use crate::serialize::writer::{to_writer, to_writer_pretty, BytesWriter}; @@ -68,13 +68,7 @@ impl Serialize for PyObjectSerializer { match pyobject_to_obtype(self.ptr, self.state.opts()) { ObType::Str => StrSerializer::new(self.ptr).serialize(serializer), ObType::StrSubclass => StrSubclassSerializer::new(self.ptr).serialize(serializer), - ObType::Int => { - if unlikely!(opt_enabled!(self.state.opts(), STRICT_INTEGER)) { - Int53Serializer::new(self.ptr).serialize(serializer) - } else { - IntSerializer::new(self.ptr).serialize(serializer) - } - } + ObType::Int => IntSerializer::new(self.ptr, self.state.opts()).serialize(serializer), ObType::None => NoneSerializer::new().serialize(serializer), ObType::Float => FloatSerializer::new(self.ptr).serialize(serializer), ObType::Bool => BoolSerializer::new(self.ptr).serialize(serializer), diff --git a/src/serialize/writer/json.rs b/src/serialize/writer/json.rs index 1d61ebec..cbb5fb33 100644 --- a/src/serialize/writer/json.rs +++ b/src/serialize/writer/json.rs @@ -134,7 +134,7 @@ where .map_err(Error::io) } } - #[inline(never)] + #[inline] fn serialize_f64(self, value: f64) -> Result<()> { if unlikely!(value.is_infinite() || value.is_nan()) { self.serialize_unit() diff --git a/test/test_error.py b/test/test_error.py index c7e09959..29e4c731 100644 --- a/test/test_error.py +++ b/test/test_error.py @@ -188,4 +188,3 @@ def test_dumps_normalize_exception(self): with pytest.raises(orjson.JSONEncodeError) as exc_info: orjson.dumps(10**60) assert exc_info.type == orjson.JSONEncodeError - assert isinstance(exc_info.value.__cause__, OverflowError) diff --git a/test/test_type.py b/test/test_type.py index 5a602e98..81c89307 100644 --- a/test/test_type.py +++ b/test/test_type.py @@ -420,6 +420,14 @@ def test_int_53_exc_usize(self): with pytest.raises(orjson.JSONEncodeError): orjson.dumps(val, option=orjson.OPT_STRICT_INTEGER) + def test_int_53_exc_128(self): + """ + int 53-bit exception on 128-bit + """ + val = 2**65 + with pytest.raises(orjson.JSONEncodeError): + orjson.dumps(val, option=orjson.OPT_STRICT_INTEGER) + def test_int_64(self): """ int 64-bit