Skip to content

Commit

Permalink
Simplify datetime, uuid serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Nov 1, 2024
1 parent d595cd1 commit f1cbc0a
Show file tree
Hide file tree
Showing 10 changed files with 117 additions and 79 deletions.
17 changes: 7 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ strict_provenance = []
inline_int = []

[dependencies]
arrayvec = { version = "0.7", default-features = false, features = ["std", "serde"] }
associative-cache = { version = "2", default-features = false }
bytecount = { version = "^0.6.7", default-features = false, features = ["runtime-dispatch-simd"] }
compact_str = { version = "0.8", default-features = false, features = ["serde"] }
Expand All @@ -74,6 +73,7 @@ serde_json = { version = "1", default-features = false, features = ["std", "floa
simdutf8 = { version = "0.1", default-features = false, features = ["std", "public_imp", "aarch64_neon"] }
smallvec = { version = "^1.11", default-features = false, features = ["union", "write"] }
unwinding = { version = "=0.2.2", features = ["unwinder"], optional = true }
uuid = { version = "1", default-features = false }
xxhash-rust = { version = "^0.8", default-features = false, features = ["xxh3"] }

[build-dependencies]
Expand Down
72 changes: 72 additions & 0 deletions src/serialize/buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use core::mem::MaybeUninit;

/// Number of payload bytes in a [`SmallFixedBuffer`].
///
/// Chosen so that the payload plus the `usize` write cursor add up to
/// exactly 64 bytes, matching the struct's 64-byte alignment.
const BUFFER_LENGTH: usize = 64 - core::mem::size_of::<usize>();

/// Fixed-capacity, inline byte buffer for serializing small fixed-size
/// values such as UUIDs and DateTime.
///
/// Bytes are appended at the write cursor `idx`; only `bytes[..idx]` is
/// initialized. Capacity is checked with `debug_assert!` only, so callers
/// must never write more than `BUFFER_LENGTH` bytes in total.
#[repr(align(64))]
pub struct SmallFixedBuffer {
    /// Number of initialized bytes at the start of `bytes`.
    idx: usize,
    /// Backing storage; bytes at and after `idx` are uninitialized.
    bytes: [MaybeUninit<u8>; BUFFER_LENGTH],
}

impl SmallFixedBuffer {
    /// Creates an empty buffer without initializing the backing storage.
    #[inline]
    pub fn new() -> Self {
        Self {
            idx: 0,
            bytes: [MaybeUninit::<u8>::uninit(); BUFFER_LENGTH],
        }
    }

    /// Pointer to the first unwritten byte (one past the end when full).
    #[inline]
    fn cursor(&mut self) -> *mut u8 {
        debug_assert!(self.idx <= BUFFER_LENGTH);
        // SAFETY: `idx <= BUFFER_LENGTH`, so the offset stays within the
        // array or exactly one past its end.
        unsafe { self.bytes.as_mut_ptr().cast::<u8>().add(self.idx) }
    }

    /// Returns the unwritten tail of the buffer as a mutable byte slice.
    ///
    /// # Safety
    ///
    /// The returned slice covers uninitialized memory. The caller must
    /// only write to it and must not read a byte before having written it.
    /// After writing, call [`Self::set_written`] to commit the bytes.
    #[inline]
    pub unsafe fn as_mut_slice(&mut self) -> &mut [u8] {
        let remaining = BUFFER_LENGTH - self.idx;
        // SAFETY: `cursor()` points at `remaining` in-bounds bytes of
        // `self.bytes`, exclusively borrowed through `&mut self`.
        unsafe { core::slice::from_raw_parts_mut(self.cursor(), remaining) }
    }

    /// Advances the write cursor by `len` bytes.
    ///
    /// # Safety
    ///
    /// The caller must have initialized the first `len` bytes of the slice
    /// returned by [`Self::as_mut_slice`], and `len` must not exceed the
    /// remaining capacity.
    #[inline]
    pub unsafe fn set_written(&mut self, len: usize) {
        // `<=`: committing up to and including the last byte is valid.
        debug_assert!(self.idx + len <= BUFFER_LENGTH);
        self.idx += len;
    }

    /// Appends a single byte.
    ///
    /// Capacity is only checked in debug builds; the caller must ensure
    /// the buffer is not already full.
    #[inline]
    pub fn push(&mut self, value: u8) {
        // `<=`: writing into the final slot (index BUFFER_LENGTH - 1) is valid.
        debug_assert!(self.idx + 1 <= BUFFER_LENGTH);
        // SAFETY: the caller guarantees at least one free byte, so
        // `cursor()` points inside `self.bytes`.
        unsafe { core::ptr::write(self.cursor(), value) };
        self.idx += 1;
    }

    /// Appends all bytes of `slice`.
    ///
    /// Capacity is only checked in debug builds; the caller must ensure
    /// `slice` fits in the remaining space.
    #[inline]
    pub fn extend_from_slice(&mut self, slice: &[u8]) {
        // `<=`: filling the buffer exactly to capacity is valid.
        debug_assert!(self.idx + slice.len() <= BUFFER_LENGTH);
        // SAFETY: the caller guarantees `slice.len()` free bytes remain;
        // `slice` cannot overlap storage that is exclusively borrowed here.
        unsafe {
            core::ptr::copy_nonoverlapping(slice.as_ptr(), self.cursor(), slice.len());
        }
        self.idx += slice.len();
    }

    /// Pointer to the start of the buffer; `len()` bytes from it are initialized.
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
        self.bytes.as_ptr().cast::<u8>()
    }

    /// Number of bytes written so far.
    #[inline]
    pub fn len(&self) -> usize {
        self.idx
    }
}
1 change: 1 addition & 0 deletions src/serialize/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

mod buffer;
mod error;
mod obtype;
mod per_type;
Expand Down
15 changes: 7 additions & 8 deletions src/serialize/per_type/datetime.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::opt::*;
use crate::serialize::buffer::SmallFixedBuffer;
use crate::serialize::error::SerializeError;
use crate::serialize::per_type::datetimelike::{
DateTimeBuffer, DateTimeError, DateTimeLike, Offset,
};
use crate::serialize::per_type::datetimelike::{DateTimeError, DateTimeLike, Offset};
#[cfg(Py_3_9)]
use crate::typeref::ZONEINFO_TYPE;
use crate::typeref::{CONVERT_METHOD_STR, DST_STR, NORMALIZE_METHOD_STR, UTCOFFSET_METHOD_STR};
Expand Down Expand Up @@ -43,7 +42,7 @@ impl Date {
}

#[inline(never)]
pub fn write_buf(&self, buf: &mut DateTimeBuffer) {
pub fn write_buf(&self, buf: &mut SmallFixedBuffer) {
{
let year = ffi!(PyDateTime_GET_YEAR(self.ptr));
let mut yearbuf = itoa::Buffer::new();
Expand Down Expand Up @@ -71,7 +70,7 @@ impl Serialize for Date {
where
S: Serializer,
{
let mut buf = DateTimeBuffer::new();
let mut buf = SmallFixedBuffer::new();
self.write_buf(&mut buf);
serializer.serialize_unit_struct(str_from_slice!(buf.as_ptr(), buf.len()))
}
Expand All @@ -95,7 +94,7 @@ impl Time {
}

#[inline(never)]
pub fn write_buf(&self, buf: &mut DateTimeBuffer) -> Result<(), TimeError> {
pub fn write_buf(&self, buf: &mut SmallFixedBuffer) -> Result<(), TimeError> {
if unsafe { (*(self.ptr as *mut pyo3_ffi::PyDateTime_Time)).hastzinfo == 1 } {
return Err(TimeError::HasTimezone);
}
Expand All @@ -120,7 +119,7 @@ impl Serialize for Time {
where
S: Serializer,
{
let mut buf = DateTimeBuffer::new();
let mut buf = SmallFixedBuffer::new();
if self.write_buf(&mut buf).is_err() {
err!(SerializeError::DatetimeLibraryUnsupported)
};
Expand Down Expand Up @@ -239,7 +238,7 @@ impl Serialize for DateTime {
where
S: Serializer,
{
let mut buf = DateTimeBuffer::new();
let mut buf = SmallFixedBuffer::new();
if self.write_buf(&mut buf, self.opts).is_err() {
err!(SerializeError::DatetimeLibraryUnsupported)
}
Expand Down
35 changes: 4 additions & 31 deletions src/serialize/per_type/datetimelike.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,12 @@

use crate::opt::*;

use crate::serialize::buffer::SmallFixedBuffer;

pub enum DateTimeError {
LibraryUnsupported,
}

#[repr(transparent)]
pub struct DateTimeBuffer {
buf: arrayvec::ArrayVec<u8, 32>,
}

impl DateTimeBuffer {
pub fn new() -> DateTimeBuffer {
DateTimeBuffer {
buf: arrayvec::ArrayVec::<u8, 32>::new(),
}
}
pub fn push(&mut self, value: u8) {
self.buf.push(value);
}

pub fn extend_from_slice(&mut self, slice: &[u8]) {
self.buf.try_extend_from_slice(slice).unwrap();
}

pub fn as_ptr(&self) -> *const u8 {
self.buf.as_ptr()
}

pub fn len(&self) -> usize {
self.buf.len()
}
}

macro_rules! write_double_digit {
($buf:ident, $value:expr) => {
if $value < 10 {
Expand Down Expand Up @@ -95,7 +69,7 @@ pub trait DateTimeLike {
/// Write `self` to a buffer in RFC3339 format, using `opts` to
/// customise if desired.
#[inline(never)]
fn write_buf(&self, buf: &mut DateTimeBuffer, opts: Opt) -> Result<(), DateTimeError> {
fn write_buf(&self, buf: &mut SmallFixedBuffer, opts: Opt) -> Result<(), DateTimeError> {
{
let year = self.year();
let mut yearbuf = itoa::Buffer::new();
Expand Down Expand Up @@ -124,8 +98,7 @@ pub trait DateTimeLike {
write_triple_digit!(buf, microsecond % 1_000);
// Don't support writing nanoseconds for now.
// If requested, something like the following should work,
// and the `DateTimeBuffer` type alias should be changed to
// have length 35.
// and `SmallFixedBuffer` needs at least length 35.
// let nanosecond = self.nanosecond();
// if nanosecond % 1_000 != 0 {
// write_triple_digit!(buf, nanosecond % 1_000);
Expand Down
17 changes: 9 additions & 8 deletions src/serialize/per_type/dict.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::opt::*;
use crate::serialize::buffer::SmallFixedBuffer;
use crate::serialize::error::SerializeError;
use crate::serialize::obtype::{pyobject_to_obtype, ObType};
use crate::serialize::per_type::datetimelike::DateTimeLike;
use crate::serialize::per_type::{
BoolSerializer, DataclassGenericSerializer, Date, DateTime, DateTimeBuffer, DefaultSerializer,
EnumSerializer, FloatSerializer, FragmentSerializer, IntSerializer, ListTupleSerializer,
NoneSerializer, NumpyScalar, NumpySerializer, StrSerializer, StrSubclassSerializer, Time,
ZeroListSerializer, UUID,
BoolSerializer, DataclassGenericSerializer, Date, DateTime, DefaultSerializer, EnumSerializer,
FloatSerializer, FragmentSerializer, IntSerializer, ListTupleSerializer, NoneSerializer,
NumpyScalar, NumpySerializer, StrSerializer, StrSubclassSerializer, Time, ZeroListSerializer,
UUID,
};
use crate::serialize::serializer::PyObjectSerializer;
use crate::serialize::state::SerializerState;
Expand Down Expand Up @@ -328,7 +329,7 @@ fn non_str_str_subclass(key: *mut pyo3_ffi::PyObject) -> Result<CompactString, S

#[inline(never)]
fn non_str_date(key: *mut pyo3_ffi::PyObject) -> Result<CompactString, SerializeError> {
let mut buf = DateTimeBuffer::new();
let mut buf = SmallFixedBuffer::new();
Date::new(key).write_buf(&mut buf);
let key_as_str = str_from_slice!(buf.as_ptr(), buf.len());
Ok(CompactString::from(key_as_str))
Expand All @@ -339,7 +340,7 @@ fn non_str_datetime(
key: *mut pyo3_ffi::PyObject,
opts: crate::opt::Opt,
) -> Result<CompactString, SerializeError> {
let mut buf = DateTimeBuffer::new();
let mut buf = SmallFixedBuffer::new();
let dt = DateTime::new(key, opts);
if dt.write_buf(&mut buf, opts).is_err() {
return Err(SerializeError::DatetimeLibraryUnsupported);
Expand All @@ -354,7 +355,7 @@ fn non_str_time(
key: *mut pyo3_ffi::PyObject,
opts: crate::opt::Opt,
) -> Result<CompactString, SerializeError> {
let mut buf = DateTimeBuffer::new();
let mut buf = SmallFixedBuffer::new();
let time = Time::new(key, opts);
if time.write_buf(&mut buf).is_err() {
return Err(SerializeError::TimeHasTzinfo);
Expand All @@ -365,7 +366,7 @@ fn non_str_time(

#[inline(never)]
fn non_str_uuid(key: *mut pyo3_ffi::PyObject) -> Result<CompactString, SerializeError> {
let mut buf = arrayvec::ArrayVec::<u8, 36>::new();
let mut buf = SmallFixedBuffer::new();
UUID::new(key).write_buf(&mut buf);
let key_as_str = str_from_slice!(buf.as_ptr(), buf.len());
Ok(CompactString::from(key_as_str))
Expand Down
2 changes: 1 addition & 1 deletion src/serialize/per_type/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ mod uuid;

pub use dataclass::DataclassGenericSerializer;
pub use datetime::{Date, DateTime, Time};
pub use datetimelike::{DateTimeBuffer, DateTimeError, DateTimeLike, Offset};
pub use datetimelike::{DateTimeError, DateTimeLike, Offset};
pub use default::DefaultSerializer;
pub use dict::DictGenericSerializer;
pub use float::FloatSerializer;
Expand Down
5 changes: 3 additions & 2 deletions src/serialize/per_type/numpy.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::opt::*;

use crate::serialize::buffer::SmallFixedBuffer;
use crate::serialize::error::SerializeError;
use crate::serialize::per_type::{
DateTimeBuffer, DateTimeError, DateTimeLike, DefaultSerializer, Offset, ZeroListSerializer,
DateTimeError, DateTimeLike, DefaultSerializer, Offset, ZeroListSerializer,
};
use crate::serialize::serializer::PyObjectSerializer;
use crate::typeref::{load_numpy_types, ARRAY_STRUCT_STR, DESCR_STR, DTYPE_STR, NUMPY_TYPES};
Expand Down Expand Up @@ -1398,7 +1399,7 @@ impl Serialize for NumpyDatetime64Repr {
where
S: Serializer,
{
let mut buf = DateTimeBuffer::new();
let mut buf = SmallFixedBuffer::new();
let _ = self.write_buf(&mut buf, self.opts);
serializer.collect_str(str_from_slice!(buf.as_ptr(), buf.len()))
}
Expand Down
Loading

0 comments on commit f1cbc0a

Please sign in to comment.