From 635cf4bf92e2b3c4ba6198db157b7afe86d5891f Mon Sep 17 00:00:00 2001 From: Colin O'Brien Date: Fri, 21 Oct 2022 12:36:07 -0700 Subject: [PATCH 1/2] Implement and leverage size_hint --- scylla-cql/src/frame/value.rs | 141 ++++++++++++++++++++++++++++++++-- 1 file changed, 136 insertions(+), 5 deletions(-) diff --git a/scylla-cql/src/frame/value.rs b/scylla-cql/src/frame/value.rs index 2a85c4507a..9ee009a292 100644 --- a/scylla-cql/src/frame/value.rs +++ b/scylla-cql/src/frame/value.rs @@ -7,6 +7,7 @@ use num_bigint::BigInt; use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::convert::TryInto; +use std::mem::size_of; use std::net::IpAddr; use thiserror::Error; use uuid::Uuid; @@ -18,6 +19,15 @@ use super::types::vint_encode; /// serialize() should write the Value as [bytes] to the provided buffer pub trait Value { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig>; + /// A *hint* to callers indicating how much memory the serialized + /// form of this `Value` will take. This hint is not defined as a + /// lower bound, upper bound, nor is an exact size. Every implementation + /// is free to return the "best guess" available. + /// The default impl returns `std::mem::size_of::()` as every Value + /// at minimum has an i32 sized tag. + fn size_hint() -> usize { + std::mem::size_of::() + } } #[derive(Debug, Error, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -241,6 +251,10 @@ impl Value for i8 { buf.put_i8(*self); Ok(()) } + + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for i16 { @@ -249,6 +263,9 @@ impl Value for i16 { buf.put_i16(*self); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for i32 { @@ -257,6 +274,9 @@ impl Value for i32 { buf.put_i32(*self); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for i64 { @@ -265,6 +285,9 @@ impl Value for i64 { buf.put_i64(*self); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for BigDecimal { @@ -280,6 +303,10 @@ impl Value for BigDecimal { Ok(()) } + fn size_hint() -> usize { + // The i32 length, the scale, the BigInt + 2 * size_of::() + 2 * size_of::() + } } impl Value for NaiveDate { @@ -297,6 +324,9 @@ impl Value for NaiveDate { buf.put_u32(days); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for Date { @@ -305,6 +335,9 @@ impl Value for Date { buf.put_u32(self.0); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for Timestamp { @@ -313,6 +346,9 @@ impl Value for Timestamp { buf.put_i64(self.0.num_milliseconds()); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for Time { @@ -321,6 +357,9 @@ impl Value for Time { buf.put_i64(self.0.num_nanoseconds().ok_or(ValueTooBig)?); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for bool { @@ -336,6 +375,9 @@ impl Value for bool { Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for f32 { @@ -344,6 +386,9 @@ impl Value for f32 { buf.put_f32(*self); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for f64 { @@ -352,6 +397,9 @@ impl Value for f64 { buf.put_f64(*self); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for Uuid { @@ -360,6 +408,9 @@ impl Value for Uuid { buf.extend_from_slice(self.as_bytes()); Ok(()) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for BigInt { @@ -372,6 +423,10 @@ impl Value for BigInt { Ok(()) } + fn size_hint() -> usize { + // Internally the smallest BigInt is [u64; 2] + size_of::() + 2 * size_of::() + } } impl Value for &str { @@ -384,6 +439,12 @@ impl Value for &str { Ok(()) } + fn size_hint() -> usize { + // 1i32 for the tag, 3i32 for additional characters. This optimizes + // for the likely case that strings will rarely be empty, and likely + // be at least a few characters + 4 * size_of::() + } } impl Value for Vec { @@ -395,6 +456,11 @@ impl Value for Vec { Ok(()) } + fn size_hint() -> usize { + // 1i32 for the tag, 3i32 for additional bytes. This optimizes + // for the likely case that bytes will rarely be empty + 2 * size_of::() + } } impl Value for IpAddr { @@ -412,12 +478,20 @@ impl Value for IpAddr { Ok(()) } + fn size_hint() -> usize { + // tag + ipv4 octects + size_of::() + size_of::<[u8; 4]>() + } } impl Value for String { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { <&str as Value>::serialize(&self.as_str(), buf) } + + fn size_hint() -> usize { + <&str as Value>::size_hint() + } } /// Every Option can be serialized as None -> NULL, Some(val) -> val.serialize() @@ -431,6 +505,9 @@ impl Value for Option { } } } + fn size_hint() -> usize { + ::size_hint() + } } impl Value for Unset { @@ -439,12 +516,18 @@ impl Value for Unset { buf.put_i32(-2); Ok(()) } + fn size_hint() -> usize { + size_of::() + } } impl Value for Counter { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { self.0.serialize(buf) } + fn size_hint() -> usize { + size_of::() + size_of::() + } } impl Value for CqlDuration { @@ -462,6 +545,9 @@ impl Value for CqlDuration { Ok(()) } + fn size_hint() -> usize { + size_of::() + 3 * size_of::() + } } impl Value for MaybeUnset { @@ -478,6 +564,9 @@ impl Value for &T { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { ::serialize(*self, buf) } + fn size_hint() -> usize { + T::size_hint() + } } fn serialize_map( @@ -525,36 +614,59 @@ impl Value for HashSet { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { serialize_list_or_set(self.iter(), self.len(), buf) } + fn size_hint() -> usize { + // Size, number of keys, assume not empty + 4 * size_of::() + } } impl Value for HashMap { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { serialize_map(self.iter(), self.len(), buf) } + fn size_hint() -> usize { + // Size, number of keys, assume not empty + 4 * size_of::() + } } impl Value for BTreeSet { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { serialize_list_or_set(self.iter(), self.len(), buf) } + fn size_hint() -> usize { + // Size, number of keys, assume not empty + 4 * size_of::() + } } impl Value for BTreeMap { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { serialize_map(self.iter(), self.len(), buf) } + fn size_hint() -> usize { + // Size, number of keys, assume not empty + 4 * size_of::() + } } impl Value for Vec { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { serialize_list_or_set(self.iter(), self.len(), buf) } + fn size_hint() -> usize { + 4 * size_of::() + } } impl Value for &[T] { fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { serialize_list_or_set(self.iter(), self.len(), buf) } + fn size_hint() -> usize { + // Size, number of items, assume not empty + 4 * size_of::() + } } fn serialize_tuple( @@ -620,6 +732,12 @@ impl Value for CqlValue { } } +// utility macro +macro_rules! _count { + () => (0usize); + ( $x:tt $($xs:tt)* ) => (1usize + _count!($($xs)*)); +} + macro_rules! impl_value_for_tuple { ( $($Ti:ident),* ; $($FieldI:tt),* ) => { impl<$($Ti),+> Value for ($($Ti,)+) @@ -639,6 +757,8 @@ macro_rules! impl_value_for_tuple { Ok(()) } + + fn size_hint() -> usize { size_of::() + _count!($($FieldI)*) * size_of::() } } } } @@ -688,7 +808,8 @@ impl ValueList for [u8; 0] { // Implement ValueList for slices of Value types impl ValueList for &[T] { fn serialized(&self) -> SerializedResult<'_> { - let mut result = SerializedValues::with_capacity(self.len()); + let mut result = SerializedValues::with_capacity(self.len() * T::size_hint()); + for val in *self { result.add_value(val)?; } @@ -700,7 +821,7 @@ impl ValueList for &[T] { // Implement ValueList for Vec impl ValueList for Vec { fn serialized(&self) -> SerializedResult<'_> { - let mut result = SerializedValues::with_capacity(self.len()); + let mut result = SerializedValues::with_capacity(self.len() * T::size_hint()); for val in self { result.add_value(val)?; } @@ -714,7 +835,12 @@ macro_rules! impl_value_list_for_map { ($map_type:ident, $key_type:ty) => { impl ValueList for $map_type<$key_type, T> { fn serialized(&self) -> SerializedResult<'_> { - let mut result = SerializedValues::with_capacity(self.len()); + // Technically `4` is not the lower bound, but it's + // unlikely that the key and value are both empty + let mut result = SerializedValues::with_capacity( + self.len() * <$key_type as Value>::size_hint() + + self.len() * ::size_hint(), + ); for (key, val) in self { result.add_named_value(key, val)?; } @@ -736,7 +862,7 @@ impl_value_list_for_map!(BTreeMap, &str); // Further variants are done using a macro impl ValueList for (T0,) { fn serialized(&self) -> SerializedResult<'_> { - let mut result = SerializedValues::with_capacity(1); + let mut result = SerializedValues::with_capacity(::size_hint()); result.add_value(&self.0)?; Ok(Cow::Owned(result)) } @@ -749,10 +875,15 @@ macro_rules! impl_value_list_for_tuple { $($Ti: Value),+ { fn serialized(&self) -> SerializedResult<'_> { - let mut result = SerializedValues::with_capacity($size); + let mut cap = size_of::(); + $( + cap += $Ti::size_hint(); + )* + let mut result = SerializedValues::with_capacity(cap); $( result.add_value(&self.$FieldI) ?; )* + Ok(Cow::Owned(result)) } } From 7fbefe9c2eb6d0e31e5b628b53013d8c577c2c9a Mon Sep 17 00:00:00 2001 From: Colin O'Brien Date: Fri, 21 Oct 2022 12:45:14 -0700 Subject: [PATCH 2/2] Preallocate data buffer --- scylla-cql/src/frame/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scylla-cql/src/frame/mod.rs b/scylla-cql/src/frame/mod.rs index f7c1ab7def..ed26b88ebc 100644 --- a/scylla-cql/src/frame/mod.rs +++ b/scylla-cql/src/frame/mod.rs @@ -66,7 +66,8 @@ impl SerializedRequest { tracing: bool, ) -> Result { let mut flags = 0; - let mut data = vec![0; HEADER_SIZE]; + let mut data = Vec::with_capacity(32); + data.resize(HEADER_SIZE, 0); if let Some(compression) = compression { flags |= FLAG_COMPRESSION;