diff --git a/Cargo.lock b/Cargo.lock index 98e2b21..f5a32c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,106 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "bitvec" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", - "wyz", -] - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "funty" -version = "2.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - -[[package]] -name = "half" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" -dependencies = [ - "cfg-if", - "crunchy", -] - -[[package]] -name = "itoa" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" - -[[package]] -name = "num-traits" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" -dependencies = [ - "autocfg", -] - -[[package]] -name = "paste" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" - [[package]] name = "proc-macro2" version = "1.0.78" @@ -120,52 +20,14 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - -[[package]] -name = "rmp" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9860a6cc38ed1da53456442089b4dfa35e7cedaa326df63017af88385e6b20" -dependencies = [ - "byteorder", - "num-traits", - "paste", -] - -[[package]] -name = "rmp-serde" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffea85eea980d8a74453e5d02a8d93028f3c34725de143085a844ebe953258a" -dependencies = [ - "byteorder", - "rmp", - "serde", -] - [[package]] name = "rust-fr" version = "0.1.0" dependencies = [ - "bitvec", - "ciborium", - "rmp-serde", "serde", - "serde_json", "thiserror", ] -[[package]] -name = "ryu" -version 
= "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" - [[package]] name = "serde" version = "1.0.196" @@ -186,17 +48,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_json" -version = "1.0.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" -dependencies = [ - "itoa", - "ryu", - "serde", -] - [[package]] name = "syn" version = "2.0.48" @@ -208,12 +59,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "thiserror" version = "1.0.56" @@ -239,12 +84,3 @@ name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "wyz" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" -dependencies = [ - "tap", -] diff --git a/Cargo.toml b/Cargo.toml index e180ce8..c9e83bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,11 +17,4 @@ path = "src/lib.rs" [dependencies] thiserror = "1.0" -bitvec = "1.0.1" serde = { version = "1", features = ["derive"] } - - -[dev-dependencies] -serde_json = "1" -rmp-serde = "1.1.2" -ciborium = "0.2.2" diff --git a/README.md b/README.md index 6c62cf1..74e8117 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ ### rust-fr -'rust-fr' (aka `rust for real`) is a simple, non-self-describing data-interchange format. 
+'rust-fr' aka 'rust for real' is a simple data-interchange format that is better than [serde_json](https://github.com/serde-rs/json) +but not as awesome & compact as other binary formats like [ciborium](https://github.com/enarx/ciborium) & [msgpack-rust](https://github.com/3Hren/msgpack-rust). ### installation @@ -10,14 +11,13 @@ You can use either of these methods. - Add via `Cargo.toml` ```.toml [dependencies] -rust-fr = "0.1" +rust-fr = "0.1.0" ``` ### usage. ```rs use serde::{Serialize, Deserialize}; -use rust_fr::{serializer, deserializer}; // define some data #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] @@ -31,49 +31,21 @@ let human = Human { }; // serialize the data to bytes (Vec) -let human_bytes = serializer::to_bytes(&human).unwrap(); +let human_bytes = rust_fr::protocol::serializer::to_bytes(&human).unwrap(); // deserialize the data from serialized bytes. -let deserialized_human = deserializer::from_bytes::(&human_bytes).unwrap(); +let deserialized_human = rust_fr::protocol::deserializer::from_bytes::(&human_bytes).unwrap(); assert_eq!(human, deserialized_human); ``` -### benchmark. - -- Run `cargo test -- --nocapture --ignored` to run the benchmark tests. -```sh -running 3 tests ----- Small Data ---- -rust_fr: 218 bytes -serde_json: 332 bytes -rmp_serde: 146 bytes -ciborium: 170 bytes -test tests::length_test_small_data ... ok ----- Medium Data ---- -rust_fr: 14264 bytes -serde_json: 30125 bytes -rmp_serde: 10731 bytes -ciborium: 18347 bytes -test tests::length_test_medium_data ... ok ----- Large Data ---- -rust_fr: 139214 bytes -serde_json: 367595 bytes -rmp_serde: 157219 bytes -ciborium: 198277 bytes -test tests::length_test_large_data ... ok - -test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 4 filtered out; finished in 0.01s -``` - ### why? -The goal was to learn/understand. I wrote this so I can learn how serde internally works +The aim was to learn. 
I wrote this so I can learn how serde internally works and how to encode data into bytes that can ultimately be transferred over the wire or elsewhere. ### format specification. - - The format is not self-describing. - Primitive types are serialized as is. - bool: 0 -> false, 1 -> true (1 byte) @@ -81,17 +53,16 @@ or elsewhere. - u8, u16, u32, u64: as is. - f32, f64: as is. - char: as u32 (4 bytes) -- Delimiters are used to separate different types of data. -- String, Byte and Map Delimiters are 1 byte long while all other delimiters are 3 bits long. -- Delimiters: - - String = 134; 0b10000110 - - Byte = 135; 0b10000111 - - Unit = 2; 0b010 - - Seq = 3; 0b011 - - SeqValue = 4; 0b100 - - Map = 139; 0b10001011 - - MapKey = 6; 0b110 - - MapValue = 7; 0b111 +- Delimiters are all serialized as a u8 (1 byte) +- Delimiters Used (the values themselves are arbitrary and could be swapped): + - STRING_DELIMITER: 0x01 + - BYTE_DELIMITER: 0x02 + - UNIT: 0x03 + - SEQ_DELIMITER: 0x04 + - SEQ_VALUE_DELIMITER: 0x05 + - MAP_DELIMITER: 0x06 + - MAP_KEY_DELIMITER: 0x07 + - MAP_VALUE_DELIMITER: 0x08 - String, Bytes, Unit, Option are serialized as: - str: bytes + STRING_DELIMITER - bytes: bytes + BYTE_DELIMITER diff --git a/src/lib.rs b/src/lib.rs index 8bffdba..2fe6c09 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,320 +1,73 @@ -//! ### rust-fr -//! A simple, non-self-describing data-interchange format. It exposes two modules, `serializer` -//! and `deserializer`, for serializing and deserializing data which contain [`to_bytes`](serializer::to_bytes), -//! [`from_bytes`](deserializer::from_bytes) functions which do exactly what their names suggest. -//! - The data to be encoded & decoded must implement the `serde::Serialize` and `serde::Deserialize` traits. -//! -//! ### Example -//! ```rust -//! use rust_fr::{deserializer, serializer}; -//! -//! #[derive(Debug, serde::Serialize, serde::Deserialize, PartialEq)] -//! struct Human { -//! name: String, -//! age: u8, -//! } -//! -//! 
let human = Human { -//! name: "Ayush".to_string(), -//! age: 19, -//! }; -//! -//! // serialize the data to bytes (Vec) -//! let human_bytes = serializer::to_bytes(&human).unwrap(); -//! -//! // deserialize the data from serialized bytes. -//! let deserialized_human = deserializer::from_bytes::(&human_bytes).unwrap(); -//! -//! assert_eq!(human, deserialized_human); -//! ``` - -pub mod deserializer; -pub mod error; -pub mod serializer; +pub mod protocol; #[cfg(test)] mod tests { - use crate::{deserializer, serializer}; use serde::{Deserialize, Serialize}; - use std::collections::HashMap; - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - struct Primitives { - a: u8, - b: u16, - c: u32, - d: u64, - e: i8, - f: i16, - g: i32, - h: i64, - i: f32, - j: f64, - k: bool, - l: char, - m: String, - } - - #[test] - fn primitives() { - let primitives = Primitives { - a: 1, - b: 2, - c: 3, - d: 4, - e: -1, - f: -2, - g: -3, - h: -4, - i: 1.0, - j: 2.0, - k: true, - l: 'a', - m: "hello".to_string(), - }; - // Serialize - let bytes = serializer::to_bytes(&primitives).unwrap(); - - // Deserialize - let deserialized_primitives = deserializer::from_bytes::(&bytes).unwrap(); - assert_eq!(primitives, deserialized_primitives); - } - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - struct CompundTypes { - a: Vec, - b: HashMap, - c: Option, - d: Option, - e: Primitives, - } + use crate::protocol; #[test] - fn compound_types() { - let compound_types = CompundTypes { - a: vec![1, 2, 3], - b: [("a".to_string(), 1), ("b".to_string(), 2)] - .iter() - .cloned() - .collect(), - c: Some(1), - d: None, - e: Primitives { - a: 1, - b: 2, - c: 3, - d: 4, - e: -1, - f: -2, - g: -3, - h: -4, - i: 1.0, - j: 2.0, - k: true, - l: 'a', - m: "hello".to_string(), - }, + fn serialize_and_deserialize_complex_data() { + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + enum SomeEnum { + A { a: u8, b: u16 }, + B(u8), + C, + } + + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + 
struct Person { + name: String, + age: u8, + is_human: bool, + some_enum: SomeEnum, + som_other_enum: SomeEnum, + some_struct: SomeStruct, + } + + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + struct SomeStruct { + a: u8, + b: u16, + } + + let person = Person { + name: "Ayush".to_string(), + age: 19, + is_human: false, + some_enum: SomeEnum::A { a: 142, b: 5156 }, + som_other_enum: SomeEnum::B(4), + some_struct: SomeStruct { a: 32, b: 51 }, }; // Serialize - let bytes = serializer::to_bytes(&compound_types).unwrap(); + let bytes = protocol::serializer::to_bytes(&person).unwrap(); // Deserialize - let deserialized_compound_types = deserializer::from_bytes::(&bytes).unwrap(); - assert_eq!(compound_types, deserialized_compound_types); - } - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - struct Random { - a: u8, - b: u16, - c: u32, - d: u64, - e: i8, - f: i16, - g: i32, - h: i64, - i: f32, - j: f64, - k: bool, - l: char, - m: String, - n: Vec, - o: HashMap, - p: Option, - q: Option, - r: [u8; 3], - } - - #[test] - fn random() { - let random = Random { - a: 1, - b: 2, - c: 3, - d: 4, - e: -1, - f: -2, - g: -3, - h: -4, - i: 1.0, - j: 2.0, - k: true, - l: 'a', - m: "hello".to_string(), - n: vec![1, 2, 3], - o: [("a".to_string(), 1), ("b".to_string(), 2)] - .iter() - .cloned() - .collect(), - p: Some(1), - q: None, - r: [1, 2, 3], - }; - - // Serialize - let bytes = serializer::to_bytes(&random).unwrap(); - - // Deserialize - let deserialized_random = deserializer::from_bytes::(&bytes).unwrap(); - assert_eq!(random, deserialized_random); - } - - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] - struct Human { - name: String, - age: u8, + let deserialized_person = protocol::deserializer::from_bytes::(&bytes).unwrap(); + assert_eq!(person, deserialized_person); } #[test] fn readme_example() { + // define some data + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + struct Human { + name: String, + age: u8, + } + let human = Human { name: 
"Ayush".to_string(), age: 19, }; // serialize the data to bytes (Vec) - let human_bytes = serializer::to_bytes(&human).unwrap(); + let human_bytes = protocol::serializer::to_bytes(&human).unwrap(); // deserialize the data from serialized bytes. - let deserialized_human = deserializer::from_bytes::(&human_bytes).unwrap(); + let deserialized_human = protocol::deserializer::from_bytes::(&human_bytes).unwrap(); assert_eq!(human, deserialized_human); } - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - struct PlaygroundData { - never: HashMap>, - gonna: Vec, - give: Option, - you: bool, - up: Option, - } - - #[test] - #[ignore = "playground test; use cargo test -- --nocapture --ignored"] - fn length_test_large_data() { - let data = PlaygroundData { - never: (0..1000) - .map(|i| (i.to_string(), vec![i as u8; 100])) - .collect(), - gonna: (0..1000).map(|i| i as u8).collect(), - give: Some(1), - you: true, - up: Some(Primitives { - a: 1, - b: 2, - c: 3, - d: 4, - e: -1, - f: -2, - g: -3, - h: -4, - i: 1.0, - j: 2.0, - k: true, - l: 'a', - m: "hello".to_string(), - }), - }; - - let rust_fr_bytes = serializer::to_bytes(&data).unwrap(); - let serde_json_bytes = serde_json::to_vec(&data).unwrap(); - let rmp_serde_bytes = rmp_serde::to_vec(&data).unwrap(); - let mut cir_serde_bytes = Vec::new(); - ciborium::ser::into_writer(&data, &mut cir_serde_bytes).unwrap(); - - println!("---- Large Data ----"); - println!("rust_fr:\t{} bytes", rust_fr_bytes.len()); - println!("serde_json:\t{} bytes", serde_json_bytes.len()); - println!("rmp_serde:\t{} bytes", rmp_serde_bytes.len()); - println!("ciborium:\t{} bytes", cir_serde_bytes.len()); - } - - #[test] - #[ignore = "playground test; use cargo test -- --nocapture --ignored"] - fn length_test_small_data() { - let data = PlaygroundData { - never: (0..10) - .map(|i| (i.to_string(), vec![i as u8; 10])) - .collect(), - gonna: (0..10).map(|i| i as u8).collect(), - give: Some(1), - you: false, - up: None, - }; - - let rust_fr_bytes = 
serializer::to_bytes(&data).unwrap(); - let serde_json_bytes = serde_json::to_vec(&data).unwrap(); - let rmp_serde_bytes = rmp_serde::to_vec(&data).unwrap(); - let mut cir_serde_bytes = Vec::new(); - ciborium::ser::into_writer(&data, &mut cir_serde_bytes).unwrap(); - - println!("---- Small Data ----"); - println!("rust_fr:\t{} bytes", rust_fr_bytes.len()); - println!("serde_json:\t{} bytes", serde_json_bytes.len()); - println!("rmp_serde:\t{} bytes", rmp_serde_bytes.len()); - println!("ciborium:\t{} bytes", cir_serde_bytes.len()); - } - - #[test] - #[ignore = "playground test; use cargo test -- --nocapture --ignored"] - fn length_test_medium_data() { - let data = PlaygroundData { - never: (0..100) - .map(|i| (i.to_string(), vec![i as u8; 100])) - .collect(), - gonna: (0..100).map(|i| i as u8).collect(), - give: Some(1), - you: true, - up: Some(Primitives { - a: 1, - b: 2, - c: 3, - d: 4, - e: -1, - f: -2, - g: -3, - h: -4, - i: 1.0, - j: 2.0, - k: true, - l: 'a', - m: "hello".to_string(), - }), - }; - - let rust_fr_bytes = serializer::to_bytes(&data).unwrap(); - let serde_json_bytes = serde_json::to_vec(&data).unwrap(); - let rmp_serde_bytes = rmp_serde::to_vec(&data).unwrap(); - let mut cir_serde_bytes = Vec::new(); - ciborium::ser::into_writer(&data, &mut cir_serde_bytes).unwrap(); - - println!("---- Medium Data ----"); - println!("rust_fr:\t{} bytes", rust_fr_bytes.len()); - println!("serde_json:\t{} bytes", serde_json_bytes.len()); - println!("rmp_serde:\t{} bytes", rmp_serde_bytes.len()); - println!("ciborium:\t{} bytes", cir_serde_bytes.len()); - } } diff --git a/src/deserializer.rs b/src/protocol/deserializer.rs similarity index 73% rename from src/deserializer.rs rename to src/protocol/deserializer.rs index c550b1e..dd5ee1d 100644 --- a/src/deserializer.rs +++ b/src/protocol/deserializer.rs @@ -1,136 +1,60 @@ -//! ### Deserializer -//! This module contains the deserialization logic for the library. It is used to deserialize -//! bytes to a custom type. -//! 
-//! To use the deserializer, you need to call the [`from_bytes`] function which takes in -//! the bytes and a type. The type must implement the `Deserialize` trait from the serde library. -//! It returns a Result with the deserialized data or an error. - -use bitvec::{prelude as bv, slice::BitSlice, view::BitView}; use serde::{ de::{EnumAccess, IntoDeserializer, MapAccess, SeqAccess, VariantAccess}, Deserialize, Deserializer, }; -use super::{error::Error, serializer::Delimiter}; +use super::{ + error::Error, + serializer::{ + BYTE_DELIMITER, MAP_DELIMITER, MAP_KEY_DELIMITER, MAP_VALUE_DELIMITER, SEQ_DELIMITER, + SEQ_VALUE_DELIMITER, STRING_DELIMITER, UNIT, + }, +}; -// Internal struct that handles the deserialization of the data. -// It has a few methods that allows us to peek and eat bytes from the data. -// It also has methods to parse some data into the required type. +/// Internal struct that handles the deserialization of the data. +/// It has a few methods that allows us to peek and eat bytes from the data. +/// It also has methods to parse some data into the required type. #[derive(Debug)] struct CustomDeserializer<'de> { - data: &'de bv::BitSlice, + data: &'de [u8], } -/// The function to deserialize (serialized) bytes back into data. `T` must implement the `Deserialize` trait -/// from the `serde` library. `bytes` is the data to be deserialized. It returns a Result with the deserialized -/// data or an error. +/// The main function to deserialize bytes to a type. It makes assumptions +/// on the bytes based on the specification and the type provided. In order to deserialize from bytes to +/// a custom type, the type must implement the Deserialize trait from the serde library. 
pub fn from_bytes<'de, T>(bytes: &'de [u8]) -> Result where T: Deserialize<'de>, { - let mut deserializer = CustomDeserializer { - data: bytes.view_bits(), - }; + let mut deserializer = CustomDeserializer { data: bytes }; let deserialized = T::deserialize(&mut deserializer)?; Ok(deserialized) } impl<'de> CustomDeserializer<'de> { - /// Get 'n' bits from end of the data. - /// Example: If the data is 0b10101010 and n is 3, the result will be 0b010. - fn _peek_n_bits(&self, size: usize) -> Result<&BitSlice, Error> { - let len = self.data.len(); - if size > len { - return Err(Error::NLargerThanLength(size, self.data.len())); - } - self.data.get(..size).ok_or(Error::NoByte) - } - - /// Get the first byte from the data. - pub fn peek_byte(&self) -> Result { - let bits = self._peek_n_bits(8)?; - let mut byte = 0u8; - for (i, bit) in bits.iter().enumerate() { - if *bit { - byte |= 1 << i; - } - } - Ok(byte) - } - - /// Peek the next token from the data. - pub fn peek_token(&self, token: Delimiter) -> Result { - let bits = match token { - Delimiter::String => self._peek_n_bits(8)?, - Delimiter::Byte => self._peek_n_bits(8)?, - Delimiter::Map => self._peek_n_bits(8)?, - _ => self._peek_n_bits(3)?, - }; - let mut byte = 0u8; - for (i, bit) in bits.iter().enumerate() { - if *bit { - byte |= 1 << i; - } - } - if byte == token as u8 { - Ok(true) - } else { - Ok(false) - } + /// Get the first byte from the data. + pub fn peek_byte(&self) -> Result<&u8, Error> { + let data = self.data.first().ok_or(Error::NoByte)?; + Ok(data) } - - /// Grab the next bit from the data and remove it. - pub fn eat_bit(&mut self) -> Result { - let bit = *self._peek_n_bits(1)?.get(0).ok_or(Error::NoBit)?; - self.data = &self.data[1..]; - Ok(bit) - } - /// Grab the next byte from the data and remove it. 
pub fn eat_byte(&mut self) -> Result { - let byte = self.peek_byte()?; - self.data = &self.data[8..]; + let byte = *self.peek_byte()?; + self.data = &self.data[1..]; Ok(byte) } - /// Grab the next 'n' bytes from the data and remove them. - pub fn eat_bytes(&mut self, n: usize) -> Result, Error> { - let bits = &self.data[..n * 8]; - let mut bytes = Vec::new(); - self.data = &self.data[n * 8..]; - for i in 0..n { - let mut byte = 0u8; - for (j, bit) in bits[i * 8..(i + 1) * 8].iter().enumerate() { - if *bit { - byte |= 1 << j; - } - } - bytes.push(byte); - } + pub fn eat_bytes(&mut self, n: usize) -> Result<&[u8], Error> { + let bytes = &self.data[..n]; + self.data = &self.data[n..]; Ok(bytes) } - /// Grab the next token from the data and remove it. - pub fn eat_token(&mut self, token: Delimiter) -> Result<(), Error> { - let bits_to_munch = match token { - Delimiter::String => 8, - Delimiter::Byte => 8, - Delimiter::Map => 8, - _ => 3, - }; - if self.data.len() < bits_to_munch { - return Err(Error::UnexpectedEOF); - } - self.data = &self.data[bits_to_munch..]; - Ok(()) - } - /// Parser Methods /// Parses a boolean value from the input. pub fn parse_bool(&mut self) -> Result { - self.eat_bit() + Ok(self.eat_byte()? != 0) } /// Parses an unsigned integer value from the input. pub fn parse_unsigned(&mut self) -> Result @@ -230,13 +154,12 @@ impl<'de> CustomDeserializer<'de> { /// Parses a string value from the input. pub fn parse_str(&mut self, bytes: &mut Vec) -> Result { - 'byteloop: loop { + loop { let byte = self.eat_byte()?; - bytes.push(byte); - if self.peek_token(Delimiter::String)? { - self.eat_token(Delimiter::String)?; - break 'byteloop; + if byte == STRING_DELIMITER { + break; } + bytes.push(byte); } String::from_utf8(bytes.clone()).map_err(|_| Error::ConversionError) } @@ -244,11 +167,10 @@ impl<'de> CustomDeserializer<'de> { /// Parses a byte buffer from the input. 
pub fn parse_bytes(&mut self, bytes: &mut Vec) -> Result<(), Error> { loop { - if self.peek_token(Delimiter::Byte)? { - self.eat_token(Delimiter::Byte)?; + let byte = self.eat_byte()?; + if byte == BYTE_DELIMITER { break; } - let byte = self.eat_byte()?; bytes.push(byte); } Ok(()) @@ -381,12 +303,12 @@ impl<'de, 'a> Deserializer<'de> for &'a mut CustomDeserializer<'de> { where V: serde::de::Visitor<'de>, { - match self.peek_token(Delimiter::Unit)? { - true => { - self.eat_token(Delimiter::Unit)?; + match self.peek_byte()? { + &UNIT => { + self.eat_byte()?; visitor.visit_none() } - false => visitor.visit_some(self), + _ => visitor.visit_some(self), } } /// Unit Deserialization. They are serialized as UNIT. @@ -394,12 +316,9 @@ impl<'de, 'a> Deserializer<'de> for &'a mut CustomDeserializer<'de> { where V: serde::de::Visitor<'de>, { - match self.peek_token(Delimiter::Unit)? { - true => { - self.eat_token(Delimiter::Unit)?; - visitor.visit_unit() - } - _ => Err(Error::ExpectedDelimiter(Delimiter::Unit)), + match self.parse_unsigned::()? { + UNIT => visitor.visit_unit(), + _ => Err(Error::ExpectedUnit), } } @@ -462,17 +381,15 @@ impl<'de, 'a> Deserializer<'de> for &'a mut CustomDeserializer<'de> { where V: serde::de::Visitor<'de>, { - match self.peek_token(Delimiter::Seq)? { - true => { - self.eat_token(Delimiter::Seq)?; + match self.parse_unsigned::()? { + SEQ_DELIMITER => { let value = visitor.visit_seq(SequenceDeserializer::new(self))?; - if !self.peek_token(Delimiter::Seq)? { - return Err(Error::ExpectedDelimiter(Delimiter::Seq)); + if self.parse_unsigned::()? != SEQ_DELIMITER { + return Err(Error::ExpectedSeqDelimiter); } - self.eat_token(Delimiter::Seq)?; Ok(value) } - false => Err(Error::ExpectedDelimiter(Delimiter::Seq)), + _ => Err(Error::ExpectedSeqDelimiter), } } /// - map: key_1 + MAP_KEY_DELIMITER + value_1 + MAP_VALUE_DELIMITER + ... 
+ MAP_DELIMITER @@ -481,10 +398,9 @@ impl<'de, 'a> Deserializer<'de> for &'a mut CustomDeserializer<'de> { V: serde::de::Visitor<'de>, { let value = visitor.visit_map(MapDeserializer::new(self))?; - if !self.peek_token(Delimiter::Map)? { - return Err(Error::ExpectedDelimiter(Delimiter::Map)); + if self.parse_unsigned::()? != MAP_DELIMITER { + return Err(Error::ExpectedMapDelimiter); } - self.eat_token(Delimiter::Map)?; Ok(value) } @@ -604,15 +520,12 @@ impl<'de, 'a> SeqAccess<'de> for SequenceDeserializer<'a, 'de> { T: serde::de::DeserializeSeed<'de>, { // if at end of sequence; exit - if self.deserializer.peek_token(Delimiter::Seq)? { + if self.deserializer.peek_byte()? == &SEQ_DELIMITER { return Ok(None); } // if not first and not at the end of sequence; eat SEQ_VALUE_DELIMITER - if !self.first { - if !self.deserializer.peek_token(Delimiter::SeqValue)? { - return Err(Error::ExpectedDelimiter(Delimiter::SeqValue)); - } - self.deserializer.eat_token(Delimiter::SeqValue)?; + if !self.first && self.deserializer.eat_byte()? != SEQ_VALUE_DELIMITER { + return Err(Error::ExpectedSeqValueDelimiter); } // make not first; deserialize next element self.first = false; @@ -646,23 +559,16 @@ impl<'de, 'a> MapAccess<'de> for MapDeserializer<'a, 'de> { where K: serde::de::DeserializeSeed<'de>, { - println!("map(): key--start"); // if at end of map; exit - if self.deserializer.peek_token(Delimiter::Map)? { - println!("map(): exit"); + if self.deserializer.peek_byte()? == &MAP_DELIMITER { return Ok(None); } - println!("map(): key--loop"); // make not first; deserialize next key_1 self.first = false; let value = seed.deserialize(&mut *self.deserializer).map(Some)?; - println!("map(): deserialied_key"); - if !self.deserializer.peek_token(Delimiter::MapKey)? { - return Err(Error::ExpectedDelimiter(Delimiter::MapKey)); + if self.deserializer.parse_unsigned::()? 
!= MAP_KEY_DELIMITER { + return Err(Error::ExpectedMapKeyDelimiter); } - println!("map(): eating key delimiter"); - self.deserializer.eat_token(Delimiter::MapKey)?; - println!("map(): key--end"); Ok(value) } @@ -674,16 +580,10 @@ impl<'de, 'a> MapAccess<'de> for MapDeserializer<'a, 'de> { where V: serde::de::DeserializeSeed<'de>, { - println!("map(): value--start"); - println!("peeking_map_value"); let value = seed.deserialize(&mut *self.deserializer)?; - println!("map(): deserialied_value"); - if !self.deserializer.peek_token(Delimiter::MapValue)? { - return Err(Error::ExpectedDelimiter(Delimiter::MapValue)); + if self.deserializer.eat_byte()? != MAP_VALUE_DELIMITER { + return Err(Error::ExpectedMapValueDelimiter); } - println!("map(): eating value delimiter"); - self.deserializer.eat_token(Delimiter::MapValue)?; - println!("map(): value--end"); Ok(value) } } diff --git a/src/error.rs b/src/protocol/error.rs similarity index 59% rename from src/error.rs rename to src/protocol/error.rs index 08bc0bc..97fb70d 100644 --- a/src/error.rs +++ b/src/protocol/error.rs @@ -1,14 +1,5 @@ -//! ### Error -//! A module for the error type used in the library. It is a simple enum with a variant for each -//! error that can occur in the library. It uses `thiserror` internally. 
- -use super::serializer::Delimiter; - #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("could not get the last bit from the data.")] - NoBit, - #[error("could not get the last byte from the data.")] NoByte, @@ -33,8 +24,32 @@ pub enum Error { #[error("type conversion error")] ConversionError, - #[error("expected delimiter {0}")] - ExpectedDelimiter(Delimiter), + #[error("expected string delimiter")] + ExpectedStringDelimiter, + + #[error("expected byte delimiter")] + ExpectedByteDelimiter, + + #[error("expected unit")] + ExpectedUnit, + + #[error("expected enum delimiter")] + ExpectedEnumDelimiter, + + #[error("expected seq delimiter")] + ExpectedSeqDelimiter, + + #[error("expected seq value delimiter")] + ExpectedSeqValueDelimiter, + + #[error("expected map delimiter")] + ExpectedMapDelimiter, + #[error("expected map key delimiter")] + ExpectedMapKeyDelimiter, + #[error("expected map value separator")] + ExpectedMapValueSeparator, + #[error("expected map value delimiter")] + ExpectedMapValueDelimiter, } impl serde::ser::Error for Error { diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs new file mode 100644 index 0000000..27a4ed6 --- /dev/null +++ b/src/protocol/mod.rs @@ -0,0 +1,3 @@ +pub mod deserializer; +pub mod error; +pub mod serializer; diff --git a/src/serializer.rs b/src/protocol/serializer.rs similarity index 63% rename from src/serializer.rs rename to src/protocol/serializer.rs index e6db8a3..6d589d1 100644 --- a/src/serializer.rs +++ b/src/protocol/serializer.rs @@ -1,10 +1,3 @@ -//! ### Serializer -//! The module that handles the serialization of the data. -//! -//! To use the serializer, call the [`to_bytes`] function with a reference to the data to be -//! serialized. The data must implement the `Serialize` trait from the `serde` library. 
- -use bitvec::{prelude as bv, slice::BitSlice}; use serde::{ ser::{ SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, SerializeTuple, @@ -15,137 +8,46 @@ use serde::{ use super::error::Error; -/// The delimiter used in the format specification. The purpose -/// of delimiters is to separate different types of data such -/// that they don't mangle. There are 8 different delimiters -/// in the format specification out of which 3 (`String`, `Byte` & `Map`) -/// are 1 byte long and 5 (the rest...) are 3 bits long. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Delimiter { - // 0b10000110 - String = 134, - // 0b10000111 - Byte = 135, - // 0b010 - Unit = 2, - // 0b011 - Seq = 3, - // 0b100 - SeqValue = 4, - // 0b10001011 - Map = 139, - // 0b110 - MapKey = 6, - // 0b111 - MapValue = 7, -} - -impl std::fmt::Display for Delimiter { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Delimiter::String => write!(f, "String"), - Delimiter::Byte => write!(f, "Byte"), - Delimiter::Unit => write!(f, "Unit"), - Delimiter::Seq => write!(f, "Seq"), - Delimiter::SeqValue => write!(f, "SeqValue"), - Delimiter::Map => write!(f, "Map"), - Delimiter::MapKey => write!(f, "MapKey"), - Delimiter::MapValue => write!(f, "MapValue"), - } - } -} - -// Internal struct that handles the serialization of the data. -// It has a few methods that lets us peeking bytes in the data. +/// The following constants are used to serialize the data in a specific format. +/// Their exact values are not important, but they should be unique and not conflict with the data. +pub const STRING_DELIMITER: u8 = 0x01; +pub const BYTE_DELIMITER: u8 = 0x02; +pub const UNIT: u8 = 0x03; +pub const SEQ_DELIMITER: u8 = 0x04; +pub const SEQ_VALUE_DELIMITER: u8 = 0x05; +pub const MAP_DELIMITER: u8 = 0x06; +pub const MAP_KEY_DELIMITER: u8 = 0x07; +pub const MAP_VALUE_DELIMITER: u8 = 0x08; + +/// Internal struct that handles the serialization of the data. 
+/// It has a few methods that lets us peeking bytes in the data. #[derive(Debug)] struct CustomSerializer { - data: bv::BitVec, + data: Vec, } -/// The function to serialize data of a given type to a byte vector. The -/// `value` must implement the `Serialize` trait from the `serde` library. It returns -/// a Result with the serialized byte vector or an error. +/// The main function to serialize data of a given type to a byte vector i.e. Vec. It +/// uses the format specification to serialize the data. In order to serialize a custom type, +/// the type must implement the Serialize trait from the serde library. pub fn to_bytes(value: &T) -> Result, Error> { - let mut serializer = CustomSerializer { - data: bv::BitVec::new(), - }; + let mut serializer = CustomSerializer { data: Vec::new() }; value.serialize(&mut serializer)?; - Ok(serializer.data.into_vec()) + Ok(serializer.data) } impl CustomSerializer { - /// Get 'n' bits from end of the data. - fn _peek_n_bits(&self, size: usize) -> Result<&BitSlice, Error> { - let len = self.data.len(); - if size > len { - return Err(Error::NLargerThanLength(size, self.data.len())); - } - self.data.get(len - size..).ok_or(Error::NoByte) - } - - /// Construst a byte from the last 3 bits of the data. - pub fn peek_token(&self, token: Delimiter) -> Result { - let bits = match token { - Delimiter::String => self._peek_n_bits(8)?, - Delimiter::Byte => self._peek_n_bits(8)?, - Delimiter::Map => self._peek_n_bits(8)?, - _ => self._peek_n_bits(3)?, - }; - let mut byte = 0u8; - for (i, bit) in bits.iter().enumerate() { - if *bit { - byte |= 1 << i; - } - } - Ok(byte == token as u8) + /// Get the last byte from the data. + pub fn peek_byte(&self) -> Result<&u8, Error> { + self.data.last().ok_or(Error::NoByte) } - /// Get token before 'n' bits. 
- pub fn peek_token_before_n_bits(&self, n: usize) -> Result { - let bits = self._peek_n_bits(n + 3)?[0..3].as_ref(); - let mut byte = 0u8; - for (i, bit) in bits.iter().enumerate() { - if *bit { - byte |= 1 << i; - } - } - Ok(byte) - } - - /// Serialize a token to the data. - pub fn serialize_token(&mut self, token: Delimiter) { - match token { - Delimiter::String => { - self.data - .extend(&[false, true, true, false, false, false, false, true]); - // 10000110 - } - Delimiter::Byte => { - self.data - .extend(&[true, true, true, false, false, false, false, true]); - // 10000111 - } - Delimiter::Unit => { - self.data.extend(&[false, true, false]); // 010 - } - Delimiter::Seq => { - self.data.extend(&[true, true, false]); // 011 - } - Delimiter::SeqValue => { - self.data.extend(&[false, false, true]); // 100 - } - Delimiter::Map => { - self.data - .extend(&[true, true, false, true, false, false, false, true]); - // 10001011 - } - Delimiter::MapKey => { - self.data.extend(&[false, true, true]); // 110 - } - Delimiter::MapValue => { - self.data.extend(&[true, true, true]); // 111 - } + /// Get the last 'n' bytes from the data. 
+ pub fn peek_bytes(&self, n: usize) -> Result<&[u8], Error> { + let len = self.data.len(); + if len < n { + return Err(Error::NLargerThanLength(n, len)); } + Ok(&self.data[len - n..]) } } @@ -163,9 +65,9 @@ impl<'a> Serializer for &'a mut CustomSerializer { type SerializeTupleVariant = Self; type SerializeStructVariant = Self; - /// bool: 0 -> false, 1 -> true (1 bit) + /// bool: 0 -> false, 1 -> true (1 byte) fn serialize_bool(self, v: bool) -> Result { - self.data.push(v); + self.data.push(if v { 1 } else { 0 }); Ok(()) } @@ -222,20 +124,19 @@ impl<'a> Serializer for &'a mut CustomSerializer { /// str: bytes STRING_DELIMITER fn serialize_str(self, v: &str) -> Result { self.data.extend(v.as_bytes()); - self.serialize_token(Delimiter::String); + self.serialize_u8(STRING_DELIMITER)?; Ok(()) } /// bytes: bytes BYTE_DELIMITER fn serialize_bytes(self, v: &[u8]) -> Result { self.data.extend(v); - self.serialize_token(Delimiter::Byte); + self.serialize_u8(BYTE_DELIMITER)?; Ok(()) } /// unit: UNIT (null) fn serialize_unit(self) -> Result { - self.serialize_token(Delimiter::Unit); - Ok(()) + self.serialize_u8(UNIT) } /// option: @@ -325,7 +226,7 @@ impl<'a> Serializer for &'a mut CustomSerializer { /// sequences: SEQ_DELIMITER + value_1 + SEQ_VALUE_DELIMITER + value_2 + SEQ_VALUE_DELIMITER + ... SEQ_DELIMITER fn serialize_seq(self, _len: Option) -> Result { - self.serialize_token(Delimiter::Seq); + self.serialize_u8(SEQ_DELIMITER)?; Ok(self) } /// maps: key_1 + MAP_KEY_DELIMITER + value_1 + MAP_VALUE_DELIMITER + key_2 + MAP_KEY_DELIMITER + value_2 + MAP_VALUE_DELIMITER +... MAP_DELIMITER @@ -356,15 +257,14 @@ impl<'a> SerializeSeq for &'a mut CustomSerializer { where T: Serialize, { - if !self.peek_token(Delimiter::Seq)? { - self.serialize_token(Delimiter::SeqValue); + if self.peek_byte()? 
!= &SEQ_DELIMITER { + self.serialize_u8(SEQ_VALUE_DELIMITER)?; } value.serialize(&mut **self) } fn end(self) -> Result { - self.serialize_token(Delimiter::Seq); - Ok(()) + self.serialize_u8(SEQ_DELIMITER) } } impl<'a> SerializeMap for &'a mut CustomSerializer { @@ -377,8 +277,7 @@ impl<'a> SerializeMap for &'a mut CustomSerializer { T: Serialize, { key.serialize(&mut **self)?; - self.serialize_token(Delimiter::MapKey); - Ok(()) + self.serialize_u8(MAP_KEY_DELIMITER) } /// Serialize a value of a given element of the map. @@ -387,14 +286,12 @@ impl<'a> SerializeMap for &'a mut CustomSerializer { T: Serialize, { value.serialize(&mut **self)?; - self.serialize_token(Delimiter::MapValue); - Ok(()) + self.serialize_u8(MAP_VALUE_DELIMITER) } /// End the map serialization. fn end(self) -> Result { - self.serialize_token(Delimiter::Map); - Ok(()) + self.serialize_u8(MAP_DELIMITER) } } @@ -408,16 +305,15 @@ impl<'a> SerializeTuple for &'a mut CustomSerializer { where T: Serialize, { - if !self.peek_token(Delimiter::Seq)? { - self.serialize_token(Delimiter::SeqValue); + if self.peek_byte()? != &SEQ_DELIMITER { + self.serialize_u8(SEQ_VALUE_DELIMITER)?; } value.serialize(&mut **self) } /// End the tuple serialization. fn end(self) -> Result { - self.serialize_token(Delimiter::Seq); - Ok(()) + self.serialize_u8(SEQ_DELIMITER) } } // = map() @@ -436,16 +332,14 @@ impl<'a> SerializeStruct for &'a mut CustomSerializer { T: Serialize, { key.serialize(&mut **self)?; - self.serialize_token(Delimiter::MapKey); + self.serialize_u8(MAP_KEY_DELIMITER)?; value.serialize(&mut **self)?; - self.serialize_token(Delimiter::MapValue); - Ok(()) + self.serialize_u8(MAP_VALUE_DELIMITER) } /// End the struct serialization. fn end(self) -> Result { - self.serialize_token(Delimiter::Map); - Ok(()) + self.serialize_u8(MAP_DELIMITER) } } @@ -460,16 +354,15 @@ impl<'a> SerializeTupleStruct for &'a mut CustomSerializer { where T: Serialize, { - if !self.peek_token(Delimiter::Seq)? 
{ - self.serialize_token(Delimiter::SeqValue); + if self.peek_byte()? != &SEQ_DELIMITER { + self.serialize_u8(SEQ_VALUE_DELIMITER)?; } value.serialize(&mut **self) } /// End the tuple struct serialization. fn end(self) -> Result { - self.serialize_token(Delimiter::Seq); - Ok(()) + self.serialize_u8(SEQ_DELIMITER) } } @@ -484,16 +377,15 @@ impl<'a> SerializeTupleVariant for &'a mut CustomSerializer { where T: Serialize, { - if self.peek_token_before_n_bits(32)? != Delimiter::Seq as u8 { - self.serialize_token(Delimiter::SeqValue); + if self.peek_bytes(5)?[0] != SEQ_DELIMITER { + self.serialize_u8(SEQ_VALUE_DELIMITER)?; } value.serialize(&mut **self) } /// End the tuple variant serialization. fn end(self) -> Result { - self.serialize_token(Delimiter::Seq); - Ok(()) + self.serialize_u8(SEQ_DELIMITER) } } @@ -513,15 +405,13 @@ impl<'a> SerializeStructVariant for &'a mut CustomSerializer { T: Serialize, { key.serialize(&mut **self)?; - self.serialize_token(Delimiter::MapKey); + self.serialize_u8(MAP_KEY_DELIMITER)?; value.serialize(&mut **self)?; - self.serialize_token(Delimiter::MapValue); - Ok(()) + self.serialize_u8(MAP_VALUE_DELIMITER) } /// End the struct variant serialization. fn end(self) -> Result { - self.serialize_token(Delimiter::Map); - Ok(()) + self.serialize_u8(MAP_DELIMITER) } }