From 6be8d86aa0e2831187699875519f0d8c39130221 Mon Sep 17 00:00:00 2001 From: Griffin Berlstein Date: Wed, 27 Mar 2024 13:50:20 -0400 Subject: [PATCH] [Cider 2] Memory data dump format & serialization/deserialization (#1988) * bad placeholder * data dump and testing tweaks * Add some documentation and todos * rename the method to make clippy less angry * fun with memories and such --- interp/src/flatten/mod.rs | 2 +- .../flatten/primitives/stateful/memories.rs | 89 ++++++- interp/src/lib.rs | 2 +- interp/src/logging.rs | 2 +- interp/src/serialization/data_dump.rs | 241 ++++++++++++++++++ interp/src/serialization/mod.rs | 3 + .../old.rs} | 2 +- interp/src/structures/values.rs | 74 ++++++ 8 files changed, 409 insertions(+), 6 deletions(-) create mode 100644 interp/src/serialization/data_dump.rs create mode 100644 interp/src/serialization/mod.rs rename interp/src/{serialization.rs => serialization/old.rs} (99%) diff --git a/interp/src/flatten/mod.rs b/interp/src/flatten/mod.rs index 15c94757b7..9f209cbdeb 100644 --- a/interp/src/flatten/mod.rs +++ b/interp/src/flatten/mod.rs @@ -1,6 +1,6 @@ pub(crate) mod flat_ir; pub mod primitives; -mod structures; +pub(crate) mod structures; pub(crate) mod text_utils; use structures::environment::{Environment, Simulator}; diff --git a/interp/src/flatten/primitives/stateful/memories.rs b/interp/src/flatten/primitives/stateful/memories.rs index fc855901ff..2828246a33 100644 --- a/interp/src/flatten/primitives/stateful/memories.rs +++ b/interp/src/flatten/primitives/stateful/memories.rs @@ -1,3 +1,5 @@ +use itertools::Itertools; + use crate::{ errors::InterpreterError, flatten::{ @@ -247,7 +249,7 @@ impl CombMem { T: Into, { let shape = size.into(); - let internal_state = vec![Value::zeroes(width); shape.len()]; + let internal_state = vec![Value::zeroes(width); shape.size()]; Self { base_port: base, @@ -258,6 +260,47 @@ impl CombMem { done_is_high: false, } } + + pub fn new_with_init( + base_port: GlobalPortIdx, + width: u32, + 
allow_invalid: bool, + size: T, + data: &[u8], + ) -> Self + where + T: Into, + { + let byte_count = width.div_ceil(8); + let size = size.into(); + + let internal_state = data + .chunks_exact(byte_count as usize) + .map(|x| Value::from_bytes_le(x, width as usize)) + .collect_vec(); + + assert_eq!(internal_state.len(), size.size()); + assert!(data + .chunks_exact(byte_count as usize) + .remainder() + .is_empty()); + + Self { + base_port, + internal_state, + _allow_invalid_access: allow_invalid, + _width: width, + addresser: MemDx::new(size), + done_is_high: false, + } + } + + pub fn dump_data(&self) -> Vec { + self.internal_state + .iter() + .flat_map(|x| x.to_bytes()) + .collect() + } } impl Primitive for CombMem { @@ -365,7 +408,7 @@ impl SeqMem { size: T, ) -> Self { let shape = size.into(); - let internal_state = vec![Value::zeroes(width); shape.len()]; + let internal_state = vec![Value::zeroes(width); shape.size()]; Self { base_port: base, @@ -378,6 +421,41 @@ impl SeqMem { } } + pub fn new_with_init( + base_port: GlobalPortIdx, + width: u32, + allow_invalid: bool, + size: T, + data: &[u8], + ) -> Self + where + T: Into, + { + let byte_count = width.div_ceil(8); + let size = size.into(); + + let internal_state = data + .chunks_exact(byte_count as usize) + .map(|x| Value::from_bytes_le(x, width as usize)) + .collect_vec(); + + assert_eq!(internal_state.len(), size.size()); + assert!(data + .chunks_exact(byte_count as usize) + .remainder() + .is_empty()); + + Self { + base_port, + internal_state, + _allow_invalid_access: allow_invalid, + _width: width, + addresser: MemDx::new(size), + done_is_high: false, + read_out: PortValue::new_undef(), + } + } + declare_ports![ _CLK: 0, RESET: 1, @@ -408,6 +486,13 @@ impl SeqMem { pub fn reset(&self) -> GlobalPortIdx { (self.base_port.index() + Self::RESET).into() } + + pub fn dump_data(&self) -> Vec { + self.internal_state + .iter() + .flat_map(|x| x.to_bytes()) + .collect() + } } impl Primitive for SeqMem { diff --git 
a/interp/src/lib.rs b/interp/src/lib.rs index f7bc139a97..efc71db6c3 100644 --- a/interp/src/lib.rs +++ b/interp/src/lib.rs @@ -1,6 +1,6 @@ pub mod interpreter; pub mod primitives; -mod serialization; +pub mod serialization; pub use utils::MemoryMap; pub mod configuration; pub mod debugger; diff --git a/interp/src/logging.rs b/interp/src/logging.rs index 64dd1ce232..111f892674 100644 --- a/interp/src/logging.rs +++ b/interp/src/logging.rs @@ -10,7 +10,7 @@ use slog::{Drain, Level}; static ROOT_LOGGER: OnceCell = OnceCell::new(); pub fn initialize_default_logger() { - initialize_logger(false); + initialize_logger(true); } pub fn initialize_logger(quiet: bool) { diff --git a/interp/src/serialization/data_dump.rs b/interp/src/serialization/data_dump.rs new file mode 100644 index 0000000000..0256233e5f --- /dev/null +++ b/interp/src/serialization/data_dump.rs @@ -0,0 +1,241 @@ +use std::num::NonZeroUsize; + +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Debug, Deserialize, PartialEq, Clone)] +pub struct MemoryDeclaration { + pub name: String, + pub width: NonZeroUsize, + pub size: NonZeroUsize, +} + +impl MemoryDeclaration { + pub fn new(name: String, width: usize, size: usize) -> Self { + Self { + name, + width: NonZeroUsize::new(width).expect("width must be non-zero"), + size: NonZeroUsize::new(size).expect("size must be non-zero"), + } + } + + pub fn byte_count(&self) -> usize { + self.width.get().div_ceil(8) * self.size.get() + } +} + +#[derive(Serialize, Debug, Deserialize, PartialEq, Clone)] +pub struct DataHeader { + pub top_level: String, + pub memories: Vec, +} + +impl DataHeader { + pub fn new(top_level: String, memories: Vec) -> Self { + Self { + top_level, + memories, + } + } + + pub fn data_size(&self) -> usize { + self.memories + .iter() + .fold(0, |acc, mem| acc + mem.byte_count()) + } +} + +#[derive(Debug, PartialEq)] +pub struct DataDump { + pub header: DataHeader, + pub data: Vec, +} + +impl DataDump { + // TODO Griffin: handle the 
errors properly + pub fn serialize(&self, writer: &mut dyn std::io::Write) { + let header_str = serde_json::to_string(&self.header).unwrap(); + // the length prefix is always 8 little-endian bytes, whatever the width of usize + let len_bytes = header_str.len() as u64; + writer.write_all(&len_bytes.to_le_bytes()).unwrap(); + write!(writer, "{}", header_str).unwrap(); + + // write_all keeps writing after a short write instead of tripping an assert + writer.write_all(&self.data).unwrap(); + } + + /// Reads the 8-byte little-endian header length, the JSON header, then the data. TODO Griffin: handle the errors properly + pub fn deserialize(reader: &mut dyn std::io::Read) -> Self { + let mut raw_header_len = [0u8; 8]; + reader.read_exact(&mut raw_header_len).unwrap(); + let header_len = usize::try_from(u64::from_le_bytes(raw_header_len)).unwrap(); + + let mut raw_header = vec![0u8; header_len]; + reader.read_exact(&mut raw_header).unwrap(); + let header_str = String::from_utf8(raw_header).unwrap(); + let header: DataHeader = serde_json::from_str(&header_str).unwrap(); + let mut data: Vec = Vec::with_capacity(header.data_size()); + + // we could do a read_exact here instead but I opted for read_to_end + // instead to avoid allowing incorrect/malformed data files + let amount_read = reader.read_to_end(&mut data).unwrap(); + assert_eq!(amount_read, header.data_size()); + + DataDump { header, data } + } + + // TODO Griffin: Replace the panic with a proper error and the standard + // handling + pub fn get_data(&self, mem_name: &str) -> &[u8] { + let mut current_base = 0_usize; + for mem in &self.header.memories { + if mem.name == mem_name { + let end = current_base + mem.byte_count(); + return &self.data[current_base..end]; + } else { + current_base += mem.byte_count(); + } + } + panic!("Memory not found") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_data_dump() { + let header = DataHeader { + top_level: "test".to_string(), + memories: vec![ + MemoryDeclaration::new("mem0".to_string(), 32, 16), // 64 bytes + MemoryDeclaration::new("mem1".to_string(), 4, 17), // 17 bytes + MemoryDeclaration::new("mem2".to_string(), 3, 2), // 2 
bytes + // 83 bytes + ], + }; + + // This was generated from random.org + let data = vec![ + 230, 165, 232, 82, 9, 111, 146, 146, 243, 18, 26, 100, 23, 45, 22, + 34, 229, 70, 32, 185, 21, 160, 237, 107, 227, 253, 174, 96, 238, + 118, 182, 23, 167, 67, 5, 76, 82, 223, 205, 190, 109, 177, 75, 15, + 216, 40, 93, 111, 231, 205, 136, 231, 193, 155, 217, 192, 120, 235, + 81, 15, 214, 225, 113, 246, 98, 212, 51, 120, 17, 112, 83, 126, + 218, 136, 0, 16, 116, 139, 213, 255, 83, 107, 112, + ]; + + let dump = DataDump { header, data }; + + let mut buf = Vec::new(); + + dump.serialize(&mut buf); + let reparsed_dump = DataDump::deserialize(&mut buf.as_slice()); + assert_eq!(reparsed_dump, dump); + } + + use proptest::prelude::*; + + prop_compose! { + fn arb_memory_declaration()(name in any::(), width in 1_usize..=256, size in 1_usize..=500) -> MemoryDeclaration { + MemoryDeclaration::new(name.to_string(), width, size) + } + } + + prop_compose! { + fn arb_data_header()( + top_level in any::(), + mut memories in prop::collection::vec(arb_memory_declaration(), 1..3) + ) -> DataHeader { + // This is a silly hack to force unique names for the memories + for (i, memory) in memories.iter_mut().enumerate() { + memory.name = format!("{}_{i}", memory.name); + } + + DataHeader { top_level, memories } + } + } + + prop_compose! 
{ + fn arb_data(size: usize)( + data in prop::collection::vec(0u8..=255, size) + ) -> Vec { + data + } + } + + fn arb_data_dump() -> impl Strategy { + let data = arb_data_header().prop_flat_map(|header| { + let data = arb_data(header.data_size()); + (Just(header), data) + }); + + data.prop_map(|(header, mut header_data)| { + let mut cursor = 0_usize; + // Need to go through the upper byte of each value in the memory to + // remove any 1s in the padding region since that causes the memory + // produced from the memory primitive to not match the one + // serialized into it in the first place + for mem in &header.memories { + let bytes_per_val = mem.width.get().div_ceil(8); + let rem = mem.width.get() % 8; + let mask = if rem != 0 { 255u8 >> (8 - rem) } else { 255_u8 }; + + for bytes in &mut header_data[cursor..cursor + mem.byte_count()] + .chunks_exact_mut(bytes_per_val) + { + *bytes.last_mut().unwrap() &= mask; + } + + assert!(header_data[cursor..cursor + mem.byte_count()] + .chunks_exact(bytes_per_val) + .remainder() + .is_empty()); + cursor += mem.byte_count(); + } + + DataDump { + header, + data: header_data, + } + }) + } + + proptest! { + #[test] + fn prop_roundtrip(dump in arb_data_dump()) { + let mut buf = Vec::new(); + dump.serialize(&mut buf); + + let reparsed_dump = DataDump::deserialize(&mut buf.as_slice()); + prop_assert_eq!(dump, reparsed_dump) + + } + } + + use crate::flatten::{ + flat_ir::prelude::GlobalPortIdx, + primitives::stateful::{CombMemD1, SeqMemD1}, + structures::index_trait::IndexRef, + }; + + proptest! 
{ + #[test] + fn comb_roundtrip(dump in arb_data_dump()) { + for mem in &dump.header.memories { + let memory_prim = CombMemD1::new_with_init(GlobalPortIdx::new(0), mem.width.get() as u32, false, mem.size.get(), dump.get_data(&mem.name)); + let data = memory_prim.dump_data(); + prop_assert_eq!(dump.get_data(&mem.name), data); + } + } + + #[test] + fn seq_roundtrip(dump in arb_data_dump()) { + for mem in &dump.header.memories { + let memory_prim = SeqMemD1::new_with_init(GlobalPortIdx::new(0), mem.width.get() as u32, false, mem.size.get(), dump.get_data(&mem.name)); + let data = memory_prim.dump_data(); + prop_assert_eq!(dump.get_data(&mem.name), data); + } + } + } +} diff --git a/interp/src/serialization/mod.rs b/interp/src/serialization/mod.rs new file mode 100644 index 0000000000..a654302050 --- /dev/null +++ b/interp/src/serialization/mod.rs @@ -0,0 +1,3 @@ +pub mod data_dump; +mod old; +pub use old::*; diff --git a/interp/src/serialization.rs b/interp/src/serialization/old.rs similarity index 99% rename from interp/src/serialization.rs rename to interp/src/serialization/old.rs index 0eef1f9054..6e49278dc3 100644 --- a/interp/src/serialization.rs +++ b/interp/src/serialization/old.rs @@ -34,7 +34,7 @@ impl Shape { /// returns the total number of entries in the memory, i.e. it's size based /// on the dimensions of it. - pub fn len(&self) -> usize { + pub fn size(&self) -> usize { match self { Shape::D1(d0) => *d0, Shape::D2(d0, d1) => d0 * d1, diff --git a/interp/src/structures/values.rs b/interp/src/structures/values.rs index 3276c812fb..7401aea560 100644 --- a/interp/src/structures/values.rs +++ b/interp/src/structures/values.rs @@ -655,6 +655,62 @@ impl Value { let new_bv = BitVec::from_bitslice(&self.vec[lower_idx..=upper_idx]); Value { vec: new_bv } } + + /// Creates a value from a byte slice and truncates to the specified width. + /// The bytes are assumed to be little-endian. The slice of bytes must be + /// non-empty. 
And the width must be less than or equal to the number of + /// bits in the slice. In cases where the width of the value is less than + /// the bits provided, the unused upper values should be set to zero and will be + /// discarded. + pub fn from_bytes_le(bytes: &[u8], width: usize) -> Self { + assert!(!bytes.is_empty()); + assert!(width <= bytes.len() * 8); + // overhead = number of padding bits between `width` and the next byte boundary; the final byte must have at least that many leading zeros + let overhead = width.div_ceil(8) * 8 - width; + assert!( + bytes.last().unwrap().leading_zeros() >= overhead as u32, + "The upper byte of the provided value has non-zero values in the padding. Given byte is {} but the upper {} bit(s) should be zero", + bytes.last().unwrap(), + overhead + ); + + let chunks = bytes.chunks_exact(8); + let remainder = chunks.remainder(); + + let mut vec: Vec = chunks + .map(|x| { + usize::from_le_bytes([ + x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], + ]) + }) + .collect(); + + if !remainder.is_empty() { + let mut acc = 0_usize; + for (byte_number, u) in remainder.iter().enumerate() { + acc |= (*u as usize) << (byte_number * 8) + } + vec.push(acc); + } + + let mut bv = BitString::from_vec(vec); + bv.truncate(width); + Value { vec: bv } + } + + pub fn to_bytes(&self) -> Vec { + // there has got to be a better way to do this + self.vec + .chunks(8) + .map(|bits| { + let mut byte = 0_u8; + for (i, bit) in bits.iter().enumerate() { + byte |= (*bit as u8) << i; + } + byte + }) + .collect() + } } /* ============== Impls for Values to make them easier to use ============= */ @@ -757,3 +813,21 @@ impl<'de> Deserialize<'de> for Value { Ok(Value::from(val, bytes * 8)) } } + +#[cfg(test)] +mod tests { + use super::*; + use proptest::prelude::*; + + proptest! 
{ + #[test] + fn test_byte_roundtrip(data in proptest::collection::vec(any::(), 1..100)) { + // this doesn't really test the truncation since it's been hard to + // get that working in a way that still generates values correctly + // but this is good enough for now + let val = Value::from_bytes_le(&data, data.len() * 8); + let bytes = val.to_bytes(); + prop_assert_eq!(bytes, data); + } + } +}