From 1a5bc7bcc07310eb07b4cd74584b4b2ef05a5d16 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 14 Oct 2023 11:52:49 +0200 Subject: [PATCH 1/4] intptrcast: only find strictly in-bounds pointers when we are not hitting the base address --- src/intptrcast.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/intptrcast.rs b/src/intptrcast.rs index 0bdea15763..d0ebaba490 100644 --- a/src/intptrcast.rs +++ b/src/intptrcast.rs @@ -82,9 +82,12 @@ impl<'mir, 'tcx> GlobalStateInner { let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1]; // This never overflows because `addr >= glb` let offset = addr - glb; - // If the offset exceeds the size of the allocation, don't use this `alloc_id`. + // We require this to be strict in-bounds of the allocation. This arm is only + // entered for addresses that are not the base address, so even zero-sized + // allocations will get recognized at their base address -- but all other + // allocations will *not* be recognized at their "end" address. let size = ecx.get_alloc_info(alloc_id).0; - if offset <= size.bytes() { Some(alloc_id) } else { None } + if offset < size.bytes() { Some(alloc_id) } else { None } } }?; From b8712ac0e1086e8ff1523069d04df1cc997c5b8a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 14 Oct 2023 12:01:47 +0200 Subject: [PATCH 2/4] switch intptrcast to helper trait pattern like everything else --- src/intptrcast.rs | 164 +++++++++++++++++++++++----------------------- src/lib.rs | 2 +- src/machine.rs | 9 ++- 3 files changed, 87 insertions(+), 88 deletions(-) diff --git a/src/intptrcast.rs b/src/intptrcast.rs index d0ebaba490..0a09753db0 100644 --- a/src/intptrcast.rs +++ b/src/intptrcast.rs @@ -62,10 +62,21 @@ impl GlobalStateInner { } } -impl<'mir, 'tcx> GlobalStateInner { +/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple +/// of `align` that is larger or equal to `addr` +fn align_addr(addr: u64, align: u64) -> u64 { + match addr % align { + 0 => addr, + rem => addr.checked_add(align).unwrap() - rem, + } +} + +impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} +trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { // Returns the exposed `AllocId` that corresponds to the specified addr, // or `None` if the addr is out of bounds - fn alloc_id_from_addr(ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64) -> Option { + fn alloc_id_from_addr(&self, addr: u64) -> Option { + let ecx = self.eval_context_ref(); let global_state = ecx.machine.intptrcast.borrow(); assert!(global_state.provenance_mode != ProvenanceMode::Strict); @@ -105,65 +116,8 @@ impl<'mir, 'tcx> GlobalStateInner { None } - pub fn expose_ptr( - ecx: &mut MiriInterpCx<'mir, 'tcx>, - alloc_id: AllocId, - tag: BorTag, - ) -> InterpResult<'tcx> { - let global_state = ecx.machine.intptrcast.get_mut(); - // In strict mode, we don't need this, so we can save some cycles by not tracking it. - if global_state.provenance_mode != ProvenanceMode::Strict { - trace!("Exposing allocation id {alloc_id:?}"); - global_state.exposed.insert(alloc_id); - if ecx.machine.borrow_tracker.is_some() { - ecx.expose_tag(alloc_id, tag)?; - } - } - Ok(()) - } - - pub fn ptr_from_addr_cast( - ecx: &MiriInterpCx<'mir, 'tcx>, - addr: u64, - ) -> InterpResult<'tcx, Pointer>> { - trace!("Casting {:#x} to a pointer", addr); - - // Potentially emit a warning. - let global_state = ecx.machine.intptrcast.borrow(); - match global_state.provenance_mode { - ProvenanceMode::Default => { - // The first time this happens at a particular location, print a warning. - thread_local! { - // `Span` is non-`Send`, so we use a thread-local instead. - static PAST_WARNINGS: RefCell> = RefCell::default(); - } - PAST_WARNINGS.with_borrow_mut(|past_warnings| { - let first = past_warnings.is_empty(); - if past_warnings.insert(ecx.cur_span()) { - // Newly inserted, so first time we see this span. - ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first }); - } - }); - } - ProvenanceMode::Strict => { - throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance); - } - ProvenanceMode::Permissive => {} - } - - // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is - // completely legal to do a cast and then `wrapping_offset` to another allocation and only - // *then* do a memory access. So the allocation that the pointer happens to point to on a - // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that - // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed) - // allocation it might be referencing. - Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr))) - } - - fn alloc_base_addr( - ecx: &MiriInterpCx<'mir, 'tcx>, - alloc_id: AllocId, - ) -> InterpResult<'tcx, u64> { + fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> { + let ecx = self.eval_context_ref(); let mut global_state = ecx.machine.intptrcast.borrow_mut(); let global_state = &mut *global_state; @@ -186,7 +140,7 @@ impl<'mir, 'tcx> GlobalStateInner { .next_base_addr .checked_add(slack) .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; - let base_addr = Self::align_addr(base_addr, align.bytes()); + let base_addr = align_addr(base_addr, align.bytes()); entry.insert(base_addr); trace!( "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})", @@ -216,15 +170,71 @@ impl<'mir, 'tcx> GlobalStateInner { } }) } +} + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} +pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { + fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> { + let ecx = self.eval_context_mut(); + let global_state = ecx.machine.intptrcast.get_mut(); + // In strict mode, we don't need this, so we can save some cycles by not tracking it. + if global_state.provenance_mode != ProvenanceMode::Strict { + trace!("Exposing allocation id {alloc_id:?}"); + global_state.exposed.insert(alloc_id); + if ecx.machine.borrow_tracker.is_some() { + ecx.expose_tag(alloc_id, tag)?; + } + } + Ok(()) + } + + fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer>> { + trace!("Casting {:#x} to a pointer", addr); + + let ecx = self.eval_context_ref(); + let global_state = ecx.machine.intptrcast.borrow(); + + // Potentially emit a warning. + match global_state.provenance_mode { + ProvenanceMode::Default => { + // The first time this happens at a particular location, print a warning. + thread_local! { + // `Span` is non-`Send`, so we use a thread-local instead. + static PAST_WARNINGS: RefCell> = RefCell::default(); + } + PAST_WARNINGS.with_borrow_mut(|past_warnings| { + let first = past_warnings.is_empty(); + if past_warnings.insert(ecx.cur_span()) { + // Newly inserted, so first time we see this span. + ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first }); + } + }); + } + ProvenanceMode::Strict => { + throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance); + } + ProvenanceMode::Permissive => {} + } + + // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is + // completely legal to do a cast and then `wrapping_offset` to another allocation and only + // *then* do a memory access. So the allocation that the pointer happens to point to on a + // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that + // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed) + // allocation it might be referencing. + Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr))) + } /// Convert a relative (tcx) pointer to a Miri pointer. - pub fn ptr_from_rel_ptr( - ecx: &MiriInterpCx<'mir, 'tcx>, + fn ptr_from_rel_ptr( + &self, ptr: Pointer, tag: BorTag, ) -> InterpResult<'tcx, Pointer> { + let ecx = self.eval_context_ref(); + let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance) - let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id)?; + let base_addr = ecx.addr_from_alloc_id(alloc_id)?; // Add offset with the right kind of pointer-overflowing arithmetic. let dl = ecx.data_layout(); @@ -234,22 +244,21 @@ impl<'mir, 'tcx> GlobalStateInner { /// When a pointer is used for a memory access, this computes where in which allocation the /// access is going. - pub fn ptr_get_alloc( - ecx: &MiriInterpCx<'mir, 'tcx>, - ptr: Pointer, - ) -> Option<(AllocId, Size)> { + fn ptr_get_alloc(&self, ptr: Pointer) -> Option<(AllocId, Size)> { + let ecx = self.eval_context_ref(); + let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance) let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag { alloc_id } else { // A wildcard pointer. - GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())? + ecx.alloc_id_from_addr(addr.bytes())? }; // This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr // must have been called in the past. - let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id).unwrap(); + let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap(); // Wrapping "addr - base_addr" #[allow(clippy::cast_possible_wrap)] // we want to wrap here @@ -259,15 +268,6 @@ impl<'mir, 'tcx> GlobalStateInner { Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0), )) } - - /// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple - /// of `align` that is larger or equal to `addr` - fn align_addr(addr: u64, align: u64) -> u64 { - match addr % align { - 0 => addr, - rem => addr.checked_add(align).unwrap() - rem, - } - } } #[cfg(test)] @@ -276,7 +276,7 @@ mod tests { #[test] fn test_align_addr() { - assert_eq!(GlobalStateInner::align_addr(37, 4), 40); - assert_eq!(GlobalStateInner::align_addr(44, 4), 44); + assert_eq!(align_addr(37, 4), 40); + assert_eq!(align_addr(44, 4), 44); } } diff --git a/src/lib.rs b/src/lib.rs index f1d8ce01bc..68b9164dec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -117,7 +117,7 @@ pub use crate::eval::{ create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith, }; pub use crate::helpers::EvalContextExt as _; -pub use crate::intptrcast::ProvenanceMode; +pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode}; pub use crate::machine::{ AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind, PrimitiveLayouts, Provenance, ProvenanceExtra, diff --git a/src/machine.rs b/src/machine.rs index 0ade43d4a8..ad3e95b712 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1149,7 +1149,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { // Value does not matter, SB is disabled BorTag::default() }; - intptrcast::GlobalStateInner::ptr_from_rel_ptr(ecx, ptr, tag) + ecx.ptr_from_rel_ptr(ptr, tag) } /// Called on `usize as ptr` casts. @@ -1158,7 +1158,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64, ) -> InterpResult<'tcx, Pointer>> { - intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr) + ecx.ptr_from_addr_cast(addr) } /// Called on `ptr as usize` casts. @@ -1169,8 +1169,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { ptr: Pointer, ) -> InterpResult<'tcx> { match ptr.provenance { - Provenance::Concrete { alloc_id, tag } => - intptrcast::GlobalStateInner::expose_ptr(ecx, alloc_id, tag), + Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag), Provenance::Wildcard => { // No need to do anything for wildcard pointers as // their provenances have already been previously exposed. @@ -1191,7 +1190,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { ecx: &MiriInterpCx<'mir, 'tcx>, ptr: Pointer, ) -> Option<(AllocId, Size, Self::ProvenanceExtra)> { - let rel = intptrcast::GlobalStateInner::ptr_get_alloc(ecx, ptr); + let rel = ecx.ptr_get_alloc(ptr); rel.map(|(alloc_id, size)| { let tag = match ptr.provenance { From ab68ed7026c157c04df3801d5630c8875ebe9e34 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 14 Oct 2023 12:09:15 +0200 Subject: [PATCH 3/4] remove allocations from int_to_ptr_map and exposed when they get freed --- src/intptrcast.rs | 50 ++++++++++++++++++++++++++++++++--------------- src/machine.rs | 1 + 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/src/intptrcast.rs b/src/intptrcast.rs index 0a09753db0..9e813d5839 100644 --- a/src/intptrcast.rs +++ b/src/intptrcast.rs @@ -26,8 +26,10 @@ pub type GlobalState = RefCell; #[derive(Clone, Debug)] pub struct GlobalStateInner { - /// This is used as a map between the address of each allocation and its `AllocId`. - /// It is always sorted + /// This is used as a map between the address of each allocation and its `AllocId`. It is always + /// sorted. We cannot use a `HashMap` since we can be given an address that is offset from the + /// base address, and we need to find the `AllocId` it belongs to. + /// This is not the *full* inverse of `base_addr`; dead allocations have been removed. int_to_ptr_map: Vec<(u64, AllocId)>, /// The base address for each allocation. We cannot put that into /// `AllocExtra` because function pointers also have a base address, and @@ -102,18 +104,14 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } }?; - // We only use this provenance if it has been exposed, *and* is still live. + // We only use this provenance if it has been exposed. if global_state.exposed.contains(&alloc_id) { - let (_size, _align, kind) = ecx.get_alloc_info(alloc_id); - match kind { - AllocKind::LiveData | AllocKind::Function | AllocKind::VTable => { - return Some(alloc_id); - } - AllocKind::Dead => {} - } + // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed. + debug_assert!(!matches!(ecx.get_alloc_info(alloc_id).2, AllocKind::Dead)); + Some(alloc_id) + } else { + None } - - None } fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> { @@ -124,9 +122,13 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { Ok(match global_state.base_addr.entry(alloc_id) { Entry::Occupied(entry) => *entry.get(), Entry::Vacant(entry) => { - // There is nothing wrong with a raw pointer being cast to an integer only after - // it became dangling. Hence we allow dead allocations. - let (size, align, _kind) = ecx.get_alloc_info(alloc_id); + let (size, align, kind) = ecx.get_alloc_info(alloc_id); + // This is either called immediately after allocation (and then cached), or when + // adjusting `tcx` pointers (which never get freed). So assert that we are looking + // at a live allocation. This also ensures that we never re-assign an address to an + // allocation that previously had an address, but then was freed and the address + // information was removed. + assert!(!matches!(kind, AllocKind::Dead)); // This allocation does not have a base address yet, pick one. // Leave some space to the previous allocation, to give it some chance to be less aligned. @@ -162,6 +164,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { if global_state.next_base_addr > ecx.target_usize_max() { throw_exhaust!(AddressSpaceFull); } + // Also maintain the opposite mapping in `int_to_ptr_map`. // Given that `next_base_addr` increases in each allocation, pushing the // corresponding tuple keeps `int_to_ptr_map` sorted global_state.int_to_ptr_map.push((base_addr, alloc_id)); @@ -257,7 +260,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { }; // This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr - // must have been called in the past. + // must have been called in the past, so we can just look up the address in the map. let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap(); // Wrapping "addr - base_addr" @@ -270,6 +273,21 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } } +impl GlobalStateInner { + pub fn free_alloc_id(&mut self, dead_id: AllocId) { + // We can *not* remove this from `base_addr`, since `addr_from_alloc_id` is called on each + // attempt at a memory access to determine the allocation ID and offset -- and there can + // still be pointers with `dead_id` that one can attempt to use for a memory access. + // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist + // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never + // returns a dead allocation. + self.int_to_ptr_map.retain(|&(_, id)| id != dead_id); + // We can also remove it from `exposed`, since this allocation can anyway not be returned by + // `alloc_id_from_addr` any more. + self.exposed.remove(&dead_id); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/machine.rs b/src/machine.rs index ad3e95b712..d5775912ea 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1262,6 +1262,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { { *deallocated_at = Some(machine.current_span()); } + machine.intptrcast.get_mut().free_alloc_id(alloc_id); Ok(()) } From f2cb7527605fc3bb256ded636604c85e62144daa Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 19 Oct 2023 22:32:42 +0200 Subject: [PATCH 4/4] clarify comment --- src/intptrcast.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/intptrcast.rs b/src/intptrcast.rs index 9e813d5839..ab6a256f71 100644 --- a/src/intptrcast.rs +++ b/src/intptrcast.rs @@ -275,9 +275,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { impl GlobalStateInner { pub fn free_alloc_id(&mut self, dead_id: AllocId) { - // We can *not* remove this from `base_addr`, since `addr_from_alloc_id` is called on each - // attempt at a memory access to determine the allocation ID and offset -- and there can - // still be pointers with `dead_id` that one can attempt to use for a memory access. + // We can *not* remove this from `base_addr`, since the interpreter design requires that we + // be able to retrieve an AllocId + offset for any memory access *before* we check if the + // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory + // access to determine the allocation ID and offset -- and there can still be pointers with + // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return + // `None` only if the pointer truly has no provenance (this ensures consistent error + // messages). // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never // returns a dead allocation.