Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

intptrcast: remove information about dead allocations #3122

Merged
merged 4 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 123 additions & 98 deletions src/intptrcast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ pub type GlobalState = RefCell<GlobalStateInner>;

#[derive(Clone, Debug)]
pub struct GlobalStateInner {
/// This is used as a map between the address of each allocation and its `AllocId`.
/// It is always sorted
/// This is used as a map between the address of each allocation and its `AllocId`. It is always
/// sorted. We cannot use a `HashMap` since we can be given an address that is offset from the
/// base address, and we need to find the `AllocId` it belongs to.
/// This is not the *full* inverse of `base_addr`; dead allocations have been removed.
int_to_ptr_map: Vec<(u64, AllocId)>,
/// The base address for each allocation. We cannot put that into
/// `AllocExtra` because function pointers also have a base address, and
Expand Down Expand Up @@ -62,10 +64,21 @@ impl GlobalStateInner {
}
}

impl<'mir, 'tcx> GlobalStateInner {
/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
/// of `align` that is larger or equal to `addr`
fn align_addr(addr: u64, align: u64) -> u64 {
match addr % align {
0 => addr,
rem => addr.checked_add(align).unwrap() - rem,
}
}

impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// Returns the exposed `AllocId` that corresponds to the specified addr,
// or `None` if the addr is out of bounds
fn alloc_id_from_addr(ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64) -> Option<AllocId> {
fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();
assert!(global_state.provenance_mode != ProvenanceMode::Strict);

Expand All @@ -82,94 +95,40 @@ impl<'mir, 'tcx> GlobalStateInner {
let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
// This never overflows because `addr >= glb`
let offset = addr - glb;
// If the offset exceeds the size of the allocation, don't use this `alloc_id`.
// We require this to be strict in-bounds of the allocation. This arm is only
// entered for addresses that are not the base address, so even zero-sized
// allocations will get recognized at their base address -- but all other
// allocations will *not* be recognized at their "end" address.
let size = ecx.get_alloc_info(alloc_id).0;
if offset <= size.bytes() { Some(alloc_id) } else { None }
if offset < size.bytes() { Some(alloc_id) } else { None }
}
}?;

// We only use this provenance if it has been exposed, *and* is still live.
// We only use this provenance if it has been exposed.
if global_state.exposed.contains(&alloc_id) {
let (_size, _align, kind) = ecx.get_alloc_info(alloc_id);
match kind {
AllocKind::LiveData | AllocKind::Function | AllocKind::VTable => {
return Some(alloc_id);
}
AllocKind::Dead => {}
}
}

None
}

pub fn expose_ptr(
ecx: &mut MiriInterpCx<'mir, 'tcx>,
alloc_id: AllocId,
tag: BorTag,
) -> InterpResult<'tcx> {
let global_state = ecx.machine.intptrcast.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode != ProvenanceMode::Strict {
trace!("Exposing allocation id {alloc_id:?}");
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
}
}
Ok(())
}

pub fn ptr_from_addr_cast(
ecx: &MiriInterpCx<'mir, 'tcx>,
addr: u64,
) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
trace!("Casting {:#x} to a pointer", addr);

// Potentially emit a warning.
let global_state = ecx.machine.intptrcast.borrow();
match global_state.provenance_mode {
ProvenanceMode::Default => {
// The first time this happens at a particular location, print a warning.
thread_local! {
// `Span` is non-`Send`, so we use a thread-local instead.
static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
}
PAST_WARNINGS.with_borrow_mut(|past_warnings| {
let first = past_warnings.is_empty();
if past_warnings.insert(ecx.cur_span()) {
// Newly inserted, so first time we see this span.
ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
}
});
}
ProvenanceMode::Strict => {
throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
}
ProvenanceMode::Permissive => {}
// This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
debug_assert!(!matches!(ecx.get_alloc_info(alloc_id).2, AllocKind::Dead));
Some(alloc_id)
} else {
None
}

// We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
// completely legal to do a cast and then `wrapping_offset` to another allocation and only
// *then* do a memory access. So the allocation that the pointer happens to point to on a
// cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
// *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
// allocation it might be referencing.
Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
}

fn alloc_base_addr(
ecx: &MiriInterpCx<'mir, 'tcx>,
alloc_id: AllocId,
) -> InterpResult<'tcx, u64> {
fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
let ecx = self.eval_context_ref();
let mut global_state = ecx.machine.intptrcast.borrow_mut();
let global_state = &mut *global_state;

Ok(match global_state.base_addr.entry(alloc_id) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
// There is nothing wrong with a raw pointer being cast to an integer only after
// it became dangling. Hence we allow dead allocations.
let (size, align, _kind) = ecx.get_alloc_info(alloc_id);
let (size, align, kind) = ecx.get_alloc_info(alloc_id);
// This is either called immediately after allocation (and then cached), or when
// adjusting `tcx` pointers (which never get freed). So assert that we are looking
// at a live allocation. This also ensures that we never re-assign an address to an
// allocation that previously had an address, but then was freed and the address
// information was removed.
assert!(!matches!(kind, AllocKind::Dead));

// This allocation does not have a base address yet, pick one.
// Leave some space to the previous allocation, to give it some chance to be less aligned.
Expand All @@ -183,7 +142,7 @@ impl<'mir, 'tcx> GlobalStateInner {
.next_base_addr
.checked_add(slack)
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
let base_addr = Self::align_addr(base_addr, align.bytes());
let base_addr = align_addr(base_addr, align.bytes());
entry.insert(base_addr);
trace!(
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
Expand All @@ -205,6 +164,7 @@ impl<'mir, 'tcx> GlobalStateInner {
if global_state.next_base_addr > ecx.target_usize_max() {
throw_exhaust!(AddressSpaceFull);
}
// Also maintain the opposite mapping in `int_to_ptr_map`.
// Given that `next_base_addr` increases in each allocation, pushing the
// corresponding tuple keeps `int_to_ptr_map` sorted
global_state.int_to_ptr_map.push((base_addr, alloc_id));
Expand All @@ -213,15 +173,71 @@ impl<'mir, 'tcx> GlobalStateInner {
}
})
}
}

impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let ecx = self.eval_context_mut();
let global_state = ecx.machine.intptrcast.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode != ProvenanceMode::Strict {
trace!("Exposing allocation id {alloc_id:?}");
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
}
}
Ok(())
}

fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
trace!("Casting {:#x} to a pointer", addr);

let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();

// Potentially emit a warning.
match global_state.provenance_mode {
ProvenanceMode::Default => {
// The first time this happens at a particular location, print a warning.
thread_local! {
// `Span` is non-`Send`, so we use a thread-local instead.
static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
}
PAST_WARNINGS.with_borrow_mut(|past_warnings| {
let first = past_warnings.is_empty();
if past_warnings.insert(ecx.cur_span()) {
// Newly inserted, so first time we see this span.
ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
}
});
}
ProvenanceMode::Strict => {
throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
}
ProvenanceMode::Permissive => {}
}

// We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
// completely legal to do a cast and then `wrapping_offset` to another allocation and only
// *then* do a memory access. So the allocation that the pointer happens to point to on a
// cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
// *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
// allocation it might be referencing.
Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
}

/// Convert a relative (tcx) pointer to a Miri pointer.
pub fn ptr_from_rel_ptr(
ecx: &MiriInterpCx<'mir, 'tcx>,
fn ptr_from_rel_ptr(
&self,
ptr: Pointer<AllocId>,
tag: BorTag,
) -> InterpResult<'tcx, Pointer<Provenance>> {
let ecx = self.eval_context_ref();

let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id)?;
let base_addr = ecx.addr_from_alloc_id(alloc_id)?;

// Add offset with the right kind of pointer-overflowing arithmetic.
let dl = ecx.data_layout();
Expand All @@ -231,22 +247,21 @@ impl<'mir, 'tcx> GlobalStateInner {

/// When a pointer is used for a memory access, this computes where in which allocation the
/// access is going.
pub fn ptr_get_alloc(
ecx: &MiriInterpCx<'mir, 'tcx>,
ptr: Pointer<Provenance>,
) -> Option<(AllocId, Size)> {
fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
let ecx = self.eval_context_ref();

let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)

let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
alloc_id
} else {
// A wildcard pointer.
GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())?
ecx.alloc_id_from_addr(addr.bytes())?
};

// This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr
// must have been called in the past.
let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id).unwrap();
// must have been called in the past, so we can just look up the address in the map.
let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap();

// Wrapping "addr - base_addr"
#[allow(clippy::cast_possible_wrap)] // we want to wrap here
Expand All @@ -256,14 +271,24 @@ impl<'mir, 'tcx> GlobalStateInner {
Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0),
))
}
}

/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
/// of `align` that is larger or equal to `addr`
fn align_addr(addr: u64, align: u64) -> u64 {
match addr % align {
0 => addr,
rem => addr.checked_add(align).unwrap() - rem,
}
impl GlobalStateInner {
pub fn free_alloc_id(&mut self, dead_id: AllocId) {
// We can *not* remove this from `base_addr`, since the interpreter design requires that we
// be able to retrieve an AllocId + offset for any memory access *before* we check if the
// access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
// access to determine the allocation ID and offset -- and there can still be pointers with
// `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
// `None` only if the pointer truly has no provenance (this ensures consistent error
// messages).
// However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
// can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
// returns a dead allocation.
RalfJung marked this conversation as resolved.
Show resolved Hide resolved
self.int_to_ptr_map.retain(|&(_, id)| id != dead_id);
// We can also remove it from `exposed`, since this allocation can anyway not be returned by
// `alloc_id_from_addr` any more.
self.exposed.remove(&dead_id);
}
}

Expand All @@ -273,7 +298,7 @@ mod tests {

#[test]
fn test_align_addr() {
assert_eq!(GlobalStateInner::align_addr(37, 4), 40);
assert_eq!(GlobalStateInner::align_addr(44, 4), 44);
assert_eq!(align_addr(37, 4), 40);
assert_eq!(align_addr(44, 4), 44);
}
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ pub use crate::eval::{
create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
};
pub use crate::helpers::EvalContextExt as _;
pub use crate::intptrcast::ProvenanceMode;
pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode};
pub use crate::machine::{
AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind,
PrimitiveLayouts, Provenance, ProvenanceExtra,
Expand Down
10 changes: 5 additions & 5 deletions src/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
// Value does not matter, SB is disabled
BorTag::default()
};
intptrcast::GlobalStateInner::ptr_from_rel_ptr(ecx, ptr, tag)
ecx.ptr_from_rel_ptr(ptr, tag)
}

/// Called on `usize as ptr` casts.
Expand All @@ -1158,7 +1158,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ecx: &MiriInterpCx<'mir, 'tcx>,
addr: u64,
) -> InterpResult<'tcx, Pointer<Option<Self::Provenance>>> {
intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr)
ecx.ptr_from_addr_cast(addr)
}

/// Called on `ptr as usize` casts.
Expand All @@ -1169,8 +1169,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ptr: Pointer<Self::Provenance>,
) -> InterpResult<'tcx> {
match ptr.provenance {
Provenance::Concrete { alloc_id, tag } =>
intptrcast::GlobalStateInner::expose_ptr(ecx, alloc_id, tag),
Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
Provenance::Wildcard => {
// No need to do anything for wildcard pointers as
// their provenances have already been previously exposed.
Expand All @@ -1191,7 +1190,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ecx: &MiriInterpCx<'mir, 'tcx>,
ptr: Pointer<Self::Provenance>,
) -> Option<(AllocId, Size, Self::ProvenanceExtra)> {
let rel = intptrcast::GlobalStateInner::ptr_get_alloc(ecx, ptr);
let rel = ecx.ptr_get_alloc(ptr);

rel.map(|(alloc_id, size)| {
let tag = match ptr.provenance {
Expand Down Expand Up @@ -1263,6 +1262,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
{
*deallocated_at = Some(machine.current_span());
}
machine.intptrcast.get_mut().free_alloc_id(alloc_id);
Ok(())
}

Expand Down