From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- .../rustc_middle/src/mir/interpret/allocation.rs | 1300 ++++++++++++++++++++ compiler/rustc_middle/src/mir/interpret/error.rs | 551 +++++++++ compiler/rustc_middle/src/mir/interpret/mod.rs | 633 ++++++++++ compiler/rustc_middle/src/mir/interpret/pointer.rs | 307 +++++ compiler/rustc_middle/src/mir/interpret/queries.rs | 217 ++++ compiler/rustc_middle/src/mir/interpret/value.rs | 651 ++++++++++ 6 files changed, 3659 insertions(+) create mode 100644 compiler/rustc_middle/src/mir/interpret/allocation.rs create mode 100644 compiler/rustc_middle/src/mir/interpret/error.rs create mode 100644 compiler/rustc_middle/src/mir/interpret/mod.rs create mode 100644 compiler/rustc_middle/src/mir/interpret/pointer.rs create mode 100644 compiler/rustc_middle/src/mir/interpret/queries.rs create mode 100644 compiler/rustc_middle/src/mir/interpret/value.rs (limited to 'compiler/rustc_middle/src/mir/interpret') diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs new file mode 100644 index 000000000..db7e0fb8a --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs @@ -0,0 +1,1300 @@ +//! The virtual memory representation of the MIR interpreter. + +use std::borrow::Cow; +use std::convert::{TryFrom, TryInto}; +use std::fmt; +use std::hash; +use std::iter; +use std::ops::{Deref, Range}; +use std::ptr; + +use rustc_ast::Mutability; +use rustc_data_structures::intern::Interned; +use rustc_data_structures::sorted_map::SortedMap; +use rustc_span::DUMMY_SP; +use rustc_target::abi::{Align, HasDataLayout, Size}; + +use super::{ + read_target_uint, write_target_uint, AllocId, InterpError, InterpResult, Pointer, Provenance, + ResourceExhaustionInfo, Scalar, ScalarMaybeUninit, ScalarSizeMismatch, UndefinedBehaviorInfo, + UninitBytesAccess, UnsupportedOpInfo, +}; +use crate::ty; + +/// This type represents an Allocation in the Miri/CTFE core engine. +/// +/// Its public API is rather low-level, working directly with allocation offsets and a custom error +/// type to account for the lack of an AllocId on this level. The Miri/CTFE core engine `memory` +/// module provides higher-level access. +// Note: for performance reasons when interning, some of the `Allocation` fields can be partially +// hashed. (see the `Hash` impl below for more details), so the impl is not derived. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)] +#[derive(HashStable)] +pub struct Allocation { + /// The actual bytes of the allocation. + /// Note that the bytes of a pointer represent the offset of the pointer. + bytes: Box<[u8]>, + /// Maps from byte addresses to extra data for each pointer. + /// Only the first byte of a pointer is inserted into the map; i.e., + /// every entry in this map applies to `pointer_size` consecutive bytes starting + /// at the given offset. + relocations: Relocations, + /// Denotes which part of this allocation is initialized. + init_mask: InitMask, + /// The alignment of the allocation to detect unaligned reads. + /// (`Align` guarantees that this is a power of two.) + pub align: Align, + /// `true` if the allocation is mutable. + /// Also used by codegen to determine if a static should be put into mutable memory, + /// which happens for `static mut` and `static` with interior mutability. 
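+ /// (Writes through this type's API assert `Mutability::Mut` first; see `write_scalar`
+ /// and `mark_init` below.)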
+ pub mutability: Mutability, + /// Extra state for the machine. + pub extra: Extra, +} + +/// This is the maximum size we will hash at a time, when interning an `Allocation` and its +/// `InitMask`. Note, we hash that amount of bytes twice: at the start, and at the end of a buffer. +/// Used when these two structures are large: we only partially hash the larger fields in that +/// situation. See the comment at the top of their respective `Hash` impl for more details. +const MAX_BYTES_TO_HASH: usize = 64; + +/// This is the maximum size (in bytes) for which a buffer will be fully hashed, when interning. +/// Otherwise, it will be partially hashed in 2 slices, requiring at least 2 `MAX_BYTES_TO_HASH` +/// bytes. +const MAX_HASHED_BUFFER_LEN: usize = 2 * MAX_BYTES_TO_HASH; + +// Const allocations are only hashed for interning. However, they can be large, making the hashing +// expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially +// big buffers like the actual bytes of allocation. We can partially hash some fields when they're +// large. +impl hash::Hash for Allocation { + fn hash(&self, state: &mut H) { + // Partially hash the `bytes` buffer when it is large. To limit collisions with common + // prefixes and suffixes, we hash the length and some slices of the buffer. + let byte_count = self.bytes.len(); + if byte_count > MAX_HASHED_BUFFER_LEN { + // Hash the buffer's length. + byte_count.hash(state); + + // And its head and tail. + self.bytes[..MAX_BYTES_TO_HASH].hash(state); + self.bytes[byte_count - MAX_BYTES_TO_HASH..].hash(state); + } else { + self.bytes.hash(state); + } + + // Hash the other fields as usual. + self.relocations.hash(state); + self.init_mask.hash(state); + self.align.hash(state); + self.mutability.hash(state); + self.extra.hash(state); + } +} + +/// Interned types generally have an `Outer` type and an `Inner` type, where +/// `Outer` is a newtype around `Interned`, and all the operations are +/// done on `Outer`, because all occurrences are interned. E.g. `Ty` is an +/// outer type and `TyS` is its inner type. +/// +/// Here things are different because only const allocations are interned. This +/// means that both the inner type (`Allocation`) and the outer type +/// (`ConstAllocation`) are used quite a bit. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, HashStable)] +#[rustc_pass_by_value] +pub struct ConstAllocation<'tcx, Prov = AllocId, Extra = ()>( + pub Interned<'tcx, Allocation>, +); + +impl<'tcx> fmt::Debug for ConstAllocation<'tcx> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // This matches how `Allocation` is printed. We print it like this to + // avoid having to update expected output in a lot of tests. + write!(f, "{:?}", self.inner()) + } +} + +impl<'tcx, Prov, Extra> ConstAllocation<'tcx, Prov, Extra> { + pub fn inner(self) -> &'tcx Allocation { + self.0.0 + } +} + +/// We have our own error type that does not know about the `AllocId`; that information +/// is added when converting to `InterpError`. +#[derive(Debug)] +pub enum AllocError { + /// A scalar had the wrong size. + ScalarSizeMismatch(ScalarSizeMismatch), + /// Encountered a pointer where we needed raw bytes. + ReadPointerAsBytes, + /// Partially overwriting a pointer. + PartialPointerOverwrite(Size), + /// Using uninitialized data where it is not allowed. 
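+ /// (When present, the payload is the `UninitBytesAccess` describing the offending
+ /// range; see `check_init` below.)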
+ InvalidUninitBytes(Option), +} +pub type AllocResult = Result; + +impl From for AllocError { + fn from(s: ScalarSizeMismatch) -> Self { + AllocError::ScalarSizeMismatch(s) + } +} + +impl AllocError { + pub fn to_interp_error<'tcx>(self, alloc_id: AllocId) -> InterpError<'tcx> { + use AllocError::*; + match self { + ScalarSizeMismatch(s) => { + InterpError::UndefinedBehavior(UndefinedBehaviorInfo::ScalarSizeMismatch(s)) + } + ReadPointerAsBytes => InterpError::Unsupported(UnsupportedOpInfo::ReadPointerAsBytes), + PartialPointerOverwrite(offset) => InterpError::Unsupported( + UnsupportedOpInfo::PartialPointerOverwrite(Pointer::new(alloc_id, offset)), + ), + InvalidUninitBytes(info) => InterpError::UndefinedBehavior( + UndefinedBehaviorInfo::InvalidUninitBytes(info.map(|b| (alloc_id, b))), + ), + } + } +} + +/// The information that makes up a memory access: offset and size. +#[derive(Copy, Clone)] +pub struct AllocRange { + pub start: Size, + pub size: Size, +} + +impl fmt::Debug for AllocRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[{:#x}..{:#x}]", self.start.bytes(), self.end().bytes()) + } +} + +/// Free-starting constructor for less syntactic overhead. +#[inline(always)] +pub fn alloc_range(start: Size, size: Size) -> AllocRange { + AllocRange { start, size } +} + +impl AllocRange { + #[inline] + pub fn from(r: Range) -> Self { + alloc_range(r.start, r.end - r.start) // `Size` subtraction (overflow-checked) + } + + #[inline(always)] + pub fn end(self) -> Size { + self.start + self.size // This does overflow checking. + } + + /// Returns the `subrange` within this range; panics if it is not a subrange. + #[inline] + pub fn subrange(self, subrange: AllocRange) -> AllocRange { + let sub_start = self.start + subrange.start; + let range = alloc_range(sub_start, subrange.size); + assert!(range.end() <= self.end(), "access outside the bounds for given AllocRange"); + range + } +} + +// The constructors are all without extra; the extra gets added by a machine hook later. +impl Allocation { + /// Creates an allocation initialized by the given bytes + pub fn from_bytes<'a>( + slice: impl Into>, + align: Align, + mutability: Mutability, + ) -> Self { + let bytes = Box::<[u8]>::from(slice.into()); + let size = Size::from_bytes(bytes.len()); + Self { + bytes, + relocations: Relocations::new(), + init_mask: InitMask::new(size, true), + align, + mutability, + extra: (), + } + } + + pub fn from_bytes_byte_aligned_immutable<'a>(slice: impl Into>) -> Self { + Allocation::from_bytes(slice, Align::ONE, Mutability::Not) + } + + /// Try to create an Allocation of `size` bytes, failing if there is not enough memory + /// available to the compiler to do so. + /// + /// If `panic_on_fail` is true, this will never return `Err`. + pub fn uninit<'tcx>(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'tcx, Self> { + let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).map_err(|_| { + // This results in an error that can happen non-deterministically, since the memory + // available to the compiler can change between runs. Normally queries are always + // deterministic. However, we can be non-deterministic here because all uses of const + // evaluation (including ConstProp!) will make compilation fail (via hard error + // or ICE) upon encountering a `MemoryExhausted` error. 
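+ // (The `delay_span_bug` below makes sure the session cannot finish successfully if
+ // this error ends up being swallowed: it turns into an ICE instead.)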
+ if panic_on_fail { + panic!("Allocation::uninit called with panic_on_fail had allocation failure") + } + ty::tls::with(|tcx| { + tcx.sess.delay_span_bug(DUMMY_SP, "exhausted memory during interpretation") + }); + InterpError::ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted) + })?; + // SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]> + let bytes = unsafe { bytes.assume_init() }; + Ok(Allocation { + bytes, + relocations: Relocations::new(), + init_mask: InitMask::new(size, false), + align, + mutability: Mutability::Mut, + extra: (), + }) + } +} + +impl Allocation { + /// Adjust allocation from the ones in tcx to a custom Machine instance + /// with a different Provenance and Extra type. + pub fn adjust_from_tcx( + self, + cx: &impl HasDataLayout, + extra: Extra, + mut adjust_ptr: impl FnMut(Pointer) -> Result, Err>, + ) -> Result, Err> { + // Compute new pointer provenance, which also adjusts the bytes. + let mut bytes = self.bytes; + let mut new_relocations = Vec::with_capacity(self.relocations.0.len()); + let ptr_size = cx.data_layout().pointer_size.bytes_usize(); + let endian = cx.data_layout().endian; + for &(offset, alloc_id) in self.relocations.iter() { + let idx = offset.bytes_usize(); + let ptr_bytes = &mut bytes[idx..idx + ptr_size]; + let bits = read_target_uint(endian, ptr_bytes).unwrap(); + let (ptr_prov, ptr_offset) = + adjust_ptr(Pointer::new(alloc_id, Size::from_bytes(bits)))?.into_parts(); + write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap(); + new_relocations.push((offset, ptr_prov)); + } + // Create allocation. + Ok(Allocation { + bytes, + relocations: Relocations::from_presorted(new_relocations), + init_mask: self.init_mask, + align: self.align, + mutability: self.mutability, + extra, + }) + } +} + +/// Raw accessors. Provide access to otherwise private bytes. +impl Allocation { + pub fn len(&self) -> usize { + self.bytes.len() + } + + pub fn size(&self) -> Size { + Size::from_bytes(self.len()) + } + + /// Looks at a slice which may describe uninitialized bytes or describe a relocation. This differs + /// from `get_bytes_with_uninit_and_ptr` in that it does no relocation checks (even on the + /// edges) at all. + /// This must not be used for reads affecting the interpreter execution. + pub fn inspect_with_uninit_and_ptr_outside_interpreter(&self, range: Range) -> &[u8] { + &self.bytes[range] + } + + /// Returns the mask indicating which bytes are initialized. + pub fn init_mask(&self) -> &InitMask { + &self.init_mask + } + + /// Returns the relocation list. + pub fn relocations(&self) -> &Relocations { + &self.relocations + } +} + +/// Byte accessors. +impl Allocation { + /// This is the entirely abstraction-violating way to just grab the raw bytes without + /// caring about relocations. It just deduplicates some code between `read_scalar` + /// and `get_bytes_internal`. + fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] { + &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()] + } + + /// The last argument controls whether we error out when there are uninitialized or pointer + /// bytes. However, we *always* error when there are relocations overlapping the edges of the + /// range. + /// + /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead, + /// + /// This function also guarantees that the resulting pointer will remain stable + /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies + /// on that. 
+ /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + fn get_bytes_internal( + &self, + cx: &impl HasDataLayout, + range: AllocRange, + check_init_and_ptr: bool, + ) -> AllocResult<&[u8]> { + if check_init_and_ptr { + self.check_init(range)?; + self.check_relocations(cx, range)?; + } else { + // We still don't want relocations on the *edges*. + self.check_relocation_edges(cx, range)?; + } + + Ok(self.get_bytes_even_more_internal(range)) + } + + /// Checks that these bytes are initialized and not pointer bytes, and then return them + /// as a slice. + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods + /// on `InterpCx` instead. + #[inline] + pub fn get_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult<&[u8]> { + self.get_bytes_internal(cx, range, true) + } + + /// It is the caller's responsibility to handle uninitialized and pointer bytes. + /// However, this still checks that there are no relocations on the *edges*. + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + #[inline] + pub fn get_bytes_with_uninit_and_ptr( + &self, + cx: &impl HasDataLayout, + range: AllocRange, + ) -> AllocResult<&[u8]> { + self.get_bytes_internal(cx, range, false) + } + + /// Just calling this already marks everything as defined and removes relocations, + /// so be sure to actually put data there! + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods + /// on `InterpCx` instead. + pub fn get_bytes_mut( + &mut self, + cx: &impl HasDataLayout, + range: AllocRange, + ) -> AllocResult<&mut [u8]> { + self.mark_init(range, true); + self.clear_relocations(cx, range)?; + + Ok(&mut self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]) + } + + /// A raw pointer variant of `get_bytes_mut` that avoids invalidating existing aliases into this memory. + pub fn get_bytes_mut_ptr( + &mut self, + cx: &impl HasDataLayout, + range: AllocRange, + ) -> AllocResult<*mut [u8]> { + self.mark_init(range, true); + self.clear_relocations(cx, range)?; + + assert!(range.end().bytes_usize() <= self.bytes.len()); // need to do our own bounds-check + let begin_ptr = self.bytes.as_mut_ptr().wrapping_add(range.start.bytes_usize()); + let len = range.end().bytes_usize() - range.start.bytes_usize(); + Ok(ptr::slice_from_raw_parts_mut(begin_ptr, len)) + } +} + +/// Reading and writing. +impl Allocation { + /// Validates that `ptr.offset` and `ptr.offset + size` do not point to the middle of a + /// relocation. If `allow_uninit`/`allow_ptr` is `false`, also enforces that the memory in the + /// given range contains no uninitialized bytes/relocations. + pub fn check_bytes( + &self, + cx: &impl HasDataLayout, + range: AllocRange, + allow_uninit: bool, + allow_ptr: bool, + ) -> AllocResult { + // Check bounds and relocations on the edges. + self.get_bytes_with_uninit_and_ptr(cx, range)?; + // Check uninit and ptr. + if !allow_uninit { + self.check_init(range)?; + } + if !allow_ptr { + self.check_relocations(cx, range)?; + } + Ok(()) + } + + /// Reads a *non-ZST* scalar. + /// + /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine + /// supports that) provenance is entirely ignored. 
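+ /// (When `read_provenance` is `true`, `range.size` must equal the target's pointer
+ /// size; this is asserted below.)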
+ /// + /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check + /// for ZSTness anyway due to integer pointers being valid for ZSTs. + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + /// Most likely, you want to call `InterpCx::read_scalar` instead of this method. + pub fn read_scalar( + &self, + cx: &impl HasDataLayout, + range: AllocRange, + read_provenance: bool, + ) -> AllocResult> { + if read_provenance { + assert_eq!(range.size, cx.data_layout().pointer_size); + } + + // First and foremost, if anything is uninit, bail. + if self.is_init(range).is_err() { + // This inflates uninitialized bytes to the entire scalar, even if only a few + // bytes are uninitialized. + return Ok(ScalarMaybeUninit::Uninit); + } + + // If we are doing a pointer read, and there is a relocation exactly where we + // are reading, then we can put data and relocation back together and return that. + if read_provenance && let Some(&prov) = self.relocations.get(&range.start) { + // We already checked init and relocations, so we can use this function. + let bytes = self.get_bytes_even_more_internal(range); + let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); + let ptr = Pointer::new(prov, Size::from_bytes(bits)); + return Ok(ScalarMaybeUninit::from_pointer(ptr, cx)); + } + + // If we are *not* reading a pointer, and we can just ignore relocations, + // then do exactly that. + if !read_provenance && Prov::OFFSET_IS_ADDR { + // We just strip provenance. + let bytes = self.get_bytes_even_more_internal(range); + let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); + return Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size))); + } + + // It's complicated. Better make sure there is no provenance anywhere. + // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then + // `read_pointer` is true and we ideally would distinguish the following two cases: + // - The entire `range` is covered by 2 relocations for the same provenance. + // Then we should return a pointer with that provenance. + // - The range has inhomogeneous provenance. Then we should return just the + // underlying bits. + let bytes = self.get_bytes(cx, range)?; + let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); + Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size))) + } + + /// Writes a *non-ZST* scalar. + /// + /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check + /// for ZSTness anyway due to integer pointers being valid for ZSTs. + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + /// Most likely, you want to call `InterpCx::write_scalar` instead of this method. + #[instrument(skip(self, cx), level = "debug")] + pub fn write_scalar( + &mut self, + cx: &impl HasDataLayout, + range: AllocRange, + val: ScalarMaybeUninit, + ) -> AllocResult { + assert!(self.mutability == Mutability::Mut); + + let val = match val { + ScalarMaybeUninit::Scalar(scalar) => scalar, + ScalarMaybeUninit::Uninit => { + return self.write_uninit(cx, range); + } + }; + + // `to_bits_or_ptr_internal` is the right method because we just want to store this data + // as-is into memory. + let (bytes, provenance) = match val.to_bits_or_ptr_internal(range.size)? 
{ + Err(val) => { + let (provenance, offset) = val.into_parts(); + (u128::from(offset.bytes()), Some(provenance)) + } + Ok(data) => (data, None), + }; + + let endian = cx.data_layout().endian; + let dst = self.get_bytes_mut(cx, range)?; + write_target_uint(endian, dst, bytes).unwrap(); + + // See if we have to also write a relocation. + if let Some(provenance) = provenance { + self.relocations.0.insert(range.start, provenance); + } + + Ok(()) + } + + /// Write "uninit" to the given memory range. + pub fn write_uninit(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { + self.mark_init(range, false); + self.clear_relocations(cx, range)?; + return Ok(()); + } +} + +/// Relocations. +impl Allocation { + /// Returns all relocations overlapping with the given pointer-offset pair. + fn get_relocations(&self, cx: &impl HasDataLayout, range: AllocRange) -> &[(Size, Prov)] { + // We have to go back `pointer_size - 1` bytes, as that one would still overlap with + // the beginning of this range. + let start = range.start.bytes().saturating_sub(cx.data_layout().pointer_size.bytes() - 1); + self.relocations.range(Size::from_bytes(start)..range.end()) + } + + /// Returns whether this allocation has relocations overlapping with the given range. + /// + /// Note: this function exists to allow `get_relocations` to be private, in order to somewhat + /// limit access to relocations outside of the `Allocation` abstraction. + /// + pub fn has_relocations(&self, cx: &impl HasDataLayout, range: AllocRange) -> bool { + !self.get_relocations(cx, range).is_empty() + } + + /// Checks that there are no relocations overlapping with the given range. + #[inline(always)] + fn check_relocations(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { + if self.has_relocations(cx, range) { Err(AllocError::ReadPointerAsBytes) } else { Ok(()) } + } + + /// Removes all relocations inside the given range. + /// If there are relocations overlapping with the edges, they + /// are removed as well *and* the bytes they cover are marked as + /// uninitialized. This is a somewhat odd "spooky action at a distance", + /// but it allows strictly more code to run than if we would just error + /// immediately in that case. + fn clear_relocations(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult + where + Prov: Provenance, + { + // Find the start and end of the given range and its outermost relocations. + let (first, last) = { + // Find all relocations overlapping the given range. + let relocations = self.get_relocations(cx, range); + if relocations.is_empty() { + return Ok(()); + } + + ( + relocations.first().unwrap().0, + relocations.last().unwrap().0 + cx.data_layout().pointer_size, + ) + }; + let start = range.start; + let end = range.end(); + + // We need to handle clearing the relocations from parts of a pointer. + // FIXME: Miri should preserve partial relocations; see + // https://github.com/rust-lang/miri/issues/2181. + if first < start { + if Prov::ERR_ON_PARTIAL_PTR_OVERWRITE { + return Err(AllocError::PartialPointerOverwrite(first)); + } + warn!( + "Partial pointer overwrite! De-initializing memory at offsets {first:?}..{start:?}." + ); + self.init_mask.set_range(first, start, false); + } + if last > end { + if Prov::ERR_ON_PARTIAL_PTR_OVERWRITE { + return Err(AllocError::PartialPointerOverwrite( + last - cx.data_layout().pointer_size, + )); + } + warn!( + "Partial pointer overwrite! De-initializing memory at offsets {end:?}..{last:?}." 
+ ); + self.init_mask.set_range(end, last, false); + } + + // Forget all the relocations. + // Since relocations do not overlap, we know that removing until `last` (exclusive) is fine, + // i.e., this will not remove any other relocations just after the ones we care about. + self.relocations.0.remove_range(first..last); + + Ok(()) + } + + /// Errors if there are relocations overlapping with the edges of the + /// given memory range. + #[inline] + fn check_relocation_edges(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { + self.check_relocations(cx, alloc_range(range.start, Size::ZERO))?; + self.check_relocations(cx, alloc_range(range.end(), Size::ZERO))?; + Ok(()) + } +} + +/// "Relocations" stores the provenance information of pointers stored in memory. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, TyEncodable, TyDecodable)] +pub struct Relocations(SortedMap); + +impl Relocations { + pub fn new() -> Self { + Relocations(SortedMap::new()) + } + + // The caller must guarantee that the given relocations are already sorted + // by address and contain no duplicates. + pub fn from_presorted(r: Vec<(Size, Prov)>) -> Self { + Relocations(SortedMap::from_presorted_elements(r)) + } +} + +impl Deref for Relocations { + type Target = SortedMap; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// A partial, owned list of relocations to transfer into another allocation. +/// +/// Offsets are already adjusted to the destination allocation. +pub struct AllocationRelocations { + dest_relocations: Vec<(Size, Prov)>, +} + +impl Allocation { + pub fn prepare_relocation_copy( + &self, + cx: &impl HasDataLayout, + src: AllocRange, + dest: Size, + count: u64, + ) -> AllocationRelocations { + let relocations = self.get_relocations(cx, src); + if relocations.is_empty() { + return AllocationRelocations { dest_relocations: Vec::new() }; + } + + let size = src.size; + let mut new_relocations = Vec::with_capacity(relocations.len() * (count as usize)); + + // If `count` is large, this is rather wasteful -- we are allocating a big array here, which + // is mostly filled with redundant information since it's just N copies of the same `Prov`s + // at slightly adjusted offsets. The reason we do this is so that in `mark_relocation_range` + // we can use `insert_presorted`. That wouldn't work with an `Iterator` that just produces + // the right sequence of relocations for all N copies. + for i in 0..count { + new_relocations.extend(relocations.iter().map(|&(offset, reloc)| { + // compute offset for current repetition + let dest_offset = dest + size * i; // `Size` operations + ( + // shift offsets from source allocation to destination allocation + (offset + dest_offset) - src.start, // `Size` operations + reloc, + ) + })); + } + + AllocationRelocations { dest_relocations: new_relocations } + } + + /// Applies a relocation copy. + /// The affected range, as defined in the parameters to `prepare_relocation_copy` is expected + /// to be clear of relocations. + /// + /// This is dangerous to use as it can violate internal `Allocation` invariants! + /// It only exists to support an efficient implementation of `mem_copy_repeatedly`. 
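+ /// (Sketch of the intended call sequence: `prepare_relocation_copy`, then copy the raw
+ /// bytes to the destination, then apply this to the destination allocation.)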
+ pub fn mark_relocation_range(&mut self, relocations: AllocationRelocations) { + self.relocations.0.insert_presorted(relocations.dest_relocations); + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Uninitialized byte tracking +//////////////////////////////////////////////////////////////////////////////// + +type Block = u64; + +/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte +/// is initialized. If it is `false` the byte is uninitialized. +// Note: for performance reasons when interning, some of the `InitMask` fields can be partially +// hashed. (see the `Hash` impl below for more details), so the impl is not derived. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)] +#[derive(HashStable)] +pub struct InitMask { + blocks: Vec, + len: Size, +} + +// Const allocations are only hashed for interning. However, they can be large, making the hashing +// expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially +// big buffers like the allocation's init mask. We can partially hash some fields when they're +// large. +impl hash::Hash for InitMask { + fn hash(&self, state: &mut H) { + const MAX_BLOCKS_TO_HASH: usize = MAX_BYTES_TO_HASH / std::mem::size_of::(); + const MAX_BLOCKS_LEN: usize = MAX_HASHED_BUFFER_LEN / std::mem::size_of::(); + + // Partially hash the `blocks` buffer when it is large. To limit collisions with common + // prefixes and suffixes, we hash the length and some slices of the buffer. + let block_count = self.blocks.len(); + if block_count > MAX_BLOCKS_LEN { + // Hash the buffer's length. + block_count.hash(state); + + // And its head and tail. + self.blocks[..MAX_BLOCKS_TO_HASH].hash(state); + self.blocks[block_count - MAX_BLOCKS_TO_HASH..].hash(state); + } else { + self.blocks.hash(state); + } + + // Hash the other fields as usual. + self.len.hash(state); + } +} + +impl InitMask { + pub const BLOCK_SIZE: u64 = 64; + + #[inline] + fn bit_index(bits: Size) -> (usize, usize) { + // BLOCK_SIZE is the number of bits that can fit in a `Block`. + // Each bit in a `Block` represents the initialization state of one byte of an allocation, + // so we use `.bytes()` here. + let bits = bits.bytes(); + let a = bits / InitMask::BLOCK_SIZE; + let b = bits % InitMask::BLOCK_SIZE; + (usize::try_from(a).unwrap(), usize::try_from(b).unwrap()) + } + + #[inline] + fn size_from_bit_index(block: impl TryInto, bit: impl TryInto) -> Size { + let block = block.try_into().ok().unwrap(); + let bit = bit.try_into().ok().unwrap(); + Size::from_bytes(block * InitMask::BLOCK_SIZE + bit) + } + + pub fn new(size: Size, state: bool) -> Self { + let mut m = InitMask { blocks: vec![], len: Size::ZERO }; + m.grow(size, state); + m + } + + pub fn set_range(&mut self, start: Size, end: Size, new_state: bool) { + let len = self.len; + if end > len { + self.grow(end - len, new_state); + } + self.set_range_inbounds(start, end, new_state); + } + + pub fn set_range_inbounds(&mut self, start: Size, end: Size, new_state: bool) { + let (blocka, bita) = Self::bit_index(start); + let (blockb, bitb) = Self::bit_index(end); + if blocka == blockb { + // First set all bits except the first `bita`, + // then unset the last `64 - bitb` bits. 
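+ // Worked example (illustrative): `bita = 2`, `bitb = 5` gives
+ // `(u64::MAX << 2) & (u64::MAX >> 59) = 0b11100`, i.e. exactly bits `2..5`.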
+ let range = if bitb == 0 { + u64::MAX << bita + } else { + (u64::MAX << bita) & (u64::MAX >> (64 - bitb)) + }; + if new_state { + self.blocks[blocka] |= range; + } else { + self.blocks[blocka] &= !range; + } + return; + } + // across block boundaries + if new_state { + // Set `bita..64` to `1`. + self.blocks[blocka] |= u64::MAX << bita; + // Set `0..bitb` to `1`. + if bitb != 0 { + self.blocks[blockb] |= u64::MAX >> (64 - bitb); + } + // Fill in all the other blocks (much faster than one bit at a time). + for block in (blocka + 1)..blockb { + self.blocks[block] = u64::MAX; + } + } else { + // Set `bita..64` to `0`. + self.blocks[blocka] &= !(u64::MAX << bita); + // Set `0..bitb` to `0`. + if bitb != 0 { + self.blocks[blockb] &= !(u64::MAX >> (64 - bitb)); + } + // Fill in all the other blocks (much faster than one bit at a time). + for block in (blocka + 1)..blockb { + self.blocks[block] = 0; + } + } + } + + #[inline] + pub fn get(&self, i: Size) -> bool { + let (block, bit) = Self::bit_index(i); + (self.blocks[block] & (1 << bit)) != 0 + } + + #[inline] + pub fn set(&mut self, i: Size, new_state: bool) { + let (block, bit) = Self::bit_index(i); + self.set_bit(block, bit, new_state); + } + + #[inline] + fn set_bit(&mut self, block: usize, bit: usize, new_state: bool) { + if new_state { + self.blocks[block] |= 1 << bit; + } else { + self.blocks[block] &= !(1 << bit); + } + } + + pub fn grow(&mut self, amount: Size, new_state: bool) { + if amount.bytes() == 0 { + return; + } + let unused_trailing_bits = + u64::try_from(self.blocks.len()).unwrap() * Self::BLOCK_SIZE - self.len.bytes(); + if amount.bytes() > unused_trailing_bits { + let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1; + self.blocks.extend( + // FIXME(oli-obk): optimize this by repeating `new_state as Block`. + iter::repeat(0).take(usize::try_from(additional_blocks).unwrap()), + ); + } + let start = self.len; + self.len += amount; + self.set_range_inbounds(start, start + amount, new_state); // `Size` operation + } + + /// Returns the index of the first bit in `start..end` (end-exclusive) that is equal to is_init. + fn find_bit(&self, start: Size, end: Size, is_init: bool) -> Option { + /// A fast implementation of `find_bit`, + /// which skips over an entire block at a time if it's all 0s (resp. 1s), + /// and finds the first 1 (resp. 0) bit inside a block using `trailing_zeros` instead of a loop. + /// + /// Note that all examples below are written with 8 (instead of 64) bit blocks for simplicity, + /// and with the least significant bit (and lowest block) first: + /// ```text + /// 00000000|00000000 + /// ^ ^ ^ ^ + /// index: 0 7 8 15 + /// ``` + /// Also, if not stated, assume that `is_init = true`, that is, we are searching for the first 1 bit. + fn find_bit_fast( + init_mask: &InitMask, + start: Size, + end: Size, + is_init: bool, + ) -> Option { + /// Search one block, returning the index of the first bit equal to `is_init`. + fn search_block( + bits: Block, + block: usize, + start_bit: usize, + is_init: bool, + ) -> Option { + // For the following examples, assume this function was called with: + // bits = 0b00111011 + // start_bit = 3 + // is_init = false + // Note that, for the examples in this function, the most significant bit is written first, + // which is backwards compared to the comments in `find_bit`/`find_bit_fast`. + + // Invert bits so we're always looking for the first set bit. + // ! 
0b00111011 + // bits = 0b11000100 + let bits = if is_init { bits } else { !bits }; + // Mask off unused start bits. + // 0b11000100 + // & 0b11111000 + // bits = 0b11000000 + let bits = bits & (!0 << start_bit); + // Find set bit, if any. + // bit = trailing_zeros(0b11000000) + // bit = 6 + if bits == 0 { + None + } else { + let bit = bits.trailing_zeros(); + Some(InitMask::size_from_bit_index(block, bit)) + } + } + + if start >= end { + return None; + } + + // Convert `start` and `end` to block indexes and bit indexes within each block. + // We must convert `end` to an inclusive bound to handle block boundaries correctly. + // + // For example: + // + // (a) 00000000|00000000 (b) 00000000| + // ^~~~~~~~~~~^ ^~~~~~~~~^ + // start end start end + // + // In both cases, the block index of `end` is 1. + // But we do want to search block 1 in (a), and we don't in (b). + // + // We subtract 1 from both end positions to make them inclusive: + // + // (a) 00000000|00000000 (b) 00000000| + // ^~~~~~~~~~^ ^~~~~~~^ + // start end_inclusive start end_inclusive + // + // For (a), the block index of `end_inclusive` is 1, and for (b), it's 0. + // This provides the desired behavior of searching blocks 0 and 1 for (a), + // and searching only block 0 for (b). + // There is no concern of overflows since we checked for `start >= end` above. + let (start_block, start_bit) = InitMask::bit_index(start); + let end_inclusive = Size::from_bytes(end.bytes() - 1); + let (end_block_inclusive, _) = InitMask::bit_index(end_inclusive); + + // Handle first block: need to skip `start_bit` bits. + // + // We need to handle the first block separately, + // because there may be bits earlier in the block that should be ignored, + // such as the bit marked (1) in this example: + // + // (1) + // -|------ + // (c) 01000000|00000000|00000001 + // ^~~~~~~~~~~~~~~~~~^ + // start end + if let Some(i) = + search_block(init_mask.blocks[start_block], start_block, start_bit, is_init) + { + // If the range is less than a block, we may find a matching bit after `end`. + // + // For example, we shouldn't successfully find bit (2), because it's after `end`: + // + // (2) + // -------| + // (d) 00000001|00000000|00000001 + // ^~~~~^ + // start end + // + // An alternative would be to mask off end bits in the same way as we do for start bits, + // but performing this check afterwards is faster and simpler to implement. + if i < end { + return Some(i); + } else { + return None; + } + } + + // Handle remaining blocks. + // + // We can skip over an entire block at once if it's all 0s (resp. 1s). + // The block marked (3) in this example is the first block that will be handled by this loop, + // and it will be skipped for that reason: + // + // (3) + // -------- + // (e) 01000000|00000000|00000001 + // ^~~~~~~~~~~~~~~~~~^ + // start end + if start_block < end_block_inclusive { + // This loop is written in a specific way for performance. + // Notably: `..end_block_inclusive + 1` is used for an inclusive range instead of `..=end_block_inclusive`, + // and `.zip(start_block + 1..)` is used to track the index instead of `.enumerate().skip().take()`, + // because both alternatives result in significantly worse codegen. + // `end_block_inclusive + 1` is guaranteed not to wrap, because `end_block_inclusive <= end / BLOCK_SIZE`, + // and `BLOCK_SIZE` (the number of bits per block) will always be at least 8 (1 byte). + for (&bits, block) in init_mask.blocks[start_block + 1..end_block_inclusive + 1] + .iter() + .zip(start_block + 1..) 
+ { + if let Some(i) = search_block(bits, block, 0, is_init) { + // If this is the last block, we may find a matching bit after `end`. + // + // For example, we shouldn't successfully find bit (4), because it's after `end`: + // + // (4) + // -------| + // (f) 00000001|00000000|00000001 + // ^~~~~~~~~~~~~~~~~~^ + // start end + // + // As above with example (d), we could handle the end block separately and mask off end bits, + // but unconditionally searching an entire block at once and performing this check afterwards + // is faster and much simpler to implement. + if i < end { + return Some(i); + } else { + return None; + } + } + } + } + + None + } + + #[cfg_attr(not(debug_assertions), allow(dead_code))] + fn find_bit_slow( + init_mask: &InitMask, + start: Size, + end: Size, + is_init: bool, + ) -> Option { + (start..end).find(|&i| init_mask.get(i) == is_init) + } + + let result = find_bit_fast(self, start, end, is_init); + + debug_assert_eq!( + result, + find_bit_slow(self, start, end, is_init), + "optimized implementation of find_bit is wrong for start={:?} end={:?} is_init={} init_mask={:#?}", + start, + end, + is_init, + self + ); + + result + } +} + +/// A contiguous chunk of initialized or uninitialized memory. +pub enum InitChunk { + Init(Range), + Uninit(Range), +} + +impl InitChunk { + #[inline] + pub fn is_init(&self) -> bool { + match self { + Self::Init(_) => true, + Self::Uninit(_) => false, + } + } + + #[inline] + pub fn range(&self) -> Range { + match self { + Self::Init(r) => r.clone(), + Self::Uninit(r) => r.clone(), + } + } +} + +impl InitMask { + /// Checks whether the range `start..end` (end-exclusive) is entirely initialized. + /// + /// Returns `Ok(())` if it's initialized. Otherwise returns a range of byte + /// indexes for the first contiguous span of the uninitialized access. + #[inline] + pub fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), AllocRange> { + if end > self.len { + return Err(AllocRange::from(self.len..end)); + } + + let uninit_start = self.find_bit(start, end, false); + + match uninit_start { + Some(uninit_start) => { + let uninit_end = self.find_bit(uninit_start, end, true).unwrap_or(end); + Err(AllocRange::from(uninit_start..uninit_end)) + } + None => Ok(()), + } + } + + /// Returns an iterator, yielding a range of byte indexes for each contiguous region + /// of initialized or uninitialized bytes inside the range `start..end` (end-exclusive). + /// + /// The iterator guarantees the following: + /// - Chunks are nonempty. + /// - Chunks are adjacent (each range's start is equal to the previous range's end). + /// - Chunks span exactly `start..end` (the first starts at `start`, the last ends at `end`). + /// - Chunks alternate between [`InitChunk::Init`] and [`InitChunk::Uninit`]. + #[inline] + pub fn range_as_init_chunks(&self, start: Size, end: Size) -> InitChunkIter<'_> { + assert!(end <= self.len); + + let is_init = if start < end { + self.get(start) + } else { + // `start..end` is empty: there are no chunks, so use some arbitrary value + false + }; + + InitChunkIter { init_mask: self, is_init, start, end } + } +} + +/// Yields [`InitChunk`]s. See [`InitMask::range_as_init_chunks`]. +#[derive(Clone)] +pub struct InitChunkIter<'a> { + init_mask: &'a InitMask, + /// Whether the next chunk we will return is initialized. + /// If there are no more chunks, contains some arbitrary value. + is_init: bool, + /// The current byte index into `init_mask`. + start: Size, + /// The end byte index into `init_mask`. 
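+ /// (Exclusive, like the `end` passed to `range_as_init_chunks`.)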
+ end: Size, +} + +impl<'a> Iterator for InitChunkIter<'a> { + type Item = InitChunk; + + #[inline] + fn next(&mut self) -> Option { + if self.start >= self.end { + return None; + } + + let end_of_chunk = + self.init_mask.find_bit(self.start, self.end, !self.is_init).unwrap_or(self.end); + let range = self.start..end_of_chunk; + + let ret = + Some(if self.is_init { InitChunk::Init(range) } else { InitChunk::Uninit(range) }); + + self.is_init = !self.is_init; + self.start = end_of_chunk; + + ret + } +} + +/// Uninitialized bytes. +impl Allocation { + /// Checks whether the given range is entirely initialized. + /// + /// Returns `Ok(())` if it's initialized. Otherwise returns the range of byte + /// indexes of the first contiguous uninitialized access. + fn is_init(&self, range: AllocRange) -> Result<(), AllocRange> { + self.init_mask.is_range_initialized(range.start, range.end()) // `Size` addition + } + + /// Checks that a range of bytes is initialized. If not, returns the `InvalidUninitBytes` + /// error which will report the first range of bytes which is uninitialized. + fn check_init(&self, range: AllocRange) -> AllocResult { + self.is_init(range).map_err(|uninit_range| { + AllocError::InvalidUninitBytes(Some(UninitBytesAccess { + access: range, + uninit: uninit_range, + })) + }) + } + + fn mark_init(&mut self, range: AllocRange, is_init: bool) { + if range.size.bytes() == 0 { + return; + } + assert!(self.mutability == Mutability::Mut); + self.init_mask.set_range(range.start, range.end(), is_init); + } +} + +/// Run-length encoding of the uninit mask. +/// Used to copy parts of a mask multiple times to another allocation. +pub struct InitMaskCompressed { + /// Whether the first range is initialized. + initial: bool, + /// The lengths of ranges that are run-length encoded. + /// The initialization state of the ranges alternate starting with `initial`. + ranges: smallvec::SmallVec<[u64; 1]>, +} + +impl InitMaskCompressed { + pub fn no_bytes_init(&self) -> bool { + // The `ranges` are run-length encoded and of alternating initialization state. + // So if `ranges.len() > 1` then the second block is an initialized range. + !self.initial && self.ranges.len() == 1 + } +} + +/// Transferring the initialization mask to other allocations. +impl Allocation { + /// Creates a run-length encoding of the initialization mask; panics if range is empty. + /// + /// This is essentially a more space-efficient version of + /// `InitMask::range_as_init_chunks(...).collect::>()`. + pub fn compress_uninit_range(&self, range: AllocRange) -> InitMaskCompressed { + // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`), + // a naive initialization mask copying algorithm would repeatedly have to read the initialization mask from + // the source and write it to the destination. Even if we optimized the memory accesses, + // we'd be doing all of this `repeat` times. + // Therefore we precompute a compressed version of the initialization mask of the source value and + // then write it back `repeat` times without computing any more information from the source. + + // A precomputed cache for ranges of initialized / uninitialized bits + // 0000010010001110 will become + // `[5, 1, 2, 1, 3, 3, 1]`, + // where each element toggles the state. 
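+ // (Reading `0` as uninitialized, the leading run in that example is uninit, so
+ // `initial` below would be `false`.)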
+ + let mut ranges = smallvec::SmallVec::<[u64; 1]>::new(); + + let mut chunks = self.init_mask.range_as_init_chunks(range.start, range.end()).peekable(); + + let initial = chunks.peek().expect("range should be nonempty").is_init(); + + // Here we rely on `range_as_init_chunks` to yield alternating init/uninit chunks. + for chunk in chunks { + let len = chunk.range().end.bytes() - chunk.range().start.bytes(); + ranges.push(len); + } + + InitMaskCompressed { ranges, initial } + } + + /// Applies multiple instances of the run-length encoding to the initialization mask. + /// + /// This is dangerous to use as it can violate internal `Allocation` invariants! + /// It only exists to support an efficient implementation of `mem_copy_repeatedly`. + pub fn mark_compressed_init_range( + &mut self, + defined: &InitMaskCompressed, + range: AllocRange, + repeat: u64, + ) { + // An optimization where we can just overwrite an entire range of initialization + // bits if they are going to be uniformly `1` or `0`. + if defined.ranges.len() <= 1 { + self.init_mask.set_range_inbounds( + range.start, + range.start + range.size * repeat, // `Size` operations + defined.initial, + ); + return; + } + + for mut j in 0..repeat { + j *= range.size.bytes(); + j += range.start.bytes(); + let mut cur = defined.initial; + for range in &defined.ranges { + let old_j = j; + j += range; + self.init_mask.set_range_inbounds( + Size::from_bytes(old_j), + Size::from_bytes(j), + cur, + ); + cur = !cur; + } + } + } +} diff --git a/compiler/rustc_middle/src/mir/interpret/error.rs b/compiler/rustc_middle/src/mir/interpret/error.rs new file mode 100644 index 000000000..cecb55578 --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/error.rs @@ -0,0 +1,551 @@ +use super::{AllocId, AllocRange, ConstAlloc, Pointer, Scalar}; + +use crate::mir::interpret::ConstValue; +use crate::ty::{layout, query::TyCtxtAt, tls, Ty, ValTree}; + +use rustc_data_structures::sync::Lock; +use rustc_errors::{pluralize, struct_span_err, DiagnosticBuilder, ErrorGuaranteed}; +use rustc_macros::HashStable; +use rustc_session::CtfeBacktrace; +use rustc_span::def_id::DefId; +use rustc_target::abi::{call, Align, Size}; +use std::{any::Any, backtrace::Backtrace, fmt}; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, HashStable, TyEncodable, TyDecodable)] +pub enum ErrorHandled { + /// Already reported an error for this evaluation, and the compilation is + /// *guaranteed* to fail. Warnings/lints *must not* produce `Reported`. + Reported(ErrorGuaranteed), + /// Already emitted a lint for this evaluation. + Linted, + /// Don't emit an error, the evaluation failed because the MIR was generic + /// and the substs didn't fully monomorphize it. + TooGeneric, +} + +impl From for ErrorHandled { + fn from(err: ErrorGuaranteed) -> ErrorHandled { + ErrorHandled::Reported(err) + } +} + +TrivialTypeTraversalAndLiftImpls! { + ErrorHandled, +} + +pub type EvalToAllocationRawResult<'tcx> = Result, ErrorHandled>; +pub type EvalToConstValueResult<'tcx> = Result, ErrorHandled>; +pub type EvalToValTreeResult<'tcx> = Result>, ErrorHandled>; + +pub fn struct_error<'tcx>( + tcx: TyCtxtAt<'tcx>, + msg: &str, +) -> DiagnosticBuilder<'tcx, ErrorGuaranteed> { + struct_span_err!(tcx.sess, tcx.span, E0080, "{}", msg) +} + +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +static_assert_size!(InterpErrorInfo<'_>, 8); + +/// Packages the kind of error we got from the const code interpreter +/// up with a Rust-level backtrace of where the error occurred. 
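+/// Whether a backtrace is actually captured is controlled by the session's
+/// `ctfe_backtrace` setting (see the `From<InterpError<'tcx>>` impl below).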
+/// These should always be constructed by calling `.into()` on +/// an `InterpError`. In `rustc_mir::interpret`, we have `throw_err_*` +/// macros for this. +#[derive(Debug)] +pub struct InterpErrorInfo<'tcx>(Box>); + +#[derive(Debug)] +struct InterpErrorInfoInner<'tcx> { + kind: InterpError<'tcx>, + backtrace: Option>, +} + +impl fmt::Display for InterpErrorInfo<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0.kind) + } +} + +impl<'tcx> InterpErrorInfo<'tcx> { + pub fn print_backtrace(&self) { + if let Some(backtrace) = self.0.backtrace.as_ref() { + print_backtrace(backtrace); + } + } + + pub fn into_kind(self) -> InterpError<'tcx> { + let InterpErrorInfo(box InterpErrorInfoInner { kind, .. }) = self; + kind + } + + #[inline] + pub fn kind(&self) -> &InterpError<'tcx> { + &self.0.kind + } +} + +fn print_backtrace(backtrace: &Backtrace) { + eprintln!("\n\nAn error occurred in miri:\n{}", backtrace); +} + +impl From for InterpErrorInfo<'_> { + fn from(err: ErrorHandled) -> Self { + match err { + ErrorHandled::Reported(ErrorGuaranteed { .. }) | ErrorHandled::Linted => { + err_inval!(ReferencedConstant) + } + ErrorHandled::TooGeneric => err_inval!(TooGeneric), + } + .into() + } +} + +impl From for InterpErrorInfo<'_> { + fn from(err: ErrorGuaranteed) -> Self { + InterpError::InvalidProgram(InvalidProgramInfo::AlreadyReported(err)).into() + } +} + +impl<'tcx> From> for InterpErrorInfo<'tcx> { + fn from(kind: InterpError<'tcx>) -> Self { + let capture_backtrace = tls::with_opt(|tcx| { + if let Some(tcx) = tcx { + *Lock::borrow(&tcx.sess.ctfe_backtrace) + } else { + CtfeBacktrace::Disabled + } + }); + + let backtrace = match capture_backtrace { + CtfeBacktrace::Disabled => None, + CtfeBacktrace::Capture => Some(Box::new(Backtrace::force_capture())), + CtfeBacktrace::Immediate => { + // Print it now. + let backtrace = Backtrace::force_capture(); + print_backtrace(&backtrace); + None + } + }; + + InterpErrorInfo(Box::new(InterpErrorInfoInner { kind, backtrace })) + } +} + +/// Error information for when the program we executed turned out not to actually be a valid +/// program. This cannot happen in stand-alone Miri, but it can happen during CTFE/ConstProp +/// where we work on generic code or execution does not have all information available. +pub enum InvalidProgramInfo<'tcx> { + /// Resolution can fail if we are in a too generic context. + TooGeneric, + /// Cannot compute this constant because it depends on another one + /// which already produced an error. + ReferencedConstant, + /// Abort in case errors are already reported. + AlreadyReported(ErrorGuaranteed), + /// An error occurred during layout computation. + Layout(layout::LayoutError<'tcx>), + /// An error occurred during FnAbi computation: the passed --target lacks FFI support + /// (which unfortunately typeck does not reject). + /// Not using `FnAbiError` as that contains a nested `LayoutError`. + FnAbiAdjustForForeignAbi(call::AdjustForForeignAbiError), + /// SizeOf of unsized type was requested. + SizeOfUnsizedType(Ty<'tcx>), +} + +impl fmt::Display for InvalidProgramInfo<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use InvalidProgramInfo::*; + match self { + TooGeneric => write!(f, "encountered overly generic constant"), + ReferencedConstant => write!(f, "referenced constant has errors"), + AlreadyReported(ErrorGuaranteed { .. 
}) => { + write!(f, "encountered constants with type errors, stopping evaluation") + } + Layout(ref err) => write!(f, "{err}"), + FnAbiAdjustForForeignAbi(ref err) => write!(f, "{err}"), + SizeOfUnsizedType(ty) => write!(f, "size_of called on unsized type `{ty}`"), + } + } +} + +/// Details of why a pointer had to be in-bounds. +#[derive(Debug, Copy, Clone, TyEncodable, TyDecodable, HashStable)] +pub enum CheckInAllocMsg { + /// We are dereferencing a pointer (i.e., creating a place). + DerefTest, + /// We are access memory. + MemoryAccessTest, + /// We are doing pointer arithmetic. + PointerArithmeticTest, + /// We are doing pointer offset_from. + OffsetFromTest, + /// None of the above -- generic/unspecific inbounds test. + InboundsTest, +} + +impl fmt::Display for CheckInAllocMsg { + /// When this is printed as an error the context looks like this: + /// "{msg}{pointer} is a dangling pointer". + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match *self { + CheckInAllocMsg::DerefTest => "dereferencing pointer failed: ", + CheckInAllocMsg::MemoryAccessTest => "memory access failed: ", + CheckInAllocMsg::PointerArithmeticTest => "out-of-bounds pointer arithmetic: ", + CheckInAllocMsg::OffsetFromTest => "out-of-bounds offset_from: ", + CheckInAllocMsg::InboundsTest => "out-of-bounds pointer use: ", + } + ) + } +} + +/// Details of an access to uninitialized bytes where it is not allowed. +#[derive(Debug)] +pub struct UninitBytesAccess { + /// Range of the original memory access. + pub access: AllocRange, + /// Range of the uninit memory that was encountered. (Might not be maximal.) + pub uninit: AllocRange, +} + +/// Information about a size mismatch. +#[derive(Debug)] +pub struct ScalarSizeMismatch { + pub target_size: u64, + pub data_size: u64, +} + +/// Error information for when the program caused Undefined Behavior. +pub enum UndefinedBehaviorInfo { + /// Free-form case. Only for errors that are never caught! + Ub(String), + /// Unreachable code was executed. + Unreachable, + /// A slice/array index projection went out-of-bounds. + BoundsCheckFailed { + len: u64, + index: u64, + }, + /// Something was divided by 0 (x / 0). + DivisionByZero, + /// Something was "remainded" by 0 (x % 0). + RemainderByZero, + /// Signed division overflowed (INT_MIN / -1). + DivisionOverflow, + /// Signed remainder overflowed (INT_MIN % -1). + RemainderOverflow, + /// Overflowing inbounds pointer arithmetic. + PointerArithOverflow, + /// Invalid metadata in a wide pointer (using `str` to avoid allocations). + InvalidMeta(&'static str), + /// Reading a C string that does not end within its allocation. + UnterminatedCString(Pointer), + /// Dereferencing a dangling pointer after it got freed. + PointerUseAfterFree(AllocId), + /// Used a pointer outside the bounds it is valid for. + /// (If `ptr_size > 0`, determines the size of the memory range that was expected to be in-bounds.) + PointerOutOfBounds { + alloc_id: AllocId, + alloc_size: Size, + ptr_offset: i64, + ptr_size: Size, + msg: CheckInAllocMsg, + }, + /// Using an integer as a pointer in the wrong way. + DanglingIntPointer(u64, CheckInAllocMsg), + /// Used a pointer with bad alignment. + AlignmentCheckFailed { + required: Align, + has: Align, + }, + /// Writing to read-only memory. + WriteToReadOnly(AllocId), + // Trying to access the data behind a function pointer. + DerefFunctionPointer(AllocId), + // Trying to access the data behind a vtable pointer. 
+ DerefVTablePointer(AllocId), + /// The value validity check found a problem. + /// Should only be thrown by `validity.rs` and always point out which part of the value + /// is the problem. + ValidationFailure { + /// The "path" to the value in question, e.g. `.0[5].field` for a struct + /// field in the 6th element of an array that is the first element of a tuple. + path: Option, + msg: String, + }, + /// Using a non-boolean `u8` as bool. + InvalidBool(u8), + /// Using a non-character `u32` as character. + InvalidChar(u32), + /// The tag of an enum does not encode an actual discriminant. + InvalidTag(Scalar), + /// Using a pointer-not-to-a-function as function pointer. + InvalidFunctionPointer(Pointer), + /// Using a pointer-not-to-a-vtable as vtable pointer. + InvalidVTablePointer(Pointer), + /// Using a string that is not valid UTF-8, + InvalidStr(std::str::Utf8Error), + /// Using uninitialized data where it is not allowed. + InvalidUninitBytes(Option<(AllocId, UninitBytesAccess)>), + /// Working with a local that is not currently live. + DeadLocal, + /// Data size is not equal to target size. + ScalarSizeMismatch(ScalarSizeMismatch), + /// A discriminant of an uninhabited enum variant is written. + UninhabitedEnumVariantWritten, +} + +impl fmt::Display for UndefinedBehaviorInfo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use UndefinedBehaviorInfo::*; + match self { + Ub(msg) => write!(f, "{msg}"), + Unreachable => write!(f, "entering unreachable code"), + BoundsCheckFailed { ref len, ref index } => { + write!(f, "indexing out of bounds: the len is {len} but the index is {index}") + } + DivisionByZero => write!(f, "dividing by zero"), + RemainderByZero => write!(f, "calculating the remainder with a divisor of zero"), + DivisionOverflow => write!(f, "overflow in signed division (dividing MIN by -1)"), + RemainderOverflow => write!(f, "overflow in signed remainder (dividing MIN by -1)"), + PointerArithOverflow => write!(f, "overflowing in-bounds pointer arithmetic"), + InvalidMeta(msg) => write!(f, "invalid metadata in wide pointer: {msg}"), + UnterminatedCString(p) => write!( + f, + "reading a null-terminated string starting at {p:?} with no null found before end of allocation", + ), + PointerUseAfterFree(a) => { + write!(f, "pointer to {a:?} was dereferenced after this allocation got freed") + } + PointerOutOfBounds { alloc_id, alloc_size, ptr_offset, ptr_size: Size::ZERO, msg } => { + write!( + f, + "{msg}{alloc_id:?} has size {alloc_size}, so pointer at offset {ptr_offset} is out-of-bounds", + alloc_size = alloc_size.bytes(), + ) + } + PointerOutOfBounds { alloc_id, alloc_size, ptr_offset, ptr_size, msg } => write!( + f, + "{msg}{alloc_id:?} has size {alloc_size}, so pointer to {ptr_size} byte{ptr_size_p} starting at offset {ptr_offset} is out-of-bounds", + alloc_size = alloc_size.bytes(), + ptr_size = ptr_size.bytes(), + ptr_size_p = pluralize!(ptr_size.bytes()), + ), + DanglingIntPointer(i, msg) => { + write!( + f, + "{msg}{pointer} is a dangling pointer (it has no provenance)", + pointer = Pointer::>::from_addr(*i), + ) + } + AlignmentCheckFailed { required, has } => write!( + f, + "accessing memory with alignment {has}, but alignment {required} is required", + has = has.bytes(), + required = required.bytes() + ), + WriteToReadOnly(a) => write!(f, "writing to {a:?} which is read-only"), + DerefFunctionPointer(a) => write!(f, "accessing {a:?} which contains a function"), + DerefVTablePointer(a) => write!(f, "accessing {a:?} which contains a vtable"), + 
ValidationFailure { path: None, msg } => { + write!(f, "constructing invalid value: {msg}") + } + ValidationFailure { path: Some(path), msg } => { + write!(f, "constructing invalid value at {path}: {msg}") + } + InvalidBool(b) => { + write!(f, "interpreting an invalid 8-bit value as a bool: 0x{b:02x}") + } + InvalidChar(c) => { + write!(f, "interpreting an invalid 32-bit value as a char: 0x{c:08x}") + } + InvalidTag(val) => write!(f, "enum value has invalid tag: {val:x}"), + InvalidFunctionPointer(p) => { + write!(f, "using {p:?} as function pointer but it does not point to a function") + } + InvalidVTablePointer(p) => { + write!(f, "using {p:?} as vtable pointer but it does not point to a vtable") + } + InvalidStr(err) => write!(f, "this string is not valid UTF-8: {err}"), + InvalidUninitBytes(Some((alloc, info))) => write!( + f, + "reading memory at {alloc:?}{access:?}, \ + but memory is uninitialized at {uninit:?}, \ + and this operation requires initialized memory", + access = info.access, + uninit = info.uninit, + ), + InvalidUninitBytes(None) => write!( + f, + "using uninitialized data, but this operation requires initialized memory" + ), + DeadLocal => write!(f, "accessing a dead local variable"), + ScalarSizeMismatch(self::ScalarSizeMismatch { target_size, data_size }) => write!( + f, + "scalar size mismatch: expected {target_size} bytes but got {data_size} bytes instead", + ), + UninhabitedEnumVariantWritten => { + write!(f, "writing discriminant of an uninhabited enum") + } + } + } +} + +/// Error information for when the program did something that might (or might not) be correct +/// to do according to the Rust spec, but due to limitations in the interpreter, the +/// operation could not be carried out. These limitations can differ between CTFE and the +/// Miri engine, e.g., CTFE does not support dereferencing pointers at integral addresses. +pub enum UnsupportedOpInfo { + /// Free-form case. Only for errors that are never caught! + Unsupported(String), + /// Encountered a pointer where we needed raw bytes. + ReadPointerAsBytes, + /// Overwriting parts of a pointer; the resulting state cannot be represented in our + /// `Allocation` data structure. See . + PartialPointerOverwrite(Pointer), + // + // The variants below are only reachable from CTFE/const prop, miri will never emit them. + // + /// Accessing thread local statics + ThreadLocalStatic(DefId), + /// Accessing an unsupported extern static. + ReadExternStatic(DefId), +} + +impl fmt::Display for UnsupportedOpInfo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use UnsupportedOpInfo::*; + match self { + Unsupported(ref msg) => write!(f, "{msg}"), + ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"), + PartialPointerOverwrite(ptr) => { + write!(f, "unable to overwrite parts of a pointer in memory at {ptr:?}") + } + ThreadLocalStatic(did) => write!(f, "cannot access thread local static ({did:?})"), + ReadExternStatic(did) => write!(f, "cannot read from extern static ({did:?})"), + } + } +} + +/// Error information for when the program exhausted the resources granted to it +/// by the interpreter. +pub enum ResourceExhaustionInfo { + /// The stack grew too big. + StackFrameLimitReached, + /// The program ran for too long. + /// + /// The exact limit is set by the `const_eval_limit` attribute. + StepLimitReached, + /// There is not enough memory to perform an allocation. 
+    MemoryExhausted,
+}
+
+impl fmt::Display for ResourceExhaustionInfo {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use ResourceExhaustionInfo::*;
+        match self {
+            StackFrameLimitReached => {
+                write!(f, "reached the configured maximum number of stack frames")
+            }
+            StepLimitReached => {
+                write!(f, "exceeded interpreter step limit (see `#[const_eval_limit]`)")
+            }
+            MemoryExhausted => {
+                write!(f, "tried to allocate more memory than available to compiler")
+            }
+        }
+    }
+}
+
+/// A trait to work around not having trait object upcasting.
+pub trait AsAny: Any {
+    fn as_any(&self) -> &dyn Any;
+}
+impl<T: Any> AsAny for T {
+    #[inline(always)]
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
+
+/// A trait for machine-specific errors (or other "machine stop" conditions).
+pub trait MachineStopType: AsAny + fmt::Display + Send {
+    /// If `true`, emit a hard error instead of going through the `CONST_ERR` lint.
+    fn is_hard_err(&self) -> bool {
+        false
+    }
+}
+
+impl dyn MachineStopType {
+    #[inline(always)]
+    pub fn downcast_ref<T: Any>(&self) -> Option<&T> {
+        self.as_any().downcast_ref()
+    }
+}
+
+pub enum InterpError<'tcx> {
+    /// The program caused undefined behavior.
+    UndefinedBehavior(UndefinedBehaviorInfo),
+    /// The program did something the interpreter does not support (some of these *might* be UB
+    /// but the interpreter is not sure).
+    Unsupported(UnsupportedOpInfo),
+    /// The program was invalid (ill-typed, bad MIR, not sufficiently monomorphized, ...).
+    InvalidProgram(InvalidProgramInfo<'tcx>),
+    /// The program exhausted the interpreter's resources (stack/heap too big,
+    /// execution takes too long, ...).
+    ResourceExhaustion(ResourceExhaustionInfo),
+    /// Stop execution for a machine-controlled reason. This is never raised by
+    /// the core engine itself.
+    MachineStop(Box<dyn MachineStopType>),
+}
+
+pub type InterpResult<'tcx, T = ()> = Result<T, InterpErrorInfo<'tcx>>;
+
+impl fmt::Display for InterpError<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use InterpError::*;
+        match *self {
+            Unsupported(ref msg) => write!(f, "{msg}"),
+            InvalidProgram(ref msg) => write!(f, "{msg}"),
+            UndefinedBehavior(ref msg) => write!(f, "{msg}"),
+            ResourceExhaustion(ref msg) => write!(f, "{msg}"),
+            MachineStop(ref msg) => write!(f, "{msg}"),
+        }
+    }
+}
+
+// Forward `Debug` to `Display`, so it does not look awful.
+impl fmt::Debug for InterpError<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(self, f)
+    }
+}
+
+impl InterpError<'_> {
+    /// Some errors do string formatting even if the error is never printed.
+    /// To avoid performance issues, there are places where we want to be sure to never raise these formatting errors,
+    /// so this method lets us detect them and `bug!` on unexpected errors.
+    pub fn formatted_string(&self) -> bool {
+        matches!(
+            self,
+            InterpError::Unsupported(UnsupportedOpInfo::Unsupported(_))
+                | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::ValidationFailure { .. })
+                | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::Ub(_))
+        )
+    }
+
+    /// Should this error be reported as a hard error, preventing compilation, or a soft error,
+    /// causing a deny-by-default lint?
+ pub fn is_hard_err(&self) -> bool { + use InterpError::*; + match *self { + MachineStop(ref err) => err.is_hard_err(), + UndefinedBehavior(_) => true, + ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted) => true, + _ => false, + } + } +} diff --git a/compiler/rustc_middle/src/mir/interpret/mod.rs b/compiler/rustc_middle/src/mir/interpret/mod.rs new file mode 100644 index 000000000..967f8ece1 --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/mod.rs @@ -0,0 +1,633 @@ +//! An interpreter for MIR used in CTFE and by miri. + +#[macro_export] +macro_rules! err_unsup { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::Unsupported( + $crate::mir::interpret::UnsupportedOpInfo::$($tt)* + ) + }; +} + +#[macro_export] +macro_rules! err_unsup_format { + ($($tt:tt)*) => { err_unsup!(Unsupported(format!($($tt)*))) }; +} + +#[macro_export] +macro_rules! err_inval { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::InvalidProgram( + $crate::mir::interpret::InvalidProgramInfo::$($tt)* + ) + }; +} + +#[macro_export] +macro_rules! err_ub { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::UndefinedBehavior( + $crate::mir::interpret::UndefinedBehaviorInfo::$($tt)* + ) + }; +} + +#[macro_export] +macro_rules! err_ub_format { + ($($tt:tt)*) => { err_ub!(Ub(format!($($tt)*))) }; +} + +#[macro_export] +macro_rules! err_exhaust { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::ResourceExhaustion( + $crate::mir::interpret::ResourceExhaustionInfo::$($tt)* + ) + }; +} + +#[macro_export] +macro_rules! err_machine_stop { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::MachineStop(Box::new($($tt)*)) + }; +} + +// In the `throw_*` macros, avoid `return` to make them work with `try {}`. +#[macro_export] +macro_rules! throw_unsup { + ($($tt:tt)*) => { do yeet err_unsup!($($tt)*) }; +} + +#[macro_export] +macro_rules! throw_unsup_format { + ($($tt:tt)*) => { throw_unsup!(Unsupported(format!($($tt)*))) }; +} + +#[macro_export] +macro_rules! throw_inval { + ($($tt:tt)*) => { do yeet err_inval!($($tt)*) }; +} + +#[macro_export] +macro_rules! throw_ub { + ($($tt:tt)*) => { do yeet err_ub!($($tt)*) }; +} + +#[macro_export] +macro_rules! throw_ub_format { + ($($tt:tt)*) => { throw_ub!(Ub(format!($($tt)*))) }; +} + +#[macro_export] +macro_rules! throw_exhaust { + ($($tt:tt)*) => { do yeet err_exhaust!($($tt)*) }; +} + +#[macro_export] +macro_rules! 
throw_machine_stop { + ($($tt:tt)*) => { do yeet err_machine_stop!($($tt)*) }; +} + +mod allocation; +mod error; +mod pointer; +mod queries; +mod value; + +use std::convert::TryFrom; +use std::fmt; +use std::io; +use std::io::{Read, Write}; +use std::num::{NonZeroU32, NonZeroU64}; +use std::sync::atomic::{AtomicU32, Ordering}; + +use rustc_ast::LitKind; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sync::{HashMapExt, Lock}; +use rustc_data_structures::tiny_list::TinyList; +use rustc_hir::def_id::DefId; +use rustc_macros::HashStable; +use rustc_middle::ty::print::with_no_trimmed_paths; +use rustc_serialize::{Decodable, Encodable}; +use rustc_target::abi::Endian; + +use crate::mir; +use crate::ty::codec::{TyDecoder, TyEncoder}; +use crate::ty::subst::GenericArgKind; +use crate::ty::{self, Instance, Ty, TyCtxt}; + +pub use self::error::{ + struct_error, CheckInAllocMsg, ErrorHandled, EvalToAllocationRawResult, EvalToConstValueResult, + EvalToValTreeResult, InterpError, InterpErrorInfo, InterpResult, InvalidProgramInfo, + MachineStopType, ResourceExhaustionInfo, ScalarSizeMismatch, UndefinedBehaviorInfo, + UninitBytesAccess, UnsupportedOpInfo, +}; + +pub use self::value::{get_slice_bytes, ConstAlloc, ConstValue, Scalar, ScalarMaybeUninit}; + +pub use self::allocation::{ + alloc_range, AllocRange, Allocation, ConstAllocation, InitChunk, InitChunkIter, InitMask, + Relocations, +}; + +pub use self::pointer::{Pointer, PointerArithmetic, Provenance}; + +/// Uniquely identifies one of the following: +/// - A constant +/// - A static +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, TyEncodable, TyDecodable)] +#[derive(HashStable, Lift)] +pub struct GlobalId<'tcx> { + /// For a constant or static, the `Instance` of the item itself. + /// For a promoted global, the `Instance` of the function they belong to. + pub instance: ty::Instance<'tcx>, + + /// The index for promoted globals within their function's `mir::Body`. + pub promoted: Option, +} + +impl<'tcx> GlobalId<'tcx> { + pub fn display(self, tcx: TyCtxt<'tcx>) -> String { + let instance_name = with_no_trimmed_paths!(tcx.def_path_str(self.instance.def.def_id())); + if let Some(promoted) = self.promoted { + format!("{}::{:?}", instance_name, promoted) + } else { + instance_name + } + } +} + +/// Input argument for `tcx.lit_to_const`. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, HashStable)] +pub struct LitToConstInput<'tcx> { + /// The absolute value of the resultant constant. + pub lit: &'tcx LitKind, + /// The type of the constant. + pub ty: Ty<'tcx>, + /// If the constant is negative. + pub neg: bool, +} + +/// Error type for `tcx.lit_to_const`. +#[derive(Copy, Clone, Debug, Eq, PartialEq, HashStable)] +pub enum LitToConstError { + /// The literal's inferred type did not match the expected `ty` in the input. + /// This is used for graceful error handling (`delay_span_bug`) in + /// type checking (`Const::from_anon_const`). + TypeError, + Reported, +} + +#[derive(Copy, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct AllocId(pub NonZeroU64); + +// We want the `Debug` output to be readable as it is used by `derive(Debug)` for +// all the Miri types. +impl fmt::Debug for AllocId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if f.alternate() { write!(f, "a{}", self.0) } else { write!(f, "alloc{}", self.0) } + } +} + +// No "Display" since AllocIds are not usually user-visible. 
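+
+// Illustrative sketch of how the `err_*`/`throw_*` macros above fit together: `err_ub!(..)`
+// only builds an `InterpError::UndefinedBehavior(..)` value, while `throw_ub!(..)` returns it
+// early via `do yeet`; `InterpError` converts into `InterpErrorInfo`, so this works in any
+// function returning `InterpResult`. The `check_divisor` helper is hypothetical and exists
+// only to demonstrate the macro layering:
+//
+//     fn check_divisor<'tcx>(divisor: u64) -> InterpResult<'tcx> {
+//         if divisor == 0 {
+//             // Same as `do yeet err_ub!(DivisionByZero)`.
+//             throw_ub!(DivisionByZero);
+//         }
+//         Ok(())
+//     }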
+ +#[derive(TyDecodable, TyEncodable)] +enum AllocDiscriminant { + Alloc, + Fn, + VTable, + Static, +} + +pub fn specialized_encode_alloc_id<'tcx, E: TyEncoder>>( + encoder: &mut E, + tcx: TyCtxt<'tcx>, + alloc_id: AllocId, +) { + match tcx.global_alloc(alloc_id) { + GlobalAlloc::Memory(alloc) => { + trace!("encoding {:?} with {:#?}", alloc_id, alloc); + AllocDiscriminant::Alloc.encode(encoder); + alloc.encode(encoder); + } + GlobalAlloc::Function(fn_instance) => { + trace!("encoding {:?} with {:#?}", alloc_id, fn_instance); + AllocDiscriminant::Fn.encode(encoder); + fn_instance.encode(encoder); + } + GlobalAlloc::VTable(ty, poly_trait_ref) => { + trace!("encoding {:?} with {ty:#?}, {poly_trait_ref:#?}", alloc_id); + AllocDiscriminant::VTable.encode(encoder); + ty.encode(encoder); + poly_trait_ref.encode(encoder); + } + GlobalAlloc::Static(did) => { + assert!(!tcx.is_thread_local_static(did)); + // References to statics doesn't need to know about their allocations, + // just about its `DefId`. + AllocDiscriminant::Static.encode(encoder); + did.encode(encoder); + } + } +} + +// Used to avoid infinite recursion when decoding cyclic allocations. +type DecodingSessionId = NonZeroU32; + +#[derive(Clone)] +enum State { + Empty, + InProgressNonAlloc(TinyList), + InProgress(TinyList, AllocId), + Done(AllocId), +} + +pub struct AllocDecodingState { + // For each `AllocId`, we keep track of which decoding state it's currently in. + decoding_state: Vec>, + // The offsets of each allocation in the data stream. + data_offsets: Vec, +} + +impl AllocDecodingState { + #[inline] + pub fn new_decoding_session(&self) -> AllocDecodingSession<'_> { + static DECODER_SESSION_ID: AtomicU32 = AtomicU32::new(0); + let counter = DECODER_SESSION_ID.fetch_add(1, Ordering::SeqCst); + + // Make sure this is never zero. + let session_id = DecodingSessionId::new((counter & 0x7FFFFFFF) + 1).unwrap(); + + AllocDecodingSession { state: self, session_id } + } + + pub fn new(data_offsets: Vec) -> Self { + let decoding_state = vec![Lock::new(State::Empty); data_offsets.len()]; + + Self { decoding_state, data_offsets } + } +} + +#[derive(Copy, Clone)] +pub struct AllocDecodingSession<'s> { + state: &'s AllocDecodingState, + session_id: DecodingSessionId, +} + +impl<'s> AllocDecodingSession<'s> { + /// Decodes an `AllocId` in a thread-safe way. + pub fn decode_alloc_id<'tcx, D>(&self, decoder: &mut D) -> AllocId + where + D: TyDecoder>, + { + // Read the index of the allocation. + let idx = usize::try_from(decoder.read_u32()).unwrap(); + let pos = usize::try_from(self.state.data_offsets[idx]).unwrap(); + + // Decode the `AllocDiscriminant` now so that we know if we have to reserve an + // `AllocId`. + let (alloc_kind, pos) = decoder.with_position(pos, |decoder| { + let alloc_kind = AllocDiscriminant::decode(decoder); + (alloc_kind, decoder.position()) + }); + + // Check the decoding state to see if it's already decoded or if we should + // decode it here. + let alloc_id = { + let mut entry = self.state.decoding_state[idx].lock(); + + match *entry { + State::Done(alloc_id) => { + return alloc_id; + } + ref mut entry @ State::Empty => { + // We are allowed to decode. + match alloc_kind { + AllocDiscriminant::Alloc => { + // If this is an allocation, we need to reserve an + // `AllocId` so we can decode cyclic graphs. 
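+                            // (Cyclic graphs can arise because an allocation's relocations may
+                            // reference `AllocId`s that are still being decoded, possibly even
+                            // this very one; reserving the ID up front breaks that cycle.)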
+ let alloc_id = decoder.interner().reserve_alloc_id(); + *entry = + State::InProgress(TinyList::new_single(self.session_id), alloc_id); + Some(alloc_id) + } + AllocDiscriminant::Fn + | AllocDiscriminant::Static + | AllocDiscriminant::VTable => { + // Fns and statics cannot be cyclic, and their `AllocId` + // is determined later by interning. + *entry = + State::InProgressNonAlloc(TinyList::new_single(self.session_id)); + None + } + } + } + State::InProgressNonAlloc(ref mut sessions) => { + if sessions.contains(&self.session_id) { + bug!("this should be unreachable"); + } else { + // Start decoding concurrently. + sessions.insert(self.session_id); + None + } + } + State::InProgress(ref mut sessions, alloc_id) => { + if sessions.contains(&self.session_id) { + // Don't recurse. + return alloc_id; + } else { + // Start decoding concurrently. + sessions.insert(self.session_id); + Some(alloc_id) + } + } + } + }; + + // Now decode the actual data. + let alloc_id = decoder.with_position(pos, |decoder| { + match alloc_kind { + AllocDiscriminant::Alloc => { + let alloc = as Decodable<_>>::decode(decoder); + // We already have a reserved `AllocId`. + let alloc_id = alloc_id.unwrap(); + trace!("decoded alloc {:?}: {:#?}", alloc_id, alloc); + decoder.interner().set_alloc_id_same_memory(alloc_id, alloc); + alloc_id + } + AllocDiscriminant::Fn => { + assert!(alloc_id.is_none()); + trace!("creating fn alloc ID"); + let instance = ty::Instance::decode(decoder); + trace!("decoded fn alloc instance: {:?}", instance); + let alloc_id = decoder.interner().create_fn_alloc(instance); + alloc_id + } + AllocDiscriminant::VTable => { + assert!(alloc_id.is_none()); + trace!("creating vtable alloc ID"); + let ty = as Decodable>::decode(decoder); + let poly_trait_ref = + > as Decodable>::decode(decoder); + trace!("decoded vtable alloc instance: {ty:?}, {poly_trait_ref:?}"); + let alloc_id = decoder.interner().create_vtable_alloc(ty, poly_trait_ref); + alloc_id + } + AllocDiscriminant::Static => { + assert!(alloc_id.is_none()); + trace!("creating extern static alloc ID"); + let did = >::decode(decoder); + trace!("decoded static def-ID: {:?}", did); + let alloc_id = decoder.interner().create_static_alloc(did); + alloc_id + } + } + }); + + self.state.decoding_state[idx].with_lock(|entry| { + *entry = State::Done(alloc_id); + }); + + alloc_id + } +} + +/// An allocation in the global (tcx-managed) memory can be either a function pointer, +/// a static, or a "real" allocation with some data in it. +#[derive(Debug, Clone, Eq, PartialEq, Hash, TyDecodable, TyEncodable, HashStable)] +pub enum GlobalAlloc<'tcx> { + /// The alloc ID is used as a function pointer. + Function(Instance<'tcx>), + /// This alloc ID points to a symbolic (not-reified) vtable. + VTable(Ty<'tcx>, Option>), + /// The alloc ID points to a "lazy" static variable that did not get computed (yet). + /// This is also used to break the cycle in recursive statics. + Static(DefId), + /// The alloc ID points to memory. 
+ Memory(ConstAllocation<'tcx>), +} + +impl<'tcx> GlobalAlloc<'tcx> { + /// Panics if the `GlobalAlloc` does not refer to an `GlobalAlloc::Memory` + #[track_caller] + #[inline] + pub fn unwrap_memory(&self) -> ConstAllocation<'tcx> { + match *self { + GlobalAlloc::Memory(mem) => mem, + _ => bug!("expected memory, got {:?}", self), + } + } + + /// Panics if the `GlobalAlloc` is not `GlobalAlloc::Function` + #[track_caller] + #[inline] + pub fn unwrap_fn(&self) -> Instance<'tcx> { + match *self { + GlobalAlloc::Function(instance) => instance, + _ => bug!("expected function, got {:?}", self), + } + } + + /// Panics if the `GlobalAlloc` is not `GlobalAlloc::VTable` + #[track_caller] + #[inline] + pub fn unwrap_vtable(&self) -> (Ty<'tcx>, Option>) { + match *self { + GlobalAlloc::VTable(ty, poly_trait_ref) => (ty, poly_trait_ref), + _ => bug!("expected vtable, got {:?}", self), + } + } +} + +pub(crate) struct AllocMap<'tcx> { + /// Maps `AllocId`s to their corresponding allocations. + alloc_map: FxHashMap>, + + /// Used to ensure that statics and functions only get one associated `AllocId`. + /// Should never contain a `GlobalAlloc::Memory`! + // + // FIXME: Should we just have two separate dedup maps for statics and functions each? + dedup: FxHashMap, AllocId>, + + /// The `AllocId` to assign to the next requested ID. + /// Always incremented; never gets smaller. + next_id: AllocId, +} + +impl<'tcx> AllocMap<'tcx> { + pub(crate) fn new() -> Self { + AllocMap { + alloc_map: Default::default(), + dedup: Default::default(), + next_id: AllocId(NonZeroU64::new(1).unwrap()), + } + } + fn reserve(&mut self) -> AllocId { + let next = self.next_id; + self.next_id.0 = self.next_id.0.checked_add(1).expect( + "You overflowed a u64 by incrementing by 1... \ + You've just earned yourself a free drink if we ever meet. \ + Seriously, how did you do that?!", + ); + next + } +} + +impl<'tcx> TyCtxt<'tcx> { + /// Obtains a new allocation ID that can be referenced but does not + /// yet have an allocation backing it. + /// + /// Make sure to call `set_alloc_id_memory` or `set_alloc_id_same_memory` before returning such + /// an `AllocId` from a query. + pub fn reserve_alloc_id(self) -> AllocId { + self.alloc_map.lock().reserve() + } + + /// Reserves a new ID *if* this allocation has not been dedup-reserved before. + /// Should only be used for "symbolic" allocations (function pointers, vtables, statics), we + /// don't want to dedup IDs for "real" memory! + fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>) -> AllocId { + let mut alloc_map = self.alloc_map.lock(); + match alloc { + GlobalAlloc::Function(..) | GlobalAlloc::Static(..) | GlobalAlloc::VTable(..) => {} + GlobalAlloc::Memory(..) => bug!("Trying to dedup-reserve memory with real data!"), + } + if let Some(&alloc_id) = alloc_map.dedup.get(&alloc) { + return alloc_id; + } + let id = alloc_map.reserve(); + debug!("creating alloc {alloc:?} with id {id:?}"); + alloc_map.alloc_map.insert(id, alloc.clone()); + alloc_map.dedup.insert(alloc, id); + id + } + + /// Generates an `AllocId` for a static or return a cached one in case this function has been + /// called on the same static before. + pub fn create_static_alloc(self, static_id: DefId) -> AllocId { + self.reserve_and_set_dedup(GlobalAlloc::Static(static_id)) + } + + /// Generates an `AllocId` for a function. Depending on the function type, + /// this might get deduplicated or assigned a new ID each time. 
+ pub fn create_fn_alloc(self, instance: Instance<'tcx>) -> AllocId { + // Functions cannot be identified by pointers, as asm-equal functions can get deduplicated + // by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be + // duplicated across crates. + // We thus generate a new `AllocId` for every mention of a function. This means that + // `main as fn() == main as fn()` is false, while `let x = main as fn(); x == x` is true. + // However, formatting code relies on function identity (see #58320), so we only do + // this for generic functions. Lifetime parameters are ignored. + let is_generic = instance + .substs + .into_iter() + .any(|kind| !matches!(kind.unpack(), GenericArgKind::Lifetime(_))); + if is_generic { + // Get a fresh ID. + let mut alloc_map = self.alloc_map.lock(); + let id = alloc_map.reserve(); + alloc_map.alloc_map.insert(id, GlobalAlloc::Function(instance)); + id + } else { + // Deduplicate. + self.reserve_and_set_dedup(GlobalAlloc::Function(instance)) + } + } + + /// Generates an `AllocId` for a (symbolic, not-reified) vtable. Will get deduplicated. + pub fn create_vtable_alloc( + self, + ty: Ty<'tcx>, + poly_trait_ref: Option>, + ) -> AllocId { + self.reserve_and_set_dedup(GlobalAlloc::VTable(ty, poly_trait_ref)) + } + + /// Interns the `Allocation` and return a new `AllocId`, even if there's already an identical + /// `Allocation` with a different `AllocId`. + /// Statics with identical content will still point to the same `Allocation`, i.e., + /// their data will be deduplicated through `Allocation` interning -- but they + /// are different places in memory and as such need different IDs. + pub fn create_memory_alloc(self, mem: ConstAllocation<'tcx>) -> AllocId { + let id = self.reserve_alloc_id(); + self.set_alloc_id_memory(id, mem); + id + } + + /// Returns `None` in case the `AllocId` is dangling. An `InterpretCx` can still have a + /// local `Allocation` for that `AllocId`, but having such an `AllocId` in a constant is + /// illegal and will likely ICE. + /// This function exists to allow const eval to detect the difference between evaluation- + /// local dangling pointers and allocations in constants/statics. + #[inline] + pub fn try_get_global_alloc(self, id: AllocId) -> Option> { + self.alloc_map.lock().alloc_map.get(&id).cloned() + } + + #[inline] + #[track_caller] + /// Panics in case the `AllocId` is dangling. Since that is impossible for `AllocId`s in + /// constants (as all constants must pass interning and validation that check for dangling + /// ids), this function is frequently used throughout rustc, but should not be used within + /// the miri engine. + pub fn global_alloc(self, id: AllocId) -> GlobalAlloc<'tcx> { + match self.try_get_global_alloc(id) { + Some(alloc) => alloc, + None => bug!("could not find allocation for {id:?}"), + } + } + + /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. Trying to + /// call this function twice, even with the same `Allocation` will ICE the compiler. + pub fn set_alloc_id_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) { + if let Some(old) = self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Memory(mem)) { + bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}"); + } + } + + /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. May be called + /// twice for the same `(AllocId, Allocation)` pair. 
+    fn set_alloc_id_same_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) {
+        self.alloc_map.lock().alloc_map.insert_same(id, GlobalAlloc::Memory(mem));
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Methods to access integers in the target endianness
+////////////////////////////////////////////////////////////////////////////////
+
+#[inline]
+pub fn write_target_uint(
+    endianness: Endian,
+    mut target: &mut [u8],
+    data: u128,
+) -> Result<(), io::Error> {
+    // This u128 holds an "any-size uint" (since smaller uints can fit in it).
+    // So we do not write all bytes of the u128, just the "payload".
+    match endianness {
+        Endian::Little => target.write(&data.to_le_bytes())?,
+        Endian::Big => target.write(&data.to_be_bytes()[16 - target.len()..])?,
+    };
+    debug_assert!(target.len() == 0); // We should have filled the target buffer.
+    Ok(())
+}
+
+#[inline]
+pub fn read_target_uint(endianness: Endian, mut source: &[u8]) -> Result<u128, io::Error> {
+    // This u128 holds an "any-size uint" (since smaller uints can fit in it).
+    let mut buf = [0u8; std::mem::size_of::<u128>()];
+    // So we do not read exactly 16 bytes into the u128, just the "payload".
+    let uint = match endianness {
+        Endian::Little => {
+            source.read(&mut buf)?;
+            Ok(u128::from_le_bytes(buf))
+        }
+        Endian::Big => {
+            source.read(&mut buf[16 - source.len()..])?;
+            Ok(u128::from_be_bytes(buf))
+        }
+    };
+    debug_assert!(source.len() == 0); // We should have consumed the source buffer.
+    uint
+}
diff --git a/compiler/rustc_middle/src/mir/interpret/pointer.rs b/compiler/rustc_middle/src/mir/interpret/pointer.rs
new file mode 100644
index 000000000..384954cbb
--- /dev/null
+++ b/compiler/rustc_middle/src/mir/interpret/pointer.rs
@@ -0,0 +1,307 @@
+use super::{AllocId, InterpResult};
+
+use rustc_macros::HashStable;
+use rustc_target::abi::{HasDataLayout, Size};
+
+use std::convert::{TryFrom, TryInto};
+use std::fmt;
+
+////////////////////////////////////////////////////////////////////////////////
+// Pointer arithmetic
+////////////////////////////////////////////////////////////////////////////////
+
+pub trait PointerArithmetic: HasDataLayout {
+    // These are not supposed to be overridden.
+
+    #[inline(always)]
+    fn pointer_size(&self) -> Size {
+        self.data_layout().pointer_size
+    }
+
+    #[inline(always)]
+    fn max_size_of_val(&self) -> Size {
+        Size::from_bytes(self.machine_isize_max())
+    }
+
+    #[inline]
+    fn machine_usize_max(&self) -> u64 {
+        self.pointer_size().unsigned_int_max().try_into().unwrap()
+    }
+
+    #[inline]
+    fn machine_isize_min(&self) -> i64 {
+        self.pointer_size().signed_int_min().try_into().unwrap()
+    }
+
+    #[inline]
+    fn machine_isize_max(&self) -> i64 {
+        self.pointer_size().signed_int_max().try_into().unwrap()
+    }
+
+    #[inline]
+    fn machine_usize_to_isize(&self, val: u64) -> i64 {
+        let val = val as i64;
+        // Now wrap-around into the machine_isize range.
+        if val > self.machine_isize_max() {
+            // This can only happen if the ptr size is < 64, so we know max_usize_plus_1 fits into
+            // an i64.
+            debug_assert!(self.pointer_size().bits() < 64);
+            let max_usize_plus_1 = 1u128 << self.pointer_size().bits();
+            val - i64::try_from(max_usize_plus_1).unwrap()
+        } else {
+            val
+        }
+    }
+
+    /// Helper function: truncate given value-"overflowed flag" pair to pointer size and
+    /// update "overflowed flag" if there was an overflow.
+    /// This should be called by all the other methods before returning!
+ #[inline] + fn truncate_to_ptr(&self, (val, over): (u64, bool)) -> (u64, bool) { + let val = u128::from(val); + let max_ptr_plus_1 = 1u128 << self.pointer_size().bits(); + (u64::try_from(val % max_ptr_plus_1).unwrap(), over || val >= max_ptr_plus_1) + } + + #[inline] + fn overflowing_offset(&self, val: u64, i: u64) -> (u64, bool) { + // We do not need to check if i fits in a machine usize. If it doesn't, + // either the wrapping_add will wrap or res will not fit in a pointer. + let res = val.overflowing_add(i); + self.truncate_to_ptr(res) + } + + #[inline] + fn overflowing_signed_offset(&self, val: u64, i: i64) -> (u64, bool) { + // We need to make sure that i fits in a machine isize. + let n = i.unsigned_abs(); + if i >= 0 { + let (val, over) = self.overflowing_offset(val, n); + (val, over || i > self.machine_isize_max()) + } else { + let res = val.overflowing_sub(n); + let (val, over) = self.truncate_to_ptr(res); + (val, over || i < self.machine_isize_min()) + } + } + + #[inline] + fn offset<'tcx>(&self, val: u64, i: u64) -> InterpResult<'tcx, u64> { + let (res, over) = self.overflowing_offset(val, i); + if over { throw_ub!(PointerArithOverflow) } else { Ok(res) } + } + + #[inline] + fn signed_offset<'tcx>(&self, val: u64, i: i64) -> InterpResult<'tcx, u64> { + let (res, over) = self.overflowing_signed_offset(val, i); + if over { throw_ub!(PointerArithOverflow) } else { Ok(res) } + } +} + +impl PointerArithmetic for T {} + +/// This trait abstracts over the kind of provenance that is associated with a `Pointer`. It is +/// mostly opaque; the `Machine` trait extends it with some more operations that also have access to +/// some global state. +/// We don't actually care about this `Debug` bound (we use `Provenance::fmt` to format the entire +/// pointer), but `derive` adds some unnecessary bounds. +pub trait Provenance: Copy + fmt::Debug { + /// Says whether the `offset` field of `Pointer`s with this provenance is the actual physical address. + /// If `true, ptr-to-int casts work by simply discarding the provenance. + /// If `false`, ptr-to-int casts are not supported. The offset *must* be relative in that case. + const OFFSET_IS_ADDR: bool; + + /// We also use this trait to control whether to abort execution when a pointer is being partially overwritten + /// (this avoids a separate trait in `allocation.rs` just for this purpose). + const ERR_ON_PARTIAL_PTR_OVERWRITE: bool; + + /// Determines how a pointer should be printed. + fn fmt(ptr: &Pointer, f: &mut fmt::Formatter<'_>) -> fmt::Result + where + Self: Sized; + + /// If `OFFSET_IS_ADDR == false`, provenance must always be able to + /// identify the allocation this ptr points to (i.e., this must return `Some`). + /// Otherwise this function is best-effort (but must agree with `Machine::ptr_get_alloc`). + /// (Identifying the offset in that allocation, however, is harder -- use `Memory::ptr_get_alloc` for that.) + fn get_alloc_id(self) -> Option; +} + +impl Provenance for AllocId { + // With the `AllocId` as provenance, the `offset` is interpreted *relative to the allocation*, + // so ptr-to-int casts are not possible (since we do not know the global physical offset). + const OFFSET_IS_ADDR: bool = false; + + // For now, do not allow this, so that we keep our options open. + const ERR_ON_PARTIAL_PTR_OVERWRITE: bool = true; + + fn fmt(ptr: &Pointer, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Forward `alternate` flag to `alloc_id` printing. 
+ if f.alternate() { + write!(f, "{:#?}", ptr.provenance)?; + } else { + write!(f, "{:?}", ptr.provenance)?; + } + // Print offset only if it is non-zero. + if ptr.offset.bytes() > 0 { + write!(f, "+{:#x}", ptr.offset.bytes())?; + } + Ok(()) + } + + fn get_alloc_id(self) -> Option { + Some(self) + } +} + +/// Represents a pointer in the Miri engine. +/// +/// Pointers are "tagged" with provenance information; typically the `AllocId` they belong to. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, TyEncodable, TyDecodable, Hash)] +#[derive(HashStable)] +pub struct Pointer { + pub(super) offset: Size, // kept private to avoid accidental misinterpretation (meaning depends on `Prov` type) + pub provenance: Prov, +} + +static_assert_size!(Pointer, 16); +// `Option` pointers are also passed around quite a bit +// (but not stored in permanent machine state). +static_assert_size!(Pointer>, 16); + +// We want the `Debug` output to be readable as it is used by `derive(Debug)` for +// all the Miri types. +impl fmt::Debug for Pointer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Provenance::fmt(self, f) + } +} + +impl fmt::Debug for Pointer> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.provenance { + Some(prov) => Provenance::fmt(&Pointer::new(prov, self.offset), f), + None => write!(f, "{:#x}[noalloc]", self.offset.bytes()), + } + } +} + +impl fmt::Display for Pointer> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.provenance.is_none() && self.offset.bytes() == 0 { + write!(f, "null pointer") + } else { + fmt::Debug::fmt(self, f) + } + } +} + +/// Produces a `Pointer` that points to the beginning of the `Allocation`. +impl From for Pointer { + #[inline(always)] + fn from(alloc_id: AllocId) -> Self { + Pointer::new(alloc_id, Size::ZERO) + } +} + +impl From> for Pointer> { + #[inline(always)] + fn from(ptr: Pointer) -> Self { + let (prov, offset) = ptr.into_parts(); + Pointer::new(Some(prov), offset) + } +} + +impl Pointer> { + /// Convert this pointer that *might* have a provenance into a pointer that *definitely* has a + /// provenance, or an absolute address. + /// + /// This is rarely what you want; call `ptr_try_get_alloc_id` instead. + pub fn into_pointer_or_addr(self) -> Result, Size> { + match self.provenance { + Some(prov) => Ok(Pointer::new(prov, self.offset)), + None => Err(self.offset), + } + } + + /// Returns the absolute address the pointer points to. + /// Only works if Prov::OFFSET_IS_ADDR is true! + pub fn addr(self) -> Size + where + Prov: Provenance, + { + assert!(Prov::OFFSET_IS_ADDR); + self.offset + } +} + +impl Pointer> { + #[inline(always)] + pub fn from_addr(addr: u64) -> Self { + Pointer { provenance: None, offset: Size::from_bytes(addr) } + } + + #[inline(always)] + pub fn null() -> Self { + Pointer::from_addr(0) + } +} + +impl<'tcx, Prov> Pointer { + #[inline(always)] + pub fn new(provenance: Prov, offset: Size) -> Self { + Pointer { provenance, offset } + } + + /// Obtain the constituents of this pointer. Not that the meaning of the offset depends on the type `Prov`! + /// This function must only be used in the implementation of `Machine::ptr_get_alloc`, + /// and when a `Pointer` is taken apart to be stored efficiently in an `Allocation`. 
+ #[inline(always)] + pub fn into_parts(self) -> (Prov, Size) { + (self.provenance, self.offset) + } + + pub fn map_provenance(self, f: impl FnOnce(Prov) -> Prov) -> Self { + Pointer { provenance: f(self.provenance), ..self } + } + + #[inline] + pub fn offset(self, i: Size, cx: &impl HasDataLayout) -> InterpResult<'tcx, Self> { + Ok(Pointer { + offset: Size::from_bytes(cx.data_layout().offset(self.offset.bytes(), i.bytes())?), + ..self + }) + } + + #[inline] + pub fn overflowing_offset(self, i: Size, cx: &impl HasDataLayout) -> (Self, bool) { + let (res, over) = cx.data_layout().overflowing_offset(self.offset.bytes(), i.bytes()); + let ptr = Pointer { offset: Size::from_bytes(res), ..self }; + (ptr, over) + } + + #[inline(always)] + pub fn wrapping_offset(self, i: Size, cx: &impl HasDataLayout) -> Self { + self.overflowing_offset(i, cx).0 + } + + #[inline] + pub fn signed_offset(self, i: i64, cx: &impl HasDataLayout) -> InterpResult<'tcx, Self> { + Ok(Pointer { + offset: Size::from_bytes(cx.data_layout().signed_offset(self.offset.bytes(), i)?), + ..self + }) + } + + #[inline] + pub fn overflowing_signed_offset(self, i: i64, cx: &impl HasDataLayout) -> (Self, bool) { + let (res, over) = cx.data_layout().overflowing_signed_offset(self.offset.bytes(), i); + let ptr = Pointer { offset: Size::from_bytes(res), ..self }; + (ptr, over) + } + + #[inline(always)] + pub fn wrapping_signed_offset(self, i: i64, cx: &impl HasDataLayout) -> Self { + self.overflowing_signed_offset(i, cx).0 + } +} diff --git a/compiler/rustc_middle/src/mir/interpret/queries.rs b/compiler/rustc_middle/src/mir/interpret/queries.rs new file mode 100644 index 000000000..786927e2d --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/queries.rs @@ -0,0 +1,217 @@ +use super::{ErrorHandled, EvalToConstValueResult, EvalToValTreeResult, GlobalId}; + +use crate::mir; +use crate::ty::subst::InternalSubsts; +use crate::ty::visit::TypeVisitable; +use crate::ty::{self, query::TyCtxtAt, query::TyCtxtEnsure, TyCtxt}; +use rustc_hir::def_id::DefId; +use rustc_span::{Span, DUMMY_SP}; + +impl<'tcx> TyCtxt<'tcx> { + /// Evaluates a constant without providing any substitutions. This is useful to evaluate consts + /// that can't take any generic arguments like statics, const items or enum discriminants. If a + /// generic parameter is used within the constant `ErrorHandled::ToGeneric` will be returned. + #[instrument(skip(self), level = "debug")] + pub fn const_eval_poly(self, def_id: DefId) -> EvalToConstValueResult<'tcx> { + // In some situations def_id will have substitutions within scope, but they aren't allowed + // to be used. So we can't use `Instance::mono`, instead we feed unresolved substitutions + // into `const_eval` which will return `ErrorHandled::ToGeneric` if any of them are + // encountered. + let substs = InternalSubsts::identity_for_item(self, def_id); + let instance = ty::Instance::new(def_id, substs); + let cid = GlobalId { instance, promoted: None }; + let param_env = self.param_env(def_id).with_reveal_all_normalized(self); + self.const_eval_global_id(param_env, cid, None) + } + /// Resolves and evaluates a constant. + /// + /// The constant can be located on a trait like `::C`, in which case the given + /// substitutions and environment are used to resolve the constant. Alternatively if the + /// constant has generic parameters in scope the substitutions are used to evaluate the value of + /// the constant. 
For example in `fn foo() { let _ = [0; bar::()]; }` the repeat count + /// constant `bar::()` requires a substitution for `T`, if the substitution for `T` is still + /// too generic for the constant to be evaluated then `Err(ErrorHandled::TooGeneric)` is + /// returned. + #[instrument(level = "debug", skip(self))] + pub fn const_eval_resolve( + self, + param_env: ty::ParamEnv<'tcx>, + ct: ty::Unevaluated<'tcx>, + span: Option, + ) -> EvalToConstValueResult<'tcx> { + // Cannot resolve `Unevaluated` constants that contain inference + // variables. We reject those here since `resolve_opt_const_arg` + // would fail otherwise. + // + // When trying to evaluate constants containing inference variables, + // use `Infcx::const_eval_resolve` instead. + if ct.substs.has_infer_types_or_consts() { + bug!("did not expect inference variables here"); + } + + match ty::Instance::resolve_opt_const_arg(self, param_env, ct.def, ct.substs) { + Ok(Some(instance)) => { + let cid = GlobalId { instance, promoted: ct.promoted }; + self.const_eval_global_id(param_env, cid, span) + } + Ok(None) => Err(ErrorHandled::TooGeneric), + Err(error_reported) => Err(ErrorHandled::Reported(error_reported)), + } + } + + #[instrument(level = "debug", skip(self))] + pub fn const_eval_resolve_for_typeck( + self, + param_env: ty::ParamEnv<'tcx>, + ct: ty::Unevaluated<'tcx>, + span: Option, + ) -> EvalToValTreeResult<'tcx> { + // Cannot resolve `Unevaluated` constants that contain inference + // variables. We reject those here since `resolve_opt_const_arg` + // would fail otherwise. + // + // When trying to evaluate constants containing inference variables, + // use `Infcx::const_eval_resolve` instead. + if ct.substs.has_infer_types_or_consts() { + bug!("did not expect inference variables here"); + } + + match ty::Instance::resolve_opt_const_arg(self, param_env, ct.def, ct.substs) { + Ok(Some(instance)) => { + let cid = GlobalId { instance, promoted: ct.promoted }; + self.const_eval_global_id_for_typeck(param_env, cid, span) + } + Ok(None) => Err(ErrorHandled::TooGeneric), + Err(error_reported) => Err(ErrorHandled::Reported(error_reported)), + } + } + + pub fn const_eval_instance( + self, + param_env: ty::ParamEnv<'tcx>, + instance: ty::Instance<'tcx>, + span: Option, + ) -> EvalToConstValueResult<'tcx> { + self.const_eval_global_id(param_env, GlobalId { instance, promoted: None }, span) + } + + /// Evaluate a constant to a `ConstValue`. + #[instrument(skip(self), level = "debug")] + pub fn const_eval_global_id( + self, + param_env: ty::ParamEnv<'tcx>, + cid: GlobalId<'tcx>, + span: Option, + ) -> EvalToConstValueResult<'tcx> { + let param_env = param_env.with_const(); + // Const-eval shouldn't depend on lifetimes at all, so we can erase them, which should + // improve caching of queries. + let inputs = self.erase_regions(param_env.and(cid)); + if let Some(span) = span { + self.at(span).eval_to_const_value_raw(inputs) + } else { + self.eval_to_const_value_raw(inputs) + } + } + + /// Evaluate a constant to a type-level constant. + #[instrument(skip(self), level = "debug")] + pub fn const_eval_global_id_for_typeck( + self, + param_env: ty::ParamEnv<'tcx>, + cid: GlobalId<'tcx>, + span: Option, + ) -> EvalToValTreeResult<'tcx> { + let param_env = param_env.with_const(); + debug!(?param_env); + // Const-eval shouldn't depend on lifetimes at all, so we can erase them, which should + // improve caching of queries. 
+ let inputs = self.erase_regions(param_env.and(cid)); + debug!(?inputs); + if let Some(span) = span { + self.at(span).eval_to_valtree(inputs) + } else { + self.eval_to_valtree(inputs) + } + } + + /// Evaluate a static's initializer, returning the allocation of the initializer's memory. + #[inline(always)] + pub fn eval_static_initializer( + self, + def_id: DefId, + ) -> Result, ErrorHandled> { + self.at(DUMMY_SP).eval_static_initializer(def_id) + } +} + +impl<'tcx> TyCtxtAt<'tcx> { + /// Evaluate a static's initializer, returning the allocation of the initializer's memory. + pub fn eval_static_initializer( + self, + def_id: DefId, + ) -> Result, ErrorHandled> { + trace!("eval_static_initializer: Need to compute {:?}", def_id); + assert!(self.is_static(def_id)); + let instance = ty::Instance::mono(*self, def_id); + let gid = GlobalId { instance, promoted: None }; + self.eval_to_allocation(gid, ty::ParamEnv::reveal_all()) + } + + /// Evaluate anything constant-like, returning the allocation of the final memory. + fn eval_to_allocation( + self, + gid: GlobalId<'tcx>, + param_env: ty::ParamEnv<'tcx>, + ) -> Result, ErrorHandled> { + let param_env = param_env.with_const(); + trace!("eval_to_allocation: Need to compute {:?}", gid); + let raw_const = self.eval_to_allocation_raw(param_env.and(gid))?; + Ok(self.global_alloc(raw_const.alloc_id).unwrap_memory()) + } +} + +impl<'tcx> TyCtxtEnsure<'tcx> { + /// Evaluates a constant without providing any substitutions. This is useful to evaluate consts + /// that can't take any generic arguments like statics, const items or enum discriminants. If a + /// generic parameter is used within the constant `ErrorHandled::ToGeneric` will be returned. + #[instrument(skip(self), level = "debug")] + pub fn const_eval_poly(self, def_id: DefId) { + // In some situations def_id will have substitutions within scope, but they aren't allowed + // to be used. So we can't use `Instance::mono`, instead we feed unresolved substitutions + // into `const_eval` which will return `ErrorHandled::ToGeneric` if any of them are + // encountered. + let substs = InternalSubsts::identity_for_item(self.tcx, def_id); + let instance = ty::Instance::new(def_id, substs); + let cid = GlobalId { instance, promoted: None }; + let param_env = + self.tcx.param_env(def_id).with_reveal_all_normalized(self.tcx).with_const(); + // Const-eval shouldn't depend on lifetimes at all, so we can erase them, which should + // improve caching of queries. + let inputs = self.tcx.erase_regions(param_env.and(cid)); + self.eval_to_const_value_raw(inputs) + } + + /// Evaluate a static's initializer, returning the allocation of the initializer's memory. + pub fn eval_static_initializer(self, def_id: DefId) { + trace!("eval_static_initializer: Need to compute {:?}", def_id); + assert!(self.tcx.is_static(def_id)); + let instance = ty::Instance::mono(self.tcx, def_id); + let gid = GlobalId { instance, promoted: None }; + let param_env = ty::ParamEnv::reveal_all().with_const(); + trace!("eval_to_allocation: Need to compute {:?}", gid); + self.eval_to_allocation_raw(param_env.and(gid)) + } +} + +impl<'tcx> TyCtxt<'tcx> { + /// Destructure a mir constant ADT or array into its variant index and its field values. + /// Panics if the destructuring fails, use `try_destructure_mir_constant` for fallible version. 
+ pub fn destructure_mir_constant( + self, + param_env: ty::ParamEnv<'tcx>, + constant: mir::ConstantKind<'tcx>, + ) -> mir::DestructuredMirConstant<'tcx> { + self.try_destructure_mir_constant(param_env.and(constant)).unwrap() + } +} diff --git a/compiler/rustc_middle/src/mir/interpret/value.rs b/compiler/rustc_middle/src/mir/interpret/value.rs new file mode 100644 index 000000000..834c114ee --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/value.rs @@ -0,0 +1,651 @@ +use std::convert::{TryFrom, TryInto}; +use std::fmt; + +use rustc_apfloat::{ + ieee::{Double, Single}, + Float, +}; +use rustc_macros::HashStable; +use rustc_target::abi::{HasDataLayout, Size}; + +use crate::ty::{Lift, ParamEnv, ScalarInt, Ty, TyCtxt}; + +use super::{ + AllocId, AllocRange, ConstAllocation, InterpResult, Pointer, PointerArithmetic, Provenance, + ScalarSizeMismatch, +}; + +/// Represents the result of const evaluation via the `eval_to_allocation` query. +#[derive(Copy, Clone, HashStable, TyEncodable, TyDecodable, Debug, Hash, Eq, PartialEq)] +pub struct ConstAlloc<'tcx> { + // the value lives here, at offset 0, and that allocation definitely is an `AllocKind::Memory` + // (so you can use `AllocMap::unwrap_memory`). + pub alloc_id: AllocId, + pub ty: Ty<'tcx>, +} + +/// Represents a constant value in Rust. `Scalar` and `Slice` are optimizations for +/// array length computations, enum discriminants and the pattern matching logic. +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable, Hash)] +#[derive(HashStable)] +pub enum ConstValue<'tcx> { + /// Used only for types with `layout::abi::Scalar` ABI. + /// + /// Not using the enum `Value` to encode that this must not be `Uninit`. + Scalar(Scalar), + + /// Only used for ZSTs. + ZeroSized, + + /// Used only for `&[u8]` and `&str` + Slice { data: ConstAllocation<'tcx>, start: usize, end: usize }, + + /// A value not represented/representable by `Scalar` or `Slice` + ByRef { + /// The backing memory of the value, may contain more memory than needed for just the value + /// in order to share `ConstAllocation`s between values + alloc: ConstAllocation<'tcx>, + /// Offset into `alloc` + offset: Size, + }, +} + +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +static_assert_size!(ConstValue<'_>, 32); + +impl<'a, 'tcx> Lift<'tcx> for ConstValue<'a> { + type Lifted = ConstValue<'tcx>; + fn lift_to_tcx(self, tcx: TyCtxt<'tcx>) -> Option> { + Some(match self { + ConstValue::Scalar(s) => ConstValue::Scalar(s), + ConstValue::ZeroSized => ConstValue::ZeroSized, + ConstValue::Slice { data, start, end } => { + ConstValue::Slice { data: tcx.lift(data)?, start, end } + } + ConstValue::ByRef { alloc, offset } => { + ConstValue::ByRef { alloc: tcx.lift(alloc)?, offset } + } + }) + } +} + +impl<'tcx> ConstValue<'tcx> { + #[inline] + pub fn try_to_scalar(&self) -> Option> { + match *self { + ConstValue::ByRef { .. } | ConstValue::Slice { .. 
} | ConstValue::ZeroSized => None, + ConstValue::Scalar(val) => Some(val), + } + } + + pub fn try_to_scalar_int(&self) -> Option { + Some(self.try_to_scalar()?.assert_int()) + } + + pub fn try_to_bits(&self, size: Size) -> Option { + self.try_to_scalar_int()?.to_bits(size).ok() + } + + pub fn try_to_bool(&self) -> Option { + self.try_to_scalar_int()?.try_into().ok() + } + + pub fn try_to_machine_usize(&self, tcx: TyCtxt<'tcx>) -> Option { + self.try_to_scalar_int()?.try_to_machine_usize(tcx).ok() + } + + pub fn try_to_bits_for_ty( + &self, + tcx: TyCtxt<'tcx>, + param_env: ParamEnv<'tcx>, + ty: Ty<'tcx>, + ) -> Option { + let size = tcx.layout_of(param_env.with_reveal_all_normalized(tcx).and(ty)).ok()?.size; + self.try_to_bits(size) + } + + pub fn from_bool(b: bool) -> Self { + ConstValue::Scalar(Scalar::from_bool(b)) + } + + pub fn from_u64(i: u64) -> Self { + ConstValue::Scalar(Scalar::from_u64(i)) + } + + pub fn from_machine_usize(i: u64, cx: &impl HasDataLayout) -> Self { + ConstValue::Scalar(Scalar::from_machine_usize(i, cx)) + } +} + +/// A `Scalar` represents an immediate, primitive value existing outside of a +/// `memory::Allocation`. It is in many ways like a small chunk of an `Allocation`, up to 16 bytes in +/// size. Like a range of bytes in an `Allocation`, a `Scalar` can either represent the raw bytes +/// of a simple value or a pointer into another `Allocation` +/// +/// These variants would be private if there was a convenient way to achieve that in Rust. +/// Do *not* match on a `Scalar`! Use the various `to_*` methods instead. +#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, TyEncodable, TyDecodable, Hash)] +#[derive(HashStable)] +pub enum Scalar { + /// The raw bytes of a simple value. + Int(ScalarInt), + + /// A pointer into an `Allocation`. An `Allocation` in the `memory` module has a list of + /// relocations, but a `Scalar` is only large enough to contain one, so we just represent the + /// relocation and its associated offset together as a `Pointer` here. + /// + /// We also store the size of the pointer, such that a `Scalar` always knows how big it is. + /// The size is always the pointer size of the current target, but this is not information + /// that we always have readily available. + Ptr(Pointer, u8), +} + +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +static_assert_size!(Scalar, 24); + +// We want the `Debug` output to be readable as it is used by `derive(Debug)` for +// all the Miri types. 
+impl fmt::Debug for Scalar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Scalar::Ptr(ptr, _size) => write!(f, "{:?}", ptr), + Scalar::Int(int) => write!(f, "{:?}", int), + } + } +} + +impl fmt::Display for Scalar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Scalar::Ptr(ptr, _size) => write!(f, "pointer to {:?}", ptr), + Scalar::Int(int) => write!(f, "{}", int), + } + } +} + +impl fmt::LowerHex for Scalar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Scalar::Ptr(ptr, _size) => write!(f, "pointer to {:?}", ptr), + Scalar::Int(int) => write!(f, "{:#x}", int), + } + } +} + +impl From for Scalar { + #[inline(always)] + fn from(f: Single) -> Self { + Scalar::from_f32(f) + } +} + +impl From for Scalar { + #[inline(always)] + fn from(f: Double) -> Self { + Scalar::from_f64(f) + } +} + +impl From for Scalar { + #[inline(always)] + fn from(ptr: ScalarInt) -> Self { + Scalar::Int(ptr) + } +} + +impl Scalar { + #[inline(always)] + pub fn from_pointer(ptr: Pointer, cx: &impl HasDataLayout) -> Self { + Scalar::Ptr(ptr, u8::try_from(cx.pointer_size().bytes()).unwrap()) + } + + /// Create a Scalar from a pointer with an `Option<_>` provenance (where `None` represents a + /// plain integer / "invalid" pointer). + pub fn from_maybe_pointer(ptr: Pointer>, cx: &impl HasDataLayout) -> Self { + match ptr.into_parts() { + (Some(prov), offset) => Scalar::from_pointer(Pointer::new(prov, offset), cx), + (None, offset) => { + Scalar::Int(ScalarInt::try_from_uint(offset.bytes(), cx.pointer_size()).unwrap()) + } + } + } + + #[inline] + pub fn null_ptr(cx: &impl HasDataLayout) -> Self { + Scalar::Int(ScalarInt::null(cx.pointer_size())) + } + + #[inline] + pub fn from_bool(b: bool) -> Self { + Scalar::Int(b.into()) + } + + #[inline] + pub fn from_char(c: char) -> Self { + Scalar::Int(c.into()) + } + + #[inline] + pub fn try_from_uint(i: impl Into, size: Size) -> Option { + ScalarInt::try_from_uint(i, size).map(Scalar::Int) + } + + #[inline] + pub fn from_uint(i: impl Into, size: Size) -> Self { + let i = i.into(); + Self::try_from_uint(i, size) + .unwrap_or_else(|| bug!("Unsigned value {:#x} does not fit in {} bits", i, size.bits())) + } + + #[inline] + pub fn from_u8(i: u8) -> Self { + Scalar::Int(i.into()) + } + + #[inline] + pub fn from_u16(i: u16) -> Self { + Scalar::Int(i.into()) + } + + #[inline] + pub fn from_u32(i: u32) -> Self { + Scalar::Int(i.into()) + } + + #[inline] + pub fn from_u64(i: u64) -> Self { + Scalar::Int(i.into()) + } + + #[inline] + pub fn from_machine_usize(i: u64, cx: &impl HasDataLayout) -> Self { + Self::from_uint(i, cx.data_layout().pointer_size) + } + + #[inline] + pub fn try_from_int(i: impl Into, size: Size) -> Option { + ScalarInt::try_from_int(i, size).map(Scalar::Int) + } + + #[inline] + pub fn from_int(i: impl Into, size: Size) -> Self { + let i = i.into(); + Self::try_from_int(i, size) + .unwrap_or_else(|| bug!("Signed value {:#x} does not fit in {} bits", i, size.bits())) + } + + #[inline] + pub fn from_i32(i: i32) -> Self { + Self::from_int(i, Size::from_bits(32)) + } + + #[inline] + pub fn from_i64(i: i64) -> Self { + Self::from_int(i, Size::from_bits(64)) + } + + #[inline] + pub fn from_machine_isize(i: i64, cx: &impl HasDataLayout) -> Self { + Self::from_int(i, cx.data_layout().pointer_size) + } + + #[inline] + pub fn from_f32(f: Single) -> Self { + Scalar::Int(f.into()) + } + + #[inline] + pub fn from_f64(f: Double) -> Self { + Scalar::Int(f.into()) + } + + /// This is 
almost certainly not the method you want! You should dispatch on the type + /// and use `to_{u8,u16,...}`/`scalar_to_ptr` to perform ptr-to-int / int-to-ptr casts as needed. + /// + /// This method only exists for the benefit of low-level operations that truly need to treat the + /// scalar in whatever form it is. + /// + /// This throws UB (instead of ICEing) on a size mismatch since size mismatches can arise in + /// Miri when someone declares a function that we shim (such as `malloc`) with a wrong type. + #[inline] + pub fn to_bits_or_ptr_internal( + self, + target_size: Size, + ) -> Result>, ScalarSizeMismatch> { + assert_ne!(target_size.bytes(), 0, "you should never look at the bits of a ZST"); + Ok(match self { + Scalar::Int(int) => Ok(int.to_bits(target_size).map_err(|size| { + ScalarSizeMismatch { target_size: target_size.bytes(), data_size: size.bytes() } + })?), + Scalar::Ptr(ptr, sz) => { + if target_size.bytes() != u64::from(sz) { + return Err(ScalarSizeMismatch { + target_size: target_size.bytes(), + data_size: sz.into(), + }); + } + Err(ptr) + } + }) + } +} + +impl<'tcx, Prov: Provenance> Scalar { + pub fn to_pointer(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, Pointer>> { + match self + .to_bits_or_ptr_internal(cx.pointer_size()) + .map_err(|s| err_ub!(ScalarSizeMismatch(s)))? + { + Err(ptr) => Ok(ptr.into()), + Ok(bits) => { + let addr = u64::try_from(bits).unwrap(); + Ok(Pointer::from_addr(addr)) + } + } + } + + /// Fundamental scalar-to-int (cast) operation. Many convenience wrappers exist below, that you + /// likely want to use instead. + /// + /// Will perform ptr-to-int casts if needed and possible. + /// If that fails, we know the offset is relative, so we return an "erased" Scalar + /// (which is useful for error messages but not much else). + #[inline] + pub fn try_to_int(self) -> Result> { + match self { + Scalar::Int(int) => Ok(int), + Scalar::Ptr(ptr, sz) => { + if Prov::OFFSET_IS_ADDR { + Ok(ScalarInt::try_from_uint(ptr.offset.bytes(), Size::from_bytes(sz)).unwrap()) + } else { + // We know `offset` is relative, since `OFFSET_IS_ADDR == false`. + let (prov, offset) = ptr.into_parts(); + // Because `OFFSET_IS_ADDR == false`, this unwrap can never fail. + Err(Scalar::Ptr(Pointer::new(prov.get_alloc_id().unwrap(), offset), sz)) + } + } + } + } + + #[inline(always)] + pub fn assert_int(self) -> ScalarInt { + self.try_to_int().unwrap() + } + + /// This throws UB (instead of ICEing) on a size mismatch since size mismatches can arise in + /// Miri when someone declares a function that we shim (such as `malloc`) with a wrong type. 
+
+    /// This throws UB (instead of ICEing) on a size mismatch since size mismatches can arise in
+    /// Miri when someone declares a function that we shim (such as `malloc`) with a wrong type.
+    #[inline]
+    pub fn to_bits(self, target_size: Size) -> InterpResult<'tcx, u128> {
+        assert_ne!(target_size.bytes(), 0, "you should never look at the bits of a ZST");
+        self.try_to_int().map_err(|_| err_unsup!(ReadPointerAsBytes))?.to_bits(target_size).map_err(
+            |size| {
+                err_ub!(ScalarSizeMismatch(ScalarSizeMismatch {
+                    target_size: target_size.bytes(),
+                    data_size: size.bytes(),
+                }))
+                .into()
+            },
+        )
+    }
+
+    #[inline(always)]
+    pub fn assert_bits(self, target_size: Size) -> u128 {
+        self.to_bits(target_size).unwrap()
+    }
+
+    pub fn to_bool(self) -> InterpResult<'tcx, bool> {
+        let val = self.to_u8()?;
+        match val {
+            0 => Ok(false),
+            1 => Ok(true),
+            _ => throw_ub!(InvalidBool(val)),
+        }
+    }
+
+    pub fn to_char(self) -> InterpResult<'tcx, char> {
+        let val = self.to_u32()?;
+        match std::char::from_u32(val) {
+            Some(c) => Ok(c),
+            None => throw_ub!(InvalidChar(val)),
+        }
+    }
+
+    /// Converts the scalar to produce an unsigned integer of the given size.
+    /// Fails if the scalar is a pointer.
+    #[inline]
+    pub fn to_uint(self, size: Size) -> InterpResult<'tcx, u128> {
+        self.to_bits(size)
+    }
+
+    /// Converts the scalar to produce a `u8`. Fails if the scalar is a pointer.
+    pub fn to_u8(self) -> InterpResult<'tcx, u8> {
+        self.to_uint(Size::from_bits(8)).map(|v| u8::try_from(v).unwrap())
+    }
+
+    /// Converts the scalar to produce a `u16`. Fails if the scalar is a pointer.
+    pub fn to_u16(self) -> InterpResult<'tcx, u16> {
+        self.to_uint(Size::from_bits(16)).map(|v| u16::try_from(v).unwrap())
+    }
+
+    /// Converts the scalar to produce a `u32`. Fails if the scalar is a pointer.
+    pub fn to_u32(self) -> InterpResult<'tcx, u32> {
+        self.to_uint(Size::from_bits(32)).map(|v| u32::try_from(v).unwrap())
+    }
+
+    /// Converts the scalar to produce a `u64`. Fails if the scalar is a pointer.
+    pub fn to_u64(self) -> InterpResult<'tcx, u64> {
+        self.to_uint(Size::from_bits(64)).map(|v| u64::try_from(v).unwrap())
+    }
+
+    /// Converts the scalar to produce a `u128`. Fails if the scalar is a pointer.
+    pub fn to_u128(self) -> InterpResult<'tcx, u128> {
+        self.to_uint(Size::from_bits(128))
+    }
+
+    /// Converts the scalar to produce a machine-pointer-sized unsigned integer.
+    /// Fails if the scalar is a pointer.
+    pub fn to_machine_usize(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, u64> {
+        let b = self.to_uint(cx.data_layout().pointer_size)?;
+        Ok(u64::try_from(b).unwrap())
+    }
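A small sketch (illustrative only, not from the patch): each typed getter is `to_bits` plus a width check, so asking for the wrong width, or reading a pointer as raw bytes, surfaces as an interpreter error rather than a panic:

    let b: Scalar<AllocId> = Scalar::from_bool(true);
    assert_eq!(b.to_u8().unwrap(), 1); // a bool is a 1-byte scalar holding 0 or 1
    assert!(b.to_u32().is_err()); // wrong width: reported as a ScalarSizeMismatch error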
+
+    /// Converts the scalar to produce a signed integer of the given size.
+    /// Fails if the scalar is a pointer.
+    #[inline]
+    pub fn to_int(self, size: Size) -> InterpResult<'tcx, i128> {
+        let b = self.to_bits(size)?;
+        Ok(size.sign_extend(b) as i128)
+    }
+
+    /// Converts the scalar to produce an `i8`. Fails if the scalar is a pointer.
+    pub fn to_i8(self) -> InterpResult<'tcx, i8> {
+        self.to_int(Size::from_bits(8)).map(|v| i8::try_from(v).unwrap())
+    }
+
+    /// Converts the scalar to produce an `i16`. Fails if the scalar is a pointer.
+    pub fn to_i16(self) -> InterpResult<'tcx, i16> {
+        self.to_int(Size::from_bits(16)).map(|v| i16::try_from(v).unwrap())
+    }
+
+    /// Converts the scalar to produce an `i32`. Fails if the scalar is a pointer.
+    pub fn to_i32(self) -> InterpResult<'tcx, i32> {
+        self.to_int(Size::from_bits(32)).map(|v| i32::try_from(v).unwrap())
+    }
+
+    /// Converts the scalar to produce an `i64`. Fails if the scalar is a pointer.
+    pub fn to_i64(self) -> InterpResult<'tcx, i64> {
+        self.to_int(Size::from_bits(64)).map(|v| i64::try_from(v).unwrap())
+    }
+
+    /// Converts the scalar to produce an `i128`. Fails if the scalar is a pointer.
+    pub fn to_i128(self) -> InterpResult<'tcx, i128> {
+        self.to_int(Size::from_bits(128))
+    }
+
+    /// Converts the scalar to produce a machine-pointer-sized signed integer.
+    /// Fails if the scalar is a pointer.
+    pub fn to_machine_isize(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, i64> {
+        let b = self.to_int(cx.data_layout().pointer_size)?;
+        Ok(i64::try_from(b).unwrap())
+    }
+
+    #[inline]
+    pub fn to_f32(self) -> InterpResult<'tcx, Single> {
+        // Going through `u32` to check size and truncation.
+        Ok(Single::from_bits(self.to_u32()?.into()))
+    }
+
+    #[inline]
+    pub fn to_f64(self) -> InterpResult<'tcx, Double> {
+        // Going through `u64` to check size and truncation.
+        Ok(Double::from_bits(self.to_u64()?.into()))
+    }
+}
+
+#[derive(Clone, Copy, Eq, PartialEq, TyEncodable, TyDecodable, HashStable, Hash)]
+pub enum ScalarMaybeUninit<Prov = AllocId> {
+    Scalar(Scalar<Prov>),
+    Uninit,
+}
+
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+static_assert_size!(ScalarMaybeUninit, 24);
+
+impl<Prov> From<Scalar<Prov>> for ScalarMaybeUninit<Prov> {
+    #[inline(always)]
+    fn from(s: Scalar<Prov>) -> Self {
+        ScalarMaybeUninit::Scalar(s)
+    }
+}
+
+// We want the `Debug` output to be readable as it is used by `derive(Debug)` for
+// all the Miri types.
+impl<Prov: Provenance> fmt::Debug for ScalarMaybeUninit<Prov> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            ScalarMaybeUninit::Uninit => write!(f, "<uninitialized>"),
+            ScalarMaybeUninit::Scalar(s) => write!(f, "{:?}", s),
+        }
+    }
+}
+
+impl<Prov: Provenance> fmt::LowerHex for ScalarMaybeUninit<Prov> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            ScalarMaybeUninit::Uninit => write!(f, "uninitialized bytes"),
+            ScalarMaybeUninit::Scalar(s) => write!(f, "{:x}", s),
+        }
+    }
+}
+
+impl<Prov> ScalarMaybeUninit<Prov> {
+    #[inline]
+    pub fn from_pointer(ptr: Pointer<Prov>, cx: &impl HasDataLayout) -> Self {
+        ScalarMaybeUninit::Scalar(Scalar::from_pointer(ptr, cx))
+    }
+
+    #[inline]
+    pub fn from_maybe_pointer(ptr: Pointer<Option<Prov>>, cx: &impl HasDataLayout) -> Self {
+        ScalarMaybeUninit::Scalar(Scalar::from_maybe_pointer(ptr, cx))
+    }
+
+    #[inline]
+    pub fn check_init<'tcx>(self) -> InterpResult<'tcx, Scalar<Prov>> {
+        match self {
+            ScalarMaybeUninit::Scalar(scalar) => Ok(scalar),
+            ScalarMaybeUninit::Uninit => throw_ub!(InvalidUninitBytes(None)),
+        }
+    }
+}
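A sketch of the intended `ScalarMaybeUninit` flow (invented values, not from the patch): `check_init` is the one place where uninitialized data turns into an `InvalidUninitBytes` error instead of a panic:

    let val: ScalarMaybeUninit<AllocId> = ScalarMaybeUninit::from(Scalar::from_u32(0));
    let _scalar = val.check_init().unwrap(); // initialized: yields the inner Scalar
    let uninit: ScalarMaybeUninit<AllocId> = ScalarMaybeUninit::Uninit;
    assert!(uninit.check_init().is_err()); // uninitialized: reported as undefined behavior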
+
+impl<'tcx, Prov: Provenance> ScalarMaybeUninit<Prov> {
+    #[inline(always)]
+    pub fn to_pointer(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, Pointer<Option<Prov>>> {
+        self.check_init()?.to_pointer(cx)
+    }
+
+    #[inline(always)]
+    pub fn to_bool(self) -> InterpResult<'tcx, bool> {
+        self.check_init()?.to_bool()
+    }
+
+    #[inline(always)]
+    pub fn to_char(self) -> InterpResult<'tcx, char> {
+        self.check_init()?.to_char()
+    }
+
+    #[inline(always)]
+    pub fn to_f32(self) -> InterpResult<'tcx, Single> {
+        self.check_init()?.to_f32()
+    }
+
+    #[inline(always)]
+    pub fn to_f64(self) -> InterpResult<'tcx, Double> {
+        self.check_init()?.to_f64()
+    }
+
+    #[inline(always)]
+    pub fn to_u8(self) -> InterpResult<'tcx, u8> {
+        self.check_init()?.to_u8()
+    }
+
+    #[inline(always)]
+    pub fn to_u16(self) -> InterpResult<'tcx, u16> {
+        self.check_init()?.to_u16()
+    }
+
+    #[inline(always)]
+    pub fn to_u32(self) -> InterpResult<'tcx, u32> {
+        self.check_init()?.to_u32()
+    }
+
+    #[inline(always)]
+    pub fn to_u64(self) -> InterpResult<'tcx, u64> {
+        self.check_init()?.to_u64()
+    }
+
+    #[inline(always)]
+    pub fn to_machine_usize(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, u64> {
+        self.check_init()?.to_machine_usize(cx)
+    }
+
+    #[inline(always)]
+    pub fn to_i8(self) -> InterpResult<'tcx, i8> {
+        self.check_init()?.to_i8()
+    }
+
+    #[inline(always)]
+    pub fn to_i16(self) -> InterpResult<'tcx, i16> {
+        self.check_init()?.to_i16()
+    }
+
+    #[inline(always)]
+    pub fn to_i32(self) -> InterpResult<'tcx, i32> {
+        self.check_init()?.to_i32()
+    }
+
+    #[inline(always)]
+    pub fn to_i64(self) -> InterpResult<'tcx, i64> {
+        self.check_init()?.to_i64()
+    }
+
+    #[inline(always)]
+    pub fn to_machine_isize(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, i64> {
+        self.check_init()?.to_machine_isize(cx)
+    }
+}
+
+/// Gets the bytes of a constant slice value.
+pub fn get_slice_bytes<'tcx>(cx: &impl HasDataLayout, val: ConstValue<'tcx>) -> &'tcx [u8] {
+    if let ConstValue::Slice { data, start, end } = val {
+        let len = end - start;
+        data.inner()
+            .get_bytes(
+                cx,
+                AllocRange { start: Size::from_bytes(start), size: Size::from_bytes(len) },
+            )
+            .unwrap_or_else(|err| bug!("const slice is invalid: {:?}", err))
+    } else {
+        bug!("expected const slice, but found another const value");
+    }
+}
-- 
cgit v1.2.3