From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_const_eval/src/interpret/cast.rs | 365 ++++++ .../rustc_const_eval/src/interpret/eval_context.rs | 1019 ++++++++++++++++ compiler/rustc_const_eval/src/interpret/intern.rs | 486 ++++++++ .../rustc_const_eval/src/interpret/intrinsics.rs | 696 +++++++++++ .../src/interpret/intrinsics/caller_location.rs | 130 +++ .../src/interpret/intrinsics/type_name.rs | 196 ++++ compiler/rustc_const_eval/src/interpret/machine.rs | 525 +++++++++ compiler/rustc_const_eval/src/interpret/memory.rs | 1224 ++++++++++++++++++++ compiler/rustc_const_eval/src/interpret/mod.rs | 34 + compiler/rustc_const_eval/src/interpret/operand.rs | 831 +++++++++++++ .../rustc_const_eval/src/interpret/operator.rs | 463 ++++++++ compiler/rustc_const_eval/src/interpret/place.rs | 900 ++++++++++++++ .../rustc_const_eval/src/interpret/projection.rs | 391 +++++++ compiler/rustc_const_eval/src/interpret/step.rs | 320 +++++ .../rustc_const_eval/src/interpret/terminator.rs | 671 +++++++++++ compiler/rustc_const_eval/src/interpret/traits.rs | 59 + compiler/rustc_const_eval/src/interpret/util.rs | 73 ++ .../rustc_const_eval/src/interpret/validity.rs | 986 ++++++++++++++++ compiler/rustc_const_eval/src/interpret/visitor.rs | 534 +++++++++ 19 files changed, 9903 insertions(+) create mode 100644 compiler/rustc_const_eval/src/interpret/cast.rs create mode 100644 compiler/rustc_const_eval/src/interpret/eval_context.rs create mode 100644 compiler/rustc_const_eval/src/interpret/intern.rs create mode 100644 compiler/rustc_const_eval/src/interpret/intrinsics.rs create mode 100644 compiler/rustc_const_eval/src/interpret/intrinsics/caller_location.rs create mode 100644 compiler/rustc_const_eval/src/interpret/intrinsics/type_name.rs create mode 100644 compiler/rustc_const_eval/src/interpret/machine.rs create mode 100644 compiler/rustc_const_eval/src/interpret/memory.rs create mode 100644 compiler/rustc_const_eval/src/interpret/mod.rs create mode 100644 compiler/rustc_const_eval/src/interpret/operand.rs create mode 100644 compiler/rustc_const_eval/src/interpret/operator.rs create mode 100644 compiler/rustc_const_eval/src/interpret/place.rs create mode 100644 compiler/rustc_const_eval/src/interpret/projection.rs create mode 100644 compiler/rustc_const_eval/src/interpret/step.rs create mode 100644 compiler/rustc_const_eval/src/interpret/terminator.rs create mode 100644 compiler/rustc_const_eval/src/interpret/traits.rs create mode 100644 compiler/rustc_const_eval/src/interpret/util.rs create mode 100644 compiler/rustc_const_eval/src/interpret/validity.rs create mode 100644 compiler/rustc_const_eval/src/interpret/visitor.rs (limited to 'compiler/rustc_const_eval/src/interpret') diff --git a/compiler/rustc_const_eval/src/interpret/cast.rs b/compiler/rustc_const_eval/src/interpret/cast.rs new file mode 100644 index 000000000..c97c31eb9 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/cast.rs @@ -0,0 +1,365 @@ +use std::assert_matches::assert_matches; +use std::convert::TryFrom; + +use rustc_apfloat::ieee::{Double, Single}; +use rustc_apfloat::{Float, FloatConvert}; +use rustc_middle::mir::interpret::{InterpResult, PointerArithmetic, Scalar}; +use rustc_middle::mir::CastKind; +use rustc_middle::ty::adjustment::PointerCast; +use rustc_middle::ty::layout::{IntegerExt, LayoutOf, TyAndLayout}; +use rustc_middle::ty::{self, FloatTy, 
Ty, TypeAndMut}; +use rustc_target::abi::Integer; +use rustc_type_ir::sty::TyKind::*; + +use super::{ + util::ensure_monomorphic_enough, FnVal, ImmTy, Immediate, InterpCx, Machine, OpTy, PlaceTy, +}; + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + pub fn cast( + &mut self, + src: &OpTy<'tcx, M::Provenance>, + cast_kind: CastKind, + cast_ty: Ty<'tcx>, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + use rustc_middle::mir::CastKind::*; + // FIXME: In which cases should we trigger UB when the source is uninit? + match cast_kind { + Pointer(PointerCast::Unsize) => { + let cast_ty = self.layout_of(cast_ty)?; + self.unsize_into(src, cast_ty, dest)?; + } + + PointerExposeAddress => { + let src = self.read_immediate(src)?; + let res = self.pointer_expose_address_cast(&src, cast_ty)?; + self.write_immediate(res, dest)?; + } + + PointerFromExposedAddress => { + let src = self.read_immediate(src)?; + let res = self.pointer_from_exposed_address_cast(&src, cast_ty)?; + self.write_immediate(res, dest)?; + } + + Misc => { + let src = self.read_immediate(src)?; + let res = self.misc_cast(&src, cast_ty)?; + self.write_immediate(res, dest)?; + } + + Pointer(PointerCast::MutToConstPointer | PointerCast::ArrayToPointer) => { + // These are NOPs, but can be wide pointers. + let v = self.read_immediate(src)?; + self.write_immediate(*v, dest)?; + } + + Pointer(PointerCast::ReifyFnPointer) => { + // The src operand does not matter, just its type + match *src.layout.ty.kind() { + ty::FnDef(def_id, substs) => { + // All reifications must be monomorphic, bail out otherwise. + ensure_monomorphic_enough(*self.tcx, src.layout.ty)?; + + let instance = ty::Instance::resolve_for_fn_ptr( + *self.tcx, + self.param_env, + def_id, + substs, + ) + .ok_or_else(|| err_inval!(TooGeneric))?; + + let fn_ptr = self.create_fn_alloc_ptr(FnVal::Instance(instance)); + self.write_pointer(fn_ptr, dest)?; + } + _ => span_bug!(self.cur_span(), "reify fn pointer on {:?}", src.layout.ty), + } + } + + Pointer(PointerCast::UnsafeFnPointer) => { + let src = self.read_immediate(src)?; + match cast_ty.kind() { + ty::FnPtr(_) => { + // No change to value + self.write_immediate(*src, dest)?; + } + _ => span_bug!(self.cur_span(), "fn to unsafe fn cast on {:?}", cast_ty), + } + } + + Pointer(PointerCast::ClosureFnPointer(_)) => { + // The src operand does not matter, just its type + match *src.layout.ty.kind() { + ty::Closure(def_id, substs) => { + // All reifications must be monomorphic, bail out otherwise. 
+ ensure_monomorphic_enough(*self.tcx, src.layout.ty)?; + + let instance = ty::Instance::resolve_closure( + *self.tcx, + def_id, + substs, + ty::ClosureKind::FnOnce, + ) + .ok_or_else(|| err_inval!(TooGeneric))?; + let fn_ptr = self.create_fn_alloc_ptr(FnVal::Instance(instance)); + self.write_pointer(fn_ptr, dest)?; + } + _ => span_bug!(self.cur_span(), "closure fn pointer on {:?}", src.layout.ty), + } + } + } + Ok(()) + } + + pub fn misc_cast( + &mut self, + src: &ImmTy<'tcx, M::Provenance>, + cast_ty: Ty<'tcx>, + ) -> InterpResult<'tcx, Immediate> { + use rustc_type_ir::sty::TyKind::*; + trace!("Casting {:?}: {:?} to {:?}", *src, src.layout.ty, cast_ty); + + match src.layout.ty.kind() { + // Floating point + Float(FloatTy::F32) => { + return Ok(self.cast_from_float(src.to_scalar()?.to_f32()?, cast_ty).into()); + } + Float(FloatTy::F64) => { + return Ok(self.cast_from_float(src.to_scalar()?.to_f64()?, cast_ty).into()); + } + // The rest is integer/pointer-"like", including fn ptr casts + _ => assert!( + src.layout.ty.is_bool() + || src.layout.ty.is_char() + || src.layout.ty.is_integral() + || src.layout.ty.is_any_ptr(), + "Unexpected cast from type {:?}", + src.layout.ty + ), + } + + // # First handle non-scalar source values. + + // Handle casting any ptr to raw ptr (might be a fat ptr). + if src.layout.ty.is_any_ptr() && cast_ty.is_unsafe_ptr() { + let dest_layout = self.layout_of(cast_ty)?; + if dest_layout.size == src.layout.size { + // Thin or fat pointer that just hast the ptr kind of target type changed. + return Ok(**src); + } else { + // Casting the metadata away from a fat ptr. + assert_eq!(src.layout.size, 2 * self.pointer_size()); + assert_eq!(dest_layout.size, self.pointer_size()); + assert!(src.layout.ty.is_unsafe_ptr()); + return match **src { + Immediate::ScalarPair(data, _) => Ok(data.check_init()?.into()), + Immediate::Scalar(..) => span_bug!( + self.cur_span(), + "{:?} input to a fat-to-thin cast ({:?} -> {:?})", + *src, + src.layout.ty, + cast_ty + ), + Immediate::Uninit => throw_ub!(InvalidUninitBytes(None)), + }; + } + } + + // # The remaining source values are scalar and "int-like". + let scalar = src.to_scalar()?; + Ok(self.cast_from_int_like(scalar, src.layout, cast_ty)?.into()) + } + + pub fn pointer_expose_address_cast( + &mut self, + src: &ImmTy<'tcx, M::Provenance>, + cast_ty: Ty<'tcx>, + ) -> InterpResult<'tcx, Immediate> { + assert_matches!(src.layout.ty.kind(), ty::RawPtr(_) | ty::FnPtr(_)); + assert!(cast_ty.is_integral()); + + let scalar = src.to_scalar()?; + let ptr = scalar.to_pointer(self)?; + match ptr.into_pointer_or_addr() { + Ok(ptr) => M::expose_ptr(self, ptr)?, + Err(_) => {} // Do nothing, exposing an invalid pointer (`None` provenance) is a NOP. + }; + Ok(self.cast_from_int_like(scalar, src.layout, cast_ty)?.into()) + } + + pub fn pointer_from_exposed_address_cast( + &mut self, + src: &ImmTy<'tcx, M::Provenance>, + cast_ty: Ty<'tcx>, + ) -> InterpResult<'tcx, Immediate> { + assert!(src.layout.ty.is_integral()); + assert_matches!(cast_ty.kind(), ty::RawPtr(_)); + + // First cast to usize. + let scalar = src.to_scalar()?; + let addr = self.cast_from_int_like(scalar, src.layout, self.tcx.types.usize)?; + let addr = addr.to_machine_usize(self)?; + + // Then turn address into pointer. 
+ let ptr = M::ptr_from_addr_cast(&self, addr)?; + Ok(Scalar::from_maybe_pointer(ptr, self).into()) + } + + pub fn cast_from_int_like( + &self, + scalar: Scalar, // input value (there is no ScalarTy so we separate data+layout) + src_layout: TyAndLayout<'tcx>, + cast_ty: Ty<'tcx>, + ) -> InterpResult<'tcx, Scalar> { + // Let's make sure v is sign-extended *if* it has a signed type. + let signed = src_layout.abi.is_signed(); // Also asserts that abi is `Scalar`. + + let v = scalar.to_bits(src_layout.size)?; + let v = if signed { self.sign_extend(v, src_layout) } else { v }; + trace!("cast_from_scalar: {}, {} -> {}", v, src_layout.ty, cast_ty); + + Ok(match *cast_ty.kind() { + Int(_) | Uint(_) => { + let size = match *cast_ty.kind() { + Int(t) => Integer::from_int_ty(self, t).size(), + Uint(t) => Integer::from_uint_ty(self, t).size(), + _ => bug!(), + }; + let v = size.truncate(v); + Scalar::from_uint(v, size) + } + + Float(FloatTy::F32) if signed => Scalar::from_f32(Single::from_i128(v as i128).value), + Float(FloatTy::F64) if signed => Scalar::from_f64(Double::from_i128(v as i128).value), + Float(FloatTy::F32) => Scalar::from_f32(Single::from_u128(v).value), + Float(FloatTy::F64) => Scalar::from_f64(Double::from_u128(v).value), + + Char => { + // `u8` to `char` cast + Scalar::from_u32(u8::try_from(v).unwrap().into()) + } + + // Casts to bool are not permitted by rustc, no need to handle them here. + _ => span_bug!(self.cur_span(), "invalid int to {:?} cast", cast_ty), + }) + } + + fn cast_from_float(&self, f: F, dest_ty: Ty<'tcx>) -> Scalar + where + F: Float + Into> + FloatConvert + FloatConvert, + { + use rustc_type_ir::sty::TyKind::*; + match *dest_ty.kind() { + // float -> uint + Uint(t) => { + let size = Integer::from_uint_ty(self, t).size(); + // `to_u128` is a saturating cast, which is what we need + // (https://doc.rust-lang.org/nightly/nightly-rustc/rustc_apfloat/trait.Float.html#method.to_i128_r). + let v = f.to_u128(size.bits_usize()).value; + // This should already fit the bit width + Scalar::from_uint(v, size) + } + // float -> int + Int(t) => { + let size = Integer::from_int_ty(self, t).size(); + // `to_i128` is a saturating cast, which is what we need + // (https://doc.rust-lang.org/nightly/nightly-rustc/rustc_apfloat/trait.Float.html#method.to_i128_r). + let v = f.to_i128(size.bits_usize()).value; + Scalar::from_int(v, size) + } + // float -> f32 + Float(FloatTy::F32) => Scalar::from_f32(f.convert(&mut false).value), + // float -> f64 + Float(FloatTy::F64) => Scalar::from_f64(f.convert(&mut false).value), + // That's it. 
+ _ => span_bug!(self.cur_span(), "invalid float to {:?} cast", dest_ty), + } + } + + fn unsize_into_ptr( + &mut self, + src: &OpTy<'tcx, M::Provenance>, + dest: &PlaceTy<'tcx, M::Provenance>, + // The pointee types + source_ty: Ty<'tcx>, + cast_ty: Ty<'tcx>, + ) -> InterpResult<'tcx> { + // A -> A conversion + let (src_pointee_ty, dest_pointee_ty) = + self.tcx.struct_lockstep_tails_erasing_lifetimes(source_ty, cast_ty, self.param_env); + + match (&src_pointee_ty.kind(), &dest_pointee_ty.kind()) { + (&ty::Array(_, length), &ty::Slice(_)) => { + let ptr = self.read_immediate(src)?.to_scalar()?; + // u64 cast is from usize to u64, which is always good + let val = + Immediate::new_slice(ptr, length.eval_usize(*self.tcx, self.param_env), self); + self.write_immediate(val, dest) + } + (&ty::Dynamic(ref data_a, ..), &ty::Dynamic(ref data_b, ..)) => { + let (old_data, old_vptr) = self.read_immediate(src)?.to_scalar_pair()?; + let old_vptr = old_vptr.to_pointer(self)?; + let (ty, old_trait) = self.get_ptr_vtable(old_vptr)?; + if old_trait != data_a.principal() { + throw_ub_format!("upcast on a pointer whose vtable does not match its type"); + } + let new_vptr = self.get_vtable_ptr(ty, data_b.principal())?; + self.write_immediate(Immediate::new_dyn_trait(old_data, new_vptr, self), dest) + } + (_, &ty::Dynamic(ref data, _)) => { + // Initial cast from sized to dyn trait + let vtable = self.get_vtable_ptr(src_pointee_ty, data.principal())?; + let ptr = self.read_immediate(src)?.to_scalar()?; + let val = Immediate::new_dyn_trait(ptr, vtable, &*self.tcx); + self.write_immediate(val, dest) + } + + _ => { + span_bug!(self.cur_span(), "invalid unsizing {:?} -> {:?}", src.layout.ty, cast_ty) + } + } + } + + fn unsize_into( + &mut self, + src: &OpTy<'tcx, M::Provenance>, + cast_ty: TyAndLayout<'tcx>, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + trace!("Unsizing {:?} of type {} into {:?}", *src, src.layout.ty, cast_ty.ty); + match (&src.layout.ty.kind(), &cast_ty.ty.kind()) { + (&ty::Ref(_, s, _), &ty::Ref(_, c, _) | &ty::RawPtr(TypeAndMut { ty: c, .. })) + | (&ty::RawPtr(TypeAndMut { ty: s, .. }), &ty::RawPtr(TypeAndMut { ty: c, .. 
})) => { + self.unsize_into_ptr(src, dest, *s, *c) + } + (&ty::Adt(def_a, _), &ty::Adt(def_b, _)) => { + assert_eq!(def_a, def_b); + + // unsizing of generic struct with pointer fields + // Example: `Arc` -> `Arc` + // here we need to increase the size of every &T thin ptr field to a fat ptr + for i in 0..src.layout.fields.count() { + let cast_ty_field = cast_ty.field(self, i); + if cast_ty_field.is_zst() { + continue; + } + let src_field = self.operand_field(src, i)?; + let dst_field = self.place_field(dest, i)?; + if src_field.layout.ty == cast_ty_field.ty { + self.copy_op(&src_field, &dst_field, /*allow_transmute*/ false)?; + } else { + self.unsize_into(&src_field, cast_ty_field, &dst_field)?; + } + } + Ok(()) + } + _ => span_bug!( + self.cur_span(), + "unsize_into: invalid conversion: {:?} -> {:?}", + src.layout, + dest.layout + ), + } + } +} diff --git a/compiler/rustc_const_eval/src/interpret/eval_context.rs b/compiler/rustc_const_eval/src/interpret/eval_context.rs new file mode 100644 index 000000000..150d6589b --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/eval_context.rs @@ -0,0 +1,1019 @@ +use std::cell::Cell; +use std::fmt; +use std::mem; + +use rustc_hir::{self as hir, def_id::DefId, definitions::DefPathData}; +use rustc_index::vec::IndexVec; +use rustc_middle::mir; +use rustc_middle::mir::interpret::{InterpError, InvalidProgramInfo}; +use rustc_middle::ty::layout::{ + self, FnAbiError, FnAbiOfHelpers, FnAbiRequest, LayoutError, LayoutOf, LayoutOfHelpers, + TyAndLayout, +}; +use rustc_middle::ty::{ + self, query::TyCtxtAt, subst::SubstsRef, ParamEnv, Ty, TyCtxt, TypeFoldable, +}; +use rustc_mir_dataflow::storage::always_storage_live_locals; +use rustc_session::Limit; +use rustc_span::{Pos, Span}; +use rustc_target::abi::{call::FnAbi, Align, HasDataLayout, Size, TargetDataLayout}; + +use super::{ + AllocId, GlobalId, Immediate, InterpErrorInfo, InterpResult, MPlaceTy, Machine, MemPlace, + MemPlaceMeta, Memory, MemoryKind, Operand, Place, PlaceTy, PointerArithmetic, Provenance, + Scalar, ScalarMaybeUninit, StackPopJump, +}; +use crate::transform::validate::equal_up_to_regions; + +pub struct InterpCx<'mir, 'tcx, M: Machine<'mir, 'tcx>> { + /// Stores the `Machine` instance. + /// + /// Note: the stack is provided by the machine. + pub machine: M, + + /// The results of the type checker, from rustc. + /// The span in this is the "root" of the evaluation, i.e., the const + /// we are evaluating (if this is CTFE). + pub tcx: TyCtxtAt<'tcx>, + + /// Bounds in scope for polymorphic evaluations. + pub(crate) param_env: ty::ParamEnv<'tcx>, + + /// The virtual memory system. + pub memory: Memory<'mir, 'tcx, M>, + + /// The recursion limit (cached from `tcx.recursion_limit(())`) + pub recursion_limit: Limit, +} + +// The Phantomdata exists to prevent this type from being `Send`. If it were sent across a thread +// boundary and dropped in the other thread, it would exit the span in the other thread. +struct SpanGuard(tracing::Span, std::marker::PhantomData<*const u8>); + +impl SpanGuard { + /// By default a `SpanGuard` does nothing. + fn new() -> Self { + Self(tracing::Span::none(), std::marker::PhantomData) + } + + /// If a span is entered, we exit the previous span (if any, normally none) and enter the + /// new span. This is mainly so we don't have to use `Option` for the `tracing_span` field of + /// `Frame` by creating a dummy span to being with and then entering it once the frame has + /// been pushed. 
+ fn enter(&mut self, span: tracing::Span) { + // This executes the destructor on the previous instance of `SpanGuard`, ensuring that + // we never enter or exit more spans than vice versa. Unless you `mem::leak`, then we + // can't protect the tracing stack, but that'll just lead to weird logging, no actual + // problems. + *self = Self(span, std::marker::PhantomData); + self.0.with_subscriber(|(id, dispatch)| { + dispatch.enter(id); + }); + } +} + +impl Drop for SpanGuard { + fn drop(&mut self) { + self.0.with_subscriber(|(id, dispatch)| { + dispatch.exit(id); + }); + } +} + +/// A stack frame. +pub struct Frame<'mir, 'tcx, Prov: Provenance = AllocId, Extra = ()> { + //////////////////////////////////////////////////////////////////////////////// + // Function and callsite information + //////////////////////////////////////////////////////////////////////////////// + /// The MIR for the function called on this frame. + pub body: &'mir mir::Body<'tcx>, + + /// The def_id and substs of the current function. + pub instance: ty::Instance<'tcx>, + + /// Extra data for the machine. + pub extra: Extra, + + //////////////////////////////////////////////////////////////////////////////// + // Return place and locals + //////////////////////////////////////////////////////////////////////////////// + /// Work to perform when returning from this function. + pub return_to_block: StackPopCleanup, + + /// The location where the result of the current stack frame should be written to, + /// and its layout in the caller. + pub return_place: PlaceTy<'tcx, Prov>, + + /// The list of locals for this stack frame, stored in order as + /// `[return_ptr, arguments..., variables..., temporaries...]`. + /// The locals are stored as `Option`s. + /// `None` represents a local that is currently dead, while a live local + /// can either directly contain `Scalar` or refer to some part of an `Allocation`. + /// + /// Do *not* access this directly; always go through the machine hook! + pub locals: IndexVec>, + + /// The span of the `tracing` crate is stored here. + /// When the guard is dropped, the span is exited. This gives us + /// a full stack trace on all tracing statements. + tracing_span: SpanGuard, + + //////////////////////////////////////////////////////////////////////////////// + // Current position within the function + //////////////////////////////////////////////////////////////////////////////// + /// If this is `Err`, we are not currently executing any particular statement in + /// this frame (can happen e.g. during frame initialization, and during unwinding on + /// frames without cleanup code). + /// We basically abuse `Result` as `Either`. + /// + /// Needs to be public because ConstProp does unspeakable things to it. + pub loc: Result, +} + +/// What we store about a frame in an interpreter backtrace. +#[derive(Debug)] +pub struct FrameInfo<'tcx> { + pub instance: ty::Instance<'tcx>, + pub span: Span, + pub lint_root: Option, +} + +/// Unwind information. +#[derive(Clone, Copy, Eq, PartialEq, Debug)] +pub enum StackPopUnwind { + /// The cleanup block. + Cleanup(mir::BasicBlock), + /// No cleanup needs to be done. + Skip, + /// Unwinding is not allowed (UB). + NotAllowed, +} + +#[derive(Clone, Copy, Eq, PartialEq, Debug)] // Miri debug-prints these +pub enum StackPopCleanup { + /// Jump to the next block in the caller, or cause UB if None (that's a function + /// that may never return). Also store layout of return place so + /// we can validate it at that layout. 
+ /// `ret` stores the block we jump to on a normal return, while `unwind` + /// stores the block used for cleanup during unwinding. + Goto { ret: Option, unwind: StackPopUnwind }, + /// The root frame of the stack: nowhere else to jump to. + /// `cleanup` says whether locals are deallocated. Static computation + /// wants them leaked to intern what they need (and just throw away + /// the entire `ecx` when it is done). + Root { cleanup: bool }, +} + +/// State of a local variable including a memoized layout +#[derive(Clone, Debug)] +pub struct LocalState<'tcx, Prov: Provenance = AllocId> { + pub value: LocalValue, + /// Don't modify if `Some`, this is only used to prevent computing the layout twice + pub layout: Cell>>, +} + +/// Current value of a local variable +#[derive(Copy, Clone, Debug)] // Miri debug-prints these +pub enum LocalValue { + /// This local is not currently alive, and cannot be used at all. + Dead, + /// A normal, live local. + /// Mostly for convenience, we re-use the `Operand` type here. + /// This is an optimization over just always having a pointer here; + /// we can thus avoid doing an allocation when the local just stores + /// immediate values *and* never has its address taken. + Live(Operand), +} + +impl<'tcx, Prov: Provenance + 'static> LocalState<'tcx, Prov> { + /// Read the local's value or error if the local is not yet live or not live anymore. + /// + /// Note: This may only be invoked from the `Machine::access_local` hook and not from + /// anywhere else. You may be invalidating machine invariants if you do! + #[inline] + pub fn access(&self) -> InterpResult<'tcx, &Operand> { + match &self.value { + LocalValue::Dead => throw_ub!(DeadLocal), // could even be "invalid program"? + LocalValue::Live(val) => Ok(val), + } + } + + /// Overwrite the local. If the local can be overwritten in place, return a reference + /// to do so; otherwise return the `MemPlace` to consult instead. + /// + /// Note: This may only be invoked from the `Machine::access_local_mut` hook and not from + /// anywhere else. You may be invalidating machine invariants if you do! + #[inline] + pub fn access_mut(&mut self) -> InterpResult<'tcx, &mut Operand> { + match &mut self.value { + LocalValue::Dead => throw_ub!(DeadLocal), // could even be "invalid program"? + LocalValue::Live(val) => Ok(val), + } + } +} + +impl<'mir, 'tcx, Prov: Provenance> Frame<'mir, 'tcx, Prov> { + pub fn with_extra(self, extra: Extra) -> Frame<'mir, 'tcx, Prov, Extra> { + Frame { + body: self.body, + instance: self.instance, + return_to_block: self.return_to_block, + return_place: self.return_place, + locals: self.locals, + loc: self.loc, + extra, + tracing_span: self.tracing_span, + } + } +} + +impl<'mir, 'tcx, Prov: Provenance, Extra> Frame<'mir, 'tcx, Prov, Extra> { + /// Get the current location within the Frame. + /// + /// If this is `Err`, we are not currently executing any particular statement in + /// this frame (can happen e.g. during frame initialization, and during unwinding on + /// frames without cleanup code). + /// We basically abuse `Result` as `Either`. + /// + /// Used by priroda. + pub fn current_loc(&self) -> Result { + self.loc + } + + /// Return the `SourceInfo` of the current instruction. 
+ pub fn current_source_info(&self) -> Option<&mir::SourceInfo> { + self.loc.ok().map(|loc| self.body.source_info(loc)) + } + + pub fn current_span(&self) -> Span { + match self.loc { + Ok(loc) => self.body.source_info(loc).span, + Err(span) => span, + } + } +} + +impl<'tcx> fmt::Display for FrameInfo<'tcx> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + ty::tls::with(|tcx| { + if tcx.def_key(self.instance.def_id()).disambiguated_data.data + == DefPathData::ClosureExpr + { + write!(f, "inside closure")?; + } else { + write!(f, "inside `{}`", self.instance)?; + } + if !self.span.is_dummy() { + let sm = tcx.sess.source_map(); + let lo = sm.lookup_char_pos(self.span.lo()); + write!( + f, + " at {}:{}:{}", + sm.filename_for_diagnostics(&lo.file.name), + lo.line, + lo.col.to_usize() + 1 + )?; + } + Ok(()) + }) + } +} + +impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> HasDataLayout for InterpCx<'mir, 'tcx, M> { + #[inline] + fn data_layout(&self) -> &TargetDataLayout { + &self.tcx.data_layout + } +} + +impl<'mir, 'tcx, M> layout::HasTyCtxt<'tcx> for InterpCx<'mir, 'tcx, M> +where + M: Machine<'mir, 'tcx>, +{ + #[inline] + fn tcx(&self) -> TyCtxt<'tcx> { + *self.tcx + } +} + +impl<'mir, 'tcx, M> layout::HasParamEnv<'tcx> for InterpCx<'mir, 'tcx, M> +where + M: Machine<'mir, 'tcx>, +{ + fn param_env(&self) -> ty::ParamEnv<'tcx> { + self.param_env + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> LayoutOfHelpers<'tcx> for InterpCx<'mir, 'tcx, M> { + type LayoutOfResult = InterpResult<'tcx, TyAndLayout<'tcx>>; + + #[inline] + fn layout_tcx_at_span(&self) -> Span { + // Using the cheap root span for performance. + self.tcx.span + } + + #[inline] + fn handle_layout_err( + &self, + err: LayoutError<'tcx>, + _: Span, + _: Ty<'tcx>, + ) -> InterpErrorInfo<'tcx> { + err_inval!(Layout(err)).into() + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> FnAbiOfHelpers<'tcx> for InterpCx<'mir, 'tcx, M> { + type FnAbiOfResult = InterpResult<'tcx, &'tcx FnAbi<'tcx, Ty<'tcx>>>; + + fn handle_fn_abi_err( + &self, + err: FnAbiError<'tcx>, + _span: Span, + _fn_abi_request: FnAbiRequest<'tcx>, + ) -> InterpErrorInfo<'tcx> { + match err { + FnAbiError::Layout(err) => err_inval!(Layout(err)).into(), + FnAbiError::AdjustForForeignAbi(err) => { + err_inval!(FnAbiAdjustForForeignAbi(err)).into() + } + } + } +} + +/// Test if it is valid for a MIR assignment to assign `src`-typed place to `dest`-typed value. +/// This test should be symmetric, as it is primarily about layout compatibility. +pub(super) fn mir_assign_valid_types<'tcx>( + tcx: TyCtxt<'tcx>, + param_env: ParamEnv<'tcx>, + src: TyAndLayout<'tcx>, + dest: TyAndLayout<'tcx>, +) -> bool { + // Type-changing assignments can happen when subtyping is used. While + // all normal lifetimes are erased, higher-ranked types with their + // late-bound lifetimes are still around and can lead to type + // differences. So we compare ignoring lifetimes. + if equal_up_to_regions(tcx, param_env, src.ty, dest.ty) { + // Make sure the layout is equal, too -- just to be safe. Miri really + // needs layout equality. For performance reason we skip this check when + // the types are equal. Equal types *can* have different layouts when + // enum downcast is involved (as enum variants carry the type of the + // enum), but those should never occur in assignments. 
+ if cfg!(debug_assertions) || src.ty != dest.ty { + assert_eq!(src.layout, dest.layout); + } + true + } else { + false + } +} + +/// Use the already known layout if given (but sanity check in debug mode), +/// or compute the layout. +#[cfg_attr(not(debug_assertions), inline(always))] +pub(super) fn from_known_layout<'tcx>( + tcx: TyCtxtAt<'tcx>, + param_env: ParamEnv<'tcx>, + known_layout: Option>, + compute: impl FnOnce() -> InterpResult<'tcx, TyAndLayout<'tcx>>, +) -> InterpResult<'tcx, TyAndLayout<'tcx>> { + match known_layout { + None => compute(), + Some(known_layout) => { + if cfg!(debug_assertions) { + let check_layout = compute()?; + if !mir_assign_valid_types(tcx.tcx, param_env, check_layout, known_layout) { + span_bug!( + tcx.span, + "expected type differs from actual type.\nexpected: {:?}\nactual: {:?}", + known_layout.ty, + check_layout.ty, + ); + } + } + Ok(known_layout) + } + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + pub fn new( + tcx: TyCtxt<'tcx>, + root_span: Span, + param_env: ty::ParamEnv<'tcx>, + machine: M, + ) -> Self { + InterpCx { + machine, + tcx: tcx.at(root_span), + param_env, + memory: Memory::new(), + recursion_limit: tcx.recursion_limit(), + } + } + + #[inline(always)] + pub fn cur_span(&self) -> Span { + // This deliberately does *not* honor `requires_caller_location` since it is used for much + // more than just panics. + self.stack().last().map_or(self.tcx.span, |f| f.current_span()) + } + + #[inline(always)] + pub(crate) fn stack(&self) -> &[Frame<'mir, 'tcx, M::Provenance, M::FrameExtra>] { + M::stack(self) + } + + #[inline(always)] + pub(crate) fn stack_mut( + &mut self, + ) -> &mut Vec> { + M::stack_mut(self) + } + + #[inline(always)] + pub fn frame_idx(&self) -> usize { + let stack = self.stack(); + assert!(!stack.is_empty()); + stack.len() - 1 + } + + #[inline(always)] + pub fn frame(&self) -> &Frame<'mir, 'tcx, M::Provenance, M::FrameExtra> { + self.stack().last().expect("no call frames exist") + } + + #[inline(always)] + pub fn frame_mut(&mut self) -> &mut Frame<'mir, 'tcx, M::Provenance, M::FrameExtra> { + self.stack_mut().last_mut().expect("no call frames exist") + } + + #[inline(always)] + pub(super) fn body(&self) -> &'mir mir::Body<'tcx> { + self.frame().body + } + + #[inline(always)] + pub fn sign_extend(&self, value: u128, ty: TyAndLayout<'_>) -> u128 { + assert!(ty.abi.is_signed()); + ty.size.sign_extend(value) + } + + #[inline(always)] + pub fn truncate(&self, value: u128, ty: TyAndLayout<'_>) -> u128 { + ty.size.truncate(value) + } + + #[inline] + pub fn type_is_freeze(&self, ty: Ty<'tcx>) -> bool { + ty.is_freeze(self.tcx, self.param_env) + } + + pub fn load_mir( + &self, + instance: ty::InstanceDef<'tcx>, + promoted: Option, + ) -> InterpResult<'tcx, &'tcx mir::Body<'tcx>> { + let def = instance.with_opt_param(); + trace!("load mir(instance={:?}, promoted={:?})", instance, promoted); + let body = if let Some(promoted) = promoted { + &self.tcx.promoted_mir_opt_const_arg(def)[promoted] + } else { + M::load_mir(self, instance)? + }; + // do not continue if typeck errors occurred (can only occur in local crate) + if let Some(err) = body.tainted_by_errors { + throw_inval!(AlreadyReported(err)); + } + Ok(body) + } + + /// Call this on things you got out of the MIR (so it is as generic as the current + /// stack frame), to bring it into the proper environment for this interpreter. 
+ pub(super) fn subst_from_current_frame_and_normalize_erasing_regions>( + &self, + value: T, + ) -> Result> { + self.subst_from_frame_and_normalize_erasing_regions(self.frame(), value) + } + + /// Call this on things you got out of the MIR (so it is as generic as the provided + /// stack frame), to bring it into the proper environment for this interpreter. + pub(super) fn subst_from_frame_and_normalize_erasing_regions>( + &self, + frame: &Frame<'mir, 'tcx, M::Provenance, M::FrameExtra>, + value: T, + ) -> Result> { + frame + .instance + .try_subst_mir_and_normalize_erasing_regions(*self.tcx, self.param_env, value) + .map_err(|e| { + self.tcx.sess.delay_span_bug( + self.cur_span(), + format!("failed to normalize {}", e.get_type_for_failure()).as_str(), + ); + + InterpError::InvalidProgram(InvalidProgramInfo::TooGeneric) + }) + } + + /// The `substs` are assumed to already be in our interpreter "universe" (param_env). + pub(super) fn resolve( + &self, + def: ty::WithOptConstParam, + substs: SubstsRef<'tcx>, + ) -> InterpResult<'tcx, ty::Instance<'tcx>> { + trace!("resolve: {:?}, {:#?}", def, substs); + trace!("param_env: {:#?}", self.param_env); + trace!("substs: {:#?}", substs); + match ty::Instance::resolve_opt_const_arg(*self.tcx, self.param_env, def, substs) { + Ok(Some(instance)) => Ok(instance), + Ok(None) => throw_inval!(TooGeneric), + + // FIXME(eddyb) this could be a bit more specific than `AlreadyReported`. + Err(error_reported) => throw_inval!(AlreadyReported(error_reported)), + } + } + + #[inline(always)] + pub fn layout_of_local( + &self, + frame: &Frame<'mir, 'tcx, M::Provenance, M::FrameExtra>, + local: mir::Local, + layout: Option>, + ) -> InterpResult<'tcx, TyAndLayout<'tcx>> { + // `const_prop` runs into this with an invalid (empty) frame, so we + // have to support that case (mostly by skipping all caching). + match frame.locals.get(local).and_then(|state| state.layout.get()) { + None => { + let layout = from_known_layout(self.tcx, self.param_env, layout, || { + let local_ty = frame.body.local_decls[local].ty; + let local_ty = + self.subst_from_frame_and_normalize_erasing_regions(frame, local_ty)?; + self.layout_of(local_ty) + })?; + if let Some(state) = frame.locals.get(local) { + // Layouts of locals are requested a lot, so we cache them. + state.layout.set(Some(layout)); + } + Ok(layout) + } + Some(layout) => Ok(layout), + } + } + + /// Returns the actual dynamic size and alignment of the place at the given type. + /// Only the "meta" (metadata) part of the place matters. + /// This can fail to provide an answer for extern types. + pub(super) fn size_and_align_of( + &self, + metadata: &MemPlaceMeta, + layout: &TyAndLayout<'tcx>, + ) -> InterpResult<'tcx, Option<(Size, Align)>> { + if !layout.is_unsized() { + return Ok(Some((layout.size, layout.align.abi))); + } + match layout.ty.kind() { + ty::Adt(..) | ty::Tuple(..) => { + // First get the size of all statically known fields. + // Don't use type_of::sizing_type_of because that expects t to be sized, + // and it also rounds up to alignment, which we want to avoid, + // as the unsized field's alignment could be smaller. 
+ assert!(!layout.ty.is_simd()); + assert!(layout.fields.count() > 0); + trace!("DST layout: {:?}", layout); + + let sized_size = layout.fields.offset(layout.fields.count() - 1); + let sized_align = layout.align.abi; + trace!( + "DST {} statically sized prefix size: {:?} align: {:?}", + layout.ty, + sized_size, + sized_align + ); + + // Recurse to get the size of the dynamically sized field (must be + // the last field). Can't have foreign types here, how would we + // adjust alignment and size for them? + let field = layout.field(self, layout.fields.count() - 1); + let Some((unsized_size, unsized_align)) = self.size_and_align_of(metadata, &field)? else { + // A field with an extern type. We don't know the actual dynamic size + // or the alignment. + return Ok(None); + }; + + // FIXME (#26403, #27023): We should be adding padding + // to `sized_size` (to accommodate the `unsized_align` + // required of the unsized field that follows) before + // summing it with `sized_size`. (Note that since #26403 + // is unfixed, we do not yet add the necessary padding + // here. But this is where the add would go.) + + // Return the sum of sizes and max of aligns. + let size = sized_size + unsized_size; // `Size` addition + + // Choose max of two known alignments (combined value must + // be aligned according to more restrictive of the two). + let align = sized_align.max(unsized_align); + + // Issue #27023: must add any necessary padding to `size` + // (to make it a multiple of `align`) before returning it. + let size = size.align_to(align); + + // Check if this brought us over the size limit. + if size > self.max_size_of_val() { + throw_ub!(InvalidMeta("total size is bigger than largest supported object")); + } + Ok(Some((size, align))) + } + ty::Dynamic(..) => { + let vtable = metadata.unwrap_meta().to_pointer(self)?; + // Read size and align from vtable (already checks size). + Ok(Some(self.get_vtable_size_and_align(vtable)?)) + } + + ty::Slice(_) | ty::Str => { + let len = metadata.unwrap_meta().to_machine_usize(self)?; + let elem = layout.field(self, 0); + + // Make sure the slice is not too big. + let size = elem.size.bytes().saturating_mul(len); // we rely on `max_size_of_val` being smaller than `u64::MAX`. + let size = Size::from_bytes(size); + if size > self.max_size_of_val() { + throw_ub!(InvalidMeta("slice is bigger than largest supported object")); + } + Ok(Some((size, elem.align.abi))) + } + + ty::Foreign(_) => Ok(None), + + _ => span_bug!(self.cur_span(), "size_and_align_of::<{:?}> not supported", layout.ty), + } + } + #[inline] + pub fn size_and_align_of_mplace( + &self, + mplace: &MPlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, Option<(Size, Align)>> { + self.size_and_align_of(&mplace.meta, &mplace.layout) + } + + #[instrument(skip(self, body, return_place, return_to_block), level = "debug")] + pub fn push_stack_frame( + &mut self, + instance: ty::Instance<'tcx>, + body: &'mir mir::Body<'tcx>, + return_place: &PlaceTy<'tcx, M::Provenance>, + return_to_block: StackPopCleanup, + ) -> InterpResult<'tcx> { + trace!("body: {:#?}", body); + // first push a stack frame so we have access to the local substs + let pre_frame = Frame { + body, + loc: Err(body.span), // Span used for errors caused during preamble. 
+ return_to_block, + return_place: return_place.clone(), + // empty local array, we fill it in below, after we are inside the stack frame and + // all methods actually know about the frame + locals: IndexVec::new(), + instance, + tracing_span: SpanGuard::new(), + extra: (), + }; + let frame = M::init_frame_extra(self, pre_frame)?; + self.stack_mut().push(frame); + + // Make sure all the constants required by this frame evaluate successfully (post-monomorphization check). + for const_ in &body.required_consts { + let span = const_.span; + let const_ = + self.subst_from_current_frame_and_normalize_erasing_regions(const_.literal)?; + self.mir_const_to_op(&const_, None).map_err(|err| { + // If there was an error, set the span of the current frame to this constant. + // Avoiding doing this when evaluation succeeds. + self.frame_mut().loc = Err(span); + err + })?; + } + + // Most locals are initially dead. + let dummy = LocalState { value: LocalValue::Dead, layout: Cell::new(None) }; + let mut locals = IndexVec::from_elem(dummy, &body.local_decls); + + // Now mark those locals as live that have no `Storage*` annotations. + let always_live = always_storage_live_locals(self.body()); + for local in locals.indices() { + if always_live.contains(local) { + locals[local].value = LocalValue::Live(Operand::Immediate(Immediate::Uninit)); + } + } + // done + self.frame_mut().locals = locals; + M::after_stack_push(self)?; + self.frame_mut().loc = Ok(mir::Location::START); + + let span = info_span!("frame", "{}", instance); + self.frame_mut().tracing_span.enter(span); + + Ok(()) + } + + /// Jump to the given block. + #[inline] + pub fn go_to_block(&mut self, target: mir::BasicBlock) { + self.frame_mut().loc = Ok(mir::Location { block: target, statement_index: 0 }); + } + + /// *Return* to the given `target` basic block. + /// Do *not* use for unwinding! Use `unwind_to_block` instead. + /// + /// If `target` is `None`, that indicates the function cannot return, so we raise UB. + pub fn return_to_block(&mut self, target: Option) -> InterpResult<'tcx> { + if let Some(target) = target { + self.go_to_block(target); + Ok(()) + } else { + throw_ub!(Unreachable) + } + } + + /// *Unwind* to the given `target` basic block. + /// Do *not* use for returning! Use `return_to_block` instead. + /// + /// If `target` is `StackPopUnwind::Skip`, that indicates the function does not need cleanup + /// during unwinding, and we will just keep propagating that upwards. + /// + /// If `target` is `StackPopUnwind::NotAllowed`, that indicates the function does not allow + /// unwinding, and doing so is UB. + pub fn unwind_to_block(&mut self, target: StackPopUnwind) -> InterpResult<'tcx> { + self.frame_mut().loc = match target { + StackPopUnwind::Cleanup(block) => Ok(mir::Location { block, statement_index: 0 }), + StackPopUnwind::Skip => Err(self.frame_mut().body.span), + StackPopUnwind::NotAllowed => { + throw_ub_format!("unwinding past a stack frame that does not allow unwinding") + } + }; + Ok(()) + } + + /// Pops the current frame from the stack, deallocating the + /// memory for allocated locals. + /// + /// If `unwinding` is `false`, then we are performing a normal return + /// from a function. In this case, we jump back into the frame of the caller, + /// and continue execution as normal. + /// + /// If `unwinding` is `true`, then we are in the middle of a panic, + /// and need to unwind this frame. 
In this case, we jump to the + /// `cleanup` block for the function, which is responsible for running + /// `Drop` impls for any locals that have been initialized at this point. + /// The cleanup block ends with a special `Resume` terminator, which will + /// cause us to continue unwinding. + #[instrument(skip(self), level = "debug")] + pub(super) fn pop_stack_frame(&mut self, unwinding: bool) -> InterpResult<'tcx> { + info!( + "popping stack frame ({})", + if unwinding { "during unwinding" } else { "returning from function" } + ); + + // Check `unwinding`. + assert_eq!( + unwinding, + match self.frame().loc { + Ok(loc) => self.body().basic_blocks()[loc.block].is_cleanup, + Err(_) => true, + } + ); + if unwinding && self.frame_idx() == 0 { + throw_ub_format!("unwinding past the topmost frame of the stack"); + } + + // Copy return value. Must of course happen *before* we deallocate the locals. + let copy_ret_result = if !unwinding { + let op = self + .local_to_op(self.frame(), mir::RETURN_PLACE, None) + .expect("return place should always be live"); + let dest = self.frame().return_place.clone(); + let err = self.copy_op(&op, &dest, /*allow_transmute*/ true); + trace!("return value: {:?}", self.dump_place(*dest)); + // We delay actually short-circuiting on this error until *after* the stack frame is + // popped, since we want this error to be attributed to the caller, whose type defines + // this transmute. + err + } else { + Ok(()) + }; + + // Cleanup: deallocate locals. + // Usually we want to clean up (deallocate locals), but in a few rare cases we don't. + // We do this while the frame is still on the stack, so errors point to the callee. + let return_to_block = self.frame().return_to_block; + let cleanup = match return_to_block { + StackPopCleanup::Goto { .. } => true, + StackPopCleanup::Root { cleanup, .. } => cleanup, + }; + if cleanup { + // We need to take the locals out, since we need to mutate while iterating. + let locals = mem::take(&mut self.frame_mut().locals); + for local in &locals { + self.deallocate_local(local.value)?; + } + } + + // All right, now it is time to actually pop the frame. + // Note that its locals are gone already, but that's fine. + let frame = + self.stack_mut().pop().expect("tried to pop a stack frame, but there were none"); + // Report error from return value copy, if any. + copy_ret_result?; + + // If we are not doing cleanup, also skip everything else. + if !cleanup { + assert!(self.stack().is_empty(), "only the topmost frame should ever be leaked"); + assert!(!unwinding, "tried to skip cleanup during unwinding"); + // Skip machine hook. + return Ok(()); + } + if M::after_stack_pop(self, frame, unwinding)? == StackPopJump::NoJump { + // The hook already did everything. + return Ok(()); + } + + // Normal return, figure out where to jump. + if unwinding { + // Follow the unwind edge. + let unwind = match return_to_block { + StackPopCleanup::Goto { unwind, .. } => unwind, + StackPopCleanup::Root { .. } => { + panic!("encountered StackPopCleanup::Root when unwinding!") + } + }; + self.unwind_to_block(unwind) + } else { + // Follow the normal return edge. + match return_to_block { + StackPopCleanup::Goto { ret, .. } => self.return_to_block(ret), + StackPopCleanup::Root { .. } => { + assert!( + self.stack().is_empty(), + "only the topmost frame can have StackPopCleanup::Root" + ); + Ok(()) + } + } + } + } + + /// Mark a storage as live, killing the previous content. 
+ pub fn storage_live(&mut self, local: mir::Local) -> InterpResult<'tcx> { + assert!(local != mir::RETURN_PLACE, "Cannot make return place live"); + trace!("{:?} is now live", local); + + let local_val = LocalValue::Live(Operand::Immediate(Immediate::Uninit)); + // StorageLive expects the local to be dead, and marks it live. + let old = mem::replace(&mut self.frame_mut().locals[local].value, local_val); + if !matches!(old, LocalValue::Dead) { + throw_ub_format!("StorageLive on a local that was already live"); + } + Ok(()) + } + + pub fn storage_dead(&mut self, local: mir::Local) -> InterpResult<'tcx> { + assert!(local != mir::RETURN_PLACE, "Cannot make return place dead"); + trace!("{:?} is now dead", local); + + // It is entirely okay for this local to be already dead (at least that's how we currently generate MIR) + let old = mem::replace(&mut self.frame_mut().locals[local].value, LocalValue::Dead); + self.deallocate_local(old)?; + Ok(()) + } + + #[instrument(skip(self), level = "debug")] + fn deallocate_local(&mut self, local: LocalValue) -> InterpResult<'tcx> { + if let LocalValue::Live(Operand::Indirect(MemPlace { ptr, .. })) = local { + // All locals have a backing allocation, even if the allocation is empty + // due to the local having ZST type. Hence we can `unwrap`. + trace!( + "deallocating local {:?}: {:?}", + local, + // Locals always have a `alloc_id` (they are never the result of a int2ptr). + self.dump_alloc(ptr.provenance.unwrap().get_alloc_id().unwrap()) + ); + self.deallocate_ptr(ptr, None, MemoryKind::Stack)?; + }; + Ok(()) + } + + pub fn eval_to_allocation( + &self, + gid: GlobalId<'tcx>, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + // For statics we pick `ParamEnv::reveal_all`, because statics don't have generics + // and thus don't care about the parameter environment. While we could just use + // `self.param_env`, that would mean we invoke the query to evaluate the static + // with different parameter environments, thus causing the static to be evaluated + // multiple times. + let param_env = if self.tcx.is_static(gid.instance.def_id()) { + ty::ParamEnv::reveal_all() + } else { + self.param_env + }; + let param_env = param_env.with_const(); + // Use a precise span for better cycle errors. + let val = self.tcx.at(self.cur_span()).eval_to_allocation_raw(param_env.and(gid))?; + self.raw_const_to_mplace(val) + } + + #[must_use] + pub fn dump_place(&self, place: Place) -> PlacePrinter<'_, 'mir, 'tcx, M> { + PlacePrinter { ecx: self, place } + } + + #[must_use] + pub fn generate_stacktrace(&self) -> Vec> { + let mut frames = Vec::new(); + // This deliberately does *not* honor `requires_caller_location` since it is used for much + // more than just panics. + for frame in self.stack().iter().rev() { + let lint_root = frame.current_source_info().and_then(|source_info| { + match &frame.body.source_scopes[source_info.scope].local_data { + mir::ClearCrossCrate::Set(data) => Some(data.lint_root), + mir::ClearCrossCrate::Clear => None, + } + }); + let span = frame.current_span(); + + frames.push(FrameInfo { span, instance: frame.instance, lint_root }); + } + trace!("generate stacktrace: {:#?}", frames); + frames + } +} + +#[doc(hidden)] +/// Helper struct for the `dump_place` function. 
+pub struct PlacePrinter<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> { + ecx: &'a InterpCx<'mir, 'tcx, M>, + place: Place, +} + +impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> std::fmt::Debug + for PlacePrinter<'a, 'mir, 'tcx, M> +{ + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.place { + Place::Local { frame, local } => { + let mut allocs = Vec::new(); + write!(fmt, "{:?}", local)?; + if frame != self.ecx.frame_idx() { + write!(fmt, " ({} frames up)", self.ecx.frame_idx() - frame)?; + } + write!(fmt, ":")?; + + match self.ecx.stack()[frame].locals[local].value { + LocalValue::Dead => write!(fmt, " is dead")?, + LocalValue::Live(Operand::Immediate(Immediate::Uninit)) => { + write!(fmt, " is uninitialized")? + } + LocalValue::Live(Operand::Indirect(mplace)) => { + write!( + fmt, + " by {} ref {:?}:", + match mplace.meta { + MemPlaceMeta::Meta(meta) => format!(" meta({:?})", meta), + MemPlaceMeta::None => String::new(), + }, + mplace.ptr, + )?; + allocs.extend(mplace.ptr.provenance.map(Provenance::get_alloc_id)); + } + LocalValue::Live(Operand::Immediate(Immediate::Scalar(val))) => { + write!(fmt, " {:?}", val)?; + if let ScalarMaybeUninit::Scalar(Scalar::Ptr(ptr, _size)) = val { + allocs.push(ptr.provenance.get_alloc_id()); + } + } + LocalValue::Live(Operand::Immediate(Immediate::ScalarPair(val1, val2))) => { + write!(fmt, " ({:?}, {:?})", val1, val2)?; + if let ScalarMaybeUninit::Scalar(Scalar::Ptr(ptr, _size)) = val1 { + allocs.push(ptr.provenance.get_alloc_id()); + } + if let ScalarMaybeUninit::Scalar(Scalar::Ptr(ptr, _size)) = val2 { + allocs.push(ptr.provenance.get_alloc_id()); + } + } + } + + write!(fmt, ": {:?}", self.ecx.dump_allocs(allocs.into_iter().flatten().collect())) + } + Place::Ptr(mplace) => match mplace.ptr.provenance.and_then(Provenance::get_alloc_id) { + Some(alloc_id) => { + write!(fmt, "by ref {:?}: {:?}", mplace.ptr, self.ecx.dump_alloc(alloc_id)) + } + ptr => write!(fmt, " integral by ref: {:?}", ptr), + }, + } + } +} diff --git a/compiler/rustc_const_eval/src/interpret/intern.rs b/compiler/rustc_const_eval/src/interpret/intern.rs new file mode 100644 index 000000000..376b8872c --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/intern.rs @@ -0,0 +1,486 @@ +//! This module specifies the type based interner for constants. +//! +//! After a const evaluation has computed a value, before we destroy the const evaluator's session +//! memory, we need to extract all memory allocations to the global memory pool so they stay around. +//! +//! In principle, this is not very complicated: we recursively walk the final value, follow all the +//! pointers, and move all reachable allocations to the global `tcx` memory. The only complication +//! is picking the right mutability for the allocations in a `static` initializer: we want to make +//! as many allocations as possible immutable so LLVM can put them into read-only memory. At the +//! same time, we need to make memory that could be mutated by the program mutable to avoid +//! incorrect compilations. To achieve this, we do a type-based traversal of the final value, +//! tracking mutable and shared references and `UnsafeCell` to determine the current mutability. +//! (In principle, we could skip this type-based part for `const` and promoteds, as they need to be +//! always immutable. At least for `const` however we use this opportunity to reject any `const` +//! that contains allocations whose mutability we cannot identify.) 
+ +use super::validity::RefTracking; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_errors::ErrorGuaranteed; +use rustc_hir as hir; +use rustc_middle::mir::interpret::InterpResult; +use rustc_middle::ty::{self, layout::TyAndLayout, Ty}; + +use rustc_ast::Mutability; + +use super::{ + AllocId, Allocation, ConstAllocation, InterpCx, MPlaceTy, Machine, MemoryKind, PlaceTy, + ValueVisitor, +}; +use crate::const_eval; + +pub trait CompileTimeMachine<'mir, 'tcx, T> = Machine< + 'mir, + 'tcx, + MemoryKind = T, + Provenance = AllocId, + ExtraFnVal = !, + FrameExtra = (), + AllocExtra = (), + MemoryMap = FxHashMap, Allocation)>, +>; + +struct InternVisitor<'rt, 'mir, 'tcx, M: CompileTimeMachine<'mir, 'tcx, const_eval::MemoryKind>> { + /// The ectx from which we intern. + ecx: &'rt mut InterpCx<'mir, 'tcx, M>, + /// Previously encountered safe references. + ref_tracking: &'rt mut RefTracking<(MPlaceTy<'tcx>, InternMode)>, + /// A list of all encountered allocations. After type-based interning, we traverse this list to + /// also intern allocations that are only referenced by a raw pointer or inside a union. + leftover_allocations: &'rt mut FxHashSet, + /// The root kind of the value that we're looking at. This field is never mutated for a + /// particular allocation. It is primarily used to make as many allocations as possible + /// read-only so LLVM can place them in const memory. + mode: InternMode, + /// This field stores whether we are *currently* inside an `UnsafeCell`. This can affect + /// the intern mode of references we encounter. + inside_unsafe_cell: bool, +} + +#[derive(Copy, Clone, Debug, PartialEq, Hash, Eq)] +enum InternMode { + /// A static and its current mutability. Below shared references inside a `static mut`, + /// this is *immutable*, and below mutable references inside an `UnsafeCell`, this + /// is *mutable*. + Static(hir::Mutability), + /// A `const`. + Const, +} + +/// Signalling data structure to ensure we don't recurse +/// into the memory of other constants or statics +struct IsStaticOrFn; + +/// Intern an allocation without looking at its children. +/// `mode` is the mode of the environment where we found this pointer. +/// `mutability` is the mutability of the place to be interned; even if that says +/// `immutable` things might become mutable if `ty` is not frozen. +/// `ty` can be `None` if there is no potential interior mutability +/// to account for (e.g. for vtables). +fn intern_shallow<'rt, 'mir, 'tcx, M: CompileTimeMachine<'mir, 'tcx, const_eval::MemoryKind>>( + ecx: &'rt mut InterpCx<'mir, 'tcx, M>, + leftover_allocations: &'rt mut FxHashSet, + alloc_id: AllocId, + mode: InternMode, + ty: Option>, +) -> Option { + trace!("intern_shallow {:?} with {:?}", alloc_id, mode); + // remove allocation + let tcx = ecx.tcx; + let Some((kind, mut alloc)) = ecx.memory.alloc_map.remove(&alloc_id) else { + // Pointer not found in local memory map. It is either a pointer to the global + // map, or dangling. + // If the pointer is dangling (neither in local nor global memory), we leave it + // to validation to error -- it has the much better error messages, pointing out where + // in the value the dangling reference lies. + // The `delay_span_bug` ensures that we don't forget such a check in validation. 
+ if tcx.try_get_global_alloc(alloc_id).is_none() { + tcx.sess.delay_span_bug(ecx.tcx.span, "tried to intern dangling pointer"); + } + // treat dangling pointers like other statics + // just to stop trying to recurse into them + return Some(IsStaticOrFn); + }; + // This match is just a canary for future changes to `MemoryKind`, which most likely need + // changes in this function. + match kind { + MemoryKind::Stack + | MemoryKind::Machine(const_eval::MemoryKind::Heap) + | MemoryKind::CallerLocation => {} + } + // Set allocation mutability as appropriate. This is used by LLVM to put things into + // read-only memory, and also by Miri when evaluating other globals that + // access this one. + if let InternMode::Static(mutability) = mode { + // For this, we need to take into account `UnsafeCell`. When `ty` is `None`, we assume + // no interior mutability. + let frozen = ty.map_or(true, |ty| ty.is_freeze(ecx.tcx, ecx.param_env)); + // For statics, allocation mutability is the combination of place mutability and + // type mutability. + // The entire allocation needs to be mutable if it contains an `UnsafeCell` anywhere. + let immutable = mutability == Mutability::Not && frozen; + if immutable { + alloc.mutability = Mutability::Not; + } else { + // Just making sure we are not "upgrading" an immutable allocation to mutable. + assert_eq!(alloc.mutability, Mutability::Mut); + } + } else { + // No matter what, *constants are never mutable*. Mutating them is UB. + // See const_eval::machine::MemoryExtra::can_access_statics for why + // immutability is so important. + + // Validation will ensure that there is no `UnsafeCell` on an immutable allocation. + alloc.mutability = Mutability::Not; + }; + // link the alloc id to the actual allocation + leftover_allocations.extend(alloc.relocations().iter().map(|&(_, alloc_id)| alloc_id)); + let alloc = tcx.intern_const_alloc(alloc); + tcx.set_alloc_id_memory(alloc_id, alloc); + None +} + +impl<'rt, 'mir, 'tcx, M: CompileTimeMachine<'mir, 'tcx, const_eval::MemoryKind>> + InternVisitor<'rt, 'mir, 'tcx, M> +{ + fn intern_shallow( + &mut self, + alloc_id: AllocId, + mode: InternMode, + ty: Option>, + ) -> Option { + intern_shallow(self.ecx, self.leftover_allocations, alloc_id, mode, ty) + } +} + +impl<'rt, 'mir, 'tcx: 'mir, M: CompileTimeMachine<'mir, 'tcx, const_eval::MemoryKind>> + ValueVisitor<'mir, 'tcx, M> for InternVisitor<'rt, 'mir, 'tcx, M> +{ + type V = MPlaceTy<'tcx>; + + #[inline(always)] + fn ecx(&self) -> &InterpCx<'mir, 'tcx, M> { + &self.ecx + } + + fn visit_aggregate( + &mut self, + mplace: &MPlaceTy<'tcx>, + fields: impl Iterator>, + ) -> InterpResult<'tcx> { + // We want to walk the aggregate to look for references to intern. While doing that we + // also need to take special care of interior mutability. + // + // As an optimization, however, if the allocation does not contain any references: we don't + // need to do the walk. It can be costly for big arrays for example (e.g. issue #93215). + let is_walk_needed = |mplace: &MPlaceTy<'tcx>| -> InterpResult<'tcx, bool> { + // ZSTs cannot contain pointers, we can avoid the interning walk. + if mplace.layout.is_zst() { + return Ok(false); + } + + // Now, check whether this allocation could contain references. + // + // Note, this check may sometimes not be cheap, so we only do it when the walk we'd like + // to avoid could be expensive: on the potentially larger types, arrays and slices, + // rather than on all aggregates unconditionally. + if matches!(mplace.layout.ty.kind(), ty::Array(..) 
| ty::Slice(..)) { + let Some((size, align)) = self.ecx.size_and_align_of_mplace(&mplace)? else { + // We do the walk if we can't determine the size of the mplace: we may be + // dealing with extern types here in the future. + return Ok(true); + }; + + // If there are no relocations in this allocation, it does not contain references + // that point to another allocation, and we can avoid the interning walk. + if let Some(alloc) = self.ecx.get_ptr_alloc(mplace.ptr, size, align)? { + if !alloc.has_relocations() { + return Ok(false); + } + } else { + // We're encountering a ZST here, and can avoid the walk as well. + return Ok(false); + } + } + + // In the general case, we do the walk. + Ok(true) + }; + + // If this allocation contains no references to intern, we avoid the potentially costly + // walk. + // + // We can do this before the checks for interior mutability below, because only references + // are relevant in that situation, and we're checking if there are any here. + if !is_walk_needed(mplace)? { + return Ok(()); + } + + if let Some(def) = mplace.layout.ty.ty_adt_def() { + if def.is_unsafe_cell() { + // We are crossing over an `UnsafeCell`, we can mutate again. This means that + // References we encounter inside here are interned as pointing to mutable + // allocations. + // Remember the `old` value to handle nested `UnsafeCell`. + let old = std::mem::replace(&mut self.inside_unsafe_cell, true); + let walked = self.walk_aggregate(mplace, fields); + self.inside_unsafe_cell = old; + return walked; + } + } + + self.walk_aggregate(mplace, fields) + } + + fn visit_value(&mut self, mplace: &MPlaceTy<'tcx>) -> InterpResult<'tcx> { + // Handle Reference types, as these are the only relocations supported by const eval. + // Raw pointers (and boxes) are handled by the `leftover_relocations` logic. + let tcx = self.ecx.tcx; + let ty = mplace.layout.ty; + if let ty::Ref(_, referenced_ty, ref_mutability) = *ty.kind() { + let value = self.ecx.read_immediate(&mplace.into())?; + let mplace = self.ecx.ref_to_mplace(&value)?; + assert_eq!(mplace.layout.ty, referenced_ty); + // Handle trait object vtables. + if let ty::Dynamic(..) = + tcx.struct_tail_erasing_lifetimes(referenced_ty, self.ecx.param_env).kind() + { + let ptr = mplace.meta.unwrap_meta().to_pointer(&tcx)?; + if let Some(alloc_id) = ptr.provenance { + // Explicitly choose const mode here, since vtables are immutable, even + // if the reference of the fat pointer is mutable. + self.intern_shallow(alloc_id, InternMode::Const, None); + } else { + // Validation will error (with a better message) on an invalid vtable pointer. + // Let validation show the error message, but make sure it *does* error. + tcx.sess + .delay_span_bug(tcx.span, "vtables pointers cannot be integer pointers"); + } + } + // Check if we have encountered this pointer+layout combination before. + // Only recurse for allocation-backed pointers. + if let Some(alloc_id) = mplace.ptr.provenance { + // Compute the mode with which we intern this. Our goal here is to make as many + // statics as we can immutable so they can be placed in read-only memory by LLVM. + let ref_mode = match self.mode { + InternMode::Static(mutbl) => { + // In statics, merge outer mutability with reference mutability and + // take into account whether we are in an `UnsafeCell`. + + // The only way a mutable reference actually works as a mutable reference is + // by being in a `static mut` directly or behind another mutable reference. 
+ // If there's an immutable reference or we are inside a `static`, then our + // mutable reference is equivalent to an immutable one. As an example: + // `&&mut Foo` is semantically equivalent to `&&Foo` + match ref_mutability { + _ if self.inside_unsafe_cell => { + // Inside an `UnsafeCell` is like inside a `static mut`, the "outer" + // mutability does not matter. + InternMode::Static(ref_mutability) + } + Mutability::Not => { + // A shared reference, things become immutable. + // We do *not* consider `freeze` here: `intern_shallow` considers + // `freeze` for the actual mutability of this allocation; the intern + // mode for references contained in this allocation is tracked more + // precisely when traversing the referenced data (by tracking + // `UnsafeCell`). This makes sure that `&(&i32, &Cell)` still + // has the left inner reference interned into a read-only + // allocation. + InternMode::Static(Mutability::Not) + } + Mutability::Mut => { + // Mutable reference. + InternMode::Static(mutbl) + } + } + } + InternMode::Const => { + // Ignore `UnsafeCell`, everything is immutable. Validity does some sanity + // checking for mutable references that we encounter -- they must all be + // ZST. + InternMode::Const + } + }; + match self.intern_shallow(alloc_id, ref_mode, Some(referenced_ty)) { + // No need to recurse, these are interned already and statics may have + // cycles, so we don't want to recurse there + Some(IsStaticOrFn) => {} + // intern everything referenced by this value. The mutability is taken from the + // reference. It is checked above that mutable references only happen in + // `static mut` + None => self.ref_tracking.track((mplace, ref_mode), || ()), + } + } + Ok(()) + } else { + // Not a reference -- proceed recursively. + self.walk_value(mplace) + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Hash, Eq)] +pub enum InternKind { + /// The `mutability` of the static, ignoring the type which may have interior mutability. + Static(hir::Mutability), + Constant, + Promoted, +} + +/// Intern `ret` and everything it references. +/// +/// This *cannot raise an interpreter error*. Doing so is left to validation, which +/// tracks where in the value we are and thus can show much better error messages. +/// Any errors here would anyway be turned into `const_err` lints, whereas validation failures +/// are hard errors. +#[tracing::instrument(level = "debug", skip(ecx))] +pub fn intern_const_alloc_recursive< + 'mir, + 'tcx: 'mir, + M: CompileTimeMachine<'mir, 'tcx, const_eval::MemoryKind>, +>( + ecx: &mut InterpCx<'mir, 'tcx, M>, + intern_kind: InternKind, + ret: &MPlaceTy<'tcx>, +) -> Result<(), ErrorGuaranteed> { + let tcx = ecx.tcx; + let base_intern_mode = match intern_kind { + InternKind::Static(mutbl) => InternMode::Static(mutbl), + // `Constant` includes array lengths. + InternKind::Constant | InternKind::Promoted => InternMode::Const, + }; + + // Type based interning. + // `ref_tracking` tracks typed references we have already interned and still need to crawl for + // more typed information inside them. + // `leftover_allocations` collects *all* allocations we see, because some might not + // be available in a typed way. They get interned at the end. + let mut ref_tracking = RefTracking::empty(); + let leftover_allocations = &mut FxHashSet::default(); + + // start with the outermost allocation + intern_shallow( + ecx, + leftover_allocations, + // The outermost allocation must exist, because we allocated it with + // `Memory::allocate`. 
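+ // (The `unwrap` cannot fail: a pointer obtained from `allocate` always carries an `AllocId`.)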
+ ret.ptr.provenance.unwrap(), + base_intern_mode, + Some(ret.layout.ty), + ); + + ref_tracking.track((*ret, base_intern_mode), || ()); + + while let Some(((mplace, mode), _)) = ref_tracking.todo.pop() { + let res = InternVisitor { + ref_tracking: &mut ref_tracking, + ecx, + mode, + leftover_allocations, + inside_unsafe_cell: false, + } + .visit_value(&mplace); + // We deliberately *ignore* interpreter errors here. When there is a problem, the remaining + // references are "leftover"-interned, and later validation will show a proper error + // and point at the right part of the value causing the problem. + match res { + Ok(()) => {} + Err(error) => { + ecx.tcx.sess.delay_span_bug( + ecx.tcx.span, + &format!( + "error during interning should later cause validation failure: {}", + error + ), + ); + } + } + } + + // Intern the rest of the allocations as mutable. These might be inside unions, padding, raw + // pointers, ... So we can't intern them according to their type rules + + let mut todo: Vec<_> = leftover_allocations.iter().cloned().collect(); + debug!(?todo); + debug!("dead_alloc_map: {:#?}", ecx.memory.dead_alloc_map); + while let Some(alloc_id) = todo.pop() { + if let Some((_, mut alloc)) = ecx.memory.alloc_map.remove(&alloc_id) { + // We can't call the `intern_shallow` method here, as its logic is tailored to safe + // references and a `leftover_allocations` set (where we only have a todo-list here). + // So we hand-roll the interning logic here again. + match intern_kind { + // Statics may contain mutable allocations even behind relocations. + // Even for immutable statics it would be ok to have mutable allocations behind + // raw pointers, e.g. for `static FOO: *const AtomicUsize = &AtomicUsize::new(42)`. + InternKind::Static(_) => {} + // Raw pointers in promoteds may only point to immutable things so we mark + // everything as immutable. + // It is UB to mutate through a raw pointer obtained via an immutable reference: + // Since all references and pointers inside a promoted must by their very definition + // be created from an immutable reference (and promotion also excludes interior + // mutability), mutating through them would be UB. + // There's no way we can check whether the user is using raw pointers correctly, + // so all we can do is mark this as immutable here. + InternKind::Promoted => { + // See const_eval::machine::MemoryExtra::can_access_statics for why + // immutability is so important. + alloc.mutability = Mutability::Not; + } + InternKind::Constant => { + // If it's a constant, we should not have any "leftovers" as everything + // is tracked by const-checking. + // FIXME: downgrade this to a warning? It rejects some legitimate consts, + // such as `const CONST_RAW: *const Vec = &Vec::new() as *const _;`. + ecx.tcx + .sess + .span_err(ecx.tcx.span, "untyped pointers are not allowed in constant"); + // For better errors later, mark the allocation as immutable. + alloc.mutability = Mutability::Not; + } + } + let alloc = tcx.intern_const_alloc(alloc); + tcx.set_alloc_id_memory(alloc_id, alloc); + for &(_, alloc_id) in alloc.inner().relocations().iter() { + if leftover_allocations.insert(alloc_id) { + todo.push(alloc_id); + } + } + } else if ecx.memory.dead_alloc_map.contains_key(&alloc_id) { + // Codegen does not like dangling pointers, and generally `tcx` assumes that + // all allocations referenced anywhere actually exist. So, make sure we error here. 
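+ // A typical way to get here is a constant whose final value carries the address of a local,
+ // e.g. roughly `const P: *const u32 = { let x = 42; &x as *const u32 };` -- the local's
+ // allocation is already dead when the final value is interned.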
+ let reported = ecx + .tcx + .sess + .span_err(ecx.tcx.span, "encountered dangling pointer in final constant"); + return Err(reported); + } else if ecx.tcx.try_get_global_alloc(alloc_id).is_none() { + // We have hit an `AllocId` that is neither in local or global memory and isn't + // marked as dangling by local memory. That should be impossible. + span_bug!(ecx.tcx.span, "encountered unknown alloc id {:?}", alloc_id); + } + } + Ok(()) +} + +impl<'mir, 'tcx: 'mir, M: super::intern::CompileTimeMachine<'mir, 'tcx, !>> + InterpCx<'mir, 'tcx, M> +{ + /// A helper function that allocates memory for the layout given and gives you access to mutate + /// it. Once your own mutation code is done, the backing `Allocation` is removed from the + /// current `Memory` and returned. + pub fn intern_with_temp_alloc( + &mut self, + layout: TyAndLayout<'tcx>, + f: impl FnOnce( + &mut InterpCx<'mir, 'tcx, M>, + &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, ()>, + ) -> InterpResult<'tcx, ConstAllocation<'tcx>> { + let dest = self.allocate(layout, MemoryKind::Stack)?; + f(self, &dest.into())?; + let mut alloc = self.memory.alloc_map.remove(&dest.ptr.provenance.unwrap()).unwrap().1; + alloc.mutability = Mutability::Not; + Ok(self.tcx.intern_const_alloc(alloc)) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs new file mode 100644 index 000000000..08209eb79 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs @@ -0,0 +1,696 @@ +//! Intrinsics and other functions that the miri engine executes without +//! looking at their MIR. Intrinsics/functions supported here are shared by CTFE +//! and miri. + +use std::convert::TryFrom; + +use rustc_hir::def_id::DefId; +use rustc_middle::mir::{ + self, + interpret::{ConstValue, GlobalId, InterpResult, PointerArithmetic, Scalar}, + BinOp, +}; +use rustc_middle::ty; +use rustc_middle::ty::layout::LayoutOf as _; +use rustc_middle::ty::subst::SubstsRef; +use rustc_middle::ty::{Ty, TyCtxt}; +use rustc_span::symbol::{sym, Symbol}; +use rustc_target::abi::{Abi, Align, Primitive, Size}; + +use super::{ + util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx, Machine, OpTy, PlaceTy, + Pointer, +}; + +mod caller_location; +mod type_name; + +fn numeric_intrinsic(name: Symbol, bits: u128, kind: Primitive) -> Scalar { + let size = match kind { + Primitive::Int(integer, _) => integer.size(), + _ => bug!("invalid `{}` argument: {:?}", name, bits), + }; + let extra = 128 - u128::from(size.bits()); + let bits_out = match name { + sym::ctpop => u128::from(bits.count_ones()), + sym::ctlz => u128::from(bits.leading_zeros()) - extra, + sym::cttz => u128::from((bits << extra).trailing_zeros()) - extra, + sym::bswap => (bits << extra).swap_bytes(), + sym::bitreverse => (bits << extra).reverse_bits(), + _ => bug!("not a numeric intrinsic: {}", name), + }; + Scalar::from_uint(bits_out, size) +} + +/// The logic for all nullary intrinsics is implemented here. These intrinsics don't get evaluated +/// inside an `InterpCx` and instead have their value computed directly from rustc internal info. 
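+/// Currently these are `type_name`, `needs_drop`, `pref_align_of`, `type_id` and `variant_count`.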
+pub(crate) fn eval_nullary_intrinsic<'tcx>( + tcx: TyCtxt<'tcx>, + param_env: ty::ParamEnv<'tcx>, + def_id: DefId, + substs: SubstsRef<'tcx>, +) -> InterpResult<'tcx, ConstValue<'tcx>> { + let tp_ty = substs.type_at(0); + let name = tcx.item_name(def_id); + Ok(match name { + sym::type_name => { + ensure_monomorphic_enough(tcx, tp_ty)?; + let alloc = type_name::alloc_type_name(tcx, tp_ty); + ConstValue::Slice { data: alloc, start: 0, end: alloc.inner().len() } + } + sym::needs_drop => { + ensure_monomorphic_enough(tcx, tp_ty)?; + ConstValue::from_bool(tp_ty.needs_drop(tcx, param_env)) + } + sym::pref_align_of => { + // Correctly handles non-monomorphic calls, so there is no need for ensure_monomorphic_enough. + let layout = tcx.layout_of(param_env.and(tp_ty)).map_err(|e| err_inval!(Layout(e)))?; + ConstValue::from_machine_usize(layout.align.pref.bytes(), &tcx) + } + sym::type_id => { + ensure_monomorphic_enough(tcx, tp_ty)?; + ConstValue::from_u64(tcx.type_id_hash(tp_ty)) + } + sym::variant_count => match tp_ty.kind() { + // Correctly handles non-monomorphic calls, so there is no need for ensure_monomorphic_enough. + ty::Adt(ref adt, _) => { + ConstValue::from_machine_usize(adt.variants().len() as u64, &tcx) + } + ty::Projection(_) + | ty::Opaque(_, _) + | ty::Param(_) + | ty::Bound(_, _) + | ty::Placeholder(_) + | ty::Infer(_) => throw_inval!(TooGeneric), + ty::Bool + | ty::Char + | ty::Int(_) + | ty::Uint(_) + | ty::Float(_) + | ty::Foreign(_) + | ty::Str + | ty::Array(_, _) + | ty::Slice(_) + | ty::RawPtr(_) + | ty::Ref(_, _, _) + | ty::FnDef(_, _) + | ty::FnPtr(_) + | ty::Dynamic(_, _) + | ty::Closure(_, _) + | ty::Generator(_, _, _) + | ty::GeneratorWitness(_) + | ty::Never + | ty::Tuple(_) + | ty::Error(_) => ConstValue::from_machine_usize(0u64, &tcx), + }, + other => bug!("`{}` is not a zero arg intrinsic", other), + }) +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + /// Returns `true` if emulation happened. + /// Here we implement the intrinsics that are common to all Miri instances; individual machines can add their own + /// intrinsic handling. + pub fn emulate_intrinsic( + &mut self, + instance: ty::Instance<'tcx>, + args: &[OpTy<'tcx, M::Provenance>], + dest: &PlaceTy<'tcx, M::Provenance>, + ret: Option, + ) -> InterpResult<'tcx, bool> { + let substs = instance.substs; + let intrinsic_name = self.tcx.item_name(instance.def_id()); + + // First handle intrinsics without return place. + let ret = match ret { + None => match intrinsic_name { + sym::transmute => throw_ub_format!("transmuting to uninhabited type"), + sym::abort => M::abort(self, "the program aborted execution".to_owned())?, + // Unsupported diverging intrinsic. + _ => return Ok(false), + }, + Some(p) => p, + }; + + match intrinsic_name { + sym::caller_location => { + let span = self.find_closest_untracked_caller_location(); + let location = self.alloc_caller_location_for_span(span); + self.write_immediate(location.to_ref(self), dest)?; + } + + sym::min_align_of_val | sym::size_of_val => { + // Avoid `deref_operand` -- this is not a deref, the ptr does not have to be + // dereferenceable! + let place = self.ref_to_mplace(&self.read_immediate(&args[0])?)?; + let (size, align) = self + .size_and_align_of_mplace(&place)? 
+ .ok_or_else(|| err_unsup_format!("`extern type` does not have known layout"))?; + + let result = match intrinsic_name { + sym::min_align_of_val => align.bytes(), + sym::size_of_val => size.bytes(), + _ => bug!(), + }; + + self.write_scalar(Scalar::from_machine_usize(result, self), dest)?; + } + + sym::pref_align_of + | sym::needs_drop + | sym::type_id + | sym::type_name + | sym::variant_count => { + let gid = GlobalId { instance, promoted: None }; + let ty = match intrinsic_name { + sym::pref_align_of | sym::variant_count => self.tcx.types.usize, + sym::needs_drop => self.tcx.types.bool, + sym::type_id => self.tcx.types.u64, + sym::type_name => self.tcx.mk_static_str(), + _ => bug!(), + }; + let val = + self.tcx.const_eval_global_id(self.param_env, gid, Some(self.tcx.span))?; + let val = self.const_val_to_op(val, ty, Some(dest.layout))?; + self.copy_op(&val, dest, /*allow_transmute*/ false)?; + } + + sym::ctpop + | sym::cttz + | sym::cttz_nonzero + | sym::ctlz + | sym::ctlz_nonzero + | sym::bswap + | sym::bitreverse => { + let ty = substs.type_at(0); + let layout_of = self.layout_of(ty)?; + let val = self.read_scalar(&args[0])?.check_init()?; + let bits = val.to_bits(layout_of.size)?; + let kind = match layout_of.abi { + Abi::Scalar(scalar) => scalar.primitive(), + _ => span_bug!( + self.cur_span(), + "{} called on invalid type {:?}", + intrinsic_name, + ty + ), + }; + let (nonzero, intrinsic_name) = match intrinsic_name { + sym::cttz_nonzero => (true, sym::cttz), + sym::ctlz_nonzero => (true, sym::ctlz), + other => (false, other), + }; + if nonzero && bits == 0 { + throw_ub_format!("`{}_nonzero` called on 0", intrinsic_name); + } + let out_val = numeric_intrinsic(intrinsic_name, bits, kind); + self.write_scalar(out_val, dest)?; + } + sym::add_with_overflow | sym::sub_with_overflow | sym::mul_with_overflow => { + let lhs = self.read_immediate(&args[0])?; + let rhs = self.read_immediate(&args[1])?; + let bin_op = match intrinsic_name { + sym::add_with_overflow => BinOp::Add, + sym::sub_with_overflow => BinOp::Sub, + sym::mul_with_overflow => BinOp::Mul, + _ => bug!(), + }; + self.binop_with_overflow( + bin_op, /*force_overflow_checks*/ true, &lhs, &rhs, dest, + )?; + } + sym::saturating_add | sym::saturating_sub => { + let l = self.read_immediate(&args[0])?; + let r = self.read_immediate(&args[1])?; + let val = self.saturating_arith( + if intrinsic_name == sym::saturating_add { BinOp::Add } else { BinOp::Sub }, + &l, + &r, + )?; + self.write_scalar(val, dest)?; + } + sym::discriminant_value => { + let place = self.deref_operand(&args[0])?; + let discr_val = self.read_discriminant(&place.into())?.0; + self.write_scalar(discr_val, dest)?; + } + sym::unchecked_shl + | sym::unchecked_shr + | sym::unchecked_add + | sym::unchecked_sub + | sym::unchecked_mul + | sym::unchecked_div + | sym::unchecked_rem => { + let l = self.read_immediate(&args[0])?; + let r = self.read_immediate(&args[1])?; + let bin_op = match intrinsic_name { + sym::unchecked_shl => BinOp::Shl, + sym::unchecked_shr => BinOp::Shr, + sym::unchecked_add => BinOp::Add, + sym::unchecked_sub => BinOp::Sub, + sym::unchecked_mul => BinOp::Mul, + sym::unchecked_div => BinOp::Div, + sym::unchecked_rem => BinOp::Rem, + _ => bug!(), + }; + let (val, overflowed, _ty) = self.overflowing_binary_op(bin_op, &l, &r)?; + if overflowed { + let layout = self.layout_of(substs.type_at(0))?; + let r_val = r.to_scalar()?.to_bits(layout.size)?; + if let sym::unchecked_shl | sym::unchecked_shr = intrinsic_name { + throw_ub_format!("overflowing shift by 
{} in `{}`", r_val, intrinsic_name); + } else { + throw_ub_format!("overflow executing `{}`", intrinsic_name); + } + } + self.write_scalar(val, dest)?; + } + sym::rotate_left | sym::rotate_right => { + // rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW)) + // rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW)) + let layout = self.layout_of(substs.type_at(0))?; + let val = self.read_scalar(&args[0])?.check_init()?; + let val_bits = val.to_bits(layout.size)?; + let raw_shift = self.read_scalar(&args[1])?.check_init()?; + let raw_shift_bits = raw_shift.to_bits(layout.size)?; + let width_bits = u128::from(layout.size.bits()); + let shift_bits = raw_shift_bits % width_bits; + let inv_shift_bits = (width_bits - shift_bits) % width_bits; + let result_bits = if intrinsic_name == sym::rotate_left { + (val_bits << shift_bits) | (val_bits >> inv_shift_bits) + } else { + (val_bits >> shift_bits) | (val_bits << inv_shift_bits) + }; + let truncated_bits = self.truncate(result_bits, layout); + let result = Scalar::from_uint(truncated_bits, layout.size); + self.write_scalar(result, dest)?; + } + sym::copy => { + self.copy_intrinsic(&args[0], &args[1], &args[2], /*nonoverlapping*/ false)?; + } + sym::write_bytes => { + self.write_bytes_intrinsic(&args[0], &args[1], &args[2])?; + } + sym::offset => { + let ptr = self.read_pointer(&args[0])?; + let offset_count = self.read_scalar(&args[1])?.to_machine_isize(self)?; + let pointee_ty = substs.type_at(0); + + let offset_ptr = self.ptr_offset_inbounds(ptr, pointee_ty, offset_count)?; + self.write_pointer(offset_ptr, dest)?; + } + sym::arith_offset => { + let ptr = self.read_pointer(&args[0])?; + let offset_count = self.read_scalar(&args[1])?.to_machine_isize(self)?; + let pointee_ty = substs.type_at(0); + + let pointee_size = i64::try_from(self.layout_of(pointee_ty)?.size.bytes()).unwrap(); + let offset_bytes = offset_count.wrapping_mul(pointee_size); + let offset_ptr = ptr.wrapping_signed_offset(offset_bytes, self); + self.write_pointer(offset_ptr, dest)?; + } + sym::ptr_offset_from | sym::ptr_offset_from_unsigned => { + let a = self.read_pointer(&args[0])?; + let b = self.read_pointer(&args[1])?; + + let usize_layout = self.layout_of(self.tcx.types.usize)?; + let isize_layout = self.layout_of(self.tcx.types.isize)?; + + // Get offsets for both that are at least relative to the same base. + let (a_offset, b_offset) = + match (self.ptr_try_get_alloc_id(a), self.ptr_try_get_alloc_id(b)) { + (Err(a), Err(b)) => { + // Neither poiner points to an allocation. + // If these are inequal or null, this *will* fail the deref check below. + (a, b) + } + (Err(_), _) | (_, Err(_)) => { + // We managed to find a valid allocation for one pointer, but not the other. + // That means they are definitely not pointing to the same allocation. + throw_ub_format!( + "`{}` called on pointers into different allocations", + intrinsic_name + ); + } + (Ok((a_alloc_id, a_offset, _)), Ok((b_alloc_id, b_offset, _))) => { + // Found allocation for both. They must be into the same allocation. + if a_alloc_id != b_alloc_id { + throw_ub_format!( + "`{}` called on pointers into different allocations", + intrinsic_name + ); + } + // Use these offsets for distance calculation. + (a_offset.bytes(), b_offset.bytes()) + } + }; + + // Compute distance. + let dist = { + // Addresses are unsigned, so this is a `usize` computation. We have to do the + // overflow check separately anyway. 
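+ // For illustration, on a 64-bit target: `a_offset = 4` and `b_offset = 16` make the unsigned
+ // subtraction wrap, so `overflowed` is set; reinterpreted as `isize` the result is the intended
+ // byte distance of -12, which only the signed `ptr_offset_from` accepts.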
+ let (val, overflowed, _ty) = { + let a_offset = ImmTy::from_uint(a_offset, usize_layout); + let b_offset = ImmTy::from_uint(b_offset, usize_layout); + self.overflowing_binary_op(BinOp::Sub, &a_offset, &b_offset)? + }; + if overflowed { + // a < b + if intrinsic_name == sym::ptr_offset_from_unsigned { + throw_ub_format!( + "`{}` called when first pointer has smaller offset than second: {} < {}", + intrinsic_name, + a_offset, + b_offset, + ); + } + // The signed form of the intrinsic allows this. If we interpret the + // difference as isize, we'll get the proper signed difference. If that + // seems *positive*, they were more than isize::MAX apart. + let dist = val.to_machine_isize(self)?; + if dist >= 0 { + throw_ub_format!( + "`{}` called when first pointer is too far before second", + intrinsic_name + ); + } + dist + } else { + // b >= a + let dist = val.to_machine_isize(self)?; + // If converting to isize produced a *negative* result, we had an overflow + // because they were more than isize::MAX apart. + if dist < 0 { + throw_ub_format!( + "`{}` called when first pointer is too far ahead of second", + intrinsic_name + ); + } + dist + } + }; + + // Check that the range between them is dereferenceable ("in-bounds or one past the + // end of the same allocation"). This is like the check in ptr_offset_inbounds. + let min_ptr = if dist >= 0 { b } else { a }; + self.check_ptr_access_align( + min_ptr, + Size::from_bytes(dist.unsigned_abs()), + Align::ONE, + CheckInAllocMsg::OffsetFromTest, + )?; + + // Perform division by size to compute return value. + let ret_layout = if intrinsic_name == sym::ptr_offset_from_unsigned { + assert!(0 <= dist && dist <= self.machine_isize_max()); + usize_layout + } else { + assert!(self.machine_isize_min() <= dist && dist <= self.machine_isize_max()); + isize_layout + }; + let pointee_layout = self.layout_of(substs.type_at(0))?; + // If ret_layout is unsigned, we checked that so is the distance, so we are good. + let val = ImmTy::from_int(dist, ret_layout); + let size = ImmTy::from_int(pointee_layout.size.bytes(), ret_layout); + self.exact_div(&val, &size, dest)?; + } + + sym::transmute => { + self.copy_op(&args[0], dest, /*allow_transmute*/ true)?; + } + sym::assert_inhabited | sym::assert_zero_valid | sym::assert_uninit_valid => { + let ty = instance.substs.type_at(0); + let layout = self.layout_of(ty)?; + + // For *all* intrinsics we first check `is_uninhabited` to give a more specific + // error message. + if layout.abi.is_uninhabited() { + // The run-time intrinsic panics just to get a good backtrace; here we abort + // since there is no problem showing a backtrace even for aborts. 
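+ // (An uninhabited type here would be e.g. an empty enum such as `core::convert::Infallible`.)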
+ M::abort( + self, + format!( + "aborted execution: attempted to instantiate uninhabited type `{}`", + ty + ), + )?; + } + + if intrinsic_name == sym::assert_zero_valid { + let should_panic = !self.tcx.permits_zero_init(layout); + + if should_panic { + M::abort( + self, + format!( + "aborted execution: attempted to zero-initialize type `{}`, which is invalid", + ty + ), + )?; + } + } + + if intrinsic_name == sym::assert_uninit_valid { + let should_panic = !self.tcx.permits_uninit_init(layout); + + if should_panic { + M::abort( + self, + format!( + "aborted execution: attempted to leave type `{}` uninitialized, which is invalid", + ty + ), + )?; + } + } + } + sym::simd_insert => { + let index = u64::from(self.read_scalar(&args[1])?.to_u32()?); + let elem = &args[2]; + let (input, input_len) = self.operand_to_simd(&args[0])?; + let (dest, dest_len) = self.place_to_simd(dest)?; + assert_eq!(input_len, dest_len, "Return vector length must match input length"); + assert!( + index < dest_len, + "Index `{}` must be in bounds of vector with length {}`", + index, + dest_len + ); + + for i in 0..dest_len { + let place = self.mplace_index(&dest, i)?; + let value = if i == index { + elem.clone() + } else { + self.mplace_index(&input, i)?.into() + }; + self.copy_op(&value, &place.into(), /*allow_transmute*/ false)?; + } + } + sym::simd_extract => { + let index = u64::from(self.read_scalar(&args[1])?.to_u32()?); + let (input, input_len) = self.operand_to_simd(&args[0])?; + assert!( + index < input_len, + "index `{}` must be in bounds of vector with length `{}`", + index, + input_len + ); + self.copy_op( + &self.mplace_index(&input, index)?.into(), + dest, + /*allow_transmute*/ false, + )?; + } + sym::likely | sym::unlikely | sym::black_box => { + // These just return their argument + self.copy_op(&args[0], dest, /*allow_transmute*/ false)?; + } + sym::assume => { + let cond = self.read_scalar(&args[0])?.check_init()?.to_bool()?; + if !cond { + throw_ub_format!("`assume` intrinsic called with `false`"); + } + } + sym::raw_eq => { + let result = self.raw_eq_intrinsic(&args[0], &args[1])?; + self.write_scalar(result, dest)?; + } + + sym::vtable_size => { + let ptr = self.read_pointer(&args[0])?; + let (size, _align) = self.get_vtable_size_and_align(ptr)?; + self.write_scalar(Scalar::from_machine_usize(size.bytes(), self), dest)?; + } + sym::vtable_align => { + let ptr = self.read_pointer(&args[0])?; + let (_size, align) = self.get_vtable_size_and_align(ptr)?; + self.write_scalar(Scalar::from_machine_usize(align.bytes(), self), dest)?; + } + + _ => return Ok(false), + } + + trace!("{:?}", self.dump_place(**dest)); + self.go_to_block(ret); + Ok(true) + } + + pub fn exact_div( + &mut self, + a: &ImmTy<'tcx, M::Provenance>, + b: &ImmTy<'tcx, M::Provenance>, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + // Performs an exact division, resulting in undefined behavior where + // `x % y != 0` or `y == 0` or `x == T::MIN && y == -1`. + // First, check x % y != 0 (or if that computation overflows). + let (res, overflow, _ty) = self.overflowing_binary_op(BinOp::Rem, &a, &b)?; + assert!(!overflow); // All overflow is UB, so this should never return on overflow. + if res.assert_bits(a.layout.size) != 0 { + throw_ub_format!("exact_div: {} cannot be divided by {} without remainder", a, b) + } + // `Rem` says this is all right, so we can let `Div` do its job. 
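+ // E.g. `exact_div(9, 3)` reaches the division below, while `exact_div(10, 3)` was already
+ // rejected above because the remainder is non-zero.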
+ self.binop_ignore_overflow(BinOp::Div, &a, &b, dest) + } + + pub fn saturating_arith( + &self, + mir_op: BinOp, + l: &ImmTy<'tcx, M::Provenance>, + r: &ImmTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, Scalar> { + assert!(matches!(mir_op, BinOp::Add | BinOp::Sub)); + let (val, overflowed, _ty) = self.overflowing_binary_op(mir_op, l, r)?; + Ok(if overflowed { + let size = l.layout.size; + let num_bits = size.bits(); + if l.layout.abi.is_signed() { + // For signed ints the saturated value depends on the sign of the first + // term since the sign of the second term can be inferred from this and + // the fact that the operation has overflowed (if either is 0 no + // overflow can occur) + let first_term: u128 = l.to_scalar()?.to_bits(l.layout.size)?; + let first_term_positive = first_term & (1 << (num_bits - 1)) == 0; + if first_term_positive { + // Negative overflow not possible since the positive first term + // can only increase an (in range) negative term for addition + // or corresponding negated positive term for subtraction + Scalar::from_int(size.signed_int_max(), size) + } else { + // Positive overflow not possible for similar reason + // max negative + Scalar::from_int(size.signed_int_min(), size) + } + } else { + // unsigned + if matches!(mir_op, BinOp::Add) { + // max unsigned + Scalar::from_uint(size.unsigned_int_max(), size) + } else { + // underflow to 0 + Scalar::from_uint(0u128, size) + } + } + } else { + val + }) + } + + /// Offsets a pointer by some multiple of its type, returning an error if the pointer leaves its + /// allocation. For integer pointers, we consider each of them their own tiny allocation of size + /// 0, so offset-by-0 (and only 0) is okay -- except that null cannot be offset by _any_ value. + pub fn ptr_offset_inbounds( + &self, + ptr: Pointer>, + pointee_ty: Ty<'tcx>, + offset_count: i64, + ) -> InterpResult<'tcx, Pointer>> { + // We cannot overflow i64 as a type's size must be <= isize::MAX. + let pointee_size = i64::try_from(self.layout_of(pointee_ty)?.size.bytes()).unwrap(); + // The computed offset, in bytes, must not overflow an isize. + // `checked_mul` enforces a too small bound, but no actual allocation can be big enough for + // the difference to be noticeable. + let offset_bytes = + offset_count.checked_mul(pointee_size).ok_or(err_ub!(PointerArithOverflow))?; + // The offset being in bounds cannot rely on "wrapping around" the address space. + // So, first rule out overflows in the pointer arithmetic. + let offset_ptr = ptr.signed_offset(offset_bytes, self)?; + // ptr and offset_ptr must be in bounds of the same allocated object. This means all of the + // memory between these pointers must be accessible. Note that we do not require the + // pointers to be properly aligned (unlike a read/write operation). + let min_ptr = if offset_bytes >= 0 { ptr } else { offset_ptr }; + // This call handles checking for integer/null pointers. + self.check_ptr_access_align( + min_ptr, + Size::from_bytes(offset_bytes.unsigned_abs()), + Align::ONE, + CheckInAllocMsg::PointerArithmeticTest, + )?; + Ok(offset_ptr) + } + + /// Copy `count*size_of::()` many bytes from `*src` to `*dst`. 
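+ /// Shared by overlapping and non-overlapping copies; the `nonoverlapping` flag is forwarded
+ /// to `mem_copy`, which checks for overlap when the flag is set.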
+ pub(crate) fn copy_intrinsic( + &mut self, + src: &OpTy<'tcx, >::Provenance>, + dst: &OpTy<'tcx, >::Provenance>, + count: &OpTy<'tcx, >::Provenance>, + nonoverlapping: bool, + ) -> InterpResult<'tcx> { + let count = self.read_scalar(&count)?.to_machine_usize(self)?; + let layout = self.layout_of(src.layout.ty.builtin_deref(true).unwrap().ty)?; + let (size, align) = (layout.size, layout.align.abi); + // `checked_mul` enforces a too small bound (the correct one would probably be machine_isize_max), + // but no actual allocation can be big enough for the difference to be noticeable. + let size = size.checked_mul(count, self).ok_or_else(|| { + err_ub_format!( + "overflow computing total size of `{}`", + if nonoverlapping { "copy_nonoverlapping" } else { "copy" } + ) + })?; + + let src = self.read_pointer(&src)?; + let dst = self.read_pointer(&dst)?; + + self.mem_copy(src, align, dst, align, size, nonoverlapping) + } + + pub(crate) fn write_bytes_intrinsic( + &mut self, + dst: &OpTy<'tcx, >::Provenance>, + byte: &OpTy<'tcx, >::Provenance>, + count: &OpTy<'tcx, >::Provenance>, + ) -> InterpResult<'tcx> { + let layout = self.layout_of(dst.layout.ty.builtin_deref(true).unwrap().ty)?; + + let dst = self.read_pointer(&dst)?; + let byte = self.read_scalar(&byte)?.to_u8()?; + let count = self.read_scalar(&count)?.to_machine_usize(self)?; + + // `checked_mul` enforces a too small bound (the correct one would probably be machine_isize_max), + // but no actual allocation can be big enough for the difference to be noticeable. + let len = layout + .size + .checked_mul(count, self) + .ok_or_else(|| err_ub_format!("overflow computing total size of `write_bytes`"))?; + + let bytes = std::iter::repeat(byte).take(len.bytes_usize()); + self.write_bytes_ptr(dst, bytes) + } + + pub(crate) fn raw_eq_intrinsic( + &mut self, + lhs: &OpTy<'tcx, >::Provenance>, + rhs: &OpTy<'tcx, >::Provenance>, + ) -> InterpResult<'tcx, Scalar> { + let layout = self.layout_of(lhs.layout.ty.builtin_deref(true).unwrap().ty)?; + assert!(!layout.is_unsized()); + + let lhs = self.read_pointer(lhs)?; + let rhs = self.read_pointer(rhs)?; + let lhs_bytes = self.read_bytes_ptr(lhs, layout.size)?; + let rhs_bytes = self.read_bytes_ptr(rhs, layout.size)?; + Ok(Scalar::from_bool(lhs_bytes == rhs_bytes)) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics/caller_location.rs b/compiler/rustc_const_eval/src/interpret/intrinsics/caller_location.rs new file mode 100644 index 000000000..5864b9215 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/intrinsics/caller_location.rs @@ -0,0 +1,130 @@ +use std::convert::TryFrom; + +use rustc_ast::Mutability; +use rustc_hir::lang_items::LangItem; +use rustc_middle::mir::TerminatorKind; +use rustc_middle::ty::layout::LayoutOf; +use rustc_middle::ty::subst::Subst; +use rustc_span::{Span, Symbol}; + +use crate::interpret::{ + intrinsics::{InterpCx, Machine}, + MPlaceTy, MemoryKind, Scalar, +}; + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + /// Walks up the callstack from the intrinsic's callsite, searching for the first callsite in a + /// frame which is not `#[track_caller]`. + pub(crate) fn find_closest_untracked_caller_location(&self) -> Span { + for frame in self.stack().iter().rev() { + debug!("find_closest_untracked_caller_location: checking frame {:?}", frame.instance); + + // Assert that the frame we look at is actually executing code currently + // (`loc` is `Err` when we are unwinding and the frame does not require cleanup). 
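+ // (Overall effect: for `#[track_caller] fn inner()` called from `fn outer()`, the walk skips
+ // `inner`'s frame and reports the span of the call inside `outer`.)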
+ let loc = frame.loc.unwrap(); + + // This could be a non-`Call` terminator (such as `Drop`), or not a terminator at all + // (such as `box`). Use the normal span by default. + let mut source_info = *frame.body.source_info(loc); + + // If this is a `Call` terminator, use the `fn_span` instead. + let block = &frame.body.basic_blocks()[loc.block]; + if loc.statement_index == block.statements.len() { + debug!( + "find_closest_untracked_caller_location: got terminator {:?} ({:?})", + block.terminator(), + block.terminator().kind + ); + if let TerminatorKind::Call { fn_span, .. } = block.terminator().kind { + source_info.span = fn_span; + } + } + + // Walk up the `SourceScope`s, in case some of them are from MIR inlining. + // If so, the starting `source_info.span` is in the innermost inlined + // function, and will be replaced with outer callsite spans as long + // as the inlined functions were `#[track_caller]`. + loop { + let scope_data = &frame.body.source_scopes[source_info.scope]; + + if let Some((callee, callsite_span)) = scope_data.inlined { + // Stop inside the most nested non-`#[track_caller]` function, + // before ever reaching its caller (which is irrelevant). + if !callee.def.requires_caller_location(*self.tcx) { + return source_info.span; + } + source_info.span = callsite_span; + } + + // Skip past all of the parents with `inlined: None`. + match scope_data.inlined_parent_scope { + Some(parent) => source_info.scope = parent, + None => break, + } + } + + // Stop inside the most nested non-`#[track_caller]` function, + // before ever reaching its caller (which is irrelevant). + if !frame.instance.def.requires_caller_location(*self.tcx) { + return source_info.span; + } + } + + span_bug!(self.cur_span(), "no non-`#[track_caller]` frame found") + } + + /// Allocate a `const core::panic::Location` with the provided filename and line/column numbers. + pub(crate) fn alloc_caller_location( + &mut self, + filename: Symbol, + line: u32, + col: u32, + ) -> MPlaceTy<'tcx, M::Provenance> { + let loc_details = &self.tcx.sess.opts.unstable_opts.location_detail; + let file = if loc_details.file { + self.allocate_str(filename.as_str(), MemoryKind::CallerLocation, Mutability::Not) + } else { + // FIXME: This creates a new allocation each time. It might be preferable to + // perform this allocation only once, and re-use the `MPlaceTy`. + // See https://github.com/rust-lang/rust/pull/89920#discussion_r730012398 + self.allocate_str("", MemoryKind::CallerLocation, Mutability::Not) + }; + let line = if loc_details.line { Scalar::from_u32(line) } else { Scalar::from_u32(0) }; + let col = if loc_details.column { Scalar::from_u32(col) } else { Scalar::from_u32(0) }; + + // Allocate memory for `CallerLocation` struct. + let loc_ty = self + .tcx + .bound_type_of(self.tcx.require_lang_item(LangItem::PanicLocation, None)) + .subst(*self.tcx, self.tcx.mk_substs([self.tcx.lifetimes.re_erased.into()].iter())); + let loc_layout = self.layout_of(loc_ty).unwrap(); + // This can fail if rustc runs out of memory right here. Trying to emit an error would be + // pointless, since that would require allocating more memory than a Location. + let location = self.allocate(loc_layout, MemoryKind::CallerLocation).unwrap(); + + // Initialize fields. 
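+ // The field order follows `core::panic::Location`: index 0 is the file, 1 the line, 2 the column.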
+ self.write_immediate(file.to_ref(self), &self.mplace_field(&location, 0).unwrap().into()) + .expect("writing to memory we just allocated cannot fail"); + self.write_scalar(line, &self.mplace_field(&location, 1).unwrap().into()) + .expect("writing to memory we just allocated cannot fail"); + self.write_scalar(col, &self.mplace_field(&location, 2).unwrap().into()) + .expect("writing to memory we just allocated cannot fail"); + + location + } + + pub(crate) fn location_triple_for_span(&self, span: Span) -> (Symbol, u32, u32) { + let topmost = span.ctxt().outer_expn().expansion_cause().unwrap_or(span); + let caller = self.tcx.sess.source_map().lookup_char_pos(topmost.lo()); + ( + Symbol::intern(&caller.file.name.prefer_remapped().to_string_lossy()), + u32::try_from(caller.line).unwrap(), + u32::try_from(caller.col_display).unwrap().checked_add(1).unwrap(), + ) + } + + pub fn alloc_caller_location_for_span(&mut self, span: Span) -> MPlaceTy<'tcx, M::Provenance> { + let (file, line, column) = self.location_triple_for_span(span); + self.alloc_caller_location(file, line, column) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics/type_name.rs b/compiler/rustc_const_eval/src/interpret/intrinsics/type_name.rs new file mode 100644 index 000000000..f9847742f --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/intrinsics/type_name.rs @@ -0,0 +1,196 @@ +use rustc_data_structures::intern::Interned; +use rustc_hir::def_id::CrateNum; +use rustc_hir::definitions::DisambiguatedDefPathData; +use rustc_middle::mir::interpret::{Allocation, ConstAllocation}; +use rustc_middle::ty::{ + self, + print::{PrettyPrinter, Print, Printer}, + subst::{GenericArg, GenericArgKind}, + Ty, TyCtxt, +}; +use std::fmt::Write; + +struct AbsolutePathPrinter<'tcx> { + tcx: TyCtxt<'tcx>, + path: String, +} + +impl<'tcx> Printer<'tcx> for AbsolutePathPrinter<'tcx> { + type Error = std::fmt::Error; + + type Path = Self; + type Region = Self; + type Type = Self; + type DynExistential = Self; + type Const = Self; + + fn tcx(&self) -> TyCtxt<'tcx> { + self.tcx + } + + fn print_region(self, _region: ty::Region<'_>) -> Result { + Ok(self) + } + + fn print_type(mut self, ty: Ty<'tcx>) -> Result { + match *ty.kind() { + // Types without identity. + ty::Bool + | ty::Char + | ty::Int(_) + | ty::Uint(_) + | ty::Float(_) + | ty::Str + | ty::Array(_, _) + | ty::Slice(_) + | ty::RawPtr(_) + | ty::Ref(_, _, _) + | ty::FnPtr(_) + | ty::Never + | ty::Tuple(_) + | ty::Dynamic(_, _) => self.pretty_print_type(ty), + + // Placeholders (all printed as `_` to uniformize them). + ty::Param(_) | ty::Bound(..) | ty::Placeholder(_) | ty::Infer(_) | ty::Error(_) => { + write!(self, "_")?; + Ok(self) + } + + // Types with identity (print the module path). + ty::Adt(ty::AdtDef(Interned(&ty::AdtDefData { did: def_id, .. 
}, _)), substs) + | ty::FnDef(def_id, substs) + | ty::Opaque(def_id, substs) + | ty::Projection(ty::ProjectionTy { item_def_id: def_id, substs }) + | ty::Closure(def_id, substs) + | ty::Generator(def_id, substs, _) => self.print_def_path(def_id, substs), + ty::Foreign(def_id) => self.print_def_path(def_id, &[]), + + ty::GeneratorWitness(_) => bug!("type_name: unexpected `GeneratorWitness`"), + } + } + + fn print_const(self, ct: ty::Const<'tcx>) -> Result { + self.pretty_print_const(ct, false) + } + + fn print_dyn_existential( + mut self, + predicates: &'tcx ty::List>>, + ) -> Result { + let mut first = true; + for p in predicates { + if !first { + write!(self, "+")?; + } + first = false; + self = p.print(self)?; + } + Ok(self) + } + + fn path_crate(mut self, cnum: CrateNum) -> Result { + self.path.push_str(self.tcx.crate_name(cnum).as_str()); + Ok(self) + } + + fn path_qualified( + self, + self_ty: Ty<'tcx>, + trait_ref: Option>, + ) -> Result { + self.pretty_path_qualified(self_ty, trait_ref) + } + + fn path_append_impl( + self, + print_prefix: impl FnOnce(Self) -> Result, + _disambiguated_data: &DisambiguatedDefPathData, + self_ty: Ty<'tcx>, + trait_ref: Option>, + ) -> Result { + self.pretty_path_append_impl( + |mut cx| { + cx = print_prefix(cx)?; + + cx.path.push_str("::"); + + Ok(cx) + }, + self_ty, + trait_ref, + ) + } + + fn path_append( + mut self, + print_prefix: impl FnOnce(Self) -> Result, + disambiguated_data: &DisambiguatedDefPathData, + ) -> Result { + self = print_prefix(self)?; + + write!(self.path, "::{}", disambiguated_data.data).unwrap(); + + Ok(self) + } + + fn path_generic_args( + mut self, + print_prefix: impl FnOnce(Self) -> Result, + args: &[GenericArg<'tcx>], + ) -> Result { + self = print_prefix(self)?; + let args = + args.iter().cloned().filter(|arg| !matches!(arg.unpack(), GenericArgKind::Lifetime(_))); + if args.clone().next().is_some() { + self.generic_delimiters(|cx| cx.comma_sep(args)) + } else { + Ok(self) + } + } +} + +impl<'tcx> PrettyPrinter<'tcx> for AbsolutePathPrinter<'tcx> { + fn should_print_region(&self, _region: ty::Region<'_>) -> bool { + false + } + fn comma_sep(mut self, mut elems: impl Iterator) -> Result + where + T: Print<'tcx, Self, Output = Self, Error = Self::Error>, + { + if let Some(first) = elems.next() { + self = first.print(self)?; + for elem in elems { + self.path.push_str(", "); + self = elem.print(self)?; + } + } + Ok(self) + } + + fn generic_delimiters( + mut self, + f: impl FnOnce(Self) -> Result, + ) -> Result { + write!(self, "<")?; + + self = f(self)?; + + write!(self, ">")?; + + Ok(self) + } +} + +impl Write for AbsolutePathPrinter<'_> { + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.path.push_str(s); + Ok(()) + } +} + +/// Directly returns an `Allocation` containing an absolute path representation of the given type. +pub(crate) fn alloc_type_name<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> ConstAllocation<'tcx> { + let path = AbsolutePathPrinter { tcx, path: String::new() }.print_type(ty).unwrap().path; + let alloc = Allocation::from_bytes_byte_aligned_immutable(path.into_bytes()); + tcx.intern_const_alloc(alloc) +} diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs new file mode 100644 index 000000000..71ccd1799 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/machine.rs @@ -0,0 +1,525 @@ +//! This module contains everything needed to instantiate an interpreter. +//! 
This separation exists to ensure that no fancy miri features like +//! interpreting common C functions leak into CTFE. + +use std::borrow::{Borrow, Cow}; +use std::fmt::Debug; +use std::hash::Hash; + +use rustc_middle::mir; +use rustc_middle::ty::{self, Ty, TyCtxt}; +use rustc_span::def_id::DefId; +use rustc_target::abi::Size; +use rustc_target::spec::abi::Abi as CallAbi; + +use super::{ + AllocId, AllocRange, Allocation, ConstAllocation, Frame, ImmTy, InterpCx, InterpResult, + MemoryKind, OpTy, Operand, PlaceTy, Pointer, Provenance, Scalar, StackPopUnwind, +}; + +/// Data returned by Machine::stack_pop, +/// to provide further control over the popping of the stack frame +#[derive(Eq, PartialEq, Debug, Copy, Clone)] +pub enum StackPopJump { + /// Indicates that no special handling should be + /// done - we'll either return normally or unwind + /// based on the terminator for the function + /// we're leaving. + Normal, + + /// Indicates that we should *not* jump to the return/unwind address, as the callback already + /// took care of everything. + NoJump, +} + +/// Whether this kind of memory is allowed to leak +pub trait MayLeak: Copy { + fn may_leak(self) -> bool; +} + +/// The functionality needed by memory to manage its allocations +pub trait AllocMap { + /// Tests if the map contains the given key. + /// Deliberately takes `&mut` because that is sufficient, and some implementations + /// can be more efficient then (using `RefCell::get_mut`). + fn contains_key(&mut self, k: &Q) -> bool + where + K: Borrow; + + /// Inserts a new entry into the map. + fn insert(&mut self, k: K, v: V) -> Option; + + /// Removes an entry from the map. + fn remove(&mut self, k: &Q) -> Option + where + K: Borrow; + + /// Returns data based on the keys and values in the map. + fn filter_map_collect(&self, f: impl FnMut(&K, &V) -> Option) -> Vec; + + /// Returns a reference to entry `k`. If no such entry exists, call + /// `vacant` and either forward its error, or add its result to the map + /// and return a reference to *that*. + fn get_or(&self, k: K, vacant: impl FnOnce() -> Result) -> Result<&V, E>; + + /// Returns a mutable reference to entry `k`. If no such entry exists, call + /// `vacant` and either forward its error, or add its result to the map + /// and return a reference to *that*. + fn get_mut_or(&mut self, k: K, vacant: impl FnOnce() -> Result) -> Result<&mut V, E>; + + /// Read-only lookup. + fn get(&self, k: K) -> Option<&V> { + self.get_or(k, || Err(())).ok() + } + + /// Mutable lookup. + fn get_mut(&mut self, k: K) -> Option<&mut V> { + self.get_mut_or(k, || Err(())).ok() + } +} + +/// Methods of this trait signifies a point where CTFE evaluation would fail +/// and some use case dependent behaviour can instead be applied. +pub trait Machine<'mir, 'tcx>: Sized { + /// Additional memory kinds a machine wishes to distinguish from the builtin ones + type MemoryKind: Debug + std::fmt::Display + MayLeak + Eq + 'static; + + /// Pointers are "tagged" with provenance information; typically the `AllocId` they belong to. + type Provenance: Provenance + Eq + Hash + 'static; + + /// When getting the AllocId of a pointer, some extra data is also obtained from the provenance + /// that is passed to memory access hooks so they can do things with it. + type ProvenanceExtra: Copy + 'static; + + /// Machines can define extra (non-instance) things that represent values of function pointers. 
+ /// For example, Miri uses this to return a function pointer from `dlsym` + /// that can later be called to execute the right thing. + type ExtraFnVal: Debug + Copy; + + /// Extra data stored in every call frame. + type FrameExtra; + + /// Extra data stored in every allocation. + type AllocExtra: Debug + Clone + 'static; + + /// Memory's allocation map + type MemoryMap: AllocMap< + AllocId, + (MemoryKind, Allocation), + > + Default + + Clone; + + /// The memory kind to use for copied global memory (held in `tcx`) -- + /// or None if such memory should not be mutated and thus any such attempt will cause + /// a `ModifiedStatic` error to be raised. + /// Statics are copied under two circumstances: When they are mutated, and when + /// `adjust_allocation` (see below) returns an owned allocation + /// that is added to the memory so that the work is not done twice. + const GLOBAL_KIND: Option; + + /// Should the machine panic on allocation failures? + const PANIC_ON_ALLOC_FAIL: bool; + + /// Whether memory accesses should be alignment-checked. + fn enforce_alignment(ecx: &InterpCx<'mir, 'tcx, Self>) -> bool; + + /// Whether, when checking alignment, we should `force_int` and thus support + /// custom alignment logic based on whatever the integer address happens to be. + /// + /// Requires Provenance::OFFSET_IS_ADDR to be true. + fn force_int_for_alignment_check(ecx: &InterpCx<'mir, 'tcx, Self>) -> bool; + + /// Whether to enforce the validity invariant + fn enforce_validity(ecx: &InterpCx<'mir, 'tcx, Self>) -> bool; + + /// Whether to enforce integers and floats being initialized. + fn enforce_number_init(ecx: &InterpCx<'mir, 'tcx, Self>) -> bool; + + /// Whether function calls should be [ABI](CallAbi)-checked. + fn enforce_abi(_ecx: &InterpCx<'mir, 'tcx, Self>) -> bool { + true + } + + /// Whether CheckedBinOp MIR statements should actually check for overflow. + fn checked_binop_checks_overflow(_ecx: &InterpCx<'mir, 'tcx, Self>) -> bool; + + /// Entry point for obtaining the MIR of anything that should get evaluated. + /// So not just functions and shims, but also const/static initializers, anonymous + /// constants, ... + fn load_mir( + ecx: &InterpCx<'mir, 'tcx, Self>, + instance: ty::InstanceDef<'tcx>, + ) -> InterpResult<'tcx, &'tcx mir::Body<'tcx>> { + Ok(ecx.tcx.instance_mir(instance)) + } + + /// Entry point to all function calls. + /// + /// Returns either the mir to use for the call, or `None` if execution should + /// just proceed (which usually means this hook did all the work that the + /// called function should usually have done). In the latter case, it is + /// this hook's responsibility to advance the instruction pointer! + /// (This is to support functions like `__rust_maybe_catch_panic` that neither find a MIR + /// nor just jump to `ret`, but instead push their own stack frame.) + /// Passing `dest`and `ret` in the same `Option` proved very annoying when only one of them + /// was used. + fn find_mir_or_eval_fn( + ecx: &mut InterpCx<'mir, 'tcx, Self>, + instance: ty::Instance<'tcx>, + abi: CallAbi, + args: &[OpTy<'tcx, Self::Provenance>], + destination: &PlaceTy<'tcx, Self::Provenance>, + target: Option, + unwind: StackPopUnwind, + ) -> InterpResult<'tcx, Option<(&'mir mir::Body<'tcx>, ty::Instance<'tcx>)>>; + + /// Execute `fn_val`. It is the hook's responsibility to advance the instruction + /// pointer as appropriate. 
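+ /// (For the compile-time machines in this crate, `ExtraFnVal = !`, so this hook can never
+ /// actually be called; see `compile_time_machine!` below.)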
+ fn call_extra_fn( + ecx: &mut InterpCx<'mir, 'tcx, Self>, + fn_val: Self::ExtraFnVal, + abi: CallAbi, + args: &[OpTy<'tcx, Self::Provenance>], + destination: &PlaceTy<'tcx, Self::Provenance>, + target: Option, + unwind: StackPopUnwind, + ) -> InterpResult<'tcx>; + + /// Directly process an intrinsic without pushing a stack frame. It is the hook's + /// responsibility to advance the instruction pointer as appropriate. + fn call_intrinsic( + ecx: &mut InterpCx<'mir, 'tcx, Self>, + instance: ty::Instance<'tcx>, + args: &[OpTy<'tcx, Self::Provenance>], + destination: &PlaceTy<'tcx, Self::Provenance>, + target: Option, + unwind: StackPopUnwind, + ) -> InterpResult<'tcx>; + + /// Called to evaluate `Assert` MIR terminators that trigger a panic. + fn assert_panic( + ecx: &mut InterpCx<'mir, 'tcx, Self>, + msg: &mir::AssertMessage<'tcx>, + unwind: Option, + ) -> InterpResult<'tcx>; + + /// Called to evaluate `Abort` MIR terminator. + fn abort(_ecx: &mut InterpCx<'mir, 'tcx, Self>, _msg: String) -> InterpResult<'tcx, !> { + throw_unsup_format!("aborting execution is not supported") + } + + /// Called for all binary operations where the LHS has pointer type. + /// + /// Returns a (value, overflowed) pair if the operation succeeded + fn binary_ptr_op( + ecx: &InterpCx<'mir, 'tcx, Self>, + bin_op: mir::BinOp, + left: &ImmTy<'tcx, Self::Provenance>, + right: &ImmTy<'tcx, Self::Provenance>, + ) -> InterpResult<'tcx, (Scalar, bool, Ty<'tcx>)>; + + /// Called to read the specified `local` from the `frame`. + /// Since reading a ZST is not actually accessing memory or locals, this is never invoked + /// for ZST reads. + #[inline] + fn access_local<'a>( + frame: &'a Frame<'mir, 'tcx, Self::Provenance, Self::FrameExtra>, + local: mir::Local, + ) -> InterpResult<'tcx, &'a Operand> + where + 'tcx: 'mir, + { + frame.locals[local].access() + } + + /// Called to write the specified `local` from the `frame`. + /// Since writing a ZST is not actually accessing memory or locals, this is never invoked + /// for ZST reads. + #[inline] + fn access_local_mut<'a>( + ecx: &'a mut InterpCx<'mir, 'tcx, Self>, + frame: usize, + local: mir::Local, + ) -> InterpResult<'tcx, &'a mut Operand> + where + 'tcx: 'mir, + { + ecx.stack_mut()[frame].locals[local].access_mut() + } + + /// Called before a basic block terminator is executed. + /// You can use this to detect endlessly running programs. + #[inline] + fn before_terminator(_ecx: &mut InterpCx<'mir, 'tcx, Self>) -> InterpResult<'tcx> { + Ok(()) + } + + /// Called before a global allocation is accessed. + /// `def_id` is `Some` if this is the "lazy" allocation of a static. + #[inline] + fn before_access_global( + _tcx: TyCtxt<'tcx>, + _machine: &Self, + _alloc_id: AllocId, + _allocation: ConstAllocation<'tcx>, + _static_def_id: Option, + _is_write: bool, + ) -> InterpResult<'tcx> { + Ok(()) + } + + /// Return the `AllocId` for the given thread-local static in the current thread. + fn thread_local_static_base_pointer( + _ecx: &mut InterpCx<'mir, 'tcx, Self>, + def_id: DefId, + ) -> InterpResult<'tcx, Pointer> { + throw_unsup!(ThreadLocalStatic(def_id)) + } + + /// Return the root pointer for the given `extern static`. + fn extern_static_base_pointer( + ecx: &InterpCx<'mir, 'tcx, Self>, + def_id: DefId, + ) -> InterpResult<'tcx, Pointer>; + + /// Return a "base" pointer for the given allocation: the one that is used for direct + /// accesses to this static/const/fn allocation, or the one returned from the heap allocator. 
+ /// + /// Not called on `extern` or thread-local statics (those use the methods above). + fn adjust_alloc_base_pointer( + ecx: &InterpCx<'mir, 'tcx, Self>, + ptr: Pointer, + ) -> Pointer; + + /// "Int-to-pointer cast" + fn ptr_from_addr_cast( + ecx: &InterpCx<'mir, 'tcx, Self>, + addr: u64, + ) -> InterpResult<'tcx, Pointer>>; + + /// Marks a pointer as exposed, allowing it's provenance + /// to be recovered. "Pointer-to-int cast" + fn expose_ptr( + ecx: &mut InterpCx<'mir, 'tcx, Self>, + ptr: Pointer, + ) -> InterpResult<'tcx>; + + /// Convert a pointer with provenance into an allocation-offset pair + /// and extra provenance info. + /// + /// The returned `AllocId` must be the same as `ptr.provenance.get_alloc_id()`. + /// + /// When this fails, that means the pointer does not point to a live allocation. + fn ptr_get_alloc( + ecx: &InterpCx<'mir, 'tcx, Self>, + ptr: Pointer, + ) -> Option<(AllocId, Size, Self::ProvenanceExtra)>; + + /// Called to adjust allocations to the Provenance and AllocExtra of this machine. + /// + /// The way we construct allocations is to always first construct it without extra and then add + /// the extra. This keeps uniform code paths for handling both allocations created by CTFE for + /// globals, and allocations created by Miri during evaluation. + /// + /// `kind` is the kind of the allocation being adjusted; it can be `None` when + /// it's a global and `GLOBAL_KIND` is `None`. + /// + /// This should avoid copying if no work has to be done! If this returns an owned + /// allocation (because a copy had to be done to adjust things), machine memory will + /// cache the result. (This relies on `AllocMap::get_or` being able to add the + /// owned allocation to the map even when the map is shared.) + /// + /// This must only fail if `alloc` contains relocations. + fn adjust_allocation<'b>( + ecx: &InterpCx<'mir, 'tcx, Self>, + id: AllocId, + alloc: Cow<'b, Allocation>, + kind: Option>, + ) -> InterpResult<'tcx, Cow<'b, Allocation>>; + + /// Hook for performing extra checks on a memory read access. + /// + /// Takes read-only access to the allocation so we can keep all the memory read + /// operations take `&self`. Use a `RefCell` in `AllocExtra` if you + /// need to mutate. + #[inline(always)] + fn memory_read( + _tcx: TyCtxt<'tcx>, + _machine: &Self, + _alloc_extra: &Self::AllocExtra, + _prov: (AllocId, Self::ProvenanceExtra), + _range: AllocRange, + ) -> InterpResult<'tcx> { + Ok(()) + } + + /// Hook for performing extra checks on a memory write access. + #[inline(always)] + fn memory_written( + _tcx: TyCtxt<'tcx>, + _machine: &mut Self, + _alloc_extra: &mut Self::AllocExtra, + _prov: (AllocId, Self::ProvenanceExtra), + _range: AllocRange, + ) -> InterpResult<'tcx> { + Ok(()) + } + + /// Hook for performing extra operations on a memory deallocation. + #[inline(always)] + fn memory_deallocated( + _tcx: TyCtxt<'tcx>, + _machine: &mut Self, + _alloc_extra: &mut Self::AllocExtra, + _prov: (AllocId, Self::ProvenanceExtra), + _range: AllocRange, + ) -> InterpResult<'tcx> { + Ok(()) + } + + /// Executes a retagging operation. + #[inline] + fn retag( + _ecx: &mut InterpCx<'mir, 'tcx, Self>, + _kind: mir::RetagKind, + _place: &PlaceTy<'tcx, Self::Provenance>, + ) -> InterpResult<'tcx> { + Ok(()) + } + + /// Called immediately before a new stack frame gets pushed. 
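+ /// This is where a machine attaches its `FrameExtra` to the freshly created frame, turning a
+ /// frame without extra data into one that carries it.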
+ fn init_frame_extra( + ecx: &mut InterpCx<'mir, 'tcx, Self>, + frame: Frame<'mir, 'tcx, Self::Provenance>, + ) -> InterpResult<'tcx, Frame<'mir, 'tcx, Self::Provenance, Self::FrameExtra>>; + + /// Borrow the current thread's stack. + fn stack<'a>( + ecx: &'a InterpCx<'mir, 'tcx, Self>, + ) -> &'a [Frame<'mir, 'tcx, Self::Provenance, Self::FrameExtra>]; + + /// Mutably borrow the current thread's stack. + fn stack_mut<'a>( + ecx: &'a mut InterpCx<'mir, 'tcx, Self>, + ) -> &'a mut Vec>; + + /// Called immediately after a stack frame got pushed and its locals got initialized. + fn after_stack_push(_ecx: &mut InterpCx<'mir, 'tcx, Self>) -> InterpResult<'tcx> { + Ok(()) + } + + /// Called immediately after a stack frame got popped, but before jumping back to the caller. + /// The `locals` have already been destroyed! + fn after_stack_pop( + _ecx: &mut InterpCx<'mir, 'tcx, Self>, + _frame: Frame<'mir, 'tcx, Self::Provenance, Self::FrameExtra>, + unwinding: bool, + ) -> InterpResult<'tcx, StackPopJump> { + // By default, we do not support unwinding from panics + assert!(!unwinding); + Ok(StackPopJump::Normal) + } +} + +// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines +// (CTFE and ConstProp) use the same instance. Here, we share that code. +pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) { + type Provenance = AllocId; + type ProvenanceExtra = (); + + type ExtraFnVal = !; + + type MemoryMap = + rustc_data_structures::fx::FxHashMap, Allocation)>; + const GLOBAL_KIND: Option = None; // no copying of globals from `tcx` to machine memory + + type AllocExtra = (); + type FrameExtra = (); + + #[inline(always)] + fn enforce_alignment(_ecx: &InterpCx<$mir, $tcx, Self>) -> bool { + // We do not check for alignment to avoid having to carry an `Align` + // in `ConstValue::ByRef`. + false + } + + #[inline(always)] + fn force_int_for_alignment_check(_ecx: &InterpCx<$mir, $tcx, Self>) -> bool { + // We do not support `force_int`. + false + } + + #[inline(always)] + fn enforce_validity(_ecx: &InterpCx<$mir, $tcx, Self>) -> bool { + false // for now, we don't enforce validity + } + + #[inline(always)] + fn enforce_number_init(_ecx: &InterpCx<$mir, $tcx, Self>) -> bool { + true + } + + #[inline(always)] + fn checked_binop_checks_overflow(_ecx: &InterpCx<$mir, $tcx, Self>) -> bool { + true + } + + #[inline(always)] + fn call_extra_fn( + _ecx: &mut InterpCx<$mir, $tcx, Self>, + fn_val: !, + _abi: CallAbi, + _args: &[OpTy<$tcx>], + _destination: &PlaceTy<$tcx, Self::Provenance>, + _target: Option, + _unwind: StackPopUnwind, + ) -> InterpResult<$tcx> { + match fn_val {} + } + + #[inline(always)] + fn adjust_allocation<'b>( + _ecx: &InterpCx<$mir, $tcx, Self>, + _id: AllocId, + alloc: Cow<'b, Allocation>, + _kind: Option>, + ) -> InterpResult<$tcx, Cow<'b, Allocation>> { + Ok(alloc) + } + + fn extern_static_base_pointer( + ecx: &InterpCx<$mir, $tcx, Self>, + def_id: DefId, + ) -> InterpResult<$tcx, Pointer> { + // Use the `AllocId` associated with the `DefId`. Any actual *access* will fail. + Ok(Pointer::new(ecx.tcx.create_static_alloc(def_id), Size::ZERO)) + } + + #[inline(always)] + fn adjust_alloc_base_pointer( + _ecx: &InterpCx<$mir, $tcx, Self>, + ptr: Pointer, + ) -> Pointer { + ptr + } + + #[inline(always)] + fn ptr_from_addr_cast( + _ecx: &InterpCx<$mir, $tcx, Self>, + addr: u64, + ) -> InterpResult<$tcx, Pointer>> { + // Allow these casts, but make the pointer not dereferenceable. + // (I.e., they behave like transmutation.) 
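// Editor's note: illustrative sketch, not part of the upstream sources. The
// `Pointer::from_addr` call just below builds a pointer that carries an
// address but no provenance, so any later attempt to dereference it is
// rejected as dangling. A standalone model of that behaviour, with
// `Option<u64>` standing in for the real provenance type (all names invented):
type ModelPtr = (Option<u64>, u64); // (provenance, address)

fn ptr_from_addr_cast(addr: u64) -> ModelPtr {
    // Like a transmute: keep the address, attach no provenance.
    (None, addr)
}

fn deref(ptr: ModelPtr) -> Result<(), String> {
    match ptr.0 {
        Some(_alloc_id) => Ok(()), // backed by a real allocation
        None => Err(format!("0x{:x} is a dangling integer pointer", ptr.1)),
    }
}

#[test]
fn int_to_ptr_casts_are_not_dereferenceable() {
    assert!(deref(ptr_from_addr_cast(0x40)).is_err());
}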
+ Ok(Pointer::from_addr(addr)) + } + + #[inline(always)] + fn ptr_get_alloc( + _ecx: &InterpCx<$mir, $tcx, Self>, + ptr: Pointer, + ) -> Option<(AllocId, Size, Self::ProvenanceExtra)> { + // We know `offset` is relative to the allocation, so we can use `into_parts`. + let (alloc_id, offset) = ptr.into_parts(); + Some((alloc_id, offset, ())) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs new file mode 100644 index 000000000..ed2c4edf9 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -0,0 +1,1224 @@ +//! The memory subsystem. +//! +//! Generally, we use `Pointer` to denote memory addresses. However, some operations +//! have a "size"-like parameter, and they take `Scalar` for the address because +//! if the size is 0, then the pointer can also be a (properly aligned, non-null) +//! integer. It is crucial that these operations call `check_align` *before* +//! short-circuiting the empty case! + +use std::assert_matches::assert_matches; +use std::borrow::Cow; +use std::collections::VecDeque; +use std::fmt; +use std::ptr; + +use rustc_ast::Mutability; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_middle::mir::display_allocation; +use rustc_middle::ty::{self, Instance, ParamEnv, Ty, TyCtxt}; +use rustc_target::abi::{Align, HasDataLayout, Size}; + +use super::{ + alloc_range, AllocId, AllocMap, AllocRange, Allocation, CheckInAllocMsg, GlobalAlloc, InterpCx, + InterpResult, Machine, MayLeak, Pointer, PointerArithmetic, Provenance, Scalar, + ScalarMaybeUninit, +}; + +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum MemoryKind { + /// Stack memory. Error if deallocated except during a stack pop. + Stack, + /// Memory allocated by `caller_location` intrinsic. Error if ever deallocated. + CallerLocation, + /// Additional memory kinds a machine wishes to distinguish from the builtin ones. + Machine(T), +} + +impl MayLeak for MemoryKind { + #[inline] + fn may_leak(self) -> bool { + match self { + MemoryKind::Stack => false, + MemoryKind::CallerLocation => true, + MemoryKind::Machine(k) => k.may_leak(), + } + } +} + +impl fmt::Display for MemoryKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MemoryKind::Stack => write!(f, "stack variable"), + MemoryKind::CallerLocation => write!(f, "caller location"), + MemoryKind::Machine(m) => write!(f, "{}", m), + } + } +} + +/// The return value of `get_alloc_info` indicates the "kind" of the allocation. +pub enum AllocKind { + /// A regular live data allocation. + LiveData, + /// A function allocation (that fn ptrs point to). + Function, + /// A (symbolic) vtable allocation. + VTable, + /// A dead allocation. + Dead, +} + +/// The value of a function pointer. +#[derive(Debug, Copy, Clone)] +pub enum FnVal<'tcx, Other> { + Instance(Instance<'tcx>), + Other(Other), +} + +impl<'tcx, Other> FnVal<'tcx, Other> { + pub fn as_instance(self) -> InterpResult<'tcx, Instance<'tcx>> { + match self { + FnVal::Instance(instance) => Ok(instance), + FnVal::Other(_) => { + throw_unsup_format!("'foreign' function pointers are not supported in this context") + } + } + } +} + +// `Memory` has to depend on the `Machine` because some of its operations +// (e.g., `get`) call a `Machine` hook. +pub struct Memory<'mir, 'tcx, M: Machine<'mir, 'tcx>> { + /// Allocations local to this instance of the miri engine. The kind + /// helps ensure that the same mechanism is used for allocation and + /// deallocation. 
When an allocation is not found here, it is a + /// global and looked up in the `tcx` for read access. Some machines may + /// have to mutate this map even on a read-only access to a global (because + /// they do pointer provenance tracking and the allocations in `tcx` have + /// the wrong type), so we let the machine override this type. + /// Either way, if the machine allows writing to a global, doing so will + /// create a copy of the global allocation here. + // FIXME: this should not be public, but interning currently needs access to it + pub(super) alloc_map: M::MemoryMap, + + /// Map for "extra" function pointers. + extra_fn_ptr_map: FxHashMap, + + /// To be able to compare pointers with null, and to check alignment for accesses + /// to ZSTs (where pointers may dangle), we keep track of the size even for allocations + /// that do not exist any more. + // FIXME: this should not be public, but interning currently needs access to it + pub(super) dead_alloc_map: FxHashMap, +} + +/// A reference to some allocation that was already bounds-checked for the given region +/// and had the on-access machine hooks run. +#[derive(Copy, Clone)] +pub struct AllocRef<'a, 'tcx, Prov, Extra> { + alloc: &'a Allocation, + range: AllocRange, + tcx: TyCtxt<'tcx>, + alloc_id: AllocId, +} +/// A reference to some allocation that was already bounds-checked for the given region +/// and had the on-access machine hooks run. +pub struct AllocRefMut<'a, 'tcx, Prov, Extra> { + alloc: &'a mut Allocation, + range: AllocRange, + tcx: TyCtxt<'tcx>, + alloc_id: AllocId, +} + +impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { + pub fn new() -> Self { + Memory { + alloc_map: M::MemoryMap::default(), + extra_fn_ptr_map: FxHashMap::default(), + dead_alloc_map: FxHashMap::default(), + } + } + + /// This is used by [priroda](https://github.com/oli-obk/priroda) + pub fn alloc_map(&self) -> &M::MemoryMap { + &self.alloc_map + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + /// Call this to turn untagged "global" pointers (obtained via `tcx`) into + /// the machine pointer to the allocation. Must never be used + /// for any other pointers, nor for TLS statics. + /// + /// Using the resulting pointer represents a *direct* access to that memory + /// (e.g. by directly using a `static`), + /// as opposed to access through a pointer that was created by the program. + /// + /// This function can fail only if `ptr` points to an `extern static`. + #[inline] + pub fn global_base_pointer( + &self, + ptr: Pointer, + ) -> InterpResult<'tcx, Pointer> { + let alloc_id = ptr.provenance; + // We need to handle `extern static`. + match self.tcx.try_get_global_alloc(alloc_id) { + Some(GlobalAlloc::Static(def_id)) if self.tcx.is_thread_local_static(def_id) => { + bug!("global memory cannot point to thread-local static") + } + Some(GlobalAlloc::Static(def_id)) if self.tcx.is_foreign_item(def_id) => { + return M::extern_static_base_pointer(self, def_id); + } + _ => {} + } + // And we need to get the provenance. + Ok(M::adjust_alloc_base_pointer(self, ptr)) + } + + pub fn create_fn_alloc_ptr( + &mut self, + fn_val: FnVal<'tcx, M::ExtraFnVal>, + ) -> Pointer { + let id = match fn_val { + FnVal::Instance(instance) => self.tcx.create_fn_alloc(instance), + FnVal::Other(extra) => { + // FIXME(RalfJung): Should we have a cache here? 
+ let id = self.tcx.reserve_alloc_id(); + let old = self.memory.extra_fn_ptr_map.insert(id, extra); + assert!(old.is_none()); + id + } + }; + // Functions are global allocations, so make sure we get the right base pointer. + // We know this is not an `extern static` so this cannot fail. + self.global_base_pointer(Pointer::from(id)).unwrap() + } + + pub fn allocate_ptr( + &mut self, + size: Size, + align: Align, + kind: MemoryKind, + ) -> InterpResult<'tcx, Pointer> { + let alloc = Allocation::uninit(size, align, M::PANIC_ON_ALLOC_FAIL)?; + // We can `unwrap` since `alloc` contains no pointers. + Ok(self.allocate_raw_ptr(alloc, kind).unwrap()) + } + + pub fn allocate_bytes_ptr( + &mut self, + bytes: &[u8], + align: Align, + kind: MemoryKind, + mutability: Mutability, + ) -> Pointer { + let alloc = Allocation::from_bytes(bytes, align, mutability); + // We can `unwrap` since `alloc` contains no pointers. + self.allocate_raw_ptr(alloc, kind).unwrap() + } + + /// This can fail only of `alloc` contains relocations. + pub fn allocate_raw_ptr( + &mut self, + alloc: Allocation, + kind: MemoryKind, + ) -> InterpResult<'tcx, Pointer> { + let id = self.tcx.reserve_alloc_id(); + debug_assert_ne!( + Some(kind), + M::GLOBAL_KIND.map(MemoryKind::Machine), + "dynamically allocating global memory" + ); + let alloc = M::adjust_allocation(self, id, Cow::Owned(alloc), Some(kind))?; + self.memory.alloc_map.insert(id, (kind, alloc.into_owned())); + Ok(M::adjust_alloc_base_pointer(self, Pointer::from(id))) + } + + pub fn reallocate_ptr( + &mut self, + ptr: Pointer>, + old_size_and_align: Option<(Size, Align)>, + new_size: Size, + new_align: Align, + kind: MemoryKind, + ) -> InterpResult<'tcx, Pointer> { + let (alloc_id, offset, _prov) = self.ptr_get_alloc_id(ptr)?; + if offset.bytes() != 0 { + throw_ub_format!( + "reallocating {:?} which does not point to the beginning of an object", + ptr + ); + } + + // For simplicities' sake, we implement reallocate as "alloc, copy, dealloc". + // This happens so rarely, the perf advantage is outweighed by the maintenance cost. + let new_ptr = self.allocate_ptr(new_size, new_align, kind)?; + let old_size = match old_size_and_align { + Some((size, _align)) => size, + None => self.get_alloc_raw(alloc_id)?.size(), + }; + // This will also call the access hooks. + self.mem_copy( + ptr, + Align::ONE, + new_ptr.into(), + Align::ONE, + old_size.min(new_size), + /*nonoverlapping*/ true, + )?; + self.deallocate_ptr(ptr, old_size_and_align, kind)?; + + Ok(new_ptr) + } + + #[instrument(skip(self), level = "debug")] + pub fn deallocate_ptr( + &mut self, + ptr: Pointer>, + old_size_and_align: Option<(Size, Align)>, + kind: MemoryKind, + ) -> InterpResult<'tcx> { + let (alloc_id, offset, prov) = self.ptr_get_alloc_id(ptr)?; + trace!("deallocating: {alloc_id:?}"); + + if offset.bytes() != 0 { + throw_ub_format!( + "deallocating {:?} which does not point to the beginning of an object", + ptr + ); + } + + let Some((alloc_kind, mut alloc)) = self.memory.alloc_map.remove(&alloc_id) else { + // Deallocating global memory -- always an error + return Err(match self.tcx.try_get_global_alloc(alloc_id) { + Some(GlobalAlloc::Function(..)) => { + err_ub_format!("deallocating {alloc_id:?}, which is a function") + } + Some(GlobalAlloc::VTable(..)) => { + err_ub_format!("deallocating {alloc_id:?}, which is a vtable") + } + Some(GlobalAlloc::Static(..) 
| GlobalAlloc::Memory(..)) => { + err_ub_format!("deallocating {alloc_id:?}, which is static memory") + } + None => err_ub!(PointerUseAfterFree(alloc_id)), + } + .into()); + }; + + debug!(?alloc); + + if alloc.mutability == Mutability::Not { + throw_ub_format!("deallocating immutable allocation {alloc_id:?}"); + } + if alloc_kind != kind { + throw_ub_format!( + "deallocating {alloc_id:?}, which is {alloc_kind} memory, using {kind} deallocation operation" + ); + } + if let Some((size, align)) = old_size_and_align { + if size != alloc.size() || align != alloc.align { + throw_ub_format!( + "incorrect layout on deallocation: {alloc_id:?} has size {} and alignment {}, but gave size {} and alignment {}", + alloc.size().bytes(), + alloc.align.bytes(), + size.bytes(), + align.bytes(), + ) + } + } + + // Let the machine take some extra action + let size = alloc.size(); + M::memory_deallocated( + *self.tcx, + &mut self.machine, + &mut alloc.extra, + (alloc_id, prov), + alloc_range(Size::ZERO, size), + )?; + + // Don't forget to remember size and align of this now-dead allocation + let old = self.memory.dead_alloc_map.insert(alloc_id, (size, alloc.align)); + if old.is_some() { + bug!("Nothing can be deallocated twice"); + } + + Ok(()) + } + + /// Internal helper function to determine the allocation and offset of a pointer (if any). + #[inline(always)] + fn get_ptr_access( + &self, + ptr: Pointer>, + size: Size, + align: Align, + ) -> InterpResult<'tcx, Option<(AllocId, Size, M::ProvenanceExtra)>> { + let align = M::enforce_alignment(&self).then_some(align); + self.check_and_deref_ptr( + ptr, + size, + align, + CheckInAllocMsg::MemoryAccessTest, + |alloc_id, offset, prov| { + let (size, align) = self.get_live_alloc_size_and_align(alloc_id)?; + Ok((size, align, (alloc_id, offset, prov))) + }, + ) + } + + /// Check if the given pointer points to live memory of given `size` and `align` + /// (ignoring `M::enforce_alignment`). The caller can control the error message for the + /// out-of-bounds case. + #[inline(always)] + pub fn check_ptr_access_align( + &self, + ptr: Pointer>, + size: Size, + align: Align, + msg: CheckInAllocMsg, + ) -> InterpResult<'tcx> { + self.check_and_deref_ptr(ptr, size, Some(align), msg, |alloc_id, _, _| { + let (size, align) = self.get_live_alloc_size_and_align(alloc_id)?; + Ok((size, align, ())) + })?; + Ok(()) + } + + /// Low-level helper function to check if a ptr is in-bounds and potentially return a reference + /// to the allocation it points to. Supports both shared and mutable references, as the actual + /// checking is offloaded to a helper closure. `align` defines whether and which alignment check + /// is done. Returns `None` for size 0, and otherwise `Some` of what `alloc_size` returned. + fn check_and_deref_ptr( + &self, + ptr: Pointer>, + size: Size, + align: Option, + msg: CheckInAllocMsg, + alloc_size: impl FnOnce( + AllocId, + Size, + M::ProvenanceExtra, + ) -> InterpResult<'tcx, (Size, Align, T)>, + ) -> InterpResult<'tcx, Option> { + fn check_offset_align<'tcx>(offset: u64, align: Align) -> InterpResult<'tcx> { + if offset % align.bytes() == 0 { + Ok(()) + } else { + // The biggest power of two through which `offset` is divisible. + let offset_pow2 = 1 << offset.trailing_zeros(); + throw_ub!(AlignmentCheckFailed { + has: Align::from_bytes(offset_pow2).unwrap(), + required: align, + }) + } + } + + Ok(match self.ptr_try_get_alloc_id(ptr) { + Err(addr) => { + // We couldn't get a proper allocation. 
This is only okay if the access size is 0, + // and the address is not null. + if size.bytes() > 0 || addr == 0 { + throw_ub!(DanglingIntPointer(addr, msg)); + } + // Must be aligned. + if let Some(align) = align { + check_offset_align(addr, align)?; + } + None + } + Ok((alloc_id, offset, prov)) => { + let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, prov)?; + // Test bounds. This also ensures non-null. + // It is sufficient to check this for the end pointer. Also check for overflow! + if offset.checked_add(size, &self.tcx).map_or(true, |end| end > alloc_size) { + throw_ub!(PointerOutOfBounds { + alloc_id, + alloc_size, + ptr_offset: self.machine_usize_to_isize(offset.bytes()), + ptr_size: size, + msg, + }) + } + // Ensure we never consider the null pointer dereferencable. + if M::Provenance::OFFSET_IS_ADDR { + assert_ne!(ptr.addr(), Size::ZERO); + } + // Test align. Check this last; if both bounds and alignment are violated + // we want the error to be about the bounds. + if let Some(align) = align { + if M::force_int_for_alignment_check(self) { + // `force_int_for_alignment_check` can only be true if `OFFSET_IS_ADDR` is true. + check_offset_align(ptr.addr().bytes(), align)?; + } else { + // Check allocation alignment and offset alignment. + if alloc_align.bytes() < align.bytes() { + throw_ub!(AlignmentCheckFailed { has: alloc_align, required: align }); + } + check_offset_align(offset.bytes(), align)?; + } + } + + // We can still be zero-sized in this branch, in which case we have to + // return `None`. + if size.bytes() == 0 { None } else { Some(ret_val) } + } + }) + } +} + +/// Allocation accessors +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + /// Helper function to obtain a global (tcx) allocation. + /// This attempts to return a reference to an existing allocation if + /// one can be found in `tcx`. That, however, is only possible if `tcx` and + /// this machine use the same pointer provenance, so it is indirected through + /// `M::adjust_allocation`. + fn get_global_alloc( + &self, + id: AllocId, + is_write: bool, + ) -> InterpResult<'tcx, Cow<'tcx, Allocation>> { + let (alloc, def_id) = match self.tcx.try_get_global_alloc(id) { + Some(GlobalAlloc::Memory(mem)) => { + // Memory of a constant or promoted or anonymous memory referenced by a static. + (mem, None) + } + Some(GlobalAlloc::Function(..)) => throw_ub!(DerefFunctionPointer(id)), + Some(GlobalAlloc::VTable(..)) => throw_ub!(DerefVTablePointer(id)), + None => throw_ub!(PointerUseAfterFree(id)), + Some(GlobalAlloc::Static(def_id)) => { + assert!(self.tcx.is_static(def_id)); + assert!(!self.tcx.is_thread_local_static(def_id)); + // Notice that every static has two `AllocId` that will resolve to the same + // thing here: one maps to `GlobalAlloc::Static`, this is the "lazy" ID, + // and the other one is maps to `GlobalAlloc::Memory`, this is returned by + // `eval_static_initializer` and it is the "resolved" ID. + // The resolved ID is never used by the interpreted program, it is hidden. + // This is relied upon for soundness of const-patterns; a pointer to the resolved + // ID would "sidestep" the checks that make sure consts do not point to statics! + // The `GlobalAlloc::Memory` branch here is still reachable though; when a static + // contains a reference to memory that was created during its evaluation (i.e., not + // to another static), those inner references only exist in "resolved" form. 
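// Editor's note: illustrative sketch, not part of the upstream sources. The
// in-bounds test in `check_and_deref_ptr` above checks the *end* of the access
// with a checked addition (so an overflowing `offset + size` is rejected too),
// and the alignment helper reports the largest power of two that actually
// divides the offset. A standalone model of both checks (names invented):
fn check_in_bounds(offset: u64, size: u64, alloc_size: u64) -> Result<(), String> {
    match offset.checked_add(size) {
        Some(end) if end <= alloc_size => Ok(()),
        _ => Err(format!("access of size {size} at offset {offset} is out of bounds")),
    }
}

fn check_offset_align(offset: u64, required: u64) -> Result<(), String> {
    assert!(required.is_power_of_two());
    if offset % required == 0 {
        Ok(())
    } else {
        // Largest power of two dividing `offset`: the alignment we can actually guarantee.
        let has = 1u64 << offset.trailing_zeros();
        Err(format!("required alignment {required}, but the offset only guarantees {has}"))
    }
}

#[test]
fn bounds_and_alignment_checks() {
    assert!(check_in_bounds(8, 8, 16).is_ok());
    assert!(check_in_bounds(12, 8, 16).is_err());
    assert!(check_in_bounds(u64::MAX, 1, 16).is_err()); // overflow counts as out of bounds
    assert!(check_offset_align(24, 8).is_ok());
    assert!(check_offset_align(20, 8).is_err()); // offset 20 only guarantees alignment 4
}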
+ if self.tcx.is_foreign_item(def_id) { + // This is unreachable in Miri, but can happen in CTFE where we actually *do* support + // referencing arbitrary (declared) extern statics. + throw_unsup!(ReadExternStatic(def_id)); + } + + // Use a precise span for better cycle errors. + (self.tcx.at(self.cur_span()).eval_static_initializer(def_id)?, Some(def_id)) + } + }; + M::before_access_global(*self.tcx, &self.machine, id, alloc, def_id, is_write)?; + // We got tcx memory. Let the machine initialize its "extra" stuff. + M::adjust_allocation( + self, + id, // always use the ID we got as input, not the "hidden" one. + Cow::Borrowed(alloc.inner()), + M::GLOBAL_KIND.map(MemoryKind::Machine), + ) + } + + /// Gives raw access to the `Allocation`, without bounds or alignment checks. + /// The caller is responsible for calling the access hooks! + fn get_alloc_raw( + &self, + id: AllocId, + ) -> InterpResult<'tcx, &Allocation> { + // The error type of the inner closure here is somewhat funny. We have two + // ways of "erroring": An actual error, or because we got a reference from + // `get_global_alloc` that we can actually use directly without inserting anything anywhere. + // So the error type is `InterpResult<'tcx, &Allocation>`. + let a = self.memory.alloc_map.get_or(id, || { + let alloc = self.get_global_alloc(id, /*is_write*/ false).map_err(Err)?; + match alloc { + Cow::Borrowed(alloc) => { + // We got a ref, cheaply return that as an "error" so that the + // map does not get mutated. + Err(Ok(alloc)) + } + Cow::Owned(alloc) => { + // Need to put it into the map and return a ref to that + let kind = M::GLOBAL_KIND.expect( + "I got a global allocation that I have to copy but the machine does \ + not expect that to happen", + ); + Ok((MemoryKind::Machine(kind), alloc)) + } + } + }); + // Now unpack that funny error type + match a { + Ok(a) => Ok(&a.1), + Err(a) => a, + } + } + + /// "Safe" (bounds and align-checked) allocation access. + pub fn get_ptr_alloc<'a>( + &'a self, + ptr: Pointer>, + size: Size, + align: Align, + ) -> InterpResult<'tcx, Option>> { + let align = M::enforce_alignment(self).then_some(align); + let ptr_and_alloc = self.check_and_deref_ptr( + ptr, + size, + align, + CheckInAllocMsg::MemoryAccessTest, + |alloc_id, offset, prov| { + let alloc = self.get_alloc_raw(alloc_id)?; + Ok((alloc.size(), alloc.align, (alloc_id, offset, prov, alloc))) + }, + )?; + if let Some((alloc_id, offset, prov, alloc)) = ptr_and_alloc { + let range = alloc_range(offset, size); + M::memory_read(*self.tcx, &self.machine, &alloc.extra, (alloc_id, prov), range)?; + Ok(Some(AllocRef { alloc, range, tcx: *self.tcx, alloc_id })) + } else { + // Even in this branch we have to be sure that we actually access the allocation, in + // order to ensure that `static FOO: Type = FOO;` causes a cycle error instead of + // magically pulling *any* ZST value from the ether. However, the `get_raw` above is + // always called when `ptr` has an `AllocId`. + Ok(None) + } + } + + /// Return the `extra` field of the given allocation. + pub fn get_alloc_extra<'a>(&'a self, id: AllocId) -> InterpResult<'tcx, &'a M::AllocExtra> { + Ok(&self.get_alloc_raw(id)?.extra) + } + + /// Gives raw mutable access to the `Allocation`, without bounds or alignment checks. + /// The caller is responsible for calling the access hooks! + /// + /// Also returns a ptr to `self.extra` so that the caller can use it in parallel with the + /// allocation. 
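// Editor's note: illustrative sketch, not part of the upstream sources. The
// function below hands out mutable access copy-on-write style: an allocation
// that so far only exists on the global (`tcx`) side is first copied into the
// machine-local map and mutated there, so the global data itself is never
// touched. A standalone model with plain `HashMap`s (all names invented):
use std::collections::HashMap;

struct CowMemory {
    global: HashMap<u64, Vec<u8>>, // read-only "tcx" side
    local: HashMap<u64, Vec<u8>>,  // machine-local, mutable side
}

impl CowMemory {
    fn get_mut(&mut self, id: u64) -> Option<&mut Vec<u8>> {
        if !self.local.contains_key(&id) {
            // Slow path: copy the global allocation into the local map first.
            let copy = self.global.get(&id)?.clone();
            self.local.insert(id, copy);
        }
        self.local.get_mut(&id)
    }
}

#[test]
fn writes_never_touch_the_global_copy() {
    let mut mem =
        CowMemory { global: HashMap::from([(1, vec![0u8; 4])]), local: HashMap::new() };
    mem.get_mut(1).unwrap()[0] = 42;
    assert_eq!(mem.global[&1][0], 0); // the global data is unchanged
    assert_eq!(mem.local[&1][0], 42); // the local copy carries the write
}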
+ fn get_alloc_raw_mut( + &mut self, + id: AllocId, + ) -> InterpResult<'tcx, (&mut Allocation, &mut M)> { + // We have "NLL problem case #3" here, which cannot be worked around without loss of + // efficiency even for the common case where the key is in the map. + // + // (Cannot use `get_mut_or` since `get_global_alloc` needs `&self`.) + if self.memory.alloc_map.get_mut(id).is_none() { + // Slow path. + // Allocation not found locally, go look global. + let alloc = self.get_global_alloc(id, /*is_write*/ true)?; + let kind = M::GLOBAL_KIND.expect( + "I got a global allocation that I have to copy but the machine does \ + not expect that to happen", + ); + self.memory.alloc_map.insert(id, (MemoryKind::Machine(kind), alloc.into_owned())); + } + + let (_kind, alloc) = self.memory.alloc_map.get_mut(id).unwrap(); + if alloc.mutability == Mutability::Not { + throw_ub!(WriteToReadOnly(id)) + } + Ok((alloc, &mut self.machine)) + } + + /// "Safe" (bounds and align-checked) allocation access. + pub fn get_ptr_alloc_mut<'a>( + &'a mut self, + ptr: Pointer>, + size: Size, + align: Align, + ) -> InterpResult<'tcx, Option>> { + let parts = self.get_ptr_access(ptr, size, align)?; + if let Some((alloc_id, offset, prov)) = parts { + let tcx = *self.tcx; + // FIXME: can we somehow avoid looking up the allocation twice here? + // We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`. + let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?; + let range = alloc_range(offset, size); + M::memory_written(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?; + Ok(Some(AllocRefMut { alloc, range, tcx, alloc_id })) + } else { + Ok(None) + } + } + + /// Return the `extra` field of the given allocation. + pub fn get_alloc_extra_mut<'a>( + &'a mut self, + id: AllocId, + ) -> InterpResult<'tcx, (&'a mut M::AllocExtra, &'a mut M)> { + let (alloc, machine) = self.get_alloc_raw_mut(id)?; + Ok((&mut alloc.extra, machine)) + } + + /// Obtain the size and alignment of an allocation, even if that allocation has + /// been deallocated. + pub fn get_alloc_info(&self, id: AllocId) -> (Size, Align, AllocKind) { + // # Regular allocations + // Don't use `self.get_raw` here as that will + // a) cause cycles in case `id` refers to a static + // b) duplicate a global's allocation in miri + if let Some((_, alloc)) = self.memory.alloc_map.get(id) { + return (alloc.size(), alloc.align, AllocKind::LiveData); + } + + // # Function pointers + // (both global from `alloc_map` and local from `extra_fn_ptr_map`) + if self.get_fn_alloc(id).is_some() { + return (Size::ZERO, Align::ONE, AllocKind::Function); + } + + // # Statics + // Can't do this in the match argument, we may get cycle errors since the lock would + // be held throughout the match. + match self.tcx.try_get_global_alloc(id) { + Some(GlobalAlloc::Static(def_id)) => { + assert!(self.tcx.is_static(def_id)); + assert!(!self.tcx.is_thread_local_static(def_id)); + // Use size and align of the type. + let ty = self.tcx.type_of(def_id); + let layout = self.tcx.layout_of(ParamEnv::empty().and(ty)).unwrap(); + assert!(!layout.is_unsized()); + (layout.size, layout.align.abi, AllocKind::LiveData) + } + Some(GlobalAlloc::Memory(alloc)) => { + // Need to duplicate the logic here, because the global allocations have + // different associated types than the interpreter-local ones. 
+ let alloc = alloc.inner(); + (alloc.size(), alloc.align, AllocKind::LiveData) + } + Some(GlobalAlloc::Function(_)) => bug!("We already checked function pointers above"), + Some(GlobalAlloc::VTable(..)) => { + // No data to be accessed here. But vtables are pointer-aligned. + return (Size::ZERO, self.tcx.data_layout.pointer_align.abi, AllocKind::VTable); + } + // The rest must be dead. + None => { + // Deallocated pointers are allowed, we should be able to find + // them in the map. + let (size, align) = *self + .memory + .dead_alloc_map + .get(&id) + .expect("deallocated pointers should all be recorded in `dead_alloc_map`"); + (size, align, AllocKind::Dead) + } + } + } + + /// Obtain the size and alignment of a live allocation. + pub fn get_live_alloc_size_and_align(&self, id: AllocId) -> InterpResult<'tcx, (Size, Align)> { + let (size, align, kind) = self.get_alloc_info(id); + if matches!(kind, AllocKind::Dead) { + throw_ub!(PointerUseAfterFree(id)) + } + Ok((size, align)) + } + + fn get_fn_alloc(&self, id: AllocId) -> Option> { + if let Some(extra) = self.memory.extra_fn_ptr_map.get(&id) { + Some(FnVal::Other(*extra)) + } else { + match self.tcx.try_get_global_alloc(id) { + Some(GlobalAlloc::Function(instance)) => Some(FnVal::Instance(instance)), + _ => None, + } + } + } + + pub fn get_ptr_fn( + &self, + ptr: Pointer>, + ) -> InterpResult<'tcx, FnVal<'tcx, M::ExtraFnVal>> { + trace!("get_ptr_fn({:?})", ptr); + let (alloc_id, offset, _prov) = self.ptr_get_alloc_id(ptr)?; + if offset.bytes() != 0 { + throw_ub!(InvalidFunctionPointer(Pointer::new(alloc_id, offset))) + } + self.get_fn_alloc(alloc_id) + .ok_or_else(|| err_ub!(InvalidFunctionPointer(Pointer::new(alloc_id, offset))).into()) + } + + pub fn get_ptr_vtable( + &self, + ptr: Pointer>, + ) -> InterpResult<'tcx, (Ty<'tcx>, Option>)> { + trace!("get_ptr_vtable({:?})", ptr); + let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?; + if offset.bytes() != 0 { + throw_ub!(InvalidVTablePointer(Pointer::new(alloc_id, offset))) + } + match self.tcx.try_get_global_alloc(alloc_id) { + Some(GlobalAlloc::VTable(ty, trait_ref)) => Ok((ty, trait_ref)), + _ => throw_ub!(InvalidVTablePointer(Pointer::new(alloc_id, offset))), + } + } + + pub fn alloc_mark_immutable(&mut self, id: AllocId) -> InterpResult<'tcx> { + self.get_alloc_raw_mut(id)?.0.mutability = Mutability::Not; + Ok(()) + } + + /// Create a lazy debug printer that prints the given allocation and all allocations it points + /// to, recursively. + #[must_use] + pub fn dump_alloc<'a>(&'a self, id: AllocId) -> DumpAllocs<'a, 'mir, 'tcx, M> { + self.dump_allocs(vec![id]) + } + + /// Create a lazy debug printer for a list of allocations and all allocations they point to, + /// recursively. + #[must_use] + pub fn dump_allocs<'a>(&'a self, mut allocs: Vec) -> DumpAllocs<'a, 'mir, 'tcx, M> { + allocs.sort(); + allocs.dedup(); + DumpAllocs { ecx: self, allocs } + } + + /// Print leaked memory. Allocations reachable from `static_roots` or a `Global` allocation + /// are not considered leaked. Leaks whose kind `may_leak()` returns true are not reported. + pub fn leak_report(&self, static_roots: &[AllocId]) -> usize { + // Collect the set of allocations that are *reachable* from `Global` allocations. 
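// Editor's note: illustrative sketch, not part of the upstream sources. The
// reachability pass just below is a plain worklist traversal: start from the
// global allocations (plus `static_roots`) and keep following relocations
// until nothing new turns up; everything never reached is a leak candidate.
// The same idea over a toy allocation graph, using std collections instead of
// the `Fx` ones (all names invented):
use std::collections::{HashMap, HashSet};

fn reachable_from(roots: &[u64], edges: &HashMap<u64, Vec<u64>>) -> HashSet<u64> {
    let mut reachable = HashSet::new();
    let mut todo: Vec<u64> = roots.to_vec();
    while let Some(id) = todo.pop() {
        if reachable.insert(id) {
            // Newly discovered allocation: queue everything it points to.
            if let Some(targets) = edges.get(&id) {
                todo.extend(targets.iter().copied());
            }
        }
    }
    reachable
}

#[test]
fn unreachable_allocations_are_reported_as_leaks() {
    let edges = HashMap::from([(1, vec![2]), (2, vec![]), (3, vec![])]);
    let reachable = reachable_from(&[1], &edges);
    let leaks: Vec<u64> =
        edges.keys().copied().filter(|id| !reachable.contains(id)).collect();
    assert_eq!(leaks, vec![3]);
}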
+ let reachable = { + let mut reachable = FxHashSet::default(); + let global_kind = M::GLOBAL_KIND.map(MemoryKind::Machine); + let mut todo: Vec<_> = + self.memory.alloc_map.filter_map_collect(move |&id, &(kind, _)| { + if Some(kind) == global_kind { Some(id) } else { None } + }); + todo.extend(static_roots); + while let Some(id) = todo.pop() { + if reachable.insert(id) { + // This is a new allocation, add its relocations to `todo`. + if let Some((_, alloc)) = self.memory.alloc_map.get(id) { + todo.extend( + alloc.relocations().values().filter_map(|prov| prov.get_alloc_id()), + ); + } + } + } + reachable + }; + + // All allocations that are *not* `reachable` and *not* `may_leak` are considered leaking. + let leaks: Vec<_> = self.memory.alloc_map.filter_map_collect(|&id, &(kind, _)| { + if kind.may_leak() || reachable.contains(&id) { None } else { Some(id) } + }); + let n = leaks.len(); + if n > 0 { + eprintln!("The following memory was leaked: {:?}", self.dump_allocs(leaks)); + } + n + } +} + +#[doc(hidden)] +/// There's no way to use this directly, it's just a helper struct for the `dump_alloc(s)` methods. +pub struct DumpAllocs<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> { + ecx: &'a InterpCx<'mir, 'tcx, M>, + allocs: Vec, +} + +impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> std::fmt::Debug for DumpAllocs<'a, 'mir, 'tcx, M> { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Cannot be a closure because it is generic in `Prov`, `Extra`. + fn write_allocation_track_relocs<'tcx, Prov: Provenance, Extra>( + fmt: &mut std::fmt::Formatter<'_>, + tcx: TyCtxt<'tcx>, + allocs_to_print: &mut VecDeque, + alloc: &Allocation, + ) -> std::fmt::Result { + for alloc_id in alloc.relocations().values().filter_map(|prov| prov.get_alloc_id()) { + allocs_to_print.push_back(alloc_id); + } + write!(fmt, "{}", display_allocation(tcx, alloc)) + } + + let mut allocs_to_print: VecDeque<_> = self.allocs.iter().copied().collect(); + // `allocs_printed` contains all allocations that we have already printed. + let mut allocs_printed = FxHashSet::default(); + + while let Some(id) = allocs_to_print.pop_front() { + if !allocs_printed.insert(id) { + // Already printed, so skip this. + continue; + } + + write!(fmt, "{id:?}")?; + match self.ecx.memory.alloc_map.get(id) { + Some(&(kind, ref alloc)) => { + // normal alloc + write!(fmt, " ({}, ", kind)?; + write_allocation_track_relocs( + &mut *fmt, + *self.ecx.tcx, + &mut allocs_to_print, + alloc, + )?; + } + None => { + // global alloc + match self.ecx.tcx.try_get_global_alloc(id) { + Some(GlobalAlloc::Memory(alloc)) => { + write!(fmt, " (unchanged global, ")?; + write_allocation_track_relocs( + &mut *fmt, + *self.ecx.tcx, + &mut allocs_to_print, + alloc.inner(), + )?; + } + Some(GlobalAlloc::Function(func)) => { + write!(fmt, " (fn: {func})")?; + } + Some(GlobalAlloc::VTable(ty, Some(trait_ref))) => { + write!(fmt, " (vtable: impl {trait_ref} for {ty})")?; + } + Some(GlobalAlloc::VTable(ty, None)) => { + write!(fmt, " (vtable: impl for {ty})")?; + } + Some(GlobalAlloc::Static(did)) => { + write!(fmt, " (static: {})", self.ecx.tcx.def_path_str(did))?; + } + None => { + write!(fmt, " (deallocated)")?; + } + } + } + } + writeln!(fmt)?; + } + Ok(()) + } +} + +/// Reading and writing. +impl<'tcx, 'a, Prov: Provenance, Extra> AllocRefMut<'a, 'tcx, Prov, Extra> { + /// `range` is relative to this allocation reference, not the base of the allocation. 
+ pub fn write_scalar( + &mut self, + range: AllocRange, + val: ScalarMaybeUninit, + ) -> InterpResult<'tcx> { + let range = self.range.subrange(range); + debug!("write_scalar at {:?}{range:?}: {val:?}", self.alloc_id); + Ok(self + .alloc + .write_scalar(&self.tcx, range, val) + .map_err(|e| e.to_interp_error(self.alloc_id))?) + } + + /// `offset` is relative to this allocation reference, not the base of the allocation. + pub fn write_ptr_sized( + &mut self, + offset: Size, + val: ScalarMaybeUninit, + ) -> InterpResult<'tcx> { + self.write_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size), val) + } + + /// Mark the entire referenced range as uninitalized + pub fn write_uninit(&mut self) -> InterpResult<'tcx> { + Ok(self + .alloc + .write_uninit(&self.tcx, self.range) + .map_err(|e| e.to_interp_error(self.alloc_id))?) + } +} + +impl<'tcx, 'a, Prov: Provenance, Extra> AllocRef<'a, 'tcx, Prov, Extra> { + /// `range` is relative to this allocation reference, not the base of the allocation. + pub fn read_scalar( + &self, + range: AllocRange, + read_provenance: bool, + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let range = self.range.subrange(range); + let res = self + .alloc + .read_scalar(&self.tcx, range, read_provenance) + .map_err(|e| e.to_interp_error(self.alloc_id))?; + debug!("read_scalar at {:?}{range:?}: {res:?}", self.alloc_id); + Ok(res) + } + + /// `range` is relative to this allocation reference, not the base of the allocation. + pub fn read_integer(&self, range: AllocRange) -> InterpResult<'tcx, ScalarMaybeUninit> { + self.read_scalar(range, /*read_provenance*/ false) + } + + /// `offset` is relative to this allocation reference, not the base of the allocation. + pub fn read_pointer(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit> { + self.read_scalar( + alloc_range(offset, self.tcx.data_layout().pointer_size), + /*read_provenance*/ true, + ) + } + + /// `range` is relative to this allocation reference, not the base of the allocation. + pub fn check_bytes( + &self, + range: AllocRange, + allow_uninit: bool, + allow_ptr: bool, + ) -> InterpResult<'tcx> { + Ok(self + .alloc + .check_bytes(&self.tcx, self.range.subrange(range), allow_uninit, allow_ptr) + .map_err(|e| e.to_interp_error(self.alloc_id))?) + } + + /// Returns whether the allocation has relocations for the entire range of the `AllocRef`. + pub(crate) fn has_relocations(&self) -> bool { + self.alloc.has_relocations(&self.tcx, self.range) + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + /// Reads the given number of bytes from memory. Returns them as a slice. + /// + /// Performs appropriate bounds checks. + pub fn read_bytes_ptr( + &self, + ptr: Pointer>, + size: Size, + ) -> InterpResult<'tcx, &[u8]> { + let Some(alloc_ref) = self.get_ptr_alloc(ptr, size, Align::ONE)? else { + // zero-sized access + return Ok(&[]); + }; + // Side-step AllocRef and directly access the underlying bytes more efficiently. + // (We are staying inside the bounds here so all is good.) + Ok(alloc_ref + .alloc + .get_bytes(&alloc_ref.tcx, alloc_ref.range) + .map_err(|e| e.to_interp_error(alloc_ref.alloc_id))?) + } + + /// Writes the given stream of bytes into memory. + /// + /// Performs appropriate bounds checks. 
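// Editor's note: illustrative sketch, not part of the upstream sources. The
// function below insists that the byte iterator knows its exact length up
// front (`size_hint` must have an upper bound equal to its lower bound), so
// the destination can be bounds-checked once before any byte is written. A
// standalone model of that contract check (`exact_len` is an invented name):
fn exact_len(src: &impl Iterator<Item = u8>) -> u64 {
    let (lower, upper) = src.size_hint();
    let len = upper.expect("can only write bounded iterators");
    assert_eq!(lower, len, "can only write iterators with a precise length");
    len as u64
}

#[test]
fn slices_and_ranges_report_exact_lengths() {
    assert_eq!(exact_len(&[1u8, 2, 3].iter().copied()), 3);
    assert_eq!(exact_len(&(0u8..10)), 10);
}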
+ pub fn write_bytes_ptr( + &mut self, + ptr: Pointer>, + src: impl IntoIterator, + ) -> InterpResult<'tcx> { + let mut src = src.into_iter(); + let (lower, upper) = src.size_hint(); + let len = upper.expect("can only write bounded iterators"); + assert_eq!(lower, len, "can only write iterators with a precise length"); + + let size = Size::from_bytes(len); + let Some(alloc_ref) = self.get_ptr_alloc_mut(ptr, size, Align::ONE)? else { + // zero-sized access + assert_matches!( + src.next(), + None, + "iterator said it was empty but returned an element" + ); + return Ok(()); + }; + + // Side-step AllocRef and directly access the underlying bytes more efficiently. + // (We are staying inside the bounds here so all is good.) + let alloc_id = alloc_ref.alloc_id; + let bytes = alloc_ref + .alloc + .get_bytes_mut(&alloc_ref.tcx, alloc_ref.range) + .map_err(move |e| e.to_interp_error(alloc_id))?; + // `zip` would stop when the first iterator ends; we want to definitely + // cover all of `bytes`. + for dest in bytes { + *dest = src.next().expect("iterator was shorter than it said it would be"); + } + assert_matches!(src.next(), None, "iterator was longer than it said it would be"); + Ok(()) + } + + pub fn mem_copy( + &mut self, + src: Pointer>, + src_align: Align, + dest: Pointer>, + dest_align: Align, + size: Size, + nonoverlapping: bool, + ) -> InterpResult<'tcx> { + self.mem_copy_repeatedly(src, src_align, dest, dest_align, size, 1, nonoverlapping) + } + + pub fn mem_copy_repeatedly( + &mut self, + src: Pointer>, + src_align: Align, + dest: Pointer>, + dest_align: Align, + size: Size, + num_copies: u64, + nonoverlapping: bool, + ) -> InterpResult<'tcx> { + let tcx = self.tcx; + // We need to do our own bounds-checks. + let src_parts = self.get_ptr_access(src, size, src_align)?; + let dest_parts = self.get_ptr_access(dest, size * num_copies, dest_align)?; // `Size` multiplication + + // FIXME: we look up both allocations twice here, once before for the `check_ptr_access` + // and once below to get the underlying `&[mut] Allocation`. + + // Source alloc preparations and access hooks. + let Some((src_alloc_id, src_offset, src_prov)) = src_parts else { + // Zero-sized *source*, that means dst is also zero-sized and we have nothing to do. + return Ok(()); + }; + let src_alloc = self.get_alloc_raw(src_alloc_id)?; + let src_range = alloc_range(src_offset, size); + M::memory_read(*tcx, &self.machine, &src_alloc.extra, (src_alloc_id, src_prov), src_range)?; + // We need the `dest` ptr for the next operation, so we get it now. + // We already did the source checks and called the hooks so we are good to return early. + let Some((dest_alloc_id, dest_offset, dest_prov)) = dest_parts else { + // Zero-sized *destination*. + return Ok(()); + }; + + // This checks relocation edges on the src, which needs to happen before + // `prepare_relocation_copy`. + let src_bytes = src_alloc + .get_bytes_with_uninit_and_ptr(&tcx, src_range) + .map_err(|e| e.to_interp_error(src_alloc_id))? + .as_ptr(); // raw ptr, so we can also get a ptr to the destination allocation + // first copy the relocations to a temporary buffer, because + // `get_bytes_mut` will clear the relocations, which is correct, + // since we don't want to keep any relocations at the target. + let relocations = + src_alloc.prepare_relocation_copy(self, src_range, dest_offset, num_copies); + // Prepare a copy of the initialization mask. + let compressed = src_alloc.compress_uninit_range(src_range); + + // Destination alloc preparations and access hooks. 
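// Editor's note: illustrative sketch, not part of the upstream sources.
// `mem_copy_repeatedly` writes `num_copies` back-to-back copies of the source
// range into the destination (the destination was bounds-checked above with
// `size * num_copies`). The real loop further below works on raw pointers so
// it can choose between `ptr::copy` and `ptr::copy_nonoverlapping`; here is a
// safe standalone model of just the repetition (names invented):
fn copy_repeatedly(src: &[u8], dest: &mut [u8], num_copies: usize) {
    assert_eq!(dest.len(), src.len() * num_copies);
    if src.is_empty() {
        return; // nothing to copy, and it avoids zero-sized chunks below
    }
    for chunk in dest.chunks_exact_mut(src.len()) {
        chunk.copy_from_slice(src);
    }
}

#[test]
fn three_copies_land_back_to_back() {
    let mut dest = [0u8; 6];
    copy_repeatedly(&[0xab, 0xcd], &mut dest, 3);
    assert_eq!(dest, [0xab, 0xcd, 0xab, 0xcd, 0xab, 0xcd]);
}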
+ let (dest_alloc, extra) = self.get_alloc_raw_mut(dest_alloc_id)?; + let dest_range = alloc_range(dest_offset, size * num_copies); + M::memory_written( + *tcx, + extra, + &mut dest_alloc.extra, + (dest_alloc_id, dest_prov), + dest_range, + )?; + let dest_bytes = dest_alloc + .get_bytes_mut_ptr(&tcx, dest_range) + .map_err(|e| e.to_interp_error(dest_alloc_id))? + .as_mut_ptr(); + + if compressed.no_bytes_init() { + // Fast path: If all bytes are `uninit` then there is nothing to copy. The target range + // is marked as uninitialized but we otherwise omit changing the byte representation which may + // be arbitrary for uninitialized bytes. + // This also avoids writing to the target bytes so that the backing allocation is never + // touched if the bytes stay uninitialized for the whole interpreter execution. On contemporary + // operating system this can avoid physically allocating the page. + dest_alloc + .write_uninit(&tcx, dest_range) + .map_err(|e| e.to_interp_error(dest_alloc_id))?; + // We can forget about the relocations, this is all not initialized anyway. + return Ok(()); + } + + // SAFE: The above indexing would have panicked if there weren't at least `size` bytes + // behind `src` and `dest`. Also, we use the overlapping-safe `ptr::copy` if `src` and + // `dest` could possibly overlap. + // The pointers above remain valid even if the `HashMap` table is moved around because they + // point into the `Vec` storing the bytes. + unsafe { + if src_alloc_id == dest_alloc_id { + if nonoverlapping { + // `Size` additions + if (src_offset <= dest_offset && src_offset + size > dest_offset) + || (dest_offset <= src_offset && dest_offset + size > src_offset) + { + throw_ub_format!("copy_nonoverlapping called on overlapping ranges") + } + } + + for i in 0..num_copies { + ptr::copy( + src_bytes, + dest_bytes.add((size * i).bytes_usize()), // `Size` multiplication + size.bytes_usize(), + ); + } + } else { + for i in 0..num_copies { + ptr::copy_nonoverlapping( + src_bytes, + dest_bytes.add((size * i).bytes_usize()), // `Size` multiplication + size.bytes_usize(), + ); + } + } + } + + // now fill in all the "init" data + dest_alloc.mark_compressed_init_range( + &compressed, + alloc_range(dest_offset, size), // just a single copy (i.e., not full `dest_range`) + num_copies, + ); + // copy the relocations to the destination + dest_alloc.mark_relocation_range(relocations); + + Ok(()) + } +} + +/// Machine pointer introspection. +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + /// Test if this value might be null. + /// If the machine does not support ptr-to-int casts, this is conservative. + pub fn scalar_may_be_null(&self, scalar: Scalar) -> InterpResult<'tcx, bool> { + Ok(match scalar.try_to_int() { + Ok(int) => int.is_null(), + Err(_) => { + // Can only happen during CTFE. + let ptr = scalar.to_pointer(self)?; + match self.ptr_try_get_alloc_id(ptr) { + Ok((alloc_id, offset, _)) => { + let (size, _align, _kind) = self.get_alloc_info(alloc_id); + // If the pointer is out-of-bounds, it may be null. + // Note that one-past-the-end (offset == size) is still inbounds, and never null. + offset > size + } + Err(_offset) => bug!("a non-int scalar is always a pointer"), + } + } + }) + } + + /// Turning a "maybe pointer" into a proper pointer (and some information + /// about where it points), or an absolute address. 
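// Editor's note: illustrative sketch, not part of the upstream sources. The
// function below hands callers either `Ok((alloc_id, offset, extra))` for a
// pointer that has provenance, or `Err(absolute_address)` for a plain integer
// address, and callers match on both cases (as `scalar_may_be_null` just above
// does). A standalone model of such a caller, with the types shrunk down to
// plain integers (all names invented):
fn points_into_live_allocation(
    ptr: Result<(u64, u64), u64>, // Ok((alloc_id, offset)) or Err(address)
    alloc_size_of: impl Fn(u64) -> u64,
) -> bool {
    match ptr {
        // One-past-the-end (offset == size) still counts as in-bounds.
        Ok((alloc_id, offset)) => offset <= alloc_size_of(alloc_id),
        // A bare integer address tells us nothing about any allocation.
        Err(_addr) => false,
    }
}

#[test]
fn both_result_cases_are_handled() {
    let size_of = |_id: u64| 4u64;
    assert!(points_into_live_allocation(Ok((1, 4)), size_of));
    assert!(!points_into_live_allocation(Ok((1, 5)), size_of));
    assert!(!points_into_live_allocation(Err(0x1000), size_of));
}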
+ pub fn ptr_try_get_alloc_id( + &self, + ptr: Pointer>, + ) -> Result<(AllocId, Size, M::ProvenanceExtra), u64> { + match ptr.into_pointer_or_addr() { + Ok(ptr) => match M::ptr_get_alloc(self, ptr) { + Some((alloc_id, offset, extra)) => Ok((alloc_id, offset, extra)), + None => { + assert!(M::Provenance::OFFSET_IS_ADDR); + let (_, addr) = ptr.into_parts(); + Err(addr.bytes()) + } + }, + Err(addr) => Err(addr.bytes()), + } + } + + /// Turning a "maybe pointer" into a proper pointer (and some information about where it points). + #[inline(always)] + pub fn ptr_get_alloc_id( + &self, + ptr: Pointer>, + ) -> InterpResult<'tcx, (AllocId, Size, M::ProvenanceExtra)> { + self.ptr_try_get_alloc_id(ptr).map_err(|offset| { + err_ub!(DanglingIntPointer(offset, CheckInAllocMsg::InboundsTest)).into() + }) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/mod.rs b/compiler/rustc_const_eval/src/interpret/mod.rs new file mode 100644 index 000000000..2e356f67b --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/mod.rs @@ -0,0 +1,34 @@ +//! An interpreter for MIR used in CTFE and by miri + +mod cast; +mod eval_context; +mod intern; +mod intrinsics; +mod machine; +mod memory; +mod operand; +mod operator; +mod place; +mod projection; +mod step; +mod terminator; +mod traits; +mod util; +mod validity; +mod visitor; + +pub use rustc_middle::mir::interpret::*; // have all the `interpret` symbols in one place: here + +pub use self::eval_context::{ + Frame, FrameInfo, InterpCx, LocalState, LocalValue, StackPopCleanup, StackPopUnwind, +}; +pub use self::intern::{intern_const_alloc_recursive, InternKind}; +pub use self::machine::{compile_time_machine, AllocMap, Machine, MayLeak, StackPopJump}; +pub use self::memory::{AllocKind, AllocRef, AllocRefMut, FnVal, Memory, MemoryKind}; +pub use self::operand::{ImmTy, Immediate, OpTy, Operand}; +pub use self::place::{MPlaceTy, MemPlace, MemPlaceMeta, Place, PlaceTy}; +pub use self::validity::{CtfeValidationMode, RefTracking}; +pub use self::visitor::{MutValueVisitor, Value, ValueVisitor}; + +pub(crate) use self::intrinsics::eval_nullary_intrinsic; +use eval_context::{from_known_layout, mir_assign_valid_types}; diff --git a/compiler/rustc_const_eval/src/interpret/operand.rs b/compiler/rustc_const_eval/src/interpret/operand.rs new file mode 100644 index 000000000..94ba62c16 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/operand.rs @@ -0,0 +1,831 @@ +//! Functions concerning immediate values and operands, and reading from operands. +//! All high-level functions to read from memory work on operands as sources. + +use std::fmt::Write; + +use rustc_hir::def::Namespace; +use rustc_middle::ty::layout::{LayoutOf, PrimitiveExt, TyAndLayout}; +use rustc_middle::ty::print::{FmtPrinter, PrettyPrinter, Printer}; +use rustc_middle::ty::{ConstInt, DelaySpanBugEmitted, Ty}; +use rustc_middle::{mir, ty}; +use rustc_target::abi::{self, Abi, Align, HasDataLayout, Size, TagEncoding}; +use rustc_target::abi::{VariantIdx, Variants}; + +use super::{ + alloc_range, from_known_layout, mir_assign_valid_types, AllocId, ConstValue, Frame, GlobalId, + InterpCx, InterpResult, MPlaceTy, Machine, MemPlace, MemPlaceMeta, Place, PlaceTy, Pointer, + Provenance, Scalar, ScalarMaybeUninit, +}; + +/// An `Immediate` represents a single immediate self-contained Rust value. +/// +/// For optimization of a few very common cases, there is also a representation for a pair of +/// primitive values (`ScalarPair`). 
It allows Miri to avoid making allocations for checked binary +/// operations and wide pointers. This idea was taken from rustc's codegen. +/// In particular, thanks to `ScalarPair`, arithmetic operations and casts can be entirely +/// defined on `Immediate`, and do not have to work with a `Place`. +#[derive(Copy, Clone, Debug)] +pub enum Immediate { + /// A single scalar value (must have *initialized* `Scalar` ABI). + /// FIXME: we also currently often use this for ZST. + /// `ScalarMaybeUninit` should reject ZST, and we should use `Uninit` for them instead. + Scalar(ScalarMaybeUninit), + /// A pair of two scalar value (must have `ScalarPair` ABI where both fields are + /// `Scalar::Initialized`). + ScalarPair(ScalarMaybeUninit, ScalarMaybeUninit), + /// A value of fully uninitialized memory. Can have and size and layout. + Uninit, +} + +impl From> for Immediate { + #[inline(always)] + fn from(val: ScalarMaybeUninit) -> Self { + Immediate::Scalar(val) + } +} + +impl From> for Immediate { + #[inline(always)] + fn from(val: Scalar) -> Self { + Immediate::Scalar(val.into()) + } +} + +impl<'tcx, Prov: Provenance> Immediate { + pub fn from_pointer(p: Pointer, cx: &impl HasDataLayout) -> Self { + Immediate::Scalar(ScalarMaybeUninit::from_pointer(p, cx)) + } + + pub fn from_maybe_pointer(p: Pointer>, cx: &impl HasDataLayout) -> Self { + Immediate::Scalar(ScalarMaybeUninit::from_maybe_pointer(p, cx)) + } + + pub fn new_slice(val: Scalar, len: u64, cx: &impl HasDataLayout) -> Self { + Immediate::ScalarPair(val.into(), Scalar::from_machine_usize(len, cx).into()) + } + + pub fn new_dyn_trait( + val: Scalar, + vtable: Pointer>, + cx: &impl HasDataLayout, + ) -> Self { + Immediate::ScalarPair(val.into(), ScalarMaybeUninit::from_maybe_pointer(vtable, cx)) + } + + #[inline] + #[cfg_attr(debug_assertions, track_caller)] // only in debug builds due to perf (see #98980) + pub fn to_scalar_or_uninit(self) -> ScalarMaybeUninit { + match self { + Immediate::Scalar(val) => val, + Immediate::ScalarPair(..) => bug!("Got a scalar pair where a scalar was expected"), + Immediate::Uninit => ScalarMaybeUninit::Uninit, + } + } + + #[inline] + #[cfg_attr(debug_assertions, track_caller)] // only in debug builds due to perf (see #98980) + pub fn to_scalar(self) -> InterpResult<'tcx, Scalar> { + self.to_scalar_or_uninit().check_init() + } + + #[inline] + #[cfg_attr(debug_assertions, track_caller)] // only in debug builds due to perf (see #98980) + pub fn to_scalar_or_uninit_pair(self) -> (ScalarMaybeUninit, ScalarMaybeUninit) { + match self { + Immediate::ScalarPair(val1, val2) => (val1, val2), + Immediate::Scalar(..) => bug!("Got a scalar where a scalar pair was expected"), + Immediate::Uninit => (ScalarMaybeUninit::Uninit, ScalarMaybeUninit::Uninit), + } + } + + #[inline] + #[cfg_attr(debug_assertions, track_caller)] // only in debug builds due to perf (see #98980) + pub fn to_scalar_pair(self) -> InterpResult<'tcx, (Scalar, Scalar)> { + let (val1, val2) = self.to_scalar_or_uninit_pair(); + Ok((val1.check_init()?, val2.check_init()?)) + } +} + +// ScalarPair needs a type to interpret, so we often have an immediate and a type together +// as input for binary and cast operations. 
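// Editor's note: illustrative sketch, not part of the upstream sources.
// `Immediate::ScalarPair` is what keeps wide pointers and the results of
// checked arithmetic out of memory: a slice reference is the pair
// (data pointer, length), and a trait object reference is (data pointer,
// vtable pointer). The same decomposition is observable from ordinary Rust:
#[test]
fn wide_pointers_and_checked_arithmetic_are_pairs() {
    let bytes = [1u8, 2, 3, 4];
    let slice: &[u8] = &bytes[1..3];
    // Decompose the wide pointer into its two "scalars".
    let (data, len) = (slice.as_ptr(), slice.len());
    assert_eq!(len, 2);
    assert_eq!(data, &bytes[1] as *const u8);
    // Checked arithmetic is the other common pair producer: (value, overflowed).
    assert_eq!(u8::MAX.overflowing_add(1), (0, true));
}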
+#[derive(Clone, Debug)] +pub struct ImmTy<'tcx, Prov: Provenance = AllocId> { + imm: Immediate, + pub layout: TyAndLayout<'tcx>, +} + +impl std::fmt::Display for ImmTy<'_, Prov> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + /// Helper function for printing a scalar to a FmtPrinter + fn p<'a, 'tcx, Prov: Provenance>( + cx: FmtPrinter<'a, 'tcx>, + s: ScalarMaybeUninit, + ty: Ty<'tcx>, + ) -> Result, std::fmt::Error> { + match s { + ScalarMaybeUninit::Scalar(Scalar::Int(int)) => { + cx.pretty_print_const_scalar_int(int, ty, true) + } + ScalarMaybeUninit::Scalar(Scalar::Ptr(ptr, _sz)) => { + // Just print the ptr value. `pretty_print_const_scalar_ptr` would also try to + // print what is points to, which would fail since it has no access to the local + // memory. + cx.pretty_print_const_pointer(ptr, ty, true) + } + ScalarMaybeUninit::Uninit => cx.typed_value( + |mut this| { + this.write_str("uninit ")?; + Ok(this) + }, + |this| this.print_type(ty), + " ", + ), + } + } + ty::tls::with(|tcx| { + match self.imm { + Immediate::Scalar(s) => { + if let Some(ty) = tcx.lift(self.layout.ty) { + let cx = FmtPrinter::new(tcx, Namespace::ValueNS); + f.write_str(&p(cx, s, ty)?.into_buffer())?; + return Ok(()); + } + write!(f, "{:x}: {}", s, self.layout.ty) + } + Immediate::ScalarPair(a, b) => { + // FIXME(oli-obk): at least print tuples and slices nicely + write!(f, "({:x}, {:x}): {}", a, b, self.layout.ty) + } + Immediate::Uninit => { + write!(f, "uninit: {}", self.layout.ty) + } + } + }) + } +} + +impl<'tcx, Prov: Provenance> std::ops::Deref for ImmTy<'tcx, Prov> { + type Target = Immediate; + #[inline(always)] + fn deref(&self) -> &Immediate { + &self.imm + } +} + +/// An `Operand` is the result of computing a `mir::Operand`. It can be immediate, +/// or still in memory. The latter is an optimization, to delay reading that chunk of +/// memory and to avoid having to store arbitrary-sized data here. +#[derive(Copy, Clone, Debug)] +pub enum Operand { + Immediate(Immediate), + Indirect(MemPlace), +} + +#[derive(Clone, Debug)] +pub struct OpTy<'tcx, Prov: Provenance = AllocId> { + op: Operand, // Keep this private; it helps enforce invariants. + pub layout: TyAndLayout<'tcx>, + /// rustc does not have a proper way to represent the type of a field of a `repr(packed)` struct: + /// it needs to have a different alignment than the field type would usually have. + /// So we represent this here with a separate field that "overwrites" `layout.align`. + /// This means `layout.align` should never be used for an `OpTy`! + /// `None` means "alignment does not matter since this is a by-value operand" + /// (`Operand::Immediate`); this field is only relevant for `Operand::Indirect`. + /// Also CTFE ignores alignment anyway, so this is for Miri only. 
+ pub align: Option, +} + +impl<'tcx, Prov: Provenance> std::ops::Deref for OpTy<'tcx, Prov> { + type Target = Operand; + #[inline(always)] + fn deref(&self) -> &Operand { + &self.op + } +} + +impl<'tcx, Prov: Provenance> From> for OpTy<'tcx, Prov> { + #[inline(always)] + fn from(mplace: MPlaceTy<'tcx, Prov>) -> Self { + OpTy { op: Operand::Indirect(*mplace), layout: mplace.layout, align: Some(mplace.align) } + } +} + +impl<'tcx, Prov: Provenance> From<&'_ MPlaceTy<'tcx, Prov>> for OpTy<'tcx, Prov> { + #[inline(always)] + fn from(mplace: &MPlaceTy<'tcx, Prov>) -> Self { + OpTy { op: Operand::Indirect(**mplace), layout: mplace.layout, align: Some(mplace.align) } + } +} + +impl<'tcx, Prov: Provenance> From<&'_ mut MPlaceTy<'tcx, Prov>> for OpTy<'tcx, Prov> { + #[inline(always)] + fn from(mplace: &mut MPlaceTy<'tcx, Prov>) -> Self { + OpTy { op: Operand::Indirect(**mplace), layout: mplace.layout, align: Some(mplace.align) } + } +} + +impl<'tcx, Prov: Provenance> From> for OpTy<'tcx, Prov> { + #[inline(always)] + fn from(val: ImmTy<'tcx, Prov>) -> Self { + OpTy { op: Operand::Immediate(val.imm), layout: val.layout, align: None } + } +} + +impl<'tcx, Prov: Provenance> ImmTy<'tcx, Prov> { + #[inline] + pub fn from_scalar(val: Scalar, layout: TyAndLayout<'tcx>) -> Self { + ImmTy { imm: val.into(), layout } + } + + #[inline] + pub fn from_immediate(imm: Immediate, layout: TyAndLayout<'tcx>) -> Self { + ImmTy { imm, layout } + } + + #[inline] + pub fn uninit(layout: TyAndLayout<'tcx>) -> Self { + ImmTy { imm: Immediate::Uninit, layout } + } + + #[inline] + pub fn try_from_uint(i: impl Into, layout: TyAndLayout<'tcx>) -> Option { + Some(Self::from_scalar(Scalar::try_from_uint(i, layout.size)?, layout)) + } + #[inline] + pub fn from_uint(i: impl Into, layout: TyAndLayout<'tcx>) -> Self { + Self::from_scalar(Scalar::from_uint(i, layout.size), layout) + } + + #[inline] + pub fn try_from_int(i: impl Into, layout: TyAndLayout<'tcx>) -> Option { + Some(Self::from_scalar(Scalar::try_from_int(i, layout.size)?, layout)) + } + + #[inline] + pub fn from_int(i: impl Into, layout: TyAndLayout<'tcx>) -> Self { + Self::from_scalar(Scalar::from_int(i, layout.size), layout) + } + + #[inline] + pub fn to_const_int(self) -> ConstInt { + assert!(self.layout.ty.is_integral()); + let int = self.to_scalar().expect("to_const_int doesn't work on scalar pairs").assert_int(); + ConstInt::new(int, self.layout.ty.is_signed(), self.layout.ty.is_ptr_sized_integral()) + } +} + +impl<'tcx, Prov: Provenance> OpTy<'tcx, Prov> { + pub fn len(&self, cx: &impl HasDataLayout) -> InterpResult<'tcx, u64> { + if self.layout.is_unsized() { + // There are no unsized immediates. + self.assert_mem_place().len(cx) + } else { + match self.layout.fields { + abi::FieldsShape::Array { count, .. } => Ok(count), + _ => bug!("len not supported on sized type {:?}", self.layout.ty), + } + } + } + + pub fn offset_with_meta( + &self, + offset: Size, + meta: MemPlaceMeta, + layout: TyAndLayout<'tcx>, + cx: &impl HasDataLayout, + ) -> InterpResult<'tcx, Self> { + match self.try_as_mplace() { + Ok(mplace) => Ok(mplace.offset_with_meta(offset, meta, layout, cx)?.into()), + Err(imm) => { + assert!( + matches!(*imm, Immediate::Uninit), + "Scalar/ScalarPair cannot be offset into" + ); + assert!(!meta.has_meta()); // no place to store metadata here + // Every part of an uninit is uninit. 
+ Ok(ImmTy::uninit(layout).into()) + } + } + } + + pub fn offset( + &self, + offset: Size, + layout: TyAndLayout<'tcx>, + cx: &impl HasDataLayout, + ) -> InterpResult<'tcx, Self> { + assert!(!layout.is_unsized()); + self.offset_with_meta(offset, MemPlaceMeta::None, layout, cx) + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + /// Try reading an immediate in memory; this is interesting particularly for `ScalarPair`. + /// Returns `None` if the layout does not permit loading this as a value. + /// + /// This is an internal function; call `read_immediate` instead. + fn read_immediate_from_mplace_raw( + &self, + mplace: &MPlaceTy<'tcx, M::Provenance>, + force: bool, + ) -> InterpResult<'tcx, Option>> { + if mplace.layout.is_unsized() { + // Don't touch unsized + return Ok(None); + } + + let Some(alloc) = self.get_place_alloc(mplace)? else { + // zero-sized type can be left uninit + return Ok(Some(ImmTy::uninit(mplace.layout))); + }; + + // It may seem like all types with `Scalar` or `ScalarPair` ABI are fair game at this point. + // However, `MaybeUninit` is considered a `Scalar` as far as its layout is concerned -- + // and yet cannot be represented by an interpreter `Scalar`, since we have to handle the + // case where some of the bytes are initialized and others are not. So, we need an extra + // check that walks over the type of `mplace` to make sure it is truly correct to treat this + // like a `Scalar` (or `ScalarPair`). + let scalar_layout = match mplace.layout.abi { + // `if` does not work nested inside patterns, making this a bit awkward to express. + Abi::Scalar(abi::Scalar::Initialized { value: s, .. }) => Some(s), + Abi::Scalar(s) if force => Some(s.primitive()), + _ => None, + }; + if let Some(s) = scalar_layout { + let size = s.size(self); + assert_eq!(size, mplace.layout.size, "abi::Scalar size does not match layout size"); + let scalar = alloc + .read_scalar(alloc_range(Size::ZERO, size), /*read_provenance*/ s.is_ptr())?; + return Ok(Some(ImmTy { imm: scalar.into(), layout: mplace.layout })); + } + let scalar_pair_layout = match mplace.layout.abi { + Abi::ScalarPair( + abi::Scalar::Initialized { value: a, .. }, + abi::Scalar::Initialized { value: b, .. }, + ) => Some((a, b)), + Abi::ScalarPair(a, b) if force => Some((a.primitive(), b.primitive())), + _ => None, + }; + if let Some((a, b)) = scalar_pair_layout { + // We checked `ptr_align` above, so all fields will have the alignment they need. + // We would anyway check against `ptr_align.restrict_for_offset(b_offset)`, + // which `ptr.offset(b_offset)` cannot possibly fail to satisfy. + let (a_size, b_size) = (a.size(self), b.size(self)); + let b_offset = a_size.align_to(b.align(self).abi); + assert!(b_offset.bytes() > 0); // in `operand_field` we use the offset to tell apart the fields + let a_val = alloc.read_scalar( + alloc_range(Size::ZERO, a_size), + /*read_provenance*/ a.is_ptr(), + )?; + let b_val = alloc + .read_scalar(alloc_range(b_offset, b_size), /*read_provenance*/ b.is_ptr())?; + return Ok(Some(ImmTy { + imm: Immediate::ScalarPair(a_val, b_val), + layout: mplace.layout, + })); + } + // Neither a scalar nor scalar pair. + return Ok(None); + } + + /// Try returning an immediate for the operand. If the layout does not permit loading this as an + /// immediate, return where in memory we can find the data. + /// Note that for a given layout, this operation will either always fail or always + /// succeed! 
Whether it succeeds depends on whether the layout can be represented + /// in an `Immediate`, not on which data is stored there currently. + /// + /// If `force` is `true`, then even scalars with fields that can be ununit will be + /// read. This means the load is lossy and should not be written back! + /// This flag exists only for validity checking. + /// + /// This is an internal function that should not usually be used; call `read_immediate` instead. + /// ConstProp needs it, though. + pub fn read_immediate_raw( + &self, + src: &OpTy<'tcx, M::Provenance>, + force: bool, + ) -> InterpResult<'tcx, Result, MPlaceTy<'tcx, M::Provenance>>> { + Ok(match src.try_as_mplace() { + Ok(ref mplace) => { + if let Some(val) = self.read_immediate_from_mplace_raw(mplace, force)? { + Ok(val) + } else { + Err(*mplace) + } + } + Err(val) => Ok(val), + }) + } + + /// Read an immediate from a place, asserting that that is possible with the given layout. + #[inline(always)] + pub fn read_immediate( + &self, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, ImmTy<'tcx, M::Provenance>> { + if let Ok(imm) = self.read_immediate_raw(op, /*force*/ false)? { + Ok(imm) + } else { + span_bug!(self.cur_span(), "primitive read failed for type: {:?}", op.layout.ty); + } + } + + /// Read a scalar from a place + pub fn read_scalar( + &self, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + Ok(self.read_immediate(op)?.to_scalar_or_uninit()) + } + + /// Read a pointer from a place. + pub fn read_pointer( + &self, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, Pointer>> { + self.read_scalar(op)?.to_pointer(self) + } + + /// Turn the wide MPlace into a string (must already be dereferenced!) + pub fn read_str(&self, mplace: &MPlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx, &str> { + let len = mplace.len(self)?; + let bytes = self.read_bytes_ptr(mplace.ptr, Size::from_bytes(len))?; + let str = std::str::from_utf8(bytes).map_err(|err| err_ub!(InvalidStr(err)))?; + Ok(str) + } + + /// Converts a repr(simd) operand into an operand where `place_index` accesses the SIMD elements. + /// Also returns the number of elements. + /// + /// Can (but does not always) trigger UB if `op` is uninitialized. + pub fn operand_to_simd( + &self, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, (MPlaceTy<'tcx, M::Provenance>, u64)> { + // Basically we just transmute this place into an array following simd_size_and_type. + // This only works in memory, but repr(simd) types should never be immediates anyway. + assert!(op.layout.ty.is_simd()); + match op.try_as_mplace() { + Ok(mplace) => self.mplace_to_simd(&mplace), + Err(imm) => match *imm { + Immediate::Uninit => { + throw_ub!(InvalidUninitBytes(None)) + } + Immediate::Scalar(..) | Immediate::ScalarPair(..) => { + bug!("arrays/slices can never have Scalar/ScalarPair layout") + } + }, + } + } + + /// Read from a local. Will not actually access the local if reading from a ZST. + /// Will not access memory, instead an indirect `Operand` is returned. + /// + /// This is public because it is used by [priroda](https://github.com/oli-obk/priroda) to get an + /// OpTy from a local. 
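+ // Sketch of a call site (`ecx` here is a hypothetical `&InterpCx`, not part of this file):
+ // `let op = ecx.local_to_op(ecx.frame(), mir::Local::from_usize(1), None)?;`
+ // reads local `_1`, i.e. the first argument of the current frame (`_0` is the return place).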
+ pub fn local_to_op( + &self, + frame: &Frame<'mir, 'tcx, M::Provenance, M::FrameExtra>, + local: mir::Local, + layout: Option>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + let layout = self.layout_of_local(frame, local, layout)?; + let op = if layout.is_zst() { + // Bypass `access_local` (helps in ConstProp) + Operand::Immediate(Immediate::Uninit) + } else { + *M::access_local(frame, local)? + }; + Ok(OpTy { op, layout, align: Some(layout.align.abi) }) + } + + /// Every place can be read from, so we can turn them into an operand. + /// This will definitely return `Indirect` if the place is a `Ptr`, i.e., this + /// will never actually read from memory. + #[inline(always)] + pub fn place_to_op( + &self, + place: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + let op = match **place { + Place::Ptr(mplace) => Operand::Indirect(mplace), + Place::Local { frame, local } => { + *self.local_to_op(&self.stack()[frame], local, None)? + } + }; + Ok(OpTy { op, layout: place.layout, align: Some(place.align) }) + } + + /// Evaluate a place with the goal of reading from it. This lets us sometimes + /// avoid allocations. + pub fn eval_place_to_op( + &self, + mir_place: mir::Place<'tcx>, + layout: Option>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + // Do not use the layout passed in as argument if the base we are looking at + // here is not the entire place. + let layout = if mir_place.projection.is_empty() { layout } else { None }; + + let mut op = self.local_to_op(self.frame(), mir_place.local, layout)?; + // Using `try_fold` turned out to be bad for performance, hence the loop. + for elem in mir_place.projection.iter() { + op = self.operand_projection(&op, elem)? + } + + trace!("eval_place_to_op: got {:?}", *op); + // Sanity-check the type we ended up with. + debug_assert!( + mir_assign_valid_types( + *self.tcx, + self.param_env, + self.layout_of(self.subst_from_current_frame_and_normalize_erasing_regions( + mir_place.ty(&self.frame().body.local_decls, *self.tcx).ty + )?)?, + op.layout, + ), + "eval_place of a MIR place with type {:?} produced an interpreter operand with type {:?}", + mir_place.ty(&self.frame().body.local_decls, *self.tcx).ty, + op.layout.ty, + ); + Ok(op) + } + + /// Evaluate the operand, returning a place where you can then find the data. + /// If you already know the layout, you can save two table lookups + /// by passing it in here. + #[inline] + pub fn eval_operand( + &self, + mir_op: &mir::Operand<'tcx>, + layout: Option>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + use rustc_middle::mir::Operand::*; + let op = match *mir_op { + // FIXME: do some more logic on `move` to invalidate the old location + Copy(place) | Move(place) => self.eval_place_to_op(place, layout)?, + + Constant(ref constant) => { + let val = + self.subst_from_current_frame_and_normalize_erasing_regions(constant.literal)?; + + // This can still fail: + // * During ConstProp, with `TooGeneric` or since the `required_consts` were not all + // checked yet. + // * During CTFE, since promoteds in `const`/`static` initializer bodies can fail. + self.mir_const_to_op(&val, layout)? 
+ } + }; + trace!("{:?}: {:?}", mir_op, *op); + Ok(op) + } + + /// Evaluate a bunch of operands at once + pub(super) fn eval_operands( + &self, + ops: &[mir::Operand<'tcx>], + ) -> InterpResult<'tcx, Vec>> { + ops.iter().map(|op| self.eval_operand(op, None)).collect() + } + + // Used when the miri-engine runs into a constant and for extracting information from constants + // in patterns via the `const_eval` module + /// The `val` and `layout` are assumed to already be in our interpreter + /// "universe" (param_env). + pub fn const_to_op( + &self, + c: ty::Const<'tcx>, + layout: Option>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + match c.kind() { + ty::ConstKind::Param(_) | ty::ConstKind::Bound(..) => throw_inval!(TooGeneric), + ty::ConstKind::Error(DelaySpanBugEmitted { reported, .. }) => { + throw_inval!(AlreadyReported(reported)) + } + ty::ConstKind::Unevaluated(uv) => { + let instance = self.resolve(uv.def, uv.substs)?; + Ok(self.eval_to_allocation(GlobalId { instance, promoted: uv.promoted })?.into()) + } + ty::ConstKind::Infer(..) | ty::ConstKind::Placeholder(..) => { + span_bug!(self.cur_span(), "const_to_op: Unexpected ConstKind {:?}", c) + } + ty::ConstKind::Value(valtree) => { + let ty = c.ty(); + let const_val = self.tcx.valtree_to_const_val((ty, valtree)); + self.const_val_to_op(const_val, ty, layout) + } + } + } + + pub fn mir_const_to_op( + &self, + val: &mir::ConstantKind<'tcx>, + layout: Option>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + match val { + mir::ConstantKind::Ty(ct) => self.const_to_op(*ct, layout), + mir::ConstantKind::Val(val, ty) => self.const_val_to_op(*val, *ty, layout), + } + } + + pub(crate) fn const_val_to_op( + &self, + val_val: ConstValue<'tcx>, + ty: Ty<'tcx>, + layout: Option>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + // Other cases need layout. + let adjust_scalar = |scalar| -> InterpResult<'tcx, _> { + Ok(match scalar { + Scalar::Ptr(ptr, size) => Scalar::Ptr(self.global_base_pointer(ptr)?, size), + Scalar::Int(int) => Scalar::Int(int), + }) + }; + let layout = from_known_layout(self.tcx, self.param_env, layout, || self.layout_of(ty))?; + let op = match val_val { + ConstValue::ByRef { alloc, offset } => { + let id = self.tcx.create_memory_alloc(alloc); + // We rely on mutability being set correctly in that allocation to prevent writes + // where none should happen. + let ptr = self.global_base_pointer(Pointer::new(id, offset))?; + Operand::Indirect(MemPlace::from_ptr(ptr.into())) + } + ConstValue::Scalar(x) => Operand::Immediate(adjust_scalar(x)?.into()), + ConstValue::ZeroSized => Operand::Immediate(Immediate::Uninit), + ConstValue::Slice { data, start, end } => { + // We rely on mutability being set correctly in `data` to prevent writes + // where none should happen. + let ptr = Pointer::new( + self.tcx.create_memory_alloc(data), + Size::from_bytes(start), // offset: `start` + ); + Operand::Immediate(Immediate::new_slice( + Scalar::from_pointer(self.global_base_pointer(ptr)?, &*self.tcx), + u64::try_from(end.checked_sub(start).unwrap()).unwrap(), // len: `end - start` + self, + )) + } + }; + Ok(OpTy { op, layout, align: Some(layout.align.abi) }) + } + + /// Read discriminant, return the runtime value as well as the variant index. + /// Can also legally be called on non-enums (e.g. through the discriminant_value intrinsic)! 
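+ // Terminology example: for `enum E { A = 10, B = 20 }`, variant `B` has variant *index* 1
+ // but *discriminant* 20; with `TagEncoding::Direct` the in-memory tag stores that
+ // discriminant, while niche encodings store something else entirely (see below).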
+ pub fn read_discriminant( + &self, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, (Scalar, VariantIdx)> { + trace!("read_discriminant_value {:#?}", op.layout); + // Get type and layout of the discriminant. + let discr_layout = self.layout_of(op.layout.ty.discriminant_ty(*self.tcx))?; + trace!("discriminant type: {:?}", discr_layout.ty); + + // We use "discriminant" to refer to the value associated with a particular enum variant. + // This is not to be confused with its "variant index", which is just determining its position in the + // declared list of variants -- they can differ with explicitly assigned discriminants. + // We use "tag" to refer to how the discriminant is encoded in memory, which can be either + // straight-forward (`TagEncoding::Direct`) or with a niche (`TagEncoding::Niche`). + let (tag_scalar_layout, tag_encoding, tag_field) = match op.layout.variants { + Variants::Single { index } => { + let discr = match op.layout.ty.discriminant_for_variant(*self.tcx, index) { + Some(discr) => { + // This type actually has discriminants. + assert_eq!(discr.ty, discr_layout.ty); + Scalar::from_uint(discr.val, discr_layout.size) + } + None => { + // On a type without actual discriminants, variant is 0. + assert_eq!(index.as_u32(), 0); + Scalar::from_uint(index.as_u32(), discr_layout.size) + } + }; + return Ok((discr, index)); + } + Variants::Multiple { tag, ref tag_encoding, tag_field, .. } => { + (tag, tag_encoding, tag_field) + } + }; + + // There are *three* layouts that come into play here: + // - The discriminant has a type for typechecking. This is `discr_layout`, and is used for + // the `Scalar` we return. + // - The tag (encoded discriminant) has layout `tag_layout`. This is always an integer type, + // and used to interpret the value we read from the tag field. + // For the return value, a cast to `discr_layout` is performed. + // - The field storing the tag has a layout, which is very similar to `tag_layout` but + // may be a pointer. This is `tag_val.layout`; we just use it for sanity checks. + + // Get layout for tag. + let tag_layout = self.layout_of(tag_scalar_layout.primitive().to_int_ty(*self.tcx))?; + + // Read tag and sanity-check `tag_layout`. + let tag_val = self.read_immediate(&self.operand_field(op, tag_field)?)?; + assert_eq!(tag_layout.size, tag_val.layout.size); + assert_eq!(tag_layout.abi.is_signed(), tag_val.layout.abi.is_signed()); + trace!("tag value: {}", tag_val); + + // Figure out which discriminant and variant this corresponds to. + Ok(match *tag_encoding { + TagEncoding::Direct => { + let scalar = tag_val.to_scalar()?; + // Generate a specific error if `tag_val` is not an integer. + // (`tag_bits` itself is only used for error messages below.) + let tag_bits = scalar + .try_to_int() + .map_err(|dbg_val| err_ub!(InvalidTag(dbg_val)))? + .assert_bits(tag_layout.size); + // Cast bits from tag layout to discriminant layout. + // After the checks we did above, this cannot fail, as + // discriminants are int-like. + let discr_val = + self.cast_from_int_like(scalar, tag_val.layout, discr_layout.ty).unwrap(); + let discr_bits = discr_val.assert_bits(discr_layout.size); + // Convert discriminant to variant index, and catch invalid discriminants. 
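+ // E.g. for `enum E { A = 10, B = 20 }`, tag bits 20 map to variant index 1, while tag
+ // bits 15 match no declared discriminant and are reported as an `InvalidTag` error.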
+ let index = match *op.layout.ty.kind() { + ty::Adt(adt, _) => { + adt.discriminants(*self.tcx).find(|(_, var)| var.val == discr_bits) + } + ty::Generator(def_id, substs, _) => { + let substs = substs.as_generator(); + substs + .discriminants(def_id, *self.tcx) + .find(|(_, var)| var.val == discr_bits) + } + _ => span_bug!(self.cur_span(), "tagged layout for non-adt non-generator"), + } + .ok_or_else(|| err_ub!(InvalidTag(Scalar::from_uint(tag_bits, tag_layout.size))))?; + // Return the cast value, and the index. + (discr_val, index.0) + } + TagEncoding::Niche { dataful_variant, ref niche_variants, niche_start } => { + let tag_val = tag_val.to_scalar()?; + // Compute the variant this niche value/"tag" corresponds to. With niche layout, + // discriminant (encoded in niche/tag) and variant index are the same. + let variants_start = niche_variants.start().as_u32(); + let variants_end = niche_variants.end().as_u32(); + let variant = match tag_val.try_to_int() { + Err(dbg_val) => { + // So this is a pointer then, and casting to an int failed. + // Can only happen during CTFE. + // The niche must be just 0, and the ptr not null, then we know this is + // okay. Everything else, we conservatively reject. + let ptr_valid = niche_start == 0 + && variants_start == variants_end + && !self.scalar_may_be_null(tag_val)?; + if !ptr_valid { + throw_ub!(InvalidTag(dbg_val)) + } + dataful_variant + } + Ok(tag_bits) => { + let tag_bits = tag_bits.assert_bits(tag_layout.size); + // We need to use machine arithmetic to get the relative variant idx: + // variant_index_relative = tag_val - niche_start_val + let tag_val = ImmTy::from_uint(tag_bits, tag_layout); + let niche_start_val = ImmTy::from_uint(niche_start, tag_layout); + let variant_index_relative_val = + self.binary_op(mir::BinOp::Sub, &tag_val, &niche_start_val)?; + let variant_index_relative = variant_index_relative_val + .to_scalar()? + .assert_bits(tag_val.layout.size); + // Check if this is in the range that indicates an actual discriminant. + if variant_index_relative <= u128::from(variants_end - variants_start) { + let variant_index_relative = u32::try_from(variant_index_relative) + .expect("we checked that this fits into a u32"); + // Then computing the absolute variant idx should not overflow any more. + let variant_index = variants_start + .checked_add(variant_index_relative) + .expect("overflow computing absolute variant idx"); + let variants_len = op + .layout + .ty + .ty_adt_def() + .expect("tagged layout for non adt") + .variants() + .len(); + assert!(usize::try_from(variant_index).unwrap() < variants_len); + VariantIdx::from_u32(variant_index) + } else { + dataful_variant + } + } + }; + // Compute the size of the scalar we need to return. + // No need to cast, because the variant index directly serves as discriminant and is + // encoded in the tag. + (Scalar::from_uint(variant.as_u32(), discr_layout.size), variant) + } + }) + } +} + +// Some nodes are used a lot. Make sure they don't unintentionally get bigger. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +mod size_asserts { + use super::*; + // These are in alphabetical order, which is easy to maintain. 
+ rustc_data_structures::static_assert_size!(Immediate, 56); + rustc_data_structures::static_assert_size!(ImmTy<'_>, 72); + rustc_data_structures::static_assert_size!(Operand, 64); + rustc_data_structures::static_assert_size!(OpTy<'_>, 88); +} diff --git a/compiler/rustc_const_eval/src/interpret/operator.rs b/compiler/rustc_const_eval/src/interpret/operator.rs new file mode 100644 index 000000000..f9912d706 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/operator.rs @@ -0,0 +1,463 @@ +use std::convert::TryFrom; + +use rustc_apfloat::Float; +use rustc_middle::mir; +use rustc_middle::mir::interpret::{InterpResult, Scalar}; +use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; +use rustc_middle::ty::{self, FloatTy, Ty}; +use rustc_target::abi::Abi; + +use super::{ImmTy, Immediate, InterpCx, Machine, PlaceTy}; + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + /// Applies the binary operation `op` to the two operands and writes a tuple of the result + /// and a boolean signifying the potential overflow to the destination. + /// + /// `force_overflow_checks` indicates whether overflow checks should be done even when + /// `tcx.sess.overflow_checks()` is `false`. + pub fn binop_with_overflow( + &mut self, + op: mir::BinOp, + force_overflow_checks: bool, + left: &ImmTy<'tcx, M::Provenance>, + right: &ImmTy<'tcx, M::Provenance>, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + let (val, overflowed, ty) = self.overflowing_binary_op(op, &left, &right)?; + debug_assert_eq!( + self.tcx.intern_tup(&[ty, self.tcx.types.bool]), + dest.layout.ty, + "type mismatch for result of {:?}", + op, + ); + // As per https://github.com/rust-lang/rust/pull/98738, we always return `false` in the 2nd + // component when overflow checking is disabled. + let overflowed = + overflowed && (force_overflow_checks || M::checked_binop_checks_overflow(self)); + // Write the result to `dest`. + if let Abi::ScalarPair(..) = dest.layout.abi { + // We can use the optimized path and avoid `place_field` (which might do + // `force_allocation`). + let pair = Immediate::ScalarPair(val.into(), Scalar::from_bool(overflowed).into()); + self.write_immediate(pair, dest)?; + } else { + assert!(self.tcx.sess.opts.unstable_opts.randomize_layout); + // With randomized layout, `(int, bool)` might cease to be a `ScalarPair`, so we have to + // do a component-wise write here. This code path is slower than the above because + // `place_field` will have to `force_allocate` locals here. + let val_field = self.place_field(&dest, 0)?; + self.write_scalar(val, &val_field)?; + let overflowed_field = self.place_field(&dest, 1)?; + self.write_scalar(Scalar::from_bool(overflowed), &overflowed_field)?; + } + Ok(()) + } + + /// Applies the binary operation `op` to the arguments and writes the result to the + /// destination. 
+ pub fn binop_ignore_overflow( + &mut self, + op: mir::BinOp, + left: &ImmTy<'tcx, M::Provenance>, + right: &ImmTy<'tcx, M::Provenance>, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + let (val, _overflowed, ty) = self.overflowing_binary_op(op, left, right)?; + assert_eq!(ty, dest.layout.ty, "type mismatch for result of {:?}", op); + self.write_scalar(val, dest) + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + fn binary_char_op( + &self, + bin_op: mir::BinOp, + l: char, + r: char, + ) -> (Scalar, bool, Ty<'tcx>) { + use rustc_middle::mir::BinOp::*; + + let res = match bin_op { + Eq => l == r, + Ne => l != r, + Lt => l < r, + Le => l <= r, + Gt => l > r, + Ge => l >= r, + _ => span_bug!(self.cur_span(), "Invalid operation on char: {:?}", bin_op), + }; + (Scalar::from_bool(res), false, self.tcx.types.bool) + } + + fn binary_bool_op( + &self, + bin_op: mir::BinOp, + l: bool, + r: bool, + ) -> (Scalar, bool, Ty<'tcx>) { + use rustc_middle::mir::BinOp::*; + + let res = match bin_op { + Eq => l == r, + Ne => l != r, + Lt => l < r, + Le => l <= r, + Gt => l > r, + Ge => l >= r, + BitAnd => l & r, + BitOr => l | r, + BitXor => l ^ r, + _ => span_bug!(self.cur_span(), "Invalid operation on bool: {:?}", bin_op), + }; + (Scalar::from_bool(res), false, self.tcx.types.bool) + } + + fn binary_float_op>>( + &self, + bin_op: mir::BinOp, + ty: Ty<'tcx>, + l: F, + r: F, + ) -> (Scalar, bool, Ty<'tcx>) { + use rustc_middle::mir::BinOp::*; + + let (val, ty) = match bin_op { + Eq => (Scalar::from_bool(l == r), self.tcx.types.bool), + Ne => (Scalar::from_bool(l != r), self.tcx.types.bool), + Lt => (Scalar::from_bool(l < r), self.tcx.types.bool), + Le => (Scalar::from_bool(l <= r), self.tcx.types.bool), + Gt => (Scalar::from_bool(l > r), self.tcx.types.bool), + Ge => (Scalar::from_bool(l >= r), self.tcx.types.bool), + Add => ((l + r).value.into(), ty), + Sub => ((l - r).value.into(), ty), + Mul => ((l * r).value.into(), ty), + Div => ((l / r).value.into(), ty), + Rem => ((l % r).value.into(), ty), + _ => span_bug!(self.cur_span(), "invalid float op: `{:?}`", bin_op), + }; + (val, false, ty) + } + + fn binary_int_op( + &self, + bin_op: mir::BinOp, + // passing in raw bits + l: u128, + left_layout: TyAndLayout<'tcx>, + r: u128, + right_layout: TyAndLayout<'tcx>, + ) -> InterpResult<'tcx, (Scalar, bool, Ty<'tcx>)> { + use rustc_middle::mir::BinOp::*; + + // Shift ops can have an RHS with a different numeric type. + if bin_op == Shl || bin_op == Shr { + let size = u128::from(left_layout.size.bits()); + // Even if `r` is signed, we treat it as if it was unsigned (i.e., we use its + // zero-extended form). This matches the codegen backend: + // . + // The overflow check is also ignorant to the sign: + // . + // This would behave rather strangely if we had integer types of size 256: a shift by + // -1i8 would actually shift by 255, but that would *not* be considered overflowing. A + // shift by -1i16 though would be considered overflowing. If we had integers of size + // 512, then a shift by -1i8 would even produce a different result than one by -1i16: + // the first shifts by 255, the latter by u16::MAX % 512 = 511. Lucky enough, our + // integers are maximally 128bits wide, so negative shifts *always* overflow and we have + // consistent results for the same value represented at different bit widths. 
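+ // Worked example: `1u8 << 9` has `size == 8`, so `overflow` is set (9 >= 8), the shift
+ // amount is masked to `9 % 8 == 1`, and the resulting value is `2`.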
+ assert!(size <= 128); + let overflow = r >= size; + // The shift offset is implicitly masked to the type size, to make sure this operation + // is always defined. This is the one MIR operator that does *not* directly map to a + // single LLVM operation. See + // + // for the corresponding truncation in our codegen backends. + let r = r % size; + let r = u32::try_from(r).unwrap(); // we masked so this will always fit + let result = if left_layout.abi.is_signed() { + let l = self.sign_extend(l, left_layout) as i128; + let result = match bin_op { + Shl => l.checked_shl(r).unwrap(), + Shr => l.checked_shr(r).unwrap(), + _ => bug!(), + }; + result as u128 + } else { + match bin_op { + Shl => l.checked_shl(r).unwrap(), + Shr => l.checked_shr(r).unwrap(), + _ => bug!(), + } + }; + let truncated = self.truncate(result, left_layout); + return Ok((Scalar::from_uint(truncated, left_layout.size), overflow, left_layout.ty)); + } + + // For the remaining ops, the types must be the same on both sides + if left_layout.ty != right_layout.ty { + span_bug!( + self.cur_span(), + "invalid asymmetric binary op {:?}: {:?} ({:?}), {:?} ({:?})", + bin_op, + l, + left_layout.ty, + r, + right_layout.ty, + ) + } + + let size = left_layout.size; + + // Operations that need special treatment for signed integers + if left_layout.abi.is_signed() { + let op: Option bool> = match bin_op { + Lt => Some(i128::lt), + Le => Some(i128::le), + Gt => Some(i128::gt), + Ge => Some(i128::ge), + _ => None, + }; + if let Some(op) = op { + let l = self.sign_extend(l, left_layout) as i128; + let r = self.sign_extend(r, right_layout) as i128; + return Ok((Scalar::from_bool(op(&l, &r)), false, self.tcx.types.bool)); + } + let op: Option (i128, bool)> = match bin_op { + Div if r == 0 => throw_ub!(DivisionByZero), + Rem if r == 0 => throw_ub!(RemainderByZero), + Div => Some(i128::overflowing_div), + Rem => Some(i128::overflowing_rem), + Add => Some(i128::overflowing_add), + Sub => Some(i128::overflowing_sub), + Mul => Some(i128::overflowing_mul), + _ => None, + }; + if let Some(op) = op { + let l = self.sign_extend(l, left_layout) as i128; + let r = self.sign_extend(r, right_layout) as i128; + + // We need a special check for overflowing Rem and Div since they are *UB* + // on overflow, which can happen with "int_min $OP -1". + if matches!(bin_op, Rem | Div) { + if l == size.signed_int_min() && r == -1 { + if bin_op == Rem { + throw_ub!(RemainderOverflow) + } else { + throw_ub!(DivisionOverflow) + } + } + } + + let (result, oflo) = op(l, r); + // This may be out-of-bounds for the result type, so we have to truncate ourselves. + // If that truncation loses any information, we have an overflow. 
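+ // Worked example: `100_i8 + 100_i8` does not wrap in `i128` (it yields 200), but
+ // truncating 200 to 8 bits gives 0xC8, i.e. -56 as `i8`; sign-extending 0xC8 does not
+ // reproduce 200, so the overflow flag is set and -56 is returned as the value.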
+ let result = result as u128; + let truncated = self.truncate(result, left_layout); + return Ok(( + Scalar::from_uint(truncated, size), + oflo || self.sign_extend(truncated, left_layout) != result, + left_layout.ty, + )); + } + } + + let (val, ty) = match bin_op { + Eq => (Scalar::from_bool(l == r), self.tcx.types.bool), + Ne => (Scalar::from_bool(l != r), self.tcx.types.bool), + + Lt => (Scalar::from_bool(l < r), self.tcx.types.bool), + Le => (Scalar::from_bool(l <= r), self.tcx.types.bool), + Gt => (Scalar::from_bool(l > r), self.tcx.types.bool), + Ge => (Scalar::from_bool(l >= r), self.tcx.types.bool), + + BitOr => (Scalar::from_uint(l | r, size), left_layout.ty), + BitAnd => (Scalar::from_uint(l & r, size), left_layout.ty), + BitXor => (Scalar::from_uint(l ^ r, size), left_layout.ty), + + Add | Sub | Mul | Rem | Div => { + assert!(!left_layout.abi.is_signed()); + let op: fn(u128, u128) -> (u128, bool) = match bin_op { + Add => u128::overflowing_add, + Sub => u128::overflowing_sub, + Mul => u128::overflowing_mul, + Div if r == 0 => throw_ub!(DivisionByZero), + Rem if r == 0 => throw_ub!(RemainderByZero), + Div => u128::overflowing_div, + Rem => u128::overflowing_rem, + _ => bug!(), + }; + let (result, oflo) = op(l, r); + // Truncate to target type. + // If that truncation loses any information, we have an overflow. + let truncated = self.truncate(result, left_layout); + return Ok(( + Scalar::from_uint(truncated, size), + oflo || truncated != result, + left_layout.ty, + )); + } + + _ => span_bug!( + self.cur_span(), + "invalid binary op {:?}: {:?}, {:?} (both {:?})", + bin_op, + l, + r, + right_layout.ty, + ), + }; + + Ok((val, false, ty)) + } + + /// Returns the result of the specified operation, whether it overflowed, and + /// the result type. + pub fn overflowing_binary_op( + &self, + bin_op: mir::BinOp, + left: &ImmTy<'tcx, M::Provenance>, + right: &ImmTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, (Scalar, bool, Ty<'tcx>)> { + trace!( + "Running binary op {:?}: {:?} ({:?}), {:?} ({:?})", + bin_op, + *left, + left.layout.ty, + *right, + right.layout.ty + ); + + match left.layout.ty.kind() { + ty::Char => { + assert_eq!(left.layout.ty, right.layout.ty); + let left = left.to_scalar()?; + let right = right.to_scalar()?; + Ok(self.binary_char_op(bin_op, left.to_char()?, right.to_char()?)) + } + ty::Bool => { + assert_eq!(left.layout.ty, right.layout.ty); + let left = left.to_scalar()?; + let right = right.to_scalar()?; + Ok(self.binary_bool_op(bin_op, left.to_bool()?, right.to_bool()?)) + } + ty::Float(fty) => { + assert_eq!(left.layout.ty, right.layout.ty); + let ty = left.layout.ty; + let left = left.to_scalar()?; + let right = right.to_scalar()?; + Ok(match fty { + FloatTy::F32 => { + self.binary_float_op(bin_op, ty, left.to_f32()?, right.to_f32()?) + } + FloatTy::F64 => { + self.binary_float_op(bin_op, ty, left.to_f64()?, right.to_f64()?) + } + }) + } + _ if left.layout.ty.is_integral() => { + // the RHS type can be different, e.g. for shifts -- but it has to be integral, too + assert!( + right.layout.ty.is_integral(), + "Unexpected types for BinOp: {:?} {:?} {:?}", + left.layout.ty, + bin_op, + right.layout.ty + ); + + let l = left.to_scalar()?.to_bits(left.layout.size)?; + let r = right.to_scalar()?.to_bits(right.layout.size)?; + self.binary_int_op(bin_op, l, left.layout, r, right.layout) + } + _ if left.layout.ty.is_any_ptr() => { + // The RHS type must be a `pointer` *or an integer type* (for `Offset`). 
+ // (Even when both sides are pointers, their type might differ, see issue #91636) + assert!( + right.layout.ty.is_any_ptr() || right.layout.ty.is_integral(), + "Unexpected types for BinOp: {:?} {:?} {:?}", + left.layout.ty, + bin_op, + right.layout.ty + ); + + M::binary_ptr_op(self, bin_op, left, right) + } + _ => span_bug!( + self.cur_span(), + "Invalid MIR: bad LHS type for binop: {:?}", + left.layout.ty + ), + } + } + + /// Typed version of `overflowing_binary_op`, returning an `ImmTy`. Also ignores overflows. + #[inline] + pub fn binary_op( + &self, + bin_op: mir::BinOp, + left: &ImmTy<'tcx, M::Provenance>, + right: &ImmTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, ImmTy<'tcx, M::Provenance>> { + let (val, _overflow, ty) = self.overflowing_binary_op(bin_op, left, right)?; + Ok(ImmTy::from_scalar(val, self.layout_of(ty)?)) + } + + /// Returns the result of the specified operation, whether it overflowed, and + /// the result type. + pub fn overflowing_unary_op( + &self, + un_op: mir::UnOp, + val: &ImmTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, (Scalar, bool, Ty<'tcx>)> { + use rustc_middle::mir::UnOp::*; + + let layout = val.layout; + let val = val.to_scalar()?; + trace!("Running unary op {:?}: {:?} ({:?})", un_op, val, layout.ty); + + match layout.ty.kind() { + ty::Bool => { + let val = val.to_bool()?; + let res = match un_op { + Not => !val, + _ => span_bug!(self.cur_span(), "Invalid bool op {:?}", un_op), + }; + Ok((Scalar::from_bool(res), false, self.tcx.types.bool)) + } + ty::Float(fty) => { + let res = match (un_op, fty) { + (Neg, FloatTy::F32) => Scalar::from_f32(-val.to_f32()?), + (Neg, FloatTy::F64) => Scalar::from_f64(-val.to_f64()?), + _ => span_bug!(self.cur_span(), "Invalid float op {:?}", un_op), + }; + Ok((res, false, layout.ty)) + } + _ => { + assert!(layout.ty.is_integral()); + let val = val.to_bits(layout.size)?; + let (res, overflow) = match un_op { + Not => (self.truncate(!val, layout), false), // bitwise negation, then truncate + Neg => { + // arithmetic negation + assert!(layout.abi.is_signed()); + let val = self.sign_extend(val, layout) as i128; + let (res, overflow) = val.overflowing_neg(); + let res = res as u128; + // Truncate to target type. + // If that truncation loses any information, we have an overflow. + let truncated = self.truncate(res, layout); + (truncated, overflow || self.sign_extend(truncated, layout) != res) + } + }; + Ok((Scalar::from_uint(res, layout.size), overflow, layout.ty)) + } + } + } + + pub fn unary_op( + &self, + un_op: mir::UnOp, + val: &ImmTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, ImmTy<'tcx, M::Provenance>> { + let (val, _overflow, ty) = self.overflowing_unary_op(un_op, val)?; + Ok(ImmTy::from_scalar(val, self.layout_of(ty)?)) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs new file mode 100644 index 000000000..f4571a1ca --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/place.rs @@ -0,0 +1,900 @@ +//! Computations on places -- field projections, going from mir::Place, and writing +//! into a place. +//! All high-level functions to write to memory work on places as destinations. 
+ +use std::hash::Hash; + +use rustc_ast::Mutability; +use rustc_middle::mir; +use rustc_middle::ty; +use rustc_middle::ty::layout::{LayoutOf, PrimitiveExt, TyAndLayout}; +use rustc_target::abi::{self, Abi, Align, HasDataLayout, Size, TagEncoding, VariantIdx}; + +use super::{ + alloc_range, mir_assign_valid_types, AllocId, AllocRef, AllocRefMut, CheckInAllocMsg, + ConstAlloc, ImmTy, Immediate, InterpCx, InterpResult, Machine, MemoryKind, OpTy, Operand, + Pointer, Provenance, Scalar, ScalarMaybeUninit, +}; + +#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +/// Information required for the sound usage of a `MemPlace`. +pub enum MemPlaceMeta { + /// The unsized payload (e.g. length for slices or vtable pointer for trait objects). + Meta(Scalar), + /// `Sized` types or unsized `extern type` + None, +} + +impl MemPlaceMeta { + pub fn unwrap_meta(self) -> Scalar { + match self { + Self::Meta(s) => s, + Self::None => { + bug!("expected wide pointer extra data (e.g. slice length or trait object vtable)") + } + } + } + + pub fn has_meta(self) -> bool { + match self { + Self::Meta(_) => true, + Self::None => false, + } + } +} + +#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +pub struct MemPlace { + /// The pointer can be a pure integer, with the `None` provenance. + pub ptr: Pointer>, + /// Metadata for unsized places. Interpretation is up to the type. + /// Must not be present for sized types, but can be missing for unsized types + /// (e.g., `extern type`). + pub meta: MemPlaceMeta, +} + +/// A MemPlace with its layout. Constructing it is only possible in this module. +#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] +pub struct MPlaceTy<'tcx, Prov: Provenance = AllocId> { + mplace: MemPlace, + pub layout: TyAndLayout<'tcx>, + /// rustc does not have a proper way to represent the type of a field of a `repr(packed)` struct: + /// it needs to have a different alignment than the field type would usually have. + /// So we represent this here with a separate field that "overwrites" `layout.align`. + /// This means `layout.align` should never be used for a `MPlaceTy`! + pub align: Align, +} + +#[derive(Copy, Clone, Debug)] +pub enum Place { + /// A place referring to a value allocated in the `Memory` system. + Ptr(MemPlace), + + /// To support alloc-free locals, we are able to write directly to a local. + /// (Without that optimization, we'd just always be a `MemPlace`.) + Local { frame: usize, local: mir::Local }, +} + +#[derive(Clone, Debug)] +pub struct PlaceTy<'tcx, Prov: Provenance = AllocId> { + place: Place, // Keep this private; it helps enforce invariants. + pub layout: TyAndLayout<'tcx>, + /// rustc does not have a proper way to represent the type of a field of a `repr(packed)` struct: + /// it needs to have a different alignment than the field type would usually have. + /// So we represent this here with a separate field that "overwrites" `layout.align`. + /// This means `layout.align` should never be used for a `PlaceTy`! 
+ pub align: Align, +} + +impl<'tcx, Prov: Provenance> std::ops::Deref for PlaceTy<'tcx, Prov> { + type Target = Place; + #[inline(always)] + fn deref(&self) -> &Place { + &self.place + } +} + +impl<'tcx, Prov: Provenance> std::ops::Deref for MPlaceTy<'tcx, Prov> { + type Target = MemPlace; + #[inline(always)] + fn deref(&self) -> &MemPlace { + &self.mplace + } +} + +impl<'tcx, Prov: Provenance> From> for PlaceTy<'tcx, Prov> { + #[inline(always)] + fn from(mplace: MPlaceTy<'tcx, Prov>) -> Self { + PlaceTy { place: Place::Ptr(*mplace), layout: mplace.layout, align: mplace.align } + } +} + +impl<'tcx, Prov: Provenance> From<&'_ MPlaceTy<'tcx, Prov>> for PlaceTy<'tcx, Prov> { + #[inline(always)] + fn from(mplace: &MPlaceTy<'tcx, Prov>) -> Self { + PlaceTy { place: Place::Ptr(**mplace), layout: mplace.layout, align: mplace.align } + } +} + +impl<'tcx, Prov: Provenance> From<&'_ mut MPlaceTy<'tcx, Prov>> for PlaceTy<'tcx, Prov> { + #[inline(always)] + fn from(mplace: &mut MPlaceTy<'tcx, Prov>) -> Self { + PlaceTy { place: Place::Ptr(**mplace), layout: mplace.layout, align: mplace.align } + } +} + +impl MemPlace { + #[inline(always)] + pub fn from_ptr(ptr: Pointer>) -> Self { + MemPlace { ptr, meta: MemPlaceMeta::None } + } + + /// Adjust the provenance of the main pointer (metadata is unaffected). + pub fn map_provenance(self, f: impl FnOnce(Option) -> Option) -> Self { + MemPlace { ptr: self.ptr.map_provenance(f), ..self } + } + + /// Turn a mplace into a (thin or wide) pointer, as a reference, pointing to the same space. + /// This is the inverse of `ref_to_mplace`. + #[inline(always)] + pub fn to_ref(self, cx: &impl HasDataLayout) -> Immediate { + match self.meta { + MemPlaceMeta::None => Immediate::from(Scalar::from_maybe_pointer(self.ptr, cx)), + MemPlaceMeta::Meta(meta) => { + Immediate::ScalarPair(Scalar::from_maybe_pointer(self.ptr, cx).into(), meta.into()) + } + } + } + + #[inline] + pub fn offset_with_meta<'tcx>( + self, + offset: Size, + meta: MemPlaceMeta, + cx: &impl HasDataLayout, + ) -> InterpResult<'tcx, Self> { + Ok(MemPlace { ptr: self.ptr.offset(offset, cx)?, meta }) + } +} + +impl Place { + /// Asserts that this points to some local variable. + /// Returns the frame idx and the variable idx. + #[inline] + #[cfg_attr(debug_assertions, track_caller)] // only in debug builds due to perf (see #98980) + pub fn assert_local(&self) -> (usize, mir::Local) { + match self { + Place::Local { frame, local } => (*frame, *local), + _ => bug!("assert_local: expected Place::Local, got {:?}", self), + } + } +} + +impl<'tcx, Prov: Provenance> MPlaceTy<'tcx, Prov> { + /// Produces a MemPlace that works for ZST but nothing else. + /// Conceptually this is a new allocation, but it doesn't actually create an allocation so you + /// don't need to worry about memory leaks. 
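+ // The "pointer" below is just the type's alignment turned into an integer address with no
+ // provenance; a dangling but well-aligned address is all a zero-sized access needs
+ // (for `()` that is address 1).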
+ #[inline] + pub fn fake_alloc_zst(layout: TyAndLayout<'tcx>) -> Self { + assert!(layout.is_zst()); + let align = layout.align.abi; + let ptr = Pointer::from_addr(align.bytes()); // no provenance, absolute address + MPlaceTy { mplace: MemPlace { ptr, meta: MemPlaceMeta::None }, layout, align } + } + + #[inline] + pub fn offset_with_meta( + &self, + offset: Size, + meta: MemPlaceMeta, + layout: TyAndLayout<'tcx>, + cx: &impl HasDataLayout, + ) -> InterpResult<'tcx, Self> { + Ok(MPlaceTy { + mplace: self.mplace.offset_with_meta(offset, meta, cx)?, + align: self.align.restrict_for_offset(offset), + layout, + }) + } + + pub fn offset( + &self, + offset: Size, + layout: TyAndLayout<'tcx>, + cx: &impl HasDataLayout, + ) -> InterpResult<'tcx, Self> { + assert!(!layout.is_unsized()); + self.offset_with_meta(offset, MemPlaceMeta::None, layout, cx) + } + + #[inline] + pub fn from_aligned_ptr(ptr: Pointer>, layout: TyAndLayout<'tcx>) -> Self { + MPlaceTy { mplace: MemPlace::from_ptr(ptr), layout, align: layout.align.abi } + } + + #[inline] + pub fn from_aligned_ptr_with_meta( + ptr: Pointer>, + layout: TyAndLayout<'tcx>, + meta: MemPlaceMeta, + ) -> Self { + let mut mplace = MemPlace::from_ptr(ptr); + mplace.meta = meta; + + MPlaceTy { mplace, layout, align: layout.align.abi } + } + + #[inline] + pub(crate) fn len(&self, cx: &impl HasDataLayout) -> InterpResult<'tcx, u64> { + if self.layout.is_unsized() { + // We need to consult `meta` metadata + match self.layout.ty.kind() { + ty::Slice(..) | ty::Str => self.mplace.meta.unwrap_meta().to_machine_usize(cx), + _ => bug!("len not supported on unsized type {:?}", self.layout.ty), + } + } else { + // Go through the layout. There are lots of types that support a length, + // e.g., SIMD types. (But not all repr(simd) types even have FieldsShape::Array!) + match self.layout.fields { + abi::FieldsShape::Array { count, .. } => Ok(count), + _ => bug!("len not supported on sized type {:?}", self.layout.ty), + } + } + } + + #[inline] + pub(super) fn vtable(&self) -> Scalar { + match self.layout.ty.kind() { + ty::Dynamic(..) => self.mplace.meta.unwrap_meta(), + _ => bug!("vtable not supported on type {:?}", self.layout.ty), + } + } +} + +// These are defined here because they produce a place. +impl<'tcx, Prov: Provenance> OpTy<'tcx, Prov> { + #[inline(always)] + /// Note: do not call `as_ref` on the resulting place. This function should only be used to + /// read from the resulting mplace, not to get its address back. + pub fn try_as_mplace(&self) -> Result, ImmTy<'tcx, Prov>> { + match **self { + Operand::Indirect(mplace) => { + Ok(MPlaceTy { mplace, layout: self.layout, align: self.align.unwrap() }) + } + Operand::Immediate(imm) => Err(ImmTy::from_immediate(imm, self.layout)), + } + } + + #[inline(always)] + #[cfg_attr(debug_assertions, track_caller)] // only in debug builds due to perf (see #98980) + /// Note: do not call `as_ref` on the resulting place. This function should only be used to + /// read from the resulting mplace, not to get its address back. + pub fn assert_mem_place(&self) -> MPlaceTy<'tcx, Prov> { + self.try_as_mplace().unwrap() + } +} + +impl<'tcx, Prov: Provenance> PlaceTy<'tcx, Prov> { + /// A place is either an mplace or some local. 
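+ // Unlike `OpTy::try_as_mplace`, the error case hands back the `(frame, local)` pair, so
+ // callers such as `write_uninit` or `force_allocation` can go through `M::access_local_mut`
+ // instead of touching memory.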
+ #[inline] + pub fn try_as_mplace(&self) -> Result, (usize, mir::Local)> { + match **self { + Place::Ptr(mplace) => Ok(MPlaceTy { mplace, layout: self.layout, align: self.align }), + Place::Local { frame, local } => Err((frame, local)), + } + } + + #[inline(always)] + #[cfg_attr(debug_assertions, track_caller)] // only in debug builds due to perf (see #98980) + pub fn assert_mem_place(self) -> MPlaceTy<'tcx, Prov> { + self.try_as_mplace().unwrap() + } +} + +// FIXME: Working around https://github.com/rust-lang/rust/issues/54385 +impl<'mir, 'tcx: 'mir, Prov, M> InterpCx<'mir, 'tcx, M> +where + Prov: Provenance + Eq + Hash + 'static, + M: Machine<'mir, 'tcx, Provenance = Prov>, +{ + /// Take a value, which represents a (thin or wide) reference, and make it a place. + /// Alignment is just based on the type. This is the inverse of `MemPlace::to_ref()`. + /// + /// Only call this if you are sure the place is "valid" (aligned and inbounds), or do not + /// want to ever use the place for memory access! + /// Generally prefer `deref_operand`. + pub fn ref_to_mplace( + &self, + val: &ImmTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + let pointee_type = + val.layout.ty.builtin_deref(true).expect("`ref_to_mplace` called on non-ptr type").ty; + let layout = self.layout_of(pointee_type)?; + let (ptr, meta) = match **val { + Immediate::Scalar(ptr) => (ptr, MemPlaceMeta::None), + Immediate::ScalarPair(ptr, meta) => (ptr, MemPlaceMeta::Meta(meta.check_init()?)), + Immediate::Uninit => throw_ub!(InvalidUninitBytes(None)), + }; + + let mplace = MemPlace { ptr: ptr.to_pointer(self)?, meta }; + // When deref'ing a pointer, the *static* alignment given by the type is what matters. + let align = layout.align.abi; + Ok(MPlaceTy { mplace, layout, align }) + } + + /// Take an operand, representing a pointer, and dereference it to a place -- that + /// will always be a MemPlace. Lives in `place.rs` because it creates a place. + #[instrument(skip(self), level = "debug")] + pub fn deref_operand( + &self, + src: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + let val = self.read_immediate(src)?; + trace!("deref to {} on {:?}", val.layout.ty, *val); + + if val.layout.ty.is_box() { + bug!("dereferencing {:?}", val.layout.ty); + } + + let mplace = self.ref_to_mplace(&val)?; + self.check_mplace_access(mplace, CheckInAllocMsg::DerefTest)?; + Ok(mplace) + } + + #[inline] + pub(super) fn get_place_alloc( + &self, + place: &MPlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, Option>> { + assert!(!place.layout.is_unsized()); + assert!(!place.meta.has_meta()); + let size = place.layout.size; + self.get_ptr_alloc(place.ptr, size, place.align) + } + + #[inline] + pub(super) fn get_place_alloc_mut( + &mut self, + place: &MPlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, Option>> { + assert!(!place.layout.is_unsized()); + assert!(!place.meta.has_meta()); + let size = place.layout.size; + self.get_ptr_alloc_mut(place.ptr, size, place.align) + } + + /// Check if this mplace is dereferenceable and sufficiently aligned. + fn check_mplace_access( + &self, + mplace: MPlaceTy<'tcx, M::Provenance>, + msg: CheckInAllocMsg, + ) -> InterpResult<'tcx> { + let (size, align) = self + .size_and_align_of_mplace(&mplace)? 
+ .unwrap_or((mplace.layout.size, mplace.layout.align.abi)); + assert!(mplace.align <= align, "dynamic alignment less strict than static one?"); + let align = M::enforce_alignment(self).then_some(align); + self.check_ptr_access_align(mplace.ptr, size, align.unwrap_or(Align::ONE), msg)?; + Ok(()) + } + + /// Converts a repr(simd) place into a place where `place_index` accesses the SIMD elements. + /// Also returns the number of elements. + pub fn mplace_to_simd( + &self, + mplace: &MPlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, (MPlaceTy<'tcx, M::Provenance>, u64)> { + // Basically we just transmute this place into an array following simd_size_and_type. + // (Transmuting is okay since this is an in-memory place. We also double-check the size + // stays the same.) + let (len, e_ty) = mplace.layout.ty.simd_size_and_type(*self.tcx); + let array = self.tcx.mk_array(e_ty, len); + let layout = self.layout_of(array)?; + assert_eq!(layout.size, mplace.layout.size); + Ok((MPlaceTy { layout, ..*mplace }, len)) + } + + /// Converts a repr(simd) place into a place where `place_index` accesses the SIMD elements. + /// Also returns the number of elements. + pub fn place_to_simd( + &mut self, + place: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, (MPlaceTy<'tcx, M::Provenance>, u64)> { + let mplace = self.force_allocation(place)?; + self.mplace_to_simd(&mplace) + } + + pub fn local_to_place( + &self, + frame: usize, + local: mir::Local, + ) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> { + let layout = self.layout_of_local(&self.stack()[frame], local, None)?; + let place = Place::Local { frame, local }; + Ok(PlaceTy { place, layout, align: layout.align.abi }) + } + + /// Computes a place. You should only use this if you intend to write into this + /// place; for reading, a more efficient alternative is `eval_place_to_op`. + #[instrument(skip(self), level = "debug")] + pub fn eval_place( + &mut self, + mir_place: mir::Place<'tcx>, + ) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> { + let mut place = self.local_to_place(self.frame_idx(), mir_place.local)?; + // Using `try_fold` turned out to be bad for performance, hence the loop. + for elem in mir_place.projection.iter() { + place = self.place_projection(&place, elem)? + } + + trace!("{:?}", self.dump_place(place.place)); + // Sanity-check the type we ended up with. + debug_assert!( + mir_assign_valid_types( + *self.tcx, + self.param_env, + self.layout_of(self.subst_from_current_frame_and_normalize_erasing_regions( + mir_place.ty(&self.frame().body.local_decls, *self.tcx).ty + )?)?, + place.layout, + ), + "eval_place of a MIR place with type {:?} produced an interpreter place with type {:?}", + mir_place.ty(&self.frame().body.local_decls, *self.tcx).ty, + place.layout.ty, + ); + Ok(place) + } + + /// Write an immediate to a place + #[inline(always)] + #[instrument(skip(self), level = "debug")] + pub fn write_immediate( + &mut self, + src: Immediate, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + self.write_immediate_no_validate(src, dest)?; + + if M::enforce_validity(self) { + // Data got changed, better make sure it matches the type! 
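+ // For machines that enforce validity (Miri does, unless validation is disabled), this is
+ // where e.g. writing the raw byte 2 into a `bool` destination gets rejected.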
+ self.validate_operand(&self.place_to_op(dest)?)?; + } + + Ok(()) + } + + /// Write a scalar to a place + #[inline(always)] + pub fn write_scalar( + &mut self, + val: impl Into>, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + self.write_immediate(Immediate::Scalar(val.into()), dest) + } + + /// Write a pointer to a place + #[inline(always)] + pub fn write_pointer( + &mut self, + ptr: impl Into>>, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + self.write_scalar(Scalar::from_maybe_pointer(ptr.into(), self), dest) + } + + /// Write an immediate to a place. + /// If you use this you are responsible for validating that things got copied at the + /// right type. + fn write_immediate_no_validate( + &mut self, + src: Immediate, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + assert!(!dest.layout.is_unsized(), "Cannot write unsized data"); + trace!("write_immediate: {:?} <- {:?}: {}", *dest, src, dest.layout.ty); + + // See if we can avoid an allocation. This is the counterpart to `read_immediate_raw`, + // but not factored as a separate function. + let mplace = match dest.place { + Place::Local { frame, local } => { + match M::access_local_mut(self, frame, local)? { + Operand::Immediate(local) => { + // Local can be updated in-place. + *local = src; + return Ok(()); + } + Operand::Indirect(mplace) => { + // The local is in memory, go on below. + *mplace + } + } + } + Place::Ptr(mplace) => mplace, // already referring to memory + }; + + // This is already in memory, write there. + self.write_immediate_to_mplace_no_validate(src, dest.layout, dest.align, mplace) + } + + /// Write an immediate to memory. + /// If you use this you are responsible for validating that things got copied at the + /// right layout. + fn write_immediate_to_mplace_no_validate( + &mut self, + value: Immediate, + layout: TyAndLayout<'tcx>, + align: Align, + dest: MemPlace, + ) -> InterpResult<'tcx> { + // Note that it is really important that the type here is the right one, and matches the + // type things are read at. In case `value` is a `ScalarPair`, we don't do any magic here + // to handle padding properly, which is only correct if we never look at this data with the + // wrong type. + + let tcx = *self.tcx; + let Some(mut alloc) = self.get_place_alloc_mut(&MPlaceTy { mplace: dest, layout, align })? else { + // zero-sized access + return Ok(()); + }; + + match value { + Immediate::Scalar(scalar) => { + let Abi::Scalar(s) = layout.abi else { span_bug!( + self.cur_span(), + "write_immediate_to_mplace: invalid Scalar layout: {layout:#?}", + ) + }; + let size = s.size(&tcx); + assert_eq!(size, layout.size, "abi::Scalar size does not match layout size"); + alloc.write_scalar(alloc_range(Size::ZERO, size), scalar) + } + Immediate::ScalarPair(a_val, b_val) => { + // We checked `ptr_align` above, so all fields will have the alignment they need. + // We would anyway check against `ptr_align.restrict_for_offset(b_offset)`, + // which `ptr.offset(b_offset)` cannot possibly fail to satisfy. 
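+ // The second component is placed at the first component's size rounded up to the second's
+ // alignment (`b_offset` below); e.g. for `&[u8]` on a 64-bit target the length scalar is
+ // written at offset 8, right after the data pointer.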
+ let Abi::ScalarPair(a, b) = layout.abi else { span_bug!( + self.cur_span(), + "write_immediate_to_mplace: invalid ScalarPair layout: {:#?}", + layout + ) + }; + let (a_size, b_size) = (a.size(&tcx), b.size(&tcx)); + let b_offset = a_size.align_to(b.align(&tcx).abi); + assert!(b_offset.bytes() > 0); // in `operand_field` we use the offset to tell apart the fields + + // It is tempting to verify `b_offset` against `layout.fields.offset(1)`, + // but that does not work: We could be a newtype around a pair, then the + // fields do not match the `ScalarPair` components. + + alloc.write_scalar(alloc_range(Size::ZERO, a_size), a_val)?; + alloc.write_scalar(alloc_range(b_offset, b_size), b_val) + } + Immediate::Uninit => alloc.write_uninit(), + } + } + + pub fn write_uninit(&mut self, dest: &PlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx> { + let mplace = match dest.try_as_mplace() { + Ok(mplace) => mplace, + Err((frame, local)) => { + match M::access_local_mut(self, frame, local)? { + Operand::Immediate(local) => { + *local = Immediate::Uninit; + return Ok(()); + } + Operand::Indirect(mplace) => { + // The local is in memory, go on below. + MPlaceTy { mplace: *mplace, layout: dest.layout, align: dest.align } + } + } + } + }; + let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else { + // Zero-sized access + return Ok(()); + }; + alloc.write_uninit()?; + Ok(()) + } + + /// Copies the data from an operand to a place. + /// `allow_transmute` indicates whether the layouts may disagree. + #[inline(always)] + #[instrument(skip(self), level = "debug")] + pub fn copy_op( + &mut self, + src: &OpTy<'tcx, M::Provenance>, + dest: &PlaceTy<'tcx, M::Provenance>, + allow_transmute: bool, + ) -> InterpResult<'tcx> { + self.copy_op_no_validate(src, dest, allow_transmute)?; + + if M::enforce_validity(self) { + // Data got changed, better make sure it matches the type! + self.validate_operand(&self.place_to_op(dest)?)?; + } + + Ok(()) + } + + /// Copies the data from an operand to a place. + /// `allow_transmute` indicates whether the layouts may disagree. + /// Also, if you use this you are responsible for validating that things get copied at the + /// right type. + #[instrument(skip(self), level = "debug")] + fn copy_op_no_validate( + &mut self, + src: &OpTy<'tcx, M::Provenance>, + dest: &PlaceTy<'tcx, M::Provenance>, + allow_transmute: bool, + ) -> InterpResult<'tcx> { + // We do NOT compare the types for equality, because well-typed code can + // actually "transmute" `&mut T` to `&T` in an assignment without a cast. + let layout_compat = + mir_assign_valid_types(*self.tcx, self.param_env, src.layout, dest.layout); + if !allow_transmute && !layout_compat { + span_bug!( + self.cur_span(), + "type mismatch when copying!\nsrc: {:?},\ndest: {:?}", + src.layout.ty, + dest.layout.ty, + ); + } + + // Let us see if the layout is simple so we take a shortcut, + // avoid force_allocation. + let src = match self.read_immediate_raw(src, /*force*/ false)? { + Ok(src_val) => { + assert!(!src.layout.is_unsized(), "cannot have unsized immediates"); + assert!( + !dest.layout.is_unsized(), + "the src is sized, so the dest must also be sized" + ); + assert_eq!(src.layout.size, dest.layout.size); + // Yay, we got a value that we can write directly. + return if layout_compat { + self.write_immediate_no_validate(*src_val, dest) + } else { + // This is tricky. The problematic case is `ScalarPair`: the `src_val` was + // loaded using the offsets defined by `src.layout`. 
When we put this back into + // the destination, we have to use the same offsets! So (a) we make sure we + // write back to memory, and (b) we use `dest` *with the source layout*. + let dest_mem = self.force_allocation(dest)?; + self.write_immediate_to_mplace_no_validate( + *src_val, + src.layout, + dest_mem.align, + *dest_mem, + ) + }; + } + Err(mplace) => mplace, + }; + // Slow path, this does not fit into an immediate. Just memcpy. + trace!("copy_op: {:?} <- {:?}: {}", *dest, src, dest.layout.ty); + + let dest = self.force_allocation(&dest)?; + let Some((dest_size, _)) = self.size_and_align_of_mplace(&dest)? else { + span_bug!(self.cur_span(), "copy_op needs (dynamically) sized values") + }; + if cfg!(debug_assertions) { + let src_size = self.size_and_align_of_mplace(&src)?.unwrap().0; + assert_eq!(src_size, dest_size, "Cannot copy differently-sized data"); + } else { + // As a cheap approximation, we compare the fixed parts of the size. + assert_eq!(src.layout.size, dest.layout.size); + } + + self.mem_copy( + src.ptr, src.align, dest.ptr, dest.align, dest_size, /*nonoverlapping*/ false, + ) + } + + /// Ensures that a place is in memory, and returns where it is. + /// If the place currently refers to a local that doesn't yet have a matching allocation, + /// create such an allocation. + /// This is essentially `force_to_memplace`. + #[instrument(skip(self), level = "debug")] + pub fn force_allocation( + &mut self, + place: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + let mplace = match place.place { + Place::Local { frame, local } => { + match M::access_local_mut(self, frame, local)? { + &mut Operand::Immediate(local_val) => { + // We need to make an allocation. + + // We need the layout of the local. We can NOT use the layout we got, + // that might e.g., be an inner field of a struct with `Scalar` layout, + // that has different alignment than the outer field. + let local_layout = + self.layout_of_local(&self.stack()[frame], local, None)?; + if local_layout.is_unsized() { + throw_unsup_format!("unsized locals are not supported"); + } + let mplace = *self.allocate(local_layout, MemoryKind::Stack)?; + if !matches!(local_val, Immediate::Uninit) { + // Preserve old value. (As an optimization, we can skip this if it was uninit.) + // We don't have to validate as we can assume the local + // was already valid for its type. + self.write_immediate_to_mplace_no_validate( + local_val, + local_layout, + local_layout.align.abi, + mplace, + )?; + } + // Now we can call `access_mut` again, asserting it goes well, + // and actually overwrite things. + *M::access_local_mut(self, frame, local).unwrap() = + Operand::Indirect(mplace); + mplace + } + &mut Operand::Indirect(mplace) => mplace, // this already was an indirect local + } + } + Place::Ptr(mplace) => mplace, + }; + // Return with the original layout, so that the caller can go on + Ok(MPlaceTy { mplace, layout: place.layout, align: place.align }) + } + + pub fn allocate( + &mut self, + layout: TyAndLayout<'tcx>, + kind: MemoryKind, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + assert!(!layout.is_unsized()); + let ptr = self.allocate_ptr(layout.size, layout.align.abi, kind)?; + Ok(MPlaceTy::from_aligned_ptr(ptr.into(), layout)) + } + + /// Returns a wide MPlace of type `&'static [mut] str` to a new 1-aligned allocation. 
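+    ///
+    /// Illustrative sketch only (`ecx` stands for some `InterpCx` value; it is not defined
+    /// in this code):
+    ///
+    /// ```ignore
+    /// let place = ecx.allocate_str("hello", MemoryKind::Stack, Mutability::Not);
+    /// // The returned place is a wide place whose metadata records the length (5 here).
+    /// ```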
+ pub fn allocate_str( + &mut self, + str: &str, + kind: MemoryKind, + mutbl: Mutability, + ) -> MPlaceTy<'tcx, M::Provenance> { + let ptr = self.allocate_bytes_ptr(str.as_bytes(), Align::ONE, kind, mutbl); + let meta = Scalar::from_machine_usize(u64::try_from(str.len()).unwrap(), self); + let mplace = MemPlace { ptr: ptr.into(), meta: MemPlaceMeta::Meta(meta) }; + + let ty = self.tcx.mk_ref( + self.tcx.lifetimes.re_static, + ty::TypeAndMut { ty: self.tcx.types.str_, mutbl }, + ); + let layout = self.layout_of(ty).unwrap(); + MPlaceTy { mplace, layout, align: layout.align.abi } + } + + /// Writes the discriminant of the given variant. + #[instrument(skip(self), level = "debug")] + pub fn write_discriminant( + &mut self, + variant_index: VariantIdx, + dest: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + // This must be an enum or generator. + match dest.layout.ty.kind() { + ty::Adt(adt, _) => assert!(adt.is_enum()), + ty::Generator(..) => {} + _ => span_bug!( + self.cur_span(), + "write_discriminant called on non-variant-type (neither enum nor generator)" + ), + } + // Layout computation excludes uninhabited variants from consideration + // therefore there's no way to represent those variants in the given layout. + // Essentially, uninhabited variants do not have a tag that corresponds to their + // discriminant, so we cannot do anything here. + // When evaluating we will always error before even getting here, but ConstProp 'executes' + // dead code, so we cannot ICE here. + if dest.layout.for_variant(self, variant_index).abi.is_uninhabited() { + throw_ub!(UninhabitedEnumVariantWritten) + } + + match dest.layout.variants { + abi::Variants::Single { index } => { + assert_eq!(index, variant_index); + } + abi::Variants::Multiple { + tag_encoding: TagEncoding::Direct, + tag: tag_layout, + tag_field, + .. + } => { + // No need to validate that the discriminant here because the + // `TyAndLayout::for_variant()` call earlier already checks the variant is valid. + + let discr_val = + dest.layout.ty.discriminant_for_variant(*self.tcx, variant_index).unwrap().val; + + // raw discriminants for enums are isize or bigger during + // their computation, but the in-memory tag is the smallest possible + // representation + let size = tag_layout.size(self); + let tag_val = size.truncate(discr_val); + + let tag_dest = self.place_field(dest, tag_field)?; + self.write_scalar(Scalar::from_uint(tag_val, size), &tag_dest)?; + } + abi::Variants::Multiple { + tag_encoding: + TagEncoding::Niche { dataful_variant, ref niche_variants, niche_start }, + tag: tag_layout, + tag_field, + .. + } => { + // No need to validate that the discriminant here because the + // `TyAndLayout::for_variant()` call earlier already checks the variant is valid. + + if variant_index != dataful_variant { + let variants_start = niche_variants.start().as_u32(); + let variant_index_relative = variant_index + .as_u32() + .checked_sub(variants_start) + .expect("overflow computing relative variant idx"); + // We need to use machine arithmetic when taking into account `niche_start`: + // tag_val = variant_index_relative + niche_start_val + let tag_layout = self.layout_of(tag_layout.primitive().to_int_ty(*self.tcx))?; + let niche_start_val = ImmTy::from_uint(niche_start, tag_layout); + let variant_index_relative_val = + ImmTy::from_uint(variant_index_relative, tag_layout); + let tag_val = self.binary_op( + mir::BinOp::Add, + &variant_index_relative_val, + &niche_start_val, + )?; + // Write result. 
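+                    // Worked example (illustrative numbers, not a layout from this code): if
+                    // the niche variants start at variant index 1 and `niche_start` is 2, then
+                    // writing variant 3 stores `(3 - 1) + 2 = 4` into the niche field below.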
+ let niche_dest = self.place_field(dest, tag_field)?; + self.write_immediate(*tag_val, &niche_dest)?; + } + } + } + + Ok(()) + } + + pub fn raw_const_to_mplace( + &self, + raw: ConstAlloc<'tcx>, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + // This must be an allocation in `tcx` + let _ = self.tcx.global_alloc(raw.alloc_id); + let ptr = self.global_base_pointer(Pointer::from(raw.alloc_id))?; + let layout = self.layout_of(raw.ty)?; + Ok(MPlaceTy::from_aligned_ptr(ptr.into(), layout)) + } + + /// Turn a place with a `dyn Trait` type into a place with the actual dynamic type. + pub(super) fn unpack_dyn_trait( + &self, + mplace: &MPlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + let vtable = mplace.vtable().to_pointer(self)?; // also sanity checks the type + let (ty, _) = self.get_ptr_vtable(vtable)?; + let layout = self.layout_of(ty)?; + + let mplace = MPlaceTy { + mplace: MemPlace { meta: MemPlaceMeta::None, ..**mplace }, + layout, + align: layout.align.abi, + }; + Ok(mplace) + } +} + +// Some nodes are used a lot. Make sure they don't unintentionally get bigger. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +mod size_asserts { + use super::*; + // These are in alphabetical order, which is easy to maintain. + rustc_data_structures::static_assert_size!(MemPlaceMeta, 24); + rustc_data_structures::static_assert_size!(MemPlace, 40); + rustc_data_structures::static_assert_size!(MPlaceTy<'_>, 64); + rustc_data_structures::static_assert_size!(Place, 48); + rustc_data_structures::static_assert_size!(PlaceTy<'_>, 72); +} diff --git a/compiler/rustc_const_eval/src/interpret/projection.rs b/compiler/rustc_const_eval/src/interpret/projection.rs new file mode 100644 index 000000000..742339f2b --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/projection.rs @@ -0,0 +1,391 @@ +//! This file implements "place projections"; basically a symmetric API for 3 types: MPlaceTy, OpTy, PlaceTy. +//! +//! OpTy and PlaceTy genrally work by "let's see if we are actually an MPlaceTy, and do something custom if not". +//! For PlaceTy, the custom thing is basically always to call `force_allocation` and then use the MPlaceTy logic anyway. +//! For OpTy, the custom thing on field pojections has to be pretty clever (since `Operand::Immediate` can have fields), +//! but for array/slice operations it only has to worry about `Operand::Uninit`. That makes the value part trivial, +//! but we still need to do bounds checking and adjust the layout. To not duplicate that with MPlaceTy, we actually +//! implement the logic on OpTy, and MPlaceTy calls that. + +use std::hash::Hash; + +use rustc_middle::mir; +use rustc_middle::ty; +use rustc_middle::ty::layout::LayoutOf; +use rustc_target::abi::{self, Abi, VariantIdx}; + +use super::{ + ImmTy, Immediate, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, OpTy, PlaceTy, + Provenance, Scalar, +}; + +// FIXME: Working around https://github.com/rust-lang/rust/issues/54385 +impl<'mir, 'tcx: 'mir, Prov, M> InterpCx<'mir, 'tcx, M> +where + Prov: Provenance + Eq + Hash + 'static, + M: Machine<'mir, 'tcx, Provenance = Prov>, +{ + //# Field access + + /// Offset a pointer to project to a field of a struct/union. Unlike `place_field`, this is + /// always possible without allocating, so it can take `&self`. Also return the field's layout. + /// This supports both struct and array fields. + /// + /// This also works for arrays, but then the `usize` index type is restricting. 
+ /// For indexing into arrays, use `mplace_index`. + pub fn mplace_field( + &self, + base: &MPlaceTy<'tcx, M::Provenance>, + field: usize, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + let offset = base.layout.fields.offset(field); + let field_layout = base.layout.field(self, field); + + // Offset may need adjustment for unsized fields. + let (meta, offset) = if field_layout.is_unsized() { + // Re-use parent metadata to determine dynamic field layout. + // With custom DSTS, this *will* execute user-defined code, but the same + // happens at run-time so that's okay. + match self.size_and_align_of(&base.meta, &field_layout)? { + Some((_, align)) => (base.meta, offset.align_to(align)), + None => { + // For unsized types with an extern type tail we perform no adjustments. + // NOTE: keep this in sync with `PlaceRef::project_field` in the codegen backend. + assert!(matches!(base.meta, MemPlaceMeta::None)); + (base.meta, offset) + } + } + } else { + // base.meta could be present; we might be accessing a sized field of an unsized + // struct. + (MemPlaceMeta::None, offset) + }; + + // We do not look at `base.layout.align` nor `field_layout.align`, unlike + // codegen -- mostly to see if we can get away with that + base.offset_with_meta(offset, meta, field_layout, self) + } + + /// Gets the place of a field inside the place, and also the field's type. + /// Just a convenience function, but used quite a bit. + /// This is the only projection that might have a side-effect: We cannot project + /// into the field of a local `ScalarPair`, we have to first allocate it. + pub fn place_field( + &mut self, + base: &PlaceTy<'tcx, M::Provenance>, + field: usize, + ) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> { + // FIXME: We could try to be smarter and avoid allocation for fields that span the + // entire place. + let base = self.force_allocation(base)?; + Ok(self.mplace_field(&base, field)?.into()) + } + + pub fn operand_field( + &self, + base: &OpTy<'tcx, M::Provenance>, + field: usize, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + let base = match base.try_as_mplace() { + Ok(ref mplace) => { + // We can reuse the mplace field computation logic for indirect operands. + let field = self.mplace_field(mplace, field)?; + return Ok(field.into()); + } + Err(value) => value, + }; + + let field_layout = base.layout.field(self, field); + let offset = base.layout.fields.offset(field); + // This makes several assumptions about what layouts we will encounter; we match what + // codegen does as good as we can (see `extract_field` in `rustc_codegen_ssa/src/mir/operand.rs`). 
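+        // As a rough illustration (hypothetical type): projecting field 0 out of an immediate
+        // of type `(u32, bool)`, which has a `ScalarPair` ABI, yields the first scalar at
+        // offset 0; field 1 yields the second scalar at `a.size().align_to(b.align())`.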
+ let field_val: Immediate<_> = match (*base, base.layout.abi) { + // the field contains no information, can be left uninit + _ if field_layout.is_zst() => Immediate::Uninit, + // the field covers the entire type + _ if field_layout.size == base.layout.size => { + assert!(match (base.layout.abi, field_layout.abi) { + (Abi::Scalar(..), Abi::Scalar(..)) => true, + (Abi::ScalarPair(..), Abi::ScalarPair(..)) => true, + _ => false, + }); + assert!(offset.bytes() == 0); + *base + } + // extract fields from types with `ScalarPair` ABI + (Immediate::ScalarPair(a_val, b_val), Abi::ScalarPair(a, b)) => { + assert!(matches!(field_layout.abi, Abi::Scalar(..))); + Immediate::from(if offset.bytes() == 0 { + debug_assert_eq!(field_layout.size, a.size(self)); + a_val + } else { + debug_assert_eq!(offset, a.size(self).align_to(b.align(self).abi)); + debug_assert_eq!(field_layout.size, b.size(self)); + b_val + }) + } + _ => span_bug!( + self.cur_span(), + "invalid field access on immediate {}, layout {:#?}", + base, + base.layout + ), + }; + + Ok(ImmTy::from_immediate(field_val, field_layout).into()) + } + + //# Downcasting + + pub fn mplace_downcast( + &self, + base: &MPlaceTy<'tcx, M::Provenance>, + variant: VariantIdx, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + // Downcasts only change the layout. + // (In particular, no check about whether this is even the active variant -- that's by design, + // see https://github.com/rust-lang/rust/issues/93688#issuecomment-1032929496.) + assert!(!base.meta.has_meta()); + let mut base = *base; + base.layout = base.layout.for_variant(self, variant); + Ok(base) + } + + pub fn place_downcast( + &self, + base: &PlaceTy<'tcx, M::Provenance>, + variant: VariantIdx, + ) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> { + // Downcast just changes the layout + let mut base = base.clone(); + base.layout = base.layout.for_variant(self, variant); + Ok(base) + } + + pub fn operand_downcast( + &self, + base: &OpTy<'tcx, M::Provenance>, + variant: VariantIdx, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + // Downcast just changes the layout + let mut base = base.clone(); + base.layout = base.layout.for_variant(self, variant); + Ok(base) + } + + //# Slice indexing + + #[inline(always)] + pub fn operand_index( + &self, + base: &OpTy<'tcx, M::Provenance>, + index: u64, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + // Not using the layout method because we want to compute on u64 + match base.layout.fields { + abi::FieldsShape::Array { stride, count: _ } => { + // `count` is nonsense for slices, use the dynamic length instead. + let len = base.len(self)?; + if index >= len { + // This can only be reached in ConstProp and non-rustc-MIR. + throw_ub!(BoundsCheckFailed { len, index }); + } + let offset = stride * index; // `Size` multiplication + // All fields have the same layout. + let field_layout = base.layout.field(self, 0); + base.offset(offset, field_layout, self) + } + _ => span_bug!( + self.cur_span(), + "`mplace_index` called on non-array type {:?}", + base.layout.ty + ), + } + } + + // Iterates over all fields of an array. Much more efficient than doing the + // same by repeatedly calling `operand_index`. + pub fn operand_array_fields<'a>( + &self, + base: &'a OpTy<'tcx, Prov>, + ) -> InterpResult<'tcx, impl Iterator>> + 'a> { + let len = base.len(self)?; // also asserts that we have a type where this makes sense + let abi::FieldsShape::Array { stride, .. 
} = base.layout.fields else { + span_bug!(self.cur_span(), "operand_array_fields: expected an array layout"); + }; + let field_layout = base.layout.field(self, 0); + let dl = &self.tcx.data_layout; + // `Size` multiplication + Ok((0..len).map(move |i| base.offset(stride * i, field_layout, dl))) + } + + /// Index into an array. + pub fn mplace_index( + &self, + base: &MPlaceTy<'tcx, M::Provenance>, + index: u64, + ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> { + Ok(self.operand_index(&base.into(), index)?.assert_mem_place()) + } + + pub fn place_index( + &mut self, + base: &PlaceTy<'tcx, M::Provenance>, + index: u64, + ) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> { + // There's not a lot we can do here, since we cannot have a place to a part of a local. If + // we are accessing the only element of a 1-element array, it's still the entire local... + // that doesn't seem worth it. + let base = self.force_allocation(base)?; + Ok(self.mplace_index(&base, index)?.into()) + } + + //# ConstantIndex support + + fn operand_constant_index( + &self, + base: &OpTy<'tcx, M::Provenance>, + offset: u64, + min_length: u64, + from_end: bool, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + let n = base.len(self)?; + if n < min_length { + // This can only be reached in ConstProp and non-rustc-MIR. + throw_ub!(BoundsCheckFailed { len: min_length, index: n }); + } + + let index = if from_end { + assert!(0 < offset && offset <= min_length); + n.checked_sub(offset).unwrap() + } else { + assert!(offset < min_length); + offset + }; + + self.operand_index(base, index) + } + + fn place_constant_index( + &mut self, + base: &PlaceTy<'tcx, M::Provenance>, + offset: u64, + min_length: u64, + from_end: bool, + ) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> { + let base = self.force_allocation(base)?; + Ok(self + .operand_constant_index(&base.into(), offset, min_length, from_end)? + .assert_mem_place() + .into()) + } + + //# Subslicing + + fn operand_subslice( + &self, + base: &OpTy<'tcx, M::Provenance>, + from: u64, + to: u64, + from_end: bool, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + let len = base.len(self)?; // also asserts that we have a type where this makes sense + let actual_to = if from_end { + if from.checked_add(to).map_or(true, |to| to > len) { + // This can only be reached in ConstProp and non-rustc-MIR. + throw_ub!(BoundsCheckFailed { len: len, index: from.saturating_add(to) }); + } + len.checked_sub(to).unwrap() + } else { + to + }; + + // Not using layout method because that works with usize, and does not work with slices + // (that have count 0 in their layout). + let from_offset = match base.layout.fields { + abi::FieldsShape::Array { stride, .. } => stride * from, // `Size` multiplication is checked + _ => { + span_bug!(self.cur_span(), "unexpected layout of index access: {:#?}", base.layout) + } + }; + + // Compute meta and new layout + let inner_len = actual_to.checked_sub(from).unwrap(); + let (meta, ty) = match base.layout.ty.kind() { + // It is not nice to match on the type, but that seems to be the only way to + // implement this. + ty::Array(inner, _) => (MemPlaceMeta::None, self.tcx.mk_array(*inner, inner_len)), + ty::Slice(..) 
=> { + let len = Scalar::from_machine_usize(inner_len, self); + (MemPlaceMeta::Meta(len), base.layout.ty) + } + _ => { + span_bug!(self.cur_span(), "cannot subslice non-array type: `{:?}`", base.layout.ty) + } + }; + let layout = self.layout_of(ty)?; + base.offset_with_meta(from_offset, meta, layout, self) + } + + pub fn place_subslice( + &mut self, + base: &PlaceTy<'tcx, M::Provenance>, + from: u64, + to: u64, + from_end: bool, + ) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> { + let base = self.force_allocation(base)?; + Ok(self.operand_subslice(&base.into(), from, to, from_end)?.assert_mem_place().into()) + } + + //# Applying a general projection + + /// Projects into a place. + #[instrument(skip(self), level = "trace")] + pub fn place_projection( + &mut self, + base: &PlaceTy<'tcx, M::Provenance>, + proj_elem: mir::PlaceElem<'tcx>, + ) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> { + use rustc_middle::mir::ProjectionElem::*; + Ok(match proj_elem { + Field(field, _) => self.place_field(base, field.index())?, + Downcast(_, variant) => self.place_downcast(base, variant)?, + Deref => self.deref_operand(&self.place_to_op(base)?)?.into(), + Index(local) => { + let layout = self.layout_of(self.tcx.types.usize)?; + let n = self.local_to_op(self.frame(), local, Some(layout))?; + let n = self.read_scalar(&n)?.to_machine_usize(self)?; + self.place_index(base, n)? + } + ConstantIndex { offset, min_length, from_end } => { + self.place_constant_index(base, offset, min_length, from_end)? + } + Subslice { from, to, from_end } => self.place_subslice(base, from, to, from_end)?, + }) + } + + #[instrument(skip(self), level = "trace")] + pub fn operand_projection( + &self, + base: &OpTy<'tcx, M::Provenance>, + proj_elem: mir::PlaceElem<'tcx>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + use rustc_middle::mir::ProjectionElem::*; + Ok(match proj_elem { + Field(field, _) => self.operand_field(base, field.index())?, + Downcast(_, variant) => self.operand_downcast(base, variant)?, + Deref => self.deref_operand(base)?.into(), + Index(local) => { + let layout = self.layout_of(self.tcx.types.usize)?; + let n = self.local_to_op(self.frame(), local, Some(layout))?; + let n = self.read_scalar(&n)?.to_machine_usize(self)?; + self.operand_index(base, n)? + } + ConstantIndex { offset, min_length, from_end } => { + self.operand_constant_index(base, offset, min_length, from_end)? + } + Subslice { from, to, from_end } => self.operand_subslice(base, from, to, from_end)?, + }) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/step.rs b/compiler/rustc_const_eval/src/interpret/step.rs new file mode 100644 index 000000000..fea158a9f --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/step.rs @@ -0,0 +1,320 @@ +//! This module contains the `InterpCx` methods for executing a single step of the interpreter. +//! +//! The main entry point is the `step` method. + +use rustc_middle::mir; +use rustc_middle::mir::interpret::{InterpResult, Scalar}; +use rustc_middle::ty::layout::LayoutOf; + +use super::{InterpCx, Machine}; + +/// Classify whether an operator is "left-homogeneous", i.e., the LHS has the +/// same type as the result. +#[inline] +fn binop_left_homogeneous(op: mir::BinOp) -> bool { + use rustc_middle::mir::BinOp::*; + match op { + Add | Sub | Mul | Div | Rem | BitXor | BitAnd | BitOr | Offset | Shl | Shr => true, + Eq | Ne | Lt | Le | Gt | Ge => false, + } +} +/// Classify whether an operator is "right-homogeneous", i.e., the RHS has the +/// same type as the LHS. 
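+///
+/// For example, shifts allow the right-hand side to have a different type than the left
+/// (`1u64 << 2u32` is valid), so `Shl`/`Shr` are not right-homogeneous, while arithmetic and
+/// comparison operators require both operands to have the same type.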
+#[inline] +fn binop_right_homogeneous(op: mir::BinOp) -> bool { + use rustc_middle::mir::BinOp::*; + match op { + Add | Sub | Mul | Div | Rem | BitXor | BitAnd | BitOr | Eq | Ne | Lt | Le | Gt | Ge => true, + Offset | Shl | Shr => false, + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + pub fn run(&mut self) -> InterpResult<'tcx> { + while self.step()? {} + Ok(()) + } + + /// Returns `true` as long as there are more things to do. + /// + /// This is used by [priroda](https://github.com/oli-obk/priroda) + /// + /// This is marked `#inline(always)` to work around adversarial codegen when `opt-level = 3` + #[inline(always)] + pub fn step(&mut self) -> InterpResult<'tcx, bool> { + if self.stack().is_empty() { + return Ok(false); + } + + let Ok(loc) = self.frame().loc else { + // We are unwinding and this fn has no cleanup code. + // Just go on unwinding. + trace!("unwinding: skipping frame"); + self.pop_stack_frame(/* unwinding */ true)?; + return Ok(true); + }; + let basic_block = &self.body().basic_blocks()[loc.block]; + + if let Some(stmt) = basic_block.statements.get(loc.statement_index) { + let old_frames = self.frame_idx(); + self.statement(stmt)?; + // Make sure we are not updating `statement_index` of the wrong frame. + assert_eq!(old_frames, self.frame_idx()); + // Advance the program counter. + self.frame_mut().loc.as_mut().unwrap().statement_index += 1; + return Ok(true); + } + + M::before_terminator(self)?; + + let terminator = basic_block.terminator(); + self.terminator(terminator)?; + Ok(true) + } + + /// Runs the interpretation logic for the given `mir::Statement` at the current frame and + /// statement counter. + /// + /// This does NOT move the statement counter forward, the caller has to do that! + pub fn statement(&mut self, stmt: &mir::Statement<'tcx>) -> InterpResult<'tcx> { + info!("{:?}", stmt); + + use rustc_middle::mir::StatementKind::*; + + match &stmt.kind { + Assign(box (place, rvalue)) => self.eval_rvalue_into_place(rvalue, *place)?, + + SetDiscriminant { place, variant_index } => { + let dest = self.eval_place(**place)?; + self.write_discriminant(*variant_index, &dest)?; + } + + Deinit(place) => { + let dest = self.eval_place(**place)?; + self.write_uninit(&dest)?; + } + + // Mark locals as alive + StorageLive(local) => { + self.storage_live(*local)?; + } + + // Mark locals as dead + StorageDead(local) => { + self.storage_dead(*local)?; + } + + // No dynamic semantics attached to `FakeRead`; MIR + // interpreter is solely intended for borrowck'ed code. + FakeRead(..) => {} + + // Stacked Borrows. + Retag(kind, place) => { + let dest = self.eval_place(**place)?; + M::retag(self, *kind, &dest)?; + } + + // Call CopyNonOverlapping + CopyNonOverlapping(box rustc_middle::mir::CopyNonOverlapping { src, dst, count }) => { + let src = self.eval_operand(src, None)?; + let dst = self.eval_operand(dst, None)?; + let count = self.eval_operand(count, None)?; + self.copy_intrinsic(&src, &dst, &count, /* nonoverlapping */ true)?; + } + + // Statements we do not track. + AscribeUserType(..) => {} + + // Currently, Miri discards Coverage statements. Coverage statements are only injected + // via an optional compile time MIR pass and have no side effects. Since Coverage + // statements don't exist at the source level, it is safe for Miri to ignore them, even + // for undefined behavior (UB) checks. 
+ // + // A coverage counter inside a const expression (for example, a counter injected in a + // const function) is discarded when the const is evaluated at compile time. Whether + // this should change, and/or how to implement a const eval counter, is a subject of the + // following issue: + // + // FIXME(#73156): Handle source code coverage in const eval + Coverage(..) => {} + + // Defined to do nothing. These are added by optimization passes, to avoid changing the + // size of MIR constantly. + Nop => {} + } + + Ok(()) + } + + /// Evaluate an assignment statement. + /// + /// There is no separate `eval_rvalue` function. Instead, the code for handling each rvalue + /// type writes its results directly into the memory specified by the place. + pub fn eval_rvalue_into_place( + &mut self, + rvalue: &mir::Rvalue<'tcx>, + place: mir::Place<'tcx>, + ) -> InterpResult<'tcx> { + let dest = self.eval_place(place)?; + // FIXME: ensure some kind of non-aliasing between LHS and RHS? + // Also see https://github.com/rust-lang/rust/issues/68364. + + use rustc_middle::mir::Rvalue::*; + match *rvalue { + ThreadLocalRef(did) => { + let ptr = M::thread_local_static_base_pointer(self, did)?; + self.write_pointer(ptr, &dest)?; + } + + Use(ref operand) => { + // Avoid recomputing the layout + let op = self.eval_operand(operand, Some(dest.layout))?; + self.copy_op(&op, &dest, /*allow_transmute*/ false)?; + } + + CopyForDeref(ref place) => { + let op = self.eval_place_to_op(*place, Some(dest.layout))?; + self.copy_op(&op, &dest, /* allow_transmute*/ false)?; + } + + BinaryOp(bin_op, box (ref left, ref right)) => { + let layout = binop_left_homogeneous(bin_op).then_some(dest.layout); + let left = self.read_immediate(&self.eval_operand(left, layout)?)?; + let layout = binop_right_homogeneous(bin_op).then_some(left.layout); + let right = self.read_immediate(&self.eval_operand(right, layout)?)?; + self.binop_ignore_overflow(bin_op, &left, &right, &dest)?; + } + + CheckedBinaryOp(bin_op, box (ref left, ref right)) => { + // Due to the extra boolean in the result, we can never reuse the `dest.layout`. + let left = self.read_immediate(&self.eval_operand(left, None)?)?; + let layout = binop_right_homogeneous(bin_op).then_some(left.layout); + let right = self.read_immediate(&self.eval_operand(right, layout)?)?; + self.binop_with_overflow( + bin_op, /*force_overflow_checks*/ false, &left, &right, &dest, + )?; + } + + UnaryOp(un_op, ref operand) => { + // The operand always has the same type as the result. + let val = self.read_immediate(&self.eval_operand(operand, Some(dest.layout))?)?; + let val = self.unary_op(un_op, &val)?; + assert_eq!(val.layout, dest.layout, "layout mismatch for result of {:?}", un_op); + self.write_immediate(*val, &dest)?; + } + + Aggregate(box ref kind, ref operands) => { + assert!(matches!(kind, mir::AggregateKind::Array(..))); + + for (field_index, operand) in operands.iter().enumerate() { + let op = self.eval_operand(operand, None)?; + let field_dest = self.place_field(&dest, field_index)?; + self.copy_op(&op, &field_dest, /*allow_transmute*/ false)?; + } + } + + Repeat(ref operand, _) => { + let src = self.eval_operand(operand, None)?; + assert!(!src.layout.is_unsized()); + let dest = self.force_allocation(&dest)?; + let length = dest.len(self)?; + + if length == 0 { + // Nothing to copy... but let's still make sure that `dest` as a place is valid. + self.get_place_alloc_mut(&dest)?; + } else { + // Write the src to the first element. 
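+                    // (Illustrative: for an initializer like `[0u8; 4096]` this writes one byte
+                    // and the remaining 4095 elements are then filled by repeated copies below.)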
+ let first = self.mplace_field(&dest, 0)?; + self.copy_op(&src, &first.into(), /*allow_transmute*/ false)?; + + // This is performance-sensitive code for big static/const arrays! So we + // avoid writing each operand individually and instead just make many copies + // of the first element. + let elem_size = first.layout.size; + let first_ptr = first.ptr; + let rest_ptr = first_ptr.offset(elem_size, self)?; + // For the alignment of `rest_ptr`, we crucially do *not* use `first.align` as + // that place might be more aligned than its type mandates (a `u8` array could + // be 4-aligned if it sits at the right spot in a struct). Instead we use + // `first.layout.align`, i.e., the alignment given by the type. + self.mem_copy_repeatedly( + first_ptr, + first.align, + rest_ptr, + first.layout.align.abi, + elem_size, + length - 1, + /*nonoverlapping:*/ true, + )?; + } + } + + Len(place) => { + let src = self.eval_place(place)?; + let mplace = self.force_allocation(&src)?; + let len = mplace.len(self)?; + self.write_scalar(Scalar::from_machine_usize(len, self), &dest)?; + } + + AddressOf(_, place) | Ref(_, _, place) => { + let src = self.eval_place(place)?; + let place = self.force_allocation(&src)?; + self.write_immediate(place.to_ref(self), &dest)?; + } + + NullaryOp(null_op, ty) => { + let ty = self.subst_from_current_frame_and_normalize_erasing_regions(ty)?; + let layout = self.layout_of(ty)?; + if layout.is_unsized() { + // FIXME: This should be a span_bug (#80742) + self.tcx.sess.delay_span_bug( + self.frame().current_span(), + &format!("Nullary MIR operator called for unsized type {}", ty), + ); + throw_inval!(SizeOfUnsizedType(ty)); + } + let val = match null_op { + mir::NullOp::SizeOf => layout.size.bytes(), + mir::NullOp::AlignOf => layout.align.abi.bytes(), + }; + self.write_scalar(Scalar::from_machine_usize(val, self), &dest)?; + } + + ShallowInitBox(ref operand, _) => { + let src = self.eval_operand(operand, None)?; + let v = self.read_immediate(&src)?; + self.write_immediate(*v, &dest)?; + } + + Cast(cast_kind, ref operand, cast_ty) => { + let src = self.eval_operand(operand, None)?; + let cast_ty = + self.subst_from_current_frame_and_normalize_erasing_regions(cast_ty)?; + self.cast(&src, cast_kind, cast_ty, &dest)?; + } + + Discriminant(place) => { + let op = self.eval_place_to_op(place, None)?; + let discr_val = self.read_discriminant(&op)?.0; + self.write_scalar(discr_val, &dest)?; + } + } + + trace!("{:?}", self.dump_place(*dest)); + + Ok(()) + } + + /// Evaluate the given terminator. Will also adjust the stack frame and statement position accordingly. 
+ fn terminator(&mut self, terminator: &mir::Terminator<'tcx>) -> InterpResult<'tcx> { + info!("{:?}", terminator.kind); + + self.eval_terminator(terminator)?; + if !self.stack().is_empty() { + if let Ok(loc) = self.frame().loc { + info!("// executing {:?}", loc.block); + } + } + Ok(()) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/terminator.rs b/compiler/rustc_const_eval/src/interpret/terminator.rs new file mode 100644 index 000000000..d563e35f9 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/terminator.rs @@ -0,0 +1,671 @@ +use std::borrow::Cow; + +use rustc_middle::ty::layout::{FnAbiOf, LayoutOf}; +use rustc_middle::ty::Instance; +use rustc_middle::{ + mir, + ty::{self, Ty}, +}; +use rustc_target::abi; +use rustc_target::abi::call::{ArgAbi, ArgAttribute, ArgAttributes, FnAbi, PassMode}; +use rustc_target::spec::abi::Abi; + +use super::{ + FnVal, ImmTy, Immediate, InterpCx, InterpResult, MPlaceTy, Machine, MemoryKind, OpTy, Operand, + PlaceTy, Scalar, StackPopCleanup, StackPopUnwind, +}; + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + pub(super) fn eval_terminator( + &mut self, + terminator: &mir::Terminator<'tcx>, + ) -> InterpResult<'tcx> { + use rustc_middle::mir::TerminatorKind::*; + match terminator.kind { + Return => { + self.pop_stack_frame(/* unwinding */ false)? + } + + Goto { target } => self.go_to_block(target), + + SwitchInt { ref discr, ref targets, switch_ty } => { + let discr = self.read_immediate(&self.eval_operand(discr, None)?)?; + trace!("SwitchInt({:?})", *discr); + assert_eq!(discr.layout.ty, switch_ty); + + // Branch to the `otherwise` case by default, if no match is found. + assert!(!targets.iter().is_empty()); + let mut target_block = targets.otherwise(); + + for (const_int, target) in targets.iter() { + // Compare using MIR BinOp::Eq, to also support pointer values. + // (Avoiding `self.binary_op` as that does some redundant layout computation.) + let res = self + .overflowing_binary_op( + mir::BinOp::Eq, + &discr, + &ImmTy::from_uint(const_int, discr.layout), + )? + .0; + if res.to_bool()? 
{ + target_block = target; + break; + } + } + + self.go_to_block(target_block); + } + + Call { + ref func, + ref args, + destination, + target, + ref cleanup, + from_hir_call: _, + fn_span: _, + } => { + let old_stack = self.frame_idx(); + let old_loc = self.frame().loc; + let func = self.eval_operand(func, None)?; + let args = self.eval_operands(args)?; + + let fn_sig_binder = func.layout.ty.fn_sig(*self.tcx); + let fn_sig = + self.tcx.normalize_erasing_late_bound_regions(self.param_env, fn_sig_binder); + let extra_args = &args[fn_sig.inputs().len()..]; + let extra_args = self.tcx.mk_type_list(extra_args.iter().map(|arg| arg.layout.ty)); + + let (fn_val, fn_abi, with_caller_location) = match *func.layout.ty.kind() { + ty::FnPtr(_sig) => { + let fn_ptr = self.read_pointer(&func)?; + let fn_val = self.get_ptr_fn(fn_ptr)?; + (fn_val, self.fn_abi_of_fn_ptr(fn_sig_binder, extra_args)?, false) + } + ty::FnDef(def_id, substs) => { + let instance = + self.resolve(ty::WithOptConstParam::unknown(def_id), substs)?; + ( + FnVal::Instance(instance), + self.fn_abi_of_instance(instance, extra_args)?, + instance.def.requires_caller_location(*self.tcx), + ) + } + _ => span_bug!( + terminator.source_info.span, + "invalid callee of type {:?}", + func.layout.ty + ), + }; + + let destination = self.eval_place(destination)?; + self.eval_fn_call( + fn_val, + (fn_sig.abi, fn_abi), + &args, + with_caller_location, + &destination, + target, + match (cleanup, fn_abi.can_unwind) { + (Some(cleanup), true) => StackPopUnwind::Cleanup(*cleanup), + (None, true) => StackPopUnwind::Skip, + (_, false) => StackPopUnwind::NotAllowed, + }, + )?; + // Sanity-check that `eval_fn_call` either pushed a new frame or + // did a jump to another block. + if self.frame_idx() == old_stack && self.frame().loc == old_loc { + span_bug!(terminator.source_info.span, "evaluating this call made no progress"); + } + } + + Drop { place, target, unwind } => { + let place = self.eval_place(place)?; + let ty = place.layout.ty; + trace!("TerminatorKind::drop: {:?}, type {}", place, ty); + + let instance = Instance::resolve_drop_in_place(*self.tcx, ty); + self.drop_in_place(&place, instance, target, unwind)?; + } + + Assert { ref cond, expected, ref msg, target, cleanup } => { + let cond_val = + self.read_immediate(&self.eval_operand(cond, None)?)?.to_scalar()?.to_bool()?; + if expected == cond_val { + self.go_to_block(target); + } else { + M::assert_panic(self, msg, cleanup)?; + } + } + + Abort => { + M::abort(self, "the program aborted execution".to_owned())?; + } + + // When we encounter Resume, we've finished unwinding + // cleanup for the current stack frame. We pop it in order + // to continue unwinding the next frame + Resume => { + trace!("unwinding: resuming from cleanup"); + // By definition, a Resume terminator means + // that we're unwinding + self.pop_stack_frame(/* unwinding */ true)?; + return Ok(()); + } + + // It is UB to ever encounter this. + Unreachable => throw_ub!(Unreachable), + + // These should never occur for MIR we actually run. + DropAndReplace { .. } + | FalseEdge { .. } + | FalseUnwind { .. } + | Yield { .. } + | GeneratorDrop => span_bug!( + terminator.source_info.span, + "{:#?} should have been eliminated by MIR pass", + terminator.kind + ), + + // Inline assembly can't be interpreted. + InlineAsm { .. 
} => throw_unsup_format!("inline assembly is not supported"), + } + + Ok(()) + } + + fn check_argument_compat( + caller_abi: &ArgAbi<'tcx, Ty<'tcx>>, + callee_abi: &ArgAbi<'tcx, Ty<'tcx>>, + ) -> bool { + // Heuristic for type comparison. + let layout_compat = || { + if caller_abi.layout.ty == callee_abi.layout.ty { + // No question + return true; + } + if caller_abi.layout.is_unsized() || callee_abi.layout.is_unsized() { + // No, no, no. We require the types to *exactly* match for unsized arguments. If + // these are somehow unsized "in a different way" (say, `dyn Trait` vs `[i32]`), + // then who knows what happens. + return false; + } + if caller_abi.layout.size != callee_abi.layout.size + || caller_abi.layout.align.abi != callee_abi.layout.align.abi + { + // This cannot go well... + return false; + } + // The rest *should* be okay, but we are extra conservative. + match (caller_abi.layout.abi, callee_abi.layout.abi) { + // Different valid ranges are okay (once we enforce validity, + // that will take care to make it UB to leave the range, just + // like for transmute). + (abi::Abi::Scalar(caller), abi::Abi::Scalar(callee)) => { + caller.primitive() == callee.primitive() + } + ( + abi::Abi::ScalarPair(caller1, caller2), + abi::Abi::ScalarPair(callee1, callee2), + ) => { + caller1.primitive() == callee1.primitive() + && caller2.primitive() == callee2.primitive() + } + // Be conservative + _ => false, + } + }; + // Padding must be fully equal. + let pad_compat = || caller_abi.pad == callee_abi.pad; + // When comparing the PassMode, we have to be smart about comparing the attributes. + let arg_attr_compat = |a1: ArgAttributes, a2: ArgAttributes| { + // There's only one regular attribute that matters for the call ABI: InReg. + // Everything else is things like noalias, dereferencable, nonnull, ... + // (This also applies to pointee_size, pointee_align.) + if a1.regular.contains(ArgAttribute::InReg) != a2.regular.contains(ArgAttribute::InReg) + { + return false; + } + // We also compare the sign extension mode -- this could let the callee make assumptions + // about bits that conceptually were not even passed. + if a1.arg_ext != a2.arg_ext { + return false; + } + return true; + }; + let mode_compat = || match (caller_abi.mode, callee_abi.mode) { + (PassMode::Ignore, PassMode::Ignore) => true, + (PassMode::Direct(a1), PassMode::Direct(a2)) => arg_attr_compat(a1, a2), + (PassMode::Pair(a1, b1), PassMode::Pair(a2, b2)) => { + arg_attr_compat(a1, a2) && arg_attr_compat(b1, b2) + } + (PassMode::Cast(c1), PassMode::Cast(c2)) => c1 == c2, + ( + PassMode::Indirect { attrs: a1, extra_attrs: None, on_stack: s1 }, + PassMode::Indirect { attrs: a2, extra_attrs: None, on_stack: s2 }, + ) => arg_attr_compat(a1, a2) && s1 == s2, + ( + PassMode::Indirect { attrs: a1, extra_attrs: Some(e1), on_stack: s1 }, + PassMode::Indirect { attrs: a2, extra_attrs: Some(e2), on_stack: s2 }, + ) => arg_attr_compat(a1, a2) && arg_attr_compat(e1, e2) && s1 == s2, + _ => false, + }; + + if layout_compat() && pad_compat() && mode_compat() { + return true; + } + trace!( + "check_argument_compat: incompatible ABIs:\ncaller: {:?}\ncallee: {:?}", + caller_abi, + callee_abi + ); + return false; + } + + /// Initialize a single callee argument, checking the types for compatibility. 
+ fn pass_argument<'x, 'y>( + &mut self, + caller_args: &mut impl Iterator< + Item = (&'x OpTy<'tcx, M::Provenance>, &'y ArgAbi<'tcx, Ty<'tcx>>), + >, + callee_abi: &ArgAbi<'tcx, Ty<'tcx>>, + callee_arg: &PlaceTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> + where + 'tcx: 'x, + 'tcx: 'y, + { + if matches!(callee_abi.mode, PassMode::Ignore) { + // This one is skipped. + return Ok(()); + } + // Find next caller arg. + let (caller_arg, caller_abi) = caller_args.next().ok_or_else(|| { + err_ub_format!("calling a function with fewer arguments than it requires") + })?; + // Now, check + if !Self::check_argument_compat(caller_abi, callee_abi) { + throw_ub_format!( + "calling a function with argument of type {:?} passing data of type {:?}", + callee_arg.layout.ty, + caller_arg.layout.ty + ) + } + // Special handling for unsized parameters. + if caller_arg.layout.is_unsized() { + // `check_argument_compat` ensures that both have the same type, so we know they will use the metadata the same way. + assert_eq!(caller_arg.layout.ty, callee_arg.layout.ty); + // We have to properly pre-allocate the memory for the callee. + // So let's tear down some wrappers. + // This all has to be in memory, there are no immediate unsized values. + let src = caller_arg.assert_mem_place(); + // The destination cannot be one of these "spread args". + let (dest_frame, dest_local) = callee_arg.assert_local(); + // We are just initializing things, so there can't be anything here yet. + assert!(matches!( + *self.local_to_op(&self.stack()[dest_frame], dest_local, None)?, + Operand::Immediate(Immediate::Uninit) + )); + // Allocate enough memory to hold `src`. + let Some((size, align)) = self.size_and_align_of_mplace(&src)? else { + span_bug!(self.cur_span(), "unsized fn arg with `extern` type tail should not be allowed") + }; + let ptr = self.allocate_ptr(size, align, MemoryKind::Stack)?; + let dest_place = + MPlaceTy::from_aligned_ptr_with_meta(ptr.into(), callee_arg.layout, src.meta); + // Update the local to be that new place. + *M::access_local_mut(self, dest_frame, dest_local)? = Operand::Indirect(*dest_place); + } + // We allow some transmutes here. + // FIXME: Depending on the PassMode, this should reset some padding to uninitialized. (This + // is true for all `copy_op`, but there are a lot of special cases for argument passing + // specifically.) + self.copy_op(&caller_arg, callee_arg, /*allow_transmute*/ true) + } + + /// Call this function -- pushing the stack frame and initializing the arguments. + /// + /// `caller_fn_abi` is used to determine if all the arguments are passed the proper way. + /// However, we also need `caller_abi` to determine if we need to do untupling of arguments. + /// + /// `with_caller_location` indicates whether the caller passed a caller location. Miri + /// implements caller locations without argument passing, but to match `FnAbi` we need to know + /// when those arguments are present. 
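+    ///
+    /// Rough sketch of how this is reached from the `Call` terminator (see `eval_terminator`;
+    /// argument values are elided and depend on the call site):
+    ///
+    /// ```ignore
+    /// self.eval_fn_call(fn_val, (fn_sig.abi, fn_abi), &args, with_caller_location,
+    ///                   &destination, target, unwind)?;
+    /// ```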
+ pub(crate) fn eval_fn_call( + &mut self, + fn_val: FnVal<'tcx, M::ExtraFnVal>, + (caller_abi, caller_fn_abi): (Abi, &FnAbi<'tcx, Ty<'tcx>>), + args: &[OpTy<'tcx, M::Provenance>], + with_caller_location: bool, + destination: &PlaceTy<'tcx, M::Provenance>, + target: Option, + mut unwind: StackPopUnwind, + ) -> InterpResult<'tcx> { + trace!("eval_fn_call: {:#?}", fn_val); + + let instance = match fn_val { + FnVal::Instance(instance) => instance, + FnVal::Other(extra) => { + return M::call_extra_fn( + self, + extra, + caller_abi, + args, + destination, + target, + unwind, + ); + } + }; + + match instance.def { + ty::InstanceDef::Intrinsic(def_id) => { + assert!(self.tcx.is_intrinsic(def_id)); + // caller_fn_abi is not relevant here, we interpret the arguments directly for each intrinsic. + M::call_intrinsic(self, instance, args, destination, target, unwind) + } + ty::InstanceDef::VTableShim(..) + | ty::InstanceDef::ReifyShim(..) + | ty::InstanceDef::ClosureOnceShim { .. } + | ty::InstanceDef::FnPtrShim(..) + | ty::InstanceDef::DropGlue(..) + | ty::InstanceDef::CloneShim(..) + | ty::InstanceDef::Item(_) => { + // We need MIR for this fn + let Some((body, instance)) = + M::find_mir_or_eval_fn(self, instance, caller_abi, args, destination, target, unwind)? else { + return Ok(()); + }; + + // Compute callee information using the `instance` returned by + // `find_mir_or_eval_fn`. + // FIXME: for variadic support, do we have to somehow determine callee's extra_args? + let callee_fn_abi = self.fn_abi_of_instance(instance, ty::List::empty())?; + + if callee_fn_abi.c_variadic || caller_fn_abi.c_variadic { + throw_unsup_format!("calling a c-variadic function is not supported"); + } + + if M::enforce_abi(self) { + if caller_fn_abi.conv != callee_fn_abi.conv { + throw_ub_format!( + "calling a function with calling convention {:?} using calling convention {:?}", + callee_fn_abi.conv, + caller_fn_abi.conv + ) + } + } + + if !matches!(unwind, StackPopUnwind::NotAllowed) && !callee_fn_abi.can_unwind { + // The callee cannot unwind. + unwind = StackPopUnwind::NotAllowed; + } + + self.push_stack_frame( + instance, + body, + destination, + StackPopCleanup::Goto { ret: target, unwind }, + )?; + + // If an error is raised here, pop the frame again to get an accurate backtrace. + // To this end, we wrap it all in a `try` block. + let res: InterpResult<'tcx> = try { + trace!( + "caller ABI: {:?}, args: {:#?}", + caller_abi, + args.iter() + .map(|arg| (arg.layout.ty, format!("{:?}", **arg))) + .collect::>() + ); + trace!( + "spread_arg: {:?}, locals: {:#?}", + body.spread_arg, + body.args_iter() + .map(|local| ( + local, + self.layout_of_local(self.frame(), local, None).unwrap().ty + )) + .collect::>() + ); + + // In principle, we have two iterators: Where the arguments come from, and where + // they go to. + + // For where they come from: If the ABI is RustCall, we untuple the + // last incoming argument. These two iterators do not have the same type, + // so to keep the code paths uniform we accept an allocation + // (for RustCall ABI only). 
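+                // Illustrative example: a `rust-call` ABI caller invokes a closure with
+                // `(closure, (arg1, arg2))`; the untupling below expands that into the
+                // argument list `[closure, arg1, arg2]` that the callee body expects.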
+ let caller_args: Cow<'_, [OpTy<'tcx, M::Provenance>]> = + if caller_abi == Abi::RustCall && !args.is_empty() { + // Untuple + let (untuple_arg, args) = args.split_last().unwrap(); + trace!("eval_fn_call: Will pass last argument by untupling"); + Cow::from( + args.iter() + .map(|a| Ok(a.clone())) + .chain( + (0..untuple_arg.layout.fields.count()) + .map(|i| self.operand_field(untuple_arg, i)), + ) + .collect::>>>( + )?, + ) + } else { + // Plain arg passing + Cow::from(args) + }; + // If `with_caller_location` is set we pretend there is an extra argument (that + // we will not pass). + assert_eq!( + caller_args.len() + if with_caller_location { 1 } else { 0 }, + caller_fn_abi.args.len(), + "mismatch between caller ABI and caller arguments", + ); + let mut caller_args = caller_args + .iter() + .zip(caller_fn_abi.args.iter()) + .filter(|arg_and_abi| !matches!(arg_and_abi.1.mode, PassMode::Ignore)); + + // Now we have to spread them out across the callee's locals, + // taking into account the `spread_arg`. If we could write + // this is a single iterator (that handles `spread_arg`), then + // `pass_argument` would be the loop body. It takes care to + // not advance `caller_iter` for ZSTs. + let mut callee_args_abis = callee_fn_abi.args.iter(); + for local in body.args_iter() { + let dest = self.eval_place(mir::Place::from(local))?; + if Some(local) == body.spread_arg { + // Must be a tuple + for i in 0..dest.layout.fields.count() { + let dest = self.place_field(&dest, i)?; + let callee_abi = callee_args_abis.next().unwrap(); + self.pass_argument(&mut caller_args, callee_abi, &dest)?; + } + } else { + // Normal argument + let callee_abi = callee_args_abis.next().unwrap(); + self.pass_argument(&mut caller_args, callee_abi, &dest)?; + } + } + // If the callee needs a caller location, pretend we consume one more argument from the ABI. + if instance.def.requires_caller_location(*self.tcx) { + callee_args_abis.next().unwrap(); + } + // Now we should have no more caller args or callee arg ABIs + assert!( + callee_args_abis.next().is_none(), + "mismatch between callee ABI and callee body arguments" + ); + if caller_args.next().is_some() { + throw_ub_format!("calling a function with more arguments than it expected") + } + // Don't forget to check the return type! + if !Self::check_argument_compat(&caller_fn_abi.ret, &callee_fn_abi.ret) { + throw_ub_format!( + "calling a function with return type {:?} passing \ + return place of type {:?}", + callee_fn_abi.ret.layout.ty, + caller_fn_abi.ret.layout.ty, + ) + } + }; + match res { + Err(err) => { + self.stack_mut().pop(); + Err(err) + } + Ok(()) => Ok(()), + } + } + // cannot use the shim here, because that will only result in infinite recursion + ty::InstanceDef::Virtual(def_id, idx) => { + let mut args = args.to_vec(); + // We have to implement all "object safe receivers". So we have to go search for a + // pointer or `dyn Trait` type, but it could be wrapped in newtypes. So recursively + // unwrap those newtypes until we are there. + let mut receiver = args[0].clone(); + let receiver_place = loop { + match receiver.layout.ty.kind() { + ty::Ref(..) | ty::RawPtr(..) => break self.deref_operand(&receiver)?, + ty::Dynamic(..) => break receiver.assert_mem_place(), // no immediate unsized values + _ => { + // Not there yet, search for the only non-ZST field. 
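+                            // For instance (illustrative), a `Pin<&dyn Trait>` receiver is a
+                            // newtype around the `&dyn Trait`; we keep descending into the one
+                            // non-ZST field until we reach the pointer or `dyn Trait` itself.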
+ let mut non_zst_field = None; + for i in 0..receiver.layout.fields.count() { + let field = self.operand_field(&receiver, i)?; + if !field.layout.is_zst() { + assert!( + non_zst_field.is_none(), + "multiple non-ZST fields in dyn receiver type {}", + receiver.layout.ty + ); + non_zst_field = Some(field); + } + } + receiver = non_zst_field.unwrap_or_else(|| { + panic!( + "no non-ZST fields in dyn receiver type {}", + receiver.layout.ty + ) + }); + } + } + }; + // Obtain the underlying trait we are working on. + let receiver_tail = self + .tcx + .struct_tail_erasing_lifetimes(receiver_place.layout.ty, self.param_env); + let ty::Dynamic(data, ..) = receiver_tail.kind() else { + span_bug!(self.cur_span(), "dyanmic call on non-`dyn` type {}", receiver_tail) + }; + + // Get the required information from the vtable. + let vptr = receiver_place.meta.unwrap_meta().to_pointer(self)?; + let (dyn_ty, dyn_trait) = self.get_ptr_vtable(vptr)?; + if dyn_trait != data.principal() { + throw_ub_format!( + "`dyn` call on a pointer whose vtable does not match its type" + ); + } + + // Now determine the actual method to call. We can do that in two different ways and + // compare them to ensure everything fits. + let Some(ty::VtblEntry::Method(fn_inst)) = self.get_vtable_entries(vptr)?.get(idx).copied() else { + throw_ub_format!("`dyn` call trying to call something that is not a method") + }; + if cfg!(debug_assertions) { + let tcx = *self.tcx; + + let trait_def_id = tcx.trait_of_item(def_id).unwrap(); + let virtual_trait_ref = + ty::TraitRef::from_method(tcx, trait_def_id, instance.substs); + assert_eq!( + receiver_tail, + virtual_trait_ref.self_ty(), + "mismatch in underlying dyn trait computation within Miri and MIR building", + ); + let existential_trait_ref = + ty::ExistentialTraitRef::erase_self_ty(tcx, virtual_trait_ref); + let concrete_trait_ref = existential_trait_ref.with_self_ty(tcx, dyn_ty); + + let concrete_method = Instance::resolve_for_vtable( + tcx, + self.param_env, + def_id, + instance.substs.rebase_onto(tcx, trait_def_id, concrete_trait_ref.substs), + ) + .unwrap(); + assert_eq!(fn_inst, concrete_method); + } + + // `*mut receiver_place.layout.ty` is almost the layout that we + // want for args[0]: We have to project to field 0 because we want + // a thin pointer. + assert!(receiver_place.layout.is_unsized()); + let receiver_ptr_ty = self.tcx.mk_mut_ptr(receiver_place.layout.ty); + let this_receiver_ptr = self.layout_of(receiver_ptr_ty)?.field(self, 0); + // Adjust receiver argument. + args[0] = OpTy::from(ImmTy::from_immediate( + Scalar::from_maybe_pointer(receiver_place.ptr, self).into(), + this_receiver_ptr, + )); + trace!("Patched receiver operand to {:#?}", args[0]); + // recurse with concrete function + self.eval_fn_call( + FnVal::Instance(fn_inst), + (caller_abi, caller_fn_abi), + &args, + with_caller_location, + destination, + target, + unwind, + ) + } + } + } + + fn drop_in_place( + &mut self, + place: &PlaceTy<'tcx, M::Provenance>, + instance: ty::Instance<'tcx>, + target: mir::BasicBlock, + unwind: Option, + ) -> InterpResult<'tcx> { + trace!("drop_in_place: {:?},\n {:?}, {:?}", *place, place.layout.ty, instance); + // We take the address of the object. This may well be unaligned, which is fine + // for us here. However, unaligned accesses will probably make the actual drop + // implementation fail -- a problem shared by rustc. + let place = self.force_allocation(place)?; + + let (instance, place) = match place.layout.ty.kind() { + ty::Dynamic(..) => { + // Dropping a trait object. 
Need to find actual drop fn. + let place = self.unpack_dyn_trait(&place)?; + let instance = ty::Instance::resolve_drop_in_place(*self.tcx, place.layout.ty); + (instance, place) + } + _ => (instance, place), + }; + let fn_abi = self.fn_abi_of_instance(instance, ty::List::empty())?; + + let arg = ImmTy::from_immediate( + place.to_ref(self), + self.layout_of(self.tcx.mk_mut_ptr(place.layout.ty))?, + ); + let ret = MPlaceTy::fake_alloc_zst(self.layout_of(self.tcx.types.unit)?); + + self.eval_fn_call( + FnVal::Instance(instance), + (Abi::Rust, fn_abi), + &[arg.into()], + false, + &ret.into(), + Some(target), + match unwind { + Some(cleanup) => StackPopUnwind::Cleanup(cleanup), + None => StackPopUnwind::Skip, + }, + ) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/traits.rs b/compiler/rustc_const_eval/src/interpret/traits.rs new file mode 100644 index 000000000..b3a511d5a --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/traits.rs @@ -0,0 +1,59 @@ +use rustc_middle::mir::interpret::{InterpResult, Pointer}; +use rustc_middle::ty::layout::LayoutOf; +use rustc_middle::ty::{self, Ty, TyCtxt}; +use rustc_target::abi::{Align, Size}; + +use super::util::ensure_monomorphic_enough; +use super::{InterpCx, Machine}; + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + /// Creates a dynamic vtable for the given type and vtable origin. This is used only for + /// objects. + /// + /// The `trait_ref` encodes the erased self type. Hence, if we are making an object `Foo` + /// from a value of type `Foo`, then `trait_ref` would map `T: Trait`. `None` here means that + /// this is an auto trait without any methods, so we only need the basic vtable (drop, size, + /// align). + pub fn get_vtable_ptr( + &self, + ty: Ty<'tcx>, + poly_trait_ref: Option>, + ) -> InterpResult<'tcx, Pointer>> { + trace!("get_vtable(trait_ref={:?})", poly_trait_ref); + + let (ty, poly_trait_ref) = self.tcx.erase_regions((ty, poly_trait_ref)); + + // All vtables must be monomorphic, bail out otherwise. + ensure_monomorphic_enough(*self.tcx, ty)?; + ensure_monomorphic_enough(*self.tcx, poly_trait_ref)?; + + let vtable_symbolic_allocation = self.tcx.create_vtable_alloc(ty, poly_trait_ref); + let vtable_ptr = self.global_base_pointer(Pointer::from(vtable_symbolic_allocation))?; + Ok(vtable_ptr.into()) + } + + /// Returns a high-level representation of the entires of the given vtable. 
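+    ///
+    /// Roughly speaking, for a vtable with a principal trait the slice begins with the common
+    /// entries (drop-in-place glue, size, alignment) followed by the trait's method entries;
+    /// without a principal trait only `TyCtxt::COMMON_VTABLE_ENTRIES` is returned.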
+ pub fn get_vtable_entries( + &self, + vtable: Pointer>, + ) -> InterpResult<'tcx, &'tcx [ty::VtblEntry<'tcx>]> { + let (ty, poly_trait_ref) = self.get_ptr_vtable(vtable)?; + Ok(if let Some(poly_trait_ref) = poly_trait_ref { + let trait_ref = poly_trait_ref.with_self_ty(*self.tcx, ty); + let trait_ref = self.tcx.erase_regions(trait_ref); + self.tcx.vtable_entries(trait_ref) + } else { + TyCtxt::COMMON_VTABLE_ENTRIES + }) + } + + pub fn get_vtable_size_and_align( + &self, + vtable: Pointer>, + ) -> InterpResult<'tcx, (Size, Align)> { + let (ty, _trait_ref) = self.get_ptr_vtable(vtable)?; + let layout = self.layout_of(ty)?; + assert!(!layout.is_unsized(), "there are no vtables for unsized types"); + Ok((layout.size, layout.align.abi)) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/util.rs b/compiler/rustc_const_eval/src/interpret/util.rs new file mode 100644 index 000000000..2bc521d5b --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/util.rs @@ -0,0 +1,73 @@ +use rustc_middle::mir::interpret::InterpResult; +use rustc_middle::ty::{self, Ty, TyCtxt, TypeSuperVisitable, TypeVisitable, TypeVisitor}; +use std::convert::TryInto; +use std::ops::ControlFlow; + +/// Checks whether a type contains generic parameters which require substitution. +/// +/// In case it does, returns a `TooGeneric` const eval error. Note that due to polymorphization +/// types may be "concrete enough" even though they still contain generic parameters in +/// case these parameters are unused. +pub(crate) fn ensure_monomorphic_enough<'tcx, T>(tcx: TyCtxt<'tcx>, ty: T) -> InterpResult<'tcx> +where + T: TypeVisitable<'tcx>, +{ + debug!("ensure_monomorphic_enough: ty={:?}", ty); + if !ty.needs_subst() { + return Ok(()); + } + + struct FoundParam; + struct UsedParamsNeedSubstVisitor<'tcx> { + tcx: TyCtxt<'tcx>, + } + + impl<'tcx> TypeVisitor<'tcx> for UsedParamsNeedSubstVisitor<'tcx> { + type BreakTy = FoundParam; + + fn visit_ty(&mut self, ty: Ty<'tcx>) -> ControlFlow { + if !ty.needs_subst() { + return ControlFlow::CONTINUE; + } + + match *ty.kind() { + ty::Param(_) => ControlFlow::Break(FoundParam), + ty::Closure(def_id, substs) + | ty::Generator(def_id, substs, ..) + | ty::FnDef(def_id, substs) => { + let instance = ty::InstanceDef::Item(ty::WithOptConstParam::unknown(def_id)); + let unused_params = self.tcx.unused_generic_params(instance); + for (index, subst) in substs.into_iter().enumerate() { + let index = index + .try_into() + .expect("more generic parameters than can fit into a `u32`"); + let is_used = unused_params.contains(index).map_or(true, |unused| !unused); + // Only recurse when generic parameters in fns, closures and generators + // are used and require substitution. + // Just in case there are closures or generators within this subst, + // recurse. + if is_used && subst.needs_subst() { + return subst.visit_with(self); + } + } + ControlFlow::CONTINUE + } + _ => ty.super_visit_with(self), + } + } + + fn visit_const(&mut self, c: ty::Const<'tcx>) -> ControlFlow { + match c.kind() { + ty::ConstKind::Param(..) 
=> ControlFlow::Break(FoundParam), + _ => c.super_visit_with(self), + } + } + } + + let mut vis = UsedParamsNeedSubstVisitor { tcx }; + if matches!(ty.visit_with(&mut vis), ControlFlow::Break(FoundParam)) { + throw_inval!(TooGeneric); + } else { + Ok(()) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs new file mode 100644 index 000000000..0e50d1ed4 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/validity.rs @@ -0,0 +1,986 @@ +//! Check the validity invariant of a given value, and tell the user +//! where in the value it got violated. +//! In const context, this goes even further and tries to approximate const safety. +//! That's useful because it means other passes (e.g. promotion) can rely on `const`s +//! to be const-safe. + +use std::convert::TryFrom; +use std::fmt::Write; +use std::num::NonZeroUsize; + +use rustc_data_structures::fx::FxHashSet; +use rustc_hir as hir; +use rustc_middle::mir::interpret::InterpError; +use rustc_middle::ty; +use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; +use rustc_span::symbol::{sym, Symbol}; +use rustc_span::DUMMY_SP; +use rustc_target::abi::{Abi, Scalar as ScalarAbi, Size, VariantIdx, Variants, WrappingRange}; + +use std::hash::Hash; + +use super::{ + alloc_range, CheckInAllocMsg, GlobalAlloc, Immediate, InterpCx, InterpResult, MPlaceTy, + Machine, MemPlaceMeta, OpTy, Scalar, ScalarMaybeUninit, ValueVisitor, +}; + +macro_rules! throw_validation_failure { + ($where:expr, { $( $what_fmt:expr ),+ } $( expected { $( $expected_fmt:expr ),+ } )?) => {{ + let mut msg = String::new(); + msg.push_str("encountered "); + write!(&mut msg, $($what_fmt),+).unwrap(); + $( + msg.push_str(", but expected "); + write!(&mut msg, $($expected_fmt),+).unwrap(); + )? + let path = rustc_middle::ty::print::with_no_trimmed_paths!({ + let where_ = &$where; + if !where_.is_empty() { + let mut path = String::new(); + write_path(&mut path, where_); + Some(path) + } else { + None + } + }); + throw_ub!(ValidationFailure { path, msg }) + }}; +} + +/// If $e throws an error matching the pattern, throw a validation failure. +/// Other errors are passed back to the caller, unchanged -- and if they reach the root of +/// the visitor, we make sure only validation errors and `InvalidProgram` errors are left. +/// This lets you use the patterns as a kind of validation list, asserting which errors +/// can possibly happen: +/// +/// ``` +/// let v = try_validation!(some_fn(), some_path, { +/// Foo | Bar | Baz => { "some failure" }, +/// }); +/// ``` +/// +/// An additional expected parameter can also be added to the failure message: +/// +/// ``` +/// let v = try_validation!(some_fn(), some_path, { +/// Foo | Bar | Baz => { "some failure" } expected { "something that wasn't a failure" }, +/// }); +/// ``` +/// +/// An additional nicety is that both parameters actually take format args, so you can just write +/// the format string in directly: +/// +/// ``` +/// let v = try_validation!(some_fn(), some_path, { +/// Foo | Bar | Baz => { "{:?}", some_failure } expected { "{}", expected_value }, +/// }); +/// ``` +/// +macro_rules! try_validation { + ($e:expr, $where:expr, + $( $( $p:pat_param )|+ => { $( $what_fmt:expr ),+ } $( expected { $( $expected_fmt:expr ),+ } )? ),+ $(,)? + ) => {{ + match $e { + Ok(x) => x, + // We catch the error and turn it into a validation failure. We are okay with + // allocation here as this can only slow down builds that fail anyway. 
+ Err(e) => match e.kind() { + $( + $($p)|+ => + throw_validation_failure!( + $where, + { $( $what_fmt ),+ } $( expected { $( $expected_fmt ),+ } )? + ) + ),+, + #[allow(unreachable_patterns)] + _ => Err::(e)?, + } + } + }}; +} + +/// We want to show a nice path to the invalid field for diagnostics, +/// but avoid string operations in the happy case where no error happens. +/// So we track a `Vec` where `PathElem` contains all the data we +/// need to later print something for the user. +#[derive(Copy, Clone, Debug)] +pub enum PathElem { + Field(Symbol), + Variant(Symbol), + GeneratorState(VariantIdx), + CapturedVar(Symbol), + ArrayElem(usize), + TupleElem(usize), + Deref, + EnumTag, + GeneratorTag, + DynDowncast, +} + +/// Extra things to check for during validation of CTFE results. +pub enum CtfeValidationMode { + /// Regular validation, nothing special happening. + Regular, + /// Validation of a `const`. + /// `inner` says if this is an inner, indirect allocation (as opposed to the top-level const + /// allocation). Being an inner allocation makes a difference because the top-level allocation + /// of a `const` is copied for each use, but the inner allocations are implicitly shared. + /// `allow_static_ptrs` says if pointers to statics are permitted (which is the case for promoteds in statics). + Const { inner: bool, allow_static_ptrs: bool }, +} + +/// State for tracking recursive validation of references +pub struct RefTracking { + pub seen: FxHashSet, + pub todo: Vec<(T, PATH)>, +} + +impl RefTracking { + pub fn empty() -> Self { + RefTracking { seen: FxHashSet::default(), todo: vec![] } + } + pub fn new(op: T) -> Self { + let mut ref_tracking_for_consts = + RefTracking { seen: FxHashSet::default(), todo: vec![(op, PATH::default())] }; + ref_tracking_for_consts.seen.insert(op); + ref_tracking_for_consts + } + + pub fn track(&mut self, op: T, path: impl FnOnce() -> PATH) { + if self.seen.insert(op) { + trace!("Recursing below ptr {:#?}", op); + let path = path(); + // Remember to come back to this later. + self.todo.push((op, path)); + } + } +} + +/// Format a path +fn write_path(out: &mut String, path: &[PathElem]) { + use self::PathElem::*; + + for elem in path.iter() { + match elem { + Field(name) => write!(out, ".{}", name), + EnumTag => write!(out, "."), + Variant(name) => write!(out, ".", name), + GeneratorTag => write!(out, "."), + GeneratorState(idx) => write!(out, ".", idx.index()), + CapturedVar(name) => write!(out, ".", name), + TupleElem(idx) => write!(out, ".{}", idx), + ArrayElem(idx) => write!(out, "[{}]", idx), + // `.` does not match Rust syntax, but it is more readable for long paths -- and + // some of the other items here also are not Rust syntax. Actually we can't + // even use the usual syntax because we are just showing the projections, + // not the root. + Deref => write!(out, "."), + DynDowncast => write!(out, "."), + } + .unwrap() + } +} + +// Formats such that a sentence like "expected something {}" to mean +// "expected something " makes sense. 
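+// For illustration (example outputs, not part of the original comment; assuming
+// `max_hi = 255`):
+//   WrappingRange { start: 0,  end: 10  }  ->  "less or equal to 10"
+//   WrappingRange { start: 1,  end: 255 }  ->  "greater or equal to 1"
+//   WrappingRange { start: 10, end: 5   }  ->  "less or equal to 5, or greater or equal to 10"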
+fn wrapping_range_format(r: WrappingRange, max_hi: u128) -> String { + let WrappingRange { start: lo, end: hi } = r; + assert!(hi <= max_hi); + if lo > hi { + format!("less or equal to {}, or greater or equal to {}", hi, lo) + } else if lo == hi { + format!("equal to {}", lo) + } else if lo == 0 { + assert!(hi < max_hi, "should not be printing if the range covers everything"); + format!("less or equal to {}", hi) + } else if hi == max_hi { + assert!(lo > 0, "should not be printing if the range covers everything"); + format!("greater or equal to {}", lo) + } else { + format!("in the range {:?}", r) + } +} + +struct ValidityVisitor<'rt, 'mir, 'tcx, M: Machine<'mir, 'tcx>> { + /// The `path` may be pushed to, but the part that is present when a function + /// starts must not be changed! `visit_fields` and `visit_array` rely on + /// this stack discipline. + path: Vec, + ref_tracking: Option<&'rt mut RefTracking, Vec>>, + /// `None` indicates this is not validating for CTFE (but for runtime). + ctfe_mode: Option, + ecx: &'rt InterpCx<'mir, 'tcx, M>, +} + +impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, 'tcx, M> { + fn aggregate_field_path_elem(&mut self, layout: TyAndLayout<'tcx>, field: usize) -> PathElem { + // First, check if we are projecting to a variant. + match layout.variants { + Variants::Multiple { tag_field, .. } => { + if tag_field == field { + return match layout.ty.kind() { + ty::Adt(def, ..) if def.is_enum() => PathElem::EnumTag, + ty::Generator(..) => PathElem::GeneratorTag, + _ => bug!("non-variant type {:?}", layout.ty), + }; + } + } + Variants::Single { .. } => {} + } + + // Now we know we are projecting to a field, so figure out which one. + match layout.ty.kind() { + // generators and closures. + ty::Closure(def_id, _) | ty::Generator(def_id, _, _) => { + let mut name = None; + // FIXME this should be more descriptive i.e. CapturePlace instead of CapturedVar + // https://github.com/rust-lang/project-rfc-2229/issues/46 + if let Some(local_def_id) = def_id.as_local() { + let tables = self.ecx.tcx.typeck(local_def_id); + if let Some(captured_place) = + tables.closure_min_captures_flattened(local_def_id).nth(field) + { + // Sometimes the index is beyond the number of upvars (seen + // for a generator). + let var_hir_id = captured_place.get_root_variable(); + let node = self.ecx.tcx.hir().get(var_hir_id); + if let hir::Node::Pat(pat) = node { + if let hir::PatKind::Binding(_, _, ident, _) = pat.kind { + name = Some(ident.name); + } + } + } + } + + PathElem::CapturedVar(name.unwrap_or_else(|| { + // Fall back to showing the field index. + sym::integer(field) + })) + } + + // tuples + ty::Tuple(_) => PathElem::TupleElem(field), + + // enums + ty::Adt(def, ..) if def.is_enum() => { + // we might be projecting *to* a variant, or to a field *in* a variant. + match layout.variants { + Variants::Single { index } => { + // Inside a variant + PathElem::Field(def.variant(index).fields[field].name) + } + Variants::Multiple { .. } => bug!("we handled variants above"), + } + } + + // other ADTs + ty::Adt(def, _) => PathElem::Field(def.non_enum_variant().fields[field].name), + + // arrays/slices + ty::Array(..) | ty::Slice(..) => PathElem::ArrayElem(field), + + // dyn traits + ty::Dynamic(..) 
=> PathElem::DynDowncast, + + // nothing else has an aggregate layout + _ => bug!("aggregate_field_path_elem: got non-aggregate type {:?}", layout.ty), + } + } + + fn with_elem( + &mut self, + elem: PathElem, + f: impl FnOnce(&mut Self) -> InterpResult<'tcx, R>, + ) -> InterpResult<'tcx, R> { + // Remember the old state + let path_len = self.path.len(); + // Record new element + self.path.push(elem); + // Perform operation + let r = f(self)?; + // Undo changes + self.path.truncate(path_len); + // Done + Ok(r) + } + + fn check_wide_ptr_meta( + &mut self, + meta: MemPlaceMeta, + pointee: TyAndLayout<'tcx>, + ) -> InterpResult<'tcx> { + let tail = self.ecx.tcx.struct_tail_erasing_lifetimes(pointee.ty, self.ecx.param_env); + match tail.kind() { + ty::Dynamic(..) => { + let vtable = meta.unwrap_meta().to_pointer(self.ecx)?; + // Make sure it is a genuine vtable pointer. + let (_ty, _trait) = try_validation!( + self.ecx.get_ptr_vtable(vtable), + self.path, + err_ub!(DanglingIntPointer(..)) | + err_ub!(InvalidVTablePointer(..)) => + { "{vtable}" } expected { "a vtable pointer" }, + ); + // FIXME: check if the type/trait match what ty::Dynamic says? + } + ty::Slice(..) | ty::Str => { + let _len = meta.unwrap_meta().to_machine_usize(self.ecx)?; + // We do not check that `len * elem_size <= isize::MAX`: + // that is only required for references, and there it falls out of the + // "dereferenceable" check performed by Stacked Borrows. + } + ty::Foreign(..) => { + // Unsized, but not wide. + } + _ => bug!("Unexpected unsized type tail: {:?}", tail), + } + + Ok(()) + } + + /// Check a reference or `Box`. + fn check_safe_pointer( + &mut self, + value: &OpTy<'tcx, M::Provenance>, + kind: &str, + ) -> InterpResult<'tcx> { + let value = self.ecx.read_immediate(value)?; + // Handle wide pointers. + // Check metadata early, for better diagnostics + let place = try_validation!( + self.ecx.ref_to_mplace(&value), + self.path, + err_ub!(InvalidUninitBytes(None)) => { "uninitialized {}", kind }, + ); + if place.layout.is_unsized() { + self.check_wide_ptr_meta(place.meta, place.layout)?; + } + // Make sure this is dereferenceable and all. + let size_and_align = try_validation!( + self.ecx.size_and_align_of_mplace(&place), + self.path, + err_ub!(InvalidMeta(msg)) => { "invalid {} metadata: {}", kind, msg }, + ); + let (size, align) = size_and_align + // for the purpose of validity, consider foreign types to have + // alignment and size determined by the layout (size will be 0, + // alignment should take attributes into account). + .unwrap_or_else(|| (place.layout.size, place.layout.align.abi)); + // Direct call to `check_ptr_access_align` checks alignment even on CTFE machines. + try_validation!( + self.ecx.check_ptr_access_align( + place.ptr, + size, + align, + CheckInAllocMsg::InboundsTest, // will anyway be replaced by validity message + ), + self.path, + err_ub!(AlignmentCheckFailed { required, has }) => + { + "an unaligned {kind} (required {} byte alignment but found {})", + required.bytes(), + has.bytes() + }, + err_ub!(DanglingIntPointer(0, _)) => + { "a null {kind}" }, + err_ub!(DanglingIntPointer(i, _)) => + { "a dangling {kind} (address {i:#x} is unallocated)" }, + err_ub!(PointerOutOfBounds { .. }) => + { "a dangling {kind} (going beyond the bounds of its allocation)" }, + // This cannot happen during const-eval (because interning already detects + // dangling pointers), but it can happen in Miri. 
+ err_ub!(PointerUseAfterFree(..)) => + { "a dangling {kind} (use-after-free)" }, + ); + // Do not allow pointers to uninhabited types. + if place.layout.abi.is_uninhabited() { + throw_validation_failure!(self.path, + { "a {kind} pointing to uninhabited type {}", place.layout.ty } + ) + } + // Recursive checking + if let Some(ref mut ref_tracking) = self.ref_tracking { + // Proceed recursively even for ZST, no reason to skip them! + // `!` is a ZST and we want to validate it. + if let Ok((alloc_id, _offset, _prov)) = self.ecx.ptr_try_get_alloc_id(place.ptr) { + // Special handling for pointers to statics (irrespective of their type). + let alloc_kind = self.ecx.tcx.try_get_global_alloc(alloc_id); + if let Some(GlobalAlloc::Static(did)) = alloc_kind { + assert!(!self.ecx.tcx.is_thread_local_static(did)); + assert!(self.ecx.tcx.is_static(did)); + if matches!( + self.ctfe_mode, + Some(CtfeValidationMode::Const { allow_static_ptrs: false, .. }) + ) { + // See const_eval::machine::MemoryExtra::can_access_statics for why + // this check is so important. + // This check is reachable when the const just referenced the static, + // but never read it (so we never entered `before_access_global`). + throw_validation_failure!(self.path, + { "a {} pointing to a static variable", kind } + ); + } + // We skip checking other statics. These statics must be sound by + // themselves, and the only way to get broken statics here is by using + // unsafe code. + // The reasons we don't check other statics is twofold. For one, in all + // sound cases, the static was already validated on its own, and second, we + // trigger cycle errors if we try to compute the value of the other static + // and that static refers back to us. + // We might miss const-invalid data, + // but things are still sound otherwise (in particular re: consts + // referring to statics). + return Ok(()); + } + } + let path = &self.path; + ref_tracking.track(place, || { + // We need to clone the path anyway, make sure it gets created + // with enough space for the additional `Deref`. + let mut new_path = Vec::with_capacity(path.len() + 1); + new_path.extend(path); + new_path.push(PathElem::Deref); + new_path + }); + } + Ok(()) + } + + fn read_scalar( + &self, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + self.ecx.read_scalar(op) + } + + fn read_immediate_forced( + &self, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, Immediate> { + Ok(*self.ecx.read_immediate_raw(op, /*force*/ true)?.unwrap()) + } + + /// Check if this is a value of primitive type, and if yes check the validity of the value + /// at that type. Return `true` if the type is indeed primitive. 
+ fn try_visit_primitive( + &mut self, + value: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, bool> { + // Go over all the primitive types + let ty = value.layout.ty; + match ty.kind() { + ty::Bool => { + let value = self.read_scalar(value)?; + try_validation!( + value.to_bool(), + self.path, + err_ub!(InvalidBool(..)) | err_ub!(InvalidUninitBytes(None)) => + { "{:x}", value } expected { "a boolean" }, + ); + Ok(true) + } + ty::Char => { + let value = self.read_scalar(value)?; + try_validation!( + value.to_char(), + self.path, + err_ub!(InvalidChar(..)) | err_ub!(InvalidUninitBytes(None)) => + { "{:x}", value } expected { "a valid unicode scalar value (in `0..=0x10FFFF` but not in `0xD800..=0xDFFF`)" }, + ); + Ok(true) + } + ty::Float(_) | ty::Int(_) | ty::Uint(_) => { + let value = self.read_scalar(value)?; + // NOTE: Keep this in sync with the array optimization for int/float + // types below! + if M::enforce_number_init(self.ecx) { + try_validation!( + value.check_init(), + self.path, + err_ub!(InvalidUninitBytes(..)) => + { "{:x}", value } expected { "initialized bytes" } + ); + } + // As a special exception we *do* match on a `Scalar` here, since we truly want + // to know its underlying representation (and *not* cast it to an integer). + let is_ptr = value.check_init().map_or(false, |v| matches!(v, Scalar::Ptr(..))); + if is_ptr { + throw_validation_failure!(self.path, + { "{:x}", value } expected { "plain (non-pointer) bytes" } + ) + } + Ok(true) + } + ty::RawPtr(..) => { + // We are conservative with uninit for integers, but try to + // actually enforce the strict rules for raw pointers (mostly because + // that lets us re-use `ref_to_mplace`). + let place = try_validation!( + self.ecx.read_immediate(value).and_then(|ref i| self.ecx.ref_to_mplace(i)), + self.path, + err_ub!(InvalidUninitBytes(None)) => { "uninitialized raw pointer" }, + ); + if place.layout.is_unsized() { + self.check_wide_ptr_meta(place.meta, place.layout)?; + } + Ok(true) + } + ty::Ref(_, ty, mutbl) => { + if matches!(self.ctfe_mode, Some(CtfeValidationMode::Const { .. })) + && *mutbl == hir::Mutability::Mut + { + // A mutable reference inside a const? That does not seem right (except if it is + // a ZST). + let layout = self.ecx.layout_of(*ty)?; + if !layout.is_zst() { + throw_validation_failure!(self.path, { "mutable reference in a `const`" }); + } + } + self.check_safe_pointer(value, "reference")?; + Ok(true) + } + ty::FnPtr(_sig) => { + let value = try_validation!( + self.ecx.read_scalar(value).and_then(|v| v.check_init()), + self.path, + err_ub!(InvalidUninitBytes(None)) => { "uninitialized bytes" } expected { "a proper pointer or integer value" }, + ); + + // If we check references recursively, also check that this points to a function. + if let Some(_) = self.ref_tracking { + let ptr = value.to_pointer(self.ecx)?; + let _fn = try_validation!( + self.ecx.get_ptr_fn(ptr), + self.path, + err_ub!(DanglingIntPointer(..)) | + err_ub!(InvalidFunctionPointer(..)) => + { "{ptr}" } expected { "a function pointer" }, + ); + // FIXME: Check if the signature matches + } else { + // Otherwise (for standalone Miri), we have to still check it to be non-null. + if self.ecx.scalar_may_be_null(value)? { + throw_validation_failure!(self.path, { "a null function pointer" }); + } + } + Ok(true) + } + ty::Never => throw_validation_failure!(self.path, { "a value of the never type `!`" }), + ty::Foreign(..) | ty::FnDef(..) => { + // Nothing to check. + Ok(true) + } + // The above should be all the primitive types. 
The rest is compound, we + // check them by visiting their fields/variants. + ty::Adt(..) + | ty::Tuple(..) + | ty::Array(..) + | ty::Slice(..) + | ty::Str + | ty::Dynamic(..) + | ty::Closure(..) + | ty::Generator(..) => Ok(false), + // Some types only occur during typechecking, they have no layout. + // We should not see them here and we could not check them anyway. + ty::Error(_) + | ty::Infer(..) + | ty::Placeholder(..) + | ty::Bound(..) + | ty::Param(..) + | ty::Opaque(..) + | ty::Projection(..) + | ty::GeneratorWitness(..) => bug!("Encountered invalid type {:?}", ty), + } + } + + fn visit_scalar( + &mut self, + scalar: ScalarMaybeUninit, + scalar_layout: ScalarAbi, + ) -> InterpResult<'tcx> { + // We check `is_full_range` in a slightly complicated way because *if* we are checking + // number validity, then we want to ensure that `Scalar::Initialized` is indeed initialized, + // i.e. that we go over the `check_init` below. + let size = scalar_layout.size(self.ecx); + let is_full_range = match scalar_layout { + ScalarAbi::Initialized { .. } => { + if M::enforce_number_init(self.ecx) { + false // not "full" since uninit is not accepted + } else { + scalar_layout.is_always_valid(self.ecx) + } + } + ScalarAbi::Union { .. } => true, + }; + if is_full_range { + // Nothing to check. Cruciall we don't even `read_scalar` until here, since that would + // fail for `Union` scalars! + return Ok(()); + } + // We have something to check: it must at least be initialized. + let valid_range = scalar_layout.valid_range(self.ecx); + let WrappingRange { start, end } = valid_range; + let max_value = size.unsigned_int_max(); + assert!(end <= max_value); + let value = try_validation!( + scalar.check_init(), + self.path, + err_ub!(InvalidUninitBytes(None)) => { "{:x}", scalar } + expected { "something {}", wrapping_range_format(valid_range, max_value) }, + ); + let bits = match value.try_to_int() { + Ok(int) => int.assert_bits(size), + Err(_) => { + // So this is a pointer then, and casting to an int failed. + // Can only happen during CTFE. + // We support 2 kinds of ranges here: full range, and excluding zero. + if start == 1 && end == max_value { + // Only null is the niche. So make sure the ptr is NOT null. + if self.ecx.scalar_may_be_null(value)? { + throw_validation_failure!(self.path, + { "a potentially null pointer" } + expected { + "something that cannot possibly fail to be {}", + wrapping_range_format(valid_range, max_value) + } + ) + } else { + return Ok(()); + } + } else if scalar_layout.is_always_valid(self.ecx) { + // Easy. (This is reachable if `enforce_number_validity` is set.) + return Ok(()); + } else { + // Conservatively, we reject, because the pointer *could* have a bad + // value. + throw_validation_failure!(self.path, + { "a pointer" } + expected { + "something that cannot possibly fail to be {}", + wrapping_range_format(valid_range, max_value) + } + ) + } + } + }; + // Now compare. 
+ if valid_range.contains(bits) { + Ok(()) + } else { + throw_validation_failure!(self.path, + { "{}", bits } + expected { "something {}", wrapping_range_format(valid_range, max_value) } + ) + } + } +} + +impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M> + for ValidityVisitor<'rt, 'mir, 'tcx, M> +{ + type V = OpTy<'tcx, M::Provenance>; + + #[inline(always)] + fn ecx(&self) -> &InterpCx<'mir, 'tcx, M> { + &self.ecx + } + + fn read_discriminant( + &mut self, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, VariantIdx> { + self.with_elem(PathElem::EnumTag, move |this| { + Ok(try_validation!( + this.ecx.read_discriminant(op), + this.path, + err_ub!(InvalidTag(val)) => + { "{:x}", val } expected { "a valid enum tag" }, + err_ub!(InvalidUninitBytes(None)) => + { "uninitialized bytes" } expected { "a valid enum tag" }, + ) + .1) + }) + } + + #[inline] + fn visit_field( + &mut self, + old_op: &OpTy<'tcx, M::Provenance>, + field: usize, + new_op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + let elem = self.aggregate_field_path_elem(old_op.layout, field); + self.with_elem(elem, move |this| this.visit_value(new_op)) + } + + #[inline] + fn visit_variant( + &mut self, + old_op: &OpTy<'tcx, M::Provenance>, + variant_id: VariantIdx, + new_op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx> { + let name = match old_op.layout.ty.kind() { + ty::Adt(adt, _) => PathElem::Variant(adt.variant(variant_id).name), + // Generators also have variants + ty::Generator(..) => PathElem::GeneratorState(variant_id), + _ => bug!("Unexpected type with variant: {:?}", old_op.layout.ty), + }; + self.with_elem(name, move |this| this.visit_value(new_op)) + } + + #[inline(always)] + fn visit_union( + &mut self, + op: &OpTy<'tcx, M::Provenance>, + _fields: NonZeroUsize, + ) -> InterpResult<'tcx> { + // Special check preventing `UnsafeCell` inside unions in the inner part of constants. + if matches!(self.ctfe_mode, Some(CtfeValidationMode::Const { inner: true, .. })) { + if !op.layout.ty.is_freeze(self.ecx.tcx.at(DUMMY_SP), self.ecx.param_env) { + throw_validation_failure!(self.path, { "`UnsafeCell` in a `const`" }); + } + } + Ok(()) + } + + #[inline] + fn visit_box(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx> { + self.check_safe_pointer(op, "box")?; + Ok(()) + } + + #[inline] + fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx> { + trace!("visit_value: {:?}, {:?}", *op, op.layout); + + // Check primitive types -- the leaves of our recursive descent. + if self.try_visit_primitive(op)? { + return Ok(()); + } + + // Special check preventing `UnsafeCell` in the inner part of constants + if let Some(def) = op.layout.ty.ty_adt_def() { + if matches!(self.ctfe_mode, Some(CtfeValidationMode::Const { inner: true, .. })) + && def.is_unsafe_cell() + { + throw_validation_failure!(self.path, { "`UnsafeCell` in a `const`" }); + } + } + + // Recursively walk the value at its type. + self.walk_value(op)?; + + // *After* all of this, check the ABI. We need to check the ABI to handle + // types like `NonNull` where the `Scalar` info is more restrictive than what + // the fields say (`rustc_layout_scalar_valid_range_start`). + // But in most cases, this will just propagate what the fields say, + // and then we want the error to point at the field -- so, first recurse, + // then check ABI. + // + // FIXME: We could avoid some redundant checks here. 
For newtypes wrapping + // scalars, we do the same check on every "level" (e.g., first we check + // MyNewtype and then the scalar in there). + match op.layout.abi { + Abi::Uninhabited => { + throw_validation_failure!(self.path, + { "a value of uninhabited type {:?}", op.layout.ty } + ); + } + Abi::Scalar(scalar_layout) => { + // We use a 'forced' read because we always need a `Immediate` here + // and treating "partially uninit" as "fully uninit" is fine for us. + let scalar = self.read_immediate_forced(op)?.to_scalar_or_uninit(); + self.visit_scalar(scalar, scalar_layout)?; + } + Abi::ScalarPair(a_layout, b_layout) => { + // There is no `rustc_layout_scalar_valid_range_start` for pairs, so + // we would validate these things as we descend into the fields, + // but that can miss bugs in layout computation. Layout computation + // is subtle due to enums having ScalarPair layout, where one field + // is the discriminant. + if cfg!(debug_assertions) { + // We use a 'forced' read because we always need a `Immediate` here + // and treating "partially uninit" as "fully uninit" is fine for us. + let (a, b) = self.read_immediate_forced(op)?.to_scalar_or_uninit_pair(); + self.visit_scalar(a, a_layout)?; + self.visit_scalar(b, b_layout)?; + } + } + Abi::Vector { .. } => { + // No checks here, we assume layout computation gets this right. + // (This is harder to check since Miri does not represent these as `Immediate`. We + // also cannot use field projections since this might be a newtype around a vector.) + } + Abi::Aggregate { .. } => { + // Nothing to do. + } + } + + Ok(()) + } + + fn visit_aggregate( + &mut self, + op: &OpTy<'tcx, M::Provenance>, + fields: impl Iterator>, + ) -> InterpResult<'tcx> { + match op.layout.ty.kind() { + ty::Str => { + let mplace = op.assert_mem_place(); // strings are unsized and hence never immediate + let len = mplace.len(self.ecx)?; + try_validation!( + self.ecx.read_bytes_ptr(mplace.ptr, Size::from_bytes(len)), + self.path, + err_ub!(InvalidUninitBytes(..)) => { "uninitialized data in `str`" }, + ); + } + ty::Array(tys, ..) | ty::Slice(tys) + // This optimization applies for types that can hold arbitrary bytes (such as + // integer and floating point types) or for structs or tuples with no fields. + // FIXME(wesleywiser) This logic could be extended further to arbitrary structs + // or tuples made up of integer/floating point types or inhabited ZSTs with no + // padding. + if matches!(tys.kind(), ty::Int(..) | ty::Uint(..) | ty::Float(..)) + => + { + // Optimized handling for arrays of integer/float type. + + // This is the length of the array/slice. + let len = op.len(self.ecx)?; + // This is the element type size. + let layout = self.ecx.layout_of(*tys)?; + // This is the size in bytes of the whole array. (This checks for overflow.) + let size = layout.size * len; + // If the size is 0, there is nothing to check. + // (`size` can only be 0 of `len` is 0, and empty arrays are always valid.) + if size == Size::ZERO { + return Ok(()); + } + // Now that we definitely have a non-ZST array, we know it lives in memory. + let mplace = match op.try_as_mplace() { + Ok(mplace) => mplace, + Err(imm) => match *imm { + Immediate::Uninit => + throw_validation_failure!(self.path, { "uninitialized bytes" }), + Immediate::Scalar(..) | Immediate::ScalarPair(..) => + bug!("arrays/slices can never have Scalar/ScalarPair layout"), + } + }; + + // Optimization: we just check the entire range at once. 
+ // NOTE: Keep this in sync with the handling of integer and float + // types above, in `visit_primitive`. + // In run-time mode, we accept pointers in here. This is actually more + // permissive than a per-element check would be, e.g., we accept + // a &[u8] that contains a pointer even though bytewise checking would + // reject it. However, that's good: We don't inherently want + // to reject those pointers, we just do not have the machinery to + // talk about parts of a pointer. + // We also accept uninit, for consistency with the slow path. + let alloc = self.ecx.get_ptr_alloc(mplace.ptr, size, mplace.align)?.expect("we already excluded size 0"); + + match alloc.check_bytes( + alloc_range(Size::ZERO, size), + /*allow_uninit*/ !M::enforce_number_init(self.ecx), + /*allow_ptr*/ false, + ) { + // In the happy case, we needn't check anything else. + Ok(()) => {} + // Some error happened, try to provide a more detailed description. + Err(err) => { + // For some errors we might be able to provide extra information. + // (This custom logic does not fit the `try_validation!` macro.) + match err.kind() { + err_ub!(InvalidUninitBytes(Some((_alloc_id, access)))) => { + // Some byte was uninitialized, determine which + // element that byte belongs to so we can + // provide an index. + let i = usize::try_from( + access.uninit.start.bytes() / layout.size.bytes(), + ) + .unwrap(); + self.path.push(PathElem::ArrayElem(i)); + + throw_validation_failure!(self.path, { "uninitialized bytes" }) + } + + // Propagate upwards (that will also check for unexpected errors). + _ => return Err(err), + } + } + } + } + // Fast path for arrays and slices of ZSTs. We only need to check a single ZST element + // of an array and not all of them, because there's only a single value of a specific + // ZST type, so either validation fails for all elements or none. + ty::Array(tys, ..) | ty::Slice(tys) if self.ecx.layout_of(*tys)?.is_zst() => { + // Validate just the first element (if any). + self.walk_aggregate(op, fields.take(1))? + } + _ => { + self.walk_aggregate(op, fields)? // default handler + } + } + Ok(()) + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { + fn validate_operand_internal( + &self, + op: &OpTy<'tcx, M::Provenance>, + path: Vec, + ref_tracking: Option<&mut RefTracking, Vec>>, + ctfe_mode: Option, + ) -> InterpResult<'tcx> { + trace!("validate_operand_internal: {:?}, {:?}", *op, op.layout.ty); + + // Construct a visitor + let mut visitor = ValidityVisitor { path, ref_tracking, ctfe_mode, ecx: self }; + + // Run it. + match visitor.visit_value(&op) { + Ok(()) => Ok(()), + // Pass through validation failures. + Err(err) if matches!(err.kind(), err_ub!(ValidationFailure { .. })) => Err(err), + // Complain about any other kind of UB error -- those are bad because we'd like to + // report them in a way that shows *where* in the value the issue lies. + Err(err) if matches!(err.kind(), InterpError::UndefinedBehavior(_)) => { + err.print_backtrace(); + bug!("Unexpected Undefined Behavior error during validation: {}", err); + } + // Pass through everything else. + Err(err) => Err(err), + } + } + + /// This function checks the data at `op` to be const-valid. + /// `op` is assumed to cover valid memory if it is an indirect operand. + /// It will error if the bits at the destination do not match the ones described by the layout. + /// + /// `ref_tracking` is used to record references that we encounter so that they + /// can be checked recursively by an outside driving loop. 
+ /// + /// `constant` controls whether this must satisfy the rules for constants: + /// - no pointers to statics. + /// - no `UnsafeCell` or non-ZST `&mut`. + #[inline(always)] + pub fn const_validate_operand( + &self, + op: &OpTy<'tcx, M::Provenance>, + path: Vec, + ref_tracking: &mut RefTracking, Vec>, + ctfe_mode: CtfeValidationMode, + ) -> InterpResult<'tcx> { + self.validate_operand_internal(op, path, Some(ref_tracking), Some(ctfe_mode)) + } + + /// This function checks the data at `op` to be runtime-valid. + /// `op` is assumed to cover valid memory if it is an indirect operand. + /// It will error if the bits at the destination do not match the ones described by the layout. + #[inline(always)] + pub fn validate_operand(&self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx> { + self.validate_operand_internal(op, vec![], None, None) + } +} diff --git a/compiler/rustc_const_eval/src/interpret/visitor.rs b/compiler/rustc_const_eval/src/interpret/visitor.rs new file mode 100644 index 000000000..aee1f93b1 --- /dev/null +++ b/compiler/rustc_const_eval/src/interpret/visitor.rs @@ -0,0 +1,534 @@ +//! Visitor for a run-time value with a given layout: Traverse enums, structs and other compound +//! types until we arrive at the leaves, with custom handling for primitive types. + +use rustc_middle::mir::interpret::InterpResult; +use rustc_middle::ty; +use rustc_middle::ty::layout::TyAndLayout; +use rustc_target::abi::{FieldsShape, VariantIdx, Variants}; + +use std::num::NonZeroUsize; + +use super::{InterpCx, MPlaceTy, Machine, OpTy, PlaceTy}; + +/// A thing that we can project into, and that has a layout. +/// This wouldn't have to depend on `Machine` but with the current type inference, +/// that's just more convenient to work with (avoids repeating all the `Machine` bounds). +pub trait Value<'mir, 'tcx, M: Machine<'mir, 'tcx>>: Sized { + /// Gets this value's layout. + fn layout(&self) -> TyAndLayout<'tcx>; + + /// Makes this into an `OpTy`, in a cheap way that is good for reading. + fn to_op_for_read( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>>; + + /// Makes this into an `OpTy`, in a potentially more expensive way that is good for projections. + fn to_op_for_proj( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + self.to_op_for_read(ecx) + } + + /// Creates this from an `OpTy`. + /// + /// If `to_op_for_proj` only ever produces `Indirect` operands, then this one is definitely `Indirect`. + fn from_op(op: &OpTy<'tcx, M::Provenance>) -> Self; + + /// Projects to the given enum variant. + fn project_downcast( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + variant: VariantIdx, + ) -> InterpResult<'tcx, Self>; + + /// Projects to the n-th field. + fn project_field( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + field: usize, + ) -> InterpResult<'tcx, Self>; +} + +/// A thing that we can project into given *mutable* access to `ecx`, and that has a layout. +/// This wouldn't have to depend on `Machine` but with the current type inference, +/// that's just more convenient to work with (avoids repeating all the `Machine` bounds). +pub trait ValueMut<'mir, 'tcx, M: Machine<'mir, 'tcx>>: Sized { + /// Gets this value's layout. + fn layout(&self) -> TyAndLayout<'tcx>; + + /// Makes this into an `OpTy`, in a cheap way that is good for reading. 
+ fn to_op_for_read( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>>; + + /// Makes this into an `OpTy`, in a potentially more expensive way that is good for projections. + fn to_op_for_proj( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>>; + + /// Creates this from an `OpTy`. + /// + /// If `to_op_for_proj` only ever produces `Indirect` operands, then this one is definitely `Indirect`. + fn from_op(op: &OpTy<'tcx, M::Provenance>) -> Self; + + /// Projects to the given enum variant. + fn project_downcast( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + variant: VariantIdx, + ) -> InterpResult<'tcx, Self>; + + /// Projects to the n-th field. + fn project_field( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + field: usize, + ) -> InterpResult<'tcx, Self>; +} + +// We cannot have a general impl which shows that Value implies ValueMut. (When we do, it says we +// cannot `impl ValueMut for PlaceTy` because some downstream crate could `impl Value for PlaceTy`.) +// So we have some copy-paste here. (We could have a macro but since we only have 2 types with this +// double-impl, that would barely make the code shorter, if at all.) + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> Value<'mir, 'tcx, M> for OpTy<'tcx, M::Provenance> { + #[inline(always)] + fn layout(&self) -> TyAndLayout<'tcx> { + self.layout + } + + #[inline(always)] + fn to_op_for_read( + &self, + _ecx: &InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + Ok(self.clone()) + } + + #[inline(always)] + fn from_op(op: &OpTy<'tcx, M::Provenance>) -> Self { + op.clone() + } + + #[inline(always)] + fn project_downcast( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + variant: VariantIdx, + ) -> InterpResult<'tcx, Self> { + ecx.operand_downcast(self, variant) + } + + #[inline(always)] + fn project_field( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + field: usize, + ) -> InterpResult<'tcx, Self> { + ecx.operand_field(self, field) + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueMut<'mir, 'tcx, M> + for OpTy<'tcx, M::Provenance> +{ + #[inline(always)] + fn layout(&self) -> TyAndLayout<'tcx> { + self.layout + } + + #[inline(always)] + fn to_op_for_read( + &self, + _ecx: &InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + Ok(self.clone()) + } + + #[inline(always)] + fn to_op_for_proj( + &self, + _ecx: &mut InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + Ok(self.clone()) + } + + #[inline(always)] + fn from_op(op: &OpTy<'tcx, M::Provenance>) -> Self { + op.clone() + } + + #[inline(always)] + fn project_downcast( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + variant: VariantIdx, + ) -> InterpResult<'tcx, Self> { + ecx.operand_downcast(self, variant) + } + + #[inline(always)] + fn project_field( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + field: usize, + ) -> InterpResult<'tcx, Self> { + ecx.operand_field(self, field) + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> Value<'mir, 'tcx, M> + for MPlaceTy<'tcx, M::Provenance> +{ + #[inline(always)] + fn layout(&self) -> TyAndLayout<'tcx> { + self.layout + } + + #[inline(always)] + fn to_op_for_read( + &self, + _ecx: &InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + Ok(self.into()) + } + + #[inline(always)] + fn from_op(op: &OpTy<'tcx, M::Provenance>) -> Self { + // assert is justified because our `to_op_for_read` only ever produces `Indirect` operands. 
+ op.assert_mem_place() + } + + #[inline(always)] + fn project_downcast( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + variant: VariantIdx, + ) -> InterpResult<'tcx, Self> { + ecx.mplace_downcast(self, variant) + } + + #[inline(always)] + fn project_field( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + field: usize, + ) -> InterpResult<'tcx, Self> { + ecx.mplace_field(self, field) + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueMut<'mir, 'tcx, M> + for MPlaceTy<'tcx, M::Provenance> +{ + #[inline(always)] + fn layout(&self) -> TyAndLayout<'tcx> { + self.layout + } + + #[inline(always)] + fn to_op_for_read( + &self, + _ecx: &InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + Ok(self.into()) + } + + #[inline(always)] + fn to_op_for_proj( + &self, + _ecx: &mut InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + Ok(self.into()) + } + + #[inline(always)] + fn from_op(op: &OpTy<'tcx, M::Provenance>) -> Self { + // assert is justified because our `to_op_for_proj` only ever produces `Indirect` operands. + op.assert_mem_place() + } + + #[inline(always)] + fn project_downcast( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + variant: VariantIdx, + ) -> InterpResult<'tcx, Self> { + ecx.mplace_downcast(self, variant) + } + + #[inline(always)] + fn project_field( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + field: usize, + ) -> InterpResult<'tcx, Self> { + ecx.mplace_field(self, field) + } +} + +impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueMut<'mir, 'tcx, M> + for PlaceTy<'tcx, M::Provenance> +{ + #[inline(always)] + fn layout(&self) -> TyAndLayout<'tcx> { + self.layout + } + + #[inline(always)] + fn to_op_for_read( + &self, + ecx: &InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + // We `force_allocation` here so that `from_op` below can work. + ecx.place_to_op(self) + } + + #[inline(always)] + fn to_op_for_proj( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + ) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> { + // We `force_allocation` here so that `from_op` below can work. + Ok(ecx.force_allocation(self)?.into()) + } + + #[inline(always)] + fn from_op(op: &OpTy<'tcx, M::Provenance>) -> Self { + // assert is justified because our `to_op` only ever produces `Indirect` operands. + op.assert_mem_place().into() + } + + #[inline(always)] + fn project_downcast( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + variant: VariantIdx, + ) -> InterpResult<'tcx, Self> { + ecx.place_downcast(self, variant) + } + + #[inline(always)] + fn project_field( + &self, + ecx: &mut InterpCx<'mir, 'tcx, M>, + field: usize, + ) -> InterpResult<'tcx, Self> { + ecx.place_field(self, field) + } +} + +macro_rules! make_value_visitor { + ($visitor_trait:ident, $value_trait:ident, $($mutability:ident)?) => { + // How to traverse a value and what to do when we are at the leaves. + pub trait $visitor_trait<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>>: Sized { + type V: $value_trait<'mir, 'tcx, M>; + + /// The visitor must have an `InterpCx` in it. + fn ecx(&$($mutability)? self) + -> &$($mutability)? InterpCx<'mir, 'tcx, M>; + + /// `read_discriminant` can be hooked for better error messages. + #[inline(always)] + fn read_discriminant( + &mut self, + op: &OpTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, VariantIdx> { + Ok(self.ecx().read_discriminant(op)?.1) + } + + // Recursive actions, ready to be overloaded. + /// Visits the given value, dispatching as appropriate to more specialized visitors. 
+ #[inline(always)] + fn visit_value(&mut self, v: &Self::V) -> InterpResult<'tcx> + { + self.walk_value(v) + } + /// Visits the given value as a union. No automatic recursion can happen here. + #[inline(always)] + fn visit_union(&mut self, _v: &Self::V, _fields: NonZeroUsize) -> InterpResult<'tcx> + { + Ok(()) + } + /// Visits the given value as the pointer of a `Box`. There is nothing to recurse into. + /// The type of `v` will be a raw pointer, but this is a field of `Box` and the + /// pointee type is the actual `T`. + #[inline(always)] + fn visit_box(&mut self, _v: &Self::V) -> InterpResult<'tcx> + { + Ok(()) + } + /// Visits this value as an aggregate, you are getting an iterator yielding + /// all the fields (still in an `InterpResult`, you have to do error handling yourself). + /// Recurses into the fields. + #[inline(always)] + fn visit_aggregate( + &mut self, + v: &Self::V, + fields: impl Iterator>, + ) -> InterpResult<'tcx> { + self.walk_aggregate(v, fields) + } + + /// Called each time we recurse down to a field of a "product-like" aggregate + /// (structs, tuples, arrays and the like, but not enums), passing in old (outer) + /// and new (inner) value. + /// This gives the visitor the chance to track the stack of nested fields that + /// we are descending through. + #[inline(always)] + fn visit_field( + &mut self, + _old_val: &Self::V, + _field: usize, + new_val: &Self::V, + ) -> InterpResult<'tcx> { + self.visit_value(new_val) + } + /// Called when recursing into an enum variant. + /// This gives the visitor the chance to track the stack of nested fields that + /// we are descending through. + #[inline(always)] + fn visit_variant( + &mut self, + _old_val: &Self::V, + _variant: VariantIdx, + new_val: &Self::V, + ) -> InterpResult<'tcx> { + self.visit_value(new_val) + } + + // Default recursors. Not meant to be overloaded. + fn walk_aggregate( + &mut self, + v: &Self::V, + fields: impl Iterator>, + ) -> InterpResult<'tcx> { + // Now iterate over it. + for (idx, field_val) in fields.enumerate() { + self.visit_field(v, idx, &field_val?)?; + } + Ok(()) + } + fn walk_value(&mut self, v: &Self::V) -> InterpResult<'tcx> + { + let ty = v.layout().ty; + trace!("walk_value: type: {ty}"); + + // Special treatment for special types, where the (static) layout is not sufficient. + match *ty.kind() { + // If it is a trait object, switch to the real type that was used to create it. + ty::Dynamic(..) => { + // unsized values are never immediate, so we can assert_mem_place + let op = v.to_op_for_read(self.ecx())?; + let dest = op.assert_mem_place(); + let inner_mplace = self.ecx().unpack_dyn_trait(&dest)?; + trace!("walk_value: dyn object layout: {:#?}", inner_mplace.layout); + // recurse with the inner type + return self.visit_field(&v, 0, &$value_trait::from_op(&inner_mplace.into())); + }, + // Slices do not need special handling here: they have `Array` field + // placement with length 0, so we enter the `Array` case below which + // indirectly uses the metadata to determine the actual length. + + // However, `Box`... let's talk about `Box`. + ty::Adt(def, ..) if def.is_box() => { + // `Box` is a hybrid primitive-library-defined type that one the one hand is + // a dereferenceable pointer, on the other hand has *basically arbitrary + // user-defined layout* since the user controls the 'allocator' field. So it + // cannot be treated like a normal pointer, since it does not fit into an + // `Immediate`. Yeah, it is quite terrible. 
But many visitors want to do + // something with "all boxed pointers", so we handle this mess for them. + // + // When we hit a `Box`, we do not do the usual `visit_aggregate`; instead, + // we (a) call `visit_box` on the pointer value, and (b) recurse on the + // allocator field. We also assert tons of things to ensure we do not miss + // any other fields. + + // `Box` has two fields: the pointer we care about, and the allocator. + assert_eq!(v.layout().fields.count(), 2, "`Box` must have exactly 2 fields"); + let (unique_ptr, alloc) = + (v.project_field(self.ecx(), 0)?, v.project_field(self.ecx(), 1)?); + // Unfortunately there is some type junk in the way here: `unique_ptr` is a `Unique`... + // (which means another 2 fields, the second of which is a `PhantomData`) + assert_eq!(unique_ptr.layout().fields.count(), 2); + let (nonnull_ptr, phantom) = ( + unique_ptr.project_field(self.ecx(), 0)?, + unique_ptr.project_field(self.ecx(), 1)?, + ); + assert!( + phantom.layout().ty.ty_adt_def().is_some_and(|adt| adt.is_phantom_data()), + "2nd field of `Unique` should be PhantomData but is {:?}", + phantom.layout().ty, + ); + // ... that contains a `NonNull`... (gladly, only a single field here) + assert_eq!(nonnull_ptr.layout().fields.count(), 1); + let raw_ptr = nonnull_ptr.project_field(self.ecx(), 0)?; // the actual raw ptr + // ... whose only field finally is a raw ptr we can dereference. + self.visit_box(&raw_ptr)?; + + // The second `Box` field is the allocator, which we recursively check for validity + // like in regular structs. + self.visit_field(v, 1, &alloc)?; + + // We visited all parts of this one. + return Ok(()); + } + _ => {}, + }; + + // Visit the fields of this value. + match v.layout().fields { + FieldsShape::Primitive => {} + FieldsShape::Union(fields) => { + self.visit_union(v, fields)?; + } + FieldsShape::Arbitrary { ref offsets, .. } => { + // FIXME: We collect in a vec because otherwise there are lifetime + // errors: Projecting to a field needs access to `ecx`. + let fields: Vec> = + (0..offsets.len()).map(|i| { + v.project_field(self.ecx(), i) + }) + .collect(); + self.visit_aggregate(v, fields.into_iter())?; + } + FieldsShape::Array { .. } => { + // Let's get an mplace (or immediate) first. + // This might `force_allocate` if `v` is a `PlaceTy`, but `place_index` does that anyway. + let op = v.to_op_for_proj(self.ecx())?; + // Now we can go over all the fields. + // This uses the *run-time length*, i.e., if we are a slice, + // the dynamic info from the metadata is used. + let iter = self.ecx().operand_array_fields(&op)? + .map(|f| f.and_then(|f| { + Ok($value_trait::from_op(&f)) + })); + self.visit_aggregate(v, iter)?; + } + } + + match v.layout().variants { + // If this is a multi-variant layout, find the right variant and proceed + // with *its* fields. + Variants::Multiple { .. } => { + let op = v.to_op_for_read(self.ecx())?; + let idx = self.read_discriminant(&op)?; + let inner = v.project_downcast(self.ecx(), idx)?; + trace!("walk_value: variant layout: {:#?}", inner.layout()); + // recurse with the inner type + self.visit_variant(v, idx, &inner) + } + // For single-variant layouts, we already did anything there is to do. + Variants::Single { .. } => Ok(()) + } + } + } + } +} + +make_value_visitor!(ValueVisitor, Value,); +make_value_visitor!(MutValueVisitor, ValueMut, mut); -- cgit v1.2.3