diff options
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
29 files changed, 1298 insertions, 718 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs index 5d775b9b5..c4572e035 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs @@ -6,6 +6,7 @@ mod returning; use std::borrow::Cow; +use cranelift_codegen::ir::{AbiParam, SigRef}; use cranelift_module::ModuleError; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::ty::layout::FnAbiOf; @@ -13,12 +14,9 @@ use rustc_session::Session; use rustc_target::abi::call::{Conv, FnAbi}; use rustc_target::spec::abi::Abi; -use cranelift_codegen::ir::{AbiParam, SigRef}; - use self::pass_mode::*; -use crate::prelude::*; - pub(crate) use self::returning::codegen_return; +use crate::prelude::*; fn clif_sig_from_fn_abi<'tcx>( tcx: TyCtxt<'tcx>, @@ -30,7 +28,7 @@ fn clif_sig_from_fn_abi<'tcx>( let inputs = fn_abi.args.iter().flat_map(|arg_abi| arg_abi.get_abi_param(tcx).into_iter()); let (return_ptr, returns) = fn_abi.ret.get_abi_return(tcx); - // Sometimes the first param is an pointer to the place where the return value needs to be stored. + // Sometimes the first param is a pointer to the place where the return value needs to be stored. let params: Vec<_> = return_ptr.into_iter().chain(inputs).collect(); Signature { params, returns, call_conv } @@ -122,32 +120,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> { args: &[Value], ) -> Cow<'_, [Value]> { if self.tcx.sess.target.is_like_windows { - let (mut params, mut args): (Vec<_>, Vec<_>) = - params - .into_iter() - .zip(args) - .map(|(param, &arg)| { - if param.value_type == types::I128 { - let arg_ptr = Pointer::stack_slot(self.bcx.create_sized_stack_slot( - StackSlotData { kind: StackSlotKind::ExplicitSlot, size: 16 }, - )); - arg_ptr.store(self, arg, MemFlags::trusted()); - (AbiParam::new(self.pointer_type), arg_ptr.get_addr(self)) - } else { - (param, arg) - } - }) - .unzip(); + let (mut params, mut args): (Vec<_>, Vec<_>) = params + .into_iter() + .zip(args) + .map(|(param, &arg)| { + if param.value_type == types::I128 { + let arg_ptr = self.create_stack_slot(16, 16); + arg_ptr.store(self, arg, MemFlags::trusted()); + (AbiParam::new(self.pointer_type), arg_ptr.get_addr(self)) + } else { + (param, arg) + } + }) + .unzip(); let indirect_ret_val = returns.len() == 1 && returns[0].value_type == types::I128; if indirect_ret_val { params.insert(0, AbiParam::new(self.pointer_type)); - let ret_ptr = - Pointer::stack_slot(self.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: 16, - })); + let ret_ptr = self.create_stack_slot(16, 16); args.insert(0, ret_ptr.get_addr(self)); self.lib_call_unadjusted(name, params, vec![], &args); return Cow::Owned(vec![ret_ptr.load(self, types::I128, MemFlags::trusted())]); diff --git a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs index 0d16da480..065226700 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs @@ -1,14 +1,14 @@ //! Argument passing -use crate::prelude::*; -use crate::value_and_place::assert_assignable; - use cranelift_codegen::ir::{ArgumentExtension, ArgumentPurpose}; use rustc_target::abi::call::{ ArgAbi, ArgAttributes, ArgExtension as RustcArgExtension, CastTarget, PassMode, Reg, RegKind, }; use smallvec::{smallvec, SmallVec}; +use crate::prelude::*; +use crate::value_and_place::assert_assignable; + pub(super) trait ArgAbiExt<'tcx> { fn get_abi_param(&self, tcx: TyCtxt<'tcx>) -> SmallVec<[AbiParam; 2]>; fn get_abi_return(&self, tcx: TyCtxt<'tcx>) -> (Option<AbiParam>, Vec<AbiParam>); @@ -189,16 +189,13 @@ pub(super) fn from_casted_value<'tcx>( let abi_params = cast_target_to_abi_params(cast); let abi_param_size: u32 = abi_params.iter().map(|param| param.value_type.bytes()).sum(); let layout_size = u32::try_from(layout.size.bytes()).unwrap(); - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. + let ptr = fx.create_stack_slot( // Stack slot size may be bigger for example `[u8; 3]` which is packed into an `i32`. // It may also be smaller for example when the type is a wrapper around an integer with a // larger alignment than the integer. - size: (std::cmp::max(abi_param_size, layout_size) + 15) / 16 * 16, - }); - let ptr = Pointer::stack_slot(stack_slot); + std::cmp::max(abi_param_size, layout_size), + u32::try_from(layout.align.pref.bytes()).unwrap(), + ); let mut offset = 0; let mut block_params_iter = block_params.iter().copied(); for param in abi_params { diff --git a/compiler/rustc_codegen_cranelift/src/abi/returning.rs b/compiler/rustc_codegen_cranelift/src/abi/returning.rs index 646fb4a3c..0799a22c6 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/returning.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/returning.rs @@ -1,10 +1,10 @@ //! Return value handling -use crate::prelude::*; - use rustc_target::abi::call::{ArgAbi, PassMode}; use smallvec::{smallvec, SmallVec}; +use crate::prelude::*; + /// Return a place where the return value of the current function can be written to. If necessary /// this adds an extra parameter pointing to where the return value needs to be stored. pub(super) fn codegen_return_param<'tcx>( diff --git a/compiler/rustc_codegen_cranelift/src/allocator.rs b/compiler/rustc_codegen_cranelift/src/allocator.rs index 4e4c595de..e8af3e8c2 100644 --- a/compiler/rustc_codegen_cranelift/src/allocator.rs +++ b/compiler/rustc_codegen_cranelift/src/allocator.rs @@ -1,8 +1,6 @@ //! Allocator shim // Adapted from rustc -use crate::prelude::*; - use rustc_ast::expand::allocator::{ alloc_error_handler_name, default_fn_name, global_fn_name, AllocatorKind, AllocatorTy, ALLOCATOR_METHODS, NO_ALLOC_SHIM_IS_UNSTABLE, @@ -10,6 +8,8 @@ use rustc_ast::expand::allocator::{ use rustc_codegen_ssa::base::allocator_kind_for_codegen; use rustc_session::config::OomStrategy; +use crate::prelude::*; + /// Returns whether an allocator shim was created pub(crate) fn codegen( tcx: TyCtxt<'_>, diff --git a/compiler/rustc_codegen_cranelift/src/analyze.rs b/compiler/rustc_codegen_cranelift/src/analyze.rs index 359d581c1..321612238 100644 --- a/compiler/rustc_codegen_cranelift/src/analyze.rs +++ b/compiler/rustc_codegen_cranelift/src/analyze.rs @@ -1,11 +1,11 @@ //! SSA analysis -use crate::prelude::*; - use rustc_index::IndexVec; use rustc_middle::mir::StatementKind::*; use rustc_middle::ty::Ty; +use crate::prelude::*; + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub(crate) enum SsaKind { NotSsa, diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs index 0a451dad9..91b1547cb 100644 --- a/compiler/rustc_codegen_cranelift/src/base.rs +++ b/compiler/rustc_codegen_cranelift/src/base.rs @@ -1,15 +1,14 @@ //! Codegen of a single function +use cranelift_codegen::ir::UserFuncName; +use cranelift_codegen::CodegenError; +use cranelift_module::ModuleError; use rustc_ast::InlineAsmOptions; use rustc_index::IndexVec; use rustc_middle::ty::adjustment::PointerCoercion; use rustc_middle::ty::layout::FnAbiOf; use rustc_middle::ty::print::with_no_trimmed_paths; -use cranelift_codegen::ir::UserFuncName; -use cranelift_codegen::CodegenError; -use cranelift_module::ModuleError; - use crate::constant::ConstantCx; use crate::debuginfo::FunctionDebugContext; use crate::prelude::*; @@ -250,17 +249,6 @@ pub(crate) fn verify_func( } fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { - if let Err(err) = - fx.mir.post_mono_checks(fx.tcx, ty::ParamEnv::reveal_all(), |c| Ok(fx.monomorphize(c))) - { - err.emit_err(fx.tcx); - fx.bcx.append_block_params_for_function_params(fx.block_map[START_BLOCK]); - fx.bcx.switch_to_block(fx.block_map[START_BLOCK]); - // compilation should have been aborted - fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); - return; - } - let arg_uninhabited = fx .mir .args_iter() @@ -490,7 +478,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { TerminatorKind::Yield { .. } | TerminatorKind::FalseEdge { .. } | TerminatorKind::FalseUnwind { .. } - | TerminatorKind::GeneratorDrop => { + | TerminatorKind::CoroutineDrop => { bug!("shouldn't exist at codegen {:?}", bb_data.terminator()); } TerminatorKind::Drop { place, target, unwind: _, replace: _ } => { @@ -778,7 +766,7 @@ fn codegen_stmt<'tcx>( NullOp::SizeOf => layout.size.bytes(), NullOp::AlignOf => layout.align.abi.bytes(), NullOp::OffsetOf(fields) => { - layout.offset_of_subfield(fx, fields.iter().map(|f| f.index())).bytes() + layout.offset_of_subfield(fx, fields.iter()).bytes() } }; let val = CValue::by_val( diff --git a/compiler/rustc_codegen_cranelift/src/cast.rs b/compiler/rustc_codegen_cranelift/src/cast.rs index 6bf3a866b..0b5cb1547 100644 --- a/compiler/rustc_codegen_cranelift/src/cast.rs +++ b/compiler/rustc_codegen_cranelift/src/cast.rs @@ -104,11 +104,7 @@ pub(crate) fn clif_int_or_float_cast( &[from], )[0]; // FIXME(bytecodealliance/wasmtime#6104) use bitcast instead of store to get from i64x2 to i128 - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: 16, - }); - let ret_ptr = Pointer::stack_slot(stack_slot); + let ret_ptr = fx.create_stack_slot(16, 16); ret_ptr.store(fx, ret, MemFlags::trusted()); ret_ptr.load(fx, types::I128, MemFlags::trusted()) } else { @@ -129,8 +125,8 @@ pub(crate) fn clif_int_or_float_cast( let (min, max) = match (to_ty, to_signed) { (types::I8, false) => (0, i64::from(u8::MAX)), (types::I16, false) => (0, i64::from(u16::MAX)), - (types::I8, true) => (i64::from(i8::MIN), i64::from(i8::MAX)), - (types::I16, true) => (i64::from(i16::MIN), i64::from(i16::MAX)), + (types::I8, true) => (i64::from(i8::MIN as u32), i64::from(i8::MAX as u32)), + (types::I16, true) => (i64::from(i16::MIN as u32), i64::from(i16::MAX as u32)), _ => unreachable!(), }; let min_val = fx.bcx.ins().iconst(types::I32, min); diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs index 359b430b4..63562d335 100644 --- a/compiler/rustc_codegen_cranelift/src/common.rs +++ b/compiler/rustc_codegen_cranelift/src/common.rs @@ -1,6 +1,5 @@ use cranelift_codegen::isa::TargetFrontendConfig; use gimli::write::FileId; - use rustc_data_structures::sync::Lrc; use rustc_index::IndexVec; use rustc_middle::ty::layout::{ @@ -204,9 +203,9 @@ pub(crate) fn type_min_max_value( (types::I8, false) | (types::I16, false) | (types::I32, false) | (types::I64, false) => { 0i64 } - (types::I8, true) => i64::from(i8::MIN), - (types::I16, true) => i64::from(i16::MIN), - (types::I32, true) => i64::from(i32::MIN), + (types::I8, true) => i64::from(i8::MIN as u8), + (types::I16, true) => i64::from(i16::MIN as u16), + (types::I32, true) => i64::from(i32::MIN as u32), (types::I64, true) => i64::MIN, _ => unreachable!(), }; @@ -216,9 +215,9 @@ pub(crate) fn type_min_max_value( (types::I16, false) => i64::from(u16::MAX), (types::I32, false) => i64::from(u32::MAX), (types::I64, false) => u64::MAX as i64, - (types::I8, true) => i64::from(i8::MAX), - (types::I16, true) => i64::from(i16::MAX), - (types::I32, true) => i64::from(i32::MAX), + (types::I8, true) => i64::from(i8::MAX as u8), + (types::I16, true) => i64::from(i16::MAX as u16), + (types::I32, true) => i64::from(i32::MAX as u32), (types::I64, true) => i64::MAX, _ => unreachable!(), }; @@ -384,6 +383,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> { }) } + pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer { + if align <= 16 { + let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData { + kind: StackSlotKind::ExplicitSlot, + // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to + // specify stack slot alignment. + size: (size + 15) / 16 * 16, + }); + Pointer::stack_slot(stack_slot) + } else { + // Alignment is too big to handle using the above hack. Dynamically realign a stack slot + // instead. This wastes some space for the realignment. + let base_ptr = self.create_stack_slot(size + align, 16).get_addr(self); + let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align)); + let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align)); + Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset)) + } + } + pub(crate) fn set_debug_loc(&mut self, source_info: mir::SourceInfo) { if let Some(debug_context) = &mut self.cx.debug_context { let (file, line, column) = @@ -412,46 +430,11 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> { } } - // Note: must be kept in sync with get_caller_location from cg_ssa - pub(crate) fn get_caller_location(&mut self, mut source_info: mir::SourceInfo) -> CValue<'tcx> { - let span_to_caller_location = |fx: &mut FunctionCx<'_, '_, 'tcx>, span: Span| { - let topmost = span.ctxt().outer_expn().expansion_cause().unwrap_or(span); - let caller = fx.tcx.sess.source_map().lookup_char_pos(topmost.lo()); - let const_loc = fx.tcx.const_caller_location(( - rustc_span::symbol::Symbol::intern( - &caller.file.name.prefer_remapped().to_string_lossy(), - ), - caller.line as u32, - caller.col_display as u32 + 1, - )); - crate::constant::codegen_const_value(fx, const_loc, fx.tcx.caller_location_ty()) - }; - - // Walk up the `SourceScope`s, in case some of them are from MIR inlining. - // If so, the starting `source_info.span` is in the innermost inlined - // function, and will be replaced with outer callsite spans as long - // as the inlined functions were `#[track_caller]`. - loop { - let scope_data = &self.mir.source_scopes[source_info.scope]; - - if let Some((callee, callsite_span)) = scope_data.inlined { - // Stop inside the most nested non-`#[track_caller]` function, - // before ever reaching its caller (which is irrelevant). - if !callee.def.requires_caller_location(self.tcx) { - return span_to_caller_location(self, source_info.span); - } - source_info.span = callsite_span; - } - - // Skip past all of the parents with `inlined: None`. - match scope_data.inlined_parent_scope { - Some(parent) => source_info.scope = parent, - None => break, - } - } - - // No inlined `SourceScope`s, or all of them were `#[track_caller]`. - self.caller_location.unwrap_or_else(|| span_to_caller_location(self, source_info.span)) + pub(crate) fn get_caller_location(&mut self, source_info: mir::SourceInfo) -> CValue<'tcx> { + self.mir.caller_location_span(source_info, self.caller_location, self.tcx, |span| { + let const_loc = self.tcx.span_as_caller_location(span); + crate::constant::codegen_const_value(self, const_loc, self.tcx.caller_location_ty()) + }) } pub(crate) fn anonymous_str(&mut self, msg: &str) -> Value { diff --git a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs index d2b928db7..20f2ee4c7 100644 --- a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs +++ b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs @@ -1,8 +1,7 @@ use std::sync::{Arc, Condvar, Mutex}; -use rustc_session::Session; - use jobserver::HelperThread; +use rustc_session::Session; // FIXME don't panic when a worker thread panics diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs index 14b10ed8b..b0853d30e 100644 --- a/compiler/rustc_codegen_cranelift/src/constant.rs +++ b/compiler/rustc_codegen_cranelift/src/constant.rs @@ -1,12 +1,11 @@ //! Handling of `static`s, `const`s and promoted allocations +use cranelift_module::*; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::mir::interpret::{read_target_uint, AllocId, GlobalAlloc, Scalar}; use rustc_middle::mir::ConstValue; -use cranelift_module::*; - use crate::prelude::*; pub(crate) struct ConstantCx { @@ -101,7 +100,7 @@ pub(crate) fn codegen_const_value<'tcx>( if fx.clif_type(layout.ty).is_some() { return CValue::const_val(fx, layout, int); } else { - let raw_val = int.to_bits(int.size()).unwrap(); + let raw_val = int.size().truncate(int.to_bits(int.size()).unwrap()); let val = match int.size().bytes() { 1 => fx.bcx.ins().iconst(types::I8, raw_val as i64), 2 => fx.bcx.ins().iconst(types::I16, raw_val as i64), @@ -187,8 +186,7 @@ pub(crate) fn codegen_const_value<'tcx>( ConstValue::Slice { data, meta } => { let alloc_id = fx.tcx.reserve_and_set_memory_alloc(data); let ptr = pointer_for_allocation(fx, alloc_id).get_addr(fx); - // FIXME: the `try_from` here can actually fail, e.g. for very long ZST slices. - let len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(meta).unwrap()); + let len = fx.bcx.ins().iconst(fx.pointer_type, meta as i64); CValue::by_val_pair(ptr, len, layout) } } @@ -512,7 +510,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>( | TerminatorKind::Drop { .. } | TerminatorKind::Assert { .. } => {} TerminatorKind::Yield { .. } - | TerminatorKind::GeneratorDrop + | TerminatorKind::CoroutineDrop | TerminatorKind::FalseEdge { .. } | TerminatorKind::FalseUnwind { .. } => unreachable!(), TerminatorKind::InlineAsm { .. } => return None, diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs index c4a5627e6..81b819a55 100644 --- a/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs +++ b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs @@ -1,10 +1,9 @@ //! Write the debuginfo into an object file. use cranelift_object::ObjectProduct; -use rustc_data_structures::fx::FxHashMap; - use gimli::write::{Address, AttributeValue, EndianVec, Result, Sections, Writer}; use gimli::{RunTimeEndian, SectionId}; +use rustc_data_structures::fx::FxHashMap; use super::object::WriteDebugInfo; use super::DebugContext; diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs index b19b935a0..6230ca15d 100644 --- a/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs +++ b/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs @@ -3,20 +3,18 @@ use std::ffi::OsStr; use std::path::{Component, Path}; -use crate::debuginfo::FunctionDebugContext; -use crate::prelude::*; - -use rustc_data_structures::sync::Lrc; -use rustc_span::{ - FileName, Pos, SourceFile, SourceFileAndLine, SourceFileHash, SourceFileHashAlgorithm, -}; - use cranelift_codegen::binemit::CodeOffset; use cranelift_codegen::MachSrcLoc; - use gimli::write::{ Address, AttributeValue, FileId, FileInfo, LineProgram, LineString, LineStringTable, }; +use rustc_data_structures::sync::Lrc; +use rustc_span::{ + FileName, Pos, SourceFile, SourceFileAndLine, SourceFileHash, SourceFileHashAlgorithm, +}; + +use crate::debuginfo::FunctionDebugContext; +use crate::prelude::*; // OPTIMIZATION: It is cheaper to do this in one pass than using `.parent()` and `.file_name()`. fn split_path_dir_and_file(path: &Path) -> (&Path, &OsStr) { @@ -97,7 +95,11 @@ impl DebugContext { match &source_file.name { FileName::Real(path) => { let (dir_path, file_name) = - split_path_dir_and_file(path.remapped_path_if_available()); + split_path_dir_and_file(if self.should_remap_filepaths { + path.remapped_path_if_available() + } else { + path.local_path_if_available() + }); let dir_name = osstr_as_utf8_bytes(dir_path.as_os_str()); let file_name = osstr_as_utf8_bytes(file_name); @@ -118,7 +120,14 @@ impl DebugContext { filename => { let dir_id = line_program.default_directory(); let dummy_file_name = LineString::new( - filename.prefer_remapped().to_string().into_bytes(), + filename + .display(if self.should_remap_filepaths { + FileNameDisplayPreference::Remapped + } else { + FileNameDisplayPreference::Local + }) + .to_string() + .into_bytes(), line_program.encoding(), line_strings, ); diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs index 8a4b1cccf..e6edc452c 100644 --- a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs @@ -5,11 +5,8 @@ mod line_info; mod object; mod unwind; -use crate::prelude::*; - use cranelift_codegen::ir::Endianness; use cranelift_codegen::isa::TargetIsa; - use gimli::write::{ Address, AttributeValue, DwarfUnit, FileId, LineProgram, LineString, Range, RangeList, UnitEntryId, @@ -17,12 +14,13 @@ use gimli::write::{ use gimli::{Encoding, Format, LineEncoding, RunTimeEndian}; use indexmap::IndexSet; -pub(crate) use emit::{DebugReloc, DebugRelocName}; -pub(crate) use unwind::UnwindContext; +pub(crate) use self::emit::{DebugReloc, DebugRelocName}; +pub(crate) use self::unwind::UnwindContext; +use crate::prelude::*; pub(crate) fn producer() -> String { format!( - "cg_clif (rustc {}, cranelift {})", + "rustc version {} with cranelift {}", rustc_interface::util::rustc_version_str().unwrap_or("unknown version"), cranelift_codegen::VERSION, ) @@ -33,6 +31,8 @@ pub(crate) struct DebugContext { dwarf: DwarfUnit, unit_range_list: RangeList, + + should_remap_filepaths: bool, } pub(crate) struct FunctionDebugContext { @@ -65,12 +65,18 @@ impl DebugContext { let mut dwarf = DwarfUnit::new(encoding); + let should_remap_filepaths = tcx.sess.should_prefer_remapped_for_codegen(); + let producer = producer(); let comp_dir = tcx .sess .opts .working_dir - .to_string_lossy(FileNameDisplayPreference::Remapped) + .to_string_lossy(if should_remap_filepaths { + FileNameDisplayPreference::Remapped + } else { + FileNameDisplayPreference::Local + }) .into_owned(); let (name, file_info) = match tcx.sess.local_crate_source_file() { Some(path) => { @@ -104,7 +110,12 @@ impl DebugContext { root.set(gimli::DW_AT_low_pc, AttributeValue::Address(Address::Constant(0))); } - DebugContext { endian, dwarf, unit_range_list: RangeList(Vec::new()) } + DebugContext { + endian, + dwarf, + unit_range_list: RangeList(Vec::new()), + should_remap_filepaths, + } } pub(crate) fn define_function( diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs index 9dc9b2cf9..f1840a7bf 100644 --- a/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs +++ b/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs @@ -1,12 +1,9 @@ -use rustc_data_structures::fx::FxHashMap; - use cranelift_module::FuncId; use cranelift_object::ObjectProduct; - +use gimli::SectionId; use object::write::{Relocation, StandardSegment}; use object::{RelocationEncoding, SectionKind}; - -use gimli::SectionId; +use rustc_data_structures::fx::FxHashMap; use crate::debuginfo::{DebugReloc, DebugRelocName}; diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs index 493359c74..35278e6fb 100644 --- a/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs +++ b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs @@ -1,15 +1,13 @@ //! Unwind info generation (`.eh_frame`) -use crate::prelude::*; - use cranelift_codegen::ir::Endianness; use cranelift_codegen::isa::{unwind::UnwindInfo, TargetIsa}; - use cranelift_object::ObjectProduct; use gimli::write::{Address, CieId, EhFrame, FrameTable, Section}; use gimli::RunTimeEndian; use super::object::WriteDebugInfo; +use crate::prelude::*; pub(crate) struct UnwindContext { endian: RunTimeEndian, diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs index 3e9383095..11229dd42 100644 --- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs +++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs @@ -6,19 +6,19 @@ use std::path::PathBuf; use std::sync::Arc; use std::thread::JoinHandle; +use cranelift_object::{ObjectBuilder, ObjectModule}; +use rustc_codegen_ssa::assert_module_sources::CguReuse; use rustc_codegen_ssa::back::metadata::create_compressed_metadata_file; +use rustc_codegen_ssa::base::determine_cgu_reuse; use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, ModuleKind}; use rustc_data_structures::profiling::SelfProfilerRef; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_metadata::EncodedMetadata; use rustc_middle::dep_graph::{WorkProduct, WorkProductId}; use rustc_middle::mir::mono::{CodegenUnit, MonoItem}; -use rustc_session::cgu_reuse_tracker::CguReuse; use rustc_session::config::{DebugInfo, OutputFilenames, OutputType}; use rustc_session::Session; -use cranelift_object::{ObjectBuilder, ObjectModule}; - use crate::concurrency_limiter::{ConcurrencyLimiter, ConcurrencyLimiterToken}; use crate::global_asm::GlobalAsmConfig; use crate::{prelude::*, BackendConfig}; @@ -361,12 +361,26 @@ pub(crate) fn run_aot( metadata: EncodedMetadata, need_metadata_module: bool, ) -> Box<OngoingCodegen> { + // FIXME handle `-Ctarget-cpu=native` + let target_cpu = match tcx.sess.opts.cg.target_cpu { + Some(ref name) => name, + None => tcx.sess.target.cpu.as_ref(), + } + .to_owned(); + let cgus = if tcx.sess.opts.output_types.should_codegen() { tcx.collect_and_partition_mono_items(()).1 } else { // If only `--emit metadata` is used, we shouldn't perform any codegen. // Also `tcx.collect_and_partition_mono_items` may panic in that case. - &[] + return Box::new(OngoingCodegen { + modules: vec![], + allocator_module: None, + metadata_module: None, + metadata, + crate_info: CrateInfo::new(tcx, target_cpu), + concurrency_limiter: ConcurrencyLimiter::new(tcx.sess, 0), + }); }; if tcx.dep_graph.is_fully_enabled() { @@ -375,20 +389,28 @@ pub(crate) fn run_aot( } } + // Calculate the CGU reuse + let cgu_reuse = tcx.sess.time("find_cgu_reuse", || { + cgus.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect::<Vec<_>>() + }); + + rustc_codegen_ssa::assert_module_sources::assert_module_sources(tcx, &|cgu_reuse_tracker| { + for (i, cgu) in cgus.iter().enumerate() { + let cgu_reuse = cgu_reuse[i]; + cgu_reuse_tracker.set_actual_reuse(cgu.name().as_str(), cgu_reuse); + } + }); + let global_asm_config = Arc::new(crate::global_asm::GlobalAsmConfig::new(tcx)); let mut concurrency_limiter = ConcurrencyLimiter::new(tcx.sess, cgus.len()); let modules = tcx.sess.time("codegen mono items", || { cgus.iter() - .map(|cgu| { - let cgu_reuse = if backend_config.disable_incr_cache { - CguReuse::No - } else { - determine_cgu_reuse(tcx, cgu) - }; - tcx.sess.cgu_reuse_tracker.set_actual_reuse(cgu.name().as_str(), cgu_reuse); - + .enumerate() + .map(|(i, cgu)| { + let cgu_reuse = + if backend_config.disable_incr_cache { CguReuse::No } else { cgu_reuse[i] }; match cgu_reuse { CguReuse::No => { let dep_node = cgu.codegen_dep_node(tcx); @@ -407,8 +429,7 @@ pub(crate) fn run_aot( ) .0 } - CguReuse::PreLto => unreachable!(), - CguReuse::PostLto => { + CguReuse::PreLto | CguReuse::PostLto => { concurrency_limiter.job_already_done(); OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu)) } @@ -474,13 +495,6 @@ pub(crate) fn run_aot( None }; - // FIXME handle `-Ctarget-cpu=native` - let target_cpu = match tcx.sess.opts.cg.target_cpu { - Some(ref name) => name, - None => tcx.sess.target.cpu.as_ref(), - } - .to_owned(); - Box::new(OngoingCodegen { modules, allocator_module, @@ -490,32 +504,3 @@ pub(crate) fn run_aot( concurrency_limiter, }) } - -// Adapted from https://github.com/rust-lang/rust/blob/303d8aff6092709edd4dbd35b1c88e9aa40bf6d8/src/librustc_codegen_ssa/base.rs#L922-L953 -fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) -> CguReuse { - if !tcx.dep_graph.is_fully_enabled() { - return CguReuse::No; - } - - let work_product_id = &cgu.work_product_id(); - if tcx.dep_graph.previous_work_product(work_product_id).is_none() { - // We don't have anything cached for this CGU. This can happen - // if the CGU did not exist in the previous session. - return CguReuse::No; - } - - // Try to mark the CGU as green. If it we can do so, it means that nothing - // affecting the LLVM module has changed and we can re-use a cached version. - // If we compile with any kind of LTO, this means we can re-use the bitcode - // of the Pre-LTO stage (possibly also the Post-LTO version but we'll only - // know that later). If we are not doing LTO, there is only one optimized - // version of each module, so we re-use that. - let dep_node = cgu.codegen_dep_node(tcx); - assert!( - !tcx.dep_graph.dep_node_exists(&dep_node), - "CompileCodegenUnit dep-node for CGU `{}` already exists before marking.", - cgu.name() - ); - - if tcx.try_mark_green(&dep_node) { CguReuse::PostLto } else { CguReuse::No } -} diff --git a/compiler/rustc_codegen_cranelift/src/driver/jit.rs b/compiler/rustc_codegen_cranelift/src/driver/jit.rs index 1c606494f..6ee65d12c 100644 --- a/compiler/rustc_codegen_cranelift/src/driver/jit.rs +++ b/compiler/rustc_codegen_cranelift/src/driver/jit.rs @@ -6,13 +6,12 @@ use std::ffi::CString; use std::os::raw::{c_char, c_int}; use std::sync::{mpsc, Mutex, OnceLock}; +use cranelift_jit::{JITBuilder, JITModule}; use rustc_codegen_ssa::CrateInfo; use rustc_middle::mir::mono::MonoItem; use rustc_session::Session; use rustc_span::Symbol; -use cranelift_jit::{JITBuilder, JITModule}; - use crate::{prelude::*, BackendConfig}; use crate::{CodegenCx, CodegenMode}; diff --git a/compiler/rustc_codegen_cranelift/src/global_asm.rs b/compiler/rustc_codegen_cranelift/src/global_asm.rs index baadd7a9e..b14007f4e 100644 --- a/compiler/rustc_codegen_cranelift/src/global_asm.rs +++ b/compiler/rustc_codegen_cranelift/src/global_asm.rs @@ -9,16 +9,22 @@ use std::sync::Arc; use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece}; use rustc_hir::{InlineAsmOperand, ItemId}; use rustc_session::config::{OutputFilenames, OutputType}; +use rustc_target::asm::InlineAsmArch; use crate::prelude::*; pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String, item_id: ItemId) { let item = tcx.hir().item(item_id); if let rustc_hir::ItemKind::GlobalAsm(asm) = item.kind { - if !asm.options.contains(InlineAsmOptions::ATT_SYNTAX) { - global_asm.push_str("\n.intel_syntax noprefix\n"); - } else { - global_asm.push_str("\n.att_syntax\n"); + let is_x86 = + matches!(tcx.sess.asm_arch.unwrap(), InlineAsmArch::X86 | InlineAsmArch::X86_64); + + if is_x86 { + if !asm.options.contains(InlineAsmOptions::ATT_SYNTAX) { + global_asm.push_str("\n.intel_syntax noprefix\n"); + } else { + global_asm.push_str("\n.att_syntax\n"); + } } for piece in asm.template { match *piece { @@ -40,6 +46,13 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String, global_asm.push_str(&string); } InlineAsmOperand::SymFn { anon_const } => { + if cfg!(not(feature = "inline_asm_sym")) { + tcx.sess.span_err( + item.span, + "asm! and global_asm! sym operands are not yet supported", + ); + } + let ty = tcx.typeck_body(anon_const.body).node_type(anon_const.hir_id); let instance = match ty.kind() { &ty::FnDef(def_id, args) => Instance::new(def_id, args), @@ -51,6 +64,13 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String, global_asm.push_str(symbol.name); } InlineAsmOperand::SymStatic { path: _, def_id } => { + if cfg!(not(feature = "inline_asm_sym")) { + tcx.sess.span_err( + item.span, + "asm! and global_asm! sym operands are not yet supported", + ); + } + let instance = Instance::mono(tcx, def_id).polymorphize(tcx); let symbol = tcx.symbol_name(instance); global_asm.push_str(symbol.name); @@ -65,7 +85,11 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String, } } } - global_asm.push_str("\n.att_syntax\n\n"); + + global_asm.push('\n'); + if is_x86 { + global_asm.push_str(".att_syntax\n\n"); + } } else { bug!("Expected GlobalAsm found {:?}", item); } @@ -73,18 +97,21 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String, #[derive(Debug)] pub(crate) struct GlobalAsmConfig { - asm_enabled: bool, assembler: PathBuf, + target: String, pub(crate) output_filenames: Arc<OutputFilenames>, } impl GlobalAsmConfig { pub(crate) fn new(tcx: TyCtxt<'_>) -> Self { - let asm_enabled = cfg!(feature = "inline_asm") && !tcx.sess.target.is_like_windows; - GlobalAsmConfig { - asm_enabled, assembler: crate::toolchain::get_toolchain_binary(tcx.sess, "as"), + target: match &tcx.sess.opts.target_triple { + rustc_target::spec::TargetTriple::TargetTriple(triple) => triple.clone(), + rustc_target::spec::TargetTriple::TargetJson { path_for_rustdoc, .. } => { + path_for_rustdoc.to_str().unwrap().to_owned() + } + }, output_filenames: tcx.output_filenames(()).clone(), } } @@ -99,42 +126,75 @@ pub(crate) fn compile_global_asm( return Ok(None); } - if !config.asm_enabled { - if global_asm.contains("__rust_probestack") { - return Ok(None); - } - - if cfg!(not(feature = "inline_asm")) { - return Err( - "asm! and global_asm! support is disabled while compiling rustc_codegen_cranelift" - .to_owned(), - ); - } else { - return Err("asm! and global_asm! are not yet supported on Windows".to_owned()); - } - } - // Remove all LLVM style comments - let global_asm = global_asm + let mut global_asm = global_asm .lines() .map(|line| if let Some(index) = line.find("//") { &line[0..index] } else { line }) .collect::<Vec<_>>() .join("\n"); + global_asm.push('\n'); - let output_object_file = config.output_filenames.temp_path(OutputType::Object, Some(cgu_name)); + let global_asm_object_file = add_file_stem_postfix( + config.output_filenames.temp_path(OutputType::Object, Some(cgu_name)), + ".asm", + ); // Assemble `global_asm` - let global_asm_object_file = add_file_stem_postfix(output_object_file, ".asm"); - let mut child = Command::new(&config.assembler) - .arg("-o") - .arg(&global_asm_object_file) - .stdin(Stdio::piped()) - .spawn() - .expect("Failed to spawn `as`."); - child.stdin.take().unwrap().write_all(global_asm.as_bytes()).unwrap(); - let status = child.wait().expect("Failed to wait for `as`."); - if !status.success() { - return Err(format!("Failed to assemble `{}`", global_asm)); + if option_env!("CG_CLIF_FORCE_GNU_AS").is_some() { + let mut child = Command::new(&config.assembler) + .arg("-o") + .arg(&global_asm_object_file) + .stdin(Stdio::piped()) + .spawn() + .expect("Failed to spawn `as`."); + child.stdin.take().unwrap().write_all(global_asm.as_bytes()).unwrap(); + let status = child.wait().expect("Failed to wait for `as`."); + if !status.success() { + return Err(format!("Failed to assemble `{}`", global_asm)); + } + } else { + let mut child = Command::new(std::env::current_exe().unwrap()) + .arg("--target") + .arg(&config.target) + .arg("--crate-type") + .arg("staticlib") + .arg("--emit") + .arg("obj") + .arg("-o") + .arg(&global_asm_object_file) + .arg("-") + .arg("-Abad_asm_style") + .arg("-Zcodegen-backend=llvm") + .stdin(Stdio::piped()) + .spawn() + .expect("Failed to spawn `as`."); + let mut stdin = child.stdin.take().unwrap(); + stdin + .write_all( + br####" + #![feature(decl_macro, no_core, rustc_attrs)] + #![allow(internal_features)] + #![no_core] + #[rustc_builtin_macro] + #[rustc_macro_transparency = "semitransparent"] + macro global_asm() { /* compiler built-in */ } + global_asm!(r###" + "####, + ) + .unwrap(); + stdin.write_all(global_asm.as_bytes()).unwrap(); + stdin + .write_all( + br####" + "###); + "####, + ) + .unwrap(); + std::mem::drop(stdin); + let status = child.wait().expect("Failed to wait for `as`."); + if !status.success() { + return Err(format!("Failed to assemble `{}`", global_asm)); + } } Ok(Some(global_asm_object_file)) diff --git a/compiler/rustc_codegen_cranelift/src/inline_asm.rs b/compiler/rustc_codegen_cranelift/src/inline_asm.rs index 50bbf8105..ce0eecca8 100644 --- a/compiler/rustc_codegen_cranelift/src/inline_asm.rs +++ b/compiler/rustc_codegen_cranelift/src/inline_asm.rs @@ -1,18 +1,19 @@ //! Codegen of `asm!` invocations. -use crate::prelude::*; - use std::fmt::Write; use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece}; use rustc_middle::mir::InlineAsmOperand; use rustc_span::sym; use rustc_target::asm::*; +use target_lexicon::BinaryFormat; + +use crate::prelude::*; enum CInlineAsmOperand<'tcx> { In { reg: InlineAsmRegOrRegClass, - value: CValue<'tcx>, + value: Value, }, Out { reg: InlineAsmRegOrRegClass, @@ -22,7 +23,7 @@ enum CInlineAsmOperand<'tcx> { InOut { reg: InlineAsmRegOrRegClass, _late: bool, - in_value: CValue<'tcx>, + in_value: Value, out_place: Option<CPlace<'tcx>>, }, Const { @@ -43,191 +44,23 @@ pub(crate) fn codegen_inline_asm<'tcx>( ) { // FIXME add .eh_frame unwind info directives - if !template.is_empty() { - // Used by panic_abort - if template[0] == InlineAsmTemplatePiece::String("int $$0x29".to_string()) { - fx.bcx.ins().trap(TrapCode::User(1)); - return; - } - - // Used by stdarch - if template[0] == InlineAsmTemplatePiece::String("mov ".to_string()) - && matches!( - template[1], - InlineAsmTemplatePiece::Placeholder { - operand_idx: 0, - modifier: Some('r'), - span: _ - } - ) - && template[2] == InlineAsmTemplatePiece::String(", rbx".to_string()) - && template[3] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[4] == InlineAsmTemplatePiece::String("cpuid".to_string()) - && template[5] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[6] == InlineAsmTemplatePiece::String("xchg ".to_string()) - && matches!( - template[7], - InlineAsmTemplatePiece::Placeholder { - operand_idx: 0, - modifier: Some('r'), - span: _ - } - ) - && template[8] == InlineAsmTemplatePiece::String(", rbx".to_string()) - { - assert_eq!(operands.len(), 4); - let (leaf, eax_place) = match operands[1] { - InlineAsmOperand::InOut { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), - late: _, - ref in_value, - out_place: Some(out_place), - } => ( - crate::base::codegen_operand(fx, in_value).load_scalar(fx), - crate::base::codegen_place(fx, out_place), - ), - _ => unreachable!(), - }; - let ebx_place = match operands[0] { - InlineAsmOperand::Out { - reg: - InlineAsmRegOrRegClass::RegClass(InlineAsmRegClass::X86( - X86InlineAsmRegClass::reg, - )), - late: _, - place: Some(place), - } => crate::base::codegen_place(fx, place), - _ => unreachable!(), - }; - let (sub_leaf, ecx_place) = match operands[2] { - InlineAsmOperand::InOut { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), - late: _, - ref in_value, - out_place: Some(out_place), - } => ( - crate::base::codegen_operand(fx, in_value).load_scalar(fx), - crate::base::codegen_place(fx, out_place), - ), - _ => unreachable!(), - }; - let edx_place = match operands[3] { - InlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), - late: _, - place: Some(place), - } => crate::base::codegen_place(fx, place), - _ => unreachable!(), - }; - - let (eax, ebx, ecx, edx) = crate::intrinsics::codegen_cpuid_call(fx, leaf, sub_leaf); - - eax_place.write_cvalue(fx, CValue::by_val(eax, fx.layout_of(fx.tcx.types.u32))); - ebx_place.write_cvalue(fx, CValue::by_val(ebx, fx.layout_of(fx.tcx.types.u32))); - ecx_place.write_cvalue(fx, CValue::by_val(ecx, fx.layout_of(fx.tcx.types.u32))); - edx_place.write_cvalue(fx, CValue::by_val(edx, fx.layout_of(fx.tcx.types.u32))); - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } - - // Used by compiler-builtins - if fx.tcx.symbol_name(fx.instance).name.starts_with("___chkstk") { - // ___chkstk, ___chkstk_ms and __alloca are only used on Windows - crate::trap::trap_unimplemented(fx, "Stack probes are not supported"); - return; - } else if fx.tcx.symbol_name(fx.instance).name == "__alloca" { - crate::trap::trap_unimplemented(fx, "Alloca is not supported"); - return; - } - - // Used by measureme - if template[0] == InlineAsmTemplatePiece::String("xor %eax, %eax".to_string()) - && template[1] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[2] == InlineAsmTemplatePiece::String("mov %rbx, ".to_string()) - && matches!( - template[3], - InlineAsmTemplatePiece::Placeholder { - operand_idx: 0, - modifier: Some('r'), - span: _ - } - ) - && template[4] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[5] == InlineAsmTemplatePiece::String("cpuid".to_string()) - && template[6] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[7] == InlineAsmTemplatePiece::String("mov ".to_string()) - && matches!( - template[8], - InlineAsmTemplatePiece::Placeholder { - operand_idx: 0, - modifier: Some('r'), - span: _ - } - ) - && template[9] == InlineAsmTemplatePiece::String(", %rbx".to_string()) - { - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } else if template[0] == InlineAsmTemplatePiece::String("rdpmc".to_string()) { - // Return zero dummy values for all performance counters - match operands[0] { - InlineAsmOperand::In { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), - value: _, - } => {} - _ => unreachable!(), - }; - let lo = match operands[1] { - InlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), - late: true, - place: Some(place), - } => crate::base::codegen_place(fx, place), - _ => unreachable!(), - }; - let hi = match operands[2] { - InlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), - late: true, - place: Some(place), - } => crate::base::codegen_place(fx, place), - _ => unreachable!(), - }; - - let u32_layout = fx.layout_of(fx.tcx.types.u32); - let zero = fx.bcx.ins().iconst(types::I32, 0); - lo.write_cvalue(fx, CValue::by_val(zero, u32_layout)); - hi.write_cvalue(fx, CValue::by_val(zero, u32_layout)); - - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } else if template[0] == InlineAsmTemplatePiece::String("lock xadd ".to_string()) - && matches!( - template[1], - InlineAsmTemplatePiece::Placeholder { operand_idx: 1, modifier: None, span: _ } - ) - && template[2] == InlineAsmTemplatePiece::String(", (".to_string()) - && matches!( - template[3], - InlineAsmTemplatePiece::Placeholder { operand_idx: 0, modifier: None, span: _ } - ) - && template[4] == InlineAsmTemplatePiece::String(")".to_string()) - { - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } + // Used by panic_abort on Windows, but uses a syntax which only happens to work with + // asm!() by accident and breaks with the GNU assembler as well as global_asm!() for + // the LLVM backend. + if template.len() == 1 + && template[0] == InlineAsmTemplatePiece::String("int $$0x29".to_string()) + { + fx.bcx.ins().trap(TrapCode::User(1)); + return; } let operands = operands .into_iter() .map(|operand| match *operand { - InlineAsmOperand::In { reg, ref value } => { - CInlineAsmOperand::In { reg, value: crate::base::codegen_operand(fx, value) } - } + InlineAsmOperand::In { reg, ref value } => CInlineAsmOperand::In { + reg, + value: crate::base::codegen_operand(fx, value).load_scalar(fx), + }, InlineAsmOperand::Out { reg, late, ref place } => CInlineAsmOperand::Out { reg, late, @@ -237,7 +70,7 @@ pub(crate) fn codegen_inline_asm<'tcx>( CInlineAsmOperand::InOut { reg, _late: late, - in_value: crate::base::codegen_operand(fx, in_value), + in_value: crate::base::codegen_operand(fx, in_value).load_scalar(fx), out_place: out_place.map(|place| crate::base::codegen_place(fx, place)), } } @@ -252,6 +85,12 @@ pub(crate) fn codegen_inline_asm<'tcx>( CInlineAsmOperand::Const { value } } InlineAsmOperand::SymFn { ref value } => { + if cfg!(not(feature = "inline_asm_sym")) { + fx.tcx + .sess + .span_err(span, "asm! and global_asm! sym operands are not yet supported"); + } + let const_ = fx.monomorphize(value.const_); if let ty::FnDef(def_id, args) = *const_.ty().kind() { let instance = ty::Instance::resolve_for_fn_ptr( @@ -329,7 +168,7 @@ pub(crate) fn codegen_inline_asm<'tcx>( for (i, operand) in operands.iter().enumerate() { match operand { CInlineAsmOperand::In { reg: _, value } => { - inputs.push((asm_gen.stack_slots_input[i].unwrap(), value.load_scalar(fx))); + inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value)); } CInlineAsmOperand::Out { reg: _, late: _, place } => { if let Some(place) = place { @@ -337,7 +176,7 @@ pub(crate) fn codegen_inline_asm<'tcx>( } } CInlineAsmOperand::InOut { reg: _, _late: _, in_value, out_place } => { - inputs.push((asm_gen.stack_slots_input[i].unwrap(), in_value.load_scalar(fx))); + inputs.push((asm_gen.stack_slots_input[i].unwrap(), *in_value)); if let Some(out_place) = out_place { outputs.push((asm_gen.stack_slots_output[i].unwrap(), *out_place)); } @@ -589,11 +428,29 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { } fn generate_asm_wrapper(&self, asm_name: &str) -> String { + let binary_format = crate::target_triple(self.tcx.sess).binary_format; + let mut generated_asm = String::new(); - writeln!(generated_asm, ".globl {}", asm_name).unwrap(); - writeln!(generated_asm, ".type {},@function", asm_name).unwrap(); - writeln!(generated_asm, ".section .text.{},\"ax\",@progbits", asm_name).unwrap(); - writeln!(generated_asm, "{}:", asm_name).unwrap(); + match binary_format { + BinaryFormat::Elf => { + writeln!(generated_asm, ".globl {}", asm_name).unwrap(); + writeln!(generated_asm, ".type {},@function", asm_name).unwrap(); + writeln!(generated_asm, ".section .text.{},\"ax\",@progbits", asm_name).unwrap(); + writeln!(generated_asm, "{}:", asm_name).unwrap(); + } + BinaryFormat::Macho => { + writeln!(generated_asm, ".globl _{}", asm_name).unwrap(); + writeln!(generated_asm, "_{}:", asm_name).unwrap(); + } + BinaryFormat::Coff => { + writeln!(generated_asm, ".globl {}", asm_name).unwrap(); + writeln!(generated_asm, "{}:", asm_name).unwrap(); + } + _ => self + .tcx + .sess + .fatal(format!("Unsupported binary format for inline asm: {binary_format:?}")), + } let is_x86 = matches!(self.arch, InlineAsmArch::X86 | InlineAsmArch::X86_64); @@ -690,8 +547,19 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { if is_x86 { generated_asm.push_str(".att_syntax\n"); } - writeln!(generated_asm, ".size {name}, .-{name}", name = asm_name).unwrap(); - generated_asm.push_str(".text\n"); + + match binary_format { + BinaryFormat::Elf => { + writeln!(generated_asm, ".size {name}, .-{name}", name = asm_name).unwrap(); + generated_asm.push_str(".text\n"); + } + BinaryFormat::Macho | BinaryFormat::Coff => {} + _ => self + .tcx + .sess + .fatal(format!("Unsupported binary format for inline asm: {binary_format:?}")), + } + generated_asm.push_str("\n\n"); generated_asm @@ -699,25 +567,26 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { fn prologue(generated_asm: &mut String, arch: InlineAsmArch) { match arch { - InlineAsmArch::X86 => { - generated_asm.push_str(" push ebp\n"); - generated_asm.push_str(" mov ebp,[esp+8]\n"); - } InlineAsmArch::X86_64 => { generated_asm.push_str(" push rbp\n"); - generated_asm.push_str(" mov rbp,rdi\n"); - } - InlineAsmArch::RiscV32 => { - generated_asm.push_str(" addi sp, sp, -8\n"); - generated_asm.push_str(" sw ra, 4(sp)\n"); - generated_asm.push_str(" sw s0, 0(sp)\n"); - generated_asm.push_str(" mv s0, a0\n"); + generated_asm.push_str(" mov rbp,rsp\n"); + generated_asm.push_str(" push rbx\n"); // rbx is callee saved + // rbx is reserved by LLVM for the "base pointer", so rustc doesn't allow using it + generated_asm.push_str(" mov rbx,rdi\n"); + } + InlineAsmArch::AArch64 => { + generated_asm.push_str(" stp fp, lr, [sp, #-32]!\n"); + generated_asm.push_str(" mov fp, sp\n"); + generated_asm.push_str(" str x19, [sp, #24]\n"); // x19 is callee saved + // x19 is reserved by LLVM for the "base pointer", so rustc doesn't allow using it + generated_asm.push_str(" mov x19, x0\n"); } InlineAsmArch::RiscV64 => { generated_asm.push_str(" addi sp, sp, -16\n"); generated_asm.push_str(" sd ra, 8(sp)\n"); - generated_asm.push_str(" sd s0, 0(sp)\n"); - generated_asm.push_str(" mv s0, a0\n"); + generated_asm.push_str(" sd s1, 0(sp)\n"); // s1 is callee saved + // s1/x9 is reserved by LLVM for the "base pointer", so rustc doesn't allow using it + generated_asm.push_str(" mv s1, a0\n"); } _ => unimplemented!("prologue for {:?}", arch), } @@ -725,22 +594,18 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { fn epilogue(generated_asm: &mut String, arch: InlineAsmArch) { match arch { - InlineAsmArch::X86 => { - generated_asm.push_str(" pop ebp\n"); - generated_asm.push_str(" ret\n"); - } InlineAsmArch::X86_64 => { + generated_asm.push_str(" pop rbx\n"); generated_asm.push_str(" pop rbp\n"); generated_asm.push_str(" ret\n"); } - InlineAsmArch::RiscV32 => { - generated_asm.push_str(" lw s0, 0(sp)\n"); - generated_asm.push_str(" lw ra, 4(sp)\n"); - generated_asm.push_str(" addi sp, sp, 8\n"); + InlineAsmArch::AArch64 => { + generated_asm.push_str(" ldr x19, [sp, #24]\n"); + generated_asm.push_str(" ldp fp, lr, [sp], #32\n"); generated_asm.push_str(" ret\n"); } InlineAsmArch::RiscV64 => { - generated_asm.push_str(" ld s0, 0(sp)\n"); + generated_asm.push_str(" ld s1, 0(sp)\n"); generated_asm.push_str(" ld ra, 8(sp)\n"); generated_asm.push_str(" addi sp, sp, 16\n"); generated_asm.push_str(" ret\n"); @@ -751,10 +616,13 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { fn epilogue_noreturn(generated_asm: &mut String, arch: InlineAsmArch) { match arch { - InlineAsmArch::X86 | InlineAsmArch::X86_64 => { + InlineAsmArch::X86_64 => { generated_asm.push_str(" ud2\n"); } - InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => { + InlineAsmArch::AArch64 => { + generated_asm.push_str(" brk #0x1\n"); + } + InlineAsmArch::RiscV64 => { generated_asm.push_str(" ebreak\n"); } _ => unimplemented!("epilogue_noreturn for {:?}", arch), @@ -768,25 +636,20 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { offset: Size, ) { match arch { - InlineAsmArch::X86 => { - write!(generated_asm, " mov [ebp+0x{:x}], ", offset.bytes()).unwrap(); - reg.emit(generated_asm, InlineAsmArch::X86, None).unwrap(); - generated_asm.push('\n'); - } InlineAsmArch::X86_64 => { - write!(generated_asm, " mov [rbp+0x{:x}], ", offset.bytes()).unwrap(); + write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap(); reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap(); generated_asm.push('\n'); } - InlineAsmArch::RiscV32 => { - generated_asm.push_str(" sw "); - reg.emit(generated_asm, InlineAsmArch::RiscV32, None).unwrap(); - writeln!(generated_asm, ", 0x{:x}(s0)", offset.bytes()).unwrap(); + InlineAsmArch::AArch64 => { + generated_asm.push_str(" str "); + reg.emit(generated_asm, InlineAsmArch::AArch64, None).unwrap(); + writeln!(generated_asm, ", [x19, 0x{:x}]", offset.bytes()).unwrap(); } InlineAsmArch::RiscV64 => { generated_asm.push_str(" sd "); reg.emit(generated_asm, InlineAsmArch::RiscV64, None).unwrap(); - writeln!(generated_asm, ", 0x{:x}(s0)", offset.bytes()).unwrap(); + writeln!(generated_asm, ", 0x{:x}(s1)", offset.bytes()).unwrap(); } _ => unimplemented!("save_register for {:?}", arch), } @@ -799,25 +662,20 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { offset: Size, ) { match arch { - InlineAsmArch::X86 => { - generated_asm.push_str(" mov "); - reg.emit(generated_asm, InlineAsmArch::X86, None).unwrap(); - writeln!(generated_asm, ", [ebp+0x{:x}]", offset.bytes()).unwrap(); - } InlineAsmArch::X86_64 => { generated_asm.push_str(" mov "); reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap(); - writeln!(generated_asm, ", [rbp+0x{:x}]", offset.bytes()).unwrap(); + writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap(); } - InlineAsmArch::RiscV32 => { - generated_asm.push_str(" lw "); - reg.emit(generated_asm, InlineAsmArch::RiscV32, None).unwrap(); - writeln!(generated_asm, ", 0x{:x}(s0)", offset.bytes()).unwrap(); + InlineAsmArch::AArch64 => { + generated_asm.push_str(" ldr "); + reg.emit(generated_asm, InlineAsmArch::AArch64, None).unwrap(); + writeln!(generated_asm, ", [x19, 0x{:x}]", offset.bytes()).unwrap(); } InlineAsmArch::RiscV64 => { generated_asm.push_str(" ld "); reg.emit(generated_asm, InlineAsmArch::RiscV64, None).unwrap(); - writeln!(generated_asm, ", 0x{:x}(s0)", offset.bytes()).unwrap(); + writeln!(generated_asm, ", 0x{:x}(s1)", offset.bytes()).unwrap(); } _ => unimplemented!("restore_register for {:?}", arch), } @@ -831,13 +689,7 @@ fn call_inline_asm<'tcx>( inputs: Vec<(Size, Value)>, outputs: Vec<(Size, CPlace<'tcx>)>, ) { - let stack_slot = fx.bcx.func.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: u32::try_from(slot_size.bytes()).unwrap(), - }); - if fx.clif_comments.enabled() { - fx.add_comment(stack_slot, "inline asm scratch slot"); - } + let stack_slot = fx.create_stack_slot(u32::try_from(slot_size.bytes()).unwrap(), 16); let inline_asm_func = fx .module @@ -857,15 +709,103 @@ fn call_inline_asm<'tcx>( } for (offset, value) in inputs { - fx.bcx.ins().stack_store(value, stack_slot, i32::try_from(offset.bytes()).unwrap()); + stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).store( + fx, + value, + MemFlags::trusted(), + ); } - let stack_slot_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0); + let stack_slot_addr = stack_slot.get_addr(fx); fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]); for (offset, place) in outputs { let ty = fx.clif_type(place.layout().ty).unwrap(); - let value = fx.bcx.ins().stack_load(ty, stack_slot, i32::try_from(offset.bytes()).unwrap()); + let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load( + fx, + ty, + MemFlags::trusted(), + ); place.write_cvalue(fx, CValue::by_val(value, place.layout())); } } + +pub(crate) fn codegen_xgetbv<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + xcr_no: Value, + ret: CPlace<'tcx>, +) { + // FIXME add .eh_frame unwind info directives + + let operands = vec![ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), + value: xcr_no, + }, + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + late: true, + place: Some(ret), + }, + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + late: true, + place: None, + }, + ]; + let options = InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM; + + let mut inputs = Vec::new(); + let mut outputs = Vec::new(); + + let mut asm_gen = InlineAssemblyGenerator { + tcx: fx.tcx, + arch: fx.tcx.sess.asm_arch.unwrap(), + enclosing_def_id: fx.instance.def_id(), + template: &[InlineAsmTemplatePiece::String( + " + xgetbv + // out = rdx << 32 | rax + shl rdx, 32 + or rax, rdx + " + .to_string(), + )], + operands: &operands, + options, + registers: Vec::new(), + stack_slots_clobber: Vec::new(), + stack_slots_input: Vec::new(), + stack_slots_output: Vec::new(), + stack_slot_size: Size::from_bytes(0), + }; + asm_gen.allocate_registers(); + asm_gen.allocate_stack_slots(); + + let inline_asm_index = fx.cx.inline_asm_index.get(); + fx.cx.inline_asm_index.set(inline_asm_index + 1); + let asm_name = format!( + "__inline_asm_{}_n{}", + fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"), + inline_asm_index + ); + + let generated_asm = asm_gen.generate_asm_wrapper(&asm_name); + fx.cx.global_asm.push_str(&generated_asm); + + for (i, operand) in operands.iter().enumerate() { + match operand { + CInlineAsmOperand::In { reg: _, value } => { + inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value)); + } + CInlineAsmOperand::Out { reg: _, late: _, place } => { + if let Some(place) = place { + outputs.push((asm_gen.stack_slots_output[i].unwrap(), *place)); + } + } + _ => unreachable!(), + } + } + + call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs); +} diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/cpuid.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/cpuid.rs deleted file mode 100644 index 5120b89c4..000000000 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/cpuid.rs +++ /dev/null @@ -1,74 +0,0 @@ -//! Emulation of a subset of the cpuid x86 instruction. - -use crate::prelude::*; - -/// Emulates a subset of the cpuid x86 instruction. -/// -/// This emulates an intel cpu with sse and sse2 support, but which doesn't support anything else. -pub(crate) fn codegen_cpuid_call<'tcx>( - fx: &mut FunctionCx<'_, '_, 'tcx>, - leaf: Value, - _sub_leaf: Value, -) -> (Value, Value, Value, Value) { - let leaf_0 = fx.bcx.create_block(); - let leaf_1 = fx.bcx.create_block(); - let leaf_7 = fx.bcx.create_block(); - let leaf_8000_0000 = fx.bcx.create_block(); - let leaf_8000_0001 = fx.bcx.create_block(); - let unsupported_leaf = fx.bcx.create_block(); - - let dest = fx.bcx.create_block(); - let eax = fx.bcx.append_block_param(dest, types::I32); - let ebx = fx.bcx.append_block_param(dest, types::I32); - let ecx = fx.bcx.append_block_param(dest, types::I32); - let edx = fx.bcx.append_block_param(dest, types::I32); - - let mut switch = cranelift_frontend::Switch::new(); - switch.set_entry(0, leaf_0); - switch.set_entry(1, leaf_1); - switch.set_entry(7, leaf_7); - switch.set_entry(0x8000_0000, leaf_8000_0000); - switch.set_entry(0x8000_0001, leaf_8000_0001); - switch.emit(&mut fx.bcx, leaf, unsupported_leaf); - - fx.bcx.switch_to_block(leaf_0); - let max_basic_leaf = fx.bcx.ins().iconst(types::I32, 1); - let vend0 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"Genu"))); - let vend2 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"ineI"))); - let vend1 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"ntel"))); - fx.bcx.ins().jump(dest, &[max_basic_leaf, vend0, vend1, vend2]); - - fx.bcx.switch_to_block(leaf_1); - let cpu_signature = fx.bcx.ins().iconst(types::I32, 0); - let additional_information = fx.bcx.ins().iconst(types::I32, 0); - let ecx_features = fx.bcx.ins().iconst(types::I32, 0); - let edx_features = fx.bcx.ins().iconst(types::I32, 1 << 25 /* sse */ | 1 << 26 /* sse2 */); - fx.bcx.ins().jump(dest, &[cpu_signature, additional_information, ecx_features, edx_features]); - - fx.bcx.switch_to_block(leaf_7); - // This leaf technically has subleaves, but we just return zero for all subleaves. - let zero = fx.bcx.ins().iconst(types::I32, 0); - fx.bcx.ins().jump(dest, &[zero, zero, zero, zero]); - - fx.bcx.switch_to_block(leaf_8000_0000); - let extended_max_basic_leaf = fx.bcx.ins().iconst(types::I32, 0); - let zero = fx.bcx.ins().iconst(types::I32, 0); - fx.bcx.ins().jump(dest, &[extended_max_basic_leaf, zero, zero, zero]); - - fx.bcx.switch_to_block(leaf_8000_0001); - let zero = fx.bcx.ins().iconst(types::I32, 0); - let proc_info_ecx = fx.bcx.ins().iconst(types::I32, 0); - let proc_info_edx = fx.bcx.ins().iconst(types::I32, 0); - fx.bcx.ins().jump(dest, &[zero, zero, proc_info_ecx, proc_info_edx]); - - fx.bcx.switch_to_block(unsupported_leaf); - crate::trap::trap_unimplemented( - fx, - "__cpuid_count arch intrinsic doesn't yet support specified leaf", - ); - - fx.bcx.switch_to_block(dest); - fx.bcx.ins().nop(); - - (eax, ebx, ecx, edx) -} diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs index 63b5402f2..e9b7daf14 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs @@ -1,10 +1,10 @@ //! Emulate LLVM intrinsics +use rustc_middle::ty::GenericArgsRef; + use crate::intrinsics::*; use crate::prelude::*; -use rustc_middle::ty::GenericArgsRef; - pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, intrinsic: &str, @@ -51,6 +51,21 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( }); } + _ if intrinsic.starts_with("llvm.fma.v") => { + intrinsic_args!(fx, args => (x,y,z); intrinsic); + + simd_trio_for_each_lane( + fx, + x, + y, + z, + ret, + &|fx, _lane_ty, _res_lane_ty, lane_x, lane_y, lane_z| { + fx.bcx.ins().fma(lane_x, lane_y, lane_z) + }, + ); + } + _ => { fx.tcx .sess diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs index c20a99159..ee098be1f 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs @@ -1,10 +1,10 @@ //! Emulate AArch64 LLVM intrinsics +use rustc_middle::ty::GenericArgsRef; + use crate::intrinsics::*; use crate::prelude::*; -use rustc_middle::ty::GenericArgsRef; - pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, intrinsic: &str, @@ -44,7 +44,9 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( }); } - _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") => { + _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") + || intrinsic.starts_with("llvm.aarch64.neon.uqadd.v") => + { intrinsic_args!(fx, args => (x, y); intrinsic); simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| { @@ -52,7 +54,9 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( }); } - _ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v") => { + _ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v") + || intrinsic.starts_with("llvm.aarch64.neon.uqsub.v") => + { intrinsic_args!(fx, args => (x, y); intrinsic); simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| { @@ -156,6 +160,106 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( }); } + _ if intrinsic.starts_with("llvm.aarch64.neon.umaxp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().umax(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.smaxp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().smax(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.uminp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().umin(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.sminp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().smin(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.fminp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmin(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.fmaxp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmax(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.addp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().iadd(x_lane, y_lane), + ); + } + + // FIXME generalize vector types + "llvm.aarch64.neon.tbl1.v16i8" => { + intrinsic_args!(fx, args => (t, idx); intrinsic); + + let zero = fx.bcx.ins().iconst(types::I8, 0); + for i in 0..16 { + let idx_lane = idx.value_lane(fx, i).load_scalar(fx); + let is_zero = + fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThanOrEqual, idx_lane, 16); + let t_idx = fx.bcx.ins().uextend(fx.pointer_type, idx_lane); + let t_lane = t.value_lane_dyn(fx, t_idx).load_scalar(fx); + let res = fx.bcx.ins().select(is_zero, zero, t_lane); + ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted()); + } + } + /* _ if intrinsic.starts_with("llvm.aarch64.neon.sshl.v") || intrinsic.starts_with("llvm.aarch64.neon.sqshl.v") diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs index e62de6b61..4c5360486 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs @@ -1,10 +1,10 @@ //! Emulate x86 LLVM intrinsics +use rustc_middle::ty::GenericArgsRef; + use crate::intrinsics::*; use crate::prelude::*; -use rustc_middle::ty::GenericArgsRef; - pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, intrinsic: &str, @@ -20,53 +20,23 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( // Used by is_x86_feature_detected!(); "llvm.x86.xgetbv" => { - // FIXME use the actual xgetbv instruction - intrinsic_args!(fx, args => (v); intrinsic); - - let v = v.load_scalar(fx); + intrinsic_args!(fx, args => (xcr_no); intrinsic); - // As of writing on XCR0 exists - fx.bcx.ins().trapnz(v, TrapCode::UnreachableCodeReached); + let xcr_no = xcr_no.load_scalar(fx); - let res = fx.bcx.ins().iconst(types::I64, 1 /* bit 0 must be set */); - ret.write_cvalue(fx, CValue::by_val(res, fx.layout_of(fx.tcx.types.i64))); + crate::inline_asm::codegen_xgetbv(fx, xcr_no, ret); } - // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8` - "llvm.x86.sse2.pmovmskb.128" - | "llvm.x86.avx2.pmovmskb" - | "llvm.x86.sse.movmsk.ps" - | "llvm.x86.sse2.movmsk.pd" => { - intrinsic_args!(fx, args => (a); intrinsic); - - let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); - let lane_ty = fx.clif_type(lane_ty).unwrap(); - assert!(lane_count <= 32); - - let mut res = fx.bcx.ins().iconst(types::I32, 0); - - for lane in (0..lane_count).rev() { - let a_lane = a.value_lane(fx, lane).load_scalar(fx); + "llvm.x86.sse3.ldu.dq" | "llvm.x86.avx.ldu.dq.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lddqu_si128&ig_expand=4009 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lddqu_si256&ig_expand=4010 + intrinsic_args!(fx, args => (ptr); intrinsic); - // cast float to int - let a_lane = match lane_ty { - types::F32 => codegen_bitcast(fx, types::I32, a_lane), - types::F64 => codegen_bitcast(fx, types::I64, a_lane), - _ => a_lane, - }; - - // extract sign bit of an int - let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1)); - - // shift sign bit into result - let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false); - res = fx.bcx.ins().ishl_imm(res, 1); - res = fx.bcx.ins().bor(res, a_lane_sign); - } - - let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32)); - ret.write_cvalue(fx, res); + // FIXME correctly handle unalignedness + let val = CValue::by_ref(Pointer::new(ptr.load_scalar(fx)), ret.layout()); + ret.write_cvalue(fx, val); } + "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => { let (x, y, kind) = match args { [x, y, kind] => (x, y, kind), @@ -74,8 +44,10 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( }; let x = codegen_operand(fx, x); let y = codegen_operand(fx, y); - let kind = crate::constant::mir_operand_get_const_val(fx, kind) - .expect("llvm.x86.sse2.cmp.* kind not const"); + let kind = match kind { + Operand::Constant(const_) => crate::constant::eval_mir_constant(fx, const_).0, + Operand::Copy(_) | Operand::Move(_) => unreachable!("{kind:?}"), + }; let flt_cc = match kind .try_to_bits(Size::from_bytes(1)) @@ -210,8 +182,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( } } } - "llvm.x86.avx2.vperm2i128" => { + "llvm.x86.avx2.vperm2i128" + | "llvm.x86.avx.vperm2f128.ps.256" + | "llvm.x86.avx.vperm2f128.pd.256" => { // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd let (a, b, imm8) = match args { [a, b, imm8] => (a, b, imm8), _ => bug!("wrong number of args for intrinsic {intrinsic}"), @@ -220,19 +196,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let b = codegen_operand(fx, b); let imm8 = codegen_operand(fx, imm8).load_scalar(fx); - let a_0 = a.value_lane(fx, 0).load_scalar(fx); - let a_1 = a.value_lane(fx, 1).load_scalar(fx); - let a_low = fx.bcx.ins().iconcat(a_0, a_1); - let a_2 = a.value_lane(fx, 2).load_scalar(fx); - let a_3 = a.value_lane(fx, 3).load_scalar(fx); - let a_high = fx.bcx.ins().iconcat(a_2, a_3); + let a_low = a.value_typed_lane(fx, fx.tcx.types.u128, 0).load_scalar(fx); + let a_high = a.value_typed_lane(fx, fx.tcx.types.u128, 1).load_scalar(fx); - let b_0 = b.value_lane(fx, 0).load_scalar(fx); - let b_1 = b.value_lane(fx, 1).load_scalar(fx); - let b_low = fx.bcx.ins().iconcat(b_0, b_1); - let b_2 = b.value_lane(fx, 2).load_scalar(fx); - let b_3 = b.value_lane(fx, 3).load_scalar(fx); - let b_high = fx.bcx.ins().iconcat(b_2, b_3); + let b_low = b.value_typed_lane(fx, fx.tcx.types.u128, 0).load_scalar(fx); + let b_high = b.value_typed_lane(fx, fx.tcx.types.u128, 1).load_scalar(fx); fn select4( fx: &mut FunctionCx<'_, '_, '_>, @@ -257,16 +225,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let control0 = imm8; let res_low = select4(fx, a_high, a_low, b_high, b_low, control0); - let (res_0, res_1) = fx.bcx.ins().isplit(res_low); let control1 = fx.bcx.ins().ushr_imm(imm8, 4); let res_high = select4(fx, a_high, a_low, b_high, b_low, control1); - let (res_2, res_3) = fx.bcx.ins().isplit(res_high); - ret.place_lane(fx, 0).to_ptr().store(fx, res_0, MemFlags::trusted()); - ret.place_lane(fx, 1).to_ptr().store(fx, res_1, MemFlags::trusted()); - ret.place_lane(fx, 2).to_ptr().store(fx, res_2, MemFlags::trusted()); - ret.place_lane(fx, 3).to_ptr().store(fx, res_3, MemFlags::trusted()); + ret.place_typed_lane(fx, fx.tcx.types.u128, 0).to_ptr().store( + fx, + res_low, + MemFlags::trusted(), + ); + ret.place_typed_lane(fx, fx.tcx.types.u128, 1).to_ptr().store( + fx, + res_high, + MemFlags::trusted(), + ); } "llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => { let a = match args { @@ -308,6 +280,512 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let val = CValue::by_val_pair(cb_out, c, layout); ret.write_cvalue(fx, val); } + "llvm.x86.sse2.pavg.b" | "llvm.x86.sse2.pavg.w" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + // FIXME use vector instructions when possible + simd_pair_for_each_lane( + fx, + a, + b, + ret, + &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| { + // (a + b + 1) >> 1 + let lane_ty = fx.bcx.func.dfg.value_type(a_lane); + let a_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), a_lane); + let b_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), b_lane); + let sum = fx.bcx.ins().iadd(a_lane, b_lane); + let num_plus_one = fx.bcx.ins().iadd_imm(sum, 1); + let res = fx.bcx.ins().ushr_imm(num_plus_one, 1); + fx.bcx.ins().ireduce(lane_ty, res) + }, + ); + } + "llvm.x86.sse2.psra.w" => { + intrinsic_args!(fx, args => (a, count); intrinsic); + + let count_lane = count.force_stack(fx).0.load(fx, types::I64, MemFlags::trusted()); + let lane_ty = fx.clif_type(a.layout().ty.simd_size_and_type(fx.tcx).1).unwrap(); + let max_count = fx.bcx.ins().iconst(types::I64, i64::from(lane_ty.bits() - 1)); + let saturated_count = fx.bcx.ins().umin(count_lane, max_count); + + // FIXME use vector instructions when possible + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, a_lane| { + fx.bcx.ins().sshr(a_lane, saturated_count) + }); + } + "llvm.x86.sse2.psad.bw" | "llvm.x86.avx2.psad.bw" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8&ig_expand=5770 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8&ig_expand=5771 + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.u8); + assert_eq!(ret_lane_ty, fx.tcx.types.u64); + assert_eq!(lane_count, ret_lane_count * 8); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.u64); + for out_lane_idx in 0..lane_count / 8 { + let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0); + + for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 { + let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx); + let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx); + + let lane_diff = fx.bcx.ins().isub(a_lane, b_lane); + let abs_lane_diff = fx.bcx.ins().iabs(lane_diff); + let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff); + lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff); + } + + let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } + "llvm.x86.ssse3.pmadd.ub.sw.128" | "llvm.x86.avx2.pmadd.ub.sw" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16&ig_expand=4267 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16&ig_expand=4270 + intrinsic_args!(fx, args => (a, b); intrinsic); + + let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.u8); + assert_eq!(ret_lane_ty, fx.tcx.types.i16); + assert_eq!(lane_count, ret_lane_count * 2); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); + for out_lane_idx in 0..lane_count / 2 { + let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0); + let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0); + + let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1); + let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1); + + let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0); + let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1); + + let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1); + + let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0); + + let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16)); + let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16)); + + let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min); + let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val); + + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } + "llvm.x86.sse2.pmadd.wd" | "llvm.x86.avx2.pmadd.wd" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16&ig_expand=4231 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16&ig_expand=4234 + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i16); + assert_eq!(ret_lane_ty, fx.tcx.types.i32); + assert_eq!(lane_count, ret_lane_count * 2); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.i32); + for out_lane_idx in 0..lane_count / 2 { + let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0); + let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0); + + let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1); + let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1); + + let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0); + let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1); + + let res_lane = fx.bcx.ins().iadd(mul0, mul1); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } + + "llvm.x86.ssse3.pmul.hr.sw.128" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16&ig_expand=4782 + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i16); + assert_eq!(ret_lane_ty, fx.tcx.types.i16); + assert_eq!(lane_count, ret_lane_count); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); + for out_lane_idx in 0..lane_count { + let a_lane = a.value_lane(fx, out_lane_idx).load_scalar(fx); + let a_lane = fx.bcx.ins().sextend(types::I32, a_lane); + let b_lane = b.value_lane(fx, out_lane_idx).load_scalar(fx); + let b_lane = fx.bcx.ins().sextend(types::I32, b_lane); + + let mul: Value = fx.bcx.ins().imul(a_lane, b_lane); + let shifted = fx.bcx.ins().ushr_imm(mul, 14); + let incremented = fx.bcx.ins().iadd_imm(shifted, 1); + let shifted_again = fx.bcx.ins().ushr_imm(incremented, 1); + + let res_lane = fx.bcx.ins().ireduce(types::I16, shifted_again); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } + + "llvm.x86.sse2.packuswb.128" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16&ig_expand=4903 + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i16); + assert_eq!(ret_lane_ty, fx.tcx.types.u8); + assert_eq!(lane_count * 2, ret_lane_count); + + let zero = fx.bcx.ins().iconst(types::I16, 0); + let max_u8 = fx.bcx.ins().iconst(types::I16, 255); + let ret_lane_layout = fx.layout_of(fx.tcx.types.u8); + + for idx in 0..lane_count { + let lane = a.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, zero); + let sat = fx.bcx.ins().umin(sat, max_u8); + let res = fx.bcx.ins().ireduce(types::I8, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + + for idx in 0..lane_count { + let lane = b.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, zero); + let sat = fx.bcx.ins().umin(sat, max_u8); + let res = fx.bcx.ins().ireduce(types::I8, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane); + } + } + + "llvm.x86.avx2.packuswb" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16&ig_expand=4906 + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i16); + assert_eq!(ret_lane_ty, fx.tcx.types.u8); + assert_eq!(lane_count * 2, ret_lane_count); + + let zero = fx.bcx.ins().iconst(types::I16, 0); + let max_u8 = fx.bcx.ins().iconst(types::I16, 255); + let ret_lane_layout = fx.layout_of(fx.tcx.types.u8); + + for idx in 0..lane_count / 2 { + let lane = a.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, zero); + let sat = fx.bcx.ins().umin(sat, max_u8); + let res = fx.bcx.ins().ireduce(types::I8, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + + for idx in 0..lane_count / 2 { + let lane = b.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, zero); + let sat = fx.bcx.ins().umin(sat, max_u8); + let res = fx.bcx.ins().ireduce(types::I8, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, lane_count / 2 + idx).write_cvalue(fx, res_lane); + } + + for idx in 0..lane_count / 2 { + let lane = a.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, zero); + let sat = fx.bcx.ins().umin(sat, max_u8); + let res = fx.bcx.ins().ireduce(types::I8, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, lane_count / 2 * 2 + idx).write_cvalue(fx, res_lane); + } + + for idx in 0..lane_count / 2 { + let lane = b.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, zero); + let sat = fx.bcx.ins().umin(sat, max_u8); + let res = fx.bcx.ins().ireduce(types::I8, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, lane_count / 2 * 3 + idx).write_cvalue(fx, res_lane); + } + } + + "llvm.x86.sse2.packssdw.128" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32&ig_expand=4889 + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i32); + assert_eq!(ret_lane_ty, fx.tcx.types.i16); + assert_eq!(lane_count * 2, ret_lane_count); + + let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16)); + let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16)); + let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); + + for idx in 0..lane_count { + let lane = a.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, min_i16); + let sat = fx.bcx.ins().umin(sat, max_i16); + let res = fx.bcx.ins().ireduce(types::I16, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + + for idx in 0..lane_count { + let lane = b.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, min_i16); + let sat = fx.bcx.ins().umin(sat, max_i16); + let res = fx.bcx.ins().ireduce(types::I16, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane); + } + } + + "llvm.x86.sse41.packusdw" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32&ig_expand=4912 + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i32); + assert_eq!(ret_lane_ty, fx.tcx.types.u16); + assert_eq!(lane_count * 2, ret_lane_count); + + let min_u16 = fx.bcx.ins().iconst(types::I32, i64::from(u16::MIN)); + let max_u16 = fx.bcx.ins().iconst(types::I32, i64::from(u16::MAX)); + let ret_lane_layout = fx.layout_of(fx.tcx.types.u16); + + for idx in 0..lane_count { + let lane = a.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().umax(lane, min_u16); + let sat = fx.bcx.ins().umin(sat, max_u16); + let res = fx.bcx.ins().ireduce(types::I16, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + + for idx in 0..lane_count { + let lane = b.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().umax(lane, min_u16); + let sat = fx.bcx.ins().umin(sat, max_u16); + let res = fx.bcx.ins().ireduce(types::I16, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane); + } + } + + "llvm.x86.avx2.packssdw" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32&ig_expand=4892 + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i32); + assert_eq!(ret_lane_ty, fx.tcx.types.i16); + assert_eq!(lane_count * 2, ret_lane_count); + + let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16)); + let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16)); + let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); + + for idx in 0..lane_count / 2 { + let lane = a.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, min_i16); + let sat = fx.bcx.ins().umin(sat, max_i16); + let res = fx.bcx.ins().ireduce(types::I16, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + + for idx in 0..lane_count / 2 { + let lane = b.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, min_i16); + let sat = fx.bcx.ins().umin(sat, max_i16); + let res = fx.bcx.ins().ireduce(types::I16, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, lane_count / 2 + idx).write_cvalue(fx, res_lane); + } + + for idx in 0..lane_count / 2 { + let lane = a.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, min_i16); + let sat = fx.bcx.ins().umin(sat, max_i16); + let res = fx.bcx.ins().ireduce(types::I16, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, lane_count / 2 * 2 + idx).write_cvalue(fx, res_lane); + } + + for idx in 0..lane_count / 2 { + let lane = b.value_lane(fx, idx).load_scalar(fx); + let sat = fx.bcx.ins().smax(lane, min_i16); + let sat = fx.bcx.ins().umin(sat, max_i16); + let res = fx.bcx.ins().ireduce(types::I16, sat); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, lane_count / 2 * 3 + idx).write_cvalue(fx, res_lane); + } + } + + "llvm.x86.pclmulqdq" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772 + intrinsic_args!(fx, args => (a, b, imm8); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i64); + assert_eq!(ret_lane_ty, fx.tcx.types.i64); + assert_eq!(lane_count, 2); + assert_eq!(ret_lane_count, 2); + + let imm8 = imm8.load_scalar(fx); + + let control0 = fx.bcx.ins().band_imm(imm8, 0b0000_0001); + let a_lane0 = a.value_lane(fx, 0).load_scalar(fx); + let a_lane1 = a.value_lane(fx, 1).load_scalar(fx); + let temp1 = fx.bcx.ins().select(control0, a_lane1, a_lane0); + + let control4 = fx.bcx.ins().band_imm(imm8, 0b0001_0000); + let b_lane0 = b.value_lane(fx, 0).load_scalar(fx); + let b_lane1 = b.value_lane(fx, 1).load_scalar(fx); + let temp2 = fx.bcx.ins().select(control4, b_lane1, b_lane0); + + fn extract_bit(fx: &mut FunctionCx<'_, '_, '_>, val: Value, bit: i64) -> Value { + let tmp = fx.bcx.ins().ushr_imm(val, bit); + fx.bcx.ins().band_imm(tmp, 1) + } + + let mut res1 = fx.bcx.ins().iconst(types::I64, 0); + for i in 0..=63 { + let x = extract_bit(fx, temp1, 0); + let y = extract_bit(fx, temp2, i); + let mut temp = fx.bcx.ins().band(x, y); + for j in 1..=i { + let x = extract_bit(fx, temp1, j); + let y = extract_bit(fx, temp2, i - j); + let z = fx.bcx.ins().band(x, y); + temp = fx.bcx.ins().bxor(temp, z); + } + let temp = fx.bcx.ins().ishl_imm(temp, i); + res1 = fx.bcx.ins().bor(res1, temp); + } + ret.place_lane(fx, 0).to_ptr().store(fx, res1, MemFlags::trusted()); + + let mut res2 = fx.bcx.ins().iconst(types::I64, 0); + for i in 64..=127 { + let mut temp = fx.bcx.ins().iconst(types::I64, 0); + for j in i - 63..=63 { + let x = extract_bit(fx, temp1, j); + let y = extract_bit(fx, temp2, i - j); + let z = fx.bcx.ins().band(x, y); + temp = fx.bcx.ins().bxor(temp, z); + } + let temp = fx.bcx.ins().ishl_imm(temp, i); + res2 = fx.bcx.ins().bor(res2, temp); + } + ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted()); + } + + "llvm.x86.avx.ptestz.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256&ig_expand=6945 + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i64); + assert_eq!(ret.layout().ty, fx.tcx.types.i32); + assert_eq!(lane_count, 4); + + let a_lane0 = a.value_lane(fx, 0).load_scalar(fx); + let a_lane1 = a.value_lane(fx, 1).load_scalar(fx); + let a_lane2 = a.value_lane(fx, 2).load_scalar(fx); + let a_lane3 = a.value_lane(fx, 3).load_scalar(fx); + let b_lane0 = b.value_lane(fx, 0).load_scalar(fx); + let b_lane1 = b.value_lane(fx, 1).load_scalar(fx); + let b_lane2 = b.value_lane(fx, 2).load_scalar(fx); + let b_lane3 = b.value_lane(fx, 3).load_scalar(fx); + + let zero0 = fx.bcx.ins().band(a_lane0, b_lane0); + let zero1 = fx.bcx.ins().band(a_lane1, b_lane1); + let zero2 = fx.bcx.ins().band(a_lane2, b_lane2); + let zero3 = fx.bcx.ins().band(a_lane3, b_lane3); + + let all_zero0 = fx.bcx.ins().bor(zero0, zero1); + let all_zero1 = fx.bcx.ins().bor(zero2, zero3); + let all_zero = fx.bcx.ins().bor(all_zero0, all_zero1); + + let res = fx.bcx.ins().icmp_imm(IntCC::Equal, all_zero, 0); + let res = CValue::by_val( + fx.bcx.ins().uextend(types::I32, res), + fx.layout_of(fx.tcx.types.i32), + ); + ret.write_cvalue(fx, res); + } + _ => { fx.tcx .sess diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs index 36e9ba9c7..bfeeb117f 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs @@ -12,23 +12,20 @@ macro_rules! intrinsic_args { } } -mod cpuid; mod llvm; mod llvm_aarch64; mod llvm_x86; mod simd; -pub(crate) use cpuid::codegen_cpuid_call; -pub(crate) use llvm::codegen_llvm_intrinsic_call; - +use cranelift_codegen::ir::AtomicRmwOp; use rustc_middle::ty; use rustc_middle::ty::layout::{HasParamEnv, ValidityRequirement}; use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths}; use rustc_middle::ty::GenericArgsRef; use rustc_span::symbol::{kw, sym, Symbol}; +pub(crate) use self::llvm::codegen_llvm_intrinsic_call; use crate::prelude::*; -use cranelift_codegen::ir::AtomicRmwOp; fn bug_on_incorrect_arg_count(intrinsic: impl std::fmt::Display) -> ! { bug!("wrong number of args for intrinsic {}", intrinsic); @@ -135,6 +132,65 @@ fn simd_pair_for_each_lane<'tcx>( } } +fn simd_horizontal_pair_for_each_lane<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + x: CValue<'tcx>, + y: CValue<'tcx>, + ret: CPlace<'tcx>, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value) -> Value, +) { + assert_eq!(x.layout(), y.layout()); + let layout = x.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + assert_eq!(lane_count, ret_lane_count); + + for lane_idx in 0..lane_count { + let src = if lane_idx < (lane_count / 2) { x } else { y }; + let src_idx = lane_idx % (lane_count / 2); + + let lhs_lane = src.value_lane(fx, src_idx * 2).load_scalar(fx); + let rhs_lane = src.value_lane(fx, src_idx * 2 + 1).load_scalar(fx); + + let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, lhs_lane, rhs_lane); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); + } +} + +fn simd_trio_for_each_lane<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + x: CValue<'tcx>, + y: CValue<'tcx>, + z: CValue<'tcx>, + ret: CPlace<'tcx>, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value, Value) -> Value, +) { + assert_eq!(x.layout(), y.layout()); + let layout = x.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + assert_eq!(lane_count, ret_lane_count); + + for lane_idx in 0..lane_count { + let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx); + let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx); + let z_lane = z.value_lane(fx, lane_idx).load_scalar(fx); + + let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, x_lane, y_lane, z_lane); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); + } +} + fn simd_reduce<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, val: CValue<'tcx>, diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index 6efbe1498..ea137c4ca 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -148,7 +148,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( let total_len = lane_count * 2; let indexes = - idx.iter().map(|idx| idx.unwrap_leaf().try_to_u16().unwrap()).collect::<Vec<u16>>(); + idx.iter().map(|idx| idx.unwrap_leaf().try_to_u32().unwrap()).collect::<Vec<u32>>(); for &idx in &indexes { assert!(u64::from(idx) < total_len, "idx {} out of range 0..{}", idx, total_len); @@ -216,8 +216,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( let indexes = { use rustc_middle::mir::interpret::*; - let idx_const = crate::constant::mir_operand_get_const_val(fx, idx) - .expect("simd_shuffle idx not const"); + let idx_const = match idx { + Operand::Constant(const_) => crate::constant::eval_mir_constant(fx, const_).0, + Operand::Copy(_) | Operand::Move(_) => unreachable!("{idx:?}"), + }; let idx_bytes = match idx_const { ConstValue::Indirect { alloc_id, offset } => { @@ -343,7 +345,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( ret.write_cvalue(fx, ret_lane); } - sym::simd_neg => { + sym::simd_neg + | sym::simd_bswap + | sym::simd_bitreverse + | sym::simd_ctlz + | sym::simd_cttz => { intrinsic_args!(fx, args => (a); intrinsic); if !a.layout().ty.is_simd() { @@ -351,16 +357,21 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( return; } - simd_for_each_lane( - fx, - a, - ret, - &|fx, lane_ty, _ret_lane_ty, lane| match lane_ty.kind() { - ty::Int(_) => fx.bcx.ins().ineg(lane), - ty::Float(_) => fx.bcx.ins().fneg(lane), - _ => unreachable!(), - }, - ); + simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| match ( + lane_ty.kind(), + intrinsic, + ) { + (ty::Int(_), sym::simd_neg) => fx.bcx.ins().ineg(lane), + (ty::Float(_), sym::simd_neg) => fx.bcx.ins().fneg(lane), + + (ty::Uint(ty::UintTy::U8) | ty::Int(ty::IntTy::I8), sym::simd_bswap) => lane, + (ty::Uint(_) | ty::Int(_), sym::simd_bswap) => fx.bcx.ins().bswap(lane), + (ty::Uint(_) | ty::Int(_), sym::simd_bitreverse) => fx.bcx.ins().bitrev(lane), + (ty::Uint(_) | ty::Int(_), sym::simd_ctlz) => fx.bcx.ins().clz(lane), + (ty::Uint(_) | ty::Int(_), sym::simd_cttz) => fx.bcx.ins().ctz(lane), + + _ => unreachable!(), + }); } sym::simd_add diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs index d01ded8ab..148193b5a 100644 --- a/compiler/rustc_codegen_cranelift/src/lib.rs +++ b/compiler/rustc_codegen_cranelift/src/lib.rs @@ -1,3 +1,6 @@ +#![cfg_attr(all(doc, not(bootstrap)), allow(internal_features))] +#![cfg_attr(all(doc, not(bootstrap)), feature(rustdoc_internals))] +#![cfg_attr(all(doc, not(bootstrap)), doc(rust_logo))] #![feature(rustc_private)] // Note: please avoid adding other feature gates where possible #![warn(rust_2018_idioms)] @@ -29,6 +32,8 @@ use std::any::Any; use std::cell::{Cell, RefCell}; use std::sync::Arc; +use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::settings::{self, Configurable}; use rustc_codegen_ssa::traits::CodegenBackend; use rustc_codegen_ssa::CodegenResults; use rustc_data_structures::profiling::SelfProfilerRef; @@ -39,9 +44,6 @@ use rustc_session::config::OutputFilenames; use rustc_session::Session; use rustc_span::Symbol; -use cranelift_codegen::isa::TargetIsa; -use cranelift_codegen::settings::{self, Configurable}; - pub use crate::config::*; use crate::prelude::*; @@ -76,22 +78,6 @@ mod value_and_place; mod vtable; mod prelude { - pub(crate) use rustc_span::{FileNameDisplayPreference, Span}; - - pub(crate) use rustc_hir::def_id::{DefId, LOCAL_CRATE}; - pub(crate) use rustc_middle::bug; - pub(crate) use rustc_middle::mir::{self, *}; - pub(crate) use rustc_middle::ty::layout::{self, LayoutOf, TyAndLayout}; - pub(crate) use rustc_middle::ty::{ - self, FloatTy, Instance, InstanceDef, IntTy, ParamEnv, Ty, TyCtxt, TypeAndMut, - TypeFoldable, TypeVisitableExt, UintTy, - }; - pub(crate) use rustc_target::abi::{Abi, FieldIdx, Scalar, Size, VariantIdx, FIRST_VARIANT}; - - pub(crate) use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; - - pub(crate) use rustc_index::Idx; - pub(crate) use cranelift_codegen::ir::condcodes::{FloatCC, IntCC}; pub(crate) use cranelift_codegen::ir::function::Function; pub(crate) use cranelift_codegen::ir::types; @@ -103,6 +89,18 @@ mod prelude { pub(crate) use cranelift_codegen::Context; pub(crate) use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable}; pub(crate) use cranelift_module::{self, DataDescription, FuncId, Linkage, Module}; + pub(crate) use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; + pub(crate) use rustc_hir::def_id::{DefId, LOCAL_CRATE}; + pub(crate) use rustc_index::Idx; + pub(crate) use rustc_middle::bug; + pub(crate) use rustc_middle::mir::{self, *}; + pub(crate) use rustc_middle::ty::layout::{self, LayoutOf, TyAndLayout}; + pub(crate) use rustc_middle::ty::{ + self, FloatTy, Instance, InstanceDef, IntTy, ParamEnv, Ty, TyCtxt, TypeAndMut, + TypeFoldable, TypeVisitableExt, UintTy, + }; + pub(crate) use rustc_span::{FileNameDisplayPreference, Span}; + pub(crate) use rustc_target::abi::{Abi, FieldIdx, Scalar, Size, VariantIdx, FIRST_VARIANT}; pub(crate) use crate::abi::*; pub(crate) use crate::base::{codegen_operand, codegen_place}; @@ -191,7 +189,7 @@ impl CodegenBackend for CraneliftCodegenBackend { } fn target_features(&self, _sess: &Session, _allow_unstable: bool) -> Vec<rustc_span::Symbol> { - vec![] + vec![] // FIXME necessary for #[cfg(target_feature] } fn print_version(&self) { @@ -263,9 +261,9 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Arc<dyn isa::Tar let preserve_frame_pointer = sess.target.options.frame_pointer != rustc_target::spec::FramePointer::MayOmit || matches!(sess.opts.cg.force_frame_pointers, Some(true)); - if preserve_frame_pointer { - flags_builder.set("preserve_frame_pointers", "true").unwrap(); - } + flags_builder + .set("preserve_frame_pointers", if preserve_frame_pointer { "true" } else { "false" }) + .unwrap(); let tls_model = match target_triple.binary_format { BinaryFormat::Elf => "elf_gd", diff --git a/compiler/rustc_codegen_cranelift/src/pointer.rs b/compiler/rustc_codegen_cranelift/src/pointer.rs index b60e56720..11ac6b946 100644 --- a/compiler/rustc_codegen_cranelift/src/pointer.rs +++ b/compiler/rustc_codegen_cranelift/src/pointer.rs @@ -1,11 +1,10 @@ //! Defines [`Pointer`] which is used to improve the quality of the generated clif ir for pointer //! operations. -use crate::prelude::*; - +use cranelift_codegen::ir::immediates::Offset32; use rustc_target::abi::Align; -use cranelift_codegen::ir::immediates::Offset32; +use crate::prelude::*; /// A pointer pointing either to a certain address, a certain stack slot or nothing. #[derive(Copy, Clone, Debug)] diff --git a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs index 0ead50c34..da84e54a9 100644 --- a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs +++ b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs @@ -63,8 +63,8 @@ use cranelift_codegen::{ ir::entities::AnyEntity, write::{FuncWriter, PlainWriter}, }; - use rustc_middle::ty::layout::FnAbiOf; +use rustc_middle::ty::print::with_no_trimmed_paths; use rustc_session::config::{OutputFilenames, OutputType}; use crate::prelude::*; @@ -80,15 +80,17 @@ impl CommentWriter { pub(crate) fn new<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Self { let enabled = should_write_ir(tcx); let global_comments = if enabled { - vec![ - format!("symbol {}", tcx.symbol_name(instance).name), - format!("instance {:?}", instance), - format!( - "abi {:?}", - RevealAllLayoutCx(tcx).fn_abi_of_instance(instance, ty::List::empty()) - ), - String::new(), - ] + with_no_trimmed_paths!({ + vec![ + format!("symbol {}", tcx.symbol_name(instance).name), + format!("instance {:?}", instance), + format!( + "abi {:?}", + RevealAllLayoutCx(tcx).fn_abi_of_instance(instance, ty::List::empty()) + ), + String::new(), + ] + }) } else { vec![] }; diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs index 45893a4f3..21ad2a835 100644 --- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs +++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs @@ -1,11 +1,10 @@ //! Definition of [`CValue`] and [`CPlace`] -use crate::prelude::*; - -use rustc_middle::ty::FnSig; - use cranelift_codegen::entity::EntityRef; use cranelift_codegen::ir::immediates::Offset32; +use rustc_middle::ty::FnSig; + +use crate::prelude::*; fn codegen_field<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, @@ -133,18 +132,11 @@ impl<'tcx> CValue<'tcx> { (ptr.get_addr(fx), vtable) } CValueInner::ByValPair(data, vtable) => { - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. - size: (u32::try_from(fx.target_config.pointer_type().bytes()).unwrap() + 15) - / 16 - * 16, - }); - let data_ptr = Pointer::stack_slot(stack_slot); - let mut flags = MemFlags::new(); - flags.set_notrap(); - data_ptr.store(fx, data, flags); + let data_ptr = fx.create_stack_slot( + u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(), + u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(), + ); + data_ptr.store(fx, data, MemFlags::trusted()); (data_ptr.get_addr(fx), vtable) } @@ -251,6 +243,34 @@ impl<'tcx> CValue<'tcx> { let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); let lane_layout = fx.layout_of(lane_ty); assert!(lane_idx < lane_count); + + match self.0 { + CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(), + CValueInner::ByRef(ptr, None) => { + let field_offset = lane_layout.size * lane_idx; + let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap()); + CValue::by_ref(field_ptr, lane_layout) + } + CValueInner::ByRef(_, Some(_)) => unreachable!(), + } + } + + /// Like [`CValue::value_field`] except using the passed type as lane type instead of the one + /// specified by the vector type. + pub(crate) fn value_typed_lane( + self, + fx: &mut FunctionCx<'_, '_, 'tcx>, + lane_ty: Ty<'tcx>, + lane_idx: u64, + ) -> CValue<'tcx> { + let layout = self.1; + assert!(layout.ty.is_simd()); + let (orig_lane_count, orig_lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + assert!( + (lane_idx + 1) * lane_layout.size <= orig_lane_count * fx.layout_of(orig_lane_ty).size + ); + match self.0 { CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(), CValueInner::ByRef(ptr, None) => { @@ -310,7 +330,8 @@ impl<'tcx> CValue<'tcx> { fx.bcx.ins().iconcat(lsb, msb) } ty::Bool | ty::Char | ty::Uint(_) | ty::Int(_) | ty::Ref(..) | ty::RawPtr(..) => { - fx.bcx.ins().iconst(clif_ty, const_val.to_bits(layout.size).unwrap() as i64) + let raw_val = const_val.size().truncate(const_val.to_bits(layout.size).unwrap()); + fx.bcx.ins().iconst(clif_ty, raw_val as i64) } ty::Float(FloatTy::F32) => { fx.bcx.ins().f32const(Ieee32::with_bits(u32::try_from(const_val).unwrap())) @@ -372,13 +393,11 @@ impl<'tcx> CPlace<'tcx> { .fatal(format!("values of type {} are too big to store on the stack", layout.ty)); } - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. - size: (u32::try_from(layout.size.bytes()).unwrap() + 15) / 16 * 16, - }); - CPlace { inner: CPlaceInner::Addr(Pointer::stack_slot(stack_slot), None), layout } + let stack_slot = fx.create_stack_slot( + u32::try_from(layout.size.bytes()).unwrap(), + u32::try_from(layout.align.pref.bytes()).unwrap(), + ); + CPlace { inner: CPlaceInner::Addr(stack_slot, None), layout } } pub(crate) fn new_var( @@ -543,13 +562,7 @@ impl<'tcx> CPlace<'tcx> { _ if src_ty.is_vector() && dst_ty.is_vector() => codegen_bitcast(fx, dst_ty, data), _ if src_ty.is_vector() || dst_ty.is_vector() => { // FIXME(bytecodealliance/wasmtime#6104) do something more efficient for transmutes between vectors and integers. - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. - size: (src_ty.bytes() + 15) / 16 * 16, - }); - let ptr = Pointer::stack_slot(stack_slot); + let ptr = fx.create_stack_slot(src_ty.bytes(), src_ty.bytes()); ptr.store(fx, data, MemFlags::trusted()); ptr.load(fx, dst_ty, MemFlags::trusted()) } @@ -749,6 +762,34 @@ impl<'tcx> CPlace<'tcx> { } } + /// Like [`CPlace::place_field`] except using the passed type as lane type instead of the one + /// specified by the vector type. + pub(crate) fn place_typed_lane( + self, + fx: &mut FunctionCx<'_, '_, 'tcx>, + lane_ty: Ty<'tcx>, + lane_idx: u64, + ) -> CPlace<'tcx> { + let layout = self.layout(); + assert!(layout.ty.is_simd()); + let (orig_lane_count, orig_lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + assert!( + (lane_idx + 1) * lane_layout.size <= orig_lane_count * fx.layout_of(orig_lane_ty).size + ); + + match self.inner { + CPlaceInner::Var(_, _) => unreachable!(), + CPlaceInner::VarPair(_, _, _) => unreachable!(), + CPlaceInner::Addr(ptr, None) => { + let field_offset = lane_layout.size * lane_idx; + let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap()); + CPlace::for_ptr(field_ptr, lane_layout) + } + CPlaceInner::Addr(_, Some(_)) => unreachable!(), + } + } + pub(crate) fn place_index( self, fx: &mut FunctionCx<'_, '_, 'tcx>, |