From 9918693037dce8aa4bb6f08741b6812923486c18 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 19 Jun 2024 11:26:03 +0200
Subject: Merging upstream version 1.76.0+dfsg1.

Signed-off-by: Daniel Baumann
---
 .../rustc_codegen_cranelift/src/abi/comments.rs | 1 -
 compiler/rustc_codegen_cranelift/src/abi/mod.rs | 3 +-
 compiler/rustc_codegen_cranelift/src/analyze.rs | 1 -
 compiler/rustc_codegen_cranelift/src/base.rs | 22 +-
 compiler/rustc_codegen_cranelift/src/common.rs | 14 +-
 .../src/concurrency_limiter.rs | 4 +-
 compiler/rustc_codegen_cranelift/src/constant.rs | 52 +-
 compiler/rustc_codegen_cranelift/src/driver/aot.rs | 2 +-
 compiler/rustc_codegen_cranelift/src/inline_asm.rs | 167 ++---
 .../rustc_codegen_cranelift/src/intrinsics/llvm.rs | 4 +-
 .../src/intrinsics/llvm_aarch64.rs | 2 -
 .../src/intrinsics/llvm_x86.rs | 715 +++++++++++++++++++--
 .../rustc_codegen_cranelift/src/intrinsics/mod.rs | 20 +-
 .../rustc_codegen_cranelift/src/intrinsics/simd.rs | 68 +-
 .../rustc_codegen_cranelift/src/pretty_clif.rs | 21 +-
 compiler/rustc_codegen_cranelift/src/unsize.rs | 114 +++-
 .../rustc_codegen_cranelift/src/value_and_place.rs | 93 ++-
 17 files changed, 995 insertions(+), 308 deletions(-)

diff --git a/compiler/rustc_codegen_cranelift/src/abi/comments.rs b/compiler/rustc_codegen_cranelift/src/abi/comments.rs
index ade6968de..a318cae17 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/comments.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/comments.rs
@@ -3,7 +3,6 @@
 use std::borrow::Cow;
 
-use rustc_middle::mir;
 use rustc_target::abi::call::PassMode;
 
 use crate::prelude::*;
 
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
index c4572e035..2c194f6d6 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
@@ -6,7 +6,7 @@ mod returning;
 
 use std::borrow::Cow;
 
-use cranelift_codegen::ir::{AbiParam, SigRef};
+use cranelift_codegen::ir::SigRef;
 use cranelift_module::ModuleError;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::ty::layout::FnAbiOf;
@@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
                 args,
                 ret_place,
                 target,
+                source_info.span,
             );
             return;
         }
 
diff --git a/compiler/rustc_codegen_cranelift/src/analyze.rs b/compiler/rustc_codegen_cranelift/src/analyze.rs
index 321612238..c5762638a 100644
--- a/compiler/rustc_codegen_cranelift/src/analyze.rs
+++ b/compiler/rustc_codegen_cranelift/src/analyze.rs
@@ -2,7 +2,6 @@
 
 use rustc_index::IndexVec;
 use rustc_middle::mir::StatementKind::*;
-use rustc_middle::ty::Ty;
 
 use crate::prelude::*;
 
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
index 91b1547cb..df40a5eb4 100644
--- a/compiler/rustc_codegen_cranelift/src/base.rs
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -176,10 +176,10 @@ pub(crate) fn compile_fn(
     match module.define_function(codegened_func.func_id, context) {
         Ok(()) => {}
         Err(ModuleError::Compilation(CodegenError::ImplLimitExceeded)) => {
-            let handler = rustc_session::EarlyErrorHandler::new(
+            let early_dcx = rustc_session::EarlyDiagCtxt::new(
                 rustc_session::config::ErrorOutputType::default(),
             );
-            handler.early_error(format!(
+            early_dcx.early_error(format!(
                 "backend implementation limit exceeded while compiling {name}",
                 name = codegened_func.symbol_name
             ));
@@ -353,7 +353,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
                         fx,
                        rustc_hir::LangItem::PanicBoundsCheck,
                        &[index, len, location],
-                        source_info.span,
+                        Some(source_info.span),
                    );
                }
                AssertKind::MisalignedPointerDereference { ref required, ref found } => {
@@ -365,7 +365,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
                        fx,
                        rustc_hir::LangItem::PanicMisalignedPointerDereference,
                        &[required, found, location],
-                        source_info.span,
+                        Some(source_info.span),
                    );
                }
                _ => {
@@ -456,7 +456,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
                    );
                }
 
-                crate::inline_asm::codegen_inline_asm(
+                crate::inline_asm::codegen_inline_asm_terminator(
                    fx,
                    source_info.span,
                    template,
@@ -945,19 +945,19 @@ pub(crate) fn codegen_panic<'tcx>(
     let msg_len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(msg_str.len()).unwrap());
 
     let args = [msg_ptr, msg_len, location];
 
-    codegen_panic_inner(fx, rustc_hir::LangItem::Panic, &args, source_info.span);
+    codegen_panic_inner(fx, rustc_hir::LangItem::Panic, &args, Some(source_info.span));
 }
 
 pub(crate) fn codegen_panic_nounwind<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     msg_str: &str,
-    source_info: mir::SourceInfo,
+    span: Option<Span>,
 ) {
     let msg_ptr = fx.anonymous_str(msg_str);
     let msg_len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(msg_str.len()).unwrap());
 
     let args = [msg_ptr, msg_len];
 
-    codegen_panic_inner(fx, rustc_hir::LangItem::PanicNounwind, &args, source_info.span);
+    codegen_panic_inner(fx, rustc_hir::LangItem::PanicNounwind, &args, span);
 }
 
 pub(crate) fn codegen_unwind_terminate<'tcx>(
@@ -967,16 +967,16 @@
 ) {
     let args = [];
 
-    codegen_panic_inner(fx, reason.lang_item(), &args, source_info.span);
+    codegen_panic_inner(fx, reason.lang_item(), &args, Some(source_info.span));
 }
 
 fn codegen_panic_inner<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     lang_item: rustc_hir::LangItem,
     args: &[Value],
-    span: Span,
+    span: Option<Span>,
 ) {
-    let def_id = fx.tcx.require_lang_item(lang_item, Some(span));
+    let def_id = fx.tcx.require_lang_item(lang_item, span);
 
     let instance = Instance::mono(fx.tcx, def_id).polymorphize(fx.tcx);
     let symbol_name = fx.tcx.symbol_name(instance).name;
 
diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs
index 63562d335..bd19a7ed0 100644
--- a/compiler/rustc_codegen_cranelift/src/common.rs
+++ b/compiler/rustc_codegen_cranelift/src/common.rs
@@ -98,11 +98,15 @@ fn clif_pair_type_from_ty<'tcx>(
 
 /// Is a pointer to this type a fat ptr?
 pub(crate) fn has_ptr_meta<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> bool {
-    let ptr_ty = Ty::new_ptr(tcx, TypeAndMut { ty, mutbl: rustc_hir::Mutability::Not });
-    match &tcx.layout_of(ParamEnv::reveal_all().and(ptr_ty)).unwrap().abi {
-        Abi::Scalar(_) => false,
-        Abi::ScalarPair(_, _) => true,
-        abi => unreachable!("Abi of ptr to {:?} is {:?}???", ty, abi),
+    if ty.is_sized(tcx, ParamEnv::reveal_all()) {
+        return false;
+    }
+
+    let tail = tcx.struct_tail_erasing_lifetimes(ty, ParamEnv::reveal_all());
+    match tail.kind() {
+        ty::Foreign(..) => false,
+        ty::Str | ty::Slice(..) | ty::Dynamic(..)
 => true,
+        _ => bug!("unexpected unsized tail: {:?}", tail),
     }
 }
 
diff --git a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
index 20f2ee4c7..967896913 100644
--- a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
+++ b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
@@ -46,7 +46,7 @@ impl ConcurrencyLimiter {
         }
     }
 
-    pub(super) fn acquire(&mut self, handler: &rustc_errors::Handler) -> ConcurrencyLimiterToken {
+    pub(super) fn acquire(&mut self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken {
         let mut state = self.state.lock().unwrap();
         loop {
             state.assert_invariants();
@@ -64,7 +64,7 @@ impl ConcurrencyLimiter {
                     // Make sure to drop the mutex guard first to prevent poisoning the mutex.
                     drop(state);
                     if let Some(err) = err {
-                        handler.fatal(err).raise();
+                        dcx.fatal(err);
                     } else {
                         // The error was already emitted, but compilation continued. Raise a silent
                         // fatal error.
 
diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs
index b0853d30e..9ffa006e5 100644
--- a/compiler/rustc_codegen_cranelift/src/constant.rs
+++ b/compiler/rustc_codegen_cranelift/src/constant.rs
@@ -1,10 +1,12 @@
 //! Handling of `static`s, `const`s and promoted allocations
 
+use std::cmp::Ordering;
+
 use cranelift_module::*;
-use rustc_data_structures::fx::{FxHashMap, FxHashSet};
+use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::mir::interpret::{read_target_uint, AllocId, GlobalAlloc, Scalar};
-use rustc_middle::mir::ConstValue;
+use rustc_middle::ty::ScalarInt;
 
 use crate::prelude::*;
 
@@ -123,7 +125,8 @@ pub(crate) fn codegen_const_value<'tcx>(
                 }
             }
             Scalar::Ptr(ptr, _size) => {
-                let (alloc_id, offset) = ptr.into_parts(); // we know the `offset` is relative
+                let (prov, offset) = ptr.into_parts(); // we know the `offset` is relative
+                let alloc_id = prov.alloc_id();
                 let base_addr = match fx.tcx.global_alloc(alloc_id) {
                     GlobalAlloc::Memory(alloc) => {
                         let data_id = data_id_for_alloc_id(
@@ -371,7 +374,8 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
         let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
         data.define(bytes.into_boxed_slice());
 
-        for &(offset, alloc_id) in alloc.provenance().ptrs().iter() {
+        for &(offset, prov) in alloc.provenance().ptrs().iter() {
+            let alloc_id = prov.alloc_id();
             let addend = {
                 let endianness = tcx.data_layout.endian;
                 let offset = offset.bytes() as usize;
@@ -430,9 +434,9 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
 pub(crate) fn mir_operand_get_const_val<'tcx>(
     fx: &FunctionCx<'_, '_, 'tcx>,
     operand: &Operand<'tcx>,
-) -> Option<ConstValue<'tcx>> {
+) -> Option<ScalarInt> {
     match operand {
-        Operand::Constant(const_) => Some(eval_mir_constant(fx, const_).0),
+        Operand::Constant(const_) => eval_mir_constant(fx, const_).0.try_to_scalar_int(),
         // FIXME(rust-lang/rust#85105): Casts like `IMM8 as u32` result in the const being stored
        // inside a temporary before being passed to the intrinsic requiring the const argument.
        // This code tries to find a single constant defining definition of the referenced local.
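        // For example, a wrapper passing `IMM8 as u32` to an intrinsic leaves MIR roughly like
        //     _3 = const 5_u8;
        //     _4 = _3 as u32;
        //     <intrinsic>(.., move _4)
        // and the walk below then recovers `5` for `_4`, giving up if the local is
        // assigned more than once or assigned by anything other than a plain use or
        // an integer-to-integer cast.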
@@ -440,7 +444,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
             if !place.projection.is_empty() {
                 return None;
             }
-            let mut computed_const_val = None;
+            let mut computed_scalar_int = None;
             for bb_data in fx.mir.basic_blocks.iter() {
                 for stmt in &bb_data.statements {
                     match &stmt.kind {
@@ -456,22 +460,38 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
                             operand,
                             ty,
                         ) => {
-                            if computed_const_val.is_some() {
+                            if computed_scalar_int.is_some() {
                                 return None; // local assigned twice
                             }
                             if !matches!(ty.kind(), ty::Uint(_) | ty::Int(_)) {
                                 return None;
                             }
-                            let const_val = mir_operand_get_const_val(fx, operand)?;
-                            if fx.layout_of(*ty).size
-                                != const_val.try_to_scalar_int()?.size()
+                            let scalar_int = mir_operand_get_const_val(fx, operand)?;
+                            let scalar_int = match fx
+                                .layout_of(*ty)
+                                .size
+                                .cmp(&scalar_int.size())
                             {
-                                return None;
-                            }
-                            computed_const_val = Some(const_val);
+                                Ordering::Equal => scalar_int,
+                                Ordering::Less => match ty.kind() {
+                                    ty::Uint(_) => ScalarInt::try_from_uint(
+                                        scalar_int.try_to_uint(scalar_int.size()).unwrap(),
+                                        fx.layout_of(*ty).size,
+                                    )
+                                    .unwrap(),
+                                    ty::Int(_) => ScalarInt::try_from_int(
+                                        scalar_int.try_to_int(scalar_int.size()).unwrap(),
+                                        fx.layout_of(*ty).size,
+                                    )
+                                    .unwrap(),
+                                    _ => unreachable!(),
+                                },
+                                Ordering::Greater => return None,
+                            };
+                            computed_scalar_int = Some(scalar_int);
                         }
                         Rvalue::Use(operand) => {
-                            computed_const_val = mir_operand_get_const_val(fx, operand)
+                            computed_scalar_int = mir_operand_get_const_val(fx, operand)
                         }
                         _ => return None,
                     }
@@ -522,7 +542,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
                     TerminatorKind::Call { .. } => {}
                 }
             }
-            computed_const_val
+            computed_scalar_int
         }
     }
 }
 
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index 11229dd42..b3ab533df 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -422,7 +422,7 @@ pub(crate) fn run_aot(
                     backend_config.clone(),
                     global_asm_config.clone(),
                     cgu.name(),
-                    concurrency_limiter.acquire(tcx.sess.diagnostic()),
+                    concurrency_limiter.acquire(tcx.sess.dcx()),
                 ),
                 module_codegen,
                 Some(rustc_middle::dep_graph::hash_result),
 
diff --git a/compiler/rustc_codegen_cranelift/src/inline_asm.rs b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
index ce0eecca8..73f4bc7c1 100644
--- a/compiler/rustc_codegen_cranelift/src/inline_asm.rs
+++ b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
@@ -3,14 +3,13 @@
 use std::fmt::Write;
 
 use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
-use rustc_middle::mir::InlineAsmOperand;
 use rustc_span::sym;
 use rustc_target::asm::*;
 use target_lexicon::BinaryFormat;
 
 use crate::prelude::*;
 
-enum CInlineAsmOperand<'tcx> {
+pub(crate) enum CInlineAsmOperand<'tcx> {
     In {
         reg: InlineAsmRegOrRegClass,
         value: Value,
@@ -34,7 +33,7 @@ enum CInlineAsmOperand<'tcx> {
     },
 }
 
-pub(crate) fn codegen_inline_asm<'tcx>(
+pub(crate) fn codegen_inline_asm_terminator<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     span: Span,
     template: &[InlineAsmTemplatePiece],
@@ -42,8 +41,6 @@ pub(crate) fn codegen_inline_asm<'tcx>(
     options: InlineAsmOptions,
     destination: Option<BasicBlock>,
 ) {
-    // FIXME add .eh_frame unwind info directives
-
     // Used by panic_abort on Windows, but uses a syntax which only happens to work with
     // asm!() by accident and breaks with the GNU assembler as well as global_asm!() for
     // the LLVM backend.
@@ -135,15 +132,33 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         })
         .collect::<Vec<_>>();
 
-    let mut inputs = Vec::new();
-    let mut outputs = Vec::new();
+    codegen_inline_asm_inner(fx, template, &operands, options);
+
+    match destination {
+        Some(destination) => {
+            let destination_block = fx.get_block(destination);
+            fx.bcx.ins().jump(destination_block, &[]);
+        }
+        None => {
+            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+        }
+    }
+}
+
+pub(crate) fn codegen_inline_asm_inner<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    template: &[InlineAsmTemplatePiece],
+    operands: &[CInlineAsmOperand<'tcx>],
+    options: InlineAsmOptions,
+) {
+    // FIXME add .eh_frame unwind info directives
 
     let mut asm_gen = InlineAssemblyGenerator {
         tcx: fx.tcx,
         arch: fx.tcx.sess.asm_arch.unwrap(),
         enclosing_def_id: fx.instance.def_id(),
         template,
-        operands: &operands,
+        operands,
         options,
         registers: Vec::new(),
         stack_slots_clobber: Vec::new(),
@@ -165,6 +180,8 @@ pub(crate) fn codegen_inline_asm<'tcx>(
     let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
     fx.cx.global_asm.push_str(&generated_asm);
 
+    let mut inputs = Vec::new();
+    let mut outputs = Vec::new();
     for (i, operand) in operands.iter().enumerate() {
         match operand {
             CInlineAsmOperand::In { reg: _, value } => {
@@ -186,16 +203,6 @@ pub(crate) fn codegen_inline_asm<'tcx>(
     }
 
     call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
-
-    match destination {
-        Some(destination) => {
-            let destination_block = fx.get_block(destination);
-            fx.bcx.ins().jump(destination_block, &[]);
-        }
-        None => {
-            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
-        }
-    }
 }
 
 struct InlineAssemblyGenerator<'a, 'tcx> {
@@ -637,8 +644,21 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
     ) {
         match arch {
             InlineAsmArch::X86_64 => {
-                write!(generated_asm, "    mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
-                reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+                match reg {
+                    InlineAsmReg::X86(reg)
+                        if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+                            && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+                    {
+                        // rustc emits x0 rather than xmm0
+                        write!(generated_asm, "    movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
+                        write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
+                            .unwrap();
+                    }
+                    _ => {
+                        write!(generated_asm, "    mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
+                        reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+                    }
+                }
                 generated_asm.push('\n');
             }
             InlineAsmArch::AArch64 => {
@@ -663,8 +683,24 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
     ) {
         match arch {
             InlineAsmArch::X86_64 => {
-                generated_asm.push_str("    mov ");
-                reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+                match reg {
+                    InlineAsmReg::X86(reg)
+                        if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+                            && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+                    {
+                        // rustc emits x0 rather than xmm0
+                        write!(
+                            generated_asm,
+                            "    movups xmm{}",
+                            reg as u32 - X86InlineAsmReg::xmm0 as u32
+                        )
+                        .unwrap();
+                    }
+                    _ => {
+                        generated_asm.push_str("    mov ");
+                        reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap()
+                    }
+                }
                 writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap();
             }
             InlineAsmArch::AArch64 => {
@@ -720,7 +756,12 @@ fn call_inline_asm<'tcx>(
     fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
 
     for (offset, place) in outputs {
-        let ty = fx.clif_type(place.layout().ty).unwrap();
+        let ty = if place.layout().ty.is_simd() {
+            let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx);
fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap() + } else { + fx.clif_type(place.layout().ty).unwrap() + }; let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load( fx, ty, @@ -729,83 +770,3 @@ fn call_inline_asm<'tcx>( place.write_cvalue(fx, CValue::by_val(value, place.layout())); } } - -pub(crate) fn codegen_xgetbv<'tcx>( - fx: &mut FunctionCx<'_, '_, 'tcx>, - xcr_no: Value, - ret: CPlace<'tcx>, -) { - // FIXME add .eh_frame unwind info directives - - let operands = vec![ - CInlineAsmOperand::In { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), - value: xcr_no, - }, - CInlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), - late: true, - place: Some(ret), - }, - CInlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), - late: true, - place: None, - }, - ]; - let options = InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM; - - let mut inputs = Vec::new(); - let mut outputs = Vec::new(); - - let mut asm_gen = InlineAssemblyGenerator { - tcx: fx.tcx, - arch: fx.tcx.sess.asm_arch.unwrap(), - enclosing_def_id: fx.instance.def_id(), - template: &[InlineAsmTemplatePiece::String( - " - xgetbv - // out = rdx << 32 | rax - shl rdx, 32 - or rax, rdx - " - .to_string(), - )], - operands: &operands, - options, - registers: Vec::new(), - stack_slots_clobber: Vec::new(), - stack_slots_input: Vec::new(), - stack_slots_output: Vec::new(), - stack_slot_size: Size::from_bytes(0), - }; - asm_gen.allocate_registers(); - asm_gen.allocate_stack_slots(); - - let inline_asm_index = fx.cx.inline_asm_index.get(); - fx.cx.inline_asm_index.set(inline_asm_index + 1); - let asm_name = format!( - "__inline_asm_{}_n{}", - fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"), - inline_asm_index - ); - - let generated_asm = asm_gen.generate_asm_wrapper(&asm_name); - fx.cx.global_asm.push_str(&generated_asm); - - for (i, operand) in operands.iter().enumerate() { - match operand { - CInlineAsmOperand::In { reg: _, value } => { - inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value)); - } - CInlineAsmOperand::Out { reg: _, late: _, place } => { - if let Some(place) = place { - outputs.push((asm_gen.stack_slots_output[i].unwrap(), *place)); - } - } - _ => unreachable!(), - } - } - - call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs); -} diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs index e9b7daf14..dbd5db875 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs @@ -1,7 +1,5 @@ //! 
Emulate LLVM intrinsics
 
-use rustc_middle::ty::GenericArgsRef;
-
 use crate::intrinsics::*;
 use crate::prelude::*;
 
@@ -12,6 +10,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
     target: Option<BasicBlock>,
+    span: Span,
 ) {
     if intrinsic.starts_with("llvm.aarch64") {
         return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
@@ -31,6 +30,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
             args,
             ret,
             target,
+            span,
         );
     }
 
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
index ee098be1f..e1e514dca 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
@@ -1,7 +1,5 @@
 //! Emulate AArch64 LLVM intrinsics
 
-use rustc_middle::ty::GenericArgsRef;
-
 use crate::intrinsics::*;
 use crate::prelude::*;
 
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
index 4c5360486..99bb5c4ea 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@@ -1,7 +1,9 @@
 //! Emulate x86 LLVM intrinsics
 
-use rustc_middle::ty::GenericArgsRef;
+use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
+use rustc_target::asm::*;
 
+use crate::inline_asm::{codegen_inline_asm_inner, CInlineAsmOperand};
 use crate::intrinsics::*;
 use crate::prelude::*;
 
@@ -12,19 +14,53 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
     target: Option<BasicBlock>,
+    span: Span,
 ) {
     match intrinsic {
         "llvm.x86.sse2.pause" | "llvm.aarch64.isb" => {
             // Spin loop hint
         }
 
+        "llvm.x86.avx.vzeroupper" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper&ig_expand=7218
+            // Do nothing. It is a perf hint anyway.
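+            // vzeroupper merely zeroes the upper bits of the YMM registers; it exists
+            // to avoid AVX-to-SSE transition penalties, so skipping it can only cost
+            // performance, never correctness.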
+ } + // Used by is_x86_feature_detected!(); "llvm.x86.xgetbv" => { intrinsic_args!(fx, args => (xcr_no); intrinsic); let xcr_no = xcr_no.load_scalar(fx); - crate::inline_asm::codegen_xgetbv(fx, xcr_no, ret); + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String( + " + xgetbv + // out = rdx << 32 | rax + shl rdx, 32 + or rax, rdx + " + .to_string(), + )], + &[ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), + value: xcr_no, + }, + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + late: true, + place: Some(ret), + }, + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + late: true, + place: None, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); } "llvm.x86.sse3.ldu.dq" | "llvm.x86.avx.ldu.dq.256" => { @@ -37,6 +73,103 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ret.write_cvalue(fx, val); } + "llvm.x86.avx2.gather.d.d" + | "llvm.x86.avx2.gather.d.q" + | "llvm.x86.avx2.gather.d.ps" + | "llvm.x86.avx2.gather.d.pd" + | "llvm.x86.avx2.gather.d.d.256" + | "llvm.x86.avx2.gather.d.q.256" + | "llvm.x86.avx2.gather.d.ps.256" + | "llvm.x86.avx2.gather.d.pd.256" + | "llvm.x86.avx2.gather.q.d" + | "llvm.x86.avx2.gather.q.q" + | "llvm.x86.avx2.gather.q.ps" + | "llvm.x86.avx2.gather.q.pd" + | "llvm.x86.avx2.gather.q.d.256" + | "llvm.x86.avx2.gather.q.q.256" + | "llvm.x86.avx2.gather.q.ps.256" + | "llvm.x86.avx2.gather.q.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd&ig_expand=3819 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd&ig_expand=3821 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd&ig_expand=3822 + // ... 
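+            // Emulated one lane at a time: hardware gathers consult only the sign bit
+            // of each mask element, so each lane branches on that bit and either loads
+            // from `ptr + index * scale` or keeps the corresponding `src` lane; any
+            // destination lanes beyond the shorter of src/index are zeroed.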
+ + intrinsic_args!(fx, args => (src, ptr, index, mask, scale); intrinsic); + + let (src_lane_count, src_lane_ty) = src.layout().ty.simd_size_and_type(fx.tcx); + let (index_lane_count, index_lane_ty) = index.layout().ty.simd_size_and_type(fx.tcx); + let (mask_lane_count, mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(src_lane_ty, ret_lane_ty); + assert!(index_lane_ty.is_integral()); + assert_eq!(src_lane_count, mask_lane_count); + assert_eq!(src_lane_count, ret_lane_count); + + let lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap(); + let index_lane_clif_ty = fx.clif_type(index_lane_ty).unwrap(); + let mask_lane_clif_ty = fx.clif_type(mask_lane_ty).unwrap(); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + let ptr = ptr.load_scalar(fx); + let scale = scale.load_scalar(fx); + let scale = fx.bcx.ins().uextend(types::I64, scale); + for lane_idx in 0..std::cmp::min(src_lane_count, index_lane_count) { + let src_lane = src.value_lane(fx, lane_idx).load_scalar(fx); + let index_lane = index.value_lane(fx, lane_idx).load_scalar(fx); + let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx); + let mask_lane = + fx.bcx.ins().bitcast(mask_lane_clif_ty.as_int(), MemFlags::new(), mask_lane); + + let if_enabled = fx.bcx.create_block(); + let if_disabled = fx.bcx.create_block(); + let next = fx.bcx.create_block(); + let res_lane = fx.bcx.append_block_param(next, lane_clif_ty); + + let mask_lane = match mask_lane_clif_ty { + types::I32 | types::F32 => { + fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64) + } + types::I64 | types::F64 => { + fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64) + } + _ => unreachable!(), + }; + fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]); + fx.bcx.seal_block(if_enabled); + fx.bcx.seal_block(if_disabled); + + fx.bcx.switch_to_block(if_enabled); + let index_lane = if index_lane_clif_ty != types::I64 { + fx.bcx.ins().sextend(types::I64, index_lane) + } else { + index_lane + }; + let offset = fx.bcx.ins().imul(index_lane, scale); + let lane_ptr = fx.bcx.ins().iadd(ptr, offset); + let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), lane_ptr, 0); + fx.bcx.ins().jump(next, &[res]); + + fx.bcx.switch_to_block(if_disabled); + fx.bcx.ins().jump(next, &[src_lane]); + + fx.bcx.seal_block(next); + fx.bcx.switch_to_block(next); + + fx.bcx.ins().nop(); + + ret.place_lane(fx, lane_idx) + .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout)); + } + + for lane_idx in std::cmp::min(src_lane_count, index_lane_count)..ret_lane_count { + let zero_lane = fx.bcx.ins().iconst(mask_lane_clif_ty.as_int(), 0); + let zero_lane = fx.bcx.ins().bitcast(mask_lane_clif_ty, MemFlags::new(), zero_lane); + ret.place_lane(fx, lane_idx) + .write_cvalue(fx, CValue::by_val(zero_lane, ret_lane_layout)); + } + } + "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => { let (x, y, kind) = match args { [x, y, kind] => (x, y, kind), @@ -241,16 +374,31 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ); } "llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => { - let a = match args { - [a] => a, - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); + intrinsic_args!(fx, args => (a); intrinsic); simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { fx.bcx.ins().iabs(lane) }); } + "llvm.x86.sse2.cvttps2dq" => { + // 
https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32&ig_expand=2429 + intrinsic_args!(fx, args => (a); intrinsic); + let a = a.load_scalar(fx); + + // Using inline asm instead of fcvt_to_sint_sat as unrepresentable values are turned + // into 0x80000000 for which Cranelift doesn't have a native instruction. + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("cvttps2dq xmm0, xmm0"))], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } "llvm.x86.addcarry.32" | "llvm.x86.addcarry.64" => { intrinsic_args!(fx, args => (c_in, a, b); intrinsic); let c_in = c_in.load_scalar(fx); @@ -332,9 +480,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for out_lane_idx in 0..lane_count / 8 { let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0); - for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 { + for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 { let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx); + let a_lane = fx.bcx.ins().uextend(types::I16, a_lane); let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx); + let b_lane = fx.bcx.ins().uextend(types::I16, b_lane); let lane_diff = fx.bcx.ins().isub(a_lane, b_lane); let abs_lane_diff = fx.bcx.ins().iabs(lane_diff); @@ -405,12 +555,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let ret_lane_layout = fx.layout_of(fx.tcx.types.i32); for out_lane_idx in 0..lane_count / 2 { let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx); - let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0); + let a_lane0 = fx.bcx.ins().sextend(types::I32, a_lane0); let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx); let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0); let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); - let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1); + let a_lane1 = fx.bcx.ins().sextend(types::I32, a_lane1); let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1); @@ -565,14 +715,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( assert_eq!(ret_lane_ty, fx.tcx.types.i16); assert_eq!(lane_count * 2, ret_lane_count); - let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16)); - let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16)); + let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64); + let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64); let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); for idx in 0..lane_count { let lane = a.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -582,7 +732,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count { let lane = b.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -609,8 +759,8 @@ pub(crate) fn 
codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count { let lane = a.value_lane(fx, idx).load_scalar(fx); - let sat = fx.bcx.ins().umax(lane, min_u16); - let sat = fx.bcx.ins().umin(sat, max_u16); + let sat = fx.bcx.ins().smax(lane, min_u16); + let sat = fx.bcx.ins().smin(sat, max_u16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -619,8 +769,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count { let lane = b.value_lane(fx, idx).load_scalar(fx); - let sat = fx.bcx.ins().umax(lane, min_u16); - let sat = fx.bcx.ins().umin(sat, max_u16); + let sat = fx.bcx.ins().smax(lane, min_u16); + let sat = fx.bcx.ins().smin(sat, max_u16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -641,14 +791,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( assert_eq!(ret_lane_ty, fx.tcx.types.i16); assert_eq!(lane_count * 2, ret_lane_count); - let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16)); - let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16)); + let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64); + let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64); let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); for idx in 0..lane_count / 2 { let lane = a.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -658,7 +808,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count / 2 { let lane = b.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -668,7 +818,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count / 2 { let lane = a.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -678,7 +828,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count / 2 { let lane = b.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -686,66 +836,489 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( } } - "llvm.x86.pclmulqdq" => { - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772 - intrinsic_args!(fx, args => (a, b, imm8); intrinsic); + "llvm.x86.fma.vfmaddsub.ps" + | "llvm.x86.fma.vfmaddsub.pd" + | "llvm.x86.fma.vfmaddsub.ps.256" + | "llvm.x86.fma.vfmaddsub.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181 + // 
https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185 + intrinsic_args!(fx, args => (a, b, c); intrinsic); assert_eq!(a.layout(), b.layout()); + assert_eq!(a.layout(), c.layout()); let layout = a.layout(); let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); - assert_eq!(lane_ty, fx.tcx.types.i64); - assert_eq!(ret_lane_ty, fx.tcx.types.i64); - assert_eq!(lane_count, 2); - assert_eq!(ret_lane_count, 2); + assert!(lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(lane_count, ret_lane_count); + let ret_lane_layout = fx.layout_of(ret_lane_ty); - let imm8 = imm8.load_scalar(fx); + for idx in 0..lane_count { + let a_lane = a.value_lane(fx, idx).load_scalar(fx); + let b_lane = b.value_lane(fx, idx).load_scalar(fx); + let c_lane = c.value_lane(fx, idx).load_scalar(fx); + + let mul = fx.bcx.ins().fmul(a_lane, b_lane); + let res = if idx & 1 == 0 { + fx.bcx.ins().fsub(mul, c_lane) + } else { + fx.bcx.ins().fadd(mul, c_lane) + }; - let control0 = fx.bcx.ins().band_imm(imm8, 0b0000_0001); - let a_lane0 = a.value_lane(fx, 0).load_scalar(fx); - let a_lane1 = a.value_lane(fx, 1).load_scalar(fx); - let temp1 = fx.bcx.ins().select(control0, a_lane1, a_lane0); + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + } - let control4 = fx.bcx.ins().band_imm(imm8, 0b0001_0000); - let b_lane0 = b.value_lane(fx, 0).load_scalar(fx); - let b_lane1 = b.value_lane(fx, 1).load_scalar(fx); - let temp2 = fx.bcx.ins().select(control4, b_lane1, b_lane0); + "llvm.x86.fma.vfmsubadd.ps" + | "llvm.x86.fma.vfmsubadd.pd" + | "llvm.x86.fma.vfmsubadd.ps.256" + | "llvm.x86.fma.vfmsubadd.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305 + intrinsic_args!(fx, args => (a, b, c); intrinsic); - fn extract_bit(fx: &mut FunctionCx<'_, '_, '_>, val: Value, bit: i64) -> Value { - let tmp = fx.bcx.ins().ushr_imm(val, bit); - fx.bcx.ins().band_imm(tmp, 1) - } + assert_eq!(a.layout(), b.layout()); + assert_eq!(a.layout(), c.layout()); + let layout = a.layout(); - let mut res1 = fx.bcx.ins().iconst(types::I64, 0); - for i in 0..=63 { - let x = extract_bit(fx, temp1, 0); - let y = extract_bit(fx, temp2, i); - let mut temp = fx.bcx.ins().band(x, y); - for j in 1..=i { - let x = extract_bit(fx, temp1, j); - let y = extract_bit(fx, temp2, i - j); - let z = fx.bcx.ins().band(x, y); - temp = fx.bcx.ins().bxor(temp, z); - } - let temp = fx.bcx.ins().ishl_imm(temp, i); - res1 = fx.bcx.ins().bor(res1, temp); + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert!(lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(lane_count, ret_lane_count); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + for idx in 0..lane_count { + let a_lane = 
a.value_lane(fx, idx).load_scalar(fx); + let b_lane = b.value_lane(fx, idx).load_scalar(fx); + let c_lane = c.value_lane(fx, idx).load_scalar(fx); + + let mul = fx.bcx.ins().fmul(a_lane, b_lane); + let res = if idx & 1 == 0 { + fx.bcx.ins().fadd(mul, c_lane) + } else { + fx.bcx.ins().fsub(mul, c_lane) + }; + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); } - ret.place_lane(fx, 0).to_ptr().store(fx, res1, MemFlags::trusted()); - - let mut res2 = fx.bcx.ins().iconst(types::I64, 0); - for i in 64..=127 { - let mut temp = fx.bcx.ins().iconst(types::I64, 0); - for j in i - 63..=63 { - let x = extract_bit(fx, temp1, j); - let y = extract_bit(fx, temp2, i - j); - let z = fx.bcx.ins().band(x, y); - temp = fx.bcx.ins().bxor(temp, z); - } - let temp = fx.bcx.ins().ishl_imm(temp, i); - res2 = fx.bcx.ins().bor(res2, temp); + } + + "llvm.x86.fma.vfnmadd.ps" + | "llvm.x86.fma.vfnmadd.pd" + | "llvm.x86.fma.vfnmadd.ps.256" + | "llvm.x86.fma.vfnmadd.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371 + intrinsic_args!(fx, args => (a, b, c); intrinsic); + + assert_eq!(a.layout(), b.layout()); + assert_eq!(a.layout(), c.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert!(lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(lane_count, ret_lane_count); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + for idx in 0..lane_count { + let a_lane = a.value_lane(fx, idx).load_scalar(fx); + let b_lane = b.value_lane(fx, idx).load_scalar(fx); + let c_lane = c.value_lane(fx, idx).load_scalar(fx); + + let mul = fx.bcx.ins().fmul(a_lane, b_lane); + let neg_mul = fx.bcx.ins().fneg(mul); + let res = fx.bcx.ins().fadd(neg_mul, c_lane); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); } - ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted()); + } + + "llvm.x86.sse42.pcmpestri128" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939 + intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic); + + let a = a.load_scalar(fx); + let la = la.load_scalar(fx); + let b = b.load_scalar(fx); + let lb = lb.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4]) + { + imm8 + } else { + fx.tcx.sess.span_fatal(span, "Index argument for `_mm_cmpestri` is not a constant"); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("pcmpestri xmm0, xmm1, {imm8}"))], + &[ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + value: a, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: b, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + 
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + value: la, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + value: lb, + }, + // Implicit result of the pcmpestri intrinsic + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), + late: true, + place: Some(ret), + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sse42.pcmpestrm128" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm&ig_expand=940 + intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic); + + let a = a.load_scalar(fx); + let la = la.load_scalar(fx); + let b = b.load_scalar(fx); + let lb = lb.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4]) + { + imm8 + } else { + fx.tcx.sess.span_fatal(span, "Index argument for `_mm_cmpestrm` is not a constant"); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("pcmpestrm xmm0, xmm1, {imm8}"))], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: b, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + value: la, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + value: lb, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.pclmulqdq" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772 + intrinsic_args!(fx, args => (a, b, _imm8); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[2]) + { + imm8 + } else { + fx.tcx.sess.span_fatal( + span, + "Index argument for `_mm_clmulepi64_si128` is not a constant", + ); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("pclmulqdq xmm0, xmm1, {imm8}"))], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aeskeygenassist" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128&ig_expand=261 + intrinsic_args!(fx, args => (a, _imm8); intrinsic); + + let a = a.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1]) + { + imm8 + } else { + fx.tcx.sess.span_fatal( + span, + "Index argument for `_mm_aeskeygenassist_si128` is not a 
constant", + ); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("aeskeygenassist xmm0, xmm0, {imm8}"))], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesimc" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128&ig_expand=260 + intrinsic_args!(fx, args => (a); intrinsic); + + let a = a.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesimc xmm0, xmm0".to_string())], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesenc" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128&ig_expand=252 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesenc xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesenclast" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128&ig_expand=257 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesenclast xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesdec" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128&ig_expand=242 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesdec xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesdeclast" => { + // 
https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128&ig_expand=247 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesdeclast xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sha256rnds2" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977 + intrinsic_args!(fx, args => (a, b, k); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + let k = k.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256rnds2 xmm1, xmm2".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)), + value: b, + }, + // Implicit argument to the sha256rnds2 instruction + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + value: k, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sha256msg1" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32&ig_expand=5975 + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256msg1 xmm1, xmm2".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sha256msg2" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32&ig_expand=5976 + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256msg2 xmm1, xmm2".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); } "llvm.x86.avx.ptestz.256" => { diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs index bfeeb117f..68126f124 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs @@ -487,13 
+487,12 @@ fn codegen_regular_intrinsic_call<'tcx>( let layout = fx.layout_of(generic_args.type_at(0)); // Note: Can't use is_unsized here as truly unsized types need to take the fixed size // branch - let size = if let Abi::ScalarPair(_, _) = ptr.layout().abi { - let (_ptr, info) = ptr.load_scalar_pair(fx); - let (size, _align) = crate::unsize::size_and_align_of_dst(fx, layout, info); - size + let meta = if let Abi::ScalarPair(_, _) = ptr.layout().abi { + Some(ptr.load_scalar_pair(fx).1) } else { - fx.bcx.ins().iconst(fx.pointer_type, layout.size.bytes() as i64) + None }; + let (size, _align) = crate::unsize::size_and_align_of(fx, layout, meta); ret.write_cvalue(fx, CValue::by_val(size, usize_layout)); } sym::min_align_of_val => { @@ -502,13 +501,12 @@ fn codegen_regular_intrinsic_call<'tcx>( let layout = fx.layout_of(generic_args.type_at(0)); // Note: Can't use is_unsized here as truly unsized types need to take the fixed size // branch - let align = if let Abi::ScalarPair(_, _) = ptr.layout().abi { - let (_ptr, info) = ptr.load_scalar_pair(fx); - let (_size, align) = crate::unsize::size_and_align_of_dst(fx, layout, info); - align + let meta = if let Abi::ScalarPair(_, _) = ptr.layout().abi { + Some(ptr.load_scalar_pair(fx).1) } else { - fx.bcx.ins().iconst(fx.pointer_type, layout.align.abi.bytes() as i64) + None }; + let (_size, align) = crate::unsize::size_and_align_of(fx, layout, meta); ret.write_cvalue(fx, CValue::by_val(align, usize_layout)); } @@ -688,7 +686,7 @@ fn codegen_regular_intrinsic_call<'tcx>( } }) }); - crate::base::codegen_panic_nounwind(fx, &msg_str, source_info); + crate::base::codegen_panic_nounwind(fx, &msg_str, Some(source_info.span)); return; } } diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index ea137c4ca..fe4f073f7 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -1,7 +1,6 @@ //! Codegen `extern "platform-intrinsic"` intrinsics. 
-use rustc_middle::ty::GenericArgsRef; -use rustc_span::Symbol; +use cranelift_codegen::ir::immediates::Offset32; use rustc_target::abi::Endian; use super::*; @@ -282,11 +281,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant"); }; - let idx = idx_const - .try_to_bits(Size::from_bytes(4 /* u32*/)) - .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); + let idx: u32 = idx_const + .try_to_u32() + .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const)); let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx); - if idx >= lane_count.into() { + if u64::from(idx) >= lane_count { fx.tcx.sess.span_fatal( fx.mir.span, format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count), @@ -331,10 +330,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; let idx = idx_const - .try_to_bits(Size::from_bytes(4 /* u32*/)) - .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); + .try_to_u32() + .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const)); let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx); - if idx >= lane_count.into() { + if u64::from(idx) >= lane_count { fx.tcx.sess.span_fatal( fx.mir.span, format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count), @@ -1008,8 +1007,57 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( } } + sym::simd_masked_load => { + intrinsic_args!(fx, args => (mask, ptr, val); intrinsic); + + let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); + let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(val_lane_count, mask_lane_count); + assert_eq!(val_lane_count, ret_lane_count); + + let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap(); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + let ptr_val = ptr.load_scalar(fx); + + for lane_idx in 0..ret_lane_count { + let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx); + let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx); + + let if_enabled = fx.bcx.create_block(); + let if_disabled = fx.bcx.create_block(); + let next = fx.bcx.create_block(); + let res_lane = fx.bcx.append_block_param(next, lane_clif_ty); + + fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]); + fx.bcx.seal_block(if_enabled); + fx.bcx.seal_block(if_disabled); + + fx.bcx.switch_to_block(if_enabled); + let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32; + let res = fx.bcx.ins().load( + lane_clif_ty, + MemFlags::trusted(), + ptr_val, + Offset32::new(offset), + ); + fx.bcx.ins().jump(next, &[res]); + + fx.bcx.switch_to_block(if_disabled); + fx.bcx.ins().jump(next, &[val_lane]); + + fx.bcx.seal_block(next); + fx.bcx.switch_to_block(next); + + fx.bcx.ins().nop(); + + ret.place_lane(fx, lane_idx) + .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout)); + } + } + sym::simd_scatter => { - intrinsic_args!(fx, args => (val, ptr, mask); intrinsic); + intrinsic_args!(fx, args => (mask, ptr, val); intrinsic); let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx); diff --git a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs index da84e54a9..196418023 100644 --- a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs +++ 
diff --git a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
index da84e54a9..196418023 100644
--- a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
+++ b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
@@ -58,11 +58,10 @@
 use std::fmt;
 use std::io::Write;
 
-use cranelift_codegen::{
-    entity::SecondaryMap,
-    ir::entities::AnyEntity,
-    write::{FuncWriter, PlainWriter},
-};
+use cranelift_codegen::entity::SecondaryMap;
+use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::ir::Fact;
+use cranelift_codegen::write::{FuncWriter, PlainWriter};
 use rustc_middle::ty::layout::FnAbiOf;
 use rustc_middle::ty::print::with_no_trimmed_paths;
 use rustc_session::config::{OutputFilenames, OutputType};
@@ -155,8 +154,13 @@ impl FuncWriter for &'_ CommentWriter {
         _func: &Function,
         entity: AnyEntity,
         value: &dyn fmt::Display,
+        maybe_fact: Option<&Fact>,
     ) -> fmt::Result {
-        write!(w, " {} = {}", entity, value)?;
+        if let Some(fact) = maybe_fact {
+            write!(w, " {} ! {} = {}", entity, fact, value)?;
+        } else {
+            write!(w, " {} = {}", entity, value)?;
+        }
 
         if let Some(comment) = self.entity_comments.get(&entity) {
             writeln!(w, " ; {}", comment.replace('\n', "\n; "))
@@ -227,9 +231,8 @@ pub(crate) fn write_ir_file(
     let res = std::fs::File::create(clif_file_name).and_then(|mut file| write(&mut file));
     if let Err(err) = res {
         // Using early_warn as no Session is available here
-        let handler = rustc_session::EarlyErrorHandler::new(
-            rustc_session::config::ErrorOutputType::default(),
-        );
+        let handler =
+            rustc_session::EarlyDiagCtxt::new(rustc_session::config::ErrorOutputType::default());
         handler.early_warn(format!("error writing ir file: {}", err));
     }
 }
diff --git a/compiler/rustc_codegen_cranelift/src/unsize.rs b/compiler/rustc_codegen_cranelift/src/unsize.rs
index c6133f2b3..f777e1137 100644
--- a/compiler/rustc_codegen_cranelift/src/unsize.rs
+++ b/compiler/rustc_codegen_cranelift/src/unsize.rs
@@ -2,6 +2,9 @@
 //!
 //! [`PointerCoercion::Unsize`]: `rustc_middle::ty::adjustment::PointerCoercion::Unsize`
 
+use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths};
+
+use crate::base::codegen_panic_nounwind;
 use crate::prelude::*;
 
 // Adapted from https://github.com/rust-lang/rust/blob/2a663555ddf36f6b041445894a8c175cd1bc718c/src/librustc_codegen_ssa/base.rs#L159-L307
@@ -187,63 +190,113 @@ pub(crate) fn coerce_dyn_star<'tcx>(
 
 // Adapted from https://github.com/rust-lang/rust/blob/2a663555ddf36f6b041445894a8c175cd1bc718c/src/librustc_codegen_ssa/glue.rs
 
-pub(crate) fn size_and_align_of_dst<'tcx>(
+pub(crate) fn size_and_align_of<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     layout: TyAndLayout<'tcx>,
-    info: Value,
+    info: Option<Value>,
 ) -> (Value, Value) {
-    assert!(layout.is_unsized() || layout.abi == Abi::Uninhabited);
-    match layout.ty.kind() {
+    if layout.is_sized() {
+        return (
+            fx.bcx.ins().iconst(fx.pointer_type, layout.size.bytes() as i64),
+            fx.bcx.ins().iconst(fx.pointer_type, layout.align.abi.bytes() as i64),
+        );
+    }
+
+    let ty = layout.ty;
+    match ty.kind() {
         ty::Dynamic(..) => {
             // load size/align from vtable
-            (crate::vtable::size_of_obj(fx, info), crate::vtable::min_align_of_obj(fx, info))
+            (
+                crate::vtable::size_of_obj(fx, info.unwrap()),
+                crate::vtable::min_align_of_obj(fx, info.unwrap()),
+            )
         }
         ty::Slice(_) | ty::Str => {
             let unit = layout.field(fx, 0);
             // The info in this case is the length of the str, so the size is that
             // times the unit size.
             (
-                fx.bcx.ins().imul_imm(info, unit.size.bytes() as i64),
+                fx.bcx.ins().imul_imm(info.unwrap(), unit.size.bytes() as i64),
                 fx.bcx.ins().iconst(fx.pointer_type, unit.align.abi.bytes() as i64),
             )
         }
-        _ => {
+        ty::Foreign(_) => {
+            let trap_block = fx.bcx.create_block();
+            let true_ = fx.bcx.ins().iconst(types::I8, 1);
+            let next_block = fx.bcx.create_block();
+            fx.bcx.ins().brif(true_, trap_block, &[], next_block, &[]);
+            fx.bcx.seal_block(trap_block);
+            fx.bcx.seal_block(next_block);
+            fx.bcx.switch_to_block(trap_block);
+
+            // `extern` type. We cannot compute the size, so panic.
+            let msg_str = with_no_visible_paths!({
+                with_no_trimmed_paths!({
+                    format!("attempted to compute the size or alignment of extern type `{ty}`")
+                })
+            });
+
+            codegen_panic_nounwind(fx, &msg_str, None);
+
+            fx.bcx.switch_to_block(next_block);
+
+            // This function does not return so we can now return whatever we want.
+            let size = fx.bcx.ins().iconst(fx.pointer_type, 42);
+            let align = fx.bcx.ins().iconst(fx.pointer_type, 42);
+            (size, align)
+        }
+        ty::Adt(..) | ty::Tuple(..) => {
             // First get the size of all statically known fields.
             // Don't use size_of because it also rounds up to alignment, which we
             // want to avoid, as the unsized field's alignment could be smaller.
             assert!(!layout.ty.is_simd());
 
             let i = layout.fields.count() - 1;
-            let sized_size = layout.fields.offset(i).bytes();
+            let unsized_offset_unadjusted = layout.fields.offset(i).bytes();
+            let unsized_offset_unadjusted =
+                fx.bcx.ins().iconst(fx.pointer_type, unsized_offset_unadjusted as i64);
             let sized_align = layout.align.abi.bytes();
             let sized_align = fx.bcx.ins().iconst(fx.pointer_type, sized_align as i64);
 
             // Recurse to get the size of the dynamically sized field (must be
             // the last field).
             let field_layout = layout.field(fx, i);
-            let (unsized_size, mut unsized_align) = size_and_align_of_dst(fx, field_layout, info);
-
-            // FIXME (#26403, #27023): We should be adding padding
-            // to `sized_size` (to accommodate the `unsized_align`
-            // required of the unsized field that follows) before
-            // summing it with `sized_size`. (Note that since #26403
-            // is unfixed, we do not yet add the necessary padding
-            // here. But this is where the add would go.)
-
-            // Return the sum of sizes and max of aligns.
-            let size = fx.bcx.ins().iadd_imm(unsized_size, sized_size as i64);
-
-            // Packed types ignore the alignment of their fields.
-            if let ty::Adt(def, _) = layout.ty.kind() {
-                if def.repr().packed() {
-                    unsized_align = sized_align;
+            let (unsized_size, mut unsized_align) = size_and_align_of(fx, field_layout, info);
+
+            // # First compute the dynamic alignment
+
+            // For packed types, we need to cap the alignment.
+            if let ty::Adt(def, _) = ty.kind() {
+                if let Some(packed) = def.repr().pack {
+                    if packed.bytes() == 1 {
+                        // We know this will be capped to 1.
+                        unsized_align = fx.bcx.ins().iconst(fx.pointer_type, 1);
+                    } else {
+                        // We have to dynamically compute `min(unsized_align, packed)`.
+                        let packed = fx.bcx.ins().iconst(fx.pointer_type, packed.bytes() as i64);
+                        let cmp = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, unsized_align, packed);
+                        unsized_align = fx.bcx.ins().select(cmp, unsized_align, packed);
+                    }
                 }
             }
 
             // Choose max of two known alignments (combined value must
             // be aligned according to more restrictive of the two).
             let cmp = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, sized_align, unsized_align);
-            let align = fx.bcx.ins().select(cmp, sized_align, unsized_align);
+            let full_align = fx.bcx.ins().select(cmp, sized_align, unsized_align);
+
+            // # Then compute the dynamic size
+
+            // The full formula for the size would be:
+            // let unsized_offset_adjusted = unsized_offset_unadjusted.align_to(unsized_align);
+            // let full_size = (unsized_offset_adjusted + unsized_size).align_to(full_align);
+            // However, `unsized_size` is a multiple of `unsized_align`.
+            // Therefore, we can equivalently do the `align_to(unsized_align)` *after* adding `unsized_size`:
+            // let full_size = (unsized_offset_unadjusted + unsized_size).align_to(unsized_align).align_to(full_align);
+            // Furthermore, `align >= unsized_align`, and therefore we only need to do:
+            // let full_size = (unsized_offset_unadjusted + unsized_size).align_to(full_align);
+
+            let full_size = fx.bcx.ins().iadd(unsized_offset_unadjusted, unsized_size);
 
             // Issue #27023: must add any necessary padding to `size`
             // (to make it a multiple of `align`) before returning it.
@@ -255,12 +308,13 @@ pub(crate) fn size_and_align_of_dst<'tcx>(
             // emulated via the semi-standard fast bit trick:
             //
             //   `(size + (align-1)) & -align`
-            let addend = fx.bcx.ins().iadd_imm(align, -1);
-            let add = fx.bcx.ins().iadd(size, addend);
-            let neg = fx.bcx.ins().ineg(align);
-            let size = fx.bcx.ins().band(add, neg);
+            let addend = fx.bcx.ins().iadd_imm(full_align, -1);
+            let add = fx.bcx.ins().iadd(full_size, addend);
+            let neg = fx.bcx.ins().ineg(full_align);
+            let full_size = fx.bcx.ins().band(add, neg);
 
-            (size, align)
+            (full_size, full_align)
         }
+        _ => bug!("size_and_align_of_dst: {ty} not supported"),
     }
 }
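The rewritten tail of `size_and_align_of` leans on the power-of-two rounding identity `(size + (align - 1)) & -align`, which rounds `size` up to the next multiple of `align`. A quick sanity check of both the identity and the `full_size` formula from the comments above, using an illustrative DST (`WithTail` is not from the patch):

    fn align_to(size: u64, align: u64) -> u64 {
        // Only valid because alignments are always powers of two.
        (size + (align - 1)) & align.wrapping_neg()
    }

    struct WithTail<T: ?Sized> {
        _byte: u8,
        tail: T,
    }

    fn main() {
        assert_eq!(align_to(5, 8), 8);   // rounds up
        assert_eq!(align_to(16, 8), 16); // already aligned, unchanged

        // full_size = align_to(unsized_offset_unadjusted + unsized_size, full_align):
        // the tail starts at offset 4, the [u32] data occupies 12 bytes, align is 4.
        let v: &WithTail<[u32]> = &WithTail { _byte: 0, tail: [1u32, 2, 3] };
        assert_eq!(std::mem::size_of_val(v), align_to(4 + 12, 4) as usize);
    }
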
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index 21ad2a835..567a5669d 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -20,34 +20,36 @@ fn codegen_field<'tcx>(
         (base.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap()), field_layout)
     };
 
-    if let Some(extra) = extra {
-        if field_layout.is_sized() {
-            return simple(fx);
-        }
-        match field_layout.ty.kind() {
-            ty::Slice(..) | ty::Str | ty::Foreign(..) => simple(fx),
-            ty::Adt(def, _) if def.repr().packed() => {
-                assert_eq!(layout.align.abi.bytes(), 1);
-                simple(fx)
-            }
-            _ => {
-                // We have to align the offset for DST's
-                let unaligned_offset = field_offset.bytes();
-                let (_, unsized_align) =
-                    crate::unsize::size_and_align_of_dst(fx, field_layout, extra);
+    if field_layout.is_sized() {
+        return simple(fx);
+    }
+    match field_layout.ty.kind() {
+        ty::Slice(..) | ty::Str => simple(fx),
+        _ => {
+            let unaligned_offset = field_offset.bytes();
 
-                let one = fx.bcx.ins().iconst(fx.pointer_type, 1);
-                let align_sub_1 = fx.bcx.ins().isub(unsized_align, one);
-                let and_lhs = fx.bcx.ins().iadd_imm(align_sub_1, unaligned_offset as i64);
-                let zero = fx.bcx.ins().iconst(fx.pointer_type, 0);
-                let and_rhs = fx.bcx.ins().isub(zero, unsized_align);
-                let offset = fx.bcx.ins().band(and_lhs, and_rhs);
+            // Get the alignment of the field
+            let (_, mut unsized_align) = crate::unsize::size_and_align_of(fx, field_layout, extra);
 
-                (base.offset_value(fx, offset), field_layout)
+            // For packed types, we need to cap alignment.
+            if let ty::Adt(def, _) = layout.ty.kind() {
+                if let Some(packed) = def.repr().pack {
+                    let packed = fx.bcx.ins().iconst(fx.pointer_type, packed.bytes() as i64);
+                    let cmp = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, unsized_align, packed);
+                    unsized_align = fx.bcx.ins().select(cmp, unsized_align, packed);
+                }
             }
+
+            // Bump the unaligned offset up to the appropriate alignment
+            let one = fx.bcx.ins().iconst(fx.pointer_type, 1);
+            let align_sub_1 = fx.bcx.ins().isub(unsized_align, one);
+            let and_lhs = fx.bcx.ins().iadd_imm(align_sub_1, unaligned_offset as i64);
+            let zero = fx.bcx.ins().iconst(fx.pointer_type, 0);
+            let and_rhs = fx.bcx.ins().isub(zero, unsized_align);
+            let offset = fx.bcx.ins().band(and_lhs, and_rhs);
+
+            (base.offset_value(fx, offset), field_layout)
         }
-    } else {
-        simple(fx)
     }
 }
@@ -329,7 +331,13 @@ impl<'tcx> CValue<'tcx> {
                 let msb = fx.bcx.ins().iconst(types::I64, (const_val >> 64) as u64 as i64);
                 fx.bcx.ins().iconcat(lsb, msb)
             }
-            ty::Bool | ty::Char | ty::Uint(_) | ty::Int(_) | ty::Ref(..) | ty::RawPtr(..) => {
+            ty::Bool
+            | ty::Char
+            | ty::Uint(_)
+            | ty::Int(_)
+            | ty::Ref(..)
+            | ty::RawPtr(..)
+            | ty::FnPtr(..) => {
                 let raw_val = const_val.size().truncate(const_val.to_bits(layout.size).unwrap());
                 fx.bcx.ins().iconst(clif_ty, raw_val as i64)
             }
@@ -725,13 +733,8 @@ impl<'tcx> CPlace<'tcx> {
         };
 
         let (field_ptr, field_layout) = codegen_field(fx, base, extra, layout, field);
-        if field_layout.is_unsized() {
-            if let ty::Foreign(_) = field_layout.ty.kind() {
-                assert!(extra.is_none());
-                CPlace::for_ptr(field_ptr, field_layout)
-            } else {
-                CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout)
-            }
+        if has_ptr_meta(fx.tcx, field_layout.ty) {
+            CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout)
         } else {
             CPlace::for_ptr(field_ptr, field_layout)
         }
@@ -971,6 +974,32 @@ pub(crate) fn assert_assignable<'tcx>(
                 }
             }
         }
+        (&ty::Coroutine(def_id_a, args_a, mov_a), &ty::Coroutine(def_id_b, args_b, mov_b))
+            if def_id_a == def_id_b && mov_a == mov_b =>
+        {
+            let mut types_a = args_a.types();
+            let mut types_b = args_b.types();
+            loop {
+                match (types_a.next(), types_b.next()) {
+                    (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+                    (None, None) => return,
+                    (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+                }
+            }
+        }
+        (&ty::CoroutineWitness(def_id_a, args_a), &ty::CoroutineWitness(def_id_b, args_b))
+            if def_id_a == def_id_b =>
+        {
+            let mut types_a = args_a.types();
+            let mut types_b = args_b.types();
+            loop {
+                match (types_a.next(), types_b.next()) {
+                    (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+                    (None, None) => return,
+                    (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+                }
+            }
+        }
         (ty::Param(_), _) | (_, ty::Param(_)) if fx.tcx.sess.opts.unstable_opts.polymorphize => {
             // No way to check if it is correct or not with polymorphization enabled
         }
-- 
cgit v1.2.3
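The `field_place` hunk above replaces the hand-written `ty::Foreign` special case with a single question, `has_ptr_meta`: would a pointer to the field's type be fat? Only in that case does the place keep the `extra` metadata. The thin/fat split is observable from safe Rust (a sketch, not the backend's actual test):

    use std::mem::size_of;

    fn main() {
        let thin = size_of::<*const u8>();

        // Sized field types get thin pointers: no metadata to carry around.
        assert_eq!(size_of::<*const [u8; 4]>(), thin);

        // Slices, str and trait objects need fat pointers, so a place for such
        // a field has to keep the length / vtable metadata next to the pointer.
        assert_eq!(size_of::<*const [u8]>(), 2 * thin);
        assert_eq!(size_of::<*const str>(), 2 * thin);
        assert_eq!(size_of::<*const dyn std::fmt::Debug>(), 2 * thin);
    }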