From 64d98f8ee037282c35007b64c2649055c56af1db Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:19:03 +0200 Subject: Merging upstream version 1.68.2+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_codegen_cranelift/src/abi/mod.rs | 36 ++-- compiler/rustc_codegen_cranelift/src/allocator.rs | 4 +- compiler/rustc_codegen_cranelift/src/base.rs | 18 +- compiler/rustc_codegen_cranelift/src/cast.rs | 2 +- compiler/rustc_codegen_cranelift/src/common.rs | 11 +- compiler/rustc_codegen_cranelift/src/constant.rs | 84 +++++--- .../rustc_codegen_cranelift/src/debuginfo/mod.rs | 2 +- .../src/debuginfo/unwind.rs | 4 +- .../rustc_codegen_cranelift/src/discriminant.rs | 207 ++++++++++++++----- compiler/rustc_codegen_cranelift/src/driver/jit.rs | 12 +- compiler/rustc_codegen_cranelift/src/driver/mod.rs | 3 +- .../rustc_codegen_cranelift/src/intrinsics/llvm.rs | 176 ++-------------- .../src/intrinsics/llvm_aarch64.rs | 222 +++++++++++++++++++++ .../src/intrinsics/llvm_x86.rs | 197 ++++++++++++++++++ .../rustc_codegen_cranelift/src/intrinsics/mod.rs | 102 ++-------- .../rustc_codegen_cranelift/src/intrinsics/simd.rs | 7 +- compiler/rustc_codegen_cranelift/src/main_shim.rs | 6 +- compiler/rustc_codegen_cranelift/src/num.rs | 6 +- .../src/optimize/peephole.rs | 20 -- .../rustc_codegen_cranelift/src/value_and_place.rs | 15 +- 20 files changed, 728 insertions(+), 406 deletions(-) create mode 100644 compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs create mode 100644 compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs (limited to 'compiler/rustc_codegen_cranelift/src') diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs index 1e22537c2..65cc6b437 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs @@ -56,13 +56,13 @@ pub(crate) fn conv_to_call_conv(c: Conv, default_call_conv: CallConv) -> CallCon pub(crate) fn get_function_sig<'tcx>( tcx: TyCtxt<'tcx>, - triple: &target_lexicon::Triple, + default_call_conv: CallConv, inst: Instance<'tcx>, ) -> Signature { assert!(!inst.substs.needs_infer()); clif_sig_from_fn_abi( tcx, - CallConv::triple_default(triple), + default_call_conv, &RevealAllLayoutCx(tcx).fn_abi_of_instance(inst, ty::List::empty()), ) } @@ -74,7 +74,7 @@ pub(crate) fn import_function<'tcx>( inst: Instance<'tcx>, ) -> FuncId { let name = tcx.symbol_name(inst).name; - let sig = get_function_sig(tcx, module.isa().triple(), inst); + let sig = get_function_sig(tcx, module.target_config().default_call_conv, inst); match module.declare_function(name, Linkage::Import, &sig) { Ok(func_id) => func_id, Err(ModuleError::IncompatibleDeclaration(_)) => tcx.sess.fatal(&format!( @@ -341,18 +341,16 @@ pub(crate) fn codegen_terminator_call<'tcx>( destination: Place<'tcx>, target: Option, ) { - let fn_ty = fx.monomorphize(func.ty(fx.mir, fx.tcx)); - let fn_sig = - fx.tcx.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), fn_ty.fn_sig(fx.tcx)); + let func = codegen_operand(fx, func); + let fn_sig = func.layout().ty.fn_sig(fx.tcx); let ret_place = codegen_place(fx, destination); // Handle special calls like intrinsics and empty drop glue. 
- let instance = if let ty::FnDef(def_id, substs) = *fn_ty.kind() { - let instance = ty::Instance::resolve(fx.tcx, ty::ParamEnv::reveal_all(), def_id, substs) - .unwrap() - .unwrap() - .polymorphize(fx.tcx); + let instance = if let ty::FnDef(def_id, substs) = *func.layout().ty.kind() { + let instance = + ty::Instance::expect_resolve(fx.tcx, ty::ParamEnv::reveal_all(), def_id, substs) + .polymorphize(fx.tcx); if fx.tcx.symbol_name(instance).name.starts_with("llvm.") { crate::intrinsics::codegen_llvm_intrinsic_call( @@ -391,17 +389,17 @@ pub(crate) fn codegen_terminator_call<'tcx>( None }; - let extra_args = &args[fn_sig.inputs().len()..]; + let extra_args = &args[fn_sig.inputs().skip_binder().len()..]; let extra_args = fx .tcx .mk_type_list(extra_args.iter().map(|op_arg| fx.monomorphize(op_arg.ty(fx.mir, fx.tcx)))); let fn_abi = if let Some(instance) = instance { RevealAllLayoutCx(fx.tcx).fn_abi_of_instance(instance, extra_args) } else { - RevealAllLayoutCx(fx.tcx).fn_abi_of_fn_ptr(fn_ty.fn_sig(fx.tcx), extra_args) + RevealAllLayoutCx(fx.tcx).fn_abi_of_fn_ptr(fn_sig, extra_args) }; - let is_cold = if fn_sig.abi == Abi::RustCold { + let is_cold = if fn_sig.abi() == Abi::RustCold { true } else { instance @@ -418,7 +416,7 @@ pub(crate) fn codegen_terminator_call<'tcx>( } // Unpack arguments tuple for closures - let mut args = if fn_sig.abi == Abi::RustCall { + let mut args = if fn_sig.abi() == Abi::RustCall { assert_eq!(args.len(), 2, "rust-call abi requires two arguments"); let self_arg = codegen_call_argument_operand(fx, &args[0]); let pack_arg = codegen_call_argument_operand(fx, &args[1]); @@ -486,7 +484,7 @@ pub(crate) fn codegen_terminator_call<'tcx>( fx.add_comment(nop_inst, "indirect call"); } - let func = codegen_operand(fx, func).load_scalar(fx); + let func = func.load_scalar(fx); let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi); let sig = fx.bcx.import_signature(sig); @@ -517,11 +515,11 @@ pub(crate) fn codegen_terminator_call<'tcx>( }; // FIXME find a cleaner way to support varargs - if fn_sig.c_variadic { - if !matches!(fn_sig.abi, Abi::C { .. }) { + if fn_sig.c_variadic() { + if !matches!(fn_sig.abi(), Abi::C { .. 
}) { fx.tcx.sess.span_fatal( source_info.span, - &format!("Variadic call for non-C abi {:?}", fn_sig.abi), + &format!("Variadic call for non-C abi {:?}", fn_sig.abi()), ); } let sig_ref = fx.bcx.func.dfg.call_signature(call_inst).unwrap(); diff --git a/compiler/rustc_codegen_cranelift/src/allocator.rs b/compiler/rustc_codegen_cranelift/src/allocator.rs index 12bb00d34..850822717 100644 --- a/compiler/rustc_codegen_cranelift/src/allocator.rs +++ b/compiler/rustc_codegen_cranelift/src/allocator.rs @@ -66,7 +66,7 @@ fn codegen_inner( }; let sig = Signature { - call_conv: CallConv::triple_default(module.isa().triple()), + call_conv: module.target_config().default_call_conv, params: arg_tys.iter().cloned().map(AbiParam::new).collect(), returns: output.into_iter().map(AbiParam::new).collect(), }; @@ -104,7 +104,7 @@ fn codegen_inner( } let sig = Signature { - call_conv: CallConv::triple_default(module.isa().triple()), + call_conv: module.target_config().default_call_conv, params: vec![AbiParam::new(usize_ty), AbiParam::new(usize_ty)], returns: vec![], }; diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs index 1db445027..89d955e8b 100644 --- a/compiler/rustc_codegen_cranelift/src/base.rs +++ b/compiler/rustc_codegen_cranelift/src/base.rs @@ -59,7 +59,7 @@ pub(crate) fn codegen_fn<'tcx>( // Declare function let symbol_name = tcx.symbol_name(instance).name.to_string(); - let sig = get_function_sig(tcx, module.isa().triple(), instance); + let sig = get_function_sig(tcx, module.target_config().default_call_conv, instance); let func_id = module.declare_function(&symbol_name, Linkage::Local, &sig).unwrap(); // Make the FunctionBuilder @@ -372,8 +372,10 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { } } - TerminatorKind::SwitchInt { discr, switch_ty, targets } => { - let discr = codegen_operand(fx, discr).load_scalar(fx); + TerminatorKind::SwitchInt { discr, targets } => { + let discr = codegen_operand(fx, discr); + let switch_ty = discr.layout().ty; + let discr = discr.load_scalar(fx); let use_bool_opt = switch_ty.kind() == fx.tcx.types.bool.kind() || (targets.iter().count() == 1 && targets.iter().next().unwrap().0 == 0); @@ -388,11 +390,9 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { _ => unreachable!("{:?}", targets), }; - let discr = crate::optimize::peephole::maybe_unwrap_bint(&mut fx.bcx, discr); let (discr, is_inverted) = crate::optimize::peephole::maybe_unwrap_bool_not(&mut fx.bcx, discr); let test_zero = if is_inverted { !test_zero } else { test_zero }; - let discr = crate::optimize::peephole::maybe_unwrap_bint(&mut fx.bcx, discr); if let Some(taken) = crate::optimize::peephole::maybe_known_branch_taken( &fx.bcx, discr, test_zero, ) { @@ -569,7 +569,7 @@ fn codegen_stmt<'tcx>( UnOp::Not => match layout.ty.kind() { ty::Bool => { let res = fx.bcx.ins().icmp_imm(IntCC::Equal, val, 0); - CValue::by_val(fx.bcx.ins().bint(types::I8, res), layout) + CValue::by_val(res, layout) } ty::Uint(_) | ty::Int(_) => { CValue::by_val(fx.bcx.ins().bnot(val), layout) @@ -577,12 +577,6 @@ fn codegen_stmt<'tcx>( _ => unreachable!("un op Not for {:?}", layout.ty), }, UnOp::Neg => match layout.ty.kind() { - ty::Int(IntTy::I128) => { - // FIXME remove this case once ineg.i128 works - let zero = - CValue::const_val(fx, layout, ty::ScalarInt::null(layout.size)); - crate::num::codegen_int_binop(fx, BinOp::Sub, zero, operand) - } ty::Int(_) => CValue::by_val(fx.bcx.ins().ineg(val), layout), ty::Float(_) => 
CValue::by_val(fx.bcx.ins().fneg(val), layout), _ => unreachable!("un op Neg for {:?}", layout.ty), diff --git a/compiler/rustc_codegen_cranelift/src/cast.rs b/compiler/rustc_codegen_cranelift/src/cast.rs index bad5d1f08..5091c5a9f 100644 --- a/compiler/rustc_codegen_cranelift/src/cast.rs +++ b/compiler/rustc_codegen_cranelift/src/cast.rs @@ -149,7 +149,7 @@ pub(crate) fn clif_int_or_float_cast( } let is_not_nan = fx.bcx.ins().fcmp(FloatCC::Equal, from, from); - let zero = fx.bcx.ins().iconst(to_ty, 0); + let zero = type_zero_value(&mut fx.bcx, to_ty); fx.bcx.ins().select(is_not_nan, val, zero) } else if from_ty.is_float() && to_ty.is_float() { // float -> float diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs index 589594465..2dcd42fbd 100644 --- a/compiler/rustc_codegen_cranelift/src/common.rs +++ b/compiler/rustc_codegen_cranelift/src/common.rs @@ -162,11 +162,20 @@ pub(crate) fn codegen_icmp_imm( } } } else { - let rhs = i64::try_from(rhs).expect("codegen_icmp_imm rhs out of range for <128bit int"); + let rhs = rhs as i64; // Truncates on purpose in case rhs is actually an unsigned value fx.bcx.ins().icmp_imm(intcc, lhs, rhs) } } +pub(crate) fn type_zero_value(bcx: &mut FunctionBuilder<'_>, ty: Type) -> Value { + if ty == types::I128 { + let zero = bcx.ins().iconst(types::I64, 0); + bcx.ins().iconcat(zero, zero) + } else { + bcx.ins().iconst(ty, 0) + } +} + pub(crate) fn type_min_max_value( bcx: &mut FunctionBuilder<'_>, ty: Type, diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs index a6bde8840..51450897b 100644 --- a/compiler/rustc_codegen_cranelift/src/constant.rs +++ b/compiler/rustc_codegen_cranelift/src/constant.rs @@ -28,9 +28,7 @@ impl ConstantCx { } pub(crate) fn finalize(mut self, tcx: TyCtxt<'_>, module: &mut dyn Module) { - //println!("todo {:?}", self.todo); define_all_allocs(tcx, module, &mut self); - //println!("done {:?}", self.done); self.done.clear(); } } @@ -268,16 +266,7 @@ fn data_id_for_static( def_id: DefId, definition: bool, ) -> DataId { - let rlinkage = tcx.codegen_fn_attrs(def_id).linkage; - let linkage = if definition { - crate::linkage::get_static_linkage(tcx, def_id) - } else if rlinkage == Some(rustc_middle::mir::mono::Linkage::ExternalWeak) - || rlinkage == Some(rustc_middle::mir::mono::Linkage::WeakAny) - { - Linkage::Preemptible - } else { - Linkage::Import - }; + let attrs = tcx.codegen_fn_attrs(def_id); let instance = Instance::mono(tcx, def_id).polymorphize(tcx); let symbol_name = tcx.symbol_name(instance).name; @@ -289,25 +278,33 @@ fn data_id_for_static( }; let align = tcx.layout_of(ParamEnv::reveal_all().and(ty)).unwrap().align.pref.bytes(); - let attrs = tcx.codegen_fn_attrs(def_id); + if let Some(import_linkage) = attrs.import_linkage { + assert!(!definition); - let data_id = match module.declare_data( - &*symbol_name, - linkage, - is_mutable, - attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL), - ) { - Ok(data_id) => data_id, - Err(ModuleError::IncompatibleDeclaration(_)) => tcx.sess.fatal(&format!( - "attempt to declare `{symbol_name}` as static, but it was already declared as function" - )), - Err(err) => Err::<_, _>(err).unwrap(), - }; + let linkage = if import_linkage == rustc_middle::mir::mono::Linkage::ExternalWeak + || import_linkage == rustc_middle::mir::mono::Linkage::WeakAny + { + Linkage::Preemptible + } else { + Linkage::Import + }; + + let data_id = match module.declare_data( + &*symbol_name, + linkage, + 
is_mutable, + attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL), + ) { + Ok(data_id) => data_id, + Err(ModuleError::IncompatibleDeclaration(_)) => tcx.sess.fatal(&format!( + "attempt to declare `{symbol_name}` as static, but it was already declared as function" + )), + Err(err) => Err::<_, _>(err).unwrap(), + }; - if rlinkage.is_some() { // Comment copied from https://github.com/rust-lang/rust/blob/45060c2a66dfd667f88bd8b94261b28a58d85bd5/src/librustc_codegen_llvm/consts.rs#L141 // Declare an internal global `extern_with_linkage_foo` which - // is initialized with the address of `foo`. If `foo` is + // is initialized with the address of `foo`. If `foo` is // discarded during linking (for example, if `foo` has weak // linkage and there are no definitions), then // `extern_with_linkage_foo` will instead be initialized to @@ -326,10 +323,34 @@ fn data_id_for_static( Err(ModuleError::DuplicateDefinition(_)) => {} res => res.unwrap(), } - ref_data_id - } else { - data_id + + return ref_data_id; } + + let linkage = if definition { + crate::linkage::get_static_linkage(tcx, def_id) + } else if attrs.linkage == Some(rustc_middle::mir::mono::Linkage::ExternalWeak) + || attrs.linkage == Some(rustc_middle::mir::mono::Linkage::WeakAny) + { + Linkage::Preemptible + } else { + Linkage::Import + }; + + let data_id = match module.declare_data( + &*symbol_name, + linkage, + is_mutable, + attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL), + ) { + Ok(data_id) => data_id, + Err(ModuleError::IncompatibleDeclaration(_)) => tcx.sess.fatal(&format!( + "attempt to declare `{symbol_name}` as static, but it was already declared as function" + )), + Err(err) => Err::<_, _>(err).unwrap(), + }; + + data_id } fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut ConstantCx) { @@ -348,8 +369,6 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant (data_id, alloc, None) } TodoItem::Static(def_id) => { - //println!("static {:?}", def_id); - let section_name = tcx.codegen_fn_attrs(def_id).link_section; let alloc = tcx.eval_static_initializer(def_id).unwrap(); @@ -359,7 +378,6 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant } }; - //("data_id {}", data_id); if cx.done.contains(&data_id) { continue; } diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs index 2ba012a77..28fbcb15b 100644 --- a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs @@ -68,7 +68,7 @@ impl DebugContext { .working_dir .to_string_lossy(FileNameDisplayPreference::Remapped) .into_owned(); - let (name, file_info) = match tcx.sess.local_crate_source_file.clone() { + let (name, file_info) = match tcx.sess.local_crate_source_file() { Some(path) => { let name = path.to_string_lossy().into_owned(); (name, None) diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs index d26392c49..493359c74 100644 --- a/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs +++ b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs @@ -39,7 +39,9 @@ impl UnwindContext { } pub(crate) fn add_function(&mut self, func_id: FuncId, context: &Context, isa: &dyn TargetIsa) { - let unwind_info = if let Some(unwind_info) = context.create_unwind_info(isa).unwrap() { + let unwind_info = if let Some(unwind_info) = + context.compiled_code().unwrap().create_unwind_info(isa).unwrap() + { 
unwind_info } else { return; diff --git a/compiler/rustc_codegen_cranelift/src/discriminant.rs b/compiler/rustc_codegen_cranelift/src/discriminant.rs index 97b395bcd..3cbf313ad 100644 --- a/compiler/rustc_codegen_cranelift/src/discriminant.rs +++ b/compiler/rustc_codegen_cranelift/src/discriminant.rs @@ -1,6 +1,7 @@ //! Handling of enum discriminants //! -//! Adapted from +//! Adapted from +//! () use rustc_target::abi::{Int, TagEncoding, Variants}; @@ -47,13 +48,19 @@ pub(crate) fn codegen_set_discriminant<'tcx>( } => { if variant_index != untagged_variant { let niche = place.place_field(fx, mir::Field::new(tag_field)); + let niche_type = fx.clif_type(niche.layout().ty).unwrap(); let niche_value = variant_index.as_u32() - niche_variants.start().as_u32(); - let niche_value = ty::ScalarInt::try_from_uint( - u128::from(niche_value).wrapping_add(niche_start), - niche.layout().size, - ) - .unwrap(); - let niche_llval = CValue::const_val(fx, niche.layout(), niche_value); + let niche_value = (niche_value as u128).wrapping_add(niche_start); + let niche_value = match niche_type { + types::I128 => { + let lsb = fx.bcx.ins().iconst(types::I64, niche_value as u64 as i64); + let msb = + fx.bcx.ins().iconst(types::I64, (niche_value >> 64) as u64 as i64); + fx.bcx.ins().iconcat(lsb, msb) + } + ty => fx.bcx.ins().iconst(ty, niche_value as i64), + }; + let niche_llval = CValue::by_val(niche_value, niche.layout()); niche.write_cvalue(fx, niche_llval); } } @@ -96,6 +103,7 @@ pub(crate) fn codegen_get_discriminant<'tcx>( } }; + let cast_to_size = dest_layout.layout.size(); let cast_to = fx.clif_type(dest_layout.ty).unwrap(); // Read the tag/niche-encoded discriminant from memory. @@ -114,21 +122,128 @@ pub(crate) fn codegen_get_discriminant<'tcx>( dest.write_cvalue(fx, res); } TagEncoding::Niche { untagged_variant, ref niche_variants, niche_start } => { - // Rebase from niche values to discriminants, and check - // whether the result is in range for the niche variants. - - // We first compute the "relative discriminant" (wrt `niche_variants`), - // that is, if `n = niche_variants.end() - niche_variants.start()`, - // we remap `niche_start..=niche_start + n` (which may wrap around) - // to (non-wrap-around) `0..=n`, to be able to check whether the - // discriminant corresponds to a niche variant with one comparison. - // We also can't go directly to the (variant index) discriminant - // and check that it is in the range `niche_variants`, because - // that might not fit in the same type, on top of needing an extra - // comparison (see also the comment on `let niche_discr`). - let relative_discr = if niche_start == 0 { - tag + let tag_size = tag_scalar.size(fx); + let max_unsigned = tag_size.unsigned_int_max(); + let max_signed = tag_size.signed_int_max() as u128; + let min_signed = max_signed + 1; + let relative_max = niche_variants.end().as_u32() - niche_variants.start().as_u32(); + let niche_end = niche_start.wrapping_add(relative_max as u128) & max_unsigned; + let range = tag_scalar.valid_range(fx); + + let sle = |lhs: u128, rhs: u128| -> bool { + // Signed and unsigned comparisons give the same results, + // except that in signed comparisons an integer with the + // sign bit set is less than one with the sign bit clear. + // Toggle the sign bit to do a signed comparison. + (lhs ^ min_signed) <= (rhs ^ min_signed) + }; + + // We have a subrange `niche_start..=niche_end` inside `range`. + // If the value of the tag is inside this subrange, it's a + // "niche value", an increment of the discriminant. 
Otherwise it + // indicates the untagged variant. + // A general algorithm to extract the discriminant from the tag + // is: + // relative_tag = tag - niche_start + // is_niche = relative_tag <= (ule) relative_max + // discr = if is_niche { + // cast(relative_tag) + niche_variants.start() + // } else { + // untagged_variant + // } + // However, we will likely be able to emit simpler code. + + // Find the least and greatest values in `range`, considered + // both as signed and unsigned. + let (low_unsigned, high_unsigned) = + if range.start <= range.end { (range.start, range.end) } else { (0, max_unsigned) }; + let (low_signed, high_signed) = if sle(range.start, range.end) { + (range.start, range.end) } else { + (min_signed, max_signed) + }; + + let niches_ule = niche_start <= niche_end; + let niches_sle = sle(niche_start, niche_end); + let cast_smaller = cast_to_size <= tag_size; + + // In the algorithm above, we can change + // cast(relative_tag) + niche_variants.start() + // into + // cast(tag + (niche_variants.start() - niche_start)) + // if either the casted type is no larger than the original + // type, or if the niche values are contiguous (in either the + // signed or unsigned sense). + let can_incr = cast_smaller || niches_ule || niches_sle; + + let data_for_boundary_niche = || -> Option<(IntCC, u128)> { + if !can_incr { + None + } else if niche_start == low_unsigned { + Some((IntCC::UnsignedLessThanOrEqual, niche_end)) + } else if niche_end == high_unsigned { + Some((IntCC::UnsignedGreaterThanOrEqual, niche_start)) + } else if niche_start == low_signed { + Some((IntCC::SignedLessThanOrEqual, niche_end)) + } else if niche_end == high_signed { + Some((IntCC::SignedGreaterThanOrEqual, niche_start)) + } else { + None + } + }; + + let (is_niche, tagged_discr, delta) = if relative_max == 0 { + // Best case scenario: only one tagged variant. This will + // likely become just a comparison and a jump. + // The algorithm is: + // is_niche = tag == niche_start + // discr = if is_niche { + // niche_start + // } else { + // untagged_variant + // } + let is_niche = codegen_icmp_imm(fx, IntCC::Equal, tag, niche_start as i128); + let tagged_discr = + fx.bcx.ins().iconst(cast_to, niche_variants.start().as_u32() as i64); + (is_niche, tagged_discr, 0) + } else if let Some((predicate, constant)) = data_for_boundary_niche() { + // The niche values are either the lowest or the highest in + // `range`. We can avoid the first subtraction in the + // algorithm. + // The algorithm is now this: + // is_niche = tag <= niche_end + // discr = if is_niche { + // cast(tag + (niche_variants.start() - niche_start)) + // } else { + // untagged_variant + // } + // (the first line may instead be tag >= niche_start, + // and may be a signed or unsigned comparison) + // The arithmetic must be done before the cast, so we can + // have the correct wrapping behavior. See issue #104519 for + // the consequences of getting this wrong. 
+ let is_niche = codegen_icmp_imm(fx, predicate, tag, constant as i128); + let delta = (niche_variants.start().as_u32() as u128).wrapping_sub(niche_start); + let incr_tag = if delta == 0 { + tag + } else { + let delta = match fx.bcx.func.dfg.value_type(tag) { + types::I128 => { + let lsb = fx.bcx.ins().iconst(types::I64, delta as u64 as i64); + let msb = fx.bcx.ins().iconst(types::I64, (delta >> 64) as u64 as i64); + fx.bcx.ins().iconcat(lsb, msb) + } + ty => fx.bcx.ins().iconst(ty, delta as i64), + }; + fx.bcx.ins().iadd(tag, delta) + }; + + let cast_tag = clif_intcast(fx, incr_tag, cast_to, !niches_ule); + + (is_niche, cast_tag, 0) + } else { + // The special cases don't apply, so we'll have to go with + // the general algorithm. let niche_start = match fx.bcx.func.dfg.value_type(tag) { types::I128 => { let lsb = fx.bcx.ins().iconst(types::I64, niche_start as u64 as i64); @@ -138,40 +253,40 @@ pub(crate) fn codegen_get_discriminant<'tcx>( } ty => fx.bcx.ins().iconst(ty, niche_start as i64), }; - fx.bcx.ins().isub(tag, niche_start) - }; - let relative_max = niche_variants.end().as_u32() - niche_variants.start().as_u32(); - let is_niche = { - codegen_icmp_imm( + let relative_discr = fx.bcx.ins().isub(tag, niche_start); + let cast_tag = clif_intcast(fx, relative_discr, cast_to, false); + let is_niche = crate::common::codegen_icmp_imm( fx, IntCC::UnsignedLessThanOrEqual, relative_discr, i128::from(relative_max), - ) + ); + (is_niche, cast_tag, niche_variants.start().as_u32() as u128) }; - // NOTE(eddyb) this addition needs to be performed on the final - // type, in case the niche itself can't represent all variant - // indices (e.g. `u8` niche with more than `256` variants, - // but enough uninhabited variants so that the remaining variants - // fit in the niche). - // In other words, `niche_variants.end - niche_variants.start` - // is representable in the niche, but `niche_variants.end` - // might not be, in extreme cases. - let niche_discr = { - let relative_discr = if relative_max == 0 { - // HACK(eddyb) since we have only one niche, we know which - // one it is, and we can avoid having a dynamic value here. 
- fx.bcx.ins().iconst(cast_to, 0) - } else { - clif_intcast(fx, relative_discr, cast_to, false) + let tagged_discr = if delta == 0 { + tagged_discr + } else { + let delta = match cast_to { + types::I128 => { + let lsb = fx.bcx.ins().iconst(types::I64, delta as u64 as i64); + let msb = fx.bcx.ins().iconst(types::I64, (delta >> 64) as u64 as i64); + fx.bcx.ins().iconcat(lsb, msb) + } + ty => fx.bcx.ins().iconst(ty, delta as i64), }; - fx.bcx.ins().iadd_imm(relative_discr, i64::from(niche_variants.start().as_u32())) + fx.bcx.ins().iadd(tagged_discr, delta) }; - let untagged_variant = - fx.bcx.ins().iconst(cast_to, i64::from(untagged_variant.as_u32())); - let discr = fx.bcx.ins().select(is_niche, niche_discr, untagged_variant); + let untagged_variant = if cast_to == types::I128 { + let zero = fx.bcx.ins().iconst(types::I64, 0); + let untagged_variant = + fx.bcx.ins().iconst(types::I64, i64::from(untagged_variant.as_u32())); + fx.bcx.ins().iconcat(untagged_variant, zero) + } else { + fx.bcx.ins().iconst(cast_to, i64::from(untagged_variant.as_u32())) + }; + let discr = fx.bcx.ins().select(is_niche, tagged_discr, untagged_variant); let res = CValue::by_val(discr, dest_layout); dest.write_cvalue(fx, res); } diff --git a/compiler/rustc_codegen_cranelift/src/driver/jit.rs b/compiler/rustc_codegen_cranelift/src/driver/jit.rs index 6a430b521..be1b8c9ea 100644 --- a/compiler/rustc_codegen_cranelift/src/driver/jit.rs +++ b/compiler/rustc_codegen_cranelift/src/driver/jit.rs @@ -159,7 +159,7 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, backend_config: BackendConfig) -> ! { tcx.sess.abort_if_errors(); - jit_module.finalize_definitions(); + jit_module.finalize_definitions().unwrap(); unsafe { cx.unwind_context.register_jit(&jit_module) }; println!( @@ -245,7 +245,11 @@ fn jit_fn(instance_ptr: *const Instance<'static>, trampoline_ptr: *const u8) -> let backend_config = lazy_jit_state.backend_config.clone(); let name = tcx.symbol_name(instance).name; - let sig = crate::abi::get_function_sig(tcx, jit_module.isa().triple(), instance); + let sig = crate::abi::get_function_sig( + tcx, + jit_module.target_config().default_call_conv, + instance, + ); let func_id = jit_module.declare_function(name, Linkage::Export, &sig).unwrap(); let current_ptr = jit_module.read_got_entry(func_id); @@ -278,7 +282,7 @@ fn jit_fn(instance_ptr: *const Instance<'static>, trampoline_ptr: *const u8) -> }); assert!(cx.global_asm.is_empty()); - jit_module.finalize_definitions(); + jit_module.finalize_definitions().unwrap(); unsafe { cx.unwind_context.register_jit(&jit_module) }; jit_module.get_finalized_function(func_id) }) @@ -344,7 +348,7 @@ fn codegen_shim<'tcx>( let pointer_type = module.target_config().pointer_type(); let name = tcx.symbol_name(inst).name; - let sig = crate::abi::get_function_sig(tcx, module.isa().triple(), inst); + let sig = crate::abi::get_function_sig(tcx, module.target_config().default_call_conv, inst); let func_id = module.declare_function(name, Linkage::Export, &sig).unwrap(); let instance_ptr = Box::into_raw(Box::new(inst)); diff --git a/compiler/rustc_codegen_cranelift/src/driver/mod.rs b/compiler/rustc_codegen_cranelift/src/driver/mod.rs index 8f5714ecb..6e925cea2 100644 --- a/compiler/rustc_codegen_cranelift/src/driver/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/driver/mod.rs @@ -24,7 +24,8 @@ fn predefine_mono_items<'tcx>( MonoItem::Fn(instance) => { let name = tcx.symbol_name(instance).name; let _inst_guard = crate::PrintOnPanic(|| format!("{:?} {}", instance, name)); - let sig = get_function_sig(tcx, 
module.isa().triple(), instance); + let sig = + get_function_sig(tcx, module.target_config().default_call_conv, instance); let linkage = crate::linkage::get_clif_linkage( mono_item, linkage, diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs index 783d426c3..f722e5228 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs @@ -8,135 +8,37 @@ use rustc_middle::ty::subst::SubstsRef; pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, intrinsic: &str, - _substs: SubstsRef<'tcx>, + substs: SubstsRef<'tcx>, args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, target: Option, ) { - match intrinsic { - "llvm.x86.sse2.pause" | "llvm.aarch64.isb" => { - // Spin loop hint - } + if intrinsic.starts_with("llvm.aarch64") { + return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call( + fx, intrinsic, substs, args, ret, target, + ); + } + if intrinsic.starts_with("llvm.x86") { + return llvm_x86::codegen_x86_llvm_intrinsic_call(fx, intrinsic, substs, args, ret, target); + } - // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8` - "llvm.x86.sse2.pmovmskb.128" | "llvm.x86.avx2.pmovmskb" | "llvm.x86.sse2.movmsk.pd" => { + match intrinsic { + _ if intrinsic.starts_with("llvm.ctlz.v") => { intrinsic_args!(fx, args => (a); intrinsic); - let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); - let lane_ty = fx.clif_type(lane_ty).unwrap(); - assert!(lane_count <= 32); - - let mut res = fx.bcx.ins().iconst(types::I32, 0); - - for lane in (0..lane_count).rev() { - let a_lane = a.value_lane(fx, lane).load_scalar(fx); - - // cast float to int - let a_lane = match lane_ty { - types::F32 => fx.bcx.ins().bitcast(types::I32, a_lane), - types::F64 => fx.bcx.ins().bitcast(types::I64, a_lane), - _ => a_lane, - }; - - // extract sign bit of an int - let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1)); - - // shift sign bit into result - let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false); - res = fx.bcx.ins().ishl_imm(res, 1); - res = fx.bcx.ins().bor(res, a_lane_sign); - } - - let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32)); - ret.write_cvalue(fx, res); - } - "llvm.x86.sse2.cmp.ps" | "llvm.x86.sse2.cmp.pd" => { - let (x, y, kind) = match args { - [x, y, kind] => (x, y, kind), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let x = codegen_operand(fx, x); - let y = codegen_operand(fx, y); - let kind = crate::constant::mir_operand_get_const_val(fx, kind) - .expect("llvm.x86.sse2.cmp.* kind not const"); - - let flt_cc = match kind - .try_to_bits(Size::from_bytes(1)) - .unwrap_or_else(|| panic!("kind not scalar: {:?}", kind)) - { - 0 => FloatCC::Equal, - 1 => FloatCC::LessThan, - 2 => FloatCC::LessThanOrEqual, - 7 => FloatCC::Ordered, - 3 => FloatCC::Unordered, - 4 => FloatCC::NotEqual, - 5 => FloatCC::UnorderedOrGreaterThanOrEqual, - 6 => FloatCC::UnorderedOrGreaterThan, - kind => unreachable!("kind {:?}", kind), - }; - - simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| { - let res_lane = match lane_ty.kind() { - ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane), - _ => unreachable!("{:?}", lane_ty), - }; - bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane) + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { + fx.bcx.ins().clz(lane) }); } - "llvm.x86.sse2.psrli.d" => { - let (a, imm8) = match 
args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrli.d imm8 not const"); - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.pslli.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrli.d imm8 not const"); + _ if intrinsic.starts_with("llvm.ctpop.v") => { + intrinsic_args!(fx, args => (a); intrinsic); - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { + fx.bcx.ins().popcnt(lane) }); } - "llvm.x86.sse2.storeu.dq" => { - intrinsic_args!(fx, args => (mem_addr, a); intrinsic); - let mem_addr = mem_addr.load_scalar(fx); - - // FIXME correctly handle the unalignment - let dest = CPlace::for_ptr(Pointer::new(mem_addr), a.layout()); - dest.write_cvalue(fx, a); - } - "llvm.x86.addcarry.64" => { - intrinsic_args!(fx, args => (c_in, a, b); intrinsic); - let c_in = c_in.load_scalar(fx); - - llvm_add_sub(fx, BinOp::Add, ret, c_in, a, b); - } - "llvm.x86.subborrow.64" => { - intrinsic_args!(fx, args => (b_in, a, b); intrinsic); - let b_in = b_in.load_scalar(fx); - llvm_add_sub(fx, BinOp::Sub, ret, b_in, a, b); - } _ => { fx.tcx .sess @@ -150,47 +52,3 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( let ret_block = fx.get_block(dest); fx.bcx.ins().jump(ret_block, &[]); } - -// llvm.x86.avx2.vperm2i128 -// llvm.x86.ssse3.pshuf.b.128 -// llvm.x86.avx2.pshuf.b -// llvm.x86.avx2.psrli.w -// llvm.x86.sse2.psrli.w - -fn llvm_add_sub<'tcx>( - fx: &mut FunctionCx<'_, '_, 'tcx>, - bin_op: BinOp, - ret: CPlace<'tcx>, - cb_in: Value, - a: CValue<'tcx>, - b: CValue<'tcx>, -) { - assert_eq!( - a.layout().ty, - fx.tcx.types.u64, - "llvm.x86.addcarry.64/llvm.x86.subborrow.64 second operand must be u64" - ); - assert_eq!( - b.layout().ty, - fx.tcx.types.u64, - "llvm.x86.addcarry.64/llvm.x86.subborrow.64 third operand must be u64" - ); - - // c + carry -> c + first intermediate carry or borrow respectively - let int0 = crate::num::codegen_checked_int_binop(fx, bin_op, a, b); - let c = int0.value_field(fx, mir::Field::new(0)); - let cb0 = int0.value_field(fx, mir::Field::new(1)).load_scalar(fx); - - // c + carry -> c + second intermediate carry or borrow respectively - let cb_in_as_u64 = fx.bcx.ins().uextend(types::I64, cb_in); - let cb_in_as_u64 = CValue::by_val(cb_in_as_u64, fx.layout_of(fx.tcx.types.u64)); - let int1 = crate::num::codegen_checked_int_binop(fx, bin_op, c, cb_in_as_u64); - let (c, cb1) = int1.load_scalar_pair(fx); - - // carry0 | carry1 -> carry or borrow respectively - let cb_out = fx.bcx.ins().bor(cb0, cb1); - - let layout = fx.layout_of(fx.tcx.mk_tup([fx.tcx.types.u8, fx.tcx.types.u64].iter())); - let val = CValue::by_val_pair(cb_out, c, 
layout); - ret.write_cvalue(fx, val); -} diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs new file mode 100644 index 000000000..b431158d2 --- /dev/null +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs @@ -0,0 +1,222 @@ +//! Emulate AArch64 LLVM intrinsics + +use crate::intrinsics::*; +use crate::prelude::*; + +use rustc_middle::ty::subst::SubstsRef; + +pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + intrinsic: &str, + _substs: SubstsRef<'tcx>, + args: &[mir::Operand<'tcx>], + ret: CPlace<'tcx>, + target: Option, +) { + // llvm.aarch64.neon.sqshl.v*i* + + match intrinsic { + "llvm.aarch64.isb" => { + fx.bcx.ins().fence(); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.abs.v") => { + intrinsic_args!(fx, args => (a); intrinsic); + + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { + fx.bcx.ins().iabs(lane) + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.cls.v") => { + intrinsic_args!(fx, args => (a); intrinsic); + + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { + fx.bcx.ins().cls(lane) + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.rbit.v") => { + intrinsic_args!(fx, args => (a); intrinsic); + + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { + fx.bcx.ins().bitrev(lane) + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| { + crate::num::codegen_saturating_int_binop(fx, BinOp::Add, x_lane, y_lane) + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| { + crate::num::codegen_saturating_int_binop(fx, BinOp::Sub, x_lane, y_lane) + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.smax.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| { + let gt = fx.bcx.ins().icmp(IntCC::SignedGreaterThan, x_lane, y_lane); + fx.bcx.ins().select(gt, x_lane, y_lane) + }, + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.umax.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| { + let gt = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, x_lane, y_lane); + fx.bcx.ins().select(gt, x_lane, y_lane) + }, + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.smaxv.i") => { + intrinsic_args!(fx, args => (v); intrinsic); + + simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| { + let gt = fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b); + fx.bcx.ins().select(gt, a, b) + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.umaxv.i") => { + intrinsic_args!(fx, args => (v); intrinsic); + + simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| { + let gt = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b); + fx.bcx.ins().select(gt, a, b) + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.smin.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| { + let gt = fx.bcx.ins().icmp(IntCC::SignedLessThan, x_lane, y_lane); + fx.bcx.ins().select(gt, x_lane, 
y_lane) + }, + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.umin.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| { + let gt = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, x_lane, y_lane); + fx.bcx.ins().select(gt, x_lane, y_lane) + }, + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.sminv.i") => { + intrinsic_args!(fx, args => (v); intrinsic); + + simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| { + let gt = fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b); + fx.bcx.ins().select(gt, a, b) + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.uminv.i") => { + intrinsic_args!(fx, args => (v); intrinsic); + + simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| { + let gt = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b); + fx.bcx.ins().select(gt, a, b) + }); + } + + /* + _ if intrinsic.starts_with("llvm.aarch64.neon.sshl.v") + || intrinsic.starts_with("llvm.aarch64.neon.sqshl.v") + // FIXME split this one out once saturating is implemented + || intrinsic.starts_with("llvm.aarch64.neon.sqshlu.v") => + { + intrinsic_args!(fx, args => (a, b); intrinsic); + + simd_pair_for_each_lane(fx, a, b, ret, &|fx, _lane_ty, _res_lane_ty, a, b| { + // FIXME saturate? + fx.bcx.ins().ishl(a, b) + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.sqshrn.v") => { + let (a, imm32) = match args { + [a, imm32] => (a, imm32), + _ => bug!("wrong number of args for intrinsic {intrinsic}"), + }; + let a = codegen_operand(fx, a); + let imm32 = crate::constant::mir_operand_get_const_val(fx, imm32) + .expect("llvm.aarch64.neon.sqshrn.v* imm32 not const"); + + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm32 + .try_to_bits(Size::from_bytes(4)) + .unwrap_or_else(|| panic!("imm32 not scalar: {:?}", imm32)) + { + imm32 if imm32 < 32 => fx.bcx.ins().sshr_imm(lane, i64::from(imm32 as u8)), + _ => fx.bcx.ins().iconst(types::I32, 0), + }); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.sqshrun.v") => { + let (a, imm32) = match args { + [a, imm32] => (a, imm32), + _ => bug!("wrong number of args for intrinsic {intrinsic}"), + }; + let a = codegen_operand(fx, a); + let imm32 = crate::constant::mir_operand_get_const_val(fx, imm32) + .expect("llvm.aarch64.neon.sqshrn.v* imm32 not const"); + + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm32 + .try_to_bits(Size::from_bytes(4)) + .unwrap_or_else(|| panic!("imm32 not scalar: {:?}", imm32)) + { + imm32 if imm32 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm32 as u8)), + _ => fx.bcx.ins().iconst(types::I32, 0), + }); + } + */ + _ => { + fx.tcx.sess.warn(&format!( + "unsupported AArch64 llvm intrinsic {}; replacing with trap", + intrinsic + )); + crate::trap::trap_unimplemented(fx, intrinsic); + return; + } + } + + let dest = target.expect("all llvm intrinsics used by stdlib should return"); + let ret_block = fx.get_block(dest); + fx.bcx.ins().jump(ret_block, &[]); +} diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs new file mode 100644 index 000000000..7bc161fbe --- /dev/null +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs @@ -0,0 +1,197 @@ +//! 
Emulate x86 LLVM intrinsics + +use crate::intrinsics::*; +use crate::prelude::*; + +use rustc_middle::ty::subst::SubstsRef; + +pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + intrinsic: &str, + _substs: SubstsRef<'tcx>, + args: &[mir::Operand<'tcx>], + ret: CPlace<'tcx>, + target: Option, +) { + match intrinsic { + "llvm.x86.sse2.pause" | "llvm.aarch64.isb" => { + // Spin loop hint + } + + // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8` + "llvm.x86.sse2.pmovmskb.128" | "llvm.x86.avx2.pmovmskb" | "llvm.x86.sse2.movmsk.pd" => { + intrinsic_args!(fx, args => (a); intrinsic); + + let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); + let lane_ty = fx.clif_type(lane_ty).unwrap(); + assert!(lane_count <= 32); + + let mut res = fx.bcx.ins().iconst(types::I32, 0); + + for lane in (0..lane_count).rev() { + let a_lane = a.value_lane(fx, lane).load_scalar(fx); + + // cast float to int + let a_lane = match lane_ty { + types::F32 => fx.bcx.ins().bitcast(types::I32, a_lane), + types::F64 => fx.bcx.ins().bitcast(types::I64, a_lane), + _ => a_lane, + }; + + // extract sign bit of an int + let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1)); + + // shift sign bit into result + let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false); + res = fx.bcx.ins().ishl_imm(res, 1); + res = fx.bcx.ins().bor(res, a_lane_sign); + } + + let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32)); + ret.write_cvalue(fx, res); + } + "llvm.x86.sse2.cmp.ps" | "llvm.x86.sse2.cmp.pd" => { + let (x, y, kind) = match args { + [x, y, kind] => (x, y, kind), + _ => bug!("wrong number of args for intrinsic {intrinsic}"), + }; + let x = codegen_operand(fx, x); + let y = codegen_operand(fx, y); + let kind = crate::constant::mir_operand_get_const_val(fx, kind) + .expect("llvm.x86.sse2.cmp.* kind not const"); + + let flt_cc = match kind + .try_to_bits(Size::from_bytes(1)) + .unwrap_or_else(|| panic!("kind not scalar: {:?}", kind)) + { + 0 => FloatCC::Equal, + 1 => FloatCC::LessThan, + 2 => FloatCC::LessThanOrEqual, + 7 => FloatCC::Ordered, + 3 => FloatCC::Unordered, + 4 => FloatCC::NotEqual, + 5 => FloatCC::UnorderedOrGreaterThanOrEqual, + 6 => FloatCC::UnorderedOrGreaterThan, + kind => unreachable!("kind {:?}", kind), + }; + + simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| { + let res_lane = match lane_ty.kind() { + ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane), + _ => unreachable!("{:?}", lane_ty), + }; + bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane) + }); + } + "llvm.x86.sse2.psrli.d" => { + let (a, imm8) = match args { + [a, imm8] => (a, imm8), + _ => bug!("wrong number of args for intrinsic {intrinsic}"), + }; + let a = codegen_operand(fx, a); + let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) + .expect("llvm.x86.sse2.psrli.d imm8 not const"); + + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 + .try_to_bits(Size::from_bytes(4)) + .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) + { + imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), + _ => fx.bcx.ins().iconst(types::I32, 0), + }); + } + "llvm.x86.sse2.pslli.d" => { + let (a, imm8) = match args { + [a, imm8] => (a, imm8), + _ => bug!("wrong number of args for intrinsic {intrinsic}"), + }; + let a = codegen_operand(fx, a); + let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) + .expect("llvm.x86.sse2.psrli.d imm8 not const"); + + 
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 + .try_to_bits(Size::from_bytes(4)) + .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) + { + imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), + _ => fx.bcx.ins().iconst(types::I32, 0), + }); + } + "llvm.x86.sse2.storeu.dq" => { + intrinsic_args!(fx, args => (mem_addr, a); intrinsic); + let mem_addr = mem_addr.load_scalar(fx); + + // FIXME correctly handle the unalignment + let dest = CPlace::for_ptr(Pointer::new(mem_addr), a.layout()); + dest.write_cvalue(fx, a); + } + "llvm.x86.addcarry.64" => { + intrinsic_args!(fx, args => (c_in, a, b); intrinsic); + let c_in = c_in.load_scalar(fx); + + llvm_add_sub(fx, BinOp::Add, ret, c_in, a, b); + } + "llvm.x86.subborrow.64" => { + intrinsic_args!(fx, args => (b_in, a, b); intrinsic); + let b_in = b_in.load_scalar(fx); + + llvm_add_sub(fx, BinOp::Sub, ret, b_in, a, b); + } + _ => { + fx.tcx.sess.warn(&format!( + "unsupported x86 llvm intrinsic {}; replacing with trap", + intrinsic + )); + crate::trap::trap_unimplemented(fx, intrinsic); + return; + } + } + + let dest = target.expect("all llvm intrinsics used by stdlib should return"); + let ret_block = fx.get_block(dest); + fx.bcx.ins().jump(ret_block, &[]); +} + +// llvm.x86.avx2.vperm2i128 +// llvm.x86.ssse3.pshuf.b.128 +// llvm.x86.avx2.pshuf.b +// llvm.x86.avx2.psrli.w +// llvm.x86.sse2.psrli.w + +fn llvm_add_sub<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + bin_op: BinOp, + ret: CPlace<'tcx>, + cb_in: Value, + a: CValue<'tcx>, + b: CValue<'tcx>, +) { + assert_eq!( + a.layout().ty, + fx.tcx.types.u64, + "llvm.x86.addcarry.64/llvm.x86.subborrow.64 second operand must be u64" + ); + assert_eq!( + b.layout().ty, + fx.tcx.types.u64, + "llvm.x86.addcarry.64/llvm.x86.subborrow.64 third operand must be u64" + ); + + // c + carry -> c + first intermediate carry or borrow respectively + let int0 = crate::num::codegen_checked_int_binop(fx, bin_op, a, b); + let c = int0.value_field(fx, mir::Field::new(0)); + let cb0 = int0.value_field(fx, mir::Field::new(1)).load_scalar(fx); + + // c + carry -> c + second intermediate carry or borrow respectively + let cb_in_as_u64 = fx.bcx.ins().uextend(types::I64, cb_in); + let cb_in_as_u64 = CValue::by_val(cb_in_as_u64, fx.layout_of(fx.tcx.types.u64)); + let int1 = crate::num::codegen_checked_int_binop(fx, bin_op, c, cb_in_as_u64); + let (c, cb1) = int1.load_scalar_pair(fx); + + // carry0 | carry1 -> carry or borrow respectively + let cb_out = fx.bcx.ins().bor(cb0, cb1); + + let layout = fx.layout_of(fx.tcx.mk_tup([fx.tcx.types.u8, fx.tcx.types.u64].iter())); + let val = CValue::by_val_pair(cb_out, c, layout); + ret.write_cvalue(fx, val); +} diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs index 0302b843a..e4ac89a7b 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs @@ -14,6 +14,8 @@ macro_rules! 
intrinsic_args { mod cpuid; mod llvm; +mod llvm_aarch64; +mod llvm_x86; mod simd; pub(crate) use cpuid::codegen_cpuid_call; @@ -195,8 +197,7 @@ fn bool_to_zero_or_max_uint<'tcx>( ty => ty, }; - let val = fx.bcx.ins().bint(int_ty, val); - let mut res = fx.bcx.ins().ineg(val); + let mut res = fx.bcx.ins().bmask(int_ty, val); if ty.is_float() { res = fx.bcx.ins().bitcast(ty, res); @@ -632,88 +633,18 @@ fn codegen_regular_intrinsic_call<'tcx>( ret.write_cvalue(fx, res); } sym::bswap => { - // FIXME(CraneStation/cranelift#794) add bswap instruction to cranelift - fn swap(bcx: &mut FunctionBuilder<'_>, v: Value) -> Value { - match bcx.func.dfg.value_type(v) { - types::I8 => v, - - // https://code.woboq.org/gcc/include/bits/byteswap.h.html - types::I16 => { - let tmp1 = bcx.ins().ishl_imm(v, 8); - let n1 = bcx.ins().band_imm(tmp1, 0xFF00); - - let tmp2 = bcx.ins().ushr_imm(v, 8); - let n2 = bcx.ins().band_imm(tmp2, 0x00FF); - - bcx.ins().bor(n1, n2) - } - types::I32 => { - let tmp1 = bcx.ins().ishl_imm(v, 24); - let n1 = bcx.ins().band_imm(tmp1, 0xFF00_0000); - - let tmp2 = bcx.ins().ishl_imm(v, 8); - let n2 = bcx.ins().band_imm(tmp2, 0x00FF_0000); - - let tmp3 = bcx.ins().ushr_imm(v, 8); - let n3 = bcx.ins().band_imm(tmp3, 0x0000_FF00); - - let tmp4 = bcx.ins().ushr_imm(v, 24); - let n4 = bcx.ins().band_imm(tmp4, 0x0000_00FF); - - let or_tmp1 = bcx.ins().bor(n1, n2); - let or_tmp2 = bcx.ins().bor(n3, n4); - bcx.ins().bor(or_tmp1, or_tmp2) - } - types::I64 => { - let tmp1 = bcx.ins().ishl_imm(v, 56); - let n1 = bcx.ins().band_imm(tmp1, 0xFF00_0000_0000_0000u64 as i64); - - let tmp2 = bcx.ins().ishl_imm(v, 40); - let n2 = bcx.ins().band_imm(tmp2, 0x00FF_0000_0000_0000u64 as i64); - - let tmp3 = bcx.ins().ishl_imm(v, 24); - let n3 = bcx.ins().band_imm(tmp3, 0x0000_FF00_0000_0000u64 as i64); - - let tmp4 = bcx.ins().ishl_imm(v, 8); - let n4 = bcx.ins().band_imm(tmp4, 0x0000_00FF_0000_0000u64 as i64); - - let tmp5 = bcx.ins().ushr_imm(v, 8); - let n5 = bcx.ins().band_imm(tmp5, 0x0000_0000_FF00_0000u64 as i64); - - let tmp6 = bcx.ins().ushr_imm(v, 24); - let n6 = bcx.ins().band_imm(tmp6, 0x0000_0000_00FF_0000u64 as i64); - - let tmp7 = bcx.ins().ushr_imm(v, 40); - let n7 = bcx.ins().band_imm(tmp7, 0x0000_0000_0000_FF00u64 as i64); - - let tmp8 = bcx.ins().ushr_imm(v, 56); - let n8 = bcx.ins().band_imm(tmp8, 0x0000_0000_0000_00FFu64 as i64); - - let or_tmp1 = bcx.ins().bor(n1, n2); - let or_tmp2 = bcx.ins().bor(n3, n4); - let or_tmp3 = bcx.ins().bor(n5, n6); - let or_tmp4 = bcx.ins().bor(n7, n8); - - let or_tmp5 = bcx.ins().bor(or_tmp1, or_tmp2); - let or_tmp6 = bcx.ins().bor(or_tmp3, or_tmp4); - bcx.ins().bor(or_tmp5, or_tmp6) - } - types::I128 => { - let (lo, hi) = bcx.ins().isplit(v); - let lo = swap(bcx, lo); - let hi = swap(bcx, hi); - bcx.ins().iconcat(hi, lo) - } - ty => unreachable!("bswap {}", ty), - } - } intrinsic_args!(fx, args => (arg); intrinsic); let val = arg.load_scalar(fx); - let res = CValue::by_val(swap(&mut fx.bcx, val), arg.layout()); + let res = if fx.bcx.func.dfg.value_type(val) == types::I8 { + val + } else { + fx.bcx.ins().bswap(val) + }; + let res = CValue::by_val(res, arg.layout()); ret.write_cvalue(fx, res); } - sym::assert_inhabited | sym::assert_zero_valid | sym::assert_uninit_valid => { + sym::assert_inhabited | sym::assert_zero_valid | sym::assert_mem_uninitialized_valid => { intrinsic_args!(fx, args => (); intrinsic); let layout = fx.layout_of(substs.type_at(0)); @@ -742,7 +673,9 @@ fn codegen_regular_intrinsic_call<'tcx>( return; } - if intrinsic == 
sym::assert_uninit_valid && !fx.tcx.permits_uninit_init(layout) { + if intrinsic == sym::assert_mem_uninitialized_valid + && !fx.tcx.permits_uninit_init(layout) + { with_no_trimmed_paths!({ crate::base::codegen_panic( fx, @@ -936,8 +869,7 @@ fn codegen_regular_intrinsic_call<'tcx>( let old = fx.bcx.ins().atomic_cas(MemFlags::trusted(), ptr, test_old, new); let is_eq = fx.bcx.ins().icmp(IntCC::Equal, old, test_old); - let ret_val = - CValue::by_val_pair(old, fx.bcx.ins().bint(types::I8, is_eq), ret.layout()); + let ret_val = CValue::by_val_pair(old, is_eq, ret.layout()); ret.write_cvalue(fx, ret_val) } @@ -1259,8 +1191,7 @@ fn codegen_regular_intrinsic_call<'tcx>( flags.set_notrap(); let lhs_val = fx.bcx.ins().load(clty, flags, lhs_ref, 0); let rhs_val = fx.bcx.ins().load(clty, flags, rhs_ref, 0); - let eq = fx.bcx.ins().icmp(IntCC::Equal, lhs_val, rhs_val); - fx.bcx.ins().bint(types::I8, eq) + fx.bcx.ins().icmp(IntCC::Equal, lhs_val, rhs_val) } else { // Just call `memcmp` (like slices do in core) when the // size is too large or it's not a power-of-two. @@ -1270,8 +1201,7 @@ fn codegen_regular_intrinsic_call<'tcx>( let returns = vec![AbiParam::new(types::I32)]; let args = &[lhs_ref, rhs_ref, bytes_val]; let cmp = fx.lib_call("memcmp", params, returns, args)[0]; - let eq = fx.bcx.ins().icmp_imm(IntCC::Equal, cmp, 0); - fx.bcx.ins().bint(types::I8, eq) + fx.bcx.ins().icmp_imm(IntCC::Equal, cmp, 0) }; ret.write_cvalue(fx, CValue::by_val(is_eq_value, ret.layout())); } diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index 51fce8c85..14f5e9187 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -112,10 +112,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( _ => unreachable!(), }; - let ty = fx.clif_type(res_lane_ty).unwrap(); - - let res_lane = fx.bcx.ins().bint(ty, res_lane); - fx.bcx.ins().ineg(res_lane) + bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane) }); } @@ -716,7 +713,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( let res_type = Type::int_with_byte_size(u16::try_from(expected_bytes).unwrap()).unwrap(); - let mut res = fx.bcx.ins().iconst(res_type, 0); + let mut res = type_zero_value(&mut fx.bcx, res_type); let lanes = match fx.tcx.sess.target.endian { Endian::Big => Box::new(0..lane_count) as Box>, diff --git a/compiler/rustc_codegen_cranelift/src/main_shim.rs b/compiler/rustc_codegen_cranelift/src/main_shim.rs index f7434633e..c10054e7f 100644 --- a/compiler/rustc_codegen_cranelift/src/main_shim.rs +++ b/compiler/rustc_codegen_cranelift/src/main_shim.rs @@ -65,7 +65,7 @@ pub(crate) fn maybe_create_entry_wrapper( returns: vec![AbiParam::new(m.target_config().pointer_type() /*isize*/)], call_conv: crate::conv_to_call_conv( tcx.sess.target.options.entry_abi, - CallConv::triple_default(m.isa().triple()), + m.target_config().default_call_conv, ), }; @@ -75,7 +75,7 @@ pub(crate) fn maybe_create_entry_wrapper( let instance = Instance::mono(tcx, rust_main_def_id).polymorphize(tcx); let main_name = tcx.symbol_name(instance).name; - let main_sig = get_function_sig(tcx, m.isa().triple(), instance); + let main_sig = get_function_sig(tcx, m.target_config().default_call_conv, instance); let main_func_id = m.declare_function(main_name, Linkage::Import, &main_sig).unwrap(); let mut ctx = Context::new(); @@ -119,7 +119,7 @@ pub(crate) fn maybe_create_entry_wrapper( .polymorphize(tcx); let report_name = 
tcx.symbol_name(report).name; - let report_sig = get_function_sig(tcx, m.isa().triple(), report); + let report_sig = get_function_sig(tcx, m.target_config().default_call_conv, report); let report_func_id = m.declare_function(report_name, Linkage::Import, &report_sig).unwrap(); let report_func_ref = m.declare_func_in_func(report_func_id, &mut bcx.func); diff --git a/compiler/rustc_codegen_cranelift/src/num.rs b/compiler/rustc_codegen_cranelift/src/num.rs index ecbab408d..afacbec64 100644 --- a/compiler/rustc_codegen_cranelift/src/num.rs +++ b/compiler/rustc_codegen_cranelift/src/num.rs @@ -49,7 +49,6 @@ fn codegen_compare_bin_op<'tcx>( ) -> CValue<'tcx> { let intcc = crate::num::bin_op_to_intcc(bin_op, signed).unwrap(); let val = fx.bcx.ins().icmp(intcc, lhs, rhs); - let val = fx.bcx.ins().bint(types::I8, val); CValue::by_val(val, fx.layout_of(fx.tcx.types.bool)) } @@ -290,8 +289,6 @@ pub(crate) fn codegen_checked_int_binop<'tcx>( _ => bug!("binop {:?} on checked int/uint lhs: {:?} rhs: {:?}", bin_op, in_lhs, in_rhs), }; - let has_overflow = fx.bcx.ins().bint(types::I8, has_overflow); - let out_layout = fx.layout_of(fx.tcx.mk_tup([in_lhs.layout().ty, fx.tcx.types.bool].iter())); CValue::by_val_pair(res, has_overflow, out_layout) } @@ -368,7 +365,6 @@ pub(crate) fn codegen_float_binop<'tcx>( _ => unreachable!(), }; let val = fx.bcx.ins().fcmp(fltcc, lhs, rhs); - let val = fx.bcx.ins().bint(types::I8, val); return CValue::by_val(val, fx.layout_of(fx.tcx.types.bool)); } _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs, in_rhs), @@ -440,7 +436,7 @@ pub(crate) fn codegen_ptr_binop<'tcx>( _ => panic!("bin_op {:?} on ptr", bin_op), }; - CValue::by_val(fx.bcx.ins().bint(types::I8, res), fx.layout_of(fx.tcx.types.bool)) + CValue::by_val(res, fx.layout_of(fx.tcx.types.bool)) } } diff --git a/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs index d637b4d89..7f45bbd8f 100644 --- a/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs +++ b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs @@ -3,19 +3,6 @@ use cranelift_codegen::ir::{condcodes::IntCC, InstructionData, Opcode, Value, ValueDef}; use cranelift_frontend::FunctionBuilder; -/// If the given value was produced by a `bint` instruction, return it's input, otherwise return the -/// given value. -pub(crate) fn maybe_unwrap_bint(bcx: &mut FunctionBuilder<'_>, arg: Value) -> Value { - if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) { - match bcx.func.dfg[arg_inst] { - InstructionData::Unary { opcode: Opcode::Bint, arg } => arg, - _ => arg, - } - } else { - arg - } -} - /// If the given value was produced by the lowering of `Rvalue::Not` return the input and true, /// otherwise return the given value and false. 
pub(crate) fn maybe_unwrap_bool_not(bcx: &mut FunctionBuilder<'_>, arg: Value) -> (Value, bool) { @@ -48,13 +35,6 @@ pub(crate) fn maybe_known_branch_taken( }; match bcx.func.dfg[arg_inst] { - InstructionData::UnaryBool { opcode: Opcode::Bconst, imm } => { - if test_zero { - Some(!imm) - } else { - Some(imm) - } - } InstructionData::UnaryImm { opcode: Opcode::Iconst, imm } => { if test_zero { Some(imm.bits() == 0) diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs index 34746ff6b..fe8af21ac 100644 --- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs +++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs @@ -392,7 +392,7 @@ impl<'tcx> CPlace<'tcx> { local: Local, layout: TyAndLayout<'tcx>, ) -> CPlace<'tcx> { - let var = Variable::with_u32(fx.next_ssa_var); + let var = Variable::from_u32(fx.next_ssa_var); fx.next_ssa_var += 1; fx.bcx.declare_var(var, fx.clif_type(layout.ty).unwrap()); CPlace { inner: CPlaceInner::Var(local, var), layout } @@ -403,9 +403,9 @@ impl<'tcx> CPlace<'tcx> { local: Local, layout: TyAndLayout<'tcx>, ) -> CPlace<'tcx> { - let var1 = Variable::with_u32(fx.next_ssa_var); + let var1 = Variable::from_u32(fx.next_ssa_var); fx.next_ssa_var += 1; - let var2 = Variable::with_u32(fx.next_ssa_var); + let var2 = Variable::from_u32(fx.next_ssa_var); fx.next_ssa_var += 1; let (ty1, ty2) = fx.clif_pair_type(layout.ty).unwrap(); @@ -515,9 +515,7 @@ impl<'tcx> CPlace<'tcx> { | (types::F32, types::I32) | (types::I64, types::F64) | (types::F64, types::I64) => fx.bcx.ins().bitcast(dst_ty, data), - _ if src_ty.is_vector() && dst_ty.is_vector() => { - fx.bcx.ins().raw_bitcast(dst_ty, data) - } + _ if src_ty.is_vector() && dst_ty.is_vector() => fx.bcx.ins().bitcast(dst_ty, data), _ if src_ty.is_vector() || dst_ty.is_vector() => { // FIXME do something more efficient for transmutes between vectors and integers. let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { @@ -590,7 +588,10 @@ impl<'tcx> CPlace<'tcx> { return; } CPlaceInner::VarPair(_local, var1, var2) => { - let (data1, data2) = CValue(from.0, dst_layout).load_scalar_pair(fx); + let (ptr, meta) = from.force_stack(fx); + assert!(meta.is_none()); + let (data1, data2) = + CValue(CValueInner::ByRef(ptr, None), dst_layout).load_scalar_pair(fx); let (dst_ty1, dst_ty2) = fx.clif_pair_type(self.layout().ty).unwrap(); transmute_value(fx, var1, data1, dst_ty1); transmute_value(fx, var2, data2, dst_ty2); -- cgit v1.2.3
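Note on the niche-discriminant rework in src/discriminant.rs above: the new code spells out the decoding algorithm in comments (relative_tag = tag - niche_start; is_niche = relative_tag <= relative_max; discr = cast(relative_tag) + niche_variants.start(), else untagged_variant) before lowering it to Cranelift IR. The sketch below is a plain-Rust rendering of that commented algorithm, not the compiler's actual code: the u128 tag, u32 variant indices, and the function and parameter names are simplifications chosen for illustration.

// Hedged sketch: decode a niche-encoded discriminant following the general
// algorithm described in the codegen_get_discriminant comments.
// Widths (u128 tag, u32 variant index) are simplified for illustration.
fn decode_niche_discr(
    tag: u128,                 // tag value read from memory
    niche_start: u128,         // tag value of the first niche variant
    relative_max: u32,         // niche_variants.end() - niche_variants.start()
    niche_variants_start: u32, // index of the first niche variant
    untagged_variant: u32,     // index of the untagged variant
) -> u32 {
    // relative_tag = tag - niche_start (wrapping, as in the general case)
    let relative_tag = tag.wrapping_sub(niche_start);
    // is_niche = relative_tag <= (unsigned) relative_max
    if relative_tag <= relative_max as u128 {
        // discr = cast(relative_tag) + niche_variants.start()
        relative_tag as u32 + niche_variants_start
    } else {
        untagged_variant
    }
}

fn main() {
    // Example: niche tag values 1..=3 encode variants 0..=2; any other tag
    // (here 0) selects the untagged variant with index 3.
    assert_eq!(decode_niche_discr(2, 1, 2, 0, 3), 1);
    assert_eq!(decode_niche_discr(0, 1, 2, 0, 3), 3);
}

The patch's special cases (a single tagged variant, or niche values at the low/high boundary of the valid range) are optimizations of this same computation that avoid the initial subtraction or the final addition; the wrapping subtraction above is why the arithmetic must happen before the cast, as the comment referencing issue #104519 points out.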