author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
commit     698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree       173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
parent     Initial commit. (diff)
Adding upstream version 1.64.0+dfsg1. (tag: upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_codegen_gcc/src/intrinsic/mod.rs')

-rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/mod.rs  1134

1 file changed, 1134 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
new file mode 100644
index 000000000..5fbdedac0
--- /dev/null
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -0,0 +1,1134 @@
+pub mod llvm;
+mod simd;
+
+use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp, FunctionType};
+use rustc_codegen_ssa::MemFlags;
+use rustc_codegen_ssa::base::wants_msvc_seh;
+use rustc_codegen_ssa::common::{IntPredicate, span_invalid_monomorphization_error};
+use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
+use rustc_codegen_ssa::mir::place::PlaceRef;
+use rustc_codegen_ssa::traits::{ArgAbiMethods, BaseTypeMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
+use rustc_middle::bug;
+use rustc_middle::ty::{self, Instance, Ty};
+use rustc_middle::ty::layout::LayoutOf;
+use rustc_span::{Span, Symbol, symbol::kw, sym};
+use rustc_target::abi::HasDataLayout;
+use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
+use rustc_target::spec::PanicStrategy;
+
+use crate::abi::GccType;
+use crate::builder::Builder;
+use crate::common::{SignType, TypeReflection};
+use crate::context::CodegenCx;
+use crate::type_of::LayoutGccExt;
+use crate::intrinsic::simd::generic_simd_intrinsic;
+
+fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) -> Option<Function<'gcc>> {
+    let gcc_name = match name {
+        sym::sqrtf32 => "sqrtf",
+        sym::sqrtf64 => "sqrt",
+        sym::powif32 => "__builtin_powif",
+        sym::powif64 => "__builtin_powi",
+        sym::sinf32 => "sinf",
+        sym::sinf64 => "sin",
+        sym::cosf32 => "cosf",
+        sym::cosf64 => "cos",
+        sym::powf32 => "powf",
+        sym::powf64 => "pow",
+        sym::expf32 => "expf",
+        sym::expf64 => "exp",
+        sym::exp2f32 => "exp2f",
+        sym::exp2f64 => "exp2",
+        sym::logf32 => "logf",
+        sym::logf64 => "log",
+        sym::log10f32 => "log10f",
+        sym::log10f64 => "log10",
+        sym::log2f32 => "log2f",
+        sym::log2f64 => "log2",
+        sym::fmaf32 => "fmaf",
+        sym::fmaf64 => "fma",
+        sym::fabsf32 => "fabsf",
+        sym::fabsf64 => "fabs",
+        sym::minnumf32 => "fminf",
+        sym::minnumf64 => "fmin",
+        sym::maxnumf32 => "fmaxf",
+        sym::maxnumf64 => "fmax",
+        sym::copysignf32 => "copysignf",
+        sym::copysignf64 => "copysign",
+        sym::floorf32 => "floorf",
+        sym::floorf64 => "floor",
+        sym::ceilf32 => "ceilf",
+        sym::ceilf64 => "ceil",
+        sym::truncf32 => "truncf",
+        sym::truncf64 => "trunc",
+        sym::rintf32 => "rintf",
+        sym::rintf64 => "rint",
+        sym::nearbyintf32 => "nearbyintf",
+        sym::nearbyintf64 => "nearbyint",
+        sym::roundf32 => "roundf",
+        sym::roundf64 => "round",
+        sym::abort => "abort",
+        _ => return None,
+    };
+    Some(cx.context.get_builtin_function(&gcc_name))
+}
+
+impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
+    fn codegen_intrinsic_call(&mut self, instance: Instance<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, args: &[OperandRef<'tcx, RValue<'gcc>>], llresult: RValue<'gcc>, span: Span) {
+        let tcx = self.tcx;
+        let callee_ty = instance.ty(tcx, ty::ParamEnv::reveal_all());
+
+        let (def_id, substs) = match *callee_ty.kind() {
+            ty::FnDef(def_id, substs) => (def_id, substs),
+            _ => bug!("expected fn item type, found {}", callee_ty),
+        };
+
+        let sig = callee_ty.fn_sig(tcx);
+        let sig = tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), sig);
+        let arg_tys = sig.inputs();
+        let ret_ty = sig.output();
+        let name = tcx.item_name(def_id);
+        let name_str = name.as_str();
+
+        let llret_ty = self.layout_of(ret_ty).gcc_type(self, true);
+        let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
+
+        let simple = get_simple_intrinsic(self, name);
+        let llval =
+            match name {
+                _ if simple.is_some() => {
+                    // FIXME(antoyo): remove this cast when the API supports function.
+                    let func = unsafe { std::mem::transmute(simple.expect("simple")) };
+                    self.call(self.type_void(), func, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None)
+                },
+                sym::likely => {
+                    self.expect(args[0].immediate(), true)
+                }
+                sym::unlikely => {
+                    self.expect(args[0].immediate(), false)
+                }
+                kw::Try => {
+                    try_intrinsic(
+                        self,
+                        args[0].immediate(),
+                        args[1].immediate(),
+                        args[2].immediate(),
+                        llresult,
+                    );
+                    return;
+                }
+                sym::breakpoint => {
+                    unimplemented!();
+                }
+                sym::va_copy => {
+                    unimplemented!();
+                }
+                sym::va_arg => {
+                    unimplemented!();
+                }
+
+                sym::volatile_load | sym::unaligned_volatile_load => {
+                    let tp_ty = substs.type_at(0);
+                    let mut ptr = args[0].immediate();
+                    if let PassMode::Cast(ty) = fn_abi.ret.mode {
+                        ptr = self.pointercast(ptr, self.type_ptr_to(ty.gcc_type(self)));
+                    }
+                    let load = self.volatile_load(ptr.get_type(), ptr);
+                    // TODO(antoyo): set alignment.
+                    self.to_immediate(load, self.layout_of(tp_ty))
+                }
+                sym::volatile_store => {
+                    let dst = args[0].deref(self.cx());
+                    args[1].val.volatile_store(self, dst);
+                    return;
+                }
+                sym::unaligned_volatile_store => {
+                    let dst = args[0].deref(self.cx());
+                    args[1].val.unaligned_volatile_store(self, dst);
+                    return;
+                }
+                sym::prefetch_read_data
+                | sym::prefetch_write_data
+                | sym::prefetch_read_instruction
+                | sym::prefetch_write_instruction => {
+                    unimplemented!();
+                }
+                sym::ctlz
+                | sym::ctlz_nonzero
+                | sym::cttz
+                | sym::cttz_nonzero
+                | sym::ctpop
+                | sym::bswap
+                | sym::bitreverse
+                | sym::rotate_left
+                | sym::rotate_right
+                | sym::saturating_add
+                | sym::saturating_sub => {
+                    let ty = arg_tys[0];
+                    match int_type_width_signed(ty, self) {
+                        Some((width, signed)) => match name {
+                            sym::ctlz | sym::cttz => {
+                                let func = self.current_func.borrow().expect("func");
+                                let then_block = func.new_block("then");
+                                let else_block = func.new_block("else");
+                                let after_block = func.new_block("after");
+
+                                let arg = args[0].immediate();
+                                let result = func.new_local(None, arg.get_type(), "zeros");
+                                let zero = self.cx.gcc_zero(arg.get_type());
+                                let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
+                                self.llbb().end_with_conditional(None, cond, then_block, else_block);
+
+                                let zero_result = self.cx.gcc_uint(arg.get_type(), width);
+                                then_block.add_assignment(None, result, zero_result);
+                                then_block.end_with_jump(None, after_block);
+
+                                // NOTE: since jumps were added in a place
+                                // count_leading_zeroes() does not expect, the current block
+                                // in the state needs to be updated.
+                                self.switch_to_block(else_block);
+
+                                let zeros =
+                                    match name {
+                                        sym::ctlz => self.count_leading_zeroes(width, arg),
+                                        sym::cttz => self.count_trailing_zeroes(width, arg),
+                                        _ => unreachable!(),
+                                    };
+                                self.llbb().add_assignment(None, result, zeros);
+                                self.llbb().end_with_jump(None, after_block);
+
+                                // NOTE: since jumps were added in a place rustc does not
+                                // expect, the current block in the state needs to be updated.
+                                self.switch_to_block(after_block);
+
+                                result.to_rvalue()
+                            }
+                            sym::ctlz_nonzero => {
+                                self.count_leading_zeroes(width, args[0].immediate())
+                            },
+                            sym::cttz_nonzero => {
+                                self.count_trailing_zeroes(width, args[0].immediate())
+                            }
+                            sym::ctpop => self.pop_count(args[0].immediate()),
+                            sym::bswap => {
+                                if width == 8 {
+                                    args[0].immediate() // byte swapping a u8/i8 is just a no-op
+                                }
+                                else {
+                                    self.gcc_bswap(args[0].immediate(), width)
+                                }
+                            },
+                            sym::bitreverse => self.bit_reverse(width, args[0].immediate()),
+                            sym::rotate_left | sym::rotate_right => {
+                                // TODO(antoyo): implement using algorithm from:
+                                // https://blog.regehr.org/archives/1063
+                                // for other platforms.
+                                let is_left = name == sym::rotate_left;
+                                let val = args[0].immediate();
+                                let raw_shift = args[1].immediate();
+                                if is_left {
+                                    self.rotate_left(val, raw_shift, width)
+                                }
+                                else {
+                                    self.rotate_right(val, raw_shift, width)
+                                }
+                            },
+                            sym::saturating_add => {
+                                self.saturating_add(args[0].immediate(), args[1].immediate(), signed, width)
+                            },
+                            sym::saturating_sub => {
+                                self.saturating_sub(args[0].immediate(), args[1].immediate(), signed, width)
+                            },
+                            _ => bug!(),
+                        },
+                        None => {
+                            span_invalid_monomorphization_error(
+                                tcx.sess,
+                                span,
+                                &format!(
+                                    "invalid monomorphization of `{}` intrinsic: \
+                                    expected basic integer type, found `{}`",
+                                    name, ty
+                                ),
+                            );
+                            return;
+                        }
+                    }
+                }
+
+                sym::raw_eq => {
+                    use rustc_target::abi::Abi::*;
+                    let tp_ty = substs.type_at(0);
+                    let layout = self.layout_of(tp_ty).layout;
+                    let _use_integer_compare = match layout.abi() {
+                        Scalar(_) | ScalarPair(_, _) => true,
+                        Uninhabited | Vector { .. } => false,
+                        Aggregate { .. } => {
+                            // For rusty ABIs, small aggregates are actually passed
+                            // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
+                            // so we re-use that same threshold here.
+                            layout.size() <= self.data_layout().pointer_size * 2
+                        }
+                    };
+
+                    let a = args[0].immediate();
+                    let b = args[1].immediate();
+                    if layout.size().bytes() == 0 {
+                        self.const_bool(true)
+                    }
+                    /*else if use_integer_compare {
+                        let integer_ty = self.type_ix(layout.size.bits()); // FIXME(antoyo): LLVM creates an integer of 96 bits for [i32; 3], but gcc doesn't support this, so it creates an integer of 128 bits.
+                        let ptr_ty = self.type_ptr_to(integer_ty);
+                        let a_ptr = self.bitcast(a, ptr_ty);
+                        let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
+                        let b_ptr = self.bitcast(b, ptr_ty);
+                        let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
+                        self.icmp(IntPredicate::IntEQ, a_val, b_val)
+                    }*/
+                    else {
+                        let void_ptr_type = self.context.new_type::<*const ()>();
+                        let a_ptr = self.bitcast(a, void_ptr_type);
+                        let b_ptr = self.bitcast(b, void_ptr_type);
+                        let n = self.context.new_cast(None, self.const_usize(layout.size().bytes()), self.sizet_type);
+                        let builtin = self.context.get_builtin_function("memcmp");
+                        let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
+                        self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
+                    }
+                }
+
+                sym::black_box => {
+                    args[0].val.store(self, result);
+
+                    let block = self.llbb();
+                    let extended_asm = block.add_extended_asm(None, "");
+                    extended_asm.add_input_operand(None, "r", result.llval);
+                    extended_asm.add_clobber("memory");
+                    extended_asm.set_volatile_flag(true);
+
+                    // We have copied the value to `result` already.
+                    return;
+                }
+
+                _ if name_str.starts_with("simd_") => {
+                    match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
+                        Ok(llval) => llval,
+                        Err(()) => return,
+                    }
+                }
+
+                _ => bug!("unknown intrinsic '{}'", name),
+            };
+
+        if !fn_abi.ret.is_ignore() {
+            if let PassMode::Cast(ty) = fn_abi.ret.mode {
+                let ptr_llty = self.type_ptr_to(ty.gcc_type(self));
+                let ptr = self.pointercast(result.llval, ptr_llty);
+                self.store(llval, ptr, result.align);
+            }
+            else {
+                OperandRef::from_immediate_or_packed_pair(self, llval, result.layout)
+                    .val
+                    .store(self, result);
+            }
+        }
+    }
+
+    fn abort(&mut self) {
+        let func = self.context.get_builtin_function("abort");
+        let func: RValue<'gcc> = unsafe { std::mem::transmute(func) };
+        self.call(self.type_void(), func, &[], None);
+    }
+
+    fn assume(&mut self, value: Self::Value) {
+        // TODO(antoyo): switch to assume when it exists.
+        // Or use something like this:
+        // #define __assume(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
+        self.expect(value, true);
+    }
+
+    fn expect(&mut self, cond: Self::Value, _expected: bool) -> Self::Value {
+        // TODO(antoyo)
+        cond
+    }
+
+    fn type_test(&mut self, _pointer: Self::Value, _typeid: Self::Value) -> Self::Value {
+        // Unsupported.
+        self.context.new_rvalue_from_int(self.int_type, 0)
+    }
+
+    fn type_checked_load(
+        &mut self,
+        _llvtable: Self::Value,
+        _vtable_byte_offset: u64,
+        _typeid: Self::Value,
+    ) -> Self::Value {
+        // Unsupported.
+        self.context.new_rvalue_from_int(self.int_type, 0)
+    }
+
+    fn va_start(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> {
+        unimplemented!();
+    }
+
+    fn va_end(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> {
+        unimplemented!();
+    }
+}
+
+impl<'a, 'gcc, 'tcx> ArgAbiMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
+    fn store_fn_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, idx: &mut usize, dst: PlaceRef<'tcx, Self::Value>) {
+        arg_abi.store_fn_arg(self, idx, dst)
+    }
+
+    fn store_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) {
+        arg_abi.store(self, val, dst)
+    }
+
+    fn arg_memory_ty(&self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>) -> Type<'gcc> {
+        arg_abi.memory_ty(self)
+    }
+}
+
+pub trait ArgAbiExt<'gcc, 'tcx> {
+    fn memory_ty(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc>;
+    fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>);
+    fn store_fn_arg(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>);
+}
+
+impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
+    /// Gets the LLVM type for a place of the original Rust type of
+    /// this argument/return, i.e., the result of `type_of::type_of`.
+    fn memory_ty(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc> {
+        self.layout.gcc_type(cx, true)
+    }
+
+    /// Stores a direct/indirect value described by this ArgAbi into a
+    /// place for the original Rust type of this argument/return.
+    /// Can be used for both storing formal arguments into Rust variables
+    /// or results of call/invoke instructions into their destinations.
+    fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) {
+        if self.is_ignore() {
+            return;
+        }
+        if self.is_sized_indirect() {
+            OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst)
+        }
+        else if self.is_unsized_indirect() {
+            bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
+        }
+        else if let PassMode::Cast(cast) = self.mode {
+            // FIXME(eddyb): Figure out when the simpler Store is safe, clang
+            // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
+            let can_store_through_cast_ptr = false;
+            if can_store_through_cast_ptr {
+                let cast_ptr_llty = bx.type_ptr_to(cast.gcc_type(bx));
+                let cast_dst = bx.pointercast(dst.llval, cast_ptr_llty);
+                bx.store(val, cast_dst, self.layout.align.abi);
+            }
+            else {
+                // The actual return type is a struct, but the ABI
+                // adaptation code has cast it into some scalar type. The
+                // code that follows is the only reliable way I have
+                // found to do a transform like i64 -> {i32,i32}.
+                // Basically we dump the data onto the stack then memcpy it.
+                //
+                // Other approaches I tried:
+                // - Casting rust ret pointer to the foreign type and using Store
+                //   is (a) unsafe if size of foreign type > size of rust type and
+                //   (b) runs afoul of strict aliasing rules, yielding invalid
+                //   assembly under -O (specifically, the store gets removed).
+                // - Truncating foreign type to correct integral type and then
+                //   bitcasting to the struct type yields invalid cast errors.
+
+                // We instead thus allocate some scratch space...
+                let scratch_size = cast.size(bx);
+                let scratch_align = cast.align(bx);
+                let llscratch = bx.alloca(cast.gcc_type(bx), scratch_align);
+                bx.lifetime_start(llscratch, scratch_size);
+
+                // ... where we first store the value...
+                bx.store(val, llscratch, scratch_align);
+
+                // ... and then memcpy it to the intended destination.
+                bx.memcpy(
+                    dst.llval,
+                    self.layout.align.abi,
+                    llscratch,
+                    scratch_align,
+                    bx.const_usize(self.layout.size.bytes()),
+                    MemFlags::empty(),
+                );
+
+                bx.lifetime_end(llscratch, scratch_size);
+            }
+        }
+        else {
+            OperandValue::Immediate(val).store(bx, dst);
+        }
+    }
+
+    fn store_fn_arg<'a>(&self, bx: &mut Builder<'a, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>) {
+        let mut next = || {
+            let val = bx.current_func().get_param(*idx as i32);
+            *idx += 1;
+            val.to_rvalue()
+        };
+        match self.mode {
+            PassMode::Ignore => {},
+            PassMode::Pair(..) => {
+                OperandValue::Pair(next(), next()).store(bx, dst);
+            },
+            PassMode::Indirect { extra_attrs: Some(_), .. } => {
+                OperandValue::Ref(next(), Some(next()), self.layout.align.abi).store(bx, dst);
+            },
+            PassMode::Direct(_) | PassMode::Indirect { extra_attrs: None, .. } | PassMode::Cast(_) => {
+                let next_arg = next();
+                self.store(bx, next_arg, dst);
+            },
+        }
+    }
+}
+
+fn int_type_width_signed<'gcc, 'tcx>(ty: Ty<'tcx>, cx: &CodegenCx<'gcc, 'tcx>) -> Option<(u64, bool)> {
+    match ty.kind() {
+        ty::Int(t) => Some((
+            match t {
+                rustc_middle::ty::IntTy::Isize => u64::from(cx.tcx.sess.target.pointer_width),
+                rustc_middle::ty::IntTy::I8 => 8,
+                rustc_middle::ty::IntTy::I16 => 16,
+                rustc_middle::ty::IntTy::I32 => 32,
+                rustc_middle::ty::IntTy::I64 => 64,
+                rustc_middle::ty::IntTy::I128 => 128,
+            },
+            true,
+        )),
+        ty::Uint(t) => Some((
+            match t {
+                rustc_middle::ty::UintTy::Usize => u64::from(cx.tcx.sess.target.pointer_width),
+                rustc_middle::ty::UintTy::U8 => 8,
+                rustc_middle::ty::UintTy::U16 => 16,
+                rustc_middle::ty::UintTy::U32 => 32,
+                rustc_middle::ty::UintTy::U64 => 64,
+                rustc_middle::ty::UintTy::U128 => 128,
+            },
+            false,
+        )),
+        _ => None,
+    }
+}
+
+impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
+    fn bit_reverse(&mut self, width: u64, value: RValue<'gcc>) -> RValue<'gcc> {
+        let result_type = value.get_type();
+        let typ = result_type.to_unsigned(self.cx);
+
+        let value =
+            if result_type.is_signed(self.cx) {
+                self.gcc_int_cast(value, typ)
+            }
+            else {
+                value
+            };
+
+        let context = &self.cx.context;
+        let result =
+            match width {
+                8 => {
+                    // First step.
+                    let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
+                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
+                    let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
+                    let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
+                    let step1 = self.or(left, right);
+
+                    // Second step.
+                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
+                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
+                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
+                    let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
+                    let step2 = self.or(left, right);
+
+                    // Third step.
+                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
+                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
+                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
+                    let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
+                    let step3 = self.or(left, right);
+
+                    step3
+                },
+                16 => {
+                    // First step.
+                    let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
+                    let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
+                    let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
+                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
+                    let step1 = self.or(left, right);
+
+                    // Second step.
+                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
+                    let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
+                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
+                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
+                    let step2 = self.or(left, right);
+
+                    // Third step.
+                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
+                    let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
+                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
+                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
+                    let step3 = self.or(left, right);
+
+                    // Fourth step.
+                    let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
+                    let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
+                    let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
+                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
+                    let step4 = self.or(left, right);
+
+                    step4
+                },
+                32 => {
+                    // TODO(antoyo): Refactor with other implementations.
+                    // First step.
+                    let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
+                    let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
+                    let step1 = self.or(left, right);
+
+                    // Second step.
+                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
+                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
+                    let step2 = self.or(left, right);
+
+                    // Third step.
+                    let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
+                    let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
+                    let step3 = self.or(left, right);
+
+                    // Fourth step.
+                    let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
+                    let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
+                    let step4 = self.or(left, right);
+
+                    // Fifth step.
+                    let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
+                    let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
+                    let step5 = self.or(left, right);
+
+                    step5
+                },
+                64 => {
+                    // First step.
+                    let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
+                    let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
+                    let step1 = self.or(left, right);
+
+                    // Second step.
+                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
+                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
+                    let step2 = self.or(left, right);
+
+                    // Third step.
+                    let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
+                    let left = self.xor(step2, left);
+                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));
+
+                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
+                    let left = self.or(temp, left);
+                    let step3 = self.xor(left, step2);
+
+                    // Fourth step.
+                    let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
+                    let left = self.xor(step3, left);
+                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));
+
+                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
+                    let left = self.or(temp, left);
+                    let step4 = self.xor(left, step3);
+
+                    // Fifth step.
+                    let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
+                    let left = self.xor(step4, left);
+                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));
+
+                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
+                    let left = self.or(temp, left);
+                    let step5 = self.xor(left, step4);
+
+                    step5
+                },
+                128 => {
+                    // TODO(antoyo): find a more efficient implementation?
+                    let sixty_four = self.gcc_int(typ, 64);
+                    let right_shift = self.gcc_lshr(value, sixty_four);
+                    let high = self.gcc_int_cast(right_shift, self.u64_type);
+                    let low = self.gcc_int_cast(value, self.u64_type);
+
+                    let reversed_high = self.bit_reverse(64, high);
+                    let reversed_low = self.bit_reverse(64, low);
+
+                    let new_low = self.gcc_int_cast(reversed_high, typ);
+                    let new_high = self.shl(self.gcc_int_cast(reversed_low, typ), sixty_four);
+
+                    self.gcc_or(new_low, new_high)
+                },
+                _ => {
+                    panic!("cannot bit reverse with width = {}", width);
+                },
+            };
+
+        self.gcc_int_cast(result, result_type)
+    }
+
+    fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
+        // TODO(antoyo): use width?
+        let arg_type = arg.get_type();
+        let count_leading_zeroes =
+            // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
+            // instead of using is_uint().
+            if arg_type.is_uint(&self.cx) {
+                "__builtin_clz"
+            }
+            else if arg_type.is_ulong(&self.cx) {
+                "__builtin_clzl"
+            }
+            else if arg_type.is_ulonglong(&self.cx) {
+                "__builtin_clzll"
+            }
+            else if width == 128 {
+                // Algorithm from: https://stackoverflow.com/a/28433850/389119
+                let array_type = self.context.new_array_type(None, arg_type, 3);
+                let result = self.current_func()
+                    .new_local(None, array_type, "count_loading_zeroes_results");
+
+                let sixty_four = self.const_uint(arg_type, 64);
+                let shift = self.lshr(arg, sixty_four);
+                let high = self.gcc_int_cast(shift, self.u64_type);
+                let low = self.gcc_int_cast(arg, self.u64_type);
+
+                let zero = self.context.new_rvalue_zero(self.usize_type);
+                let one = self.context.new_rvalue_one(self.usize_type);
+                let two = self.context.new_rvalue_from_long(self.usize_type, 2);
+
+                let clzll = self.context.get_builtin_function("__builtin_clzll");
+
+                let first_elem = self.context.new_array_access(None, result, zero);
+                let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type);
+                self.llbb()
+                    .add_assignment(None, first_elem, first_value);
+
+                let second_elem = self.context.new_array_access(None, result, one);
+                let cast = self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), arg_type);
+                let second_value = self.add(cast, sixty_four);
+                self.llbb()
+                    .add_assignment(None, second_elem, second_value);
+
+                let third_elem = self.context.new_array_access(None, result, two);
+                let third_value = self.const_uint(arg_type, 128);
+                self.llbb()
+                    .add_assignment(None, third_elem, third_value);
+
+                let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, high);
+                let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low);
+                let not_low_and_not_high = not_low & not_high;
+                let index = not_high + not_low_and_not_high;
+                // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
+                // gcc.
+                // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
+                // compilation stage.
+                let index = self.context.new_cast(None, index, self.i32_type);
+
+                let res = self.context.new_array_access(None, result, index);
+
+                return self.gcc_int_cast(res.to_rvalue(), arg_type);
+            }
+            else {
+                let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");
+                let arg = self.context.new_cast(None, arg, self.ulonglong_type);
+                let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64;
+                let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8);
+                let res = self.context.new_call(None, count_leading_zeroes, &[arg]) - diff;
+                return self.context.new_cast(None, res, arg_type);
+            };
+        let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes);
+        let res = self.context.new_call(None, count_leading_zeroes, &[arg]);
+        self.context.new_cast(None, res, arg_type)
+    }
+
+    fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
+        let result_type = arg.get_type();
+        let arg =
+            if result_type.is_signed(self.cx) {
+                let new_type = result_type.to_unsigned(self.cx);
+                self.gcc_int_cast(arg, new_type)
+            }
+            else {
+                arg
+            };
+        let arg_type = arg.get_type();
+        let (count_trailing_zeroes, expected_type) =
+            // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
+            // instead of using is_uint().
+            if arg_type.is_uchar(&self.cx) || arg_type.is_ushort(&self.cx) || arg_type.is_uint(&self.cx) {
+                // NOTE: we don't need to & 0xFF for uchar because the result is undefined on zero.
+                ("__builtin_ctz", self.cx.uint_type)
+            }
+            else if arg_type.is_ulong(&self.cx) {
+                ("__builtin_ctzl", self.cx.ulong_type)
+            }
+            else if arg_type.is_ulonglong(&self.cx) {
+                ("__builtin_ctzll", self.cx.ulonglong_type)
+            }
+            else if arg_type.is_u128(&self.cx) {
+                // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
+                let array_type = self.context.new_array_type(None, arg_type, 3);
+                let result = self.current_func()
+                    .new_local(None, array_type, "count_loading_zeroes_results");
+
+                let sixty_four = self.gcc_int(arg_type, 64);
+                let shift = self.gcc_lshr(arg, sixty_four);
+                let high = self.gcc_int_cast(shift, self.u64_type);
+                let low = self.gcc_int_cast(arg, self.u64_type);
+
+                let zero = self.context.new_rvalue_zero(self.usize_type);
+                let one = self.context.new_rvalue_one(self.usize_type);
+                let two = self.context.new_rvalue_from_long(self.usize_type, 2);
+
+                let ctzll = self.context.get_builtin_function("__builtin_ctzll");
+
+                let first_elem = self.context.new_array_access(None, result, zero);
+                let first_value = self.gcc_int_cast(self.context.new_call(None, ctzll, &[low]), arg_type);
+                self.llbb()
+                    .add_assignment(None, first_elem, first_value);
+
+                let second_elem = self.context.new_array_access(None, result, one);
+                let second_value = self.gcc_add(self.gcc_int_cast(self.context.new_call(None, ctzll, &[high]), arg_type), sixty_four);
+                self.llbb()
+                    .add_assignment(None, second_elem, second_value);
+
+                let third_elem = self.context.new_array_access(None, result, two);
+                let third_value = self.gcc_int(arg_type, 128);
+                self.llbb()
+                    .add_assignment(None, third_elem, third_value);
+
+                let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low);
+                let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, high);
+                let not_low_and_not_high = not_low & not_high;
+                let index = not_low + not_low_and_not_high;
+                // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
+                // gcc.
+                // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
+                // compilation stage.
+                let index = self.context.new_cast(None, index, self.i32_type);
+
+                let res = self.context.new_array_access(None, result, index);
+
+                return self.gcc_int_cast(res.to_rvalue(), result_type);
+            }
+            else {
+                let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll");
+                let arg_size = arg_type.get_size();
+                let casted_arg = self.context.new_cast(None, arg, self.ulonglong_type);
+                let byte_diff = self.ulonglong_type.get_size() as i64 - arg_size as i64;
+                let diff = self.context.new_rvalue_from_long(self.int_type, byte_diff * 8);
+                let mask = self.context.new_rvalue_from_long(arg_type, -1); // To get the value with all bits set.
+                let masked = mask & self.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, arg);
+                let cond = self.context.new_comparison(None, ComparisonOp::Equals, masked, mask);
+                let diff = diff * self.context.new_cast(None, cond, self.int_type);
+                let res = self.context.new_call(None, count_trailing_zeroes, &[casted_arg]) - diff;
+                return self.context.new_cast(None, res, result_type);
+            };
+        let count_trailing_zeroes = self.context.get_builtin_function(count_trailing_zeroes);
+        let arg =
+            if arg_type != expected_type {
+                self.context.new_cast(None, arg, expected_type)
+            }
+            else {
+                arg
+            };
+        let res = self.context.new_call(None, count_trailing_zeroes, &[arg]);
+        self.context.new_cast(None, res, result_type)
+    }
+
+    fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> {
+        // TODO(antoyo): use the optimized version with fewer operations.
+        let result_type = value.get_type();
+        let value_type = result_type.to_unsigned(self.cx);
+
+        let value =
+            if result_type.is_signed(self.cx) {
+                self.gcc_int_cast(value, value_type)
+            }
+            else {
+                value
+            };
+
+        if value_type.is_u128(&self.cx) {
+            // TODO(antoyo): implement in the normal algorithm below to have a more efficient
+            // implementation (that does not require a call to __popcountdi2).
+            let popcount = self.context.get_builtin_function("__builtin_popcountll");
+            let sixty_four = self.gcc_int(value_type, 64);
+            let right_shift = self.gcc_lshr(value, sixty_four);
+            let high = self.gcc_int_cast(right_shift, self.cx.ulonglong_type);
+            let high = self.context.new_call(None, popcount, &[high]);
+            let low = self.gcc_int_cast(value, self.cx.ulonglong_type);
+            let low = self.context.new_call(None, popcount, &[low]);
+            let res = high + low;
+            return self.gcc_int_cast(res, result_type);
+        }
+
+        // First step.
+        let mask = self.context.new_rvalue_from_long(value_type, 0x5555555555555555);
+        let left = value & mask;
+        let shifted = value >> self.context.new_rvalue_from_int(value_type, 1);
+        let right = shifted & mask;
+        let value = left + right;
+
+        // Second step.
+        let mask = self.context.new_rvalue_from_long(value_type, 0x3333333333333333);
+        let left = value & mask;
+        let shifted = value >> self.context.new_rvalue_from_int(value_type, 2);
+        let right = shifted & mask;
+        let value = left + right;
+
+        // Third step.
+        let mask = self.context.new_rvalue_from_long(value_type, 0x0F0F0F0F0F0F0F0F);
+        let left = value & mask;
+        let shifted = value >> self.context.new_rvalue_from_int(value_type, 4);
+        let right = shifted & mask;
+        let value = left + right;
+
+        if value_type.is_u8(&self.cx) {
+            return self.context.new_cast(None, value, result_type);
+        }
+
+        // Fourth step.
+        let mask = self.context.new_rvalue_from_long(value_type, 0x00FF00FF00FF00FF);
+        let left = value & mask;
+        let shifted = value >> self.context.new_rvalue_from_int(value_type, 8);
+        let right = shifted & mask;
+        let value = left + right;
+
+        if value_type.is_u16(&self.cx) {
+            return self.context.new_cast(None, value, result_type);
+        }
+
+        // Fifth step.
+        let mask = self.context.new_rvalue_from_long(value_type, 0x0000FFFF0000FFFF);
+        let left = value & mask;
+        let shifted = value >> self.context.new_rvalue_from_int(value_type, 16);
+        let right = shifted & mask;
+        let value = left + right;
+
+        if value_type.is_u32(&self.cx) {
+            return self.context.new_cast(None, value, result_type);
+        }
+
+        // Sixth step.
+        let mask = self.context.new_rvalue_from_long(value_type, 0x00000000FFFFFFFF);
+        let left = value & mask;
+        let shifted = value >> self.context.new_rvalue_from_int(value_type, 32);
+        let right = shifted & mask;
+        let value = left + right;
+
+        self.context.new_cast(None, value, result_type)
+    }
+
+    // Algorithm from: https://blog.regehr.org/archives/1063
+    fn rotate_left(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> {
+        let max = self.const_uint(shift.get_type(), width);
+        let shift = self.urem(shift, max);
+        let lhs = self.shl(value, shift);
+        let result_neg = self.neg(shift);
+        let result_and =
+            self.and(
+                result_neg,
+                self.const_uint(shift.get_type(), width - 1),
+            );
+        let rhs = self.lshr(value, result_and);
+        self.or(lhs, rhs)
+    }
+
+    // Algorithm from: https://blog.regehr.org/archives/1063
+    fn rotate_right(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> {
+        let max = self.const_uint(shift.get_type(), width);
+        let shift = self.urem(shift, max);
+        let lhs = self.lshr(value, shift);
+        let result_neg = self.neg(shift);
+        let result_and =
+            self.and(
+                result_neg,
+                self.const_uint(shift.get_type(), width - 1),
+            );
+        let rhs = self.shl(value, result_and);
+        self.or(lhs, rhs)
+    }
+
+    fn saturating_add(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
+        let result_type = lhs.get_type();
+        if signed {
+            // Based on algorithm from: https://stackoverflow.com/a/56531252/389119
+            let func = self.current_func.borrow().expect("func");
+            let res = func.new_local(None, result_type, "saturating_sum");
+            let supports_native_type = self.is_native_int_type(result_type);
+            let overflow =
+                if supports_native_type {
+                    let func_name =
+                        match width {
+                            8 => "__builtin_add_overflow",
+                            16 => "__builtin_add_overflow",
+                            32 => "__builtin_sadd_overflow",
+                            64 => "__builtin_saddll_overflow",
+                            128 => "__builtin_add_overflow",
+                            _ => unreachable!(),
+                        };
+                    let overflow_func = self.context.get_builtin_function(func_name);
+                    self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None)
+                }
+                else {
+                    let func_name =
+                        match width {
+                            128 => "__rust_i128_addo",
+                            _ => unreachable!(),
+                        };
+                    let param_a = self.context.new_parameter(None, result_type, "a");
+                    let param_b = self.context.new_parameter(None, result_type, "b");
+                    let result_field = self.context.new_field(None, result_type, "result");
+                    let overflow_field = self.context.new_field(None, self.bool_type, "overflow");
+                    let return_type = self.context.new_struct_type(None, "result_overflow", &[result_field, overflow_field]);
+                    let func = self.context.new_function(None, FunctionType::Extern, return_type.as_type(), &[param_a, param_b], func_name, false);
+                    let result = self.context.new_call(None, func, &[lhs, rhs]);
+                    let overflow = result.access_field(None, overflow_field);
+                    let int_result = result.access_field(None, result_field);
+                    self.llbb().add_assignment(None, res, int_result);
+                    overflow
+                };
+
+            let then_block = func.new_block("then");
+            let after_block = func.new_block("after");
+
+            // Return `result_type`'s maximum or minimum value on overflow
+            // NOTE: convert the type to unsigned to have an unsigned shift.
+            let unsigned_type = result_type.to_unsigned(&self.cx);
+            let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1));
+            let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0));
+            let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1));
+            then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type));
+            then_block.end_with_jump(None, after_block);
+
+            self.llbb().end_with_conditional(None, overflow, then_block, after_block);
+
+            // NOTE: since jumps were added in a place rustc does not
+            // expect, the current block in the state needs to be updated.
+            self.switch_to_block(after_block);
+
+            res.to_rvalue()
+        }
+        else {
+            // Algorithm from: http://locklessinc.com/articles/sat_arithmetic/
+            let res = self.gcc_add(lhs, rhs);
+            let cond = self.gcc_icmp(IntPredicate::IntULT, res, lhs);
+            let value = self.gcc_neg(self.gcc_int_cast(cond, result_type));
+            self.gcc_or(res, value)
+        }
+    }
+
+    // Algorithm from: https://locklessinc.com/articles/sat_arithmetic/
+    fn saturating_sub(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
+        let result_type = lhs.get_type();
+        if signed {
+            // Based on algorithm from: https://stackoverflow.com/a/56531252/389119
+            let func = self.current_func.borrow().expect("func");
+            let res = func.new_local(None, result_type, "saturating_diff");
+            let supports_native_type = self.is_native_int_type(result_type);
+            let overflow =
+                if supports_native_type {
+                    let func_name =
+                        match width {
+                            8 => "__builtin_sub_overflow",
+                            16 => "__builtin_sub_overflow",
+                            32 => "__builtin_ssub_overflow",
+                            64 => "__builtin_ssubll_overflow",
+                            128 => "__builtin_sub_overflow",
+                            _ => unreachable!(),
+                        };
+                    let overflow_func = self.context.get_builtin_function(func_name);
+                    self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None)
+                }
+                else {
+                    let func_name =
+                        match width {
+                            128 => "__rust_i128_subo",
+                            _ => unreachable!(),
+                        };
+                    let param_a = self.context.new_parameter(None, result_type, "a");
+                    let param_b = self.context.new_parameter(None, result_type, "b");
+                    let result_field = self.context.new_field(None, result_type, "result");
+                    let overflow_field = self.context.new_field(None, self.bool_type, "overflow");
+                    let return_type = self.context.new_struct_type(None, "result_overflow", &[result_field, overflow_field]);
+                    let func = self.context.new_function(None, FunctionType::Extern, return_type.as_type(), &[param_a, param_b], func_name, false);
+                    let result = self.context.new_call(None, func, &[lhs, rhs]);
+                    let overflow = result.access_field(None, overflow_field);
+                    let int_result = result.access_field(None, result_field);
+                    self.llbb().add_assignment(None, res, int_result);
+                    overflow
+                };
+
+            let then_block = func.new_block("then");
+            let after_block = func.new_block("after");
+
+            // Return `result_type`'s maximum or minimum value on overflow
+            // NOTE: convert the type to unsigned to have an unsigned shift.
+            let unsigned_type = result_type.to_unsigned(&self.cx);
+            let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1));
+            let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0));
+            let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1));
+            then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type));
+            then_block.end_with_jump(None, after_block);
+
+            self.llbb().end_with_conditional(None, overflow, then_block, after_block);
+
+            // NOTE: since jumps were added in a place rustc does not
+            // expect, the current block in the state needs to be updated.
+            self.switch_to_block(after_block);
+
+            res.to_rvalue()
+        }
+        else {
+            let res = self.gcc_sub(lhs, rhs);
+            let comparison = self.gcc_icmp(IntPredicate::IntULE, res, lhs);
+            let value = self.gcc_neg(self.gcc_int_cast(comparison, result_type));
+            self.gcc_and(res, value)
+        }
+    }
+}
+
+fn try_intrinsic<'gcc, 'tcx>(bx: &mut Builder<'_, 'gcc, 'tcx>, try_func: RValue<'gcc>, data: RValue<'gcc>, _catch_func: RValue<'gcc>, dest: RValue<'gcc>) {
+    // NOTE: the `|| true` here is to use the panic=abort strategy with panic=unwind too
+    if bx.sess().panic_strategy() == PanicStrategy::Abort || true {
+        // TODO(bjorn3): Properly implement unwinding and remove the `|| true` once this is done.
+        bx.call(bx.type_void(), try_func, &[data], None);
+        // Return 0 unconditionally from the intrinsic call;
+        // we can never unwind.
+        let ret_align = bx.tcx.data_layout.i32_align.abi;
+        bx.store(bx.const_i32(0), dest, ret_align);
+    }
+    else if wants_msvc_seh(bx.sess()) {
+        unimplemented!();
+    }
+    else {
+        unimplemented!();
+    }
+}