1 files changed, 332 insertions, 81 deletions
diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
index e88c12716..a3c8142be 100644
--- a/compiler/rustc_codegen_gcc/src/builder.rs
+++ b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -217,7 +217,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
                 let actual_ty = actual_val.get_type();
                 if expected_ty != actual_ty {
-                    if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
+                    if !actual_ty.is_vector() && !expected_ty.is_vector() && (actual_ty.is_integral() && expected_ty.is_integral()) || (actual_ty.get_pointee().is_some() && expected_ty.get_pointee().is_some()) {
                         self.context.new_cast(None, actual_val, expected_ty)
                     }
                     else if on_stack_param_indices.contains(&index) {
@@ -226,6 +226,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                     else {
                         assert!(!((actual_ty.is_vector() && !expected_ty.is_vector()) || (!actual_ty.is_vector() && expected_ty.is_vector())), "{:?} ({}) -> {:?} ({}), index: {:?}[{}]", actual_ty, actual_ty.is_vector(), expected_ty, expected_ty.is_vector(), func_ptr, index);
                         // TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
+                        // TODO: remove bitcast now that vector types can be compared?
                         self.bitcast(actual_val, expected_ty)
                     }
                 }
@@ -279,21 +280,30 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     }
 
     fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
-        let args = self.check_ptr_call("call", func_ptr, args);
+        let gcc_func = func_ptr.get_type().dyncast_function_ptr_type().expect("function ptr");
+        let func_name = format!("{:?}", func_ptr);
+        let previous_arg_count = args.len();
+        let orig_args = args;
+        let args = {
+            let function_address_names = self.function_address_names.borrow();
+            let original_function_name = function_address_names.get(&func_ptr);
+            llvm::adjust_intrinsic_arguments(&self, gcc_func, args.into(), &func_name, original_function_name)
+        };
+        let args_adjusted = args.len() != previous_arg_count;
+        let args = self.check_ptr_call("call", func_ptr, &*args);
 
         // gccjit requires to use the result of functions, even when it's not used.
         // That's why we assign the result to a local or call add_eval().
-        let gcc_func = func_ptr.get_type().dyncast_function_ptr_type().expect("function ptr");
         let return_type = gcc_func.get_return_type();
         let void_type = self.context.new_type::<()>();
         let current_func = self.block.get_function();
 
         if return_type != void_type {
             unsafe { RETURN_VALUE_COUNT += 1 };
-            let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
-            let func_name = format!("{:?}", func_ptr);
-            let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args, &func_name);
-            self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
+            let return_value = self.cx.context.new_call_through_ptr(None, func_ptr, &args);
+            let return_value = llvm::adjust_intrinsic_return_value(&self, return_value, &func_name, &args, args_adjusted, orig_args);
+            let result = current_func.new_local(None, return_value.get_type(), &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
+            self.block.add_assignment(None, result, return_value);
             result.to_rvalue()
         }
         else {
@@ -366,10 +376,10 @@ impl<'tcx> FnAbiOfHelpers<'tcx> for Builder<'_, '_, 'tcx> {
     }
 }
 
-impl<'gcc, 'tcx> Deref for Builder<'_, 'gcc, 'tcx> {
+impl<'a, 'gcc, 'tcx> Deref for Builder<'a, 'gcc, 'tcx> {
     type Target = CodegenCx<'gcc, 'tcx>;
 
-    fn deref(&self) -> &Self::Target {
+    fn deref<'b>(&'b self) -> &'a Self::Target {
         self.cx
     }
 }
@@ -387,7 +397,7 @@ impl<'gcc, 'tcx> BackendTypes for Builder<'_, 'gcc, 'tcx> {
 }
 
 impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
-    fn build(cx: &'a CodegenCx<'gcc, 'tcx>, block: Block<'gcc>) -> Self {
+    fn build(cx: &'a CodegenCx<'gcc, 'tcx>, block: Block<'gcc>) -> Builder<'a, 'gcc, 'tcx> {
         Builder::with_cx(cx, block)
     }
 
@@ -444,17 +454,36 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         self.block.end_with_switch(None, value, default_block, &gcc_cases);
     }
 
-    fn invoke(
-        &mut self,
-        typ: Type<'gcc>,
-        fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>,
-        func: RValue<'gcc>,
-        args: &[RValue<'gcc>],
-        then: Block<'gcc>,
-        catch: Block<'gcc>,
-        _funclet: Option<&Funclet>,
-    ) -> RValue<'gcc> {
-        // TODO(bjorn3): Properly implement unwinding.
+    #[cfg(feature="master")]
+    fn invoke(&mut self, typ: Type<'gcc>, _fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> {
+        let try_block = self.current_func().new_block("try");
+
+        let current_block = self.block.clone();
+        self.block = try_block;
+        let call = self.call(typ, None, func, args, None); // TODO(antoyo): use funclet here?
+        self.block = current_block;
+
+        let return_value = self.current_func()
+            .new_local(None, call.get_type(), "invokeResult");
+
+        try_block.add_assignment(None, return_value, call);
+
+        try_block.end_with_jump(None, then);
+
+        if self.cleanup_blocks.borrow().contains(&catch) {
+            self.block.add_try_finally(None, try_block, catch);
+        }
+        else {
+            self.block.add_try_catch(None, try_block, catch);
+        }
+
+        self.block.end_with_jump(None, then);
+
+        return_value.to_rvalue()
+    }
+
+    #[cfg(not(feature="master"))]
+    fn invoke(&mut self, typ: Type<'gcc>, fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> {
         let call_site = self.call(typ, None, func, args, None);
         let condition = self.context.new_rvalue_from_int(self.bool_type, 1);
         self.llbb().end_with_conditional(None, condition, then, catch);
@@ -542,6 +571,31 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn frem(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        // TODO(antoyo): add check in libgccjit since using the binary operator % causes the following error:
+        // during RTL pass: expand
+        // libgccjit.so: error: in expmed_mode_index, at expmed.h:240
+        // 0x7f0101d58dc6 expmed_mode_index
+        //     ../../../gcc/gcc/expmed.h:240
+        // 0x7f0101d58e35 expmed_op_cost_ptr
+        //     ../../../gcc/gcc/expmed.h:262
+        // 0x7f0101d594a1 sdiv_cost_ptr
+        //     ../../../gcc/gcc/expmed.h:531
+        // 0x7f0101d594f3 sdiv_cost
+        //     ../../../gcc/gcc/expmed.h:549
+        // 0x7f0101d6af7e expand_divmod(int, tree_code, machine_mode, rtx_def*, rtx_def*, rtx_def*, int, optab_methods)
+        //     ../../../gcc/gcc/expmed.cc:4356
+        // 0x7f0101d94f9e expand_expr_divmod
+        //     ../../../gcc/gcc/expr.cc:8929
+        // 0x7f0101d97a26 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
+        //     ../../../gcc/gcc/expr.cc:9566
+        // 0x7f0101bef6ef expand_gimple_stmt_1
+        //     ../../../gcc/gcc/cfgexpand.cc:3967
+        // 0x7f0101bef910 expand_gimple_stmt
+        //     ../../../gcc/gcc/cfgexpand.cc:4028
+        // 0x7f0101bf6ee7 expand_gimple_basic_block
+        //     ../../../gcc/gcc/cfgexpand.cc:6069
+        // 0x7f0101bf9194 execute
+        //     ../../../gcc/gcc/cfgexpand.cc:6795
         if a.get_type().is_compatible_with(self.cx.float_type) {
             let fmodf = self.context.get_builtin_function("fmodf");
             // FIXME(antoyo): this seems to produce the wrong result.
@@ -616,24 +670,29 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         a * b
     }
 
-    fn fadd_fast(&mut self, _lhs: RValue<'gcc>, _rhs: RValue<'gcc>) -> RValue<'gcc> {
-        unimplemented!();
+    fn fadd_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
+        // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        lhs + rhs
     }
 
-    fn fsub_fast(&mut self, _lhs: RValue<'gcc>, _rhs: RValue<'gcc>) -> RValue<'gcc> {
-        unimplemented!();
+    fn fsub_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
+        // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        lhs - rhs
     }
 
-    fn fmul_fast(&mut self, _lhs: RValue<'gcc>, _rhs: RValue<'gcc>) -> RValue<'gcc> {
-        unimplemented!();
+    fn fmul_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
+        // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        lhs * rhs
     }
 
-    fn fdiv_fast(&mut self, _lhs: RValue<'gcc>, _rhs: RValue<'gcc>) -> RValue<'gcc> {
-        unimplemented!();
+    fn fdiv_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
+        // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        lhs / rhs
     }
 
-    fn frem_fast(&mut self, _lhs: RValue<'gcc>, _rhs: RValue<'gcc>) -> RValue<'gcc> {
-        unimplemented!();
+    fn frem_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
+        // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        self.frem(lhs, rhs)
     }
 
     fn checked_binop(&mut self, oop: OverflowOp, typ: Ty<'_>, lhs: Self::Value, rhs: Self::Value) -> (Self::Value, Self::Value) {
@@ -722,7 +781,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
             }
             else if place.layout.is_gcc_immediate() {
                 let load = self.load(
-                    place.layout.gcc_type(self, false),
+                    place.layout.gcc_type(self),
                     place.llval,
                     place.align,
                 );
@@ -733,7 +792,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
             }
             else if let abi::Abi::ScalarPair(ref a, ref b) = place.layout.abi {
                 let b_offset = a.size(self).align_to(b.align(self).abi);
-                let pair_type = place.layout.gcc_type(self, false);
+                let pair_type = place.layout.gcc_type(self);
 
                 let mut load = |i, scalar: &abi::Scalar, align| {
                     let llptr = self.struct_gep(pair_type, place.llval, i as u64);
@@ -833,26 +892,31 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn gep(&mut self, _typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
-        let mut result = ptr;
+        let ptr_type = ptr.get_type();
+        let mut pointee_type = ptr.get_type();
+        // NOTE: we cannot use array indexing here like in inbounds_gep because array indexing is
+        // always considered in bounds in GCC (TODO(antoyo): to be verified).
+        // So, we have to cast to a number.
+        let mut result = self.context.new_bitcast(None, ptr, self.sizet_type);
+        // FIXME(antoyo): if there were more than 1 index, this code is probably wrong and would
+        // require dereferencing the pointer.
         for index in indices {
-            result = self.context.new_array_access(None, result, *index).get_address(None).to_rvalue();
+            pointee_type = pointee_type.get_pointee().expect("pointee type");
+            let pointee_size = self.context.new_rvalue_from_int(index.get_type(), pointee_type.get_size() as i32);
+            result = result + self.gcc_int_cast(*index * pointee_size, self.sizet_type);
         }
-        result
+        self.context.new_bitcast(None, result, ptr_type)
     }
 
     fn inbounds_gep(&mut self, _typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
-        // FIXME(antoyo): would be safer if doing the same thing (loop) as gep.
-        // TODO(antoyo): specify inbounds somehow.
-        match indices.len() {
-            1 => {
-                self.context.new_array_access(None, ptr, indices[0]).get_address(None)
-            },
-            2 => {
-                let array = ptr.dereference(None); // TODO(antoyo): assert that first index is 0?
-                self.context.new_array_access(None, array, indices[1]).get_address(None)
-            },
-            _ => unimplemented!(),
+        // NOTE: array indexing is always considered in bounds in GCC (TODO(antoyo): to be verified).
+        let mut indices = indices.into_iter();
+        let index = indices.next().expect("first index in inbounds_gep");
+        let mut result = self.context.new_array_access(None, ptr, *index);
+        for index in indices {
+            result = self.context.new_array_access(None, result, *index);
         }
+        result.get_address(None)
     }
 
     fn struct_gep(&mut self, value_type: Type<'gcc>, ptr: RValue<'gcc>, idx: u64) -> RValue<'gcc> {
@@ -1034,8 +1098,19 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         unimplemented!();
     }
 
-    fn extract_element(&mut self, _vec: RValue<'gcc>, _idx: RValue<'gcc>) -> RValue<'gcc> {
-        unimplemented!();
+    #[cfg(feature="master")]
+    fn extract_element(&mut self, vec: RValue<'gcc>, idx: RValue<'gcc>) -> RValue<'gcc> {
+        self.context.new_vector_access(None, vec, idx).to_rvalue()
+    }
+
+    #[cfg(not(feature="master"))]
+    fn extract_element(&mut self, vec: RValue<'gcc>, idx: RValue<'gcc>) -> RValue<'gcc> {
+        let vector_type = vec.get_type().unqualified().dyncast_vector().expect("Called extract_element on a non-vector type");
+        let element_type = vector_type.get_element_type();
+        let vec_num_units = vector_type.get_num_units();
+        let array_type = self.context.new_array_type(None, element_type, vec_num_units as u64);
+        let array = self.context.new_bitcast(None, vec, array_type).to_rvalue();
+        self.context.new_array_access(None, array, idx).to_rvalue()
     }
 
     fn vector_splat(&mut self, _num_elts: usize, _elt: RValue<'gcc>) -> RValue<'gcc> {
@@ -1116,22 +1191,52 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn set_personality_fn(&mut self, _personality: RValue<'gcc>) {
-        // TODO(antoyo)
+        #[cfg(feature="master")]
+        {
+            let personality = self.rvalue_as_function(_personality);
+            self.current_func().set_personality_function(personality);
+        }
+    }
+
+    #[cfg(feature="master")]
+    fn cleanup_landing_pad(&mut self, pers_fn: RValue<'gcc>) -> (RValue<'gcc>, RValue<'gcc>) {
+        self.set_personality_fn(pers_fn);
+
+        // NOTE: insert the current block in a variable so that a later call to invoke knows to
+        // generate a try/finally instead of a try/catch for this block.
+        self.cleanup_blocks.borrow_mut().insert(self.block);
+
+        let eh_pointer_builtin = self.cx.context.get_target_builtin_function("__builtin_eh_pointer");
+        let zero = self.cx.context.new_rvalue_zero(self.int_type);
+        let ptr = self.cx.context.new_call(None, eh_pointer_builtin, &[zero]);
+
+        let value1_type = self.u8_type.make_pointer();
+        let ptr = self.cx.context.new_cast(None, ptr, value1_type);
+        let value1 = ptr;
+        let value2 = zero; // TODO(antoyo): set the proper value here (the type of exception?).
+
+        (value1, value2)
     }
 
+    #[cfg(not(feature="master"))]
     fn cleanup_landing_pad(&mut self, _pers_fn: RValue<'gcc>) -> (RValue<'gcc>, RValue<'gcc>) {
-        (
-            self.current_func().new_local(None, self.u8_type.make_pointer(), "landing_pad0")
-                .to_rvalue(),
-            self.current_func().new_local(None, self.i32_type, "landing_pad1").to_rvalue(),
-        )
-        // TODO(antoyo): Properly implement unwinding.
-        // the above is just to make the compilation work as it seems
-        // rustc_codegen_ssa now calls the unwinding builder methods even on panic=abort.
+        let value1 = self.current_func().new_local(None, self.u8_type.make_pointer(), "landing_pad0")
+                .to_rvalue();
+        let value2 = self.current_func().new_local(None, self.i32_type, "landing_pad1").to_rvalue();
+        (value1, value2)
+    }
+
+    #[cfg(feature="master")]
+    fn resume(&mut self, exn0: RValue<'gcc>, _exn1: RValue<'gcc>) {
+        let exn_type = exn0.get_type();
+        let exn = self.context.new_cast(None, exn0, exn_type);
+        let unwind_resume = self.context.get_target_builtin_function("__builtin_unwind_resume");
+        self.llbb().add_eval(None, self.context.new_call(None, unwind_resume, &[exn]));
+        self.unreachable();
     }
 
+    #[cfg(not(feature="master"))]
     fn resume(&mut self, _exn0: RValue<'gcc>, _exn1: RValue<'gcc>) {
-        // TODO(bjorn3): Properly implement unwinding.
         self.unreachable();
     }
 
@@ -1160,6 +1265,15 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     fn atomic_cmpxchg(&mut self, dst: RValue<'gcc>, cmp: RValue<'gcc>, src: RValue<'gcc>, order: AtomicOrdering, failure_order: AtomicOrdering, weak: bool) -> RValue<'gcc> {
         let expected = self.current_func().new_local(None, cmp.get_type(), "expected");
         self.llbb().add_assignment(None, expected, cmp);
+        // NOTE: gcc doesn't support a failure memory model that is stronger than the success
+        // memory model.
+        let order =
+            if failure_order as i32 > order as i32 {
+                failure_order
+            }
+            else {
+                order
+            };
         let success = self.compare_exchange(dst, expected, src, order, failure_order, weak);
 
         let pair_type = self.cx.type_struct(&[src.get_type(), self.bool_type], false);
@@ -1469,7 +1583,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
     #[cfg(feature="master")]
     pub fn shuffle_vector(&mut self, v1: RValue<'gcc>, v2: RValue<'gcc>, mask: RValue<'gcc>) -> RValue<'gcc> {
-        let struct_type = mask.get_type().is_struct().expect("mask of struct type");
+        let struct_type = mask.get_type().is_struct().expect("mask should be of struct type");
 
         // TODO(antoyo): use a recursive unqualified() here.
         let vector_type = v1.get_type().unqualified().dyncast_vector().expect("vector type");
@@ -1501,22 +1615,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             vector_elements.push(self.context.new_rvalue_zero(mask_element_type));
         }
 
-        let array_type = self.context.new_array_type(None, element_type, vec_num_units as i32);
         let result_type = self.context.new_vector_type(element_type, mask_num_units as u64);
         let (v1, v2) =
             if vec_num_units < mask_num_units {
                 // NOTE: the mask needs to be the same length as the input vectors, so join the 2
                 // vectors and create a dummy second vector.
-                // TODO(antoyo): switch to using new_vector_access.
-                let array = self.context.new_bitcast(None, v1, array_type);
                 let mut elements = vec![];
                 for i in 0..vec_num_units {
-                    elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
+                    elements.push(self.context.new_vector_access(None, v1, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
                 }
-                // TODO(antoyo): switch to using new_vector_access.
-                let array = self.context.new_bitcast(None, v2, array_type);
                 for i in 0..(mask_num_units - vec_num_units) {
-                    elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
+                    elements.push(self.context.new_vector_access(None, v2, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
                 }
                 let v1 = self.context.new_rvalue_from_vector(None, result_type, &elements);
                 let zero = self.context.new_rvalue_zero(element_type);
@@ -1536,10 +1645,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             // NOTE: if padding was added, only select the number of elements of the masks to
             // remove that padding in the result.
             let mut elements = vec![];
-            // TODO(antoyo): switch to using new_vector_access.
-            let array = self.context.new_bitcast(None, result, array_type);
             for i in 0..mask_num_units {
-                elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
+                elements.push(self.context.new_vector_access(None, result, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
             }
             self.context.new_rvalue_from_vector(None, result_type, &elements)
         }
@@ -1558,18 +1665,20 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
     {
         let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
+        let element_type = vector_type.get_element_type();
+        let mask_element_type = self.type_ix(element_type.get_size() as u64 * 8);
         let element_count = vector_type.get_num_units();
         let mut vector_elements = vec![];
         for i in 0..element_count {
             vector_elements.push(i);
         }
-        let mask_type = self.context.new_vector_type(self.int_type, element_count as u64);
+        let mask_type = self.context.new_vector_type(mask_element_type, element_count as u64);
         let mut shift = 1;
         let mut res = src;
         while shift < element_count {
             let vector_elements: Vec<_> =
                 vector_elements.iter()
-                    .map(|i| self.context.new_rvalue_from_int(self.int_type, ((i + shift) % element_count) as i32))
+                    .map(|i| self.context.new_rvalue_from_int(mask_element_type, ((i + shift) % element_count) as i32))
                     .collect();
             let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements);
             let shifted = self.context.new_rvalue_vector_perm(None, res, res, mask);
@@ -1581,7 +1690,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     }
 
     #[cfg(not(feature="master"))]
-    pub fn vector_reduce<F>(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc>
+    pub fn vector_reduce<F>(&mut self, _src: RValue<'gcc>, _op: F) -> RValue<'gcc>
     where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
     {
         unimplemented!();
@@ -1595,15 +1704,47 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         unimplemented!();
     }
 
+    #[cfg(feature="master")]
+    pub fn vector_reduce_fadd(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
+        let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
+        let element_count = vector_type.get_num_units();
+        (0..element_count).into_iter()
+            .map(|i| self.context
+                .new_vector_access(None, src, self.context.new_rvalue_from_int(self.int_type, i as _))
+                .to_rvalue())
+            .fold(acc, |x, i| x + i)
+    }
+
+    #[cfg(not(feature="master"))]
+    pub fn vector_reduce_fadd(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
+        unimplemented!();
+    }
+
     pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
         unimplemented!();
     }
 
+    #[cfg(feature="master")]
+    pub fn vector_reduce_fmul(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
+        let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
+        let element_count = vector_type.get_num_units();
+        (0..element_count).into_iter()
+            .map(|i| self.context
+                .new_vector_access(None, src, self.context.new_rvalue_from_int(self.int_type, i as _))
+                .to_rvalue())
+            .fold(acc, |x, i| x * i)
+    }
+
+    #[cfg(not(feature="master"))]
+    pub fn vector_reduce_fmul(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
+        unimplemented!()
+    }
+
     // Inspired by Hacker's Delight min implementation.
     pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
         self.vector_reduce(src, |a, b, context| {
             let differences_or_zeros = difference_or_zero(a, b, context);
-            context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros)
+            context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros)
         })
     }
 
@@ -1611,38 +1752,148 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
         self.vector_reduce(src, |a, b, context| {
             let differences_or_zeros = difference_or_zero(a, b, context);
-            context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros)
+            context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros)
         })
     }
 
+    fn vector_extremum(&mut self, a: RValue<'gcc>, b: RValue<'gcc>, direction: ExtremumOperation) -> RValue<'gcc> {
+        let vector_type = a.get_type();
+
+        // mask out the NaNs in b and replace them with the corresponding lane in a, so when a and
+        // b get compared & spliced together, we get the numeric values instead of NaNs.
+        let b_nan_mask = self.context.new_comparison(None, ComparisonOp::NotEquals, b, b);
+        let mask_type = b_nan_mask.get_type();
+        let b_nan_mask_inverted = self.context.new_unary_op(None, UnaryOp::BitwiseNegate, mask_type, b_nan_mask);
+        let a_cast = self.context.new_bitcast(None, a, mask_type);
+        let b_cast = self.context.new_bitcast(None, b, mask_type);
+        let res = (b_nan_mask & a_cast) | (b_nan_mask_inverted & b_cast);
+        let b = self.context.new_bitcast(None, res, vector_type);
+
+        // now do the actual comparison
+        let comparison_op = match direction {
+            ExtremumOperation::Min => ComparisonOp::LessThan,
+            ExtremumOperation::Max => ComparisonOp::GreaterThan,
+        };
+        let cmp = self.context.new_comparison(None, comparison_op, a, b);
+        let cmp_inverted = self.context.new_unary_op(None, UnaryOp::BitwiseNegate, cmp.get_type(), cmp);
+        let res = (cmp & a_cast) | (cmp_inverted & res);
+        self.context.new_bitcast(None, res, vector_type)
+    }
+
+    pub fn vector_fmin(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        self.vector_extremum(a, b, ExtremumOperation::Min)
+    }
+
+    #[cfg(feature="master")]
+    pub fn vector_reduce_fmin(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
+        let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
+        let element_count = vector_type.get_num_units();
+        let mut acc = self.context.new_vector_access(None, src, self.context.new_rvalue_zero(self.int_type)).to_rvalue();
+        for i in 1..element_count {
+            let elem = self.context
+                .new_vector_access(None, src, self.context.new_rvalue_from_int(self.int_type, i as _))
+                .to_rvalue();
+            let cmp = self.context.new_comparison(None, ComparisonOp::LessThan, acc, elem);
+            acc = self.select(cmp, acc, elem);
+        }
+        acc
+    }
+
+    #[cfg(not(feature="master"))]
+    pub fn vector_reduce_fmin(&mut self, _src: RValue<'gcc>) -> RValue<'gcc> {
+        unimplemented!();
+    }
+
+    pub fn vector_fmax(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        self.vector_extremum(a, b, ExtremumOperation::Max)
+    }
+
+    #[cfg(feature="master")]
+    pub fn vector_reduce_fmax(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
+        let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
+        let element_count = vector_type.get_num_units();
+        let mut acc = self.context.new_vector_access(None, src, self.context.new_rvalue_zero(self.int_type)).to_rvalue();
+        for i in 1..element_count {
+            let elem = self.context
+                .new_vector_access(None, src, self.context.new_rvalue_from_int(self.int_type, i as _))
+                .to_rvalue();
+            let cmp = self.context.new_comparison(None, ComparisonOp::GreaterThan, acc, elem);
+            acc = self.select(cmp, acc, elem);
+        }
+        acc
+    }
+
+    #[cfg(not(feature="master"))]
+    pub fn vector_reduce_fmax(&mut self, _src: RValue<'gcc>) -> RValue<'gcc> {
+        unimplemented!();
+    }
+
     pub fn vector_select(&mut self, cond: RValue<'gcc>, then_val: RValue<'gcc>, else_val: RValue<'gcc>) -> RValue<'gcc> {
         // cond is a vector of integers, not of bools.
-        let cond_type = cond.get_type();
-        let vector_type = cond_type.unqualified().dyncast_vector().expect("vector type");
+        let vector_type = cond.get_type().unqualified().dyncast_vector().expect("vector type");
         let num_units = vector_type.get_num_units();
         let element_type = vector_type.get_element_type();
+
+        #[cfg(feature="master")]
+        let (cond, element_type) = {
+            let then_val_vector_type = then_val.get_type().dyncast_vector().expect("vector type");
+            let then_val_element_type = then_val_vector_type.get_element_type();
+            let then_val_element_size = then_val_element_type.get_size();
+
+            // NOTE: the mask needs to be of the same size as the other arguments in order for the &
+            // operation to work.
+            if then_val_element_size != element_type.get_size() {
+                let new_element_type = self.type_ix(then_val_element_size as u64 * 8);
+                let new_vector_type = self.context.new_vector_type(new_element_type, num_units as u64);
+                let cond = self.context.convert_vector(None, cond, new_vector_type);
+                (cond, new_element_type)
+            }
+            else {
+                (cond, element_type)
+            }
+        };
+
+        let cond_type = cond.get_type();
+
         let zeros = vec![self.context.new_rvalue_zero(element_type); num_units];
         let zeros = self.context.new_rvalue_from_vector(None, cond_type, &zeros);
 
+        let result_type = then_val.get_type();
+
         let masks = self.context.new_comparison(None, ComparisonOp::NotEquals, cond, zeros);
+        // NOTE: masks is a vector of integers, but the values can be vectors of floats, so use bitcast to make
+        // the & operation work.
+        let then_val = self.bitcast_if_needed(then_val, masks.get_type());
         let then_vals = masks & then_val;
 
-        let ones = vec![self.context.new_rvalue_one(element_type); num_units];
-        let ones = self.context.new_rvalue_from_vector(None, cond_type, &ones);
-        let inverted_masks = masks + ones;
+        let minus_ones = vec![self.context.new_rvalue_from_int(element_type, -1); num_units];
+        let minus_ones = self.context.new_rvalue_from_vector(None, cond_type, &minus_ones);
+        let inverted_masks = masks ^ minus_ones;
         // NOTE: sometimes, the type of else_val can be different than the type of then_val in
         // libgccjit (vector of int vs vector of int32_t), but they should be the same for the AND
         // operation to work.
+        // TODO: remove bitcast now that vector types can be compared?
         let else_val = self.context.new_bitcast(None, else_val, then_val.get_type());
         let else_vals = inverted_masks & else_val;
 
-        then_vals | else_vals
+        let res = then_vals | else_vals;
+        self.bitcast_if_needed(res, result_type)
     }
 }
 
 fn difference_or_zero<'gcc>(a: RValue<'gcc>, b: RValue<'gcc>, context: &'gcc Context<'gcc>) -> RValue<'gcc> {
     let difference = a - b;
     let masks = context.new_comparison(None, ComparisonOp::GreaterThanEquals, b, a);
+    // NOTE: masks is a vector of integers, but the values can be vectors of floats, so use bitcast to make
+    // the & operation work.
+    let a_type = a.get_type();
+    let masks =
+        if masks.get_type() != a_type {
+            context.new_bitcast(None, masks, a_type)
+        }
+        else {
+            masks
+        };
     difference & masks
 }