diff options
Diffstat (limited to 'third_party/rust/naga/src/back/spv/block.rs')
-rw-r--r-- | third_party/rust/naga/src/back/spv/block.rs | 242 |
1 files changed, 199 insertions, 43 deletions
diff --git a/third_party/rust/naga/src/back/spv/block.rs b/third_party/rust/naga/src/back/spv/block.rs index 6c96fa09e3..81f2fc10e0 100644 --- a/third_party/rust/naga/src/back/spv/block.rs +++ b/third_party/rust/naga/src/back/spv/block.rs @@ -731,12 +731,41 @@ impl<'w> BlockContext<'w> { Some(crate::ScalarKind::Uint) => spirv::GLOp::UMax, other => unimplemented!("Unexpected max({:?})", other), }), - Mf::Clamp => MathOp::Ext(match arg_scalar_kind { - Some(crate::ScalarKind::Float) => spirv::GLOp::FClamp, - Some(crate::ScalarKind::Sint) => spirv::GLOp::SClamp, - Some(crate::ScalarKind::Uint) => spirv::GLOp::UClamp, + Mf::Clamp => match arg_scalar_kind { + // Clamp is undefined if min > max. In practice this means it can use a median-of-three + // instruction to determine the value. This is fine according to the WGSL spec for float + // clamp, but integer clamp _must_ use min-max. As such we write out min/max. + Some(crate::ScalarKind::Float) => MathOp::Ext(spirv::GLOp::FClamp), + Some(_) => { + let (min_op, max_op) = match arg_scalar_kind { + Some(crate::ScalarKind::Sint) => { + (spirv::GLOp::SMin, spirv::GLOp::SMax) + } + Some(crate::ScalarKind::Uint) => { + (spirv::GLOp::UMin, spirv::GLOp::UMax) + } + _ => unreachable!(), + }; + + let max_id = self.gen_id(); + block.body.push(Instruction::ext_inst( + self.writer.gl450_ext_inst_id, + max_op, + result_type_id, + max_id, + &[arg0_id, arg1_id], + )); + + MathOp::Custom(Instruction::ext_inst( + self.writer.gl450_ext_inst_id, + min_op, + result_type_id, + id, + &[max_id, arg2_id], + )) + } other => unimplemented!("Unexpected max({:?})", other), - }), + }, Mf::Saturate => { let (maybe_size, scalar) = match *arg_ty { crate::TypeInner::Vector { size, scalar } => (Some(size), scalar), @@ -915,8 +944,7 @@ impl<'w> BlockContext<'w> { )), Mf::CountTrailingZeros => { let uint_id = match *arg_ty { - crate::TypeInner::Vector { size, mut scalar } => { - scalar.kind = crate::ScalarKind::Uint; + crate::TypeInner::Vector { size, scalar } => { let ty = LocalType::Value { vector_size: Some(size), scalar, @@ -927,15 +955,15 @@ impl<'w> BlockContext<'w> { self.temp_list.clear(); self.temp_list.resize( size as _, - self.writer.get_constant_scalar_with(32, scalar)?, + self.writer + .get_constant_scalar_with(scalar.width * 8, scalar)?, ); self.writer.get_constant_composite(ty, &self.temp_list) } - crate::TypeInner::Scalar(mut scalar) => { - scalar.kind = crate::ScalarKind::Uint; - self.writer.get_constant_scalar_with(32, scalar)? - } + crate::TypeInner::Scalar(scalar) => self + .writer + .get_constant_scalar_with(scalar.width * 8, scalar)?, _ => unreachable!(), }; @@ -957,9 +985,8 @@ impl<'w> BlockContext<'w> { )) } Mf::CountLeadingZeros => { - let (int_type_id, int_id) = match *arg_ty { - crate::TypeInner::Vector { size, mut scalar } => { - scalar.kind = crate::ScalarKind::Sint; + let (int_type_id, int_id, width) = match *arg_ty { + crate::TypeInner::Vector { size, scalar } => { let ty = LocalType::Value { vector_size: Some(size), scalar, @@ -970,32 +997,41 @@ impl<'w> BlockContext<'w> { self.temp_list.clear(); self.temp_list.resize( size as _, - self.writer.get_constant_scalar_with(31, scalar)?, + self.writer + .get_constant_scalar_with(scalar.width * 8 - 1, scalar)?, ); ( self.get_type_id(ty), self.writer.get_constant_composite(ty, &self.temp_list), + scalar.width, ) } - crate::TypeInner::Scalar(mut scalar) => { - scalar.kind = crate::ScalarKind::Sint; - ( - self.get_type_id(LookupType::Local(LocalType::Value { - vector_size: None, - scalar, - pointer_space: None, - })), - self.writer.get_constant_scalar_with(31, scalar)?, - ) - } + crate::TypeInner::Scalar(scalar) => ( + self.get_type_id(LookupType::Local(LocalType::Value { + vector_size: None, + scalar, + pointer_space: None, + })), + self.writer + .get_constant_scalar_with(scalar.width * 8 - 1, scalar)?, + scalar.width, + ), _ => unreachable!(), }; + if width != 4 { + unreachable!("This is validated out until a polyfill is implemented. https://github.com/gfx-rs/wgpu/issues/5276"); + }; + let msb_id = self.gen_id(); block.body.push(Instruction::ext_inst( self.writer.gl450_ext_inst_id, - spirv::GLOp::FindUMsb, + if width != 4 { + spirv::GLOp::FindILsb + } else { + spirv::GLOp::FindUMsb + }, int_type_id, msb_id, &[arg0_id], @@ -1021,30 +1057,144 @@ impl<'w> BlockContext<'w> { Some(crate::ScalarKind::Sint) => spirv::Op::BitFieldSExtract, other => unimplemented!("Unexpected sign({:?})", other), }; + + // The behavior of ExtractBits is undefined when offset + count > bit_width. We need + // to first sanitize the offset and count first. If we don't do this, AMD and Intel + // will return out-of-spec values if the extracted range is not within the bit width. + // + // This encodes the exact formula specified by the wgsl spec: + // https://gpuweb.github.io/gpuweb/wgsl/#extractBits-unsigned-builtin + // + // w = sizeof(x) * 8 + // o = min(offset, w) + // tmp = w - o + // c = min(count, tmp) + // + // bitfieldExtract(x, o, c) + + let bit_width = arg_ty.scalar_width().unwrap(); + let width_constant = self + .writer + .get_constant_scalar(crate::Literal::U32(bit_width as u32)); + + let u32_type = self.get_type_id(LookupType::Local(LocalType::Value { + vector_size: None, + scalar: crate::Scalar { + kind: crate::ScalarKind::Uint, + width: 4, + }, + pointer_space: None, + })); + + // o = min(offset, w) + let offset_id = self.gen_id(); + block.body.push(Instruction::ext_inst( + self.writer.gl450_ext_inst_id, + spirv::GLOp::UMin, + u32_type, + offset_id, + &[arg1_id, width_constant], + )); + + // tmp = w - o + let max_count_id = self.gen_id(); + block.body.push(Instruction::binary( + spirv::Op::ISub, + u32_type, + max_count_id, + width_constant, + offset_id, + )); + + // c = min(count, tmp) + let count_id = self.gen_id(); + block.body.push(Instruction::ext_inst( + self.writer.gl450_ext_inst_id, + spirv::GLOp::UMin, + u32_type, + count_id, + &[arg2_id, max_count_id], + )); + MathOp::Custom(Instruction::ternary( op, result_type_id, id, arg0_id, + offset_id, + count_id, + )) + } + Mf::InsertBits => { + // The behavior of InsertBits has the same undefined behavior as ExtractBits. + + let bit_width = arg_ty.scalar_width().unwrap(); + let width_constant = self + .writer + .get_constant_scalar(crate::Literal::U32(bit_width as u32)); + + let u32_type = self.get_type_id(LookupType::Local(LocalType::Value { + vector_size: None, + scalar: crate::Scalar { + kind: crate::ScalarKind::Uint, + width: 4, + }, + pointer_space: None, + })); + + // o = min(offset, w) + let offset_id = self.gen_id(); + block.body.push(Instruction::ext_inst( + self.writer.gl450_ext_inst_id, + spirv::GLOp::UMin, + u32_type, + offset_id, + &[arg2_id, width_constant], + )); + + // tmp = w - o + let max_count_id = self.gen_id(); + block.body.push(Instruction::binary( + spirv::Op::ISub, + u32_type, + max_count_id, + width_constant, + offset_id, + )); + + // c = min(count, tmp) + let count_id = self.gen_id(); + block.body.push(Instruction::ext_inst( + self.writer.gl450_ext_inst_id, + spirv::GLOp::UMin, + u32_type, + count_id, + &[arg3_id, max_count_id], + )); + + MathOp::Custom(Instruction::quaternary( + spirv::Op::BitFieldInsert, + result_type_id, + id, + arg0_id, arg1_id, - arg2_id, + offset_id, + count_id, )) } - Mf::InsertBits => MathOp::Custom(Instruction::quaternary( - spirv::Op::BitFieldInsert, - result_type_id, - id, - arg0_id, - arg1_id, - arg2_id, - arg3_id, - )), Mf::FindLsb => MathOp::Ext(spirv::GLOp::FindILsb), - Mf::FindMsb => MathOp::Ext(match arg_scalar_kind { - Some(crate::ScalarKind::Uint) => spirv::GLOp::FindUMsb, - Some(crate::ScalarKind::Sint) => spirv::GLOp::FindSMsb, - other => unimplemented!("Unexpected findMSB({:?})", other), - }), + Mf::FindMsb => { + if arg_ty.scalar_width() == Some(32) { + let thing = match arg_scalar_kind { + Some(crate::ScalarKind::Uint) => spirv::GLOp::FindUMsb, + Some(crate::ScalarKind::Sint) => spirv::GLOp::FindSMsb, + other => unimplemented!("Unexpected findMSB({:?})", other), + }; + MathOp::Ext(thing) + } else { + unreachable!("This is validated out until a polyfill is implemented. https://github.com/gfx-rs/wgpu/issues/5276"); + } + } Mf::Pack4x8unorm => MathOp::Ext(spirv::GLOp::PackUnorm4x8), Mf::Pack4x8snorm => MathOp::Ext(spirv::GLOp::PackSnorm4x8), Mf::Pack2x16float => MathOp::Ext(spirv::GLOp::PackHalf2x16), @@ -1250,6 +1400,12 @@ impl<'w> BlockContext<'w> { (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { Cast::Unary(spirv::Op::UConvert) } + (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::SConvert) + } + (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::UConvert) + } // We assume it's either an identity cast, or int-uint. _ => Cast::Unary(spirv::Op::Bitcast), } |