Diffstat (limited to 'third_party/rust/wgpu-hal/src')
23 files changed, 889 insertions, 163 deletions
diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs
index 6af4b77bb3..e5162362f7 100644
--- a/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs
+++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs
@@ -261,6 +261,7 @@ pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT {
         Vf::Uint32x4 => DXGI_FORMAT_R32G32B32A32_UINT,
         Vf::Sint32x4 => DXGI_FORMAT_R32G32B32A32_SINT,
         Vf::Float32x4 => DXGI_FORMAT_R32G32B32A32_FLOAT,
+        Vf::Unorm10_10_10_2 => DXGI_FORMAT_R10G10B10A2_UNORM,
         Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(),
     }
 }
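This update adds `Unorm10_10_10_2` to each backend's vertex-format table (DXGI here; GL, Metal, and Vulkan below). As a rough standalone illustration of the layout that format names — not code from the patch — four floats can be packed into one 32-bit value with 10 bits per color channel and 2 bits of alpha, assuming the DXGI convention that R occupies the lowest bits:

    // Illustrative sketch: CPU-side packing of a Unorm10_10_10_2 vertex value.
    fn pack_unorm_10_10_10_2(r: f32, g: f32, b: f32, a: f32) -> u32 {
        // Clamp to [0, 1] and quantize each channel to its bit range.
        let q = |v: f32, max: f32| (v.clamp(0.0, 1.0) * max).round() as u32;
        q(r, 1023.0) | (q(g, 1023.0) << 10) | (q(b, 1023.0) << 20) | (q(a, 3.0) << 30)
    }

    fn main() {
        // Opaque mid-gray: alpha (the top 2 bits) is at its maximum of 3.
        assert_eq!(pack_unorm_10_10_10_2(0.5, 0.5, 0.5, 1.0) >> 30, 3);
    }
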
diff --git a/third_party/rust/wgpu-hal/src/dx12/adapter.rs b/third_party/rust/wgpu-hal/src/dx12/adapter.rs
index b417a88a6f..faf25cc852 100644
--- a/third_party/rust/wgpu-hal/src/dx12/adapter.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/adapter.rs
@@ -115,18 +115,6 @@ impl super::Adapter {
             )
         });
 
-        let mut shader_model_support: d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL =
-            d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL {
-                HighestShaderModel: d3d12_ty::D3D_SHADER_MODEL_6_0,
-            };
-        assert_eq!(0, unsafe {
-            device.CheckFeatureSupport(
-                d3d12_ty::D3D12_FEATURE_SHADER_MODEL,
-                &mut shader_model_support as *mut _ as *mut _,
-                mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL>() as _,
-            )
-        });
-
         let mut workarounds = super::Workarounds::default();
 
         let info = wgt::AdapterInfo {
@@ -181,6 +169,53 @@ impl super::Adapter {
             hr == 0 && features3.CastingFullyTypedFormatSupported != 0
         };
 
+        let shader_model = if dxc_container.is_none() {
+            naga::back::hlsl::ShaderModel::V5_1
+        } else {
+            let mut versions = [
+                crate::dx12::types::D3D_SHADER_MODEL_6_7,
+                crate::dx12::types::D3D_SHADER_MODEL_6_6,
+                crate::dx12::types::D3D_SHADER_MODEL_6_5,
+                crate::dx12::types::D3D_SHADER_MODEL_6_4,
+                crate::dx12::types::D3D_SHADER_MODEL_6_3,
+                crate::dx12::types::D3D_SHADER_MODEL_6_2,
+                crate::dx12::types::D3D_SHADER_MODEL_6_1,
+                crate::dx12::types::D3D_SHADER_MODEL_6_0,
+                crate::dx12::types::D3D_SHADER_MODEL_5_1,
+            ]
+            .iter();
+            match loop {
+                if let Some(&sm) = versions.next() {
+                    let mut sm = crate::dx12::types::D3D12_FEATURE_DATA_SHADER_MODEL {
+                        HighestShaderModel: sm,
+                    };
+                    if 0 == unsafe {
+                        device.CheckFeatureSupport(
+                            7, // D3D12_FEATURE_SHADER_MODEL
+                            &mut sm as *mut _ as *mut _,
+                            mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_SHADER_MODEL>()
+                                as _,
+                        )
+                    } {
+                        break sm.HighestShaderModel;
+                    }
+                } else {
+                    break crate::dx12::types::D3D_SHADER_MODEL_5_1;
+                }
+            } {
+                crate::dx12::types::D3D_SHADER_MODEL_5_1 => naga::back::hlsl::ShaderModel::V5_1,
+                crate::dx12::types::D3D_SHADER_MODEL_6_0 => naga::back::hlsl::ShaderModel::V6_0,
+                crate::dx12::types::D3D_SHADER_MODEL_6_1 => naga::back::hlsl::ShaderModel::V6_1,
+                crate::dx12::types::D3D_SHADER_MODEL_6_2 => naga::back::hlsl::ShaderModel::V6_2,
+                crate::dx12::types::D3D_SHADER_MODEL_6_3 => naga::back::hlsl::ShaderModel::V6_3,
+                crate::dx12::types::D3D_SHADER_MODEL_6_4 => naga::back::hlsl::ShaderModel::V6_4,
+                crate::dx12::types::D3D_SHADER_MODEL_6_5 => naga::back::hlsl::ShaderModel::V6_5,
+                crate::dx12::types::D3D_SHADER_MODEL_6_6 => naga::back::hlsl::ShaderModel::V6_6,
+                crate::dx12::types::D3D_SHADER_MODEL_6_7 => naga::back::hlsl::ShaderModel::V6_7,
+                _ => unreachable!(),
+            }
+        };
+
         let private_caps = super::PrivateCapabilities {
             instance_flags,
             heterogeneous_resource_heaps: options.ResourceHeapTier
@@ -196,6 +231,7 @@ impl super::Adapter {
             casting_fully_typed_format_supported,
             // See https://github.com/gfx-rs/wgpu/issues/3552
             suballocation_supported: !info.name.contains("Iris(R) Xe"),
+            shader_model,
         };
 
         // Theoretically vram limited, but in practice 2^20 is the limit
@@ -273,7 +309,7 @@ impl super::Adapter {
             wgt::Features::TEXTURE_BINDING_ARRAY |
                 wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING |
                 wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
-            shader_model_support.HighestShaderModel >= d3d12_ty::D3D_SHADER_MODEL_5_1,
+            shader_model >= naga::back::hlsl::ShaderModel::V5_1,
         );
 
         let bgra8unorm_storage_supported = {
@@ -295,21 +331,28 @@ impl super::Adapter {
             bgra8unorm_storage_supported,
         );
 
-        // we must be using DXC because uint64_t was added with Shader Model 6
-        // and FXC only supports up to 5.1
-        let int64_shader_ops_supported = dxc_container.is_some() && {
-            let mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 =
-                unsafe { mem::zeroed() };
-            let hr = unsafe {
-                device.CheckFeatureSupport(
-                    d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1,
-                    &mut features1 as *mut _ as *mut _,
-                    mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _,
-                )
-            };
-            hr == 0 && features1.Int64ShaderOps != 0
+        let mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 = unsafe { mem::zeroed() };
+        let hr = unsafe {
+            device.CheckFeatureSupport(
+                d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1,
+                &mut features1 as *mut _ as *mut _,
+                mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _,
+            )
         };
-        features.set(wgt::Features::SHADER_INT64, int64_shader_ops_supported);
+
+        features.set(
+            wgt::Features::SHADER_INT64,
+            shader_model >= naga::back::hlsl::ShaderModel::V6_0
+                && hr == 0
+                && features1.Int64ShaderOps != 0,
+        );
+
+        features.set(
+            wgt::Features::SUBGROUP,
+            shader_model >= naga::back::hlsl::ShaderModel::V6_0
+                && hr == 0
+                && features1.WaveOps != 0,
+        );
 
         // float32-filterable should always be available on d3d12
         features.set(wgt::Features::FLOAT32_FILTERABLE, true);
@@ -377,6 +420,8 @@ impl super::Adapter {
                 .min(crate::MAX_VERTEX_BUFFERS as u32),
             max_vertex_attributes: d3d12_ty::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT,
             max_vertex_buffer_array_stride: d3d12_ty::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES,
+            min_subgroup_size: 4, // Not using `features1.WaveLaneCountMin` as it is unreliable
+            max_subgroup_size: 128,
             // The push constants are part of the root signature which
             // has a limit of 64 DWORDS (256 bytes), but other resources
             // also share the root signature:
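The removed `assert_eq!` probed only Shader Model 6.0 and failed hard; the new code walks candidate models from 6.7 down and keeps the first one `CheckFeatureSupport` accepts. The same descending-probe idea, reduced to a self-contained sketch where a hypothetical `check` closure stands in for the driver call (the real code uses an explicit `loop` because `CheckFeatureSupport` communicates through an in/out struct):

    // Return the first candidate the driver accepts, falling back to the
    // lowest version if every probe fails. Names here are illustrative.
    fn highest_supported(candidates: &[u32], check: impl Fn(u32) -> bool) -> u32 {
        candidates
            .iter()
            .copied()
            .find(|&v| check(v))
            .unwrap_or(*candidates.last().expect("candidate list must be non-empty"))
    }

    fn main() {
        // Pretend the driver accepts anything at or below shader model 6.2 (0x62).
        let driver_max = 0x62;
        let versions = [0x67, 0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60, 0x51];
        assert_eq!(highest_supported(&versions, |v| v <= driver_max), 0x62);
    }
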
diff --git a/third_party/rust/wgpu-hal/src/dx12/conv.rs b/third_party/rust/wgpu-hal/src/dx12/conv.rs
index 2b6c1d959e..b09ea76080 100644
--- a/third_party/rust/wgpu-hal/src/dx12/conv.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/conv.rs
@@ -224,7 +224,7 @@ pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE {
 }
 
 /// D3D12 doesn't support passing factors ending in `_COLOR` for alpha blending
-/// (see https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc).
+/// (see <https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc>).
 /// Therefore this function takes an additional `is_alpha` argument
 /// which if set will return an equivalent `_ALPHA` factor.
 fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND {
diff --git a/third_party/rust/wgpu-hal/src/dx12/device.rs b/third_party/rust/wgpu-hal/src/dx12/device.rs
index 23bd409dc4..82075294ee 100644
--- a/third_party/rust/wgpu-hal/src/dx12/device.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/device.rs
@@ -218,21 +218,39 @@ impl super::Device {
         use naga::back::hlsl;
         let stage_bit = crate::auxil::map_naga_stage(naga_stage);
-        let module = &stage.module.naga.module;
+
+        let (module, info) = naga::back::pipeline_constants::process_overrides(
+            &stage.module.naga.module,
+            &stage.module.naga.info,
+            stage.constants,
+        )
+        .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))?;
+
+        let needs_temp_options = stage.zero_initialize_workgroup_memory
+            != layout.naga_options.zero_initialize_workgroup_memory;
+        let mut temp_options;
+        let naga_options = if needs_temp_options {
+            temp_options = layout.naga_options.clone();
+            temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory;
+            &temp_options
+        } else {
+            &layout.naga_options
+        };
+
         //TODO: reuse the writer
         let mut source = String::new();
-        let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options);
+        let mut writer = hlsl::Writer::new(&mut source, naga_options);
         let reflection_info = {
             profiling::scope!("naga::back::hlsl::write");
             writer
-                .write(module, &stage.module.naga.info)
+                .write(&module, &info)
                 .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))?
         };
 
         let full_stage = format!(
             "{}_{}\0",
             naga_stage.to_hlsl_str(),
-            layout.naga_options.shader_model.to_str()
+            naga_options.shader_model.to_str()
         );
 
         let ep_index = module
@@ -1062,12 +1080,7 @@ impl crate::Device for super::Device {
             },
             bind_group_infos,
             naga_options: hlsl::Options {
-                shader_model: match self.dxc_container {
-                    // DXC
-                    Some(_) => hlsl::ShaderModel::V6_0,
-                    // FXC doesn't support SM 6.0
-                    None => hlsl::ShaderModel::V5_1,
-                },
+                shader_model: self.private_caps.shader_model,
                 binding_map,
                 fake_missing_bindings: false,
                 special_constants_binding,
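`process_overrides` is the other recurring change in this update: every backend now folds WGSL `override` values into the module before handing it to its shader writer (HLSL here; GLSL, MSL, and SPIR-V below). A minimal caller-side sketch, assuming a naga build with the `wgsl-in` front end and validator enabled, as vendored alongside this crate:

    use std::collections::HashMap;

    fn main() {
        let src = "
            override scale: f32 = 1.0;
            @compute @workgroup_size(1)
            fn main() { _ = scale; }
        ";
        let module = naga::front::wgsl::parse_str(src).unwrap();
        let info = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        )
        .validate(&module)
        .unwrap();

        // `naga::back::PipelineConstants` is a map from override name (or
        // numeric ID rendered as a string) to the value chosen at pipeline
        // creation time.
        let constants: HashMap<String, f64> = HashMap::from([("scale".to_string(), 2.5)]);

        // Yields a module/info pair in which `scale` is a plain constant,
        // ready to hand to any backend writer.
        let (_module, _info) =
            naga::back::pipeline_constants::process_overrides(&module, &info, &constants)
                .unwrap();
    }
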
diff --git a/third_party/rust/wgpu-hal/src/dx12/mod.rs b/third_party/rust/wgpu-hal/src/dx12/mod.rs
index 4f958943ca..9f021bc241 100644
--- a/third_party/rust/wgpu-hal/src/dx12/mod.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/mod.rs
@@ -195,6 +195,7 @@ struct PrivateCapabilities {
     heap_create_not_zeroed: bool,
     casting_fully_typed_format_supported: bool,
     suballocation_supported: bool,
+    shader_model: naga::back::hlsl::ShaderModel,
 }
 
 #[derive(Default)]
@@ -439,7 +440,7 @@ impl Texture {
         }
     }
 
-    /// see https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice
+    /// see <https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice>
     fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 {
         mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count
     }
diff --git a/third_party/rust/wgpu-hal/src/dx12/types.rs b/third_party/rust/wgpu-hal/src/dx12/types.rs
index b4ad38324a..17b608b840 100644
--- a/third_party/rust/wgpu-hal/src/dx12/types.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/types.rs
@@ -41,3 +41,25 @@ winapi::STRUCT! {
         BarycentricsSupported: winapi::shared::minwindef::BOOL,
     }
 }
+
+winapi::ENUM! {
+    enum D3D_SHADER_MODEL {
+        D3D_SHADER_MODEL_NONE = 0,
+        D3D_SHADER_MODEL_5_1 = 0x51,
+        D3D_SHADER_MODEL_6_0 = 0x60,
+        D3D_SHADER_MODEL_6_1 = 0x61,
+        D3D_SHADER_MODEL_6_2 = 0x62,
+        D3D_SHADER_MODEL_6_3 = 0x63,
+        D3D_SHADER_MODEL_6_4 = 0x64,
+        D3D_SHADER_MODEL_6_5 = 0x65,
+        D3D_SHADER_MODEL_6_6 = 0x66,
+        D3D_SHADER_MODEL_6_7 = 0x67,
+        D3D_HIGHEST_SHADER_MODEL = 0x67,
+    }
+}
+
+winapi::STRUCT! {
+    struct D3D12_FEATURE_DATA_SHADER_MODEL {
+        HighestShaderModel: D3D_SHADER_MODEL,
+    }
+}
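The `calc_subresource` formula touched above is small enough to restate as a free function with worked examples (a hypothetical standalone version of the D3D12 subresource-numbering rule linked in its doc comment: mips vary fastest, then array layers, then planes):

    fn calc_subresource(
        mip_level: u32,
        array_layer: u32,
        plane: u32,
        mip_count: u32,
        layer_count: u32,
    ) -> u32 {
        mip_level + (array_layer + plane * layer_count) * mip_count
    }

    fn main() {
        // A 2-layer texture with 3 mips: layer 1 / mip 2 on plane 0 is index 5.
        assert_eq!(calc_subresource(2, 1, 0, 3, 2), 5);
        // The same mip on plane 1 (e.g. a stencil plane) starts after all
        // plane-0 subresources: 2 + (1 + 1 * 2) * 3 = 11.
        assert_eq!(calc_subresource(2, 1, 1, 3, 2), 11);
    }
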
diff --git a/third_party/rust/wgpu-hal/src/gles/adapter.rs b/third_party/rust/wgpu-hal/src/gles/adapter.rs
index b9d044337c..052c77006b 100644
--- a/third_party/rust/wgpu-hal/src/gles/adapter.rs
+++ b/third_party/rust/wgpu-hal/src/gles/adapter.rs
@@ -104,7 +104,7 @@ impl super::Adapter {
         }
     }
 
-    fn make_info(vendor_orig: String, renderer_orig: String) -> wgt::AdapterInfo {
+    fn make_info(vendor_orig: String, renderer_orig: String, version: String) -> wgt::AdapterInfo {
         let vendor = vendor_orig.to_lowercase();
         let renderer = renderer_orig.to_lowercase();
 
@@ -179,13 +179,33 @@ impl super::Adapter {
             0
         };
 
+        let driver;
+        let driver_info;
+        if version.starts_with("WebGL ") || version.starts_with("OpenGL ") {
+            let es_sig = " ES";
+            match version.find(es_sig) {
+                Some(pos) => {
+                    driver = version[..pos + es_sig.len()].to_owned();
+                    driver_info = version[pos + es_sig.len() + 1..].to_owned();
+                }
+                None => {
+                    let pos = version.find(' ').unwrap();
+                    driver = version[..pos].to_owned();
+                    driver_info = version[pos + 1..].to_owned();
+                }
+            }
+        } else {
+            driver = "OpenGL".to_owned();
+            driver_info = version;
+        }
+
         wgt::AdapterInfo {
             name: renderer_orig,
             vendor: vendor_id,
             device: 0,
             device_type: inferred_device_type,
-            driver: String::new(),
-            driver_info: String::new(),
+            driver,
+            driver_info,
             backend: wgt::Backend::Gl,
         }
     }
@@ -507,8 +527,7 @@ impl super::Adapter {
         let has_etc = if cfg!(any(webgl, Emscripten)) {
             extensions.contains("WEBGL_compressed_texture_etc")
         } else {
-            // This is a required part of GLES3, but not part of Desktop GL at all.
-            es_ver.is_some()
+            es_ver.is_some() || extensions.contains("GL_ARB_ES3_compatibility")
         };
         features.set(wgt::Features::TEXTURE_COMPRESSION_ETC2, has_etc);
@@ -728,6 +747,8 @@ impl super::Adapter {
             } else {
                 !0
             },
+            min_subgroup_size: 0,
+            max_subgroup_size: 0,
             max_push_constant_size: super::MAX_PUSH_CONSTANTS as u32 * 4,
             min_uniform_buffer_offset_alignment,
             min_storage_buffer_offset_alignment,
@@ -825,7 +846,7 @@ impl super::Adapter {
                     max_msaa_samples: max_samples,
                 }),
             },
-            info: Self::make_info(vendor, renderer),
+            info: Self::make_info(vendor, renderer, version),
             features,
             capabilities: crate::Capabilities {
                 limits,
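The new `driver`/`driver_info` split in `make_info` is driven entirely by the shape of the GL_VERSION string. Here it is as a self-contained function with a few illustrative inputs (the Mesa strings are made up, but follow the format drivers report):

    fn split_driver(version: &str) -> (String, String) {
        if version.starts_with("WebGL ") || version.starts_with("OpenGL ") {
            let es_sig = " ES";
            match version.find(es_sig) {
                // "OpenGL ES 3.2 Mesa 24.0.1" -> ("OpenGL ES", "3.2 Mesa 24.0.1")
                Some(pos) => (
                    version[..pos + es_sig.len()].to_owned(),
                    version[pos + es_sig.len() + 1..].to_owned(),
                ),
                // "WebGL 2.0" -> ("WebGL", "2.0")
                None => {
                    let pos = version.find(' ').unwrap();
                    (version[..pos].to_owned(), version[pos + 1..].to_owned())
                }
            }
        } else {
            // Desktop GL version strings carry no API prefix.
            ("OpenGL".to_owned(), version.to_owned())
        }
    }

    fn main() {
        assert_eq!(
            split_driver("OpenGL ES 3.2 Mesa 24.0.1"),
            ("OpenGL ES".to_owned(), "3.2 Mesa 24.0.1".to_owned())
        );
        assert_eq!(split_driver("WebGL 2.0"), ("WebGL".to_owned(), "2.0".to_owned()));
        assert_eq!(
            split_driver("4.6 (Core Profile) Mesa 24.0.1"),
            ("OpenGL".to_owned(), "4.6 (Core Profile) Mesa 24.0.1".to_owned())
        );
    }
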
diff --git a/third_party/rust/wgpu-hal/src/gles/conv.rs b/third_party/rust/wgpu-hal/src/gles/conv.rs
index bde69b8629..a6c924f162 100644
--- a/third_party/rust/wgpu-hal/src/gles/conv.rs
+++ b/third_party/rust/wgpu-hal/src/gles/conv.rs
@@ -212,6 +212,7 @@ pub(super) fn describe_vertex_format(vertex_format: wgt::VertexFormat) -> super:
         Vf::Uint32x4 => (4, glow::UNSIGNED_INT, Vak::Integer),
         Vf::Sint32x4 => (4, glow::INT, Vak::Integer),
         Vf::Float32x4 => (4, glow::FLOAT, Vak::Float),
+        Vf::Unorm10_10_10_2 => (4, glow::UNSIGNED_INT_10_10_10_2, Vak::Float),
         Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(),
     };
diff --git a/third_party/rust/wgpu-hal/src/gles/device.rs b/third_party/rust/wgpu-hal/src/gles/device.rs
index 50c07f3ff0..a1e2736aa6 100644
--- a/third_party/rust/wgpu-hal/src/gles/device.rs
+++ b/third_party/rust/wgpu-hal/src/gles/device.rs
@@ -220,9 +220,17 @@ impl super::Device {
             multiview: context.multiview,
         };
 
-        let shader = &stage.module.naga;
-        let entry_point_index = shader
-            .module
+        let (module, info) = naga::back::pipeline_constants::process_overrides(
+            &stage.module.naga.module,
+            &stage.module.naga.info,
+            stage.constants,
+        )
+        .map_err(|e| {
+            let msg = format!("{e}");
+            crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg)
+        })?;
+
+        let entry_point_index = module
             .entry_points
             .iter()
             .position(|ep| ep.name.as_str() == stage.entry_point)
@@ -247,11 +255,23 @@ impl super::Device {
         };
 
         let mut output = String::new();
+        let needs_temp_options = stage.zero_initialize_workgroup_memory
+            != context.layout.naga_options.zero_initialize_workgroup_memory;
+        let mut temp_options;
+        let naga_options = if needs_temp_options {
+            // We use a conditional here, as cloning the naga_options could be expensive
+            // That is, we want to avoid doing that unless we cannot avoid it
+            temp_options = context.layout.naga_options.clone();
+            temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory;
+            &temp_options
+        } else {
+            &context.layout.naga_options
+        };
         let mut writer = glsl::Writer::new(
             &mut output,
-            &shader.module,
-            &shader.info,
-            &context.layout.naga_options,
+            &module,
+            &info,
+            naga_options,
             &pipeline_options,
             policies,
         )
@@ -269,8 +289,8 @@ impl super::Device {
 
         context.consume_reflection(
             gl,
-            &shader.module,
-            shader.info.get_entry_point(entry_point_index),
+            &module,
+            info.get_entry_point(entry_point_index),
             reflection_info,
             naga_stage,
             program,
@@ -297,6 +317,7 @@ impl super::Device {
                 naga_stage: naga_stage.to_owned(),
                 shader_id: stage.module.id,
                 entry_point: stage.entry_point.to_owned(),
+                zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory,
             });
         }
         let mut guard = self
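This hunk and its DX12/Vulkan counterparts share the trick called out in the comment above: keep borrowing the shared options, and clone-and-patch only for a stage whose `zero_initialize_workgroup_memory` flag diverges. The pattern in isolation, with a stand-in `NagaOptions` type:

    #[derive(Clone, Debug)]
    struct NagaOptions {
        zero_initialize_workgroup_memory: bool,
    }

    fn compile_stage(layout_options: &NagaOptions, stage_zero_init: bool) {
        // Deferred initialization: `temp_options` is only written (and thus
        // a clone only happens) on the divergent path, yet the borrow can
        // outlive the `if`.
        let temp_options;
        let options = if stage_zero_init != layout_options.zero_initialize_workgroup_memory {
            let mut patched = layout_options.clone();
            patched.zero_initialize_workgroup_memory = stage_zero_init;
            temp_options = patched;
            &temp_options
        } else {
            layout_options
        };
        println!("{options:?}");
    }

    fn main() {
        let shared = NagaOptions { zero_initialize_workgroup_memory: true };
        compile_stage(&shared, true); // borrows, no clone
        compile_stage(&shared, false); // clones and patches
    }
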
diff --git a/third_party/rust/wgpu-hal/src/gles/egl.rs b/third_party/rust/wgpu-hal/src/gles/egl.rs
index b166f4f102..00ef70ba88 100644
--- a/third_party/rust/wgpu-hal/src/gles/egl.rs
+++ b/third_party/rust/wgpu-hal/src/gles/egl.rs
@@ -526,28 +526,51 @@ impl Inner {
         }
 
         let (config, supports_native_window) = choose_config(&egl, display, srgb_kind)?;
-        egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap();
+
+        let supports_opengl = if version >= (1, 4) {
+            let client_apis = egl
+                .query_string(Some(display), khronos_egl::CLIENT_APIS)
+                .unwrap()
+                .to_string_lossy();
+            client_apis
+                .split(' ')
+                .any(|client_api| client_api == "OpenGL")
+        } else {
+            false
+        };
+        egl.bind_api(if supports_opengl {
+            khronos_egl::OPENGL_API
+        } else {
+            khronos_egl::OPENGL_ES_API
+        })
+        .unwrap();
 
         let needs_robustness = true;
         let mut khr_context_flags = 0;
         let supports_khr_context = display_extensions.contains("EGL_KHR_create_context");
 
-        //TODO: make it so `Device` == EGL Context
-        let mut context_attributes = vec![
-            khronos_egl::CONTEXT_MAJOR_VERSION,
-            3, // Request GLES 3.0 or higher
-        ];
-
-        if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
+        let mut context_attributes = vec![];
+        if supports_opengl {
+            context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION);
+            context_attributes.push(3);
             context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION);
-            context_attributes.push(match force_gles_minor_version {
-                wgt::Gles3MinorVersion::Version0 => 0,
-                wgt::Gles3MinorVersion::Version1 => 1,
-                wgt::Gles3MinorVersion::Version2 => 2,
-                _ => unreachable!(),
-            });
+            context_attributes.push(3);
+            if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
+                log::warn!("Ignoring specified GLES minor version as OpenGL is used");
+            }
+        } else {
+            context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION);
+            context_attributes.push(3); // Request GLES 3.0 or higher
+            if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
+                context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION);
+                context_attributes.push(match force_gles_minor_version {
+                    wgt::Gles3MinorVersion::Automatic => unreachable!(),
+                    wgt::Gles3MinorVersion::Version0 => 0,
+                    wgt::Gles3MinorVersion::Version1 => 1,
+                    wgt::Gles3MinorVersion::Version2 => 2,
+                });
+            }
         }
-
         if flags.contains(wgt::InstanceFlags::DEBUG) {
             if version >= (1, 5) {
                 log::debug!("\tEGL context: +debug");
@@ -577,8 +600,6 @@ impl Inner {
                 // because it's for desktop GL only, not GLES.
                 log::warn!("\tEGL context: -robust access");
             }
-
-            //TODO do we need `khronos_egl::CONTEXT_OPENGL_NOTIFICATION_STRATEGY_EXT`?
         }
         if khr_context_flags != 0 {
             context_attributes.push(EGL_CONTEXT_FLAGS_KHR);
@@ -977,6 +998,7 @@ impl crate::Instance for Instance {
             srgb_kind: inner.srgb_kind,
         })
     }
+
     unsafe fn destroy_surface(&self, _surface: Surface) {}
 
     unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> {
@@ -993,6 +1015,12 @@ impl crate::Instance for Instance {
             })
         };
 
+        // In contrast to OpenGL ES, OpenGL requires explicitly enabling sRGB conversions,
+        // as otherwise the user has to do the sRGB conversion.
+        if !matches!(inner.srgb_kind, SrgbFrameBufferKind::None) {
+            unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) };
+        }
+
         if self.flags.contains(wgt::InstanceFlags::DEBUG) && gl.supports_debug() {
             log::debug!("Max label length: {}", unsafe {
                 gl.get_parameter_i32(glow::MAX_LABEL_LENGTH)
@@ -1106,6 +1134,13 @@ impl Surface {
 
         unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) };
         unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(sc.framebuffer)) };
+
+        if !matches!(self.srgb_kind, SrgbFrameBufferKind::None) {
+            // Disable sRGB conversions for `glBlitFramebuffer` as behavior does diverge between
+            // drivers and formats otherwise and we want to ensure no sRGB conversions happen.
+            unsafe { gl.disable(glow::FRAMEBUFFER_SRGB) };
+        }
+
         // Note the Y-flipping here. GL's presentation is not flipped,
         // but main rendering is. Therefore, we Y-flip the output positions
         // in the shader, and also this blit.
@@ -1123,6 +1158,11 @@ impl Surface {
                 glow::NEAREST,
             )
         };
+
+        if !matches!(self.srgb_kind, SrgbFrameBufferKind::None) {
+            unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) };
+        }
+
         unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) };
 
         self.egl
diff --git a/third_party/rust/wgpu-hal/src/gles/mod.rs b/third_party/rust/wgpu-hal/src/gles/mod.rs
index 6f41f7c000..0fcb09be46 100644
--- a/third_party/rust/wgpu-hal/src/gles/mod.rs
+++ b/third_party/rust/wgpu-hal/src/gles/mod.rs
@@ -602,6 +602,7 @@ struct ProgramStage {
     naga_stage: naga::ShaderStage,
     shader_id: ShaderId,
     entry_point: String,
+    zero_initialize_workgroup_memory: bool,
 }
 
 #[derive(PartialEq, Eq, Hash)]
diff --git a/third_party/rust/wgpu-hal/src/gles/queue.rs b/third_party/rust/wgpu-hal/src/gles/queue.rs
index 29dfb79d04..7c728d3978 100644
--- a/third_party/rust/wgpu-hal/src/gles/queue.rs
+++ b/third_party/rust/wgpu-hal/src/gles/queue.rs
@@ -213,12 +213,27 @@ impl super::Queue {
                 instance_count,
                 ref first_instance_location,
             } => {
-                match base_vertex {
-                    0 => {
-                        unsafe {
-                            gl.uniform_1_u32(first_instance_location.as_ref(), first_instance)
-                        };
+                let supports_full_instancing = self
+                    .shared
+                    .private_caps
+                    .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING);
+
+                if supports_full_instancing {
+                    unsafe {
+                        gl.draw_elements_instanced_base_vertex_base_instance(
+                            topology,
+                            index_count as i32,
+                            index_type,
+                            index_offset as i32,
+                            instance_count as i32,
+                            base_vertex,
+                            first_instance,
+                        )
+                    }
+                } else {
+                    unsafe { gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) };
+
+                    if base_vertex == 0 {
                         unsafe {
                             // Don't use `gl.draw_elements`/`gl.draw_elements_base_vertex` for `instance_count == 1`.
                             // Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `gl.draw_elements`/`gl.draw_elements_base_vertex`.
@@ -231,41 +246,17 @@ impl super::Queue {
                                 instance_count as i32,
                             )
                         }
-                    }
-                    _ => {
-                        let supports_full_instancing = self
-                            .shared
-                            .private_caps
-                            .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING);
-
-                        if supports_full_instancing {
-                            unsafe {
-                                gl.draw_elements_instanced_base_vertex_base_instance(
-                                    topology,
-                                    index_count as i32,
-                                    index_type,
-                                    index_offset as i32,
-                                    instance_count as i32,
-                                    base_vertex,
-                                    first_instance,
-                                )
-                            }
-                        } else {
-                            unsafe {
-                                gl.uniform_1_u32(first_instance_location.as_ref(), first_instance)
-                            };
-
-                            // If we've gotten here, wgpu-core has already validated that this function exists via the DownlevelFlags::BASE_VERTEX feature.
-                            unsafe {
-                                gl.draw_elements_instanced_base_vertex(
-                                    topology,
-                                    index_count as _,
-                                    index_type,
-                                    index_offset as i32,
-                                    instance_count as i32,
-                                    base_vertex,
-                                )
-                            }
+                    } else {
+                        // If we've gotten here, wgpu-core has already validated that this function exists via the DownlevelFlags::BASE_VERTEX feature.
+                        unsafe {
+                            gl.draw_elements_instanced_base_vertex(
+                                topology,
+                                index_count as _,
+                                index_type,
+                                index_offset as i32,
+                                instance_count as i32,
+                                base_vertex,
+                            )
                         }
                     }
                 }
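The restructured indexed-draw path above now picks one of three GL entry points. Factored into a pure decision function (names hypothetical) for clarity:

    #[derive(Debug, PartialEq)]
    enum DrawCall {
        /// glDrawElementsInstancedBaseVertexBaseInstance
        FullInstancing,
        /// base-instance uniform + glDrawElementsInstanced
        UniformNoBaseVertex,
        /// base-instance uniform + glDrawElementsInstancedBaseVertex
        UniformBaseVertex,
    }

    fn pick_draw_call(supports_full_instancing: bool, base_vertex: i32) -> DrawCall {
        if supports_full_instancing {
            // One native call handles base_vertex and first_instance directly.
            DrawCall::FullInstancing
        } else if base_vertex == 0 {
            // first_instance is passed through a shader uniform instead.
            DrawCall::UniformNoBaseVertex
        } else {
            // Reaching this arm requires DownlevelFlags::BASE_VERTEX support.
            DrawCall::UniformBaseVertex
        }
    }

    fn main() {
        assert_eq!(pick_draw_call(true, 5), DrawCall::FullInstancing);
        assert_eq!(pick_draw_call(false, 0), DrawCall::UniformNoBaseVertex);
        assert_eq!(pick_draw_call(false, 5), DrawCall::UniformBaseVertex);
    }
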
diff --git a/third_party/rust/wgpu-hal/src/gles/wgl.rs b/third_party/rust/wgpu-hal/src/gles/wgl.rs
index 2564892969..aae70478b4 100644
--- a/third_party/rust/wgpu-hal/src/gles/wgl.rs
+++ b/third_party/rust/wgpu-hal/src/gles/wgl.rs
@@ -507,6 +507,8 @@ impl crate::Instance for Instance {
             .supported_extensions()
             .contains("GL_ARB_framebuffer_sRGB");
 
+        // In contrast to OpenGL ES, OpenGL requires explicitly enabling sRGB conversions,
+        // as otherwise the user has to do the sRGB conversion.
         if srgb_capable {
             unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) };
         }
diff --git a/third_party/rust/wgpu-hal/src/lib.rs b/third_party/rust/wgpu-hal/src/lib.rs
index 79bd54e66e..d300ca30cc 100644
--- a/third_party/rust/wgpu-hal/src/lib.rs
+++ b/third_party/rust/wgpu-hal/src/lib.rs
@@ -1,17 +1,208 @@
-/*! This library describes the internal unsafe graphics abstraction API.
- * It follows WebGPU for the most part, re-using wgpu-types,
- * with the following deviations:
- * - Fully unsafe: zero overhead, zero validation.
- * - Compile-time backend selection via traits.
- * - Objects are passed by references and returned by value. No IDs.
- * - Mapping is persistent, with explicit synchronization.
- * - Resource transitions are explicit.
- * - All layouts are explicit. Binding model has compatibility.
+/*! A cross-platform unsafe graphics abstraction.
  *
- * General design direction is to follow the majority by the following weights:
- * - wgpu-core: 1.5
- * - primary backends (Vulkan/Metal/DX12): 1.0 each
- * - secondary backend (GLES): 0.5
+ * This crate defines a set of traits abstracting over modern graphics APIs,
+ * with implementations ("backends") for Vulkan, Metal, Direct3D, and GL.
+ *
+ * `wgpu-hal` is a spiritual successor to
+ * [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and
+ * oriented towards WebGPU implementation goals. It has no overhead for
+ * validation or tracking, and the API translation overhead is kept to the bare
+ * minimum by the design of WebGPU. This API can be used for resource-demanding
+ * applications and engines.
+ *
+ * The `wgpu-hal` crate's main design choices:
+ *
+ * - Our traits are meant to be *portable*: proper use
+ *   should get equivalent results regardless of the backend.
+ *
+ * - Our traits' contracts are *unsafe*: implementations perform minimal
+ *   validation, if any, and incorrect use will often cause undefined behavior.
+ *   This allows us to minimize the overhead we impose over the underlying
+ *   graphics system. If you need safety, the [`wgpu-core`] crate provides a
+ *   safe API for driving `wgpu-hal`, implementing all necessary validation,
+ *   resource state tracking, and so on. (Note that `wgpu-core` is designed for
+ *   use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for
+ *   `wgpu-core`.) Or, you can do your own validation.
+ *
+ * - In the same vein, returned errors *only cover cases the user can't
+ *   anticipate*, like running out of memory or losing the device. Any errors
+ *   that the user could reasonably anticipate are their responsibility to
+ *   avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's
+ *   not mappable: as the buffer creator, the user should already know if they
+ *   can map it.
+ *
+ * - We use *static dispatch*. The traits are not
+ *   generally object-safe. You must select a specific backend type
+ *   like [`vulkan::Api`] or [`metal::Api`], and then use that
+ *   according to the main traits, or call backend-specific methods.
+ *
+ * - We use *idiomatic Rust parameter passing*,
+ *   taking objects by reference, returning them by value, and so on,
+ *   unlike `wgpu-core`, which refers to objects by ID.
+ *
+ * - We map buffer contents *persistently*. This means that the buffer
+ *   can remain mapped on the CPU while the GPU reads or writes to it.
+ *   You must explicitly indicate when data might need to be
+ *   transferred between CPU and GPU, if `wgpu-hal` indicates that the
+ *   mapping is not coherent (that is, automatically synchronized
+ *   between the two devices).
+ *
+ * - You must record *explicit barriers* between different usages of a
+ *   resource. For example, if a buffer is written to by a compute
+ *   shader, and then used as an index buffer to a draw call, you
+ *   must use [`CommandEncoder::transition_buffers`] between those two
+ *   operations.
+ *
+ * - Pipeline layouts are *explicitly specified* when setting bind
+ *   groups. Incompatible layouts disturb groups bound at higher indices.
+ *
+ * - The API *accepts collections as iterators*, to avoid forcing the user to
+ *   store data in particular containers. The implementation doesn't guarantee
+ *   that any of the iterators are drained, unless stated otherwise by the
+ *   function documentation. For this reason, we recommend that iterators don't
+ *   do any mutating work.
+ *
+ * Unfortunately, `wgpu-hal`'s safety requirements are not fully documented.
+ * Ideally, all trait methods would have doc comments setting out the
+ * requirements users must meet to ensure correct and portable behavior. If you
+ * are aware of a specific requirement that a backend imposes that is not
+ * ensured by the traits' documented rules, please file an issue. Or, if you are
+ * a capable technical writer, please file a pull request!
+ *
+ * [`wgpu-core`]: https://crates.io/crates/wgpu-core
+ * [`wgpu`]: https://crates.io/crates/wgpu
+ * [`vulkan::Api`]: vulkan/struct.Api.html
+ * [`metal::Api`]: metal/struct.Api.html
+ *
+ * ## Primary backends
+ *
+ * The `wgpu-hal` crate has full-featured backends implemented on the following
+ * platform graphics APIs:
+ *
+ * - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's
+ *   Vulkan bindings. It's also available on macOS, if you install [MoltenVK].
+ *
+ * - Metal on macOS, using the [`metal`] crate's bindings.
+ *
+ * - Direct3D 12 on Windows, using the [`d3d12`] crate's bindings.
+ *
+ * [`ash`]: https://crates.io/crates/ash
+ * [MoltenVK]: https://github.com/KhronosGroup/MoltenVK
+ * [`metal`]: https://crates.io/crates/metal
+ * [`d3d12`]: https://crates.io/crates/d3d12
+ *
+ * ## Secondary backends
+ *
+ * The `wgpu-hal` crate has a partial implementation based on the following
+ * platform graphics API:
+ *
+ * - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are
+ *   available. See the [`gles`] module documentation for details.
+ *
+ * [`gles`]: gles/index.html
+ *
+ * You can see what capabilities an adapter is missing by checking the
+ * [`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available
+ * from [`Instance::enumerate_adapters`].
+ *
+ * The API is generally designed to fit the primary backends better than the
+ * secondary backends, so the latter may impose more overhead.
+ *
+ * [tdc]: wgt::DownlevelCapabilities
+ *
+ * ## Traits
+ *
+ * The `wgpu-hal` crate defines a handful of traits that together
+ * represent a cross-platform abstraction for modern GPU APIs.
+ *
+ * - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its
+ *   own, only a collection of associated types.
+ *
+ * - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`]
+ *   creates an instance value, which you can use to enumerate the adapters
+ *   available on the system. For example, [`vulkan::Api::Instance::init`][Ii]
+ *   returns an instance that can enumerate the Vulkan physical devices on your
+ *   system.
+ *
+ * - [`Api::Adapter`] implements the [`Adapter`] trait, representing a
+ *   particular device from a particular backend. For example, a Vulkan instance
+ *   might have a Lavapipe software adapter and a GPU-based adapter.
+ *
+ * - [`Api::Device`] implements the [`Device`] trait, representing an active
+ *   link to a device. You get a device value by calling [`Adapter::open`], and
+ *   then use it to create buffers, textures, shader modules, and so on.
+ *
+ * - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit
+ *   command buffers to a given device.
+ *
+ * - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you
+ *   use to build buffers of commands to submit to a queue. This has all the
+ *   methods for drawing and running compute shaders, which is presumably what
+ *   you're here for.
+ *
+ * - [`Api::Surface`] implements the [`Surface`] trait, which represents a
+ *   swapchain for presenting images on the screen, via interaction with the
+ *   system's window manager.
+ *
+ * The [`Api`] trait has various other associated types like [`Api::Buffer`] and
+ * [`Api::Texture`] that represent resources the rest of the interface can
+ * operate on, but these generally do not have their own traits.
+ *
+ * [Ii]: Instance::init
+ *
+ * ## Validation is the calling code's responsibility, not `wgpu-hal`'s
+ *
+ * As much as possible, `wgpu-hal` traits place the burden of validation,
+ * resource tracking, and state tracking on the caller, not on the trait
+ * implementations themselves. Anything which can reasonably be handled in
+ * backend-independent code should be. A `wgpu_hal` backend's sole obligation is
+ * to provide portable behavior, and report conditions that the calling code
+ * can't reasonably anticipate, like device loss or running out of memory.
+ *
+ * The `wgpu` crate collection is intended for use in security-sensitive
+ * applications, like web browsers, where the API is available to untrusted
+ * code. This means that `wgpu-core`'s validation is not simply a service to
+ * developers, to be provided opportunistically when the performance costs are
+ * acceptable and the necessary data is ready at hand. Rather, `wgpu-core`'s
+ * validation must be exhaustive, to ensure that even malicious content cannot
+ * provoke and exploit undefined behavior in the platform's graphics API.
+ *
+ * Because graphics APIs' requirements are complex, the only practical way for
+ * `wgpu` to provide exhaustive validation is to comprehensively track the
+ * lifetime and state of all the resources in the system. Implementing this
+ * separately for each backend is infeasible; effort would be better spent
+ * making the cross-platform validation in `wgpu-core` legible and trustworthy.
+ * Fortunately, the requirements are largely similar across the various
+ * platforms, so cross-platform validation is practical.
+ *
+ * Some backends have specific requirements that aren't practical to foist off
+ * on the `wgpu-hal` user. For example, properly managing macOS Objective-C or
+ * Microsoft COM reference counts is best handled by using appropriate pointer
+ * types within the backend.
+ *
+ * A desire for "defense in depth" may suggest performing additional validation
+ * in `wgpu-hal` when the opportunity arises, but this must be done with
+ * caution. Even experienced contributors infer the expectations their changes
+ * must meet by considering not just requirements made explicit in types, tests,
+ * assertions, and comments, but also those implicit in the surrounding code.
+ * When one sees validation or state-tracking code in `wgpu-hal`, it is tempting
+ * to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry
+ * about it - that would be redundant!" The responsibility for exhaustive
+ * validation always rests with `wgpu-core`, regardless of what may or may not
+ * be checked in `wgpu-hal`.
+ *
+ * To this end, any "defense in depth" validation that does appear in `wgpu-hal`
+ * for requirements that `wgpu-core` should have enforced should report failure
+ * via the `unreachable!` macro, because problems detected at this stage always
+ * indicate a bug in `wgpu-core`.
+ *
+ * ## Debugging
+ *
+ * Most of the information on the wiki [Debugging wgpu Applications][wiki-debug]
+ * page still applies to this API, with the exception of API tracing/replay
+ * functionality, which is only available in `wgpu-core`.
+ *
+ * [wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications
  */
 
 #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
@@ -198,6 +389,15 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type Queue: Queue<A = Self>;
     type CommandEncoder: CommandEncoder<A = Self>;
+
+    /// This API's command buffer type.
+    ///
+    /// The only thing you can do with `CommandBuffer`s is build them
+    /// with a [`CommandEncoder`] and then pass them to
+    /// [`Queue::submit`] for execution, or destroy them by passing
+    /// them to [`CommandEncoder::reset_all`].
+    ///
+    /// [`CommandEncoder`]: Api::CommandEncoder
     type CommandBuffer: WasmNotSendSync + fmt::Debug;
 
     type Buffer: fmt::Debug + WasmNotSendSync + 'static;
@@ -206,6 +406,24 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type TextureView: fmt::Debug + WasmNotSendSync;
     type Sampler: fmt::Debug + WasmNotSendSync;
     type QuerySet: fmt::Debug + WasmNotSendSync;
+
+    /// A value you can block on to wait for something to finish.
+    ///
+    /// A `Fence` holds a monotonically increasing [`FenceValue`]. You can call
+    /// [`Device::wait`] to block until a fence reaches or passes a value you
+    /// choose. [`Queue::submit`] can take a `Fence` and a [`FenceValue`] to
+    /// store in it when the submitted work is complete.
+    ///
+    /// Attempting to set a fence to a value less than its current value has no
+    /// effect.
+    ///
+    /// Waiting on a fence returns as soon as the fence reaches *or passes* the
+    /// requested value. This implies that, in order to reliably determine when
+    /// an operation has completed, operations must finish in order of
+    /// increasing fence values: if a higher-valued operation were to finish
+    /// before a lower-valued operation, then waiting for the fence to reach the
+    /// lower value could return before the lower-valued operation has actually
+    /// finished.
     type Fence: fmt::Debug + WasmNotSendSync;
 
     type BindGroupLayout: fmt::Debug + WasmNotSendSync;
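A toy, std-only model of the `Fence` contract documented above — not the wgpu-hal API itself — showing the monotonic value, the ignored lower signal, and a `wait` that returns once the value is reached or passed:

    use std::sync::{Condvar, Mutex};

    struct Fence {
        value: Mutex<u64>,
        signaled: Condvar,
    }

    impl Fence {
        fn new() -> Self {
            Fence { value: Mutex::new(0), signaled: Condvar::new() }
        }

        /// Like the `signal_fence` side of `Queue::submit`: setting a value
        /// lower than the current one has no effect.
        fn signal(&self, v: u64) {
            let mut cur = self.value.lock().unwrap();
            if v > *cur {
                *cur = v;
                self.signaled.notify_all();
            }
        }

        /// Like `Device::wait`: blocks until the fence reaches or passes `v`.
        fn wait(&self, v: u64) {
            let mut cur = self.value.lock().unwrap();
            while *cur < v {
                cur = self.signaled.wait(cur).unwrap();
            }
        }
    }

    fn main() {
        let fence = Fence::new();
        fence.signal(2);
        fence.signal(1); // ignored: fences never move backwards
        fence.wait(2); // returns immediately; the value already passed 1 and reached 2
    }
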
@@ -405,7 +623,25 @@ pub trait Device: WasmNotSendSync {
         &self,
         fence: &<Self::A as Api>::Fence,
     ) -> Result<FenceValue, DeviceError>;
-    /// Calling wait with a lower value than the current fence value will immediately return.
+
+    /// Wait for `fence` to reach `value`.
+    ///
+    /// Operations like [`Queue::submit`] can accept a [`Fence`] and a
+    /// [`FenceValue`] to store in it, so you can use this `wait` function
+    /// to wait for a given queue submission to finish execution.
+    ///
+    /// The `value` argument must be a value that some actual operation you have
+    /// already presented to the device is going to store in `fence`. You cannot
+    /// wait for values yet to be submitted. (This restriction accommodates
+    /// implementations like the `vulkan` backend's [`FencePool`] that must
+    /// allocate a distinct synchronization object for each fence value one is
+    /// able to wait for.)
+    ///
+    /// Calling `wait` with a lower [`FenceValue`] than `fence`'s current value
+    /// returns immediately.
+    ///
+    /// [`Fence`]: Api::Fence
+    /// [`FencePool`]: vulkan/enum.Fence.html#variant.FencePool
     unsafe fn wait(
         &self,
         fence: &<Self::A as Api>::Fence,
@@ -437,14 +673,48 @@ pub trait Queue: WasmNotSendSync {
     type A: Api;
 
-    /// Submits the command buffers for execution on GPU.
+    /// Submit `command_buffers` for execution on GPU.
+    ///
+    /// If `signal_fence` is `Some(fence, value)`, update `fence` to `value`
+    /// when the operation is complete. See [`Fence`] for details.
+    ///
+    /// If two calls to `submit` on a single `Queue` occur in a particular order
+    /// (that is, they happen on the same thread, or on two threads that have
+    /// synchronized to establish an ordering), then the first submission's
+    /// commands all complete execution before any of the second submission's
+    /// commands begin. All results produced by one submission are visible to
+    /// the next.
+    ///
+    /// Within a submission, command buffers execute in the order in which they
+    /// appear in `command_buffers`. All results produced by one buffer are
+    /// visible to the next.
+    ///
+    /// If two calls to `submit` on a single `Queue` from different threads are
+    /// not synchronized to occur in a particular order, they must pass distinct
+    /// [`Fence`]s. As explained in the [`Fence`] documentation, waiting for
+    /// operations to complete is only trustworthy when operations finish in
+    /// order of increasing fence value, but submissions from different threads
+    /// cannot determine how to order the fence values if the submissions
+    /// themselves are unordered. If each thread uses a separate [`Fence`], this
+    /// problem does not arise.
     ///
     /// Valid usage:
-    /// - all of the command buffers were created from command pools
-    ///   that are associated with this queue.
-    /// - all of the command buffers had `CommandBuffer::finish()` called.
-    /// - all surface textures that the command buffers write to must be
-    ///   passed to the surface_textures argument.
+    ///
+    /// - All of the [`CommandBuffer`][cb]s were created from
+    ///   [`CommandEncoder`][ce]s that are associated with this queue.
+    ///
+    /// - All of those [`CommandBuffer`][cb]s must remain alive until
+    ///   the submitted commands have finished execution. (Since
+    ///   command buffers must not outlive their encoders, this
+    ///   implies that the encoders must remain alive as well.)
+    ///
+    /// - All of the [`SurfaceTexture`][st]s that the command buffers
+    ///   write to appear in the `surface_textures` argument.
+    ///
+    /// [`Fence`]: Api::Fence
+    /// [cb]: Api::CommandBuffer
+    /// [ce]: Api::CommandEncoder
+    /// [st]: Api::SurfaceTexture
     unsafe fn submit(
         &self,
         command_buffers: &[&<Self::A as Api>::CommandBuffer],
@@ -459,7 +729,12 @@ pub trait Queue: WasmNotSendSync {
     unsafe fn get_timestamp_period(&self) -> f32;
 }
 
-/// Encoder and allocation pool for `CommandBuffer`.
+/// Encoder and allocation pool for `CommandBuffer`s.
+///
+/// A `CommandEncoder` not only constructs `CommandBuffer`s but also
+/// acts as the allocation pool that owns the buffers' underlying
+/// storage. Thus, `CommandBuffer`s must not outlive the
+/// `CommandEncoder` that created them.
 ///
 /// The life cycle of a `CommandBuffer` is as follows:
 ///
@@ -472,14 +747,17 @@ pub trait Queue: WasmNotSendSync {
 ///
 /// - Call methods like `copy_buffer_to_buffer`, `begin_render_pass`,
 ///   etc. on a "recording" `CommandEncoder` to add commands to the
-///   list.
+///   list. (If an error occurs, you must call `discard_encoding`; see
+///   below.)
 ///
 /// - Call `end_encoding` on a recording `CommandEncoder` to close the
 ///   encoder and construct a fresh `CommandBuffer` consisting of the
 ///   list of commands recorded up to that point.
 ///
 /// - Call `discard_encoding` on a recording `CommandEncoder` to drop
-///   the commands recorded thus far and close the encoder.
+///   the commands recorded thus far and close the encoder. This is
+///   the only safe thing to do on a `CommandEncoder` if an error has
+///   occurred while recording commands.
 ///
 /// - Call `reset_all` on a closed `CommandEncoder`, passing all the
 ///   live `CommandBuffers` built from it. All the `CommandBuffer`s
@@ -497,6 +775,10 @@ pub trait Queue: WasmNotSendSync {
 ///   built it.
 ///
 /// - A `CommandEncoder` must not outlive its `Device`.
+///
+/// It is the user's responsibility to meet these requirements. This
+/// allows `CommandEncoder` implementations to keep their state
+/// tracking to a minimum.
 pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
     type A: Api;
 
@@ -509,13 +791,20 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
     /// This `CommandEncoder` must be in the "closed" state.
     unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>;
 
-    /// Discard the command list under construction, if any.
+    /// Discard the command list under construction.
+    ///
+    /// If an error has occurred while recording commands, this
+    /// is the only safe thing to do with the encoder.
     ///
     /// This puts this `CommandEncoder` in the "closed" state.
     ///
     /// # Safety
     ///
     /// This `CommandEncoder` must be in the "recording" state.
+    ///
+    /// Callers must not assume that implementations of this
+    /// function are idempotent, and thus should not call it
+    /// multiple times in a row.
    unsafe fn discard_encoding(&mut self);
 
     /// Return a fresh [`CommandBuffer`] holding the recorded commands.
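A minimal state-machine sketch of the `CommandEncoder` life cycle described above, using stand-in types rather than the real trait:

    #[derive(Debug)]
    struct CommandBuffer(Vec<&'static str>);

    #[derive(Default)]
    struct Encoder {
        recording: Option<Vec<&'static str>>, // Some(_) == the "recording" state
    }

    impl Encoder {
        fn begin_encoding(&mut self) {
            assert!(self.recording.is_none(), "must be closed");
            self.recording = Some(Vec::new());
        }
        fn push_command(&mut self, cmd: &'static str) {
            self.recording.as_mut().expect("must be recording").push(cmd);
        }
        /// Close the encoder, yielding the recorded commands.
        fn end_encoding(&mut self) -> CommandBuffer {
            CommandBuffer(self.recording.take().expect("must be recording"))
        }
        /// The only safe exit if recording hit an error; per the docs above it
        /// is not guaranteed idempotent, so call it at most once per recording.
        fn discard_encoding(&mut self) {
            self.recording.take().expect("must be recording");
        }
    }

    fn main() {
        let mut enc = Encoder::default();
        enc.begin_encoding();
        enc.push_command("copy_buffer_to_buffer");
        let cb = enc.end_encoding();
        assert_eq!(cb.0.len(), 1);
    }
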
There must be a function with this name /// in the shader. pub entry_point: &'a str, + /// Pipeline constants + pub constants: &'a naga::back::PipelineConstants, + /// Whether workgroup scoped memory will be initialized with zero values for this stage. + /// + /// This is required by the WebGPU spec, but may have overhead which can be avoided + /// for cross-platform applications + pub zero_initialize_workgroup_memory: bool, } // Rust gets confused about the impl requirements for `A` @@ -1326,6 +1622,8 @@ impl<A: Api> Clone for ProgrammableStage<'_, A> { Self { module: self.module, entry_point: self.entry_point, + constants: self.constants, + zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, } } } diff --git a/third_party/rust/wgpu-hal/src/metal/adapter.rs b/third_party/rust/wgpu-hal/src/metal/adapter.rs index 6211896838..cddba472bd 100644 --- a/third_party/rust/wgpu-hal/src/metal/adapter.rs +++ b/third_party/rust/wgpu-hal/src/metal/adapter.rs @@ -562,7 +562,11 @@ impl super::PrivateCapabilities { Self { family_check, - msl_version: if os_is_xr || version.at_least((12, 0), (15, 0), os_is_mac) { + msl_version: if os_is_xr || version.at_least((14, 0), (17, 0), os_is_mac) { + MTLLanguageVersion::V3_1 + } else if version.at_least((13, 0), (16, 0), os_is_mac) { + MTLLanguageVersion::V3_0 + } else if version.at_least((12, 0), (15, 0), os_is_mac) { MTLLanguageVersion::V2_4 } else if version.at_least((11, 0), (14, 0), os_is_mac) { MTLLanguageVersion::V2_3 @@ -809,6 +813,14 @@ impl super::PrivateCapabilities { None }, timestamp_query_support, + supports_simd_scoped_operations: family_check + && (device.supports_family(MTLGPUFamily::Metal3) + || device.supports_family(MTLGPUFamily::Mac2) + || device.supports_family(MTLGPUFamily::Apple7)), + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=5 + int64: family_check + && (device.supports_family(MTLGPUFamily::Apple3) + || device.supports_family(MTLGPUFamily::Metal3)), } } @@ -882,7 +894,7 @@ impl super::PrivateCapabilities { } features.set( F::SHADER_INT64, - self.msl_version >= MTLLanguageVersion::V2_3, + self.int64 && self.msl_version >= MTLLanguageVersion::V2_3, ); features.set( @@ -894,6 +906,10 @@ impl super::PrivateCapabilities { features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); + if self.supports_simd_scoped_operations { + features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER); + } + features } @@ -948,6 +964,8 @@ impl super::PrivateCapabilities { max_vertex_buffers: self.max_vertex_buffers, max_vertex_attributes: 31, max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, + min_subgroup_size: 4, + max_subgroup_size: 64, max_push_constant_size: 0x1000, min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, min_storage_buffer_offset_alignment: self.buffer_alignment as u32, diff --git a/third_party/rust/wgpu-hal/src/metal/conv.rs b/third_party/rust/wgpu-hal/src/metal/conv.rs index 8f6439b50b..6ebabee1a6 100644 --- a/third_party/rust/wgpu-hal/src/metal/conv.rs +++ b/third_party/rust/wgpu-hal/src/metal/conv.rs @@ -222,6 +222,7 @@ pub fn map_vertex_format(format: wgt::VertexFormat) -> metal::MTLVertexFormat { Vf::Uint32x4 => UInt4, Vf::Sint32x4 => Int4, Vf::Float32x4 => Float4, + Vf::Unorm10_10_10_2 => UInt1010102Normalized, Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), } } diff --git a/third_party/rust/wgpu-hal/src/metal/device.rs b/third_party/rust/wgpu-hal/src/metal/device.rs index 
diff --git a/third_party/rust/wgpu-hal/src/metal/device.rs b/third_party/rust/wgpu-hal/src/metal/device.rs
index 179429f5d7..2c8f5a2bfb 100644
--- a/third_party/rust/wgpu-hal/src/metal/device.rs
+++ b/third_party/rust/wgpu-hal/src/metal/device.rs
@@ -69,7 +69,13 @@ impl super::Device {
     ) -> Result<CompiledShader, crate::PipelineError> {
         let stage_bit = map_naga_stage(naga_stage);
 
-        let module = &stage.module.naga.module;
+        let (module, module_info) = naga::back::pipeline_constants::process_overrides(
+            &stage.module.naga.module,
+            &stage.module.naga.info,
+            stage.constants,
+        )
+        .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?;
+
         let ep_resources = &layout.per_stage_map[naga_stage];
 
         let bounds_check_policy = if stage.module.runtime_checks {
@@ -88,6 +94,8 @@ impl super::Device {
                 metal::MTLLanguageVersion::V2_2 => (2, 2),
                 metal::MTLLanguageVersion::V2_3 => (2, 3),
                 metal::MTLLanguageVersion::V2_4 => (2, 4),
+                metal::MTLLanguageVersion::V3_0 => (3, 0),
+                metal::MTLLanguageVersion::V3_1 => (3, 1),
             },
             inline_samplers: Default::default(),
             spirv_cross_compatibility: false,
@@ -104,7 +112,7 @@ impl super::Device {
                 // TODO: support bounds checks on binding arrays
                 binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
             },
-            zero_initialize_workgroup_memory: true,
+            zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory,
         };
 
         let pipeline_options = naga::back::msl::PipelineOptions {
@@ -114,13 +122,9 @@ impl super::Device {
             },
         };
 
-        let (source, info) = naga::back::msl::write_string(
-            module,
-            &stage.module.naga.info,
-            &options,
-            &pipeline_options,
-        )
-        .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?;
+        let (source, info) =
+            naga::back::msl::write_string(&module, &module_info, &options, &pipeline_options)
+                .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?;
 
         log::debug!(
             "Naga generated shader for entry point '{}' and stage {:?}\n{}",
@@ -168,7 +172,7 @@ impl super::Device {
         })?;
 
         // collect sizes indices, immutable buffers, and work group memory sizes
-        let ep_info = &stage.module.naga.info.get_entry_point(ep_index);
+        let ep_info = &module_info.get_entry_point(ep_index);
         let mut wg_memory_sizes = Vec::new();
         let mut sized_bindings = Vec::new();
         let mut immutable_buffer_mask = 0;
diff --git a/third_party/rust/wgpu-hal/src/metal/mod.rs b/third_party/rust/wgpu-hal/src/metal/mod.rs
index 6aeafb0f86..7d547cfe3c 100644
--- a/third_party/rust/wgpu-hal/src/metal/mod.rs
+++ b/third_party/rust/wgpu-hal/src/metal/mod.rs
@@ -269,6 +269,8 @@ struct PrivateCapabilities {
     supports_shader_primitive_index: bool,
     has_unified_memory: Option<bool>,
     timestamp_query_support: TimestampQuerySupport,
+    supports_simd_scoped_operations: bool,
+    int64: bool,
 }
 
 #[derive(Clone, Debug)]
@@ -649,7 +651,7 @@ struct BufferResource {
     /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can
     /// hold WGSL runtime-sized arrays. When one does, we must pass its size to
     /// shader entry points to implement bounds checks and WGSL's `arrayLength`
-    /// function. See [`device::CompiledShader::sized_bindings`] for details.
+    /// function. See `device::CompiledShader::sized_bindings` for details.
     ///
     /// [`Storage`]: wgt::BufferBindingType::Storage
     binding_size: Option<wgt::BufferSize>,
@@ -680,12 +682,12 @@ struct PipelineStageInfo {
     /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes.
     ///
-    /// See [`device::CompiledShader::sized_bindings`] for more details.
+    /// See `device::CompiledShader::sized_bindings` for more details.
     sizes_slot: Option<naga::back::msl::Slot>,
 
     /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays.
     ///
-    /// See [`device::CompiledShader::sized_bindings`] for more details.
+    /// See `device::CompiledShader::sized_bindings` for more details.
     sized_bindings: Vec<naga::ResourceBinding>,
 }
 
@@ -801,7 +803,7 @@ struct CommandState {
     ///
     /// Specifically:
     ///
-    /// - The keys are ['ResourceBinding`] values (that is, the WGSL `@group`
+    /// - The keys are [`ResourceBinding`] values (that is, the WGSL `@group`
     ///   and `@binding` attributes) for `var<storage>` global variables in the
     ///   current module that contain runtime-sized arrays.
     ///
@@ -813,7 +815,7 @@ struct CommandState {
     ///   of the buffers listed in [`stage_infos.S.sized_bindings`], which we must
     ///   pass to the entry point.
     ///
-    /// See [`device::CompiledShader::sized_bindings`] for more details.
+    /// See `device::CompiledShader::sized_bindings` for more details.
     ///
     /// [`ResourceBinding`]: naga::ResourceBinding
     storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>,
diff --git a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs
index 2665463792..21219361f4 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs
@@ -35,6 +35,8 @@ fn indexing_features() -> wgt::Features {
 /// [`PhysicalDeviceFeatures::from_extensions_and_requested_features`]
 /// constructs a value of this type indicating which Vulkan features to
 /// enable, based on the `wgpu_types::Features` requested.
+///
+/// [`Instance::expose_adapter`]: super::Instance::expose_adapter
 #[derive(Debug, Default)]
 pub struct PhysicalDeviceFeatures {
     /// Basic Vulkan 1.0 features.
@@ -86,6 +88,9 @@ pub struct PhysicalDeviceFeatures {
     ///
     /// However, we do populate this when creating a device if
     /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`] is requested.
+    ///
+    /// [`Instance::expose_adapter`]: super::Instance::expose_adapter
+    /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`]: wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE
     buffer_device_address: Option<vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR>,
 
     /// Features provided by `VK_KHR_ray_query`,
@@ -95,12 +100,17 @@ pub struct PhysicalDeviceFeatures {
     /// this from `vkGetPhysicalDeviceFeatures2`.
     ///
     /// However, we do populate this when creating a device if ray tracing is requested.
+    ///
+    /// [`Instance::expose_adapter`]: super::Instance::expose_adapter
     ray_query: Option<vk::PhysicalDeviceRayQueryFeaturesKHR>,
 
     /// Features provided by `VK_KHR_zero_initialize_workgroup_memory`, promoted
     /// to Vulkan 1.3.
     zero_initialize_workgroup_memory:
         Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>,
+
+    /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3.
+    subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlFeatures>,
 }
 
 // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read.
@@ -148,6 +158,9 @@ impl PhysicalDeviceFeatures {
         if let Some(ref mut feature) = self.ray_query {
             info = info.push_next(feature);
         }
+        if let Some(ref mut feature) = self.subgroup_size_control {
+            info = info.push_next(feature);
+        }
         info
     }
 
@@ -175,6 +188,7 @@ impl PhysicalDeviceFeatures {
     /// [`Features`]: wgt::Features
     /// [`DownlevelFlags`]: wgt::DownlevelFlags
     /// [`PrivateCapabilities`]: super::PrivateCapabilities
+    /// [`add_to_device_create_builder`]: PhysicalDeviceFeatures::add_to_device_create_builder
     /// [`DeviceCreateInfoBuilder`]: vk::DeviceCreateInfoBuilder
     /// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions
     fn from_extensions_and_requested_features(
@@ -434,6 +448,17 @@ impl PhysicalDeviceFeatures {
             } else {
                 None
             },
+            subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3
+                || enabled_extensions.contains(&vk::ExtSubgroupSizeControlFn::name())
+            {
+                Some(
+                    vk::PhysicalDeviceSubgroupSizeControlFeatures::builder()
+                        .subgroup_size_control(true)
+                        .build(),
+                )
+            } else {
+                None
+            },
         }
     }
 
@@ -442,6 +467,9 @@ impl PhysicalDeviceFeatures {
     /// Given `self`, together with the instance and physical device it was
     /// built from, and a `caps` also built from those, determine which wgpu
     /// features and downlevel flags the device can support.
+    ///
+    /// [`Features`]: wgt::Features
+    /// [`DownlevelFlags`]: wgt::DownlevelFlags
     fn to_wgpu(
         &self,
         instance: &ash::Instance,
@@ -638,6 +666,34 @@ impl PhysicalDeviceFeatures {
             );
         }
 
+        if let Some(ref subgroup) = caps.subgroup {
+            if (caps.device_api_version >= vk::API_VERSION_1_3
+                || caps.supports_extension(vk::ExtSubgroupSizeControlFn::name()))
+                && subgroup.supported_operations.contains(
+                    vk::SubgroupFeatureFlags::BASIC
+                        | vk::SubgroupFeatureFlags::VOTE
+                        | vk::SubgroupFeatureFlags::ARITHMETIC
+                        | vk::SubgroupFeatureFlags::BALLOT
+                        | vk::SubgroupFeatureFlags::SHUFFLE
+                        | vk::SubgroupFeatureFlags::SHUFFLE_RELATIVE,
+                )
+            {
+                features.set(
+                    F::SUBGROUP,
+                    subgroup
+                        .supported_stages
+                        .contains(vk::ShaderStageFlags::COMPUTE | vk::ShaderStageFlags::FRAGMENT),
+                );
+                features.set(
+                    F::SUBGROUP_VERTEX,
+                    subgroup
+                        .supported_stages
+                        .contains(vk::ShaderStageFlags::VERTEX),
+                );
+                features.insert(F::SUBGROUP_BARRIER);
+            }
+        }
+
         let supports_depth_format = |format| {
             supports_format(
                 instance,
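The Vulkan gate above demands a full set of subgroup operation classes plus compute and fragment stage support; vertex-stage support only unlocks the separate `SUBGROUP_VERTEX` feature. Restated with plain bit masks — the constants mirror `vk::SubgroupFeatureFlags`/`vk::ShaderStageFlags` values but should be read as illustrative, not as bindings:

    const BASIC: u32 = 1 << 0;
    const VOTE: u32 = 1 << 1;
    const ARITHMETIC: u32 = 1 << 2;
    const BALLOT: u32 = 1 << 3;
    const SHUFFLE: u32 = 1 << 4;
    const SHUFFLE_RELATIVE: u32 = 1 << 5;

    const STAGE_VERTEX: u32 = 1 << 0;
    const STAGE_FRAGMENT: u32 = 1 << 4;
    const STAGE_COMPUTE: u32 = 1 << 5;

    /// Returns (SUBGROUP, SUBGROUP_VERTEX) given the device's reported
    /// subgroup operations and stages.
    fn subgroup_features(supported_ops: u32, supported_stages: u32) -> (bool, bool) {
        let required_ops = BASIC | VOTE | ARITHMETIC | BALLOT | SHUFFLE | SHUFFLE_RELATIVE;
        if supported_ops & required_ops != required_ops {
            return (false, false); // a required operation class is missing
        }
        let both = STAGE_COMPUTE | STAGE_FRAGMENT;
        (
            supported_stages & both == both,
            supported_stages & STAGE_VERTEX == STAGE_VERTEX,
        )
    }

    fn main() {
        let ops = BASIC | VOTE | ARITHMETIC | BALLOT | SHUFFLE | SHUFFLE_RELATIVE;
        assert_eq!(subgroup_features(ops, STAGE_COMPUTE | STAGE_FRAGMENT), (true, false));
    }
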
@@ -773,6 +829,13 @@ pub struct PhysicalDeviceProperties {
     /// `VK_KHR_driver_properties` extension, promoted to Vulkan 1.2.
     driver: Option<vk::PhysicalDeviceDriverPropertiesKHR>,
 
+    /// Additional `vk::PhysicalDevice` properties from Vulkan 1.1.
+    subgroup: Option<vk::PhysicalDeviceSubgroupProperties>,
+
+    /// Additional `vk::PhysicalDevice` properties from the
+    /// `VK_EXT_subgroup_size_control` extension, promoted to Vulkan 1.3.
+    subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlProperties>,
+
     /// The device API version.
     ///
     /// Which is the version of Vulkan supported for device-level functionality.
@@ -888,6 +951,11 @@ impl PhysicalDeviceProperties {
             if self.supports_extension(vk::ExtImageRobustnessFn::name()) {
                 extensions.push(vk::ExtImageRobustnessFn::name());
             }
+
+            // Require `VK_EXT_subgroup_size_control` if the associated feature was requested
+            if requested_features.contains(wgt::Features::SUBGROUP) {
+                extensions.push(vk::ExtSubgroupSizeControlFn::name());
+            }
         }
 
         // Optional `VK_KHR_swapchain_mutable_format`
@@ -987,6 +1055,14 @@ impl PhysicalDeviceProperties {
                 .min(crate::MAX_VERTEX_BUFFERS as u32),
             max_vertex_attributes: limits.max_vertex_input_attributes,
             max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride,
+            min_subgroup_size: self
+                .subgroup_size_control
+                .map(|subgroup_size| subgroup_size.min_subgroup_size)
+                .unwrap_or(0),
+            max_subgroup_size: self
+                .subgroup_size_control
+                .map(|subgroup_size| subgroup_size.max_subgroup_size)
+                .unwrap_or(0),
             max_push_constant_size: limits.max_push_constants_size,
             min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32,
             min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32,
@@ -1042,6 +1118,9 @@ impl super::InstanceShared {
                 let supports_driver_properties = capabilities.device_api_version
                     >= vk::API_VERSION_1_2
                     || capabilities.supports_extension(vk::KhrDriverPropertiesFn::name());
+                let supports_subgroup_size_control = capabilities.device_api_version
+                    >= vk::API_VERSION_1_3
+                    || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name());
 
                 let supports_acceleration_structure =
                     capabilities.supports_extension(vk::KhrAccelerationStructureFn::name());
@@ -1075,6 +1154,20 @@ impl super::InstanceShared {
                     builder = builder.push_next(next);
                 }
 
+                if capabilities.device_api_version >= vk::API_VERSION_1_1 {
+                    let next = capabilities
+                        .subgroup
+                        .insert(vk::PhysicalDeviceSubgroupProperties::default());
+                    builder = builder.push_next(next);
+                }
+
+                if supports_subgroup_size_control {
+                    let next = capabilities
+                        .subgroup_size_control
+                        .insert(vk::PhysicalDeviceSubgroupSizeControlProperties::default());
+                    builder = builder.push_next(next);
+                }
+
                 let mut properties2 = builder.build();
                 unsafe {
                     get_device_properties.get_physical_device_properties2(phd, &mut properties2);
@@ -1190,6 +1283,16 @@ impl super::InstanceShared {
                     builder = builder.push_next(next);
                 }
 
+                // `VK_EXT_subgroup_size_control` is promoted to 1.3
+                if capabilities.device_api_version >= vk::API_VERSION_1_3
+                    || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name())
+                {
+                    let next = features
+                        .subgroup_size_control
+                        .insert(vk::PhysicalDeviceSubgroupSizeControlFeatures::default());
+                    builder = builder.push_next(next);
+                }
+
                 let mut features2 = builder.build();
                 unsafe {
                     get_device_properties.get_physical_device_features2(phd, &mut features2);
@@ -1382,6 +1485,9 @@ impl super::Instance {
             }),
             image_format_list: phd_capabilities.device_api_version >= vk::API_VERSION_1_2
                 || phd_capabilities.supports_extension(vk::KhrImageFormatListFn::name()),
+            subgroup_size_control: phd_features
+                .subgroup_size_control
+                .map_or(false, |ext| ext.subgroup_size_control == vk::TRUE),
         };
         let capabilities = crate::Capabilities {
             limits: phd_capabilities.to_wgpu_limits(),
@@ -1581,6 +1687,15 @@ impl super::Adapter {
             capabilities.push(spv::Capability::Geometry);
         }
 
+        if features.intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) {
+            capabilities.push(spv::Capability::GroupNonUniform);
+            capabilities.push(spv::Capability::GroupNonUniformVote);
capabilities.push(spv::Capability::GroupNonUniformArithmetic); + capabilities.push(spv::Capability::GroupNonUniformBallot); + capabilities.push(spv::Capability::GroupNonUniformShuffle); + capabilities.push(spv::Capability::GroupNonUniformShuffleRelative); + } + if features.intersects( wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, @@ -1616,7 +1731,13 @@ impl super::Adapter { true, // could check `super::Workarounds::SEPARATE_ENTRY_POINTS` ); spv::Options { - lang_version: (1, 0), + lang_version: if features + .intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) + { + (1, 3) + } else { + (1, 0) + }, flags, capabilities: Some(capabilities.iter().cloned().collect()), bounds_check_policies: naga::proc::BoundsCheckPolicies { diff --git a/third_party/rust/wgpu-hal/src/vulkan/command.rs b/third_party/rust/wgpu-hal/src/vulkan/command.rs index 43a2471954..ceb44dfbe6 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/command.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/command.rs @@ -104,6 +104,11 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn discard_encoding(&mut self) { + // Safe use requires this is not called in the "closed" state, so the buffer + // shouldn't be null. Assert this to make sure we're not pushing null + // buffers to the discard pile. + assert_ne!(self.active, vk::CommandBuffer::null()); + self.discarded.push(self.active); self.active = vk::CommandBuffer::null(); } diff --git a/third_party/rust/wgpu-hal/src/vulkan/conv.rs b/third_party/rust/wgpu-hal/src/vulkan/conv.rs index 8202c93aa3..fe284f32a9 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/conv.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/conv.rs @@ -399,6 +399,7 @@ pub fn map_vertex_format(vertex_format: wgt::VertexFormat) -> vk::Format { Vf::Float64x2 => vk::Format::R64G64_SFLOAT, Vf::Float64x3 => vk::Format::R64G64B64_SFLOAT, Vf::Float64x4 => vk::Format::R64G64B64A64_SFLOAT, + Vf::Unorm10_10_10_2 => vk::Format::A2B10G10R10_UNORM_PACK32, } } diff --git a/third_party/rust/wgpu-hal/src/vulkan/device.rs b/third_party/rust/wgpu-hal/src/vulkan/device.rs index 70028cc700..ec392533a0 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/device.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/device.rs @@ -2,6 +2,7 @@ use super::conv; use arrayvec::ArrayVec; use ash::{extensions::khr, vk}; +use naga::back::spv::ZeroInitializeWorkgroupMemoryMode; use parking_lot::Mutex; use std::{ @@ -737,7 +738,8 @@ impl super::Device { }; let needs_temp_options = !runtime_checks || !binding_map.is_empty() - || naga_shader.debug_source.is_some(); + || naga_shader.debug_source.is_some() + || !stage.zero_initialize_workgroup_memory; let mut temp_options; let options = if needs_temp_options { temp_options = self.naga_options.clone(); @@ -760,27 +762,40 @@ impl super::Device { file_name: debug.file_name.as_ref().as_ref(), }) } + if !stage.zero_initialize_workgroup_memory { + temp_options.zero_initialize_workgroup_memory = + ZeroInitializeWorkgroupMemoryMode::None; + } &temp_options } else { &self.naga_options }; + + let (module, info) = naga::back::pipeline_constants::process_overrides( + &naga_shader.module, + &naga_shader.info, + stage.constants, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?; + let spv = { profiling::scope!("naga::spv::write_vec"); - naga::back::spv::write_vec( - &naga_shader.module, - &naga_shader.info, - options, - Some(&pipeline_options), - ) + 
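Before moving on to the remaining files, it may help to see the net effect of the two adapter hunks above in isolation: when `SUBGROUP` or `SUBGROUP_VERTEX` is enabled, the naga SPIR-V backend is asked for SPIR-V 1.3 plus the `GroupNonUniform*` capability set. A minimal standalone sketch, assuming a validated `module`/`info` pair is on hand and that `spv::Options` implements `Default` (illustrative only, not code from this patch):

```rust
use naga::back::spv;

// A standalone sketch, not part of this patch: emit SPIR-V 1.3 with the
// subgroup capabilities the adapter now advertises.
fn write_subgroup_spirv(
    module: &naga::Module,
    info: &naga::valid::ModuleInfo,
) -> Result<Vec<u32>, spv::Error> {
    let capabilities = [
        spv::Capability::GroupNonUniform,
        spv::Capability::GroupNonUniformVote,
        spv::Capability::GroupNonUniformArithmetic,
        spv::Capability::GroupNonUniformBallot,
        spv::Capability::GroupNonUniformShuffle,
        spv::Capability::GroupNonUniformShuffleRelative,
    ];
    let options = spv::Options {
        // Subgroup operations require SPIR-V >= 1.3, hence the
        // `lang_version` bump in the hunk above.
        lang_version: (1, 3),
        capabilities: Some(capabilities.iter().cloned().collect()),
        ..Default::default()
    };
    // No per-pipeline options are needed for this sketch.
    spv::write_vec(module, info, &options, None)
}
```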
diff --git a/third_party/rust/wgpu-hal/src/vulkan/command.rs b/third_party/rust/wgpu-hal/src/vulkan/command.rs
index 43a2471954..ceb44dfbe6 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/command.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/command.rs
@@ -104,6 +104,11 @@ impl crate::CommandEncoder for super::CommandEncoder {
     }
 
     unsafe fn discard_encoding(&mut self) {
+        // Safe use requires this is not called in the "closed" state, so the buffer
+        // shouldn't be null. Assert this to make sure we're not pushing null
+        // buffers to the discard pile.
+        assert_ne!(self.active, vk::CommandBuffer::null());
+
         self.discarded.push(self.active);
         self.active = vk::CommandBuffer::null();
     }
diff --git a/third_party/rust/wgpu-hal/src/vulkan/conv.rs b/third_party/rust/wgpu-hal/src/vulkan/conv.rs
index 8202c93aa3..fe284f32a9 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/conv.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/conv.rs
@@ -399,6 +399,7 @@ pub fn map_vertex_format(vertex_format: wgt::VertexFormat) -> vk::Format {
         Vf::Float64x2 => vk::Format::R64G64_SFLOAT,
         Vf::Float64x3 => vk::Format::R64G64B64_SFLOAT,
         Vf::Float64x4 => vk::Format::R64G64B64A64_SFLOAT,
+        Vf::Unorm10_10_10_2 => vk::Format::A2B10G10R10_UNORM_PACK32,
     }
 }
diff --git a/third_party/rust/wgpu-hal/src/vulkan/device.rs b/third_party/rust/wgpu-hal/src/vulkan/device.rs
index 70028cc700..ec392533a0 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/device.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/device.rs
@@ -2,6 +2,7 @@ use super::conv;
 
 use arrayvec::ArrayVec;
 use ash::{extensions::khr, vk};
+use naga::back::spv::ZeroInitializeWorkgroupMemoryMode;
 use parking_lot::Mutex;
 
 use std::{
@@ -737,7 +738,8 @@ impl super::Device {
                 };
                 let needs_temp_options = !runtime_checks
                     || !binding_map.is_empty()
-                    || naga_shader.debug_source.is_some();
+                    || naga_shader.debug_source.is_some()
+                    || !stage.zero_initialize_workgroup_memory;
                 let mut temp_options;
                 let options = if needs_temp_options {
                     temp_options = self.naga_options.clone();
@@ -760,27 +762,40 @@ impl super::Device {
                             file_name: debug.file_name.as_ref().as_ref(),
                         })
                     }
+                    if !stage.zero_initialize_workgroup_memory {
+                        temp_options.zero_initialize_workgroup_memory =
+                            ZeroInitializeWorkgroupMemoryMode::None;
+                    }
                     &temp_options
                 } else {
                     &self.naga_options
                 };
+
+                let (module, info) = naga::back::pipeline_constants::process_overrides(
+                    &naga_shader.module,
+                    &naga_shader.info,
+                    stage.constants,
+                )
+                .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?;
+
                 let spv = {
                     profiling::scope!("naga::spv::write_vec");
-                    naga::back::spv::write_vec(
-                        &naga_shader.module,
-                        &naga_shader.info,
-                        options,
-                        Some(&pipeline_options),
-                    )
+                    naga::back::spv::write_vec(&module, &info, options, Some(&pipeline_options))
                 }
                 .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?;
                 self.create_shader_module_impl(&spv)?
             }
         };
 
+        let mut flags = vk::PipelineShaderStageCreateFlags::empty();
+        if self.shared.private_caps.subgroup_size_control {
+            flags |= vk::PipelineShaderStageCreateFlags::ALLOW_VARYING_SUBGROUP_SIZE
+        }
+
         let entry_point = CString::new(stage.entry_point).unwrap();
         let create_info = vk::PipelineShaderStageCreateInfo::builder()
+            .flags(flags)
             .stage(conv::map_shader_stage(stage_flags))
             .module(vk_module)
             .name(&entry_point)
@@ -1587,6 +1602,7 @@ impl crate::Device for super::Device {
             .shared
             .workarounds
             .contains(super::Workarounds::SEPARATE_ENTRY_POINTS)
+            || !naga_shader.module.overrides.is_empty()
         {
             return Ok(super::ShaderModule::Intermediate {
                 naga_shader,
diff --git a/third_party/rust/wgpu-hal/src/vulkan/mod.rs b/third_party/rust/wgpu-hal/src/vulkan/mod.rs
index 0cd385045c..d1ea82772e 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/mod.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/mod.rs
@@ -238,6 +238,7 @@ struct PrivateCapabilities {
     robust_image_access2: bool,
     zero_initialize_workgroup_memory: bool,
     image_format_list: bool,
+    subgroup_size_control: bool,
 }
 
 bitflags::bitflags!(
@@ -413,6 +414,15 @@ pub struct TextureView {
     attachment: FramebufferAttachment,
 }
 
+impl TextureView {
+    /// # Safety
+    ///
+    /// - The image view handle must not be manually destroyed
+    pub unsafe fn raw_handle(&self) -> vk::ImageView {
+        self.raw
+    }
+}
+
 #[derive(Debug)]
 pub struct Sampler {
     raw: vk::Sampler,
@@ -438,6 +448,7 @@ pub struct BindGroup {
     set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
 }
 
+/// Miscellaneous allocation recycling pool for `CommandAllocator`.
 #[derive(Default)]
 struct Temp {
     marker: Vec<u8>,
@@ -467,11 +478,31 @@ impl Temp {
 pub struct CommandEncoder {
     raw: vk::CommandPool,
     device: Arc<DeviceShared>,
+
+    /// The current command buffer, if `self` is in the ["recording"]
+    /// state.
+    ///
+    /// ["recording"]: crate::CommandEncoder
+    ///
+    /// If non-`null`, the buffer is in the Vulkan "recording" state.
     active: vk::CommandBuffer,
+
+    /// What kind of pass we are currently within: compute or render.
     bind_point: vk::PipelineBindPoint,
+
+    /// Allocation recycling pool for this encoder.
     temp: Temp,
+
+    /// A pool of available command buffers.
+    ///
+    /// These are all in the Vulkan "initial" state.
     free: Vec<vk::CommandBuffer>,
+
+    /// A pool of discarded command buffers.
+    ///
+    /// These could be in any Vulkan state except "pending".
     discarded: Vec<vk::CommandBuffer>,
+
     /// If this is true, the active renderpass enabled a debug span,
     /// and needs to be disabled on renderpass close.
     rpass_debug_marker_active: bool,
@@ -481,6 +512,15 @@ pub struct CommandEncoder {
     end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,
 }
 
+impl CommandEncoder {
+    /// # Safety
+    ///
+    /// - The command buffer handle must not be manually destroyed
+    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
+        self.active
+    }
+}
+
 impl fmt::Debug for CommandEncoder {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("CommandEncoder")
@@ -519,9 +559,47 @@ pub struct QuerySet {
     raw: vk::QueryPool,
 }
 
+/// The [`Api::Fence`] type for [`vulkan::Api`].
+///
+/// This is an `enum` because there are two possible implementations of
+/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
+/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
+/// require non-1.0 features.
+///
+/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
+/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
+/// otherwise.
+///
+/// [`Api::Fence`]: crate::Api::Fence
+/// [`vulkan::Api`]: Api
+/// [`Device::create_fence`]: crate::Device::create_fence
+/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
+/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
+/// [`FencePool`]: Fence::FencePool
 #[derive(Debug)]
 pub enum Fence {
+    /// A Vulkan [timeline semaphore].
+    ///
+    /// These are simpler to use than Vulkan fences, since timeline semaphores
+    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
+    ///
+    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
+    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
     TimelineSemaphore(vk::Semaphore),
+
+    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
+    ///
+    /// The effective [`FenceValue`] of this variant is the greater of
+    /// `last_completed` and the maximum value associated with a signaled fence
+    /// in `active`.
+    ///
+    /// Fences are available in all versions of Vulkan, but since they only have
+    /// two states, "signaled" and "unsignaled", we need to use a separate fence
+    /// for each queue submission we might want to wait for, and remember which
+    /// [`FenceValue`] each one represents.
+    ///
+    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
+    /// [`FenceValue`]: crate::FenceValue
     FencePool {
         last_completed: crate::FenceValue,
         /// The pending fence values have to be ascending.
@@ -531,21 +609,32 @@ pub enum Fence
     },
 }
 
 impl Fence {
+    /// Return the highest [`FenceValue`] among the signaled fences in `active`.
+    ///
+    /// As an optimization, assume that we already know that the fence has
+    /// reached `last_completed`, and don't bother checking fences whose values
+    /// are less than that: those fences remain in the `active` array only
+    /// because we haven't called `maintain` yet to clean them up.
+    ///
+    /// [`FenceValue`]: crate::FenceValue
     fn check_active(
         device: &ash::Device,
-        mut max_value: crate::FenceValue,
+        mut last_completed: crate::FenceValue,
         active: &[(crate::FenceValue, vk::Fence)],
     ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
-                if value > max_value && device.get_fence_status(raw)? {
-                    max_value = value;
+                if value > last_completed && device.get_fence_status(raw)? {
+                    last_completed = value;
                 }
             }
         }
-        Ok(max_value)
+        Ok(last_completed)
     }
 
+    /// Return the highest signaled [`FenceValue`] for `self`.
+    ///
+    /// [`FenceValue`]: crate::FenceValue
     fn get_latest(
         &self,
         device: &ash::Device,
@@ -566,6 +655,18 @@ impl Fence {
         }
     }
 
+    /// Trim the internal state of this [`Fence`].
+    ///
+    /// This function has no externally visible effect, but you should call it
+    /// periodically to keep this fence's resource consumption under control.
+    ///
+    /// For fences using the [`FencePool`] implementation, this function
+    /// recycles fences that have been signaled. If you don't call this,
+    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
+    /// time it's called.
+    ///
+    /// [`FencePool`]: Fence::FencePool
+    /// [`Queue::submit`]: crate::Queue::submit
     fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
         match *self {
             Self::TimelineSemaphore(_) => {}
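The `FencePool` documentation added above states the invariant that `check_active` and `get_latest` implement. Reduced to plain data, the rule looks like this (a standalone sketch, not this module's code; a `bool` stands in for a `vkGetFenceStatus` query on a real `vk::Fence`):

```rust
// The effective fence value is the greater of `last_completed` and the
// largest value whose Vulkan fence has signaled.
fn effective_fence_value(last_completed: u64, active: &[(u64, bool)]) -> u64 {
    active.iter().fold(last_completed, |best, &(value, signaled)| {
        // Mirrors `check_active`: skip fences at or below the value we
        // already know has completed.
        if signaled && value > best {
            value
        } else {
            best
        }
    })
}

fn main() {
    // last_completed = 2; submissions 3 and 4 are in flight, 3 has signaled.
    assert_eq!(effective_fence_value(2, &[(3, true), (4, false)]), 3);
}
```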