diff options
Diffstat (limited to 'third_party/rust/wgpu-hal/src/vulkan')
-rw-r--r-- | third_party/rust/wgpu-hal/src/vulkan/adapter.rs | 123 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/vulkan/command.rs | 5 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/vulkan/conv.rs | 1 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/vulkan/device.rs | 30 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/vulkan/mod.rs | 109 |
5 files changed, 256 insertions, 12 deletions
diff --git a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs index 2665463792..21219361f4 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs @@ -35,6 +35,8 @@ fn indexing_features() -> wgt::Features { /// [`PhysicalDeviceFeatures::from_extensions_and_requested_features`] /// constructs an value of this type indicating which Vulkan features to /// enable, based on the `wgpu_types::Features` requested. +/// +/// [`Instance::expose_adapter`]: super::Instance::expose_adapter #[derive(Debug, Default)] pub struct PhysicalDeviceFeatures { /// Basic Vulkan 1.0 features. @@ -86,6 +88,9 @@ pub struct PhysicalDeviceFeatures { /// /// However, we do populate this when creating a device if /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`] is requested. + /// + /// [`Instance::expose_adapter`]: super::Instance::expose_adapter + /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`]: wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE buffer_device_address: Option<vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR>, /// Features provided by `VK_KHR_ray_query`, @@ -95,12 +100,17 @@ pub struct PhysicalDeviceFeatures { /// this from `vkGetPhysicalDeviceFeatures2`. /// /// However, we do populate this when creating a device if ray tracing is requested. + /// + /// [`Instance::expose_adapter`]: super::Instance::expose_adapter ray_query: Option<vk::PhysicalDeviceRayQueryFeaturesKHR>, /// Features provided by `VK_KHR_zero_initialize_workgroup_memory`, promoted /// to Vulkan 1.3. zero_initialize_workgroup_memory: Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>, + + /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3. + subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlFeatures>, } // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. @@ -148,6 +158,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.ray_query { info = info.push_next(feature); } + if let Some(ref mut feature) = self.subgroup_size_control { + info = info.push_next(feature); + } info } @@ -175,6 +188,7 @@ impl PhysicalDeviceFeatures { /// [`Features`]: wgt::Features /// [`DownlevelFlags`]: wgt::DownlevelFlags /// [`PrivateCapabilities`]: super::PrivateCapabilities + /// [`add_to_device_create_builder`]: PhysicalDeviceFeatures::add_to_device_create_builder /// [`DeviceCreateInfoBuilder`]: vk::DeviceCreateInfoBuilder /// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions fn from_extensions_and_requested_features( @@ -434,6 +448,17 @@ impl PhysicalDeviceFeatures { } else { None }, + subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3 + || enabled_extensions.contains(&vk::ExtSubgroupSizeControlFn::name()) + { + Some( + vk::PhysicalDeviceSubgroupSizeControlFeatures::builder() + .subgroup_size_control(true) + .build(), + ) + } else { + None + }, } } @@ -442,6 +467,9 @@ impl PhysicalDeviceFeatures { /// Given `self`, together with the instance and physical device it was /// built from, and a `caps` also built from those, determine which wgpu /// features and downlevel flags the device can support. + /// + /// [`Features`]: wgt::Features + /// [`DownlevelFlags`]: wgt::DownlevelFlags fn to_wgpu( &self, instance: &ash::Instance, @@ -638,6 +666,34 @@ impl PhysicalDeviceFeatures { ); } + if let Some(ref subgroup) = caps.subgroup { + if (caps.device_api_version >= vk::API_VERSION_1_3 + || caps.supports_extension(vk::ExtSubgroupSizeControlFn::name())) + && subgroup.supported_operations.contains( + vk::SubgroupFeatureFlags::BASIC + | vk::SubgroupFeatureFlags::VOTE + | vk::SubgroupFeatureFlags::ARITHMETIC + | vk::SubgroupFeatureFlags::BALLOT + | vk::SubgroupFeatureFlags::SHUFFLE + | vk::SubgroupFeatureFlags::SHUFFLE_RELATIVE, + ) + { + features.set( + F::SUBGROUP, + subgroup + .supported_stages + .contains(vk::ShaderStageFlags::COMPUTE | vk::ShaderStageFlags::FRAGMENT), + ); + features.set( + F::SUBGROUP_VERTEX, + subgroup + .supported_stages + .contains(vk::ShaderStageFlags::VERTEX), + ); + features.insert(F::SUBGROUP_BARRIER); + } + } + let supports_depth_format = |format| { supports_format( instance, @@ -773,6 +829,13 @@ pub struct PhysicalDeviceProperties { /// `VK_KHR_driver_properties` extension, promoted to Vulkan 1.2. driver: Option<vk::PhysicalDeviceDriverPropertiesKHR>, + /// Additional `vk::PhysicalDevice` properties from Vulkan 1.1. + subgroup: Option<vk::PhysicalDeviceSubgroupProperties>, + + /// Additional `vk::PhysicalDevice` properties from the + /// `VK_EXT_subgroup_size_control` extension, promoted to Vulkan 1.3. + subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlProperties>, + /// The device API version. /// /// Which is the version of Vulkan supported for device-level functionality. @@ -888,6 +951,11 @@ impl PhysicalDeviceProperties { if self.supports_extension(vk::ExtImageRobustnessFn::name()) { extensions.push(vk::ExtImageRobustnessFn::name()); } + + // Require `VK_EXT_subgroup_size_control` if the associated feature was requested + if requested_features.contains(wgt::Features::SUBGROUP) { + extensions.push(vk::ExtSubgroupSizeControlFn::name()); + } } // Optional `VK_KHR_swapchain_mutable_format` @@ -987,6 +1055,14 @@ impl PhysicalDeviceProperties { .min(crate::MAX_VERTEX_BUFFERS as u32), max_vertex_attributes: limits.max_vertex_input_attributes, max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride, + min_subgroup_size: self + .subgroup_size_control + .map(|subgroup_size| subgroup_size.min_subgroup_size) + .unwrap_or(0), + max_subgroup_size: self + .subgroup_size_control + .map(|subgroup_size| subgroup_size.max_subgroup_size) + .unwrap_or(0), max_push_constant_size: limits.max_push_constants_size, min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32, min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32, @@ -1042,6 +1118,9 @@ impl super::InstanceShared { let supports_driver_properties = capabilities.device_api_version >= vk::API_VERSION_1_2 || capabilities.supports_extension(vk::KhrDriverPropertiesFn::name()); + let supports_subgroup_size_control = capabilities.device_api_version + >= vk::API_VERSION_1_3 + || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name()); let supports_acceleration_structure = capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()); @@ -1075,6 +1154,20 @@ impl super::InstanceShared { builder = builder.push_next(next); } + if capabilities.device_api_version >= vk::API_VERSION_1_1 { + let next = capabilities + .subgroup + .insert(vk::PhysicalDeviceSubgroupProperties::default()); + builder = builder.push_next(next); + } + + if supports_subgroup_size_control { + let next = capabilities + .subgroup_size_control + .insert(vk::PhysicalDeviceSubgroupSizeControlProperties::default()); + builder = builder.push_next(next); + } + let mut properties2 = builder.build(); unsafe { get_device_properties.get_physical_device_properties2(phd, &mut properties2); @@ -1190,6 +1283,16 @@ impl super::InstanceShared { builder = builder.push_next(next); } + // `VK_EXT_subgroup_size_control` is promoted to 1.3 + if capabilities.device_api_version >= vk::API_VERSION_1_3 + || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name()) + { + let next = features + .subgroup_size_control + .insert(vk::PhysicalDeviceSubgroupSizeControlFeatures::default()); + builder = builder.push_next(next); + } + let mut features2 = builder.build(); unsafe { get_device_properties.get_physical_device_features2(phd, &mut features2); @@ -1382,6 +1485,9 @@ impl super::Instance { }), image_format_list: phd_capabilities.device_api_version >= vk::API_VERSION_1_2 || phd_capabilities.supports_extension(vk::KhrImageFormatListFn::name()), + subgroup_size_control: phd_features + .subgroup_size_control + .map_or(false, |ext| ext.subgroup_size_control == vk::TRUE), }; let capabilities = crate::Capabilities { limits: phd_capabilities.to_wgpu_limits(), @@ -1581,6 +1687,15 @@ impl super::Adapter { capabilities.push(spv::Capability::Geometry); } + if features.intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) { + capabilities.push(spv::Capability::GroupNonUniform); + capabilities.push(spv::Capability::GroupNonUniformVote); + capabilities.push(spv::Capability::GroupNonUniformArithmetic); + capabilities.push(spv::Capability::GroupNonUniformBallot); + capabilities.push(spv::Capability::GroupNonUniformShuffle); + capabilities.push(spv::Capability::GroupNonUniformShuffleRelative); + } + if features.intersects( wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, @@ -1616,7 +1731,13 @@ impl super::Adapter { true, // could check `super::Workarounds::SEPARATE_ENTRY_POINTS` ); spv::Options { - lang_version: (1, 0), + lang_version: if features + .intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) + { + (1, 3) + } else { + (1, 0) + }, flags, capabilities: Some(capabilities.iter().cloned().collect()), bounds_check_policies: naga::proc::BoundsCheckPolicies { diff --git a/third_party/rust/wgpu-hal/src/vulkan/command.rs b/third_party/rust/wgpu-hal/src/vulkan/command.rs index 43a2471954..ceb44dfbe6 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/command.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/command.rs @@ -104,6 +104,11 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn discard_encoding(&mut self) { + // Safe use requires this is not called in the "closed" state, so the buffer + // shouldn't be null. Assert this to make sure we're not pushing null + // buffers to the discard pile. + assert_ne!(self.active, vk::CommandBuffer::null()); + self.discarded.push(self.active); self.active = vk::CommandBuffer::null(); } diff --git a/third_party/rust/wgpu-hal/src/vulkan/conv.rs b/third_party/rust/wgpu-hal/src/vulkan/conv.rs index 8202c93aa3..fe284f32a9 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/conv.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/conv.rs @@ -399,6 +399,7 @@ pub fn map_vertex_format(vertex_format: wgt::VertexFormat) -> vk::Format { Vf::Float64x2 => vk::Format::R64G64_SFLOAT, Vf::Float64x3 => vk::Format::R64G64B64_SFLOAT, Vf::Float64x4 => vk::Format::R64G64B64A64_SFLOAT, + Vf::Unorm10_10_10_2 => vk::Format::A2B10G10R10_UNORM_PACK32, } } diff --git a/third_party/rust/wgpu-hal/src/vulkan/device.rs b/third_party/rust/wgpu-hal/src/vulkan/device.rs index 70028cc700..ec392533a0 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/device.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/device.rs @@ -2,6 +2,7 @@ use super::conv; use arrayvec::ArrayVec; use ash::{extensions::khr, vk}; +use naga::back::spv::ZeroInitializeWorkgroupMemoryMode; use parking_lot::Mutex; use std::{ @@ -737,7 +738,8 @@ impl super::Device { }; let needs_temp_options = !runtime_checks || !binding_map.is_empty() - || naga_shader.debug_source.is_some(); + || naga_shader.debug_source.is_some() + || !stage.zero_initialize_workgroup_memory; let mut temp_options; let options = if needs_temp_options { temp_options = self.naga_options.clone(); @@ -760,27 +762,40 @@ impl super::Device { file_name: debug.file_name.as_ref().as_ref(), }) } + if !stage.zero_initialize_workgroup_memory { + temp_options.zero_initialize_workgroup_memory = + ZeroInitializeWorkgroupMemoryMode::None; + } &temp_options } else { &self.naga_options }; + + let (module, info) = naga::back::pipeline_constants::process_overrides( + &naga_shader.module, + &naga_shader.info, + stage.constants, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?; + let spv = { profiling::scope!("naga::spv::write_vec"); - naga::back::spv::write_vec( - &naga_shader.module, - &naga_shader.info, - options, - Some(&pipeline_options), - ) + naga::back::spv::write_vec(&module, &info, options, Some(&pipeline_options)) } .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?; self.create_shader_module_impl(&spv)? } }; + let mut flags = vk::PipelineShaderStageCreateFlags::empty(); + if self.shared.private_caps.subgroup_size_control { + flags |= vk::PipelineShaderStageCreateFlags::ALLOW_VARYING_SUBGROUP_SIZE + } + let entry_point = CString::new(stage.entry_point).unwrap(); let create_info = vk::PipelineShaderStageCreateInfo::builder() + .flags(flags) .stage(conv::map_shader_stage(stage_flags)) .module(vk_module) .name(&entry_point) @@ -1587,6 +1602,7 @@ impl crate::Device for super::Device { .shared .workarounds .contains(super::Workarounds::SEPARATE_ENTRY_POINTS) + || !naga_shader.module.overrides.is_empty() { return Ok(super::ShaderModule::Intermediate { naga_shader, diff --git a/third_party/rust/wgpu-hal/src/vulkan/mod.rs b/third_party/rust/wgpu-hal/src/vulkan/mod.rs index 0cd385045c..d1ea82772e 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/mod.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/mod.rs @@ -238,6 +238,7 @@ struct PrivateCapabilities { robust_image_access2: bool, zero_initialize_workgroup_memory: bool, image_format_list: bool, + subgroup_size_control: bool, } bitflags::bitflags!( @@ -413,6 +414,15 @@ pub struct TextureView { attachment: FramebufferAttachment, } +impl TextureView { + /// # Safety + /// + /// - The image view handle must not be manually destroyed + pub unsafe fn raw_handle(&self) -> vk::ImageView { + self.raw + } +} + #[derive(Debug)] pub struct Sampler { raw: vk::Sampler, @@ -438,6 +448,7 @@ pub struct BindGroup { set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>, } +/// Miscellaneous allocation recycling pool for `CommandAllocator`. #[derive(Default)] struct Temp { marker: Vec<u8>, @@ -467,11 +478,31 @@ impl Temp { pub struct CommandEncoder { raw: vk::CommandPool, device: Arc<DeviceShared>, + + /// The current command buffer, if `self` is in the ["recording"] + /// state. + /// + /// ["recording"]: crate::CommandEncoder + /// + /// If non-`null`, the buffer is in the Vulkan "recording" state. active: vk::CommandBuffer, + + /// What kind of pass we are currently within: compute or render. bind_point: vk::PipelineBindPoint, + + /// Allocation recycling pool for this encoder. temp: Temp, + + /// A pool of available command buffers. + /// + /// These are all in the Vulkan "initial" state. free: Vec<vk::CommandBuffer>, + + /// A pool of discarded command buffers. + /// + /// These could be in any Vulkan state except "pending". discarded: Vec<vk::CommandBuffer>, + /// If this is true, the active renderpass enabled a debug span, /// and needs to be disabled on renderpass close. rpass_debug_marker_active: bool, @@ -481,6 +512,15 @@ pub struct CommandEncoder { end_of_pass_timer_query: Option<(vk::QueryPool, u32)>, } +impl CommandEncoder { + /// # Safety + /// + /// - The command buffer handle must not be manually destroyed + pub unsafe fn raw_handle(&self) -> vk::CommandBuffer { + self.active + } +} + impl fmt::Debug for CommandEncoder { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("CommandEncoder") @@ -519,9 +559,47 @@ pub struct QuerySet { raw: vk::QueryPool, } +/// The [`Api::Fence`] type for [`vulkan::Api`]. +/// +/// This is an `enum` because there are two possible implementations of +/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of +/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but +/// require non-1.0 features. +/// +/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if +/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`] +/// otherwise. +/// +/// [`Api::Fence`]: crate::Api::Fence +/// [`vulkan::Api`]: Api +/// [`Device::create_fence`]: crate::Device::create_fence +/// [`TimelineSemaphore`]: Fence::TimelineSemaphore +/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore +/// [`FencePool`]: Fence::FencePool #[derive(Debug)] pub enum Fence { + /// A Vulkan [timeline semaphore]. + /// + /// These are simpler to use than Vulkan fences, since timeline semaphores + /// work exactly the way [`wpgu_hal::Api::Fence`] is specified to work. + /// + /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores + /// [`wpgu_hal::Api::Fence`]: crate::Api::Fence TimelineSemaphore(vk::Semaphore), + + /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`]. + /// + /// The effective [`FenceValue`] of this variant is the greater of + /// `last_completed` and the maximum value associated with a signalled fence + /// in `active`. + /// + /// Fences are available in all versions of Vulkan, but since they only have + /// two states, "signaled" and "unsignaled", we need to use a separate fence + /// for each queue submission we might want to wait for, and remember which + /// [`FenceValue`] each one represents. + /// + /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences + /// [`FenceValue`]: crate::FenceValue FencePool { last_completed: crate::FenceValue, /// The pending fence values have to be ascending. @@ -531,21 +609,32 @@ pub enum Fence { } impl Fence { + /// Return the highest [`FenceValue`] among the signalled fences in `active`. + /// + /// As an optimization, assume that we already know that the fence has + /// reached `last_completed`, and don't bother checking fences whose values + /// are less than that: those fences remain in the `active` array only + /// because we haven't called `maintain` yet to clean them up. + /// + /// [`FenceValue`]: crate::FenceValue fn check_active( device: &ash::Device, - mut max_value: crate::FenceValue, + mut last_completed: crate::FenceValue, active: &[(crate::FenceValue, vk::Fence)], ) -> Result<crate::FenceValue, crate::DeviceError> { for &(value, raw) in active.iter() { unsafe { - if value > max_value && device.get_fence_status(raw)? { - max_value = value; + if value > last_completed && device.get_fence_status(raw)? { + last_completed = value; } } } - Ok(max_value) + Ok(last_completed) } + /// Return the highest signalled [`FenceValue`] for `self`. + /// + /// [`FenceValue`]: crate::FenceValue fn get_latest( &self, device: &ash::Device, @@ -566,6 +655,18 @@ impl Fence { } } + /// Trim the internal state of this [`Fence`]. + /// + /// This function has no externally visible effect, but you should call it + /// periodically to keep this fence's resource consumption under control. + /// + /// For fences using the [`FencePool`] implementation, this function + /// recycles fences that have been signaled. If you don't call this, + /// [`Queue::submit`] will just keep allocating a new Vulkan fence every + /// time it's called. + /// + /// [`FencePool`]: Fence::FencePool + /// [`Queue::submit`]: crate::Queue::submit fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> { match *self { Self::TimelineSemaphore(_) => {} |