summaryrefslogtreecommitdiffstats
path: root/third_party/rust/wgpu-hal/src/vulkan
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/wgpu-hal/src/vulkan')
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/adapter.rs123
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/command.rs5
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/conv.rs1
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/device.rs30
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/mod.rs109
5 files changed, 256 insertions, 12 deletions
diff --git a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs
index 2665463792..21219361f4 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs
@@ -35,6 +35,8 @@ fn indexing_features() -> wgt::Features {
/// [`PhysicalDeviceFeatures::from_extensions_and_requested_features`]
/// constructs a value of this type indicating which Vulkan features to
/// enable, based on the `wgpu_types::Features` requested.
+///
+/// [`Instance::expose_adapter`]: super::Instance::expose_adapter
#[derive(Debug, Default)]
pub struct PhysicalDeviceFeatures {
/// Basic Vulkan 1.0 features.
@@ -86,6 +88,9 @@ pub struct PhysicalDeviceFeatures {
///
/// However, we do populate this when creating a device if
/// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`] is requested.
+ ///
+ /// [`Instance::expose_adapter`]: super::Instance::expose_adapter
+ /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`]: wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE
buffer_device_address: Option<vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR>,
/// Features provided by `VK_KHR_ray_query`,
@@ -95,12 +100,17 @@ pub struct PhysicalDeviceFeatures {
/// this from `vkGetPhysicalDeviceFeatures2`.
///
/// However, we do populate this when creating a device if ray tracing is requested.
+ ///
+ /// [`Instance::expose_adapter`]: super::Instance::expose_adapter
ray_query: Option<vk::PhysicalDeviceRayQueryFeaturesKHR>,
/// Features provided by `VK_KHR_zero_initialize_workgroup_memory`, promoted
/// to Vulkan 1.3.
zero_initialize_workgroup_memory:
Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>,
+
+ /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3.
+ subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlFeatures>,
}
// This is safe because the structs have `p_next: *mut c_void`, which we null out/never read.
@@ -148,6 +158,9 @@ impl PhysicalDeviceFeatures {
if let Some(ref mut feature) = self.ray_query {
info = info.push_next(feature);
}
+ if let Some(ref mut feature) = self.subgroup_size_control {
+ info = info.push_next(feature);
+ }
info
}
@@ -175,6 +188,7 @@ impl PhysicalDeviceFeatures {
/// [`Features`]: wgt::Features
/// [`DownlevelFlags`]: wgt::DownlevelFlags
/// [`PrivateCapabilities`]: super::PrivateCapabilities
+ /// [`add_to_device_create_builder`]: PhysicalDeviceFeatures::add_to_device_create_builder
/// [`DeviceCreateInfoBuilder`]: vk::DeviceCreateInfoBuilder
/// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions
fn from_extensions_and_requested_features(
@@ -434,6 +448,17 @@ impl PhysicalDeviceFeatures {
} else {
None
},
+ subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3
+ || enabled_extensions.contains(&vk::ExtSubgroupSizeControlFn::name())
+ {
+ Some(
+ vk::PhysicalDeviceSubgroupSizeControlFeatures::builder()
+ .subgroup_size_control(true)
+ .build(),
+ )
+ } else {
+ None
+ },
}
}
@@ -442,6 +467,9 @@ impl PhysicalDeviceFeatures {
/// Given `self`, together with the instance and physical device it was
/// built from, and a `caps` also built from those, determine which wgpu
/// features and downlevel flags the device can support.
+ ///
+ /// [`Features`]: wgt::Features
+ /// [`DownlevelFlags`]: wgt::DownlevelFlags
fn to_wgpu(
&self,
instance: &ash::Instance,
@@ -638,6 +666,34 @@ impl PhysicalDeviceFeatures {
);
}
+ if let Some(ref subgroup) = caps.subgroup {
+ if (caps.device_api_version >= vk::API_VERSION_1_3
+ || caps.supports_extension(vk::ExtSubgroupSizeControlFn::name()))
+ && subgroup.supported_operations.contains(
+ vk::SubgroupFeatureFlags::BASIC
+ | vk::SubgroupFeatureFlags::VOTE
+ | vk::SubgroupFeatureFlags::ARITHMETIC
+ | vk::SubgroupFeatureFlags::BALLOT
+ | vk::SubgroupFeatureFlags::SHUFFLE
+ | vk::SubgroupFeatureFlags::SHUFFLE_RELATIVE,
+ )
+ {
+ features.set(
+ F::SUBGROUP,
+ subgroup
+ .supported_stages
+ .contains(vk::ShaderStageFlags::COMPUTE | vk::ShaderStageFlags::FRAGMENT),
+ );
+ features.set(
+ F::SUBGROUP_VERTEX,
+ subgroup
+ .supported_stages
+ .contains(vk::ShaderStageFlags::VERTEX),
+ );
+ features.insert(F::SUBGROUP_BARRIER);
+ }
+ }
+
let supports_depth_format = |format| {
supports_format(
instance,
@@ -773,6 +829,13 @@ pub struct PhysicalDeviceProperties {
/// `VK_KHR_driver_properties` extension, promoted to Vulkan 1.2.
driver: Option<vk::PhysicalDeviceDriverPropertiesKHR>,
+ /// Additional `vk::PhysicalDevice` properties from Vulkan 1.1.
+ subgroup: Option<vk::PhysicalDeviceSubgroupProperties>,
+
+ /// Additional `vk::PhysicalDevice` properties from the
+ /// `VK_EXT_subgroup_size_control` extension, promoted to Vulkan 1.3.
+ subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlProperties>,
+
/// The device API version.
///
/// Which is the version of Vulkan supported for device-level functionality.
@@ -888,6 +951,11 @@ impl PhysicalDeviceProperties {
if self.supports_extension(vk::ExtImageRobustnessFn::name()) {
extensions.push(vk::ExtImageRobustnessFn::name());
}
+
+ // Require `VK_EXT_subgroup_size_control` if the associated feature was requested
+ if requested_features.contains(wgt::Features::SUBGROUP) {
+ extensions.push(vk::ExtSubgroupSizeControlFn::name());
+ }
}
// Optional `VK_KHR_swapchain_mutable_format`
@@ -987,6 +1055,14 @@ impl PhysicalDeviceProperties {
.min(crate::MAX_VERTEX_BUFFERS as u32),
max_vertex_attributes: limits.max_vertex_input_attributes,
max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride,
+ min_subgroup_size: self
+ .subgroup_size_control
+ .map(|subgroup_size| subgroup_size.min_subgroup_size)
+ .unwrap_or(0),
+ max_subgroup_size: self
+ .subgroup_size_control
+ .map(|subgroup_size| subgroup_size.max_subgroup_size)
+ .unwrap_or(0),
max_push_constant_size: limits.max_push_constants_size,
min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32,
min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32,
@@ -1042,6 +1118,9 @@ impl super::InstanceShared {
let supports_driver_properties = capabilities.device_api_version
>= vk::API_VERSION_1_2
|| capabilities.supports_extension(vk::KhrDriverPropertiesFn::name());
+ let supports_subgroup_size_control = capabilities.device_api_version
+ >= vk::API_VERSION_1_3
+ || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name());
let supports_acceleration_structure =
capabilities.supports_extension(vk::KhrAccelerationStructureFn::name());
@@ -1075,6 +1154,20 @@ impl super::InstanceShared {
builder = builder.push_next(next);
}
+ if capabilities.device_api_version >= vk::API_VERSION_1_1 {
+ let next = capabilities
+ .subgroup
+ .insert(vk::PhysicalDeviceSubgroupProperties::default());
+ builder = builder.push_next(next);
+ }
+
+ if supports_subgroup_size_control {
+ let next = capabilities
+ .subgroup_size_control
+ .insert(vk::PhysicalDeviceSubgroupSizeControlProperties::default());
+ builder = builder.push_next(next);
+ }
+
let mut properties2 = builder.build();
unsafe {
get_device_properties.get_physical_device_properties2(phd, &mut properties2);
@@ -1190,6 +1283,16 @@ impl super::InstanceShared {
builder = builder.push_next(next);
}
+ // `VK_EXT_subgroup_size_control` is promoted to 1.3
+ if capabilities.device_api_version >= vk::API_VERSION_1_3
+ || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name())
+ {
+ let next = features
+ .subgroup_size_control
+ .insert(vk::PhysicalDeviceSubgroupSizeControlFeatures::default());
+ builder = builder.push_next(next);
+ }
+
let mut features2 = builder.build();
unsafe {
get_device_properties.get_physical_device_features2(phd, &mut features2);
@@ -1382,6 +1485,9 @@ impl super::Instance {
}),
image_format_list: phd_capabilities.device_api_version >= vk::API_VERSION_1_2
|| phd_capabilities.supports_extension(vk::KhrImageFormatListFn::name()),
+ subgroup_size_control: phd_features
+ .subgroup_size_control
+ .map_or(false, |ext| ext.subgroup_size_control == vk::TRUE),
};
let capabilities = crate::Capabilities {
limits: phd_capabilities.to_wgpu_limits(),
@@ -1581,6 +1687,15 @@ impl super::Adapter {
capabilities.push(spv::Capability::Geometry);
}
+ if features.intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) {
+ capabilities.push(spv::Capability::GroupNonUniform);
+ capabilities.push(spv::Capability::GroupNonUniformVote);
+ capabilities.push(spv::Capability::GroupNonUniformArithmetic);
+ capabilities.push(spv::Capability::GroupNonUniformBallot);
+ capabilities.push(spv::Capability::GroupNonUniformShuffle);
+ capabilities.push(spv::Capability::GroupNonUniformShuffleRelative);
+ }
+
if features.intersects(
wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING
| wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING,
@@ -1616,7 +1731,13 @@ impl super::Adapter {
true, // could check `super::Workarounds::SEPARATE_ENTRY_POINTS`
);
spv::Options {
- lang_version: (1, 0),
+ lang_version: if features
+ .intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX)
+ {
+ (1, 3)
+ } else {
+ (1, 0)
+ },
flags,
capabilities: Some(capabilities.iter().cloned().collect()),
bounds_check_policies: naga::proc::BoundsCheckPolicies {
diff --git a/third_party/rust/wgpu-hal/src/vulkan/command.rs b/third_party/rust/wgpu-hal/src/vulkan/command.rs
index 43a2471954..ceb44dfbe6 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/command.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/command.rs
@@ -104,6 +104,11 @@ impl crate::CommandEncoder for super::CommandEncoder {
}
unsafe fn discard_encoding(&mut self) {
+ // Safe use requires this is not called in the "closed" state, so the buffer
+ // shouldn't be null. Assert this to make sure we're not pushing null
+ // buffers to the discard pile.
+ assert_ne!(self.active, vk::CommandBuffer::null());
+
self.discarded.push(self.active);
self.active = vk::CommandBuffer::null();
}
diff --git a/third_party/rust/wgpu-hal/src/vulkan/conv.rs b/third_party/rust/wgpu-hal/src/vulkan/conv.rs
index 8202c93aa3..fe284f32a9 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/conv.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/conv.rs
@@ -399,6 +399,7 @@ pub fn map_vertex_format(vertex_format: wgt::VertexFormat) -> vk::Format {
Vf::Float64x2 => vk::Format::R64G64_SFLOAT,
Vf::Float64x3 => vk::Format::R64G64B64_SFLOAT,
Vf::Float64x4 => vk::Format::R64G64B64A64_SFLOAT,
+ Vf::Unorm10_10_10_2 => vk::Format::A2B10G10R10_UNORM_PACK32,
}
}
diff --git a/third_party/rust/wgpu-hal/src/vulkan/device.rs b/third_party/rust/wgpu-hal/src/vulkan/device.rs
index 70028cc700..ec392533a0 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/device.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/device.rs
@@ -2,6 +2,7 @@ use super::conv;
use arrayvec::ArrayVec;
use ash::{extensions::khr, vk};
+use naga::back::spv::ZeroInitializeWorkgroupMemoryMode;
use parking_lot::Mutex;
use std::{
@@ -737,7 +738,8 @@ impl super::Device {
};
let needs_temp_options = !runtime_checks
|| !binding_map.is_empty()
- || naga_shader.debug_source.is_some();
+ || naga_shader.debug_source.is_some()
+ || !stage.zero_initialize_workgroup_memory;
let mut temp_options;
let options = if needs_temp_options {
temp_options = self.naga_options.clone();
@@ -760,27 +762,40 @@ impl super::Device {
file_name: debug.file_name.as_ref().as_ref(),
})
}
+ if !stage.zero_initialize_workgroup_memory {
+ temp_options.zero_initialize_workgroup_memory =
+ ZeroInitializeWorkgroupMemoryMode::None;
+ }
&temp_options
} else {
&self.naga_options
};
+
+ let (module, info) = naga::back::pipeline_constants::process_overrides(
+ &naga_shader.module,
+ &naga_shader.info,
+ stage.constants,
+ )
+ .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?;
+
let spv = {
profiling::scope!("naga::spv::write_vec");
- naga::back::spv::write_vec(
- &naga_shader.module,
- &naga_shader.info,
- options,
- Some(&pipeline_options),
- )
+ naga::back::spv::write_vec(&module, &info, options, Some(&pipeline_options))
}
.map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?;
self.create_shader_module_impl(&spv)?
}
};
+ let mut flags = vk::PipelineShaderStageCreateFlags::empty();
+ if self.shared.private_caps.subgroup_size_control {
+ flags |= vk::PipelineShaderStageCreateFlags::ALLOW_VARYING_SUBGROUP_SIZE
+ }
+
let entry_point = CString::new(stage.entry_point).unwrap();
let create_info = vk::PipelineShaderStageCreateInfo::builder()
+ .flags(flags)
.stage(conv::map_shader_stage(stage_flags))
.module(vk_module)
.name(&entry_point)
@@ -1587,6 +1602,7 @@ impl crate::Device for super::Device {
.shared
.workarounds
.contains(super::Workarounds::SEPARATE_ENTRY_POINTS)
+ || !naga_shader.module.overrides.is_empty()
{
return Ok(super::ShaderModule::Intermediate {
naga_shader,
diff --git a/third_party/rust/wgpu-hal/src/vulkan/mod.rs b/third_party/rust/wgpu-hal/src/vulkan/mod.rs
index 0cd385045c..d1ea82772e 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/mod.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/mod.rs
@@ -238,6 +238,7 @@ struct PrivateCapabilities {
robust_image_access2: bool,
zero_initialize_workgroup_memory: bool,
image_format_list: bool,
+ subgroup_size_control: bool,
}
bitflags::bitflags!(
@@ -413,6 +414,15 @@ pub struct TextureView {
attachment: FramebufferAttachment,
}
+impl TextureView {
+ /// Returns the raw Vulkan image view handle stored in `self.raw`.
+ ///
+ /// # Safety
+ ///
+ /// - The image view handle must not be manually destroyed
+ pub unsafe fn raw_handle(&self) -> vk::ImageView {
+ self.raw
+ }
+}
+
#[derive(Debug)]
pub struct Sampler {
raw: vk::Sampler,
@@ -438,6 +448,7 @@ pub struct BindGroup {
set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}
+/// Miscellaneous allocation recycling pool for `CommandEncoder`.
#[derive(Default)]
struct Temp {
marker: Vec<u8>,
@@ -467,11 +478,31 @@ impl Temp {
pub struct CommandEncoder {
raw: vk::CommandPool,
device: Arc<DeviceShared>,
+
+ /// The current command buffer, if `self` is in the ["recording"]
+ /// state.
+ ///
+ /// ["recording"]: crate::CommandEncoder
+ ///
+ /// If non-`null`, the buffer is in the Vulkan "recording" state.
active: vk::CommandBuffer,
+
+ /// What kind of pass we are currently within: compute or render.
bind_point: vk::PipelineBindPoint,
+
+ /// Allocation recycling pool for this encoder.
temp: Temp,
+
+ /// A pool of available command buffers.
+ ///
+ /// These are all in the Vulkan "initial" state.
free: Vec<vk::CommandBuffer>,
+
+ /// A pool of discarded command buffers.
+ ///
+ /// These could be in any Vulkan state except "pending".
discarded: Vec<vk::CommandBuffer>,
+
/// If this is true, the active renderpass enabled a debug span,
/// and needs to be disabled on renderpass close.
rpass_debug_marker_active: bool,
@@ -481,6 +512,15 @@ pub struct CommandEncoder {
end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,
}
+impl CommandEncoder {
+ /// Returns the raw handle of the current command buffer, `self.active`.
+ ///
+ /// The handle is returned unchecked, so it can be
+ /// `vk::CommandBuffer::null()` when no command buffer is active.
+ ///
+ /// # Safety
+ ///
+ /// - The command buffer handle must not be manually destroyed
+ pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
+ self.active
+ }
+}
+
impl fmt::Debug for CommandEncoder {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("CommandEncoder")
@@ -519,9 +559,47 @@ pub struct QuerySet {
raw: vk::QueryPool,
}
+/// The [`Api::Fence`] type for [`vulkan::Api`].
+///
+/// This is an `enum` because there are two possible implementations of
+/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
+/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
+/// require non-1.0 features.
+///
+/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
+/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
+/// otherwise.
+///
+/// [`Api::Fence`]: crate::Api::Fence
+/// [`vulkan::Api`]: Api
+/// [`Device::create_fence`]: crate::Device::create_fence
+/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
+/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
+/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
+ /// A Vulkan [timeline semaphore].
+ ///
+ /// These are simpler to use than Vulkan fences, since timeline semaphores
+ /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
+ ///
+ /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
+ /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
TimelineSemaphore(vk::Semaphore),
+
+ /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
+ ///
+ /// The effective [`FenceValue`] of this variant is the greater of
+ /// `last_completed` and the maximum value associated with a signalled fence
+ /// in `active`.
+ ///
+ /// Fences are available in all versions of Vulkan, but since they only have
+ /// two states, "signaled" and "unsignaled", we need to use a separate fence
+ /// for each queue submission we might want to wait for, and remember which
+ /// [`FenceValue`] each one represents.
+ ///
+ /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
+ /// [`FenceValue`]: crate::FenceValue
FencePool {
last_completed: crate::FenceValue,
/// The pending fence values have to be ascending.
@@ -531,21 +609,32 @@ pub enum Fence {
}
impl Fence {
+ /// Return the highest [`FenceValue`] among the signalled fences in `active`.
+ ///
+ /// As an optimization, assume that we already know that the fence has
+ /// reached `last_completed`, and don't bother checking fences whose values
+ /// are less than that: those fences remain in the `active` array only
+ /// because we haven't called `maintain` yet to clean them up.
+ ///
+ /// [`FenceValue`]: crate::FenceValue
fn check_active(
device: &ash::Device,
- mut max_value: crate::FenceValue,
+ mut last_completed: crate::FenceValue,
active: &[(crate::FenceValue, vk::Fence)],
) -> Result<crate::FenceValue, crate::DeviceError> {
for &(value, raw) in active.iter() {
unsafe {
- if value > max_value && device.get_fence_status(raw)? {
- max_value = value;
+ if value > last_completed && device.get_fence_status(raw)? {
+ last_completed = value;
}
}
}
- Ok(max_value)
+ Ok(last_completed)
}
+ /// Return the highest signalled [`FenceValue`] for `self`.
+ ///
+ /// [`FenceValue`]: crate::FenceValue
fn get_latest(
&self,
device: &ash::Device,
@@ -566,6 +655,18 @@ impl Fence {
}
}
+ /// Trim the internal state of this [`Fence`].
+ ///
+ /// This function has no externally visible effect, but you should call it
+ /// periodically to keep this fence's resource consumption under control.
+ ///
+ /// For fences using the [`FencePool`] implementation, this function
+ /// recycles fences that have been signaled. If you don't call this,
+ /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
+ /// time it's called.
+ ///
+ /// [`FencePool`]: Fence::FencePool
+ /// [`Queue::submit`]: crate::Queue::submit
fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
match *self {
Self::TimelineSemaphore(_) => {}