diff options
Diffstat (limited to 'third_party/rust/wgpu-hal/src/vulkan/mod.rs')
-rw-r--r-- | third_party/rust/wgpu-hal/src/vulkan/mod.rs | 626 |
1 files changed, 626 insertions, 0 deletions
diff --git a/third_party/rust/wgpu-hal/src/vulkan/mod.rs b/third_party/rust/wgpu-hal/src/vulkan/mod.rs new file mode 100644 index 0000000000..27200dc4e0 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/mod.rs @@ -0,0 +1,626 @@ +/*! +# Vulkan API internals. + +## Stack memory + +Ash expects slices, which we don't generally have available. +We cope with this requirement by the combination of the following ways: + - temporarily allocating `Vec` on heap, where overhead is permitted + - growing temporary local storage + - using `implace_it` on iterators + +## Framebuffers and Render passes + +Render passes are cached on the device and kept forever. + +Framebuffers are also cached on the device, but they are removed when +any of the image views (they have) gets removed. +If Vulkan supports image-less framebuffers, +then the actual views are excluded from the framebuffer key. + +## Fences + +If timeline semaphores are available, they are used 1:1 with wgpu-hal fences. +Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`. + +!*/ + +mod adapter; +mod command; +mod conv; +mod device; +mod instance; + +use std::{borrow::Borrow, ffi::CStr, fmt, num::NonZeroU32, sync::Arc}; + +use arrayvec::ArrayVec; +use ash::{ + extensions::{ext, khr}, + vk, +}; +use parking_lot::Mutex; + +const MILLIS_TO_NANOS: u64 = 1_000_000; +const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1; + +#[derive(Clone)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +struct DebugUtils { + extension: ext::DebugUtils, + messenger: vk::DebugUtilsMessengerEXT, +} + +pub struct InstanceShared { + raw: ash::Instance, + extensions: Vec<&'static CStr>, + drop_guard: Option<crate::DropGuard>, + flags: crate::InstanceFlags, + debug_utils: Option<DebugUtils>, + get_physical_device_properties: Option<khr::GetPhysicalDeviceProperties2>, + entry: ash::Entry, + has_nv_optimus: bool, + android_sdk_version: u32, + driver_api_version: u32, +} + +pub struct Instance { + shared: Arc<InstanceShared>, +} + +struct Swapchain { + raw: vk::SwapchainKHR, + raw_flags: vk::SwapchainCreateFlagsKHR, + functor: khr::Swapchain, + device: Arc<DeviceShared>, + fence: vk::Fence, + images: Vec<vk::Image>, + config: crate::SurfaceConfiguration, + view_formats: Vec<wgt::TextureFormat>, +} + +pub struct Surface { + raw: vk::SurfaceKHR, + functor: khr::Surface, + instance: Arc<InstanceShared>, + swapchain: Option<Swapchain>, +} + +#[derive(Debug)] +pub struct SurfaceTexture { + index: u32, + texture: Texture, +} + +impl Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + &self.texture + } +} + +pub struct Adapter { + raw: vk::PhysicalDevice, + instance: Arc<InstanceShared>, + //queue_families: Vec<vk::QueueFamilyProperties>, + known_memory_flags: vk::MemoryPropertyFlags, + phd_capabilities: adapter::PhysicalDeviceCapabilities, + //phd_features: adapter::PhysicalDeviceFeatures, + downlevel_flags: wgt::DownlevelFlags, + private_caps: PrivateCapabilities, + workarounds: Workarounds, +} + +// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`. +enum ExtensionFn<T> { + /// The loaded function pointer struct for an extension. + Extension(T), + /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used. + Promoted, +} + +struct DeviceExtensionFunctions { + draw_indirect_count: Option<khr::DrawIndirectCount>, + timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>, +} + +/// Set of internal capabilities, which don't show up in the exposed +/// device geometry, but affect the code paths taken internally. +#[derive(Clone, Debug)] +struct PrivateCapabilities { + /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height` or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport height does not require a Y shift. + /// + /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`). + flip_y_requires_shift: bool, + imageless_framebuffers: bool, + image_view_usage: bool, + timeline_semaphores: bool, + texture_d24: bool, + texture_d24_s8: bool, + texture_s8: bool, + /// Ability to present contents to any screen. Only needed to work around broken platform configurations. + can_present: bool, + non_coherent_map_mask: wgt::BufferAddress, + robust_buffer_access: bool, + robust_image_access: bool, + zero_initialize_workgroup_memory: bool, +} + +bitflags::bitflags!( + /// Workaround flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct Workarounds: u32 { + /// Only generate SPIR-V for one entry point at a time. + const SEPARATE_ENTRY_POINTS = 0x1; + /// Qualcomm OOMs when there are zero color attachments but a non-null pointer + /// to a subpass resolve attachment array. This nulls out that pointer in that case. + const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2; + } +); + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct AttachmentKey { + format: vk::Format, + layout: vk::ImageLayout, + ops: crate::AttachmentOps, +} + +impl AttachmentKey { + /// Returns an attachment key for a compatible attachment. + fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self { + Self { + format, + layout, + ops: crate::AttachmentOps::all(), + } + } +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct ColorAttachmentKey { + base: AttachmentKey, + resolve: Option<AttachmentKey>, +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct DepthStencilAttachmentKey { + base: AttachmentKey, + stencil_ops: crate::AttachmentOps, +} + +#[derive(Clone, Eq, Default, Hash, PartialEq)] +struct RenderPassKey { + colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>, + depth_stencil: Option<DepthStencilAttachmentKey>, + sample_count: u32, + multiview: Option<NonZeroU32>, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct FramebufferAttachment { + /// Can be NULL if the framebuffer is image-less + raw: vk::ImageView, + raw_image_flags: vk::ImageCreateFlags, + view_usage: crate::TextureUses, + view_format: wgt::TextureFormat, + raw_view_formats: Vec<vk::Format>, +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct FramebufferKey { + attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>, + extent: wgt::Extent3d, + sample_count: u32, +} + +struct DeviceShared { + raw: ash::Device, + family_index: u32, + queue_index: u32, + raw_queue: ash::vk::Queue, + handle_is_owned: bool, + instance: Arc<InstanceShared>, + physical_device: ash::vk::PhysicalDevice, + enabled_extensions: Vec<&'static CStr>, + extension_fns: DeviceExtensionFunctions, + vendor_id: u32, + timestamp_period: f32, + private_caps: PrivateCapabilities, + workarounds: Workarounds, + render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>, + framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>, +} + +pub struct Device { + shared: Arc<DeviceShared>, + mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>, + desc_allocator: + Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>, + valid_ash_memory_types: u32, + naga_options: naga::back::spv::Options, + #[cfg(feature = "renderdoc")] + render_doc: crate::auxil::renderdoc::RenderDoc, +} + +pub struct Queue { + raw: vk::Queue, + swapchain_fn: khr::Swapchain, + device: Arc<DeviceShared>, + family_index: u32, + /// We use a redundant chain of semaphores to pass on the signal + /// from submissions to the last present, since it's required by the + /// specification. + /// It would be correct to use a single semaphore there, but + /// [Intel hangs in `anv_queue_finish`](https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508). + relay_semaphores: [vk::Semaphore; 2], + relay_index: Option<usize>, +} + +#[derive(Debug)] +pub struct Buffer { + raw: vk::Buffer, + block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>, +} + +#[derive(Debug)] +pub struct Texture { + raw: vk::Image, + drop_guard: Option<crate::DropGuard>, + block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>, + usage: crate::TextureUses, + format: wgt::TextureFormat, + raw_flags: vk::ImageCreateFlags, + copy_size: crate::CopyExtent, + view_formats: Vec<wgt::TextureFormat>, +} + +impl Texture { + /// # Safety + /// + /// - The image handle must not be manually destroyed + pub unsafe fn raw_handle(&self) -> vk::Image { + self.raw + } +} + +#[derive(Debug)] +pub struct TextureView { + raw: vk::ImageView, + layers: NonZeroU32, + attachment: FramebufferAttachment, +} + +#[derive(Debug)] +pub struct Sampler { + raw: vk::Sampler, +} + +#[derive(Debug)] +pub struct BindGroupLayout { + raw: vk::DescriptorSetLayout, + desc_count: gpu_descriptor::DescriptorTotalCount, + types: Box<[(vk::DescriptorType, u32)]>, + /// Map of binding index to size, + binding_arrays: Vec<(u32, NonZeroU32)>, +} + +#[derive(Debug)] +pub struct PipelineLayout { + raw: vk::PipelineLayout, + binding_arrays: naga::back::spv::BindingMap, +} + +#[derive(Debug)] +pub struct BindGroup { + set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>, +} + +#[derive(Default)] +struct Temp { + marker: Vec<u8>, + buffer_barriers: Vec<vk::BufferMemoryBarrier>, + image_barriers: Vec<vk::ImageMemoryBarrier>, +} + +unsafe impl Send for Temp {} +unsafe impl Sync for Temp {} + +impl Temp { + fn clear(&mut self) { + self.marker.clear(); + self.buffer_barriers.clear(); + self.image_barriers.clear(); + //see also - https://github.com/NotIntMan/inplace_it/issues/8 + } + + fn make_c_str(&mut self, name: &str) -> &CStr { + self.marker.clear(); + self.marker.extend_from_slice(name.as_bytes()); + self.marker.push(0); + unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) } + } +} + +pub struct CommandEncoder { + raw: vk::CommandPool, + device: Arc<DeviceShared>, + active: vk::CommandBuffer, + bind_point: vk::PipelineBindPoint, + temp: Temp, + free: Vec<vk::CommandBuffer>, + discarded: Vec<vk::CommandBuffer>, + /// If this is true, the active renderpass enabled a debug span, + /// and needs to be disabled on renderpass close. + rpass_debug_marker_active: bool, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("raw", &self.raw) + .finish() + } +} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: vk::CommandBuffer, +} + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum ShaderModule { + Raw(vk::ShaderModule), + Intermediate { + naga_shader: crate::NagaShader, + runtime_checks: bool, + }, +} + +#[derive(Debug)] +pub struct RenderPipeline { + raw: vk::Pipeline, +} + +#[derive(Debug)] +pub struct ComputePipeline { + raw: vk::Pipeline, +} + +#[derive(Debug)] +pub struct QuerySet { + raw: vk::QueryPool, +} + +#[derive(Debug)] +pub enum Fence { + TimelineSemaphore(vk::Semaphore), + FencePool { + last_completed: crate::FenceValue, + /// The pending fence values have to be ascending. + active: Vec<(crate::FenceValue, vk::Fence)>, + free: Vec<vk::Fence>, + }, +} + +impl Fence { + fn check_active( + device: &ash::Device, + mut max_value: crate::FenceValue, + active: &[(crate::FenceValue, vk::Fence)], + ) -> Result<crate::FenceValue, crate::DeviceError> { + for &(value, raw) in active.iter() { + unsafe { + if value > max_value && device.get_fence_status(raw)? { + max_value = value; + } + } + } + Ok(max_value) + } + + fn get_latest( + &self, + device: &ash::Device, + extension: Option<&ExtensionFn<khr::TimelineSemaphore>>, + ) -> Result<crate::FenceValue, crate::DeviceError> { + match *self { + Self::TimelineSemaphore(raw) => unsafe { + Ok(match *extension.unwrap() { + ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?, + ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?, + }) + }, + Self::FencePool { + last_completed, + ref active, + free: _, + } => Self::check_active(device, last_completed, active), + } + } + + fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> { + match *self { + Self::TimelineSemaphore(_) => {} + Self::FencePool { + ref mut last_completed, + ref mut active, + ref mut free, + } => { + let latest = Self::check_active(device, *last_completed, active)?; + let base_free = free.len(); + for &(value, raw) in active.iter() { + if value <= latest { + free.push(raw); + } + } + if free.len() != base_free { + active.retain(|&(value, _)| value > latest); + unsafe { + device.reset_fences(&free[base_free..])?; + } + } + *last_completed = latest; + } + } + Ok(()) + } +} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&CommandBuffer], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + let vk_cmd_buffers = command_buffers + .iter() + .map(|cmd| cmd.raw) + .collect::<Vec<_>>(); + + let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + + let mut fence_raw = vk::Fence::null(); + let mut vk_timeline_info; + let mut signal_semaphores = [vk::Semaphore::null(), vk::Semaphore::null()]; + let signal_values; + + if let Some((fence, value)) = signal_fence { + fence.maintain(&self.device.raw)?; + match *fence { + Fence::TimelineSemaphore(raw) => { + signal_values = [!0, value]; + signal_semaphores[1] = raw; + vk_timeline_info = vk::TimelineSemaphoreSubmitInfo::builder() + .signal_semaphore_values(&signal_values); + vk_info = vk_info.push_next(&mut vk_timeline_info); + } + Fence::FencePool { + ref mut active, + ref mut free, + .. + } => { + fence_raw = match free.pop() { + Some(raw) => raw, + None => unsafe { + self.device + .raw + .create_fence(&vk::FenceCreateInfo::builder(), None)? + }, + }; + active.push((value, fence_raw)); + } + } + } + + let wait_stage_mask = [vk::PipelineStageFlags::TOP_OF_PIPE]; + let sem_index = match self.relay_index { + Some(old_index) => { + vk_info = vk_info + .wait_semaphores(&self.relay_semaphores[old_index..old_index + 1]) + .wait_dst_stage_mask(&wait_stage_mask); + (old_index + 1) % self.relay_semaphores.len() + } + None => 0, + }; + self.relay_index = Some(sem_index); + signal_semaphores[0] = self.relay_semaphores[sem_index]; + + let signal_count = if signal_semaphores[1] == vk::Semaphore::null() { + 1 + } else { + 2 + }; + vk_info = vk_info.signal_semaphores(&signal_semaphores[..signal_count]); + + profiling::scope!("vkQueueSubmit"); + unsafe { + self.device + .raw + .queue_submit(self.raw, &[vk_info.build()], fence_raw)? + }; + Ok(()) + } + + unsafe fn present( + &mut self, + surface: &mut Surface, + texture: SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + let ssc = surface.swapchain.as_ref().unwrap(); + + let swapchains = [ssc.raw]; + let image_indices = [texture.index]; + let mut vk_info = vk::PresentInfoKHR::builder() + .swapchains(&swapchains) + .image_indices(&image_indices); + + if let Some(old_index) = self.relay_index.take() { + vk_info = vk_info.wait_semaphores(&self.relay_semaphores[old_index..old_index + 1]); + } + + let suboptimal = { + profiling::scope!("vkQueuePresentKHR"); + unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| { + match error { + vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated, + vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost, + _ => crate::DeviceError::from(error).into(), + } + })? + }; + if suboptimal { + // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. + // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation + // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation). + // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`. + #[cfg(not(target_os = "android"))] + log::warn!("Suboptimal present of frame {}", texture.index); + } + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + self.device.timestamp_period + } +} + +impl From<vk::Result> for crate::DeviceError { + fn from(result: vk::Result) -> Self { + match result { + vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => { + Self::OutOfMemory + } + vk::Result::ERROR_DEVICE_LOST => Self::Lost, + _ => { + log::warn!("Unrecognized device error {:?}", result); + Self::Lost + } + } + } +} |