diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/wgpu-hal/src | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/wgpu-hal/src')
53 files changed, 29464 insertions, 0 deletions
diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs new file mode 100644 index 0000000000..0456cd719b --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs @@ -0,0 +1,258 @@ +use winapi::shared::dxgiformat; + +pub fn map_texture_format_failable(format: wgt::TextureFormat) -> Option<dxgiformat::DXGI_FORMAT> { + use wgt::TextureFormat as Tf; + use winapi::shared::dxgiformat::*; + + Some(match format { + Tf::R8Unorm => DXGI_FORMAT_R8_UNORM, + Tf::R8Snorm => DXGI_FORMAT_R8_SNORM, + Tf::R8Uint => DXGI_FORMAT_R8_UINT, + Tf::R8Sint => DXGI_FORMAT_R8_SINT, + Tf::R16Uint => DXGI_FORMAT_R16_UINT, + Tf::R16Sint => DXGI_FORMAT_R16_SINT, + Tf::R16Unorm => DXGI_FORMAT_R16_UNORM, + Tf::R16Snorm => DXGI_FORMAT_R16_SNORM, + Tf::R16Float => DXGI_FORMAT_R16_FLOAT, + Tf::Rg8Unorm => DXGI_FORMAT_R8G8_UNORM, + Tf::Rg8Snorm => DXGI_FORMAT_R8G8_SNORM, + Tf::Rg8Uint => DXGI_FORMAT_R8G8_UINT, + Tf::Rg8Sint => DXGI_FORMAT_R8G8_SINT, + Tf::Rg16Unorm => DXGI_FORMAT_R16G16_UNORM, + Tf::Rg16Snorm => DXGI_FORMAT_R16G16_SNORM, + Tf::R32Uint => DXGI_FORMAT_R32_UINT, + Tf::R32Sint => DXGI_FORMAT_R32_SINT, + Tf::R32Float => DXGI_FORMAT_R32_FLOAT, + Tf::Rg16Uint => DXGI_FORMAT_R16G16_UINT, + Tf::Rg16Sint => DXGI_FORMAT_R16G16_SINT, + Tf::Rg16Float => DXGI_FORMAT_R16G16_FLOAT, + Tf::Rgba8Unorm => DXGI_FORMAT_R8G8B8A8_UNORM, + Tf::Rgba8UnormSrgb => DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + Tf::Bgra8UnormSrgb => DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, + Tf::Rgba8Snorm => DXGI_FORMAT_R8G8B8A8_SNORM, + Tf::Bgra8Unorm => DXGI_FORMAT_B8G8R8A8_UNORM, + Tf::Rgba8Uint => DXGI_FORMAT_R8G8B8A8_UINT, + Tf::Rgba8Sint => DXGI_FORMAT_R8G8B8A8_SINT, + Tf::Rgb9e5Ufloat => DXGI_FORMAT_R9G9B9E5_SHAREDEXP, + Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, + Tf::Rg11b10Float => DXGI_FORMAT_R11G11B10_FLOAT, + Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT, + Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT, + Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT, + Tf::Rgba16Uint => DXGI_FORMAT_R16G16B16A16_UINT, + Tf::Rgba16Sint => DXGI_FORMAT_R16G16B16A16_SINT, + Tf::Rgba16Unorm => DXGI_FORMAT_R16G16B16A16_UNORM, + Tf::Rgba16Snorm => DXGI_FORMAT_R16G16B16A16_SNORM, + Tf::Rgba16Float => DXGI_FORMAT_R16G16B16A16_FLOAT, + Tf::Rgba32Uint => DXGI_FORMAT_R32G32B32A32_UINT, + Tf::Rgba32Sint => DXGI_FORMAT_R32G32B32A32_SINT, + Tf::Rgba32Float => DXGI_FORMAT_R32G32B32A32_FLOAT, + Tf::Stencil8 => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth16Unorm => DXGI_FORMAT_D16_UNORM, + Tf::Depth24Plus => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth24PlusStencil8 => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth32Float => DXGI_FORMAT_D32_FLOAT, + Tf::Depth32FloatStencil8 => DXGI_FORMAT_D32_FLOAT_S8X24_UINT, + Tf::Bc1RgbaUnorm => DXGI_FORMAT_BC1_UNORM, + Tf::Bc1RgbaUnormSrgb => DXGI_FORMAT_BC1_UNORM_SRGB, + Tf::Bc2RgbaUnorm => DXGI_FORMAT_BC2_UNORM, + Tf::Bc2RgbaUnormSrgb => DXGI_FORMAT_BC2_UNORM_SRGB, + Tf::Bc3RgbaUnorm => DXGI_FORMAT_BC3_UNORM, + Tf::Bc3RgbaUnormSrgb => DXGI_FORMAT_BC3_UNORM_SRGB, + Tf::Bc4RUnorm => DXGI_FORMAT_BC4_UNORM, + Tf::Bc4RSnorm => DXGI_FORMAT_BC4_SNORM, + Tf::Bc5RgUnorm => DXGI_FORMAT_BC5_UNORM, + Tf::Bc5RgSnorm => DXGI_FORMAT_BC5_SNORM, + Tf::Bc6hRgbUfloat => DXGI_FORMAT_BC6H_UF16, + Tf::Bc6hRgbFloat => DXGI_FORMAT_BC6H_SF16, + Tf::Bc7RgbaUnorm => DXGI_FORMAT_BC7_UNORM, + Tf::Bc7RgbaUnormSrgb => DXGI_FORMAT_BC7_UNORM_SRGB, + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | Tf::EacRg11Unorm + | Tf::EacRg11Snorm + | Tf::Astc { + block: _, + channel: _, + } => return None, + }) +} + +pub fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match map_texture_format_failable(format) { + Some(f) => f, + None => unreachable!(), + } +} + +// Note: DXGI doesn't allow sRGB format on the swapchain, +// but creating RTV of swapchain buffers with sRGB works. +pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::TextureFormat::Bgra8UnormSrgb => dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, + wgt::TextureFormat::Rgba8UnormSrgb => dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + _ => map_texture_format(format), + } +} + +// SRV and UAV can't use the depth or typeless formats +// see https://microsoft.github.io/DirectX-Specs/d3d/PlanarDepthStencilDDISpec.html#view-creation +pub fn map_texture_format_for_srv_uav( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> Option<dxgiformat::DXGI_FORMAT> { + Some(match (format, aspect) { + (wgt::TextureFormat::Depth16Unorm, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R16_UNORM + } + (wgt::TextureFormat::Depth32Float, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R32_FLOAT + } + (wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS + } + ( + wgt::TextureFormat::Depth24Plus | wgt::TextureFormat::Depth24PlusStencil8, + crate::FormatAspects::DEPTH, + ) => dxgiformat::DXGI_FORMAT_R24_UNORM_X8_TYPELESS, + + (wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::STENCIL) => { + dxgiformat::DXGI_FORMAT_X32_TYPELESS_G8X24_UINT + } + ( + wgt::TextureFormat::Stencil8 | wgt::TextureFormat::Depth24PlusStencil8, + crate::FormatAspects::STENCIL, + ) => dxgiformat::DXGI_FORMAT_X24_TYPELESS_G8_UINT, + + (format, crate::FormatAspects::COLOR) => map_texture_format(format), + + _ => return None, + }) +} + +// see https://microsoft.github.io/DirectX-Specs/d3d/PlanarDepthStencilDDISpec.html#planar-layout-for-staging-from-buffer +pub fn map_texture_format_for_copy( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> Option<dxgiformat::DXGI_FORMAT> { + Some(match (format, aspect) { + (wgt::TextureFormat::Depth16Unorm, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R16_UNORM + } + ( + wgt::TextureFormat::Depth32Float | wgt::TextureFormat::Depth32FloatStencil8, + crate::FormatAspects::DEPTH, + ) => dxgiformat::DXGI_FORMAT_R32_FLOAT, + + ( + wgt::TextureFormat::Stencil8 + | wgt::TextureFormat::Depth24PlusStencil8 + | wgt::TextureFormat::Depth32FloatStencil8, + crate::FormatAspects::STENCIL, + ) => dxgiformat::DXGI_FORMAT_R8_UINT, + + (format, crate::FormatAspects::COLOR) => map_texture_format(format), + + _ => return None, + }) +} + +pub fn map_texture_format_for_resource( + format: wgt::TextureFormat, + usage: crate::TextureUses, + has_view_formats: bool, + casting_fully_typed_format_supported: bool, +) -> dxgiformat::DXGI_FORMAT { + use wgt::TextureFormat as Tf; + use winapi::shared::dxgiformat::*; + + if casting_fully_typed_format_supported { + map_texture_format(format) + + // We might view this resource as srgb or non-srgb + } else if has_view_formats { + match format { + Tf::Rgba8Unorm | Tf::Rgba8UnormSrgb => DXGI_FORMAT_R8G8B8A8_TYPELESS, + Tf::Bgra8Unorm | Tf::Bgra8UnormSrgb => DXGI_FORMAT_B8G8R8A8_TYPELESS, + Tf::Bc1RgbaUnorm | Tf::Bc1RgbaUnormSrgb => DXGI_FORMAT_BC1_TYPELESS, + Tf::Bc2RgbaUnorm | Tf::Bc2RgbaUnormSrgb => DXGI_FORMAT_BC2_TYPELESS, + Tf::Bc3RgbaUnorm | Tf::Bc3RgbaUnormSrgb => DXGI_FORMAT_BC3_TYPELESS, + Tf::Bc7RgbaUnorm | Tf::Bc7RgbaUnormSrgb => DXGI_FORMAT_BC7_TYPELESS, + format => map_texture_format(format), + } + + // We might view this resource as SRV/UAV but also as DSV + } else if format.is_depth_stencil_format() + && usage.intersects( + crate::TextureUses::RESOURCE + | crate::TextureUses::STORAGE_READ + | crate::TextureUses::STORAGE_READ_WRITE, + ) + { + match format { + Tf::Depth16Unorm => DXGI_FORMAT_R16_TYPELESS, + Tf::Depth32Float => DXGI_FORMAT_R32_TYPELESS, + Tf::Depth32FloatStencil8 => DXGI_FORMAT_R32G8X24_TYPELESS, + Tf::Stencil8 | Tf::Depth24Plus | Tf::Depth24PlusStencil8 => DXGI_FORMAT_R24G8_TYPELESS, + _ => unreachable!(), + } + } else { + map_texture_format(format) + } +} + +pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::IndexFormat::Uint16 => dxgiformat::DXGI_FORMAT_R16_UINT, + wgt::IndexFormat::Uint32 => dxgiformat::DXGI_FORMAT_R32_UINT, + } +} + +pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT { + use wgt::VertexFormat as Vf; + use winapi::shared::dxgiformat::*; + + match format { + Vf::Unorm8x2 => DXGI_FORMAT_R8G8_UNORM, + Vf::Snorm8x2 => DXGI_FORMAT_R8G8_SNORM, + Vf::Uint8x2 => DXGI_FORMAT_R8G8_UINT, + Vf::Sint8x2 => DXGI_FORMAT_R8G8_SINT, + Vf::Unorm8x4 => DXGI_FORMAT_R8G8B8A8_UNORM, + Vf::Snorm8x4 => DXGI_FORMAT_R8G8B8A8_SNORM, + Vf::Uint8x4 => DXGI_FORMAT_R8G8B8A8_UINT, + Vf::Sint8x4 => DXGI_FORMAT_R8G8B8A8_SINT, + Vf::Unorm16x2 => DXGI_FORMAT_R16G16_UNORM, + Vf::Snorm16x2 => DXGI_FORMAT_R16G16_SNORM, + Vf::Uint16x2 => DXGI_FORMAT_R16G16_UINT, + Vf::Sint16x2 => DXGI_FORMAT_R16G16_SINT, + Vf::Float16x2 => DXGI_FORMAT_R16G16_FLOAT, + Vf::Unorm16x4 => DXGI_FORMAT_R16G16B16A16_UNORM, + Vf::Snorm16x4 => DXGI_FORMAT_R16G16B16A16_SNORM, + Vf::Uint16x4 => DXGI_FORMAT_R16G16B16A16_UINT, + Vf::Sint16x4 => DXGI_FORMAT_R16G16B16A16_SINT, + Vf::Float16x4 => DXGI_FORMAT_R16G16B16A16_FLOAT, + Vf::Uint32 => DXGI_FORMAT_R32_UINT, + Vf::Sint32 => DXGI_FORMAT_R32_SINT, + Vf::Float32 => DXGI_FORMAT_R32_FLOAT, + Vf::Uint32x2 => DXGI_FORMAT_R32G32_UINT, + Vf::Sint32x2 => DXGI_FORMAT_R32G32_SINT, + Vf::Float32x2 => DXGI_FORMAT_R32G32_FLOAT, + Vf::Uint32x3 => DXGI_FORMAT_R32G32B32_UINT, + Vf::Sint32x3 => DXGI_FORMAT_R32G32B32_SINT, + Vf::Float32x3 => DXGI_FORMAT_R32G32B32_FLOAT, + Vf::Uint32x4 => DXGI_FORMAT_R32G32B32A32_UINT, + Vf::Sint32x4 => DXGI_FORMAT_R32G32B32A32_SINT, + Vf::Float32x4 => DXGI_FORMAT_R32G32B32A32_FLOAT, + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + } +} + +pub fn map_acomposite_alpha_mode(_mode: wgt::CompositeAlphaMode) -> d3d12::AlphaMode { + d3d12::AlphaMode::Ignore +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/exception.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/exception.rs new file mode 100644 index 0000000000..fceac7db5f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/exception.rs @@ -0,0 +1,100 @@ +use std::{borrow::Cow, slice}; + +use parking_lot::{lock_api::RawMutex, Mutex}; +use winapi::{ + um::{errhandlingapi, winnt}, + vc::excpt, +}; + +// This is a mutex as opposed to an atomic as we need to completely +// lock everyone out until we have registered or unregistered the +// exception handler, otherwise really nasty races could happen. +// +// By routing all the registration through these functions we can guarentee +// there is either 1 or 0 exception handlers registered, not multiple. +static EXCEPTION_HANLDER_COUNT: Mutex<usize> = Mutex::const_new(parking_lot::RawMutex::INIT, 0); + +pub fn register_exception_handler() { + let mut count_guard = EXCEPTION_HANLDER_COUNT.lock(); + if *count_guard == 0 { + unsafe { + errhandlingapi::AddVectoredExceptionHandler(0, Some(output_debug_string_handler)) + }; + } + *count_guard += 1; +} + +pub fn unregister_exception_handler() { + let mut count_guard = EXCEPTION_HANLDER_COUNT.lock(); + if *count_guard == 1 { + unsafe { + errhandlingapi::RemoveVectoredExceptionHandler(output_debug_string_handler as *mut _) + }; + } + *count_guard -= 1; +} + +const MESSAGE_PREFIXES: &[(&str, log::Level)] = &[ + ("CORRUPTION", log::Level::Error), + ("ERROR", log::Level::Error), + ("WARNING", log::Level::Warn), + ("INFO", log::Level::Info), + ("MESSAGE", log::Level::Debug), +]; + +unsafe extern "system" fn output_debug_string_handler( + exception_info: *mut winnt::EXCEPTION_POINTERS, +) -> i32 { + // See https://stackoverflow.com/a/41480827 + let record = unsafe { &*(*exception_info).ExceptionRecord }; + if record.NumberParameters != 2 { + return excpt::EXCEPTION_CONTINUE_SEARCH; + } + let message = match record.ExceptionCode { + winnt::DBG_PRINTEXCEPTION_C => String::from_utf8_lossy(unsafe { + slice::from_raw_parts( + record.ExceptionInformation[1] as *const u8, + record.ExceptionInformation[0], + ) + }), + winnt::DBG_PRINTEXCEPTION_WIDE_C => Cow::Owned(String::from_utf16_lossy(unsafe { + slice::from_raw_parts( + record.ExceptionInformation[1] as *const u16, + record.ExceptionInformation[0], + ) + })), + _ => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + let message = match message.strip_prefix("D3D12 ") { + Some(msg) => msg + .trim_end_matches("\n\0") + .trim_end_matches("[ STATE_CREATION WARNING #0: UNKNOWN]"), + None => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + let (message, level) = match MESSAGE_PREFIXES + .iter() + .find(|&&(prefix, _)| message.starts_with(prefix)) + { + Some(&(prefix, level)) => (&message[prefix.len() + 2..], level), + None => (message, log::Level::Debug), + }; + + if level == log::Level::Warn && message.contains("#82") { + // This is are useless spammy warnings (#820, #821): + // "The application did not pass any clear value to resource creation" + return excpt::EXCEPTION_CONTINUE_SEARCH; + } + + let _ = std::panic::catch_unwind(|| { + log::log!(level, "{}", message); + }); + + if cfg!(debug_assertions) && level == log::Level::Error { + // Set canary and continue + crate::VALIDATION_CANARY.set(); + } + + excpt::EXCEPTION_CONTINUE_EXECUTION +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/factory.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/factory.rs new file mode 100644 index 0000000000..ec3c74c194 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/factory.rs @@ -0,0 +1,210 @@ +use winapi::{ + shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, winerror}, + Interface, +}; + +use super::result::HResult as _; + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum DxgiFactoryType { + Factory1, + Factory2, + Factory4, + Factory6, +} + +pub fn enumerate_adapters(factory: d3d12::DxgiFactory) -> Vec<d3d12::DxgiAdapter> { + let mut adapters = Vec::with_capacity(8); + + for cur_index in 0.. { + if let Some(factory6) = factory.as_factory6() { + profiling::scope!("IDXGIFactory6::EnumAdapterByGpuPreference"); + // We're already at dxgi1.6, we can grab IDXGIAdapater4 directly + let mut adapter4 = d3d12::WeakPtr::<dxgi1_6::IDXGIAdapter4>::null(); + let hr = unsafe { + factory6.EnumAdapterByGpuPreference( + cur_index, + dxgi1_6::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + &dxgi1_6::IDXGIAdapter4::uuidof(), + adapter4.mut_void(), + ) + }; + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.into_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + adapters.push(d3d12::DxgiAdapter::Adapter4(adapter4)); + continue; + } + + profiling::scope!("IDXGIFactory1::EnumAdapters1"); + let mut adapter1 = d3d12::WeakPtr::<dxgi::IDXGIAdapter1>::null(); + let hr = unsafe { factory.EnumAdapters1(cur_index, adapter1.mut_self()) }; + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.into_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + // Do the most aggressive casts first, skipping Adpater4 as we definitely don't have dxgi1_6. + + // Adapter1 -> Adapter3 + unsafe { + match adapter1.cast::<dxgi1_4::IDXGIAdapter3>().into_result() { + Ok(adapter3) => { + adapter1.destroy(); + adapters.push(d3d12::DxgiAdapter::Adapter3(adapter3)); + continue; + } + Err(err) => { + log::info!("Failed casting Adapter1 to Adapter3: {}", err); + } + } + } + + // Adapter1 -> Adapter2 + unsafe { + match adapter1.cast::<dxgi1_2::IDXGIAdapter2>().into_result() { + Ok(adapter2) => { + adapter1.destroy(); + adapters.push(d3d12::DxgiAdapter::Adapter2(adapter2)); + continue; + } + Err(err) => { + log::info!("Failed casting Adapter1 to Adapter2: {}", err); + } + } + } + + adapters.push(d3d12::DxgiAdapter::Adapter1(adapter1)); + } + + adapters +} + +/// Tries to create a IDXGIFactory6, then a IDXGIFactory4, then a IDXGIFactory2, then a IDXGIFactory1, +/// returning the one that succeeds, or if the required_factory_type fails to be +/// created. +pub fn create_factory( + required_factory_type: DxgiFactoryType, + instance_flags: crate::InstanceFlags, +) -> Result<(d3d12::DxgiLib, d3d12::DxgiFactory), crate::InstanceError> { + let lib_dxgi = d3d12::DxgiLib::new().map_err(|_| crate::InstanceError)?; + + let mut factory_flags = d3d12::FactoryCreationFlags::empty(); + + if instance_flags.contains(crate::InstanceFlags::VALIDATION) { + // The `DXGI_CREATE_FACTORY_DEBUG` flag is only allowed to be passed to + // `CreateDXGIFactory2` if the debug interface is actually available. So + // we check for whether it exists first. + match lib_dxgi.get_debug_interface1() { + Ok(pair) => match pair.into_result() { + Ok(debug_controller) => { + unsafe { debug_controller.destroy() }; + factory_flags |= d3d12::FactoryCreationFlags::DEBUG; + } + Err(err) => { + log::warn!("Unable to enable DXGI debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for DXGI not found: {:?}", err); + } + } + + // Intercept `OutputDebugString` calls + super::exception::register_exception_handler(); + } + + // Try to create IDXGIFactory4 + let factory4 = match lib_dxgi.create_factory2(factory_flags) { + Ok(pair) => match pair.into_result() { + Ok(factory) => Some(factory), + // We hard error here as we _should have_ been able to make a factory4 but couldn't. + Err(err) => { + log::error!("Failed to create IDXGIFactory4: {}", err); + return Err(crate::InstanceError); + } + }, + // If we require factory4, hard error. + Err(err) if required_factory_type == DxgiFactoryType::Factory4 => { + log::error!("IDXGIFactory1 creation function not found: {:?}", err); + return Err(crate::InstanceError); + } + // If we don't print it to info as all win7 will hit this case. + Err(err) => { + log::info!("IDXGIFactory1 creation function not found: {:?}", err); + None + } + }; + + if let Some(factory4) = factory4 { + // Try to cast the IDXGIFactory4 into IDXGIFactory6 + let factory6 = unsafe { factory4.cast::<dxgi1_6::IDXGIFactory6>().into_result() }; + match factory6 { + Ok(factory6) => { + unsafe { + factory4.destroy(); + } + return Ok((lib_dxgi, d3d12::DxgiFactory::Factory6(factory6))); + } + // If we require factory6, hard error. + Err(err) if required_factory_type == DxgiFactoryType::Factory6 => { + log::warn!("Failed to cast IDXGIFactory4 to IDXGIFactory6: {:?}", err); + return Err(crate::InstanceError); + } + // If we don't print it to info. + Err(err) => { + log::info!("Failed to cast IDXGIFactory4 to IDXGIFactory6: {:?}", err); + return Ok((lib_dxgi, d3d12::DxgiFactory::Factory4(factory4))); + } + } + } + + // Try to create IDXGIFactory1 + let factory1 = match lib_dxgi.create_factory1() { + Ok(pair) => match pair.into_result() { + Ok(factory) => factory, + Err(err) => { + log::error!("Failed to create IDXGIFactory1: {}", err); + return Err(crate::InstanceError); + } + }, + // We always require at least factory1, so hard error + Err(err) => { + log::error!("IDXGIFactory1 creation function not found: {:?}", err); + return Err(crate::InstanceError); + } + }; + + // Try to cast the IDXGIFactory1 into IDXGIFactory2 + let factory2 = unsafe { factory1.cast::<dxgi1_2::IDXGIFactory2>().into_result() }; + match factory2 { + Ok(factory2) => { + unsafe { + factory1.destroy(); + } + return Ok((lib_dxgi, d3d12::DxgiFactory::Factory2(factory2))); + } + // If we require factory2, hard error. + Err(err) if required_factory_type == DxgiFactoryType::Factory2 => { + log::warn!("Failed to cast IDXGIFactory1 to IDXGIFactory2: {:?}", err); + return Err(crate::InstanceError); + } + // If we don't print it to info. + Err(err) => { + log::info!("Failed to cast IDXGIFactory1 to IDXGIFactory2: {:?}", err); + } + } + + // We tried to create 4 and 2, but only succeeded with 1. + Ok((lib_dxgi, d3d12::DxgiFactory::Factory1(factory1))) +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/mod.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/mod.rs new file mode 100644 index 0000000000..559969633c --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/mod.rs @@ -0,0 +1,5 @@ +pub mod conv; +pub mod exception; +pub mod factory; +pub mod result; +pub mod time; diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/result.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/result.rs new file mode 100644 index 0000000000..db013d2dec --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/result.rs @@ -0,0 +1,42 @@ +use std::borrow::Cow; + +use winapi::shared::winerror; + +pub(crate) trait HResult<O> { + fn into_result(self) -> Result<O, Cow<'static, str>>; + fn into_device_result(self, description: &str) -> Result<O, crate::DeviceError>; +} +impl HResult<()> for i32 { + fn into_result(self) -> Result<(), Cow<'static, str>> { + if self >= 0 { + return Ok(()); + } + let description = match self { + winerror::E_UNEXPECTED => "unexpected", + winerror::E_NOTIMPL => "not implemented", + winerror::E_OUTOFMEMORY => "out of memory", + winerror::E_INVALIDARG => "invalid argument", + _ => return Err(Cow::Owned(format!("0x{:X}", self as u32))), + }; + Err(Cow::Borrowed(description)) + } + fn into_device_result(self, description: &str) -> Result<(), crate::DeviceError> { + self.into_result().map_err(|err| { + log::error!("{} failed: {}", description, err); + if self == winerror::E_OUTOFMEMORY { + crate::DeviceError::OutOfMemory + } else { + crate::DeviceError::Lost + } + }) + } +} + +impl<T> HResult<T> for (T, i32) { + fn into_result(self) -> Result<T, Cow<'static, str>> { + self.1.into_result().map(|()| self.0) + } + fn into_device_result(self, description: &str) -> Result<T, crate::DeviceError> { + self.1.into_device_result(description).map(|()| self.0) + } +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/time.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/time.rs new file mode 100644 index 0000000000..fd99c097d7 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/time.rs @@ -0,0 +1,94 @@ +#![allow(dead_code)] // IPresentationManager is unused currently + +use std::mem; + +use winapi::um::{ + profileapi::{QueryPerformanceCounter, QueryPerformanceFrequency}, + winnt::LARGE_INTEGER, +}; + +pub enum PresentationTimer { + /// DXGI uses QueryPerformanceCounter + Dxgi { + /// How many ticks of QPC per second + frequency: u64, + }, + /// IPresentationManager uses QueryInterruptTimePrecise + #[allow(non_snake_case)] + IPresentationManager { + fnQueryInterruptTimePrecise: unsafe extern "system" fn(*mut winapi::ctypes::c_ulonglong), + }, +} + +impl std::fmt::Debug for PresentationTimer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match *self { + Self::Dxgi { frequency } => f + .debug_struct("DXGI") + .field("frequency", &frequency) + .finish(), + Self::IPresentationManager { + fnQueryInterruptTimePrecise, + } => f + .debug_struct("IPresentationManager") + .field( + "QueryInterruptTimePrecise", + &(fnQueryInterruptTimePrecise as usize), + ) + .finish(), + } + } +} + +impl PresentationTimer { + /// Create a presentation timer using QueryPerformanceFrequency (what DXGI uses for presentation times) + pub fn new_dxgi() -> Self { + let mut frequency: LARGE_INTEGER = unsafe { mem::zeroed() }; + let success = unsafe { QueryPerformanceFrequency(&mut frequency) }; + assert_ne!(success, 0); + + Self::Dxgi { + frequency: unsafe { *frequency.QuadPart() } as u64, + } + } + + /// Create a presentation timer using QueryInterruptTimePrecise (what IPresentationManager uses for presentation times) + /// + /// Panics if QueryInterruptTimePrecise isn't found (below Win10) + pub fn new_ipresentation_manager() -> Self { + // We need to load this explicitly, as QueryInterruptTimePrecise is only available on Windows 10+ + // + // Docs say it's in kernel32.dll, but it's actually in kernelbase.dll. + let kernelbase = + libloading::os::windows::Library::open_already_loaded("kernelbase.dll").unwrap(); + // No concerns about lifetimes here as kernelbase is always there. + let ptr = unsafe { kernelbase.get(b"QueryInterruptTimePrecise").unwrap() }; + Self::IPresentationManager { + fnQueryInterruptTimePrecise: *ptr, + } + } + + /// Gets the current time in nanoseconds. + pub fn get_timestamp_ns(&self) -> u128 { + // Always do u128 math _after_ hitting the timing function. + match *self { + PresentationTimer::Dxgi { frequency } => { + let mut counter: LARGE_INTEGER = unsafe { mem::zeroed() }; + let success = unsafe { QueryPerformanceCounter(&mut counter) }; + assert_ne!(success, 0); + + // counter * (1_000_000_000 / freq) but re-ordered to make more precise + (unsafe { *counter.QuadPart() } as u128 * 1_000_000_000) / frequency as u128 + } + PresentationTimer::IPresentationManager { + fnQueryInterruptTimePrecise, + } => { + let mut counter = 0; + unsafe { fnQueryInterruptTimePrecise(&mut counter) }; + + // QueryInterruptTimePrecise uses units of 100ns for its tick. + counter as u128 * 100 + } + } + } +} diff --git a/third_party/rust/wgpu-hal/src/auxil/mod.rs b/third_party/rust/wgpu-hal/src/auxil/mod.rs new file mode 100644 index 0000000000..f0aa6a4a89 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/mod.rs @@ -0,0 +1,138 @@ +#[cfg(all(any(feature = "dx11", feature = "dx12"), windows))] +pub(super) mod dxgi; + +#[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] +pub(super) mod renderdoc; + +pub mod db { + pub mod amd { + pub const VENDOR: u32 = 0x1002; + } + pub mod apple { + pub const VENDOR: u32 = 0x106B; + } + pub mod arm { + pub const VENDOR: u32 = 0x13B5; + } + pub mod broadcom { + pub const VENDOR: u32 = 0x14E4; + } + pub mod imgtec { + pub const VENDOR: u32 = 0x1010; + } + pub mod intel { + pub const VENDOR: u32 = 0x8086; + pub const DEVICE_KABY_LAKE_MASK: u32 = 0x5900; + pub const DEVICE_SKY_LAKE_MASK: u32 = 0x1900; + } + pub mod mesa { + // Mesa does not actually have a PCI vendor id. + // + // To match Vulkan, we use the VkVendorId for Mesa in the gles backend so that lavapipe (Vulkan) and + // llvmpipe (OpenGL) have the same vendor id. + pub const VENDOR: u32 = 0x10005; + } + pub mod nvidia { + pub const VENDOR: u32 = 0x10DE; + } + pub mod qualcomm { + pub const VENDOR: u32 = 0x5143; + } +} + +/// Maximum binding size for the shaders that only support `i32` indexing. +/// Interestingly, the index itself can't reach that high, because the minimum +/// element size is 4 bytes, but the compiler toolchain still computes the +/// offset at some intermediate point, internally, as i32. +pub const MAX_I32_BINDING_SIZE: u32 = 1 << 31; + +pub fn map_naga_stage(stage: naga::ShaderStage) -> wgt::ShaderStages { + match stage { + naga::ShaderStage::Vertex => wgt::ShaderStages::VERTEX, + naga::ShaderStage::Fragment => wgt::ShaderStages::FRAGMENT, + naga::ShaderStage::Compute => wgt::ShaderStages::COMPUTE, + } +} + +impl crate::CopyExtent { + pub fn map_extent_to_copy_size(extent: &wgt::Extent3d, dim: wgt::TextureDimension) -> Self { + Self { + width: extent.width, + height: extent.height, + depth: match dim { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => 1, + wgt::TextureDimension::D3 => extent.depth_or_array_layers, + }, + } + } + + pub fn min(&self, other: &Self) -> Self { + Self { + width: self.width.min(other.width), + height: self.height.min(other.height), + depth: self.depth.min(other.depth), + } + } + + // Get the copy size at a specific mipmap level. This doesn't make most sense, + // since the copy extents are provided *for* a mipmap level to start with. + // But backends use `CopyExtent` more sparingly, and this piece is shared. + pub fn at_mip_level(&self, level: u32) -> Self { + Self { + width: (self.width >> level).max(1), + height: (self.height >> level).max(1), + depth: (self.depth >> level).max(1), + } + } +} + +impl crate::TextureCopyBase { + pub fn max_copy_size(&self, full_size: &crate::CopyExtent) -> crate::CopyExtent { + let mip = full_size.at_mip_level(self.mip_level); + crate::CopyExtent { + width: mip.width - self.origin.x, + height: mip.height - self.origin.y, + depth: mip.depth - self.origin.z, + } + } +} + +impl crate::BufferTextureCopy { + pub fn clamp_size_to_virtual(&mut self, full_size: &crate::CopyExtent) { + let max_size = self.texture_base.max_copy_size(full_size); + self.size = self.size.min(&max_size); + } +} + +impl crate::TextureCopy { + pub fn clamp_size_to_virtual( + &mut self, + full_src_size: &crate::CopyExtent, + full_dst_size: &crate::CopyExtent, + ) { + let max_src_size = self.src_base.max_copy_size(full_src_size); + let max_dst_size = self.dst_base.max_copy_size(full_dst_size); + self.size = self.size.min(&max_src_size).min(&max_dst_size); + } +} + +/// Construct a `CStr` from a byte slice, up to the first zero byte. +/// +/// Return a `CStr` extending from the start of `bytes` up to and +/// including the first zero byte. If there is no zero byte in +/// `bytes`, return `None`. +/// +/// This can be removed when `CStr::from_bytes_until_nul` is stabilized. +/// ([#95027](https://github.com/rust-lang/rust/issues/95027)) +#[allow(dead_code)] +pub(crate) fn cstr_from_bytes_until_nul(bytes: &[std::os::raw::c_char]) -> Option<&std::ffi::CStr> { + if bytes.contains(&0) { + // Safety for `CStr::from_ptr`: + // - We've ensured that the slice does contain a null terminator. + // - The range is valid to read, because the slice covers it. + // - The memory won't be changed, because the slice borrows it. + unsafe { Some(std::ffi::CStr::from_ptr(bytes.as_ptr())) } + } else { + None + } +} diff --git a/third_party/rust/wgpu-hal/src/auxil/renderdoc.rs b/third_party/rust/wgpu-hal/src/auxil/renderdoc.rs new file mode 100644 index 0000000000..15b2c1039a --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/renderdoc.rs @@ -0,0 +1,138 @@ +//! RenderDoc integration - <https://renderdoc.org/> + +use std::{ffi, os, ptr}; + +/// The dynamically loaded RenderDoc API function table +#[repr(C)] +#[derive(Debug)] +pub struct RenderDocApi { + api: renderdoc_sys::RENDERDOC_API_1_4_1, + lib: libloading::Library, +} + +unsafe impl Send for RenderDocApi {} +unsafe impl Sync for RenderDocApi {} + +/// RenderDoc API type +#[derive(Debug)] +pub enum RenderDoc { + /// RenderDoc functionality is available + Available { + /// RenderDoc API with function pointers + api: RenderDocApi, + }, + /// RenderDoc functionality is _not_ available + NotAvailable { + /// A description why renderdoc functionality is not available + reason: String, + }, +} + +// TODO: replace with libloading API once supported +#[cfg(unix)] +const RTLD_NOLOAD: i32 = 0x4; + +impl RenderDoc { + pub unsafe fn new() -> Self { + type GetApiFn = unsafe extern "C" fn(version: u32, out: *mut *mut ffi::c_void) -> i32; + + #[cfg(windows)] + let renderdoc_filename = "renderdoc.dll"; + #[cfg(all(unix, not(target_os = "android")))] + let renderdoc_filename = "librenderdoc.so"; + #[cfg(target_os = "android")] + let renderdoc_filename = "libVkLayer_GLES_RenderDoc.so"; + + #[cfg(unix)] + let renderdoc_result: Result<libloading::Library, libloading::Error> = unsafe { + libloading::os::unix::Library::open( + Some(renderdoc_filename), + libloading::os::unix::RTLD_NOW | RTLD_NOLOAD, + ) + } + .map(|lib| lib.into()); + + #[cfg(windows)] + let renderdoc_result: Result<libloading::Library, libloading::Error> = + libloading::os::windows::Library::open_already_loaded(renderdoc_filename) + .map(|lib| lib.into()); + + let renderdoc_lib = match renderdoc_result { + Ok(lib) => lib, + Err(e) => { + return RenderDoc::NotAvailable { + reason: format!( + "Unable to load renderdoc library '{renderdoc_filename}': {e:?}" + ), + } + } + }; + + let get_api: libloading::Symbol<GetApiFn> = + match unsafe { renderdoc_lib.get(b"RENDERDOC_GetAPI\0") } { + Ok(api) => api, + Err(e) => { + return RenderDoc::NotAvailable { + reason: format!( + "Unable to get RENDERDOC_GetAPI from renderdoc library '{renderdoc_filename}': {e:?}" + ), + } + } + }; + let mut obj = ptr::null_mut(); + match unsafe { get_api(10401, &mut obj) } { + 1 => RenderDoc::Available { + api: RenderDocApi { + api: unsafe { *(obj as *mut renderdoc_sys::RENDERDOC_API_1_4_1) }, + lib: renderdoc_lib, + }, + }, + return_value => RenderDoc::NotAvailable { + reason: format!( + "Unable to get API from renderdoc library '{renderdoc_filename}': {return_value}" + ), + }, + } + } +} + +impl Default for RenderDoc { + fn default() -> Self { + if !cfg!(debug_assertions) { + return RenderDoc::NotAvailable { + reason: "RenderDoc support is only enabled with 'debug_assertions'".into(), + }; + } + unsafe { Self::new() } + } +} +/// A implementation specific handle +pub type Handle = *mut os::raw::c_void; + +impl RenderDoc { + /// Start a RenderDoc frame capture + pub unsafe fn start_frame_capture(&self, device_handle: Handle, window_handle: Handle) -> bool { + match *self { + Self::Available { api: ref entry } => { + unsafe { entry.api.StartFrameCapture.unwrap()(device_handle, window_handle) }; + true + } + Self::NotAvailable { ref reason } => { + log::warn!("Could not start RenderDoc frame capture: {}", reason); + false + } + } + } + + /// End a RenderDoc frame capture + pub unsafe fn end_frame_capture(&self, device_handle: Handle, window_handle: Handle) { + match *self { + Self::Available { api: ref entry } => { + unsafe { entry.api.EndFrameCapture.unwrap()(device_handle, window_handle) }; + } + Self::NotAvailable { ref reason } => { + log::warn!("Could not end RenderDoc frame capture: {}", reason) + } + }; + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/adapter.rs b/third_party/rust/wgpu-hal/src/dx11/adapter.rs new file mode 100644 index 0000000000..a28106a9bb --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/adapter.rs @@ -0,0 +1,291 @@ +use std::num::NonZeroU64; + +use winapi::um::{d3d11, d3dcommon}; + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + todo!() + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + todo!() + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + todo!() + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + todo!() + } +} + +impl super::Adapter { + pub(super) fn expose( + instance: &super::library::D3D11Lib, + adapter: d3d12::DxgiAdapter, + ) -> Option<crate::ExposedAdapter<super::Api>> { + use d3dcommon::{ + D3D_FEATURE_LEVEL_10_0 as FL10_0, D3D_FEATURE_LEVEL_10_1 as FL10_1, + D3D_FEATURE_LEVEL_11_0 as FL11_0, D3D_FEATURE_LEVEL_11_1 as FL11_1, + D3D_FEATURE_LEVEL_9_1 as FL9_1, D3D_FEATURE_LEVEL_9_2 as FL9_2, + D3D_FEATURE_LEVEL_9_3 as FL9_3, + }; + + let (device, feature_level) = instance.create_device(adapter)?; + + // + // Query Features from d3d11 + // + + let d3d9_features = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D9_OPTIONS1>( + d3d11::D3D11_FEATURE_D3D9_OPTIONS1, + ) + }; + + let d3d10_features = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS>( + d3d11::D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, + ) + }; + + let d3d11_features = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D11_OPTIONS>( + d3d11::D3D11_FEATURE_D3D11_OPTIONS, + ) + }; + + let d3d11_features1 = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D11_OPTIONS1>( + d3d11::D3D11_FEATURE_D3D11_OPTIONS1, + ) + }; + + let d3d11_features2 = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D11_OPTIONS2>( + d3d11::D3D11_FEATURE_D3D11_OPTIONS2, + ) + }; + + let d3d11_features3 = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D11_OPTIONS3>( + d3d11::D3D11_FEATURE_D3D11_OPTIONS3, + ) + }; + + // + // Fill out features and downlevel features + // + // TODO(cwfitzgerald): Needed downlevel features: 3D dispatch + + let mut features = wgt::Features::DEPTH_CLIP_CONTROL + | wgt::Features::PUSH_CONSTANTS + | wgt::Features::POLYGON_MODE_LINE + | wgt::Features::CLEAR_TEXTURE + | wgt::Features::TEXTURE_FORMAT_16BIT_NORM + | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO; + let mut downlevel = wgt::DownlevelFlags::BASE_VERTEX + | wgt::DownlevelFlags::READ_ONLY_DEPTH_STENCIL + | wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER + | wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES; + + // Features from queries + downlevel.set( + wgt::DownlevelFlags::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES, + d3d9_features.FullNonPow2TextureSupported == 1, + ); + downlevel.set( + wgt::DownlevelFlags::COMPUTE_SHADERS, + d3d10_features.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x == 1, + ); + + // Features from feature level + if feature_level >= FL9_2 { + downlevel |= wgt::DownlevelFlags::INDEPENDENT_BLEND; + // formally FL9_1 supports aniso 2, but we don't support that level of distinction + downlevel |= wgt::DownlevelFlags::ANISOTROPIC_FILTERING; + // this is actually the first FL that supports u32 at all + downlevel |= wgt::DownlevelFlags::FULL_DRAW_INDEX_UINT32; + } + + if feature_level >= FL9_3 { + downlevel |= wgt::DownlevelFlags::COMPARISON_SAMPLERS; + } + + if feature_level >= FL10_0 { + downlevel |= wgt::DownlevelFlags::FRAGMENT_STORAGE; + downlevel |= wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE; + downlevel |= wgt::DownlevelFlags::DEPTH_BIAS_CLAMP; + downlevel |= wgt::DownlevelFlags::VERTEX_STORAGE; + features |= wgt::Features::DEPTH_CLIP_CONTROL; + features |= wgt::Features::TIMESTAMP_QUERY; + features |= wgt::Features::PIPELINE_STATISTICS_QUERY; + features |= wgt::Features::SHADER_PRIMITIVE_INDEX; + } + + if feature_level >= FL10_1 { + downlevel |= wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES; + downlevel |= wgt::DownlevelFlags::MULTISAMPLED_SHADING; + } + + if feature_level >= FL11_0 { + downlevel |= wgt::DownlevelFlags::INDIRECT_EXECUTION; + downlevel |= wgt::DownlevelFlags::WEBGPU_TEXTURE_FORMAT_SUPPORT; + features |= wgt::Features::TEXTURE_COMPRESSION_BC; + } + + if feature_level >= FL11_1 { + features |= wgt::Features::VERTEX_WRITABLE_STORAGE; + } + + // + // Fill out limits and alignments + // + + let max_texture_dimension_2d = match feature_level { + FL9_1 | FL9_2 => 2048, + FL9_3 => 4096, + FL10_0 | FL10_1 => 8192, + _ => d3d11::D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION, + }; + + let max_texture_dimension_3d = match feature_level { + FL9_1..=FL9_3 => 256, + _ => d3d11::D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + }; + let max_vertex_buffers = match feature_level { + FL9_1..=FL9_3 => 16, + _ => 32, + }; + let max_compute_workgroup_storage_size = match feature_level { + FL9_1..=FL9_3 => 0, + FL10_0 | FL10_1 => 4096 * 4, // This doesn't have an equiv SM4 constant :\ + _ => d3d11::D3D11_CS_TGSM_REGISTER_COUNT * 4, + }; + let max_workgroup_size_xy = match feature_level { + FL9_1..=FL9_3 => 0, + FL10_0 | FL10_1 => d3d11::D3D11_CS_4_X_THREAD_GROUP_MAX_X, + _ => d3d11::D3D11_CS_THREAD_GROUP_MAX_X, + }; + let max_workgroup_size_z = match feature_level { + FL9_1..=FL9_3 => 0, + FL10_0 | FL10_1 => 1, + _ => d3d11::D3D11_CS_THREAD_GROUP_MAX_Z, + }; + // let max_workgroup_count_z = match feature_level { + // FL9_1..=FL9_3 => 0, + // FL10_0 | FL10_1 => 1, + // _ => d3d11::D3D11_CS_THREAD_GROUP_MAX_Z, + // }; + + let max_sampled_textures = d3d11::D3D11_COMMONSHADER_INPUT_RESOURCE_REGISTER_COUNT; + let max_samplers = d3d11::D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; + let max_constant_buffers = d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1; + let max_uavs = if device.as_device1().is_some() { + d3d11::D3D11_1_UAV_SLOT_COUNT + } else { + d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT + }; + let max_output_registers = d3d11::D3D11_VS_OUTPUT_REGISTER_COMPONENTS; + let max_compute_invocations_per_workgroup = + d3d11::D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; + let max_compute_workgroups_per_dimension = + d3d11::D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION; + + let limits = wgt::Limits { + max_texture_dimension_1d: max_texture_dimension_2d, + max_texture_dimension_2d, + max_texture_dimension_3d, + max_texture_array_layers: max_texture_dimension_3d, + max_bind_groups: u32::MAX, + max_bindings_per_bind_group: 65535, + max_dynamic_uniform_buffers_per_pipeline_layout: max_constant_buffers, + max_dynamic_storage_buffers_per_pipeline_layout: 0, + max_sampled_textures_per_shader_stage: max_sampled_textures, + max_samplers_per_shader_stage: max_samplers, + max_storage_buffers_per_shader_stage: max_uavs, + max_storage_textures_per_shader_stage: max_uavs, + max_uniform_buffers_per_shader_stage: max_constant_buffers, + max_uniform_buffer_binding_size: 1 << 16, + max_storage_buffer_binding_size: u32::MAX, + max_vertex_buffers, + max_vertex_attributes: max_vertex_buffers, + max_vertex_buffer_array_stride: u32::MAX, + max_push_constant_size: 1 << 16, + min_uniform_buffer_offset_alignment: 256, + min_storage_buffer_offset_alignment: 1, + max_inter_stage_shader_components: max_output_registers, + max_compute_workgroup_storage_size, + max_compute_invocations_per_workgroup, + max_compute_workgroup_size_x: max_workgroup_size_xy, + max_compute_workgroup_size_y: max_workgroup_size_xy, + max_compute_workgroup_size_z: max_workgroup_size_z, + max_compute_workgroups_per_dimension, + // D3D11_BUFFER_DESC represents the buffer size as a 32 bit int. + max_buffer_size: u32::MAX as u64, + }; + + // + // Other capabilities + // + + let shader_model = match feature_level { + FL9_1..=FL9_3 => wgt::ShaderModel::Sm2, + FL10_0 | FL10_1 => wgt::ShaderModel::Sm4, + _ => wgt::ShaderModel::Sm5, + }; + + let device_info = wgt::AdapterInfo { + name: String::new(), + vendor: 0, + device: 0, + device_type: match d3d11_features2.UnifiedMemoryArchitecture { + 0 => wgt::DeviceType::DiscreteGpu, + 1 => wgt::DeviceType::IntegratedGpu, + _ => unreachable!(), + }, + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Dx11, + }; + + // + // Build up the structs + // + + let api_adapter = super::Adapter { device }; + + let alignments = crate::Alignments { + buffer_copy_offset: NonZeroU64::new(1).unwrap(), // todo + buffer_copy_pitch: NonZeroU64::new(1).unwrap(), // todo + }; + + let capabilities = crate::Capabilities { + limits, + alignments, + downlevel: wgt::DownlevelCapabilities { + flags: downlevel, + limits: wgt::DownlevelLimits {}, + shader_model, + }, + }; + + Some(crate::ExposedAdapter { + adapter: api_adapter, + info: device_info, + features, + capabilities, + }) + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/command.rs b/third_party/rust/wgpu-hal/src/dx11/command.rs new file mode 100644 index 0000000000..1c73f3c325 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/command.rs @@ -0,0 +1,268 @@ +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + todo!() + } + + unsafe fn discard_encoding(&mut self) { + todo!() + } + + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + todo!() + } + + unsafe fn reset_all<I>(&mut self, command_buffers: I) + where + I: Iterator<Item = super::CommandBuffer>, + { + todo!() + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + todo!() + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + todo!() + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + todo!() + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + todo!() + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + todo!() + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + todo!() + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + todo!() + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + todo!() + } + + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + todo!() + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + todo!() + } + + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + todo!() + } + + unsafe fn end_debug_marker(&mut self) { + todo!() + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + todo!() + } + + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + todo!() + } + + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + todo!() + } + + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: std::ops::Range<u32>) { + todo!() + } + + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: std::ops::Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + todo!() + } + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + todo!() + } + + unsafe fn end_render_pass(&mut self) { + todo!() + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + todo!() + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + todo!() + } + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + todo!() + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: std::ops::Range<f32>) { + todo!() + } + + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + todo!() + } + + unsafe fn set_stencil_reference(&mut self, value: u32) { + todo!() + } + + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + todo!() + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + todo!() + } + + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + todo!() + } + + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + todo!() + } + + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + todo!() + } + + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + todo!() + } + + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + todo!() + } + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + todo!() + } + + unsafe fn end_compute_pass(&mut self) { + todo!() + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + todo!() + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + todo!() + } + + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + todo!() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/device.rs b/third_party/rust/wgpu-hal/src/dx11/device.rs new file mode 100644 index 0000000000..3b087c4311 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/device.rs @@ -0,0 +1,242 @@ +use std::{ffi::c_void, mem}; + +use winapi::um::d3d11; + +use crate::auxil::dxgi::result::HResult; + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, queue: super::Queue) { + todo!() + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { + todo!() + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + todo!() + } + + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + todo!() + } + + unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + todo!() + } + + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + todo!() + } + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_texture(&self, texture: super::Texture) { + todo!() + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + todo!() + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + todo!() + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_command_encoder(&self, pool: super::CommandEncoder) { + todo!() + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + todo!() + } + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + todo!() + } + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + todo!() + } + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + todo!() + } + + unsafe fn destroy_shader_module(&self, module: super::ShaderModule) { + todo!() + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + todo!() + } + + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + todo!() + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + todo!() + } + + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + todo!() + } + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + todo!() + } + + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_fence(&self, fence: super::Fence) { + todo!() + } + + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + todo!() + } + + unsafe fn wait( + &self, + fence: &super::Fence, + value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + todo!() + } + + unsafe fn start_capture(&self) -> bool { + todo!() + } + + unsafe fn stop_capture(&self) { + todo!() + } +} + +impl crate::Queue<super::Api> for super::Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&super::CommandBuffer], + signal_fence: Option<(&mut super::Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + todo!() + } + + unsafe fn present( + &mut self, + surface: &mut super::Surface, + texture: super::SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + todo!() + } + + unsafe fn get_timestamp_period(&self) -> f32 { + todo!() + } +} + +impl super::D3D11Device { + #[allow(trivial_casts)] // come on + pub unsafe fn check_feature_support<T>(&self, feature: d3d11::D3D11_FEATURE) -> T { + unsafe { + let mut value = mem::zeroed::<T>(); + let ret = self.CheckFeatureSupport( + feature, + &mut value as *mut T as *mut c_void, + mem::size_of::<T>() as u32, + ); + assert_eq!(ret.into_result(), Ok(())); + + value + } + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/instance.rs b/third_party/rust/wgpu-hal/src/dx11/instance.rs new file mode 100644 index 0000000000..104ba9e045 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/instance.rs @@ -0,0 +1,48 @@ +use crate::auxil; + +impl crate::Instance<super::Api> for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + let enable_dx11 = match std::env::var("WGPU_UNSTABLE_DX11_BACKEND") { + Ok(string) => string == "1" || string == "true", + Err(_) => false, + }; + + if !enable_dx11 { + return Err(crate::InstanceError); + } + + let lib_d3d11 = super::library::D3D11Lib::new().ok_or(crate::InstanceError)?; + + let (lib_dxgi, factory) = auxil::dxgi::factory::create_factory( + auxil::dxgi::factory::DxgiFactoryType::Factory1, + desc.flags, + )?; + + Ok(super::Instance { + lib_d3d11, + lib_dxgi, + factory, + }) + } + + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<super::Surface, crate::InstanceError> { + todo!() + } + + unsafe fn destroy_surface(&self, surface: super::Surface) { + todo!() + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let adapters = auxil::dxgi::factory::enumerate_adapters(self.factory); + + adapters + .into_iter() + .filter_map(|adapter| super::Adapter::expose(&self.lib_d3d11, adapter)) + .collect() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/library.rs b/third_party/rust/wgpu-hal/src/dx11/library.rs new file mode 100644 index 0000000000..ea597abd56 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/library.rs @@ -0,0 +1,144 @@ +use std::ptr; + +use winapi::{ + shared::{ + dxgi, + minwindef::{HMODULE, UINT}, + winerror, + }, + um::{d3d11, d3d11_1, d3d11_2, d3dcommon}, +}; + +use crate::auxil::dxgi::result::HResult; + +type D3D11CreateDeviceFun = unsafe extern "system" fn( + *mut dxgi::IDXGIAdapter, + d3dcommon::D3D_DRIVER_TYPE, + HMODULE, + UINT, + *const d3dcommon::D3D_FEATURE_LEVEL, + UINT, + UINT, + *mut *mut d3d11::ID3D11Device, + *mut d3dcommon::D3D_FEATURE_LEVEL, + *mut *mut d3d11::ID3D11DeviceContext, +) -> d3d12::HRESULT; + +pub(super) struct D3D11Lib { + // We use the os specific symbol to drop the lifetime parameter. + // + // SAFETY: we must ensure this outlives the Library. + d3d11_create_device: libloading::os::windows::Symbol<D3D11CreateDeviceFun>, + + lib: libloading::Library, +} +impl D3D11Lib { + pub fn new() -> Option<Self> { + unsafe { + let lib = libloading::Library::new("d3d11.dll").ok()?; + + let d3d11_create_device = lib + .get::<D3D11CreateDeviceFun>(b"D3D11CreateDevice") + .ok()? + .into_raw(); + + Some(Self { + lib, + d3d11_create_device, + }) + } + } + + pub fn create_device( + &self, + adapter: d3d12::DxgiAdapter, + ) -> Option<(super::D3D11Device, d3dcommon::D3D_FEATURE_LEVEL)> { + let feature_levels = [ + d3dcommon::D3D_FEATURE_LEVEL_11_1, + d3dcommon::D3D_FEATURE_LEVEL_11_0, + d3dcommon::D3D_FEATURE_LEVEL_10_1, + d3dcommon::D3D_FEATURE_LEVEL_10_0, + d3dcommon::D3D_FEATURE_LEVEL_9_3, + d3dcommon::D3D_FEATURE_LEVEL_9_2, + d3dcommon::D3D_FEATURE_LEVEL_9_1, + ]; + + let mut device = d3d12::WeakPtr::<d3d11::ID3D11Device>::null(); + let mut feature_level: d3dcommon::D3D_FEATURE_LEVEL = 0; + + // We need to try this twice. If the first time fails due to E_INVALIDARG + // we are running on a machine without a D3D11.1 runtime, and need to + // retry without the feature level 11_1 feature level. + // + // Why they thought this was a good API, who knows. + + let mut hr = unsafe { + (self.d3d11_create_device)( + adapter.as_mut_ptr() as *mut _, + d3dcommon::D3D_DRIVER_TYPE_UNKNOWN, + ptr::null_mut(), // software implementation DLL??? + 0, // flags + feature_levels.as_ptr(), + feature_levels.len() as u32, + d3d11::D3D11_SDK_VERSION, + device.mut_self(), + &mut feature_level, + ptr::null_mut(), // device context + ) + }; + + // Try again without FL11_1 + if hr == winerror::E_INVALIDARG { + hr = unsafe { + (self.d3d11_create_device)( + adapter.as_mut_ptr() as *mut _, + d3dcommon::D3D_DRIVER_TYPE_UNKNOWN, + ptr::null_mut(), // software implementation DLL??? + 0, // flags + feature_levels[1..].as_ptr(), + feature_levels[1..].len() as u32, + d3d11::D3D11_SDK_VERSION, + device.mut_self(), + &mut feature_level, + ptr::null_mut(), // device context + ) + }; + } + + // Any errors here are real and we should complain about + if let Err(err) = hr.into_result() { + log::error!("Failed to make a D3D11 device: {}", err); + return None; + } + + // We always try to upcast in highest -> lowest order + + // Device -> Device2 + unsafe { + match device.cast::<d3d11_2::ID3D11Device2>().into_result() { + Ok(device2) => { + device.destroy(); + return Some((super::D3D11Device::Device2(device2), feature_level)); + } + Err(hr) => { + log::info!("Failed to cast device to ID3D11Device2: {}", hr) + } + } + } + + // Device -> Device1 + unsafe { + match device.cast::<d3d11_1::ID3D11Device1>().into_result() { + Ok(device1) => { + device.destroy(); + return Some((super::D3D11Device::Device1(device1), feature_level)); + } + Err(hr) => { + log::info!("Failed to cast device to ID3D11Device1: {}", hr) + } + } + } + + Some((super::D3D11Device::Device(device), feature_level)) + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/mod.rs b/third_party/rust/wgpu-hal/src/dx11/mod.rs new file mode 100644 index 0000000000..91827874b1 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/mod.rs @@ -0,0 +1,137 @@ +#![allow(dead_code)] +#![allow(unused_variables)] + +use winapi::um::{d3d11, d3d11_1, d3d11_2}; + +mod adapter; +mod command; +mod device; +mod instance; +mod library; + +#[derive(Clone)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +pub struct Instance { + lib_d3d11: library::D3D11Lib, + lib_dxgi: d3d12::DxgiLib, + factory: d3d12::DxgiFactory, +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +pub struct Surface {} + +pub struct Adapter { + device: D3D11Device, +} + +unsafe impl Send for Adapter {} +unsafe impl Sync for Adapter {} + +d3d12::weak_com_inheritance_chain! { + #[derive(Debug, Copy, Clone, PartialEq)] + enum D3D11Device { + Device(d3d11::ID3D11Device), from_device, as_device, device; + Device1(d3d11_1::ID3D11Device1), from_device1, as_device1, unwrap_device1; + Device2(d3d11_2::ID3D11Device2), from_device2, as_device2, unwrap_device2; + } +} + +pub struct Device {} + +unsafe impl Send for Device {} +unsafe impl Sync for Device {} + +pub struct Queue {} + +#[derive(Debug)] +pub struct CommandEncoder {} + +#[derive(Debug)] +pub struct CommandBuffer {} + +#[derive(Debug)] +pub struct Buffer {} +#[derive(Debug)] +pub struct Texture {} +#[derive(Debug)] +pub struct SurfaceTexture {} + +impl std::borrow::Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + todo!() + } +} + +#[derive(Debug)] +pub struct TextureView {} +#[derive(Debug)] +pub struct Sampler {} +#[derive(Debug)] +pub struct QuerySet {} +#[derive(Debug)] +pub struct Fence {} +#[derive(Debug)] + +pub struct BindGroupLayout {} +#[derive(Debug)] +pub struct BindGroup {} +#[derive(Debug)] +pub struct PipelineLayout {} +#[derive(Debug)] +pub struct ShaderModule {} +pub struct RenderPipeline {} +pub struct ComputePipeline {} + +impl crate::Surface<Api> for Surface { + unsafe fn configure( + &mut self, + device: &Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + todo!() + } + + unsafe fn unconfigure(&mut self, device: &Device) { + todo!() + } + + unsafe fn acquire_texture( + &mut self, + _timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> { + todo!() + } + + unsafe fn discard_texture(&mut self, texture: SurfaceTexture) { + todo!() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/adapter.rs b/third_party/rust/wgpu-hal/src/dx12/adapter.rs new file mode 100644 index 0000000000..13530afb3e --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/adapter.rs @@ -0,0 +1,587 @@ +use crate::{ + auxil::{self, dxgi::result::HResult as _}, + dx12::SurfaceTarget, +}; +use std::{mem, ptr, sync::Arc, thread}; +use winapi::{ + shared::{dxgi, dxgi1_2, minwindef::DWORD, windef, winerror}, + um::{d3d12 as d3d12_ty, d3d12sdklayers, winuser}, +}; + +impl Drop for super::Adapter { + fn drop(&mut self) { + // Debug tracking alive objects + if !thread::panicking() + && self + .private_caps + .instance_flags + .contains(crate::InstanceFlags::VALIDATION) + { + unsafe { + self.report_live_objects(); + } + } + unsafe { + self.raw.destroy(); + } + } +} + +impl super::Adapter { + pub unsafe fn report_live_objects(&self) { + if let Ok(debug_device) = unsafe { + self.raw + .cast::<d3d12sdklayers::ID3D12DebugDevice>() + .into_result() + } { + unsafe { + debug_device.ReportLiveDeviceObjects( + d3d12sdklayers::D3D12_RLDO_SUMMARY | d3d12sdklayers::D3D12_RLDO_IGNORE_INTERNAL, + ) + }; + unsafe { debug_device.destroy() }; + } + } + + pub fn raw_adapter(&self) -> &d3d12::DxgiAdapter { + &self.raw + } + + #[allow(trivial_casts)] + pub(super) fn expose( + adapter: d3d12::DxgiAdapter, + library: &Arc<d3d12::D3D12Lib>, + instance_flags: crate::InstanceFlags, + dx12_shader_compiler: &wgt::Dx12Compiler, + ) -> Option<crate::ExposedAdapter<super::Api>> { + // Create the device so that we can get the capabilities. + let device = { + profiling::scope!("ID3D12Device::create_device"); + match library.create_device(*adapter, d3d12::FeatureLevel::L11_0) { + Ok(pair) => match pair.into_result() { + Ok(device) => device, + Err(err) => { + log::warn!("Device creation failed: {}", err); + return None; + } + }, + Err(err) => { + log::warn!("Device creation function is not found: {:?}", err); + return None; + } + } + }; + + profiling::scope!("feature queries"); + + // We have found a possible adapter. + // Acquire the device information. + let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() }; + unsafe { + adapter.unwrap_adapter2().GetDesc2(&mut desc); + } + + let device_name = { + use std::{ffi::OsString, os::windows::ffi::OsStringExt}; + let len = desc.Description.iter().take_while(|&&c| c != 0).count(); + let name = OsString::from_wide(&desc.Description[..len]); + name.to_string_lossy().into_owned() + }; + + let mut features_architecture: d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE = + unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_ARCHITECTURE, + &mut features_architecture as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE>() as _, + ) + }); + + let mut shader_model_support: d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL = + d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL { + HighestShaderModel: d3d12_ty::D3D_SHADER_MODEL_6_0, + }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_SHADER_MODEL, + &mut shader_model_support as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL>() as _, + ) + }); + + let mut workarounds = super::Workarounds::default(); + + let info = wgt::AdapterInfo { + backend: wgt::Backend::Dx12, + name: device_name, + vendor: desc.VendorId, + device: desc.DeviceId, + device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + workarounds.avoid_cpu_descriptor_overwrites = true; + wgt::DeviceType::Cpu + } else if features_architecture.UMA != 0 { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + }, + driver: String::new(), + driver_info: String::new(), + }; + + let mut options: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS = unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS, + &mut options as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS>() as _, + ) + }); + + let _depth_bounds_test_supported = { + let mut features2: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS2, + &mut features2 as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2>() as _, + ) + }; + hr == 0 && features2.DepthBoundsTestSupported != 0 + }; + + let casting_fully_typed_format_supported = { + let mut features3: crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + 21, // D3D12_FEATURE_D3D12_OPTIONS3 + &mut features3 as *mut _ as *mut _, + mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3>() as _, + ) + }; + hr == 0 && features3.CastingFullyTypedFormatSupported != 0 + }; + + let private_caps = super::PrivateCapabilities { + instance_flags, + heterogeneous_resource_heaps: options.ResourceHeapTier + != d3d12_ty::D3D12_RESOURCE_HEAP_TIER_1, + memory_architecture: if features_architecture.UMA != 0 { + super::MemoryArchitecture::Unified { + cache_coherent: features_architecture.CacheCoherentUMA != 0, + } + } else { + super::MemoryArchitecture::NonUnified + }, + heap_create_not_zeroed: false, //TODO: winapi support for Options7 + casting_fully_typed_format_supported, + }; + + // Theoretically vram limited, but in practice 2^20 is the limit + let tier3_practical_descriptor_limit = 1 << 20; + + let (full_heap_count, _uav_count) = match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => ( + d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + 8, // conservative, is 64 on feature level 11.1 + ), + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_2 => ( + d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2, + 64, + ), + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_3 => ( + tier3_practical_descriptor_limit, + tier3_practical_descriptor_limit, + ), + other => { + log::warn!("Unknown resource binding tier {}", other); + ( + d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + 8, + ) + } + }; + + let mut features = wgt::Features::empty() + | wgt::Features::DEPTH_CLIP_CONTROL + | wgt::Features::DEPTH32FLOAT_STENCIL8 + | wgt::Features::INDIRECT_FIRST_INSTANCE + | wgt::Features::MAPPABLE_PRIMARY_BUFFERS + | wgt::Features::MULTI_DRAW_INDIRECT + | wgt::Features::MULTI_DRAW_INDIRECT_COUNT + | wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER + | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO + | wgt::Features::POLYGON_MODE_LINE + | wgt::Features::POLYGON_MODE_POINT + | wgt::Features::VERTEX_WRITABLE_STORAGE + | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | wgt::Features::TIMESTAMP_QUERY + | wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES + | wgt::Features::TEXTURE_COMPRESSION_BC + | wgt::Features::CLEAR_TEXTURE + | wgt::Features::TEXTURE_FORMAT_16BIT_NORM + | wgt::Features::PUSH_CONSTANTS + | wgt::Features::SHADER_PRIMITIVE_INDEX + | wgt::Features::RG11B10UFLOAT_RENDERABLE; + //TODO: in order to expose this, we need to run a compute shader + // that extract the necessary statistics out of the D3D12 result. + // Alternatively, we could allocate a buffer for the query set, + // write the results there, and issue a bunch of copy commands. + //| wgt::Features::PIPELINE_STATISTICS_QUERY + + features.set( + wgt::Features::CONSERVATIVE_RASTERIZATION, + options.ConservativeRasterizationTier + != d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED, + ); + + features.set( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + shader_model_support.HighestShaderModel >= d3d12_ty::D3D_SHADER_MODEL_5_1, + ); + + // TODO: Determine if IPresentationManager is supported + let presentation_timer = auxil::dxgi::time::PresentationTimer::new_dxgi(); + + let base = wgt::Limits::default(); + + Some(crate::ExposedAdapter { + adapter: super::Adapter { + raw: adapter, + device, + library: Arc::clone(library), + private_caps, + presentation_timer, + workarounds, + dx12_shader_compiler: dx12_shader_compiler.clone(), + }, + info, + features, + capabilities: crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: d3d12_ty::D3D12_REQ_TEXTURE1D_U_DIMENSION, + max_texture_dimension_2d: d3d12_ty::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION + .min(d3d12_ty::D3D12_REQ_TEXTURECUBE_DIMENSION), + max_texture_dimension_3d: d3d12_ty::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + max_texture_array_layers: d3d12_ty::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION, + max_bind_groups: crate::MAX_BIND_GROUPS as u32, + max_bindings_per_bind_group: 65535, + // dynamic offsets take a root constant, so we expose the minimum here + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 128, + _ => full_heap_count, + }, + max_samplers_per_shader_stage: match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 16, + _ => d3d12_ty::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, + }, + // these both account towards `uav_count`, but we can't express the limit as as sum + max_storage_buffers_per_shader_stage: base.max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage: base + .max_storage_textures_per_shader_stage, + max_uniform_buffers_per_shader_stage: full_heap_count, + max_uniform_buffer_binding_size: + d3d12_ty::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16, + max_storage_buffer_binding_size: crate::auxil::MAX_I32_BINDING_SIZE, + max_vertex_buffers: d3d12_ty::D3D12_VS_INPUT_REGISTER_COUNT + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: d3d12_ty::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, + max_vertex_buffer_array_stride: d3d12_ty::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, + // The push constants are part of the root signature which + // has a limit of 64 DWORDS (256 bytes), but other resources + // also share the root signature: + // + // - push constants consume a `DWORD` for each `4 bytes` of data + // - If a bind group has buffers it will consume a `DWORD` + // for the descriptor table + // - If a bind group has samplers it will consume a `DWORD` + // for the descriptor table + // - Each dynamic buffer will consume `2 DWORDs` for the + // root descriptor + // - The special constants buffer count as constants + // + // Since we can't know beforehand all root signatures that + // will be created, the max size to be used for push + // constants needs to be set to a reasonable number instead. + // + // Source: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits#memory-limits-and-costs + max_push_constant_size: 128, + min_uniform_buffer_offset_alignment: + d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, + min_storage_buffer_offset_alignment: 4, + max_inter_stage_shader_components: base.max_inter_stage_shader_components, + max_compute_workgroup_storage_size: base.max_compute_workgroup_storage_size, //TODO? + max_compute_invocations_per_workgroup: + d3d12_ty::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP, + max_compute_workgroup_size_x: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_X, + max_compute_workgroup_size_y: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Y, + max_compute_workgroup_size_z: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Z, + max_compute_workgroups_per_dimension: + d3d12_ty::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + max_buffer_size: u64::MAX, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new( + d3d12_ty::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64, + ) + .unwrap(), + buffer_copy_pitch: wgt::BufferSize::new( + d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64, + ) + .unwrap(), + }, + downlevel: wgt::DownlevelCapabilities::default(), + }, + }) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + _features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let queue = { + profiling::scope!("ID3D12Device::CreateCommandQueue"); + self.device + .create_command_queue( + d3d12::CmdListType::Direct, + d3d12::Priority::Normal, + d3d12::CommandQueueFlags::empty(), + 0, + ) + .into_device_result("Queue creation")? + }; + + let device = super::Device::new( + self.device, + queue, + self.private_caps, + &self.library, + self.dx12_shader_compiler.clone(), + )?; + Ok(crate::OpenDevice { + device, + queue: super::Queue { + raw: queue, + temp_lists: Vec::new(), + }, + }) + } + + #[allow(trivial_casts)] + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + + let raw_format = match auxil::dxgi::conv::map_texture_format_failable(format) { + Some(f) => f, + None => return Tfc::empty(), + }; + let srv_uav_format = if format.is_combined_depth_stencil_format() { + auxil::dxgi::conv::map_texture_format_for_srv_uav( + format, + // use the depth aspect here as opposed to stencil since it has more capabilities + crate::FormatAspects::DEPTH, + ) + } else { + auxil::dxgi::conv::map_texture_format_for_srv_uav( + format, + crate::FormatAspects::from(format), + ) + } + .unwrap(); + + let mut data = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: raw_format, + Support1: unsafe { mem::zeroed() }, + Support2: unsafe { mem::zeroed() }, + }; + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + &mut data as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>() as _, + ) + }); + + // Because we use a different format for SRV and UAV views of depth textures, we need to check + // the features that use SRV/UAVs using the no-depth format. + let mut data_srv_uav = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: srv_uav_format, + Support1: d3d12_ty::D3D12_FORMAT_SUPPORT1_NONE, + Support2: d3d12_ty::D3D12_FORMAT_SUPPORT2_NONE, + }; + if raw_format != srv_uav_format { + // Only-recheck if we're using a different format + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + ptr::addr_of_mut!(data_srv_uav).cast(), + DWORD::try_from(mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>()) + .unwrap(), + ) + }); + } else { + // Same format, just copy over. + data_srv_uav = data; + } + + let mut caps = Tfc::COPY_SRC | Tfc::COPY_DST; + let is_texture = data.Support1 + & (d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE1D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE2D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE3D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURECUBE) + != 0; + // SRVs use srv_uav_format + caps.set( + Tfc::SAMPLED, + is_texture && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_LOAD != 0, + ); + caps.set( + Tfc::SAMPLED_LINEAR, + data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT_BLEND, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_BLENDABLE != 0, + ); + caps.set( + Tfc::DEPTH_STENCIL_ATTACHMENT, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL != 0, + ); + // UAVs use srv_uav_format + caps.set( + Tfc::STORAGE, + data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW + != 0, + ); + caps.set( + Tfc::STORAGE_READ_WRITE, + data_srv_uav.Support2 & d3d12_ty::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD != 0, + ); + + // We load via UAV/SRV so use srv_uav_format + let no_msaa_load = caps.contains(Tfc::SAMPLED) + && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD == 0; + + let no_msaa_target = data.Support1 + & (d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET + | d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) + != 0 + && data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET == 0; + + caps.set( + Tfc::MULTISAMPLE_RESOLVE, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE != 0, + ); + + let mut ms_levels = d3d12_ty::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS { + Format: raw_format, + SampleCount: 0, + Flags: d3d12_ty::D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, + NumQualityLevels: 0, + }; + + let mut set_sample_count = |sc: u32, tfc: Tfc| { + ms_levels.SampleCount = sc; + + if unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + <*mut _>::cast(&mut ms_levels), + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS>() as _, + ) + } == winerror::S_OK + && ms_levels.NumQualityLevels != 0 + { + caps.set(tfc, !no_msaa_load && !no_msaa_target); + } + }; + + set_sample_count(2, Tfc::MULTISAMPLE_X2); + set_sample_count(4, Tfc::MULTISAMPLE_X4); + set_sample_count(8, Tfc::MULTISAMPLE_X8); + set_sample_count(16, Tfc::MULTISAMPLE_X16); + + caps + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + let current_extent = { + match surface.target { + SurfaceTarget::WndHandle(wnd_handle) => { + let mut rect: windef::RECT = unsafe { mem::zeroed() }; + if unsafe { winuser::GetClientRect(wnd_handle, &mut rect) } != 0 { + Some(wgt::Extent3d { + width: (rect.right - rect.left) as u32, + height: (rect.bottom - rect.top) as u32, + depth_or_array_layers: 1, + }) + } else { + log::warn!("Unable to get the window client rect"); + None + } + } + SurfaceTarget::Visual(_) | SurfaceTarget::SurfaceHandle(_) => None, + } + }; + + let mut present_modes = vec![wgt::PresentMode::Mailbox, wgt::PresentMode::Fifo]; + if surface.supports_allow_tearing { + present_modes.push(wgt::PresentMode::Immediate); + } + + Some(crate::SurfaceCapabilities { + formats: vec![ + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Rgba8UnormSrgb, + wgt::TextureFormat::Rgba8Unorm, + wgt::TextureFormat::Rgb10a2Unorm, + wgt::TextureFormat::Rgba16Float, + ], + // we currently use a flip effect which supports 2..=16 buffers + swap_chain_sizes: 2..=16, + current_extent, + // TODO: figure out the exact bounds + extents: wgt::Extent3d { + width: 16, + height: 16, + depth_or_array_layers: 1, + }..=wgt::Extent3d { + width: 4096, + height: 4096, + depth_or_array_layers: 1, + }, + usage: crate::TextureUses::COLOR_TARGET + | crate::TextureUses::COPY_SRC + | crate::TextureUses::COPY_DST, + present_modes, + composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp(self.presentation_timer.get_timestamp_ns()) + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/command.rs b/third_party/rust/wgpu-hal/src/dx12/command.rs new file mode 100644 index 0000000000..4786a61bf9 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/command.rs @@ -0,0 +1,1143 @@ +use crate::auxil::{self, dxgi::result::HResult as _}; + +use super::conv; +use std::{mem, ops::Range, ptr}; +use winapi::um::d3d12 as d3d12_ty; + +fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12_ty::D3D12_BOX { + d3d12_ty::D3D12_BOX { + left: origin.x, + top: origin.y, + right: origin.x + size.width, + bottom: origin.y + size.height, + front: origin.z, + back: origin.z + size.depth, + } +} + +impl crate::BufferTextureCopy { + fn to_subresource_footprint( + &self, + format: wgt::TextureFormat, + ) -> d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + let (block_width, block_height) = format.block_dimensions(); + d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: self.buffer_layout.offset, + Footprint: d3d12_ty::D3D12_SUBRESOURCE_FOOTPRINT { + Format: auxil::dxgi::conv::map_texture_format_for_copy( + format, + self.texture_base.aspect, + ) + .unwrap(), + Width: self.size.width, + Height: self + .buffer_layout + .rows_per_image + .map_or(self.size.height, |count| count * block_height), + Depth: self.size.depth, + RowPitch: { + let actual = self.buffer_layout.bytes_per_row.unwrap_or_else(|| { + // this may happen for single-line updates + let block_size = format + .block_size(Some(self.texture_base.aspect.map())) + .unwrap(); + (self.size.width / block_width) * block_size + }); + wgt::math::align_to(actual, d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + }, + }, + } + } +} + +impl super::Temp { + fn prepare_marker(&mut self, marker: &str) -> (&[u16], u32) { + self.marker.clear(); + self.marker.extend(marker.encode_utf16()); + self.marker.push(0); + (&self.marker, self.marker.len() as u32 * 2) + } +} + +impl super::CommandEncoder { + unsafe fn begin_pass(&mut self, kind: super::PassKind, label: crate::Label) { + let list = self.list.unwrap(); + self.pass.kind = kind; + if let Some(label) = label { + let (wide_label, size) = self.temp.prepare_marker(label); + unsafe { list.BeginEvent(0, wide_label.as_ptr() as *const _, size) }; + self.pass.has_label = true; + } + self.pass.dirty_root_elements = 0; + self.pass.dirty_vertex_buffers = 0; + list.set_descriptor_heaps(&[self.shared.heap_views.raw, self.shared.heap_samplers.raw]); + } + + unsafe fn end_pass(&mut self) { + let list = self.list.unwrap(); + list.set_descriptor_heaps(&[]); + if self.pass.has_label { + unsafe { list.EndEvent() }; + } + self.pass.clear(); + } + + unsafe fn prepare_draw(&mut self, base_vertex: i32, base_instance: u32) { + while self.pass.dirty_vertex_buffers != 0 { + let list = self.list.unwrap(); + let index = self.pass.dirty_vertex_buffers.trailing_zeros(); + self.pass.dirty_vertex_buffers ^= 1 << index; + unsafe { + list.IASetVertexBuffers( + index, + 1, + self.pass.vertex_buffers.as_ptr().offset(index as isize), + ); + } + } + if let Some(root_index) = self.pass.layout.special_constants_root_index { + let needs_update = match self.pass.root_elements[root_index as usize] { + super::RootElement::SpecialConstantBuffer { + base_vertex: other_vertex, + base_instance: other_instance, + other: _, + } => base_vertex != other_vertex || base_instance != other_instance, + _ => true, + }; + if needs_update { + self.pass.dirty_root_elements |= 1 << root_index; + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + base_vertex, + base_instance, + other: 0, + }; + } + } + self.update_root_elements(); + } + + fn prepare_dispatch(&mut self, count: [u32; 3]) { + if let Some(root_index) = self.pass.layout.special_constants_root_index { + let needs_update = match self.pass.root_elements[root_index as usize] { + super::RootElement::SpecialConstantBuffer { + base_vertex, + base_instance, + other, + } => [base_vertex as u32, base_instance, other] != count, + _ => true, + }; + if needs_update { + self.pass.dirty_root_elements |= 1 << root_index; + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + base_vertex: count[0] as i32, + base_instance: count[1], + other: count[2], + }; + } + } + self.update_root_elements(); + } + + //Note: we have to call this lazily before draw calls. Otherwise, D3D complains + // about the root parameters being incompatible with root signature. + fn update_root_elements(&mut self) { + use super::{BufferViewKind as Bvk, PassKind as Pk}; + + while self.pass.dirty_root_elements != 0 { + let list = self.list.unwrap(); + let index = self.pass.dirty_root_elements.trailing_zeros(); + self.pass.dirty_root_elements ^= 1 << index; + + match self.pass.root_elements[index as usize] { + super::RootElement::Empty => log::error!("Root index {} is not bound", index), + super::RootElement::Constant => { + let info = self.pass.layout.root_constant_info.as_ref().unwrap(); + + for offset in info.range.clone() { + let val = self.pass.constant_data[offset as usize]; + match self.pass.kind { + Pk::Render => list.set_graphics_root_constant(index, val, offset), + Pk::Compute => list.set_compute_root_constant(index, val, offset), + Pk::Transfer => (), + } + } + } + super::RootElement::SpecialConstantBuffer { + base_vertex, + base_instance, + other, + } => match self.pass.kind { + Pk::Render => { + list.set_graphics_root_constant(index, base_vertex as u32, 0); + list.set_graphics_root_constant(index, base_instance, 1); + } + Pk::Compute => { + list.set_compute_root_constant(index, base_vertex as u32, 0); + list.set_compute_root_constant(index, base_instance, 1); + list.set_compute_root_constant(index, other, 2); + } + Pk::Transfer => (), + }, + super::RootElement::Table(descriptor) => match self.pass.kind { + Pk::Render => list.set_graphics_root_descriptor_table(index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(index, descriptor), + Pk::Transfer => (), + }, + super::RootElement::DynamicOffsetBuffer { kind, address } => { + match (self.pass.kind, kind) { + (Pk::Render, Bvk::Constant) => { + list.set_graphics_root_constant_buffer_view(index, address) + } + (Pk::Compute, Bvk::Constant) => { + list.set_compute_root_constant_buffer_view(index, address) + } + (Pk::Render, Bvk::ShaderResource) => { + list.set_graphics_root_shader_resource_view(index, address) + } + (Pk::Compute, Bvk::ShaderResource) => { + list.set_compute_root_shader_resource_view(index, address) + } + (Pk::Render, Bvk::UnorderedAccess) => { + list.set_graphics_root_unordered_access_view(index, address) + } + (Pk::Compute, Bvk::UnorderedAccess) => { + list.set_compute_root_unordered_access_view(index, address) + } + (Pk::Transfer, _) => (), + } + } + } + } + } + + fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) { + log::trace!("Reset signature {:?}", layout.signature); + if let Some(root_index) = layout.special_constants_root_index { + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + base_vertex: 0, + base_instance: 0, + other: 0, + }; + } + self.pass.layout = layout.clone(); + self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1; + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let list = loop { + if let Some(list) = self.free_lists.pop() { + let reset_result = list + .reset(self.allocator, d3d12::PipelineState::null()) + .into_result(); + if reset_result.is_ok() { + break Some(list); + } else { + unsafe { + list.destroy(); + } + } + } else { + break None; + } + }; + + let list = if let Some(list) = list { + list + } else { + self.device + .create_graphics_command_list( + d3d12::CmdListType::Direct, + self.allocator, + d3d12::PipelineState::null(), + 0, + ) + .into_device_result("Create command list")? + }; + + if let Some(label) = label { + let cwstr = conv::map_label(label); + unsafe { list.SetName(cwstr.as_ptr()) }; + } + + self.list = Some(list); + self.temp.clear(); + self.pass.clear(); + Ok(()) + } + unsafe fn discard_encoding(&mut self) { + if let Some(list) = self.list.take() { + if list.close().into_result().is_ok() { + self.free_lists.push(list); + } else { + unsafe { + list.destroy(); + } + } + } + } + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + let raw = self.list.take().unwrap(); + let closed = raw.close().into_result().is_ok(); + Ok(super::CommandBuffer { raw, closed }) + } + unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) { + for cmd_buf in command_buffers { + if cmd_buf.closed { + self.free_lists.push(cmd_buf.raw); + } else { + unsafe { + cmd_buf.raw.destroy(); + } + } + } + self.allocator.reset(); + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + self.temp.barriers.clear(); + + log::trace!("List {:p} buffer transitions", self.list.unwrap().as_ptr()); + for barrier in barriers { + log::trace!( + "\t{:p}: usage {:?}..{:?}", + barrier.buffer.resource.as_ptr(), + barrier.usage.start, + barrier.usage.end + ); + let s0 = conv::map_buffer_usage_to_state(barrier.usage.start); + let s1 = conv::map_buffer_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + } + }; + self.temp.barriers.push(raw); + } else if barrier.usage.start == crate::BufferUses::STORAGE_READ_WRITE { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + } + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + unsafe { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + }; + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + self.temp.barriers.clear(); + + log::trace!("List {:p} texture transitions", self.list.unwrap().as_ptr()); + for barrier in barriers { + log::trace!( + "\t{:p}: usage {:?}..{:?}, range {:?}", + barrier.texture.resource.as_ptr(), + barrier.usage.start, + barrier.usage.end, + barrier.range + ); + let s0 = conv::map_texture_usage_to_state(barrier.usage.start); + let s1 = conv::map_texture_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + } + }; + + let tex_mip_level_count = barrier.texture.mip_level_count; + let tex_array_layer_count = barrier.texture.array_layer_count(); + + if barrier.range.is_full_resource( + barrier.texture.format, + tex_mip_level_count, + tex_array_layer_count, + ) { + // Only one barrier if it affects the whole image. + self.temp.barriers.push(raw); + } else { + // Selected texture aspect is relevant if the texture format has both depth _and_ stencil aspects. + let planes = if barrier.texture.format.is_combined_depth_stencil_format() { + match barrier.range.aspect { + wgt::TextureAspect::All => 0..2, + wgt::TextureAspect::DepthOnly => 0..1, + wgt::TextureAspect::StencilOnly => 1..2, + } + } else { + match barrier.texture.format { + wgt::TextureFormat::Stencil8 => 1..2, + wgt::TextureFormat::Depth24Plus => 0..2, // TODO: investigate why tests fail if we set this to 0..1 + _ => 0..1, + } + }; + + for mip_level in barrier.range.mip_range(tex_mip_level_count) { + for array_layer in barrier.range.layer_range(tex_array_layer_count) { + for plane in planes.clone() { + unsafe { + raw.u.Transition_mut().Subresource = barrier + .texture + .calc_subresource(mip_level, array_layer, plane); + }; + self.temp.barriers.push(raw); + } + } + } + } + } else if barrier.usage.start == crate::TextureUses::STORAGE_READ_WRITE { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + } + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + unsafe { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + }; + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + let list = self.list.unwrap(); + let mut offset = range.start; + while offset < range.end { + let size = super::ZERO_BUFFER_SIZE.min(range.end - offset); + unsafe { + list.CopyBufferRegion( + buffer.resource.as_mut_ptr(), + offset, + self.shared.zero_buffer.as_mut_ptr(), + 0, + size, + ) + }; + offset += size; + } + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let list = self.list.unwrap(); + for r in regions { + unsafe { + list.CopyBufferRegion( + dst.resource.as_mut_ptr(), + r.dst_offset, + src.resource.as_mut_ptr(), + r.src_offset, + r.size.get(), + ) + }; + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let list = self.list.unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + + for r in regions { + let src_box = make_box(&r.src_base.origin, &r.size); + unsafe { + *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.src_base) + }; + unsafe { + *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.dst_base) + }; + + unsafe { + list.CopyTextureRegion( + &dst_location, + r.dst_base.origin.x, + r.dst_base.origin.y, + r.dst_base.origin.z, + &src_location, + &src_box, + ) + }; + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let list = self.list.unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + for r in regions { + let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); + unsafe { + *src_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(dst.format) + }; + unsafe { + *dst_location.u.SubresourceIndex_mut() = + dst.calc_subresource_for_copy(&r.texture_base) + }; + unsafe { + list.CopyTextureRegion( + &dst_location, + r.texture_base.origin.x, + r.texture_base.origin.y, + r.texture_base.origin.z, + &src_location, + &src_box, + ) + }; + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let list = self.list.unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: unsafe { mem::zeroed() }, + }; + for r in regions { + let src_box = make_box(&r.texture_base.origin, &r.size); + unsafe { + *src_location.u.SubresourceIndex_mut() = + src.calc_subresource_for_copy(&r.texture_base) + }; + unsafe { + *dst_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(src.format) + }; + unsafe { list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box) }; + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list + .unwrap() + .BeginQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + }; + } + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list + .unwrap() + .EndQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + }; + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list.unwrap().EndQuery( + set.raw.as_mut_ptr(), + d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + index, + ) + }; + } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range<u32>) { + // nothing to do here + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _stride: wgt::BufferSize, + ) { + unsafe { + self.list.unwrap().ResolveQueryData( + set.raw.as_mut_ptr(), + set.raw_ty, + range.start, + range.end - range.start, + buffer.resource.as_mut_ptr(), + offset, + ) + }; + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + unsafe { self.begin_pass(super::PassKind::Render, desc.label) }; + let mut color_views = [d3d12::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_ATTACHMENTS]; + for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { + if let Some(cat) = cat.as_ref() { + *rtv = cat.target.view.handle_rtv.unwrap().raw; + } else { + *rtv = self.null_rtv_handle.raw; + } + } + + let ds_view = match desc.depth_stencil_attachment { + None => ptr::null(), + Some(ref ds) => { + if ds.target.usage == crate::TextureUses::DEPTH_STENCIL_WRITE { + &ds.target.view.handle_dsv_rw.as_ref().unwrap().raw + } else { + &ds.target.view.handle_dsv_ro.as_ref().unwrap().raw + } + } + }; + + let list = self.list.unwrap(); + unsafe { + list.OMSetRenderTargets( + desc.color_attachments.len() as u32, + color_views.as_ptr(), + 0, + ds_view, + ) + }; + + self.pass.resolves.clear(); + for (rtv, cat) in color_views.iter().zip(desc.color_attachments.iter()) { + if let Some(cat) = cat.as_ref() { + if !cat.ops.contains(crate::AttachmentOps::LOAD) { + let value = [ + cat.clear_value.r as f32, + cat.clear_value.g as f32, + cat.clear_value.b as f32, + cat.clear_value.a as f32, + ]; + list.clear_render_target_view(*rtv, value, &[]); + } + if let Some(ref target) = cat.resolve_target { + self.pass.resolves.push(super::PassResolve { + src: cat.target.view.target_base, + dst: target.view.target_base, + format: target.view.raw_format, + }); + } + } + } + + if let Some(ref ds) = desc.depth_stencil_attachment { + let mut flags = d3d12::ClearFlags::empty(); + let aspects = ds.target.view.aspects; + if !ds.depth_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::DEPTH) + { + flags |= d3d12::ClearFlags::DEPTH; + } + if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::STENCIL) + { + flags |= d3d12::ClearFlags::STENCIL; + } + + if !ds_view.is_null() && !flags.is_empty() { + list.clear_depth_stencil_view( + unsafe { *ds_view }, + flags, + ds.clear_value.0, + ds.clear_value.1 as u8, + &[], + ); + } + } + + let raw_vp = d3d12_ty::D3D12_VIEWPORT { + TopLeftX: 0.0, + TopLeftY: 0.0, + Width: desc.extent.width as f32, + Height: desc.extent.height as f32, + MinDepth: 0.0, + MaxDepth: 1.0, + }; + let raw_rect = d3d12_ty::D3D12_RECT { + left: 0, + top: 0, + right: desc.extent.width as i32, + bottom: desc.extent.height as i32, + }; + unsafe { list.RSSetViewports(1, &raw_vp) }; + unsafe { list.RSSetScissorRects(1, &raw_rect) }; + } + + unsafe fn end_render_pass(&mut self) { + if !self.pass.resolves.is_empty() { + let list = self.list.unwrap(); + self.temp.barriers.clear(); + + // All the targets are expected to be in `COLOR_TARGET` state, + // but D3D12 has special source/destination states for the resolves. + for resolve in self.pass.resolves.iter() { + let mut barrier = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + //Note: this assumes `D3D12_RESOURCE_STATE_RENDER_TARGET`. + // If it's not the case, we can include the `TextureUses` in `PassResove`. + unsafe { + *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.src.0.as_mut_ptr(), + Subresource: resolve.src.1, + StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_SOURCE, + } + }; + self.temp.barriers.push(barrier); + unsafe { + *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.dst.0.as_mut_ptr(), + Subresource: resolve.dst.1, + StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_DEST, + } + }; + self.temp.barriers.push(barrier); + } + + if !self.temp.barriers.is_empty() { + profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); + unsafe { + list.ResourceBarrier( + self.temp.barriers.len() as u32, + self.temp.barriers.as_ptr(), + ) + }; + } + + for resolve in self.pass.resolves.iter() { + profiling::scope!("ID3D12GraphicsCommandList::ResolveSubresource"); + unsafe { + list.ResolveSubresource( + resolve.dst.0.as_mut_ptr(), + resolve.dst.1, + resolve.src.0.as_mut_ptr(), + resolve.src.1, + resolve.format, + ) + }; + } + + // Flip all the barriers to reverse, back into `COLOR_TARGET`. + for barrier in self.temp.barriers.iter_mut() { + let transition = unsafe { barrier.u.Transition_mut() }; + mem::swap(&mut transition.StateBefore, &mut transition.StateAfter); + } + if !self.temp.barriers.is_empty() { + profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); + unsafe { + list.ResourceBarrier( + self.temp.barriers.len() as u32, + self.temp.barriers.as_ptr(), + ) + }; + } + } + + unsafe { self.end_pass() }; + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + log::trace!("Set group[{}]", index); + let info = &layout.bind_group_infos[index as usize]; + let mut root_index = info.base_root_index as usize; + + // Bind CBV/SRC/UAV descriptor tables + if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { + log::trace!("\tBind element[{}] = view", root_index); + self.pass.root_elements[root_index] = + super::RootElement::Table(group.handle_views.unwrap().gpu); + root_index += 1; + } + + // Bind Sampler descriptor tables. + if info.tables.contains(super::TableTypes::SAMPLERS) { + log::trace!("\tBind element[{}] = sampler", root_index); + self.pass.root_elements[root_index] = + super::RootElement::Table(group.handle_samplers.unwrap().gpu); + root_index += 1; + } + + // Bind root descriptors + for ((&kind, &gpu_base), &offset) in info + .dynamic_buffers + .iter() + .zip(group.dynamic_buffers.iter()) + .zip(dynamic_offsets) + { + log::trace!("\tBind element[{}] = dynamic", root_index); + self.pass.root_elements[root_index] = super::RootElement::DynamicOffsetBuffer { + kind, + address: gpu_base + offset as d3d12::GpuAddress, + }; + root_index += 1; + } + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= (1 << root_index) - (1 << info.base_root_index); + } else { + // D3D12 requires full reset on signature change + self.reset_signature(&layout.shared); + }; + } + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + _stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + let info = layout.shared.root_constant_info.as_ref().unwrap(); + + self.pass.root_elements[info.root_index as usize] = super::RootElement::Constant; + + self.pass.constant_data[(offset as usize)..(offset as usize + data.len())] + .copy_from_slice(data); + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= 1 << info.root_index; + } else { + // D3D12 requires full reset on signature change + self.reset_signature(&layout.shared); + }; + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + let (wide_label, size) = self.temp.prepare_marker(label); + unsafe { + self.list + .unwrap() + .SetMarker(0, wide_label.as_ptr() as *const _, size) + }; + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + let (wide_label, size) = self.temp.prepare_marker(group_label); + unsafe { + self.list + .unwrap() + .BeginEvent(0, wide_label.as_ptr() as *const _, size) + }; + } + unsafe fn end_debug_marker(&mut self) { + unsafe { self.list.unwrap().EndEvent() } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + let list = self.list.unwrap(); + + if self.pass.layout.signature != pipeline.layout.signature { + // D3D12 requires full reset on signature change + list.set_graphics_root_signature(pipeline.layout.signature); + self.reset_signature(&pipeline.layout); + }; + + list.set_pipeline_state(pipeline.raw); + unsafe { list.IASetPrimitiveTopology(pipeline.topology) }; + + for (index, (vb, &stride)) in self + .pass + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_strides.iter()) + .enumerate() + { + if let Some(stride) = stride { + if vb.StrideInBytes != stride.get() { + vb.StrideInBytes = stride.get(); + self.pass.dirty_vertex_buffers |= 1 << index; + } + } + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + self.list.unwrap().set_index_buffer( + binding.resolve_address(), + binding.resolve_size() as u32, + auxil::dxgi::conv::map_index_format(format), + ); + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let vb = &mut self.pass.vertex_buffers[index as usize]; + vb.BufferLocation = binding.resolve_address(); + vb.SizeInBytes = binding.resolve_size() as u32; + self.pass.dirty_vertex_buffers |= 1 << index; + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let raw_vp = d3d12_ty::D3D12_VIEWPORT { + TopLeftX: rect.x, + TopLeftY: rect.y, + Width: rect.w, + Height: rect.h, + MinDepth: depth_range.start, + MaxDepth: depth_range.end, + }; + unsafe { self.list.unwrap().RSSetViewports(1, &raw_vp) }; + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + let raw_rect = d3d12_ty::D3D12_RECT { + left: rect.x as i32, + top: rect.y as i32, + right: (rect.x + rect.w) as i32, + bottom: (rect.y + rect.h) as i32, + }; + unsafe { self.list.unwrap().RSSetScissorRects(1, &raw_rect) }; + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + self.list.unwrap().set_stencil_reference(value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.list.unwrap().set_blend_factor(*color); + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { self.prepare_draw(start_vertex as i32, start_instance) }; + self.list + .unwrap() + .draw(vertex_count, instance_count, start_vertex, start_instance); + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { self.prepare_draw(base_vertex, start_instance) }; + self.list.unwrap().draw_indexed( + index_count, + instance_count, + start_index, + base_vertex, + start_instance, + ); + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ) + }; + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ) + }; + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe { self.begin_pass(super::PassKind::Compute, desc.label) }; + } + unsafe fn end_compute_pass(&mut self) { + unsafe { self.end_pass() }; + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + let list = self.list.unwrap(); + + if self.pass.layout.signature != pipeline.layout.signature { + // D3D12 requires full reset on signature change + list.set_compute_root_signature(pipeline.layout.signature); + self.reset_signature(&pipeline.layout); + }; + + list.set_pipeline_state(pipeline.raw); + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + self.prepare_dispatch(count); + self.list.unwrap().dispatch(count); + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + self.prepare_dispatch([0; 3]); + //TODO: update special constants indirectly + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.dispatch.as_mut_ptr(), + 1, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/conv.rs b/third_party/rust/wgpu-hal/src/dx12/conv.rs new file mode 100644 index 0000000000..7b39e98ad2 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/conv.rs @@ -0,0 +1,350 @@ +use std::iter; +use winapi::{ + shared::minwindef::BOOL, + um::{d3d12 as d3d12_ty, d3dcommon}, +}; + +pub fn map_buffer_usage_to_resource_flags( + usage: crate::BufferUses, +) -> d3d12_ty::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + if usage.contains(crate::BufferUses::STORAGE_READ_WRITE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + flags +} + +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> d3d12_ty::D3D12_RESOURCE_DIMENSION { + match dim { + wgt::TextureDimension::D1 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE1D, + wgt::TextureDimension::D2 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE2D, + wgt::TextureDimension::D3 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE3D, + } +} + +pub fn map_texture_usage_to_resource_flags( + usage: crate::TextureUses, +) -> d3d12_ty::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + + if usage.contains(crate::TextureUses::COLOR_TARGET) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + if usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + if !usage.contains(crate::TextureUses::RESOURCE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + } + if usage.contains(crate::TextureUses::STORAGE_READ_WRITE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + + flags +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE { + use wgt::AddressMode as Am; + match mode { + Am::Repeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_WRAP, + Am::MirrorRepeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR, + Am::ClampToEdge => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + Am::ClampToBorder => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_BORDER, + //Am::MirrorClamp => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, + } +} + +pub fn map_filter_mode(mode: wgt::FilterMode) -> d3d12_ty::D3D12_FILTER_TYPE { + match mode { + wgt::FilterMode::Nearest => d3d12_ty::D3D12_FILTER_TYPE_POINT, + wgt::FilterMode::Linear => d3d12_ty::D3D12_FILTER_TYPE_LINEAR, + } +} + +pub fn map_comparison(func: wgt::CompareFunction) -> d3d12_ty::D3D12_COMPARISON_FUNC { + use wgt::CompareFunction as Cf; + match func { + Cf::Never => d3d12_ty::D3D12_COMPARISON_FUNC_NEVER, + Cf::Less => d3d12_ty::D3D12_COMPARISON_FUNC_LESS, + Cf::LessEqual => d3d12_ty::D3D12_COMPARISON_FUNC_LESS_EQUAL, + Cf::Equal => d3d12_ty::D3D12_COMPARISON_FUNC_EQUAL, + Cf::GreaterEqual => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER_EQUAL, + Cf::Greater => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER, + Cf::NotEqual => d3d12_ty::D3D12_COMPARISON_FUNC_NOT_EQUAL, + Cf::Always => d3d12_ty::D3D12_COMPARISON_FUNC_ALWAYS, + } +} + +pub fn map_border_color(border_color: Option<wgt::SamplerBorderColor>) -> [f32; 4] { + use wgt::SamplerBorderColor as Sbc; + match border_color { + Some(Sbc::TransparentBlack) | Some(Sbc::Zero) | None => [0.0; 4], + Some(Sbc::OpaqueBlack) => [0.0, 0.0, 0.0, 1.0], + Some(Sbc::OpaqueWhite) => [1.0; 4], + } +} + +pub fn map_visibility(visibility: wgt::ShaderStages) -> d3d12::ShaderVisibility { + match visibility { + wgt::ShaderStages::VERTEX => d3d12::ShaderVisibility::VS, + wgt::ShaderStages::FRAGMENT => d3d12::ShaderVisibility::PS, + _ => d3d12::ShaderVisibility::All, + } +} + +pub fn map_binding_type(ty: &wgt::BindingType) -> d3d12::DescriptorRangeType { + use wgt::BindingType as Bt; + match *ty { + Bt::Sampler { .. } => d3d12::DescriptorRangeType::Sampler, + Bt::Buffer { + ty: wgt::BufferBindingType::Uniform, + .. + } => d3d12::DescriptorRangeType::CBV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: true }, + .. + } + | Bt::Texture { .. } => d3d12::DescriptorRangeType::SRV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: false }, + .. + } + | Bt::StorageTexture { .. } => d3d12::DescriptorRangeType::UAV, + } +} + +pub fn map_label(name: &str) -> Vec<u16> { + name.encode_utf16().chain(iter::once(0)).collect() +} + +pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> d3d12_ty::D3D12_RESOURCE_STATES { + use crate::BufferUses as Bu; + let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + + if usage.intersects(Bu::COPY_SRC) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Bu::COPY_DST) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Bu::INDEX) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + if usage.intersects(Bu::VERTEX | Bu::UNIFORM) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + if usage.intersects(Bu::STORAGE_READ_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } else if usage.intersects(Bu::STORAGE_READ) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Bu::INDIRECT) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + } + state +} + +pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12_ty::D3D12_RESOURCE_STATES { + use crate::TextureUses as Tu; + let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + //Note: `RESOLVE_SOURCE` and `RESOLVE_DEST` are not used here + //Note: `PRESENT` is the same as `COMMON` + if usage == crate::TextureUses::UNINITIALIZED { + return state; + } + + if usage.intersects(Tu::COPY_SRC) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Tu::COPY_DST) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Tu::RESOURCE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Tu::COLOR_TARGET) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET; + } + if usage.intersects(Tu::DEPTH_STENCIL_READ) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_READ; + } + if usage.intersects(Tu::DEPTH_STENCIL_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + if usage.intersects(Tu::STORAGE_READ | Tu::STORAGE_READ_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + state +} + +pub fn map_topology( + topology: wgt::PrimitiveTopology, +) -> ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE, + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, +) { + match topology { + wgt::PrimitiveTopology::PointList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + ), + wgt::PrimitiveTopology::LineList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINELIST, + ), + wgt::PrimitiveTopology::LineStrip => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + ), + wgt::PrimitiveTopology::TriangleList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + ), + wgt::PrimitiveTopology::TriangleStrip => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + ), + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE { + match mode { + wgt::PolygonMode::Point => { + log::error!("Point rasterization is not supported"); + d3d12_ty::D3D12_FILL_MODE_WIREFRAME + } + wgt::PolygonMode::Line => d3d12_ty::D3D12_FILL_MODE_WIREFRAME, + wgt::PolygonMode::Fill => d3d12_ty::D3D12_FILL_MODE_SOLID, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => d3d12_ty::D3D12_BLEND_ZERO, + Bf::One => d3d12_ty::D3D12_BLEND_ONE, + Bf::Src if is_alpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, + Bf::Src => d3d12_ty::D3D12_BLEND_SRC_COLOR, + Bf::OneMinusSrc if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, + Bf::OneMinusSrc => d3d12_ty::D3D12_BLEND_INV_SRC_COLOR, + Bf::Dst if is_alpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, + Bf::Dst => d3d12_ty::D3D12_BLEND_DEST_COLOR, + Bf::OneMinusDst if is_alpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, + Bf::OneMinusDst => d3d12_ty::D3D12_BLEND_INV_DEST_COLOR, + Bf::SrcAlpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, + Bf::OneMinusSrcAlpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, + Bf::DstAlpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, + Bf::OneMinusDstAlpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, + Bf::Constant => d3d12_ty::D3D12_BLEND_BLEND_FACTOR, + Bf::OneMinusConstant => d3d12_ty::D3D12_BLEND_INV_BLEND_FACTOR, + Bf::SrcAlphaSaturated => d3d12_ty::D3D12_BLEND_SRC_ALPHA_SAT, + //Bf::Src1Color if is_alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + //Bf::Src1Color => d3d12_ty::D3D12_BLEND_SRC1_COLOR, + //Bf::OneMinusSrc1Color if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + //Bf::OneMinusSrc1Color => d3d12_ty::D3D12_BLEND_INV_SRC1_COLOR, + //Bf::Src1Alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + //Bf::OneMinusSrc1Alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_blend_component( + component: &wgt::BlendComponent, + is_alpha: bool, +) -> ( + d3d12_ty::D3D12_BLEND_OP, + d3d12_ty::D3D12_BLEND, + d3d12_ty::D3D12_BLEND, +) { + let raw_op = match component.operation { + wgt::BlendOperation::Add => d3d12_ty::D3D12_BLEND_OP_ADD, + wgt::BlendOperation::Subtract => d3d12_ty::D3D12_BLEND_OP_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => d3d12_ty::D3D12_BLEND_OP_REV_SUBTRACT, + wgt::BlendOperation::Min => d3d12_ty::D3D12_BLEND_OP_MIN, + wgt::BlendOperation::Max => d3d12_ty::D3D12_BLEND_OP_MAX, + }; + let raw_src = map_blend_factor(component.src_factor, is_alpha); + let raw_dst = map_blend_factor(component.dst_factor, is_alpha); + (raw_op, raw_src, raw_dst) +} + +pub fn map_render_targets( + color_targets: &[Option<wgt::ColorTargetState>], +) -> [d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC; + d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] { + let dummy_target = d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: 0, + LogicOpEnable: 0, + SrcBlend: d3d12_ty::D3D12_BLEND_ZERO, + DestBlend: d3d12_ty::D3D12_BLEND_ZERO, + BlendOp: d3d12_ty::D3D12_BLEND_OP_ADD, + SrcBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, + DestBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, + BlendOpAlpha: d3d12_ty::D3D12_BLEND_OP_ADD, + LogicOp: d3d12_ty::D3D12_LOGIC_OP_CLEAR, + RenderTargetWriteMask: 0, + }; + let mut raw_targets = [dummy_target; d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + + for (raw, ct) in raw_targets.iter_mut().zip(color_targets.iter()) { + if let Some(ct) = ct.as_ref() { + raw.RenderTargetWriteMask = ct.write_mask.bits() as u8; + if let Some(ref blend) = ct.blend { + let (color_op, color_src, color_dst) = map_blend_component(&blend.color, false); + let (alpha_op, alpha_src, alpha_dst) = map_blend_component(&blend.alpha, true); + raw.BlendEnable = 1; + raw.BlendOp = color_op; + raw.SrcBlend = color_src; + raw.DestBlend = color_dst; + raw.BlendOpAlpha = alpha_op; + raw.SrcBlendAlpha = alpha_src; + raw.DestBlendAlpha = alpha_dst; + } + } + } + + raw_targets +} + +fn map_stencil_op(op: wgt::StencilOperation) -> d3d12_ty::D3D12_STENCIL_OP { + use wgt::StencilOperation as So; + match op { + So::Keep => d3d12_ty::D3D12_STENCIL_OP_KEEP, + So::Zero => d3d12_ty::D3D12_STENCIL_OP_ZERO, + So::Replace => d3d12_ty::D3D12_STENCIL_OP_REPLACE, + So::IncrementClamp => d3d12_ty::D3D12_STENCIL_OP_INCR_SAT, + So::IncrementWrap => d3d12_ty::D3D12_STENCIL_OP_INCR, + So::DecrementClamp => d3d12_ty::D3D12_STENCIL_OP_DECR_SAT, + So::DecrementWrap => d3d12_ty::D3D12_STENCIL_OP_DECR, + So::Invert => d3d12_ty::D3D12_STENCIL_OP_INVERT, + } +} + +fn map_stencil_face(face: &wgt::StencilFaceState) -> d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { + d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { + StencilFailOp: map_stencil_op(face.fail_op), + StencilDepthFailOp: map_stencil_op(face.depth_fail_op), + StencilPassOp: map_stencil_op(face.pass_op), + StencilFunc: map_comparison(face.compare), + } +} + +pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> d3d12_ty::D3D12_DEPTH_STENCIL_DESC { + d3d12_ty::D3D12_DEPTH_STENCIL_DESC { + DepthEnable: BOOL::from(ds.is_depth_enabled()), + DepthWriteMask: if ds.depth_write_enabled { + d3d12_ty::D3D12_DEPTH_WRITE_MASK_ALL + } else { + d3d12_ty::D3D12_DEPTH_WRITE_MASK_ZERO + }, + DepthFunc: map_comparison(ds.depth_compare), + StencilEnable: BOOL::from(ds.stencil.is_enabled()), + StencilReadMask: ds.stencil.read_mask as u8, + StencilWriteMask: ds.stencil.write_mask as u8, + FrontFace: map_stencil_face(&ds.stencil.front), + BackFace: map_stencil_face(&ds.stencil.back), + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/descriptor.rs b/third_party/rust/wgpu-hal/src/dx12/descriptor.rs new file mode 100644 index 0000000000..430e3a2f4b --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/descriptor.rs @@ -0,0 +1,311 @@ +use crate::auxil::dxgi::result::HResult as _; +use bit_set::BitSet; +use parking_lot::Mutex; +use range_alloc::RangeAllocator; +use std::fmt; + +const HEAP_SIZE_FIXED: usize = 64; + +#[derive(Copy, Clone)] +pub(super) struct DualHandle { + cpu: d3d12::CpuDescriptor, + pub gpu: d3d12::GpuDescriptor, + /// How large the block allocated to this handle is. + count: u64, +} + +impl fmt::Debug for DualHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DualHandle") + .field("cpu", &self.cpu.ptr) + .field("gpu", &self.gpu.ptr) + .field("count", &self.count) + .finish() + } +} + +type DescriptorIndex = u64; + +pub(super) struct GeneralHeap { + pub raw: d3d12::DescriptorHeap, + ty: d3d12::DescriptorHeapType, + handle_size: u64, + total_handles: u64, + start: DualHandle, + ranges: Mutex<RangeAllocator<DescriptorIndex>>, +} + +impl GeneralHeap { + pub(super) fn new( + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + total_handles: u64, + ) -> Result<Self, crate::DeviceError> { + let raw = { + profiling::scope!("ID3D12Device::CreateDescriptorHeap"); + device + .create_descriptor_heap( + total_handles as u32, + ty, + d3d12::DescriptorHeapFlags::SHADER_VISIBLE, + 0, + ) + .into_device_result("Descriptor heap creation")? + }; + + Ok(Self { + raw, + ty, + handle_size: device.get_descriptor_increment_size(ty) as u64, + total_handles, + start: DualHandle { + cpu: raw.start_cpu_descriptor(), + gpu: raw.start_gpu_descriptor(), + count: 0, + }, + ranges: Mutex::new(RangeAllocator::new(0..total_handles)), + }) + } + + pub(super) fn at(&self, index: DescriptorIndex, count: u64) -> DualHandle { + assert!(index < self.total_handles); + DualHandle { + cpu: self.cpu_descriptor_at(index), + gpu: self.gpu_descriptor_at(index), + count, + } + } + + fn cpu_descriptor_at(&self, index: u64) -> d3d12::CpuDescriptor { + d3d12::CpuDescriptor { + ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, + } + } + + fn gpu_descriptor_at(&self, index: u64) -> d3d12::GpuDescriptor { + d3d12::GpuDescriptor { + ptr: self.start.gpu.ptr + self.handle_size * index, + } + } + + pub(super) fn allocate_slice(&self, count: u64) -> Result<DescriptorIndex, crate::DeviceError> { + let range = self.ranges.lock().allocate_range(count).map_err(|err| { + log::error!("Unable to allocate descriptors: {:?}", err); + crate::DeviceError::OutOfMemory + })?; + Ok(range.start) + } + + /// Free handles previously given out by this `DescriptorHeapSlice`. + /// Do not use this with handles not given out by this `DescriptorHeapSlice`. + pub(crate) fn free_slice(&self, handle: DualHandle) { + let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size; + self.ranges.lock().free_range(start..start + handle.count); + } +} + +/// Fixed-size free-list allocator for CPU descriptors. +struct FixedSizeHeap { + raw: d3d12::DescriptorHeap, + /// Bit flag representation of available handles in the heap. + /// + /// 0 - Occupied + /// 1 - free + availability: u64, + handle_size: usize, + start: d3d12::CpuDescriptor, +} + +impl FixedSizeHeap { + fn new(device: d3d12::Device, ty: d3d12::DescriptorHeapType) -> Self { + let (heap, _hr) = device.create_descriptor_heap( + HEAP_SIZE_FIXED as _, + ty, + d3d12::DescriptorHeapFlags::empty(), + 0, + ); + + Self { + handle_size: device.get_descriptor_increment_size(ty) as _, + availability: !0, // all free! + start: heap.start_cpu_descriptor(), + raw: heap, + } + } + + fn alloc_handle(&mut self) -> d3d12::CpuDescriptor { + // Find first free slot. + let slot = self.availability.trailing_zeros() as usize; + assert!(slot < HEAP_SIZE_FIXED); + // Set the slot as occupied. + self.availability ^= 1 << slot; + + d3d12::CpuDescriptor { + ptr: self.start.ptr + self.handle_size * slot, + } + } + + fn free_handle(&mut self, handle: d3d12::CpuDescriptor) { + let slot = (handle.ptr - self.start.ptr) / self.handle_size; + assert!(slot < HEAP_SIZE_FIXED); + assert_eq!(self.availability & (1 << slot), 0); + self.availability ^= 1 << slot; + } + + fn is_full(&self) -> bool { + self.availability == 0 + } + + unsafe fn destroy(&self) { + unsafe { self.raw.destroy() }; + } +} + +#[derive(Clone, Copy)] +pub(super) struct Handle { + pub raw: d3d12::CpuDescriptor, + heap_index: usize, +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Handle") + .field("ptr", &self.raw.ptr) + .field("heap_index", &self.heap_index) + .finish() + } +} + +pub(super) struct CpuPool { + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + heaps: Vec<FixedSizeHeap>, + avaliable_heap_indices: BitSet, +} + +impl CpuPool { + pub(super) fn new(device: d3d12::Device, ty: d3d12::DescriptorHeapType) -> Self { + Self { + device, + ty, + heaps: Vec::new(), + avaliable_heap_indices: BitSet::new(), + } + } + + pub(super) fn alloc_handle(&mut self) -> Handle { + let heap_index = self + .avaliable_heap_indices + .iter() + .next() + .unwrap_or_else(|| { + // Allocate a new heap + let id = self.heaps.len(); + self.heaps.push(FixedSizeHeap::new(self.device, self.ty)); + self.avaliable_heap_indices.insert(id); + id + }); + + let heap = &mut self.heaps[heap_index]; + let handle = Handle { + raw: heap.alloc_handle(), + heap_index, + }; + if heap.is_full() { + self.avaliable_heap_indices.remove(heap_index); + } + + handle + } + + pub(super) fn free_handle(&mut self, handle: Handle) { + self.heaps[handle.heap_index].free_handle(handle.raw); + self.avaliable_heap_indices.insert(handle.heap_index); + } + + pub(super) unsafe fn destroy(&self) { + for heap in &self.heaps { + unsafe { heap.destroy() }; + } + } +} + +pub(super) struct CpuHeapInner { + pub raw: d3d12::DescriptorHeap, + pub stage: Vec<d3d12::CpuDescriptor>, +} + +pub(super) struct CpuHeap { + pub inner: Mutex<CpuHeapInner>, + start: d3d12::CpuDescriptor, + handle_size: u32, + total: u32, +} + +unsafe impl Send for CpuHeap {} +unsafe impl Sync for CpuHeap {} + +impl CpuHeap { + pub(super) fn new( + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + total: u32, + ) -> Result<Self, crate::DeviceError> { + let handle_size = device.get_descriptor_increment_size(ty); + let raw = device + .create_descriptor_heap(total, ty, d3d12::DescriptorHeapFlags::empty(), 0) + .into_device_result("CPU descriptor heap creation")?; + + Ok(Self { + inner: Mutex::new(CpuHeapInner { + raw, + stage: Vec::new(), + }), + start: raw.start_cpu_descriptor(), + handle_size, + total, + }) + } + + pub(super) fn at(&self, index: u32) -> d3d12::CpuDescriptor { + d3d12::CpuDescriptor { + ptr: self.start.ptr + (self.handle_size * index) as usize, + } + } + + pub(super) unsafe fn destroy(self) { + unsafe { self.inner.into_inner().raw.destroy() }; + } +} + +impl fmt::Debug for CpuHeap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CpuHeap") + .field("start", &self.start.ptr) + .field("handle_size", &self.handle_size) + .field("total", &self.total) + .finish() + } +} + +pub(super) unsafe fn upload( + device: d3d12::Device, + src: &CpuHeapInner, + dst: &GeneralHeap, + dummy_copy_counts: &[u32], +) -> Result<DualHandle, crate::DeviceError> { + let count = src.stage.len() as u32; + let index = dst.allocate_slice(count as u64)?; + unsafe { + device.CopyDescriptors( + 1, + &dst.cpu_descriptor_at(index), + &count, + count, + src.stage.as_ptr(), + dummy_copy_counts.as_ptr(), + dst.ty as u32, + ) + }; + Ok(dst.at(index, count as u64)) +} diff --git a/third_party/rust/wgpu-hal/src/dx12/device.rs b/third_party/rust/wgpu-hal/src/dx12/device.rs new file mode 100644 index 0000000000..7e14818572 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/device.rs @@ -0,0 +1,1590 @@ +use crate::auxil::{self, dxgi::result::HResult as _}; + +use super::{conv, descriptor, view}; +use parking_lot::Mutex; +use std::{ffi, mem, num::NonZeroU32, ptr, sync::Arc}; +use winapi::{ + shared::{dxgiformat, dxgitype, minwindef::BOOL, winerror}, + um::{d3d12 as d3d12_ty, synchapi, winbase}, + Interface, +}; + +// this has to match Naga's HLSL backend, and also needs to be null-terminated +const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; + +impl super::Device { + pub(super) fn new( + raw: d3d12::Device, + present_queue: d3d12::CommandQueue, + private_caps: super::PrivateCapabilities, + library: &Arc<d3d12::D3D12Lib>, + dx12_shader_compiler: wgt::Dx12Compiler, + ) -> Result<Self, crate::DeviceError> { + let mem_allocator = super::suballocation::create_allocator_wrapper(&raw)?; + + let dxc_container = match dx12_shader_compiler { + wgt::Dx12Compiler::Dxc { + dxil_path, + dxc_path, + } => super::shader_compilation::get_dxc_container(dxc_path, dxil_path)?, + wgt::Dx12Compiler::Fxc => None, + }; + + let mut idle_fence = d3d12::Fence::null(); + let hr = unsafe { + profiling::scope!("ID3D12Device::CreateFence"); + raw.CreateFence( + 0, + d3d12_ty::D3D12_FENCE_FLAG_NONE, + &d3d12_ty::ID3D12Fence::uuidof(), + idle_fence.mut_void(), + ) + }; + hr.into_device_result("Idle fence creation")?; + + let mut zero_buffer = d3d12::Resource::null(); + unsafe { + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: super::ZERO_BUFFER_SIZE, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: d3d12_ty::D3D12_RESOURCE_FLAG_NONE, + }; + + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match private_caps.memory_architecture { + super::MemoryArchitecture::Unified { .. } => d3d12_ty::D3D12_MEMORY_POOL_L0, + super::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + profiling::scope!("Zero Buffer Allocation"); + raw.CreateCommittedResource( + &heap_properties, + d3d12_ty::D3D12_HEAP_FLAG_NONE, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + zero_buffer.mut_void(), + ) + .into_device_result("Zero buffer creation")?; + + // Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` + // this resource is zeroed by default. + }; + + // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 + let capacity_views = 1_000_000; + let capacity_samplers = 2_048; + + let shared = super::DeviceShared { + zero_buffer, + cmd_signatures: super::CommandSignatures { + draw: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::draw()], + mem::size_of::<wgt::DrawIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (draw) signature creation")?, + draw_indexed: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::draw_indexed()], + mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (draw_indexed) signature creation")?, + dispatch: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::dispatch()], + mem::size_of::<wgt::DispatchIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (dispatch) signature creation")?, + }, + heap_views: descriptor::GeneralHeap::new( + raw, + d3d12::DescriptorHeapType::CbvSrvUav, + capacity_views, + )?, + heap_samplers: descriptor::GeneralHeap::new( + raw, + d3d12::DescriptorHeapType::Sampler, + capacity_samplers, + )?, + }; + + let mut rtv_pool = descriptor::CpuPool::new(raw, d3d12::DescriptorHeapType::Rtv); + let null_rtv_handle = rtv_pool.alloc_handle(); + // A null pResource is used to initialize a null descriptor, + // which guarantees D3D11-like null binding behavior (reading 0s, writes are discarded) + raw.create_render_target_view( + d3d12::WeakPtr::null(), + &d3d12::RenderTargetViewDesc::texture_2d( + winapi::shared::dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + 0, + 0, + ), + null_rtv_handle.raw, + ); + + Ok(super::Device { + raw, + present_queue, + idler: super::Idler { + fence: idle_fence, + event: d3d12::Event::create(false, false), + }, + private_caps, + shared: Arc::new(shared), + rtv_pool: Mutex::new(rtv_pool), + dsv_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::Dsv, + )), + srv_uav_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::CbvSrvUav, + )), + sampler_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::Sampler, + )), + library: Arc::clone(library), + #[cfg(feature = "renderdoc")] + render_doc: Default::default(), + null_rtv_handle, + mem_allocator, + dxc_container, + }) + } + + pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> { + let cur_value = self.idler.fence.get_value(); + if cur_value == !0 { + return Err(crate::DeviceError::Lost); + } + + let value = cur_value + 1; + log::info!("Waiting for idle with value {}", value); + self.present_queue.signal(self.idler.fence, value); + let hr = self + .idler + .fence + .set_event_on_completion(self.idler.event, value); + hr.into_device_result("Set event")?; + unsafe { synchapi::WaitForSingleObject(self.idler.event.0, winbase::INFINITE) }; + Ok(()) + } + + fn load_shader( + &self, + stage: &crate::ProgrammableStage<super::Api>, + layout: &super::PipelineLayout, + naga_stage: naga::ShaderStage, + ) -> Result<super::CompiledShader, crate::PipelineError> { + use naga::back::hlsl; + + let stage_bit = crate::auxil::map_naga_stage(naga_stage); + let module = &stage.module.naga.module; + //TODO: reuse the writer + let mut source = String::new(); + let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options); + let reflection_info = { + profiling::scope!("naga::back::hlsl::write"); + writer + .write(module, &stage.module.naga.info) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))? + }; + + let full_stage = format!( + "{}_{}\0", + naga_stage.to_hlsl_str(), + layout.naga_options.shader_model.to_str() + ); + + let ep_index = module + .entry_points + .iter() + .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + + let raw_ep = reflection_info.entry_point_names[ep_index] + .as_ref() + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{e}")))?; + + let source_name = stage + .module + .raw_name + .as_ref() + .and_then(|cstr| cstr.to_str().ok()) + .unwrap_or_default(); + + // Compile with DXC if available, otherwise fall back to FXC + let (result, log_level) = if let Some(ref dxc_container) = self.dxc_container { + super::shader_compilation::compile_dxc( + self, + &source, + source_name, + raw_ep, + stage_bit, + full_stage, + dxc_container, + ) + } else { + super::shader_compilation::compile_fxc( + self, + &source, + source_name, + &ffi::CString::new(raw_ep.as_str()).unwrap(), + stage_bit, + full_stage, + ) + }; + + log::log!( + log_level, + "Naga generated shader for {:?} at {:?}:\n{}", + raw_ep, + naga_stage, + source + ); + result + } + + pub fn raw_device(&self) -> &d3d12::Device { + &self.raw + } + + pub fn raw_queue(&self) -> &d3d12::CommandQueue { + &self.present_queue + } + + pub unsafe fn texture_from_raw( + resource: d3d12::Resource, + format: wgt::TextureFormat, + dimension: wgt::TextureDimension, + size: wgt::Extent3d, + mip_level_count: u32, + sample_count: u32, + ) -> super::Texture { + super::Texture { + resource, + format, + dimension, + size, + mip_level_count, + sample_count, + allocation: None, + } + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(mut self, queue: super::Queue) { + self.rtv_pool.lock().free_handle(self.null_rtv_handle); + unsafe { self.rtv_pool.into_inner().destroy() }; + unsafe { self.dsv_pool.into_inner().destroy() }; + unsafe { self.srv_uav_pool.into_inner().destroy() }; + unsafe { self.sampler_pool.into_inner().destroy() }; + unsafe { self.shared.destroy() }; + unsafe { self.idler.destroy() }; + self.mem_allocator = None; + unsafe { queue.raw.destroy() }; + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + let mut resource = d3d12::Resource::null(); + let mut size = desc.size; + if desc.usage.contains(crate::BufferUses::UNIFORM) { + let align_mask = d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1; + size = ((size - 1) | align_mask) + 1; + } + + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: conv::map_buffer_usage_to_resource_flags(desc.usage), + }; + + let (hr, allocation) = + super::suballocation::create_buffer_resource(self, desc, raw_desc, &mut resource)?; + + hr.into_device_result("Buffer creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { resource.SetName(cwstr.as_ptr()) }; + } + + Ok(super::Buffer { + resource, + size, + allocation, + }) + } + + unsafe fn destroy_buffer(&self, mut buffer: super::Buffer) { + unsafe { buffer.resource.destroy() }; + // Only happens when it's using the windows_rs feature and there's an allocation + if let Some(alloc) = buffer.allocation.take() { + super::suballocation::free_buffer_allocation( + alloc, + // SAFETY: for allocations to exist, the allocator must exist + unsafe { self.mem_allocator.as_ref().unwrap_unchecked() }, + ); + } + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + let mut ptr = ptr::null_mut(); + // TODO: 0 for subresource should be fine here until map and unmap buffer is subresource aware? + let hr = unsafe { (*buffer.resource).Map(0, ptr::null(), &mut ptr) }; + hr.into_device_result("Map buffer")?; + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize).cast::<u8>() }) + .unwrap(), + //TODO: double-check this. Documentation is a bit misleading - + // it implies that Map/Unmap is needed to invalidate/flush memory. + is_coherent: true, + }) + } + + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + unsafe { (*buffer.resource).Unmap(0, ptr::null()) }; + Ok(()) + } + + unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + use super::suballocation::create_texture_resource; + + let mut resource = d3d12::Resource::null(); + + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: conv::map_texture_dimension(desc.dimension), + Alignment: 0, + Width: desc.size.width as u64, + Height: desc.size.height, + DepthOrArraySize: desc.size.depth_or_array_layers as u16, + MipLevels: desc.mip_level_count as u16, + Format: auxil::dxgi::conv::map_texture_format_for_resource( + desc.format, + desc.usage, + !desc.view_formats.is_empty(), + self.private_caps.casting_fully_typed_format_supported, + ), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.sample_count, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_UNKNOWN, + Flags: conv::map_texture_usage_to_resource_flags(desc.usage), + }; + + let (hr, allocation) = create_texture_resource(self, desc, raw_desc, &mut resource)?; + + hr.into_device_result("Texture creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { resource.SetName(cwstr.as_ptr()) }; + } + + Ok(super::Texture { + resource, + format: desc.format, + dimension: desc.dimension, + size: desc.size, + mip_level_count: desc.mip_level_count, + sample_count: desc.sample_count, + allocation, + }) + } + + unsafe fn destroy_texture(&self, mut texture: super::Texture) { + unsafe { texture.resource.destroy() }; + if let Some(alloc) = texture.allocation.take() { + super::suballocation::free_texture_allocation( + alloc, + // SAFETY: for allocations to exist, the allocator must exist + unsafe { self.mem_allocator.as_ref().unwrap_unchecked() }, + ); + } + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + let view_desc = desc.to_internal(texture); + + Ok(super::TextureView { + raw_format: view_desc.rtv_dsv_format, + aspects: view_desc.aspects, + target_base: ( + texture.resource, + texture.calc_subresource(desc.range.base_mip_level, desc.range.base_array_layer, 0), + ), + handle_srv: if desc.usage.intersects(crate::TextureUses::RESOURCE) { + let raw_desc = unsafe { view_desc.to_srv() }; + raw_desc.map(|raw_desc| { + let handle = self.srv_uav_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateShaderResourceView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + handle + }) + } else { + None + }, + handle_uav: if desc.usage.intersects( + crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE, + ) { + let raw_desc = unsafe { view_desc.to_uav() }; + raw_desc.map(|raw_desc| { + let handle = self.srv_uav_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateUnorderedAccessView( + texture.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle.raw, + ) + }; + handle + }) + } else { + None + }, + handle_rtv: if desc.usage.intersects(crate::TextureUses::COLOR_TARGET) { + let raw_desc = unsafe { view_desc.to_rtv() }; + let handle = self.rtv_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateRenderTargetView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + handle_dsv_ro: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_READ) + { + let raw_desc = unsafe { view_desc.to_dsv(true) }; + let handle = self.dsv_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateDepthStencilView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + handle_dsv_rw: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) + { + let raw_desc = unsafe { view_desc.to_dsv(false) }; + let handle = self.dsv_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateDepthStencilView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + }) + } + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + if view.handle_srv.is_some() || view.handle_uav.is_some() { + let mut pool = self.srv_uav_pool.lock(); + if let Some(handle) = view.handle_srv { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_uav { + pool.free_handle(handle); + } + } + if let Some(handle) = view.handle_rtv { + self.rtv_pool.lock().free_handle(handle); + } + if view.handle_dsv_ro.is_some() || view.handle_dsv_rw.is_some() { + let mut pool = self.dsv_pool.lock(); + if let Some(handle) = view.handle_dsv_ro { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_dsv_rw { + pool.free_handle(handle); + } + } + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + let handle = self.sampler_pool.lock().alloc_handle(); + + let reduction = match desc.compare { + Some(_) => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, + None => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_STANDARD, + }; + let mut filter = conv::map_filter_mode(desc.min_filter) << d3d12_ty::D3D12_MIN_FILTER_SHIFT + | conv::map_filter_mode(desc.mag_filter) << d3d12_ty::D3D12_MAG_FILTER_SHIFT + | conv::map_filter_mode(desc.mipmap_filter) << d3d12_ty::D3D12_MIP_FILTER_SHIFT + | reduction << d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_SHIFT; + + if desc.anisotropy_clamp != 1 { + filter |= d3d12_ty::D3D12_FILTER_ANISOTROPIC; + }; + + let border_color = conv::map_border_color(desc.border_color); + + self.raw.create_sampler( + handle.raw, + filter, + [ + conv::map_address_mode(desc.address_modes[0]), + conv::map_address_mode(desc.address_modes[1]), + conv::map_address_mode(desc.address_modes[2]), + ], + 0.0, + desc.anisotropy_clamp as u32, + conv::map_comparison(desc.compare.unwrap_or(wgt::CompareFunction::Always)), + border_color, + desc.lod_clamp.clone(), + ); + + Ok(super::Sampler { handle }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + self.sampler_pool.lock().free_handle(sampler.handle); + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + let allocator = self + .raw + .create_command_allocator(d3d12::CmdListType::Direct) + .into_device_result("Command allocator creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { allocator.SetName(cwstr.as_ptr()) }; + } + + Ok(super::CommandEncoder { + allocator, + device: self.raw, + shared: Arc::clone(&self.shared), + null_rtv_handle: self.null_rtv_handle, + list: None, + free_lists: Vec::new(), + pass: super::PassState::new(), + temp: super::Temp::default(), + }) + } + unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) { + if let Some(list) = encoder.list { + list.close(); + unsafe { list.destroy() }; + } + for list in encoder.free_lists { + unsafe { list.destroy() }; + } + unsafe { encoder.allocator.destroy() }; + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); + for entry in desc.entries.iter() { + let count = entry.count.map_or(1, NonZeroU32::get); + match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => {} + wgt::BindingType::Buffer { .. } => num_buffer_views += count, + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + num_texture_views += count + } + wgt::BindingType::Sampler { .. } => num_samplers += count, + } + } + + let num_views = num_buffer_views + num_texture_views; + Ok(super::BindGroupLayout { + entries: desc.entries.to_vec(), + cpu_heap_views: if num_views != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + d3d12::DescriptorHeapType::CbvSrvUav, + num_views, + )?; + Some(heap) + } else { + None + }, + cpu_heap_samplers: if num_samplers != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + d3d12::DescriptorHeapType::Sampler, + num_samplers, + )?; + Some(heap) + } else { + None + }, + copy_counts: vec![1; num_views.max(num_samplers) as usize], + }) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + if let Some(cpu_heap) = bg_layout.cpu_heap_views { + unsafe { cpu_heap.destroy() }; + } + if let Some(cpu_heap) = bg_layout.cpu_heap_samplers { + unsafe { cpu_heap.destroy() }; + } + } + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + use naga::back::hlsl; + // Pipeline layouts are implemented as RootSignature for D3D12. + // + // Push Constants are implemented as root constants. + // + // Each descriptor set layout will be one table entry of the root signature. + // We have the additional restriction that SRV/CBV/UAV and samplers need to be + // separated, so each set layout will actually occupy up to 2 entries! + // SRV/CBV/UAV tables are added to the signature first, then Sampler tables, + // and finally dynamic uniform descriptors. + // + // Buffers with dynamic offsets are implemented as root descriptors. + // This is easier than trying to patch up the offset on the shader side. + // + // Root signature layout: + // Root Constants: Parameter=0, Space=0 + // ... + // (bind group [0]) - Space=0 + // View descriptor table, if any + // Sampler descriptor table, if any + // Root descriptors (for dynamic offset buffers) + // (bind group [1]) - Space=0 + // ... + // (bind group [2]) - Space=0 + // Special constant buffer: Space=0 + + //TODO: put lower bind group indices futher down the root signature. See: + // https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model + // Currently impossible because wgpu-core only re-binds the descriptor sets based + // on Vulkan-like layout compatibility rules. + + fn native_binding(bt: &hlsl::BindTarget) -> d3d12::Binding { + d3d12::Binding { + space: bt.space as u32, + register: bt.register, + } + } + + log::debug!( + "Creating Root Signature '{}'", + desc.label.unwrap_or_default() + ); + + let mut binding_map = hlsl::BindingMap::default(); + let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = ( + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + ); + let mut parameters = Vec::new(); + let mut push_constants_target = None; + let mut root_constant_info = None; + + let mut pc_start = u32::MAX; + let mut pc_end = u32::MIN; + + for pc in desc.push_constant_ranges.iter() { + pc_start = pc_start.min(pc.range.start); + pc_end = pc_end.max(pc.range.end); + } + + if pc_start != u32::MAX && pc_end != u32::MIN { + let parameter_index = parameters.len(); + let size = (pc_end - pc_start) / 4; + log::debug!( + "\tParam[{}] = push constant (count = {})", + parameter_index, + size, + ); + parameters.push(d3d12::RootParameter::constants( + d3d12::ShaderVisibility::All, + native_binding(&bind_cbv), + size, + )); + let binding = bind_cbv.clone(); + bind_cbv.register += 1; + root_constant_info = Some(super::RootConstantInfo { + root_index: parameter_index as u32, + range: (pc_start / 4)..(pc_end / 4), + }); + push_constants_target = Some(binding); + + bind_cbv.space += 1; + } + + // Collect the whole number of bindings we will create upfront. + // It allows us to preallocate enough storage to avoid reallocation, + // which could cause invalid pointers. + let total_non_dynamic_entries = desc + .bind_group_layouts + .iter() + .flat_map(|bgl| { + bgl.entries.iter().map(|entry| match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => 0, + _ => 1, + }) + }) + .sum(); + let mut ranges = Vec::with_capacity(total_non_dynamic_entries); + + let mut bind_group_infos = + arrayvec::ArrayVec::<super::BindGroupInfo, { crate::MAX_BIND_GROUPS }>::default(); + for (index, bgl) in desc.bind_group_layouts.iter().enumerate() { + let mut info = super::BindGroupInfo { + tables: super::TableTypes::empty(), + base_root_index: parameters.len() as u32, + dynamic_buffers: Vec::new(), + }; + + let mut visibility_view_static = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic = wgt::ShaderStages::empty(); + let mut visibility_sampler = wgt::ShaderStages::empty(); + for entry in bgl.entries.iter() { + match entry.ty { + wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility, + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => visibility_view_dynamic |= entry.visibility, + _ => visibility_view_static |= entry.visibility, + } + } + + // SRV/CBV/UAV descriptor tables + let mut range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => continue, + ref other => conv::map_binding_type(other), + }; + let bt = match range_ty { + d3d12::DescriptorRangeType::CBV => &mut bind_cbv, + d3d12::DescriptorRangeType::SRV => &mut bind_srv, + d3d12::DescriptorRangeType::UAV => &mut bind_uav, + d3d12::DescriptorRangeType::Sampler => continue, + }; + + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bt.clone() + }, + ); + ranges.push(d3d12::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native_binding(bt), + d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1); + } + if ranges.len() > range_base { + log::debug!( + "\tParam[{}] = views (vis = {:?}, count = {})", + parameters.len(), + visibility_view_static, + ranges.len() - range_base, + ); + parameters.push(d3d12::RootParameter::descriptor_table( + conv::map_visibility(visibility_view_static), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SRV_CBV_UAV; + } + + // Sampler descriptor tables + range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Sampler { .. } => d3d12::DescriptorRangeType::Sampler, + _ => continue, + }; + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bind_sampler.clone() + }, + ); + ranges.push(d3d12::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native_binding(&bind_sampler), + d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1); + } + if ranges.len() > range_base { + log::debug!( + "\tParam[{}] = samplers (vis = {:?}, count = {})", + parameters.len(), + visibility_sampler, + ranges.len() - range_base, + ); + parameters.push(d3d12::RootParameter::descriptor_table( + conv::map_visibility(visibility_sampler), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SAMPLERS; + } + + // Root (dynamic) descriptor tables + let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic); + for entry in bgl.entries.iter() { + let buffer_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + ty, + .. + } => ty, + _ => continue, + }; + + let (kind, parameter_ty, bt) = match buffer_ty { + wgt::BufferBindingType::Uniform => ( + super::BufferViewKind::Constant, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_CBV, + &mut bind_cbv, + ), + wgt::BufferBindingType::Storage { read_only: true } => ( + super::BufferViewKind::ShaderResource, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_SRV, + &mut bind_srv, + ), + wgt::BufferBindingType::Storage { read_only: false } => ( + super::BufferViewKind::UnorderedAccess, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_UAV, + &mut bind_uav, + ), + }; + + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bt.clone() + }, + ); + info.dynamic_buffers.push(kind); + + log::debug!( + "\tParam[{}] = dynamic {:?} (vis = {:?})", + parameters.len(), + buffer_ty, + dynamic_buffers_visibility, + ); + parameters.push(d3d12::RootParameter::descriptor( + parameter_ty, + dynamic_buffers_visibility, + native_binding(bt), + )); + + bt.register += entry.count.map_or(1, NonZeroU32::get); + } + + bind_group_infos.push(info); + } + + // Ensure that we didn't reallocate! + debug_assert_eq!(ranges.len(), total_non_dynamic_entries); + + let (special_constants_root_index, special_constants_binding) = if desc.flags.intersects( + crate::PipelineLayoutFlags::BASE_VERTEX_INSTANCE + | crate::PipelineLayoutFlags::NUM_WORK_GROUPS, + ) { + let parameter_index = parameters.len(); + log::debug!("\tParam[{}] = special", parameter_index); + parameters.push(d3d12::RootParameter::constants( + d3d12::ShaderVisibility::All, // really needed for VS and CS only + native_binding(&bind_cbv), + 3, // 0 = base vertex, 1 = base instance, 2 = other + )); + let binding = bind_cbv.clone(); + bind_cbv.register += 1; + (Some(parameter_index as u32), Some(binding)) + } else { + (None, None) + }; + + log::trace!("{:#?}", parameters); + log::trace!("Bindings {:#?}", binding_map); + + let (blob, error) = self + .library + .serialize_root_signature( + d3d12::RootSignatureVersion::V1_0, + ¶meters, + &[], + d3d12::RootSignatureFlags::ALLOW_IA_INPUT_LAYOUT, + ) + .map_err(|e| { + log::error!("Unable to find serialization function: {:?}", e); + crate::DeviceError::Lost + })? + .into_device_result("Root signature serialization")?; + + if !error.is_null() { + log::error!( + "Root signature serialization error: {:?}", + unsafe { error.as_c_str() }.to_str().unwrap() + ); + unsafe { error.destroy() }; + return Err(crate::DeviceError::Lost); + } + + let raw = self + .raw + .create_root_signature(blob, 0) + .into_device_result("Root signature creation")?; + unsafe { blob.destroy() }; + + log::debug!("\traw = {:?}", raw); + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::PipelineLayout { + shared: super::PipelineLayoutShared { + signature: raw, + total_root_elements: parameters.len() as super::RootIndex, + special_constants_root_index, + root_constant_info, + }, + bind_group_infos, + naga_options: hlsl::Options { + shader_model: match self.dxc_container { + // DXC + Some(_) => hlsl::ShaderModel::V6_0, + // FXC doesn't support SM 6.0 + None => hlsl::ShaderModel::V5_1, + }, + binding_map, + fake_missing_bindings: false, + special_constants_binding, + push_constants_target, + zero_initialize_workgroup_memory: true, + }, + }) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + unsafe { pipeline_layout.shared.signature.destroy() }; + } + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + let mut cpu_views = desc + .layout + .cpu_heap_views + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_views { + inner.stage.clear(); + } + let mut cpu_samplers = desc + .layout + .cpu_heap_samplers + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_samplers { + inner.stage.clear(); + } + let mut dynamic_buffers = Vec::new(); + + for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) { + match layout.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.buffers[start..end] { + dynamic_buffers.push(data.resolve_address()); + } + } + wgt::BindingType::Buffer { ty, .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.buffers[start..end] { + let gpu_address = data.resolve_address(); + let size = data.resolve_size() as u32; + let inner = cpu_views.as_mut().unwrap(); + let cpu_index = inner.stage.len() as u32; + let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); + match ty { + wgt::BufferBindingType::Uniform => { + let size_mask = + d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let raw_desc = d3d12_ty::D3D12_CONSTANT_BUFFER_VIEW_DESC { + BufferLocation: gpu_address, + SizeInBytes: ((size - 1) | size_mask) + 1, + }; + unsafe { self.raw.CreateConstantBufferView(&raw_desc, handle) }; + } + wgt::BufferBindingType::Storage { read_only: true } => { + let mut raw_desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + Shader4ComponentMapping: + view::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: d3d12_ty::D3D12_SRV_DIMENSION_BUFFER, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_SRV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + Flags: d3d12_ty::D3D12_BUFFER_SRV_FLAG_RAW, + } + }; + unsafe { + self.raw.CreateShaderResourceView( + data.buffer.resource.as_mut_ptr(), + &raw_desc, + handle, + ) + }; + } + wgt::BufferBindingType::Storage { read_only: false } => { + let mut raw_desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d12_ty::D3D12_UAV_DIMENSION_BUFFER, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_UAV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: d3d12_ty::D3D12_BUFFER_UAV_FLAG_RAW, + } + }; + unsafe { + self.raw.CreateUnorderedAccessView( + data.buffer.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle, + ) + }; + } + } + inner.stage.push(handle); + } + } + wgt::BindingType::Texture { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.textures[start..end] { + let handle = data.view.handle_srv.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + } + wgt::BindingType::StorageTexture { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.textures[start..end] { + let handle = data.view.handle_uav.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.samplers[start..end] { + cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); + } + } + } + } + + let handle_views = match cpu_views { + Some(inner) => { + let dual = unsafe { + descriptor::upload( + self.raw, + &inner, + &self.shared.heap_views, + &desc.layout.copy_counts, + ) + }?; + Some(dual) + } + None => None, + }; + let handle_samplers = match cpu_samplers { + Some(inner) => { + let dual = unsafe { + descriptor::upload( + self.raw, + &inner, + &self.shared.heap_samplers, + &desc.layout.copy_counts, + ) + }?; + Some(dual) + } + None => None, + }; + + Ok(super::BindGroup { + handle_views, + handle_samplers, + dynamic_buffers, + }) + } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + if let Some(dual) = group.handle_views { + self.shared.heap_views.free_slice(dual); + } + if let Some(dual) = group.handle_samplers { + self.shared.heap_samplers.free_slice(dual); + } + } + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + let raw_name = desc.label.and_then(|label| ffi::CString::new(label).ok()); + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga, raw_name }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) { + // just drop + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + let (topology_class, topology) = conv::map_topology(desc.primitive.topology); + let mut shader_stages = wgt::ShaderStages::VERTEX; + + let blob_vs = + self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_fs = match desc.fragment_stage { + Some(ref stage) => { + shader_stages |= wgt::ShaderStages::FRAGMENT; + Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)?) + } + None => None, + }; + + let mut vertex_strides = [None; crate::MAX_VERTEX_BUFFERS]; + let mut input_element_descs = Vec::new(); + for (i, (stride, vbuf)) in vertex_strides + .iter_mut() + .zip(desc.vertex_buffers) + .enumerate() + { + *stride = NonZeroU32::new(vbuf.array_stride as u32); + let (slot_class, step_rate) = match vbuf.step_mode { + wgt::VertexStepMode::Vertex => { + (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) + } + wgt::VertexStepMode::Instance => { + (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) + } + }; + for attribute in vbuf.attributes { + input_element_descs.push(d3d12_ty::D3D12_INPUT_ELEMENT_DESC { + SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _, + SemanticIndex: attribute.shader_location, + Format: auxil::dxgi::conv::map_vertex_format(attribute.format), + InputSlot: i as u32, + AlignedByteOffset: attribute.offset as u32, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate, + }); + } + } + + let mut rtv_formats = [dxgiformat::DXGI_FORMAT_UNKNOWN; + d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + for (rtv_format, ct) in rtv_formats.iter_mut().zip(desc.color_targets) { + if let Some(ct) = ct.as_ref() { + *rtv_format = auxil::dxgi::conv::map_texture_format(ct.format); + } + } + + let bias = desc + .depth_stencil + .as_ref() + .map(|ds| ds.bias) + .unwrap_or_default(); + + let raw_rasterizer = d3d12_ty::D3D12_RASTERIZER_DESC { + FillMode: conv::map_polygon_mode(desc.primitive.polygon_mode), + CullMode: match desc.primitive.cull_mode { + None => d3d12_ty::D3D12_CULL_MODE_NONE, + Some(wgt::Face::Front) => d3d12_ty::D3D12_CULL_MODE_FRONT, + Some(wgt::Face::Back) => d3d12_ty::D3D12_CULL_MODE_BACK, + }, + FrontCounterClockwise: match desc.primitive.front_face { + wgt::FrontFace::Cw => 0, + wgt::FrontFace::Ccw => 1, + }, + DepthBias: bias.constant, + DepthBiasClamp: bias.clamp, + SlopeScaledDepthBias: bias.slope_scale, + DepthClipEnable: BOOL::from(!desc.primitive.unclipped_depth), + MultisampleEnable: BOOL::from(desc.multisample.count > 1), + ForcedSampleCount: 0, + AntialiasedLineEnable: 0, + ConservativeRaster: if desc.primitive.conservative { + d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + } else { + d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + }, + }; + + let raw_desc = d3d12_ty::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: desc.layout.shared.signature.as_mut_ptr(), + VS: *blob_vs.create_native_shader(), + PS: match blob_fs { + Some(ref shader) => *shader.create_native_shader(), + None => *d3d12::Shader::null(), + }, + GS: *d3d12::Shader::null(), + DS: *d3d12::Shader::null(), + HS: *d3d12::Shader::null(), + StreamOutput: d3d12_ty::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }, + BlendState: d3d12_ty::D3D12_BLEND_DESC { + AlphaToCoverageEnable: BOOL::from(desc.multisample.alpha_to_coverage_enabled), + IndependentBlendEnable: 1, + RenderTarget: conv::map_render_targets(desc.color_targets), + }, + SampleMask: desc.multisample.mask as u32, + RasterizerState: raw_rasterizer, + DepthStencilState: match desc.depth_stencil { + Some(ref ds) => conv::map_depth_stencil(ds), + None => unsafe { mem::zeroed() }, + }, + InputLayout: d3d12_ty::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: if input_element_descs.is_empty() { + ptr::null() + } else { + input_element_descs.as_ptr() + }, + NumElements: input_element_descs.len() as u32, + }, + IBStripCutValue: match desc.primitive.strip_index_format { + Some(wgt::IndexFormat::Uint16) => { + d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF + } + Some(wgt::IndexFormat::Uint32) => { + d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + } + None => d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + }, + PrimitiveTopologyType: topology_class, + NumRenderTargets: desc.color_targets.len() as u32, + RTVFormats: rtv_formats, + DSVFormat: desc + .depth_stencil + .as_ref() + .map_or(dxgiformat::DXGI_FORMAT_UNKNOWN, |ds| { + auxil::dxgi::conv::map_texture_format(ds.format) + }), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.multisample.count, + Quality: 0, + }, + NodeMask: 0, + CachedPSO: d3d12_ty::D3D12_CACHED_PIPELINE_STATE { + pCachedBlob: ptr::null(), + CachedBlobSizeInBytes: 0, + }, + Flags: d3d12_ty::D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + let mut raw = d3d12::PipelineState::null(); + let hr = { + profiling::scope!("ID3D12Device::CreateGraphicsPipelineState"); + unsafe { + self.raw.CreateGraphicsPipelineState( + &raw_desc, + &d3d12_ty::ID3D12PipelineState::uuidof(), + raw.mut_void(), + ) + } + }; + + unsafe { blob_vs.destroy() }; + if let Some(blob_fs) = blob_fs { + unsafe { blob_fs.destroy() }; + }; + + hr.into_result() + .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.into_owned()))?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::RenderPipeline { + raw, + layout: desc.layout.shared.clone(), + topology, + vertex_strides, + }) + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + unsafe { pipeline.raw.destroy() }; + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + + let pair = { + profiling::scope!("ID3D12Device::CreateComputePipelineState"); + self.raw.create_compute_pipeline_state( + desc.layout.shared.signature, + blob_cs.create_native_shader(), + 0, + d3d12::CachedPSO::null(), + d3d12::PipelineStateFlags::empty(), + ) + }; + + unsafe { blob_cs.destroy() }; + + let raw = pair.into_result().map_err(|err| { + crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) + })?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::ComputePipeline { + raw, + layout: desc.layout.shared.clone(), + }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + unsafe { pipeline.raw.destroy() }; + } + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + let (heap_ty, raw_ty) = match desc.ty { + wgt::QueryType::Occlusion => ( + d3d12::QueryHeapType::Occlusion, + d3d12_ty::D3D12_QUERY_TYPE_BINARY_OCCLUSION, + ), + wgt::QueryType::PipelineStatistics(_) => ( + d3d12::QueryHeapType::PipelineStatistics, + d3d12_ty::D3D12_QUERY_TYPE_PIPELINE_STATISTICS, + ), + wgt::QueryType::Timestamp => ( + d3d12::QueryHeapType::Timestamp, + d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + ), + }; + + let raw = self + .raw + .create_query_heap(heap_ty, desc.count, 0) + .into_device_result("Query heap creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::QuerySet { raw, raw_ty }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + unsafe { set.raw.destroy() }; + } + + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + let mut raw = d3d12::Fence::null(); + let hr = unsafe { + self.raw.CreateFence( + 0, + d3d12_ty::D3D12_FENCE_FLAG_NONE, + &d3d12_ty::ID3D12Fence::uuidof(), + raw.mut_void(), + ) + }; + hr.into_device_result("Fence creation")?; + Ok(super::Fence { raw }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + unsafe { fence.raw.destroy() }; + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + Ok(unsafe { fence.raw.GetCompletedValue() }) + } + unsafe fn wait( + &self, + fence: &super::Fence, + value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + if unsafe { fence.raw.GetCompletedValue() } >= value { + return Ok(true); + } + let hr = fence.raw.set_event_on_completion(self.idler.event, value); + hr.into_device_result("Set event")?; + + match unsafe { synchapi::WaitForSingleObject(self.idler.event.0, timeout_ms) } { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::DeviceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::DeviceError::Lost) + } + } + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(feature = "renderdoc")] + { + unsafe { + self.render_doc + .start_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + } + } + #[cfg(not(feature = "renderdoc"))] + false + } + + unsafe fn stop_capture(&self) { + #[cfg(feature = "renderdoc")] + unsafe { + self.render_doc + .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + } + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/instance.rs b/third_party/rust/wgpu-hal/src/dx12/instance.rs new file mode 100644 index 0000000000..be7a3f7306 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/instance.rs @@ -0,0 +1,117 @@ +use winapi::shared::{dxgi1_5, minwindef}; + +use super::SurfaceTarget; +use crate::auxil::{self, dxgi::result::HResult as _}; +use std::{mem, sync::Arc}; + +impl Drop for super::Instance { + fn drop(&mut self) { + unsafe { self.factory.destroy() }; + crate::auxil::dxgi::exception::unregister_exception_handler(); + } +} + +impl crate::Instance<super::Api> for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + let lib_main = d3d12::D3D12Lib::new().map_err(|_| crate::InstanceError)?; + + if desc.flags.contains(crate::InstanceFlags::VALIDATION) { + // Enable debug layer + match lib_main.get_debug_interface() { + Ok(pair) => match pair.into_result() { + Ok(debug_controller) => { + debug_controller.enable_layer(); + unsafe { debug_controller.Release() }; + } + Err(err) => { + log::warn!("Unable to enable D3D12 debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for D3D12 not found: {:?}", err); + } + } + } + + // Create DXGIFactory4 + let (lib_dxgi, factory) = auxil::dxgi::factory::create_factory( + auxil::dxgi::factory::DxgiFactoryType::Factory4, + desc.flags, + )?; + + // Create IDXGIFactoryMedia + let factory_media = match lib_dxgi.create_factory_media() { + Ok(pair) => match pair.into_result() { + Ok(factory_media) => Some(factory_media), + Err(err) => { + log::error!("Failed to create IDXGIFactoryMedia: {}", err); + None + } + }, + Err(err) => { + log::info!("IDXGIFactory1 creation function not found: {:?}", err); + None + } + }; + + let mut supports_allow_tearing = false; + #[allow(trivial_casts)] + if let Some(factory5) = factory.as_factory5() { + let mut allow_tearing: minwindef::BOOL = minwindef::FALSE; + let hr = unsafe { + factory5.CheckFeatureSupport( + dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, + &mut allow_tearing as *mut _ as *mut _, + mem::size_of::<minwindef::BOOL>() as _, + ) + }; + + match hr.into_result() { + Err(err) => log::warn!("Unable to check for tearing support: {}", err), + Ok(()) => supports_allow_tearing = true, + } + } + + Ok(Self { + // The call to create_factory will only succeed if we get a factory4, so this is safe. + factory, + factory_media, + library: Arc::new(lib_main), + _lib_dxgi: lib_dxgi, + supports_allow_tearing, + flags: desc.flags, + dx12_shader_compiler: desc.dx12_shader_compiler.clone(), + }) + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<super::Surface, crate::InstanceError> { + match window_handle { + raw_window_handle::RawWindowHandle::Win32(handle) => Ok(super::Surface { + factory: self.factory, + factory_media: self.factory_media, + target: SurfaceTarget::WndHandle(handle.hwnd as *mut _), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: None, + }), + _ => Err(crate::InstanceError), + } + } + unsafe fn destroy_surface(&self, _surface: super::Surface) { + // just drop + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let adapters = auxil::dxgi::factory::enumerate_adapters(self.factory); + + adapters + .into_iter() + .filter_map(|raw| { + super::Adapter::expose(raw, &self.library, self.flags, &self.dx12_shader_compiler) + }) + .collect() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/mod.rs b/third_party/rust/wgpu-hal/src/dx12/mod.rs new file mode 100644 index 0000000000..6cdf3ffe64 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/mod.rs @@ -0,0 +1,902 @@ +/*! +# DirectX12 API internals. + +Generally the mapping is straightforwad. + +## Resource transitions + +D3D12 API matches WebGPU internal states very well. The only +caveat here is issuing a special UAV barrier whenever both source +and destination states match, and they are for storage sync. + +## Memory + +For now, all resources are created with "committed" memory. + +## Resource binding + +See ['Device::create_pipeline_layout`] documentation for the structure +of the root signature corresponding to WebGPU pipeline layout. + +Binding groups is mostly straightforward, with one big caveat: +all bindings have to be reset whenever the pipeline layout changes. +This is the rule of D3D12, and we can do nothing to help it. + +We detect this change at both [`crate::CommandEncoder::set_bind_group`] +and [`crate::CommandEncoder::set_render_pipeline`] with +[`crate::CommandEncoder::set_compute_pipeline`]. + +For this reason, in order avoid repeating the binding code, +we are binding everything in `CommandEncoder::update_root_elements`. +When the pipeline layout is changed, we reset all bindings. +Otherwise, we pass a range corresponding only to the current bind group. + +!*/ + +mod adapter; +mod command; +mod conv; +mod descriptor; +mod device; +mod instance; +mod shader_compilation; +mod suballocation; +mod types; +mod view; + +use crate::auxil::{self, dxgi::result::HResult as _}; + +use arrayvec::ArrayVec; +use parking_lot::Mutex; +use std::{ffi, fmt, mem, num::NonZeroU32, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_4, dxgitype, windef, winerror}, + um::{d3d12 as d3d12_ty, dcomp, synchapi, winbase, winnt}, + Interface as _, +}; + +#[derive(Clone)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = Texture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +// Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. +const MAX_ROOT_ELEMENTS: usize = 64; +const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10; + +pub struct Instance { + factory: d3d12::DxgiFactory, + factory_media: Option<d3d12::FactoryMedia>, + library: Arc<d3d12::D3D12Lib>, + supports_allow_tearing: bool, + _lib_dxgi: d3d12::DxgiLib, + flags: crate::InstanceFlags, + dx12_shader_compiler: wgt::Dx12Compiler, +} + +impl Instance { + pub unsafe fn create_surface_from_visual( + &self, + visual: *mut dcomp::IDCompositionVisual, + ) -> Surface { + Surface { + factory: self.factory, + factory_media: self.factory_media, + target: SurfaceTarget::Visual(unsafe { d3d12::WeakPtr::from_raw(visual) }), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: None, + } + } + + pub unsafe fn create_surface_from_surface_handle( + &self, + surface_handle: winnt::HANDLE, + ) -> Surface { + Surface { + factory: self.factory, + factory_media: self.factory_media, + target: SurfaceTarget::SurfaceHandle(surface_handle), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: None, + } + } +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +struct SwapChain { + raw: d3d12::WeakPtr<dxgi1_4::IDXGISwapChain3>, + // need to associate raw image pointers with the swapchain so they can be properly released + // when the swapchain is destroyed + resources: Vec<d3d12::Resource>, + waitable: winnt::HANDLE, + acquired_count: usize, + present_mode: wgt::PresentMode, + format: wgt::TextureFormat, + size: wgt::Extent3d, +} + +enum SurfaceTarget { + WndHandle(windef::HWND), + Visual(d3d12::WeakPtr<dcomp::IDCompositionVisual>), + SurfaceHandle(winnt::HANDLE), +} + +pub struct Surface { + factory: d3d12::DxgiFactory, + factory_media: Option<d3d12::FactoryMedia>, + target: SurfaceTarget, + supports_allow_tearing: bool, + swap_chain: Option<SwapChain>, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug, Clone, Copy)] +enum MemoryArchitecture { + Unified { + #[allow(unused)] + cache_coherent: bool, + }, + NonUnified, +} + +#[derive(Debug, Clone, Copy)] +struct PrivateCapabilities { + instance_flags: crate::InstanceFlags, + #[allow(unused)] + heterogeneous_resource_heaps: bool, + memory_architecture: MemoryArchitecture, + #[allow(unused)] // TODO: Exists until windows-rs is standard, then it can probably be removed? + heap_create_not_zeroed: bool, + casting_fully_typed_format_supported: bool, +} + +#[derive(Default)] +struct Workarounds { + // On WARP, temporary CPU descriptors are still used by the runtime + // after we call `CopyDescriptors`. + avoid_cpu_descriptor_overwrites: bool, +} + +pub struct Adapter { + raw: d3d12::DxgiAdapter, + device: d3d12::Device, + library: Arc<d3d12::D3D12Lib>, + private_caps: PrivateCapabilities, + presentation_timer: auxil::dxgi::time::PresentationTimer, + //Note: this isn't used right now, but we'll need it later. + #[allow(unused)] + workarounds: Workarounds, + dx12_shader_compiler: wgt::Dx12Compiler, +} + +unsafe impl Send for Adapter {} +unsafe impl Sync for Adapter {} + +/// Helper structure for waiting for GPU. +struct Idler { + fence: d3d12::Fence, + event: d3d12::Event, +} + +impl Idler { + unsafe fn destroy(self) { + unsafe { self.fence.destroy() }; + } +} + +struct CommandSignatures { + draw: d3d12::CommandSignature, + draw_indexed: d3d12::CommandSignature, + dispatch: d3d12::CommandSignature, +} + +impl CommandSignatures { + unsafe fn destroy(&self) { + unsafe { + self.draw.destroy(); + self.draw_indexed.destroy(); + self.dispatch.destroy(); + } + } +} + +struct DeviceShared { + zero_buffer: d3d12::Resource, + cmd_signatures: CommandSignatures, + heap_views: descriptor::GeneralHeap, + heap_samplers: descriptor::GeneralHeap, +} + +impl DeviceShared { + unsafe fn destroy(&self) { + unsafe { + self.zero_buffer.destroy(); + self.cmd_signatures.destroy(); + self.heap_views.raw.destroy(); + self.heap_samplers.raw.destroy(); + } + } +} + +pub struct Device { + raw: d3d12::Device, + present_queue: d3d12::CommandQueue, + idler: Idler, + private_caps: PrivateCapabilities, + shared: Arc<DeviceShared>, + // CPU only pools + rtv_pool: Mutex<descriptor::CpuPool>, + dsv_pool: Mutex<descriptor::CpuPool>, + srv_uav_pool: Mutex<descriptor::CpuPool>, + sampler_pool: Mutex<descriptor::CpuPool>, + // library + library: Arc<d3d12::D3D12Lib>, + #[cfg(feature = "renderdoc")] + render_doc: crate::auxil::renderdoc::RenderDoc, + null_rtv_handle: descriptor::Handle, + mem_allocator: Option<Mutex<suballocation::GpuAllocatorWrapper>>, + dxc_container: Option<shader_compilation::DxcContainer>, +} + +unsafe impl Send for Device {} +unsafe impl Sync for Device {} + +pub struct Queue { + raw: d3d12::CommandQueue, + temp_lists: Vec<d3d12::CommandList>, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +#[derive(Default)] +struct Temp { + marker: Vec<u16>, + barriers: Vec<d3d12_ty::D3D12_RESOURCE_BARRIER>, +} + +impl Temp { + fn clear(&mut self) { + self.marker.clear(); + self.barriers.clear(); + } +} + +struct PassResolve { + src: (d3d12::Resource, u32), + dst: (d3d12::Resource, u32), + format: d3d12::Format, +} + +#[derive(Clone, Copy)] +enum RootElement { + Empty, + Constant, + SpecialConstantBuffer { + base_vertex: i32, + base_instance: u32, + other: u32, + }, + /// Descriptor table. + Table(d3d12::GpuDescriptor), + /// Descriptor for a buffer that has dynamic offset. + DynamicOffsetBuffer { + kind: BufferViewKind, + address: d3d12::GpuAddress, + }, +} + +#[derive(Clone, Copy)] +enum PassKind { + Render, + Compute, + Transfer, +} + +struct PassState { + has_label: bool, + resolves: ArrayVec<PassResolve, { crate::MAX_COLOR_ATTACHMENTS }>, + layout: PipelineLayoutShared, + root_elements: [RootElement; MAX_ROOT_ELEMENTS], + constant_data: [u32; MAX_ROOT_ELEMENTS], + dirty_root_elements: u64, + vertex_buffers: [d3d12_ty::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: usize, + kind: PassKind, +} + +#[test] +fn test_dirty_mask() { + assert_eq!(MAX_ROOT_ELEMENTS, std::mem::size_of::<u64>() * 8); +} + +impl PassState { + fn new() -> Self { + PassState { + has_label: false, + resolves: ArrayVec::new(), + layout: PipelineLayoutShared { + signature: d3d12::RootSignature::null(), + total_root_elements: 0, + special_constants_root_index: None, + root_constant_info: None, + }, + root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], + constant_data: [0; MAX_ROOT_ELEMENTS], + dirty_root_elements: 0, + vertex_buffers: [unsafe { mem::zeroed() }; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: 0, + kind: PassKind::Transfer, + } + } + + fn clear(&mut self) { + // careful about heap allocations! + *self = Self::new(); + } +} + +pub struct CommandEncoder { + allocator: d3d12::CommandAllocator, + device: d3d12::Device, + shared: Arc<DeviceShared>, + null_rtv_handle: descriptor::Handle, + list: Option<d3d12::GraphicsCommandList>, + free_lists: Vec<d3d12::GraphicsCommandList>, + pass: PassState, + temp: Temp, +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("allocator", &self.allocator) + .field("device", &self.allocator) + .finish() + } +} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: d3d12::GraphicsCommandList, + closed: bool, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct Buffer { + resource: d3d12::Resource, + size: wgt::BufferAddress, + allocation: Option<suballocation::AllocationWrapper>, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl crate::BufferBinding<'_, Api> { + fn resolve_size(&self) -> wgt::BufferAddress { + match self.size { + Some(size) => size.get(), + None => self.buffer.size - self.offset, + } + } + + fn resolve_address(&self) -> wgt::BufferAddress { + self.buffer.resource.gpu_virtual_address() + self.offset + } +} + +#[derive(Debug)] +pub struct Texture { + resource: d3d12::Resource, + format: wgt::TextureFormat, + dimension: wgt::TextureDimension, + size: wgt::Extent3d, + mip_level_count: u32, + sample_count: u32, + allocation: Option<suballocation::AllocationWrapper>, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +impl Texture { + fn array_layer_count(&self) -> u32 { + match self.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D3 => 1, + wgt::TextureDimension::D2 => self.size.depth_or_array_layers, + } + } + + /// see https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice + fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 { + mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count + } + + fn calc_subresource_for_copy(&self, base: &crate::TextureCopyBase) -> u32 { + let plane = match base.aspect { + crate::FormatAspects::COLOR | crate::FormatAspects::DEPTH => 0, + crate::FormatAspects::STENCIL => 1, + _ => unreachable!(), + }; + self.calc_subresource(base.mip_level, base.array_layer, plane) + } +} + +#[derive(Debug)] +pub struct TextureView { + raw_format: d3d12::Format, + aspects: crate::FormatAspects, + target_base: (d3d12::Resource, u32), + handle_srv: Option<descriptor::Handle>, + handle_uav: Option<descriptor::Handle>, + handle_rtv: Option<descriptor::Handle>, + handle_dsv_ro: Option<descriptor::Handle>, + handle_dsv_rw: Option<descriptor::Handle>, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} + +#[derive(Debug)] +pub struct Sampler { + handle: descriptor::Handle, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +#[derive(Debug)] +pub struct QuerySet { + raw: d3d12::QueryHeap, + raw_ty: d3d12_ty::D3D12_QUERY_TYPE, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} + +#[derive(Debug)] +pub struct Fence { + raw: d3d12::Fence, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +pub struct BindGroupLayout { + /// Sorted list of entries. + entries: Vec<wgt::BindGroupLayoutEntry>, + cpu_heap_views: Option<descriptor::CpuHeap>, + cpu_heap_samplers: Option<descriptor::CpuHeap>, + copy_counts: Vec<u32>, // all 1's +} + +#[derive(Clone, Copy)] +enum BufferViewKind { + Constant, + ShaderResource, + UnorderedAccess, +} + +#[derive(Debug)] +pub struct BindGroup { + handle_views: Option<descriptor::DualHandle>, + handle_samplers: Option<descriptor::DualHandle>, + dynamic_buffers: Vec<d3d12::GpuAddress>, +} + +bitflags::bitflags! { + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct TableTypes: u8 { + const SRV_CBV_UAV = 1 << 0; + const SAMPLERS = 1 << 1; + } +} + +// Element (also known as parameter) index into the root signature. +type RootIndex = u32; + +struct BindGroupInfo { + base_root_index: RootIndex, + tables: TableTypes, + dynamic_buffers: Vec<BufferViewKind>, +} + +#[derive(Clone)] +struct RootConstantInfo { + root_index: RootIndex, + range: std::ops::Range<u32>, +} + +#[derive(Clone)] +struct PipelineLayoutShared { + signature: d3d12::RootSignature, + total_root_elements: RootIndex, + special_constants_root_index: Option<RootIndex>, + root_constant_info: Option<RootConstantInfo>, +} + +unsafe impl Send for PipelineLayoutShared {} +unsafe impl Sync for PipelineLayoutShared {} + +pub struct PipelineLayout { + shared: PipelineLayoutShared, + // Storing for each associated bind group, which tables we created + // in the root signature. This is required for binding descriptor sets. + bind_group_infos: ArrayVec<BindGroupInfo, { crate::MAX_BIND_GROUPS }>, + naga_options: naga::back::hlsl::Options, +} + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + raw_name: Option<ffi::CString>, +} + +pub(super) enum CompiledShader { + #[allow(unused)] + Dxc(Vec<u8>), + Fxc(d3d12::Blob), +} + +impl CompiledShader { + fn create_native_shader(&self) -> d3d12::Shader { + match *self { + CompiledShader::Dxc(ref shader) => d3d12::Shader::from_raw(shader), + CompiledShader::Fxc(shader) => d3d12::Shader::from_blob(shader), + } + } + + unsafe fn destroy(self) { + match self { + CompiledShader::Dxc(_) => {} + CompiledShader::Fxc(shader) => unsafe { + shader.destroy(); + }, + } + } +} + +pub struct RenderPipeline { + raw: d3d12::PipelineState, + layout: PipelineLayoutShared, + topology: d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, + vertex_strides: [Option<NonZeroU32>; crate::MAX_VERTEX_BUFFERS], +} + +unsafe impl Send for RenderPipeline {} +unsafe impl Sync for RenderPipeline {} + +pub struct ComputePipeline { + raw: d3d12::PipelineState, + layout: PipelineLayoutShared, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +impl SwapChain { + unsafe fn release_resources(self) -> d3d12::WeakPtr<dxgi1_4::IDXGISwapChain3> { + for resource in self.resources { + unsafe { resource.destroy() }; + } + self.raw + } + + unsafe fn wait( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<bool, crate::SurfaceError> { + let timeout_ms = match timeout { + Some(duration) => duration.as_millis() as u32, + None => winbase::INFINITE, + }; + match unsafe { synchapi::WaitForSingleObject(self.waitable, timeout_ms) } { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::SurfaceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::SurfaceError::Lost) + } + } + } +} + +impl crate::Surface<Api> for Surface { + unsafe fn configure( + &mut self, + device: &Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + // We always set ALLOW_TEARING on the swapchain no matter + // what kind of swapchain we want because ResizeBuffers + // cannot change if ALLOW_TEARING is applied to the swapchain. + if self.supports_allow_tearing { + flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + } + + let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format); + + let swap_chain = match self.swap_chain.take() { + //Note: this path doesn't properly re-initialize all of the things + Some(sc) => { + // can't have image resources in flight used by GPU + let _ = unsafe { device.wait_idle() }; + + let raw = unsafe { sc.release_resources() }; + let result = unsafe { + raw.ResizeBuffers( + config.swap_chain_size, + config.extent.width, + config.extent.height, + non_srgb_format, + flags, + ) + }; + if let Err(err) = result.into_result() { + log::error!("ResizeBuffers failed: {}", err); + return Err(crate::SurfaceError::Other("window is in use")); + } + raw + } + None => { + let desc = d3d12::SwapchainDesc { + alpha_mode: auxil::dxgi::conv::map_acomposite_alpha_mode( + config.composite_alpha_mode, + ), + width: config.extent.width, + height: config.extent.height, + format: non_srgb_format, + stereo: false, + sample: d3d12::SampleDesc { + count: 1, + quality: 0, + }, + buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, + buffer_count: config.swap_chain_size, + scaling: d3d12::Scaling::Stretch, + swap_effect: d3d12::SwapEffect::FlipDiscard, + flags, + }; + let swap_chain1 = match self.target { + SurfaceTarget::Visual(_) => { + profiling::scope!("IDXGIFactory4::CreateSwapChainForComposition"); + self.factory + .unwrap_factory2() + .create_swapchain_for_composition( + device.present_queue.as_mut_ptr() as *mut _, + &desc, + ) + .into_result() + } + SurfaceTarget::SurfaceHandle(handle) => { + profiling::scope!( + "IDXGIFactoryMedia::CreateSwapChainForCompositionSurfaceHandle" + ); + self.factory_media + .ok_or(crate::SurfaceError::Other("IDXGIFactoryMedia not found"))? + .create_swapchain_for_composition_surface_handle( + device.present_queue.as_mut_ptr() as *mut _, + handle, + &desc, + ) + .into_result() + } + SurfaceTarget::WndHandle(hwnd) => { + profiling::scope!("IDXGIFactory4::CreateSwapChainForHwnd"); + self.factory + .as_factory2() + .unwrap() + .create_swapchain_for_hwnd( + device.present_queue.as_mut_ptr() as *mut _, + hwnd, + &desc, + ) + .into_result() + } + }; + + let swap_chain1 = match swap_chain1 { + Ok(s) => s, + Err(err) => { + log::error!("SwapChain creation error: {}", err); + return Err(crate::SurfaceError::Other("swap chain creation")); + } + }; + + match self.target { + SurfaceTarget::WndHandle(_) | SurfaceTarget::SurfaceHandle(_) => {} + SurfaceTarget::Visual(visual) => { + if let Err(err) = + unsafe { visual.SetContent(swap_chain1.as_unknown()) }.into_result() + { + log::error!("Unable to SetContent: {}", err); + return Err(crate::SurfaceError::Other( + "IDCompositionVisual::SetContent", + )); + } + } + } + + match unsafe { swap_chain1.cast::<dxgi1_4::IDXGISwapChain3>() }.into_result() { + Ok(swap_chain3) => { + unsafe { swap_chain1.destroy() }; + swap_chain3 + } + Err(err) => { + log::error!("Unable to cast swap chain: {}", err); + return Err(crate::SurfaceError::Other("swap chain cast to 3")); + } + } + } + }; + + match self.target { + SurfaceTarget::WndHandle(wnd_handle) => { + // Disable automatic Alt+Enter handling by DXGI. + const DXGI_MWA_NO_WINDOW_CHANGES: u32 = 1; + const DXGI_MWA_NO_ALT_ENTER: u32 = 2; + unsafe { + self.factory.MakeWindowAssociation( + wnd_handle, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER, + ) + }; + } + SurfaceTarget::Visual(_) | SurfaceTarget::SurfaceHandle(_) => {} + } + + unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) }; + let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() }; + + let mut resources = vec![d3d12::Resource::null(); config.swap_chain_size as usize]; + for (i, res) in resources.iter_mut().enumerate() { + unsafe { + swap_chain.GetBuffer(i as _, &d3d12_ty::ID3D12Resource::uuidof(), res.mut_void()) + }; + } + + self.swap_chain = Some(SwapChain { + raw: swap_chain, + resources, + waitable, + acquired_count: 0, + present_mode: config.present_mode, + format: config.format, + size: config.extent, + }); + + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &Device) { + if let Some(mut sc) = self.swap_chain.take() { + unsafe { + let _ = sc.wait(None); + //TODO: this shouldn't be needed, + // but it complains that the queue is still used otherwise + let _ = device.wait_idle(); + let raw = sc.release_resources(); + raw.destroy(); + } + } + } + + unsafe fn acquire_texture( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> { + let sc = self.swap_chain.as_mut().unwrap(); + + unsafe { sc.wait(timeout) }?; + + let base_index = unsafe { sc.raw.GetCurrentBackBufferIndex() } as usize; + let index = (base_index + sc.acquired_count) % sc.resources.len(); + sc.acquired_count += 1; + + let texture = Texture { + resource: sc.resources[index], + format: sc.format, + dimension: wgt::TextureDimension::D2, + size: sc.size, + mip_level_count: 1, + sample_count: 1, + allocation: None, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&mut self, _texture: Texture) { + let sc = self.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; + } +} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&CommandBuffer], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + self.temp_lists.clear(); + for cmd_buf in command_buffers { + self.temp_lists.push(cmd_buf.raw.as_list()); + } + + { + profiling::scope!("ID3D12CommandQueue::ExecuteCommandLists"); + self.raw.execute_command_lists(&self.temp_lists); + } + + if let Some((fence, value)) = signal_fence { + self.raw + .signal(fence.raw, value) + .into_device_result("Signal fence")?; + } + Ok(()) + } + unsafe fn present( + &mut self, + surface: &mut Surface, + _texture: Texture, + ) -> Result<(), crate::SurfaceError> { + let sc = surface.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; + + let (interval, flags) = match sc.present_mode { + // We only allow immediate if ALLOW_TEARING is valid. + wgt::PresentMode::Immediate => (0, dxgi::DXGI_PRESENT_ALLOW_TEARING), + wgt::PresentMode::Mailbox => (0, 0), + wgt::PresentMode::Fifo => (1, 0), + m => unreachable!("Cannot make surface with present mode {m:?}"), + }; + + profiling::scope!("IDXGISwapchain3::Present"); + unsafe { sc.raw.Present(interval, flags) }; + + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + let mut frequency = 0u64; + unsafe { self.raw.GetTimestampFrequency(&mut frequency) }; + (1_000_000_000.0 / frequency as f64) as f32 + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs b/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs new file mode 100644 index 0000000000..9f9be7c409 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs @@ -0,0 +1,294 @@ +use std::ptr; + +pub(super) use dxc::{compile_dxc, get_dxc_container, DxcContainer}; +use winapi::um::d3dcompiler; + +use crate::auxil::dxgi::result::HResult; + +// This exists so that users who don't want to use dxc can disable the dxc_shader_compiler feature +// and not have to compile hassle_rs. +// Currently this will use Dxc if it is chosen as the dx12 compiler at `Instance` creation time, and will +// fallback to FXC if the Dxc libraries (dxil.dll and dxcompiler.dll) are not found, or if Fxc is chosen at' +// `Instance` creation time. + +pub(super) fn compile_fxc( + device: &super::Device, + source: &String, + source_name: &str, + raw_ep: &std::ffi::CString, + stage_bit: wgt::ShaderStages, + full_stage: String, +) -> ( + Result<super::CompiledShader, crate::PipelineError>, + log::Level, +) { + profiling::scope!("compile_fxc"); + let mut shader_data = d3d12::Blob::null(); + let mut compile_flags = d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS; + if device + .private_caps + .instance_flags + .contains(crate::InstanceFlags::DEBUG) + { + compile_flags |= d3dcompiler::D3DCOMPILE_DEBUG | d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION; + } + let mut error = d3d12::Blob::null(); + let hr = unsafe { + profiling::scope!("d3dcompiler::D3DCompile"); + d3dcompiler::D3DCompile( + source.as_ptr().cast(), + source.len(), + source_name.as_ptr().cast(), + ptr::null(), + ptr::null_mut(), + raw_ep.as_ptr(), + full_stage.as_ptr().cast(), + compile_flags, + 0, + shader_data.mut_void().cast(), + error.mut_void().cast(), + ) + }; + + match hr.into_result() { + Ok(()) => ( + Ok(super::CompiledShader::Fxc(shader_data)), + log::Level::Info, + ), + Err(e) => { + let mut full_msg = format!("FXC D3DCompile error ({e})"); + if !error.is_null() { + use std::fmt::Write as _; + let message = unsafe { + std::slice::from_raw_parts( + error.GetBufferPointer() as *const u8, + error.GetBufferSize(), + ) + }; + let _ = write!(full_msg, ": {}", String::from_utf8_lossy(message)); + unsafe { + error.destroy(); + } + } + ( + Err(crate::PipelineError::Linkage(stage_bit, full_msg)), + log::Level::Warn, + ) + } + } +} + +// The Dxc implementation is behind a feature flag so that users who don't want to use dxc can disable the feature. +#[cfg(feature = "dxc_shader_compiler")] +mod dxc { + use std::path::PathBuf; + + // Destructor order should be fine since _dxil and _dxc don't rely on each other. + pub(crate) struct DxcContainer { + compiler: hassle_rs::DxcCompiler, + library: hassle_rs::DxcLibrary, + validator: hassle_rs::DxcValidator, + // Has to be held onto for the lifetime of the device otherwise shaders will fail to compile. + _dxc: hassle_rs::Dxc, + // Also Has to be held onto for the lifetime of the device otherwise shaders will fail to validate. + _dxil: hassle_rs::Dxil, + } + + pub(crate) fn get_dxc_container( + dxc_path: Option<PathBuf>, + dxil_path: Option<PathBuf>, + ) -> Result<Option<DxcContainer>, crate::DeviceError> { + // Make sure that dxil.dll exists. + let dxil = match hassle_rs::Dxil::new(dxil_path) { + Ok(dxil) => dxil, + Err(e) => { + log::warn!("Failed to load dxil.dll. Defaulting to Fxc instead: {}", e); + return Ok(None); + } + }; + + // Needed for explicit validation. + let validator = dxil.create_validator()?; + + let dxc = match hassle_rs::Dxc::new(dxc_path) { + Ok(dxc) => dxc, + Err(e) => { + log::warn!( + "Failed to load dxcompiler.dll. Defaulting to Fxc instead: {}", + e + ); + return Ok(None); + } + }; + let compiler = dxc.create_compiler()?; + let library = dxc.create_library()?; + + Ok(Some(DxcContainer { + _dxc: dxc, + compiler, + library, + _dxil: dxil, + validator, + })) + } + + pub(crate) fn compile_dxc( + device: &crate::dx12::Device, + source: &str, + source_name: &str, + raw_ep: &str, + stage_bit: wgt::ShaderStages, + full_stage: String, + dxc_container: &DxcContainer, + ) -> ( + Result<crate::dx12::CompiledShader, crate::PipelineError>, + log::Level, + ) { + profiling::scope!("compile_dxc"); + let mut compile_flags = arrayvec::ArrayVec::<&str, 4>::new_const(); + compile_flags.push("-Ges"); // d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS + compile_flags.push("-Vd"); // Disable implicit validation to work around bugs when dxil.dll isn't in the local directory. + if device + .private_caps + .instance_flags + .contains(crate::InstanceFlags::DEBUG) + { + compile_flags.push("-Zi"); // d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION + compile_flags.push("-Od"); // d3dcompiler::D3DCOMPILE_DEBUG + } + + let blob = match dxc_container + .library + .create_blob_with_encoding_from_str(source) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("DXC blob error: {e}"))) + { + Ok(blob) => blob, + Err(e) => return (Err(e), log::Level::Error), + }; + + let compiled = dxc_container.compiler.compile( + &blob, + source_name, + raw_ep, + &full_stage, + &compile_flags, + None, + &[], + ); + + let (result, log_level) = match compiled { + Ok(dxc_result) => match dxc_result.get_result() { + Ok(dxc_blob) => { + // Validate the shader. + match dxc_container.validator.validate(dxc_blob) { + Ok(validated_blob) => ( + Ok(crate::dx12::CompiledShader::Dxc(validated_blob.to_vec())), + log::Level::Info, + ), + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!( + "DXC validation error: {:?}\n{:?}", + get_error_string_from_dxc_result(&dxc_container.library, &e.0) + .unwrap_or_default(), + e.1 + ), + )), + log::Level::Error, + ), + } + } + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!("DXC compile error: {e}"), + )), + log::Level::Error, + ), + }, + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!( + "DXC compile error: {:?}", + get_error_string_from_dxc_result(&dxc_container.library, &e.0) + .unwrap_or_default() + ), + )), + log::Level::Error, + ), + }; + + (result, log_level) + } + + impl From<hassle_rs::HassleError> for crate::DeviceError { + fn from(value: hassle_rs::HassleError) -> Self { + match value { + hassle_rs::HassleError::Win32Error(e) => { + // TODO: This returns an HRESULT, should we try and use the associated Windows error message? + log::error!("Win32 error: {e:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::LoadLibraryError { filename, inner } => { + log::error!("Failed to load dxc library {filename:?}. Inner error: {inner:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::LibLoadingError(e) => { + log::error!("Failed to load dxc library. {e:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::WindowsOnly(e) => { + log::error!("Signing with dxil.dll is only supported on Windows. {e:?}"); + crate::DeviceError::Lost + } + // `ValidationError` and `CompileError` should never happen in a context involving `DeviceError` + hassle_rs::HassleError::ValidationError(_e) => unimplemented!(), + hassle_rs::HassleError::CompileError(_e) => unimplemented!(), + } + } + } + + fn get_error_string_from_dxc_result( + library: &hassle_rs::DxcLibrary, + error: &hassle_rs::DxcOperationResult, + ) -> Result<String, hassle_rs::HassleError> { + error + .get_error_buffer() + .and_then(|error| library.get_blob_as_string(&hassle_rs::DxcBlob::from(error))) + } +} + +// These are stubs for when the `dxc_shader_compiler` feature is disabled. +#[cfg(not(feature = "dxc_shader_compiler"))] +mod dxc { + use std::path::PathBuf; + + pub(crate) struct DxcContainer {} + + pub(crate) fn get_dxc_container( + _dxc_path: Option<PathBuf>, + _dxil_path: Option<PathBuf>, + ) -> Result<Option<DxcContainer>, crate::DeviceError> { + // Falls back to Fxc and logs an error. + log::error!("DXC shader compiler was requested on Instance creation, but the DXC feature is disabled. Enable the `dxc_shader_compiler` feature on wgpu_hal to use DXC."); + Ok(None) + } + + // It shouldn't be possible that this gets called with the `dxc_shader_compiler` feature disabled. + pub(crate) fn compile_dxc( + _device: &crate::dx12::Device, + _source: &str, + _source_name: &str, + _raw_ep: &str, + _stage_bit: wgt::ShaderStages, + _full_stage: String, + _dxc_container: &DxcContainer, + ) -> ( + Result<crate::dx12::CompiledShader, crate::PipelineError>, + log::Level, + ) { + unimplemented!("Something went really wrong, please report this. Attempted to compile shader with DXC, but the DXC feature is disabled. Enable the `dxc_shader_compiler` feature on wgpu_hal to use DXC."); + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/suballocation.rs b/third_party/rust/wgpu-hal/src/dx12/suballocation.rs new file mode 100644 index 0000000000..01ca997f21 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/suballocation.rs @@ -0,0 +1,331 @@ +pub(crate) use allocation::{ + create_allocator_wrapper, create_buffer_resource, create_texture_resource, + free_buffer_allocation, free_texture_allocation, AllocationWrapper, GpuAllocatorWrapper, +}; + +// This exists to work around https://github.com/gfx-rs/wgpu/issues/3207 +// Currently this will work the older, slower way if the windows_rs feature is disabled, +// and will use the fast path of suballocating buffers and textures using gpu_allocator if +// the windows_rs feature is enabled. + +// This is the fast path using gpu_allocator to suballocate buffers and textures. +#[cfg(feature = "windows_rs")] +mod allocation { + use d3d12::WeakPtr; + use parking_lot::Mutex; + use std::ptr; + use wgt::assertions::StrictAssertUnwrapExt; + use winapi::{ + um::{ + d3d12::{self as d3d12_ty, ID3D12Resource}, + winnt::HRESULT, + }, + Interface, + }; + + use gpu_allocator::{ + d3d12::{AllocationCreateDesc, ToWinapi, ToWindows}, + MemoryLocation, + }; + + #[derive(Debug)] + pub(crate) struct GpuAllocatorWrapper { + pub(crate) allocator: gpu_allocator::d3d12::Allocator, + } + + #[derive(Debug)] + pub(crate) struct AllocationWrapper { + pub(crate) allocation: gpu_allocator::d3d12::Allocation, + } + + pub(crate) fn create_allocator_wrapper( + raw: &d3d12::Device, + ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> { + let device = raw.as_ptr(); + + match gpu_allocator::d3d12::Allocator::new(&gpu_allocator::d3d12::AllocatorCreateDesc { + device: device.as_windows().clone(), + debug_settings: Default::default(), + }) { + Ok(allocator) => Ok(Some(Mutex::new(GpuAllocatorWrapper { allocator }))), + Err(e) => { + log::error!("Failed to create d3d12 allocator, error: {}", e); + Err(e)? + } + } + } + + pub(crate) fn create_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + let location = match (is_cpu_read, is_cpu_write) { + (true, true) => MemoryLocation::CpuToGpu, + (true, false) => MemoryLocation::GpuToCpu, + (false, true) => MemoryLocation::CpuToGpu, + (false, false) => MemoryLocation::GpuOnly, + }; + + let name = desc.label.unwrap_or("Unlabeled buffer"); + + // SAFETY: allocator exists when the windows_rs feature is enabled + let mut allocator = unsafe { + device + .mem_allocator + .as_ref() + .strict_unwrap_unchecked() + .lock() + }; + + // let mut allocator = unsafe { device.mem_allocator.as_ref().unwrap_unchecked().lock() }; + let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( + allocator.allocator.device().as_winapi(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + let hr = unsafe { + device.raw.CreatePlacedResource( + allocation.heap().as_winapi() as *mut _, + allocation.offset(), + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, Some(AllocationWrapper { allocation }))) + } + + pub(crate) fn create_texture_resource( + device: &crate::dx12::Device, + desc: &crate::TextureDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let location = MemoryLocation::GpuOnly; + + let name = desc.label.unwrap_or("Unlabeled texture"); + + // SAFETY: allocator exists when the windows_rs feature is enabled + let mut allocator = unsafe { + device + .mem_allocator + .as_ref() + .strict_unwrap_unchecked() + .lock() + }; + let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( + allocator.allocator.device().as_winapi(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + let hr = unsafe { + device.raw.CreatePlacedResource( + allocation.heap().as_winapi() as *mut _, + allocation.offset(), + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), // clear value + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, Some(AllocationWrapper { allocation }))) + } + + pub(crate) fn free_buffer_allocation( + allocation: AllocationWrapper, + allocator: &Mutex<GpuAllocatorWrapper>, + ) { + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 buffer, {e}"), + }; + } + + pub(crate) fn free_texture_allocation( + allocation: AllocationWrapper, + allocator: &Mutex<GpuAllocatorWrapper>, + ) { + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 texture, {e}"), + }; + } + + #[cfg(feature = "windows_rs")] + impl From<gpu_allocator::AllocationError> for crate::DeviceError { + fn from(result: gpu_allocator::AllocationError) -> Self { + match result { + gpu_allocator::AllocationError::OutOfMemory => Self::OutOfMemory, + gpu_allocator::AllocationError::FailedToMap(e) => { + log::error!("DX12 gpu-allocator: Failed to map: {}", e); + Self::Lost + } + gpu_allocator::AllocationError::NoCompatibleMemoryTypeFound => { + log::error!("DX12 gpu-allocator: No Compatible Memory Type Found"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocationCreateDesc => { + log::error!("DX12 gpu-allocator: Invalid Allocation Creation Description"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocatorCreateDesc(e) => { + log::error!( + "DX12 gpu-allocator: Invalid Allocator Creation Description: {}", + e + ); + Self::Lost + } + gpu_allocator::AllocationError::Internal(e) => { + log::error!("DX12 gpu-allocator: Internal Error: {}", e); + Self::Lost + } + } + } + } +} + +// This is the older, slower path where it doesn't suballocate buffers. +// Tracking issue for when it can be removed: https://github.com/gfx-rs/wgpu/issues/3207 +#[cfg(not(feature = "windows_rs"))] +mod allocation { + use d3d12::WeakPtr; + use parking_lot::Mutex; + use std::ptr; + use winapi::{ + um::{ + d3d12::{self as d3d12_ty, ID3D12Resource}, + winnt::HRESULT, + }, + Interface, + }; + + const D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: u32 = d3d12_ty::D3D12_HEAP_FLAG_NONE; // TODO: find the exact value + + // Allocator isn't needed when not suballocating with gpu_allocator + #[derive(Debug)] + pub(crate) struct GpuAllocatorWrapper {} + + // Allocations aren't needed when not suballocating with gpu_allocator + #[derive(Debug)] + pub(crate) struct AllocationWrapper {} + + pub(crate) fn create_allocator_wrapper( + _raw: &d3d12::Device, + ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> { + Ok(None) + } + + pub(crate) fn create_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: if is_cpu_read { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK + } else if is_cpu_write { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + } else { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE + }, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified if !is_cpu_read && !is_cpu_write => { + d3d12_ty::D3D12_MEMORY_POOL_L1 + } + _ => d3d12_ty::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12_ty::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, None)) + } + + pub(crate) fn create_texture_resource( + device: &crate::dx12::Device, + _desc: &crate::TextureDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, + crate::dx12::MemoryArchitecture::Unified { .. } => d3d12_ty::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12_ty::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), // clear value + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, None)) + } + + pub(crate) fn free_buffer_allocation( + _allocation: AllocationWrapper, + _allocator: &Mutex<GpuAllocatorWrapper>, + ) { + // No-op when not using gpu-allocator + } + + pub(crate) fn free_texture_allocation( + _allocation: AllocationWrapper, + _allocator: &Mutex<GpuAllocatorWrapper>, + ) { + // No-op when not using gpu-allocator + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/types.rs b/third_party/rust/wgpu-hal/src/dx12/types.rs new file mode 100644 index 0000000000..dec4e71337 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/types.rs @@ -0,0 +1,34 @@ +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +winapi::ENUM! { + enum D3D12_VIEW_INSTANCING_TIER { + D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED = 0, + D3D12_VIEW_INSTANCING_TIER_1 = 1, + D3D12_VIEW_INSTANCING_TIER_2 = 2, + D3D12_VIEW_INSTANCING_TIER_3 = 3, + } +} + +winapi::ENUM! { + enum D3D12_COMMAND_LIST_SUPPORT_FLAGS { + D3D12_COMMAND_LIST_SUPPORT_FLAG_NONE = 0, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_DIRECT, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_BUNDLE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_COMPUTE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_COPY, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_DECODE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_PROCESS, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_ENCODE, + } +} + +winapi::STRUCT! { + struct D3D12_FEATURE_DATA_D3D12_OPTIONS3 { + CopyQueueTimestampQueriesSupported: winapi::shared::minwindef::BOOL, + CastingFullyTypedFormatSupported: winapi::shared::minwindef::BOOL, + WriteBufferImmediateSupportFlags: D3D12_COMMAND_LIST_SUPPORT_FLAGS, + ViewInstancingTier: D3D12_VIEW_INSTANCING_TIER, + BarycentricsSupported: winapi::shared::minwindef::BOOL, + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/view.rs b/third_party/rust/wgpu-hal/src/dx12/view.rs new file mode 100644 index 0000000000..e7a051b535 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/view.rs @@ -0,0 +1,380 @@ +use crate::auxil; +use std::mem; +use winapi::um::d3d12 as d3d12_ty; + +pub(crate) const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; + +pub(super) struct ViewDescriptor { + dimension: wgt::TextureViewDimension, + pub aspects: crate::FormatAspects, + pub rtv_dsv_format: d3d12::Format, + srv_uav_format: Option<d3d12::Format>, + multisampled: bool, + array_layer_base: u32, + array_layer_count: u32, + mip_level_base: u32, + mip_level_count: u32, +} + +impl crate::TextureViewDescriptor<'_> { + pub(super) fn to_internal(&self, texture: &super::Texture) -> ViewDescriptor { + let aspects = crate::FormatAspects::new(self.format, self.range.aspect); + + ViewDescriptor { + dimension: self.dimension, + aspects, + rtv_dsv_format: auxil::dxgi::conv::map_texture_format(self.format), + srv_uav_format: auxil::dxgi::conv::map_texture_format_for_srv_uav(self.format, aspects), + multisampled: texture.sample_count > 1, + mip_level_base: self.range.base_mip_level, + mip_level_count: self.range.mip_level_count.unwrap_or(!0), + array_layer_base: self.range.base_array_layer, + array_layer_count: self.range.array_layer_count.unwrap_or(!0), + } + } +} + +impl ViewDescriptor { + pub(crate) unsafe fn to_srv(&self) -> Option<d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC> { + let mut desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: self.srv_uav_format?, + ViewDimension: 0, + Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + ResourceMinLODClamp: 0.0, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_SRV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_SRV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::Cube if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBE; + unsafe { + *desc.u.TextureCube_mut() = d3d12_ty::D3D12_TEXCUBE_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + unsafe { + *desc.u.TextureCubeArray_mut() = d3d12_ty::D3D12_TEXCUBE_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + First2DArrayFace: self.array_layer_base, + NumCubes: if self.array_layer_count == !0 { + !0 + } else { + self.array_layer_count / 6 + }, + ResourceMinLODClamp: 0.0, + } + } + } + } + + Some(desc) + } + + pub(crate) unsafe fn to_uav(&self) -> Option<d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC> { + let mut desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: self.srv_uav_format?, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_UAV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_UAV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_UAV { + MipSlice: self.mip_level_base, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_UAV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_UAV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube UAV") + } + } + + Some(desc) + } + + pub(crate) unsafe fn to_rtv(&self) -> d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { + let mut desc = d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { + Format: self.rtv_dsv_format, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_RTV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_RTV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_RTV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_RTV { + MipSlice: self.mip_level_base, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_RTV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_RTV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_RTV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube RTV") + } + } + + desc + } + + pub(crate) unsafe fn to_dsv(&self, read_only: bool) -> d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { + let mut desc = d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { + Format: self.rtv_dsv_format, + ViewDimension: 0, + Flags: { + let mut flags = d3d12_ty::D3D12_DSV_FLAG_NONE; + if read_only { + if self.aspects.contains(crate::FormatAspects::DEPTH) { + flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_DEPTH; + } + if self.aspects.contains(crate::FormatAspects::STENCIL) { + flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_STENCIL; + } + } + flags + }, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_DSV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_DSV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_DSV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_DSV { + MipSlice: self.mip_level_base, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_DSV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_DSV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D3 + | wgt::TextureViewDimension::Cube + | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube or 3D RTV") + } + } + + desc + } +} diff --git a/third_party/rust/wgpu-hal/src/empty.rs b/third_party/rust/wgpu-hal/src/empty.rs new file mode 100644 index 0000000000..1497acad91 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/empty.rs @@ -0,0 +1,413 @@ +#![allow(unused_variables)] + +use std::ops::Range; + +#[derive(Clone)] +pub struct Api; +pub struct Context; +#[derive(Debug)] +pub struct Encoder; +#[derive(Debug)] +pub struct Resource; + +type DeviceResult<T> = Result<T, crate::DeviceError>; + +impl crate::Api for Api { + type Instance = Context; + type Surface = Context; + type Adapter = Context; + type Device = Context; + + type Queue = Context; + type CommandEncoder = Encoder; + type CommandBuffer = Resource; + + type Buffer = Resource; + type Texture = Resource; + type SurfaceTexture = Resource; + type TextureView = Resource; + type Sampler = Resource; + type QuerySet = Resource; + type Fence = Resource; + + type BindGroupLayout = Resource; + type BindGroup = Resource; + type PipelineLayout = Resource; + type ShaderModule = Resource; + type RenderPipeline = Resource; + type ComputePipeline = Resource; +} + +impl crate::Instance<Api> for Context { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + Ok(Context) + } + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + _window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Context, crate::InstanceError> { + Ok(Context) + } + unsafe fn destroy_surface(&self, surface: Context) {} + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<Api>> { + Vec::new() + } +} + +impl crate::Surface<Api> for Context { + unsafe fn configure( + &mut self, + device: &Context, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &Context) {} + + unsafe fn acquire_texture( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> { + Ok(None) + } + unsafe fn discard_texture(&mut self, texture: Resource) {} +} + +impl crate::Adapter<Api> for Context { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> DeviceResult<crate::OpenDevice<Api>> { + Err(crate::DeviceError::Lost) + } + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + crate::TextureFormatCapabilities::empty() + } + + unsafe fn surface_capabilities(&self, surface: &Context) -> Option<crate::SurfaceCapabilities> { + None + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp::INVALID_TIMESTAMP + } +} + +impl crate::Queue<Api> for Context { + unsafe fn submit( + &mut self, + command_buffers: &[&Resource], + signal_fence: Option<(&mut Resource, crate::FenceValue)>, + ) -> DeviceResult<()> { + Ok(()) + } + unsafe fn present( + &mut self, + surface: &mut Context, + texture: Resource, + ) -> Result<(), crate::SurfaceError> { + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + 1.0 + } +} + +impl crate::Device<Api> for Context { + unsafe fn exit(self, queue: Context) {} + unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_buffer(&self, buffer: Resource) {} + unsafe fn map_buffer( + &self, + buffer: &Resource, + range: crate::MemoryRange, + ) -> DeviceResult<crate::BufferMapping> { + Err(crate::DeviceError::Lost) + } + unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {} + + unsafe fn create_texture(&self, desc: &crate::TextureDescriptor) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_texture(&self, texture: Resource) {} + unsafe fn create_texture_view( + &self, + texture: &Resource, + desc: &crate::TextureViewDescriptor, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_texture_view(&self, view: Resource) {} + unsafe fn create_sampler(&self, desc: &crate::SamplerDescriptor) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_sampler(&self, sampler: Resource) {} + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<Api>, + ) -> DeviceResult<Encoder> { + Ok(Encoder) + } + unsafe fn destroy_command_encoder(&self, encoder: Encoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<Api>, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<Api>, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_bind_group(&self, group: Resource) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<Resource, crate::ShaderError> { + Ok(Resource) + } + unsafe fn destroy_shader_module(&self, module: Resource) {} + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<Api>, + ) -> Result<Resource, crate::PipelineError> { + Ok(Resource) + } + unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {} + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<Api>, + ) -> Result<Resource, crate::PipelineError> { + Ok(Resource) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: Resource) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_query_set(&self, set: Resource) {} + unsafe fn create_fence(&self) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_fence(&self, fence: Resource) {} + unsafe fn get_fence_value(&self, fence: &Resource) -> DeviceResult<crate::FenceValue> { + Ok(0) + } + unsafe fn wait( + &self, + fence: &Resource, + value: crate::FenceValue, + timeout_ms: u32, + ) -> DeviceResult<bool> { + Ok(true) + } + + unsafe fn start_capture(&self) -> bool { + false + } + unsafe fn stop_capture(&self) {} +} + +impl crate::CommandEncoder<Api> for Encoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> DeviceResult<()> { + Ok(()) + } + unsafe fn discard_encoding(&mut self) {} + unsafe fn end_encoding(&mut self) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn reset_all<I>(&mut self, command_buffers: I) {} + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, Api>>, + { + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, Api>>, + { + } + + unsafe fn clear_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange) {} + + unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + unsafe fn copy_external_image_to_texture<T>( + &mut self, + src: &wgt::ImageCopyExternalImage, + dst: &Resource, + dst_premultiplication: bool, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + dst: &Resource, + regions: T, + ) { + } + + unsafe fn copy_buffer_to_texture<T>(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + dst: &Resource, + regions: T, + ) { + } + + unsafe fn begin_query(&mut self, set: &Resource, index: u32) {} + unsafe fn end_query(&mut self, set: &Resource, index: u32) {} + unsafe fn write_timestamp(&mut self, set: &Resource, index: u32) {} + unsafe fn reset_queries(&mut self, set: &Resource, range: Range<u32>) {} + unsafe fn copy_query_results( + &mut self, + set: &Resource, + range: Range<u32>, + buffer: &Resource, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<Api>) {} + unsafe fn end_render_pass(&mut self) {} + + unsafe fn set_bind_group( + &mut self, + layout: &Resource, + index: u32, + group: &Resource, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + } + unsafe fn set_push_constants( + &mut self, + layout: &Resource, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + } + + unsafe fn insert_debug_marker(&mut self, label: &str) {} + unsafe fn begin_debug_marker(&mut self, group_label: &str) {} + unsafe fn end_debug_marker(&mut self) {} + + unsafe fn set_render_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, Api>, + format: wgt::IndexFormat, + ) { + } + unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: crate::BufferBinding<'a, Api>) { + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) {} + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) {} + unsafe fn set_stencil_reference(&mut self, value: u32) {} + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) {} + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} + unsafe fn end_compute_pass(&mut self) {} + + unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn dispatch(&mut self, count: [u32; 3]) {} + unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} +} diff --git a/third_party/rust/wgpu-hal/src/gles/adapter.rs b/third_party/rust/wgpu-hal/src/gles/adapter.rs new file mode 100644 index 0000000000..b14857ae22 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/adapter.rs @@ -0,0 +1,972 @@ +use glow::HasContext; +use std::sync::Arc; +use wgt::AstcChannel; + +use crate::auxil::db; + +// https://webgl2fundamentals.org/webgl/lessons/webgl-data-textures.html + +const GL_UNMASKED_VENDOR_WEBGL: u32 = 0x9245; +const GL_UNMASKED_RENDERER_WEBGL: u32 = 0x9246; + +impl super::Adapter { + /// According to the OpenGL specification, the version information is + /// expected to follow the following syntax: + /// + /// ~~~bnf + /// <major> ::= <number> + /// <minor> ::= <number> + /// <revision> ::= <number> + /// <vendor-info> ::= <string> + /// <release> ::= <major> "." <minor> ["." <release>] + /// <version> ::= <release> [" " <vendor-info>] + /// ~~~ + /// + /// Note that this function is intentionally lenient in regards to parsing, + /// and will try to recover at least the first two version numbers without + /// resulting in an `Err`. + /// # Notes + /// `WebGL 2` version returned as `OpenGL ES 3.0` + fn parse_version(mut src: &str) -> Result<(u8, u8), crate::InstanceError> { + let webgl_sig = "WebGL "; + // According to the WebGL specification + // VERSION WebGL<space>1.0<space><vendor-specific information> + // SHADING_LANGUAGE_VERSION WebGL<space>GLSL<space>ES<space>1.0<space><vendor-specific information> + let is_webgl = src.starts_with(webgl_sig); + if is_webgl { + let pos = src.rfind(webgl_sig).unwrap_or(0); + src = &src[pos + webgl_sig.len()..]; + } else { + let es_sig = " ES "; + match src.rfind(es_sig) { + Some(pos) => { + src = &src[pos + es_sig.len()..]; + } + None => { + log::warn!("ES not found in '{}'", src); + return Err(crate::InstanceError); + } + } + }; + + let glsl_es_sig = "GLSL ES "; + let is_glsl = match src.find(glsl_es_sig) { + Some(pos) => { + src = &src[pos + glsl_es_sig.len()..]; + true + } + None => false, + }; + + let (version, _vendor_info) = match src.find(' ') { + Some(i) => (&src[..i], src[i + 1..].to_string()), + None => (src, String::new()), + }; + + // TODO: make this even more lenient so that we can also accept + // `<major> "." <minor> [<???>]` + let mut it = version.split('.'); + let major = it.next().and_then(|s| s.parse().ok()); + let minor = it.next().and_then(|s| { + let trimmed = if s.starts_with('0') { + "0" + } else { + s.trim_end_matches('0') + }; + trimmed.parse().ok() + }); + + match (major, minor) { + (Some(major), Some(minor)) => Ok(( + // Return WebGL 2.0 version as OpenGL ES 3.0 + if is_webgl && !is_glsl { + major + 1 + } else { + major + }, + minor, + )), + _ => { + log::warn!("Unable to extract the version from '{}'", version); + Err(crate::InstanceError) + } + } + } + + fn make_info(vendor_orig: String, renderer_orig: String) -> wgt::AdapterInfo { + let vendor = vendor_orig.to_lowercase(); + let renderer = renderer_orig.to_lowercase(); + + // opengl has no way to discern device_type, so we can try to infer it from the renderer string + let strings_that_imply_integrated = [ + " xpress", // space here is on purpose so we don't match express + "amd renoir", + "radeon hd 4200", + "radeon hd 4250", + "radeon hd 4290", + "radeon hd 4270", + "radeon hd 4225", + "radeon hd 3100", + "radeon hd 3200", + "radeon hd 3000", + "radeon hd 3300", + "radeon(tm) r4 graphics", + "radeon(tm) r5 graphics", + "radeon(tm) r6 graphics", + "radeon(tm) r7 graphics", + "radeon r7 graphics", + "nforce", // all nvidia nforce are integrated + "tegra", // all nvidia tegra are integrated + "shield", // all nvidia shield are integrated + "igp", + "mali", + "intel", + "v3d", + "apple m", // all apple m are integrated + ]; + let strings_that_imply_cpu = ["mesa offscreen", "swiftshader", "llvmpipe"]; + + //TODO: handle Intel Iris XE as discreet + let inferred_device_type = if vendor.contains("qualcomm") + || vendor.contains("intel") + || strings_that_imply_integrated + .iter() + .any(|&s| renderer.contains(s)) + { + wgt::DeviceType::IntegratedGpu + } else if strings_that_imply_cpu.iter().any(|&s| renderer.contains(s)) { + wgt::DeviceType::Cpu + } else { + // At this point the Device type is Unknown. + // It's most likely DiscreteGpu, but we do not know for sure. + // Use "Other" to avoid possibly making incorrect assumptions. + // Note that if this same device is available under some other API (ex: Vulkan), + // It will mostly likely get a different device type (probably DiscreteGpu). + wgt::DeviceType::Other + }; + + // source: Sascha Willems at Vulkan + let vendor_id = if vendor.contains("amd") { + db::amd::VENDOR + } else if vendor.contains("imgtec") { + db::imgtec::VENDOR + } else if vendor.contains("nvidia") { + db::nvidia::VENDOR + } else if vendor.contains("arm") { + db::arm::VENDOR + } else if vendor.contains("qualcomm") { + db::qualcomm::VENDOR + } else if vendor.contains("intel") { + db::intel::VENDOR + } else if vendor.contains("broadcom") { + db::broadcom::VENDOR + } else if vendor.contains("mesa") { + db::mesa::VENDOR + } else if vendor.contains("apple") { + db::apple::VENDOR + } else { + 0 + }; + + wgt::AdapterInfo { + name: renderer_orig, + vendor: vendor_id, + device: 0, + device_type: inferred_device_type, + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Gl, + } + } + + pub(super) unsafe fn expose( + context: super::AdapterContext, + ) -> Option<crate::ExposedAdapter<super::Api>> { + let gl = context.lock(); + let extensions = gl.supported_extensions(); + + let (vendor_const, renderer_const) = if extensions.contains("WEBGL_debug_renderer_info") { + // emscripten doesn't enable "WEBGL_debug_renderer_info" extension by default. so, we do it manually. + // See https://github.com/gfx-rs/wgpu/issues/3245 for context + #[cfg(target_os = "emscripten")] + if unsafe { super::emscripten::enable_extension("WEBGL_debug_renderer_info\0") } { + (GL_UNMASKED_VENDOR_WEBGL, GL_UNMASKED_RENDERER_WEBGL) + } else { + (glow::VENDOR, glow::RENDERER) + } + // glow already enables WEBGL_debug_renderer_info on wasm32-unknown-unknown target by default. + #[cfg(not(target_os = "emscripten"))] + (GL_UNMASKED_VENDOR_WEBGL, GL_UNMASKED_RENDERER_WEBGL) + } else { + (glow::VENDOR, glow::RENDERER) + }; + + let (vendor, renderer) = { + let vendor = unsafe { gl.get_parameter_string(vendor_const) }; + let renderer = unsafe { gl.get_parameter_string(renderer_const) }; + + (vendor, renderer) + }; + let version = unsafe { gl.get_parameter_string(glow::VERSION) }; + log::info!("Vendor: {}", vendor); + log::info!("Renderer: {}", renderer); + log::info!("Version: {}", version); + + log::debug!("Extensions: {:#?}", extensions); + + let ver = Self::parse_version(&version).ok()?; + if ver < (3, 0) { + log::warn!( + "Returned GLES context is {}.{}, when 3.0+ was requested", + ver.0, + ver.1 + ); + return None; + } + + let supports_storage = ver >= (3, 1); + let supports_work_group_params = ver >= (3, 1); + + let shading_language_version = { + let sl_version = unsafe { gl.get_parameter_string(glow::SHADING_LANGUAGE_VERSION) }; + log::info!("SL version: {}", &sl_version); + let (sl_major, sl_minor) = Self::parse_version(&sl_version).ok()?; + let value = sl_major as u16 * 100 + sl_minor as u16 * 10; + naga::back::glsl::Version::Embedded { + version: value, + is_webgl: cfg!(target_arch = "wasm32"), + } + }; + + // ANGLE provides renderer strings like: "ANGLE (Apple, Apple M1 Pro, OpenGL 4.1)" + let is_angle = renderer.contains("ANGLE"); + + let vertex_shader_storage_blocks = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_SHADER_STORAGE_BLOCKS) } as u32) + } else { + 0 + }; + let fragment_shader_storage_blocks = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_FRAGMENT_SHADER_STORAGE_BLOCKS) } as u32) + } else { + 0 + }; + let vertex_shader_storage_textures = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_IMAGE_UNIFORMS) } as u32) + } else { + 0 + }; + let fragment_shader_storage_textures = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_FRAGMENT_IMAGE_UNIFORMS) } as u32) + } else { + 0 + }; + let max_storage_block_size = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_SHADER_STORAGE_BLOCK_SIZE) } as u32) + } else { + 0 + }; + let max_element_index = unsafe { gl.get_parameter_i32(glow::MAX_ELEMENT_INDEX) } as u32; + + // WORKAROUND: In order to work around an issue with GL on RPI4 and similar, we ignore a + // zero vertex ssbo count if there are vertex sstos. (more info: + // https://github.com/gfx-rs/wgpu/pull/1607#issuecomment-874938961) The hardware does not + // want us to write to these SSBOs, but GLES cannot express that. We detect this case and + // disable writing to SSBOs. + let vertex_ssbo_false_zero = + vertex_shader_storage_blocks == 0 && vertex_shader_storage_textures != 0; + if vertex_ssbo_false_zero { + // We only care about fragment here as the 0 is a lie. + log::warn!("Max vertex shader SSBO == 0 and SSTO != 0. Interpreting as false zero."); + } + + let max_storage_buffers_per_shader_stage = if vertex_shader_storage_blocks == 0 { + fragment_shader_storage_blocks + } else { + vertex_shader_storage_blocks.min(fragment_shader_storage_blocks) + }; + let max_storage_textures_per_shader_stage = if vertex_shader_storage_textures == 0 { + fragment_shader_storage_textures + } else { + vertex_shader_storage_textures.min(fragment_shader_storage_textures) + }; + + let mut downlevel_flags = wgt::DownlevelFlags::empty() + | wgt::DownlevelFlags::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES + | wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES + | wgt::DownlevelFlags::COMPARISON_SAMPLERS; + downlevel_flags.set(wgt::DownlevelFlags::COMPUTE_SHADERS, ver >= (3, 1)); + downlevel_flags.set( + wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE, + max_storage_block_size != 0, + ); + downlevel_flags.set(wgt::DownlevelFlags::INDIRECT_EXECUTION, ver >= (3, 1)); + //TODO: we can actually support positive `base_vertex` in the same way + // as we emulate the `start_instance`. But we can't deal with negatives... + downlevel_flags.set(wgt::DownlevelFlags::BASE_VERTEX, ver >= (3, 2)); + downlevel_flags.set( + wgt::DownlevelFlags::INDEPENDENT_BLEND, + ver >= (3, 2) || extensions.contains("GL_EXT_draw_buffers_indexed"), + ); + downlevel_flags.set( + wgt::DownlevelFlags::VERTEX_STORAGE, + max_storage_block_size != 0 + && max_storage_buffers_per_shader_stage != 0 + && (vertex_shader_storage_blocks != 0 || vertex_ssbo_false_zero), + ); + downlevel_flags.set(wgt::DownlevelFlags::FRAGMENT_STORAGE, supports_storage); + if extensions.contains("EXT_texture_filter_anisotropic") { + let max_aniso = + unsafe { gl.get_parameter_i32(glow::MAX_TEXTURE_MAX_ANISOTROPY_EXT) } as u32; + downlevel_flags.set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, max_aniso >= 16); + } + downlevel_flags.set( + wgt::DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED, + !(cfg!(target_arch = "wasm32") || is_angle), + ); + // see https://registry.khronos.org/webgl/specs/latest/2.0/#BUFFER_OBJECT_BINDING + downlevel_flags.set( + wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER, + !cfg!(target_arch = "wasm32"), + ); + downlevel_flags.set( + wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES, + !cfg!(target_arch = "wasm32"), + ); + downlevel_flags.set( + wgt::DownlevelFlags::FULL_DRAW_INDEX_UINT32, + max_element_index == u32::MAX, + ); + downlevel_flags.set( + wgt::DownlevelFlags::MULTISAMPLED_SHADING, + ver >= (3, 2) || extensions.contains("OES_sample_variables"), + ); + + let mut features = wgt::Features::empty() + | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | wgt::Features::CLEAR_TEXTURE + | wgt::Features::PUSH_CONSTANTS; + features.set( + wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO, + extensions.contains("GL_EXT_texture_border_clamp"), + ); + features.set( + wgt::Features::DEPTH_CLIP_CONTROL, + extensions.contains("GL_EXT_depth_clamp"), + ); + features.set( + wgt::Features::VERTEX_WRITABLE_STORAGE, + downlevel_flags.contains(wgt::DownlevelFlags::VERTEX_STORAGE) + && vertex_shader_storage_textures != 0, + ); + features.set( + wgt::Features::MULTIVIEW, + extensions.contains("OVR_multiview2"), + ); + features.set( + wgt::Features::SHADER_PRIMITIVE_INDEX, + ver >= (3, 2) || extensions.contains("OES_geometry_shader"), + ); + features.set(wgt::Features::SHADER_EARLY_DEPTH_TEST, ver >= (3, 1)); + let gles_bcn_exts = [ + "GL_EXT_texture_compression_s3tc_srgb", + "GL_EXT_texture_compression_rgtc", + "GL_EXT_texture_compression_bptc", + ]; + let webgl_bcn_exts = [ + "WEBGL_compressed_texture_s3tc", + "WEBGL_compressed_texture_s3tc_srgb", + "EXT_texture_compression_rgtc", + "EXT_texture_compression_bptc", + ]; + let bcn_exts = if cfg!(target_arch = "wasm32") { + &webgl_bcn_exts[..] + } else { + &gles_bcn_exts[..] + }; + features.set( + wgt::Features::TEXTURE_COMPRESSION_BC, + bcn_exts.iter().all(|&ext| extensions.contains(ext)), + ); + features.set( + wgt::Features::TEXTURE_COMPRESSION_ETC2, + // This is a part of GLES-3 but not WebGL2 core + !cfg!(target_arch = "wasm32") || extensions.contains("WEBGL_compressed_texture_etc"), + ); + // `OES_texture_compression_astc` provides 2D + 3D, LDR + HDR support + if extensions.contains("WEBGL_compressed_texture_astc") + || extensions.contains("GL_OES_texture_compression_astc") + { + features.insert(wgt::Features::TEXTURE_COMPRESSION_ASTC); + features.insert(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR); + } else { + features.set( + wgt::Features::TEXTURE_COMPRESSION_ASTC, + extensions.contains("GL_KHR_texture_compression_astc_ldr"), + ); + features.set( + wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR, + extensions.contains("GL_KHR_texture_compression_astc_hdr"), + ); + } + + let mut private_caps = super::PrivateCapabilities::empty(); + private_caps.set( + super::PrivateCapabilities::BUFFER_ALLOCATION, + extensions.contains("GL_EXT_buffer_storage"), + ); + private_caps.set( + super::PrivateCapabilities::SHADER_BINDING_LAYOUT, + ver >= (3, 1), + ); + private_caps.set( + super::PrivateCapabilities::SHADER_TEXTURE_SHADOW_LOD, + extensions.contains("GL_EXT_texture_shadow_lod"), + ); + private_caps.set(super::PrivateCapabilities::MEMORY_BARRIERS, ver >= (3, 1)); + private_caps.set( + super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT, + ver >= (3, 1), + ); + private_caps.set( + super::PrivateCapabilities::INDEX_BUFFER_ROLE_CHANGE, + !cfg!(target_arch = "wasm32"), + ); + private_caps.set( + super::PrivateCapabilities::CAN_DISABLE_DRAW_BUFFER, + !cfg!(target_arch = "wasm32"), + ); + private_caps.set( + super::PrivateCapabilities::GET_BUFFER_SUB_DATA, + cfg!(target_arch = "wasm32"), + ); + let color_buffer_float = extensions.contains("GL_EXT_color_buffer_float") + || extensions.contains("EXT_color_buffer_float"); + let color_buffer_half_float = extensions.contains("GL_EXT_color_buffer_half_float"); + private_caps.set( + super::PrivateCapabilities::COLOR_BUFFER_HALF_FLOAT, + color_buffer_half_float || color_buffer_float, + ); + private_caps.set( + super::PrivateCapabilities::COLOR_BUFFER_FLOAT, + color_buffer_float, + ); + private_caps.set( + super::PrivateCapabilities::TEXTURE_FLOAT_LINEAR, + extensions.contains("OES_texture_float_linear"), + ); + + let max_texture_size = unsafe { gl.get_parameter_i32(glow::MAX_TEXTURE_SIZE) } as u32; + let max_texture_3d_size = unsafe { gl.get_parameter_i32(glow::MAX_3D_TEXTURE_SIZE) } as u32; + + let min_uniform_buffer_offset_alignment = + (unsafe { gl.get_parameter_i32(glow::UNIFORM_BUFFER_OFFSET_ALIGNMENT) } as u32); + let min_storage_buffer_offset_alignment = if ver >= (3, 1) { + (unsafe { gl.get_parameter_i32(glow::SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT) } as u32) + } else { + 256 + }; + let max_uniform_buffers_per_shader_stage = + unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_UNIFORM_BLOCKS) } + .min(unsafe { gl.get_parameter_i32(glow::MAX_FRAGMENT_UNIFORM_BLOCKS) }) + as u32; + + let max_compute_workgroups_per_dimension = if supports_work_group_params { + unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 0) } + .min(unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 1) }) + .min(unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 2) }) + as u32 + } else { + 0 + }; + + let limits = wgt::Limits { + max_texture_dimension_1d: max_texture_size, + max_texture_dimension_2d: max_texture_size, + max_texture_dimension_3d: max_texture_3d_size, + max_texture_array_layers: unsafe { + gl.get_parameter_i32(glow::MAX_ARRAY_TEXTURE_LAYERS) + } as u32, + max_bind_groups: crate::MAX_BIND_GROUPS as u32, + max_bindings_per_bind_group: 65535, + max_dynamic_uniform_buffers_per_pipeline_layout: max_uniform_buffers_per_shader_stage, + max_dynamic_storage_buffers_per_pipeline_layout: max_storage_buffers_per_shader_stage, + max_sampled_textures_per_shader_stage: super::MAX_TEXTURE_SLOTS as u32, + max_samplers_per_shader_stage: super::MAX_SAMPLERS as u32, + max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage, + max_uniform_buffers_per_shader_stage, + max_uniform_buffer_binding_size: unsafe { + gl.get_parameter_i32(glow::MAX_UNIFORM_BLOCK_SIZE) + } as u32, + max_storage_buffer_binding_size: if ver >= (3, 1) { + unsafe { gl.get_parameter_i32(glow::MAX_SHADER_STORAGE_BLOCK_SIZE) } + } else { + 0 + } as u32, + max_vertex_buffers: if private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIB_BINDINGS) } as u32) + } else { + 16 // should this be different? + }, + max_vertex_attributes: (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIBS) } + as u32) + .min(super::MAX_VERTEX_ATTRIBUTES as u32), + max_vertex_buffer_array_stride: if private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIB_STRIDE) } as u32) + } else { + !0 + }, + max_push_constant_size: super::MAX_PUSH_CONSTANTS as u32 * 4, + min_uniform_buffer_offset_alignment, + min_storage_buffer_offset_alignment, + max_inter_stage_shader_components: unsafe { + gl.get_parameter_i32(glow::MAX_VARYING_COMPONENTS) + } as u32, + max_compute_workgroup_storage_size: if supports_work_group_params { + (unsafe { gl.get_parameter_i32(glow::MAX_COMPUTE_SHARED_MEMORY_SIZE) } as u32) + } else { + 0 + }, + max_compute_invocations_per_workgroup: if supports_work_group_params { + (unsafe { gl.get_parameter_i32(glow::MAX_COMPUTE_WORK_GROUP_INVOCATIONS) } as u32) + } else { + 0 + }, + max_compute_workgroup_size_x: if supports_work_group_params { + (unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 0) } + as u32) + } else { + 0 + }, + max_compute_workgroup_size_y: if supports_work_group_params { + (unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 1) } + as u32) + } else { + 0 + }, + max_compute_workgroup_size_z: if supports_work_group_params { + (unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 2) } + as u32) + } else { + 0 + }, + max_compute_workgroups_per_dimension, + max_buffer_size: i32::MAX as u64, + }; + + let mut workarounds = super::Workarounds::empty(); + + workarounds.set( + super::Workarounds::EMULATE_BUFFER_MAP, + cfg!(target_arch = "wasm32"), + ); + + let r = renderer.to_lowercase(); + // Check for Mesa sRGB clear bug. See + // [`super::PrivateCapabilities::MESA_I915_SRGB_SHADER_CLEAR`]. + if context.is_owned() + && r.contains("mesa") + && r.contains("intel") + && r.split(&[' ', '(', ')'][..]) + .any(|substr| substr.len() == 3 && substr.chars().nth(2) == Some('l')) + { + log::warn!( + "Detected skylake derivative running on mesa i915. Clears to srgb textures will \ + use manual shader clears." + ); + workarounds.set(super::Workarounds::MESA_I915_SRGB_SHADER_CLEAR, true); + } + + let downlevel_defaults = wgt::DownlevelLimits {}; + + // Drop the GL guard so we can move the context into AdapterShared + // ( on WASM the gl handle is just a ref so we tell clippy to allow + // dropping the ref ) + #[allow(clippy::drop_ref)] + drop(gl); + + Some(crate::ExposedAdapter { + adapter: super::Adapter { + shared: Arc::new(super::AdapterShared { + context, + private_caps, + workarounds, + features, + shading_language_version, + max_texture_size, + next_shader_id: Default::default(), + program_cache: Default::default(), + }), + }, + info: Self::make_info(vendor, renderer), + features, + capabilities: crate::Capabilities { + limits, + downlevel: wgt::DownlevelCapabilities { + flags: downlevel_flags, + limits: downlevel_defaults, + shader_model: wgt::ShaderModel::Sm5, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(4).unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(), + }, + }, + }) + } + + unsafe fn create_shader_clear_program( + gl: &glow::Context, + ) -> (glow::Program, glow::UniformLocation) { + let program = unsafe { gl.create_program() }.expect("Could not create shader program"); + let vertex = + unsafe { gl.create_shader(glow::VERTEX_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(vertex, include_str!("./shaders/clear.vert")) }; + unsafe { gl.compile_shader(vertex) }; + let fragment = + unsafe { gl.create_shader(glow::FRAGMENT_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(fragment, include_str!("./shaders/clear.frag")) }; + unsafe { gl.compile_shader(fragment) }; + unsafe { gl.attach_shader(program, vertex) }; + unsafe { gl.attach_shader(program, fragment) }; + unsafe { gl.link_program(program) }; + let color_uniform_location = unsafe { gl.get_uniform_location(program, "color") } + .expect("Could not find color uniform in shader clear shader"); + unsafe { gl.delete_shader(vertex) }; + unsafe { gl.delete_shader(fragment) }; + + (program, color_uniform_location) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let gl = &self.shared.context.lock(); + unsafe { gl.pixel_store_i32(glow::UNPACK_ALIGNMENT, 1) }; + unsafe { gl.pixel_store_i32(glow::PACK_ALIGNMENT, 1) }; + let main_vao = + unsafe { gl.create_vertex_array() }.map_err(|_| crate::DeviceError::OutOfMemory)?; + unsafe { gl.bind_vertex_array(Some(main_vao)) }; + + let zero_buffer = + unsafe { gl.create_buffer() }.map_err(|_| crate::DeviceError::OutOfMemory)?; + unsafe { gl.bind_buffer(glow::COPY_READ_BUFFER, Some(zero_buffer)) }; + let zeroes = vec![0u8; super::ZERO_BUFFER_SIZE]; + unsafe { gl.buffer_data_u8_slice(glow::COPY_READ_BUFFER, &zeroes, glow::STATIC_DRAW) }; + + // Compile the shader program we use for doing manual clears to work around Mesa fastclear + // bug. + let (shader_clear_program, shader_clear_program_color_uniform_location) = + unsafe { Self::create_shader_clear_program(gl) }; + + Ok(crate::OpenDevice { + device: super::Device { + shared: Arc::clone(&self.shared), + main_vao, + #[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] + render_doc: Default::default(), + }, + queue: super::Queue { + shared: Arc::clone(&self.shared), + features, + draw_fbo: unsafe { gl.create_framebuffer() } + .map_err(|_| crate::DeviceError::OutOfMemory)?, + copy_fbo: unsafe { gl.create_framebuffer() } + .map_err(|_| crate::DeviceError::OutOfMemory)?, + shader_clear_program, + shader_clear_program_color_uniform_location, + zero_buffer, + temp_query_results: Vec::new(), + draw_buffer_count: 1, + current_index_buffer: None, + }, + }) + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + use wgt::TextureFormat as Tf; + + let sample_count = { + let max_samples = unsafe { + self.shared + .context + .lock() + .get_parameter_i32(glow::MAX_SAMPLES) + }; + if max_samples >= 16 { + Tfc::MULTISAMPLE_X2 + | Tfc::MULTISAMPLE_X4 + | Tfc::MULTISAMPLE_X8 + | Tfc::MULTISAMPLE_X16 + } else if max_samples >= 8 { + Tfc::MULTISAMPLE_X2 | Tfc::MULTISAMPLE_X4 | Tfc::MULTISAMPLE_X8 + } else if max_samples >= 4 { + Tfc::MULTISAMPLE_X2 | Tfc::MULTISAMPLE_X4 + } else { + Tfc::MULTISAMPLE_X2 + } + }; + + // Base types are pulled from the table in the OpenGLES 3.0 spec in section 3.8. + // + // The storage types are based on table 8.26, in section + // "TEXTURE IMAGE LOADS AND STORES" of OpenGLES-3.2 spec. + let empty = Tfc::empty(); + let base = Tfc::COPY_SRC | Tfc::COPY_DST; + let unfilterable = base | Tfc::SAMPLED; + let depth = base | Tfc::SAMPLED | sample_count | Tfc::DEPTH_STENCIL_ATTACHMENT; + let filterable = unfilterable | Tfc::SAMPLED_LINEAR; + let renderable = + unfilterable | Tfc::COLOR_ATTACHMENT | sample_count | Tfc::MULTISAMPLE_RESOLVE; + let filterable_renderable = filterable | renderable | Tfc::COLOR_ATTACHMENT_BLEND; + let storage = base | Tfc::STORAGE | Tfc::STORAGE_READ_WRITE; + + let feature_fn = |f, caps| { + if self.shared.features.contains(f) { + caps + } else { + empty + } + }; + + let bcn_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_BC, filterable); + let etc2_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_ETC2, filterable); + let astc_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_ASTC, filterable); + let astc_hdr_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR, filterable); + + let private_caps_fn = |f, caps| { + if self.shared.private_caps.contains(f) { + caps + } else { + empty + } + }; + + let half_float_renderable = private_caps_fn( + super::PrivateCapabilities::COLOR_BUFFER_HALF_FLOAT, + Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | sample_count + | Tfc::MULTISAMPLE_RESOLVE, + ); + + let float_renderable = private_caps_fn( + super::PrivateCapabilities::COLOR_BUFFER_FLOAT, + Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | sample_count + | Tfc::MULTISAMPLE_RESOLVE, + ); + + let texture_float_linear = + private_caps_fn(super::PrivateCapabilities::TEXTURE_FLOAT_LINEAR, filterable); + + match format { + Tf::R8Unorm => filterable_renderable, + Tf::R8Snorm => filterable, + Tf::R8Uint => renderable, + Tf::R8Sint => renderable, + Tf::R16Uint => renderable, + Tf::R16Sint => renderable, + Tf::R16Unorm => empty, + Tf::R16Snorm => empty, + Tf::R16Float => filterable | half_float_renderable, + Tf::Rg8Unorm => filterable_renderable, + Tf::Rg8Snorm => filterable, + Tf::Rg8Uint => renderable, + Tf::Rg8Sint => renderable, + Tf::R32Uint => renderable | storage, + Tf::R32Sint => renderable | storage, + Tf::R32Float => unfilterable | storage | float_renderable | texture_float_linear, + Tf::Rg16Uint => renderable, + Tf::Rg16Sint => renderable, + Tf::Rg16Unorm => empty, + Tf::Rg16Snorm => empty, + Tf::Rg16Float => filterable | half_float_renderable, + Tf::Rgba8Unorm | Tf::Rgba8UnormSrgb => filterable_renderable | storage, + Tf::Bgra8Unorm | Tf::Bgra8UnormSrgb => filterable_renderable, + Tf::Rgba8Snorm => filterable, + Tf::Rgba8Uint => renderable | storage, + Tf::Rgba8Sint => renderable | storage, + Tf::Rgb10a2Unorm => filterable_renderable, + Tf::Rg11b10Float => filterable | float_renderable, + Tf::Rg32Uint => renderable, + Tf::Rg32Sint => renderable, + Tf::Rg32Float => unfilterable | float_renderable | texture_float_linear, + Tf::Rgba16Uint => renderable | storage, + Tf::Rgba16Sint => renderable | storage, + Tf::Rgba16Unorm => empty, + Tf::Rgba16Snorm => empty, + Tf::Rgba16Float => filterable | storage | half_float_renderable, + Tf::Rgba32Uint => renderable | storage, + Tf::Rgba32Sint => renderable | storage, + Tf::Rgba32Float => unfilterable | storage | float_renderable | texture_float_linear, + Tf::Stencil8 + | Tf::Depth16Unorm + | Tf::Depth32Float + | Tf::Depth32FloatStencil8 + | Tf::Depth24Plus + | Tf::Depth24PlusStencil8 => depth, + Tf::Rgb9e5Ufloat => filterable, + Tf::Bc1RgbaUnorm + | Tf::Bc1RgbaUnormSrgb + | Tf::Bc2RgbaUnorm + | Tf::Bc2RgbaUnormSrgb + | Tf::Bc3RgbaUnorm + | Tf::Bc3RgbaUnormSrgb + | Tf::Bc4RUnorm + | Tf::Bc4RSnorm + | Tf::Bc5RgUnorm + | Tf::Bc5RgSnorm + | Tf::Bc6hRgbFloat + | Tf::Bc6hRgbUfloat + | Tf::Bc7RgbaUnorm + | Tf::Bc7RgbaUnormSrgb => bcn_features, + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | Tf::EacRg11Unorm + | Tf::EacRg11Snorm => etc2_features, + Tf::Astc { + block: _, + channel: AstcChannel::Unorm | AstcChannel::UnormSrgb, + } => astc_features, + Tf::Astc { + block: _, + channel: AstcChannel::Hdr, + } => astc_hdr_features, + } + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + if surface.presentable { + let mut formats = vec![ + wgt::TextureFormat::Rgba8Unorm, + #[cfg(not(target_arch = "wasm32"))] + wgt::TextureFormat::Bgra8Unorm, + ]; + if surface.supports_srgb() { + formats.extend([ + wgt::TextureFormat::Rgba8UnormSrgb, + #[cfg(not(target_arch = "wasm32"))] + wgt::TextureFormat::Bgra8UnormSrgb, + ]) + } + if self + .shared + .private_caps + .contains(super::PrivateCapabilities::COLOR_BUFFER_HALF_FLOAT) + { + formats.push(wgt::TextureFormat::Rgba16Float) + } + + Some(crate::SurfaceCapabilities { + formats, + present_modes: vec![wgt::PresentMode::Fifo], //TODO + composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO + swap_chain_sizes: 2..=2, + current_extent: None, + extents: wgt::Extent3d { + width: 4, + height: 4, + depth_or_array_layers: 1, + }..=wgt::Extent3d { + width: self.shared.max_texture_size, + height: self.shared.max_texture_size, + depth_or_array_layers: 1, + }, + usage: crate::TextureUses::COLOR_TARGET, + }) + } else { + None + } + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp::INVALID_TIMESTAMP + } +} + +impl super::AdapterShared { + pub(super) unsafe fn get_buffer_sub_data( + &self, + gl: &glow::Context, + target: u32, + offset: i32, + dst_data: &mut [u8], + ) { + if self + .private_caps + .contains(super::PrivateCapabilities::GET_BUFFER_SUB_DATA) + { + unsafe { gl.get_buffer_sub_data(target, offset, dst_data) }; + } else { + log::error!("Fake map"); + let length = dst_data.len(); + let buffer_mapping = + unsafe { gl.map_buffer_range(target, offset, length as _, glow::MAP_READ_BIT) }; + + unsafe { std::ptr::copy_nonoverlapping(buffer_mapping, dst_data.as_mut_ptr(), length) }; + + unsafe { gl.unmap_buffer(target) }; + } + } +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for super::Adapter {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for super::Adapter {} + +#[cfg(test)] +mod tests { + use super::super::Adapter; + + #[test] + fn test_version_parse() { + let error = Err(crate::InstanceError); + assert_eq!(Adapter::parse_version("1"), error); + assert_eq!(Adapter::parse_version("1."), error); + assert_eq!(Adapter::parse_version("1 h3l1o. W0rld"), error); + assert_eq!(Adapter::parse_version("1. h3l1o. W0rld"), error); + assert_eq!(Adapter::parse_version("1.2.3"), error); + assert_eq!(Adapter::parse_version("OpenGL ES 3.1"), Ok((3, 1))); + assert_eq!( + Adapter::parse_version("OpenGL ES 2.0 Google Nexus"), + Ok((2, 0)) + ); + assert_eq!(Adapter::parse_version("GLSL ES 1.1"), Ok((1, 1))); + assert_eq!(Adapter::parse_version("OpenGL ES GLSL ES 3.20"), Ok((3, 2))); + assert_eq!( + // WebGL 2.0 should parse as OpenGL ES 3.0 + Adapter::parse_version("WebGL 2.0 (OpenGL ES 3.0 Chromium)"), + Ok((3, 0)) + ); + assert_eq!( + Adapter::parse_version("WebGL GLSL ES 3.00 (OpenGL ES GLSL ES 3.0 Chromium)"), + Ok((3, 0)) + ); + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/command.rs b/third_party/rust/wgpu-hal/src/gles/command.rs new file mode 100644 index 0000000000..5e3a1c52c8 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/command.rs @@ -0,0 +1,1063 @@ +use super::{conv, Command as C}; +use arrayvec::ArrayVec; +use std::{mem, ops::Range}; + +#[derive(Clone, Copy, Debug, Default)] +struct TextureSlotDesc { + tex_target: super::BindTarget, + sampler_index: Option<u8>, +} + +#[derive(Default)] +pub(super) struct State { + topology: u32, + primitive: super::PrimitiveState, + index_format: wgt::IndexFormat, + index_offset: wgt::BufferAddress, + vertex_buffers: + [(super::VertexBufferDesc, Option<super::BufferBinding>); crate::MAX_VERTEX_BUFFERS], + vertex_attributes: ArrayVec<super::AttributeDesc, { super::MAX_VERTEX_ATTRIBUTES }>, + color_targets: ArrayVec<super::ColorTargetDesc, { crate::MAX_COLOR_ATTACHMENTS }>, + stencil: super::StencilState, + depth_bias: wgt::DepthBiasState, + alpha_to_coverage_enabled: bool, + samplers: [Option<glow::Sampler>; super::MAX_SAMPLERS], + texture_slots: [TextureSlotDesc; super::MAX_TEXTURE_SLOTS], + render_size: wgt::Extent3d, + resolve_attachments: ArrayVec<(u32, super::TextureView), { crate::MAX_COLOR_ATTACHMENTS }>, + invalidate_attachments: ArrayVec<u32, { crate::MAX_COLOR_ATTACHMENTS + 2 }>, + has_pass_label: bool, + instance_vbuf_mask: usize, + dirty_vbuf_mask: usize, + active_first_instance: u32, + push_offset_to_uniform: ArrayVec<super::UniformDesc, { super::MAX_PUSH_CONSTANTS }>, +} + +impl super::CommandBuffer { + fn clear(&mut self) { + self.label = None; + self.commands.clear(); + self.data_bytes.clear(); + self.queries.clear(); + } + + fn add_marker(&mut self, marker: &str) -> Range<u32> { + let start = self.data_bytes.len() as u32; + self.data_bytes.extend(marker.as_bytes()); + start..self.data_bytes.len() as u32 + } + + fn add_push_constant_data(&mut self, data: &[u32]) -> Range<u32> { + let data_raw = unsafe { + std::slice::from_raw_parts( + data.as_ptr() as *const _, + data.len() * mem::size_of::<u32>(), + ) + }; + let start = self.data_bytes.len(); + assert!(start < u32::MAX as usize); + self.data_bytes.extend_from_slice(data_raw); + let end = self.data_bytes.len(); + assert!(end < u32::MAX as usize); + (start as u32)..(end as u32) + } +} + +impl super::CommandEncoder { + fn rebind_stencil_func(&mut self) { + fn make(s: &super::StencilSide, face: u32) -> C { + C::SetStencilFunc { + face, + function: s.function, + reference: s.reference, + read_mask: s.mask_read, + } + } + + let s = &self.state.stencil; + if s.front.function == s.back.function + && s.front.mask_read == s.back.mask_read + && s.front.reference == s.back.reference + { + self.cmd_buffer + .commands + .push(make(&s.front, glow::FRONT_AND_BACK)); + } else { + self.cmd_buffer.commands.push(make(&s.front, glow::FRONT)); + self.cmd_buffer.commands.push(make(&s.back, glow::BACK)); + } + } + + fn rebind_vertex_data(&mut self, first_instance: u32) { + if self + .private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + for (index, pair) in self.state.vertex_buffers.iter().enumerate() { + if self.state.dirty_vbuf_mask & (1 << index) == 0 { + continue; + } + let (buffer_desc, vb) = match *pair { + // Not all dirty bindings are necessarily filled. Some may be unused. + (_, None) => continue, + (ref vb_desc, Some(ref vb)) => (vb_desc.clone(), vb), + }; + let instance_offset = match buffer_desc.step { + wgt::VertexStepMode::Vertex => 0, + wgt::VertexStepMode::Instance => first_instance * buffer_desc.stride, + }; + + self.cmd_buffer.commands.push(C::SetVertexBuffer { + index: index as u32, + buffer: super::BufferBinding { + raw: vb.raw, + offset: vb.offset + instance_offset as wgt::BufferAddress, + }, + buffer_desc, + }); + self.state.dirty_vbuf_mask ^= 1 << index; + } + } else { + let mut vbuf_mask = 0; + for attribute in self.state.vertex_attributes.iter() { + if self.state.dirty_vbuf_mask & (1 << attribute.buffer_index) == 0 { + continue; + } + let (buffer_desc, vb) = + match self.state.vertex_buffers[attribute.buffer_index as usize] { + // Not all dirty bindings are necessarily filled. Some may be unused. + (_, None) => continue, + (ref vb_desc, Some(ref vb)) => (vb_desc.clone(), vb), + }; + + let mut attribute_desc = attribute.clone(); + attribute_desc.offset += vb.offset as u32; + if buffer_desc.step == wgt::VertexStepMode::Instance { + attribute_desc.offset += buffer_desc.stride * first_instance; + } + + self.cmd_buffer.commands.push(C::SetVertexAttribute { + buffer: Some(vb.raw), + buffer_desc, + attribute_desc, + }); + vbuf_mask |= 1 << attribute.buffer_index; + } + self.state.dirty_vbuf_mask ^= vbuf_mask; + } + } + + fn rebind_sampler_states(&mut self, dirty_textures: u32, dirty_samplers: u32) { + for (texture_index, slot) in self.state.texture_slots.iter().enumerate() { + if dirty_textures & (1 << texture_index) != 0 + || slot + .sampler_index + .map_or(false, |si| dirty_samplers & (1 << si) != 0) + { + let sampler = slot + .sampler_index + .and_then(|si| self.state.samplers[si as usize]); + self.cmd_buffer + .commands + .push(C::BindSampler(texture_index as u32, sampler)); + } + } + } + + fn prepare_draw(&mut self, first_instance: u32) { + if first_instance != self.state.active_first_instance { + // rebind all per-instance buffers on first-instance change + self.state.dirty_vbuf_mask |= self.state.instance_vbuf_mask; + self.state.active_first_instance = first_instance; + } + if self.state.dirty_vbuf_mask != 0 { + self.rebind_vertex_data(first_instance); + } + } + + fn set_pipeline_inner(&mut self, inner: &super::PipelineInner) { + self.cmd_buffer.commands.push(C::SetProgram(inner.program)); + + self.state.push_offset_to_uniform.clear(); + self.state + .push_offset_to_uniform + .extend(inner.uniforms.iter().cloned()); + + // rebind textures, if needed + let mut dirty_textures = 0u32; + for (texture_index, (slot, &sampler_index)) in self + .state + .texture_slots + .iter_mut() + .zip(inner.sampler_map.iter()) + .enumerate() + { + if slot.sampler_index != sampler_index { + slot.sampler_index = sampler_index; + dirty_textures |= 1 << texture_index; + } + } + if dirty_textures != 0 { + self.rebind_sampler_states(dirty_textures, 0); + } + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + self.state = State::default(); + self.cmd_buffer.label = label.map(str::to_string); + Ok(()) + } + unsafe fn discard_encoding(&mut self) { + self.cmd_buffer.clear(); + } + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + Ok(mem::take(&mut self.cmd_buffer)) + } + unsafe fn reset_all<I>(&mut self, _command_buffers: I) { + //TODO: could re-use the allocations in all these command buffers + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + if !self + .private_caps + .contains(super::PrivateCapabilities::MEMORY_BARRIERS) + { + return; + } + for bar in barriers { + // GLES only synchronizes storage -> anything explicitly + if !bar + .usage + .start + .contains(crate::BufferUses::STORAGE_READ_WRITE) + { + continue; + } + self.cmd_buffer + .commands + .push(C::BufferBarrier(bar.buffer.raw.unwrap(), bar.usage.end)); + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + if !self + .private_caps + .contains(super::PrivateCapabilities::MEMORY_BARRIERS) + { + return; + } + + let mut combined_usage = crate::TextureUses::empty(); + for bar in barriers { + // GLES only synchronizes storage -> anything explicitly + if !bar + .usage + .start + .contains(crate::TextureUses::STORAGE_READ_WRITE) + { + continue; + } + // unlike buffers, there is no need for a concrete texture + // object to be bound anywhere for a barrier + combined_usage |= bar.usage.end; + } + + if !combined_usage.is_empty() { + self.cmd_buffer + .commands + .push(C::TextureBarrier(combined_usage)); + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + self.cmd_buffer.commands.push(C::ClearBuffer { + dst: buffer.clone(), + dst_target: buffer.target, + range, + }); + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let (src_target, dst_target) = if src.target == dst.target { + (glow::COPY_READ_BUFFER, glow::COPY_WRITE_BUFFER) + } else { + (src.target, dst.target) + }; + for copy in regions { + self.cmd_buffer.commands.push(C::CopyBufferToBuffer { + src: src.clone(), + src_target, + dst: dst.clone(), + dst_target, + copy, + }) + } + } + + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + unsafe fn copy_external_image_to_texture<T>( + &mut self, + src: &wgt::ImageCopyExternalImage, + dst: &super::Texture, + dst_premultiplication: bool, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let (dst_raw, dst_target) = dst.inner.as_native(); + for copy in regions { + self.cmd_buffer + .commands + .push(C::CopyExternalImageToTexture { + src: src.clone(), + dst: dst_raw, + dst_target, + dst_format: dst.format, + dst_premultiplication, + copy, + }) + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let (src_raw, src_target) = src.inner.as_native(); + let (dst_raw, dst_target) = dst.inner.as_native(); + for mut copy in regions { + copy.clamp_size_to_virtual(&src.copy_size, &dst.copy_size); + self.cmd_buffer.commands.push(C::CopyTextureToTexture { + src: src_raw, + src_target, + dst: dst_raw, + dst_target, + copy, + dst_is_cubemap: dst.is_cubemap, + }) + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let (dst_raw, dst_target) = dst.inner.as_native(); + + for mut copy in regions { + copy.clamp_size_to_virtual(&dst.copy_size); + self.cmd_buffer.commands.push(C::CopyBufferToTexture { + src: src.clone(), + src_target: src.target, + dst: dst_raw, + dst_target, + dst_format: dst.format, + copy, + }) + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let (src_raw, src_target) = src.inner.as_native(); + for mut copy in regions { + copy.clamp_size_to_virtual(&src.copy_size); + self.cmd_buffer.commands.push(C::CopyTextureToBuffer { + src: src_raw, + src_target, + src_format: src.format, + dst: dst.clone(), + dst_target: dst.target, + copy, + }) + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + let query = set.queries[index as usize]; + self.cmd_buffer + .commands + .push(C::BeginQuery(query, set.target)); + } + unsafe fn end_query(&mut self, set: &super::QuerySet, _index: u32) { + self.cmd_buffer.commands.push(C::EndQuery(set.target)); + } + unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) { + unimplemented!() + } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range<u32>) { + //TODO: what do we do here? + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _stride: wgt::BufferSize, + ) { + let start = self.cmd_buffer.queries.len(); + self.cmd_buffer + .queries + .extend_from_slice(&set.queries[range.start as usize..range.end as usize]); + let query_range = start as u32..self.cmd_buffer.queries.len() as u32; + self.cmd_buffer.commands.push(C::CopyQueryResults { + query_range, + dst: buffer.clone(), + dst_target: buffer.target, + dst_offset: offset, + }); + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + self.state.render_size = desc.extent; + self.state.resolve_attachments.clear(); + self.state.invalidate_attachments.clear(); + if let Some(label) = desc.label { + let range = self.cmd_buffer.add_marker(label); + self.cmd_buffer.commands.push(C::PushDebugGroup(range)); + self.state.has_pass_label = true; + } + + let rendering_to_external_framebuffer = desc + .color_attachments + .iter() + .filter_map(|at| at.as_ref()) + .any(|at| match at.target.view.inner { + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + super::TextureInner::ExternalFramebuffer { .. } => true, + _ => false, + }); + + if rendering_to_external_framebuffer && desc.color_attachments.len() != 1 { + panic!("Multiple render attachments with external framebuffers are not supported."); + } + + match desc + .color_attachments + .first() + .filter(|at| at.is_some()) + .and_then(|at| at.as_ref().map(|at| &at.target.view.inner)) + { + // default framebuffer (provided externally) + Some(&super::TextureInner::DefaultRenderbuffer) => { + self.cmd_buffer + .commands + .push(C::ResetFramebuffer { is_default: true }); + } + _ => { + // set the framebuffer + self.cmd_buffer + .commands + .push(C::ResetFramebuffer { is_default: false }); + + for (i, cat) in desc.color_attachments.iter().enumerate() { + if let Some(cat) = cat.as_ref() { + let attachment = glow::COLOR_ATTACHMENT0 + i as u32; + self.cmd_buffer.commands.push(C::BindAttachment { + attachment, + view: cat.target.view.clone(), + }); + if let Some(ref rat) = cat.resolve_target { + self.state + .resolve_attachments + .push((attachment, rat.view.clone())); + } + if !cat.ops.contains(crate::AttachmentOps::STORE) { + self.state.invalidate_attachments.push(attachment); + } + } + } + if let Some(ref dsat) = desc.depth_stencil_attachment { + let aspects = dsat.target.view.aspects; + let attachment = match aspects { + crate::FormatAspects::DEPTH => glow::DEPTH_ATTACHMENT, + crate::FormatAspects::STENCIL => glow::STENCIL_ATTACHMENT, + _ => glow::DEPTH_STENCIL_ATTACHMENT, + }; + self.cmd_buffer.commands.push(C::BindAttachment { + attachment, + view: dsat.target.view.clone(), + }); + if aspects.contains(crate::FormatAspects::DEPTH) + && !dsat.depth_ops.contains(crate::AttachmentOps::STORE) + { + self.state + .invalidate_attachments + .push(glow::DEPTH_ATTACHMENT); + } + if aspects.contains(crate::FormatAspects::STENCIL) + && !dsat.stencil_ops.contains(crate::AttachmentOps::STORE) + { + self.state + .invalidate_attachments + .push(glow::STENCIL_ATTACHMENT); + } + } + + if !rendering_to_external_framebuffer { + // set the draw buffers and states + self.cmd_buffer + .commands + .push(C::SetDrawColorBuffers(desc.color_attachments.len() as u8)); + } + } + } + + let rect = crate::Rect { + x: 0, + y: 0, + w: desc.extent.width as i32, + h: desc.extent.height as i32, + }; + self.cmd_buffer.commands.push(C::SetScissor(rect.clone())); + self.cmd_buffer.commands.push(C::SetViewport { + rect, + depth: 0.0..1.0, + }); + + // issue the clears + for (i, cat) in desc + .color_attachments + .iter() + .filter_map(|at| at.as_ref()) + .enumerate() + { + if !cat.ops.contains(crate::AttachmentOps::LOAD) { + let c = &cat.clear_value; + self.cmd_buffer.commands.push( + match cat.target.view.format.sample_type(None).unwrap() { + wgt::TextureSampleType::Float { .. } => C::ClearColorF { + draw_buffer: i as u32, + color: [c.r as f32, c.g as f32, c.b as f32, c.a as f32], + is_srgb: cat.target.view.format.is_srgb(), + }, + wgt::TextureSampleType::Uint => C::ClearColorU( + i as u32, + [c.r as u32, c.g as u32, c.b as u32, c.a as u32], + ), + wgt::TextureSampleType::Sint => C::ClearColorI( + i as u32, + [c.r as i32, c.g as i32, c.b as i32, c.a as i32], + ), + wgt::TextureSampleType::Depth => unreachable!(), + }, + ); + } + } + if let Some(ref dsat) = desc.depth_stencil_attachment { + let clear_depth = !dsat.depth_ops.contains(crate::AttachmentOps::LOAD); + let clear_stencil = !dsat.stencil_ops.contains(crate::AttachmentOps::LOAD); + + if clear_depth && clear_stencil { + self.cmd_buffer.commands.push(C::ClearDepthAndStencil( + dsat.clear_value.0, + dsat.clear_value.1, + )); + } else if clear_depth { + self.cmd_buffer + .commands + .push(C::ClearDepth(dsat.clear_value.0)); + } else if clear_stencil { + self.cmd_buffer + .commands + .push(C::ClearStencil(dsat.clear_value.1)); + } + } + } + unsafe fn end_render_pass(&mut self) { + for (attachment, dst) in self.state.resolve_attachments.drain(..) { + self.cmd_buffer.commands.push(C::ResolveAttachment { + attachment, + dst, + size: self.state.render_size, + }); + } + if !self.state.invalidate_attachments.is_empty() { + self.cmd_buffer.commands.push(C::InvalidateAttachments( + self.state.invalidate_attachments.clone(), + )); + self.state.invalidate_attachments.clear(); + } + if self.state.has_pass_label { + self.cmd_buffer.commands.push(C::PopDebugGroup); + self.state.has_pass_label = false; + } + self.state.instance_vbuf_mask = 0; + self.state.dirty_vbuf_mask = 0; + self.state.active_first_instance = 0; + self.state.color_targets.clear(); + for vat in &self.state.vertex_attributes { + self.cmd_buffer + .commands + .push(C::UnsetVertexAttribute(vat.location)); + } + self.state.vertex_attributes.clear(); + self.state.primitive = super::PrimitiveState::default(); + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let mut do_index = 0; + let mut dirty_textures = 0u32; + let mut dirty_samplers = 0u32; + let group_info = &layout.group_infos[index as usize]; + + for (binding_layout, raw_binding) in group_info.entries.iter().zip(group.contents.iter()) { + let slot = group_info.binding_to_slot[binding_layout.binding as usize] as u32; + match *raw_binding { + super::RawBinding::Buffer { + raw, + offset: base_offset, + size, + } => { + let mut offset = base_offset; + let target = match binding_layout.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + min_binding_size: _, + } => { + if has_dynamic_offset { + offset += dynamic_offsets[do_index] as i32; + do_index += 1; + } + match ty { + wgt::BufferBindingType::Uniform => glow::UNIFORM_BUFFER, + wgt::BufferBindingType::Storage { .. } => { + glow::SHADER_STORAGE_BUFFER + } + } + } + _ => unreachable!(), + }; + self.cmd_buffer.commands.push(C::BindBuffer { + target, + slot, + buffer: raw, + offset, + size, + }); + } + super::RawBinding::Sampler(sampler) => { + dirty_samplers |= 1 << slot; + self.state.samplers[slot as usize] = Some(sampler); + } + super::RawBinding::Texture { + raw, + target, + aspects, + } => { + dirty_textures |= 1 << slot; + self.state.texture_slots[slot as usize].tex_target = target; + self.cmd_buffer.commands.push(C::BindTexture { + slot, + texture: raw, + target, + aspects, + }); + } + super::RawBinding::Image(ref binding) => { + self.cmd_buffer.commands.push(C::BindImage { + slot, + binding: binding.clone(), + }); + } + } + } + + self.rebind_sampler_states(dirty_textures, dirty_samplers); + } + + unsafe fn set_push_constants( + &mut self, + _layout: &super::PipelineLayout, + _stages: wgt::ShaderStages, + start_offset: u32, + data: &[u32], + ) { + let range = self.cmd_buffer.add_push_constant_data(data); + + let end = start_offset + data.len() as u32 * 4; + let mut offset = start_offset; + while offset < end { + let uniform = self.state.push_offset_to_uniform[offset as usize / 4].clone(); + let size = uniform.size; + if uniform.location.is_none() { + panic!("No uniform for push constant"); + } + self.cmd_buffer.commands.push(C::SetPushConstants { + uniform, + offset: range.start + offset, + }); + offset += size; + } + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + let range = self.cmd_buffer.add_marker(label); + self.cmd_buffer.commands.push(C::InsertDebugMarker(range)); + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + let range = self.cmd_buffer.add_marker(group_label); + self.cmd_buffer.commands.push(C::PushDebugGroup(range)); + } + unsafe fn end_debug_marker(&mut self) { + self.cmd_buffer.commands.push(C::PopDebugGroup); + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + self.state.topology = conv::map_primitive_topology(pipeline.primitive.topology); + + if self + .private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + for vat in pipeline.vertex_attributes.iter() { + let vb = &pipeline.vertex_buffers[vat.buffer_index as usize]; + // set the layout + self.cmd_buffer.commands.push(C::SetVertexAttribute { + buffer: None, + buffer_desc: vb.clone(), + attribute_desc: vat.clone(), + }); + } + } else { + for vat in &self.state.vertex_attributes { + self.cmd_buffer + .commands + .push(C::UnsetVertexAttribute(vat.location)); + } + self.state.vertex_attributes.clear(); + + self.state.dirty_vbuf_mask = 0; + // copy vertex attributes + for vat in pipeline.vertex_attributes.iter() { + //Note: we can invalidate more carefully here. + self.state.dirty_vbuf_mask |= 1 << vat.buffer_index; + self.state.vertex_attributes.push(vat.clone()); + } + } + + self.state.instance_vbuf_mask = 0; + // copy vertex state + for (index, (&mut (ref mut state_desc, _), pipe_desc)) in self + .state + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_buffers.iter()) + .enumerate() + { + if pipe_desc.step == wgt::VertexStepMode::Instance { + self.state.instance_vbuf_mask |= 1 << index; + } + if state_desc != pipe_desc { + self.state.dirty_vbuf_mask |= 1 << index; + *state_desc = pipe_desc.clone(); + } + } + + self.set_pipeline_inner(&pipeline.inner); + + // set primitive state + let prim_state = conv::map_primitive_state(&pipeline.primitive); + if prim_state != self.state.primitive { + self.cmd_buffer + .commands + .push(C::SetPrimitive(prim_state.clone())); + self.state.primitive = prim_state; + } + + // set depth/stencil states + let mut aspects = crate::FormatAspects::empty(); + if pipeline.depth_bias != self.state.depth_bias { + self.state.depth_bias = pipeline.depth_bias; + self.cmd_buffer + .commands + .push(C::SetDepthBias(pipeline.depth_bias)); + } + if let Some(ref depth) = pipeline.depth { + aspects |= crate::FormatAspects::DEPTH; + self.cmd_buffer.commands.push(C::SetDepth(depth.clone())); + } + if let Some(ref stencil) = pipeline.stencil { + aspects |= crate::FormatAspects::STENCIL; + self.state.stencil = stencil.clone(); + self.rebind_stencil_func(); + if stencil.front.ops == stencil.back.ops + && stencil.front.mask_write == stencil.back.mask_write + { + self.cmd_buffer.commands.push(C::SetStencilOps { + face: glow::FRONT_AND_BACK, + write_mask: stencil.front.mask_write, + ops: stencil.front.ops.clone(), + }); + } else { + self.cmd_buffer.commands.push(C::SetStencilOps { + face: glow::FRONT, + write_mask: stencil.front.mask_write, + ops: stencil.front.ops.clone(), + }); + self.cmd_buffer.commands.push(C::SetStencilOps { + face: glow::BACK, + write_mask: stencil.back.mask_write, + ops: stencil.back.ops.clone(), + }); + } + } + self.cmd_buffer + .commands + .push(C::ConfigureDepthStencil(aspects)); + + // set multisampling state + if pipeline.alpha_to_coverage_enabled != self.state.alpha_to_coverage_enabled { + self.state.alpha_to_coverage_enabled = pipeline.alpha_to_coverage_enabled; + self.cmd_buffer + .commands + .push(C::SetAlphaToCoverage(pipeline.alpha_to_coverage_enabled)); + } + + // set blend states + if self.state.color_targets[..] != pipeline.color_targets[..] { + if pipeline + .color_targets + .iter() + .skip(1) + .any(|ct| *ct != pipeline.color_targets[0]) + { + for (index, ct) in pipeline.color_targets.iter().enumerate() { + self.cmd_buffer.commands.push(C::SetColorTarget { + draw_buffer_index: Some(index as u32), + desc: ct.clone(), + }); + } + } else { + self.cmd_buffer.commands.push(C::SetColorTarget { + draw_buffer_index: None, + desc: pipeline.color_targets.first().cloned().unwrap_or_default(), + }); + } + } + self.state.color_targets.clear(); + for ct in pipeline.color_targets.iter() { + self.state.color_targets.push(ct.clone()); + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + self.state.index_offset = binding.offset; + self.state.index_format = format; + self.cmd_buffer + .commands + .push(C::SetIndexBuffer(binding.buffer.raw.unwrap())); + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + self.state.dirty_vbuf_mask |= 1 << index; + let (_, ref mut vb) = self.state.vertex_buffers[index as usize]; + *vb = Some(super::BufferBinding { + raw: binding.buffer.raw.unwrap(), + offset: binding.offset, + }); + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth: Range<f32>) { + self.cmd_buffer.commands.push(C::SetViewport { + rect: crate::Rect { + x: rect.x as i32, + y: rect.y as i32, + w: rect.w as i32, + h: rect.h as i32, + }, + depth, + }); + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + self.cmd_buffer.commands.push(C::SetScissor(crate::Rect { + x: rect.x as i32, + y: rect.y as i32, + w: rect.w as i32, + h: rect.h as i32, + })); + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + self.state.stencil.front.reference = value; + self.state.stencil.back.reference = value; + self.rebind_stencil_func(); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.cmd_buffer.commands.push(C::SetBlendConstant(*color)); + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + self.prepare_draw(start_instance); + self.cmd_buffer.commands.push(C::Draw { + topology: self.state.topology, + start_vertex, + vertex_count, + instance_count, + }); + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + self.prepare_draw(start_instance); + let (index_size, index_type) = match self.state.index_format { + wgt::IndexFormat::Uint16 => (2, glow::UNSIGNED_SHORT), + wgt::IndexFormat::Uint32 => (4, glow::UNSIGNED_INT), + }; + let index_offset = self.state.index_offset + index_size * start_index as wgt::BufferAddress; + self.cmd_buffer.commands.push(C::DrawIndexed { + topology: self.state.topology, + index_type, + index_offset, + index_count, + base_vertex, + instance_count, + }); + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + self.prepare_draw(0); + for draw in 0..draw_count as wgt::BufferAddress { + let indirect_offset = + offset + draw * mem::size_of::<wgt::DrawIndirectArgs>() as wgt::BufferAddress; + self.cmd_buffer.commands.push(C::DrawIndirect { + topology: self.state.topology, + indirect_buf: buffer.raw.unwrap(), + indirect_offset, + }); + } + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + self.prepare_draw(0); + let index_type = match self.state.index_format { + wgt::IndexFormat::Uint16 => glow::UNSIGNED_SHORT, + wgt::IndexFormat::Uint32 => glow::UNSIGNED_INT, + }; + for draw in 0..draw_count as wgt::BufferAddress { + let indirect_offset = offset + + draw * mem::size_of::<wgt::DrawIndexedIndirectArgs>() as wgt::BufferAddress; + self.cmd_buffer.commands.push(C::DrawIndexedIndirect { + topology: self.state.topology, + index_type, + indirect_buf: buffer.raw.unwrap(), + indirect_offset, + }); + } + } + unsafe fn draw_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + unreachable!() + } + unsafe fn draw_indexed_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + unreachable!() + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + if let Some(label) = desc.label { + let range = self.cmd_buffer.add_marker(label); + self.cmd_buffer.commands.push(C::PushDebugGroup(range)); + self.state.has_pass_label = true; + } + } + unsafe fn end_compute_pass(&mut self) { + if self.state.has_pass_label { + self.cmd_buffer.commands.push(C::PopDebugGroup); + self.state.has_pass_label = false; + } + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + self.set_pipeline_inner(&pipeline.inner); + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + self.cmd_buffer.commands.push(C::Dispatch(count)); + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + self.cmd_buffer.commands.push(C::DispatchIndirect { + indirect_buf: buffer.raw.unwrap(), + indirect_offset: offset, + }); + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/conv.rs b/third_party/rust/wgpu-hal/src/gles/conv.rs new file mode 100644 index 0000000000..86ff3b60b0 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/conv.rs @@ -0,0 +1,501 @@ +impl super::AdapterShared { + pub(super) fn describe_texture_format( + &self, + texture_format: wgt::TextureFormat, + ) -> super::TextureFormatDesc { + use wgt::TextureFormat as Tf; + use wgt::{AstcBlock, AstcChannel}; + + let (internal, external, data_type) = match texture_format { + Tf::R8Unorm => (glow::R8, glow::RED, glow::UNSIGNED_BYTE), + Tf::R8Snorm => (glow::R8_SNORM, glow::RED, glow::BYTE), + Tf::R8Uint => (glow::R8UI, glow::RED_INTEGER, glow::UNSIGNED_BYTE), + Tf::R8Sint => (glow::R8I, glow::RED_INTEGER, glow::BYTE), + Tf::R16Uint => (glow::R16UI, glow::RED_INTEGER, glow::UNSIGNED_SHORT), + Tf::R16Sint => (glow::R16I, glow::RED_INTEGER, glow::SHORT), + Tf::R16Unorm => (glow::R16, glow::RED, glow::UNSIGNED_SHORT), + Tf::R16Snorm => (glow::R16_SNORM, glow::RED, glow::SHORT), + Tf::R16Float => (glow::R16F, glow::RED, glow::HALF_FLOAT), + Tf::Rg8Unorm => (glow::RG8, glow::RG, glow::UNSIGNED_BYTE), + Tf::Rg8Snorm => (glow::RG8_SNORM, glow::RG, glow::BYTE), + Tf::Rg8Uint => (glow::RG8UI, glow::RG_INTEGER, glow::UNSIGNED_BYTE), + Tf::Rg8Sint => (glow::RG8I, glow::RG_INTEGER, glow::BYTE), + Tf::R32Uint => (glow::R32UI, glow::RED_INTEGER, glow::UNSIGNED_INT), + Tf::R32Sint => (glow::R32I, glow::RED_INTEGER, glow::INT), + Tf::R32Float => (glow::R32F, glow::RED, glow::FLOAT), + Tf::Rg16Uint => (glow::RG16UI, glow::RG_INTEGER, glow::UNSIGNED_SHORT), + Tf::Rg16Sint => (glow::RG16I, glow::RG_INTEGER, glow::SHORT), + Tf::Rg16Unorm => (glow::RG16, glow::RG, glow::UNSIGNED_SHORT), + Tf::Rg16Snorm => (glow::RG16_SNORM, glow::RG, glow::SHORT), + Tf::Rg16Float => (glow::RG16F, glow::RG, glow::HALF_FLOAT), + Tf::Rgba8Unorm => (glow::RGBA8, glow::RGBA, glow::UNSIGNED_BYTE), + Tf::Rgba8UnormSrgb => (glow::SRGB8_ALPHA8, glow::RGBA, glow::UNSIGNED_BYTE), + Tf::Bgra8UnormSrgb => (glow::SRGB8_ALPHA8, glow::BGRA, glow::UNSIGNED_BYTE), //TODO? + Tf::Rgba8Snorm => (glow::RGBA8_SNORM, glow::RGBA, glow::BYTE), + Tf::Bgra8Unorm => (glow::RGBA8, glow::BGRA, glow::UNSIGNED_BYTE), //TODO? + Tf::Rgba8Uint => (glow::RGBA8UI, glow::RGBA_INTEGER, glow::UNSIGNED_BYTE), + Tf::Rgba8Sint => (glow::RGBA8I, glow::RGBA_INTEGER, glow::BYTE), + Tf::Rgb10a2Unorm => ( + glow::RGB10_A2, + glow::RGBA, + glow::UNSIGNED_INT_2_10_10_10_REV, + ), + Tf::Rg11b10Float => ( + glow::R11F_G11F_B10F, + glow::RGB, + glow::UNSIGNED_INT_10F_11F_11F_REV, + ), + Tf::Rg32Uint => (glow::RG32UI, glow::RG_INTEGER, glow::UNSIGNED_INT), + Tf::Rg32Sint => (glow::RG32I, glow::RG_INTEGER, glow::INT), + Tf::Rg32Float => (glow::RG32F, glow::RG, glow::FLOAT), + Tf::Rgba16Uint => (glow::RGBA16UI, glow::RGBA_INTEGER, glow::UNSIGNED_SHORT), + Tf::Rgba16Sint => (glow::RGBA16I, glow::RGBA_INTEGER, glow::SHORT), + Tf::Rgba16Unorm => (glow::RGBA16, glow::RGBA, glow::UNSIGNED_SHORT), + Tf::Rgba16Snorm => (glow::RGBA16_SNORM, glow::RGBA, glow::SHORT), + Tf::Rgba16Float => (glow::RGBA16F, glow::RGBA, glow::HALF_FLOAT), + Tf::Rgba32Uint => (glow::RGBA32UI, glow::RGBA_INTEGER, glow::UNSIGNED_INT), + Tf::Rgba32Sint => (glow::RGBA32I, glow::RGBA_INTEGER, glow::INT), + Tf::Rgba32Float => (glow::RGBA32F, glow::RGBA, glow::FLOAT), + Tf::Stencil8 => ( + glow::STENCIL_INDEX8, + glow::STENCIL_INDEX, + glow::UNSIGNED_BYTE, + ), + Tf::Depth16Unorm => ( + glow::DEPTH_COMPONENT16, + glow::DEPTH_COMPONENT, + glow::UNSIGNED_SHORT, + ), + Tf::Depth32Float => (glow::DEPTH_COMPONENT32F, glow::DEPTH_COMPONENT, glow::FLOAT), + Tf::Depth32FloatStencil8 => ( + glow::DEPTH32F_STENCIL8, + glow::DEPTH_STENCIL, + glow::FLOAT_32_UNSIGNED_INT_24_8_REV, + ), + Tf::Depth24Plus => ( + glow::DEPTH_COMPONENT24, + glow::DEPTH_COMPONENT, + glow::UNSIGNED_INT, + ), + Tf::Depth24PlusStencil8 => ( + glow::DEPTH24_STENCIL8, + glow::DEPTH_STENCIL, + glow::UNSIGNED_INT_24_8, + ), + Tf::Rgb9e5Ufloat => (glow::RGB9_E5, glow::RGB, glow::UNSIGNED_INT_5_9_9_9_REV), + Tf::Bc1RgbaUnorm => (glow::COMPRESSED_RGBA_S3TC_DXT1_EXT, glow::RGBA, 0), + Tf::Bc1RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, glow::RGBA, 0), + Tf::Bc2RgbaUnorm => (glow::COMPRESSED_RGBA_S3TC_DXT3_EXT, glow::RGBA, 0), + Tf::Bc2RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, glow::RGBA, 0), + Tf::Bc3RgbaUnorm => (glow::COMPRESSED_RGBA_S3TC_DXT5_EXT, glow::RGBA, 0), + Tf::Bc3RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, glow::RGBA, 0), + Tf::Bc4RUnorm => (glow::COMPRESSED_RED_RGTC1, glow::RED, 0), + Tf::Bc4RSnorm => (glow::COMPRESSED_SIGNED_RED_RGTC1, glow::RED, 0), + Tf::Bc5RgUnorm => (glow::COMPRESSED_RG_RGTC2, glow::RG, 0), + Tf::Bc5RgSnorm => (glow::COMPRESSED_SIGNED_RG_RGTC2, glow::RG, 0), + Tf::Bc6hRgbUfloat => (glow::COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, glow::RGB, 0), + Tf::Bc6hRgbFloat => (glow::COMPRESSED_RGB_BPTC_SIGNED_FLOAT, glow::RGB, 0), + Tf::Bc7RgbaUnorm => (glow::COMPRESSED_RGBA_BPTC_UNORM, glow::RGBA, 0), + Tf::Bc7RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_BPTC_UNORM, glow::RGBA, 0), + Tf::Etc2Rgb8Unorm => (glow::COMPRESSED_RGB8_ETC2, glow::RGB, 0), + Tf::Etc2Rgb8UnormSrgb => (glow::COMPRESSED_SRGB8_ETC2, glow::RGB, 0), + Tf::Etc2Rgb8A1Unorm => ( + glow::COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, + glow::RGBA, + 0, + ), + Tf::Etc2Rgb8A1UnormSrgb => ( + glow::COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2, + glow::RGBA, + 0, + ), + Tf::Etc2Rgba8Unorm => ( + //TODO: this is a lie, it's not sRGB + glow::COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, + glow::RGBA, + 0, + ), + Tf::Etc2Rgba8UnormSrgb => (glow::COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, glow::RGBA, 0), + Tf::EacR11Unorm => (glow::COMPRESSED_R11_EAC, glow::RED, 0), + Tf::EacR11Snorm => (glow::COMPRESSED_SIGNED_R11_EAC, glow::RED, 0), + Tf::EacRg11Unorm => (glow::COMPRESSED_RG11_EAC, glow::RG, 0), + Tf::EacRg11Snorm => (glow::COMPRESSED_SIGNED_RG11_EAC, glow::RG, 0), + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm | AstcChannel::Hdr => match block { + AstcBlock::B4x4 => (glow::COMPRESSED_RGBA_ASTC_4x4_KHR, glow::RGBA, 0), + AstcBlock::B5x4 => (glow::COMPRESSED_RGBA_ASTC_5x4_KHR, glow::RGBA, 0), + AstcBlock::B5x5 => (glow::COMPRESSED_RGBA_ASTC_5x5_KHR, glow::RGBA, 0), + AstcBlock::B6x5 => (glow::COMPRESSED_RGBA_ASTC_6x5_KHR, glow::RGBA, 0), + AstcBlock::B6x6 => (glow::COMPRESSED_RGBA_ASTC_6x6_KHR, glow::RGBA, 0), + AstcBlock::B8x5 => (glow::COMPRESSED_RGBA_ASTC_8x5_KHR, glow::RGBA, 0), + AstcBlock::B8x6 => (glow::COMPRESSED_RGBA_ASTC_8x6_KHR, glow::RGBA, 0), + AstcBlock::B8x8 => (glow::COMPRESSED_RGBA_ASTC_8x8_KHR, glow::RGBA, 0), + AstcBlock::B10x5 => (glow::COMPRESSED_RGBA_ASTC_10x5_KHR, glow::RGBA, 0), + AstcBlock::B10x6 => (glow::COMPRESSED_RGBA_ASTC_10x6_KHR, glow::RGBA, 0), + AstcBlock::B10x8 => (glow::COMPRESSED_RGBA_ASTC_10x8_KHR, glow::RGBA, 0), + AstcBlock::B10x10 => (glow::COMPRESSED_RGBA_ASTC_10x10_KHR, glow::RGBA, 0), + AstcBlock::B12x10 => (glow::COMPRESSED_RGBA_ASTC_12x10_KHR, glow::RGBA, 0), + AstcBlock::B12x12 => (glow::COMPRESSED_RGBA_ASTC_12x12_KHR, glow::RGBA, 0), + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, glow::RGBA, 0), + AstcBlock::B5x4 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, glow::RGBA, 0), + AstcBlock::B5x5 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, glow::RGBA, 0), + AstcBlock::B6x5 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, glow::RGBA, 0), + AstcBlock::B6x6 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, glow::RGBA, 0), + AstcBlock::B8x5 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, glow::RGBA, 0), + AstcBlock::B8x6 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, glow::RGBA, 0), + AstcBlock::B8x8 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, glow::RGBA, 0), + AstcBlock::B10x5 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, glow::RGBA, 0) + } + AstcBlock::B10x6 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, glow::RGBA, 0) + } + AstcBlock::B10x8 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, glow::RGBA, 0) + } + AstcBlock::B10x10 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, glow::RGBA, 0) + } + AstcBlock::B12x10 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, glow::RGBA, 0) + } + AstcBlock::B12x12 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, glow::RGBA, 0) + } + }, + }, + }; + + super::TextureFormatDesc { + internal, + external, + data_type, + } + } +} + +pub(super) fn describe_vertex_format(vertex_format: wgt::VertexFormat) -> super::VertexFormatDesc { + use super::VertexAttribKind as Vak; + use wgt::VertexFormat as Vf; + + let (element_count, element_format, attrib_kind) = match vertex_format { + Vf::Unorm8x2 => (2, glow::UNSIGNED_BYTE, Vak::Float), + Vf::Snorm8x2 => (2, glow::BYTE, Vak::Float), + Vf::Uint8x2 => (2, glow::UNSIGNED_BYTE, Vak::Integer), + Vf::Sint8x2 => (2, glow::BYTE, Vak::Integer), + Vf::Unorm8x4 => (4, glow::UNSIGNED_BYTE, Vak::Float), + Vf::Snorm8x4 => (4, glow::BYTE, Vak::Float), + Vf::Uint8x4 => (4, glow::UNSIGNED_BYTE, Vak::Integer), + Vf::Sint8x4 => (4, glow::BYTE, Vak::Integer), + Vf::Unorm16x2 => (2, glow::UNSIGNED_SHORT, Vak::Float), + Vf::Snorm16x2 => (2, glow::SHORT, Vak::Float), + Vf::Uint16x2 => (2, glow::UNSIGNED_SHORT, Vak::Integer), + Vf::Sint16x2 => (2, glow::SHORT, Vak::Integer), + Vf::Float16x2 => (2, glow::HALF_FLOAT, Vak::Float), + Vf::Unorm16x4 => (4, glow::UNSIGNED_SHORT, Vak::Float), + Vf::Snorm16x4 => (4, glow::SHORT, Vak::Float), + Vf::Uint16x4 => (4, glow::UNSIGNED_SHORT, Vak::Integer), + Vf::Sint16x4 => (4, glow::SHORT, Vak::Integer), + Vf::Float16x4 => (4, glow::HALF_FLOAT, Vak::Float), + Vf::Uint32 => (1, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32 => (1, glow::INT, Vak::Integer), + Vf::Float32 => (1, glow::FLOAT, Vak::Float), + Vf::Uint32x2 => (2, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32x2 => (2, glow::INT, Vak::Integer), + Vf::Float32x2 => (2, glow::FLOAT, Vak::Float), + Vf::Uint32x3 => (3, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32x3 => (3, glow::INT, Vak::Integer), + Vf::Float32x3 => (3, glow::FLOAT, Vak::Float), + Vf::Uint32x4 => (4, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32x4 => (4, glow::INT, Vak::Integer), + Vf::Float32x4 => (4, glow::FLOAT, Vak::Float), + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + }; + + super::VertexFormatDesc { + element_count, + element_format, + attrib_kind, + } +} + +pub fn map_filter_modes( + min: wgt::FilterMode, + mag: wgt::FilterMode, + mip: wgt::FilterMode, +) -> (u32, u32) { + use wgt::FilterMode as Fm; + + let mag_filter = match mag { + Fm::Nearest => glow::NEAREST, + Fm::Linear => glow::LINEAR, + }; + + let min_filter = match (min, mip) { + (Fm::Nearest, Fm::Nearest) => glow::NEAREST_MIPMAP_NEAREST, + (Fm::Nearest, Fm::Linear) => glow::NEAREST_MIPMAP_LINEAR, + (Fm::Linear, Fm::Nearest) => glow::LINEAR_MIPMAP_NEAREST, + (Fm::Linear, Fm::Linear) => glow::LINEAR_MIPMAP_LINEAR, + }; + + (min_filter, mag_filter) +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> u32 { + match mode { + wgt::AddressMode::Repeat => glow::REPEAT, + wgt::AddressMode::MirrorRepeat => glow::MIRRORED_REPEAT, + wgt::AddressMode::ClampToEdge => glow::CLAMP_TO_EDGE, + wgt::AddressMode::ClampToBorder => glow::CLAMP_TO_BORDER, + //wgt::AddressMode::MirrorClamp => glow::MIRROR_CLAMP_TO_EDGE, + } +} + +pub fn map_compare_func(fun: wgt::CompareFunction) -> u32 { + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => glow::NEVER, + Cf::Less => glow::LESS, + Cf::LessEqual => glow::LEQUAL, + Cf::Equal => glow::EQUAL, + Cf::GreaterEqual => glow::GEQUAL, + Cf::Greater => glow::GREATER, + Cf::NotEqual => glow::NOTEQUAL, + Cf::Always => glow::ALWAYS, + } +} + +pub fn map_primitive_topology(topology: wgt::PrimitiveTopology) -> u32 { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => glow::POINTS, + Pt::LineList => glow::LINES, + Pt::LineStrip => glow::LINE_STRIP, + Pt::TriangleList => glow::TRIANGLES, + Pt::TriangleStrip => glow::TRIANGLE_STRIP, + } +} + +pub(super) fn map_primitive_state(state: &wgt::PrimitiveState) -> super::PrimitiveState { + //Note: state.polygon_mode is not supported, see `Features::POLYGON_MODE_LINE` and + //`Features::POLYGON_MODE_POINT` + super::PrimitiveState { + //Note: we are flipping the front face, so that + // the Y-flip in the generated GLSL keeps the same visibility. + // See `naga::back::glsl::WriterFlags::ADJUST_COORDINATE_SPACE`. + front_face: match state.front_face { + wgt::FrontFace::Cw => glow::CCW, + wgt::FrontFace::Ccw => glow::CW, + }, + cull_face: match state.cull_mode { + Some(wgt::Face::Front) => glow::FRONT, + Some(wgt::Face::Back) => glow::BACK, + None => 0, + }, + unclipped_depth: state.unclipped_depth, + } +} + +pub fn _map_view_dimension(dim: wgt::TextureViewDimension) -> u32 { + use wgt::TextureViewDimension as Tvd; + match dim { + Tvd::D1 | Tvd::D2 => glow::TEXTURE_2D, + Tvd::D2Array => glow::TEXTURE_2D_ARRAY, + Tvd::Cube => glow::TEXTURE_CUBE_MAP, + Tvd::CubeArray => glow::TEXTURE_CUBE_MAP_ARRAY, + Tvd::D3 => glow::TEXTURE_3D, + } +} + +fn map_stencil_op(operation: wgt::StencilOperation) -> u32 { + use wgt::StencilOperation as So; + match operation { + So::Keep => glow::KEEP, + So::Zero => glow::ZERO, + So::Replace => glow::REPLACE, + So::Invert => glow::INVERT, + So::IncrementClamp => glow::INCR, + So::DecrementClamp => glow::DECR, + So::IncrementWrap => glow::INCR_WRAP, + So::DecrementWrap => glow::DECR_WRAP, + } +} + +fn map_stencil_ops(face: &wgt::StencilFaceState) -> super::StencilOps { + super::StencilOps { + pass: map_stencil_op(face.pass_op), + fail: map_stencil_op(face.fail_op), + depth_fail: map_stencil_op(face.depth_fail_op), + } +} + +pub(super) fn map_stencil(state: &wgt::StencilState) -> super::StencilState { + super::StencilState { + front: super::StencilSide { + function: map_compare_func(state.front.compare), + mask_read: state.read_mask, + mask_write: state.write_mask, + reference: 0, + ops: map_stencil_ops(&state.front), + }, + back: super::StencilSide { + function: map_compare_func(state.back.compare), + mask_read: state.read_mask, + mask_write: state.write_mask, + reference: 0, + ops: map_stencil_ops(&state.back), + }, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor) -> u32 { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => glow::ZERO, + Bf::One => glow::ONE, + Bf::Src => glow::SRC_COLOR, + Bf::OneMinusSrc => glow::ONE_MINUS_SRC_COLOR, + Bf::Dst => glow::DST_COLOR, + Bf::OneMinusDst => glow::ONE_MINUS_DST_COLOR, + Bf::SrcAlpha => glow::SRC_ALPHA, + Bf::OneMinusSrcAlpha => glow::ONE_MINUS_SRC_ALPHA, + Bf::DstAlpha => glow::DST_ALPHA, + Bf::OneMinusDstAlpha => glow::ONE_MINUS_DST_ALPHA, + Bf::Constant => glow::CONSTANT_COLOR, + Bf::OneMinusConstant => glow::ONE_MINUS_CONSTANT_COLOR, + Bf::SrcAlphaSaturated => glow::SRC_ALPHA_SATURATE, + } +} + +fn map_blend_component(component: &wgt::BlendComponent) -> super::BlendComponent { + super::BlendComponent { + src: map_blend_factor(component.src_factor), + dst: map_blend_factor(component.dst_factor), + equation: match component.operation { + wgt::BlendOperation::Add => glow::FUNC_ADD, + wgt::BlendOperation::Subtract => glow::FUNC_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => glow::FUNC_REVERSE_SUBTRACT, + wgt::BlendOperation::Min => glow::MIN, + wgt::BlendOperation::Max => glow::MAX, + }, + } +} + +pub(super) fn map_blend(blend: &wgt::BlendState) -> super::BlendDesc { + super::BlendDesc { + color: map_blend_component(&blend.color), + alpha: map_blend_component(&blend.alpha), + } +} + +pub(super) fn map_storage_access(access: wgt::StorageTextureAccess) -> u32 { + match access { + wgt::StorageTextureAccess::ReadOnly => glow::READ_ONLY, + wgt::StorageTextureAccess::WriteOnly => glow::WRITE_ONLY, + wgt::StorageTextureAccess::ReadWrite => glow::READ_WRITE, + } +} + +pub(super) fn is_sampler(glsl_uniform_type: u32) -> bool { + match glsl_uniform_type { + glow::INT_SAMPLER_1D + | glow::INT_SAMPLER_1D_ARRAY + | glow::INT_SAMPLER_2D + | glow::INT_SAMPLER_2D_ARRAY + | glow::INT_SAMPLER_2D_MULTISAMPLE + | glow::INT_SAMPLER_2D_MULTISAMPLE_ARRAY + | glow::INT_SAMPLER_2D_RECT + | glow::INT_SAMPLER_3D + | glow::INT_SAMPLER_CUBE + | glow::INT_SAMPLER_CUBE_MAP_ARRAY + | glow::UNSIGNED_INT_SAMPLER_1D + | glow::UNSIGNED_INT_SAMPLER_1D_ARRAY + | glow::UNSIGNED_INT_SAMPLER_2D + | glow::UNSIGNED_INT_SAMPLER_2D_ARRAY + | glow::UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE + | glow::UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY + | glow::UNSIGNED_INT_SAMPLER_2D_RECT + | glow::UNSIGNED_INT_SAMPLER_3D + | glow::UNSIGNED_INT_SAMPLER_CUBE + | glow::UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY + | glow::SAMPLER_1D + | glow::SAMPLER_1D_SHADOW + | glow::SAMPLER_1D_ARRAY + | glow::SAMPLER_1D_ARRAY_SHADOW + | glow::SAMPLER_2D + | glow::SAMPLER_2D_SHADOW + | glow::SAMPLER_2D_ARRAY + | glow::SAMPLER_2D_ARRAY_SHADOW + | glow::SAMPLER_2D_MULTISAMPLE + | glow::SAMPLER_2D_MULTISAMPLE_ARRAY + | glow::SAMPLER_2D_RECT + | glow::SAMPLER_2D_RECT_SHADOW + | glow::SAMPLER_3D + | glow::SAMPLER_CUBE + | glow::SAMPLER_CUBE_MAP_ARRAY + | glow::SAMPLER_CUBE_MAP_ARRAY_SHADOW + | glow::SAMPLER_CUBE_SHADOW => true, + _ => false, + } +} + +pub(super) fn is_image(glsl_uniform_type: u32) -> bool { + match glsl_uniform_type { + glow::INT_IMAGE_1D + | glow::INT_IMAGE_1D_ARRAY + | glow::INT_IMAGE_2D + | glow::INT_IMAGE_2D_ARRAY + | glow::INT_IMAGE_2D_MULTISAMPLE + | glow::INT_IMAGE_2D_MULTISAMPLE_ARRAY + | glow::INT_IMAGE_2D_RECT + | glow::INT_IMAGE_3D + | glow::INT_IMAGE_CUBE + | glow::INT_IMAGE_CUBE_MAP_ARRAY + | glow::UNSIGNED_INT_IMAGE_1D + | glow::UNSIGNED_INT_IMAGE_1D_ARRAY + | glow::UNSIGNED_INT_IMAGE_2D + | glow::UNSIGNED_INT_IMAGE_2D_ARRAY + | glow::UNSIGNED_INT_IMAGE_2D_MULTISAMPLE + | glow::UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY + | glow::UNSIGNED_INT_IMAGE_2D_RECT + | glow::UNSIGNED_INT_IMAGE_3D + | glow::UNSIGNED_INT_IMAGE_CUBE + | glow::UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY + | glow::IMAGE_1D + | glow::IMAGE_1D_ARRAY + | glow::IMAGE_2D + | glow::IMAGE_2D_ARRAY + | glow::IMAGE_2D_MULTISAMPLE + | glow::IMAGE_2D_MULTISAMPLE_ARRAY + | glow::IMAGE_2D_RECT + | glow::IMAGE_3D + | glow::IMAGE_CUBE + | glow::IMAGE_CUBE_MAP_ARRAY => true, + _ => false, + } +} + +pub(super) fn is_atomic_counter(glsl_uniform_type: u32) -> bool { + glsl_uniform_type == glow::UNSIGNED_INT_ATOMIC_COUNTER +} + +pub(super) fn is_opaque_type(glsl_uniform_type: u32) -> bool { + is_sampler(glsl_uniform_type) + || is_image(glsl_uniform_type) + || is_atomic_counter(glsl_uniform_type) +} + +pub(super) fn uniform_byte_size(glsl_uniform_type: u32) -> u32 { + match glsl_uniform_type { + glow::FLOAT | glow::INT => 4, + glow::FLOAT_VEC2 | glow::INT_VEC2 => 8, + glow::FLOAT_VEC3 | glow::INT_VEC3 => 12, + glow::FLOAT_VEC4 | glow::INT_VEC4 => 16, + glow::FLOAT_MAT2 => 16, + glow::FLOAT_MAT3 => 36, + glow::FLOAT_MAT4 => 64, + _ => panic!("Unsupported uniform datatype! {glsl_uniform_type:#X}"), + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/device.rs b/third_party/rust/wgpu-hal/src/gles/device.rs new file mode 100644 index 0000000000..0a1cfaf241 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/device.rs @@ -0,0 +1,1328 @@ +use super::conv; +use crate::auxil::map_naga_stage; +use glow::HasContext; +use std::{ + convert::TryInto, + ptr, + sync::{Arc, Mutex}, +}; + +use arrayvec::ArrayVec; +#[cfg(not(target_arch = "wasm32"))] +use std::mem; +use std::sync::atomic::Ordering; + +type ShaderStage<'a> = ( + naga::ShaderStage, + &'a crate::ProgrammableStage<'a, super::Api>, +); +type NameBindingMap = rustc_hash::FxHashMap<String, (super::BindingRegister, u8)>; + +struct CompilationContext<'a> { + layout: &'a super::PipelineLayout, + sampler_map: &'a mut super::SamplerBindMap, + name_binding_map: &'a mut NameBindingMap, + multiview: Option<std::num::NonZeroU32>, +} + +impl CompilationContext<'_> { + fn consume_reflection( + self, + module: &naga::Module, + ep_info: &naga::valid::FunctionInfo, + reflection_info: naga::back::glsl::ReflectionInfo, + ) { + for (handle, var) in module.global_variables.iter() { + if ep_info[handle].is_empty() { + continue; + } + let register = match var.space { + naga::AddressSpace::Uniform => super::BindingRegister::UniformBuffers, + naga::AddressSpace::Storage { .. } => super::BindingRegister::StorageBuffers, + _ => continue, + }; + + let br = var.binding.as_ref().unwrap(); + let slot = self.layout.get_slot(br); + + let name = match reflection_info.uniforms.get(&handle) { + Some(name) => name.clone(), + None => continue, + }; + log::debug!( + "Rebind buffer: {:?} -> {}, register={:?}, slot={}", + var.name.as_ref(), + &name, + register, + slot + ); + self.name_binding_map.insert(name, (register, slot)); + } + + for (name, mapping) in reflection_info.texture_mapping { + let var = &module.global_variables[mapping.texture]; + let register = match module.types[var.ty].inner { + naga::TypeInner::Image { + class: naga::ImageClass::Storage { .. }, + .. + } => super::BindingRegister::Images, + _ => super::BindingRegister::Textures, + }; + + let tex_br = var.binding.as_ref().unwrap(); + let texture_linear_index = self.layout.get_slot(tex_br); + + self.name_binding_map + .insert(name, (register, texture_linear_index)); + if let Some(sampler_handle) = mapping.sampler { + let sam_br = module.global_variables[sampler_handle] + .binding + .as_ref() + .unwrap(); + let sampler_linear_index = self.layout.get_slot(sam_br); + self.sampler_map[texture_linear_index as usize] = Some(sampler_linear_index); + } + } + } +} + +impl super::Device { + /// # Safety + /// + /// - `name` must be created respecting `desc` + /// - `name` must be a texture + /// - If `drop_guard` is [`None`], wgpu-hal will take ownership of the texture. If `drop_guard` is + /// [`Some`], the texture must be valid until the drop implementation + /// of the drop guard is called. + #[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] + pub unsafe fn texture_from_raw( + &self, + name: std::num::NonZeroU32, + desc: &crate::TextureDescriptor, + drop_guard: Option<crate::DropGuard>, + ) -> super::Texture { + let (target, _, is_cubemap) = super::Texture::get_info_from_desc(desc); + + super::Texture { + inner: super::TextureInner::Texture { + raw: glow::NativeTexture(name), + target, + }, + drop_guard, + mip_level_count: desc.mip_level_count, + array_layer_count: desc.array_layer_count(), + format: desc.format, + format_desc: self.shared.describe_texture_format(desc.format), + copy_size: desc.copy_extent(), + is_cubemap, + } + } + + /// # Safety + /// + /// - `name` must be created respecting `desc` + /// - `name` must be a renderbuffer + /// - If `drop_guard` is [`None`], wgpu-hal will take ownership of the renderbuffer. If `drop_guard` is + /// [`Some`], the renderbuffer must be valid until the drop implementation + /// of the drop guard is called. + #[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] + pub unsafe fn texture_from_raw_renderbuffer( + &self, + name: std::num::NonZeroU32, + desc: &crate::TextureDescriptor, + drop_guard: Option<crate::DropGuard>, + ) -> super::Texture { + super::Texture { + inner: super::TextureInner::Renderbuffer { + raw: glow::NativeRenderbuffer(name), + }, + drop_guard, + mip_level_count: desc.mip_level_count, + array_layer_count: desc.array_layer_count(), + format: desc.format, + format_desc: self.shared.describe_texture_format(desc.format), + copy_size: desc.copy_extent(), + is_cubemap: false, + } + } + + unsafe fn compile_shader( + gl: &glow::Context, + shader: &str, + naga_stage: naga::ShaderStage, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + ) -> Result<glow::Shader, crate::PipelineError> { + let target = match naga_stage { + naga::ShaderStage::Vertex => glow::VERTEX_SHADER, + naga::ShaderStage::Fragment => glow::FRAGMENT_SHADER, + naga::ShaderStage::Compute => glow::COMPUTE_SHADER, + }; + + let raw = unsafe { gl.create_shader(target) }.unwrap(); + #[cfg(not(target_arch = "wasm32"))] + if gl.supports_debug() { + //TODO: remove all transmutes from `object_label` + // https://github.com/grovesNL/glow/issues/186 + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::SHADER, name, label) }; + } + + unsafe { gl.shader_source(raw, shader) }; + unsafe { gl.compile_shader(raw) }; + + log::info!("\tCompiled shader {:?}", raw); + + let compiled_ok = unsafe { gl.get_shader_compile_status(raw) }; + let msg = unsafe { gl.get_shader_info_log(raw) }; + if compiled_ok { + if !msg.is_empty() { + log::warn!("\tCompile: {}", msg); + } + Ok(raw) + } else { + Err(crate::PipelineError::Linkage( + map_naga_stage(naga_stage), + msg, + )) + } + } + + fn create_shader( + gl: &glow::Context, + naga_stage: naga::ShaderStage, + stage: &crate::ProgrammableStage<super::Api>, + context: CompilationContext, + ) -> Result<glow::Shader, crate::PipelineError> { + use naga::back::glsl; + let pipeline_options = glsl::PipelineOptions { + shader_stage: naga_stage, + entry_point: stage.entry_point.to_string(), + multiview: context.multiview, + }; + + let shader = &stage.module.naga; + let entry_point_index = shader + .module + .entry_points + .iter() + .position(|ep| ep.name.as_str() == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + + use naga::proc::BoundsCheckPolicy; + // The image bounds checks require the TEXTURE_LEVELS feature available in GL core 1.3+. + let version = gl.version(); + let image_check = if !version.is_embedded && (version.major, version.minor) >= (1, 3) { + BoundsCheckPolicy::ReadZeroSkipWrite + } else { + BoundsCheckPolicy::Unchecked + }; + + // Other bounds check are either provided by glsl or not implemented yet. + let policies = naga::proc::BoundsCheckPolicies { + index: BoundsCheckPolicy::Unchecked, + buffer: BoundsCheckPolicy::Unchecked, + image: image_check, + binding_array: BoundsCheckPolicy::Unchecked, + }; + + let mut output = String::new(); + let mut writer = glsl::Writer::new( + &mut output, + &shader.module, + &shader.info, + &context.layout.naga_options, + &pipeline_options, + policies, + ) + .map_err(|e| { + let msg = format!("{e}"); + crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg) + })?; + + let reflection_info = writer.write().map_err(|e| { + let msg = format!("{e}"); + crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg) + })?; + + log::debug!("Naga generated shader:\n{}", output); + + context.consume_reflection( + &shader.module, + shader.info.get_entry_point(entry_point_index), + reflection_info, + ); + + unsafe { Self::compile_shader(gl, &output, naga_stage, stage.module.label.as_deref()) } + } + + unsafe fn create_pipeline<'a>( + &self, + gl: &glow::Context, + shaders: ArrayVec<ShaderStage<'a>, 3>, + layout: &super::PipelineLayout, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + multiview: Option<std::num::NonZeroU32>, + ) -> Result<Arc<super::PipelineInner>, crate::PipelineError> { + let mut program_stages = ArrayVec::new(); + let mut group_to_binding_to_slot = Vec::with_capacity(layout.group_infos.len()); + for group in &*layout.group_infos { + group_to_binding_to_slot.push(group.binding_to_slot.clone()); + } + for &(naga_stage, stage) in &shaders { + program_stages.push(super::ProgramStage { + naga_stage: naga_stage.to_owned(), + shader_id: stage.module.id, + entry_point: stage.entry_point.to_owned(), + }); + } + let glsl_version = match self.shared.shading_language_version { + naga::back::glsl::Version::Embedded { version, .. } => version, + naga::back::glsl::Version::Desktop(_) => unreachable!(), + }; + let mut guard = self + .shared + .program_cache + .try_lock() + .expect("Couldn't acquire program_cache lock"); + // This guard ensures that we can't accidentally destroy a program whilst we're about to reuse it + // The only place that destroys a pipeline is also locking on `program_cache` + let program = guard + .entry(super::ProgramCacheKey { + stages: program_stages, + group_to_binding_to_slot: group_to_binding_to_slot.into_boxed_slice(), + }) + .or_insert_with(|| unsafe { + Self::create_program( + gl, + shaders, + layout, + label, + multiview, + glsl_version, + self.shared.private_caps, + ) + }) + .to_owned()?; + drop(guard); + + Ok(program) + } + + unsafe fn create_program<'a>( + gl: &glow::Context, + shaders: ArrayVec<ShaderStage<'a>, 3>, + layout: &super::PipelineLayout, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + multiview: Option<std::num::NonZeroU32>, + glsl_version: u16, + private_caps: super::PrivateCapabilities, + ) -> Result<Arc<super::PipelineInner>, crate::PipelineError> { + let program = unsafe { gl.create_program() }.unwrap(); + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(program) }; + unsafe { gl.object_label(glow::PROGRAM, name, Some(label)) }; + } + } + + let mut name_binding_map = NameBindingMap::default(); + let mut sampler_map = [None; super::MAX_TEXTURE_SLOTS]; + let mut has_stages = wgt::ShaderStages::empty(); + let mut shaders_to_delete = arrayvec::ArrayVec::<_, 3>::new(); + + for (naga_stage, stage) in shaders { + has_stages |= map_naga_stage(naga_stage); + let context = CompilationContext { + layout, + sampler_map: &mut sampler_map, + name_binding_map: &mut name_binding_map, + multiview, + }; + + let shader = Self::create_shader(gl, naga_stage, stage, context)?; + shaders_to_delete.push(shader); + } + + // Create empty fragment shader if only vertex shader is present + if has_stages == wgt::ShaderStages::VERTEX { + let shader_src = format!("#version {glsl_version} es \n void main(void) {{}}",); + log::info!("Only vertex shader is present. Creating an empty fragment shader",); + let shader = unsafe { + Self::compile_shader( + gl, + &shader_src, + naga::ShaderStage::Fragment, + Some("(wgpu internal) dummy fragment shader"), + ) + }?; + shaders_to_delete.push(shader); + } + + for &shader in shaders_to_delete.iter() { + unsafe { gl.attach_shader(program, shader) }; + } + unsafe { gl.link_program(program) }; + + for shader in shaders_to_delete { + unsafe { gl.delete_shader(shader) }; + } + + log::info!("\tLinked program {:?}", program); + + let linked_ok = unsafe { gl.get_program_link_status(program) }; + let msg = unsafe { gl.get_program_info_log(program) }; + if !linked_ok { + return Err(crate::PipelineError::Linkage(has_stages, msg)); + } + if !msg.is_empty() { + log::warn!("\tLink: {}", msg); + } + + if !private_caps.contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT) { + // This remapping is only needed if we aren't able to put the binding layout + // in the shader. We can't remap storage buffers this way. + unsafe { gl.use_program(Some(program)) }; + for (ref name, (register, slot)) in name_binding_map { + log::trace!("Get binding {:?} from program {:?}", name, program); + match register { + super::BindingRegister::UniformBuffers => { + let index = unsafe { gl.get_uniform_block_index(program, name) }.unwrap(); + unsafe { gl.uniform_block_binding(program, index, slot as _) }; + } + super::BindingRegister::StorageBuffers => { + let index = + unsafe { gl.get_shader_storage_block_index(program, name) }.unwrap(); + log::error!( + "Unable to re-map shader storage block {} to {}", + name, + index + ); + return Err(crate::DeviceError::Lost.into()); + } + super::BindingRegister::Textures | super::BindingRegister::Images => { + let location = unsafe { gl.get_uniform_location(program, name) }; + unsafe { gl.uniform_1_i32(location.as_ref(), slot as _) }; + } + } + } + } + + let mut uniforms: [super::UniformDesc; super::MAX_PUSH_CONSTANTS] = + [None; super::MAX_PUSH_CONSTANTS].map(|_: Option<()>| Default::default()); + let count = unsafe { gl.get_active_uniforms(program) }; + let mut offset = 0; + + for uniform in 0..count { + let glow::ActiveUniform { utype, name, .. } = + unsafe { gl.get_active_uniform(program, uniform) }.unwrap(); + + if conv::is_opaque_type(utype) { + continue; + } + + if let Some(location) = unsafe { gl.get_uniform_location(program, &name) } { + if uniforms[offset / 4].location.is_some() { + panic!("Offset already occupied") + } + + // `size` will always be 1 so we need to guess the real size from the type + let uniform_size = conv::uniform_byte_size(utype); + + uniforms[offset / 4] = super::UniformDesc { + location: Some(location), + size: uniform_size, + utype, + }; + + offset += uniform_size as usize; + } + } + + Ok(Arc::new(super::PipelineInner { + program, + sampler_map, + uniforms, + })) + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, queue: super::Queue) { + let gl = &self.shared.context.lock(); + unsafe { gl.delete_vertex_array(self.main_vao) }; + unsafe { gl.delete_framebuffer(queue.draw_fbo) }; + unsafe { gl.delete_framebuffer(queue.copy_fbo) }; + unsafe { gl.delete_buffer(queue.zero_buffer) }; + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + let target = if desc.usage.contains(crate::BufferUses::INDEX) { + glow::ELEMENT_ARRAY_BUFFER + } else { + glow::ARRAY_BUFFER + }; + + let emulate_map = self + .shared + .workarounds + .contains(super::Workarounds::EMULATE_BUFFER_MAP) + || !self + .shared + .private_caps + .contains(super::PrivateCapabilities::BUFFER_ALLOCATION); + + if emulate_map && desc.usage.intersects(crate::BufferUses::MAP_WRITE) { + return Ok(super::Buffer { + raw: None, + target, + size: desc.size, + map_flags: 0, + data: Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))), + }); + } + + let gl = &self.shared.context.lock(); + + let target = if desc.usage.contains(crate::BufferUses::INDEX) { + glow::ELEMENT_ARRAY_BUFFER + } else { + glow::ARRAY_BUFFER + }; + + let is_host_visible = desc + .usage + .intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE); + let is_coherent = desc + .memory_flags + .contains(crate::MemoryFlags::PREFER_COHERENT); + + let mut map_flags = 0; + if desc.usage.contains(crate::BufferUses::MAP_READ) { + map_flags |= glow::MAP_READ_BIT; + } + if desc.usage.contains(crate::BufferUses::MAP_WRITE) { + map_flags |= glow::MAP_WRITE_BIT; + } + + let raw = Some(unsafe { gl.create_buffer() }.map_err(|_| crate::DeviceError::OutOfMemory)?); + unsafe { gl.bind_buffer(target, raw) }; + let raw_size = desc + .size + .try_into() + .map_err(|_| crate::DeviceError::OutOfMemory)?; + + if self + .shared + .private_caps + .contains(super::PrivateCapabilities::BUFFER_ALLOCATION) + { + if is_host_visible { + map_flags |= glow::MAP_PERSISTENT_BIT; + if is_coherent { + map_flags |= glow::MAP_COHERENT_BIT; + } + } + unsafe { gl.buffer_storage(target, raw_size, None, map_flags) }; + } else { + assert!(!is_coherent); + let usage = if is_host_visible { + if desc.usage.contains(crate::BufferUses::MAP_READ) { + glow::STREAM_READ + } else { + glow::DYNAMIC_DRAW + } + } else { + // Even if the usage doesn't contain SRC_READ, we update it internally at least once + // Some vendors take usage very literally and STATIC_DRAW will freeze us with an empty buffer + // https://github.com/gfx-rs/wgpu/issues/3371 + glow::DYNAMIC_DRAW + }; + unsafe { gl.buffer_data_size(target, raw_size, usage) }; + } + + unsafe { gl.bind_buffer(target, None) }; + + if !is_coherent && desc.usage.contains(crate::BufferUses::MAP_WRITE) { + map_flags |= glow::MAP_FLUSH_EXPLICIT_BIT; + } + //TODO: do we need `glow::MAP_UNSYNCHRONIZED_BIT`? + + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = desc.label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::BUFFER, name, Some(label)) }; + } + } + + let data = if emulate_map && desc.usage.contains(crate::BufferUses::MAP_READ) { + Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))) + } else { + None + }; + + Ok(super::Buffer { + raw, + target, + size: desc.size, + map_flags, + data, + }) + } + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { + if let Some(raw) = buffer.raw { + let gl = &self.shared.context.lock(); + unsafe { gl.delete_buffer(raw) }; + } + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + let is_coherent = buffer.map_flags & glow::MAP_COHERENT_BIT != 0; + let ptr = match buffer.raw { + None => { + let mut vec = buffer.data.as_ref().unwrap().lock().unwrap(); + let slice = &mut vec.as_mut_slice()[range.start as usize..range.end as usize]; + slice.as_mut_ptr() + } + Some(raw) => { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + let ptr = if let Some(ref map_read_allocation) = buffer.data { + let mut guard = map_read_allocation.lock().unwrap(); + let slice = guard.as_mut_slice(); + unsafe { self.shared.get_buffer_sub_data(gl, buffer.target, 0, slice) }; + slice.as_mut_ptr() + } else { + unsafe { + gl.map_buffer_range( + buffer.target, + range.start as i32, + (range.end - range.start) as i32, + buffer.map_flags, + ) + } + }; + unsafe { gl.bind_buffer(buffer.target, None) }; + ptr + } + }; + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(ptr).ok_or(crate::DeviceError::Lost)?, + is_coherent, + }) + } + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + if let Some(raw) = buffer.raw { + if buffer.data.is_none() { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + unsafe { gl.unmap_buffer(buffer.target) }; + unsafe { gl.bind_buffer(buffer.target, None) }; + } + } + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + if let Some(raw) = buffer.raw { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + for range in ranges { + unsafe { + gl.flush_mapped_buffer_range( + buffer.target, + range.start as i32, + (range.end - range.start) as i32, + ) + }; + } + } + } + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) { + //TODO: do we need to do anything? + } + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + let gl = &self.shared.context.lock(); + + let render_usage = crate::TextureUses::COLOR_TARGET + | crate::TextureUses::DEPTH_STENCIL_WRITE + | crate::TextureUses::DEPTH_STENCIL_READ; + let format_desc = self.shared.describe_texture_format(desc.format); + + let (inner, is_cubemap) = if render_usage.contains(desc.usage) + && desc.dimension == wgt::TextureDimension::D2 + && desc.size.depth_or_array_layers == 1 + { + let raw = unsafe { gl.create_renderbuffer().unwrap() }; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, Some(raw)) }; + if desc.sample_count > 1 { + unsafe { + gl.renderbuffer_storage_multisample( + glow::RENDERBUFFER, + desc.sample_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + ) + }; + } else { + unsafe { + gl.renderbuffer_storage( + glow::RENDERBUFFER, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + ) + }; + } + + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = desc.label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::RENDERBUFFER, name, Some(label)) }; + } + } + + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) }; + (super::TextureInner::Renderbuffer { raw }, false) + } else { + let raw = unsafe { gl.create_texture().unwrap() }; + let (target, is_3d, is_cubemap) = super::Texture::get_info_from_desc(desc); + + unsafe { gl.bind_texture(target, Some(raw)) }; + //Note: this has to be done before defining the storage! + match desc.format.sample_type(None) { + Some( + wgt::TextureSampleType::Float { filterable: false } + | wgt::TextureSampleType::Uint + | wgt::TextureSampleType::Sint, + ) => { + // reset default filtering mode + unsafe { + gl.tex_parameter_i32(target, glow::TEXTURE_MIN_FILTER, glow::NEAREST as i32) + }; + unsafe { + gl.tex_parameter_i32(target, glow::TEXTURE_MAG_FILTER, glow::NEAREST as i32) + }; + } + _ => {} + } + + if is_3d { + unsafe { + gl.tex_storage_3d( + target, + desc.mip_level_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + desc.size.depth_or_array_layers as i32, + ) + }; + } else if desc.sample_count > 1 { + unsafe { + gl.tex_storage_2d_multisample( + target, + desc.sample_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + true, + ) + }; + } else { + unsafe { + gl.tex_storage_2d( + target, + desc.mip_level_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + ) + }; + } + + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = desc.label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::TEXTURE, name, Some(label)) }; + } + } + + unsafe { gl.bind_texture(target, None) }; + (super::TextureInner::Texture { raw, target }, is_cubemap) + }; + + Ok(super::Texture { + inner, + drop_guard: None, + mip_level_count: desc.mip_level_count, + array_layer_count: desc.array_layer_count(), + format: desc.format, + format_desc, + copy_size: desc.copy_extent(), + is_cubemap, + }) + } + unsafe fn destroy_texture(&self, texture: super::Texture) { + if texture.drop_guard.is_none() { + let gl = &self.shared.context.lock(); + match texture.inner { + super::TextureInner::Renderbuffer { raw, .. } => { + unsafe { gl.delete_renderbuffer(raw) }; + } + super::TextureInner::DefaultRenderbuffer => {} + super::TextureInner::Texture { raw, .. } => { + unsafe { gl.delete_texture(raw) }; + } + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + super::TextureInner::ExternalFramebuffer { .. } => {} + } + } + + // For clarity, we explicitly drop the drop guard. Although this has no real semantic effect as the + // end of the scope will drop the drop guard since this function takes ownership of the texture. + drop(texture.drop_guard); + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + Ok(super::TextureView { + //TODO: use `conv::map_view_dimension(desc.dimension)`? + inner: texture.inner.clone(), + aspects: crate::FormatAspects::new(texture.format, desc.range.aspect), + mip_levels: desc.range.mip_range(texture.mip_level_count), + array_layers: desc.range.layer_range(texture.array_layer_count), + format: texture.format, + }) + } + unsafe fn destroy_texture_view(&self, _view: super::TextureView) {} + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + let gl = &self.shared.context.lock(); + + let raw = unsafe { gl.create_sampler().unwrap() }; + + let (min, mag) = + conv::map_filter_modes(desc.min_filter, desc.mag_filter, desc.mipmap_filter); + + unsafe { gl.sampler_parameter_i32(raw, glow::TEXTURE_MIN_FILTER, min as i32) }; + unsafe { gl.sampler_parameter_i32(raw, glow::TEXTURE_MAG_FILTER, mag as i32) }; + + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_WRAP_S, + conv::map_address_mode(desc.address_modes[0]) as i32, + ) + }; + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_WRAP_T, + conv::map_address_mode(desc.address_modes[1]) as i32, + ) + }; + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_WRAP_R, + conv::map_address_mode(desc.address_modes[2]) as i32, + ) + }; + + if let Some(border_color) = desc.border_color { + let border = match border_color { + wgt::SamplerBorderColor::TransparentBlack | wgt::SamplerBorderColor::Zero => { + [0.0; 4] + } + wgt::SamplerBorderColor::OpaqueBlack => [0.0, 0.0, 0.0, 1.0], + wgt::SamplerBorderColor::OpaqueWhite => [1.0; 4], + }; + unsafe { gl.sampler_parameter_f32_slice(raw, glow::TEXTURE_BORDER_COLOR, &border) }; + } + + unsafe { gl.sampler_parameter_f32(raw, glow::TEXTURE_MIN_LOD, desc.lod_clamp.start) }; + unsafe { gl.sampler_parameter_f32(raw, glow::TEXTURE_MAX_LOD, desc.lod_clamp.end) }; + + // If clamp is not 1, we know anisotropy is supported up to 16x + if desc.anisotropy_clamp != 1 { + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_MAX_ANISOTROPY, + desc.anisotropy_clamp as i32, + ) + }; + } + + //set_param_float(glow::TEXTURE_LOD_BIAS, info.lod_bias.0); + + if let Some(compare) = desc.compare { + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_COMPARE_MODE, + glow::COMPARE_REF_TO_TEXTURE as i32, + ) + }; + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_COMPARE_FUNC, + conv::map_compare_func(compare) as i32, + ) + }; + } + + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = desc.label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::SAMPLER, name, Some(label)) }; + } + } + + Ok(super::Sampler { raw }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + let gl = &self.shared.context.lock(); + unsafe { gl.delete_sampler(sampler.raw) }; + } + + unsafe fn create_command_encoder( + &self, + _desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + Ok(super::CommandEncoder { + cmd_buffer: super::CommandBuffer::default(), + state: Default::default(), + private_caps: self.shared.private_caps, + }) + } + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + Ok(super::BindGroupLayout { + entries: Arc::from(desc.entries), + }) + } + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {} + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + use naga::back::glsl; + + let mut group_infos = Vec::with_capacity(desc.bind_group_layouts.len()); + let mut num_samplers = 0u8; + let mut num_textures = 0u8; + let mut num_images = 0u8; + let mut num_uniform_buffers = 0u8; + let mut num_storage_buffers = 0u8; + + let mut writer_flags = glsl::WriterFlags::ADJUST_COORDINATE_SPACE; + writer_flags.set( + glsl::WriterFlags::TEXTURE_SHADOW_LOD, + self.shared + .private_caps + .contains(super::PrivateCapabilities::SHADER_TEXTURE_SHADOW_LOD), + ); + // We always force point size to be written and it will be ignored by the driver if it's not a point list primitive. + // https://github.com/gfx-rs/wgpu/pull/3440/files#r1095726950 + writer_flags.set(glsl::WriterFlags::FORCE_POINT_SIZE, true); + let mut binding_map = glsl::BindingMap::default(); + + for (group_index, bg_layout) in desc.bind_group_layouts.iter().enumerate() { + // create a vector with the size enough to hold all the bindings, filled with `!0` + let mut binding_to_slot = vec![ + !0; + bg_layout + .entries + .last() + .map_or(0, |b| b.binding as usize + 1) + ] + .into_boxed_slice(); + + for entry in bg_layout.entries.iter() { + let counter = match entry.ty { + wgt::BindingType::Sampler { .. } => &mut num_samplers, + wgt::BindingType::Texture { .. } => &mut num_textures, + wgt::BindingType::StorageTexture { .. } => &mut num_images, + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + .. + } => &mut num_uniform_buffers, + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + .. + } => &mut num_storage_buffers, + }; + + binding_to_slot[entry.binding as usize] = *counter; + let br = naga::ResourceBinding { + group: group_index as u32, + binding: entry.binding, + }; + binding_map.insert(br, *counter); + *counter += entry.count.map_or(1, |c| c.get() as u8); + } + + group_infos.push(super::BindGroupLayoutInfo { + entries: Arc::clone(&bg_layout.entries), + binding_to_slot, + }); + } + + Ok(super::PipelineLayout { + group_infos: group_infos.into_boxed_slice(), + naga_options: glsl::Options { + version: self.shared.shading_language_version, + writer_flags, + binding_map, + zero_initialize_workgroup_memory: true, + }, + }) + } + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + let mut contents = Vec::new(); + + for (entry, layout) in desc.entries.iter().zip(desc.layout.entries.iter()) { + let binding = match layout.ty { + wgt::BindingType::Buffer { .. } => { + let bb = &desc.buffers[entry.resource_index as usize]; + super::RawBinding::Buffer { + raw: bb.buffer.raw.unwrap(), + offset: bb.offset as i32, + size: match bb.size { + Some(s) => s.get() as i32, + None => (bb.buffer.size - bb.offset) as i32, + }, + } + } + wgt::BindingType::Sampler { .. } => { + let sampler = desc.samplers[entry.resource_index as usize]; + super::RawBinding::Sampler(sampler.raw) + } + wgt::BindingType::Texture { .. } => { + let view = desc.textures[entry.resource_index as usize].view; + if view.mip_levels.start != 0 || view.array_layers.start != 0 { + log::error!("Unable to create a sampled texture binding for non-zero mipmap level or array layer.\n{}", + "This is an implementation problem of wgpu-hal/gles backend.") + } + let (raw, target) = view.inner.as_native(); + super::RawBinding::Texture { + raw, + target, + aspects: view.aspects, + } + } + wgt::BindingType::StorageTexture { + access, + format, + view_dimension, + } => { + let view = desc.textures[entry.resource_index as usize].view; + let format_desc = self.shared.describe_texture_format(format); + let (raw, _target) = view.inner.as_native(); + super::RawBinding::Image(super::ImageBinding { + raw, + mip_level: view.mip_levels.start, + array_layer: match view_dimension { + wgt::TextureViewDimension::D2Array + | wgt::TextureViewDimension::CubeArray => None, + _ => Some(view.array_layers.start), + }, + access: conv::map_storage_access(access), + format: format_desc.internal, + }) + } + }; + contents.push(binding); + } + + Ok(super::BindGroup { + contents: contents.into_boxed_slice(), + }) + } + unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + Ok(super::ShaderModule { + naga: match shader { + crate::ShaderInput::SpirV(_) => { + panic!("`Features::SPIRV_SHADER_PASSTHROUGH` is not enabled") + } + crate::ShaderInput::Naga(naga) => naga, + }, + label: desc.label.map(|str| str.to_string()), + id: self.shared.next_shader_id.fetch_add(1, Ordering::Relaxed), + }) + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + let gl = &self.shared.context.lock(); + let mut shaders = ArrayVec::new(); + shaders.push((naga::ShaderStage::Vertex, &desc.vertex_stage)); + if let Some(ref fs) = desc.fragment_stage { + shaders.push((naga::ShaderStage::Fragment, fs)); + } + let inner = + unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, desc.multiview) }?; + + let (vertex_buffers, vertex_attributes) = { + let mut buffers = Vec::new(); + let mut attributes = Vec::new(); + for (index, vb_layout) in desc.vertex_buffers.iter().enumerate() { + buffers.push(super::VertexBufferDesc { + step: vb_layout.step_mode, + stride: vb_layout.array_stride as u32, + }); + for vat in vb_layout.attributes.iter() { + let format_desc = conv::describe_vertex_format(vat.format); + attributes.push(super::AttributeDesc { + location: vat.shader_location, + offset: vat.offset as u32, + buffer_index: index as u32, + format_desc, + }); + } + } + (buffers.into_boxed_slice(), attributes.into_boxed_slice()) + }; + + let color_targets = { + let mut targets = Vec::new(); + for ct in desc.color_targets.iter().filter_map(|at| at.as_ref()) { + targets.push(super::ColorTargetDesc { + mask: ct.write_mask, + blend: ct.blend.as_ref().map(conv::map_blend), + }); + } + //Note: if any of the states are different, and `INDEPENDENT_BLEND` flag + // is not exposed, then this pipeline will not bind correctly. + targets.into_boxed_slice() + }; + + Ok(super::RenderPipeline { + inner, + primitive: desc.primitive, + vertex_buffers, + vertex_attributes, + color_targets, + depth: desc.depth_stencil.as_ref().map(|ds| super::DepthState { + function: conv::map_compare_func(ds.depth_compare), + mask: ds.depth_write_enabled, + }), + depth_bias: desc + .depth_stencil + .as_ref() + .map(|ds| ds.bias) + .unwrap_or_default(), + stencil: desc + .depth_stencil + .as_ref() + .map(|ds| conv::map_stencil(&ds.stencil)), + alpha_to_coverage_enabled: desc.multisample.alpha_to_coverage_enabled, + }) + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + let mut program_cache = self.shared.program_cache.lock(); + // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache` + // This is safe to assume as long as: + // - `RenderPipeline` can't be cloned + // - The only place that we can get a new reference is during `program_cache.lock()` + if Arc::strong_count(&pipeline.inner) == 2 { + program_cache.retain(|_, v| match *v { + Ok(ref p) => p.program != pipeline.inner.program, + Err(_) => false, + }); + let gl = &self.shared.context.lock(); + unsafe { gl.delete_program(pipeline.inner.program) }; + } + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + let gl = &self.shared.context.lock(); + let mut shaders = ArrayVec::new(); + shaders.push((naga::ShaderStage::Compute, &desc.stage)); + let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, None) }?; + + Ok(super::ComputePipeline { inner }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + let mut program_cache = self.shared.program_cache.lock(); + // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache`` + // This is safe to assume as long as: + // - `ComputePipeline` can't be cloned + // - The only place that we can get a new reference is during `program_cache.lock()` + if Arc::strong_count(&pipeline.inner) == 2 { + program_cache.retain(|_, v| match *v { + Ok(ref p) => p.program != pipeline.inner.program, + Err(_) => false, + }); + let gl = &self.shared.context.lock(); + unsafe { gl.delete_program(pipeline.inner.program) }; + } + } + + #[cfg_attr(target_arch = "wasm32", allow(unused))] + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + let gl = &self.shared.context.lock(); + let mut temp_string = String::new(); + + let mut queries = Vec::with_capacity(desc.count as usize); + for i in 0..desc.count { + let query = + unsafe { gl.create_query() }.map_err(|_| crate::DeviceError::OutOfMemory)?; + #[cfg(not(target_arch = "wasm32"))] + if gl.supports_debug() { + use std::fmt::Write; + + if let Some(label) = desc.label { + temp_string.clear(); + let _ = write!(temp_string, "{label}[{i}]"); + let name = unsafe { mem::transmute(query) }; + unsafe { gl.object_label(glow::QUERY, name, Some(&temp_string)) }; + } + } + queries.push(query); + } + + Ok(super::QuerySet { + queries: queries.into_boxed_slice(), + target: match desc.ty { + wgt::QueryType::Occlusion => glow::ANY_SAMPLES_PASSED, + _ => unimplemented!(), + }, + }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + let gl = &self.shared.context.lock(); + for &query in set.queries.iter() { + unsafe { gl.delete_query(query) }; + } + } + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + Ok(super::Fence { + last_completed: 0, + pending: Vec::new(), + }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + let gl = &self.shared.context.lock(); + for (_, sync) in fence.pending { + unsafe { gl.delete_sync(sync) }; + } + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + #[cfg_attr(target_arch = "wasm32", allow(clippy::needless_borrow))] + Ok(fence.get_latest(&self.shared.context.lock())) + } + unsafe fn wait( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + if fence.last_completed < wait_value { + let gl = &self.shared.context.lock(); + let timeout_ns = if cfg!(target_arch = "wasm32") { + 0 + } else { + (timeout_ms as u64 * 1_000_000).min(!0u32 as u64) + }; + let &(_, sync) = fence + .pending + .iter() + .find(|&&(value, _)| value >= wait_value) + .unwrap(); + match unsafe { + gl.client_wait_sync(sync, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns as i32) + } { + // for some reason firefox returns WAIT_FAILED, to investigate + #[cfg(target_arch = "wasm32")] + glow::WAIT_FAILED => { + log::warn!("wait failed!"); + Ok(false) + } + glow::TIMEOUT_EXPIRED => Ok(false), + glow::CONDITION_SATISFIED | glow::ALREADY_SIGNALED => Ok(true), + _ => Err(crate::DeviceError::Lost), + } + } else { + Ok(true) + } + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] + return unsafe { + self.render_doc + .start_frame_capture(self.shared.context.raw_context(), ptr::null_mut()) + }; + #[allow(unreachable_code)] + false + } + unsafe fn stop_capture(&self) { + #[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] + unsafe { + self.render_doc + .end_frame_capture(ptr::null_mut(), ptr::null_mut()) + } + } +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for super::Device {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for super::Device {} diff --git a/third_party/rust/wgpu-hal/src/gles/egl.rs b/third_party/rust/wgpu-hal/src/gles/egl.rs new file mode 100644 index 0000000000..13e217f9d9 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/egl.rs @@ -0,0 +1,1310 @@ +use glow::HasContext; +use parking_lot::{Mutex, MutexGuard}; + +use std::{ffi, os::raw, ptr, sync::Arc, time::Duration}; + +/// The amount of time to wait while trying to obtain a lock to the adapter context +const CONTEXT_LOCK_TIMEOUT_SECS: u64 = 1; + +const EGL_CONTEXT_FLAGS_KHR: i32 = 0x30FC; +const EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR: i32 = 0x0001; +const EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT: i32 = 0x30BF; +const EGL_PLATFORM_WAYLAND_KHR: u32 = 0x31D8; +const EGL_PLATFORM_X11_KHR: u32 = 0x31D5; +const EGL_PLATFORM_ANGLE_ANGLE: u32 = 0x3202; +const EGL_PLATFORM_ANGLE_NATIVE_PLATFORM_TYPE_ANGLE: u32 = 0x348F; +const EGL_PLATFORM_ANGLE_DEBUG_LAYERS_ENABLED: u32 = 0x3451; +const EGL_PLATFORM_SURFACELESS_MESA: u32 = 0x31DD; +const EGL_GL_COLORSPACE_KHR: u32 = 0x309D; +const EGL_GL_COLORSPACE_SRGB_KHR: u32 = 0x3089; + +type XOpenDisplayFun = + unsafe extern "system" fn(display_name: *const raw::c_char) -> *mut raw::c_void; + +type WlDisplayConnectFun = + unsafe extern "system" fn(display_name: *const raw::c_char) -> *mut raw::c_void; + +type WlDisplayDisconnectFun = unsafe extern "system" fn(display: *const raw::c_void); + +#[cfg(not(target_os = "emscripten"))] +type EglInstance = khronos_egl::DynamicInstance<khronos_egl::EGL1_4>; + +#[cfg(target_os = "emscripten")] +type EglInstance = khronos_egl::Instance<khronos_egl::Static>; + +type WlEglWindowCreateFun = unsafe extern "system" fn( + surface: *const raw::c_void, + width: raw::c_int, + height: raw::c_int, +) -> *mut raw::c_void; + +type WlEglWindowResizeFun = unsafe extern "system" fn( + window: *const raw::c_void, + width: raw::c_int, + height: raw::c_int, + dx: raw::c_int, + dy: raw::c_int, +); + +type WlEglWindowDestroyFun = unsafe extern "system" fn(window: *const raw::c_void); + +#[cfg(target_os = "android")] +extern "C" { + pub fn ANativeWindow_setBuffersGeometry( + window: *mut raw::c_void, + width: i32, + height: i32, + format: i32, + ) -> i32; +} + +type EglLabel = *const raw::c_void; + +#[allow(clippy::upper_case_acronyms)] +type EGLDEBUGPROCKHR = Option< + unsafe extern "system" fn( + error: khronos_egl::Enum, + command: *const raw::c_char, + message_type: u32, + thread_label: EglLabel, + object_label: EglLabel, + message: *const raw::c_char, + ), +>; + +const EGL_DEBUG_MSG_CRITICAL_KHR: u32 = 0x33B9; +const EGL_DEBUG_MSG_ERROR_KHR: u32 = 0x33BA; +const EGL_DEBUG_MSG_WARN_KHR: u32 = 0x33BB; +const EGL_DEBUG_MSG_INFO_KHR: u32 = 0x33BC; + +type EglDebugMessageControlFun = unsafe extern "system" fn( + proc: EGLDEBUGPROCKHR, + attrib_list: *const khronos_egl::Attrib, +) -> raw::c_int; + +unsafe extern "system" fn egl_debug_proc( + error: khronos_egl::Enum, + command_raw: *const raw::c_char, + message_type: u32, + _thread_label: EglLabel, + _object_label: EglLabel, + message_raw: *const raw::c_char, +) { + let log_severity = match message_type { + EGL_DEBUG_MSG_CRITICAL_KHR | EGL_DEBUG_MSG_ERROR_KHR => log::Level::Error, + EGL_DEBUG_MSG_WARN_KHR => log::Level::Warn, + EGL_DEBUG_MSG_INFO_KHR => log::Level::Info, + _ => log::Level::Debug, + }; + let command = unsafe { ffi::CStr::from_ptr(command_raw) }.to_string_lossy(); + let message = if message_raw.is_null() { + "".into() + } else { + unsafe { ffi::CStr::from_ptr(message_raw) }.to_string_lossy() + }; + + log::log!( + log_severity, + "EGL '{}' code 0x{:x}: {}", + command, + error, + message, + ); +} + +fn open_x_display() -> Option<(ptr::NonNull<raw::c_void>, libloading::Library)> { + log::info!("Loading X11 library to get the current display"); + unsafe { + let library = libloading::Library::new("libX11.so").ok()?; + let func: libloading::Symbol<XOpenDisplayFun> = library.get(b"XOpenDisplay").unwrap(); + let result = func(ptr::null()); + ptr::NonNull::new(result).map(|ptr| (ptr, library)) + } +} + +unsafe fn find_library(paths: &[&str]) -> Option<libloading::Library> { + for path in paths { + match unsafe { libloading::Library::new(path) } { + Ok(lib) => return Some(lib), + _ => continue, + }; + } + None +} + +fn test_wayland_display() -> Option<libloading::Library> { + /* We try to connect and disconnect here to simply ensure there + * is an active wayland display available. + */ + log::info!("Loading Wayland library to get the current display"); + let library = unsafe { + let client_library = find_library(&["libwayland-client.so.0", "libwayland-client.so"])?; + let wl_display_connect: libloading::Symbol<WlDisplayConnectFun> = + client_library.get(b"wl_display_connect").unwrap(); + let wl_display_disconnect: libloading::Symbol<WlDisplayDisconnectFun> = + client_library.get(b"wl_display_disconnect").unwrap(); + let display = ptr::NonNull::new(wl_display_connect(ptr::null()))?; + wl_display_disconnect(display.as_ptr()); + find_library(&["libwayland-egl.so.1", "libwayland-egl.so"])? + }; + Some(library) +} + +#[derive(Clone, Copy, Debug)] +enum SrgbFrameBufferKind { + /// No support for SRGB surface + None, + /// Using EGL 1.5's support for colorspaces + Core, + /// Using EGL_KHR_gl_colorspace + Khr, +} + +/// Choose GLES framebuffer configuration. +fn choose_config( + egl: &EglInstance, + display: khronos_egl::Display, + srgb_kind: SrgbFrameBufferKind, +) -> Result<(khronos_egl::Config, bool), crate::InstanceError> { + //TODO: EGL_SLOW_CONFIG + let tiers = [ + ( + "off-screen", + &[ + khronos_egl::SURFACE_TYPE, + khronos_egl::PBUFFER_BIT, + khronos_egl::RENDERABLE_TYPE, + khronos_egl::OPENGL_ES2_BIT, + ][..], + ), + ( + "presentation", + &[khronos_egl::SURFACE_TYPE, khronos_egl::WINDOW_BIT][..], + ), + #[cfg(not(target_os = "android"))] + ( + "native-render", + &[khronos_egl::NATIVE_RENDERABLE, khronos_egl::TRUE as _][..], + ), + ]; + + let mut attributes = Vec::with_capacity(9); + for tier_max in (0..tiers.len()).rev() { + let name = tiers[tier_max].0; + log::info!("\tTrying {}", name); + + attributes.clear(); + for &(_, tier_attr) in tiers[..=tier_max].iter() { + attributes.extend_from_slice(tier_attr); + } + // make sure the Alpha is enough to support sRGB + match srgb_kind { + SrgbFrameBufferKind::None => {} + _ => { + attributes.push(khronos_egl::ALPHA_SIZE); + attributes.push(8); + } + } + attributes.push(khronos_egl::NONE); + + match egl.choose_first_config(display, &attributes) { + Ok(Some(config)) => { + if tier_max == 1 { + //Note: this has been confirmed to malfunction on Intel+NV laptops, + // but also on Angle. + log::warn!("EGL says it can present to the window but not natively",); + } + // Android emulator can't natively present either. + let tier_threshold = if cfg!(target_os = "android") || cfg!(windows) { + 1 + } else { + 2 + }; + return Ok((config, tier_max >= tier_threshold)); + } + Ok(None) => { + log::warn!("No config found!"); + } + Err(e) => { + log::error!("error in choose_first_config: {:?}", e); + } + } + } + + Err(crate::InstanceError) +} + +fn gl_debug_message_callback(source: u32, gltype: u32, id: u32, severity: u32, message: &str) { + let source_str = match source { + glow::DEBUG_SOURCE_API => "API", + glow::DEBUG_SOURCE_WINDOW_SYSTEM => "Window System", + glow::DEBUG_SOURCE_SHADER_COMPILER => "ShaderCompiler", + glow::DEBUG_SOURCE_THIRD_PARTY => "Third Party", + glow::DEBUG_SOURCE_APPLICATION => "Application", + glow::DEBUG_SOURCE_OTHER => "Other", + _ => unreachable!(), + }; + + let log_severity = match severity { + glow::DEBUG_SEVERITY_HIGH => log::Level::Error, + glow::DEBUG_SEVERITY_MEDIUM => log::Level::Warn, + glow::DEBUG_SEVERITY_LOW => log::Level::Info, + glow::DEBUG_SEVERITY_NOTIFICATION => log::Level::Trace, + _ => unreachable!(), + }; + + let type_str = match gltype { + glow::DEBUG_TYPE_DEPRECATED_BEHAVIOR => "Deprecated Behavior", + glow::DEBUG_TYPE_ERROR => "Error", + glow::DEBUG_TYPE_MARKER => "Marker", + glow::DEBUG_TYPE_OTHER => "Other", + glow::DEBUG_TYPE_PERFORMANCE => "Performance", + glow::DEBUG_TYPE_POP_GROUP => "Pop Group", + glow::DEBUG_TYPE_PORTABILITY => "Portability", + glow::DEBUG_TYPE_PUSH_GROUP => "Push Group", + glow::DEBUG_TYPE_UNDEFINED_BEHAVIOR => "Undefined Behavior", + _ => unreachable!(), + }; + + let _ = std::panic::catch_unwind(|| { + log::log!( + log_severity, + "GLES: [{}/{}] ID {} : {}", + source_str, + type_str, + id, + message + ); + }); + + if cfg!(debug_assertions) && log_severity == log::Level::Error { + // Set canary and continue + crate::VALIDATION_CANARY.set(); + } +} + +#[derive(Clone, Debug)] +struct EglContext { + instance: Arc<EglInstance>, + version: (i32, i32), + display: khronos_egl::Display, + raw: khronos_egl::Context, + pbuffer: Option<khronos_egl::Surface>, +} + +impl EglContext { + fn make_current(&self) { + self.instance + .make_current(self.display, self.pbuffer, self.pbuffer, Some(self.raw)) + .unwrap(); + } + fn unmake_current(&self) { + self.instance + .make_current(self.display, None, None, None) + .unwrap(); + } +} + +/// A wrapper around a [`glow::Context`] and the required EGL context that uses locking to guarantee +/// exclusive access when shared with multiple threads. +pub struct AdapterContext { + glow: Mutex<glow::Context>, + egl: Option<EglContext>, +} + +unsafe impl Sync for AdapterContext {} +unsafe impl Send for AdapterContext {} + +impl AdapterContext { + pub fn is_owned(&self) -> bool { + self.egl.is_some() + } + + /// Returns the EGL instance. + /// + /// This provides access to EGL functions and the ability to load GL and EGL extension functions. + pub fn egl_instance(&self) -> Option<&EglInstance> { + self.egl.as_ref().map(|egl| &*egl.instance) + } + + /// Returns the EGLDisplay corresponding to the adapter context. + /// + /// Returns [`None`] if the adapter was externally created. + pub fn raw_display(&self) -> Option<&khronos_egl::Display> { + self.egl.as_ref().map(|egl| &egl.display) + } + + /// Returns the EGL version the adapter context was created with. + /// + /// Returns [`None`] if the adapter was externally created. + pub fn egl_version(&self) -> Option<(i32, i32)> { + self.egl.as_ref().map(|egl| egl.version) + } + + pub fn raw_context(&self) -> *mut raw::c_void { + match self.egl { + Some(ref egl) => egl.raw.as_ptr(), + None => ptr::null_mut(), + } + } +} + +struct EglContextLock<'a> { + instance: &'a Arc<EglInstance>, + display: khronos_egl::Display, +} + +/// A guard containing a lock to an [`AdapterContext`] +pub struct AdapterContextLock<'a> { + glow: MutexGuard<'a, glow::Context>, + egl: Option<EglContextLock<'a>>, +} + +impl<'a> std::ops::Deref for AdapterContextLock<'a> { + type Target = glow::Context; + + fn deref(&self) -> &Self::Target { + &self.glow + } +} + +impl<'a> Drop for AdapterContextLock<'a> { + fn drop(&mut self) { + if let Some(egl) = self.egl.take() { + egl.instance + .make_current(egl.display, None, None, None) + .unwrap(); + } + } +} + +impl AdapterContext { + /// Get's the [`glow::Context`] without waiting for a lock + /// + /// # Safety + /// + /// This should only be called when you have manually made sure that the current thread has made + /// the EGL context current and that no other thread also has the EGL context current. + /// Additionally, you must manually make the EGL context **not** current after you are done with + /// it, so that future calls to `lock()` will not fail. + /// + /// > **Note:** Calling this function **will** still lock the [`glow::Context`] which adds an + /// > extra safe-guard against accidental concurrent access to the context. + pub unsafe fn get_without_egl_lock(&self) -> MutexGuard<glow::Context> { + self.glow + .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) + .expect("Could not lock adapter context. This is most-likely a deadlock.") + } + + /// Obtain a lock to the EGL context and get handle to the [`glow::Context`] that can be used to + /// do rendering. + #[track_caller] + pub fn lock<'a>(&'a self) -> AdapterContextLock<'a> { + let glow = self + .glow + // Don't lock forever. If it takes longer than 1 second to get the lock we've got a + // deadlock and should panic to show where we got stuck + .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) + .expect("Could not lock adapter context. This is most-likely a deadlock."); + + let egl = self.egl.as_ref().map(|egl| { + egl.make_current(); + EglContextLock { + instance: &egl.instance, + display: egl.display, + } + }); + + AdapterContextLock { glow, egl } + } +} + +#[derive(Debug)] +struct Inner { + /// Note: the context contains a dummy pbuffer (1x1). + /// Required for `eglMakeCurrent` on platforms that doesn't supports `EGL_KHR_surfaceless_context`. + egl: EglContext, + #[allow(unused)] + version: (i32, i32), + supports_native_window: bool, + config: khronos_egl::Config, + #[cfg_attr(target_os = "emscripten", allow(dead_code))] + wl_display: Option<*mut raw::c_void>, + /// Method by which the framebuffer should support srgb + srgb_kind: SrgbFrameBufferKind, +} + +impl Inner { + fn create( + flags: crate::InstanceFlags, + egl: Arc<EglInstance>, + display: khronos_egl::Display, + ) -> Result<Self, crate::InstanceError> { + let version = egl.initialize(display).map_err(|_| crate::InstanceError)?; + let vendor = egl + .query_string(Some(display), khronos_egl::VENDOR) + .unwrap(); + let display_extensions = egl + .query_string(Some(display), khronos_egl::EXTENSIONS) + .unwrap() + .to_string_lossy(); + log::info!("Display vendor {:?}, version {:?}", vendor, version,); + log::debug!( + "Display extensions: {:#?}", + display_extensions.split_whitespace().collect::<Vec<_>>() + ); + + let srgb_kind = if version >= (1, 5) { + log::info!("\tEGL surface: +srgb"); + SrgbFrameBufferKind::Core + } else if display_extensions.contains("EGL_KHR_gl_colorspace") { + log::info!("\tEGL surface: +srgb khr"); + SrgbFrameBufferKind::Khr + } else { + log::warn!("\tEGL surface: -srgb"); + SrgbFrameBufferKind::None + }; + + if log::max_level() >= log::LevelFilter::Trace { + log::trace!("Configurations:"); + let config_count = egl.get_config_count(display).unwrap(); + let mut configurations = Vec::with_capacity(config_count); + egl.get_configs(display, &mut configurations).unwrap(); + for &config in configurations.iter() { + log::trace!("\tCONFORMANT=0x{:X}, RENDERABLE=0x{:X}, NATIVE_RENDERABLE=0x{:X}, SURFACE_TYPE=0x{:X}, ALPHA_SIZE={}", + egl.get_config_attrib(display, config, khronos_egl::CONFORMANT).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::RENDERABLE_TYPE).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::NATIVE_RENDERABLE).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::SURFACE_TYPE).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::ALPHA_SIZE).unwrap(), + ); + } + } + + let (config, supports_native_window) = choose_config(&egl, display, srgb_kind)?; + egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap(); + + let needs_robustness = true; + let mut khr_context_flags = 0; + let supports_khr_context = display_extensions.contains("EGL_KHR_create_context"); + + //TODO: make it so `Device` == EGL Context + let mut context_attributes = vec![ + khronos_egl::CONTEXT_CLIENT_VERSION, + 3, // Request GLES 3.0 or higher + ]; + if flags.contains(crate::InstanceFlags::DEBUG) { + if version >= (1, 5) { + log::info!("\tEGL context: +debug"); + context_attributes.push(khronos_egl::CONTEXT_OPENGL_DEBUG); + context_attributes.push(khronos_egl::TRUE as _); + } else if supports_khr_context { + log::info!("\tEGL context: +debug KHR"); + khr_context_flags |= EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR; + } else { + log::info!("\tEGL context: -debug"); + } + } + if needs_robustness { + //Note: the core version can fail if robustness is not supported + // (regardless of whether the extension is supported!). + // In fact, Angle does precisely that awful behavior, so we don't try it there. + if version >= (1, 5) && !display_extensions.contains("EGL_ANGLE_") { + log::info!("\tEGL context: +robust access"); + context_attributes.push(khronos_egl::CONTEXT_OPENGL_ROBUST_ACCESS); + context_attributes.push(khronos_egl::TRUE as _); + } else if display_extensions.contains("EGL_EXT_create_context_robustness") { + log::info!("\tEGL context: +robust access EXT"); + context_attributes.push(EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT); + context_attributes.push(khronos_egl::TRUE as _); + } else { + //Note: we aren't trying `EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR` + // because it's for desktop GL only, not GLES. + log::warn!("\tEGL context: -robust access"); + } + + //TODO do we need `khronos_egl::CONTEXT_OPENGL_NOTIFICATION_STRATEGY_EXT`? + } + if khr_context_flags != 0 { + context_attributes.push(EGL_CONTEXT_FLAGS_KHR); + context_attributes.push(khr_context_flags); + } + context_attributes.push(khronos_egl::NONE); + let context = match egl.create_context(display, config, None, &context_attributes) { + Ok(context) => context, + Err(e) => { + log::warn!("unable to create GLES 3.x context: {:?}", e); + return Err(crate::InstanceError); + } + }; + + // Testing if context can be binded without surface + // and creating dummy pbuffer surface if not. + let pbuffer = if version >= (1, 5) + || display_extensions.contains("EGL_KHR_surfaceless_context") + || cfg!(target_os = "emscripten") + { + log::info!("\tEGL context: +surfaceless"); + None + } else { + let attributes = [ + khronos_egl::WIDTH, + 1, + khronos_egl::HEIGHT, + 1, + khronos_egl::NONE, + ]; + egl.create_pbuffer_surface(display, config, &attributes) + .map(Some) + .map_err(|e| { + log::warn!("Error in create_pbuffer_surface: {:?}", e); + crate::InstanceError + })? + }; + + Ok(Self { + egl: EglContext { + instance: egl, + display, + raw: context, + pbuffer, + version, + }, + version, + supports_native_window, + config, + wl_display: None, + srgb_kind, + }) + } +} + +impl Drop for Inner { + fn drop(&mut self) { + if let Err(e) = self + .egl + .instance + .destroy_context(self.egl.display, self.egl.raw) + { + log::warn!("Error in destroy_context: {:?}", e); + } + if let Err(e) = self.egl.instance.terminate(self.egl.display) { + log::warn!("Error in terminate: {:?}", e); + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +enum WindowKind { + Wayland, + X11, + AngleX11, + Unknown, +} + +#[derive(Clone, Debug)] +struct WindowSystemInterface { + library: Option<Arc<libloading::Library>>, + kind: WindowKind, +} + +pub struct Instance { + wsi: WindowSystemInterface, + flags: crate::InstanceFlags, + inner: Mutex<Inner>, +} + +impl Instance { + pub fn raw_display(&self) -> khronos_egl::Display { + self.inner + .try_lock() + .expect("Could not lock instance. This is most-likely a deadlock.") + .egl + .display + } + + /// Returns the version of the EGL display. + pub fn egl_version(&self) -> (i32, i32) { + self.inner + .try_lock() + .expect("Could not lock instance. This is most-likely a deadlock.") + .version + } +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +impl crate::Instance<super::Api> for Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + #[cfg(target_os = "emscripten")] + let egl_result: Result<EglInstance, khronos_egl::Error> = + Ok(khronos_egl::Instance::new(khronos_egl::Static)); + + #[cfg(not(target_os = "emscripten"))] + let egl_result = if cfg!(windows) { + unsafe { + khronos_egl::DynamicInstance::<khronos_egl::EGL1_4>::load_required_from_filename( + "libEGL.dll", + ) + } + } else if cfg!(any(target_os = "macos", target_os = "ios")) { + unsafe { + khronos_egl::DynamicInstance::<khronos_egl::EGL1_4>::load_required_from_filename( + "libEGL.dylib", + ) + } + } else { + unsafe { khronos_egl::DynamicInstance::<khronos_egl::EGL1_4>::load_required() } + }; + let egl = match egl_result { + Ok(egl) => Arc::new(egl), + Err(e) => { + log::info!("Unable to open libEGL: {:?}", e); + return Err(crate::InstanceError); + } + }; + + let client_extensions = egl.query_string(None, khronos_egl::EXTENSIONS); + + let client_ext_str = match client_extensions { + Ok(ext) => ext.to_string_lossy().into_owned(), + Err(_) => String::new(), + }; + log::debug!( + "Client extensions: {:#?}", + client_ext_str.split_whitespace().collect::<Vec<_>>() + ); + + let wayland_library = if client_ext_str.contains("EGL_EXT_platform_wayland") { + test_wayland_display() + } else { + None + }; + let x11_display_library = if client_ext_str.contains("EGL_EXT_platform_x11") { + open_x_display() + } else { + None + }; + let angle_x11_display_library = if client_ext_str.contains("EGL_ANGLE_platform_angle") { + open_x_display() + } else { + None + }; + + #[cfg(not(target_os = "emscripten"))] + let egl1_5 = egl.upcast::<khronos_egl::EGL1_5>(); + + #[cfg(target_os = "emscripten")] + let egl1_5: Option<&Arc<EglInstance>> = Some(&egl); + + let (display, wsi_library, wsi_kind) = if let (Some(library), Some(egl)) = + (wayland_library, egl1_5) + { + log::info!("Using Wayland platform"); + let display_attributes = [khronos_egl::ATTRIB_NONE]; + let display = egl + .get_platform_display( + EGL_PLATFORM_WAYLAND_KHR, + khronos_egl::DEFAULT_DISPLAY, + &display_attributes, + ) + .unwrap(); + (display, Some(Arc::new(library)), WindowKind::Wayland) + } else if let (Some((display, library)), Some(egl)) = (x11_display_library, egl1_5) { + log::info!("Using X11 platform"); + let display_attributes = [khronos_egl::ATTRIB_NONE]; + let display = egl + .get_platform_display(EGL_PLATFORM_X11_KHR, display.as_ptr(), &display_attributes) + .unwrap(); + (display, Some(Arc::new(library)), WindowKind::X11) + } else if let (Some((display, library)), Some(egl)) = (angle_x11_display_library, egl1_5) { + log::info!("Using Angle platform with X11"); + let display_attributes = [ + EGL_PLATFORM_ANGLE_NATIVE_PLATFORM_TYPE_ANGLE as khronos_egl::Attrib, + EGL_PLATFORM_X11_KHR as khronos_egl::Attrib, + EGL_PLATFORM_ANGLE_DEBUG_LAYERS_ENABLED as khronos_egl::Attrib, + usize::from(desc.flags.contains(crate::InstanceFlags::VALIDATION)), + khronos_egl::ATTRIB_NONE, + ]; + let display = egl + .get_platform_display( + EGL_PLATFORM_ANGLE_ANGLE, + display.as_ptr(), + &display_attributes, + ) + .unwrap(); + (display, Some(Arc::new(library)), WindowKind::AngleX11) + } else if client_ext_str.contains("EGL_MESA_platform_surfaceless") { + log::info!("No windowing system present. Using surfaceless platform"); + let egl = egl1_5.expect("Failed to get EGL 1.5 for surfaceless"); + let display = egl + .get_platform_display( + EGL_PLATFORM_SURFACELESS_MESA, + std::ptr::null_mut(), + &[khronos_egl::ATTRIB_NONE], + ) + .unwrap(); + (display, None, WindowKind::Unknown) + } else { + log::info!("EGL_MESA_platform_surfaceless not available. Using default platform"); + let display = egl.get_display(khronos_egl::DEFAULT_DISPLAY).unwrap(); + (display, None, WindowKind::Unknown) + }; + + if desc.flags.contains(crate::InstanceFlags::VALIDATION) + && client_ext_str.contains("EGL_KHR_debug") + { + log::info!("Enabling EGL debug output"); + let function: EglDebugMessageControlFun = { + let addr = egl.get_proc_address("eglDebugMessageControlKHR").unwrap(); + unsafe { std::mem::transmute(addr) } + }; + let attributes = [ + EGL_DEBUG_MSG_CRITICAL_KHR as khronos_egl::Attrib, + 1, + EGL_DEBUG_MSG_ERROR_KHR as khronos_egl::Attrib, + 1, + EGL_DEBUG_MSG_WARN_KHR as khronos_egl::Attrib, + 1, + EGL_DEBUG_MSG_INFO_KHR as khronos_egl::Attrib, + 1, + khronos_egl::ATTRIB_NONE, + ]; + unsafe { (function)(Some(egl_debug_proc), attributes.as_ptr()) }; + } + + let inner = Inner::create(desc.flags, egl, display)?; + + Ok(Instance { + wsi: WindowSystemInterface { + library: wsi_library, + kind: wsi_kind, + }, + flags: desc.flags, + inner: Mutex::new(inner), + }) + } + + #[cfg_attr(target_os = "macos", allow(unused, unused_mut, unreachable_code))] + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + use raw_window_handle::RawWindowHandle as Rwh; + + #[cfg_attr( + any(target_os = "android", target_os = "emscripten"), + allow(unused_mut) + )] + let mut inner = self.inner.lock(); + + match (window_handle, display_handle) { + (Rwh::Xlib(_), _) => {} + (Rwh::Xcb(_), _) => {} + (Rwh::Win32(_), _) => {} + (Rwh::AppKit(_), _) => {} + #[cfg(target_os = "android")] + (Rwh::AndroidNdk(handle), _) => { + let format = inner + .egl + .instance + .get_config_attrib( + inner.egl.display, + inner.config, + khronos_egl::NATIVE_VISUAL_ID, + ) + .unwrap(); + + let ret = unsafe { + ANativeWindow_setBuffersGeometry(handle.a_native_window, 0, 0, format) + }; + + if ret != 0 { + log::error!("Error returned from ANativeWindow_setBuffersGeometry"); + return Err(crate::InstanceError); + } + } + #[cfg(not(target_os = "emscripten"))] + (Rwh::Wayland(_), raw_window_handle::RawDisplayHandle::Wayland(display_handle)) => { + if inner + .wl_display + .map(|ptr| ptr != display_handle.display) + .unwrap_or(true) + { + /* Wayland displays are not sharable between surfaces so if the + * surface we receive from this handle is from a different + * display, we must re-initialize the context. + * + * See gfx-rs/gfx#3545 + */ + log::warn!("Re-initializing Gles context due to Wayland window"); + + use std::ops::DerefMut; + let display_attributes = [khronos_egl::ATTRIB_NONE]; + + let display = inner + .egl + .instance + .upcast::<khronos_egl::EGL1_5>() + .unwrap() + .get_platform_display( + EGL_PLATFORM_WAYLAND_KHR, + display_handle.display, + &display_attributes, + ) + .unwrap(); + + let new_inner = + Inner::create(self.flags, Arc::clone(&inner.egl.instance), display) + .map_err(|_| crate::InstanceError)?; + + let old_inner = std::mem::replace(inner.deref_mut(), new_inner); + inner.wl_display = Some(display_handle.display); + + drop(old_inner); + } + } + #[cfg(target_os = "emscripten")] + (Rwh::Web(_), _) => {} + other => { + log::error!("Unsupported window: {:?}", other); + return Err(crate::InstanceError); + } + }; + + inner.egl.unmake_current(); + + Ok(Surface { + egl: inner.egl.clone(), + wsi: self.wsi.clone(), + config: inner.config, + presentable: inner.supports_native_window, + raw_window_handle: window_handle, + swapchain: None, + srgb_kind: inner.srgb_kind, + }) + } + unsafe fn destroy_surface(&self, _surface: Surface) {} + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let inner = self.inner.lock(); + inner.egl.make_current(); + + let gl = unsafe { + glow::Context::from_loader_function(|name| { + inner + .egl + .instance + .get_proc_address(name) + .map_or(ptr::null(), |p| p as *const _) + }) + }; + + if self.flags.contains(crate::InstanceFlags::DEBUG) && gl.supports_debug() { + log::info!("Max label length: {}", unsafe { + gl.get_parameter_i32(glow::MAX_LABEL_LENGTH) + }); + } + + if self.flags.contains(crate::InstanceFlags::VALIDATION) && gl.supports_debug() { + log::info!("Enabling GLES debug output"); + unsafe { gl.enable(glow::DEBUG_OUTPUT) }; + unsafe { gl.debug_message_callback(gl_debug_message_callback) }; + } + + inner.egl.unmake_current(); + + unsafe { + super::Adapter::expose(AdapterContext { + glow: Mutex::new(gl), + egl: Some(inner.egl.clone()), + }) + } + .into_iter() + .collect() + } +} + +impl super::Adapter { + /// Creates a new external adapter using the specified loader function. + /// + /// # Safety + /// + /// - The underlying OpenGL ES context must be current. + /// - The underlying OpenGL ES context must be current when interfacing with any objects returned by + /// wgpu-hal from this adapter. + pub unsafe fn new_external( + fun: impl FnMut(&str) -> *const ffi::c_void, + ) -> Option<crate::ExposedAdapter<super::Api>> { + let context = unsafe { glow::Context::from_loader_function(fun) }; + unsafe { + Self::expose(AdapterContext { + glow: Mutex::new(context), + egl: None, + }) + } + } + + pub fn adapter_context(&self) -> &AdapterContext { + &self.shared.context + } +} + +impl super::Device { + /// Returns the underlying EGL context. + pub fn context(&self) -> &AdapterContext { + &self.shared.context + } +} + +#[derive(Debug)] +pub struct Swapchain { + surface: khronos_egl::Surface, + wl_window: Option<*mut raw::c_void>, + framebuffer: glow::Framebuffer, + renderbuffer: glow::Renderbuffer, + /// Extent because the window lies + extent: wgt::Extent3d, + format: wgt::TextureFormat, + format_desc: super::TextureFormatDesc, + #[allow(unused)] + sample_type: wgt::TextureSampleType, +} + +#[derive(Debug)] +pub struct Surface { + egl: EglContext, + wsi: WindowSystemInterface, + config: khronos_egl::Config, + pub(super) presentable: bool, + raw_window_handle: raw_window_handle::RawWindowHandle, + swapchain: Option<Swapchain>, + srgb_kind: SrgbFrameBufferKind, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +impl Surface { + pub(super) unsafe fn present( + &mut self, + _suf_texture: super::Texture, + gl: &glow::Context, + ) -> Result<(), crate::SurfaceError> { + let sc = self.swapchain.as_ref().unwrap(); + + self.egl + .instance + .make_current( + self.egl.display, + Some(sc.surface), + Some(sc.surface), + Some(self.egl.raw), + ) + .map_err(|e| { + log::error!("make_current(surface) failed: {}", e); + crate::SurfaceError::Lost + })?; + + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.color_mask(true, true, true, true) }; + + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(sc.framebuffer)) }; + // Note the Y-flipping here. GL's presentation is not flipped, + // but main rendering is. Therefore, we Y-flip the output positions + // in the shader, and also this blit. + unsafe { + gl.blit_framebuffer( + 0, + sc.extent.height as i32, + sc.extent.width as i32, + 0, + 0, + 0, + sc.extent.width as i32, + sc.extent.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + + self.egl + .instance + .swap_buffers(self.egl.display, sc.surface) + .map_err(|e| { + log::error!("swap_buffers failed: {}", e); + crate::SurfaceError::Lost + })?; + self.egl + .instance + .make_current(self.egl.display, None, None, None) + .map_err(|e| { + log::error!("make_current(null) failed: {}", e); + crate::SurfaceError::Lost + })?; + + Ok(()) + } + + unsafe fn unconfigure_impl( + &mut self, + device: &super::Device, + ) -> Option<(khronos_egl::Surface, Option<*mut raw::c_void>)> { + let gl = &device.shared.context.lock(); + match self.swapchain.take() { + Some(sc) => { + unsafe { gl.delete_renderbuffer(sc.renderbuffer) }; + unsafe { gl.delete_framebuffer(sc.framebuffer) }; + Some((sc.surface, sc.wl_window)) + } + None => None, + } + } + + pub fn supports_srgb(&self) -> bool { + match self.srgb_kind { + SrgbFrameBufferKind::None => false, + _ => true, + } + } +} + +impl crate::Surface<super::Api> for Surface { + unsafe fn configure( + &mut self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + use raw_window_handle::RawWindowHandle as Rwh; + + let (surface, wl_window) = match unsafe { self.unconfigure_impl(device) } { + Some(pair) => pair, + None => { + let mut wl_window = None; + let (mut temp_xlib_handle, mut temp_xcb_handle); + #[allow(trivial_casts)] + let native_window_ptr = match (self.wsi.kind, self.raw_window_handle) { + (WindowKind::Unknown | WindowKind::X11, Rwh::Xlib(handle)) => { + temp_xlib_handle = handle.window; + &mut temp_xlib_handle as *mut _ as *mut std::ffi::c_void + } + (WindowKind::AngleX11, Rwh::Xlib(handle)) => { + handle.window as *mut std::ffi::c_void + } + (WindowKind::Unknown | WindowKind::X11, Rwh::Xcb(handle)) => { + temp_xcb_handle = handle.window; + &mut temp_xcb_handle as *mut _ as *mut std::ffi::c_void + } + (WindowKind::AngleX11, Rwh::Xcb(handle)) => { + handle.window as *mut std::ffi::c_void + } + (WindowKind::Unknown, Rwh::AndroidNdk(handle)) => handle.a_native_window, + (WindowKind::Wayland, Rwh::Wayland(handle)) => { + let library = self.wsi.library.as_ref().unwrap(); + let wl_egl_window_create: libloading::Symbol<WlEglWindowCreateFun> = + unsafe { library.get(b"wl_egl_window_create") }.unwrap(); + let window = unsafe { wl_egl_window_create(handle.surface, 640, 480) } + as *mut _ as *mut std::ffi::c_void; + wl_window = Some(window); + window + } + #[cfg(target_os = "emscripten")] + (WindowKind::Unknown, Rwh::Web(handle)) => handle.id as *mut std::ffi::c_void, + (WindowKind::Unknown, Rwh::Win32(handle)) => handle.hwnd, + (WindowKind::Unknown, Rwh::AppKit(handle)) => { + #[cfg(not(target_os = "macos"))] + let window_ptr = handle.ns_view; + #[cfg(target_os = "macos")] + let window_ptr = { + use objc::{msg_send, runtime::Object, sel, sel_impl}; + // ns_view always have a layer and don't need to verify that it exists. + let layer: *mut Object = + msg_send![handle.ns_view as *mut Object, layer]; + layer as *mut ffi::c_void + }; + window_ptr + } + _ => { + log::warn!( + "Initialized platform {:?} doesn't work with window {:?}", + self.wsi.kind, + self.raw_window_handle + ); + return Err(crate::SurfaceError::Other("incompatible window kind")); + } + }; + + let mut attributes = vec![ + khronos_egl::RENDER_BUFFER, + // We don't want any of the buffering done by the driver, because we + // manage a swapchain on our side. + // Some drivers just fail on surface creation seeing `EGL_SINGLE_BUFFER`. + if cfg!(any(target_os = "android", target_os = "macos")) + || cfg!(windows) + || self.wsi.kind == WindowKind::AngleX11 + { + khronos_egl::BACK_BUFFER + } else { + khronos_egl::SINGLE_BUFFER + }, + ]; + match self.srgb_kind { + SrgbFrameBufferKind::None => {} + SrgbFrameBufferKind::Core => { + attributes.push(khronos_egl::GL_COLORSPACE); + attributes.push(khronos_egl::GL_COLORSPACE_SRGB); + } + SrgbFrameBufferKind::Khr => { + attributes.push(EGL_GL_COLORSPACE_KHR as i32); + attributes.push(EGL_GL_COLORSPACE_SRGB_KHR as i32); + } + } + attributes.push(khronos_egl::ATTRIB_NONE as i32); + + #[cfg(not(target_os = "emscripten"))] + let egl1_5 = self.egl.instance.upcast::<khronos_egl::EGL1_5>(); + + #[cfg(target_os = "emscripten")] + let egl1_5: Option<&Arc<EglInstance>> = Some(&self.egl.instance); + + // Careful, we can still be in 1.4 version even if `upcast` succeeds + let raw_result = match egl1_5 { + Some(egl) if self.wsi.kind != WindowKind::Unknown => { + let attributes_usize = attributes + .into_iter() + .map(|v| v as usize) + .collect::<Vec<_>>(); + egl.create_platform_window_surface( + self.egl.display, + self.config, + native_window_ptr, + &attributes_usize, + ) + } + _ => unsafe { + self.egl.instance.create_window_surface( + self.egl.display, + self.config, + native_window_ptr, + Some(&attributes), + ) + }, + }; + + match raw_result { + Ok(raw) => (raw, wl_window), + Err(e) => { + log::warn!("Error in create_window_surface: {:?}", e); + return Err(crate::SurfaceError::Lost); + } + } + } + }; + + if let Some(window) = wl_window { + let library = self.wsi.library.as_ref().unwrap(); + let wl_egl_window_resize: libloading::Symbol<WlEglWindowResizeFun> = + unsafe { library.get(b"wl_egl_window_resize") }.unwrap(); + unsafe { + wl_egl_window_resize( + window, + config.extent.width as i32, + config.extent.height as i32, + 0, + 0, + ) + }; + } + + let format_desc = device.shared.describe_texture_format(config.format); + let gl = &device.shared.context.lock(); + let renderbuffer = unsafe { gl.create_renderbuffer() }.map_err(|error| { + log::error!("Internal swapchain renderbuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, Some(renderbuffer)) }; + unsafe { + gl.renderbuffer_storage( + glow::RENDERBUFFER, + format_desc.internal, + config.extent.width as _, + config.extent.height as _, + ) + }; + let framebuffer = unsafe { gl.create_framebuffer() }.map_err(|error| { + log::error!("Internal swapchain framebuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(framebuffer)) }; + unsafe { + gl.framebuffer_renderbuffer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + glow::RENDERBUFFER, + Some(renderbuffer), + ) + }; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + + self.swapchain = Some(Swapchain { + surface, + wl_window, + renderbuffer, + framebuffer, + extent: config.extent, + format: config.format, + format_desc, + sample_type: wgt::TextureSampleType::Float { filterable: false }, + }); + + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &super::Device) { + if let Some((surface, wl_window)) = unsafe { self.unconfigure_impl(device) } { + self.egl + .instance + .destroy_surface(self.egl.display, surface) + .unwrap(); + if let Some(window) = wl_window { + let library = self.wsi.library.as_ref().expect("unsupported window"); + let wl_egl_window_destroy: libloading::Symbol<WlEglWindowDestroyFun> = + unsafe { library.get(b"wl_egl_window_destroy") }.unwrap(); + unsafe { wl_egl_window_destroy(window) }; + } + } + } + + unsafe fn acquire_texture( + &mut self, + _timeout_ms: Option<Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let sc = self.swapchain.as_ref().unwrap(); + let texture = super::Texture { + inner: super::TextureInner::Renderbuffer { + raw: sc.renderbuffer, + }, + drop_guard: None, + array_layer_count: 1, + mip_level_count: 1, + format: sc.format, + format_desc: sc.format_desc.clone(), + copy_size: crate::CopyExtent { + width: sc.extent.width, + height: sc.extent.height, + depth: 1, + }, + is_cubemap: false, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&mut self, _texture: super::Texture) {} +} diff --git a/third_party/rust/wgpu-hal/src/gles/emscripten.rs b/third_party/rust/wgpu-hal/src/gles/emscripten.rs new file mode 100644 index 0000000000..7372dbd369 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/emscripten.rs @@ -0,0 +1,26 @@ +extern "C" { + /// returns 1 if success. 0 if failure. extension name must be null terminated + fn emscripten_webgl_enable_extension( + context: std::ffi::c_int, + extension: *const std::ffi::c_char, + ) -> std::ffi::c_int; + fn emscripten_webgl_get_current_context() -> std::ffi::c_int; +} +/// Webgl requires extensions to be enabled before using them. +/// This function can be used to enable webgl extension on emscripten target. +/// +/// returns true on success +/// +/// # Safety: +/// +/// - opengl context MUST BE current +/// - extension_name_null_terminated argument must be a valid string with null terminator. +/// - extension must be present. check `glow_context.supported_extensions()` +pub unsafe fn enable_extension(extension_name_null_terminated: &str) -> bool { + unsafe { + emscripten_webgl_enable_extension( + emscripten_webgl_get_current_context(), + extension_name_null_terminated.as_ptr() as _, + ) == 1 + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/mod.rs b/third_party/rust/wgpu-hal/src/gles/mod.rs new file mode 100644 index 0000000000..f11286833d --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/mod.rs @@ -0,0 +1,861 @@ +/*! +# OpenGL ES3 API (aka GLES3). + +Designed to work on Linux and Android, with context provided by EGL. + +## Texture views + +GLES3 doesn't really have separate texture view objects. We have to remember the +original texture and the sub-range into it. Problem is, however, that there is +no way to expose a subset of array layers or mip levels of a sampled texture. + +## Binding model + +Binding model is very different from WebGPU, especially with regards to samplers. +GLES3 has sampler objects, but they aren't separately bindable to the shaders. +Each sampled texture is exposed to the shader as a combined texture-sampler binding. + +When building the pipeline layout, we linearize binding entries based on the groups +(uniform/storage buffers, uniform/storage textures), and record the mapping into +`BindGroupLayoutInfo`. +When a pipeline gets created, and we track all the texture-sampler associations +from the static use in the shader. +We only support at most one sampler used with each texture so far. The linear index +of this sampler is stored per texture slot in `SamplerBindMap` array. + +The texture-sampler pairs get potentially invalidated in 2 places: + - when a new pipeline is set, we update the linear indices of associated samplers + - when a new bind group is set, we update both the textures and the samplers + +We expect that the changes to sampler states between any 2 pipelines of the same layout +will be minimal, if any. + +## Vertex data + +Generally, vertex buffers are marked as dirty and lazily bound on draw. + +GLES3 doesn't support "base instance" semantics. However, it's easy to support, +since we are forced to do late binding anyway. We just adjust the offsets +into the vertex data. + +### Old path + +In GLES-3.0 and WebGL2, vertex buffer layout is provided +together with the actual buffer binding. +We invalidate the attributes on the vertex buffer change, and re-bind them. + +### New path + +In GLES-3.1 and higher, the vertex buffer layout can be declared separately +from the vertex data itself. This mostly matches WebGPU, however there is a catch: +`stride` needs to be specified with the data, not as a part of the layout. + +To address this, we invalidate the vertex buffers based on: + - whether or not `start_instance` is used + - stride has changed + +*/ + +///cbindgen:ignore +#[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] +mod egl; +#[cfg(target_os = "emscripten")] +mod emscripten; +#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] +mod web; + +mod adapter; +mod command; +mod conv; +mod device; +mod queue; + +use crate::{CopyExtent, TextureDescriptor}; + +#[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] +pub use self::egl::{AdapterContext, AdapterContextLock}; +#[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] +use self::egl::{Instance, Surface}; + +#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] +pub use self::web::AdapterContext; +#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] +use self::web::{Instance, Surface}; + +use arrayvec::ArrayVec; + +use glow::HasContext; + +use naga::FastHashMap; +use parking_lot::Mutex; +use std::sync::atomic::AtomicU32; +use std::{fmt, ops::Range, sync::Arc}; + +#[derive(Clone)] +pub struct Api; + +//Note: we can support more samplers if not every one of them is used at a time, +// but it probably doesn't worth it. +const MAX_TEXTURE_SLOTS: usize = 16; +const MAX_SAMPLERS: usize = 16; +const MAX_VERTEX_ATTRIBUTES: usize = 16; +const ZERO_BUFFER_SIZE: usize = 256 << 10; +const MAX_PUSH_CONSTANTS: usize = 64; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = Texture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +bitflags::bitflags! { + /// Flags that affect internal code paths but do not + /// change the exposed feature set. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct PrivateCapabilities: u32 { + /// Indicates support for `glBufferStorage` allocation. + const BUFFER_ALLOCATION = 1 << 0; + /// Support explicit layouts in shader. + const SHADER_BINDING_LAYOUT = 1 << 1; + /// Support extended shadow sampling instructions. + const SHADER_TEXTURE_SHADOW_LOD = 1 << 2; + /// Support memory barriers. + const MEMORY_BARRIERS = 1 << 3; + /// Vertex buffer layouts separate from the data. + const VERTEX_BUFFER_LAYOUT = 1 << 4; + /// Indicates that buffers used as `GL_ELEMENT_ARRAY_BUFFER` may be created / initialized / used + /// as other targets, if not present they must not be mixed with other targets. + const INDEX_BUFFER_ROLE_CHANGE = 1 << 5; + /// Indicates that the device supports disabling draw buffers + const CAN_DISABLE_DRAW_BUFFER = 1 << 6; + /// Supports `glGetBufferSubData` + const GET_BUFFER_SUB_DATA = 1 << 7; + /// Supports `f16` color buffers + const COLOR_BUFFER_HALF_FLOAT = 1 << 8; + /// Supports `f11/f10` and `f32` color buffers + const COLOR_BUFFER_FLOAT = 1 << 9; + /// Supports linear flitering `f32` textures. + const TEXTURE_FLOAT_LINEAR = 1 << 10; + } +} + +bitflags::bitflags! { + /// Flags that indicate necessary workarounds for specific devices or driver bugs + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct Workarounds: u32 { + // Needs workaround for Intel Mesa bug: + // https://gitlab.freedesktop.org/mesa/mesa/-/issues/2565. + // + // This comment + // (https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4972/diffs?diff_id=75888#22f5d1004713c9bbf857988c7efb81631ab88f99_323_327) + // seems to indicate all skylake models are effected. + const MESA_I915_SRGB_SHADER_CLEAR = 1 << 0; + /// Buffer map must emulated becuase it is not supported natively + const EMULATE_BUFFER_MAP = 1 << 1; + } +} + +type BindTarget = u32; + +#[derive(Debug, Clone, Copy)] +enum VertexAttribKind { + Float, // glVertexAttribPointer + Integer, // glVertexAttribIPointer + //Double, // glVertexAttribLPointer +} + +impl Default for VertexAttribKind { + fn default() -> Self { + Self::Float + } +} + +#[derive(Clone, Debug)] +pub struct TextureFormatDesc { + pub internal: u32, + pub external: u32, + pub data_type: u32, +} + +struct AdapterShared { + context: AdapterContext, + private_caps: PrivateCapabilities, + features: wgt::Features, + workarounds: Workarounds, + shading_language_version: naga::back::glsl::Version, + max_texture_size: u32, + next_shader_id: AtomicU32, + program_cache: Mutex<ProgramCache>, +} + +pub struct Adapter { + shared: Arc<AdapterShared>, +} + +pub struct Device { + shared: Arc<AdapterShared>, + main_vao: glow::VertexArray, + #[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] + render_doc: crate::auxil::renderdoc::RenderDoc, +} + +pub struct Queue { + shared: Arc<AdapterShared>, + features: wgt::Features, + draw_fbo: glow::Framebuffer, + copy_fbo: glow::Framebuffer, + /// Shader program used to clear the screen for [`Workarounds::MESA_I915_SRGB_SHADER_CLEAR`] + /// devices. + shader_clear_program: glow::Program, + /// The uniform location of the color uniform in the shader clear program + shader_clear_program_color_uniform_location: glow::UniformLocation, + /// Keep a reasonably large buffer filled with zeroes, so that we can implement `ClearBuffer` of + /// zeroes by copying from it. + zero_buffer: glow::Buffer, + temp_query_results: Vec<u64>, + draw_buffer_count: u8, + current_index_buffer: Option<glow::Buffer>, +} + +#[derive(Clone, Debug)] +pub struct Buffer { + raw: Option<glow::Buffer>, + target: BindTarget, + size: wgt::BufferAddress, + map_flags: u32, + data: Option<Arc<std::sync::Mutex<Vec<u8>>>>, +} + +// Safe: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for Buffer {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for Buffer {} + +#[derive(Clone, Debug)] +pub enum TextureInner { + Renderbuffer { + raw: glow::Renderbuffer, + }, + DefaultRenderbuffer, + Texture { + raw: glow::Texture, + target: BindTarget, + }, + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + ExternalFramebuffer { + inner: web_sys::WebGlFramebuffer, + }, +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Send for TextureInner {} +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for TextureInner {} + +impl TextureInner { + fn as_native(&self) -> (glow::Texture, BindTarget) { + match *self { + Self::Renderbuffer { .. } | Self::DefaultRenderbuffer => { + panic!("Unexpected renderbuffer"); + } + Self::Texture { raw, target } => (raw, target), + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + Self::ExternalFramebuffer { .. } => panic!("Unexpected external framebuffer"), + } + } +} + +#[derive(Debug)] +pub struct Texture { + pub inner: TextureInner, + pub drop_guard: Option<crate::DropGuard>, + pub mip_level_count: u32, + pub array_layer_count: u32, + pub format: wgt::TextureFormat, + #[allow(unused)] + pub format_desc: TextureFormatDesc, + pub copy_size: CopyExtent, + pub is_cubemap: bool, +} + +impl Texture { + pub fn default_framebuffer(format: wgt::TextureFormat) -> Self { + Self { + inner: TextureInner::DefaultRenderbuffer, + drop_guard: None, + mip_level_count: 1, + array_layer_count: 1, + format, + format_desc: TextureFormatDesc { + internal: 0, + external: 0, + data_type: 0, + }, + copy_size: CopyExtent { + width: 0, + height: 0, + depth: 0, + }, + is_cubemap: false, + } + } + + /// Returns the `target`, whether the image is 3d and whether the image is a cubemap. + fn get_info_from_desc(desc: &TextureDescriptor) -> (u32, bool, bool) { + match desc.dimension { + wgt::TextureDimension::D1 => (glow::TEXTURE_2D, false, false), + wgt::TextureDimension::D2 => { + // HACK: detect a cube map; forces cube compatible textures to be cube textures + match (desc.is_cube_compatible(), desc.size.depth_or_array_layers) { + (false, 1) => (glow::TEXTURE_2D, false, false), + (false, _) => (glow::TEXTURE_2D_ARRAY, true, false), + (true, 6) => (glow::TEXTURE_CUBE_MAP, false, true), + (true, _) => (glow::TEXTURE_CUBE_MAP_ARRAY, true, true), + } + } + wgt::TextureDimension::D3 => (glow::TEXTURE_3D, true, false), + } + } +} + +#[derive(Clone, Debug)] +pub struct TextureView { + inner: TextureInner, + aspects: crate::FormatAspects, + mip_levels: Range<u32>, + array_layers: Range<u32>, + format: wgt::TextureFormat, +} + +#[derive(Debug)] +pub struct Sampler { + raw: glow::Sampler, +} + +pub struct BindGroupLayout { + entries: Arc<[wgt::BindGroupLayoutEntry]>, +} + +struct BindGroupLayoutInfo { + entries: Arc<[wgt::BindGroupLayoutEntry]>, + /// Mapping of resources, indexed by `binding`, into the whole layout space. + /// For texture resources, the value is the texture slot index. + /// For sampler resources, the value is the index of the sampler in the whole layout. + /// For buffers, the value is the uniform or storage slot index. + /// For unused bindings, the value is `!0` + binding_to_slot: Box<[u8]>, +} + +pub struct PipelineLayout { + group_infos: Box<[BindGroupLayoutInfo]>, + naga_options: naga::back::glsl::Options, +} + +impl PipelineLayout { + fn get_slot(&self, br: &naga::ResourceBinding) -> u8 { + let group_info = &self.group_infos[br.group as usize]; + group_info.binding_to_slot[br.binding as usize] + } +} + +#[derive(Debug)] +enum BindingRegister { + UniformBuffers, + StorageBuffers, + Textures, + Images, +} + +#[derive(Debug)] +enum RawBinding { + Buffer { + raw: glow::Buffer, + offset: i32, + size: i32, + }, + Texture { + raw: glow::Texture, + target: BindTarget, + aspects: crate::FormatAspects, + //TODO: mip levels, array layers + }, + Image(ImageBinding), + Sampler(glow::Sampler), +} + +#[derive(Debug)] +pub struct BindGroup { + contents: Box<[RawBinding]>, +} + +type ShaderId = u32; + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + label: Option<String>, + id: ShaderId, +} + +#[derive(Clone, Debug, Default)] +struct VertexFormatDesc { + element_count: i32, + element_format: u32, + attrib_kind: VertexAttribKind, +} + +#[derive(Clone, Debug, Default)] +struct AttributeDesc { + location: u32, + offset: u32, + buffer_index: u32, + format_desc: VertexFormatDesc, +} + +#[derive(Clone, Debug)] +struct BufferBinding { + raw: glow::Buffer, + offset: wgt::BufferAddress, +} + +#[derive(Clone, Debug)] +struct ImageBinding { + raw: glow::Texture, + mip_level: u32, + array_layer: Option<u32>, + access: u32, + format: u32, +} + +#[derive(Clone, Debug, Default, PartialEq)] +struct VertexBufferDesc { + step: wgt::VertexStepMode, + stride: u32, +} + +#[derive(Clone, Debug, Default)] +struct UniformDesc { + location: Option<glow::UniformLocation>, + size: u32, + utype: u32, +} + +// Safe: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for UniformDesc {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for UniformDesc {} + +/// For each texture in the pipeline layout, store the index of the only +/// sampler (in this layout) that the texture is used with. +type SamplerBindMap = [Option<u8>; MAX_TEXTURE_SLOTS]; + +struct PipelineInner { + program: glow::Program, + sampler_map: SamplerBindMap, + uniforms: [UniformDesc; MAX_PUSH_CONSTANTS], +} + +#[derive(Clone, Debug)] +struct DepthState { + function: u32, + mask: bool, +} + +#[derive(Clone, Debug, PartialEq)] +struct BlendComponent { + src: u32, + dst: u32, + equation: u32, +} + +#[derive(Clone, Debug, PartialEq)] +struct BlendDesc { + alpha: BlendComponent, + color: BlendComponent, +} + +#[derive(Clone, Debug, Default, PartialEq)] +struct ColorTargetDesc { + mask: wgt::ColorWrites, + blend: Option<BlendDesc>, +} + +#[derive(PartialEq, Eq, Hash)] +struct ProgramStage { + naga_stage: naga::ShaderStage, + shader_id: ShaderId, + entry_point: String, +} + +#[derive(PartialEq, Eq, Hash)] +struct ProgramCacheKey { + stages: ArrayVec<ProgramStage, 3>, + group_to_binding_to_slot: Box<[Box<[u8]>]>, +} + +type ProgramCache = FastHashMap<ProgramCacheKey, Result<Arc<PipelineInner>, crate::PipelineError>>; + +pub struct RenderPipeline { + inner: Arc<PipelineInner>, + primitive: wgt::PrimitiveState, + vertex_buffers: Box<[VertexBufferDesc]>, + vertex_attributes: Box<[AttributeDesc]>, + color_targets: Box<[ColorTargetDesc]>, + depth: Option<DepthState>, + depth_bias: wgt::DepthBiasState, + stencil: Option<StencilState>, + alpha_to_coverage_enabled: bool, +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Send for RenderPipeline {} +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for RenderPipeline {} + +pub struct ComputePipeline { + inner: Arc<PipelineInner>, +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Send for ComputePipeline {} +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for ComputePipeline {} + +#[derive(Debug)] +pub struct QuerySet { + queries: Box<[glow::Query]>, + target: BindTarget, +} + +#[derive(Debug)] +pub struct Fence { + last_completed: crate::FenceValue, + pending: Vec<(crate::FenceValue, glow::Fence)>, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +impl Fence { + fn get_latest(&self, gl: &glow::Context) -> crate::FenceValue { + let mut max_value = self.last_completed; + for &(value, sync) in self.pending.iter() { + let status = unsafe { gl.get_sync_status(sync) }; + if status == glow::SIGNALED { + max_value = value; + } + } + max_value + } + + fn maintain(&mut self, gl: &glow::Context) { + let latest = self.get_latest(gl); + for &(value, sync) in self.pending.iter() { + if value <= latest { + unsafe { + gl.delete_sync(sync); + } + } + } + self.pending.retain(|&(value, _)| value > latest); + self.last_completed = latest; + } +} + +#[derive(Clone, Debug, PartialEq)] +struct StencilOps { + pass: u32, + fail: u32, + depth_fail: u32, +} + +impl Default for StencilOps { + fn default() -> Self { + Self { + pass: glow::KEEP, + fail: glow::KEEP, + depth_fail: glow::KEEP, + } + } +} + +#[derive(Clone, Debug, PartialEq)] +struct StencilSide { + function: u32, + mask_read: u32, + mask_write: u32, + reference: u32, + ops: StencilOps, +} + +impl Default for StencilSide { + fn default() -> Self { + Self { + function: glow::ALWAYS, + mask_read: 0xFF, + mask_write: 0xFF, + reference: 0, + ops: StencilOps::default(), + } + } +} + +#[derive(Clone, Default)] +struct StencilState { + front: StencilSide, + back: StencilSide, +} + +#[derive(Clone, Debug, Default, PartialEq)] +struct PrimitiveState { + front_face: u32, + cull_face: u32, + unclipped_depth: bool, +} + +type InvalidatedAttachments = ArrayVec<u32, { crate::MAX_COLOR_ATTACHMENTS + 2 }>; + +#[derive(Debug)] +enum Command { + Draw { + topology: u32, + start_vertex: u32, + vertex_count: u32, + instance_count: u32, + }, + DrawIndexed { + topology: u32, + index_type: u32, + index_count: u32, + index_offset: wgt::BufferAddress, + base_vertex: i32, + instance_count: u32, + }, + DrawIndirect { + topology: u32, + indirect_buf: glow::Buffer, + indirect_offset: wgt::BufferAddress, + }, + DrawIndexedIndirect { + topology: u32, + index_type: u32, + indirect_buf: glow::Buffer, + indirect_offset: wgt::BufferAddress, + }, + Dispatch([u32; 3]), + DispatchIndirect { + indirect_buf: glow::Buffer, + indirect_offset: wgt::BufferAddress, + }, + ClearBuffer { + dst: Buffer, + dst_target: BindTarget, + range: crate::MemoryRange, + }, + CopyBufferToBuffer { + src: Buffer, + src_target: BindTarget, + dst: Buffer, + dst_target: BindTarget, + copy: crate::BufferCopy, + }, + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + CopyExternalImageToTexture { + src: wgt::ImageCopyExternalImage, + dst: glow::Texture, + dst_target: BindTarget, + dst_format: wgt::TextureFormat, + dst_premultiplication: bool, + copy: crate::TextureCopy, + }, + CopyTextureToTexture { + src: glow::Texture, + src_target: BindTarget, + dst: glow::Texture, + dst_target: BindTarget, + copy: crate::TextureCopy, + dst_is_cubemap: bool, + }, + CopyBufferToTexture { + src: Buffer, + #[allow(unused)] + src_target: BindTarget, + dst: glow::Texture, + dst_target: BindTarget, + dst_format: wgt::TextureFormat, + copy: crate::BufferTextureCopy, + }, + CopyTextureToBuffer { + src: glow::Texture, + src_target: BindTarget, + src_format: wgt::TextureFormat, + dst: Buffer, + #[allow(unused)] + dst_target: BindTarget, + copy: crate::BufferTextureCopy, + }, + SetIndexBuffer(glow::Buffer), + BeginQuery(glow::Query, BindTarget), + EndQuery(BindTarget), + CopyQueryResults { + query_range: Range<u32>, + dst: Buffer, + dst_target: BindTarget, + dst_offset: wgt::BufferAddress, + }, + ResetFramebuffer { + is_default: bool, + }, + BindAttachment { + attachment: u32, + view: TextureView, + }, + ResolveAttachment { + attachment: u32, + dst: TextureView, + size: wgt::Extent3d, + }, + InvalidateAttachments(InvalidatedAttachments), + SetDrawColorBuffers(u8), + ClearColorF { + draw_buffer: u32, + color: [f32; 4], + is_srgb: bool, + }, + ClearColorU(u32, [u32; 4]), + ClearColorI(u32, [i32; 4]), + ClearDepth(f32), + ClearStencil(u32), + // Clearing both the depth and stencil buffer individually appears to + // result in the stencil buffer failing to clear, atleast in WebGL. + // It is also more efficient to emit a single command instead of two for + // this. + ClearDepthAndStencil(f32, u32), + BufferBarrier(glow::Buffer, crate::BufferUses), + TextureBarrier(crate::TextureUses), + SetViewport { + rect: crate::Rect<i32>, + depth: Range<f32>, + }, + SetScissor(crate::Rect<i32>), + SetStencilFunc { + face: u32, + function: u32, + reference: u32, + read_mask: u32, + }, + SetStencilOps { + face: u32, + write_mask: u32, + ops: StencilOps, + }, + SetDepth(DepthState), + SetDepthBias(wgt::DepthBiasState), + ConfigureDepthStencil(crate::FormatAspects), + SetAlphaToCoverage(bool), + SetVertexAttribute { + buffer: Option<glow::Buffer>, + buffer_desc: VertexBufferDesc, + attribute_desc: AttributeDesc, + }, + UnsetVertexAttribute(u32), + SetVertexBuffer { + index: u32, + buffer: BufferBinding, + buffer_desc: VertexBufferDesc, + }, + SetProgram(glow::Program), + SetPrimitive(PrimitiveState), + SetBlendConstant([f32; 4]), + SetColorTarget { + draw_buffer_index: Option<u32>, + desc: ColorTargetDesc, + }, + BindBuffer { + target: BindTarget, + slot: u32, + buffer: glow::Buffer, + offset: i32, + size: i32, + }, + BindSampler(u32, Option<glow::Sampler>), + BindTexture { + slot: u32, + texture: glow::Texture, + target: BindTarget, + aspects: crate::FormatAspects, + }, + BindImage { + slot: u32, + binding: ImageBinding, + }, + InsertDebugMarker(Range<u32>), + PushDebugGroup(Range<u32>), + PopDebugGroup, + SetPushConstants { + uniform: UniformDesc, + /// Offset from the start of the `data_bytes` + offset: u32, + }, +} + +#[derive(Default)] +pub struct CommandBuffer { + label: Option<String>, + commands: Vec<Command>, + data_bytes: Vec<u8>, + queries: Vec<glow::Query>, +} + +impl fmt::Debug for CommandBuffer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut builder = f.debug_struct("CommandBuffer"); + if let Some(ref label) = self.label { + builder.field("label", label); + } + builder.finish() + } +} + +//TODO: we would have something like `Arc<typed_arena::Arena>` +// here and in the command buffers. So that everything grows +// inside the encoder and stays there until `reset_all`. + +pub struct CommandEncoder { + cmd_buffer: CommandBuffer, + state: command::State, + private_caps: PrivateCapabilities, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("cmd_buffer", &self.cmd_buffer) + .finish() + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/queue.rs b/third_party/rust/wgpu-hal/src/gles/queue.rs new file mode 100644 index 0000000000..87a82524fc --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/queue.rs @@ -0,0 +1,1532 @@ +use super::Command as C; +use arrayvec::ArrayVec; +use glow::HasContext; +use std::{mem, slice, sync::Arc}; + +#[cfg(not(target_arch = "wasm32"))] +const DEBUG_ID: u32 = 0; + +const CUBEMAP_FACES: [u32; 6] = [ + glow::TEXTURE_CUBE_MAP_POSITIVE_X, + glow::TEXTURE_CUBE_MAP_NEGATIVE_X, + glow::TEXTURE_CUBE_MAP_POSITIVE_Y, + glow::TEXTURE_CUBE_MAP_NEGATIVE_Y, + glow::TEXTURE_CUBE_MAP_POSITIVE_Z, + glow::TEXTURE_CUBE_MAP_NEGATIVE_Z, +]; + +#[cfg(not(target_arch = "wasm32"))] +fn extract_marker<'a>(data: &'a [u8], range: &std::ops::Range<u32>) -> &'a str { + std::str::from_utf8(&data[range.start as usize..range.end as usize]).unwrap() +} + +fn is_layered_target(target: super::BindTarget) -> bool { + match target { + glow::TEXTURE_2D_ARRAY | glow::TEXTURE_3D | glow::TEXTURE_CUBE_MAP_ARRAY => true, + _ => false, + } +} + +impl super::Queue { + /// Performs a manual shader clear, used as a workaround for a clearing bug on mesa + unsafe fn perform_shader_clear(&self, gl: &glow::Context, draw_buffer: u32, color: [f32; 4]) { + unsafe { gl.use_program(Some(self.shader_clear_program)) }; + unsafe { + gl.uniform_4_f32( + Some(&self.shader_clear_program_color_uniform_location), + color[0], + color[1], + color[2], + color[3], + ) + }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.disable(glow::BLEND) }; + unsafe { gl.disable(glow::CULL_FACE) }; + unsafe { gl.draw_buffers(&[glow::COLOR_ATTACHMENT0 + draw_buffer]) }; + unsafe { gl.draw_arrays(glow::TRIANGLES, 0, 3) }; + + if self.draw_buffer_count != 0 { + // Reset the draw buffers to what they were before the clear + let indices = (0..self.draw_buffer_count as u32) + .map(|i| glow::COLOR_ATTACHMENT0 + i) + .collect::<ArrayVec<_, { crate::MAX_COLOR_ATTACHMENTS }>>(); + unsafe { gl.draw_buffers(&indices) }; + } + #[cfg(not(target_arch = "wasm32"))] + for draw_buffer in 0..self.draw_buffer_count as u32 { + unsafe { gl.disable_draw_buffer(glow::BLEND, draw_buffer) }; + } + } + + unsafe fn reset_state(&mut self, gl: &glow::Context) { + unsafe { gl.use_program(None) }; + unsafe { gl.bind_framebuffer(glow::FRAMEBUFFER, None) }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.disable(glow::BLEND) }; + unsafe { gl.disable(glow::CULL_FACE) }; + unsafe { gl.disable(glow::POLYGON_OFFSET_FILL) }; + unsafe { gl.disable(glow::SAMPLE_ALPHA_TO_COVERAGE) }; + if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + unsafe { gl.disable(glow::DEPTH_CLAMP) }; + } + + unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, None) }; + self.current_index_buffer = None; + } + + unsafe fn set_attachment( + &self, + gl: &glow::Context, + fbo_target: u32, + attachment: u32, + view: &super::TextureView, + ) { + match view.inner { + super::TextureInner::Renderbuffer { raw } => { + unsafe { + gl.framebuffer_renderbuffer( + fbo_target, + attachment, + glow::RENDERBUFFER, + Some(raw), + ) + }; + } + super::TextureInner::DefaultRenderbuffer => panic!("Unexpected default RBO"), + super::TextureInner::Texture { raw, target } => { + let num_layers = view.array_layers.end - view.array_layers.start; + if num_layers > 1 { + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + unsafe { + gl.framebuffer_texture_multiview_ovr( + fbo_target, + attachment, + Some(raw), + view.mip_levels.start as i32, + view.array_layers.start as i32, + num_layers as i32, + ) + }; + } else if is_layered_target(target) { + unsafe { + gl.framebuffer_texture_layer( + fbo_target, + attachment, + Some(raw), + view.mip_levels.start as i32, + view.array_layers.start as i32, + ) + }; + } else if target == glow::TEXTURE_CUBE_MAP { + unsafe { + gl.framebuffer_texture_2d( + fbo_target, + attachment, + CUBEMAP_FACES[view.array_layers.start as usize], + Some(raw), + view.mip_levels.start as i32, + ) + }; + } else { + unsafe { + gl.framebuffer_texture_2d( + fbo_target, + attachment, + target, + Some(raw), + view.mip_levels.start as i32, + ) + }; + } + } + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + super::TextureInner::ExternalFramebuffer { ref inner } => unsafe { + gl.bind_external_framebuffer(glow::FRAMEBUFFER, inner); + }, + } + } + + unsafe fn process( + &mut self, + gl: &glow::Context, + command: &C, + #[cfg_attr(target_arch = "wasm32", allow(unused))] data_bytes: &[u8], + queries: &[glow::Query], + ) { + match *command { + C::Draw { + topology, + start_vertex, + vertex_count, + instance_count, + } => { + // Don't use `gl.draw_arrays` for `instance_count == 1`. + // Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `draw_arrays`. + // See https://github.com/gfx-rs/wgpu/issues/3578 + unsafe { + gl.draw_arrays_instanced( + topology, + start_vertex as i32, + vertex_count as i32, + instance_count as i32, + ) + }; + } + C::DrawIndexed { + topology, + index_type, + index_count, + index_offset, + base_vertex, + instance_count, + } => { + match base_vertex { + // Don't use `gl.draw_elements`/`gl.draw_elements_base_vertex` for `instance_count == 1`. + // Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `gl.draw_elements`/`gl.draw_elements_base_vertex`. + // See https://github.com/gfx-rs/wgpu/issues/3578 + 0 => unsafe { + gl.draw_elements_instanced( + topology, + index_count as i32, + index_type, + index_offset as i32, + instance_count as i32, + ) + }, + _ => unsafe { + gl.draw_elements_instanced_base_vertex( + topology, + index_count as _, + index_type, + index_offset as i32, + instance_count as i32, + base_vertex, + ) + }, + } + } + C::DrawIndirect { + topology, + indirect_buf, + indirect_offset, + } => { + unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(indirect_buf)) }; + unsafe { gl.draw_arrays_indirect_offset(topology, indirect_offset as i32) }; + } + C::DrawIndexedIndirect { + topology, + index_type, + indirect_buf, + indirect_offset, + } => { + unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(indirect_buf)) }; + unsafe { + gl.draw_elements_indirect_offset(topology, index_type, indirect_offset as i32) + }; + } + C::Dispatch(group_counts) => { + unsafe { gl.dispatch_compute(group_counts[0], group_counts[1], group_counts[2]) }; + } + C::DispatchIndirect { + indirect_buf, + indirect_offset, + } => { + unsafe { gl.bind_buffer(glow::DISPATCH_INDIRECT_BUFFER, Some(indirect_buf)) }; + unsafe { gl.dispatch_compute_indirect(indirect_offset as i32) }; + } + C::ClearBuffer { + ref dst, + dst_target, + ref range, + } => match dst.raw { + Some(buffer) => { + // When `INDEX_BUFFER_ROLE_CHANGE` isn't available, we can't copy into the + // index buffer from the zero buffer. This would fail in Chrome with the + // following message: + // + // > Cannot copy into an element buffer destination from a non-element buffer + // > source + // + // Instead, we'll upload zeroes into the buffer. + let can_use_zero_buffer = self + .shared + .private_caps + .contains(super::PrivateCapabilities::INDEX_BUFFER_ROLE_CHANGE) + || dst_target != glow::ELEMENT_ARRAY_BUFFER; + + if can_use_zero_buffer { + unsafe { gl.bind_buffer(glow::COPY_READ_BUFFER, Some(self.zero_buffer)) }; + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + let mut dst_offset = range.start; + while dst_offset < range.end { + let size = (range.end - dst_offset).min(super::ZERO_BUFFER_SIZE as u64); + unsafe { + gl.copy_buffer_sub_data( + glow::COPY_READ_BUFFER, + dst_target, + 0, + dst_offset as i32, + size as i32, + ) + }; + dst_offset += size; + } + } else { + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + let zeroes = vec![0u8; (range.end - range.start) as usize]; + unsafe { + gl.buffer_sub_data_u8_slice(dst_target, range.start as i32, &zeroes) + }; + } + } + None => { + dst.data.as_ref().unwrap().lock().unwrap().as_mut_slice() + [range.start as usize..range.end as usize] + .fill(0); + } + }, + C::CopyBufferToBuffer { + ref src, + src_target, + ref dst, + dst_target, + copy, + } => { + let copy_src_target = glow::COPY_READ_BUFFER; + let is_index_buffer_only_element_dst = !self + .shared + .private_caps + .contains(super::PrivateCapabilities::INDEX_BUFFER_ROLE_CHANGE) + && dst_target == glow::ELEMENT_ARRAY_BUFFER + || src_target == glow::ELEMENT_ARRAY_BUFFER; + + // WebGL not allowed to copy data from other targets to element buffer and can't copy element data to other buffers + let copy_dst_target = if is_index_buffer_only_element_dst { + glow::ELEMENT_ARRAY_BUFFER + } else { + glow::COPY_WRITE_BUFFER + }; + let size = copy.size.get() as usize; + match (src.raw, dst.raw) { + (Some(ref src), Some(ref dst)) => { + unsafe { gl.bind_buffer(copy_src_target, Some(*src)) }; + unsafe { gl.bind_buffer(copy_dst_target, Some(*dst)) }; + unsafe { + gl.copy_buffer_sub_data( + copy_src_target, + copy_dst_target, + copy.src_offset as _, + copy.dst_offset as _, + copy.size.get() as _, + ) + }; + } + (Some(src), None) => { + let mut data = dst.data.as_ref().unwrap().lock().unwrap(); + let dst_data = &mut data.as_mut_slice() + [copy.dst_offset as usize..copy.dst_offset as usize + size]; + + unsafe { gl.bind_buffer(copy_src_target, Some(src)) }; + unsafe { + self.shared.get_buffer_sub_data( + gl, + copy_src_target, + copy.src_offset as i32, + dst_data, + ) + }; + } + (None, Some(dst)) => { + let data = src.data.as_ref().unwrap().lock().unwrap(); + let src_data = &data.as_slice() + [copy.src_offset as usize..copy.src_offset as usize + size]; + unsafe { gl.bind_buffer(copy_dst_target, Some(dst)) }; + unsafe { + gl.buffer_sub_data_u8_slice( + copy_dst_target, + copy.dst_offset as i32, + src_data, + ) + }; + } + (None, None) => { + todo!() + } + } + unsafe { gl.bind_buffer(copy_src_target, None) }; + if is_index_buffer_only_element_dst { + unsafe { + gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, self.current_index_buffer) + }; + } else { + unsafe { gl.bind_buffer(copy_dst_target, None) }; + } + } + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + C::CopyExternalImageToTexture { + ref src, + dst, + dst_target, + dst_format, + dst_premultiplication, + ref copy, + } => { + const UNPACK_FLIP_Y_WEBGL: u32 = + web_sys::WebGl2RenderingContext::UNPACK_FLIP_Y_WEBGL; + const UNPACK_PREMULTIPLY_ALPHA_WEBGL: u32 = + web_sys::WebGl2RenderingContext::UNPACK_PREMULTIPLY_ALPHA_WEBGL; + + unsafe { + if src.flip_y { + gl.pixel_store_bool(UNPACK_FLIP_Y_WEBGL, true); + } + if dst_premultiplication { + gl.pixel_store_bool(UNPACK_PREMULTIPLY_ALPHA_WEBGL, true); + } + } + + unsafe { gl.bind_texture(dst_target, Some(dst)) }; + let format_desc = self.shared.describe_texture_format(dst_format); + if is_layered_target(dst_target) { + let z_offset = + if let glow::TEXTURE_2D_ARRAY | glow::TEXTURE_CUBE_MAP_ARRAY = dst_target { + copy.dst_base.array_layer as i32 + } else { + copy.dst_base.origin.z as i32 + }; + + match src.source { + wgt::ExternalImageSource::ImageBitmap(ref b) => unsafe { + gl.tex_sub_image_3d_with_image_bitmap( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + b, + ); + }, + wgt::ExternalImageSource::HTMLVideoElement(ref v) => unsafe { + gl.tex_sub_image_3d_with_html_video_element( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + v, + ); + }, + wgt::ExternalImageSource::HTMLCanvasElement(ref c) => unsafe { + gl.tex_sub_image_3d_with_html_canvas_element( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + c, + ); + }, + wgt::ExternalImageSource::OffscreenCanvas(_) => unreachable!(), + } + } else { + let dst_target = if let glow::TEXTURE_CUBE_MAP = dst_target { + CUBEMAP_FACES[copy.dst_base.array_layer as usize] + } else { + dst_target + }; + + match src.source { + wgt::ExternalImageSource::ImageBitmap(ref b) => unsafe { + gl.tex_sub_image_2d_with_image_bitmap_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + b, + ); + }, + wgt::ExternalImageSource::HTMLVideoElement(ref v) => unsafe { + gl.tex_sub_image_2d_with_html_video_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + v, + ) + }, + wgt::ExternalImageSource::HTMLCanvasElement(ref c) => unsafe { + gl.tex_sub_image_2d_with_html_canvas_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + c, + ) + }, + wgt::ExternalImageSource::OffscreenCanvas(_) => unreachable!(), + } + } + + unsafe { + if src.flip_y { + gl.pixel_store_bool(UNPACK_FLIP_Y_WEBGL, false); + } + if dst_premultiplication { + gl.pixel_store_bool(UNPACK_PREMULTIPLY_ALPHA_WEBGL, false); + } + } + } + C::CopyTextureToTexture { + src, + src_target, + dst, + dst_target, + dst_is_cubemap, + ref copy, + } => { + //TODO: handle 3D copies + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)) }; + if is_layered_target(src_target) { + //TODO: handle GLES without framebuffer_texture_3d + unsafe { + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.src_base.mip_level as i32, + copy.src_base.array_layer as i32, + ) + }; + } else { + unsafe { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.src_base.mip_level as i32, + ) + }; + } + + unsafe { gl.bind_texture(dst_target, Some(dst)) }; + if dst_is_cubemap { + unsafe { + gl.copy_tex_sub_image_2d( + CUBEMAP_FACES[copy.dst_base.array_layer as usize], + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ) + }; + } else if is_layered_target(dst_target) { + unsafe { + gl.copy_tex_sub_image_3d( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + if let glow::TEXTURE_2D_ARRAY | glow::TEXTURE_CUBE_MAP_ARRAY = + dst_target + { + copy.dst_base.array_layer as i32 + } else { + copy.dst_base.origin.z as i32 + }, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ) + }; + } else { + unsafe { + gl.copy_tex_sub_image_2d( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ) + }; + } + } + C::CopyBufferToTexture { + ref src, + src_target: _, + dst, + dst_target, + dst_format, + ref copy, + } => { + let (block_width, block_height) = dst_format.block_dimensions(); + let block_size = dst_format.block_size(None).unwrap(); + let format_desc = self.shared.describe_texture_format(dst_format); + let row_texels = copy + .buffer_layout + .bytes_per_row + .map_or(0, |bpr| block_width * bpr / block_size); + let column_texels = copy + .buffer_layout + .rows_per_image + .map_or(0, |rpi| block_height * rpi); + + unsafe { gl.bind_texture(dst_target, Some(dst)) }; + unsafe { gl.pixel_store_i32(glow::UNPACK_ROW_LENGTH, row_texels as i32) }; + unsafe { gl.pixel_store_i32(glow::UNPACK_IMAGE_HEIGHT, column_texels as i32) }; + let mut unbind_unpack_buffer = false; + if !dst_format.is_compressed() { + let buffer_data; + let unpack_data = match src.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(buffer)) }; + unbind_unpack_buffer = true; + glow::PixelUnpackData::BufferOffset(copy.buffer_layout.offset as u32) + } + None => { + buffer_data = src.data.as_ref().unwrap().lock().unwrap(); + let src_data = + &buffer_data.as_slice()[copy.buffer_layout.offset as usize..]; + glow::PixelUnpackData::Slice(src_data) + } + }; + if is_layered_target(dst_target) { + unsafe { + gl.tex_sub_image_3d( + dst_target, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + if let glow::TEXTURE_2D_ARRAY | glow::TEXTURE_CUBE_MAP_ARRAY = + dst_target + { + copy.texture_base.array_layer as i32 + } else { + copy.texture_base.origin.z as i32 + }, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + unpack_data, + ) + }; + } else { + unsafe { + gl.tex_sub_image_2d( + if let glow::TEXTURE_CUBE_MAP = dst_target { + CUBEMAP_FACES[copy.texture_base.array_layer as usize] + } else { + dst_target + }, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + unpack_data, + ) + }; + } + } else { + let bytes_per_row = copy + .buffer_layout + .bytes_per_row + .unwrap_or(copy.size.width * block_size); + let minimum_rows_per_image = + (copy.size.height + block_height - 1) / block_height; + let rows_per_image = copy + .buffer_layout + .rows_per_image + .unwrap_or(minimum_rows_per_image); + + let bytes_per_image = bytes_per_row * rows_per_image; + let minimum_bytes_per_image = bytes_per_row * minimum_rows_per_image; + let bytes_in_upload = + (bytes_per_image * (copy.size.depth - 1)) + minimum_bytes_per_image; + let offset = copy.buffer_layout.offset as u32; + + let buffer_data; + let unpack_data = match src.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(buffer)) }; + unbind_unpack_buffer = true; + glow::CompressedPixelUnpackData::BufferRange( + offset..offset + bytes_in_upload, + ) + } + None => { + buffer_data = src.data.as_ref().unwrap().lock().unwrap(); + let src_data = &buffer_data.as_slice() + [(offset as usize)..(offset + bytes_in_upload) as usize]; + glow::CompressedPixelUnpackData::Slice(src_data) + } + }; + + if is_layered_target(dst_target) { + unsafe { + gl.compressed_tex_sub_image_3d( + dst_target, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + if let glow::TEXTURE_2D_ARRAY | glow::TEXTURE_CUBE_MAP_ARRAY = + dst_target + { + copy.texture_base.array_layer as i32 + } else { + copy.texture_base.origin.z as i32 + }, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.internal, + unpack_data, + ) + }; + } else { + unsafe { + gl.compressed_tex_sub_image_2d( + if let glow::TEXTURE_CUBE_MAP = dst_target { + CUBEMAP_FACES[copy.texture_base.array_layer as usize] + } else { + dst_target + }, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.internal, + unpack_data, + ) + }; + } + } + if unbind_unpack_buffer { + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, None) }; + } + } + C::CopyTextureToBuffer { + src, + src_target, + src_format, + ref dst, + dst_target: _, + ref copy, + } => { + let block_size = src_format.block_size(None).unwrap(); + if src_format.is_compressed() { + log::error!("Not implemented yet: compressed texture copy to buffer"); + return; + } + if src_target == glow::TEXTURE_CUBE_MAP + || src_target == glow::TEXTURE_CUBE_MAP_ARRAY + { + log::error!("Not implemented yet: cubemap texture copy to buffer"); + return; + } + let format_desc = self.shared.describe_texture_format(src_format); + let row_texels = copy + .buffer_layout + .bytes_per_row + .map_or(copy.size.width, |bpr| bpr / block_size); + let column_texels = copy + .buffer_layout + .rows_per_image + .unwrap_or(copy.size.height); + + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)) }; + + let read_pixels = |offset| { + let mut buffer_data; + let unpack_data = match dst.raw { + Some(buffer) => { + unsafe { gl.pixel_store_i32(glow::PACK_ROW_LENGTH, row_texels as i32) }; + unsafe { gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(buffer)) }; + glow::PixelPackData::BufferOffset(offset as u32) + } + None => { + buffer_data = dst.data.as_ref().unwrap().lock().unwrap(); + let dst_data = &mut buffer_data.as_mut_slice()[offset as usize..]; + glow::PixelPackData::Slice(dst_data) + } + }; + unsafe { + gl.read_pixels( + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + unpack_data, + ) + }; + }; + + match src_target { + glow::TEXTURE_2D => { + unsafe { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.texture_base.mip_level as i32, + ) + }; + read_pixels(copy.buffer_layout.offset); + } + glow::TEXTURE_2D_ARRAY => { + unsafe { + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.texture_base.mip_level as i32, + copy.texture_base.array_layer as i32, + ) + }; + read_pixels(copy.buffer_layout.offset); + } + glow::TEXTURE_3D => { + for z in copy.texture_base.origin.z..copy.size.depth { + unsafe { + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.texture_base.mip_level as i32, + z as i32, + ) + }; + let offset = copy.buffer_layout.offset + + (z * block_size * row_texels * column_texels) as u64; + read_pixels(offset); + } + } + glow::TEXTURE_CUBE_MAP | glow::TEXTURE_CUBE_MAP_ARRAY => unimplemented!(), + _ => unreachable!(), + } + } + C::SetIndexBuffer(buffer) => { + unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, Some(buffer)) }; + self.current_index_buffer = Some(buffer); + } + C::BeginQuery(query, target) => { + unsafe { gl.begin_query(target, query) }; + } + C::EndQuery(target) => { + unsafe { gl.end_query(target) }; + } + C::CopyQueryResults { + ref query_range, + ref dst, + dst_target, + dst_offset, + } => { + self.temp_query_results.clear(); + for &query in queries[query_range.start as usize..query_range.end as usize].iter() { + let result = unsafe { gl.get_query_parameter_u32(query, glow::QUERY_RESULT) }; + self.temp_query_results.push(result as u64); + } + let query_data = unsafe { + slice::from_raw_parts( + self.temp_query_results.as_ptr() as *const u8, + self.temp_query_results.len() * mem::size_of::<u64>(), + ) + }; + match dst.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + unsafe { + gl.buffer_sub_data_u8_slice(dst_target, dst_offset as i32, query_data) + }; + } + None => { + let data = &mut dst.data.as_ref().unwrap().lock().unwrap(); + let len = query_data.len().min(data.len()); + data[..len].copy_from_slice(&query_data[..len]); + } + } + } + C::ResetFramebuffer { is_default } => { + if is_default { + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + } else { + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.draw_fbo)) }; + unsafe { + gl.framebuffer_texture_2d( + glow::DRAW_FRAMEBUFFER, + glow::DEPTH_STENCIL_ATTACHMENT, + glow::TEXTURE_2D, + None, + 0, + ) + }; + for i in 0..crate::MAX_COLOR_ATTACHMENTS { + let target = glow::COLOR_ATTACHMENT0 + i as u32; + unsafe { + gl.framebuffer_texture_2d( + glow::DRAW_FRAMEBUFFER, + target, + glow::TEXTURE_2D, + None, + 0, + ) + }; + } + } + unsafe { gl.color_mask(true, true, true, true) }; + unsafe { gl.depth_mask(true) }; + unsafe { gl.stencil_mask(!0) }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + } + C::BindAttachment { + attachment, + ref view, + } => { + unsafe { self.set_attachment(gl, glow::DRAW_FRAMEBUFFER, attachment, view) }; + } + C::ResolveAttachment { + attachment, + ref dst, + ref size, + } => { + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.draw_fbo)) }; + unsafe { gl.read_buffer(attachment) }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.copy_fbo)) }; + unsafe { + self.set_attachment(gl, glow::DRAW_FRAMEBUFFER, glow::COLOR_ATTACHMENT0, dst) + }; + unsafe { + gl.blit_framebuffer( + 0, + 0, + size.width as i32, + size.height as i32, + 0, + 0, + size.width as i32, + size.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.draw_fbo)) }; + } + C::InvalidateAttachments(ref list) => { + unsafe { gl.invalidate_framebuffer(glow::DRAW_FRAMEBUFFER, list) }; + } + C::SetDrawColorBuffers(count) => { + self.draw_buffer_count = count; + let indices = (0..count as u32) + .map(|i| glow::COLOR_ATTACHMENT0 + i) + .collect::<ArrayVec<_, { crate::MAX_COLOR_ATTACHMENTS }>>(); + unsafe { gl.draw_buffers(&indices) }; + + if self + .shared + .private_caps + .contains(super::PrivateCapabilities::CAN_DISABLE_DRAW_BUFFER) + { + for draw_buffer in 0..count as u32 { + unsafe { gl.disable_draw_buffer(glow::BLEND, draw_buffer) }; + } + } + } + C::ClearColorF { + draw_buffer, + ref color, + is_srgb, + } => { + if self + .shared + .workarounds + .contains(super::Workarounds::MESA_I915_SRGB_SHADER_CLEAR) + && is_srgb + { + unsafe { self.perform_shader_clear(gl, draw_buffer, *color) }; + } else { + unsafe { gl.clear_buffer_f32_slice(glow::COLOR, draw_buffer, color) }; + } + } + C::ClearColorU(draw_buffer, ref color) => { + unsafe { gl.clear_buffer_u32_slice(glow::COLOR, draw_buffer, color) }; + } + C::ClearColorI(draw_buffer, ref color) => { + unsafe { gl.clear_buffer_i32_slice(glow::COLOR, draw_buffer, color) }; + } + C::ClearDepth(depth) => { + unsafe { gl.clear_buffer_f32_slice(glow::DEPTH, 0, &[depth]) }; + } + C::ClearStencil(value) => { + unsafe { gl.clear_buffer_i32_slice(glow::STENCIL, 0, &[value as i32]) }; + } + C::ClearDepthAndStencil(depth, stencil_value) => { + unsafe { + gl.clear_buffer_depth_stencil( + glow::DEPTH_STENCIL, + 0, + depth, + stencil_value as i32, + ) + }; + } + C::BufferBarrier(raw, usage) => { + let mut flags = 0; + if usage.contains(crate::BufferUses::VERTEX) { + flags |= glow::VERTEX_ATTRIB_ARRAY_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::ARRAY_BUFFER, Some(raw)) }; + unsafe { gl.vertex_attrib_pointer_f32(0, 1, glow::BYTE, true, 0, 0) }; + } + if usage.contains(crate::BufferUses::INDEX) { + flags |= glow::ELEMENT_ARRAY_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, Some(raw)) }; + } + if usage.contains(crate::BufferUses::UNIFORM) { + flags |= glow::UNIFORM_BARRIER_BIT; + } + if usage.contains(crate::BufferUses::INDIRECT) { + flags |= glow::COMMAND_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(raw)) }; + } + if usage.contains(crate::BufferUses::COPY_SRC) { + flags |= glow::PIXEL_BUFFER_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(raw)) }; + } + if usage.contains(crate::BufferUses::COPY_DST) { + flags |= glow::PIXEL_BUFFER_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(raw)) }; + } + if usage.intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE) { + flags |= glow::BUFFER_UPDATE_BARRIER_BIT; + } + if usage.intersects( + crate::BufferUses::STORAGE_READ | crate::BufferUses::STORAGE_READ_WRITE, + ) { + flags |= glow::SHADER_STORAGE_BARRIER_BIT; + } + unsafe { gl.memory_barrier(flags) }; + } + C::TextureBarrier(usage) => { + let mut flags = 0; + if usage.contains(crate::TextureUses::RESOURCE) { + flags |= glow::TEXTURE_FETCH_BARRIER_BIT; + } + if usage.intersects( + crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE, + ) { + flags |= glow::SHADER_IMAGE_ACCESS_BARRIER_BIT; + } + if usage.contains(crate::TextureUses::COPY_DST) { + flags |= glow::TEXTURE_UPDATE_BARRIER_BIT; + } + if usage.intersects( + crate::TextureUses::COLOR_TARGET + | crate::TextureUses::DEPTH_STENCIL_READ + | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= glow::FRAMEBUFFER_BARRIER_BIT; + } + unsafe { gl.memory_barrier(flags) }; + } + C::SetViewport { + ref rect, + ref depth, + } => { + unsafe { gl.viewport(rect.x, rect.y, rect.w, rect.h) }; + unsafe { gl.depth_range_f32(depth.start, depth.end) }; + } + C::SetScissor(ref rect) => { + unsafe { gl.scissor(rect.x, rect.y, rect.w, rect.h) }; + unsafe { gl.enable(glow::SCISSOR_TEST) }; + } + C::SetStencilFunc { + face, + function, + reference, + read_mask, + } => { + unsafe { gl.stencil_func_separate(face, function, reference as i32, read_mask) }; + } + C::SetStencilOps { + face, + write_mask, + ref ops, + } => { + unsafe { gl.stencil_mask_separate(face, write_mask) }; + unsafe { gl.stencil_op_separate(face, ops.fail, ops.depth_fail, ops.pass) }; + } + C::SetVertexAttribute { + buffer, + ref buffer_desc, + attribute_desc: ref vat, + } => { + unsafe { gl.bind_buffer(glow::ARRAY_BUFFER, buffer) }; + unsafe { gl.enable_vertex_attrib_array(vat.location) }; + + if buffer.is_none() { + match vat.format_desc.attrib_kind { + super::VertexAttribKind::Float => unsafe { + gl.vertex_attrib_format_f32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + true, // always normalized + vat.offset, + ) + }, + super::VertexAttribKind::Integer => unsafe { + gl.vertex_attrib_format_i32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + vat.offset, + ) + }, + } + + //Note: there is apparently a bug on AMD 3500U: + // this call is ignored if the current array is disabled. + unsafe { gl.vertex_attrib_binding(vat.location, vat.buffer_index) }; + } else { + match vat.format_desc.attrib_kind { + super::VertexAttribKind::Float => unsafe { + gl.vertex_attrib_pointer_f32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + true, // always normalized + buffer_desc.stride as i32, + vat.offset as i32, + ) + }, + super::VertexAttribKind::Integer => unsafe { + gl.vertex_attrib_pointer_i32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + buffer_desc.stride as i32, + vat.offset as i32, + ) + }, + } + unsafe { gl.vertex_attrib_divisor(vat.location, buffer_desc.step as u32) }; + } + } + C::UnsetVertexAttribute(location) => { + unsafe { gl.disable_vertex_attrib_array(location) }; + } + C::SetVertexBuffer { + index, + ref buffer, + ref buffer_desc, + } => { + unsafe { gl.vertex_binding_divisor(index, buffer_desc.step as u32) }; + unsafe { + gl.bind_vertex_buffer( + index, + Some(buffer.raw), + buffer.offset as i32, + buffer_desc.stride as i32, + ) + }; + } + C::SetDepth(ref depth) => { + unsafe { gl.depth_func(depth.function) }; + unsafe { gl.depth_mask(depth.mask) }; + } + C::SetDepthBias(bias) => { + if bias.is_enabled() { + unsafe { gl.enable(glow::POLYGON_OFFSET_FILL) }; + unsafe { gl.polygon_offset(bias.constant as f32, bias.slope_scale) }; + } else { + unsafe { gl.disable(glow::POLYGON_OFFSET_FILL) }; + } + } + C::ConfigureDepthStencil(aspects) => { + if aspects.contains(crate::FormatAspects::DEPTH) { + unsafe { gl.enable(glow::DEPTH_TEST) }; + } else { + unsafe { gl.disable(glow::DEPTH_TEST) }; + } + if aspects.contains(crate::FormatAspects::STENCIL) { + unsafe { gl.enable(glow::STENCIL_TEST) }; + } else { + unsafe { gl.disable(glow::STENCIL_TEST) }; + } + } + C::SetAlphaToCoverage(enabled) => { + if enabled { + unsafe { gl.enable(glow::SAMPLE_ALPHA_TO_COVERAGE) }; + } else { + unsafe { gl.disable(glow::SAMPLE_ALPHA_TO_COVERAGE) }; + } + } + C::SetProgram(program) => { + unsafe { gl.use_program(Some(program)) }; + } + C::SetPrimitive(ref state) => { + unsafe { gl.front_face(state.front_face) }; + if state.cull_face != 0 { + unsafe { gl.enable(glow::CULL_FACE) }; + unsafe { gl.cull_face(state.cull_face) }; + } else { + unsafe { gl.disable(glow::CULL_FACE) }; + } + if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + //Note: this is a bit tricky, since we are controlling the clip, not the clamp. + if state.unclipped_depth { + unsafe { gl.enable(glow::DEPTH_CLAMP) }; + } else { + unsafe { gl.disable(glow::DEPTH_CLAMP) }; + } + } + } + C::SetBlendConstant(c) => { + unsafe { gl.blend_color(c[0], c[1], c[2], c[3]) }; + } + C::SetColorTarget { + draw_buffer_index, + desc: super::ColorTargetDesc { mask, ref blend }, + } => { + use wgt::ColorWrites as Cw; + if let Some(index) = draw_buffer_index { + unsafe { + gl.color_mask_draw_buffer( + index, + mask.contains(Cw::RED), + mask.contains(Cw::GREEN), + mask.contains(Cw::BLUE), + mask.contains(Cw::ALPHA), + ) + }; + if let Some(ref blend) = *blend { + unsafe { gl.enable_draw_buffer(index, glow::BLEND) }; + if blend.color != blend.alpha { + unsafe { + gl.blend_equation_separate_draw_buffer( + index, + blend.color.equation, + blend.alpha.equation, + ) + }; + unsafe { + gl.blend_func_separate_draw_buffer( + index, + blend.color.src, + blend.color.dst, + blend.alpha.src, + blend.alpha.dst, + ) + }; + } else { + unsafe { gl.blend_equation_draw_buffer(index, blend.color.equation) }; + unsafe { + gl.blend_func_draw_buffer(index, blend.color.src, blend.color.dst) + }; + } + } else if self + .shared + .private_caps + .contains(super::PrivateCapabilities::CAN_DISABLE_DRAW_BUFFER) + { + unsafe { gl.disable_draw_buffer(index, glow::BLEND) }; + } + } else { + unsafe { + gl.color_mask( + mask.contains(Cw::RED), + mask.contains(Cw::GREEN), + mask.contains(Cw::BLUE), + mask.contains(Cw::ALPHA), + ) + }; + if let Some(ref blend) = *blend { + unsafe { gl.enable(glow::BLEND) }; + if blend.color != blend.alpha { + unsafe { + gl.blend_equation_separate( + blend.color.equation, + blend.alpha.equation, + ) + }; + unsafe { + gl.blend_func_separate( + blend.color.src, + blend.color.dst, + blend.alpha.src, + blend.alpha.dst, + ) + }; + } else { + unsafe { gl.blend_equation(blend.color.equation) }; + unsafe { gl.blend_func(blend.color.src, blend.color.dst) }; + } + } else { + unsafe { gl.disable(glow::BLEND) }; + } + } + } + C::BindBuffer { + target, + slot, + buffer, + offset, + size, + } => { + unsafe { gl.bind_buffer_range(target, slot, Some(buffer), offset, size) }; + } + C::BindSampler(texture_index, sampler) => { + unsafe { gl.bind_sampler(texture_index, sampler) }; + } + C::BindTexture { + slot, + texture, + target, + aspects, + } => { + unsafe { gl.active_texture(glow::TEXTURE0 + slot) }; + unsafe { gl.bind_texture(target, Some(texture)) }; + + let version = gl.version(); + let is_min_es_3_1 = version.is_embedded && (version.major, version.minor) >= (3, 1); + let is_min_4_3 = !version.is_embedded && (version.major, version.minor) >= (4, 3); + if is_min_es_3_1 || is_min_4_3 { + let mode = match aspects { + crate::FormatAspects::DEPTH => Some(glow::DEPTH_COMPONENT), + crate::FormatAspects::STENCIL => Some(glow::STENCIL_INDEX), + _ => None, + }; + if let Some(mode) = mode { + unsafe { + gl.tex_parameter_i32( + target, + glow::DEPTH_STENCIL_TEXTURE_MODE, + mode as _, + ) + }; + } + } + } + C::BindImage { slot, ref binding } => { + unsafe { + gl.bind_image_texture( + slot, + binding.raw, + binding.mip_level as i32, + binding.array_layer.is_none(), + binding.array_layer.unwrap_or_default() as i32, + binding.access, + binding.format, + ) + }; + } + #[cfg(not(target_arch = "wasm32"))] + C::InsertDebugMarker(ref range) => { + let marker = extract_marker(data_bytes, range); + unsafe { + gl.debug_message_insert( + glow::DEBUG_SOURCE_APPLICATION, + glow::DEBUG_TYPE_MARKER, + DEBUG_ID, + glow::DEBUG_SEVERITY_NOTIFICATION, + marker, + ) + }; + } + #[cfg(target_arch = "wasm32")] + C::InsertDebugMarker(_) => (), + #[cfg_attr(target_arch = "wasm32", allow(unused))] + C::PushDebugGroup(ref range) => { + #[cfg(not(target_arch = "wasm32"))] + let marker = extract_marker(data_bytes, range); + #[cfg(not(target_arch = "wasm32"))] + unsafe { + gl.push_debug_group(glow::DEBUG_SOURCE_APPLICATION, DEBUG_ID, marker) + }; + } + C::PopDebugGroup => { + #[cfg(not(target_arch = "wasm32"))] + unsafe { + gl.pop_debug_group() + }; + } + C::SetPushConstants { + ref uniform, + offset, + } => { + fn get_data<T>(data: &[u8], offset: u32) -> &[T] { + let raw = &data[(offset as usize)..]; + unsafe { + slice::from_raw_parts( + raw.as_ptr() as *const _, + raw.len() / mem::size_of::<T>(), + ) + } + } + + let location = uniform.location.as_ref(); + + match uniform.utype { + glow::FLOAT => { + let data = get_data::<f32>(data_bytes, offset)[0]; + unsafe { gl.uniform_1_f32(location, data) }; + } + glow::FLOAT_VEC2 => { + let data = get_data::<[f32; 2]>(data_bytes, offset)[0]; + unsafe { gl.uniform_2_f32_slice(location, &data) }; + } + glow::FLOAT_VEC3 => { + let data = get_data::<[f32; 3]>(data_bytes, offset)[0]; + unsafe { gl.uniform_3_f32_slice(location, &data) }; + } + glow::FLOAT_VEC4 => { + let data = get_data::<[f32; 4]>(data_bytes, offset)[0]; + unsafe { gl.uniform_4_f32_slice(location, &data) }; + } + glow::INT => { + let data = get_data::<i32>(data_bytes, offset)[0]; + unsafe { gl.uniform_1_i32(location, data) }; + } + glow::INT_VEC2 => { + let data = get_data::<[i32; 2]>(data_bytes, offset)[0]; + unsafe { gl.uniform_2_i32_slice(location, &data) }; + } + glow::INT_VEC3 => { + let data = get_data::<[i32; 3]>(data_bytes, offset)[0]; + unsafe { gl.uniform_3_i32_slice(location, &data) }; + } + glow::INT_VEC4 => { + let data = get_data::<[i32; 4]>(data_bytes, offset)[0]; + unsafe { gl.uniform_4_i32_slice(location, &data) }; + } + glow::FLOAT_MAT2 => { + let data = get_data::<[f32; 4]>(data_bytes, offset)[0]; + unsafe { gl.uniform_matrix_2_f32_slice(location, false, &data) }; + } + glow::FLOAT_MAT3 => { + let data = get_data::<[f32; 9]>(data_bytes, offset)[0]; + unsafe { gl.uniform_matrix_3_f32_slice(location, false, &data) }; + } + glow::FLOAT_MAT4 => { + let data = get_data::<[f32; 16]>(data_bytes, offset)[0]; + unsafe { gl.uniform_matrix_4_f32_slice(location, false, &data) }; + } + _ => panic!("Unsupported uniform datatype!"), + } + } + } + } +} + +impl crate::Queue<super::Api> for super::Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&super::CommandBuffer], + signal_fence: Option<(&mut super::Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + let shared = Arc::clone(&self.shared); + let gl = &shared.context.lock(); + for cmd_buf in command_buffers.iter() { + // The command encoder assumes a default state when encoding the command buffer. + // Always reset the state between command_buffers to reflect this assumption. Do + // this at the beginning of the loop in case something outside of wgpu modified + // this state prior to commit. + unsafe { self.reset_state(gl) }; + #[cfg(not(target_arch = "wasm32"))] + if let Some(ref label) = cmd_buf.label { + unsafe { gl.push_debug_group(glow::DEBUG_SOURCE_APPLICATION, DEBUG_ID, label) }; + } + + for command in cmd_buf.commands.iter() { + unsafe { self.process(gl, command, &cmd_buf.data_bytes, &cmd_buf.queries) }; + } + + #[cfg(not(target_arch = "wasm32"))] + if cmd_buf.label.is_some() { + unsafe { gl.pop_debug_group() }; + } + } + + if let Some((fence, value)) = signal_fence { + fence.maintain(gl); + let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) } + .map_err(|_| crate::DeviceError::OutOfMemory)?; + fence.pending.push((value, sync)); + } + + Ok(()) + } + + unsafe fn present( + &mut self, + surface: &mut super::Surface, + texture: super::Texture, + ) -> Result<(), crate::SurfaceError> { + #[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] + let gl = unsafe { &self.shared.context.get_without_egl_lock() }; + + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + let gl = &self.shared.context.glow_context; + + unsafe { surface.present(texture, gl) } + } + + unsafe fn get_timestamp_period(&self) -> f32 { + 1.0 + } +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for super::Queue {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for super::Queue {} diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/clear.frag b/third_party/rust/wgpu-hal/src/gles/shaders/clear.frag new file mode 100644 index 0000000000..7766c12d9f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/clear.frag @@ -0,0 +1,9 @@ +#version 300 es +precision lowp float; +uniform vec4 color; +//Hack: Some WebGL implementations don't find "color" otherwise. +uniform vec4 color_workaround; +out vec4 frag; +void main() { + frag = color + color_workaround; +} diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/clear.vert b/third_party/rust/wgpu-hal/src/gles/shaders/clear.vert new file mode 100644 index 0000000000..ac655e7f31 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/clear.vert @@ -0,0 +1,11 @@ +#version 300 es +precision lowp float; +// A triangle that fills the whole screen +const vec2[3] TRIANGLE_POS = vec2[]( + vec2( 0.0, -3.0), + vec2(-3.0, 1.0), + vec2( 3.0, 1.0) +); +void main() { + gl_Position = vec4(TRIANGLE_POS[gl_VertexID], 0.0, 1.0); +}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.frag b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.frag new file mode 100644 index 0000000000..853f82a6ae --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.frag @@ -0,0 +1,16 @@ +#version 300 es +precision mediump float; +in vec2 uv; +uniform sampler2D present_texture; +out vec4 frag; +vec4 linear_to_srgb(vec4 linear) { + vec3 color_linear = linear.rgb; + vec3 selector = ceil(color_linear - 0.0031308); // 0 if under value, 1 if over + vec3 under = 12.92 * color_linear; + vec3 over = 1.055 * pow(color_linear, vec3(0.41666)) - 0.055; + vec3 result = mix(under, over, selector); + return vec4(result, linear.a); +} +void main() { + frag = linear_to_srgb(texture(present_texture, uv)); +}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.vert b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.vert new file mode 100644 index 0000000000..922f2a1848 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.vert @@ -0,0 +1,18 @@ +#version 300 es +precision mediump float; +// A triangle that fills the whole screen +const vec2[3] TRIANGLE_POS = vec2[]( + vec2( 0.0, -3.0), + vec2(-3.0, 1.0), + vec2( 3.0, 1.0) +); +const vec2[3] TRIANGLE_UV = vec2[]( + vec2( 0.5, 1.), + vec2( -1.0, -1.0), + vec2( 2.0, -1.0) +); +out vec2 uv; +void main() { + uv = TRIANGLE_UV[gl_VertexID]; + gl_Position = vec4(TRIANGLE_POS[gl_VertexID], 0.0, 1.0); +}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/gles/web.rs b/third_party/rust/wgpu-hal/src/gles/web.rs new file mode 100644 index 0000000000..565ffcab18 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/web.rs @@ -0,0 +1,404 @@ +use glow::HasContext; +use parking_lot::Mutex; +use wasm_bindgen::JsCast; + +use super::TextureFormatDesc; + +/// A wrapper around a [`glow::Context`] to provide a fake `lock()` api that makes it compatible +/// with the `AdapterContext` API from the EGL implementation. +pub struct AdapterContext { + pub glow_context: glow::Context, +} + +impl AdapterContext { + pub fn is_owned(&self) -> bool { + false + } + + /// Obtain a lock to the EGL context and get handle to the [`glow::Context`] that can be used to + /// do rendering. + #[track_caller] + pub fn lock(&self) -> &glow::Context { + &self.glow_context + } +} + +#[derive(Debug)] +pub struct Instance { + webgl2_context: Mutex<Option<web_sys::WebGl2RenderingContext>>, +} + +impl Instance { + pub fn create_surface_from_canvas( + &self, + canvas: web_sys::HtmlCanvasElement, + ) -> Result<Surface, crate::InstanceError> { + let result = + canvas.get_context_with_context_options("webgl2", &Self::create_context_options()); + self.create_surface_from_context(Canvas::Canvas(canvas), result) + } + + pub fn create_surface_from_offscreen_canvas( + &self, + canvas: web_sys::OffscreenCanvas, + ) -> Result<Surface, crate::InstanceError> { + let result = + canvas.get_context_with_context_options("webgl2", &Self::create_context_options()); + self.create_surface_from_context(Canvas::Offscreen(canvas), result) + } + + /// Common portion of public `create_surface_from_*` functions. + /// + /// Note: Analogous code also exists in the WebGPU backend at + /// `wgpu::backend::web::Context`. + fn create_surface_from_context( + &self, + canvas: Canvas, + context_result: Result<Option<js_sys::Object>, wasm_bindgen::JsValue>, + ) -> Result<Surface, crate::InstanceError> { + let context_object: js_sys::Object = match context_result { + Ok(Some(context)) => context, + Ok(None) => { + // <https://html.spec.whatwg.org/multipage/canvas.html#dom-canvas-getcontext-dev> + // A getContext() call “returns null if contextId is not supported, or if the + // canvas has already been initialized with another context type”. Additionally, + // “not supported” could include “insufficient GPU resources” or “the GPU process + // previously crashed”. So, we must return it as an `Err` since it could occur + // for circumstances outside the application author's control. + return Err(crate::InstanceError); + } + Err(js_error) => { + // <https://html.spec.whatwg.org/multipage/canvas.html#dom-canvas-getcontext> + // A thrown exception indicates misuse of the canvas state. Ideally we wouldn't + // panic in this case, but for now, `InstanceError` conveys no detail, so it + // is more informative to panic with a specific message. + panic!("canvas.getContext() threw {js_error:?}") + } + }; + + // Not returning this error because it is a type error that shouldn't happen unless + // the browser, JS builtin objects, or wasm bindings are misbehaving somehow. + let webgl2_context: web_sys::WebGl2RenderingContext = context_object + .dyn_into() + .expect("canvas context is not a WebGl2RenderingContext"); + + *self.webgl2_context.lock() = Some(webgl2_context.clone()); + + Ok(Surface { + canvas, + webgl2_context, + srgb_present_program: None, + swapchain: None, + texture: None, + presentable: true, + }) + } + + fn create_context_options() -> js_sys::Object { + let context_options = js_sys::Object::new(); + js_sys::Reflect::set( + &context_options, + &"antialias".into(), + &wasm_bindgen::JsValue::FALSE, + ) + .expect("Cannot create context options"); + context_options + } +} + +// SAFE: WASM doesn't have threads +unsafe impl Sync for Instance {} +unsafe impl Send for Instance {} + +impl crate::Instance<super::Api> for Instance { + unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + Ok(Instance { + webgl2_context: Mutex::new(None), + }) + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let context_guard = self.webgl2_context.lock(); + let gl = match *context_guard { + Some(ref webgl2_context) => glow::Context::from_webgl2_context(webgl2_context.clone()), + None => return Vec::new(), + }; + + unsafe { super::Adapter::expose(AdapterContext { glow_context: gl }) } + .into_iter() + .collect() + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + if let raw_window_handle::RawWindowHandle::Web(handle) = window_handle { + let canvas: web_sys::HtmlCanvasElement = web_sys::window() + .and_then(|win| win.document()) + .expect("Cannot get document") + .query_selector(&format!("canvas[data-raw-handle=\"{}\"]", handle.id)) + .expect("Cannot query for canvas") + .expect("Canvas is not found") + .dyn_into() + .expect("Failed to downcast to canvas type"); + + self.create_surface_from_canvas(canvas) + } else { + Err(crate::InstanceError) + } + } + + unsafe fn destroy_surface(&self, surface: Surface) { + let mut context_option_ref = self.webgl2_context.lock(); + + if let Some(context) = context_option_ref.as_ref() { + if context == &surface.webgl2_context { + *context_option_ref = None; + } + } + } +} + +#[derive(Clone, Debug)] +pub struct Surface { + canvas: Canvas, + webgl2_context: web_sys::WebGl2RenderingContext, + pub(super) swapchain: Option<Swapchain>, + texture: Option<glow::Texture>, + pub(super) presentable: bool, + srgb_present_program: Option<glow::Program>, +} + +#[derive(Clone, Debug)] +enum Canvas { + Canvas(web_sys::HtmlCanvasElement), + Offscreen(web_sys::OffscreenCanvas), +} + +// SAFE: Because web doesn't have threads ( yet ) +unsafe impl Sync for Surface {} +unsafe impl Send for Surface {} + +#[derive(Clone, Debug)] +pub struct Swapchain { + pub(crate) extent: wgt::Extent3d, + // pub(crate) channel: f::ChannelType, + pub(super) format: wgt::TextureFormat, + pub(super) framebuffer: glow::Framebuffer, + pub(super) format_desc: TextureFormatDesc, +} + +impl Surface { + pub(super) unsafe fn present( + &mut self, + _suf_texture: super::Texture, + gl: &glow::Context, + ) -> Result<(), crate::SurfaceError> { + let swapchain = self.swapchain.as_ref().ok_or(crate::SurfaceError::Other( + "need to configure surface before presenting", + ))?; + + if swapchain.format.is_srgb() { + // Important to set the viewport since we don't know in what state the user left it. + unsafe { + gl.viewport( + 0, + 0, + swapchain.extent.width as _, + swapchain.extent.height as _, + ) + }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + unsafe { gl.bind_sampler(0, None) }; + unsafe { gl.active_texture(glow::TEXTURE0) }; + unsafe { gl.bind_texture(glow::TEXTURE_2D, self.texture) }; + unsafe { gl.use_program(self.srgb_present_program) }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.disable(glow::BLEND) }; + unsafe { gl.disable(glow::CULL_FACE) }; + unsafe { gl.draw_buffers(&[glow::BACK]) }; + unsafe { gl.draw_arrays(glow::TRIANGLES, 0, 3) }; + } else { + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(swapchain.framebuffer)) }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + // Note the Y-flipping here. GL's presentation is not flipped, + // but main rendering is. Therefore, we Y-flip the output positions + // in the shader, and also this blit. + unsafe { + gl.blit_framebuffer( + 0, + swapchain.extent.height as i32, + swapchain.extent.width as i32, + 0, + 0, + 0, + swapchain.extent.width as i32, + swapchain.extent.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + } + + Ok(()) + } + + unsafe fn create_srgb_present_program(gl: &glow::Context) -> glow::Program { + let program = unsafe { gl.create_program() }.expect("Could not create shader program"); + let vertex = + unsafe { gl.create_shader(glow::VERTEX_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(vertex, include_str!("./shaders/srgb_present.vert")) }; + unsafe { gl.compile_shader(vertex) }; + let fragment = + unsafe { gl.create_shader(glow::FRAGMENT_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(fragment, include_str!("./shaders/srgb_present.frag")) }; + unsafe { gl.compile_shader(fragment) }; + unsafe { gl.attach_shader(program, vertex) }; + unsafe { gl.attach_shader(program, fragment) }; + unsafe { gl.link_program(program) }; + unsafe { gl.delete_shader(vertex) }; + unsafe { gl.delete_shader(fragment) }; + unsafe { gl.bind_texture(glow::TEXTURE_2D, None) }; + + program + } + + pub fn supports_srgb(&self) -> bool { + // present.frag takes care of handling srgb conversion + true + } +} + +impl crate::Surface<super::Api> for Surface { + unsafe fn configure( + &mut self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + match self.canvas { + Canvas::Canvas(ref canvas) => { + canvas.set_width(config.extent.width); + canvas.set_height(config.extent.height); + } + Canvas::Offscreen(ref canvas) => { + canvas.set_width(config.extent.width); + canvas.set_height(config.extent.height); + } + } + + let gl = &device.shared.context.lock(); + + if let Some(swapchain) = self.swapchain.take() { + // delete all frame buffers already allocated + unsafe { gl.delete_framebuffer(swapchain.framebuffer) }; + } + + if self.srgb_present_program.is_none() && config.format.is_srgb() { + self.srgb_present_program = Some(unsafe { Self::create_srgb_present_program(gl) }); + } + + if let Some(texture) = self.texture.take() { + unsafe { gl.delete_texture(texture) }; + } + + self.texture = Some(unsafe { gl.create_texture() }.map_err(|error| { + log::error!("Internal swapchain texture creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?); + + let desc = device.shared.describe_texture_format(config.format); + unsafe { gl.bind_texture(glow::TEXTURE_2D, self.texture) }; + unsafe { + gl.tex_parameter_i32( + glow::TEXTURE_2D, + glow::TEXTURE_MIN_FILTER, + glow::NEAREST as _, + ) + }; + unsafe { + gl.tex_parameter_i32( + glow::TEXTURE_2D, + glow::TEXTURE_MAG_FILTER, + glow::NEAREST as _, + ) + }; + unsafe { + gl.tex_storage_2d( + glow::TEXTURE_2D, + 1, + desc.internal, + config.extent.width as i32, + config.extent.height as i32, + ) + }; + + let framebuffer = unsafe { gl.create_framebuffer() }.map_err(|error| { + log::error!("Internal swapchain framebuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(framebuffer)) }; + unsafe { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + glow::TEXTURE_2D, + self.texture, + 0, + ) + }; + unsafe { gl.bind_texture(glow::TEXTURE_2D, None) }; + + self.swapchain = Some(Swapchain { + extent: config.extent, + // channel: config.format.base_format().1, + format: config.format, + format_desc: desc, + framebuffer, + }); + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &super::Device) { + let gl = device.shared.context.lock(); + if let Some(swapchain) = self.swapchain.take() { + unsafe { gl.delete_framebuffer(swapchain.framebuffer) }; + } + if let Some(renderbuffer) = self.texture.take() { + unsafe { gl.delete_texture(renderbuffer) }; + } + } + + unsafe fn acquire_texture( + &mut self, + _timeout_ms: Option<std::time::Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let sc = self.swapchain.as_ref().unwrap(); + let texture = super::Texture { + inner: super::TextureInner::Texture { + raw: self.texture.unwrap(), + target: glow::TEXTURE_2D, + }, + drop_guard: None, + array_layer_count: 1, + mip_level_count: 1, + format: sc.format, + format_desc: sc.format_desc.clone(), + copy_size: crate::CopyExtent { + width: sc.extent.width, + height: sc.extent.height, + depth: 1, + }, + is_cubemap: false, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + + unsafe fn discard_texture(&mut self, _texture: super::Texture) {} +} diff --git a/third_party/rust/wgpu-hal/src/lib.rs b/third_party/rust/wgpu-hal/src/lib.rs new file mode 100644 index 0000000000..1758149380 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/lib.rs @@ -0,0 +1,1316 @@ +/*! This library describes the internal unsafe graphics abstraction API. + * It follows WebGPU for the most part, re-using wgpu-types, + * with the following deviations: + * - Fully unsafe: zero overhead, zero validation. + * - Compile-time backend selection via traits. + * - Objects are passed by references and returned by value. No IDs. + * - Mapping is persistent, with explicit synchronization. + * - Resource transitions are explicit. + * - All layouts are explicit. Binding model has compatibility. + * + * General design direction is to follow the majority by the following weights: + * - wgpu-core: 1.5 + * - primary backends (Vulkan/Metal/DX12): 1.0 each + * - secondary backends (DX11/GLES): 0.5 each + */ + +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![allow( + // for `if_then_panic` until it reaches stable + unknown_lints, + // We use loops for getting early-out of scope without closures. + clippy::never_loop, + // We don't use syntax sugar where it's not necessary. + clippy::match_like_matches_macro, + // Redundant matching is more explicit. + clippy::redundant_pattern_matching, + // Explicit lifetimes are often easier to reason about. + clippy::needless_lifetimes, + // No need for defaults in the internal types. + clippy::new_without_default, + // Matches are good and extendable, no need to make an exception here. + clippy::single_match, + // Push commands are more regular than macros. + clippy::vec_init_then_push, + // "if panic" is a good uniform construct. + clippy::if_then_panic, + // We unsafe impl `Send` for a reason. + clippy::non_send_fields_in_send_ty, + // TODO! + clippy::missing_safety_doc, + // Clashes with clippy::pattern_type_mismatch + clippy::needless_borrowed_reference, +)] +#![warn( + trivial_casts, + trivial_numeric_casts, + unsafe_op_in_unsafe_fn, + unused_extern_crates, + unused_qualifications, + // We don't match on a reference, unless required. + clippy::pattern_type_mismatch, +)] + +/// DirectX11 API internals. +#[cfg(all(feature = "dx11", windows))] +pub mod dx11; +/// DirectX12 API internals. +#[cfg(all(feature = "dx12", windows))] +pub mod dx12; +/// A dummy API implementation. +pub mod empty; +/// GLES API internals. +#[cfg(all(feature = "gles"))] +pub mod gles; +/// Metal API internals. +#[cfg(all(feature = "metal", any(target_os = "macos", target_os = "ios")))] +pub mod metal; +/// Vulkan API internals. +#[cfg(all(feature = "vulkan", not(target_arch = "wasm32")))] +pub mod vulkan; + +pub mod auxil; +pub mod api { + #[cfg(all(feature = "dx11", windows))] + pub use super::dx11::Api as Dx11; + #[cfg(all(feature = "dx12", windows))] + pub use super::dx12::Api as Dx12; + pub use super::empty::Api as Empty; + #[cfg(feature = "gles")] + pub use super::gles::Api as Gles; + #[cfg(all(feature = "metal", any(target_os = "macos", target_os = "ios")))] + pub use super::metal::Api as Metal; + #[cfg(all(feature = "vulkan", not(target_arch = "wasm32")))] + pub use super::vulkan::Api as Vulkan; +} + +use std::{ + borrow::{Borrow, Cow}, + fmt, + num::NonZeroU32, + ops::{Range, RangeInclusive}, + ptr::NonNull, + sync::atomic::AtomicBool, +}; + +use bitflags::bitflags; +use thiserror::Error; + +pub const MAX_ANISOTROPY: u8 = 16; +pub const MAX_BIND_GROUPS: usize = 8; +pub const MAX_VERTEX_BUFFERS: usize = 16; +pub const MAX_COLOR_ATTACHMENTS: usize = 8; +pub const MAX_MIP_LEVELS: u32 = 16; +/// Size of a single occlusion/timestamp query, when copied into a buffer, in bytes. +pub const QUERY_SIZE: wgt::BufferAddress = 8; + +pub type Label<'a> = Option<&'a str>; +pub type MemoryRange = Range<wgt::BufferAddress>; +pub type FenceValue = u64; + +/// Drop guard to signal wgpu-hal is no longer using an externally created object. +pub type DropGuard = Box<dyn std::any::Any + Send + Sync>; + +#[derive(Clone, Debug, PartialEq, Eq, Error)] +pub enum DeviceError { + #[error("Out of memory")] + OutOfMemory, + #[error("Device is lost")] + Lost, +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum ShaderError { + #[error("Compilation failed: {0:?}")] + Compilation(String), + #[error(transparent)] + Device(#[from] DeviceError), +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum PipelineError { + #[error("Linkage failed for stage {0:?}: {1}")] + Linkage(wgt::ShaderStages, String), + #[error("Entry point for stage {0:?} is invalid")] + EntryPoint(naga::ShaderStage), + #[error(transparent)] + Device(#[from] DeviceError), +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum SurfaceError { + #[error("Surface is lost")] + Lost, + #[error("Surface is outdated, needs to be re-created")] + Outdated, + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Other reason: {0}")] + Other(&'static str), +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +#[error("Not supported")] +pub struct InstanceError; + +pub trait Api: Clone + Sized { + type Instance: Instance<Self>; + type Surface: Surface<Self>; + type Adapter: Adapter<Self>; + type Device: Device<Self>; + + type Queue: Queue<Self>; + type CommandEncoder: CommandEncoder<Self>; + type CommandBuffer: Send + Sync + fmt::Debug; + + type Buffer: fmt::Debug + Send + Sync + 'static; + type Texture: fmt::Debug + Send + Sync + 'static; + type SurfaceTexture: fmt::Debug + Send + Sync + Borrow<Self::Texture>; + type TextureView: fmt::Debug + Send + Sync; + type Sampler: fmt::Debug + Send + Sync; + type QuerySet: fmt::Debug + Send + Sync; + type Fence: fmt::Debug + Send + Sync; + + type BindGroupLayout: Send + Sync; + type BindGroup: fmt::Debug + Send + Sync; + type PipelineLayout: Send + Sync; + type ShaderModule: fmt::Debug + Send + Sync; + type RenderPipeline: Send + Sync; + type ComputePipeline: Send + Sync; +} + +pub trait Instance<A: Api>: Sized + Send + Sync { + unsafe fn init(desc: &InstanceDescriptor) -> Result<Self, InstanceError>; + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<A::Surface, InstanceError>; + unsafe fn destroy_surface(&self, surface: A::Surface); + unsafe fn enumerate_adapters(&self) -> Vec<ExposedAdapter<A>>; +} + +pub trait Surface<A: Api>: Send + Sync { + unsafe fn configure( + &mut self, + device: &A::Device, + config: &SurfaceConfiguration, + ) -> Result<(), SurfaceError>; + + unsafe fn unconfigure(&mut self, device: &A::Device); + + /// Returns the next texture to be presented by the swapchain for drawing + /// + /// A `timeout` of `None` means to wait indefinitely, with no timeout. + /// + /// # Portability + /// + /// Some backends can't support a timeout when acquiring a texture and + /// the timeout will be ignored. + /// + /// Returns `None` on timing out. + unsafe fn acquire_texture( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<AcquiredSurfaceTexture<A>>, SurfaceError>; + unsafe fn discard_texture(&mut self, texture: A::SurfaceTexture); +} + +pub trait Adapter<A: Api>: Send + Sync { + unsafe fn open( + &self, + features: wgt::Features, + limits: &wgt::Limits, + ) -> Result<OpenDevice<A>, DeviceError>; + + /// Return the set of supported capabilities for a texture format. + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> TextureFormatCapabilities; + + /// Returns the capabilities of working with a specified surface. + /// + /// `None` means presentation is not supported for it. + unsafe fn surface_capabilities(&self, surface: &A::Surface) -> Option<SurfaceCapabilities>; + + /// Creates a [`PresentationTimestamp`] using the adapter's WSI. + /// + /// [`PresentationTimestamp`]: wgt::PresentationTimestamp + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp; +} + +pub trait Device<A: Api>: Send + Sync { + /// Exit connection to this logical device. + unsafe fn exit(self, queue: A::Queue); + /// Creates a new buffer. + /// + /// The initial usage is `BufferUses::empty()`. + unsafe fn create_buffer(&self, desc: &BufferDescriptor) -> Result<A::Buffer, DeviceError>; + unsafe fn destroy_buffer(&self, buffer: A::Buffer); + //TODO: clarify if zero-sized mapping is allowed + unsafe fn map_buffer( + &self, + buffer: &A::Buffer, + range: MemoryRange, + ) -> Result<BufferMapping, DeviceError>; + unsafe fn unmap_buffer(&self, buffer: &A::Buffer) -> Result<(), DeviceError>; + unsafe fn flush_mapped_ranges<I>(&self, buffer: &A::Buffer, ranges: I) + where + I: Iterator<Item = MemoryRange>; + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &A::Buffer, ranges: I) + where + I: Iterator<Item = MemoryRange>; + + /// Creates a new texture. + /// + /// The initial usage for all subresources is `TextureUses::UNINITIALIZED`. + unsafe fn create_texture(&self, desc: &TextureDescriptor) -> Result<A::Texture, DeviceError>; + unsafe fn destroy_texture(&self, texture: A::Texture); + unsafe fn create_texture_view( + &self, + texture: &A::Texture, + desc: &TextureViewDescriptor, + ) -> Result<A::TextureView, DeviceError>; + unsafe fn destroy_texture_view(&self, view: A::TextureView); + unsafe fn create_sampler(&self, desc: &SamplerDescriptor) -> Result<A::Sampler, DeviceError>; + unsafe fn destroy_sampler(&self, sampler: A::Sampler); + + unsafe fn create_command_encoder( + &self, + desc: &CommandEncoderDescriptor<A>, + ) -> Result<A::CommandEncoder, DeviceError>; + unsafe fn destroy_command_encoder(&self, pool: A::CommandEncoder); + + /// Creates a bind group layout. + unsafe fn create_bind_group_layout( + &self, + desc: &BindGroupLayoutDescriptor, + ) -> Result<A::BindGroupLayout, DeviceError>; + unsafe fn destroy_bind_group_layout(&self, bg_layout: A::BindGroupLayout); + unsafe fn create_pipeline_layout( + &self, + desc: &PipelineLayoutDescriptor<A>, + ) -> Result<A::PipelineLayout, DeviceError>; + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: A::PipelineLayout); + unsafe fn create_bind_group( + &self, + desc: &BindGroupDescriptor<A>, + ) -> Result<A::BindGroup, DeviceError>; + unsafe fn destroy_bind_group(&self, group: A::BindGroup); + + unsafe fn create_shader_module( + &self, + desc: &ShaderModuleDescriptor, + shader: ShaderInput, + ) -> Result<A::ShaderModule, ShaderError>; + unsafe fn destroy_shader_module(&self, module: A::ShaderModule); + unsafe fn create_render_pipeline( + &self, + desc: &RenderPipelineDescriptor<A>, + ) -> Result<A::RenderPipeline, PipelineError>; + unsafe fn destroy_render_pipeline(&self, pipeline: A::RenderPipeline); + unsafe fn create_compute_pipeline( + &self, + desc: &ComputePipelineDescriptor<A>, + ) -> Result<A::ComputePipeline, PipelineError>; + unsafe fn destroy_compute_pipeline(&self, pipeline: A::ComputePipeline); + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<Label>, + ) -> Result<A::QuerySet, DeviceError>; + unsafe fn destroy_query_set(&self, set: A::QuerySet); + unsafe fn create_fence(&self) -> Result<A::Fence, DeviceError>; + unsafe fn destroy_fence(&self, fence: A::Fence); + unsafe fn get_fence_value(&self, fence: &A::Fence) -> Result<FenceValue, DeviceError>; + /// Calling wait with a lower value than the current fence value will immediately return. + unsafe fn wait( + &self, + fence: &A::Fence, + value: FenceValue, + timeout_ms: u32, + ) -> Result<bool, DeviceError>; + + unsafe fn start_capture(&self) -> bool; + unsafe fn stop_capture(&self); +} + +pub trait Queue<A: Api>: Send + Sync { + /// Submits the command buffers for execution on GPU. + /// + /// Valid usage: + /// - all of the command buffers were created from command pools + /// that are associated with this queue. + /// - all of the command buffers had `CommadBuffer::finish()` called. + unsafe fn submit( + &mut self, + command_buffers: &[&A::CommandBuffer], + signal_fence: Option<(&mut A::Fence, FenceValue)>, + ) -> Result<(), DeviceError>; + unsafe fn present( + &mut self, + surface: &mut A::Surface, + texture: A::SurfaceTexture, + ) -> Result<(), SurfaceError>; + unsafe fn get_timestamp_period(&self) -> f32; +} + +/// Encoder for commands in command buffers. +/// Serves as a parent for all the encoded command buffers. +/// Works in bursts of action: one or more command buffers are recorded, +/// then submitted to a queue, and then it needs to be `reset_all()`. +pub trait CommandEncoder<A: Api>: Send + Sync + fmt::Debug { + /// Begin encoding a new command buffer. + unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>; + /// Discard currently recorded list, if any. + unsafe fn discard_encoding(&mut self); + unsafe fn end_encoding(&mut self) -> Result<A::CommandBuffer, DeviceError>; + /// Reclaims all resources that are allocated for this encoder. + /// Must get all of the produced command buffers back, + /// and they must not be used by GPU at this moment. + unsafe fn reset_all<I>(&mut self, command_buffers: I) + where + I: Iterator<Item = A::CommandBuffer>; + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = BufferBarrier<'a, A>>; + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = TextureBarrier<'a, A>>; + + // copy operations + + unsafe fn clear_buffer(&mut self, buffer: &A::Buffer, range: MemoryRange); + + unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &A::Buffer, dst: &A::Buffer, regions: T) + where + T: Iterator<Item = BufferCopy>; + + /// Copy from an external image to an internal texture. + /// Works with a single array layer. + /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: the copy extent is in physical size (rounded to the block size) + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + unsafe fn copy_external_image_to_texture<T>( + &mut self, + src: &wgt::ImageCopyExternalImage, + dst: &A::Texture, + dst_premultiplication: bool, + regions: T, + ) where + T: Iterator<Item = TextureCopy>; + + /// Copy from one texture to another. + /// Works with a single array layer. + /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: the copy extent is in physical size (rounded to the block size) + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &A::Texture, + src_usage: TextureUses, + dst: &A::Texture, + regions: T, + ) where + T: Iterator<Item = TextureCopy>; + + /// Copy from buffer to texture. + /// Works with a single array layer. + /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: the copy extent is in physical size (rounded to the block size) + unsafe fn copy_buffer_to_texture<T>(&mut self, src: &A::Buffer, dst: &A::Texture, regions: T) + where + T: Iterator<Item = BufferTextureCopy>; + + /// Copy from texture to buffer. + /// Works with a single array layer. + /// Note: the copy extent is in physical size (rounded to the block size) + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &A::Texture, + src_usage: TextureUses, + dst: &A::Buffer, + regions: T, + ) where + T: Iterator<Item = BufferTextureCopy>; + + // pass common + + /// Sets the bind group at `index` to `group`, assuming the layout + /// of all the preceeding groups to be taken from `layout`. + unsafe fn set_bind_group( + &mut self, + layout: &A::PipelineLayout, + index: u32, + group: &A::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ); + + unsafe fn set_push_constants( + &mut self, + layout: &A::PipelineLayout, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ); + + unsafe fn insert_debug_marker(&mut self, label: &str); + unsafe fn begin_debug_marker(&mut self, group_label: &str); + unsafe fn end_debug_marker(&mut self); + + // queries + + unsafe fn begin_query(&mut self, set: &A::QuerySet, index: u32); + unsafe fn end_query(&mut self, set: &A::QuerySet, index: u32); + unsafe fn write_timestamp(&mut self, set: &A::QuerySet, index: u32); + unsafe fn reset_queries(&mut self, set: &A::QuerySet, range: Range<u32>); + unsafe fn copy_query_results( + &mut self, + set: &A::QuerySet, + range: Range<u32>, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ); + + // render passes + + // Begins a render pass, clears all active bindings. + unsafe fn begin_render_pass(&mut self, desc: &RenderPassDescriptor<A>); + unsafe fn end_render_pass(&mut self); + + unsafe fn set_render_pipeline(&mut self, pipeline: &A::RenderPipeline); + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: BufferBinding<'a, A>, + format: wgt::IndexFormat, + ); + unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: BufferBinding<'a, A>); + unsafe fn set_viewport(&mut self, rect: &Rect<f32>, depth_range: Range<f32>); + unsafe fn set_scissor_rect(&mut self, rect: &Rect<u32>); + unsafe fn set_stencil_reference(&mut self, value: u32); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]); + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indirect( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indirect_count( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + count_buffer: &A::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + count_buffer: &A::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + + // compute passes + + // Begins a compute pass, clears all active bindings. + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor); + unsafe fn end_compute_pass(&mut self); + + unsafe fn set_compute_pipeline(&mut self, pipeline: &A::ComputePipeline); + + unsafe fn dispatch(&mut self, count: [u32; 3]); + unsafe fn dispatch_indirect(&mut self, buffer: &A::Buffer, offset: wgt::BufferAddress); +} + +bitflags!( + /// Instance initialization flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct InstanceFlags: u32 { + /// Generate debug information in shaders and objects. + const DEBUG = 1 << 0; + /// Enable validation, if possible. + const VALIDATION = 1 << 1; + } +); + +bitflags!( + /// Pipeline layout creation flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct PipelineLayoutFlags: u32 { + /// Include support for base vertex/instance drawing. + const BASE_VERTEX_INSTANCE = 1 << 0; + /// Include support for num work groups builtin. + const NUM_WORK_GROUPS = 1 << 1; + } +); + +bitflags!( + /// Pipeline layout creation flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct BindGroupLayoutFlags: u32 { + /// Allows for bind group binding arrays to be shorter than the array in the BGL. + const PARTIALLY_BOUND = 1 << 0; + } +); + +bitflags!( + /// Texture format capability flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct TextureFormatCapabilities: u32 { + /// Format can be sampled. + const SAMPLED = 1 << 0; + /// Format can be sampled with a linear sampler. + const SAMPLED_LINEAR = 1 << 1; + /// Format can be sampled with a min/max reduction sampler. + const SAMPLED_MINMAX = 1 << 2; + + /// Format can be used as storage with write-only access. + const STORAGE = 1 << 3; + /// Format can be used as storage with read and read/write access. + const STORAGE_READ_WRITE = 1 << 4; + /// Format can be used as storage with atomics. + const STORAGE_ATOMIC = 1 << 5; + + /// Format can be used as color and input attachment. + const COLOR_ATTACHMENT = 1 << 6; + /// Format can be used as color (with blending) and input attachment. + const COLOR_ATTACHMENT_BLEND = 1 << 7; + /// Format can be used as depth-stencil and input attachment. + const DEPTH_STENCIL_ATTACHMENT = 1 << 8; + + /// Format can be multisampled by x2. + const MULTISAMPLE_X2 = 1 << 9; + /// Format can be multisampled by x4. + const MULTISAMPLE_X4 = 1 << 10; + /// Format can be multisampled by x8. + const MULTISAMPLE_X8 = 1 << 11; + /// Format can be multisampled by x16. + const MULTISAMPLE_X16 = 1 << 12; + + /// Format can be used for render pass resolve targets. + const MULTISAMPLE_RESOLVE = 1 << 13; + + /// Format can be copied from. + const COPY_SRC = 1 << 14; + /// Format can be copied to. + const COPY_DST = 1 << 15; + } +); + +bitflags!( + /// Texture format capability flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct FormatAspects: u8 { + const COLOR = 1 << 0; + const DEPTH = 1 << 1; + const STENCIL = 1 << 2; + } +); + +impl FormatAspects { + pub fn new(format: wgt::TextureFormat, aspect: wgt::TextureAspect) -> Self { + let aspect_mask = match aspect { + wgt::TextureAspect::All => Self::all(), + wgt::TextureAspect::DepthOnly => Self::DEPTH, + wgt::TextureAspect::StencilOnly => Self::STENCIL, + }; + Self::from(format) & aspect_mask + } + + /// Returns `true` if only one flag is set + pub fn is_one(&self) -> bool { + self.bits().count_ones() == 1 + } + + pub fn map(&self) -> wgt::TextureAspect { + match *self { + Self::COLOR => wgt::TextureAspect::All, + Self::DEPTH => wgt::TextureAspect::DepthOnly, + Self::STENCIL => wgt::TextureAspect::StencilOnly, + _ => unreachable!(), + } + } +} + +impl From<wgt::TextureFormat> for FormatAspects { + fn from(format: wgt::TextureFormat) -> Self { + match format { + wgt::TextureFormat::Stencil8 => Self::STENCIL, + wgt::TextureFormat::Depth16Unorm + | wgt::TextureFormat::Depth32Float + | wgt::TextureFormat::Depth24Plus => Self::DEPTH, + wgt::TextureFormat::Depth32FloatStencil8 | wgt::TextureFormat::Depth24PlusStencil8 => { + Self::DEPTH | Self::STENCIL + } + _ => Self::COLOR, + } + } +} + +bitflags!( + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct MemoryFlags: u32 { + const TRANSIENT = 1 << 0; + const PREFER_COHERENT = 1 << 1; + } +); + +//TODO: it's not intuitive for the backends to consider `LOAD` being optional. + +bitflags!( + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct AttachmentOps: u8 { + const LOAD = 1 << 0; + const STORE = 1 << 1; + } +); + +bitflags::bitflags! { + /// Similar to `wgt::BufferUsages` but for internal use. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct BufferUses: u16 { + /// The argument to a read-only mapping. + const MAP_READ = 1 << 0; + /// The argument to a write-only mapping. + const MAP_WRITE = 1 << 1; + /// The source of a hardware copy. + const COPY_SRC = 1 << 2; + /// The destination of a hardware copy. + const COPY_DST = 1 << 3; + /// The index buffer used for drawing. + const INDEX = 1 << 4; + /// A vertex buffer used for drawing. + const VERTEX = 1 << 5; + /// A uniform buffer bound in a bind group. + const UNIFORM = 1 << 6; + /// A read-only storage buffer used in a bind group. + const STORAGE_READ = 1 << 7; + /// A read-write or write-only buffer used in a bind group. + const STORAGE_READ_WRITE = 1 << 8; + /// The indirect or count buffer in a indirect draw or dispatch. + const INDIRECT = 1 << 9; + /// The combination of states that a buffer may be in _at the same time_. + const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() | + Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() | + Self::STORAGE_READ.bits() | Self::INDIRECT.bits(); + /// The combination of states that a buffer must exclusively be in. + const EXCLUSIVE = Self::MAP_WRITE.bits() | Self::COPY_DST.bits() | Self::STORAGE_READ_WRITE.bits(); + /// The combination of all usages that the are guaranteed to be be ordered by the hardware. + /// If a usage is ordered, then if the buffer state doesn't change between draw calls, there + /// are no barriers needed for synchronization. + const ORDERED = Self::INCLUSIVE.bits() | Self::MAP_WRITE.bits(); + } +} + +bitflags::bitflags! { + /// Similar to `wgt::TextureUsages` but for internal use. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct TextureUses: u16 { + /// The texture is in unknown state. + const UNINITIALIZED = 1 << 0; + /// Ready to present image to the surface. + const PRESENT = 1 << 1; + /// The source of a hardware copy. + const COPY_SRC = 1 << 2; + /// The destination of a hardware copy. + const COPY_DST = 1 << 3; + /// Read-only sampled or fetched resource. + const RESOURCE = 1 << 4; + /// The color target of a renderpass. + const COLOR_TARGET = 1 << 5; + /// Read-only depth stencil usage. + const DEPTH_STENCIL_READ = 1 << 6; + /// Read-write depth stencil usage + const DEPTH_STENCIL_WRITE = 1 << 7; + /// Read-only storage buffer usage. Corresponds to a UAV in d3d, so is exclusive, despite being read only. + const STORAGE_READ = 1 << 8; + /// Read-write or write-only storage buffer usage. + const STORAGE_READ_WRITE = 1 << 9; + /// The combination of states that a texture may be in _at the same time_. + const INCLUSIVE = Self::COPY_SRC.bits() | Self::RESOURCE.bits() | Self::DEPTH_STENCIL_READ.bits(); + /// The combination of states that a texture must exclusively be in. + const EXCLUSIVE = Self::COPY_DST.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ.bits() | Self::STORAGE_READ_WRITE.bits() | Self::PRESENT.bits(); + /// The combination of all usages that the are guaranteed to be be ordered by the hardware. + /// If a usage is ordered, then if the texture state doesn't change between draw calls, there + /// are no barriers needed for synchronization. + const ORDERED = Self::INCLUSIVE.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ.bits(); + + /// Flag used by the wgpu-core texture tracker to say a texture is in different states for every sub-resource + const COMPLEX = 1 << 10; + /// Flag used by the wgpu-core texture tracker to say that the tracker does not know the state of the sub-resource. + /// This is different from UNINITIALIZED as that says the tracker does know, but the texture has not been initialized. + const UNKNOWN = 1 << 11; + } +} + +#[derive(Clone, Debug)] +pub struct InstanceDescriptor<'a> { + pub name: &'a str, + pub flags: InstanceFlags, + pub dx12_shader_compiler: wgt::Dx12Compiler, +} + +#[derive(Clone, Debug)] +pub struct Alignments { + /// The alignment of the start of the buffer used as a GPU copy source. + pub buffer_copy_offset: wgt::BufferSize, + /// The alignment of the row pitch of the texture data stored in a buffer that is + /// used in a GPU copy operation. + pub buffer_copy_pitch: wgt::BufferSize, +} + +#[derive(Clone, Debug)] +pub struct Capabilities { + pub limits: wgt::Limits, + pub alignments: Alignments, + pub downlevel: wgt::DownlevelCapabilities, +} + +#[derive(Debug)] +pub struct ExposedAdapter<A: Api> { + pub adapter: A::Adapter, + pub info: wgt::AdapterInfo, + pub features: wgt::Features, + pub capabilities: Capabilities, +} + +/// Describes information about what a `Surface`'s presentation capabilities are. +/// Fetch this with [Adapter::surface_capabilities]. +#[derive(Debug, Clone)] +pub struct SurfaceCapabilities { + /// List of supported texture formats. + /// + /// Must be at least one. + pub formats: Vec<wgt::TextureFormat>, + + /// Range for the swap chain sizes. + /// + /// - `swap_chain_sizes.start` must be at least 1. + /// - `swap_chain_sizes.end` must be larger or equal to `swap_chain_sizes.start`. + pub swap_chain_sizes: RangeInclusive<u32>, + + /// Current extent of the surface, if known. + pub current_extent: Option<wgt::Extent3d>, + + /// Range of supported extents. + /// + /// `current_extent` must be inside this range. + pub extents: RangeInclusive<wgt::Extent3d>, + + /// Supported texture usage flags. + /// + /// Must have at least `TextureUses::COLOR_TARGET` + pub usage: TextureUses, + + /// List of supported V-sync modes. + /// + /// Must be at least one. + pub present_modes: Vec<wgt::PresentMode>, + + /// List of supported alpha composition modes. + /// + /// Must be at least one. + pub composite_alpha_modes: Vec<wgt::CompositeAlphaMode>, +} + +#[derive(Debug)] +pub struct AcquiredSurfaceTexture<A: Api> { + pub texture: A::SurfaceTexture, + /// The presentation configuration no longer matches + /// the surface properties exactly, but can still be used to present + /// to the surface successfully. + pub suboptimal: bool, +} + +#[derive(Debug)] +pub struct OpenDevice<A: Api> { + pub device: A::Device, + pub queue: A::Queue, +} + +#[derive(Clone, Debug)] +pub struct BufferMapping { + pub ptr: NonNull<u8>, + pub is_coherent: bool, +} + +#[derive(Clone, Debug)] +pub struct BufferDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::BufferAddress, + pub usage: BufferUses, + pub memory_flags: MemoryFlags, +} + +#[derive(Clone, Debug)] +pub struct TextureDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::Extent3d, + pub mip_level_count: u32, + pub sample_count: u32, + pub dimension: wgt::TextureDimension, + pub format: wgt::TextureFormat, + pub usage: TextureUses, + pub memory_flags: MemoryFlags, + /// Allows views of this texture to have a different format + /// than the texture does. + pub view_formats: Vec<wgt::TextureFormat>, +} + +impl TextureDescriptor<'_> { + pub fn copy_extent(&self) -> CopyExtent { + CopyExtent::map_extent_to_copy_size(&self.size, self.dimension) + } + + pub fn is_cube_compatible(&self) -> bool { + self.dimension == wgt::TextureDimension::D2 + && self.size.depth_or_array_layers % 6 == 0 + && self.sample_count == 1 + && self.size.width == self.size.height + } + + pub fn array_layer_count(&self) -> u32 { + match self.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D3 => 1, + wgt::TextureDimension::D2 => self.size.depth_or_array_layers, + } + } +} + +/// TextureView descriptor. +/// +/// Valid usage: +///. - `format` has to be the same as `TextureDescriptor::format` +///. - `dimension` has to be compatible with `TextureDescriptor::dimension` +///. - `usage` has to be a subset of `TextureDescriptor::usage` +///. - `range` has to be a subset of parent texture +#[derive(Clone, Debug)] +pub struct TextureViewDescriptor<'a> { + pub label: Label<'a>, + pub format: wgt::TextureFormat, + pub dimension: wgt::TextureViewDimension, + pub usage: TextureUses, + pub range: wgt::ImageSubresourceRange, +} + +#[derive(Clone, Debug)] +pub struct SamplerDescriptor<'a> { + pub label: Label<'a>, + pub address_modes: [wgt::AddressMode; 3], + pub mag_filter: wgt::FilterMode, + pub min_filter: wgt::FilterMode, + pub mipmap_filter: wgt::FilterMode, + pub lod_clamp: Range<f32>, + pub compare: Option<wgt::CompareFunction>, + // Must in the range [1, 16]. + // + // Anisotropic filtering must be supported if this is not 1. + pub anisotropy_clamp: u16, + pub border_color: Option<wgt::SamplerBorderColor>, +} + +/// BindGroupLayout descriptor. +/// +/// Valid usage: +/// - `entries` are sorted by ascending `wgt::BindGroupLayoutEntry::binding` +#[derive(Clone, Debug)] +pub struct BindGroupLayoutDescriptor<'a> { + pub label: Label<'a>, + pub flags: BindGroupLayoutFlags, + pub entries: &'a [wgt::BindGroupLayoutEntry], +} + +#[derive(Clone, Debug)] +pub struct PipelineLayoutDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub flags: PipelineLayoutFlags, + pub bind_group_layouts: &'a [&'a A::BindGroupLayout], + pub push_constant_ranges: &'a [wgt::PushConstantRange], +} + +#[derive(Debug)] +pub struct BufferBinding<'a, A: Api> { + /// The buffer being bound. + pub buffer: &'a A::Buffer, + + /// The offset at which the bound region starts. + /// + /// This must be less than the size of the buffer. Some back ends + /// cannot tolerate zero-length regions; for example, see + /// [VUID-VkDescriptorBufferInfo-offset-00340][340] and + /// [VUID-VkDescriptorBufferInfo-range-00341][341], or the + /// documentation for GLES's [glBindBufferRange][bbr]. + /// + /// [340]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkDescriptorBufferInfo-offset-00340 + /// [341]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkDescriptorBufferInfo-range-00341 + /// [bbr]: https://registry.khronos.org/OpenGL-Refpages/es3.0/html/glBindBufferRange.xhtml + pub offset: wgt::BufferAddress, + + /// The size of the region bound, in bytes. + /// + /// If `None`, the region extends from `offset` to the end of the + /// buffer. Given the restrictions on `offset`, this means that + /// the size is always greater than zero. + pub size: Option<wgt::BufferSize>, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for BufferBinding<'_, A> { + fn clone(&self) -> Self { + Self { + buffer: self.buffer, + offset: self.offset, + size: self.size, + } + } +} + +#[derive(Debug)] +pub struct TextureBinding<'a, A: Api> { + pub view: &'a A::TextureView, + pub usage: TextureUses, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for TextureBinding<'_, A> { + fn clone(&self) -> Self { + Self { + view: self.view, + usage: self.usage, + } + } +} + +#[derive(Clone, Debug)] +pub struct BindGroupEntry { + pub binding: u32, + pub resource_index: u32, + pub count: u32, +} + +/// BindGroup descriptor. +/// +/// Valid usage: +///. - `entries` has to be sorted by ascending `BindGroupEntry::binding` +///. - `entries` has to have the same set of `BindGroupEntry::binding` as `layout` +///. - each entry has to be compatible with the `layout` +///. - each entry's `BindGroupEntry::resource_index` is within range +/// of the corresponding resource array, selected by the relevant +/// `BindGroupLayoutEntry`. +#[derive(Clone, Debug)] +pub struct BindGroupDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub layout: &'a A::BindGroupLayout, + pub buffers: &'a [BufferBinding<'a, A>], + pub samplers: &'a [&'a A::Sampler], + pub textures: &'a [TextureBinding<'a, A>], + pub entries: &'a [BindGroupEntry], +} + +#[derive(Clone, Debug)] +pub struct CommandEncoderDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub queue: &'a A::Queue, +} + +/// Naga shader module. +pub struct NagaShader { + /// Shader module IR. + pub module: Cow<'static, naga::Module>, + /// Analysis information of the module. + pub info: naga::valid::ModuleInfo, +} + +// Custom implementation avoids the need to generate Debug impl code +// for the whole Naga module and info. +impl fmt::Debug for NagaShader { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "Naga shader") + } +} + +/// Shader input. +#[allow(clippy::large_enum_variant)] +pub enum ShaderInput<'a> { + Naga(NagaShader), + SpirV(&'a [u32]), +} + +pub struct ShaderModuleDescriptor<'a> { + pub label: Label<'a>, + pub runtime_checks: bool, +} + +/// Describes a programmable pipeline stage. +#[derive(Debug)] +pub struct ProgrammableStage<'a, A: Api> { + /// The compiled shader module for this stage. + pub module: &'a A::ShaderModule, + /// The name of the entry point in the compiled shader. There must be a function with this name + /// in the shader. + pub entry_point: &'a str, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for ProgrammableStage<'_, A> { + fn clone(&self) -> Self { + Self { + module: self.module, + entry_point: self.entry_point, + } + } +} + +/// Describes a compute pipeline. +#[derive(Clone, Debug)] +pub struct ComputePipelineDescriptor<'a, A: Api> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: &'a A::PipelineLayout, + /// The compiled compute stage and its entry point. + pub stage: ProgrammableStage<'a, A>, +} + +/// Describes how the vertex buffer is interpreted. +#[derive(Clone, Debug)] +pub struct VertexBufferLayout<'a> { + /// The stride, in bytes, between elements of this buffer. + pub array_stride: wgt::BufferAddress, + /// How often this vertex buffer is "stepped" forward. + pub step_mode: wgt::VertexStepMode, + /// The list of attributes which comprise a single vertex. + pub attributes: &'a [wgt::VertexAttribute], +} + +/// Describes a render (graphics) pipeline. +#[derive(Clone, Debug)] +pub struct RenderPipelineDescriptor<'a, A: Api> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: &'a A::PipelineLayout, + /// The format of any vertex buffers used with this pipeline. + pub vertex_buffers: &'a [VertexBufferLayout<'a>], + /// The vertex stage for this pipeline. + pub vertex_stage: ProgrammableStage<'a, A>, + /// The properties of the pipeline at the primitive assembly and rasterization level. + pub primitive: wgt::PrimitiveState, + /// The effect of draw calls on the depth and stencil aspects of the output target, if any. + pub depth_stencil: Option<wgt::DepthStencilState>, + /// The multi-sampling properties of the pipeline. + pub multisample: wgt::MultisampleState, + /// The fragment stage for this pipeline. + pub fragment_stage: Option<ProgrammableStage<'a, A>>, + /// The effect of draw calls on the color aspect of the output target. + pub color_targets: &'a [Option<wgt::ColorTargetState>], + /// If the pipeline will be used with a multiview render pass, this indicates how many array + /// layers the attachments will have. + pub multiview: Option<NonZeroU32>, +} + +#[derive(Debug, Clone)] +pub struct SurfaceConfiguration { + /// Number of textures in the swap chain. Must be in + /// `SurfaceCapabilities::swap_chain_size` range. + pub swap_chain_size: u32, + /// Vertical synchronization mode. + pub present_mode: wgt::PresentMode, + /// Alpha composition mode. + pub composite_alpha_mode: wgt::CompositeAlphaMode, + /// Format of the surface textures. + pub format: wgt::TextureFormat, + /// Requested texture extent. Must be in + /// `SurfaceCapabilities::extents` range. + pub extent: wgt::Extent3d, + /// Allowed usage of surface textures, + pub usage: TextureUses, + /// Allows views of swapchain texture to have a different format + /// than the texture does. + pub view_formats: Vec<wgt::TextureFormat>, +} + +#[derive(Debug, Clone)] +pub struct Rect<T> { + pub x: T, + pub y: T, + pub w: T, + pub h: T, +} + +#[derive(Debug, Clone)] +pub struct BufferBarrier<'a, A: Api> { + pub buffer: &'a A::Buffer, + pub usage: Range<BufferUses>, +} + +#[derive(Debug, Clone)] +pub struct TextureBarrier<'a, A: Api> { + pub texture: &'a A::Texture, + pub range: wgt::ImageSubresourceRange, + pub usage: Range<TextureUses>, +} + +#[derive(Clone, Copy, Debug)] +pub struct BufferCopy { + pub src_offset: wgt::BufferAddress, + pub dst_offset: wgt::BufferAddress, + pub size: wgt::BufferSize, +} + +#[derive(Clone, Debug)] +pub struct TextureCopyBase { + pub mip_level: u32, + pub array_layer: u32, + /// Origin within a texture. + /// Note: for 1D and 2D textures, Z must be 0. + pub origin: wgt::Origin3d, + pub aspect: FormatAspects, +} + +#[derive(Clone, Copy, Debug)] +pub struct CopyExtent { + pub width: u32, + pub height: u32, + pub depth: u32, +} + +#[derive(Clone, Debug)] +pub struct TextureCopy { + pub src_base: TextureCopyBase, + pub dst_base: TextureCopyBase, + pub size: CopyExtent, +} + +#[derive(Clone, Debug)] +pub struct BufferTextureCopy { + pub buffer_layout: wgt::ImageDataLayout, + pub texture_base: TextureCopyBase, + pub size: CopyExtent, +} + +#[derive(Debug)] +pub struct Attachment<'a, A: Api> { + pub view: &'a A::TextureView, + /// Contains either a single mutating usage as a target, + /// or a valid combination of read-only usages. + pub usage: TextureUses, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for Attachment<'_, A> { + fn clone(&self) -> Self { + Self { + view: self.view, + usage: self.usage, + } + } +} + +#[derive(Debug)] +pub struct ColorAttachment<'a, A: Api> { + pub target: Attachment<'a, A>, + pub resolve_target: Option<Attachment<'a, A>>, + pub ops: AttachmentOps, + pub clear_value: wgt::Color, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for ColorAttachment<'_, A> { + fn clone(&self) -> Self { + Self { + target: self.target.clone(), + resolve_target: self.resolve_target.clone(), + ops: self.ops, + clear_value: self.clear_value, + } + } +} + +#[derive(Clone, Debug)] +pub struct DepthStencilAttachment<'a, A: Api> { + pub target: Attachment<'a, A>, + pub depth_ops: AttachmentOps, + pub stencil_ops: AttachmentOps, + pub clear_value: (f32, u32), +} + +#[derive(Clone, Debug)] +pub struct RenderPassDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub extent: wgt::Extent3d, + pub sample_count: u32, + pub color_attachments: &'a [Option<ColorAttachment<'a, A>>], + pub depth_stencil_attachment: Option<DepthStencilAttachment<'a, A>>, + pub multiview: Option<NonZeroU32>, +} + +#[derive(Clone, Debug)] +pub struct ComputePassDescriptor<'a> { + pub label: Label<'a>, +} + +/// Stores if any API validation error has occurred in this process +/// since it was last reset. +/// +/// This is used for internal wgpu testing only and _must not_ be used +/// as a way to check for errors. +/// +/// This works as a static because `cargo nextest` runs all of our +/// tests in separate processes, so each test gets its own canary. +/// +/// This prevents the issue of one validation error terminating the +/// entire process. +pub static VALIDATION_CANARY: ValidationCanary = ValidationCanary { + inner: AtomicBool::new(false), +}; + +/// Flag for internal testing. +pub struct ValidationCanary { + inner: AtomicBool, +} + +impl ValidationCanary { + #[allow(dead_code)] // in some configurations this function is dead + fn set(&self) { + self.inner.store(true, std::sync::atomic::Ordering::SeqCst); + } + + /// Returns true if any API validation error has occurred in this process + /// since the last call to this function. + pub fn get_and_reset(&self) -> bool { + self.inner.swap(false, std::sync::atomic::Ordering::SeqCst) + } +} + +#[test] +fn test_default_limits() { + let limits = wgt::Limits::default(); + assert!(limits.max_bind_groups <= MAX_BIND_GROUPS as u32); +} diff --git a/third_party/rust/wgpu-hal/src/metal/adapter.rs b/third_party/rust/wgpu-hal/src/metal/adapter.rs new file mode 100644 index 0000000000..e5c3de3417 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/adapter.rs @@ -0,0 +1,1045 @@ +use metal::{MTLFeatureSet, MTLGPUFamily, MTLLanguageVersion, MTLReadWriteTextureTier}; +use objc::{class, msg_send, sel, sel_impl}; +use parking_lot::Mutex; +use wgt::{AstcBlock, AstcChannel}; + +use std::{sync::Arc, thread}; + +const MAX_COMMAND_BUFFERS: u64 = 2048; + +unsafe impl Send for super::Adapter {} +unsafe impl Sync for super::Adapter {} + +impl super::Adapter { + pub(super) fn new(shared: Arc<super::AdapterShared>) -> Self { + Self { shared } + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let queue = self + .shared + .device + .lock() + .new_command_queue_with_max_command_buffer_count(MAX_COMMAND_BUFFERS); + Ok(crate::OpenDevice { + device: super::Device { + shared: Arc::clone(&self.shared), + features, + }, + queue: super::Queue { + raw: Arc::new(Mutex::new(queue)), + }, + }) + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + use wgt::TextureFormat as Tf; + + let pc = &self.shared.private_caps; + // Affected formats documented at: + // https://developer.apple.com/documentation/metal/mtlreadwritetexturetier/mtlreadwritetexturetier1?language=objc + // https://developer.apple.com/documentation/metal/mtlreadwritetexturetier/mtlreadwritetexturetier2?language=objc + let (read_write_tier1_if, read_write_tier2_if) = match pc.read_write_texture_tier { + metal::MTLReadWriteTextureTier::TierNone => (Tfc::empty(), Tfc::empty()), + metal::MTLReadWriteTextureTier::Tier1 => (Tfc::STORAGE_READ_WRITE, Tfc::empty()), + metal::MTLReadWriteTextureTier::Tier2 => { + (Tfc::STORAGE_READ_WRITE, Tfc::STORAGE_READ_WRITE) + } + }; + let msaa_count = pc.sample_count_mask; + + let msaa_resolve_desktop_if = if pc.msaa_desktop { + Tfc::MULTISAMPLE_RESOLVE + } else { + Tfc::empty() + }; + let msaa_resolve_apple3x_if = if pc.msaa_desktop | pc.msaa_apple3 { + Tfc::MULTISAMPLE_RESOLVE + } else { + Tfc::empty() + }; + let is_not_apple1x = super::PrivateCapabilities::supports_any( + self.shared.device.lock().as_ref(), + &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + ], + ); + + // Metal defined pixel format capabilities + let all_caps = Tfc::SAMPLED_LINEAR + | Tfc::STORAGE + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | Tfc::MULTISAMPLE_RESOLVE; + + let extra = match format { + Tf::R8Unorm | Tf::R16Float | Tf::Rgba8Unorm | Tf::Rgba16Float => { + read_write_tier2_if | all_caps + } + Tf::R8Snorm | Tf::Rg8Snorm | Tf::Rgba8Snorm => { + let mut flags = all_caps; + flags.set(Tfc::MULTISAMPLE_RESOLVE, is_not_apple1x); + flags + } + Tf::R8Uint + | Tf::R8Sint + | Tf::R16Uint + | Tf::R16Sint + | Tf::Rgba8Uint + | Tf::Rgba8Sint + | Tf::Rgba16Uint + | Tf::Rgba16Sint => { + read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::R16Unorm + | Tf::R16Snorm + | Tf::Rg16Unorm + | Tf::Rg16Snorm + | Tf::Rgba16Unorm + | Tf::Rgba16Snorm => { + Tfc::SAMPLED_LINEAR + | Tfc::STORAGE + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | msaa_resolve_desktop_if + } + Tf::Rg8Unorm | Tf::Rg16Float | Tf::Bgra8Unorm => all_caps, + Tf::Rg8Uint | Tf::Rg8Sint => Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count, + Tf::R32Uint | Tf::R32Sint => { + read_write_tier1_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::R32Float => { + let flags = if pc.format_r32float_all { + all_caps + } else { + Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | Tfc::COLOR_ATTACHMENT_BLEND | msaa_count + }; + read_write_tier1_if | flags + } + Tf::Rg16Uint | Tf::Rg16Sint => Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count, + Tf::Rgba8UnormSrgb | Tf::Bgra8UnormSrgb => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rgba8_srgb_all); + flags + } + Tf::Rgb10a2Unorm => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rgb10a2_unorm_all); + flags + } + Tf::Rg11b10Float => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rg11b10_all); + flags + } + Tf::Rg32Uint | Tf::Rg32Sint => Tfc::COLOR_ATTACHMENT | Tfc::STORAGE | msaa_count, + Tf::Rg32Float => { + if pc.format_rg32float_all { + all_caps + } else { + Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | Tfc::COLOR_ATTACHMENT_BLEND | msaa_count + } + } + Tf::Rgba32Uint | Tf::Rgba32Sint => { + read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::Rgba32Float => { + let mut flags = read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT; + if pc.format_rgba32float_all { + flags |= all_caps + } else if pc.msaa_apple7 { + flags |= msaa_count + }; + flags + } + Tf::Stencil8 => { + all_caps | Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if + } + Tf::Depth16Unorm => { + let mut flags = + Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if; + if pc.format_depth16unorm { + flags |= Tfc::SAMPLED_LINEAR + } + flags + } + Tf::Depth32Float | Tf::Depth32FloatStencil8 => { + let mut flags = + Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if; + if pc.format_depth32float_filter { + flags |= Tfc::SAMPLED_LINEAR + } + flags + } + Tf::Depth24Plus | Tf::Depth24PlusStencil8 => { + let mut flags = Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count; + if pc.format_depth24_stencil8 { + flags |= Tfc::SAMPLED_LINEAR | Tfc::MULTISAMPLE_RESOLVE + } else { + flags |= msaa_resolve_apple3x_if; + if pc.format_depth32float_filter { + flags |= Tfc::SAMPLED_LINEAR + } + } + flags + } + Tf::Rgb9e5Ufloat => { + if pc.msaa_apple3 { + all_caps + } else if pc.msaa_desktop { + Tfc::SAMPLED_LINEAR + } else { + Tfc::SAMPLED_LINEAR + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | Tfc::MULTISAMPLE_RESOLVE + } + } + Tf::Bc1RgbaUnorm + | Tf::Bc1RgbaUnormSrgb + | Tf::Bc2RgbaUnorm + | Tf::Bc2RgbaUnormSrgb + | Tf::Bc3RgbaUnorm + | Tf::Bc3RgbaUnormSrgb + | Tf::Bc4RUnorm + | Tf::Bc4RSnorm + | Tf::Bc5RgUnorm + | Tf::Bc5RgSnorm + | Tf::Bc6hRgbUfloat + | Tf::Bc6hRgbFloat + | Tf::Bc7RgbaUnorm + | Tf::Bc7RgbaUnormSrgb => { + if pc.format_bc { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | Tf::EacRg11Unorm + | Tf::EacRg11Snorm => { + if pc.format_eac_etc { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + Tf::Astc { + block: _, + channel: _, + } => { + if pc.format_astc || pc.format_astc_hdr { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + }; + + Tfc::COPY_SRC | Tfc::COPY_DST | Tfc::SAMPLED | extra + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + let current_extent = if surface.main_thread_id == thread::current().id() { + Some(surface.dimensions()) + } else { + log::warn!("Unable to get the current view dimensions on a non-main thread"); + None + }; + + let mut formats = vec![ + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Rgba16Float, + ]; + if self.shared.private_caps.format_rgb10a2_unorm_all { + formats.push(wgt::TextureFormat::Rgb10a2Unorm); + } + + let pc = &self.shared.private_caps; + Some(crate::SurfaceCapabilities { + formats, + //Note: this is hardcoded in `CAMetalLayer` documentation + swap_chain_sizes: if pc.can_set_maximum_drawables_count { + 2..=3 + } else { + // 3 is the default in `CAMetalLayer` documentation + // iOS 10.3 was tested to use 3 on iphone5s + 3..=3 + }, + present_modes: if pc.can_set_display_sync { + vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate] + } else { + vec![wgt::PresentMode::Fifo] + }, + composite_alpha_modes: vec![ + wgt::CompositeAlphaMode::Opaque, + wgt::CompositeAlphaMode::PostMultiplied, + ], + + current_extent, + extents: wgt::Extent3d { + width: 4, + height: 4, + depth_or_array_layers: 1, + }..=wgt::Extent3d { + width: pc.max_texture_size as u32, + height: pc.max_texture_size as u32, + depth_or_array_layers: 1, + }, + usage: crate::TextureUses::COLOR_TARGET | crate::TextureUses::COPY_DST, //TODO: expose more + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + let timestamp = self.shared.presentation_timer.get_timestamp_ns(); + + wgt::PresentationTimestamp(timestamp) + } +} + +const RESOURCE_HEAP_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v3, +]; + +const ARGUMENT_BUFFER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v3, +]; + +const MUTABLE_COMPARISON_SAMPLER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const SAMPLER_CLAMP_TO_BORDER_SUPPORT: &[MTLFeatureSet] = &[MTLFeatureSet::macOS_GPUFamily1_v2]; + +const ASTC_PIXEL_FORMAT_FEATURES: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, +]; + +const ANY8_UNORM_SRGB_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, +]; + +const ANY8_SNORM_RESOLVE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGBA8_SRGB: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, +]; + +const RGB10A2UNORM_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGB10A2UINT_COLOR_WRITE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RG11B10FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGB9E5FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, +]; + +const BGR10A2_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const BASE_INSTANCE_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const BASE_VERTEX_INSTANCE_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const TEXTURE_CUBE_ARRAY_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const DUAL_SOURCE_BLEND_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v2, +]; + +const LAYERED_RENDERING_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const FUNCTION_SPECIALIZATION_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v2, +]; + +const DEPTH_CLIP_MODE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const OS_NOT_SUPPORT: (usize, usize) = (10000, 0); + +impl super::PrivateCapabilities { + fn supports_any(raw: &metal::DeviceRef, features_sets: &[MTLFeatureSet]) -> bool { + features_sets + .iter() + .cloned() + .any(|x| raw.supports_feature_set(x)) + } + + pub fn new(device: &metal::Device) -> Self { + #[repr(C)] + #[derive(Clone, Copy, Debug)] + #[allow(clippy::upper_case_acronyms)] + struct NSOperatingSystemVersion { + major: usize, + minor: usize, + patch: usize, + } + + impl NSOperatingSystemVersion { + fn at_least( + &self, + mac_version: (usize, usize), + ios_version: (usize, usize), + is_mac: bool, + ) -> bool { + if is_mac { + self.major > mac_version.0 + || (self.major == mac_version.0 && self.minor >= mac_version.1) + } else { + self.major > ios_version.0 + || (self.major == ios_version.0 && self.minor >= ios_version.1) + } + } + } + + let version: NSOperatingSystemVersion = unsafe { + let process_info: *mut objc::runtime::Object = + msg_send![class!(NSProcessInfo), processInfo]; + msg_send![process_info, operatingSystemVersion] + }; + + let os_is_mac = device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1); + let family_check = version.at_least((10, 15), (13, 0), os_is_mac); + + let mut sample_count_mask = crate::TextureFormatCapabilities::MULTISAMPLE_X4; // 1 and 4 samples are supported on all devices + if device.supports_texture_sample_count(2) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X2; + } + if device.supports_texture_sample_count(8) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X8; + } + if device.supports_texture_sample_count(16) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X16; + } + + let rw_texture_tier = if version.at_least((10, 13), (11, 0), os_is_mac) { + device.read_write_texture_support() + } else if version.at_least((10, 12), OS_NOT_SUPPORT, os_is_mac) { + if Self::supports_any(device, &[MTLFeatureSet::macOS_ReadWriteTextureTier2]) { + MTLReadWriteTextureTier::Tier2 + } else { + MTLReadWriteTextureTier::Tier1 + } + } else { + MTLReadWriteTextureTier::TierNone + }; + + Self { + family_check, + msl_version: if version.at_least((12, 0), (15, 0), os_is_mac) { + MTLLanguageVersion::V2_4 + } else if version.at_least((11, 0), (14, 0), os_is_mac) { + MTLLanguageVersion::V2_3 + } else if version.at_least((10, 15), (13, 0), os_is_mac) { + MTLLanguageVersion::V2_2 + } else if version.at_least((10, 14), (12, 0), os_is_mac) { + MTLLanguageVersion::V2_1 + } else if version.at_least((10, 13), (11, 0), os_is_mac) { + MTLLanguageVersion::V2_0 + } else if version.at_least((10, 12), (10, 0), os_is_mac) { + MTLLanguageVersion::V1_2 + } else if version.at_least((10, 11), (9, 0), os_is_mac) { + MTLLanguageVersion::V1_1 + } else { + MTLLanguageVersion::V1_0 + }, + // macOS 10.11 doesn't support read-write resources + fragment_rw_storage: version.at_least((10, 12), (8, 0), os_is_mac), + read_write_texture_tier: rw_texture_tier, + msaa_desktop: os_is_mac, + msaa_apple3: if family_check { + device.supports_family(MTLGPUFamily::Apple3) + } else { + device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily3_v4) + }, + msaa_apple7: family_check && device.supports_family(MTLGPUFamily::Apple7), + resource_heaps: Self::supports_any(device, RESOURCE_HEAP_SUPPORT), + argument_buffers: Self::supports_any(device, ARGUMENT_BUFFER_SUPPORT), + shared_textures: !os_is_mac, + mutable_comparison_samplers: Self::supports_any( + device, + MUTABLE_COMPARISON_SAMPLER_SUPPORT, + ), + sampler_clamp_to_border: Self::supports_any(device, SAMPLER_CLAMP_TO_BORDER_SUPPORT), + base_instance: Self::supports_any(device, BASE_INSTANCE_SUPPORT), + base_vertex_instance_drawing: Self::supports_any(device, BASE_VERTEX_INSTANCE_SUPPORT), + dual_source_blending: Self::supports_any(device, DUAL_SOURCE_BLEND_SUPPORT), + low_power: !os_is_mac || device.is_low_power(), + headless: os_is_mac && device.is_headless(), + layered_rendering: Self::supports_any(device, LAYERED_RENDERING_SUPPORT), + function_specialization: Self::supports_any(device, FUNCTION_SPECIALIZATION_SUPPORT), + depth_clip_mode: Self::supports_any(device, DEPTH_CLIP_MODE), + texture_cube_array: Self::supports_any(device, TEXTURE_CUBE_ARRAY_SUPPORT), + format_depth24_stencil8: os_is_mac && device.d24_s8_supported(), + format_depth32_stencil8_filter: os_is_mac, + format_depth32_stencil8_none: !os_is_mac, + format_min_srgb_channels: if os_is_mac { 4 } else { 1 }, + format_b5: !os_is_mac, + format_bc: os_is_mac, + format_eac_etc: !os_is_mac + // M1 in macOS supports EAC/ETC2 + || (family_check && device.supports_family(MTLGPUFamily::Apple7)), + // A8(Apple2) and later always support ASTC pixel formats + format_astc: (family_check && device.supports_family(MTLGPUFamily::Apple2)) + || Self::supports_any(device, ASTC_PIXEL_FORMAT_FEATURES), + // A13(Apple6) M1(Apple7) and later always support HDR ASTC pixel formats + format_astc_hdr: family_check && device.supports_family(MTLGPUFamily::Apple6), + format_any8_unorm_srgb_all: Self::supports_any(device, ANY8_UNORM_SRGB_ALL), + format_any8_unorm_srgb_no_write: !Self::supports_any(device, ANY8_UNORM_SRGB_ALL) + && !os_is_mac, + format_any8_snorm_all: Self::supports_any(device, ANY8_SNORM_RESOLVE), + format_r16_norm_all: os_is_mac, + // No devices support r32's all capabilities + format_r32_all: false, + // All devices support r32's write capability + format_r32_no_write: false, + // iOS support r32float's write capability, macOS support r32float's all capabilities + format_r32float_no_write_no_filter: false, + // Only iOS doesn't support r32float's filter capability + format_r32float_no_filter: !os_is_mac, + format_r32float_all: os_is_mac, + format_rgba8_srgb_all: Self::supports_any(device, RGBA8_SRGB), + format_rgba8_srgb_no_write: !Self::supports_any(device, RGBA8_SRGB), + format_rgb10a2_unorm_all: Self::supports_any(device, RGB10A2UNORM_ALL), + format_rgb10a2_unorm_no_write: !Self::supports_any(device, RGB10A2UNORM_ALL), + format_rgb10a2_uint_color: !Self::supports_any(device, RGB10A2UINT_COLOR_WRITE), + format_rgb10a2_uint_color_write: Self::supports_any(device, RGB10A2UINT_COLOR_WRITE), + format_rg11b10_all: Self::supports_any(device, RG11B10FLOAT_ALL), + format_rg11b10_no_write: !Self::supports_any(device, RG11B10FLOAT_ALL), + format_rgb9e5_all: Self::supports_any(device, RGB9E5FLOAT_ALL), + format_rgb9e5_no_write: !Self::supports_any(device, RGB9E5FLOAT_ALL) && !os_is_mac, + format_rgb9e5_filter_only: os_is_mac, + format_rg32_color: true, + format_rg32_color_write: true, + // Only macOS support rg32float's all capabilities + format_rg32float_all: os_is_mac, + // All devices support rg32float's color + blend capabilities + format_rg32float_color_blend: true, + // Only iOS doesn't support rg32float's filter + format_rg32float_no_filter: !os_is_mac, + format_rgba32int_color: true, + // All devices support rgba32uint and rgba32sint's color + write capabilities + format_rgba32int_color_write: true, + format_rgba32float_color: true, + // All devices support rgba32float's color + write capabilities + format_rgba32float_color_write: true, + // Only macOS support rgba32float's all capabilities + format_rgba32float_all: os_is_mac, + format_depth16unorm: Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v3, + MTLFeatureSet::macOS_GPUFamily1_v2, + ], + ), + format_depth32float_filter: os_is_mac, + format_depth32float_none: !os_is_mac, + format_bgr10a2_all: Self::supports_any(device, BGR10A2_ALL), + format_bgr10a2_no_write: !Self::supports_any(device, BGR10A2_ALL), + max_buffers_per_stage: 31, + max_vertex_buffers: 31, + max_textures_per_stage: if os_is_mac + || (family_check && device.supports_family(MTLGPUFamily::Apple6)) + { + 128 + } else if family_check && device.supports_family(MTLGPUFamily::Apple4) { + 96 + } else { + 31 + }, + max_samplers_per_stage: 16, + buffer_alignment: if os_is_mac { 256 } else { 64 }, + max_buffer_size: if version.at_least((10, 14), (12, 0), os_is_mac) { + // maxBufferLength available on macOS 10.14+ and iOS 12.0+ + let buffer_size: metal::NSInteger = + unsafe { msg_send![device.as_ref(), maxBufferLength] }; + buffer_size as _ + } else if os_is_mac { + 1 << 30 // 1GB on macOS 10.11 and up + } else { + 1 << 28 // 256MB on iOS 8.0+ + }, + max_texture_size: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 16384 + } else { + 8192 + }, + max_texture_3d_size: 2048, + max_texture_layers: 2048, + max_fragment_input_components: if os_is_mac + || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1) + { + 124 + } else { + 60 + }, + max_color_render_targets: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 8 + } else { + 4 + }, + max_varying_components: if device + .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1) + { + 124 + } else { + 60 + }, + max_threads_per_group: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily4_v2, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 1024 + } else { + 512 + }, + max_total_threadgroup_memory: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::macOS_GPUFamily1_v2, + ], + ) { + 32 << 10 + } else { + 16 << 10 + }, + sample_count_mask, + supports_debug_markers: Self::supports_any( + device, + &[ + MTLFeatureSet::macOS_GPUFamily1_v2, + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + ], + ), + supports_binary_archives: family_check + && (device.supports_family(MTLGPUFamily::Apple3) + || device.supports_family(MTLGPUFamily::Mac1)), + supports_capture_manager: version.at_least((10, 13), (11, 0), os_is_mac), + can_set_maximum_drawables_count: version.at_least((10, 14), (11, 2), os_is_mac), + can_set_display_sync: version.at_least((10, 13), OS_NOT_SUPPORT, os_is_mac), + can_set_next_drawable_timeout: version.at_least((10, 13), (11, 0), os_is_mac), + supports_arrays_of_textures: Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v2, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v3, + ], + ), + supports_arrays_of_textures_write: family_check + && (device.supports_family(MTLGPUFamily::Apple6) + || device.supports_family(MTLGPUFamily::Mac1) + || device.supports_family(MTLGPUFamily::MacCatalyst1)), + supports_mutability: version.at_least((10, 13), (11, 0), os_is_mac), + //Depth clipping is supported on all macOS GPU families and iOS family 4 and later + supports_depth_clip_control: os_is_mac + || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1), + supports_preserve_invariance: version.at_least((11, 0), (13, 0), os_is_mac), + // Metal 2.2 on mac, 2.3 on iOS. + supports_shader_primitive_index: version.at_least((10, 15), (14, 0), os_is_mac), + has_unified_memory: if version.at_least((10, 15), (13, 0), os_is_mac) { + Some(device.has_unified_memory()) + } else { + None + }, + } + } + + pub fn device_type(&self) -> wgt::DeviceType { + if self.has_unified_memory.unwrap_or(self.low_power) { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + } + } + + pub fn features(&self) -> wgt::Features { + use wgt::Features as F; + + let mut features = F::empty() + | F::INDIRECT_FIRST_INSTANCE + | F::MAPPABLE_PRIMARY_BUFFERS + | F::VERTEX_WRITABLE_STORAGE + | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | F::PUSH_CONSTANTS + | F::POLYGON_MODE_LINE + | F::CLEAR_TEXTURE + | F::TEXTURE_FORMAT_16BIT_NORM + | F::SHADER_F16 + | F::DEPTH32FLOAT_STENCIL8 + | F::MULTI_DRAW_INDIRECT; + + features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); + features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); + features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc); + features.set(F::TEXTURE_COMPRESSION_ETC2, self.format_eac_etc); + + features.set(F::DEPTH_CLIP_CONTROL, self.supports_depth_clip_control); + features.set( + F::SHADER_PRIMITIVE_INDEX, + self.supports_shader_primitive_index, + ); + + features.set( + F::TEXTURE_BINDING_ARRAY + | F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + self.msl_version >= MTLLanguageVersion::V2_0 && self.supports_arrays_of_textures, + ); + //// XXX: this is technically not true, as read-only storage images can be used in arrays + //// on precisely the same conditions that sampled textures can. But texel fetch from a + //// sampled texture is a thing; should we bother introducing another feature flag? + if self.msl_version >= MTLLanguageVersion::V2_2 + && self.supports_arrays_of_textures + && self.supports_arrays_of_textures_write + { + features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY); + } + + features.set( + F::ADDRESS_MODE_CLAMP_TO_BORDER, + self.sampler_clamp_to_border, + ); + features.set(F::ADDRESS_MODE_CLAMP_TO_ZERO, true); + + features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); + + features + } + + pub fn capabilities(&self) -> crate::Capabilities { + let mut downlevel = wgt::DownlevelCapabilities::default(); + downlevel.flags.set( + wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE, + self.fragment_rw_storage, + ); + downlevel.flags.set( + wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES, + self.texture_cube_array, + ); + //TODO: separate the mutable comparisons from immutable ones + downlevel.flags.set( + wgt::DownlevelFlags::COMPARISON_SAMPLERS, + self.mutable_comparison_samplers, + ); + downlevel + .flags + .set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, true); + + let base = wgt::Limits::default(); + crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: self.max_texture_size as u32, + max_texture_dimension_2d: self.max_texture_size as u32, + max_texture_dimension_3d: self.max_texture_3d_size as u32, + max_texture_array_layers: self.max_texture_layers as u32, + max_bind_groups: 8, + max_bindings_per_bind_group: 65535, + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: self.max_textures_per_stage, + max_samplers_per_shader_stage: self.max_samplers_per_stage, + max_storage_buffers_per_shader_stage: self.max_buffers_per_stage, + max_storage_textures_per_shader_stage: self.max_textures_per_stage, + max_uniform_buffers_per_shader_stage: self.max_buffers_per_stage, + max_uniform_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, + max_storage_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, + max_vertex_buffers: self.max_vertex_buffers, + max_vertex_attributes: 31, + max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, + max_push_constant_size: 0x1000, + min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, + min_storage_buffer_offset_alignment: self.buffer_alignment as u32, + max_inter_stage_shader_components: self.max_varying_components, + max_compute_workgroup_storage_size: self.max_total_threadgroup_memory, + max_compute_invocations_per_workgroup: self.max_threads_per_group, + max_compute_workgroup_size_x: self.max_threads_per_group, + max_compute_workgroup_size_y: self.max_threads_per_group, + max_compute_workgroup_size_z: self.max_threads_per_group, + max_compute_workgroups_per_dimension: 0xFFFF, + max_buffer_size: self.max_buffer_size, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(), + }, + downlevel, + } + } + + pub fn map_format(&self, format: wgt::TextureFormat) -> metal::MTLPixelFormat { + use metal::MTLPixelFormat::*; + use wgt::TextureFormat as Tf; + match format { + Tf::R8Unorm => R8Unorm, + Tf::R8Snorm => R8Snorm, + Tf::R8Uint => R8Uint, + Tf::R8Sint => R8Sint, + Tf::R16Uint => R16Uint, + Tf::R16Sint => R16Sint, + Tf::R16Unorm => R16Unorm, + Tf::R16Snorm => R16Snorm, + Tf::R16Float => R16Float, + Tf::Rg8Unorm => RG8Unorm, + Tf::Rg8Snorm => RG8Snorm, + Tf::Rg8Uint => RG8Uint, + Tf::Rg8Sint => RG8Sint, + Tf::Rg16Unorm => RG16Unorm, + Tf::Rg16Snorm => RG16Snorm, + Tf::R32Uint => R32Uint, + Tf::R32Sint => R32Sint, + Tf::R32Float => R32Float, + Tf::Rg16Uint => RG16Uint, + Tf::Rg16Sint => RG16Sint, + Tf::Rg16Float => RG16Float, + Tf::Rgba8Unorm => RGBA8Unorm, + Tf::Rgba8UnormSrgb => RGBA8Unorm_sRGB, + Tf::Bgra8UnormSrgb => BGRA8Unorm_sRGB, + Tf::Rgba8Snorm => RGBA8Snorm, + Tf::Bgra8Unorm => BGRA8Unorm, + Tf::Rgba8Uint => RGBA8Uint, + Tf::Rgba8Sint => RGBA8Sint, + Tf::Rgb10a2Unorm => RGB10A2Unorm, + Tf::Rg11b10Float => RG11B10Float, + Tf::Rg32Uint => RG32Uint, + Tf::Rg32Sint => RG32Sint, + Tf::Rg32Float => RG32Float, + Tf::Rgba16Uint => RGBA16Uint, + Tf::Rgba16Sint => RGBA16Sint, + Tf::Rgba16Unorm => RGBA16Unorm, + Tf::Rgba16Snorm => RGBA16Snorm, + Tf::Rgba16Float => RGBA16Float, + Tf::Rgba32Uint => RGBA32Uint, + Tf::Rgba32Sint => RGBA32Sint, + Tf::Rgba32Float => RGBA32Float, + Tf::Stencil8 => Stencil8, + Tf::Depth16Unorm => Depth16Unorm, + Tf::Depth32Float => Depth32Float, + Tf::Depth32FloatStencil8 => Depth32Float_Stencil8, + Tf::Depth24Plus => { + if self.format_depth24_stencil8 { + Depth24Unorm_Stencil8 + } else { + Depth32Float + } + } + Tf::Depth24PlusStencil8 => { + if self.format_depth24_stencil8 { + Depth24Unorm_Stencil8 + } else { + Depth32Float_Stencil8 + } + } + Tf::Rgb9e5Ufloat => RGB9E5Float, + Tf::Bc1RgbaUnorm => BC1_RGBA, + Tf::Bc1RgbaUnormSrgb => BC1_RGBA_sRGB, + Tf::Bc2RgbaUnorm => BC2_RGBA, + Tf::Bc2RgbaUnormSrgb => BC2_RGBA_sRGB, + Tf::Bc3RgbaUnorm => BC3_RGBA, + Tf::Bc3RgbaUnormSrgb => BC3_RGBA_sRGB, + Tf::Bc4RUnorm => BC4_RUnorm, + Tf::Bc4RSnorm => BC4_RSnorm, + Tf::Bc5RgUnorm => BC5_RGUnorm, + Tf::Bc5RgSnorm => BC5_RGSnorm, + Tf::Bc6hRgbFloat => BC6H_RGBFloat, + Tf::Bc6hRgbUfloat => BC6H_RGBUfloat, + Tf::Bc7RgbaUnorm => BC7_RGBAUnorm, + Tf::Bc7RgbaUnormSrgb => BC7_RGBAUnorm_sRGB, + Tf::Etc2Rgb8Unorm => ETC2_RGB8, + Tf::Etc2Rgb8UnormSrgb => ETC2_RGB8_sRGB, + Tf::Etc2Rgb8A1Unorm => ETC2_RGB8A1, + Tf::Etc2Rgb8A1UnormSrgb => ETC2_RGB8A1_sRGB, + Tf::Etc2Rgba8Unorm => EAC_RGBA8, + Tf::Etc2Rgba8UnormSrgb => EAC_RGBA8_sRGB, + Tf::EacR11Unorm => EAC_R11Unorm, + Tf::EacR11Snorm => EAC_R11Snorm, + Tf::EacRg11Unorm => EAC_RG11Unorm, + Tf::EacRg11Snorm => EAC_RG11Snorm, + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm => match block { + AstcBlock::B4x4 => ASTC_4x4_LDR, + AstcBlock::B5x4 => ASTC_5x4_LDR, + AstcBlock::B5x5 => ASTC_5x5_LDR, + AstcBlock::B6x5 => ASTC_6x5_LDR, + AstcBlock::B6x6 => ASTC_6x6_LDR, + AstcBlock::B8x5 => ASTC_8x5_LDR, + AstcBlock::B8x6 => ASTC_8x6_LDR, + AstcBlock::B8x8 => ASTC_8x8_LDR, + AstcBlock::B10x5 => ASTC_10x5_LDR, + AstcBlock::B10x6 => ASTC_10x6_LDR, + AstcBlock::B10x8 => ASTC_10x8_LDR, + AstcBlock::B10x10 => ASTC_10x10_LDR, + AstcBlock::B12x10 => ASTC_12x10_LDR, + AstcBlock::B12x12 => ASTC_12x12_LDR, + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => ASTC_4x4_sRGB, + AstcBlock::B5x4 => ASTC_5x4_sRGB, + AstcBlock::B5x5 => ASTC_5x5_sRGB, + AstcBlock::B6x5 => ASTC_6x5_sRGB, + AstcBlock::B6x6 => ASTC_6x6_sRGB, + AstcBlock::B8x5 => ASTC_8x5_sRGB, + AstcBlock::B8x6 => ASTC_8x6_sRGB, + AstcBlock::B8x8 => ASTC_8x8_sRGB, + AstcBlock::B10x5 => ASTC_10x5_sRGB, + AstcBlock::B10x6 => ASTC_10x6_sRGB, + AstcBlock::B10x8 => ASTC_10x8_sRGB, + AstcBlock::B10x10 => ASTC_10x10_sRGB, + AstcBlock::B12x10 => ASTC_12x10_sRGB, + AstcBlock::B12x12 => ASTC_12x12_sRGB, + }, + AstcChannel::Hdr => match block { + AstcBlock::B4x4 => ASTC_4x4_HDR, + AstcBlock::B5x4 => ASTC_5x4_HDR, + AstcBlock::B5x5 => ASTC_5x5_HDR, + AstcBlock::B6x5 => ASTC_6x5_HDR, + AstcBlock::B6x6 => ASTC_6x6_HDR, + AstcBlock::B8x5 => ASTC_8x5_HDR, + AstcBlock::B8x6 => ASTC_8x6_HDR, + AstcBlock::B8x8 => ASTC_8x8_HDR, + AstcBlock::B10x5 => ASTC_10x5_HDR, + AstcBlock::B10x6 => ASTC_10x6_HDR, + AstcBlock::B10x8 => ASTC_10x8_HDR, + AstcBlock::B10x10 => ASTC_10x10_HDR, + AstcBlock::B12x10 => ASTC_12x10_HDR, + AstcBlock::B12x12 => ASTC_12x12_HDR, + }, + }, + } + } + + pub fn map_view_format( + &self, + format: wgt::TextureFormat, + aspects: crate::FormatAspects, + ) -> metal::MTLPixelFormat { + use crate::FormatAspects as Fa; + use metal::MTLPixelFormat::*; + use wgt::TextureFormat as Tf; + match (format, aspects) { + // map combined depth-stencil format to their stencil-only format + // see https://developer.apple.com/library/archive/documentation/Miscellaneous/Conceptual/MetalProgrammingGuide/WhatsNewiniOS10tvOS10andOSX1012/WhatsNewiniOS10tvOS10andOSX1012.html#//apple_ref/doc/uid/TP40014221-CH14-DontLinkElementID_77 + (Tf::Depth24PlusStencil8, Fa::STENCIL) => { + if self.format_depth24_stencil8 { + X24_Stencil8 + } else { + X32_Stencil8 + } + } + (Tf::Depth32FloatStencil8, Fa::STENCIL) => X32_Stencil8, + + _ => self.map_format(format), + } + } +} + +impl super::PrivateDisabilities { + pub fn new(device: &metal::Device) -> Self { + let is_intel = device.name().starts_with("Intel"); + Self { + broken_viewport_near_depth: is_intel + && !device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v4), + broken_layered_clear_image: is_intel, + } + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/command.rs b/third_party/rust/wgpu-hal/src/metal/command.rs new file mode 100644 index 0000000000..866e163a64 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/command.rs @@ -0,0 +1,977 @@ +use super::{conv, AsNative}; +use std::{borrow::Cow, mem, ops::Range}; + +// has to match `Temp::binding_sizes` +const WORD_SIZE: usize = 4; + +impl Default for super::CommandState { + fn default() -> Self { + Self { + blit: None, + render: None, + compute: None, + raw_primitive_type: metal::MTLPrimitiveType::Point, + index: None, + raw_wg_size: metal::MTLSize::new(0, 0, 0), + stage_infos: Default::default(), + storage_buffer_length_map: Default::default(), + work_group_memory_sizes: Vec::new(), + push_constants: Vec::new(), + } + } +} + +impl super::CommandEncoder { + fn enter_blit(&mut self) -> &metal::BlitCommandEncoderRef { + if self.state.blit.is_none() { + debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); + objc::rc::autoreleasepool(|| { + let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); + self.state.blit = Some(cmd_buf.new_blit_command_encoder().to_owned()); + }); + } + self.state.blit.as_ref().unwrap() + } + + pub(super) fn leave_blit(&mut self) { + if let Some(encoder) = self.state.blit.take() { + encoder.end_encoding(); + } + } + + fn enter_any(&mut self) -> Option<&metal::CommandEncoderRef> { + if let Some(ref encoder) = self.state.render { + Some(encoder) + } else if let Some(ref encoder) = self.state.compute { + Some(encoder) + } else if let Some(ref encoder) = self.state.blit { + Some(encoder) + } else { + None + } + } + + fn begin_pass(&mut self) { + self.state.reset(); + self.leave_blit(); + } +} + +impl super::CommandState { + fn reset(&mut self) { + self.storage_buffer_length_map.clear(); + self.stage_infos.vs.clear(); + self.stage_infos.fs.clear(); + self.stage_infos.cs.clear(); + self.work_group_memory_sizes.clear(); + self.push_constants.clear(); + } + + fn make_sizes_buffer_update<'a>( + &self, + stage: naga::ShaderStage, + result_sizes: &'a mut Vec<u32>, + ) -> Option<(u32, &'a [u32])> { + let stage_info = &self.stage_infos[stage]; + let slot = stage_info.sizes_slot?; + + result_sizes.clear(); + result_sizes.extend(stage_info.sized_bindings.iter().map(|br| { + self.storage_buffer_length_map + .get(br) + .map(|size| u32::try_from(size.get()).unwrap_or(u32::MAX)) + .unwrap_or_default() + })); + + if !result_sizes.is_empty() { + Some((slot as _, result_sizes)) + } else { + None + } + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let queue = &self.raw_queue.lock(); + let retain_references = self.shared.settings.retain_command_buffer_references; + let raw = objc::rc::autoreleasepool(move || { + let cmd_buf_ref = if retain_references { + queue.new_command_buffer() + } else { + queue.new_command_buffer_with_unretained_references() + }; + if let Some(label) = label { + cmd_buf_ref.set_label(label); + } + cmd_buf_ref.to_owned() + }); + + self.raw_cmd_buf = Some(raw); + + Ok(()) + } + + unsafe fn discard_encoding(&mut self) { + self.leave_blit(); + // when discarding, we don't have a guarantee that + // everything is in a good state, so check carefully + if let Some(encoder) = self.state.render.take() { + encoder.end_encoding(); + } + if let Some(encoder) = self.state.compute.take() { + encoder.end_encoding(); + } + self.raw_cmd_buf = None; + } + + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + self.leave_blit(); + assert!(self.state.render.is_none()); + assert!(self.state.compute.is_none()); + Ok(super::CommandBuffer { + raw: self.raw_cmd_buf.take().unwrap(), + }) + } + + unsafe fn reset_all<I>(&mut self, _cmd_bufs: I) + where + I: Iterator<Item = super::CommandBuffer>, + { + //do nothing + } + + unsafe fn transition_buffers<'a, T>(&mut self, _barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + } + + unsafe fn transition_textures<'a, T>(&mut self, _barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + let encoder = self.enter_blit(); + encoder.fill_buffer(&buffer.raw, conv::map_range(&range), 0); + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + encoder.copy_from_buffer( + &src.raw, + copy.src_offset, + &dst.raw, + copy.dst_offset, + copy.size.get(), + ); + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let dst_texture = if src.format != dst.format { + let raw_format = self.shared.private_caps.map_format(src.format); + Cow::Owned(objc::rc::autoreleasepool(|| { + dst.raw.new_texture_view(raw_format) + })) + } else { + Cow::Borrowed(&dst.raw) + }; + let encoder = self.enter_blit(); + for copy in regions { + let src_origin = conv::map_origin(©.src_base.origin); + let dst_origin = conv::map_origin(©.dst_base.origin); + // no clamping is done: Metal expects physical sizes here + let extent = conv::map_copy_extent(©.size); + encoder.copy_from_texture( + &src.raw, + copy.src_base.array_layer as u64, + copy.src_base.mip_level as u64, + src_origin, + extent, + &dst_texture, + copy.dst_base.array_layer as u64, + copy.dst_base.mip_level as u64, + dst_origin, + ); + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + let dst_origin = conv::map_origin(©.texture_base.origin); + // Metal expects buffer-texture copies in virtual sizes + let extent = copy + .texture_base + .max_copy_size(&dst.copy_size) + .min(©.size); + let bytes_per_row = copy.buffer_layout.bytes_per_row.unwrap_or(0) as u64; + let image_byte_stride = if extent.depth > 1 { + copy.buffer_layout + .rows_per_image + .map_or(0, |v| v as u64 * bytes_per_row) + } else { + // Don't pass a stride when updating a single layer, otherwise metal validation + // fails when updating a subset of the image due to the stride being larger than + // the amount of data to copy. + 0 + }; + encoder.copy_from_buffer_to_texture( + &src.raw, + copy.buffer_layout.offset, + bytes_per_row, + image_byte_stride, + conv::map_copy_extent(&extent), + &dst.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + dst_origin, + conv::get_blit_option(dst.format, copy.texture_base.aspect), + ); + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + let src_origin = conv::map_origin(©.texture_base.origin); + // Metal expects texture-buffer copies in virtual sizes + let extent = copy + .texture_base + .max_copy_size(&src.copy_size) + .min(©.size); + let bytes_per_row = copy.buffer_layout.bytes_per_row.unwrap_or(0) as u64; + let bytes_per_image = copy + .buffer_layout + .rows_per_image + .map_or(0, |v| v as u64 * bytes_per_row); + encoder.copy_from_texture_to_buffer( + &src.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + src_origin, + conv::map_copy_extent(&extent), + &dst.raw, + copy.buffer_layout.offset, + bytes_per_row, + bytes_per_image, + conv::get_blit_option(src.format, copy.texture_base.aspect), + ); + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + match set.ty { + wgt::QueryType::Occlusion => { + self.state + .render + .as_ref() + .unwrap() + .set_visibility_result_mode( + metal::MTLVisibilityResultMode::Boolean, + index as u64 * crate::QUERY_SIZE, + ); + } + _ => {} + } + } + unsafe fn end_query(&mut self, set: &super::QuerySet, _index: u32) { + match set.ty { + wgt::QueryType::Occlusion => { + self.state + .render + .as_ref() + .unwrap() + .set_visibility_result_mode(metal::MTLVisibilityResultMode::Disabled, 0); + } + _ => {} + } + } + unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) {} + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range<u32>) { + let encoder = self.enter_blit(); + let raw_range = metal::NSRange { + location: range.start as u64 * crate::QUERY_SIZE, + length: (range.end - range.start) as u64 * crate::QUERY_SIZE, + }; + encoder.fill_buffer(&set.raw_buffer, raw_range, 0); + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _: wgt::BufferSize, // Metal doesn't support queries that are bigger than a single element are not supported + ) { + let encoder = self.enter_blit(); + let size = (range.end - range.start) as u64 * crate::QUERY_SIZE; + encoder.copy_from_buffer( + &set.raw_buffer, + range.start as u64 * crate::QUERY_SIZE, + &buffer.raw, + offset, + size, + ); + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + self.begin_pass(); + self.state.index = None; + + objc::rc::autoreleasepool(|| { + let descriptor = metal::RenderPassDescriptor::new(); + //TODO: set visibility results buffer + + for (i, at) in desc.color_attachments.iter().enumerate() { + if let Some(at) = at.as_ref() { + let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + if let Some(ref resolve) = at.resolve_target { + //Note: the selection of levels and slices is already handled by `TextureView` + at_descriptor.set_resolve_texture(Some(&resolve.view.raw)); + } + let load_action = if at.ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_color(conv::map_clear_color(&at.clear_value)); + metal::MTLLoadAction::Clear + }; + let store_action = conv::map_store_action( + at.ops.contains(crate::AttachmentOps::STORE), + at.resolve_target.is_some(), + ); + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + } + + if let Some(ref at) = desc.depth_stencil_attachment { + if at.target.view.aspects.contains(crate::FormatAspects::DEPTH) { + let at_descriptor = descriptor.depth_attachment().unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + + let load_action = if at.depth_ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_depth(at.clear_value.0 as f64); + metal::MTLLoadAction::Clear + }; + let store_action = if at.depth_ops.contains(crate::AttachmentOps::STORE) { + metal::MTLStoreAction::Store + } else { + metal::MTLStoreAction::DontCare + }; + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + if at + .target + .view + .aspects + .contains(crate::FormatAspects::STENCIL) + { + let at_descriptor = descriptor.stencil_attachment().unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + + let load_action = if at.stencil_ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_stencil(at.clear_value.1); + metal::MTLLoadAction::Clear + }; + let store_action = if at.stencil_ops.contains(crate::AttachmentOps::STORE) { + metal::MTLStoreAction::Store + } else { + metal::MTLStoreAction::DontCare + }; + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + } + + let raw = self.raw_cmd_buf.as_ref().unwrap(); + let encoder = raw.new_render_command_encoder(descriptor); + if let Some(label) = desc.label { + encoder.set_label(label); + } + self.state.render = Some(encoder.to_owned()); + }); + } + + unsafe fn end_render_pass(&mut self) { + self.state.render.take().unwrap().end_encoding(); + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + group_index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let bg_info = &layout.bind_group_infos[group_index as usize]; + + if let Some(ref encoder) = self.state.render { + let mut changes_sizes_buffer = false; + for index in 0..group.counters.vs.buffers { + let buf = &group.buffers[index as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_vertex_buffer( + (bg_info.base_resource_indices.vs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Vertex, + &mut self.temp.binding_sizes, + ) { + encoder.set_vertex_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + changes_sizes_buffer = false; + for index in 0..group.counters.fs.buffers { + let buf = &group.buffers[(group.counters.vs.buffers + index) as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_fragment_buffer( + (bg_info.base_resource_indices.fs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Fragment, + &mut self.temp.binding_sizes, + ) { + encoder.set_fragment_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + for index in 0..group.counters.vs.samplers { + let res = group.samplers[index as usize]; + encoder.set_vertex_sampler_state( + (bg_info.base_resource_indices.vs.samplers + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.fs.samplers { + let res = group.samplers[(group.counters.vs.samplers + index) as usize]; + encoder.set_fragment_sampler_state( + (bg_info.base_resource_indices.fs.samplers + index) as u64, + Some(res.as_native()), + ); + } + + for index in 0..group.counters.vs.textures { + let res = group.textures[index as usize]; + encoder.set_vertex_texture( + (bg_info.base_resource_indices.vs.textures + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.fs.textures { + let res = group.textures[(group.counters.vs.textures + index) as usize]; + encoder.set_fragment_texture( + (bg_info.base_resource_indices.fs.textures + index) as u64, + Some(res.as_native()), + ); + } + } + + if let Some(ref encoder) = self.state.compute { + let index_base = super::ResourceData { + buffers: group.counters.vs.buffers + group.counters.fs.buffers, + samplers: group.counters.vs.samplers + group.counters.fs.samplers, + textures: group.counters.vs.textures + group.counters.fs.textures, + }; + + let mut changes_sizes_buffer = false; + for index in 0..group.counters.cs.buffers { + let buf = &group.buffers[(index_base.buffers + index) as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_buffer( + (bg_info.base_resource_indices.cs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Compute, + &mut self.temp.binding_sizes, + ) { + encoder.set_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + for index in 0..group.counters.cs.samplers { + let res = group.samplers[(index_base.samplers + index) as usize]; + encoder.set_sampler_state( + (bg_info.base_resource_indices.cs.samplers + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.cs.textures { + let res = group.textures[(index_base.textures + index) as usize]; + encoder.set_texture( + (bg_info.base_resource_indices.cs.textures + index) as u64, + Some(res.as_native()), + ); + } + } + } + + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + let state_pc = &mut self.state.push_constants; + if state_pc.len() < layout.total_push_constants as usize { + state_pc.resize(layout.total_push_constants as usize, 0); + } + assert_eq!(offset as usize % WORD_SIZE, 0); + + let offset = offset as usize / WORD_SIZE; + state_pc[offset..offset + data.len()].copy_from_slice(data); + + if stages.contains(wgt::ShaderStages::COMPUTE) { + self.state.compute.as_ref().unwrap().set_bytes( + layout.push_constants_infos.cs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::VERTEX) { + self.state.render.as_ref().unwrap().set_vertex_bytes( + layout.push_constants_infos.vs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::FRAGMENT) { + self.state.render.as_ref().unwrap().set_fragment_bytes( + layout.push_constants_infos.fs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + if let Some(encoder) = self.enter_any() { + encoder.insert_debug_signpost(label); + } + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + if let Some(encoder) = self.enter_any() { + encoder.push_debug_group(group_label); + } else if let Some(ref buf) = self.raw_cmd_buf { + buf.push_debug_group(group_label); + } + } + unsafe fn end_debug_marker(&mut self) { + if let Some(encoder) = self.enter_any() { + encoder.pop_debug_group(); + } else if let Some(ref buf) = self.raw_cmd_buf { + buf.pop_debug_group(); + } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + self.state.raw_primitive_type = pipeline.raw_primitive_type; + self.state.stage_infos.vs.assign_from(&pipeline.vs_info); + match pipeline.fs_info { + Some(ref info) => self.state.stage_infos.fs.assign_from(info), + None => self.state.stage_infos.fs.clear(), + } + + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_render_pipeline_state(&pipeline.raw); + encoder.set_front_facing_winding(pipeline.raw_front_winding); + encoder.set_cull_mode(pipeline.raw_cull_mode); + encoder.set_triangle_fill_mode(pipeline.raw_triangle_fill_mode); + if let Some(depth_clip) = pipeline.raw_depth_clip_mode { + encoder.set_depth_clip_mode(depth_clip); + } + if let Some((ref state, bias)) = pipeline.depth_stencil { + encoder.set_depth_stencil_state(state); + encoder.set_depth_bias(bias.constant as f32, bias.slope_scale, bias.clamp); + } + + { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes) + { + encoder.set_vertex_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + if pipeline.fs_lib.is_some() { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Fragment, &mut self.temp.binding_sizes) + { + encoder.set_fragment_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + let (stride, raw_type) = match format { + wgt::IndexFormat::Uint16 => (2, metal::MTLIndexType::UInt16), + wgt::IndexFormat::Uint32 => (4, metal::MTLIndexType::UInt32), + }; + self.state.index = Some(super::IndexState { + buffer_ptr: AsNative::from(binding.buffer.raw.as_ref()), + offset: binding.offset, + stride, + raw_type, + }); + } + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_vertex_buffer(buffer_index, Some(&binding.buffer.raw), binding.offset); + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let zfar = if self.shared.disabilities.broken_viewport_near_depth { + depth_range.end - depth_range.start + } else { + depth_range.end + }; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_viewport(metal::MTLViewport { + originX: rect.x as _, + originY: rect.y as _, + width: rect.w as _, + height: rect.h as _, + znear: depth_range.start as _, + zfar: zfar as _, + }); + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + //TODO: support empty scissors by modifying the viewport + let scissor = metal::MTLScissorRect { + x: rect.x as _, + y: rect.y as _, + width: rect.w as _, + height: rect.h as _, + }; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_scissor_rect(scissor); + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_stencil_front_back_reference_value(value, value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_blend_color(color[0], color[1], color[2], color[3]); + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + if start_instance != 0 { + encoder.draw_primitives_instanced_base_instance( + self.state.raw_primitive_type, + start_vertex as _, + vertex_count as _, + instance_count as _, + start_instance as _, + ); + } else if instance_count != 1 { + encoder.draw_primitives_instanced( + self.state.raw_primitive_type, + start_vertex as _, + vertex_count as _, + instance_count as _, + ); + } else { + encoder.draw_primitives( + self.state.raw_primitive_type, + start_vertex as _, + vertex_count as _, + ); + } + } + + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + let index = self.state.index.as_ref().unwrap(); + let offset = index.offset + index.stride * start_index as wgt::BufferAddress; + if base_vertex != 0 || start_instance != 0 { + encoder.draw_indexed_primitives_instanced_base_instance( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + instance_count as _, + base_vertex as _, + start_instance as _, + ); + } else if instance_count != 1 { + encoder.draw_indexed_primitives_instanced( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + instance_count as _, + ); + } else { + encoder.draw_indexed_primitives( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + ); + } + } + + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + mut offset: wgt::BufferAddress, + draw_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + for _ in 0..draw_count { + encoder.draw_primitives_indirect(self.state.raw_primitive_type, &buffer.raw, offset); + offset += mem::size_of::<wgt::DrawIndirectArgs>() as wgt::BufferAddress; + } + } + + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + mut offset: wgt::BufferAddress, + draw_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + let index = self.state.index.as_ref().unwrap(); + for _ in 0..draw_count { + encoder.draw_indexed_primitives_indirect( + self.state.raw_primitive_type, + index.raw_type, + index.buffer_ptr.as_native(), + index.offset, + &buffer.raw, + offset, + ); + offset += mem::size_of::<wgt::DrawIndexedIndirectArgs>() as wgt::BufferAddress; + } + } + + unsafe fn draw_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + //TODO + } + unsafe fn draw_indexed_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + //TODO + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + self.begin_pass(); + + let raw = self.raw_cmd_buf.as_ref().unwrap(); + objc::rc::autoreleasepool(|| { + let encoder = raw.new_compute_command_encoder(); + if let Some(label) = desc.label { + encoder.set_label(label); + } + self.state.compute = Some(encoder.to_owned()); + }); + } + unsafe fn end_compute_pass(&mut self) { + self.state.compute.take().unwrap().end_encoding(); + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + self.state.raw_wg_size = pipeline.work_group_size; + self.state.stage_infos.cs.assign_from(&pipeline.cs_info); + + let encoder = self.state.compute.as_ref().unwrap(); + encoder.set_compute_pipeline_state(&pipeline.raw); + + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Compute, &mut self.temp.binding_sizes) + { + encoder.set_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + + // update the threadgroup memory sizes + while self.state.work_group_memory_sizes.len() < pipeline.work_group_memory_sizes.len() { + self.state.work_group_memory_sizes.push(0); + } + for (index, (cur_size, pipeline_size)) in self + .state + .work_group_memory_sizes + .iter_mut() + .zip(pipeline.work_group_memory_sizes.iter()) + .enumerate() + { + const ALIGN_MASK: u32 = 0xF; // must be a multiple of 16 bytes + let size = ((*pipeline_size - 1) | ALIGN_MASK) + 1; + if *cur_size != size { + *cur_size = size; + encoder.set_threadgroup_memory_length(index as _, size as _); + } + } + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + let encoder = self.state.compute.as_ref().unwrap(); + let raw_count = metal::MTLSize { + width: count[0] as u64, + height: count[1] as u64, + depth: count[2] as u64, + }; + encoder.dispatch_thread_groups(raw_count, self.state.raw_wg_size); + } + + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + let encoder = self.state.compute.as_ref().unwrap(); + encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size); + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/conv.rs b/third_party/rust/wgpu-hal/src/metal/conv.rs new file mode 100644 index 0000000000..a1ceb287ab --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/conv.rs @@ -0,0 +1,324 @@ +pub fn map_texture_usage( + format: wgt::TextureFormat, + usage: crate::TextureUses, +) -> metal::MTLTextureUsage { + use crate::TextureUses as Tu; + + let mut mtl_usage = metal::MTLTextureUsage::Unknown; + + mtl_usage.set( + metal::MTLTextureUsage::RenderTarget, + usage.intersects(Tu::COLOR_TARGET | Tu::DEPTH_STENCIL_READ | Tu::DEPTH_STENCIL_WRITE), + ); + mtl_usage.set( + metal::MTLTextureUsage::ShaderRead, + usage.intersects( + Tu::RESOURCE | Tu::DEPTH_STENCIL_READ | Tu::STORAGE_READ | Tu::STORAGE_READ_WRITE, + ), + ); + mtl_usage.set( + metal::MTLTextureUsage::ShaderWrite, + usage.intersects(Tu::STORAGE_READ_WRITE), + ); + // needed for combined depth/stencil formats since we might + // create a stencil-only view from them + mtl_usage.set( + metal::MTLTextureUsage::PixelFormatView, + format.is_combined_depth_stencil_format(), + ); + + mtl_usage +} + +pub fn map_texture_view_dimension(dim: wgt::TextureViewDimension) -> metal::MTLTextureType { + use metal::MTLTextureType::*; + use wgt::TextureViewDimension as Tvd; + match dim { + Tvd::D1 => D1, + Tvd::D2 => D2, + Tvd::D2Array => D2Array, + Tvd::D3 => D3, + Tvd::Cube => Cube, + Tvd::CubeArray => CubeArray, + } +} + +pub fn map_compare_function(fun: wgt::CompareFunction) -> metal::MTLCompareFunction { + use metal::MTLCompareFunction::*; + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => Never, + Cf::Less => Less, + Cf::LessEqual => LessEqual, + Cf::Equal => Equal, + Cf::GreaterEqual => GreaterEqual, + Cf::Greater => Greater, + Cf::NotEqual => NotEqual, + Cf::Always => Always, + } +} + +pub fn map_filter_mode(filter: wgt::FilterMode) -> metal::MTLSamplerMinMagFilter { + use metal::MTLSamplerMinMagFilter::*; + match filter { + wgt::FilterMode::Nearest => Nearest, + wgt::FilterMode::Linear => Linear, + } +} + +pub fn map_address_mode(address: wgt::AddressMode) -> metal::MTLSamplerAddressMode { + use metal::MTLSamplerAddressMode::*; + use wgt::AddressMode as Fm; + match address { + Fm::Repeat => Repeat, + Fm::MirrorRepeat => MirrorRepeat, + Fm::ClampToEdge => ClampToEdge, + Fm::ClampToBorder => ClampToBorderColor, + //Fm::MirrorClamp => MirrorClampToEdge, + } +} + +pub fn map_border_color(border_color: wgt::SamplerBorderColor) -> metal::MTLSamplerBorderColor { + use metal::MTLSamplerBorderColor::*; + match border_color { + wgt::SamplerBorderColor::TransparentBlack => TransparentBlack, + wgt::SamplerBorderColor::OpaqueBlack => OpaqueBlack, + wgt::SamplerBorderColor::OpaqueWhite => OpaqueWhite, + wgt::SamplerBorderColor::Zero => unreachable!(), + } +} + +pub fn map_primitive_topology( + topology: wgt::PrimitiveTopology, +) -> (metal::MTLPrimitiveTopologyClass, metal::MTLPrimitiveType) { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => ( + metal::MTLPrimitiveTopologyClass::Point, + metal::MTLPrimitiveType::Point, + ), + Pt::LineList => ( + metal::MTLPrimitiveTopologyClass::Line, + metal::MTLPrimitiveType::Line, + ), + Pt::LineStrip => ( + metal::MTLPrimitiveTopologyClass::Line, + metal::MTLPrimitiveType::LineStrip, + ), + Pt::TriangleList => ( + metal::MTLPrimitiveTopologyClass::Triangle, + metal::MTLPrimitiveType::Triangle, + ), + Pt::TriangleStrip => ( + metal::MTLPrimitiveTopologyClass::Triangle, + metal::MTLPrimitiveType::TriangleStrip, + ), + } +} + +pub fn map_color_write(mask: wgt::ColorWrites) -> metal::MTLColorWriteMask { + let mut raw_mask = metal::MTLColorWriteMask::empty(); + + if mask.contains(wgt::ColorWrites::RED) { + raw_mask |= metal::MTLColorWriteMask::Red; + } + if mask.contains(wgt::ColorWrites::GREEN) { + raw_mask |= metal::MTLColorWriteMask::Green; + } + if mask.contains(wgt::ColorWrites::BLUE) { + raw_mask |= metal::MTLColorWriteMask::Blue; + } + if mask.contains(wgt::ColorWrites::ALPHA) { + raw_mask |= metal::MTLColorWriteMask::Alpha; + } + + raw_mask +} + +pub fn map_blend_factor(factor: wgt::BlendFactor) -> metal::MTLBlendFactor { + use metal::MTLBlendFactor::*; + use wgt::BlendFactor as Bf; + + match factor { + Bf::Zero => Zero, + Bf::One => One, + Bf::Src => SourceColor, + Bf::OneMinusSrc => OneMinusSourceColor, + Bf::Dst => DestinationColor, + Bf::OneMinusDst => OneMinusDestinationColor, + Bf::SrcAlpha => SourceAlpha, + Bf::OneMinusSrcAlpha => OneMinusSourceAlpha, + Bf::DstAlpha => DestinationAlpha, + Bf::OneMinusDstAlpha => OneMinusDestinationAlpha, + Bf::Constant => BlendColor, + Bf::OneMinusConstant => OneMinusBlendColor, + //Bf::ConstantAlpha => BlendAlpha, + //Bf::OneMinusConstantAlpha => OneMinusBlendAlpha, + Bf::SrcAlphaSaturated => SourceAlphaSaturated, + //Bf::Src1 => Source1Color, + //Bf::OneMinusSrc1 => OneMinusSource1Color, + //Bf::Src1Alpha => Source1Alpha, + //Bf::OneMinusSrc1Alpha => OneMinusSource1Alpha, + } +} + +pub fn map_blend_op(operation: wgt::BlendOperation) -> metal::MTLBlendOperation { + use metal::MTLBlendOperation::*; + use wgt::BlendOperation as Bo; + + match operation { + Bo::Add => Add, + Bo::Subtract => Subtract, + Bo::ReverseSubtract => ReverseSubtract, + Bo::Min => Min, + Bo::Max => Max, + } +} + +pub fn map_blend_component( + component: &wgt::BlendComponent, +) -> ( + metal::MTLBlendOperation, + metal::MTLBlendFactor, + metal::MTLBlendFactor, +) { + ( + map_blend_op(component.operation), + map_blend_factor(component.src_factor), + map_blend_factor(component.dst_factor), + ) +} + +pub fn map_vertex_format(format: wgt::VertexFormat) -> metal::MTLVertexFormat { + use metal::MTLVertexFormat::*; + use wgt::VertexFormat as Vf; + + match format { + Vf::Unorm8x2 => UChar2Normalized, + Vf::Snorm8x2 => Char2Normalized, + Vf::Uint8x2 => UChar2, + Vf::Sint8x2 => Char2, + Vf::Unorm8x4 => UChar4Normalized, + Vf::Snorm8x4 => Char4Normalized, + Vf::Uint8x4 => UChar4, + Vf::Sint8x4 => Char4, + Vf::Unorm16x2 => UShort2Normalized, + Vf::Snorm16x2 => Short2Normalized, + Vf::Uint16x2 => UShort2, + Vf::Sint16x2 => Short2, + Vf::Float16x2 => Half2, + Vf::Unorm16x4 => UShort4Normalized, + Vf::Snorm16x4 => Short4Normalized, + Vf::Uint16x4 => UShort4, + Vf::Sint16x4 => Short4, + Vf::Float16x4 => Half4, + Vf::Uint32 => UInt, + Vf::Sint32 => Int, + Vf::Float32 => Float, + Vf::Uint32x2 => UInt2, + Vf::Sint32x2 => Int2, + Vf::Float32x2 => Float2, + Vf::Uint32x3 => UInt3, + Vf::Sint32x3 => Int3, + Vf::Float32x3 => Float3, + Vf::Uint32x4 => UInt4, + Vf::Sint32x4 => Int4, + Vf::Float32x4 => Float4, + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + } +} + +pub fn map_step_mode(mode: wgt::VertexStepMode) -> metal::MTLVertexStepFunction { + match mode { + wgt::VertexStepMode::Vertex => metal::MTLVertexStepFunction::PerVertex, + wgt::VertexStepMode::Instance => metal::MTLVertexStepFunction::PerInstance, + } +} + +pub fn map_stencil_op(op: wgt::StencilOperation) -> metal::MTLStencilOperation { + use metal::MTLStencilOperation::*; + use wgt::StencilOperation as So; + + match op { + So::Keep => Keep, + So::Zero => Zero, + So::Replace => Replace, + So::IncrementClamp => IncrementClamp, + So::IncrementWrap => IncrementWrap, + So::DecrementClamp => DecrementClamp, + So::DecrementWrap => DecrementWrap, + So::Invert => Invert, + } +} + +pub fn map_winding(winding: wgt::FrontFace) -> metal::MTLWinding { + match winding { + wgt::FrontFace::Cw => metal::MTLWinding::Clockwise, + wgt::FrontFace::Ccw => metal::MTLWinding::CounterClockwise, + } +} + +pub fn map_cull_mode(face: Option<wgt::Face>) -> metal::MTLCullMode { + match face { + None => metal::MTLCullMode::None, + Some(wgt::Face::Front) => metal::MTLCullMode::Front, + Some(wgt::Face::Back) => metal::MTLCullMode::Back, + } +} + +pub fn map_range(range: &crate::MemoryRange) -> metal::NSRange { + metal::NSRange { + location: range.start, + length: range.end - range.start, + } +} + +pub fn map_copy_extent(extent: &crate::CopyExtent) -> metal::MTLSize { + metal::MTLSize { + width: extent.width as u64, + height: extent.height as u64, + depth: extent.depth as u64, + } +} + +pub fn map_origin(origin: &wgt::Origin3d) -> metal::MTLOrigin { + metal::MTLOrigin { + x: origin.x as u64, + y: origin.y as u64, + z: origin.z as u64, + } +} + +pub fn map_store_action(store: bool, resolve: bool) -> metal::MTLStoreAction { + use metal::MTLStoreAction::*; + match (store, resolve) { + (true, true) => StoreAndMultisampleResolve, + (false, true) => MultisampleResolve, + (true, false) => Store, + (false, false) => DontCare, + } +} + +pub fn map_clear_color(color: &wgt::Color) -> metal::MTLClearColor { + metal::MTLClearColor { + red: color.r, + green: color.g, + blue: color.b, + alpha: color.a, + } +} + +pub fn get_blit_option( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> metal::MTLBlitOption { + if format.is_combined_depth_stencil_format() { + match aspect { + crate::FormatAspects::DEPTH => metal::MTLBlitOption::DepthFromDepthStencil, + crate::FormatAspects::STENCIL => metal::MTLBlitOption::StencilFromDepthStencil, + _ => unreachable!(), + } + } else { + metal::MTLBlitOption::None + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/device.rs b/third_party/rust/wgpu-hal/src/metal/device.rs new file mode 100644 index 0000000000..f8a1ad9a9f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/device.rs @@ -0,0 +1,1176 @@ +use parking_lot::Mutex; +use std::{ + num::NonZeroU32, + ptr, + sync::{atomic, Arc}, + thread, time, +}; + +use super::conv; +use crate::auxil::map_naga_stage; + +type DeviceResult<T> = Result<T, crate::DeviceError>; + +struct CompiledShader { + library: metal::Library, + function: metal::Function, + wg_size: metal::MTLSize, + wg_memory_sizes: Vec<u32>, + + /// Bindings of WGSL `storage` globals that contain variable-sized arrays. + /// + /// In order to implement bounds checks and the `arrayLength` function for + /// WGSL runtime-sized arrays, we pass the entry point a struct with a + /// member for each global variable that contains such an array. That member + /// is a `u32` holding the variable's total size in bytes---which is simply + /// the size of the `Buffer` supplying that variable's contents for the + /// draw call. + sized_bindings: Vec<naga::ResourceBinding>, + + immutable_buffer_mask: usize, +} + +fn create_stencil_desc( + face: &wgt::StencilFaceState, + read_mask: u32, + write_mask: u32, +) -> metal::StencilDescriptor { + let desc = metal::StencilDescriptor::new(); + desc.set_stencil_compare_function(conv::map_compare_function(face.compare)); + desc.set_read_mask(read_mask); + desc.set_write_mask(write_mask); + desc.set_stencil_failure_operation(conv::map_stencil_op(face.fail_op)); + desc.set_depth_failure_operation(conv::map_stencil_op(face.depth_fail_op)); + desc.set_depth_stencil_pass_operation(conv::map_stencil_op(face.pass_op)); + desc +} + +fn create_depth_stencil_desc(state: &wgt::DepthStencilState) -> metal::DepthStencilDescriptor { + let desc = metal::DepthStencilDescriptor::new(); + desc.set_depth_compare_function(conv::map_compare_function(state.depth_compare)); + desc.set_depth_write_enabled(state.depth_write_enabled); + let s = &state.stencil; + if s.is_enabled() { + let front_desc = create_stencil_desc(&s.front, s.read_mask, s.write_mask); + desc.set_front_face_stencil(Some(&front_desc)); + let back_desc = create_stencil_desc(&s.back, s.read_mask, s.write_mask); + desc.set_back_face_stencil(Some(&back_desc)); + } + desc +} + +impl super::Device { + fn load_shader( + &self, + stage: &crate::ProgrammableStage<super::Api>, + layout: &super::PipelineLayout, + primitive_class: metal::MTLPrimitiveTopologyClass, + naga_stage: naga::ShaderStage, + ) -> Result<CompiledShader, crate::PipelineError> { + let stage_bit = map_naga_stage(naga_stage); + + let module = &stage.module.naga.module; + let ep_resources = &layout.per_stage_map[naga_stage]; + + let bounds_check_policy = if stage.module.runtime_checks { + naga::proc::BoundsCheckPolicy::ReadZeroSkipWrite + } else { + naga::proc::BoundsCheckPolicy::Unchecked + }; + + let options = naga::back::msl::Options { + lang_version: match self.shared.private_caps.msl_version { + metal::MTLLanguageVersion::V1_0 => (1, 0), + metal::MTLLanguageVersion::V1_1 => (1, 1), + metal::MTLLanguageVersion::V1_2 => (1, 2), + metal::MTLLanguageVersion::V2_0 => (2, 0), + metal::MTLLanguageVersion::V2_1 => (2, 1), + metal::MTLLanguageVersion::V2_2 => (2, 2), + metal::MTLLanguageVersion::V2_3 => (2, 3), + metal::MTLLanguageVersion::V2_4 => (2, 4), + }, + inline_samplers: Default::default(), + spirv_cross_compatibility: false, + fake_missing_bindings: false, + per_entry_point_map: naga::back::msl::EntryPointResourceMap::from([( + stage.entry_point.to_string(), + ep_resources.clone(), + )]), + bounds_check_policies: naga::proc::BoundsCheckPolicies { + index: bounds_check_policy, + buffer: bounds_check_policy, + image: bounds_check_policy, + // TODO: support bounds checks on binding arrays + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }, + zero_initialize_workgroup_memory: true, + }; + + let pipeline_options = naga::back::msl::PipelineOptions { + allow_point_size: match primitive_class { + metal::MTLPrimitiveTopologyClass::Point => true, + _ => false, + }, + }; + + let (source, info) = naga::back::msl::write_string( + module, + &stage.module.naga.info, + &options, + &pipeline_options, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?; + + log::debug!( + "Naga generated shader for entry point '{}' and stage {:?}\n{}", + stage.entry_point, + naga_stage, + &source + ); + + let options = metal::CompileOptions::new(); + options.set_language_version(self.shared.private_caps.msl_version); + + if self.shared.private_caps.supports_preserve_invariance { + options.set_preserve_invariance(true); + } + + let library = self + .shared + .device + .lock() + .new_library_with_source(source.as_ref(), &options) + .map_err(|err| { + log::warn!("Naga generated shader:\n{}", source); + crate::PipelineError::Linkage(stage_bit, format!("Metal: {}", err)) + })?; + + let ep_index = module + .entry_points + .iter() + .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + let ep = &module.entry_points[ep_index]; + let ep_name = info.entry_point_names[ep_index] + .as_ref() + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{}", e)))?; + + let wg_size = metal::MTLSize { + width: ep.workgroup_size[0] as _, + height: ep.workgroup_size[1] as _, + depth: ep.workgroup_size[2] as _, + }; + + let function = library.get_function(ep_name, None).map_err(|e| { + log::error!("get_function: {:?}", e); + crate::PipelineError::EntryPoint(naga_stage) + })?; + + // collect sizes indices, immutable buffers, and work group memory sizes + let ep_info = &stage.module.naga.info.get_entry_point(ep_index); + let mut wg_memory_sizes = Vec::new(); + let mut sized_bindings = Vec::new(); + let mut immutable_buffer_mask = 0; + for (var_handle, var) in module.global_variables.iter() { + match var.space { + naga::AddressSpace::WorkGroup => { + if !ep_info[var_handle].is_empty() { + let size = module.types[var.ty].inner.size(&module.constants); + wg_memory_sizes.push(size); + } + } + naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. } => { + let br = match var.binding { + Some(ref br) => br.clone(), + None => continue, + }; + let storage_access_store = match var.space { + naga::AddressSpace::Storage { access } => { + access.contains(naga::StorageAccess::STORE) + } + _ => false, + }; + + // check for an immutable buffer + if !ep_info[var_handle].is_empty() && !storage_access_store { + let slot = ep_resources.resources[&br].buffer.unwrap(); + immutable_buffer_mask |= 1 << slot; + } + + let mut dynamic_array_container_ty = var.ty; + if let naga::TypeInner::Struct { ref members, .. } = module.types[var.ty].inner + { + dynamic_array_container_ty = members.last().unwrap().ty; + } + if let naga::TypeInner::Array { + size: naga::ArraySize::Dynamic, + .. + } = module.types[dynamic_array_container_ty].inner + { + sized_bindings.push(br); + } + } + _ => {} + } + } + + Ok(CompiledShader { + library, + function, + wg_size, + wg_memory_sizes, + sized_bindings, + immutable_buffer_mask, + }) + } + + fn set_buffers_mutability( + buffers: &metal::PipelineBufferDescriptorArrayRef, + mut immutable_mask: usize, + ) { + while immutable_mask != 0 { + let slot = immutable_mask.trailing_zeros(); + immutable_mask ^= 1 << slot; + buffers + .object_at(slot as u64) + .unwrap() + .set_mutability(metal::MTLMutability::Immutable); + } + } + + pub unsafe fn texture_from_raw( + raw: metal::Texture, + format: wgt::TextureFormat, + raw_type: metal::MTLTextureType, + array_layers: u32, + mip_levels: u32, + copy_size: crate::CopyExtent, + ) -> super::Texture { + super::Texture { + raw, + format, + raw_type, + array_layers, + mip_levels, + copy_size, + } + } + + pub unsafe fn device_from_raw(raw: metal::Device, features: wgt::Features) -> super::Device { + super::Device { + shared: Arc::new(super::AdapterShared::new(raw)), + features, + } + } + + pub fn raw_device(&self) -> &Mutex<metal::Device> { + &self.shared.device + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, _queue: super::Queue) {} + + unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<super::Buffer> { + let map_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let map_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let mut options = metal::MTLResourceOptions::empty(); + options |= if map_read || map_write { + // `crate::MemoryFlags::PREFER_COHERENT` is ignored here + metal::MTLResourceOptions::StorageModeShared + } else { + metal::MTLResourceOptions::StorageModePrivate + }; + options.set( + metal::MTLResourceOptions::CPUCacheModeWriteCombined, + map_write, + ); + + //TODO: HazardTrackingModeUntracked + + objc::rc::autoreleasepool(|| { + let raw = self.shared.device.lock().new_buffer(desc.size, options); + if let Some(label) = desc.label { + raw.set_label(label); + } + Ok(super::Buffer { + raw, + size: desc.size, + }) + }) + } + unsafe fn destroy_buffer(&self, _buffer: super::Buffer) {} + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> DeviceResult<crate::BufferMapping> { + let ptr = buffer.raw.contents() as *mut u8; + assert!(!ptr.is_null()); + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize) }).unwrap(), + is_coherent: true, + }) + } + + unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) -> DeviceResult<()> { + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> DeviceResult<super::Texture> { + use foreign_types::ForeignTypeRef; + + let mtl_format = self.shared.private_caps.map_format(desc.format); + + objc::rc::autoreleasepool(|| { + let descriptor = metal::TextureDescriptor::new(); + + let mtl_type = match desc.dimension { + wgt::TextureDimension::D1 => metal::MTLTextureType::D1, + wgt::TextureDimension::D2 => { + if desc.sample_count > 1 { + descriptor.set_sample_count(desc.sample_count as u64); + metal::MTLTextureType::D2Multisample + } else if desc.size.depth_or_array_layers > 1 { + descriptor.set_array_length(desc.size.depth_or_array_layers as u64); + metal::MTLTextureType::D2Array + } else { + metal::MTLTextureType::D2 + } + } + wgt::TextureDimension::D3 => { + descriptor.set_depth(desc.size.depth_or_array_layers as u64); + metal::MTLTextureType::D3 + } + }; + + descriptor.set_texture_type(mtl_type); + descriptor.set_width(desc.size.width as u64); + descriptor.set_height(desc.size.height as u64); + descriptor.set_mipmap_level_count(desc.mip_level_count as u64); + descriptor.set_pixel_format(mtl_format); + descriptor.set_usage(conv::map_texture_usage(desc.format, desc.usage)); + descriptor.set_storage_mode(metal::MTLStorageMode::Private); + + let raw = self.shared.device.lock().new_texture(&descriptor); + if raw.as_ptr().is_null() { + return Err(crate::DeviceError::OutOfMemory); + } + if let Some(label) = desc.label { + raw.set_label(label); + } + + Ok(super::Texture { + raw, + format: desc.format, + raw_type: mtl_type, + mip_levels: desc.mip_level_count, + array_layers: desc.array_layer_count(), + copy_size: desc.copy_extent(), + }) + }) + } + + unsafe fn destroy_texture(&self, _texture: super::Texture) {} + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> DeviceResult<super::TextureView> { + let raw_type = if texture.raw_type == metal::MTLTextureType::D2Multisample { + texture.raw_type + } else { + conv::map_texture_view_dimension(desc.dimension) + }; + + let aspects = crate::FormatAspects::new(desc.format, desc.range.aspect); + + let raw_format = self + .shared + .private_caps + .map_view_format(desc.format, aspects); + + let format_equal = raw_format == self.shared.private_caps.map_format(texture.format); + let type_equal = raw_type == texture.raw_type; + let range_full_resource = + desc.range + .is_full_resource(desc.format, texture.mip_levels, texture.array_layers); + + let raw = if format_equal && type_equal && range_full_resource { + // Some images are marked as framebuffer-only, and we can't create aliases of them. + // Also helps working around Metal bugs with aliased array textures. + texture.raw.to_owned() + } else { + let mip_level_count = desc + .range + .mip_level_count + .unwrap_or(texture.mip_levels - desc.range.base_mip_level); + let array_layer_count = desc + .range + .array_layer_count + .unwrap_or(texture.array_layers - desc.range.base_array_layer); + + objc::rc::autoreleasepool(|| { + let raw = texture.raw.new_texture_view_from_slice( + raw_format, + raw_type, + metal::NSRange { + location: desc.range.base_mip_level as _, + length: mip_level_count as _, + }, + metal::NSRange { + location: desc.range.base_array_layer as _, + length: array_layer_count as _, + }, + ); + if let Some(label) = desc.label { + raw.set_label(label); + } + raw + }) + }; + + Ok(super::TextureView { raw, aspects }) + } + unsafe fn destroy_texture_view(&self, _view: super::TextureView) {} + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> DeviceResult<super::Sampler> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::SamplerDescriptor::new(); + + descriptor.set_min_filter(conv::map_filter_mode(desc.min_filter)); + descriptor.set_mag_filter(conv::map_filter_mode(desc.mag_filter)); + descriptor.set_mip_filter(match desc.mipmap_filter { + wgt::FilterMode::Nearest if desc.lod_clamp == (0.0..0.0) => { + metal::MTLSamplerMipFilter::NotMipmapped + } + wgt::FilterMode::Nearest => metal::MTLSamplerMipFilter::Nearest, + wgt::FilterMode::Linear => metal::MTLSamplerMipFilter::Linear, + }); + + let [s, t, r] = desc.address_modes; + descriptor.set_address_mode_s(conv::map_address_mode(s)); + descriptor.set_address_mode_t(conv::map_address_mode(t)); + descriptor.set_address_mode_r(conv::map_address_mode(r)); + + // Anisotropy is always supported on mac up to 16x + descriptor.set_max_anisotropy(desc.anisotropy_clamp as _); + + descriptor.set_lod_min_clamp(desc.lod_clamp.start); + descriptor.set_lod_max_clamp(desc.lod_clamp.end); + + if let Some(fun) = desc.compare { + descriptor.set_compare_function(conv::map_compare_function(fun)); + } + + if let Some(border_color) = desc.border_color { + if let wgt::SamplerBorderColor::Zero = border_color { + if s == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_s(metal::MTLSamplerAddressMode::ClampToZero); + } + + if t == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_t(metal::MTLSamplerAddressMode::ClampToZero); + } + + if r == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_r(metal::MTLSamplerAddressMode::ClampToZero); + } + } else { + descriptor.set_border_color(conv::map_border_color(border_color)); + } + } + + if let Some(label) = desc.label { + descriptor.set_label(label); + } + let raw = self.shared.device.lock().new_sampler(&descriptor); + + Ok(super::Sampler { raw }) + }) + } + unsafe fn destroy_sampler(&self, _sampler: super::Sampler) {} + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + Ok(super::CommandEncoder { + shared: Arc::clone(&self.shared), + raw_queue: Arc::clone(&desc.queue.raw), + raw_cmd_buf: None, + state: super::CommandState::default(), + temp: super::Temp::default(), + }) + } + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> DeviceResult<super::BindGroupLayout> { + Ok(super::BindGroupLayout { + entries: Arc::from(desc.entries), + }) + } + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {} + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> DeviceResult<super::PipelineLayout> { + #[derive(Debug)] + struct StageInfo { + stage: naga::ShaderStage, + counters: super::ResourceData<super::ResourceIndex>, + pc_buffer: Option<super::ResourceIndex>, + pc_limit: u32, + sizes_buffer: Option<super::ResourceIndex>, + sizes_count: u8, + resources: naga::back::msl::BindingMap, + } + + let mut stage_data = super::NAGA_STAGES.map(|stage| StageInfo { + stage, + counters: super::ResourceData::default(), + pc_buffer: None, + pc_limit: 0, + sizes_buffer: None, + sizes_count: 0, + resources: Default::default(), + }); + let mut bind_group_infos = arrayvec::ArrayVec::new(); + + // First, place the push constants + let mut total_push_constants = 0; + for info in stage_data.iter_mut() { + for pcr in desc.push_constant_ranges { + if pcr.stages.contains(map_naga_stage(info.stage)) { + debug_assert_eq!(pcr.range.end % 4, 0); + info.pc_limit = (pcr.range.end / 4).max(info.pc_limit); + } + } + + // round up the limits alignment to 4, so that it matches MTL compiler logic + const LIMIT_MASK: u32 = 3; + //TODO: figure out what and how exactly does the alignment. Clearly, it's not + // straightforward, given that value of 2 stays non-aligned. + if info.pc_limit > LIMIT_MASK { + info.pc_limit = (info.pc_limit + LIMIT_MASK) & !LIMIT_MASK; + } + + // handle the push constant buffer assignment and shader overrides + if info.pc_limit != 0 { + info.pc_buffer = Some(info.counters.buffers); + info.counters.buffers += 1; + } + + total_push_constants = total_push_constants.max(info.pc_limit); + } + + // Second, place the described resources + for (group_index, &bgl) in desc.bind_group_layouts.iter().enumerate() { + // remember where the resources for this set start at each shader stage + let base_resource_indices = stage_data.map_ref(|info| info.counters.clone()); + + for entry in bgl.entries.iter() { + if let wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + .. + } = entry.ty + { + for info in stage_data.iter_mut() { + if entry.visibility.contains(map_naga_stage(info.stage)) { + info.sizes_count += 1; + } + } + } + + for info in stage_data.iter_mut() { + if !entry.visibility.contains(map_naga_stage(info.stage)) { + continue; + } + + let mut target = naga::back::msl::BindTarget::default(); + let count = entry.count.map_or(1, NonZeroU32::get); + target.binding_array_size = entry.count.map(NonZeroU32::get); + match entry.ty { + wgt::BindingType::Buffer { ty, .. } => { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += count; + if let wgt::BufferBindingType::Storage { read_only } = ty { + target.mutable = !read_only; + } + } + wgt::BindingType::Sampler { .. } => { + target.sampler = Some(naga::back::msl::BindSamplerTarget::Resource( + info.counters.samplers as _, + )); + info.counters.samplers += count; + } + wgt::BindingType::Texture { .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += count; + } + wgt::BindingType::StorageTexture { access, .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += count; + target.mutable = match access { + wgt::StorageTextureAccess::ReadOnly => false, + wgt::StorageTextureAccess::WriteOnly => true, + wgt::StorageTextureAccess::ReadWrite => true, + }; + } + } + + let br = naga::ResourceBinding { + group: group_index as u32, + binding: entry.binding, + }; + info.resources.insert(br, target); + } + } + + bind_group_infos.push(super::BindGroupLayoutInfo { + base_resource_indices, + }); + } + + // Finally, make sure we fit the limits + for info in stage_data.iter_mut() { + // handle the sizes buffer assignment and shader overrides + if info.sizes_count != 0 { + info.sizes_buffer = Some(info.counters.buffers); + info.counters.buffers += 1; + } + if info.counters.buffers > self.shared.private_caps.max_buffers_per_stage + || info.counters.textures > self.shared.private_caps.max_textures_per_stage + || info.counters.samplers > self.shared.private_caps.max_samplers_per_stage + { + log::error!("Resource limit exceeded: {:?}", info); + return Err(crate::DeviceError::OutOfMemory); + } + } + + let push_constants_infos = stage_data.map_ref(|info| { + info.pc_buffer.map(|buffer_index| super::PushConstantsInfo { + count: info.pc_limit, + buffer_index, + }) + }); + + let total_counters = stage_data.map_ref(|info| info.counters.clone()); + + let per_stage_map = stage_data.map(|info| naga::back::msl::EntryPointResources { + push_constant_buffer: info + .pc_buffer + .map(|buffer_index| buffer_index as naga::back::msl::Slot), + sizes_buffer: info + .sizes_buffer + .map(|buffer_index| buffer_index as naga::back::msl::Slot), + resources: info.resources, + }); + + Ok(super::PipelineLayout { + bind_group_infos, + push_constants_infos, + total_counters, + total_push_constants, + per_stage_map, + }) + } + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> DeviceResult<super::BindGroup> { + let mut bg = super::BindGroup::default(); + for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { + let stage_bit = map_naga_stage(stage); + let mut dynamic_offsets_count = 0u32; + for (entry, layout) in desc.entries.iter().zip(desc.layout.entries.iter()) { + let size = layout.count.map_or(1, |c| c.get()); + if let wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } = layout.ty + { + dynamic_offsets_count += size; + } + if !layout.visibility.contains(stage_bit) { + continue; + } + match layout.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.buffers + .extend(desc.buffers[start..end].iter().map(|source| { + // Given the restrictions on `BufferBinding::offset`, + // this should never be `None`. + let remaining_size = + wgt::BufferSize::new(source.buffer.size - source.offset); + let binding_size = match ty { + wgt::BufferBindingType::Storage { .. } => { + source.size.or(remaining_size) + } + _ => None, + }; + super::BufferResource { + ptr: source.buffer.as_raw(), + offset: source.offset, + dynamic_index: if has_dynamic_offset { + Some(dynamic_offsets_count - 1) + } else { + None + }, + binding_size, + binding_location: layout.binding, + } + })); + counter.buffers += 1; + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.samplers + .extend(desc.samplers[start..end].iter().map(|samp| samp.as_raw())); + counter.samplers += size; + } + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.textures.extend( + desc.textures[start..end] + .iter() + .map(|tex| tex.view.as_raw()), + ); + counter.textures += size; + } + } + } + } + + Ok(bg) + } + + unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { + naga, + runtime_checks: desc.runtime_checks, + }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::RenderPipelineDescriptor::new(); + + let raw_triangle_fill_mode = match desc.primitive.polygon_mode { + wgt::PolygonMode::Fill => metal::MTLTriangleFillMode::Fill, + wgt::PolygonMode::Line => metal::MTLTriangleFillMode::Lines, + wgt::PolygonMode::Point => panic!( + "{:?} is not enabled for this backend", + wgt::Features::POLYGON_MODE_POINT + ), + }; + + let (primitive_class, raw_primitive_type) = + conv::map_primitive_topology(desc.primitive.topology); + + // Vertex shader + let (vs_lib, vs_info) = { + let vs = self.load_shader( + &desc.vertex_stage, + desc.layout, + primitive_class, + naga::ShaderStage::Vertex, + )?; + + descriptor.set_vertex_function(Some(&vs.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.vertex_buffers().unwrap(), + vs.immutable_buffer_mask, + ); + } + + let info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.vs, + sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer, + sized_bindings: vs.sized_bindings, + }; + + (vs.library, info) + }; + + // Fragment shader + let (fs_lib, fs_info) = match desc.fragment_stage { + Some(ref stage) => { + let fs = self.load_shader( + stage, + desc.layout, + primitive_class, + naga::ShaderStage::Fragment, + )?; + + descriptor.set_fragment_function(Some(&fs.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.fragment_buffers().unwrap(), + fs.immutable_buffer_mask, + ); + } + + let info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.fs, + sizes_slot: desc.layout.per_stage_map.fs.sizes_buffer, + sized_bindings: fs.sized_bindings, + }; + + (Some(fs.library), Some(info)) + } + None => { + // TODO: This is a workaround for what appears to be a Metal validation bug + // A pixel format is required even though no attachments are provided + if desc.color_targets.is_empty() && desc.depth_stencil.is_none() { + descriptor + .set_depth_attachment_pixel_format(metal::MTLPixelFormat::Depth32Float); + } + (None, None) + } + }; + + for (i, ct) in desc.color_targets.iter().enumerate() { + let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + let ct = if let Some(color_target) = ct.as_ref() { + color_target + } else { + at_descriptor.set_pixel_format(metal::MTLPixelFormat::Invalid); + continue; + }; + + let raw_format = self.shared.private_caps.map_format(ct.format); + at_descriptor.set_pixel_format(raw_format); + at_descriptor.set_write_mask(conv::map_color_write(ct.write_mask)); + + if let Some(ref blend) = ct.blend { + at_descriptor.set_blending_enabled(true); + let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); + let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); + + at_descriptor.set_rgb_blend_operation(color_op); + at_descriptor.set_source_rgb_blend_factor(color_src); + at_descriptor.set_destination_rgb_blend_factor(color_dst); + + at_descriptor.set_alpha_blend_operation(alpha_op); + at_descriptor.set_source_alpha_blend_factor(alpha_src); + at_descriptor.set_destination_alpha_blend_factor(alpha_dst); + } + } + + let depth_stencil = match desc.depth_stencil { + Some(ref ds) => { + let raw_format = self.shared.private_caps.map_format(ds.format); + let aspects = crate::FormatAspects::from(ds.format); + if aspects.contains(crate::FormatAspects::DEPTH) { + descriptor.set_depth_attachment_pixel_format(raw_format); + } + if aspects.contains(crate::FormatAspects::STENCIL) { + descriptor.set_stencil_attachment_pixel_format(raw_format); + } + + let ds_descriptor = create_depth_stencil_desc(ds); + let raw = self + .shared + .device + .lock() + .new_depth_stencil_state(&ds_descriptor); + Some((raw, ds.bias)) + } + None => None, + }; + + if desc.layout.total_counters.vs.buffers + (desc.vertex_buffers.len() as u32) + > self.shared.private_caps.max_vertex_buffers + { + let msg = format!( + "pipeline needs too many buffers in the vertex stage: {} vertex and {} layout", + desc.vertex_buffers.len(), + desc.layout.total_counters.vs.buffers + ); + return Err(crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX, + msg, + )); + } + + if !desc.vertex_buffers.is_empty() { + let vertex_descriptor = metal::VertexDescriptor::new(); + for (i, vb) in desc.vertex_buffers.iter().enumerate() { + let buffer_index = + self.shared.private_caps.max_vertex_buffers as u64 - 1 - i as u64; + let buffer_desc = vertex_descriptor.layouts().object_at(buffer_index).unwrap(); + + // Metal expects the stride to be the actual size of the attributes. + // The semantics of array_stride == 0 can be achieved by setting + // the step function to constant and rate to 0. + if vb.array_stride == 0 { + let stride = vb + .attributes + .iter() + .map(|attribute| attribute.offset + attribute.format.size()) + .max() + .unwrap_or(0); + buffer_desc.set_stride(wgt::math::align_to(stride, 4)); + buffer_desc.set_step_function(metal::MTLVertexStepFunction::Constant); + buffer_desc.set_step_rate(0); + } else { + buffer_desc.set_stride(vb.array_stride); + buffer_desc.set_step_function(conv::map_step_mode(vb.step_mode)); + } + + for at in vb.attributes { + let attribute_desc = vertex_descriptor + .attributes() + .object_at(at.shader_location as u64) + .unwrap(); + attribute_desc.set_format(conv::map_vertex_format(at.format)); + attribute_desc.set_buffer_index(buffer_index); + attribute_desc.set_offset(at.offset); + } + } + descriptor.set_vertex_descriptor(Some(vertex_descriptor)); + } + + if desc.multisample.count != 1 { + //TODO: handle sample mask + descriptor.set_sample_count(desc.multisample.count as u64); + descriptor + .set_alpha_to_coverage_enabled(desc.multisample.alpha_to_coverage_enabled); + //descriptor.set_alpha_to_one_enabled(desc.multisample.alpha_to_one_enabled); + } + + if let Some(name) = desc.label { + descriptor.set_label(name); + } + + let raw = self + .shared + .device + .lock() + .new_render_pipeline_state(&descriptor) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX | wgt::ShaderStages::FRAGMENT, + format!("new_render_pipeline_state: {:?}", e), + ) + })?; + + Ok(super::RenderPipeline { + raw, + vs_lib, + fs_lib, + vs_info, + fs_info, + raw_primitive_type, + raw_triangle_fill_mode, + raw_front_winding: conv::map_winding(desc.primitive.front_face), + raw_cull_mode: conv::map_cull_mode(desc.primitive.cull_mode), + raw_depth_clip_mode: if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + Some(if desc.primitive.unclipped_depth { + metal::MTLDepthClipMode::Clamp + } else { + metal::MTLDepthClipMode::Clip + }) + } else { + None + }, + depth_stencil, + }) + }) + } + unsafe fn destroy_render_pipeline(&self, _pipeline: super::RenderPipeline) {} + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::ComputePipelineDescriptor::new(); + + let cs = self.load_shader( + &desc.stage, + desc.layout, + metal::MTLPrimitiveTopologyClass::Unspecified, + naga::ShaderStage::Compute, + )?; + descriptor.set_compute_function(Some(&cs.function)); + + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.buffers().unwrap(), + cs.immutable_buffer_mask, + ); + } + + let cs_info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.cs, + sizes_slot: desc.layout.per_stage_map.cs.sizes_buffer, + sized_bindings: cs.sized_bindings, + }; + + if let Some(name) = desc.label { + descriptor.set_label(name); + } + + let raw = self + .shared + .device + .lock() + .new_compute_pipeline_state(&descriptor) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::COMPUTE, + format!("new_compute_pipeline_state: {:?}", e), + ) + })?; + + Ok(super::ComputePipeline { + raw, + cs_info, + cs_lib: cs.library, + work_group_size: cs.wg_size, + work_group_memory_sizes: cs.wg_memory_sizes, + }) + }) + } + unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> DeviceResult<super::QuerySet> { + objc::rc::autoreleasepool(|| { + match desc.ty { + wgt::QueryType::Occlusion => { + let size = desc.count as u64 * crate::QUERY_SIZE; + let options = metal::MTLResourceOptions::empty(); + //TODO: HazardTrackingModeUntracked + let raw_buffer = self.shared.device.lock().new_buffer(size, options); + if let Some(label) = desc.label { + raw_buffer.set_label(label); + } + Ok(super::QuerySet { + raw_buffer, + ty: desc.ty, + }) + } + wgt::QueryType::Timestamp | wgt::QueryType::PipelineStatistics(_) => { + Err(crate::DeviceError::OutOfMemory) + } + } + }) + } + unsafe fn destroy_query_set(&self, _set: super::QuerySet) {} + + unsafe fn create_fence(&self) -> DeviceResult<super::Fence> { + Ok(super::Fence { + completed_value: Arc::new(atomic::AtomicU64::new(0)), + pending_command_buffers: Vec::new(), + }) + } + unsafe fn destroy_fence(&self, _fence: super::Fence) {} + unsafe fn get_fence_value(&self, fence: &super::Fence) -> DeviceResult<crate::FenceValue> { + let mut max_value = fence.completed_value.load(atomic::Ordering::Acquire); + for &(value, ref cmd_buf) in fence.pending_command_buffers.iter() { + if cmd_buf.status() == metal::MTLCommandBufferStatus::Completed { + max_value = value; + } + } + Ok(max_value) + } + unsafe fn wait( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> DeviceResult<bool> { + if wait_value <= fence.completed_value.load(atomic::Ordering::Acquire) { + return Ok(true); + } + + let cmd_buf = match fence + .pending_command_buffers + .iter() + .find(|&&(value, _)| value >= wait_value) + { + Some(&(_, ref cmd_buf)) => cmd_buf, + None => { + log::error!("No active command buffers for fence value {}", wait_value); + return Err(crate::DeviceError::Lost); + } + }; + + let start = time::Instant::now(); + loop { + if let metal::MTLCommandBufferStatus::Completed = cmd_buf.status() { + return Ok(true); + } + if start.elapsed().as_millis() >= timeout_ms as u128 { + return Ok(false); + } + thread::sleep(time::Duration::from_millis(1)); + } + } + + unsafe fn start_capture(&self) -> bool { + if !self.shared.private_caps.supports_capture_manager { + return false; + } + let device = self.shared.device.lock(); + let shared_capture_manager = metal::CaptureManager::shared(); + let default_capture_scope = shared_capture_manager.new_capture_scope_with_device(&device); + shared_capture_manager.set_default_capture_scope(&default_capture_scope); + shared_capture_manager.start_capture_with_scope(&default_capture_scope); + default_capture_scope.begin_scope(); + true + } + unsafe fn stop_capture(&self) { + let shared_capture_manager = metal::CaptureManager::shared(); + if let Some(default_capture_scope) = shared_capture_manager.default_capture_scope() { + default_capture_scope.end_scope(); + } + shared_capture_manager.stop_capture(); + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/mod.rs b/third_party/rust/wgpu-hal/src/metal/mod.rs new file mode 100644 index 0000000000..b77685bd94 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/mod.rs @@ -0,0 +1,805 @@ +/*! +# Metal API internals. + +## Pipeline Layout + +In Metal, push constants, vertex buffers, and resources in the bind groups +are all placed together in the native resource bindings, which work similarly to D3D11: +there are tables of textures, buffers, and samplers. + +We put push constants first (if any) in the table, followed by bind group 0 +resources, followed by other bind groups. The vertex buffers are bound at the very +end of the VS buffer table. + +!*/ + +mod adapter; +mod command; +mod conv; +mod device; +mod surface; +mod time; + +use std::{ + fmt, iter, ops, + ptr::NonNull, + sync::{atomic, Arc}, + thread, +}; + +use arrayvec::ArrayVec; +use foreign_types::ForeignTypeRef as _; +use parking_lot::Mutex; + +#[derive(Clone)] +pub struct Api; + +type ResourceIndex = u32; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +pub struct Instance { + managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate, +} + +impl Instance { + pub fn create_surface_from_layer(&self, layer: &metal::MetalLayerRef) -> Surface { + unsafe { Surface::from_layer(layer) } + } +} + +impl crate::Instance<Api> for Instance { + unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + //TODO: enable `METAL_DEVICE_WRAPPER_TYPE` environment based on the flags? + Ok(Instance { + managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate::new(), + }) + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + match window_handle { + #[cfg(target_os = "ios")] + raw_window_handle::RawWindowHandle::UiKit(handle) => { + let _ = &self.managed_metal_layer_delegate; + Ok(unsafe { Surface::from_view(handle.ui_view, None) }) + } + #[cfg(target_os = "macos")] + raw_window_handle::RawWindowHandle::AppKit(handle) => Ok(unsafe { + Surface::from_view(handle.ns_view, Some(&self.managed_metal_layer_delegate)) + }), + _ => Err(crate::InstanceError), + } + } + + unsafe fn destroy_surface(&self, surface: Surface) { + unsafe { surface.dispose() }; + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<Api>> { + let devices = metal::Device::all(); + let mut adapters: Vec<crate::ExposedAdapter<Api>> = devices + .into_iter() + .map(|dev| { + let name = dev.name().into(); + let shared = AdapterShared::new(dev); + crate::ExposedAdapter { + info: wgt::AdapterInfo { + name, + vendor: 0, + device: 0, + device_type: shared.private_caps.device_type(), + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Metal, + }, + features: shared.private_caps.features(), + capabilities: shared.private_caps.capabilities(), + adapter: Adapter::new(Arc::new(shared)), + } + }) + .collect(); + adapters.sort_by_key(|ad| { + ( + ad.adapter.shared.private_caps.low_power, + ad.adapter.shared.private_caps.headless, + ) + }); + adapters + } +} + +#[allow(dead_code)] +#[derive(Clone, Debug)] +struct PrivateCapabilities { + family_check: bool, + msl_version: metal::MTLLanguageVersion, + fragment_rw_storage: bool, + read_write_texture_tier: metal::MTLReadWriteTextureTier, + msaa_desktop: bool, + msaa_apple3: bool, + msaa_apple7: bool, + resource_heaps: bool, + argument_buffers: bool, + shared_textures: bool, + mutable_comparison_samplers: bool, + sampler_clamp_to_border: bool, + base_instance: bool, + base_vertex_instance_drawing: bool, + dual_source_blending: bool, + low_power: bool, + headless: bool, + layered_rendering: bool, + function_specialization: bool, + depth_clip_mode: bool, + texture_cube_array: bool, + format_depth24_stencil8: bool, + format_depth32_stencil8_filter: bool, + format_depth32_stencil8_none: bool, + format_min_srgb_channels: u8, + format_b5: bool, + format_bc: bool, + format_eac_etc: bool, + format_astc: bool, + format_astc_hdr: bool, + format_any8_unorm_srgb_all: bool, + format_any8_unorm_srgb_no_write: bool, + format_any8_snorm_all: bool, + format_r16_norm_all: bool, + format_r32_all: bool, + format_r32_no_write: bool, + format_r32float_no_write_no_filter: bool, + format_r32float_no_filter: bool, + format_r32float_all: bool, + format_rgba8_srgb_all: bool, + format_rgba8_srgb_no_write: bool, + format_rgb10a2_unorm_all: bool, + format_rgb10a2_unorm_no_write: bool, + format_rgb10a2_uint_color: bool, + format_rgb10a2_uint_color_write: bool, + format_rg11b10_all: bool, + format_rg11b10_no_write: bool, + format_rgb9e5_all: bool, + format_rgb9e5_no_write: bool, + format_rgb9e5_filter_only: bool, + format_rg32_color: bool, + format_rg32_color_write: bool, + format_rg32float_all: bool, + format_rg32float_color_blend: bool, + format_rg32float_no_filter: bool, + format_rgba32int_color: bool, + format_rgba32int_color_write: bool, + format_rgba32float_color: bool, + format_rgba32float_color_write: bool, + format_rgba32float_all: bool, + format_depth16unorm: bool, + format_depth32float_filter: bool, + format_depth32float_none: bool, + format_bgr10a2_all: bool, + format_bgr10a2_no_write: bool, + max_buffers_per_stage: ResourceIndex, + max_vertex_buffers: ResourceIndex, + max_textures_per_stage: ResourceIndex, + max_samplers_per_stage: ResourceIndex, + buffer_alignment: u64, + max_buffer_size: u64, + max_texture_size: u64, + max_texture_3d_size: u64, + max_texture_layers: u64, + max_fragment_input_components: u64, + max_color_render_targets: u8, + max_varying_components: u32, + max_threads_per_group: u32, + max_total_threadgroup_memory: u32, + sample_count_mask: crate::TextureFormatCapabilities, + supports_debug_markers: bool, + supports_binary_archives: bool, + supports_capture_manager: bool, + can_set_maximum_drawables_count: bool, + can_set_display_sync: bool, + can_set_next_drawable_timeout: bool, + supports_arrays_of_textures: bool, + supports_arrays_of_textures_write: bool, + supports_mutability: bool, + supports_depth_clip_control: bool, + supports_preserve_invariance: bool, + supports_shader_primitive_index: bool, + has_unified_memory: Option<bool>, +} + +#[derive(Clone, Debug)] +struct PrivateDisabilities { + /// Near depth is not respected properly on some Intel GPUs. + broken_viewport_near_depth: bool, + /// Multi-target clears don't appear to work properly on Intel GPUs. + #[allow(dead_code)] + broken_layered_clear_image: bool, +} + +#[derive(Debug, Default)] +struct Settings { + retain_command_buffer_references: bool, +} + +struct AdapterShared { + device: Mutex<metal::Device>, + disabilities: PrivateDisabilities, + private_caps: PrivateCapabilities, + settings: Settings, + presentation_timer: time::PresentationTimer, +} + +unsafe impl Send for AdapterShared {} +unsafe impl Sync for AdapterShared {} + +impl AdapterShared { + fn new(device: metal::Device) -> Self { + let private_caps = PrivateCapabilities::new(&device); + log::debug!("{:#?}", private_caps); + + Self { + disabilities: PrivateDisabilities::new(&device), + private_caps, + device: Mutex::new(device), + settings: Settings::default(), + presentation_timer: time::PresentationTimer::new(), + } + } +} + +pub struct Adapter { + shared: Arc<AdapterShared>, +} + +pub struct Queue { + raw: Arc<Mutex<metal::CommandQueue>>, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +impl Queue { + pub unsafe fn queue_from_raw(raw: metal::CommandQueue) -> Self { + Self { + raw: Arc::new(Mutex::new(raw)), + } + } +} +pub struct Device { + shared: Arc<AdapterShared>, + features: wgt::Features, +} + +pub struct Surface { + view: Option<NonNull<objc::runtime::Object>>, + render_layer: Mutex<metal::MetalLayer>, + swapchain_format: Option<wgt::TextureFormat>, + extent: wgt::Extent3d, + main_thread_id: thread::ThreadId, + // Useful for UI-intensive applications that are sensitive to + // window resizing. + pub present_with_transaction: bool, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug)] +pub struct SurfaceTexture { + texture: Texture, + drawable: metal::MetalDrawable, + present_with_transaction: bool, +} + +impl std::borrow::Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + &self.texture + } +} + +unsafe impl Send for SurfaceTexture {} +unsafe impl Sync for SurfaceTexture {} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&CommandBuffer], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + objc::rc::autoreleasepool(|| { + let extra_command_buffer = match signal_fence { + Some((fence, value)) => { + let completed_value = Arc::clone(&fence.completed_value); + let block = block::ConcreteBlock::new(move |_cmd_buf| { + completed_value.store(value, atomic::Ordering::Release); + }) + .copy(); + + let raw = match command_buffers.last() { + Some(&cmd_buf) => cmd_buf.raw.to_owned(), + None => { + let queue = self.raw.lock(); + queue + .new_command_buffer_with_unretained_references() + .to_owned() + } + }; + raw.set_label("(wgpu internal) Signal"); + raw.add_completed_handler(&block); + + fence.maintain(); + fence.pending_command_buffers.push((value, raw.to_owned())); + // only return an extra one if it's extra + match command_buffers.last() { + Some(_) => None, + None => Some(raw), + } + } + None => None, + }; + + for cmd_buffer in command_buffers { + cmd_buffer.raw.commit(); + } + + if let Some(raw) = extra_command_buffer { + raw.commit(); + } + }); + Ok(()) + } + unsafe fn present( + &mut self, + _surface: &mut Surface, + texture: SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + let queue = &self.raw.lock(); + objc::rc::autoreleasepool(|| { + let command_buffer = queue.new_command_buffer(); + command_buffer.set_label("(wgpu internal) Present"); + + // https://developer.apple.com/documentation/quartzcore/cametallayer/1478157-presentswithtransaction?language=objc + if !texture.present_with_transaction { + command_buffer.present_drawable(&texture.drawable); + } + + command_buffer.commit(); + + if texture.present_with_transaction { + command_buffer.wait_until_scheduled(); + texture.drawable.present(); + } + }); + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + // TODO: This is hard, see https://github.com/gpuweb/gpuweb/issues/1325 + 1.0 + } +} + +#[derive(Debug)] +pub struct Buffer { + raw: metal::Buffer, + size: wgt::BufferAddress, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl Buffer { + fn as_raw(&self) -> BufferPtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct Texture { + raw: metal::Texture, + format: wgt::TextureFormat, + raw_type: metal::MTLTextureType, + array_layers: u32, + mip_levels: u32, + copy_size: crate::CopyExtent, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +#[derive(Debug)] +pub struct TextureView { + raw: metal::Texture, + aspects: crate::FormatAspects, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} + +impl TextureView { + fn as_raw(&self) -> TexturePtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct Sampler { + raw: metal::SamplerState, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +impl Sampler { + fn as_raw(&self) -> SamplerPtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct BindGroupLayout { + /// Sorted list of BGL entries. + entries: Arc<[wgt::BindGroupLayoutEntry]>, +} + +#[derive(Clone, Debug, Default)] +struct ResourceData<T> { + buffers: T, + textures: T, + samplers: T, +} + +#[derive(Clone, Debug, Default)] +struct MultiStageData<T> { + vs: T, + fs: T, + cs: T, +} + +const NAGA_STAGES: MultiStageData<naga::ShaderStage> = MultiStageData { + vs: naga::ShaderStage::Vertex, + fs: naga::ShaderStage::Fragment, + cs: naga::ShaderStage::Compute, +}; + +impl<T> ops::Index<naga::ShaderStage> for MultiStageData<T> { + type Output = T; + fn index(&self, stage: naga::ShaderStage) -> &T { + match stage { + naga::ShaderStage::Vertex => &self.vs, + naga::ShaderStage::Fragment => &self.fs, + naga::ShaderStage::Compute => &self.cs, + } + } +} + +impl<T> MultiStageData<T> { + fn map_ref<Y>(&self, fun: impl Fn(&T) -> Y) -> MultiStageData<Y> { + MultiStageData { + vs: fun(&self.vs), + fs: fun(&self.fs), + cs: fun(&self.cs), + } + } + fn map<Y>(self, fun: impl Fn(T) -> Y) -> MultiStageData<Y> { + MultiStageData { + vs: fun(self.vs), + fs: fun(self.fs), + cs: fun(self.cs), + } + } + fn iter<'a>(&'a self) -> impl Iterator<Item = &'a T> { + iter::once(&self.vs) + .chain(iter::once(&self.fs)) + .chain(iter::once(&self.cs)) + } + fn iter_mut<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> { + iter::once(&mut self.vs) + .chain(iter::once(&mut self.fs)) + .chain(iter::once(&mut self.cs)) + } +} + +type MultiStageResourceCounters = MultiStageData<ResourceData<ResourceIndex>>; +type MultiStageResources = MultiStageData<naga::back::msl::EntryPointResources>; + +#[derive(Debug)] +struct BindGroupLayoutInfo { + base_resource_indices: MultiStageResourceCounters, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PushConstantsInfo { + count: u32, + buffer_index: ResourceIndex, +} + +#[derive(Debug)] +pub struct PipelineLayout { + bind_group_infos: ArrayVec<BindGroupLayoutInfo, { crate::MAX_BIND_GROUPS }>, + push_constants_infos: MultiStageData<Option<PushConstantsInfo>>, + total_counters: MultiStageResourceCounters, + total_push_constants: u32, + per_stage_map: MultiStageResources, +} + +trait AsNative { + type Native; + fn from(native: &Self::Native) -> Self; + fn as_native(&self) -> &Self::Native; +} + +type BufferPtr = NonNull<metal::MTLBuffer>; +type TexturePtr = NonNull<metal::MTLTexture>; +type SamplerPtr = NonNull<metal::MTLSamplerState>; + +impl AsNative for BufferPtr { + type Native = metal::BufferRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for TexturePtr { + type Native = metal::TextureRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for SamplerPtr { + type Native = metal::SamplerStateRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +#[derive(Debug)] +struct BufferResource { + ptr: BufferPtr, + offset: wgt::BufferAddress, + dynamic_index: Option<u32>, + + /// The buffer's size, if it is a [`Storage`] binding. Otherwise `None`. + /// + /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can + /// hold WGSL runtime-sized arrays. When one does, we must pass its size to + /// shader entry points to implement bounds checks and WGSL's `arrayLength` + /// function. See [`device::CompiledShader::sized_bindings`] for details. + /// + /// [`Storage`]: wgt::BufferBindingType::Storage + binding_size: Option<wgt::BufferSize>, + + binding_location: u32, +} + +#[derive(Debug, Default)] +pub struct BindGroup { + counters: MultiStageResourceCounters, + buffers: Vec<BufferResource>, + samplers: Vec<SamplerPtr>, + textures: Vec<TexturePtr>, +} + +unsafe impl Send for BindGroup {} +unsafe impl Sync for BindGroup {} + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + runtime_checks: bool, +} + +#[derive(Debug, Default)] +struct PipelineStageInfo { + push_constants: Option<PushConstantsInfo>, + + /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + sizes_slot: Option<naga::back::msl::Slot>, + + /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + sized_bindings: Vec<naga::ResourceBinding>, +} + +impl PipelineStageInfo { + fn clear(&mut self) { + self.push_constants = None; + self.sizes_slot = None; + self.sized_bindings.clear(); + } + + fn assign_from(&mut self, other: &Self) { + self.push_constants = other.push_constants; + self.sizes_slot = other.sizes_slot; + self.sized_bindings.clear(); + self.sized_bindings.extend_from_slice(&other.sized_bindings); + } +} + +pub struct RenderPipeline { + raw: metal::RenderPipelineState, + #[allow(dead_code)] + vs_lib: metal::Library, + #[allow(dead_code)] + fs_lib: Option<metal::Library>, + vs_info: PipelineStageInfo, + fs_info: Option<PipelineStageInfo>, + raw_primitive_type: metal::MTLPrimitiveType, + raw_triangle_fill_mode: metal::MTLTriangleFillMode, + raw_front_winding: metal::MTLWinding, + raw_cull_mode: metal::MTLCullMode, + raw_depth_clip_mode: Option<metal::MTLDepthClipMode>, + depth_stencil: Option<(metal::DepthStencilState, wgt::DepthBiasState)>, +} + +unsafe impl Send for RenderPipeline {} +unsafe impl Sync for RenderPipeline {} + +pub struct ComputePipeline { + raw: metal::ComputePipelineState, + #[allow(dead_code)] + cs_lib: metal::Library, + cs_info: PipelineStageInfo, + work_group_size: metal::MTLSize, + work_group_memory_sizes: Vec<u32>, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +#[derive(Debug)] +pub struct QuerySet { + raw_buffer: metal::Buffer, + ty: wgt::QueryType, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} + +#[derive(Debug)] +pub struct Fence { + completed_value: Arc<atomic::AtomicU64>, + /// The pending fence values have to be ascending. + pending_command_buffers: Vec<(crate::FenceValue, metal::CommandBuffer)>, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +impl Fence { + fn get_latest(&self) -> crate::FenceValue { + let mut max_value = self.completed_value.load(atomic::Ordering::Acquire); + for &(value, ref cmd_buf) in self.pending_command_buffers.iter() { + if cmd_buf.status() == metal::MTLCommandBufferStatus::Completed { + max_value = value; + } + } + max_value + } + + fn maintain(&mut self) { + let latest = self.get_latest(); + self.pending_command_buffers + .retain(|&(value, _)| value > latest); + } +} + +struct IndexState { + buffer_ptr: BufferPtr, + offset: wgt::BufferAddress, + stride: wgt::BufferAddress, + raw_type: metal::MTLIndexType, +} + +#[derive(Default)] +struct Temp { + binding_sizes: Vec<u32>, +} + +struct CommandState { + blit: Option<metal::BlitCommandEncoder>, + render: Option<metal::RenderCommandEncoder>, + compute: Option<metal::ComputeCommandEncoder>, + raw_primitive_type: metal::MTLPrimitiveType, + index: Option<IndexState>, + raw_wg_size: metal::MTLSize, + stage_infos: MultiStageData<PipelineStageInfo>, + + /// Sizes of currently bound [`wgt::BufferBindingType::Storage`] buffers. + /// + /// Specifically: + /// + /// - The keys are ['ResourceBinding`] values (that is, the WGSL `@group` + /// and `@binding` attributes) for `var<storage>` global variables in the + /// current module that contain runtime-sized arrays. + /// + /// - The values are the actual sizes of the buffers currently bound to + /// provide those globals' contents, which are needed to implement bounds + /// checks and the WGSL `arrayLength` function. + /// + /// For each stage `S` in `stage_infos`, we consult this to find the sizes + /// of the buffers listed in [`stage_infos.S.sized_bindings`], which we must + /// pass to the entry point. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + /// + /// [`ResourceBinding`]: naga::ResourceBinding + storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>, + + work_group_memory_sizes: Vec<u32>, + push_constants: Vec<u32>, +} + +pub struct CommandEncoder { + shared: Arc<AdapterShared>, + raw_queue: Arc<Mutex<metal::CommandQueue>>, + raw_cmd_buf: Option<metal::CommandBuffer>, + state: CommandState, + temp: Temp, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("raw_queue", &self.raw_queue) + .field("raw_cmd_buf", &self.raw_cmd_buf) + .finish() + } +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: metal::CommandBuffer, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} diff --git a/third_party/rust/wgpu-hal/src/metal/surface.rs b/third_party/rust/wgpu-hal/src/metal/surface.rs new file mode 100644 index 0000000000..8101d52969 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/surface.rs @@ -0,0 +1,278 @@ +#![allow(clippy::let_unit_value)] // `let () =` being used to constrain result type + +use std::{mem, os::raw::c_void, ptr::NonNull, sync::Once, thread}; + +use core_graphics_types::{ + base::CGFloat, + geometry::{CGRect, CGSize}, +}; +use objc::{ + class, + declare::ClassDecl, + msg_send, + rc::autoreleasepool, + runtime::{Class, Object, Sel, BOOL, NO, YES}, + sel, sel_impl, +}; +use parking_lot::Mutex; + +#[cfg(target_os = "macos")] +#[link(name = "QuartzCore", kind = "framework")] +extern "C" { + #[allow(non_upper_case_globals)] + static kCAGravityTopLeft: *mut Object; +} + +extern "C" fn layer_should_inherit_contents_scale_from_window( + _: &Class, + _: Sel, + _layer: *mut Object, + _new_scale: CGFloat, + _from_window: *mut Object, +) -> BOOL { + YES +} + +static CAML_DELEGATE_REGISTER: Once = Once::new(); + +#[derive(Debug)] +pub struct HalManagedMetalLayerDelegate(&'static Class); + +impl HalManagedMetalLayerDelegate { + pub fn new() -> Self { + let class_name = format!("HalManagedMetalLayerDelegate@{:p}", &CAML_DELEGATE_REGISTER); + + CAML_DELEGATE_REGISTER.call_once(|| { + type Fun = extern "C" fn(&Class, Sel, *mut Object, CGFloat, *mut Object) -> BOOL; + let mut decl = ClassDecl::new(&class_name, class!(NSObject)).unwrap(); + #[allow(trivial_casts)] // false positive + unsafe { + decl.add_class_method( + sel!(layer:shouldInheritContentsScale:fromWindow:), + layer_should_inherit_contents_scale_from_window as Fun, + ); + } + decl.register(); + }); + Self(Class::get(&class_name).unwrap()) + } +} + +impl super::Surface { + fn new(view: Option<NonNull<Object>>, layer: metal::MetalLayer) -> Self { + Self { + view, + render_layer: Mutex::new(layer), + swapchain_format: None, + extent: wgt::Extent3d::default(), + main_thread_id: thread::current().id(), + present_with_transaction: false, + } + } + + pub unsafe fn dispose(self) { + if let Some(view) = self.view { + let () = msg_send![view.as_ptr(), release]; + } + } + + /// If not called on the main thread, this will panic. + #[allow(clippy::transmute_ptr_to_ref)] + pub unsafe fn from_view( + view: *mut c_void, + delegate: Option<&HalManagedMetalLayerDelegate>, + ) -> Self { + let view = view as *mut Object; + let render_layer = { + let layer = unsafe { Self::get_metal_layer(view, delegate) }; + unsafe { mem::transmute::<_, &metal::MetalLayerRef>(layer) } + } + .to_owned(); + let _: *mut c_void = msg_send![view, retain]; + Self::new(NonNull::new(view), render_layer) + } + + pub unsafe fn from_layer(layer: &metal::MetalLayerRef) -> Self { + let class = class!(CAMetalLayer); + let proper_kind: BOOL = msg_send![layer, isKindOfClass: class]; + assert_eq!(proper_kind, YES); + Self::new(None, layer.to_owned()) + } + + /// If not called on the main thread, this will panic. + pub(crate) unsafe fn get_metal_layer( + view: *mut Object, + delegate: Option<&HalManagedMetalLayerDelegate>, + ) -> *mut Object { + if view.is_null() { + panic!("window does not have a valid contentView"); + } + + let is_main_thread: BOOL = msg_send![class!(NSThread), isMainThread]; + if is_main_thread == NO { + panic!("get_metal_layer cannot be called in non-ui thread."); + } + + let main_layer: *mut Object = msg_send![view, layer]; + let class = class!(CAMetalLayer); + let is_valid_layer: BOOL = msg_send![main_layer, isKindOfClass: class]; + + if is_valid_layer == YES { + main_layer + } else { + // If the main layer is not a CAMetalLayer, we create a CAMetalLayer and use it. + let new_layer: *mut Object = msg_send![class, new]; + let frame: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![new_layer, setFrame: frame]; + #[cfg(target_os = "ios")] + { + // Unlike NSView, UIView does not allow to replace main layer. + let () = msg_send![main_layer, addSublayer: new_layer]; + // On iOS, "from_view" may be called before the application initialization is complete, + // `msg_send![view, window]` and `msg_send![window, screen]` will get null. + let screen: *mut Object = msg_send![class!(UIScreen), mainScreen]; + let scale_factor: CGFloat = msg_send![screen, nativeScale]; + let () = msg_send![view, setContentScaleFactor: scale_factor]; + }; + #[cfg(target_os = "macos")] + { + let () = msg_send![view, setLayer: new_layer]; + let () = msg_send![view, setWantsLayer: YES]; + let () = msg_send![new_layer, setContentsGravity: unsafe { kCAGravityTopLeft }]; + let window: *mut Object = msg_send![view, window]; + if !window.is_null() { + let scale_factor: CGFloat = msg_send![window, backingScaleFactor]; + let () = msg_send![new_layer, setContentsScale: scale_factor]; + } + }; + if let Some(delegate) = delegate { + let () = msg_send![new_layer, setDelegate: delegate.0]; + } + new_layer + } + } + + pub(super) fn dimensions(&self) -> wgt::Extent3d { + let (size, scale): (CGSize, CGFloat) = unsafe { + let render_layer_borrow = self.render_layer.lock(); + let render_layer = render_layer_borrow.as_ref(); + let bounds: CGRect = msg_send![render_layer, bounds]; + let contents_scale: CGFloat = msg_send![render_layer, contentsScale]; + (bounds.size, contents_scale) + }; + + wgt::Extent3d { + width: (size.width * scale) as u32, + height: (size.height * scale) as u32, + depth_or_array_layers: 1, + } + } +} + +impl crate::Surface<super::Api> for super::Surface { + unsafe fn configure( + &mut self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + log::info!("build swapchain {:?}", config); + + let caps = &device.shared.private_caps; + self.swapchain_format = Some(config.format); + self.extent = config.extent; + + let render_layer = self.render_layer.lock(); + let framebuffer_only = config.usage == crate::TextureUses::COLOR_TARGET; + let display_sync = match config.present_mode { + wgt::PresentMode::Fifo => true, + wgt::PresentMode::Immediate => false, + m => unreachable!("Unsupported present mode: {m:?}"), + }; + let drawable_size = CGSize::new(config.extent.width as f64, config.extent.height as f64); + + match config.composite_alpha_mode { + wgt::CompositeAlphaMode::Opaque => render_layer.set_opaque(true), + wgt::CompositeAlphaMode::PostMultiplied => render_layer.set_opaque(false), + _ => (), + } + + let device_raw = device.shared.device.lock(); + // On iOS, unless the user supplies a view with a CAMetalLayer, we + // create one as a sublayer. However, when the view changes size, + // its sublayers are not automatically resized, and we must resize + // it here. The drawable size and the layer size don't correlate + #[cfg(target_os = "ios")] + { + if let Some(view) = self.view { + let main_layer: *mut Object = msg_send![view.as_ptr(), layer]; + let bounds: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![*render_layer, setFrame: bounds]; + } + } + render_layer.set_device(&device_raw); + render_layer.set_pixel_format(caps.map_format(config.format)); + render_layer.set_framebuffer_only(framebuffer_only); + render_layer.set_presents_with_transaction(self.present_with_transaction); + // opt-in to Metal EDR + // EDR potentially more power used in display and more bandwidth, memory footprint. + let wants_edr = config.format == wgt::TextureFormat::Rgba16Float; + if wants_edr != render_layer.wants_extended_dynamic_range_content() { + render_layer.set_wants_extended_dynamic_range_content(wants_edr); + } + + // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) + render_layer.set_maximum_drawable_count(config.swap_chain_size as _); + render_layer.set_drawable_size(drawable_size); + if caps.can_set_next_drawable_timeout { + let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; + } + if caps.can_set_display_sync { + let () = msg_send![*render_layer, setDisplaySyncEnabled: display_sync]; + } + + Ok(()) + } + + unsafe fn unconfigure(&mut self, _device: &super::Device) { + self.swapchain_format = None; + } + + unsafe fn acquire_texture( + &mut self, + _timeout_ms: Option<std::time::Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let render_layer = self.render_layer.lock(); + let (drawable, texture) = match autoreleasepool(|| { + render_layer + .next_drawable() + .map(|drawable| (drawable.to_owned(), drawable.texture().to_owned())) + }) { + Some(pair) => pair, + None => return Ok(None), + }; + + let suf_texture = super::SurfaceTexture { + texture: super::Texture { + raw: texture, + format: self.swapchain_format.unwrap(), + raw_type: metal::MTLTextureType::D2, + array_layers: 1, + mip_levels: 1, + copy_size: crate::CopyExtent { + width: self.extent.width, + height: self.extent.height, + depth: 1, + }, + }, + drawable, + present_with_transaction: self.present_with_transaction, + }; + + Ok(Some(crate::AcquiredSurfaceTexture { + texture: suf_texture, + suboptimal: false, + })) + } + + unsafe fn discard_texture(&mut self, _texture: super::SurfaceTexture) {} +} diff --git a/third_party/rust/wgpu-hal/src/metal/time.rs b/third_party/rust/wgpu-hal/src/metal/time.rs new file mode 100644 index 0000000000..5c6bec10cd --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/time.rs @@ -0,0 +1,38 @@ +//! Handling of global timestamps. + +#[repr(C)] +#[derive(Debug)] +struct MachTimebaseInfo { + numerator: u32, + denominator: u32, +} +extern "C" { + fn mach_timebase_info(out: *mut MachTimebaseInfo) -> u32; + fn mach_absolute_time() -> u64; +} + +/// A timer which uses mach_absolute_time to get its time. This is what the metal callbacks use. +#[derive(Debug)] +pub struct PresentationTimer { + scale: MachTimebaseInfo, +} +impl PresentationTimer { + /// Generates a new timer. + pub fn new() -> Self { + // Default to 1 / 1 in case the call to timebase_info fails. + let mut scale = MachTimebaseInfo { + numerator: 1, + denominator: 1, + }; + unsafe { mach_timebase_info(&mut scale) }; + + Self { scale } + } + + /// Gets the current time in nanoseconds. + pub fn get_timestamp_ns(&self) -> u128 { + let time = unsafe { mach_absolute_time() }; + + (time as u128 * self.scale.numerator as u128) / self.scale.denominator as u128 + } +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs new file mode 100644 index 0000000000..f8f26e422f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs @@ -0,0 +1,1746 @@ +use super::conv; + +use ash::{extensions::khr, vk}; +use parking_lot::Mutex; + +use std::{collections::BTreeMap, ffi::CStr, sync::Arc}; + +fn depth_stencil_required_flags() -> vk::FormatFeatureFlags { + vk::FormatFeatureFlags::SAMPLED_IMAGE | vk::FormatFeatureFlags::DEPTH_STENCIL_ATTACHMENT +} + +//TODO: const fn? +fn indexing_features() -> wgt::Features { + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING +} + +/// Aggregate of the `vk::PhysicalDevice*Features` structs used by `gfx`. +#[derive(Debug, Default)] +pub struct PhysicalDeviceFeatures { + core: vk::PhysicalDeviceFeatures, + pub(super) descriptor_indexing: Option<vk::PhysicalDeviceDescriptorIndexingFeaturesEXT>, + imageless_framebuffer: Option<vk::PhysicalDeviceImagelessFramebufferFeaturesKHR>, + timeline_semaphore: Option<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>, + image_robustness: Option<vk::PhysicalDeviceImageRobustnessFeaturesEXT>, + robustness2: Option<vk::PhysicalDeviceRobustness2FeaturesEXT>, + depth_clip_enable: Option<vk::PhysicalDeviceDepthClipEnableFeaturesEXT>, + multiview: Option<vk::PhysicalDeviceMultiviewFeaturesKHR>, + astc_hdr: Option<vk::PhysicalDeviceTextureCompressionASTCHDRFeaturesEXT>, + shader_float16: Option<( + vk::PhysicalDeviceShaderFloat16Int8Features, + vk::PhysicalDevice16BitStorageFeatures, + )>, + zero_initialize_workgroup_memory: + Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>, +} + +// This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. +unsafe impl Send for PhysicalDeviceFeatures {} +unsafe impl Sync for PhysicalDeviceFeatures {} + +impl PhysicalDeviceFeatures { + /// Add the members of `self` into `info.enabled_features` and its `p_next` chain. + pub fn add_to_device_create_builder<'a>( + &'a mut self, + mut info: vk::DeviceCreateInfoBuilder<'a>, + ) -> vk::DeviceCreateInfoBuilder<'a> { + info = info.enabled_features(&self.core); + if let Some(ref mut feature) = self.descriptor_indexing { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.imageless_framebuffer { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.timeline_semaphore { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.image_robustness { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.robustness2 { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.depth_clip_enable { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.astc_hdr { + info = info.push_next(feature); + } + if let Some((ref mut f16_i8_feature, ref mut _16bit_feature)) = self.shader_float16 { + info = info.push_next(f16_i8_feature); + info = info.push_next(_16bit_feature); + } + if let Some(ref mut feature) = self.zero_initialize_workgroup_memory { + info = info.push_next(feature); + } + info + } + + /// Create a `PhysicalDeviceFeatures` that will be used to create a logical device. + /// + /// `requested_features` should be the same as what was used to generate `enabled_extensions`. + fn from_extensions_and_requested_features( + effective_api_version: u32, + enabled_extensions: &[&'static CStr], + requested_features: wgt::Features, + downlevel_flags: wgt::DownlevelFlags, + private_caps: &super::PrivateCapabilities, + ) -> Self { + let needs_sampled_image_non_uniform = requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_storage_buffer_non_uniform = requested_features.contains( + wgt::Features::BUFFER_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_uniform_buffer_non_uniform = requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_storage_image_non_uniform = requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_partially_bound = + requested_features.intersects(wgt::Features::PARTIALLY_BOUND_BINDING_ARRAY); + + Self { + // vk::PhysicalDeviceFeatures is a struct composed of Bool32's while + // Features is a bitfield so we need to map everything manually + core: vk::PhysicalDeviceFeatures::builder() + .robust_buffer_access(private_caps.robust_buffer_access) + .independent_blend(downlevel_flags.contains(wgt::DownlevelFlags::INDEPENDENT_BLEND)) + .sample_rate_shading( + downlevel_flags.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING), + ) + .image_cube_array( + downlevel_flags.contains(wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES), + ) + .draw_indirect_first_instance( + requested_features.contains(wgt::Features::INDIRECT_FIRST_INSTANCE), + ) + //.dual_src_blend(requested_features.contains(wgt::Features::DUAL_SRC_BLENDING)) + .multi_draw_indirect( + requested_features.contains(wgt::Features::MULTI_DRAW_INDIRECT), + ) + .fill_mode_non_solid(requested_features.intersects( + wgt::Features::POLYGON_MODE_LINE | wgt::Features::POLYGON_MODE_POINT, + )) + //.depth_bounds(requested_features.contains(wgt::Features::DEPTH_BOUNDS)) + //.alpha_to_one(requested_features.contains(wgt::Features::ALPHA_TO_ONE)) + //.multi_viewport(requested_features.contains(wgt::Features::MULTI_VIEWPORTS)) + .sampler_anisotropy( + downlevel_flags.contains(wgt::DownlevelFlags::ANISOTROPIC_FILTERING), + ) + .texture_compression_etc2( + requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_ETC2), + ) + .texture_compression_astc_ldr( + requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_ASTC), + ) + .texture_compression_bc( + requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_BC), + ) + //.occlusion_query_precise(requested_features.contains(wgt::Features::PRECISE_OCCLUSION_QUERY)) + .pipeline_statistics_query( + requested_features.contains(wgt::Features::PIPELINE_STATISTICS_QUERY), + ) + .vertex_pipeline_stores_and_atomics( + requested_features.contains(wgt::Features::VERTEX_WRITABLE_STORAGE), + ) + .fragment_stores_and_atomics( + downlevel_flags.contains(wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE), + ) + //.shader_image_gather_extended( + //.shader_storage_image_extended_formats( + .shader_uniform_buffer_array_dynamic_indexing( + requested_features.contains(wgt::Features::BUFFER_BINDING_ARRAY), + ) + .shader_storage_buffer_array_dynamic_indexing(requested_features.contains( + wgt::Features::BUFFER_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY, + )) + .shader_sampled_image_array_dynamic_indexing( + requested_features.contains(wgt::Features::TEXTURE_BINDING_ARRAY), + ) + .shader_storage_buffer_array_dynamic_indexing(requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY, + )) + //.shader_storage_image_array_dynamic_indexing( + //.shader_clip_distance(requested_features.contains(wgt::Features::SHADER_CLIP_DISTANCE)) + //.shader_cull_distance(requested_features.contains(wgt::Features::SHADER_CULL_DISTANCE)) + .shader_float64(requested_features.contains(wgt::Features::SHADER_F64)) + //.shader_int64(requested_features.contains(wgt::Features::SHADER_INT64)) + .shader_int16(requested_features.contains(wgt::Features::SHADER_I16)) + //.shader_resource_residency(requested_features.contains(wgt::Features::SHADER_RESOURCE_RESIDENCY)) + .geometry_shader(requested_features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX)) + .build(), + descriptor_indexing: if requested_features.intersects(indexing_features()) { + Some( + vk::PhysicalDeviceDescriptorIndexingFeaturesEXT::builder() + .shader_sampled_image_array_non_uniform_indexing( + needs_sampled_image_non_uniform, + ) + .shader_storage_image_array_non_uniform_indexing( + needs_storage_image_non_uniform, + ) + .shader_uniform_buffer_array_non_uniform_indexing( + needs_uniform_buffer_non_uniform, + ) + .shader_storage_buffer_array_non_uniform_indexing( + needs_storage_buffer_non_uniform, + ) + .descriptor_binding_partially_bound(needs_partially_bound) + .build(), + ) + } else { + None + }, + imageless_framebuffer: if effective_api_version >= vk::API_VERSION_1_2 + || enabled_extensions.contains(&vk::KhrImagelessFramebufferFn::name()) + { + Some( + vk::PhysicalDeviceImagelessFramebufferFeaturesKHR::builder() + .imageless_framebuffer(private_caps.imageless_framebuffers) + .build(), + ) + } else { + None + }, + timeline_semaphore: if effective_api_version >= vk::API_VERSION_1_2 + || enabled_extensions.contains(&vk::KhrTimelineSemaphoreFn::name()) + { + Some( + vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR::builder() + .timeline_semaphore(private_caps.timeline_semaphores) + .build(), + ) + } else { + None + }, + image_robustness: if effective_api_version >= vk::API_VERSION_1_3 + || enabled_extensions.contains(&vk::ExtImageRobustnessFn::name()) + { + Some( + vk::PhysicalDeviceImageRobustnessFeaturesEXT::builder() + .robust_image_access(private_caps.robust_image_access) + .build(), + ) + } else { + None + }, + robustness2: if enabled_extensions.contains(&vk::ExtRobustness2Fn::name()) { + // Note: enabling `robust_buffer_access2` isn't requires, strictly speaking + // since we can enable `robust_buffer_access` all the time. But it improves + // program portability, so we opt into it anyway. + Some( + vk::PhysicalDeviceRobustness2FeaturesEXT::builder() + .robust_buffer_access2(private_caps.robust_buffer_access) + .robust_image_access2(private_caps.robust_image_access) + .build(), + ) + } else { + None + }, + depth_clip_enable: if enabled_extensions.contains(&vk::ExtDepthClipEnableFn::name()) { + Some( + vk::PhysicalDeviceDepthClipEnableFeaturesEXT::builder() + .depth_clip_enable( + requested_features.contains(wgt::Features::DEPTH_CLIP_CONTROL), + ) + .build(), + ) + } else { + None + }, + multiview: if effective_api_version >= vk::API_VERSION_1_1 + || enabled_extensions.contains(&vk::KhrMultiviewFn::name()) + { + Some( + vk::PhysicalDeviceMultiviewFeatures::builder() + .multiview(requested_features.contains(wgt::Features::MULTIVIEW)) + .build(), + ) + } else { + None + }, + astc_hdr: if enabled_extensions.contains(&vk::ExtTextureCompressionAstcHdrFn::name()) { + Some( + vk::PhysicalDeviceTextureCompressionASTCHDRFeaturesEXT::builder() + .texture_compression_astc_hdr(true) + .build(), + ) + } else { + None + }, + shader_float16: if requested_features.contains(wgt::Features::SHADER_F16) { + Some(( + vk::PhysicalDeviceShaderFloat16Int8Features::builder() + .shader_float16(true) + .build(), + vk::PhysicalDevice16BitStorageFeatures::builder() + .storage_buffer16_bit_access(true) + .uniform_and_storage_buffer16_bit_access(true) + .build(), + )) + } else { + None + }, + zero_initialize_workgroup_memory: if effective_api_version >= vk::API_VERSION_1_3 + || enabled_extensions.contains(&vk::KhrZeroInitializeWorkgroupMemoryFn::name()) + { + Some( + vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures::builder() + .shader_zero_initialize_workgroup_memory( + private_caps.zero_initialize_workgroup_memory, + ) + .build(), + ) + } else { + None + }, + } + } + + fn to_wgpu( + &self, + instance: &ash::Instance, + phd: vk::PhysicalDevice, + caps: &PhysicalDeviceCapabilities, + ) -> (wgt::Features, wgt::DownlevelFlags) { + use crate::auxil::db; + use wgt::{DownlevelFlags as Df, Features as F}; + let mut features = F::empty() + | F::SPIRV_SHADER_PASSTHROUGH + | F::MAPPABLE_PRIMARY_BUFFERS + | F::PUSH_CONSTANTS + | F::ADDRESS_MODE_CLAMP_TO_BORDER + | F::ADDRESS_MODE_CLAMP_TO_ZERO + | F::TIMESTAMP_QUERY + | F::TIMESTAMP_QUERY_INSIDE_PASSES + | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | F::CLEAR_TEXTURE; + + let mut dl_flags = Df::COMPUTE_SHADERS + | Df::BASE_VERTEX + | Df::READ_ONLY_DEPTH_STENCIL + | Df::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES + | Df::COMPARISON_SAMPLERS + | Df::VERTEX_STORAGE + | Df::FRAGMENT_STORAGE + | Df::DEPTH_TEXTURE_AND_BUFFER_COPIES + | Df::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED + | Df::UNRESTRICTED_INDEX_BUFFER + | Df::INDIRECT_EXECUTION + | Df::VIEW_FORMATS + | Df::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES; + + dl_flags.set( + Df::SURFACE_VIEW_FORMATS, + caps.supports_extension(vk::KhrSwapchainMutableFormatFn::name()), + ); + dl_flags.set(Df::CUBE_ARRAY_TEXTURES, self.core.image_cube_array != 0); + dl_flags.set(Df::ANISOTROPIC_FILTERING, self.core.sampler_anisotropy != 0); + dl_flags.set( + Df::FRAGMENT_WRITABLE_STORAGE, + self.core.fragment_stores_and_atomics != 0, + ); + dl_flags.set(Df::MULTISAMPLED_SHADING, self.core.sample_rate_shading != 0); + dl_flags.set(Df::INDEPENDENT_BLEND, self.core.independent_blend != 0); + dl_flags.set( + Df::FULL_DRAW_INDEX_UINT32, + self.core.full_draw_index_uint32 != 0, + ); + dl_flags.set(Df::DEPTH_BIAS_CLAMP, self.core.depth_bias_clamp != 0); + + features.set( + F::INDIRECT_FIRST_INSTANCE, + self.core.draw_indirect_first_instance != 0, + ); + //if self.core.dual_src_blend != 0 + features.set(F::MULTI_DRAW_INDIRECT, self.core.multi_draw_indirect != 0); + features.set(F::POLYGON_MODE_LINE, self.core.fill_mode_non_solid != 0); + features.set(F::POLYGON_MODE_POINT, self.core.fill_mode_non_solid != 0); + //if self.core.depth_bounds != 0 { + //if self.core.alpha_to_one != 0 { + //if self.core.multi_viewport != 0 { + features.set( + F::TEXTURE_COMPRESSION_ETC2, + self.core.texture_compression_etc2 != 0, + ); + features.set( + F::TEXTURE_COMPRESSION_ASTC, + self.core.texture_compression_astc_ldr != 0, + ); + features.set( + F::TEXTURE_COMPRESSION_BC, + self.core.texture_compression_bc != 0, + ); + features.set( + F::PIPELINE_STATISTICS_QUERY, + self.core.pipeline_statistics_query != 0, + ); + features.set( + F::VERTEX_WRITABLE_STORAGE, + self.core.vertex_pipeline_stores_and_atomics != 0, + ); + //if self.core.shader_image_gather_extended != 0 { + //if self.core.shader_storage_image_extended_formats != 0 { + features.set( + F::BUFFER_BINDING_ARRAY, + self.core.shader_uniform_buffer_array_dynamic_indexing != 0, + ); + features.set( + F::TEXTURE_BINDING_ARRAY, + self.core.shader_sampled_image_array_dynamic_indexing != 0, + ); + features.set(F::SHADER_PRIMITIVE_INDEX, self.core.geometry_shader != 0); + if Self::all_features_supported( + &features, + &[ + ( + F::BUFFER_BINDING_ARRAY, + self.core.shader_storage_buffer_array_dynamic_indexing, + ), + ( + F::TEXTURE_BINDING_ARRAY, + self.core.shader_storage_image_array_dynamic_indexing, + ), + ], + ) { + features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY); + } + //if self.core.shader_storage_image_array_dynamic_indexing != 0 { + //if self.core.shader_clip_distance != 0 { + //if self.core.shader_cull_distance != 0 { + features.set(F::SHADER_F64, self.core.shader_float64 != 0); + //if self.core.shader_int64 != 0 { + features.set(F::SHADER_I16, self.core.shader_int16 != 0); + + //if caps.supports_extension(vk::KhrSamplerMirrorClampToEdgeFn::name()) { + //if caps.supports_extension(vk::ExtSamplerFilterMinmaxFn::name()) { + features.set( + F::MULTI_DRAW_INDIRECT_COUNT, + caps.supports_extension(vk::KhrDrawIndirectCountFn::name()), + ); + features.set( + F::CONSERVATIVE_RASTERIZATION, + caps.supports_extension(vk::ExtConservativeRasterizationFn::name()), + ); + + let intel_windows = caps.properties.vendor_id == db::intel::VENDOR && cfg!(windows); + + if let Some(ref descriptor_indexing) = self.descriptor_indexing { + const STORAGE: F = F::STORAGE_RESOURCE_BINDING_ARRAY; + if Self::all_features_supported( + &features, + &[ + ( + F::TEXTURE_BINDING_ARRAY, + descriptor_indexing.shader_sampled_image_array_non_uniform_indexing, + ), + ( + F::BUFFER_BINDING_ARRAY | STORAGE, + descriptor_indexing.shader_storage_buffer_array_non_uniform_indexing, + ), + ], + ) { + features.insert(F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING); + } + if Self::all_features_supported( + &features, + &[ + ( + F::BUFFER_BINDING_ARRAY, + descriptor_indexing.shader_uniform_buffer_array_non_uniform_indexing, + ), + ( + F::TEXTURE_BINDING_ARRAY | STORAGE, + descriptor_indexing.shader_storage_image_array_non_uniform_indexing, + ), + ], + ) { + features.insert(F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING); + } + if descriptor_indexing.descriptor_binding_partially_bound != 0 && !intel_windows { + features |= F::PARTIALLY_BOUND_BINDING_ARRAY; + } + } + + if let Some(ref feature) = self.depth_clip_enable { + features.set(F::DEPTH_CLIP_CONTROL, feature.depth_clip_enable != 0); + } + + if let Some(ref multiview) = self.multiview { + features.set(F::MULTIVIEW, multiview.multiview != 0); + } + + features.set( + F::TEXTURE_FORMAT_16BIT_NORM, + is_format_16bit_norm_supported(instance, phd), + ); + + if let Some(ref astc_hdr) = self.astc_hdr { + features.set( + F::TEXTURE_COMPRESSION_ASTC_HDR, + astc_hdr.texture_compression_astc_hdr != 0, + ); + } + + if let Some((ref f16_i8, ref bit16)) = self.shader_float16 { + features.set( + F::SHADER_F16, + f16_i8.shader_float16 != 0 + && bit16.storage_buffer16_bit_access != 0 + && bit16.uniform_and_storage_buffer16_bit_access != 0, + ); + } + + let supports_depth_format = |format| { + supports_format( + instance, + phd, + format, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ) + }; + + let texture_s8 = supports_depth_format(vk::Format::S8_UINT); + let texture_d32 = supports_depth_format(vk::Format::D32_SFLOAT); + let texture_d24_s8 = supports_depth_format(vk::Format::D24_UNORM_S8_UINT); + let texture_d32_s8 = supports_depth_format(vk::Format::D32_SFLOAT_S8_UINT); + + let stencil8 = texture_s8 || texture_d24_s8; + let depth24_plus_stencil8 = texture_d24_s8 || texture_d32_s8; + + dl_flags.set( + Df::WEBGPU_TEXTURE_FORMAT_SUPPORT, + stencil8 && depth24_plus_stencil8 && texture_d32, + ); + + features.set(F::DEPTH32FLOAT_STENCIL8, texture_d32_s8); + + let rg11b10ufloat_renderable = supports_format( + instance, + phd, + vk::Format::B10G11R11_UFLOAT_PACK32, + vk::ImageTiling::OPTIMAL, + vk::FormatFeatureFlags::COLOR_ATTACHMENT + | vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND, + ); + features.set(F::RG11B10UFLOAT_RENDERABLE, rg11b10ufloat_renderable); + + (features, dl_flags) + } + + fn all_features_supported( + features: &wgt::Features, + implications: &[(wgt::Features, vk::Bool32)], + ) -> bool { + implications + .iter() + .all(|&(flag, support)| !features.contains(flag) || support != 0) + } +} + +/// Information gathered about a physical device capabilities. +#[derive(Default)] +pub struct PhysicalDeviceCapabilities { + supported_extensions: Vec<vk::ExtensionProperties>, + properties: vk::PhysicalDeviceProperties, + maintenance_3: Option<vk::PhysicalDeviceMaintenance3Properties>, + descriptor_indexing: Option<vk::PhysicalDeviceDescriptorIndexingPropertiesEXT>, + driver: Option<vk::PhysicalDeviceDriverPropertiesKHR>, + /// The effective driver api version supported by the physical device. + /// + /// The Vulkan specification states the following in the documentation for VkPhysicalDeviceProperties: + /// > The value of apiVersion may be different than the version returned by vkEnumerateInstanceVersion; + /// > either higher or lower. In such cases, the application must not use functionality that exceeds + /// > the version of Vulkan associated with a given object. + /// + /// For example, a Vulkan 1.1 instance cannot use functionality added in Vulkan 1.2 even if the physical + /// device supports Vulkan 1.2. + /// + /// This means that assuming that the apiVersion provided by VkPhysicalDeviceProperties is the actual + /// version we can use is incorrect. Instead the effective version is the lower of the instance version + /// and physical device version. + effective_api_version: u32, +} + +// This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. +unsafe impl Send for PhysicalDeviceCapabilities {} +unsafe impl Sync for PhysicalDeviceCapabilities {} + +impl PhysicalDeviceCapabilities { + pub fn properties(&self) -> vk::PhysicalDeviceProperties { + self.properties + } + + pub fn supports_extension(&self, extension: &CStr) -> bool { + use crate::auxil::cstr_from_bytes_until_nul; + self.supported_extensions + .iter() + .any(|ep| cstr_from_bytes_until_nul(&ep.extension_name) == Some(extension)) + } + + /// Map `requested_features` to the list of Vulkan extension strings required to create the logical device. + fn get_required_extensions(&self, requested_features: wgt::Features) -> Vec<&'static CStr> { + let mut extensions = Vec::new(); + + // Note that quite a few extensions depend on the `VK_KHR_get_physical_device_properties2` instance extension. + // We enable `VK_KHR_get_physical_device_properties2` unconditionally (if available). + + // Require `VK_KHR_swapchain` + extensions.push(vk::KhrSwapchainFn::name()); + + if self.effective_api_version < vk::API_VERSION_1_1 { + // Require either `VK_KHR_maintenance1` or `VK_AMD_negative_viewport_height` + if self.supports_extension(vk::KhrMaintenance1Fn::name()) { + extensions.push(vk::KhrMaintenance1Fn::name()); + } else { + // `VK_AMD_negative_viewport_height` is obsoleted by `VK_KHR_maintenance1` and must not be enabled alongside it + extensions.push(vk::AmdNegativeViewportHeightFn::name()); + } + + // Optional `VK_KHR_maintenance2` + if self.supports_extension(vk::KhrMaintenance2Fn::name()) { + extensions.push(vk::KhrMaintenance2Fn::name()); + } + + // Optional `VK_KHR_maintenance3` + if self.supports_extension(vk::KhrMaintenance3Fn::name()) { + extensions.push(vk::KhrMaintenance3Fn::name()); + } + + // Require `VK_KHR_storage_buffer_storage_class` + extensions.push(vk::KhrStorageBufferStorageClassFn::name()); + + // Require `VK_KHR_multiview` if the associated feature was requested + if requested_features.contains(wgt::Features::MULTIVIEW) { + extensions.push(vk::KhrMultiviewFn::name()); + } + } + + if self.effective_api_version < vk::API_VERSION_1_2 { + // Optional `VK_KHR_image_format_list` + if self.supports_extension(vk::KhrImageFormatListFn::name()) { + extensions.push(vk::KhrImageFormatListFn::name()); + } + + // Optional `VK_KHR_imageless_framebuffer` + if self.supports_extension(vk::KhrImagelessFramebufferFn::name()) { + extensions.push(vk::KhrImagelessFramebufferFn::name()); + // Require `VK_KHR_maintenance2` due to it being a dependency + if self.effective_api_version < vk::API_VERSION_1_1 { + extensions.push(vk::KhrMaintenance2Fn::name()); + } + } + + // Optional `VK_KHR_driver_properties` + if self.supports_extension(vk::KhrDriverPropertiesFn::name()) { + extensions.push(vk::KhrDriverPropertiesFn::name()); + } + + // Optional `VK_KHR_timeline_semaphore` + if self.supports_extension(vk::KhrTimelineSemaphoreFn::name()) { + extensions.push(vk::KhrTimelineSemaphoreFn::name()); + } + + // Require `VK_EXT_descriptor_indexing` if one of the associated features was requested + if requested_features.intersects(indexing_features()) { + extensions.push(vk::ExtDescriptorIndexingFn::name()); + } + + // Require `VK_KHR_shader_float16_int8` and `VK_KHR_16bit_storage` if the associated feature was requested + if requested_features.contains(wgt::Features::SHADER_F16) { + extensions.push(vk::KhrShaderFloat16Int8Fn::name()); + // `VK_KHR_16bit_storage` requires `VK_KHR_storage_buffer_storage_class`, however we require that one already + if self.effective_api_version < vk::API_VERSION_1_1 { + extensions.push(vk::Khr16bitStorageFn::name()); + } + } + + //extensions.push(vk::KhrSamplerMirrorClampToEdgeFn::name()); + //extensions.push(vk::ExtSamplerFilterMinmaxFn::name()); + } + + if self.effective_api_version < vk::API_VERSION_1_3 { + // Optional `VK_EXT_image_robustness` + if self.supports_extension(vk::ExtImageRobustnessFn::name()) { + extensions.push(vk::ExtImageRobustnessFn::name()); + } + } + + // Optional `VK_KHR_swapchain_mutable_format` + if self.supports_extension(vk::KhrSwapchainMutableFormatFn::name()) { + extensions.push(vk::KhrSwapchainMutableFormatFn::name()); + } + + // Optional `VK_EXT_robustness2` + if self.supports_extension(vk::ExtRobustness2Fn::name()) { + extensions.push(vk::ExtRobustness2Fn::name()); + } + + // Require `VK_KHR_draw_indirect_count` if the associated feature was requested + // Even though Vulkan 1.2 has promoted the extension to core, we must require the extension to avoid + // large amounts of spaghetti involved with using PhysicalDeviceVulkan12Features. + if requested_features.contains(wgt::Features::MULTI_DRAW_INDIRECT_COUNT) { + extensions.push(vk::KhrDrawIndirectCountFn::name()); + } + + // Require `VK_EXT_conservative_rasterization` if the associated feature was requested + if requested_features.contains(wgt::Features::CONSERVATIVE_RASTERIZATION) { + extensions.push(vk::ExtConservativeRasterizationFn::name()); + } + + // Require `VK_EXT_depth_clip_enable` if the associated feature was requested + if requested_features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + extensions.push(vk::ExtDepthClipEnableFn::name()); + } + + // Require `VK_KHR_portability_subset` on macOS/iOS + #[cfg(any(target_os = "macos", target_os = "ios"))] + extensions.push(vk::KhrPortabilitySubsetFn::name()); + + // Require `VK_EXT_texture_compression_astc_hdr` if the associated feature was requested + if requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR) { + extensions.push(vk::ExtTextureCompressionAstcHdrFn::name()); + } + + extensions + } + + fn to_wgpu_limits(&self) -> wgt::Limits { + let limits = &self.properties.limits; + + let max_compute_workgroup_sizes = limits.max_compute_work_group_size; + let max_compute_workgroups_per_dimension = limits.max_compute_work_group_count[0] + .min(limits.max_compute_work_group_count[1]) + .min(limits.max_compute_work_group_count[2]); + + // Prevent very large buffers on mesa and most android devices. + let is_nvidia = self.properties.vendor_id == crate::auxil::db::nvidia::VENDOR; + let max_buffer_size = + if (cfg!(target_os = "linux") || cfg!(target_os = "android")) && !is_nvidia { + i32::MAX as u64 + } else { + u64::MAX + }; + + wgt::Limits { + max_texture_dimension_1d: limits.max_image_dimension1_d, + max_texture_dimension_2d: limits.max_image_dimension2_d, + max_texture_dimension_3d: limits.max_image_dimension3_d, + max_texture_array_layers: limits.max_image_array_layers, + max_bind_groups: limits + .max_bound_descriptor_sets + .min(crate::MAX_BIND_GROUPS as u32), + max_bindings_per_bind_group: 640, + max_dynamic_uniform_buffers_per_pipeline_layout: limits + .max_descriptor_set_uniform_buffers_dynamic, + max_dynamic_storage_buffers_per_pipeline_layout: limits + .max_descriptor_set_storage_buffers_dynamic, + max_sampled_textures_per_shader_stage: limits.max_per_stage_descriptor_sampled_images, + max_samplers_per_shader_stage: limits.max_per_stage_descriptor_samplers, + max_storage_buffers_per_shader_stage: limits.max_per_stage_descriptor_storage_buffers, + max_storage_textures_per_shader_stage: limits.max_per_stage_descriptor_storage_images, + max_uniform_buffers_per_shader_stage: limits.max_per_stage_descriptor_uniform_buffers, + max_uniform_buffer_binding_size: limits + .max_uniform_buffer_range + .min(crate::auxil::MAX_I32_BINDING_SIZE), + max_storage_buffer_binding_size: limits + .max_storage_buffer_range + .min(crate::auxil::MAX_I32_BINDING_SIZE), + max_vertex_buffers: limits + .max_vertex_input_bindings + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: limits.max_vertex_input_attributes, + max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride, + max_push_constant_size: limits.max_push_constants_size, + min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32, + min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32, + max_inter_stage_shader_components: limits + .max_vertex_output_components + .min(limits.max_fragment_input_components), + max_compute_workgroup_storage_size: limits.max_compute_shared_memory_size, + max_compute_invocations_per_workgroup: limits.max_compute_work_group_invocations, + max_compute_workgroup_size_x: max_compute_workgroup_sizes[0], + max_compute_workgroup_size_y: max_compute_workgroup_sizes[1], + max_compute_workgroup_size_z: max_compute_workgroup_sizes[2], + max_compute_workgroups_per_dimension, + max_buffer_size, + } + } + + fn to_hal_alignments(&self) -> crate::Alignments { + let limits = &self.properties.limits; + crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(limits.optimal_buffer_copy_offset_alignment) + .unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(limits.optimal_buffer_copy_row_pitch_alignment) + .unwrap(), + } + } +} + +impl super::InstanceShared { + #[allow(trivial_casts)] // false positives + fn inspect( + &self, + phd: vk::PhysicalDevice, + ) -> (PhysicalDeviceCapabilities, PhysicalDeviceFeatures) { + let capabilities = { + let mut capabilities = PhysicalDeviceCapabilities::default(); + capabilities.supported_extensions = + unsafe { self.raw.enumerate_device_extension_properties(phd).unwrap() }; + capabilities.properties = if let Some(ref get_device_properties) = + self.get_physical_device_properties + { + // Get these now to avoid borrowing conflicts later + let supports_descriptor_indexing = self.driver_api_version >= vk::API_VERSION_1_2 + || capabilities.supports_extension(vk::ExtDescriptorIndexingFn::name()); + let supports_driver_properties = self.driver_api_version >= vk::API_VERSION_1_2 + || capabilities.supports_extension(vk::KhrDriverPropertiesFn::name()); + + let mut builder = vk::PhysicalDeviceProperties2KHR::builder(); + if self.driver_api_version >= vk::API_VERSION_1_1 + || capabilities.supports_extension(vk::KhrMaintenance3Fn::name()) + { + capabilities.maintenance_3 = + Some(vk::PhysicalDeviceMaintenance3Properties::default()); + builder = builder.push_next(capabilities.maintenance_3.as_mut().unwrap()); + } + + if supports_descriptor_indexing { + let next = capabilities + .descriptor_indexing + .insert(vk::PhysicalDeviceDescriptorIndexingPropertiesEXT::default()); + builder = builder.push_next(next); + } + + if supports_driver_properties { + let next = capabilities + .driver + .insert(vk::PhysicalDeviceDriverPropertiesKHR::default()); + builder = builder.push_next(next); + } + + let mut properties2 = builder.build(); + unsafe { + get_device_properties.get_physical_device_properties2(phd, &mut properties2); + } + properties2.properties + } else { + unsafe { self.raw.get_physical_device_properties(phd) } + }; + + // Set the effective api version + capabilities.effective_api_version = self + .driver_api_version + .min(capabilities.properties.api_version); + capabilities + }; + + let mut features = PhysicalDeviceFeatures::default(); + features.core = if let Some(ref get_device_properties) = self.get_physical_device_properties + { + let core = vk::PhysicalDeviceFeatures::default(); + let mut builder = vk::PhysicalDeviceFeatures2KHR::builder().features(core); + + // `VK_KHR_multiview` is promoted to 1.1 + if capabilities.effective_api_version >= vk::API_VERSION_1_1 + || capabilities.supports_extension(vk::KhrMultiviewFn::name()) + { + let next = features + .multiview + .insert(vk::PhysicalDeviceMultiviewFeatures::default()); + builder = builder.push_next(next); + } + + if capabilities.supports_extension(vk::ExtDescriptorIndexingFn::name()) { + let next = features + .descriptor_indexing + .insert(vk::PhysicalDeviceDescriptorIndexingFeaturesEXT::default()); + builder = builder.push_next(next); + } + + // `VK_KHR_imageless_framebuffer` is promoted to 1.2, but has no changes, so we can keep using the extension unconditionally. + if capabilities.supports_extension(vk::KhrImagelessFramebufferFn::name()) { + let next = features + .imageless_framebuffer + .insert(vk::PhysicalDeviceImagelessFramebufferFeaturesKHR::default()); + builder = builder.push_next(next); + } + + // `VK_KHR_timeline_semaphore` is promoted to 1.2, but has no changes, so we can keep using the extension unconditionally. + if capabilities.supports_extension(vk::KhrTimelineSemaphoreFn::name()) { + let next = features + .timeline_semaphore + .insert(vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR::default()); + builder = builder.push_next(next); + } + + if capabilities.supports_extension(vk::ExtImageRobustnessFn::name()) { + let next = features + .image_robustness + .insert(vk::PhysicalDeviceImageRobustnessFeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::ExtRobustness2Fn::name()) { + let next = features + .robustness2 + .insert(vk::PhysicalDeviceRobustness2FeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::ExtDepthClipEnableFn::name()) { + let next = features + .depth_clip_enable + .insert(vk::PhysicalDeviceDepthClipEnableFeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::ExtTextureCompressionAstcHdrFn::name()) { + let next = features + .astc_hdr + .insert(vk::PhysicalDeviceTextureCompressionASTCHDRFeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::KhrShaderFloat16Int8Fn::name()) + && capabilities.supports_extension(vk::Khr16bitStorageFn::name()) + { + let next = features.shader_float16.insert(( + vk::PhysicalDeviceShaderFloat16Int8FeaturesKHR::default(), + vk::PhysicalDevice16BitStorageFeaturesKHR::default(), + )); + builder = builder.push_next(&mut next.0); + builder = builder.push_next(&mut next.1); + } + + // `VK_KHR_zero_initialize_workgroup_memory` is promoted to 1.3 + if capabilities.effective_api_version >= vk::API_VERSION_1_3 + || capabilities.supports_extension(vk::KhrZeroInitializeWorkgroupMemoryFn::name()) + { + let next = features + .zero_initialize_workgroup_memory + .insert(vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures::default()); + builder = builder.push_next(next); + } + + let mut features2 = builder.build(); + unsafe { + get_device_properties.get_physical_device_features2(phd, &mut features2); + } + features2.features + } else { + unsafe { self.raw.get_physical_device_features(phd) } + }; + + (capabilities, features) + } +} + +impl super::Instance { + pub fn expose_adapter( + &self, + phd: vk::PhysicalDevice, + ) -> Option<crate::ExposedAdapter<super::Api>> { + use crate::auxil::cstr_from_bytes_until_nul; + use crate::auxil::db; + + let (phd_capabilities, phd_features) = self.shared.inspect(phd); + + let info = wgt::AdapterInfo { + name: { + cstr_from_bytes_until_nul(&phd_capabilities.properties.device_name) + .and_then(|info| info.to_str().ok()) + .unwrap_or("?") + .to_owned() + }, + vendor: phd_capabilities.properties.vendor_id, + device: phd_capabilities.properties.device_id, + device_type: match phd_capabilities.properties.device_type { + ash::vk::PhysicalDeviceType::OTHER => wgt::DeviceType::Other, + ash::vk::PhysicalDeviceType::INTEGRATED_GPU => wgt::DeviceType::IntegratedGpu, + ash::vk::PhysicalDeviceType::DISCRETE_GPU => wgt::DeviceType::DiscreteGpu, + ash::vk::PhysicalDeviceType::VIRTUAL_GPU => wgt::DeviceType::VirtualGpu, + ash::vk::PhysicalDeviceType::CPU => wgt::DeviceType::Cpu, + _ => wgt::DeviceType::Other, + }, + driver: { + phd_capabilities + .driver + .as_ref() + .and_then(|driver| cstr_from_bytes_until_nul(&driver.driver_name)) + .and_then(|name| name.to_str().ok()) + .unwrap_or("?") + .to_owned() + }, + driver_info: { + phd_capabilities + .driver + .as_ref() + .and_then(|driver| cstr_from_bytes_until_nul(&driver.driver_info)) + .and_then(|name| name.to_str().ok()) + .unwrap_or("?") + .to_owned() + }, + backend: wgt::Backend::Vulkan, + }; + + let (available_features, downlevel_flags) = + phd_features.to_wgpu(&self.shared.raw, phd, &phd_capabilities); + let mut workarounds = super::Workarounds::empty(); + { + // see https://github.com/gfx-rs/gfx/issues/1930 + let _is_windows_intel_dual_src_bug = cfg!(windows) + && phd_capabilities.properties.vendor_id == db::intel::VENDOR + && (phd_capabilities.properties.device_id & db::intel::DEVICE_KABY_LAKE_MASK + == db::intel::DEVICE_KABY_LAKE_MASK + || phd_capabilities.properties.device_id & db::intel::DEVICE_SKY_LAKE_MASK + == db::intel::DEVICE_SKY_LAKE_MASK); + // TODO: only enable for particular devices + workarounds |= super::Workarounds::SEPARATE_ENTRY_POINTS; + workarounds.set( + super::Workarounds::EMPTY_RESOLVE_ATTACHMENT_LISTS, + phd_capabilities.properties.vendor_id == db::qualcomm::VENDOR, + ); + }; + + if phd_capabilities.effective_api_version == vk::API_VERSION_1_0 + && !phd_capabilities.supports_extension(vk::KhrStorageBufferStorageClassFn::name()) + { + log::warn!( + "SPIR-V storage buffer class is not supported, hiding adapter: {}", + info.name + ); + return None; + } + if !phd_capabilities.supports_extension(vk::AmdNegativeViewportHeightFn::name()) + && !phd_capabilities.supports_extension(vk::KhrMaintenance1Fn::name()) + && phd_capabilities.effective_api_version < vk::API_VERSION_1_1 + { + log::warn!( + "viewport Y-flip is not supported, hiding adapter: {}", + info.name + ); + return None; + } + + let queue_families = unsafe { + self.shared + .raw + .get_physical_device_queue_family_properties(phd) + }; + let queue_flags = queue_families.first()?.queue_flags; + if !queue_flags.contains(vk::QueueFlags::GRAPHICS) { + log::warn!("The first queue only exposes {:?}", queue_flags); + return None; + } + + let private_caps = super::PrivateCapabilities { + flip_y_requires_shift: phd_capabilities.effective_api_version >= vk::API_VERSION_1_1 + || phd_capabilities.supports_extension(vk::KhrMaintenance1Fn::name()), + imageless_framebuffers: match phd_features.imageless_framebuffer { + Some(features) => features.imageless_framebuffer == vk::TRUE, + None => phd_features + .imageless_framebuffer + .map_or(false, |ext| ext.imageless_framebuffer != 0), + }, + image_view_usage: phd_capabilities.effective_api_version >= vk::API_VERSION_1_1 + || phd_capabilities.supports_extension(vk::KhrMaintenance2Fn::name()), + timeline_semaphores: match phd_features.timeline_semaphore { + Some(features) => features.timeline_semaphore == vk::TRUE, + None => phd_features + .timeline_semaphore + .map_or(false, |ext| ext.timeline_semaphore != 0), + }, + texture_d24: supports_format( + &self.shared.raw, + phd, + vk::Format::X8_D24_UNORM_PACK32, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ), + texture_d24_s8: supports_format( + &self.shared.raw, + phd, + vk::Format::D24_UNORM_S8_UINT, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ), + texture_s8: supports_format( + &self.shared.raw, + phd, + vk::Format::S8_UINT, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ), + non_coherent_map_mask: phd_capabilities.properties.limits.non_coherent_atom_size - 1, + can_present: true, + //TODO: make configurable + robust_buffer_access: phd_features.core.robust_buffer_access != 0, + robust_image_access: match phd_features.robustness2 { + Some(ref f) => f.robust_image_access2 != 0, + None => phd_features + .image_robustness + .map_or(false, |ext| ext.robust_image_access != 0), + }, + zero_initialize_workgroup_memory: phd_features + .zero_initialize_workgroup_memory + .map_or(false, |ext| { + ext.shader_zero_initialize_workgroup_memory == vk::TRUE + }), + }; + let capabilities = crate::Capabilities { + limits: phd_capabilities.to_wgpu_limits(), + alignments: phd_capabilities.to_hal_alignments(), + downlevel: wgt::DownlevelCapabilities { + flags: downlevel_flags, + limits: wgt::DownlevelLimits {}, + shader_model: wgt::ShaderModel::Sm5, //TODO? + }, + }; + + let adapter = super::Adapter { + raw: phd, + instance: Arc::clone(&self.shared), + //queue_families, + known_memory_flags: vk::MemoryPropertyFlags::DEVICE_LOCAL + | vk::MemoryPropertyFlags::HOST_VISIBLE + | vk::MemoryPropertyFlags::HOST_COHERENT + | vk::MemoryPropertyFlags::HOST_CACHED + | vk::MemoryPropertyFlags::LAZILY_ALLOCATED, + phd_capabilities, + //phd_features, + downlevel_flags, + private_caps, + workarounds, + }; + + Some(crate::ExposedAdapter { + adapter, + info, + features: available_features, + capabilities, + }) + } +} + +impl super::Adapter { + pub fn raw_physical_device(&self) -> ash::vk::PhysicalDevice { + self.raw + } + + pub fn physical_device_capabilities(&self) -> &PhysicalDeviceCapabilities { + &self.phd_capabilities + } + + pub fn shared_instance(&self) -> &super::InstanceShared { + &self.instance + } + + pub fn required_device_extensions(&self, features: wgt::Features) -> Vec<&'static CStr> { + let (supported_extensions, unsupported_extensions) = self + .phd_capabilities + .get_required_extensions(features) + .iter() + .partition::<Vec<&CStr>, _>(|&&extension| { + self.phd_capabilities.supports_extension(extension) + }); + + if !unsupported_extensions.is_empty() { + log::warn!("Missing extensions: {:?}", unsupported_extensions); + } + + log::debug!("Supported extensions: {:?}", supported_extensions); + supported_extensions + } + + /// `features` must be the same features used to create `enabled_extensions`. + pub fn physical_device_features( + &self, + enabled_extensions: &[&'static CStr], + features: wgt::Features, + ) -> PhysicalDeviceFeatures { + PhysicalDeviceFeatures::from_extensions_and_requested_features( + self.phd_capabilities.effective_api_version, + enabled_extensions, + features, + self.downlevel_flags, + &self.private_caps, + ) + } + + /// # Safety + /// + /// - `raw_device` must be created from this adapter. + /// - `raw_device` must be created using `family_index`, `enabled_extensions` and `physical_device_features()` + /// - `enabled_extensions` must be a superset of `required_device_extensions()`. + #[allow(clippy::too_many_arguments)] + pub unsafe fn device_from_raw( + &self, + raw_device: ash::Device, + handle_is_owned: bool, + enabled_extensions: &[&'static CStr], + features: wgt::Features, + family_index: u32, + queue_index: u32, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let mem_properties = { + profiling::scope!("vkGetPhysicalDeviceMemoryProperties"); + unsafe { + self.instance + .raw + .get_physical_device_memory_properties(self.raw) + } + }; + let memory_types = + &mem_properties.memory_types[..mem_properties.memory_type_count as usize]; + let valid_ash_memory_types = memory_types.iter().enumerate().fold(0, |u, (i, mem)| { + if self.known_memory_flags.contains(mem.property_flags) { + u | (1 << i) + } else { + u + } + }); + + let swapchain_fn = khr::Swapchain::new(&self.instance.raw, &raw_device); + + let indirect_count_fn = if enabled_extensions.contains(&khr::DrawIndirectCount::name()) { + Some(khr::DrawIndirectCount::new(&self.instance.raw, &raw_device)) + } else { + None + }; + let timeline_semaphore_fn = if enabled_extensions.contains(&khr::TimelineSemaphore::name()) + { + Some(super::ExtensionFn::Extension(khr::TimelineSemaphore::new( + &self.instance.raw, + &raw_device, + ))) + } else if self.phd_capabilities.effective_api_version >= vk::API_VERSION_1_2 { + Some(super::ExtensionFn::Promoted) + } else { + None + }; + + let naga_options = { + use naga::back::spv; + + let mut capabilities = vec![ + spv::Capability::Shader, + spv::Capability::Matrix, + spv::Capability::Sampled1D, + spv::Capability::Image1D, + spv::Capability::ImageQuery, + spv::Capability::DerivativeControl, + spv::Capability::SampledCubeArray, + spv::Capability::SampleRateShading, + //Note: this is requested always, no matter what the actual + // adapter supports. It's not the responsibility of SPV-out + // translation to handle the storage support for formats. + spv::Capability::StorageImageExtendedFormats, + //TODO: fill out the rest + ]; + + if features.contains(wgt::Features::MULTIVIEW) { + capabilities.push(spv::Capability::MultiView); + } + + if features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX) { + capabilities.push(spv::Capability::Geometry); + } + + if features.intersects( + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ) { + capabilities.push(spv::Capability::ShaderNonUniform); + } + + let mut flags = spv::WriterFlags::empty(); + flags.set( + spv::WriterFlags::DEBUG, + self.instance.flags.contains(crate::InstanceFlags::DEBUG), + ); + flags.set( + spv::WriterFlags::LABEL_VARYINGS, + self.phd_capabilities.properties.vendor_id != crate::auxil::db::qualcomm::VENDOR, + ); + flags.set( + spv::WriterFlags::FORCE_POINT_SIZE, + //Note: we could technically disable this when we are compiling separate entry points, + // and we know exactly that the primitive topology is not `PointList`. + // But this requires cloning the `spv::Options` struct, which has heap allocations. + true, // could check `super::Workarounds::SEPARATE_ENTRY_POINTS` + ); + spv::Options { + lang_version: (1, 0), + flags, + capabilities: Some(capabilities.iter().cloned().collect()), + bounds_check_policies: naga::proc::BoundsCheckPolicies { + index: naga::proc::BoundsCheckPolicy::Restrict, + buffer: if self.private_caps.robust_buffer_access { + naga::proc::BoundsCheckPolicy::Unchecked + } else { + naga::proc::BoundsCheckPolicy::Restrict + }, + image: if self.private_caps.robust_image_access { + naga::proc::BoundsCheckPolicy::Unchecked + } else { + naga::proc::BoundsCheckPolicy::Restrict + }, + // TODO: support bounds checks on binding arrays + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }, + zero_initialize_workgroup_memory: if self + .private_caps + .zero_initialize_workgroup_memory + { + spv::ZeroInitializeWorkgroupMemoryMode::Native + } else { + spv::ZeroInitializeWorkgroupMemoryMode::Polyfill + }, + // We need to build this separately for each invocation, so just default it out here + binding_map: BTreeMap::default(), + } + }; + + let raw_queue = { + profiling::scope!("vkGetDeviceQueue"); + unsafe { raw_device.get_device_queue(family_index, queue_index) } + }; + + let shared = Arc::new(super::DeviceShared { + raw: raw_device, + family_index, + queue_index, + raw_queue, + handle_is_owned, + instance: Arc::clone(&self.instance), + physical_device: self.raw, + enabled_extensions: enabled_extensions.into(), + extension_fns: super::DeviceExtensionFunctions { + draw_indirect_count: indirect_count_fn, + timeline_semaphore: timeline_semaphore_fn, + }, + vendor_id: self.phd_capabilities.properties.vendor_id, + timestamp_period: self.phd_capabilities.properties.limits.timestamp_period, + private_caps: self.private_caps.clone(), + workarounds: self.workarounds, + render_passes: Mutex::new(Default::default()), + framebuffers: Mutex::new(Default::default()), + }); + let mut relay_semaphores = [vk::Semaphore::null(); 2]; + for sem in relay_semaphores.iter_mut() { + unsafe { + *sem = shared + .raw + .create_semaphore(&vk::SemaphoreCreateInfo::builder(), None)? + }; + } + let queue = super::Queue { + raw: raw_queue, + swapchain_fn, + device: Arc::clone(&shared), + family_index, + relay_semaphores, + relay_index: None, + }; + + let mem_allocator = { + let limits = self.phd_capabilities.properties.limits; + let config = gpu_alloc::Config::i_am_prototyping(); //TODO + let max_memory_allocation_size = + if let Some(maintenance_3) = self.phd_capabilities.maintenance_3 { + maintenance_3.max_memory_allocation_size + } else { + u64::max_value() + }; + let properties = gpu_alloc::DeviceProperties { + max_memory_allocation_count: limits.max_memory_allocation_count, + max_memory_allocation_size, + non_coherent_atom_size: limits.non_coherent_atom_size, + memory_types: memory_types + .iter() + .map(|memory_type| gpu_alloc::MemoryType { + props: gpu_alloc::MemoryPropertyFlags::from_bits_truncate( + memory_type.property_flags.as_raw() as u8, + ), + heap: memory_type.heap_index, + }) + .collect(), + memory_heaps: mem_properties.memory_heaps + [..mem_properties.memory_heap_count as usize] + .iter() + .map(|&memory_heap| gpu_alloc::MemoryHeap { + size: memory_heap.size, + }) + .collect(), + buffer_device_address: false, + }; + gpu_alloc::GpuAllocator::new(config, properties) + }; + let desc_allocator = gpu_descriptor::DescriptorAllocator::new( + if let Some(di) = self.phd_capabilities.descriptor_indexing { + di.max_update_after_bind_descriptors_in_all_pools + } else { + 0 + }, + ); + + let device = super::Device { + shared, + mem_allocator: Mutex::new(mem_allocator), + desc_allocator: Mutex::new(desc_allocator), + valid_ash_memory_types, + naga_options, + #[cfg(feature = "renderdoc")] + render_doc: Default::default(), + }; + + Ok(crate::OpenDevice { device, queue }) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let enabled_extensions = self.required_device_extensions(features); + let mut enabled_phd_features = self.physical_device_features(&enabled_extensions, features); + + let family_index = 0; //TODO + let family_info = vk::DeviceQueueCreateInfo::builder() + .queue_family_index(family_index) + .queue_priorities(&[1.0]) + .build(); + let family_infos = [family_info]; + + let str_pointers = enabled_extensions + .iter() + .map(|&s| { + // Safe because `enabled_extensions` entries have static lifetime. + s.as_ptr() + }) + .collect::<Vec<_>>(); + + let pre_info = vk::DeviceCreateInfo::builder() + .queue_create_infos(&family_infos) + .enabled_extension_names(&str_pointers); + let info = enabled_phd_features + .add_to_device_create_builder(pre_info) + .build(); + let raw_device = { + profiling::scope!("vkCreateDevice"); + unsafe { self.instance.raw.create_device(self.raw, &info, None)? } + }; + + unsafe { + self.device_from_raw( + raw_device, + true, + &enabled_extensions, + features, + family_info.queue_family_index, + 0, + ) + } + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + + let vk_format = self.private_caps.map_texture_format(format); + let properties = unsafe { + self.instance + .raw + .get_physical_device_format_properties(self.raw, vk_format) + }; + let features = properties.optimal_tiling_features; + + let mut flags = Tfc::empty(); + flags.set( + Tfc::SAMPLED, + features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE), + ); + flags.set( + Tfc::SAMPLED_LINEAR, + features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE_FILTER_LINEAR), + ); + // flags.set( + // Tfc::SAMPLED_MINMAX, + // features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE_FILTER_MINMAX), + // ); + flags.set( + Tfc::STORAGE | Tfc::STORAGE_READ_WRITE, + features.contains(vk::FormatFeatureFlags::STORAGE_IMAGE), + ); + flags.set( + Tfc::STORAGE_ATOMIC, + features.contains(vk::FormatFeatureFlags::STORAGE_IMAGE_ATOMIC), + ); + flags.set( + Tfc::COLOR_ATTACHMENT, + features.contains(vk::FormatFeatureFlags::COLOR_ATTACHMENT), + ); + flags.set( + Tfc::COLOR_ATTACHMENT_BLEND, + features.contains(vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND), + ); + flags.set( + Tfc::DEPTH_STENCIL_ATTACHMENT, + features.contains(vk::FormatFeatureFlags::DEPTH_STENCIL_ATTACHMENT), + ); + flags.set( + Tfc::COPY_SRC, + features.intersects(vk::FormatFeatureFlags::TRANSFER_SRC), + ); + flags.set( + Tfc::COPY_DST, + features.intersects(vk::FormatFeatureFlags::TRANSFER_DST), + ); + // Vulkan is very permissive about MSAA + flags.set(Tfc::MULTISAMPLE_RESOLVE, !format.is_compressed()); + + // get the supported sample counts + let format_aspect = crate::FormatAspects::from(format); + let limits = self.phd_capabilities.properties.limits; + + let sample_flags = if format_aspect.contains(crate::FormatAspects::DEPTH) { + limits + .framebuffer_depth_sample_counts + .min(limits.sampled_image_depth_sample_counts) + } else if format_aspect.contains(crate::FormatAspects::STENCIL) { + limits + .framebuffer_stencil_sample_counts + .min(limits.sampled_image_stencil_sample_counts) + } else { + match format.sample_type(None).unwrap() { + wgt::TextureSampleType::Float { filterable: _ } => limits + .framebuffer_color_sample_counts + .min(limits.sampled_image_color_sample_counts), + wgt::TextureSampleType::Sint | wgt::TextureSampleType::Uint => { + limits.sampled_image_integer_sample_counts + } + _ => unreachable!(), + } + }; + + flags.set( + Tfc::MULTISAMPLE_X2, + sample_flags.contains(vk::SampleCountFlags::TYPE_2), + ); + flags.set( + Tfc::MULTISAMPLE_X4, + sample_flags.contains(vk::SampleCountFlags::TYPE_4), + ); + flags.set( + Tfc::MULTISAMPLE_X8, + sample_flags.contains(vk::SampleCountFlags::TYPE_8), + ); + flags.set( + Tfc::MULTISAMPLE_X16, + sample_flags.contains(vk::SampleCountFlags::TYPE_16), + ); + + flags + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + if !self.private_caps.can_present { + return None; + } + let queue_family_index = 0; //TODO + { + profiling::scope!("vkGetPhysicalDeviceSurfaceSupportKHR"); + match unsafe { + surface.functor.get_physical_device_surface_support( + self.raw, + queue_family_index, + surface.raw, + ) + } { + Ok(true) => (), + Ok(false) => return None, + Err(e) => { + log::error!("get_physical_device_surface_support: {}", e); + return None; + } + } + } + + let caps = { + profiling::scope!("vkGetPhysicalDeviceSurfaceCapabilitiesKHR"); + match unsafe { + surface + .functor + .get_physical_device_surface_capabilities(self.raw, surface.raw) + } { + Ok(caps) => caps, + Err(e) => { + log::error!("get_physical_device_surface_capabilities: {}", e); + return None; + } + } + }; + + // If image count is 0, the support number of images is unlimited. + let max_image_count = if caps.max_image_count == 0 { + !0 + } else { + caps.max_image_count + }; + + // `0xFFFFFFFF` indicates that the extent depends on the created swapchain. + let current_extent = if caps.current_extent.width != !0 && caps.current_extent.height != !0 + { + Some(wgt::Extent3d { + width: caps.current_extent.width, + height: caps.current_extent.height, + depth_or_array_layers: 1, + }) + } else { + None + }; + + let min_extent = wgt::Extent3d { + width: caps.min_image_extent.width, + height: caps.min_image_extent.height, + depth_or_array_layers: 1, + }; + + let max_extent = wgt::Extent3d { + width: caps.max_image_extent.width, + height: caps.max_image_extent.height, + depth_or_array_layers: caps.max_image_array_layers, + }; + + let raw_present_modes = { + profiling::scope!("vkGetPhysicalDeviceSurfacePresentModesKHR"); + match unsafe { + surface + .functor + .get_physical_device_surface_present_modes(self.raw, surface.raw) + } { + Ok(present_modes) => present_modes, + Err(e) => { + log::error!("get_physical_device_surface_present_modes: {}", e); + Vec::new() + } + } + }; + + let raw_surface_formats = { + profiling::scope!("vkGetPhysicalDeviceSurfaceFormatsKHR"); + match unsafe { + surface + .functor + .get_physical_device_surface_formats(self.raw, surface.raw) + } { + Ok(formats) => formats, + Err(e) => { + log::error!("get_physical_device_surface_formats: {}", e); + Vec::new() + } + } + }; + + let formats = raw_surface_formats + .into_iter() + .filter_map(conv::map_vk_surface_formats) + .collect(); + Some(crate::SurfaceCapabilities { + formats, + swap_chain_sizes: caps.min_image_count..=max_image_count, + current_extent, + extents: min_extent..=max_extent, + usage: conv::map_vk_image_usage(caps.supported_usage_flags), + present_modes: raw_present_modes + .into_iter() + .flat_map(conv::map_vk_present_mode) + .collect(), + composite_alpha_modes: conv::map_vk_composite_alpha(caps.supported_composite_alpha), + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + // VK_GOOGLE_display_timing is the only way to get presentation + // timestamps on vulkan right now and it is only ever available + // on android and linux. This includes mac, but there's no alternative + // on mac, so this is fine. + #[cfg(unix)] + { + let mut timespec = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + unsafe { + libc::clock_gettime(libc::CLOCK_MONOTONIC, &mut timespec); + } + + wgt::PresentationTimestamp( + timespec.tv_sec as u128 * 1_000_000_000 + timespec.tv_nsec as u128, + ) + } + #[cfg(not(unix))] + { + wgt::PresentationTimestamp::INVALID_TIMESTAMP + } + } +} + +fn is_format_16bit_norm_supported(instance: &ash::Instance, phd: vk::PhysicalDevice) -> bool { + let tiling = vk::ImageTiling::OPTIMAL; + let features = vk::FormatFeatureFlags::SAMPLED_IMAGE + | vk::FormatFeatureFlags::STORAGE_IMAGE + | vk::FormatFeatureFlags::TRANSFER_SRC + | vk::FormatFeatureFlags::TRANSFER_DST; + let r16unorm = supports_format(instance, phd, vk::Format::R16_UNORM, tiling, features); + let r16snorm = supports_format(instance, phd, vk::Format::R16_SNORM, tiling, features); + let rg16unorm = supports_format(instance, phd, vk::Format::R16G16_UNORM, tiling, features); + let rg16snorm = supports_format(instance, phd, vk::Format::R16G16_SNORM, tiling, features); + let rgba16unorm = supports_format( + instance, + phd, + vk::Format::R16G16B16A16_UNORM, + tiling, + features, + ); + let rgba16snorm = supports_format( + instance, + phd, + vk::Format::R16G16B16A16_SNORM, + tiling, + features, + ); + + r16unorm && r16snorm && rg16unorm && rg16snorm && rgba16unorm && rgba16snorm +} + +fn supports_format( + instance: &ash::Instance, + phd: vk::PhysicalDevice, + format: vk::Format, + tiling: vk::ImageTiling, + features: vk::FormatFeatureFlags, +) -> bool { + let properties = unsafe { instance.get_physical_device_format_properties(phd, format) }; + match tiling { + vk::ImageTiling::LINEAR => properties.linear_tiling_features.contains(features), + vk::ImageTiling::OPTIMAL => properties.optimal_tiling_features.contains(features), + _ => false, + } +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/command.rs b/third_party/rust/wgpu-hal/src/vulkan/command.rs new file mode 100644 index 0000000000..f6c871026c --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/command.rs @@ -0,0 +1,826 @@ +use super::conv; + +use arrayvec::ArrayVec; +use ash::{extensions::ext, vk}; + +use std::{mem, ops::Range, slice}; + +const ALLOCATION_GRANULARITY: u32 = 16; +const DST_IMAGE_LAYOUT: vk::ImageLayout = vk::ImageLayout::TRANSFER_DST_OPTIMAL; + +impl super::Texture { + fn map_buffer_copies<T>(&self, regions: T) -> impl Iterator<Item = vk::BufferImageCopy> + where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let (block_width, block_height) = self.format.block_dimensions(); + let format = self.format; + let copy_size = self.copy_size; + regions.map(move |r| { + let extent = r.texture_base.max_copy_size(©_size).min(&r.size); + let (image_subresource, image_offset) = conv::map_subresource_layers(&r.texture_base); + vk::BufferImageCopy { + buffer_offset: r.buffer_layout.offset, + buffer_row_length: r.buffer_layout.bytes_per_row.map_or(0, |bpr| { + let block_size = format + .block_size(Some(r.texture_base.aspect.map())) + .unwrap(); + block_width * (bpr / block_size) + }), + buffer_image_height: r + .buffer_layout + .rows_per_image + .map_or(0, |rpi| rpi * block_height), + image_subresource, + image_offset, + image_extent: conv::map_copy_extent(&extent), + } + }) + } +} + +impl super::DeviceShared { + fn debug_messenger(&self) -> Option<&ext::DebugUtils> { + Some(&self.instance.debug_utils.as_ref()?.extension) + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + if self.free.is_empty() { + let vk_info = vk::CommandBufferAllocateInfo::builder() + .command_pool(self.raw) + .command_buffer_count(ALLOCATION_GRANULARITY) + .build(); + let cmd_buf_vec = unsafe { self.device.raw.allocate_command_buffers(&vk_info)? }; + self.free.extend(cmd_buf_vec); + } + let raw = self.free.pop().unwrap(); + + // Set the name unconditionally, since there might be a + // previous name assigned to this. + unsafe { + self.device.set_object_name( + vk::ObjectType::COMMAND_BUFFER, + raw, + label.unwrap_or_default(), + ) + }; + + // Reset this in case the last renderpass was never ended. + self.rpass_debug_marker_active = false; + + let vk_info = vk::CommandBufferBeginInfo::builder() + .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT) + .build(); + unsafe { self.device.raw.begin_command_buffer(raw, &vk_info) }?; + self.active = raw; + + Ok(()) + } + + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + let raw = self.active; + self.active = vk::CommandBuffer::null(); + unsafe { self.device.raw.end_command_buffer(raw) }?; + Ok(super::CommandBuffer { raw }) + } + + unsafe fn discard_encoding(&mut self) { + self.discarded.push(self.active); + self.active = vk::CommandBuffer::null(); + } + + unsafe fn reset_all<I>(&mut self, cmd_bufs: I) + where + I: Iterator<Item = super::CommandBuffer>, + { + self.temp.clear(); + self.free + .extend(cmd_bufs.into_iter().map(|cmd_buf| cmd_buf.raw)); + self.free.append(&mut self.discarded); + let _ = unsafe { + self.device + .raw + .reset_command_pool(self.raw, vk::CommandPoolResetFlags::default()) + }; + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + //Note: this is done so that we never end up with empty stage flags + let mut src_stages = vk::PipelineStageFlags::TOP_OF_PIPE; + let mut dst_stages = vk::PipelineStageFlags::BOTTOM_OF_PIPE; + let vk_barriers = &mut self.temp.buffer_barriers; + vk_barriers.clear(); + + for bar in barriers { + let (src_stage, src_access) = conv::map_buffer_usage_to_barrier(bar.usage.start); + src_stages |= src_stage; + let (dst_stage, dst_access) = conv::map_buffer_usage_to_barrier(bar.usage.end); + dst_stages |= dst_stage; + + vk_barriers.push( + vk::BufferMemoryBarrier::builder() + .buffer(bar.buffer.raw) + .size(vk::WHOLE_SIZE) + .src_access_mask(src_access) + .dst_access_mask(dst_access) + .build(), + ) + } + + if !vk_barriers.is_empty() { + unsafe { + self.device.raw.cmd_pipeline_barrier( + self.active, + src_stages, + dst_stages, + vk::DependencyFlags::empty(), + &[], + vk_barriers, + &[], + ) + }; + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + let mut src_stages = vk::PipelineStageFlags::empty(); + let mut dst_stages = vk::PipelineStageFlags::empty(); + let vk_barriers = &mut self.temp.image_barriers; + vk_barriers.clear(); + + for bar in barriers { + let range = conv::map_subresource_range(&bar.range, bar.texture.format); + let (src_stage, src_access) = conv::map_texture_usage_to_barrier(bar.usage.start); + let src_layout = conv::derive_image_layout(bar.usage.start, bar.texture.format); + src_stages |= src_stage; + let (dst_stage, dst_access) = conv::map_texture_usage_to_barrier(bar.usage.end); + let dst_layout = conv::derive_image_layout(bar.usage.end, bar.texture.format); + dst_stages |= dst_stage; + + vk_barriers.push( + vk::ImageMemoryBarrier::builder() + .image(bar.texture.raw) + .subresource_range(range) + .src_access_mask(src_access) + .dst_access_mask(dst_access) + .old_layout(src_layout) + .new_layout(dst_layout) + .build(), + ); + } + + if !vk_barriers.is_empty() { + unsafe { + self.device.raw.cmd_pipeline_barrier( + self.active, + src_stages, + dst_stages, + vk::DependencyFlags::empty(), + &[], + &[], + vk_barriers, + ) + }; + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + unsafe { + self.device.raw.cmd_fill_buffer( + self.active, + buffer.raw, + range.start, + range.end - range.start, + 0, + ) + }; + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let vk_regions_iter = regions.map(|r| vk::BufferCopy { + src_offset: r.src_offset, + dst_offset: r.dst_offset, + size: r.size.get(), + }); + + unsafe { + self.device.raw.cmd_copy_buffer( + self.active, + src.raw, + dst.raw, + &smallvec::SmallVec::<[vk::BufferCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let src_layout = conv::derive_image_layout(src_usage, src.format); + + let vk_regions_iter = regions.map(|r| { + let (src_subresource, src_offset) = conv::map_subresource_layers(&r.src_base); + let (dst_subresource, dst_offset) = conv::map_subresource_layers(&r.dst_base); + let extent = r + .size + .min(&r.src_base.max_copy_size(&src.copy_size)) + .min(&r.dst_base.max_copy_size(&dst.copy_size)); + vk::ImageCopy { + src_subresource, + src_offset, + dst_subresource, + dst_offset, + extent: conv::map_copy_extent(&extent), + } + }); + + unsafe { + self.device.raw.cmd_copy_image( + self.active, + src.raw, + src_layout, + dst.raw, + DST_IMAGE_LAYOUT, + &smallvec::SmallVec::<[vk::ImageCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let vk_regions_iter = dst.map_buffer_copies(regions); + + unsafe { + self.device.raw.cmd_copy_buffer_to_image( + self.active, + src.raw, + dst.raw, + DST_IMAGE_LAYOUT, + &smallvec::SmallVec::<[vk::BufferImageCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let src_layout = conv::derive_image_layout(src_usage, src.format); + let vk_regions_iter = src.map_buffer_copies(regions); + + unsafe { + self.device.raw.cmd_copy_image_to_buffer( + self.active, + src.raw, + src_layout, + dst.raw, + &smallvec::SmallVec::<[vk::BufferImageCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.device.raw.cmd_begin_query( + self.active, + set.raw, + index, + vk::QueryControlFlags::empty(), + ) + }; + } + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { self.device.raw.cmd_end_query(self.active, set.raw, index) }; + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.device.raw.cmd_write_timestamp( + self.active, + vk::PipelineStageFlags::BOTTOM_OF_PIPE, + set.raw, + index, + ) + }; + } + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range<u32>) { + unsafe { + self.device.raw.cmd_reset_query_pool( + self.active, + set.raw, + range.start, + range.end - range.start, + ) + }; + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + unsafe { + self.device.raw.cmd_copy_query_pool_results( + self.active, + set.raw, + range.start, + range.end - range.start, + buffer.raw, + offset, + stride.get(), + vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT, + ) + }; + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + let mut vk_clear_values = + ArrayVec::<vk::ClearValue, { super::MAX_TOTAL_ATTACHMENTS }>::new(); + let mut vk_image_views = ArrayVec::<vk::ImageView, { super::MAX_TOTAL_ATTACHMENTS }>::new(); + let mut rp_key = super::RenderPassKey::default(); + let mut fb_key = super::FramebufferKey { + attachments: ArrayVec::default(), + extent: desc.extent, + sample_count: desc.sample_count, + }; + let caps = &self.device.private_caps; + + for cat in desc.color_attachments { + if let Some(cat) = cat.as_ref() { + vk_clear_values.push(vk::ClearValue { + color: unsafe { cat.make_vk_clear_color() }, + }); + vk_image_views.push(cat.target.view.raw); + let color = super::ColorAttachmentKey { + base: cat.target.make_attachment_key(cat.ops, caps), + resolve: cat.resolve_target.as_ref().map(|target| { + target.make_attachment_key(crate::AttachmentOps::STORE, caps) + }), + }; + + rp_key.colors.push(Some(color)); + fb_key.attachments.push(cat.target.view.attachment.clone()); + if let Some(ref at) = cat.resolve_target { + vk_clear_values.push(unsafe { mem::zeroed() }); + vk_image_views.push(at.view.raw); + fb_key.attachments.push(at.view.attachment.clone()); + } + + // Assert this attachment is valid for the detected multiview, as a sanity check + // The driver crash for this is really bad on AMD, so the check is worth it + if let Some(multiview) = desc.multiview { + assert_eq!(cat.target.view.layers, multiview); + if let Some(ref resolve_target) = cat.resolve_target { + assert_eq!(resolve_target.view.layers, multiview); + } + } + } else { + rp_key.colors.push(None); + } + } + if let Some(ref ds) = desc.depth_stencil_attachment { + vk_clear_values.push(vk::ClearValue { + depth_stencil: vk::ClearDepthStencilValue { + depth: ds.clear_value.0, + stencil: ds.clear_value.1, + }, + }); + vk_image_views.push(ds.target.view.raw); + rp_key.depth_stencil = Some(super::DepthStencilAttachmentKey { + base: ds.target.make_attachment_key(ds.depth_ops, caps), + stencil_ops: ds.stencil_ops, + }); + fb_key.attachments.push(ds.target.view.attachment.clone()); + + // Assert this attachment is valid for the detected multiview, as a sanity check + // The driver crash for this is really bad on AMD, so the check is worth it + if let Some(multiview) = desc.multiview { + assert_eq!(ds.target.view.layers, multiview); + } + } + rp_key.sample_count = fb_key.sample_count; + rp_key.multiview = desc.multiview; + + let render_area = vk::Rect2D { + offset: vk::Offset2D { x: 0, y: 0 }, + extent: vk::Extent2D { + width: desc.extent.width, + height: desc.extent.height, + }, + }; + let vk_viewports = [vk::Viewport { + x: 0.0, + y: if self.device.private_caps.flip_y_requires_shift { + desc.extent.height as f32 + } else { + 0.0 + }, + width: desc.extent.width as f32, + height: -(desc.extent.height as f32), + min_depth: 0.0, + max_depth: 1.0, + }]; + + let raw_pass = self.device.make_render_pass(rp_key).unwrap(); + let raw_framebuffer = self + .device + .make_framebuffer(fb_key, raw_pass, desc.label) + .unwrap(); + + let mut vk_info = vk::RenderPassBeginInfo::builder() + .render_pass(raw_pass) + .render_area(render_area) + .clear_values(&vk_clear_values) + .framebuffer(raw_framebuffer); + let mut vk_attachment_info = if caps.imageless_framebuffers { + Some( + vk::RenderPassAttachmentBeginInfo::builder() + .attachments(&vk_image_views) + .build(), + ) + } else { + None + }; + if let Some(attachment_info) = vk_attachment_info.as_mut() { + vk_info = vk_info.push_next(attachment_info); + } + + if let Some(label) = desc.label { + unsafe { self.begin_debug_marker(label) }; + self.rpass_debug_marker_active = true; + } + + unsafe { + self.device + .raw + .cmd_set_viewport(self.active, 0, &vk_viewports); + self.device + .raw + .cmd_set_scissor(self.active, 0, &[render_area]); + self.device.raw.cmd_begin_render_pass( + self.active, + &vk_info, + vk::SubpassContents::INLINE, + ); + }; + + self.bind_point = vk::PipelineBindPoint::GRAPHICS; + } + unsafe fn end_render_pass(&mut self) { + unsafe { + self.device.raw.cmd_end_render_pass(self.active); + if self.rpass_debug_marker_active { + self.end_debug_marker(); + self.rpass_debug_marker_active = false; + } + } + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let sets = [*group.set.raw()]; + unsafe { + self.device.raw.cmd_bind_descriptor_sets( + self.active, + self.bind_point, + layout.raw, + index, + &sets, + dynamic_offsets, + ) + }; + } + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + unsafe { + self.device.raw.cmd_push_constants( + self.active, + layout.raw, + conv::map_shader_stage(stages), + offset, + slice::from_raw_parts(data.as_ptr() as _, data.len() * 4), + ) + }; + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + if let Some(ext) = self.device.debug_messenger() { + let cstr = self.temp.make_c_str(label); + let vk_label = vk::DebugUtilsLabelEXT::builder().label_name(cstr).build(); + unsafe { ext.cmd_insert_debug_utils_label(self.active, &vk_label) }; + } + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + if let Some(ext) = self.device.debug_messenger() { + let cstr = self.temp.make_c_str(group_label); + let vk_label = vk::DebugUtilsLabelEXT::builder().label_name(cstr).build(); + unsafe { ext.cmd_begin_debug_utils_label(self.active, &vk_label) }; + } + } + unsafe fn end_debug_marker(&mut self) { + if let Some(ext) = self.device.debug_messenger() { + unsafe { ext.cmd_end_debug_utils_label(self.active) }; + } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + unsafe { + self.device.raw.cmd_bind_pipeline( + self.active, + vk::PipelineBindPoint::GRAPHICS, + pipeline.raw, + ) + }; + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + unsafe { + self.device.raw.cmd_bind_index_buffer( + self.active, + binding.buffer.raw, + binding.offset, + conv::map_index_format(format), + ) + }; + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let vk_buffers = [binding.buffer.raw]; + let vk_offsets = [binding.offset]; + unsafe { + self.device + .raw + .cmd_bind_vertex_buffers(self.active, index, &vk_buffers, &vk_offsets) + }; + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let vk_viewports = [vk::Viewport { + x: rect.x, + y: if self.device.private_caps.flip_y_requires_shift { + rect.y + rect.h + } else { + rect.y + }, + width: rect.w, + height: -rect.h, // flip Y + min_depth: depth_range.start, + max_depth: depth_range.end, + }]; + unsafe { + self.device + .raw + .cmd_set_viewport(self.active, 0, &vk_viewports) + }; + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + let vk_scissors = [vk::Rect2D { + offset: vk::Offset2D { + x: rect.x as i32, + y: rect.y as i32, + }, + extent: vk::Extent2D { + width: rect.w, + height: rect.h, + }, + }]; + unsafe { + self.device + .raw + .cmd_set_scissor(self.active, 0, &vk_scissors) + }; + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + unsafe { + self.device.raw.cmd_set_stencil_reference( + self.active, + vk::StencilFaceFlags::FRONT_AND_BACK, + value, + ) + }; + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + unsafe { self.device.raw.cmd_set_blend_constants(self.active, color) }; + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw( + self.active, + vertex_count, + instance_count, + start_vertex, + start_instance, + ) + }; + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw_indexed( + self.active, + index_count, + instance_count, + start_index, + base_vertex, + start_instance, + ) + }; + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw_indirect( + self.active, + buffer.raw, + offset, + draw_count, + mem::size_of::<wgt::DrawIndirectArgs>() as u32, + ) + }; + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw_indexed_indirect( + self.active, + buffer.raw, + offset, + draw_count, + mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32, + ) + }; + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let stride = mem::size_of::<wgt::DrawIndirectArgs>() as u32; + match self.device.extension_fns.draw_indirect_count { + Some(ref t) => { + unsafe { + t.cmd_draw_indirect_count( + self.active, + buffer.raw, + offset, + count_buffer.raw, + count_offset, + max_count, + stride, + ) + }; + } + None => panic!("Feature `DRAW_INDIRECT_COUNT` not enabled"), + } + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let stride = mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32; + match self.device.extension_fns.draw_indirect_count { + Some(ref t) => { + unsafe { + t.cmd_draw_indexed_indirect_count( + self.active, + buffer.raw, + offset, + count_buffer.raw, + count_offset, + max_count, + stride, + ) + }; + } + None => panic!("Feature `DRAW_INDIRECT_COUNT` not enabled"), + } + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + self.bind_point = vk::PipelineBindPoint::COMPUTE; + if let Some(label) = desc.label { + unsafe { self.begin_debug_marker(label) }; + self.rpass_debug_marker_active = true; + } + } + unsafe fn end_compute_pass(&mut self) { + if self.rpass_debug_marker_active { + unsafe { self.end_debug_marker() }; + self.rpass_debug_marker_active = false + } + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + unsafe { + self.device.raw.cmd_bind_pipeline( + self.active, + vk::PipelineBindPoint::COMPUTE, + pipeline.raw, + ) + }; + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + unsafe { + self.device + .raw + .cmd_dispatch(self.active, count[0], count[1], count[2]) + }; + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + unsafe { + self.device + .raw + .cmd_dispatch_indirect(self.active, buffer.raw, offset) + } + } +} + +#[test] +fn check_dst_image_layout() { + assert_eq!( + conv::derive_image_layout(crate::TextureUses::COPY_DST, wgt::TextureFormat::Rgba8Unorm), + DST_IMAGE_LAYOUT + ); +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/conv.rs b/third_party/rust/wgpu-hal/src/vulkan/conv.rs new file mode 100644 index 0000000000..a26f3765b9 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/conv.rs @@ -0,0 +1,825 @@ +use ash::vk; + +impl super::PrivateCapabilities { + pub fn map_texture_format(&self, format: wgt::TextureFormat) -> vk::Format { + use ash::vk::Format as F; + use wgt::TextureFormat as Tf; + use wgt::{AstcBlock, AstcChannel}; + match format { + Tf::R8Unorm => F::R8_UNORM, + Tf::R8Snorm => F::R8_SNORM, + Tf::R8Uint => F::R8_UINT, + Tf::R8Sint => F::R8_SINT, + Tf::R16Uint => F::R16_UINT, + Tf::R16Sint => F::R16_SINT, + Tf::R16Unorm => F::R16_UNORM, + Tf::R16Snorm => F::R16_SNORM, + Tf::R16Float => F::R16_SFLOAT, + Tf::Rg8Unorm => F::R8G8_UNORM, + Tf::Rg8Snorm => F::R8G8_SNORM, + Tf::Rg8Uint => F::R8G8_UINT, + Tf::Rg8Sint => F::R8G8_SINT, + Tf::Rg16Unorm => F::R16G16_UNORM, + Tf::Rg16Snorm => F::R16G16_SNORM, + Tf::R32Uint => F::R32_UINT, + Tf::R32Sint => F::R32_SINT, + Tf::R32Float => F::R32_SFLOAT, + Tf::Rg16Uint => F::R16G16_UINT, + Tf::Rg16Sint => F::R16G16_SINT, + Tf::Rg16Float => F::R16G16_SFLOAT, + Tf::Rgba8Unorm => F::R8G8B8A8_UNORM, + Tf::Rgba8UnormSrgb => F::R8G8B8A8_SRGB, + Tf::Bgra8UnormSrgb => F::B8G8R8A8_SRGB, + Tf::Rgba8Snorm => F::R8G8B8A8_SNORM, + Tf::Bgra8Unorm => F::B8G8R8A8_UNORM, + Tf::Rgba8Uint => F::R8G8B8A8_UINT, + Tf::Rgba8Sint => F::R8G8B8A8_SINT, + Tf::Rgb10a2Unorm => F::A2B10G10R10_UNORM_PACK32, + Tf::Rg11b10Float => F::B10G11R11_UFLOAT_PACK32, + Tf::Rg32Uint => F::R32G32_UINT, + Tf::Rg32Sint => F::R32G32_SINT, + Tf::Rg32Float => F::R32G32_SFLOAT, + Tf::Rgba16Uint => F::R16G16B16A16_UINT, + Tf::Rgba16Sint => F::R16G16B16A16_SINT, + Tf::Rgba16Unorm => F::R16G16B16A16_UNORM, + Tf::Rgba16Snorm => F::R16G16B16A16_SNORM, + Tf::Rgba16Float => F::R16G16B16A16_SFLOAT, + Tf::Rgba32Uint => F::R32G32B32A32_UINT, + Tf::Rgba32Sint => F::R32G32B32A32_SINT, + Tf::Rgba32Float => F::R32G32B32A32_SFLOAT, + Tf::Depth32Float => F::D32_SFLOAT, + Tf::Depth32FloatStencil8 => F::D32_SFLOAT_S8_UINT, + Tf::Depth24Plus => { + if self.texture_d24 { + F::X8_D24_UNORM_PACK32 + } else { + F::D32_SFLOAT + } + } + Tf::Depth24PlusStencil8 => { + if self.texture_d24_s8 { + F::D24_UNORM_S8_UINT + } else { + F::D32_SFLOAT_S8_UINT + } + } + Tf::Stencil8 => { + if self.texture_s8 { + F::S8_UINT + } else if self.texture_d24_s8 { + F::D24_UNORM_S8_UINT + } else { + F::D32_SFLOAT_S8_UINT + } + } + Tf::Depth16Unorm => F::D16_UNORM, + Tf::Rgb9e5Ufloat => F::E5B9G9R9_UFLOAT_PACK32, + Tf::Bc1RgbaUnorm => F::BC1_RGBA_UNORM_BLOCK, + Tf::Bc1RgbaUnormSrgb => F::BC1_RGBA_SRGB_BLOCK, + Tf::Bc2RgbaUnorm => F::BC2_UNORM_BLOCK, + Tf::Bc2RgbaUnormSrgb => F::BC2_SRGB_BLOCK, + Tf::Bc3RgbaUnorm => F::BC3_UNORM_BLOCK, + Tf::Bc3RgbaUnormSrgb => F::BC3_SRGB_BLOCK, + Tf::Bc4RUnorm => F::BC4_UNORM_BLOCK, + Tf::Bc4RSnorm => F::BC4_SNORM_BLOCK, + Tf::Bc5RgUnorm => F::BC5_UNORM_BLOCK, + Tf::Bc5RgSnorm => F::BC5_SNORM_BLOCK, + Tf::Bc6hRgbUfloat => F::BC6H_UFLOAT_BLOCK, + Tf::Bc6hRgbFloat => F::BC6H_SFLOAT_BLOCK, + Tf::Bc7RgbaUnorm => F::BC7_UNORM_BLOCK, + Tf::Bc7RgbaUnormSrgb => F::BC7_SRGB_BLOCK, + Tf::Etc2Rgb8Unorm => F::ETC2_R8G8B8_UNORM_BLOCK, + Tf::Etc2Rgb8UnormSrgb => F::ETC2_R8G8B8_SRGB_BLOCK, + Tf::Etc2Rgb8A1Unorm => F::ETC2_R8G8B8A1_UNORM_BLOCK, + Tf::Etc2Rgb8A1UnormSrgb => F::ETC2_R8G8B8A1_SRGB_BLOCK, + Tf::Etc2Rgba8Unorm => F::ETC2_R8G8B8A8_UNORM_BLOCK, + Tf::Etc2Rgba8UnormSrgb => F::ETC2_R8G8B8A8_SRGB_BLOCK, + Tf::EacR11Unorm => F::EAC_R11_UNORM_BLOCK, + Tf::EacR11Snorm => F::EAC_R11_SNORM_BLOCK, + Tf::EacRg11Unorm => F::EAC_R11G11_UNORM_BLOCK, + Tf::EacRg11Snorm => F::EAC_R11G11_SNORM_BLOCK, + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm => match block { + AstcBlock::B4x4 => F::ASTC_4X4_UNORM_BLOCK, + AstcBlock::B5x4 => F::ASTC_5X4_UNORM_BLOCK, + AstcBlock::B5x5 => F::ASTC_5X5_UNORM_BLOCK, + AstcBlock::B6x5 => F::ASTC_6X5_UNORM_BLOCK, + AstcBlock::B6x6 => F::ASTC_6X6_UNORM_BLOCK, + AstcBlock::B8x5 => F::ASTC_8X5_UNORM_BLOCK, + AstcBlock::B8x6 => F::ASTC_8X6_UNORM_BLOCK, + AstcBlock::B8x8 => F::ASTC_8X8_UNORM_BLOCK, + AstcBlock::B10x5 => F::ASTC_10X5_UNORM_BLOCK, + AstcBlock::B10x6 => F::ASTC_10X6_UNORM_BLOCK, + AstcBlock::B10x8 => F::ASTC_10X8_UNORM_BLOCK, + AstcBlock::B10x10 => F::ASTC_10X10_UNORM_BLOCK, + AstcBlock::B12x10 => F::ASTC_12X10_UNORM_BLOCK, + AstcBlock::B12x12 => F::ASTC_12X12_UNORM_BLOCK, + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => F::ASTC_4X4_SRGB_BLOCK, + AstcBlock::B5x4 => F::ASTC_5X4_SRGB_BLOCK, + AstcBlock::B5x5 => F::ASTC_5X5_SRGB_BLOCK, + AstcBlock::B6x5 => F::ASTC_6X5_SRGB_BLOCK, + AstcBlock::B6x6 => F::ASTC_6X6_SRGB_BLOCK, + AstcBlock::B8x5 => F::ASTC_8X5_SRGB_BLOCK, + AstcBlock::B8x6 => F::ASTC_8X6_SRGB_BLOCK, + AstcBlock::B8x8 => F::ASTC_8X8_SRGB_BLOCK, + AstcBlock::B10x5 => F::ASTC_10X5_SRGB_BLOCK, + AstcBlock::B10x6 => F::ASTC_10X6_SRGB_BLOCK, + AstcBlock::B10x8 => F::ASTC_10X8_SRGB_BLOCK, + AstcBlock::B10x10 => F::ASTC_10X10_SRGB_BLOCK, + AstcBlock::B12x10 => F::ASTC_12X10_SRGB_BLOCK, + AstcBlock::B12x12 => F::ASTC_12X12_SRGB_BLOCK, + }, + AstcChannel::Hdr => match block { + AstcBlock::B4x4 => F::ASTC_4X4_SFLOAT_BLOCK_EXT, + AstcBlock::B5x4 => F::ASTC_5X4_SFLOAT_BLOCK_EXT, + AstcBlock::B5x5 => F::ASTC_5X5_SFLOAT_BLOCK_EXT, + AstcBlock::B6x5 => F::ASTC_6X5_SFLOAT_BLOCK_EXT, + AstcBlock::B6x6 => F::ASTC_6X6_SFLOAT_BLOCK_EXT, + AstcBlock::B8x5 => F::ASTC_8X5_SFLOAT_BLOCK_EXT, + AstcBlock::B8x6 => F::ASTC_8X6_SFLOAT_BLOCK_EXT, + AstcBlock::B8x8 => F::ASTC_8X8_SFLOAT_BLOCK_EXT, + AstcBlock::B10x5 => F::ASTC_10X5_SFLOAT_BLOCK_EXT, + AstcBlock::B10x6 => F::ASTC_10X6_SFLOAT_BLOCK_EXT, + AstcBlock::B10x8 => F::ASTC_10X8_SFLOAT_BLOCK_EXT, + AstcBlock::B10x10 => F::ASTC_10X10_SFLOAT_BLOCK_EXT, + AstcBlock::B12x10 => F::ASTC_12X10_SFLOAT_BLOCK_EXT, + AstcBlock::B12x12 => F::ASTC_12X12_SFLOAT_BLOCK_EXT, + }, + }, + } + } +} + +pub fn map_vk_surface_formats(sf: vk::SurfaceFormatKHR) -> Option<wgt::TextureFormat> { + use ash::vk::Format as F; + use wgt::TextureFormat as Tf; + // List we care about pulled from https://vulkan.gpuinfo.org/listsurfaceformats.php + Some(match sf.color_space { + vk::ColorSpaceKHR::SRGB_NONLINEAR => match sf.format { + F::B8G8R8A8_UNORM => Tf::Bgra8Unorm, + F::B8G8R8A8_SRGB => Tf::Bgra8UnormSrgb, + F::R8G8B8A8_SNORM => Tf::Rgba8Snorm, + F::R8G8B8A8_UNORM => Tf::Rgba8Unorm, + F::R8G8B8A8_SRGB => Tf::Rgba8UnormSrgb, + _ => return None, + }, + vk::ColorSpaceKHR::EXTENDED_SRGB_LINEAR_EXT => match sf.format { + F::R16G16B16A16_SFLOAT => Tf::Rgba16Float, + F::R16G16B16A16_SNORM => Tf::Rgba16Snorm, + F::R16G16B16A16_UNORM => Tf::Rgba16Unorm, + F::A2B10G10R10_UNORM_PACK32 => Tf::Rgb10a2Unorm, + _ => return None, + }, + _ => return None, + }) +} + +impl crate::Attachment<'_, super::Api> { + pub(super) fn make_attachment_key( + &self, + ops: crate::AttachmentOps, + caps: &super::PrivateCapabilities, + ) -> super::AttachmentKey { + super::AttachmentKey { + format: caps.map_texture_format(self.view.attachment.view_format), + layout: derive_image_layout(self.usage, self.view.attachment.view_format), + ops, + } + } +} + +impl crate::ColorAttachment<'_, super::Api> { + pub(super) unsafe fn make_vk_clear_color(&self) -> vk::ClearColorValue { + let cv = &self.clear_value; + match self + .target + .view + .attachment + .view_format + .sample_type(None) + .unwrap() + { + wgt::TextureSampleType::Float { .. } => vk::ClearColorValue { + float32: [cv.r as f32, cv.g as f32, cv.b as f32, cv.a as f32], + }, + wgt::TextureSampleType::Sint => vk::ClearColorValue { + int32: [cv.r as i32, cv.g as i32, cv.b as i32, cv.a as i32], + }, + wgt::TextureSampleType::Uint => vk::ClearColorValue { + uint32: [cv.r as u32, cv.g as u32, cv.b as u32, cv.a as u32], + }, + wgt::TextureSampleType::Depth => unreachable!(), + } + } +} + +pub fn derive_image_layout( + usage: crate::TextureUses, + format: wgt::TextureFormat, +) -> vk::ImageLayout { + // Note: depth textures are always sampled with RODS layout + let is_color = crate::FormatAspects::from(format).contains(crate::FormatAspects::COLOR); + match usage { + crate::TextureUses::UNINITIALIZED => vk::ImageLayout::UNDEFINED, + crate::TextureUses::COPY_SRC => vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + crate::TextureUses::COPY_DST => vk::ImageLayout::TRANSFER_DST_OPTIMAL, + crate::TextureUses::RESOURCE if is_color => vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL, + crate::TextureUses::COLOR_TARGET => vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL, + crate::TextureUses::DEPTH_STENCIL_WRITE => { + vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL + } + _ => { + if usage == crate::TextureUses::PRESENT { + vk::ImageLayout::PRESENT_SRC_KHR + } else if is_color { + vk::ImageLayout::GENERAL + } else { + vk::ImageLayout::DEPTH_STENCIL_READ_ONLY_OPTIMAL + } + } + } +} + +pub fn map_texture_usage(usage: crate::TextureUses) -> vk::ImageUsageFlags { + let mut flags = vk::ImageUsageFlags::empty(); + if usage.contains(crate::TextureUses::COPY_SRC) { + flags |= vk::ImageUsageFlags::TRANSFER_SRC; + } + if usage.contains(crate::TextureUses::COPY_DST) { + flags |= vk::ImageUsageFlags::TRANSFER_DST; + } + if usage.contains(crate::TextureUses::RESOURCE) { + flags |= vk::ImageUsageFlags::SAMPLED; + } + if usage.contains(crate::TextureUses::COLOR_TARGET) { + flags |= vk::ImageUsageFlags::COLOR_ATTACHMENT; + } + if usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= vk::ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT; + } + if usage.intersects(crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE) { + flags |= vk::ImageUsageFlags::STORAGE; + } + flags +} + +pub fn map_texture_usage_to_barrier( + usage: crate::TextureUses, +) -> (vk::PipelineStageFlags, vk::AccessFlags) { + let mut stages = vk::PipelineStageFlags::empty(); + let mut access = vk::AccessFlags::empty(); + let shader_stages = vk::PipelineStageFlags::VERTEX_SHADER + | vk::PipelineStageFlags::FRAGMENT_SHADER + | vk::PipelineStageFlags::COMPUTE_SHADER; + + if usage.contains(crate::TextureUses::COPY_SRC) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_READ; + } + if usage.contains(crate::TextureUses::COPY_DST) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_WRITE; + } + if usage.contains(crate::TextureUses::RESOURCE) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ; + } + if usage.contains(crate::TextureUses::COLOR_TARGET) { + stages |= vk::PipelineStageFlags::COLOR_ATTACHMENT_OUTPUT; + access |= vk::AccessFlags::COLOR_ATTACHMENT_READ | vk::AccessFlags::COLOR_ATTACHMENT_WRITE; + } + if usage.intersects(crate::TextureUses::DEPTH_STENCIL_READ) { + stages |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS + | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS; + access |= vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ; + } + if usage.intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) { + stages |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS + | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS; + access |= vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ + | vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE; + } + if usage.contains(crate::TextureUses::STORAGE_READ) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ; + } + if usage.contains(crate::TextureUses::STORAGE_READ_WRITE) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ | vk::AccessFlags::SHADER_WRITE; + } + + if usage == crate::TextureUses::UNINITIALIZED || usage == crate::TextureUses::PRESENT { + ( + vk::PipelineStageFlags::TOP_OF_PIPE, + vk::AccessFlags::empty(), + ) + } else { + (stages, access) + } +} + +pub fn map_vk_image_usage(usage: vk::ImageUsageFlags) -> crate::TextureUses { + let mut bits = crate::TextureUses::empty(); + if usage.contains(vk::ImageUsageFlags::TRANSFER_SRC) { + bits |= crate::TextureUses::COPY_SRC; + } + if usage.contains(vk::ImageUsageFlags::TRANSFER_DST) { + bits |= crate::TextureUses::COPY_DST; + } + if usage.contains(vk::ImageUsageFlags::SAMPLED) { + bits |= crate::TextureUses::RESOURCE; + } + if usage.contains(vk::ImageUsageFlags::COLOR_ATTACHMENT) { + bits |= crate::TextureUses::COLOR_TARGET; + } + if usage.contains(vk::ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT) { + bits |= crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE; + } + if usage.contains(vk::ImageUsageFlags::STORAGE) { + bits |= crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE; + } + bits +} + +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> vk::ImageType { + match dim { + wgt::TextureDimension::D1 => vk::ImageType::TYPE_1D, + wgt::TextureDimension::D2 => vk::ImageType::TYPE_2D, + wgt::TextureDimension::D3 => vk::ImageType::TYPE_3D, + } +} + +pub fn map_index_format(index_format: wgt::IndexFormat) -> vk::IndexType { + match index_format { + wgt::IndexFormat::Uint16 => vk::IndexType::UINT16, + wgt::IndexFormat::Uint32 => vk::IndexType::UINT32, + } +} + +pub fn map_vertex_format(vertex_format: wgt::VertexFormat) -> vk::Format { + use wgt::VertexFormat as Vf; + match vertex_format { + Vf::Uint8x2 => vk::Format::R8G8_UINT, + Vf::Uint8x4 => vk::Format::R8G8B8A8_UINT, + Vf::Sint8x2 => vk::Format::R8G8_SINT, + Vf::Sint8x4 => vk::Format::R8G8B8A8_SINT, + Vf::Unorm8x2 => vk::Format::R8G8_UNORM, + Vf::Unorm8x4 => vk::Format::R8G8B8A8_UNORM, + Vf::Snorm8x2 => vk::Format::R8G8_SNORM, + Vf::Snorm8x4 => vk::Format::R8G8B8A8_SNORM, + Vf::Uint16x2 => vk::Format::R16G16_UINT, + Vf::Uint16x4 => vk::Format::R16G16B16A16_UINT, + Vf::Sint16x2 => vk::Format::R16G16_SINT, + Vf::Sint16x4 => vk::Format::R16G16B16A16_SINT, + Vf::Unorm16x2 => vk::Format::R16G16_UNORM, + Vf::Unorm16x4 => vk::Format::R16G16B16A16_UNORM, + Vf::Snorm16x2 => vk::Format::R16G16_SNORM, + Vf::Snorm16x4 => vk::Format::R16G16B16A16_SNORM, + Vf::Float16x2 => vk::Format::R16G16_SFLOAT, + Vf::Float16x4 => vk::Format::R16G16B16A16_SFLOAT, + Vf::Float32 => vk::Format::R32_SFLOAT, + Vf::Float32x2 => vk::Format::R32G32_SFLOAT, + Vf::Float32x3 => vk::Format::R32G32B32_SFLOAT, + Vf::Float32x4 => vk::Format::R32G32B32A32_SFLOAT, + Vf::Uint32 => vk::Format::R32_UINT, + Vf::Uint32x2 => vk::Format::R32G32_UINT, + Vf::Uint32x3 => vk::Format::R32G32B32_UINT, + Vf::Uint32x4 => vk::Format::R32G32B32A32_UINT, + Vf::Sint32 => vk::Format::R32_SINT, + Vf::Sint32x2 => vk::Format::R32G32_SINT, + Vf::Sint32x3 => vk::Format::R32G32B32_SINT, + Vf::Sint32x4 => vk::Format::R32G32B32A32_SINT, + Vf::Float64 => vk::Format::R64_SFLOAT, + Vf::Float64x2 => vk::Format::R64G64_SFLOAT, + Vf::Float64x3 => vk::Format::R64G64B64_SFLOAT, + Vf::Float64x4 => vk::Format::R64G64B64A64_SFLOAT, + } +} + +pub fn map_aspects(aspects: crate::FormatAspects) -> vk::ImageAspectFlags { + let mut flags = vk::ImageAspectFlags::empty(); + if aspects.contains(crate::FormatAspects::COLOR) { + flags |= vk::ImageAspectFlags::COLOR; + } + if aspects.contains(crate::FormatAspects::DEPTH) { + flags |= vk::ImageAspectFlags::DEPTH; + } + if aspects.contains(crate::FormatAspects::STENCIL) { + flags |= vk::ImageAspectFlags::STENCIL; + } + flags +} + +pub fn map_attachment_ops( + op: crate::AttachmentOps, +) -> (vk::AttachmentLoadOp, vk::AttachmentStoreOp) { + let load_op = if op.contains(crate::AttachmentOps::LOAD) { + vk::AttachmentLoadOp::LOAD + } else { + vk::AttachmentLoadOp::CLEAR + }; + let store_op = if op.contains(crate::AttachmentOps::STORE) { + vk::AttachmentStoreOp::STORE + } else { + vk::AttachmentStoreOp::DONT_CARE + }; + (load_op, store_op) +} + +pub fn map_present_mode(mode: wgt::PresentMode) -> vk::PresentModeKHR { + match mode { + wgt::PresentMode::Immediate => vk::PresentModeKHR::IMMEDIATE, + wgt::PresentMode::Mailbox => vk::PresentModeKHR::MAILBOX, + wgt::PresentMode::Fifo => vk::PresentModeKHR::FIFO, + wgt::PresentMode::FifoRelaxed => vk::PresentModeKHR::FIFO_RELAXED, + wgt::PresentMode::AutoNoVsync | wgt::PresentMode::AutoVsync => { + unreachable!("Cannot create swapchain with Auto PresentationMode") + } + } +} + +pub fn map_vk_present_mode(mode: vk::PresentModeKHR) -> Option<wgt::PresentMode> { + if mode == vk::PresentModeKHR::IMMEDIATE { + Some(wgt::PresentMode::Immediate) + } else if mode == vk::PresentModeKHR::MAILBOX { + Some(wgt::PresentMode::Mailbox) + } else if mode == vk::PresentModeKHR::FIFO { + Some(wgt::PresentMode::Fifo) + } else if mode == vk::PresentModeKHR::FIFO_RELAXED { + //Some(wgt::PresentMode::Relaxed) + None + } else { + log::warn!("Unrecognized present mode {:?}", mode); + None + } +} + +pub fn map_composite_alpha_mode(mode: wgt::CompositeAlphaMode) -> vk::CompositeAlphaFlagsKHR { + match mode { + wgt::CompositeAlphaMode::Opaque => vk::CompositeAlphaFlagsKHR::OPAQUE, + wgt::CompositeAlphaMode::PreMultiplied => vk::CompositeAlphaFlagsKHR::PRE_MULTIPLIED, + wgt::CompositeAlphaMode::PostMultiplied => vk::CompositeAlphaFlagsKHR::POST_MULTIPLIED, + wgt::CompositeAlphaMode::Inherit => vk::CompositeAlphaFlagsKHR::INHERIT, + wgt::CompositeAlphaMode::Auto => unreachable!(), + } +} + +pub fn map_vk_composite_alpha(flags: vk::CompositeAlphaFlagsKHR) -> Vec<wgt::CompositeAlphaMode> { + let mut modes = Vec::new(); + if flags.contains(vk::CompositeAlphaFlagsKHR::OPAQUE) { + modes.push(wgt::CompositeAlphaMode::Opaque); + } + if flags.contains(vk::CompositeAlphaFlagsKHR::PRE_MULTIPLIED) { + modes.push(wgt::CompositeAlphaMode::PreMultiplied); + } + if flags.contains(vk::CompositeAlphaFlagsKHR::POST_MULTIPLIED) { + modes.push(wgt::CompositeAlphaMode::PostMultiplied); + } + if flags.contains(vk::CompositeAlphaFlagsKHR::INHERIT) { + modes.push(wgt::CompositeAlphaMode::Inherit); + } + modes +} + +pub fn map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags { + let mut flags = vk::BufferUsageFlags::empty(); + if usage.contains(crate::BufferUses::COPY_SRC) { + flags |= vk::BufferUsageFlags::TRANSFER_SRC; + } + if usage.contains(crate::BufferUses::COPY_DST) { + flags |= vk::BufferUsageFlags::TRANSFER_DST; + } + if usage.contains(crate::BufferUses::UNIFORM) { + flags |= vk::BufferUsageFlags::UNIFORM_BUFFER; + } + if usage.intersects(crate::BufferUses::STORAGE_READ | crate::BufferUses::STORAGE_READ_WRITE) { + flags |= vk::BufferUsageFlags::STORAGE_BUFFER; + } + if usage.contains(crate::BufferUses::INDEX) { + flags |= vk::BufferUsageFlags::INDEX_BUFFER; + } + if usage.contains(crate::BufferUses::VERTEX) { + flags |= vk::BufferUsageFlags::VERTEX_BUFFER; + } + if usage.contains(crate::BufferUses::INDIRECT) { + flags |= vk::BufferUsageFlags::INDIRECT_BUFFER; + } + flags +} + +pub fn map_buffer_usage_to_barrier( + usage: crate::BufferUses, +) -> (vk::PipelineStageFlags, vk::AccessFlags) { + let mut stages = vk::PipelineStageFlags::empty(); + let mut access = vk::AccessFlags::empty(); + let shader_stages = vk::PipelineStageFlags::VERTEX_SHADER + | vk::PipelineStageFlags::FRAGMENT_SHADER + | vk::PipelineStageFlags::COMPUTE_SHADER; + + if usage.contains(crate::BufferUses::MAP_READ) { + stages |= vk::PipelineStageFlags::HOST; + access |= vk::AccessFlags::HOST_READ; + } + if usage.contains(crate::BufferUses::MAP_WRITE) { + stages |= vk::PipelineStageFlags::HOST; + access |= vk::AccessFlags::HOST_WRITE; + } + if usage.contains(crate::BufferUses::COPY_SRC) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_READ; + } + if usage.contains(crate::BufferUses::COPY_DST) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_WRITE; + } + if usage.contains(crate::BufferUses::UNIFORM) { + stages |= shader_stages; + access |= vk::AccessFlags::UNIFORM_READ; + } + if usage.intersects(crate::BufferUses::STORAGE_READ) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ; + } + if usage.intersects(crate::BufferUses::STORAGE_READ_WRITE) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ | vk::AccessFlags::SHADER_WRITE; + } + if usage.contains(crate::BufferUses::INDEX) { + stages |= vk::PipelineStageFlags::VERTEX_INPUT; + access |= vk::AccessFlags::INDEX_READ; + } + if usage.contains(crate::BufferUses::VERTEX) { + stages |= vk::PipelineStageFlags::VERTEX_INPUT; + access |= vk::AccessFlags::VERTEX_ATTRIBUTE_READ; + } + if usage.contains(crate::BufferUses::INDIRECT) { + stages |= vk::PipelineStageFlags::DRAW_INDIRECT; + access |= vk::AccessFlags::INDIRECT_COMMAND_READ; + } + + (stages, access) +} + +pub fn map_view_dimension(dim: wgt::TextureViewDimension) -> vk::ImageViewType { + match dim { + wgt::TextureViewDimension::D1 => vk::ImageViewType::TYPE_1D, + wgt::TextureViewDimension::D2 => vk::ImageViewType::TYPE_2D, + wgt::TextureViewDimension::D2Array => vk::ImageViewType::TYPE_2D_ARRAY, + wgt::TextureViewDimension::Cube => vk::ImageViewType::CUBE, + wgt::TextureViewDimension::CubeArray => vk::ImageViewType::CUBE_ARRAY, + wgt::TextureViewDimension::D3 => vk::ImageViewType::TYPE_3D, + } +} + +pub fn map_copy_extent(extent: &crate::CopyExtent) -> vk::Extent3D { + vk::Extent3D { + width: extent.width, + height: extent.height, + depth: extent.depth, + } +} + +pub fn map_subresource_range( + range: &wgt::ImageSubresourceRange, + format: wgt::TextureFormat, +) -> vk::ImageSubresourceRange { + vk::ImageSubresourceRange { + aspect_mask: map_aspects(crate::FormatAspects::new(format, range.aspect)), + base_mip_level: range.base_mip_level, + level_count: range.mip_level_count.unwrap_or(vk::REMAINING_MIP_LEVELS), + base_array_layer: range.base_array_layer, + layer_count: range + .array_layer_count + .unwrap_or(vk::REMAINING_ARRAY_LAYERS), + } +} + +pub fn map_subresource_layers( + base: &crate::TextureCopyBase, +) -> (vk::ImageSubresourceLayers, vk::Offset3D) { + let offset = vk::Offset3D { + x: base.origin.x as i32, + y: base.origin.y as i32, + z: base.origin.z as i32, + }; + let subresource = vk::ImageSubresourceLayers { + aspect_mask: map_aspects(base.aspect), + mip_level: base.mip_level, + base_array_layer: base.array_layer, + layer_count: 1, + }; + (subresource, offset) +} + +pub fn map_filter_mode(mode: wgt::FilterMode) -> vk::Filter { + match mode { + wgt::FilterMode::Nearest => vk::Filter::NEAREST, + wgt::FilterMode::Linear => vk::Filter::LINEAR, + } +} + +pub fn map_mip_filter_mode(mode: wgt::FilterMode) -> vk::SamplerMipmapMode { + match mode { + wgt::FilterMode::Nearest => vk::SamplerMipmapMode::NEAREST, + wgt::FilterMode::Linear => vk::SamplerMipmapMode::LINEAR, + } +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> vk::SamplerAddressMode { + match mode { + wgt::AddressMode::ClampToEdge => vk::SamplerAddressMode::CLAMP_TO_EDGE, + wgt::AddressMode::Repeat => vk::SamplerAddressMode::REPEAT, + wgt::AddressMode::MirrorRepeat => vk::SamplerAddressMode::MIRRORED_REPEAT, + wgt::AddressMode::ClampToBorder => vk::SamplerAddressMode::CLAMP_TO_BORDER, + // wgt::AddressMode::MirrorClamp => vk::SamplerAddressMode::MIRROR_CLAMP_TO_EDGE, + } +} + +pub fn map_border_color(border_color: wgt::SamplerBorderColor) -> vk::BorderColor { + match border_color { + wgt::SamplerBorderColor::TransparentBlack | wgt::SamplerBorderColor::Zero => { + vk::BorderColor::FLOAT_TRANSPARENT_BLACK + } + wgt::SamplerBorderColor::OpaqueBlack => vk::BorderColor::FLOAT_OPAQUE_BLACK, + wgt::SamplerBorderColor::OpaqueWhite => vk::BorderColor::FLOAT_OPAQUE_WHITE, + } +} + +pub fn map_comparison(fun: wgt::CompareFunction) -> vk::CompareOp { + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => vk::CompareOp::NEVER, + Cf::Less => vk::CompareOp::LESS, + Cf::LessEqual => vk::CompareOp::LESS_OR_EQUAL, + Cf::Equal => vk::CompareOp::EQUAL, + Cf::GreaterEqual => vk::CompareOp::GREATER_OR_EQUAL, + Cf::Greater => vk::CompareOp::GREATER, + Cf::NotEqual => vk::CompareOp::NOT_EQUAL, + Cf::Always => vk::CompareOp::ALWAYS, + } +} + +pub fn map_shader_stage(stage: wgt::ShaderStages) -> vk::ShaderStageFlags { + let mut flags = vk::ShaderStageFlags::empty(); + if stage.contains(wgt::ShaderStages::VERTEX) { + flags |= vk::ShaderStageFlags::VERTEX; + } + if stage.contains(wgt::ShaderStages::FRAGMENT) { + flags |= vk::ShaderStageFlags::FRAGMENT; + } + if stage.contains(wgt::ShaderStages::COMPUTE) { + flags |= vk::ShaderStageFlags::COMPUTE; + } + flags +} + +pub fn map_binding_type(ty: wgt::BindingType) -> vk::DescriptorType { + match ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => match ty { + wgt::BufferBindingType::Storage { .. } => match has_dynamic_offset { + true => vk::DescriptorType::STORAGE_BUFFER_DYNAMIC, + false => vk::DescriptorType::STORAGE_BUFFER, + }, + wgt::BufferBindingType::Uniform => match has_dynamic_offset { + true => vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC, + false => vk::DescriptorType::UNIFORM_BUFFER, + }, + }, + wgt::BindingType::Sampler { .. } => vk::DescriptorType::SAMPLER, + wgt::BindingType::Texture { .. } => vk::DescriptorType::SAMPLED_IMAGE, + wgt::BindingType::StorageTexture { .. } => vk::DescriptorType::STORAGE_IMAGE, + } +} + +pub fn map_topology(topology: wgt::PrimitiveTopology) -> vk::PrimitiveTopology { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => vk::PrimitiveTopology::POINT_LIST, + Pt::LineList => vk::PrimitiveTopology::LINE_LIST, + Pt::LineStrip => vk::PrimitiveTopology::LINE_STRIP, + Pt::TriangleList => vk::PrimitiveTopology::TRIANGLE_LIST, + Pt::TriangleStrip => vk::PrimitiveTopology::TRIANGLE_STRIP, + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> vk::PolygonMode { + match mode { + wgt::PolygonMode::Fill => vk::PolygonMode::FILL, + wgt::PolygonMode::Line => vk::PolygonMode::LINE, + wgt::PolygonMode::Point => vk::PolygonMode::POINT, + } +} + +pub fn map_front_face(front_face: wgt::FrontFace) -> vk::FrontFace { + match front_face { + wgt::FrontFace::Cw => vk::FrontFace::CLOCKWISE, + wgt::FrontFace::Ccw => vk::FrontFace::COUNTER_CLOCKWISE, + } +} + +pub fn map_cull_face(face: wgt::Face) -> vk::CullModeFlags { + match face { + wgt::Face::Front => vk::CullModeFlags::FRONT, + wgt::Face::Back => vk::CullModeFlags::BACK, + } +} + +pub fn map_stencil_op(op: wgt::StencilOperation) -> vk::StencilOp { + use wgt::StencilOperation as So; + match op { + So::Keep => vk::StencilOp::KEEP, + So::Zero => vk::StencilOp::ZERO, + So::Replace => vk::StencilOp::REPLACE, + So::Invert => vk::StencilOp::INVERT, + So::IncrementClamp => vk::StencilOp::INCREMENT_AND_CLAMP, + So::IncrementWrap => vk::StencilOp::INCREMENT_AND_WRAP, + So::DecrementClamp => vk::StencilOp::DECREMENT_AND_CLAMP, + So::DecrementWrap => vk::StencilOp::DECREMENT_AND_WRAP, + } +} + +pub fn map_stencil_face( + face: &wgt::StencilFaceState, + compare_mask: u32, + write_mask: u32, +) -> vk::StencilOpState { + vk::StencilOpState { + fail_op: map_stencil_op(face.fail_op), + pass_op: map_stencil_op(face.pass_op), + depth_fail_op: map_stencil_op(face.depth_fail_op), + compare_op: map_comparison(face.compare), + compare_mask, + write_mask, + reference: 0, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor) -> vk::BlendFactor { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => vk::BlendFactor::ZERO, + Bf::One => vk::BlendFactor::ONE, + Bf::Src => vk::BlendFactor::SRC_COLOR, + Bf::OneMinusSrc => vk::BlendFactor::ONE_MINUS_SRC_COLOR, + Bf::SrcAlpha => vk::BlendFactor::SRC_ALPHA, + Bf::OneMinusSrcAlpha => vk::BlendFactor::ONE_MINUS_SRC_ALPHA, + Bf::Dst => vk::BlendFactor::DST_COLOR, + Bf::OneMinusDst => vk::BlendFactor::ONE_MINUS_DST_COLOR, + Bf::DstAlpha => vk::BlendFactor::DST_ALPHA, + Bf::OneMinusDstAlpha => vk::BlendFactor::ONE_MINUS_DST_ALPHA, + Bf::SrcAlphaSaturated => vk::BlendFactor::SRC_ALPHA_SATURATE, + Bf::Constant => vk::BlendFactor::CONSTANT_COLOR, + Bf::OneMinusConstant => vk::BlendFactor::ONE_MINUS_CONSTANT_COLOR, + } +} + +fn map_blend_op(operation: wgt::BlendOperation) -> vk::BlendOp { + use wgt::BlendOperation as Bo; + match operation { + Bo::Add => vk::BlendOp::ADD, + Bo::Subtract => vk::BlendOp::SUBTRACT, + Bo::ReverseSubtract => vk::BlendOp::REVERSE_SUBTRACT, + Bo::Min => vk::BlendOp::MIN, + Bo::Max => vk::BlendOp::MAX, + } +} + +pub fn map_blend_component( + component: &wgt::BlendComponent, +) -> (vk::BlendOp, vk::BlendFactor, vk::BlendFactor) { + let op = map_blend_op(component.operation); + let src = map_blend_factor(component.src_factor); + let dst = map_blend_factor(component.dst_factor); + (op, src, dst) +} + +pub fn map_pipeline_statistics( + types: wgt::PipelineStatisticsTypes, +) -> vk::QueryPipelineStatisticFlags { + use wgt::PipelineStatisticsTypes as Pst; + let mut flags = vk::QueryPipelineStatisticFlags::empty(); + if types.contains(Pst::VERTEX_SHADER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::VERTEX_SHADER_INVOCATIONS; + } + if types.contains(Pst::CLIPPER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::CLIPPING_INVOCATIONS; + } + if types.contains(Pst::CLIPPER_PRIMITIVES_OUT) { + flags |= vk::QueryPipelineStatisticFlags::CLIPPING_PRIMITIVES; + } + if types.contains(Pst::FRAGMENT_SHADER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::FRAGMENT_SHADER_INVOCATIONS; + } + if types.contains(Pst::COMPUTE_SHADER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::COMPUTE_SHADER_INVOCATIONS; + } + flags +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/device.rs b/third_party/rust/wgpu-hal/src/vulkan/device.rs new file mode 100644 index 0000000000..09b887772c --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/device.rs @@ -0,0 +1,2032 @@ +use super::conv; + +use arrayvec::ArrayVec; +use ash::{extensions::khr, vk}; +use parking_lot::Mutex; + +use std::{ + borrow::Cow, + collections::{hash_map::Entry, BTreeMap}, + ffi::{CStr, CString}, + num::NonZeroU32, + ptr, + sync::Arc, +}; + +impl super::DeviceShared { + pub(super) unsafe fn set_object_name( + &self, + object_type: vk::ObjectType, + object: impl vk::Handle, + name: &str, + ) { + let extension = match self.instance.debug_utils { + Some(ref debug_utils) => &debug_utils.extension, + None => return, + }; + + // Keep variables outside the if-else block to ensure they do not + // go out of scope while we hold a pointer to them + let mut buffer: [u8; 64] = [0u8; 64]; + let buffer_vec: Vec<u8>; + + // Append a null terminator to the string + let name_bytes = if name.len() < buffer.len() { + // Common case, string is very small. Allocate a copy on the stack. + buffer[..name.len()].copy_from_slice(name.as_bytes()); + // Add null terminator + buffer[name.len()] = 0; + &buffer[..name.len() + 1] + } else { + // Less common case, the string is large. + // This requires a heap allocation. + buffer_vec = name + .as_bytes() + .iter() + .cloned() + .chain(std::iter::once(0)) + .collect(); + &buffer_vec + }; + + let name = unsafe { CStr::from_bytes_with_nul_unchecked(name_bytes) }; + + let _result = unsafe { + extension.set_debug_utils_object_name( + self.raw.handle(), + &vk::DebugUtilsObjectNameInfoEXT::builder() + .object_type(object_type) + .object_handle(object.as_raw()) + .object_name(name), + ) + }; + } + + pub fn make_render_pass( + &self, + key: super::RenderPassKey, + ) -> Result<vk::RenderPass, crate::DeviceError> { + Ok(match self.render_passes.lock().entry(key) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let mut vk_attachments = Vec::new(); + let mut color_refs = Vec::with_capacity(e.key().colors.len()); + let mut resolve_refs = Vec::with_capacity(color_refs.capacity()); + let mut ds_ref = None; + let samples = vk::SampleCountFlags::from_raw(e.key().sample_count); + let unused = vk::AttachmentReference { + attachment: vk::ATTACHMENT_UNUSED, + layout: vk::ImageLayout::UNDEFINED, + }; + for cat in e.key().colors.iter() { + let (color_ref, resolve_ref) = if let Some(cat) = cat.as_ref() { + let color_ref = vk::AttachmentReference { + attachment: vk_attachments.len() as u32, + layout: cat.base.layout, + }; + vk_attachments.push({ + let (load_op, store_op) = conv::map_attachment_ops(cat.base.ops); + vk::AttachmentDescription::builder() + .format(cat.base.format) + .samples(samples) + .load_op(load_op) + .store_op(store_op) + .initial_layout(cat.base.layout) + .final_layout(cat.base.layout) + .build() + }); + let resolve_ref = if let Some(ref rat) = cat.resolve { + let (load_op, store_op) = conv::map_attachment_ops(rat.ops); + let vk_attachment = vk::AttachmentDescription::builder() + .format(rat.format) + .samples(vk::SampleCountFlags::TYPE_1) + .load_op(load_op) + .store_op(store_op) + .initial_layout(rat.layout) + .final_layout(rat.layout) + .build(); + vk_attachments.push(vk_attachment); + + vk::AttachmentReference { + attachment: vk_attachments.len() as u32 - 1, + layout: rat.layout, + } + } else { + unused + }; + + (color_ref, resolve_ref) + } else { + (unused, unused) + }; + + color_refs.push(color_ref); + resolve_refs.push(resolve_ref); + } + + if let Some(ref ds) = e.key().depth_stencil { + ds_ref = Some(vk::AttachmentReference { + attachment: vk_attachments.len() as u32, + layout: ds.base.layout, + }); + let (load_op, store_op) = conv::map_attachment_ops(ds.base.ops); + let (stencil_load_op, stencil_store_op) = + conv::map_attachment_ops(ds.stencil_ops); + let vk_attachment = vk::AttachmentDescription::builder() + .format(ds.base.format) + .samples(samples) + .load_op(load_op) + .store_op(store_op) + .stencil_load_op(stencil_load_op) + .stencil_store_op(stencil_store_op) + .initial_layout(ds.base.layout) + .final_layout(ds.base.layout) + .build(); + vk_attachments.push(vk_attachment); + } + + let vk_subpasses = [{ + let mut vk_subpass = vk::SubpassDescription::builder() + .pipeline_bind_point(vk::PipelineBindPoint::GRAPHICS) + .color_attachments(&color_refs) + .resolve_attachments(&resolve_refs); + + if self + .workarounds + .contains(super::Workarounds::EMPTY_RESOLVE_ATTACHMENT_LISTS) + && resolve_refs.is_empty() + { + vk_subpass.p_resolve_attachments = ptr::null(); + } + + if let Some(ref reference) = ds_ref { + vk_subpass = vk_subpass.depth_stencil_attachment(reference) + } + vk_subpass.build() + }]; + + let mut vk_info = vk::RenderPassCreateInfo::builder() + .attachments(&vk_attachments) + .subpasses(&vk_subpasses); + + let mut multiview_info; + let mask; + if let Some(multiview) = e.key().multiview { + // Sanity checks, better to panic here than cause a driver crash + assert!(multiview.get() <= 8); + assert!(multiview.get() > 1); + + // Right now we enable all bits on the view masks and correlation masks. + // This means we're rendering to all views in the subpass, and that all views + // can be rendered concurrently. + mask = [(1 << multiview.get()) - 1]; + + // On Vulkan 1.1 or later, this is an alias for core functionality + multiview_info = vk::RenderPassMultiviewCreateInfoKHR::builder() + .view_masks(&mask) + .correlation_masks(&mask) + .build(); + vk_info = vk_info.push_next(&mut multiview_info); + } + + let raw = unsafe { self.raw.create_render_pass(&vk_info, None)? }; + + *e.insert(raw) + } + }) + } + + pub fn make_framebuffer( + &self, + key: super::FramebufferKey, + raw_pass: vk::RenderPass, + pass_label: crate::Label, + ) -> Result<vk::Framebuffer, crate::DeviceError> { + Ok(match self.framebuffers.lock().entry(key) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let vk_views = e + .key() + .attachments + .iter() + .map(|at| at.raw) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + let vk_view_formats = e + .key() + .attachments + .iter() + .map(|at| self.private_caps.map_texture_format(at.view_format)) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + let vk_view_formats_list = e + .key() + .attachments + .iter() + .map(|at| at.raw_view_formats.clone()) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + + let vk_image_infos = e + .key() + .attachments + .iter() + .enumerate() + .map(|(i, at)| { + let mut info = vk::FramebufferAttachmentImageInfo::builder() + .usage(conv::map_texture_usage(at.view_usage)) + .flags(at.raw_image_flags) + .width(e.key().extent.width) + .height(e.key().extent.height) + .layer_count(e.key().extent.depth_or_array_layers); + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03214 + if vk_view_formats_list[i].is_empty() { + info = info.view_formats(&vk_view_formats[i..i + 1]); + } else { + info = info.view_formats(&vk_view_formats_list[i]); + }; + info.build() + }) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + + let mut vk_attachment_info = vk::FramebufferAttachmentsCreateInfo::builder() + .attachment_image_infos(&vk_image_infos) + .build(); + let mut vk_info = vk::FramebufferCreateInfo::builder() + .render_pass(raw_pass) + .width(e.key().extent.width) + .height(e.key().extent.height) + .layers(e.key().extent.depth_or_array_layers); + + if self.private_caps.imageless_framebuffers { + //TODO: https://github.com/MaikKlein/ash/issues/450 + vk_info = vk_info + .flags(vk::FramebufferCreateFlags::IMAGELESS_KHR) + .push_next(&mut vk_attachment_info); + vk_info.attachment_count = e.key().attachments.len() as u32; + } else { + vk_info = vk_info.attachments(&vk_views); + } + + *e.insert(unsafe { + let raw = self.raw.create_framebuffer(&vk_info, None).unwrap(); + if let Some(label) = pass_label { + self.set_object_name(vk::ObjectType::FRAMEBUFFER, raw, label); + } + raw + }) + } + }) + } + + fn make_memory_ranges<'a, I: 'a + Iterator<Item = crate::MemoryRange>>( + &self, + buffer: &'a super::Buffer, + ranges: I, + ) -> impl 'a + Iterator<Item = vk::MappedMemoryRange> { + let block = buffer.block.lock(); + let mask = self.private_caps.non_coherent_map_mask; + ranges.map(move |range| { + vk::MappedMemoryRange::builder() + .memory(*block.memory()) + .offset((block.offset() + range.start) & !mask) + .size((range.end - range.start + mask) & !mask) + .build() + }) + } + + unsafe fn free_resources(&self) { + for &raw in self.render_passes.lock().values() { + unsafe { self.raw.destroy_render_pass(raw, None) }; + } + for &raw in self.framebuffers.lock().values() { + unsafe { self.raw.destroy_framebuffer(raw, None) }; + } + if self.handle_is_owned { + unsafe { self.raw.destroy_device(None) }; + } + } +} + +impl gpu_alloc::MemoryDevice<vk::DeviceMemory> for super::DeviceShared { + unsafe fn allocate_memory( + &self, + size: u64, + memory_type: u32, + flags: gpu_alloc::AllocationFlags, + ) -> Result<vk::DeviceMemory, gpu_alloc::OutOfMemory> { + let mut info = vk::MemoryAllocateInfo::builder() + .allocation_size(size) + .memory_type_index(memory_type); + + let mut info_flags; + + if flags.contains(gpu_alloc::AllocationFlags::DEVICE_ADDRESS) { + info_flags = vk::MemoryAllocateFlagsInfo::builder() + .flags(vk::MemoryAllocateFlags::DEVICE_ADDRESS); + info = info.push_next(&mut info_flags); + } + + match unsafe { self.raw.allocate_memory(&info, None) } { + Ok(memory) => Ok(memory), + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_alloc::OutOfMemory::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { + Err(gpu_alloc::OutOfMemory::OutOfHostMemory) + } + Err(vk::Result::ERROR_TOO_MANY_OBJECTS) => panic!("Too many objects"), + Err(err) => panic!("Unexpected Vulkan error: `{err}`"), + } + } + + unsafe fn deallocate_memory(&self, memory: vk::DeviceMemory) { + unsafe { self.raw.free_memory(memory, None) }; + } + + unsafe fn map_memory( + &self, + memory: &mut vk::DeviceMemory, + offset: u64, + size: u64, + ) -> Result<ptr::NonNull<u8>, gpu_alloc::DeviceMapError> { + match unsafe { + self.raw + .map_memory(*memory, offset, size, vk::MemoryMapFlags::empty()) + } { + Ok(ptr) => Ok(ptr::NonNull::new(ptr as *mut u8) + .expect("Pointer to memory mapping must not be null")), + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_alloc::DeviceMapError::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { + Err(gpu_alloc::DeviceMapError::OutOfHostMemory) + } + Err(vk::Result::ERROR_MEMORY_MAP_FAILED) => Err(gpu_alloc::DeviceMapError::MapFailed), + Err(err) => panic!("Unexpected Vulkan error: `{err}`"), + } + } + + unsafe fn unmap_memory(&self, memory: &mut vk::DeviceMemory) { + unsafe { self.raw.unmap_memory(*memory) }; + } + + unsafe fn invalidate_memory_ranges( + &self, + _ranges: &[gpu_alloc::MappedMemoryRange<'_, vk::DeviceMemory>], + ) -> Result<(), gpu_alloc::OutOfMemory> { + // should never be called + unimplemented!() + } + + unsafe fn flush_memory_ranges( + &self, + _ranges: &[gpu_alloc::MappedMemoryRange<'_, vk::DeviceMemory>], + ) -> Result<(), gpu_alloc::OutOfMemory> { + // should never be called + unimplemented!() + } +} + +impl + gpu_descriptor::DescriptorDevice<vk::DescriptorSetLayout, vk::DescriptorPool, vk::DescriptorSet> + for super::DeviceShared +{ + unsafe fn create_descriptor_pool( + &self, + descriptor_count: &gpu_descriptor::DescriptorTotalCount, + max_sets: u32, + flags: gpu_descriptor::DescriptorPoolCreateFlags, + ) -> Result<vk::DescriptorPool, gpu_descriptor::CreatePoolError> { + //Note: ignoring other types, since they can't appear here + let unfiltered_counts = [ + (vk::DescriptorType::SAMPLER, descriptor_count.sampler), + ( + vk::DescriptorType::SAMPLED_IMAGE, + descriptor_count.sampled_image, + ), + ( + vk::DescriptorType::STORAGE_IMAGE, + descriptor_count.storage_image, + ), + ( + vk::DescriptorType::UNIFORM_BUFFER, + descriptor_count.uniform_buffer, + ), + ( + vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC, + descriptor_count.uniform_buffer_dynamic, + ), + ( + vk::DescriptorType::STORAGE_BUFFER, + descriptor_count.storage_buffer, + ), + ( + vk::DescriptorType::STORAGE_BUFFER_DYNAMIC, + descriptor_count.storage_buffer_dynamic, + ), + ]; + + let filtered_counts = unfiltered_counts + .iter() + .cloned() + .filter(|&(_, count)| count != 0) + .map(|(ty, count)| vk::DescriptorPoolSize { + ty, + descriptor_count: count, + }) + .collect::<ArrayVec<_, 8>>(); + + let mut vk_flags = + if flags.contains(gpu_descriptor::DescriptorPoolCreateFlags::UPDATE_AFTER_BIND) { + vk::DescriptorPoolCreateFlags::UPDATE_AFTER_BIND + } else { + vk::DescriptorPoolCreateFlags::empty() + }; + if flags.contains(gpu_descriptor::DescriptorPoolCreateFlags::FREE_DESCRIPTOR_SET) { + vk_flags |= vk::DescriptorPoolCreateFlags::FREE_DESCRIPTOR_SET; + } + let vk_info = vk::DescriptorPoolCreateInfo::builder() + .max_sets(max_sets) + .flags(vk_flags) + .pool_sizes(&filtered_counts) + .build(); + + match unsafe { self.raw.create_descriptor_pool(&vk_info, None) } { + Ok(pool) => Ok(pool), + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { + Err(gpu_descriptor::CreatePoolError::OutOfHostMemory) + } + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_descriptor::CreatePoolError::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_FRAGMENTATION) => { + Err(gpu_descriptor::CreatePoolError::Fragmentation) + } + Err(other) => { + log::error!("create_descriptor_pool: {:?}", other); + Err(gpu_descriptor::CreatePoolError::OutOfHostMemory) + } + } + } + + unsafe fn destroy_descriptor_pool(&self, pool: vk::DescriptorPool) { + unsafe { self.raw.destroy_descriptor_pool(pool, None) } + } + + unsafe fn alloc_descriptor_sets<'a>( + &self, + pool: &mut vk::DescriptorPool, + layouts: impl ExactSizeIterator<Item = &'a vk::DescriptorSetLayout>, + sets: &mut impl Extend<vk::DescriptorSet>, + ) -> Result<(), gpu_descriptor::DeviceAllocationError> { + let result = unsafe { + self.raw.allocate_descriptor_sets( + &vk::DescriptorSetAllocateInfo::builder() + .descriptor_pool(*pool) + .set_layouts( + &smallvec::SmallVec::<[vk::DescriptorSetLayout; 32]>::from_iter( + layouts.cloned(), + ), + ) + .build(), + ) + }; + + match result { + Ok(vk_sets) => { + sets.extend(vk_sets); + Ok(()) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) + | Err(vk::Result::ERROR_OUT_OF_POOL_MEMORY) => { + Err(gpu_descriptor::DeviceAllocationError::OutOfHostMemory) + } + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_descriptor::DeviceAllocationError::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_FRAGMENTED_POOL) => { + Err(gpu_descriptor::DeviceAllocationError::FragmentedPool) + } + Err(other) => { + log::error!("allocate_descriptor_sets: {:?}", other); + Err(gpu_descriptor::DeviceAllocationError::OutOfHostMemory) + } + } + } + + unsafe fn dealloc_descriptor_sets<'a>( + &self, + pool: &mut vk::DescriptorPool, + sets: impl Iterator<Item = vk::DescriptorSet>, + ) { + let result = unsafe { + self.raw.free_descriptor_sets( + *pool, + &smallvec::SmallVec::<[vk::DescriptorSet; 32]>::from_iter(sets), + ) + }; + match result { + Ok(()) => {} + Err(err) => log::error!("free_descriptor_sets: {:?}", err), + } + } +} + +struct CompiledStage { + create_info: vk::PipelineShaderStageCreateInfo, + _entry_point: CString, + temp_raw_module: Option<vk::ShaderModule>, +} + +impl super::Device { + pub(super) unsafe fn create_swapchain( + &self, + surface: &mut super::Surface, + config: &crate::SurfaceConfiguration, + provided_old_swapchain: Option<super::Swapchain>, + ) -> Result<super::Swapchain, crate::SurfaceError> { + profiling::scope!("Device::create_swapchain"); + let functor = khr::Swapchain::new(&surface.instance.raw, &self.shared.raw); + + let old_swapchain = match provided_old_swapchain { + Some(osc) => osc.raw, + None => vk::SwapchainKHR::null(), + }; + + let color_space = if config.format == wgt::TextureFormat::Rgba16Float { + // Enable wide color gamut mode + // Vulkan swapchain for Android only supports DISPLAY_P3_NONLINEAR_EXT and EXTENDED_SRGB_LINEAR_EXT + vk::ColorSpaceKHR::EXTENDED_SRGB_LINEAR_EXT + } else { + vk::ColorSpaceKHR::SRGB_NONLINEAR + }; + + let original_format = self.shared.private_caps.map_texture_format(config.format); + let mut raw_flags = vk::SwapchainCreateFlagsKHR::empty(); + let mut raw_view_formats: Vec<vk::Format> = vec![]; + let mut wgt_view_formats = vec![]; + if !config.view_formats.is_empty() { + raw_flags |= vk::SwapchainCreateFlagsKHR::MUTABLE_FORMAT; + raw_view_formats = config + .view_formats + .iter() + .map(|f| self.shared.private_caps.map_texture_format(*f)) + .collect(); + raw_view_formats.push(original_format); + + wgt_view_formats = config.view_formats.clone(); + wgt_view_formats.push(config.format); + } + + let mut info = vk::SwapchainCreateInfoKHR::builder() + .flags(raw_flags) + .surface(surface.raw) + .min_image_count(config.swap_chain_size) + .image_format(original_format) + .image_color_space(color_space) + .image_extent(vk::Extent2D { + width: config.extent.width, + height: config.extent.height, + }) + .image_array_layers(config.extent.depth_or_array_layers) + .image_usage(conv::map_texture_usage(config.usage)) + .image_sharing_mode(vk::SharingMode::EXCLUSIVE) + .pre_transform(vk::SurfaceTransformFlagsKHR::IDENTITY) + .composite_alpha(conv::map_composite_alpha_mode(config.composite_alpha_mode)) + .present_mode(conv::map_present_mode(config.present_mode)) + .clipped(true) + .old_swapchain(old_swapchain); + + let mut format_list_info = vk::ImageFormatListCreateInfo::builder(); + if !raw_view_formats.is_empty() { + format_list_info = format_list_info.view_formats(&raw_view_formats); + info = info.push_next(&mut format_list_info); + } + + let result = { + profiling::scope!("vkCreateSwapchainKHR"); + unsafe { functor.create_swapchain(&info, None) } + }; + + // doing this before bailing out with error + if old_swapchain != vk::SwapchainKHR::null() { + unsafe { functor.destroy_swapchain(old_swapchain, None) } + } + + let raw = match result { + Ok(swapchain) => swapchain, + Err(error) => { + return Err(match error { + vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost, + vk::Result::ERROR_NATIVE_WINDOW_IN_USE_KHR => { + crate::SurfaceError::Other("Native window is in use") + } + other => crate::DeviceError::from(other).into(), + }) + } + }; + + let images = + unsafe { functor.get_swapchain_images(raw) }.map_err(crate::DeviceError::from)?; + + let vk_info = vk::FenceCreateInfo::builder().build(); + let fence = unsafe { self.shared.raw.create_fence(&vk_info, None) } + .map_err(crate::DeviceError::from)?; + + Ok(super::Swapchain { + raw, + raw_flags, + functor, + device: Arc::clone(&self.shared), + fence, + images, + config: config.clone(), + view_formats: wgt_view_formats, + }) + } + + /// # Safety + /// + /// - `vk_image` must be created respecting `desc` + /// - If `drop_guard` is `Some`, the application must manually destroy the image handle. This + /// can be done inside the `Drop` impl of `drop_guard`. + /// - If the `ImageCreateFlags` does not contain `MUTABLE_FORMAT`, the `view_formats` of `desc` must be empty. + pub unsafe fn texture_from_raw( + vk_image: vk::Image, + desc: &crate::TextureDescriptor, + drop_guard: Option<crate::DropGuard>, + ) -> super::Texture { + let mut raw_flags = vk::ImageCreateFlags::empty(); + let mut view_formats = vec![]; + for tf in desc.view_formats.iter() { + if *tf == desc.format { + continue; + } + view_formats.push(*tf); + } + if !view_formats.is_empty() { + raw_flags |= + vk::ImageCreateFlags::MUTABLE_FORMAT | vk::ImageCreateFlags::EXTENDED_USAGE; + view_formats.push(desc.format) + } + + super::Texture { + raw: vk_image, + drop_guard, + block: None, + usage: desc.usage, + format: desc.format, + raw_flags: vk::ImageCreateFlags::empty(), + copy_size: desc.copy_extent(), + view_formats, + } + } + + fn create_shader_module_impl( + &self, + spv: &[u32], + ) -> Result<vk::ShaderModule, crate::DeviceError> { + let vk_info = vk::ShaderModuleCreateInfo::builder() + .flags(vk::ShaderModuleCreateFlags::empty()) + .code(spv); + + let raw = unsafe { + profiling::scope!("vkCreateShaderModule"); + self.shared.raw.create_shader_module(&vk_info, None)? + }; + Ok(raw) + } + + fn compile_stage( + &self, + stage: &crate::ProgrammableStage<super::Api>, + naga_stage: naga::ShaderStage, + binding_map: &naga::back::spv::BindingMap, + ) -> Result<CompiledStage, crate::PipelineError> { + let stage_flags = crate::auxil::map_naga_stage(naga_stage); + let vk_module = match *stage.module { + super::ShaderModule::Raw(raw) => raw, + super::ShaderModule::Intermediate { + ref naga_shader, + runtime_checks, + } => { + let pipeline_options = naga::back::spv::PipelineOptions { + entry_point: stage.entry_point.to_string(), + shader_stage: naga_stage, + }; + let needs_temp_options = !runtime_checks || !binding_map.is_empty(); + let mut temp_options; + let options = if needs_temp_options { + temp_options = self.naga_options.clone(); + if !runtime_checks { + temp_options.bounds_check_policies = naga::proc::BoundsCheckPolicies { + index: naga::proc::BoundsCheckPolicy::Unchecked, + buffer: naga::proc::BoundsCheckPolicy::Unchecked, + image: naga::proc::BoundsCheckPolicy::Unchecked, + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }; + } + if !binding_map.is_empty() { + temp_options.binding_map = binding_map.clone(); + } + &temp_options + } else { + &self.naga_options + }; + let spv = { + profiling::scope!("naga::spv::write_vec"); + naga::back::spv::write_vec( + &naga_shader.module, + &naga_shader.info, + options, + Some(&pipeline_options), + ) + } + .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?; + self.create_shader_module_impl(&spv)? + } + }; + + let entry_point = CString::new(stage.entry_point).unwrap(); + let create_info = vk::PipelineShaderStageCreateInfo::builder() + .stage(conv::map_shader_stage(stage_flags)) + .module(vk_module) + .name(&entry_point) + .build(); + + Ok(CompiledStage { + create_info, + _entry_point: entry_point, + temp_raw_module: match *stage.module { + super::ShaderModule::Raw(_) => None, + super::ShaderModule::Intermediate { .. } => Some(vk_module), + }, + }) + } + + /// Returns the queue family index of the device's internal queue. + /// + /// This is useful for constructing memory barriers needed for queue family ownership transfer when + /// external memory is involved (from/to `VK_QUEUE_FAMILY_EXTERNAL_KHR` and `VK_QUEUE_FAMILY_FOREIGN_EXT` + /// for example). + pub fn queue_family_index(&self) -> u32 { + self.shared.family_index + } + + pub fn queue_index(&self) -> u32 { + self.shared.queue_index + } + + pub fn raw_device(&self) -> &ash::Device { + &self.shared.raw + } + + pub fn raw_physical_device(&self) -> ash::vk::PhysicalDevice { + self.shared.physical_device + } + + pub fn raw_queue(&self) -> ash::vk::Queue { + self.shared.raw_queue + } + + pub fn enabled_device_extensions(&self) -> &[&'static CStr] { + &self.shared.enabled_extensions + } + + pub fn shared_instance(&self) -> &super::InstanceShared { + &self.shared.instance + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, queue: super::Queue) { + unsafe { self.mem_allocator.into_inner().cleanup(&*self.shared) }; + unsafe { self.desc_allocator.into_inner().cleanup(&*self.shared) }; + for &sem in queue.relay_semaphores.iter() { + unsafe { self.shared.raw.destroy_semaphore(sem, None) }; + } + unsafe { self.shared.free_resources() }; + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + let vk_info = vk::BufferCreateInfo::builder() + .size(desc.size) + .usage(conv::map_buffer_usage(desc.usage)) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + let raw = unsafe { self.shared.raw.create_buffer(&vk_info, None)? }; + let req = unsafe { self.shared.raw.get_buffer_memory_requirements(raw) }; + + let mut alloc_usage = if desc + .usage + .intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE) + { + let mut flags = gpu_alloc::UsageFlags::HOST_ACCESS; + //TODO: find a way to use `crate::MemoryFlags::PREFER_COHERENT` + flags.set( + gpu_alloc::UsageFlags::DOWNLOAD, + desc.usage.contains(crate::BufferUses::MAP_READ), + ); + flags.set( + gpu_alloc::UsageFlags::UPLOAD, + desc.usage.contains(crate::BufferUses::MAP_WRITE), + ); + flags + } else { + gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS + }; + alloc_usage.set( + gpu_alloc::UsageFlags::TRANSIENT, + desc.memory_flags.contains(crate::MemoryFlags::TRANSIENT), + ); + + let block = unsafe { + self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: req.alignment - 1, + usage: alloc_usage, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )? + }; + + unsafe { + self.shared + .raw + .bind_buffer_memory(raw, *block.memory(), block.offset())? + }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::BUFFER, raw, label) + }; + } + + Ok(super::Buffer { + raw, + block: Mutex::new(block), + }) + } + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { + unsafe { self.shared.raw.destroy_buffer(buffer.raw, None) }; + unsafe { + self.mem_allocator + .lock() + .dealloc(&*self.shared, buffer.block.into_inner()) + }; + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + let size = range.end - range.start; + let mut block = buffer.block.lock(); + let ptr = unsafe { block.map(&*self.shared, range.start, size as usize)? }; + let is_coherent = block + .props() + .contains(gpu_alloc::MemoryPropertyFlags::HOST_COHERENT); + Ok(crate::BufferMapping { ptr, is_coherent }) + } + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + unsafe { buffer.block.lock().unmap(&*self.shared) }; + Ok(()) + } + + unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + let vk_ranges = self.shared.make_memory_ranges(buffer, ranges); + + unsafe { + self.shared + .raw + .flush_mapped_memory_ranges( + &smallvec::SmallVec::<[vk::MappedMemoryRange; 32]>::from_iter(vk_ranges), + ) + } + .unwrap(); + } + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + let vk_ranges = self.shared.make_memory_ranges(buffer, ranges); + + unsafe { + self.shared + .raw + .invalidate_mapped_memory_ranges( + &smallvec::SmallVec::<[vk::MappedMemoryRange; 32]>::from_iter(vk_ranges), + ) + } + .unwrap(); + } + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + let copy_size = desc.copy_extent(); + + let mut raw_flags = vk::ImageCreateFlags::empty(); + if desc.is_cube_compatible() { + raw_flags |= vk::ImageCreateFlags::CUBE_COMPATIBLE; + } + + let original_format = self.shared.private_caps.map_texture_format(desc.format); + let mut vk_view_formats = vec![]; + let mut wgt_view_formats = vec![]; + if !desc.view_formats.is_empty() { + raw_flags |= vk::ImageCreateFlags::MUTABLE_FORMAT; + wgt_view_formats = desc.view_formats.clone(); + wgt_view_formats.push(desc.format); + + if self.shared_instance().driver_api_version >= vk::API_VERSION_1_2 + || self + .enabled_device_extensions() + .contains(&vk::KhrImageFormatListFn::name()) + { + vk_view_formats = desc + .view_formats + .iter() + .map(|f| self.shared.private_caps.map_texture_format(*f)) + .collect(); + vk_view_formats.push(original_format) + } + } + + let mut vk_info = vk::ImageCreateInfo::builder() + .flags(raw_flags) + .image_type(conv::map_texture_dimension(desc.dimension)) + .format(original_format) + .extent(conv::map_copy_extent(©_size)) + .mip_levels(desc.mip_level_count) + .array_layers(desc.array_layer_count()) + .samples(vk::SampleCountFlags::from_raw(desc.sample_count)) + .tiling(vk::ImageTiling::OPTIMAL) + .usage(conv::map_texture_usage(desc.usage)) + .sharing_mode(vk::SharingMode::EXCLUSIVE) + .initial_layout(vk::ImageLayout::UNDEFINED); + + let mut format_list_info = vk::ImageFormatListCreateInfo::builder(); + if !vk_view_formats.is_empty() { + format_list_info = format_list_info.view_formats(&vk_view_formats); + vk_info = vk_info.push_next(&mut format_list_info); + } + + let raw = unsafe { self.shared.raw.create_image(&vk_info, None)? }; + let req = unsafe { self.shared.raw.get_image_memory_requirements(raw) }; + + let block = unsafe { + self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: req.alignment - 1, + usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )? + }; + + unsafe { + self.shared + .raw + .bind_image_memory(raw, *block.memory(), block.offset())? + }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::IMAGE, raw, label) + }; + } + + Ok(super::Texture { + raw, + drop_guard: None, + block: Some(block), + usage: desc.usage, + format: desc.format, + raw_flags, + copy_size, + view_formats: wgt_view_formats, + }) + } + unsafe fn destroy_texture(&self, texture: super::Texture) { + if texture.drop_guard.is_none() { + unsafe { self.shared.raw.destroy_image(texture.raw, None) }; + } + if let Some(block) = texture.block { + unsafe { self.mem_allocator.lock().dealloc(&*self.shared, block) }; + } + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + let subresource_range = conv::map_subresource_range(&desc.range, desc.format); + let mut vk_info = vk::ImageViewCreateInfo::builder() + .flags(vk::ImageViewCreateFlags::empty()) + .image(texture.raw) + .view_type(conv::map_view_dimension(desc.dimension)) + .format(self.shared.private_caps.map_texture_format(desc.format)) + .subresource_range(subresource_range); + let layers = + NonZeroU32::new(subresource_range.layer_count).expect("Unexpected zero layer count"); + + let mut image_view_info; + let view_usage = if self.shared.private_caps.image_view_usage && !desc.usage.is_empty() { + image_view_info = vk::ImageViewUsageCreateInfo::builder() + .usage(conv::map_texture_usage(desc.usage)) + .build(); + vk_info = vk_info.push_next(&mut image_view_info); + desc.usage + } else { + texture.usage + }; + + let raw = unsafe { self.shared.raw.create_image_view(&vk_info, None) }?; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::IMAGE_VIEW, raw, label) + }; + } + + let attachment = super::FramebufferAttachment { + raw: if self.shared.private_caps.imageless_framebuffers { + vk::ImageView::null() + } else { + raw + }, + raw_image_flags: texture.raw_flags, + view_usage, + view_format: desc.format, + raw_view_formats: texture + .view_formats + .iter() + .map(|tf| self.shared.private_caps.map_texture_format(*tf)) + .collect(), + }; + + Ok(super::TextureView { + raw, + layers, + attachment, + }) + } + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + if !self.shared.private_caps.imageless_framebuffers { + let mut fbuf_lock = self.shared.framebuffers.lock(); + for (key, &raw_fbuf) in fbuf_lock.iter() { + if key.attachments.iter().any(|at| at.raw == view.raw) { + unsafe { self.shared.raw.destroy_framebuffer(raw_fbuf, None) }; + } + } + fbuf_lock.retain(|key, _| !key.attachments.iter().any(|at| at.raw == view.raw)); + } + unsafe { self.shared.raw.destroy_image_view(view.raw, None) }; + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + let mut vk_info = vk::SamplerCreateInfo::builder() + .flags(vk::SamplerCreateFlags::empty()) + .mag_filter(conv::map_filter_mode(desc.mag_filter)) + .min_filter(conv::map_filter_mode(desc.min_filter)) + .mipmap_mode(conv::map_mip_filter_mode(desc.mipmap_filter)) + .address_mode_u(conv::map_address_mode(desc.address_modes[0])) + .address_mode_v(conv::map_address_mode(desc.address_modes[1])) + .address_mode_w(conv::map_address_mode(desc.address_modes[2])) + .min_lod(desc.lod_clamp.start) + .max_lod(desc.lod_clamp.end); + + if let Some(fun) = desc.compare { + vk_info = vk_info + .compare_enable(true) + .compare_op(conv::map_comparison(fun)); + } + + if desc.anisotropy_clamp != 1 { + // We only enable anisotropy if it is supported, and wgpu-hal interface guarentees + // the clamp is in the range [1, 16] which is always supported if anisotropy is. + vk_info = vk_info + .anisotropy_enable(true) + .max_anisotropy(desc.anisotropy_clamp as f32); + } + + if let Some(color) = desc.border_color { + vk_info = vk_info.border_color(conv::map_border_color(color)); + } + + let raw = unsafe { self.shared.raw.create_sampler(&vk_info, None)? }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::SAMPLER, raw, label) + }; + } + + Ok(super::Sampler { raw }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + unsafe { self.shared.raw.destroy_sampler(sampler.raw, None) }; + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + let vk_info = vk::CommandPoolCreateInfo::builder() + .queue_family_index(desc.queue.family_index) + .flags(vk::CommandPoolCreateFlags::TRANSIENT) + .build(); + let raw = unsafe { self.shared.raw.create_command_pool(&vk_info, None)? }; + + Ok(super::CommandEncoder { + raw, + device: Arc::clone(&self.shared), + active: vk::CommandBuffer::null(), + bind_point: vk::PipelineBindPoint::default(), + temp: super::Temp::default(), + free: Vec::new(), + discarded: Vec::new(), + rpass_debug_marker_active: false, + }) + } + unsafe fn destroy_command_encoder(&self, cmd_encoder: super::CommandEncoder) { + unsafe { + if !cmd_encoder.free.is_empty() { + self.shared + .raw + .free_command_buffers(cmd_encoder.raw, &cmd_encoder.free) + } + if !cmd_encoder.discarded.is_empty() { + self.shared + .raw + .free_command_buffers(cmd_encoder.raw, &cmd_encoder.discarded) + } + self.shared.raw.destroy_command_pool(cmd_encoder.raw, None); + } + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + let mut desc_count = gpu_descriptor::DescriptorTotalCount::default(); + let mut types = Vec::new(); + for entry in desc.entries { + let count = entry.count.map_or(1, |c| c.get()); + if entry.binding as usize >= types.len() { + types.resize( + entry.binding as usize + 1, + (vk::DescriptorType::INPUT_ATTACHMENT, 0), + ); + } + types[entry.binding as usize] = ( + conv::map_binding_type(entry.ty), + entry.count.map_or(1, |c| c.get()), + ); + + match entry.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => match ty { + wgt::BufferBindingType::Uniform => { + if has_dynamic_offset { + desc_count.uniform_buffer_dynamic += count; + } else { + desc_count.uniform_buffer += count; + } + } + wgt::BufferBindingType::Storage { .. } => { + if has_dynamic_offset { + desc_count.storage_buffer_dynamic += count; + } else { + desc_count.storage_buffer += count; + } + } + }, + wgt::BindingType::Sampler { .. } => { + desc_count.sampler += count; + } + wgt::BindingType::Texture { .. } => { + desc_count.sampled_image += count; + } + wgt::BindingType::StorageTexture { .. } => { + desc_count.storage_image += count; + } + } + } + + //Note: not bothering with on stack array here as it's low frequency + let vk_bindings = desc + .entries + .iter() + .map(|entry| vk::DescriptorSetLayoutBinding { + binding: entry.binding, + descriptor_type: types[entry.binding as usize].0, + descriptor_count: types[entry.binding as usize].1, + stage_flags: conv::map_shader_stage(entry.visibility), + p_immutable_samplers: ptr::null(), + }) + .collect::<Vec<_>>(); + + let vk_info = vk::DescriptorSetLayoutCreateInfo::builder().bindings(&vk_bindings); + + let binding_arrays = desc + .entries + .iter() + .enumerate() + .filter_map(|(idx, entry)| entry.count.map(|count| (idx as u32, count))) + .collect(); + + let mut binding_flag_info; + let binding_flag_vec; + + let partially_bound = desc + .flags + .contains(crate::BindGroupLayoutFlags::PARTIALLY_BOUND); + + let vk_info = if partially_bound { + binding_flag_vec = desc + .entries + .iter() + .map(|entry| { + let mut flags = vk::DescriptorBindingFlags::empty(); + + if partially_bound && entry.count.is_some() { + flags |= vk::DescriptorBindingFlags::PARTIALLY_BOUND; + } + + flags + }) + .collect::<Vec<_>>(); + + binding_flag_info = vk::DescriptorSetLayoutBindingFlagsCreateInfo::builder() + .binding_flags(&binding_flag_vec); + + vk_info.push_next(&mut binding_flag_info) + } else { + vk_info + }; + + let raw = unsafe { + self.shared + .raw + .create_descriptor_set_layout(&vk_info, None)? + }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::DESCRIPTOR_SET_LAYOUT, raw, label) + }; + } + + Ok(super::BindGroupLayout { + raw, + desc_count, + types: types.into_boxed_slice(), + binding_arrays, + }) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + unsafe { + self.shared + .raw + .destroy_descriptor_set_layout(bg_layout.raw, None) + }; + } + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + //Note: not bothering with on stack array here as it's low frequency + let vk_set_layouts = desc + .bind_group_layouts + .iter() + .map(|bgl| bgl.raw) + .collect::<Vec<_>>(); + let vk_push_constant_ranges = desc + .push_constant_ranges + .iter() + .map(|pcr| vk::PushConstantRange { + stage_flags: conv::map_shader_stage(pcr.stages), + offset: pcr.range.start, + size: pcr.range.end - pcr.range.start, + }) + .collect::<Vec<_>>(); + + let vk_info = vk::PipelineLayoutCreateInfo::builder() + .flags(vk::PipelineLayoutCreateFlags::empty()) + .set_layouts(&vk_set_layouts) + .push_constant_ranges(&vk_push_constant_ranges); + + let raw = { + profiling::scope!("vkCreatePipelineLayout"); + unsafe { self.shared.raw.create_pipeline_layout(&vk_info, None)? } + }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::PIPELINE_LAYOUT, raw, label) + }; + } + + let mut binding_arrays = BTreeMap::new(); + for (group, &layout) in desc.bind_group_layouts.iter().enumerate() { + for &(binding, binding_array_size) in &layout.binding_arrays { + binding_arrays.insert( + naga::ResourceBinding { + group: group as u32, + binding, + }, + naga::back::spv::BindingInfo { + binding_array_size: Some(binding_array_size.get()), + }, + ); + } + } + + Ok(super::PipelineLayout { + raw, + binding_arrays, + }) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + unsafe { + self.shared + .raw + .destroy_pipeline_layout(pipeline_layout.raw, None) + }; + } + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + let mut vk_sets = unsafe { + self.desc_allocator.lock().allocate( + &*self.shared, + &desc.layout.raw, + gpu_descriptor::DescriptorSetLayoutCreateFlags::empty(), + &desc.layout.desc_count, + 1, + )? + }; + + let set = vk_sets.pop().unwrap(); + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::DESCRIPTOR_SET, *set.raw(), label) + }; + } + + let mut writes = Vec::with_capacity(desc.entries.len()); + let mut buffer_infos = Vec::with_capacity(desc.buffers.len()); + let mut sampler_infos = Vec::with_capacity(desc.samplers.len()); + let mut image_infos = Vec::with_capacity(desc.textures.len()); + for entry in desc.entries { + let (ty, size) = desc.layout.types[entry.binding as usize]; + if size == 0 { + continue; // empty slot + } + let mut write = vk::WriteDescriptorSet::builder() + .dst_set(*set.raw()) + .dst_binding(entry.binding) + .descriptor_type(ty); + write = match ty { + vk::DescriptorType::SAMPLER => { + let index = sampler_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + sampler_infos.extend(desc.samplers[start as usize..end as usize].iter().map( + |binding| { + vk::DescriptorImageInfo::builder() + .sampler(binding.raw) + .build() + }, + )); + write.image_info(&sampler_infos[index..]) + } + vk::DescriptorType::SAMPLED_IMAGE | vk::DescriptorType::STORAGE_IMAGE => { + let index = image_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + image_infos.extend(desc.textures[start as usize..end as usize].iter().map( + |binding| { + let layout = conv::derive_image_layout( + binding.usage, + binding.view.attachment.view_format, + ); + vk::DescriptorImageInfo::builder() + .image_view(binding.view.raw) + .image_layout(layout) + .build() + }, + )); + write.image_info(&image_infos[index..]) + } + vk::DescriptorType::UNIFORM_BUFFER + | vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC + | vk::DescriptorType::STORAGE_BUFFER + | vk::DescriptorType::STORAGE_BUFFER_DYNAMIC => { + let index = buffer_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + buffer_infos.extend(desc.buffers[start as usize..end as usize].iter().map( + |binding| { + vk::DescriptorBufferInfo::builder() + .buffer(binding.buffer.raw) + .offset(binding.offset) + .range(binding.size.map_or(vk::WHOLE_SIZE, wgt::BufferSize::get)) + .build() + }, + )); + write.buffer_info(&buffer_infos[index..]) + } + _ => unreachable!(), + }; + writes.push(write.build()); + } + + unsafe { self.shared.raw.update_descriptor_sets(&writes, &[]) }; + Ok(super::BindGroup { set }) + } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + unsafe { + self.desc_allocator + .lock() + .free(&*self.shared, Some(group.set)) + }; + } + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + let spv = match shader { + crate::ShaderInput::Naga(naga_shader) => { + if self + .shared + .workarounds + .contains(super::Workarounds::SEPARATE_ENTRY_POINTS) + { + return Ok(super::ShaderModule::Intermediate { + naga_shader, + runtime_checks: desc.runtime_checks, + }); + } + let mut naga_options = self.naga_options.clone(); + if !desc.runtime_checks { + naga_options.bounds_check_policies = naga::proc::BoundsCheckPolicies { + index: naga::proc::BoundsCheckPolicy::Unchecked, + buffer: naga::proc::BoundsCheckPolicy::Unchecked, + image: naga::proc::BoundsCheckPolicy::Unchecked, + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }; + } + Cow::Owned( + naga::back::spv::write_vec( + &naga_shader.module, + &naga_shader.info, + &naga_options, + None, + ) + .map_err(|e| crate::ShaderError::Compilation(format!("{e}")))?, + ) + } + crate::ShaderInput::SpirV(spv) => Cow::Borrowed(spv), + }; + + let raw = self.create_shader_module_impl(&spv)?; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::SHADER_MODULE, raw, label) + }; + } + + Ok(super::ShaderModule::Raw(raw)) + } + unsafe fn destroy_shader_module(&self, module: super::ShaderModule) { + match module { + super::ShaderModule::Raw(raw) => { + unsafe { self.shared.raw.destroy_shader_module(raw, None) }; + } + super::ShaderModule::Intermediate { .. } => {} + } + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + let dynamic_states = [ + vk::DynamicState::VIEWPORT, + vk::DynamicState::SCISSOR, + vk::DynamicState::BLEND_CONSTANTS, + vk::DynamicState::STENCIL_REFERENCE, + ]; + let mut compatible_rp_key = super::RenderPassKey { + sample_count: desc.multisample.count, + multiview: desc.multiview, + ..Default::default() + }; + let mut stages = ArrayVec::<_, 2>::new(); + let mut vertex_buffers = Vec::with_capacity(desc.vertex_buffers.len()); + let mut vertex_attributes = Vec::new(); + + for (i, vb) in desc.vertex_buffers.iter().enumerate() { + vertex_buffers.push(vk::VertexInputBindingDescription { + binding: i as u32, + stride: vb.array_stride as u32, + input_rate: match vb.step_mode { + wgt::VertexStepMode::Vertex => vk::VertexInputRate::VERTEX, + wgt::VertexStepMode::Instance => vk::VertexInputRate::INSTANCE, + }, + }); + for at in vb.attributes { + vertex_attributes.push(vk::VertexInputAttributeDescription { + location: at.shader_location, + binding: i as u32, + format: conv::map_vertex_format(at.format), + offset: at.offset as u32, + }); + } + } + + let vk_vertex_input = vk::PipelineVertexInputStateCreateInfo::builder() + .vertex_binding_descriptions(&vertex_buffers) + .vertex_attribute_descriptions(&vertex_attributes) + .build(); + + let vk_input_assembly = vk::PipelineInputAssemblyStateCreateInfo::builder() + .topology(conv::map_topology(desc.primitive.topology)) + .primitive_restart_enable(desc.primitive.strip_index_format.is_some()) + .build(); + + let compiled_vs = self.compile_stage( + &desc.vertex_stage, + naga::ShaderStage::Vertex, + &desc.layout.binding_arrays, + )?; + stages.push(compiled_vs.create_info); + let compiled_fs = match desc.fragment_stage { + Some(ref stage) => { + let compiled = self.compile_stage( + stage, + naga::ShaderStage::Fragment, + &desc.layout.binding_arrays, + )?; + stages.push(compiled.create_info); + Some(compiled) + } + None => None, + }; + + let mut vk_rasterization = vk::PipelineRasterizationStateCreateInfo::builder() + .polygon_mode(conv::map_polygon_mode(desc.primitive.polygon_mode)) + .front_face(conv::map_front_face(desc.primitive.front_face)) + .line_width(1.0); + if let Some(face) = desc.primitive.cull_mode { + vk_rasterization = vk_rasterization.cull_mode(conv::map_cull_face(face)) + } + let mut vk_rasterization_conservative_state = + vk::PipelineRasterizationConservativeStateCreateInfoEXT::builder() + .conservative_rasterization_mode(vk::ConservativeRasterizationModeEXT::OVERESTIMATE) + .build(); + if desc.primitive.conservative { + vk_rasterization = vk_rasterization.push_next(&mut vk_rasterization_conservative_state); + } + let mut vk_depth_clip_state = + vk::PipelineRasterizationDepthClipStateCreateInfoEXT::builder() + .depth_clip_enable(false) + .build(); + if desc.primitive.unclipped_depth { + vk_rasterization = vk_rasterization.push_next(&mut vk_depth_clip_state); + } + + let mut vk_depth_stencil = vk::PipelineDepthStencilStateCreateInfo::builder(); + if let Some(ref ds) = desc.depth_stencil { + let vk_format = self.shared.private_caps.map_texture_format(ds.format); + let vk_layout = if ds.is_read_only(desc.primitive.cull_mode) { + vk::ImageLayout::DEPTH_STENCIL_READ_ONLY_OPTIMAL + } else { + vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL + }; + compatible_rp_key.depth_stencil = Some(super::DepthStencilAttachmentKey { + base: super::AttachmentKey::compatible(vk_format, vk_layout), + stencil_ops: crate::AttachmentOps::all(), + }); + + if ds.is_depth_enabled() { + vk_depth_stencil = vk_depth_stencil + .depth_test_enable(true) + .depth_write_enable(ds.depth_write_enabled) + .depth_compare_op(conv::map_comparison(ds.depth_compare)); + } + if ds.stencil.is_enabled() { + let s = &ds.stencil; + let front = conv::map_stencil_face(&s.front, s.read_mask, s.write_mask); + let back = conv::map_stencil_face(&s.back, s.read_mask, s.write_mask); + vk_depth_stencil = vk_depth_stencil + .stencil_test_enable(true) + .front(front) + .back(back); + } + + if ds.bias.is_enabled() { + vk_rasterization = vk_rasterization + .depth_bias_enable(true) + .depth_bias_constant_factor(ds.bias.constant as f32) + .depth_bias_clamp(ds.bias.clamp) + .depth_bias_slope_factor(ds.bias.slope_scale); + } + } + + let vk_viewport = vk::PipelineViewportStateCreateInfo::builder() + .flags(vk::PipelineViewportStateCreateFlags::empty()) + .scissor_count(1) + .viewport_count(1) + .build(); + + let vk_sample_mask = [ + desc.multisample.mask as u32, + (desc.multisample.mask >> 32) as u32, + ]; + let vk_multisample = vk::PipelineMultisampleStateCreateInfo::builder() + .rasterization_samples(vk::SampleCountFlags::from_raw(desc.multisample.count)) + .alpha_to_coverage_enable(desc.multisample.alpha_to_coverage_enabled) + .sample_mask(&vk_sample_mask) + .build(); + + let mut vk_attachments = Vec::with_capacity(desc.color_targets.len()); + for cat in desc.color_targets { + let (key, attarchment) = if let Some(cat) = cat.as_ref() { + let mut vk_attachment = vk::PipelineColorBlendAttachmentState::builder() + .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits())); + if let Some(ref blend) = cat.blend { + let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); + let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); + vk_attachment = vk_attachment + .blend_enable(true) + .color_blend_op(color_op) + .src_color_blend_factor(color_src) + .dst_color_blend_factor(color_dst) + .alpha_blend_op(alpha_op) + .src_alpha_blend_factor(alpha_src) + .dst_alpha_blend_factor(alpha_dst); + } + + let vk_format = self.shared.private_caps.map_texture_format(cat.format); + ( + Some(super::ColorAttachmentKey { + base: super::AttachmentKey::compatible( + vk_format, + vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL, + ), + resolve: None, + }), + vk_attachment.build(), + ) + } else { + (None, vk::PipelineColorBlendAttachmentState::default()) + }; + + compatible_rp_key.colors.push(key); + vk_attachments.push(attarchment); + } + + let vk_color_blend = vk::PipelineColorBlendStateCreateInfo::builder() + .attachments(&vk_attachments) + .build(); + + let vk_dynamic_state = vk::PipelineDynamicStateCreateInfo::builder() + .dynamic_states(&dynamic_states) + .build(); + + let raw_pass = self + .shared + .make_render_pass(compatible_rp_key) + .map_err(crate::DeviceError::from)?; + + let vk_infos = [{ + vk::GraphicsPipelineCreateInfo::builder() + .layout(desc.layout.raw) + .stages(&stages) + .vertex_input_state(&vk_vertex_input) + .input_assembly_state(&vk_input_assembly) + .rasterization_state(&vk_rasterization) + .viewport_state(&vk_viewport) + .multisample_state(&vk_multisample) + .depth_stencil_state(&vk_depth_stencil) + .color_blend_state(&vk_color_blend) + .dynamic_state(&vk_dynamic_state) + .render_pass(raw_pass) + .build() + }]; + + let mut raw_vec = { + profiling::scope!("vkCreateGraphicsPipelines"); + unsafe { + self.shared + .raw + .create_graphics_pipelines(vk::PipelineCache::null(), &vk_infos, None) + .map_err(|(_, e)| crate::DeviceError::from(e)) + }? + }; + + let raw = raw_vec.pop().unwrap(); + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::PIPELINE, raw, label) + }; + } + + if let Some(raw_module) = compiled_vs.temp_raw_module { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + if let Some(CompiledStage { + temp_raw_module: Some(raw_module), + .. + }) = compiled_fs + { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + + Ok(super::RenderPipeline { raw }) + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + let compiled = self.compile_stage( + &desc.stage, + naga::ShaderStage::Compute, + &desc.layout.binding_arrays, + )?; + + let vk_infos = [{ + vk::ComputePipelineCreateInfo::builder() + .layout(desc.layout.raw) + .stage(compiled.create_info) + .build() + }]; + + let mut raw_vec = { + profiling::scope!("vkCreateComputePipelines"); + unsafe { + self.shared + .raw + .create_compute_pipelines(vk::PipelineCache::null(), &vk_infos, None) + .map_err(|(_, e)| crate::DeviceError::from(e)) + }? + }; + + let raw = raw_vec.pop().unwrap(); + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::PIPELINE, raw, label) + }; + } + + if let Some(raw_module) = compiled.temp_raw_module { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + + Ok(super::ComputePipeline { raw }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + } + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + let (vk_type, pipeline_statistics) = match desc.ty { + wgt::QueryType::Occlusion => ( + vk::QueryType::OCCLUSION, + vk::QueryPipelineStatisticFlags::empty(), + ), + wgt::QueryType::PipelineStatistics(statistics) => ( + vk::QueryType::PIPELINE_STATISTICS, + conv::map_pipeline_statistics(statistics), + ), + wgt::QueryType::Timestamp => ( + vk::QueryType::TIMESTAMP, + vk::QueryPipelineStatisticFlags::empty(), + ), + }; + + let vk_info = vk::QueryPoolCreateInfo::builder() + .query_type(vk_type) + .query_count(desc.count) + .pipeline_statistics(pipeline_statistics) + .build(); + + let raw = unsafe { self.shared.raw.create_query_pool(&vk_info, None) }?; + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::QUERY_POOL, raw, label) + }; + } + + Ok(super::QuerySet { raw }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + unsafe { self.shared.raw.destroy_query_pool(set.raw, None) }; + } + + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + Ok(if self.shared.private_caps.timeline_semaphores { + let mut sem_type_info = + vk::SemaphoreTypeCreateInfo::builder().semaphore_type(vk::SemaphoreType::TIMELINE); + let vk_info = vk::SemaphoreCreateInfo::builder().push_next(&mut sem_type_info); + let raw = unsafe { self.shared.raw.create_semaphore(&vk_info, None) }?; + super::Fence::TimelineSemaphore(raw) + } else { + super::Fence::FencePool { + last_completed: 0, + active: Vec::new(), + free: Vec::new(), + } + }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + match fence { + super::Fence::TimelineSemaphore(raw) => { + unsafe { self.shared.raw.destroy_semaphore(raw, None) }; + } + super::Fence::FencePool { + active, + free, + last_completed: _, + } => { + for (_, raw) in active { + unsafe { self.shared.raw.destroy_fence(raw, None) }; + } + for raw in free { + unsafe { self.shared.raw.destroy_fence(raw, None) }; + } + } + } + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + fence.get_latest( + &self.shared.raw, + self.shared.extension_fns.timeline_semaphore.as_ref(), + ) + } + unsafe fn wait( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + let timeout_ns = timeout_ms as u64 * super::MILLIS_TO_NANOS; + match *fence { + super::Fence::TimelineSemaphore(raw) => { + let semaphores = [raw]; + let values = [wait_value]; + let vk_info = vk::SemaphoreWaitInfo::builder() + .semaphores(&semaphores) + .values(&values); + let result = match self.shared.extension_fns.timeline_semaphore { + Some(super::ExtensionFn::Extension(ref ext)) => unsafe { + ext.wait_semaphores(&vk_info, timeout_ns) + }, + Some(super::ExtensionFn::Promoted) => unsafe { + self.shared.raw.wait_semaphores(&vk_info, timeout_ns) + }, + None => unreachable!(), + }; + match result { + Ok(()) => Ok(true), + Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + super::Fence::FencePool { + last_completed, + ref active, + free: _, + } => { + if wait_value <= last_completed { + Ok(true) + } else { + match active.iter().find(|&&(value, _)| value >= wait_value) { + Some(&(_, raw)) => { + match unsafe { + self.shared.raw.wait_for_fences(&[raw], true, timeout_ns) + } { + Ok(()) => Ok(true), + Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + None => { + log::error!("No signals reached value {}", wait_value); + Err(crate::DeviceError::Lost) + } + } + } + } + } + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(feature = "renderdoc")] + { + // Renderdoc requires us to give us the pointer that vkInstance _points to_. + let raw_vk_instance = + ash::vk::Handle::as_raw(self.shared.instance.raw.handle()) as *mut *mut _; + let raw_vk_instance_dispatch_table = unsafe { *raw_vk_instance }; + unsafe { + self.render_doc + .start_frame_capture(raw_vk_instance_dispatch_table, ptr::null_mut()) + } + } + #[cfg(not(feature = "renderdoc"))] + false + } + unsafe fn stop_capture(&self) { + #[cfg(feature = "renderdoc")] + { + // Renderdoc requires us to give us the pointer that vkInstance _points to_. + let raw_vk_instance = + ash::vk::Handle::as_raw(self.shared.instance.raw.handle()) as *mut *mut _; + let raw_vk_instance_dispatch_table = unsafe { *raw_vk_instance }; + + unsafe { + self.render_doc + .end_frame_capture(raw_vk_instance_dispatch_table, ptr::null_mut()) + } + } + } +} + +impl From<gpu_alloc::AllocationError> for crate::DeviceError { + fn from(error: gpu_alloc::AllocationError) -> Self { + use gpu_alloc::AllocationError as Ae; + match error { + Ae::OutOfDeviceMemory | Ae::OutOfHostMemory => Self::OutOfMemory, + _ => { + log::error!("memory allocation: {:?}", error); + Self::Lost + } + } + } +} +impl From<gpu_alloc::MapError> for crate::DeviceError { + fn from(error: gpu_alloc::MapError) -> Self { + use gpu_alloc::MapError as Me; + match error { + Me::OutOfDeviceMemory | Me::OutOfHostMemory => Self::OutOfMemory, + _ => { + log::error!("memory mapping: {:?}", error); + Self::Lost + } + } + } +} +impl From<gpu_descriptor::AllocationError> for crate::DeviceError { + fn from(error: gpu_descriptor::AllocationError) -> Self { + log::error!("descriptor allocation: {:?}", error); + Self::OutOfMemory + } +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/instance.rs b/third_party/rust/wgpu-hal/src/vulkan/instance.rs new file mode 100644 index 0000000000..101f303c16 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/instance.rs @@ -0,0 +1,832 @@ +use std::{ + ffi::{c_void, CStr, CString}, + slice, + sync::Arc, + thread, +}; + +use ash::{ + extensions::{ext, khr}, + vk, +}; + +unsafe extern "system" fn debug_utils_messenger_callback( + message_severity: vk::DebugUtilsMessageSeverityFlagsEXT, + message_type: vk::DebugUtilsMessageTypeFlagsEXT, + callback_data_ptr: *const vk::DebugUtilsMessengerCallbackDataEXT, + _user_data: *mut c_void, +) -> vk::Bool32 { + const VUID_VKSWAPCHAINCREATEINFOKHR_IMAGEEXTENT_01274: i32 = 0x7cd0911d; + use std::borrow::Cow; + + if thread::panicking() { + return vk::FALSE; + } + + let level = match message_severity { + vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE => log::Level::Debug, + vk::DebugUtilsMessageSeverityFlagsEXT::INFO => log::Level::Info, + vk::DebugUtilsMessageSeverityFlagsEXT::WARNING => log::Level::Warn, + vk::DebugUtilsMessageSeverityFlagsEXT::ERROR => log::Level::Error, + _ => log::Level::Warn, + }; + + let cd = unsafe { &*callback_data_ptr }; + + let message_id_name = if cd.p_message_id_name.is_null() { + Cow::from("") + } else { + unsafe { CStr::from_ptr(cd.p_message_id_name) }.to_string_lossy() + }; + let message = if cd.p_message.is_null() { + Cow::from("") + } else { + unsafe { CStr::from_ptr(cd.p_message) }.to_string_lossy() + }; + + // Silence Vulkan Validation error "VUID-VkSwapchainCreateInfoKHR-imageExtent-01274" + // - it's a false positive due to the inherent racy-ness of surface resizing + if cd.message_id_number == VUID_VKSWAPCHAINCREATEINFOKHR_IMAGEEXTENT_01274 { + return vk::FALSE; + } + + let _ = std::panic::catch_unwind(|| { + log::log!( + level, + "{:?} [{} (0x{:x})]\n\t{}", + message_type, + message_id_name, + cd.message_id_number, + message, + ); + }); + + if cd.queue_label_count != 0 { + let labels = + unsafe { slice::from_raw_parts(cd.p_queue_labels, cd.queue_label_count as usize) }; + let names = labels + .iter() + .flat_map(|dul_obj| { + unsafe { dul_obj.p_label_name.as_ref() } + .map(|lbl| unsafe { CStr::from_ptr(lbl) }.to_string_lossy()) + }) + .collect::<Vec<_>>(); + + let _ = std::panic::catch_unwind(|| { + log::log!(level, "\tqueues: {}", names.join(", ")); + }); + } + + if cd.cmd_buf_label_count != 0 { + let labels = + unsafe { slice::from_raw_parts(cd.p_cmd_buf_labels, cd.cmd_buf_label_count as usize) }; + let names = labels + .iter() + .flat_map(|dul_obj| { + unsafe { dul_obj.p_label_name.as_ref() } + .map(|lbl| unsafe { CStr::from_ptr(lbl) }.to_string_lossy()) + }) + .collect::<Vec<_>>(); + + let _ = std::panic::catch_unwind(|| { + log::log!(level, "\tcommand buffers: {}", names.join(", ")); + }); + } + + if cd.object_count != 0 { + let labels = unsafe { slice::from_raw_parts(cd.p_objects, cd.object_count as usize) }; + //TODO: use color fields of `vk::DebugUtilsLabelExt`? + let names = labels + .iter() + .map(|obj_info| { + let name = unsafe { obj_info.p_object_name.as_ref() } + .map(|name| unsafe { CStr::from_ptr(name) }.to_string_lossy()) + .unwrap_or(Cow::Borrowed("?")); + + format!( + "(type: {:?}, hndl: 0x{:x}, name: {})", + obj_info.object_type, obj_info.object_handle, name + ) + }) + .collect::<Vec<_>>(); + let _ = std::panic::catch_unwind(|| { + log::log!(level, "\tobjects: {}", names.join(", ")); + }); + } + + if cfg!(debug_assertions) && level == log::Level::Error { + // Set canary and continue + crate::VALIDATION_CANARY.set(); + } + + vk::FALSE +} + +impl super::Swapchain { + unsafe fn release_resources(self, device: &ash::Device) -> Self { + profiling::scope!("Swapchain::release_resources"); + { + profiling::scope!("vkDeviceWaitIdle"); + let _ = unsafe { device.device_wait_idle() }; + }; + unsafe { device.destroy_fence(self.fence, None) }; + self + } +} + +impl super::InstanceShared { + pub fn entry(&self) -> &ash::Entry { + &self.entry + } + + pub fn raw_instance(&self) -> &ash::Instance { + &self.raw + } + + pub fn driver_api_version(&self) -> u32 { + self.driver_api_version + } + + pub fn extensions(&self) -> &[&'static CStr] { + &self.extensions[..] + } +} + +impl super::Instance { + pub fn shared_instance(&self) -> &super::InstanceShared { + &self.shared + } + + pub fn required_extensions( + entry: &ash::Entry, + _driver_api_version: u32, + flags: crate::InstanceFlags, + ) -> Result<Vec<&'static CStr>, crate::InstanceError> { + let instance_extensions = entry + .enumerate_instance_extension_properties(None) + .map_err(|e| { + log::info!("enumerate_instance_extension_properties: {:?}", e); + crate::InstanceError + })?; + + // Check our extensions against the available extensions + let mut extensions: Vec<&'static CStr> = Vec::new(); + + // VK_KHR_surface + extensions.push(khr::Surface::name()); + + // Platform-specific WSI extensions + if cfg!(all( + unix, + not(target_os = "android"), + not(target_os = "macos") + )) { + // VK_KHR_xlib_surface + extensions.push(khr::XlibSurface::name()); + // VK_KHR_xcb_surface + extensions.push(khr::XcbSurface::name()); + // VK_KHR_wayland_surface + extensions.push(khr::WaylandSurface::name()); + } + if cfg!(target_os = "android") { + // VK_KHR_android_surface + extensions.push(khr::AndroidSurface::name()); + } + if cfg!(target_os = "windows") { + // VK_KHR_win32_surface + extensions.push(khr::Win32Surface::name()); + } + if cfg!(target_os = "macos") { + // VK_EXT_metal_surface + extensions.push(ext::MetalSurface::name()); + } + + if flags.contains(crate::InstanceFlags::DEBUG) { + // VK_EXT_debug_utils + extensions.push(ext::DebugUtils::name()); + } + + // VK_EXT_swapchain_colorspace + // Provid wide color gamut + extensions.push(vk::ExtSwapchainColorspaceFn::name()); + + // VK_KHR_get_physical_device_properties2 + // Even though the extension was promoted to Vulkan 1.1, we still require the extension + // so that we don't have to conditionally use the functions provided by the 1.1 instance + extensions.push(vk::KhrGetPhysicalDeviceProperties2Fn::name()); + + // Only keep available extensions. + extensions.retain(|&ext| { + if instance_extensions.iter().any(|inst_ext| { + crate::auxil::cstr_from_bytes_until_nul(&inst_ext.extension_name) == Some(ext) + }) { + true + } else { + log::info!("Unable to find extension: {}", ext.to_string_lossy()); + false + } + }); + Ok(extensions) + } + + /// # Safety + /// + /// - `raw_instance` must be created from `entry` + /// - `raw_instance` must be created respecting `driver_api_version`, `extensions` and `flags` + /// - `extensions` must be a superset of `required_extensions()` and must be created from the + /// same entry, driver_api_version and flags. + /// - `android_sdk_version` is ignored and can be `0` for all platforms besides Android + #[allow(clippy::too_many_arguments)] + pub unsafe fn from_raw( + entry: ash::Entry, + raw_instance: ash::Instance, + driver_api_version: u32, + android_sdk_version: u32, + extensions: Vec<&'static CStr>, + flags: crate::InstanceFlags, + has_nv_optimus: bool, + drop_guard: Option<crate::DropGuard>, + ) -> Result<Self, crate::InstanceError> { + log::info!("Instance version: 0x{:x}", driver_api_version); + + let debug_utils = if extensions.contains(&ext::DebugUtils::name()) { + log::info!("Enabling debug utils"); + let extension = ext::DebugUtils::new(&entry, &raw_instance); + // having ERROR unconditionally because Vk doesn't like empty flags + let mut severity = vk::DebugUtilsMessageSeverityFlagsEXT::ERROR; + if log::max_level() >= log::LevelFilter::Debug { + severity |= vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE; + } + if log::max_level() >= log::LevelFilter::Info { + severity |= vk::DebugUtilsMessageSeverityFlagsEXT::INFO; + } + if log::max_level() >= log::LevelFilter::Warn { + severity |= vk::DebugUtilsMessageSeverityFlagsEXT::WARNING; + } + let vk_info = vk::DebugUtilsMessengerCreateInfoEXT::builder() + .flags(vk::DebugUtilsMessengerCreateFlagsEXT::empty()) + .message_severity(severity) + .message_type( + vk::DebugUtilsMessageTypeFlagsEXT::GENERAL + | vk::DebugUtilsMessageTypeFlagsEXT::VALIDATION + | vk::DebugUtilsMessageTypeFlagsEXT::PERFORMANCE, + ) + .pfn_user_callback(Some(debug_utils_messenger_callback)); + let messenger = + unsafe { extension.create_debug_utils_messenger(&vk_info, None) }.unwrap(); + Some(super::DebugUtils { + extension, + messenger, + }) + } else { + None + }; + + let get_physical_device_properties = + if extensions.contains(&khr::GetPhysicalDeviceProperties2::name()) { + log::info!("Enabling device properties2"); + Some(khr::GetPhysicalDeviceProperties2::new( + &entry, + &raw_instance, + )) + } else { + None + }; + + Ok(Self { + shared: Arc::new(super::InstanceShared { + raw: raw_instance, + extensions, + drop_guard, + flags, + debug_utils, + get_physical_device_properties, + entry, + has_nv_optimus, + driver_api_version, + android_sdk_version, + }), + }) + } + + #[allow(dead_code)] + fn create_surface_from_xlib( + &self, + dpy: *mut vk::Display, + window: vk::Window, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&khr::XlibSurface::name()) { + log::warn!("Vulkan driver does not support VK_KHR_xlib_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let xlib_loader = khr::XlibSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::XlibSurfaceCreateInfoKHR::builder() + .flags(vk::XlibSurfaceCreateFlagsKHR::empty()) + .window(window) + .dpy(dpy); + + unsafe { xlib_loader.create_xlib_surface(&info, None) } + .expect("XlibSurface::create_xlib_surface() failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_from_xcb( + &self, + connection: *mut vk::xcb_connection_t, + window: vk::xcb_window_t, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&khr::XcbSurface::name()) { + log::warn!("Vulkan driver does not support VK_KHR_xcb_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let xcb_loader = khr::XcbSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::XcbSurfaceCreateInfoKHR::builder() + .flags(vk::XcbSurfaceCreateFlagsKHR::empty()) + .window(window) + .connection(connection); + + unsafe { xcb_loader.create_xcb_surface(&info, None) } + .expect("XcbSurface::create_xcb_surface() failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_from_wayland( + &self, + display: *mut c_void, + surface: *mut c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self + .shared + .extensions + .contains(&khr::WaylandSurface::name()) + { + log::debug!("Vulkan driver does not support VK_KHR_wayland_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let w_loader = khr::WaylandSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::WaylandSurfaceCreateInfoKHR::builder() + .flags(vk::WaylandSurfaceCreateFlagsKHR::empty()) + .display(display) + .surface(surface); + + unsafe { w_loader.create_wayland_surface(&info, None) }.expect("WaylandSurface failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_android( + &self, + window: *const c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self + .shared + .extensions + .contains(&khr::AndroidSurface::name()) + { + log::warn!("Vulkan driver does not support VK_KHR_android_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let a_loader = khr::AndroidSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::AndroidSurfaceCreateInfoKHR::builder() + .flags(vk::AndroidSurfaceCreateFlagsKHR::empty()) + .window(window as *mut _); + + unsafe { a_loader.create_android_surface(&info, None) }.expect("AndroidSurface failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_from_hwnd( + &self, + hinstance: *mut c_void, + hwnd: *mut c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&khr::Win32Surface::name()) { + log::debug!("Vulkan driver does not support VK_KHR_win32_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let info = vk::Win32SurfaceCreateInfoKHR::builder() + .flags(vk::Win32SurfaceCreateFlagsKHR::empty()) + .hinstance(hinstance) + .hwnd(hwnd); + let win32_loader = khr::Win32Surface::new(&self.shared.entry, &self.shared.raw); + unsafe { + win32_loader + .create_win32_surface(&info, None) + .expect("Unable to create Win32 surface") + } + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[cfg(any(target_os = "macos", target_os = "ios"))] + fn create_surface_from_view( + &self, + view: *mut c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&ext::MetalSurface::name()) { + log::warn!("Vulkan driver does not support VK_EXT_metal_surface"); + return Err(crate::InstanceError); + } + + let layer = unsafe { + crate::metal::Surface::get_metal_layer(view as *mut objc::runtime::Object, None) + }; + + let surface = { + let metal_loader = ext::MetalSurface::new(&self.shared.entry, &self.shared.raw); + let vk_info = vk::MetalSurfaceCreateInfoEXT::builder() + .flags(vk::MetalSurfaceCreateFlagsEXT::empty()) + .layer(layer as *mut _) + .build(); + + unsafe { metal_loader.create_metal_surface(&vk_info, None).unwrap() } + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + fn create_surface_from_vk_surface_khr(&self, surface: vk::SurfaceKHR) -> super::Surface { + let functor = khr::Surface::new(&self.shared.entry, &self.shared.raw); + super::Surface { + raw: surface, + functor, + instance: Arc::clone(&self.shared), + swapchain: None, + } + } +} + +impl Drop for super::InstanceShared { + fn drop(&mut self) { + unsafe { + if let Some(du) = self.debug_utils.take() { + du.extension + .destroy_debug_utils_messenger(du.messenger, None); + } + if let Some(_drop_guard) = self.drop_guard.take() { + self.raw.destroy_instance(None); + } + } + } +} + +impl crate::Instance<super::Api> for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + use crate::auxil::cstr_from_bytes_until_nul; + + let entry = match unsafe { ash::Entry::load() } { + Ok(entry) => entry, + Err(err) => { + log::info!("Missing Vulkan entry points: {:?}", err); + return Err(crate::InstanceError); + } + }; + let driver_api_version = match entry.try_enumerate_instance_version() { + // Vulkan 1.1+ + Ok(Some(version)) => version, + Ok(None) => vk::API_VERSION_1_0, + Err(err) => { + log::warn!("try_enumerate_instance_version: {:?}", err); + return Err(crate::InstanceError); + } + }; + + let app_name = CString::new(desc.name).unwrap(); + let app_info = vk::ApplicationInfo::builder() + .application_name(app_name.as_c_str()) + .application_version(1) + .engine_name(CStr::from_bytes_with_nul(b"wgpu-hal\0").unwrap()) + .engine_version(2) + .api_version( + // Vulkan 1.0 doesn't like anything but 1.0 passed in here... + if driver_api_version < vk::API_VERSION_1_1 { + vk::API_VERSION_1_0 + } else { + // This is the max Vulkan API version supported by `wgpu-hal`. + // + // If we want to increment this, there are some things that must be done first: + // - Audit the behavioral differences between the previous and new API versions. + // - Audit all extensions used by this backend: + // - If any were promoted in the new API version and the behavior has changed, we must handle the new behavior in addition to the old behavior. + // - If any were obsoleted in the new API version, we must implement a fallback for the new API version + // - If any are non-KHR-vendored, we must ensure the new behavior is still correct (since backwards-compatibility is not guaranteed). + vk::HEADER_VERSION_COMPLETE + }, + ); + + let extensions = Self::required_extensions(&entry, driver_api_version, desc.flags)?; + + let instance_layers = entry.enumerate_instance_layer_properties().map_err(|e| { + log::info!("enumerate_instance_layer_properties: {:?}", e); + crate::InstanceError + })?; + + let nv_optimus_layer = CStr::from_bytes_with_nul(b"VK_LAYER_NV_optimus\0").unwrap(); + let has_nv_optimus = instance_layers.iter().any(|inst_layer| { + cstr_from_bytes_until_nul(&inst_layer.layer_name) == Some(nv_optimus_layer) + }); + + // Check requested layers against the available layers + let layers = { + let mut layers: Vec<&'static CStr> = Vec::new(); + if desc.flags.contains(crate::InstanceFlags::VALIDATION) { + layers.push(CStr::from_bytes_with_nul(b"VK_LAYER_KHRONOS_validation\0").unwrap()); + } + + // Only keep available layers. + layers.retain(|&layer| { + if instance_layers.iter().any(|inst_layer| { + cstr_from_bytes_until_nul(&inst_layer.layer_name) == Some(layer) + }) { + true + } else { + log::warn!("Unable to find layer: {}", layer.to_string_lossy()); + false + } + }); + layers + }; + + #[cfg(target_os = "android")] + let android_sdk_version = { + let properties = android_system_properties::AndroidSystemProperties::new(); + // See: https://developer.android.com/reference/android/os/Build.VERSION_CODES + if let Some(val) = properties.get("ro.build.version.sdk") { + match val.parse::<u32>() { + Ok(sdk_ver) => sdk_ver, + Err(err) => { + log::error!( + "Couldn't parse Android's ro.build.version.sdk system property ({val}): {err}" + ); + 0 + } + } + } else { + log::error!("Couldn't read Android's ro.build.version.sdk system property"); + 0 + } + }; + #[cfg(not(target_os = "android"))] + let android_sdk_version = 0; + + let vk_instance = { + let str_pointers = layers + .iter() + .chain(extensions.iter()) + .map(|&s| { + // Safe because `layers` and `extensions` entries have static lifetime. + s.as_ptr() + }) + .collect::<Vec<_>>(); + + let create_info = vk::InstanceCreateInfo::builder() + .flags(vk::InstanceCreateFlags::empty()) + .application_info(&app_info) + .enabled_layer_names(&str_pointers[..layers.len()]) + .enabled_extension_names(&str_pointers[layers.len()..]); + + unsafe { entry.create_instance(&create_info, None) }.map_err(|e| { + log::warn!("create_instance: {:?}", e); + crate::InstanceError + })? + }; + + unsafe { + Self::from_raw( + entry, + vk_instance, + driver_api_version, + android_sdk_version, + extensions, + desc.flags, + has_nv_optimus, + Some(Box::new(())), // `Some` signals that wgpu-hal is in charge of destroying vk_instance + ) + } + } + + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<super::Surface, crate::InstanceError> { + use raw_window_handle::{RawDisplayHandle as Rdh, RawWindowHandle as Rwh}; + + match (window_handle, display_handle) { + (Rwh::Wayland(handle), Rdh::Wayland(display)) => { + self.create_surface_from_wayland(display.display, handle.surface) + } + (Rwh::Xlib(handle), Rdh::Xlib(display)) => { + self.create_surface_from_xlib(display.display as *mut _, handle.window) + } + (Rwh::Xcb(handle), Rdh::Xcb(display)) => { + self.create_surface_from_xcb(display.connection, handle.window) + } + (Rwh::AndroidNdk(handle), _) => self.create_surface_android(handle.a_native_window), + #[cfg(windows)] + (Rwh::Win32(handle), _) => { + use winapi::um::libloaderapi::GetModuleHandleW; + + let hinstance = unsafe { GetModuleHandleW(std::ptr::null()) }; + self.create_surface_from_hwnd(hinstance as *mut _, handle.hwnd) + } + #[cfg(target_os = "macos")] + (Rwh::AppKit(handle), _) + if self.shared.extensions.contains(&ext::MetalSurface::name()) => + { + self.create_surface_from_view(handle.ns_view) + } + #[cfg(target_os = "ios")] + (Rwh::UiKit(handle), _) + if self.shared.extensions.contains(&ext::MetalSurface::name()) => + { + self.create_surface_from_view(handle.ui_view) + } + (_, _) => Err(crate::InstanceError), + } + } + + unsafe fn destroy_surface(&self, surface: super::Surface) { + unsafe { surface.functor.destroy_surface(surface.raw, None) }; + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + use crate::auxil::db; + + let raw_devices = match unsafe { self.shared.raw.enumerate_physical_devices() } { + Ok(devices) => devices, + Err(err) => { + log::error!("enumerate_adapters: {}", err); + Vec::new() + } + }; + + let mut exposed_adapters = raw_devices + .into_iter() + .flat_map(|device| self.expose_adapter(device)) + .collect::<Vec<_>>(); + + // Detect if it's an Intel + NVidia configuration with Optimus + let has_nvidia_dgpu = exposed_adapters.iter().any(|exposed| { + exposed.info.device_type == wgt::DeviceType::DiscreteGpu + && exposed.info.vendor == db::nvidia::VENDOR + }); + if cfg!(target_os = "linux") && has_nvidia_dgpu && self.shared.has_nv_optimus { + for exposed in exposed_adapters.iter_mut() { + if exposed.info.device_type == wgt::DeviceType::IntegratedGpu + && exposed.info.vendor == db::intel::VENDOR + { + // See https://gitlab.freedesktop.org/mesa/mesa/-/issues/4688 + log::warn!( + "Disabling presentation on '{}' (id {:?}) because of NV Optimus (on Linux)", + exposed.info.name, + exposed.adapter.raw + ); + exposed.adapter.private_caps.can_present = false; + } + } + } + + exposed_adapters + } +} + +impl crate::Surface<super::Api> for super::Surface { + unsafe fn configure( + &mut self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + let old = self + .swapchain + .take() + .map(|sc| unsafe { sc.release_resources(&device.shared.raw) }); + + let swapchain = unsafe { device.create_swapchain(self, config, old)? }; + self.swapchain = Some(swapchain); + + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &super::Device) { + if let Some(sc) = self.swapchain.take() { + let swapchain = unsafe { sc.release_resources(&device.shared.raw) }; + unsafe { swapchain.functor.destroy_swapchain(swapchain.raw, None) }; + } + } + + unsafe fn acquire_texture( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let sc = self.swapchain.as_mut().unwrap(); + + let mut timeout_ns = match timeout { + Some(duration) => duration.as_nanos() as u64, + None => u64::MAX, + }; + + // AcquireNextImageKHR on Android (prior to Android 11) doesn't support timeouts + // and will also log verbose warnings if tying to use a timeout. + // + // Android 10 implementation for reference: + // https://android.googlesource.com/platform/frameworks/native/+/refs/tags/android-mainline-10.0.0_r13/vulkan/libvulkan/swapchain.cpp#1426 + // Android 11 implementation for reference: + // https://android.googlesource.com/platform/frameworks/native/+/refs/tags/android-mainline-11.0.0_r45/vulkan/libvulkan/swapchain.cpp#1438 + // + // Android 11 corresponds to an SDK_INT/ro.build.version.sdk of 30 + if cfg!(target_os = "android") && self.instance.android_sdk_version < 30 { + timeout_ns = u64::MAX; + } + + // will block if no image is available + let (index, suboptimal) = match unsafe { + sc.functor + .acquire_next_image(sc.raw, timeout_ns, vk::Semaphore::null(), sc.fence) + } { + // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. + // See the comment in `Queue::present`. + #[cfg(target_os = "android")] + Ok((index, _)) => (index, false), + #[cfg(not(target_os = "android"))] + Ok(pair) => pair, + Err(error) => { + return match error { + vk::Result::TIMEOUT => Ok(None), + vk::Result::NOT_READY | vk::Result::ERROR_OUT_OF_DATE_KHR => { + Err(crate::SurfaceError::Outdated) + } + vk::Result::ERROR_SURFACE_LOST_KHR => Err(crate::SurfaceError::Lost), + other => Err(crate::DeviceError::from(other).into()), + } + } + }; + + // special case for Intel Vulkan returning bizzare values (ugh) + if sc.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 { + return Err(crate::SurfaceError::Outdated); + } + + let fences = &[sc.fence]; + + unsafe { sc.device.raw.wait_for_fences(fences, true, !0) } + .map_err(crate::DeviceError::from)?; + unsafe { sc.device.raw.reset_fences(fences) }.map_err(crate::DeviceError::from)?; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03209 + let raw_flags = if sc + .raw_flags + .contains(vk::SwapchainCreateFlagsKHR::MUTABLE_FORMAT) + { + vk::ImageCreateFlags::MUTABLE_FORMAT | vk::ImageCreateFlags::EXTENDED_USAGE + } else { + vk::ImageCreateFlags::empty() + }; + + let texture = super::SurfaceTexture { + index, + texture: super::Texture { + raw: sc.images[index as usize], + drop_guard: None, + block: None, + usage: sc.config.usage, + format: sc.config.format, + raw_flags, + copy_size: crate::CopyExtent { + width: sc.config.extent.width, + height: sc.config.extent.height, + depth: 1, + }, + view_formats: sc.view_formats.clone(), + }, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal, + })) + } + + unsafe fn discard_texture(&mut self, _texture: super::SurfaceTexture) {} +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/mod.rs b/third_party/rust/wgpu-hal/src/vulkan/mod.rs new file mode 100644 index 0000000000..27200dc4e0 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/mod.rs @@ -0,0 +1,626 @@ +/*! +# Vulkan API internals. + +## Stack memory + +Ash expects slices, which we don't generally have available. +We cope with this requirement by the combination of the following ways: + - temporarily allocating `Vec` on heap, where overhead is permitted + - growing temporary local storage + - using `implace_it` on iterators + +## Framebuffers and Render passes + +Render passes are cached on the device and kept forever. + +Framebuffers are also cached on the device, but they are removed when +any of the image views (they have) gets removed. +If Vulkan supports image-less framebuffers, +then the actual views are excluded from the framebuffer key. + +## Fences + +If timeline semaphores are available, they are used 1:1 with wgpu-hal fences. +Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`. + +!*/ + +mod adapter; +mod command; +mod conv; +mod device; +mod instance; + +use std::{borrow::Borrow, ffi::CStr, fmt, num::NonZeroU32, sync::Arc}; + +use arrayvec::ArrayVec; +use ash::{ + extensions::{ext, khr}, + vk, +}; +use parking_lot::Mutex; + +const MILLIS_TO_NANOS: u64 = 1_000_000; +const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1; + +#[derive(Clone)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +struct DebugUtils { + extension: ext::DebugUtils, + messenger: vk::DebugUtilsMessengerEXT, +} + +pub struct InstanceShared { + raw: ash::Instance, + extensions: Vec<&'static CStr>, + drop_guard: Option<crate::DropGuard>, + flags: crate::InstanceFlags, + debug_utils: Option<DebugUtils>, + get_physical_device_properties: Option<khr::GetPhysicalDeviceProperties2>, + entry: ash::Entry, + has_nv_optimus: bool, + android_sdk_version: u32, + driver_api_version: u32, +} + +pub struct Instance { + shared: Arc<InstanceShared>, +} + +struct Swapchain { + raw: vk::SwapchainKHR, + raw_flags: vk::SwapchainCreateFlagsKHR, + functor: khr::Swapchain, + device: Arc<DeviceShared>, + fence: vk::Fence, + images: Vec<vk::Image>, + config: crate::SurfaceConfiguration, + view_formats: Vec<wgt::TextureFormat>, +} + +pub struct Surface { + raw: vk::SurfaceKHR, + functor: khr::Surface, + instance: Arc<InstanceShared>, + swapchain: Option<Swapchain>, +} + +#[derive(Debug)] +pub struct SurfaceTexture { + index: u32, + texture: Texture, +} + +impl Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + &self.texture + } +} + +pub struct Adapter { + raw: vk::PhysicalDevice, + instance: Arc<InstanceShared>, + //queue_families: Vec<vk::QueueFamilyProperties>, + known_memory_flags: vk::MemoryPropertyFlags, + phd_capabilities: adapter::PhysicalDeviceCapabilities, + //phd_features: adapter::PhysicalDeviceFeatures, + downlevel_flags: wgt::DownlevelFlags, + private_caps: PrivateCapabilities, + workarounds: Workarounds, +} + +// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`. +enum ExtensionFn<T> { + /// The loaded function pointer struct for an extension. + Extension(T), + /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used. + Promoted, +} + +struct DeviceExtensionFunctions { + draw_indirect_count: Option<khr::DrawIndirectCount>, + timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>, +} + +/// Set of internal capabilities, which don't show up in the exposed +/// device geometry, but affect the code paths taken internally. +#[derive(Clone, Debug)] +struct PrivateCapabilities { + /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height` or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport height does not require a Y shift. + /// + /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`). + flip_y_requires_shift: bool, + imageless_framebuffers: bool, + image_view_usage: bool, + timeline_semaphores: bool, + texture_d24: bool, + texture_d24_s8: bool, + texture_s8: bool, + /// Ability to present contents to any screen. Only needed to work around broken platform configurations. + can_present: bool, + non_coherent_map_mask: wgt::BufferAddress, + robust_buffer_access: bool, + robust_image_access: bool, + zero_initialize_workgroup_memory: bool, +} + +bitflags::bitflags!( + /// Workaround flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct Workarounds: u32 { + /// Only generate SPIR-V for one entry point at a time. + const SEPARATE_ENTRY_POINTS = 0x1; + /// Qualcomm OOMs when there are zero color attachments but a non-null pointer + /// to a subpass resolve attachment array. This nulls out that pointer in that case. + const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2; + } +); + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct AttachmentKey { + format: vk::Format, + layout: vk::ImageLayout, + ops: crate::AttachmentOps, +} + +impl AttachmentKey { + /// Returns an attachment key for a compatible attachment. + fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self { + Self { + format, + layout, + ops: crate::AttachmentOps::all(), + } + } +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct ColorAttachmentKey { + base: AttachmentKey, + resolve: Option<AttachmentKey>, +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct DepthStencilAttachmentKey { + base: AttachmentKey, + stencil_ops: crate::AttachmentOps, +} + +#[derive(Clone, Eq, Default, Hash, PartialEq)] +struct RenderPassKey { + colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>, + depth_stencil: Option<DepthStencilAttachmentKey>, + sample_count: u32, + multiview: Option<NonZeroU32>, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct FramebufferAttachment { + /// Can be NULL if the framebuffer is image-less + raw: vk::ImageView, + raw_image_flags: vk::ImageCreateFlags, + view_usage: crate::TextureUses, + view_format: wgt::TextureFormat, + raw_view_formats: Vec<vk::Format>, +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct FramebufferKey { + attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>, + extent: wgt::Extent3d, + sample_count: u32, +} + +struct DeviceShared { + raw: ash::Device, + family_index: u32, + queue_index: u32, + raw_queue: ash::vk::Queue, + handle_is_owned: bool, + instance: Arc<InstanceShared>, + physical_device: ash::vk::PhysicalDevice, + enabled_extensions: Vec<&'static CStr>, + extension_fns: DeviceExtensionFunctions, + vendor_id: u32, + timestamp_period: f32, + private_caps: PrivateCapabilities, + workarounds: Workarounds, + render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>, + framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>, +} + +pub struct Device { + shared: Arc<DeviceShared>, + mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>, + desc_allocator: + Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>, + valid_ash_memory_types: u32, + naga_options: naga::back::spv::Options, + #[cfg(feature = "renderdoc")] + render_doc: crate::auxil::renderdoc::RenderDoc, +} + +pub struct Queue { + raw: vk::Queue, + swapchain_fn: khr::Swapchain, + device: Arc<DeviceShared>, + family_index: u32, + /// We use a redundant chain of semaphores to pass on the signal + /// from submissions to the last present, since it's required by the + /// specification. + /// It would be correct to use a single semaphore there, but + /// [Intel hangs in `anv_queue_finish`](https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508). + relay_semaphores: [vk::Semaphore; 2], + relay_index: Option<usize>, +} + +#[derive(Debug)] +pub struct Buffer { + raw: vk::Buffer, + block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>, +} + +#[derive(Debug)] +pub struct Texture { + raw: vk::Image, + drop_guard: Option<crate::DropGuard>, + block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>, + usage: crate::TextureUses, + format: wgt::TextureFormat, + raw_flags: vk::ImageCreateFlags, + copy_size: crate::CopyExtent, + view_formats: Vec<wgt::TextureFormat>, +} + +impl Texture { + /// # Safety + /// + /// - The image handle must not be manually destroyed + pub unsafe fn raw_handle(&self) -> vk::Image { + self.raw + } +} + +#[derive(Debug)] +pub struct TextureView { + raw: vk::ImageView, + layers: NonZeroU32, + attachment: FramebufferAttachment, +} + +#[derive(Debug)] +pub struct Sampler { + raw: vk::Sampler, +} + +#[derive(Debug)] +pub struct BindGroupLayout { + raw: vk::DescriptorSetLayout, + desc_count: gpu_descriptor::DescriptorTotalCount, + types: Box<[(vk::DescriptorType, u32)]>, + /// Map of binding index to size, + binding_arrays: Vec<(u32, NonZeroU32)>, +} + +#[derive(Debug)] +pub struct PipelineLayout { + raw: vk::PipelineLayout, + binding_arrays: naga::back::spv::BindingMap, +} + +#[derive(Debug)] +pub struct BindGroup { + set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>, +} + +#[derive(Default)] +struct Temp { + marker: Vec<u8>, + buffer_barriers: Vec<vk::BufferMemoryBarrier>, + image_barriers: Vec<vk::ImageMemoryBarrier>, +} + +unsafe impl Send for Temp {} +unsafe impl Sync for Temp {} + +impl Temp { + fn clear(&mut self) { + self.marker.clear(); + self.buffer_barriers.clear(); + self.image_barriers.clear(); + //see also - https://github.com/NotIntMan/inplace_it/issues/8 + } + + fn make_c_str(&mut self, name: &str) -> &CStr { + self.marker.clear(); + self.marker.extend_from_slice(name.as_bytes()); + self.marker.push(0); + unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) } + } +} + +pub struct CommandEncoder { + raw: vk::CommandPool, + device: Arc<DeviceShared>, + active: vk::CommandBuffer, + bind_point: vk::PipelineBindPoint, + temp: Temp, + free: Vec<vk::CommandBuffer>, + discarded: Vec<vk::CommandBuffer>, + /// If this is true, the active renderpass enabled a debug span, + /// and needs to be disabled on renderpass close. + rpass_debug_marker_active: bool, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("raw", &self.raw) + .finish() + } +} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: vk::CommandBuffer, +} + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum ShaderModule { + Raw(vk::ShaderModule), + Intermediate { + naga_shader: crate::NagaShader, + runtime_checks: bool, + }, +} + +#[derive(Debug)] +pub struct RenderPipeline { + raw: vk::Pipeline, +} + +#[derive(Debug)] +pub struct ComputePipeline { + raw: vk::Pipeline, +} + +#[derive(Debug)] +pub struct QuerySet { + raw: vk::QueryPool, +} + +#[derive(Debug)] +pub enum Fence { + TimelineSemaphore(vk::Semaphore), + FencePool { + last_completed: crate::FenceValue, + /// The pending fence values have to be ascending. + active: Vec<(crate::FenceValue, vk::Fence)>, + free: Vec<vk::Fence>, + }, +} + +impl Fence { + fn check_active( + device: &ash::Device, + mut max_value: crate::FenceValue, + active: &[(crate::FenceValue, vk::Fence)], + ) -> Result<crate::FenceValue, crate::DeviceError> { + for &(value, raw) in active.iter() { + unsafe { + if value > max_value && device.get_fence_status(raw)? { + max_value = value; + } + } + } + Ok(max_value) + } + + fn get_latest( + &self, + device: &ash::Device, + extension: Option<&ExtensionFn<khr::TimelineSemaphore>>, + ) -> Result<crate::FenceValue, crate::DeviceError> { + match *self { + Self::TimelineSemaphore(raw) => unsafe { + Ok(match *extension.unwrap() { + ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?, + ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?, + }) + }, + Self::FencePool { + last_completed, + ref active, + free: _, + } => Self::check_active(device, last_completed, active), + } + } + + fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> { + match *self { + Self::TimelineSemaphore(_) => {} + Self::FencePool { + ref mut last_completed, + ref mut active, + ref mut free, + } => { + let latest = Self::check_active(device, *last_completed, active)?; + let base_free = free.len(); + for &(value, raw) in active.iter() { + if value <= latest { + free.push(raw); + } + } + if free.len() != base_free { + active.retain(|&(value, _)| value > latest); + unsafe { + device.reset_fences(&free[base_free..])?; + } + } + *last_completed = latest; + } + } + Ok(()) + } +} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&CommandBuffer], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + let vk_cmd_buffers = command_buffers + .iter() + .map(|cmd| cmd.raw) + .collect::<Vec<_>>(); + + let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + + let mut fence_raw = vk::Fence::null(); + let mut vk_timeline_info; + let mut signal_semaphores = [vk::Semaphore::null(), vk::Semaphore::null()]; + let signal_values; + + if let Some((fence, value)) = signal_fence { + fence.maintain(&self.device.raw)?; + match *fence { + Fence::TimelineSemaphore(raw) => { + signal_values = [!0, value]; + signal_semaphores[1] = raw; + vk_timeline_info = vk::TimelineSemaphoreSubmitInfo::builder() + .signal_semaphore_values(&signal_values); + vk_info = vk_info.push_next(&mut vk_timeline_info); + } + Fence::FencePool { + ref mut active, + ref mut free, + .. + } => { + fence_raw = match free.pop() { + Some(raw) => raw, + None => unsafe { + self.device + .raw + .create_fence(&vk::FenceCreateInfo::builder(), None)? + }, + }; + active.push((value, fence_raw)); + } + } + } + + let wait_stage_mask = [vk::PipelineStageFlags::TOP_OF_PIPE]; + let sem_index = match self.relay_index { + Some(old_index) => { + vk_info = vk_info + .wait_semaphores(&self.relay_semaphores[old_index..old_index + 1]) + .wait_dst_stage_mask(&wait_stage_mask); + (old_index + 1) % self.relay_semaphores.len() + } + None => 0, + }; + self.relay_index = Some(sem_index); + signal_semaphores[0] = self.relay_semaphores[sem_index]; + + let signal_count = if signal_semaphores[1] == vk::Semaphore::null() { + 1 + } else { + 2 + }; + vk_info = vk_info.signal_semaphores(&signal_semaphores[..signal_count]); + + profiling::scope!("vkQueueSubmit"); + unsafe { + self.device + .raw + .queue_submit(self.raw, &[vk_info.build()], fence_raw)? + }; + Ok(()) + } + + unsafe fn present( + &mut self, + surface: &mut Surface, + texture: SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + let ssc = surface.swapchain.as_ref().unwrap(); + + let swapchains = [ssc.raw]; + let image_indices = [texture.index]; + let mut vk_info = vk::PresentInfoKHR::builder() + .swapchains(&swapchains) + .image_indices(&image_indices); + + if let Some(old_index) = self.relay_index.take() { + vk_info = vk_info.wait_semaphores(&self.relay_semaphores[old_index..old_index + 1]); + } + + let suboptimal = { + profiling::scope!("vkQueuePresentKHR"); + unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| { + match error { + vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated, + vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost, + _ => crate::DeviceError::from(error).into(), + } + })? + }; + if suboptimal { + // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. + // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation + // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation). + // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`. + #[cfg(not(target_os = "android"))] + log::warn!("Suboptimal present of frame {}", texture.index); + } + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + self.device.timestamp_period + } +} + +impl From<vk::Result> for crate::DeviceError { + fn from(result: vk::Result) -> Self { + match result { + vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => { + Self::OutOfMemory + } + vk::Result::ERROR_DEVICE_LOST => Self::Lost, + _ => { + log::warn!("Unrecognized device error {:?}", result); + Self::Lost + } + } + } +} |