diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/wgpu-hal/src/dx12 | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/wgpu-hal/src/dx12')
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/adapter.rs | 587 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/command.rs | 1143 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/conv.rs | 350 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/descriptor.rs | 311 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/device.rs | 1590 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/instance.rs | 117 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/mod.rs | 902 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs | 294 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/suballocation.rs | 331 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/types.rs | 34 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/dx12/view.rs | 380 |
11 files changed, 6039 insertions, 0 deletions
diff --git a/third_party/rust/wgpu-hal/src/dx12/adapter.rs b/third_party/rust/wgpu-hal/src/dx12/adapter.rs new file mode 100644 index 0000000000..13530afb3e --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/adapter.rs @@ -0,0 +1,587 @@ +use crate::{ + auxil::{self, dxgi::result::HResult as _}, + dx12::SurfaceTarget, +}; +use std::{mem, ptr, sync::Arc, thread}; +use winapi::{ + shared::{dxgi, dxgi1_2, minwindef::DWORD, windef, winerror}, + um::{d3d12 as d3d12_ty, d3d12sdklayers, winuser}, +}; + +impl Drop for super::Adapter { + fn drop(&mut self) { + // Debug tracking alive objects + if !thread::panicking() + && self + .private_caps + .instance_flags + .contains(crate::InstanceFlags::VALIDATION) + { + unsafe { + self.report_live_objects(); + } + } + unsafe { + self.raw.destroy(); + } + } +} + +impl super::Adapter { + pub unsafe fn report_live_objects(&self) { + if let Ok(debug_device) = unsafe { + self.raw + .cast::<d3d12sdklayers::ID3D12DebugDevice>() + .into_result() + } { + unsafe { + debug_device.ReportLiveDeviceObjects( + d3d12sdklayers::D3D12_RLDO_SUMMARY | d3d12sdklayers::D3D12_RLDO_IGNORE_INTERNAL, + ) + }; + unsafe { debug_device.destroy() }; + } + } + + pub fn raw_adapter(&self) -> &d3d12::DxgiAdapter { + &self.raw + } + + #[allow(trivial_casts)] + pub(super) fn expose( + adapter: d3d12::DxgiAdapter, + library: &Arc<d3d12::D3D12Lib>, + instance_flags: crate::InstanceFlags, + dx12_shader_compiler: &wgt::Dx12Compiler, + ) -> Option<crate::ExposedAdapter<super::Api>> { + // Create the device so that we can get the capabilities. + let device = { + profiling::scope!("ID3D12Device::create_device"); + match library.create_device(*adapter, d3d12::FeatureLevel::L11_0) { + Ok(pair) => match pair.into_result() { + Ok(device) => device, + Err(err) => { + log::warn!("Device creation failed: {}", err); + return None; + } + }, + Err(err) => { + log::warn!("Device creation function is not found: {:?}", err); + return None; + } + } + }; + + profiling::scope!("feature queries"); + + // We have found a possible adapter. + // Acquire the device information. + let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() }; + unsafe { + adapter.unwrap_adapter2().GetDesc2(&mut desc); + } + + let device_name = { + use std::{ffi::OsString, os::windows::ffi::OsStringExt}; + let len = desc.Description.iter().take_while(|&&c| c != 0).count(); + let name = OsString::from_wide(&desc.Description[..len]); + name.to_string_lossy().into_owned() + }; + + let mut features_architecture: d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE = + unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_ARCHITECTURE, + &mut features_architecture as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE>() as _, + ) + }); + + let mut shader_model_support: d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL = + d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL { + HighestShaderModel: d3d12_ty::D3D_SHADER_MODEL_6_0, + }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_SHADER_MODEL, + &mut shader_model_support as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL>() as _, + ) + }); + + let mut workarounds = super::Workarounds::default(); + + let info = wgt::AdapterInfo { + backend: wgt::Backend::Dx12, + name: device_name, + vendor: desc.VendorId, + device: desc.DeviceId, + device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + workarounds.avoid_cpu_descriptor_overwrites = true; + wgt::DeviceType::Cpu + } else if features_architecture.UMA != 0 { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + }, + driver: String::new(), + driver_info: String::new(), + }; + + let mut options: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS = unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS, + &mut options as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS>() as _, + ) + }); + + let _depth_bounds_test_supported = { + let mut features2: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS2, + &mut features2 as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2>() as _, + ) + }; + hr == 0 && features2.DepthBoundsTestSupported != 0 + }; + + let casting_fully_typed_format_supported = { + let mut features3: crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + 21, // D3D12_FEATURE_D3D12_OPTIONS3 + &mut features3 as *mut _ as *mut _, + mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3>() as _, + ) + }; + hr == 0 && features3.CastingFullyTypedFormatSupported != 0 + }; + + let private_caps = super::PrivateCapabilities { + instance_flags, + heterogeneous_resource_heaps: options.ResourceHeapTier + != d3d12_ty::D3D12_RESOURCE_HEAP_TIER_1, + memory_architecture: if features_architecture.UMA != 0 { + super::MemoryArchitecture::Unified { + cache_coherent: features_architecture.CacheCoherentUMA != 0, + } + } else { + super::MemoryArchitecture::NonUnified + }, + heap_create_not_zeroed: false, //TODO: winapi support for Options7 + casting_fully_typed_format_supported, + }; + + // Theoretically vram limited, but in practice 2^20 is the limit + let tier3_practical_descriptor_limit = 1 << 20; + + let (full_heap_count, _uav_count) = match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => ( + d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + 8, // conservative, is 64 on feature level 11.1 + ), + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_2 => ( + d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2, + 64, + ), + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_3 => ( + tier3_practical_descriptor_limit, + tier3_practical_descriptor_limit, + ), + other => { + log::warn!("Unknown resource binding tier {}", other); + ( + d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + 8, + ) + } + }; + + let mut features = wgt::Features::empty() + | wgt::Features::DEPTH_CLIP_CONTROL + | wgt::Features::DEPTH32FLOAT_STENCIL8 + | wgt::Features::INDIRECT_FIRST_INSTANCE + | wgt::Features::MAPPABLE_PRIMARY_BUFFERS + | wgt::Features::MULTI_DRAW_INDIRECT + | wgt::Features::MULTI_DRAW_INDIRECT_COUNT + | wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER + | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO + | wgt::Features::POLYGON_MODE_LINE + | wgt::Features::POLYGON_MODE_POINT + | wgt::Features::VERTEX_WRITABLE_STORAGE + | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | wgt::Features::TIMESTAMP_QUERY + | wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES + | wgt::Features::TEXTURE_COMPRESSION_BC + | wgt::Features::CLEAR_TEXTURE + | wgt::Features::TEXTURE_FORMAT_16BIT_NORM + | wgt::Features::PUSH_CONSTANTS + | wgt::Features::SHADER_PRIMITIVE_INDEX + | wgt::Features::RG11B10UFLOAT_RENDERABLE; + //TODO: in order to expose this, we need to run a compute shader + // that extract the necessary statistics out of the D3D12 result. + // Alternatively, we could allocate a buffer for the query set, + // write the results there, and issue a bunch of copy commands. + //| wgt::Features::PIPELINE_STATISTICS_QUERY + + features.set( + wgt::Features::CONSERVATIVE_RASTERIZATION, + options.ConservativeRasterizationTier + != d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED, + ); + + features.set( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + shader_model_support.HighestShaderModel >= d3d12_ty::D3D_SHADER_MODEL_5_1, + ); + + // TODO: Determine if IPresentationManager is supported + let presentation_timer = auxil::dxgi::time::PresentationTimer::new_dxgi(); + + let base = wgt::Limits::default(); + + Some(crate::ExposedAdapter { + adapter: super::Adapter { + raw: adapter, + device, + library: Arc::clone(library), + private_caps, + presentation_timer, + workarounds, + dx12_shader_compiler: dx12_shader_compiler.clone(), + }, + info, + features, + capabilities: crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: d3d12_ty::D3D12_REQ_TEXTURE1D_U_DIMENSION, + max_texture_dimension_2d: d3d12_ty::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION + .min(d3d12_ty::D3D12_REQ_TEXTURECUBE_DIMENSION), + max_texture_dimension_3d: d3d12_ty::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + max_texture_array_layers: d3d12_ty::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION, + max_bind_groups: crate::MAX_BIND_GROUPS as u32, + max_bindings_per_bind_group: 65535, + // dynamic offsets take a root constant, so we expose the minimum here + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 128, + _ => full_heap_count, + }, + max_samplers_per_shader_stage: match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 16, + _ => d3d12_ty::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, + }, + // these both account towards `uav_count`, but we can't express the limit as as sum + max_storage_buffers_per_shader_stage: base.max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage: base + .max_storage_textures_per_shader_stage, + max_uniform_buffers_per_shader_stage: full_heap_count, + max_uniform_buffer_binding_size: + d3d12_ty::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16, + max_storage_buffer_binding_size: crate::auxil::MAX_I32_BINDING_SIZE, + max_vertex_buffers: d3d12_ty::D3D12_VS_INPUT_REGISTER_COUNT + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: d3d12_ty::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, + max_vertex_buffer_array_stride: d3d12_ty::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, + // The push constants are part of the root signature which + // has a limit of 64 DWORDS (256 bytes), but other resources + // also share the root signature: + // + // - push constants consume a `DWORD` for each `4 bytes` of data + // - If a bind group has buffers it will consume a `DWORD` + // for the descriptor table + // - If a bind group has samplers it will consume a `DWORD` + // for the descriptor table + // - Each dynamic buffer will consume `2 DWORDs` for the + // root descriptor + // - The special constants buffer count as constants + // + // Since we can't know beforehand all root signatures that + // will be created, the max size to be used for push + // constants needs to be set to a reasonable number instead. + // + // Source: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits#memory-limits-and-costs + max_push_constant_size: 128, + min_uniform_buffer_offset_alignment: + d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, + min_storage_buffer_offset_alignment: 4, + max_inter_stage_shader_components: base.max_inter_stage_shader_components, + max_compute_workgroup_storage_size: base.max_compute_workgroup_storage_size, //TODO? + max_compute_invocations_per_workgroup: + d3d12_ty::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP, + max_compute_workgroup_size_x: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_X, + max_compute_workgroup_size_y: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Y, + max_compute_workgroup_size_z: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Z, + max_compute_workgroups_per_dimension: + d3d12_ty::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + max_buffer_size: u64::MAX, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new( + d3d12_ty::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64, + ) + .unwrap(), + buffer_copy_pitch: wgt::BufferSize::new( + d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64, + ) + .unwrap(), + }, + downlevel: wgt::DownlevelCapabilities::default(), + }, + }) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + _features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let queue = { + profiling::scope!("ID3D12Device::CreateCommandQueue"); + self.device + .create_command_queue( + d3d12::CmdListType::Direct, + d3d12::Priority::Normal, + d3d12::CommandQueueFlags::empty(), + 0, + ) + .into_device_result("Queue creation")? + }; + + let device = super::Device::new( + self.device, + queue, + self.private_caps, + &self.library, + self.dx12_shader_compiler.clone(), + )?; + Ok(crate::OpenDevice { + device, + queue: super::Queue { + raw: queue, + temp_lists: Vec::new(), + }, + }) + } + + #[allow(trivial_casts)] + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + + let raw_format = match auxil::dxgi::conv::map_texture_format_failable(format) { + Some(f) => f, + None => return Tfc::empty(), + }; + let srv_uav_format = if format.is_combined_depth_stencil_format() { + auxil::dxgi::conv::map_texture_format_for_srv_uav( + format, + // use the depth aspect here as opposed to stencil since it has more capabilities + crate::FormatAspects::DEPTH, + ) + } else { + auxil::dxgi::conv::map_texture_format_for_srv_uav( + format, + crate::FormatAspects::from(format), + ) + } + .unwrap(); + + let mut data = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: raw_format, + Support1: unsafe { mem::zeroed() }, + Support2: unsafe { mem::zeroed() }, + }; + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + &mut data as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>() as _, + ) + }); + + // Because we use a different format for SRV and UAV views of depth textures, we need to check + // the features that use SRV/UAVs using the no-depth format. + let mut data_srv_uav = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: srv_uav_format, + Support1: d3d12_ty::D3D12_FORMAT_SUPPORT1_NONE, + Support2: d3d12_ty::D3D12_FORMAT_SUPPORT2_NONE, + }; + if raw_format != srv_uav_format { + // Only-recheck if we're using a different format + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + ptr::addr_of_mut!(data_srv_uav).cast(), + DWORD::try_from(mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>()) + .unwrap(), + ) + }); + } else { + // Same format, just copy over. + data_srv_uav = data; + } + + let mut caps = Tfc::COPY_SRC | Tfc::COPY_DST; + let is_texture = data.Support1 + & (d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE1D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE2D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE3D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURECUBE) + != 0; + // SRVs use srv_uav_format + caps.set( + Tfc::SAMPLED, + is_texture && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_LOAD != 0, + ); + caps.set( + Tfc::SAMPLED_LINEAR, + data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT_BLEND, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_BLENDABLE != 0, + ); + caps.set( + Tfc::DEPTH_STENCIL_ATTACHMENT, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL != 0, + ); + // UAVs use srv_uav_format + caps.set( + Tfc::STORAGE, + data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW + != 0, + ); + caps.set( + Tfc::STORAGE_READ_WRITE, + data_srv_uav.Support2 & d3d12_ty::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD != 0, + ); + + // We load via UAV/SRV so use srv_uav_format + let no_msaa_load = caps.contains(Tfc::SAMPLED) + && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD == 0; + + let no_msaa_target = data.Support1 + & (d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET + | d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) + != 0 + && data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET == 0; + + caps.set( + Tfc::MULTISAMPLE_RESOLVE, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE != 0, + ); + + let mut ms_levels = d3d12_ty::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS { + Format: raw_format, + SampleCount: 0, + Flags: d3d12_ty::D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, + NumQualityLevels: 0, + }; + + let mut set_sample_count = |sc: u32, tfc: Tfc| { + ms_levels.SampleCount = sc; + + if unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + <*mut _>::cast(&mut ms_levels), + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS>() as _, + ) + } == winerror::S_OK + && ms_levels.NumQualityLevels != 0 + { + caps.set(tfc, !no_msaa_load && !no_msaa_target); + } + }; + + set_sample_count(2, Tfc::MULTISAMPLE_X2); + set_sample_count(4, Tfc::MULTISAMPLE_X4); + set_sample_count(8, Tfc::MULTISAMPLE_X8); + set_sample_count(16, Tfc::MULTISAMPLE_X16); + + caps + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + let current_extent = { + match surface.target { + SurfaceTarget::WndHandle(wnd_handle) => { + let mut rect: windef::RECT = unsafe { mem::zeroed() }; + if unsafe { winuser::GetClientRect(wnd_handle, &mut rect) } != 0 { + Some(wgt::Extent3d { + width: (rect.right - rect.left) as u32, + height: (rect.bottom - rect.top) as u32, + depth_or_array_layers: 1, + }) + } else { + log::warn!("Unable to get the window client rect"); + None + } + } + SurfaceTarget::Visual(_) | SurfaceTarget::SurfaceHandle(_) => None, + } + }; + + let mut present_modes = vec![wgt::PresentMode::Mailbox, wgt::PresentMode::Fifo]; + if surface.supports_allow_tearing { + present_modes.push(wgt::PresentMode::Immediate); + } + + Some(crate::SurfaceCapabilities { + formats: vec![ + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Rgba8UnormSrgb, + wgt::TextureFormat::Rgba8Unorm, + wgt::TextureFormat::Rgb10a2Unorm, + wgt::TextureFormat::Rgba16Float, + ], + // we currently use a flip effect which supports 2..=16 buffers + swap_chain_sizes: 2..=16, + current_extent, + // TODO: figure out the exact bounds + extents: wgt::Extent3d { + width: 16, + height: 16, + depth_or_array_layers: 1, + }..=wgt::Extent3d { + width: 4096, + height: 4096, + depth_or_array_layers: 1, + }, + usage: crate::TextureUses::COLOR_TARGET + | crate::TextureUses::COPY_SRC + | crate::TextureUses::COPY_DST, + present_modes, + composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp(self.presentation_timer.get_timestamp_ns()) + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/command.rs b/third_party/rust/wgpu-hal/src/dx12/command.rs new file mode 100644 index 0000000000..4786a61bf9 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/command.rs @@ -0,0 +1,1143 @@ +use crate::auxil::{self, dxgi::result::HResult as _}; + +use super::conv; +use std::{mem, ops::Range, ptr}; +use winapi::um::d3d12 as d3d12_ty; + +fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12_ty::D3D12_BOX { + d3d12_ty::D3D12_BOX { + left: origin.x, + top: origin.y, + right: origin.x + size.width, + bottom: origin.y + size.height, + front: origin.z, + back: origin.z + size.depth, + } +} + +impl crate::BufferTextureCopy { + fn to_subresource_footprint( + &self, + format: wgt::TextureFormat, + ) -> d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + let (block_width, block_height) = format.block_dimensions(); + d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: self.buffer_layout.offset, + Footprint: d3d12_ty::D3D12_SUBRESOURCE_FOOTPRINT { + Format: auxil::dxgi::conv::map_texture_format_for_copy( + format, + self.texture_base.aspect, + ) + .unwrap(), + Width: self.size.width, + Height: self + .buffer_layout + .rows_per_image + .map_or(self.size.height, |count| count * block_height), + Depth: self.size.depth, + RowPitch: { + let actual = self.buffer_layout.bytes_per_row.unwrap_or_else(|| { + // this may happen for single-line updates + let block_size = format + .block_size(Some(self.texture_base.aspect.map())) + .unwrap(); + (self.size.width / block_width) * block_size + }); + wgt::math::align_to(actual, d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + }, + }, + } + } +} + +impl super::Temp { + fn prepare_marker(&mut self, marker: &str) -> (&[u16], u32) { + self.marker.clear(); + self.marker.extend(marker.encode_utf16()); + self.marker.push(0); + (&self.marker, self.marker.len() as u32 * 2) + } +} + +impl super::CommandEncoder { + unsafe fn begin_pass(&mut self, kind: super::PassKind, label: crate::Label) { + let list = self.list.unwrap(); + self.pass.kind = kind; + if let Some(label) = label { + let (wide_label, size) = self.temp.prepare_marker(label); + unsafe { list.BeginEvent(0, wide_label.as_ptr() as *const _, size) }; + self.pass.has_label = true; + } + self.pass.dirty_root_elements = 0; + self.pass.dirty_vertex_buffers = 0; + list.set_descriptor_heaps(&[self.shared.heap_views.raw, self.shared.heap_samplers.raw]); + } + + unsafe fn end_pass(&mut self) { + let list = self.list.unwrap(); + list.set_descriptor_heaps(&[]); + if self.pass.has_label { + unsafe { list.EndEvent() }; + } + self.pass.clear(); + } + + unsafe fn prepare_draw(&mut self, base_vertex: i32, base_instance: u32) { + while self.pass.dirty_vertex_buffers != 0 { + let list = self.list.unwrap(); + let index = self.pass.dirty_vertex_buffers.trailing_zeros(); + self.pass.dirty_vertex_buffers ^= 1 << index; + unsafe { + list.IASetVertexBuffers( + index, + 1, + self.pass.vertex_buffers.as_ptr().offset(index as isize), + ); + } + } + if let Some(root_index) = self.pass.layout.special_constants_root_index { + let needs_update = match self.pass.root_elements[root_index as usize] { + super::RootElement::SpecialConstantBuffer { + base_vertex: other_vertex, + base_instance: other_instance, + other: _, + } => base_vertex != other_vertex || base_instance != other_instance, + _ => true, + }; + if needs_update { + self.pass.dirty_root_elements |= 1 << root_index; + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + base_vertex, + base_instance, + other: 0, + }; + } + } + self.update_root_elements(); + } + + fn prepare_dispatch(&mut self, count: [u32; 3]) { + if let Some(root_index) = self.pass.layout.special_constants_root_index { + let needs_update = match self.pass.root_elements[root_index as usize] { + super::RootElement::SpecialConstantBuffer { + base_vertex, + base_instance, + other, + } => [base_vertex as u32, base_instance, other] != count, + _ => true, + }; + if needs_update { + self.pass.dirty_root_elements |= 1 << root_index; + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + base_vertex: count[0] as i32, + base_instance: count[1], + other: count[2], + }; + } + } + self.update_root_elements(); + } + + //Note: we have to call this lazily before draw calls. Otherwise, D3D complains + // about the root parameters being incompatible with root signature. + fn update_root_elements(&mut self) { + use super::{BufferViewKind as Bvk, PassKind as Pk}; + + while self.pass.dirty_root_elements != 0 { + let list = self.list.unwrap(); + let index = self.pass.dirty_root_elements.trailing_zeros(); + self.pass.dirty_root_elements ^= 1 << index; + + match self.pass.root_elements[index as usize] { + super::RootElement::Empty => log::error!("Root index {} is not bound", index), + super::RootElement::Constant => { + let info = self.pass.layout.root_constant_info.as_ref().unwrap(); + + for offset in info.range.clone() { + let val = self.pass.constant_data[offset as usize]; + match self.pass.kind { + Pk::Render => list.set_graphics_root_constant(index, val, offset), + Pk::Compute => list.set_compute_root_constant(index, val, offset), + Pk::Transfer => (), + } + } + } + super::RootElement::SpecialConstantBuffer { + base_vertex, + base_instance, + other, + } => match self.pass.kind { + Pk::Render => { + list.set_graphics_root_constant(index, base_vertex as u32, 0); + list.set_graphics_root_constant(index, base_instance, 1); + } + Pk::Compute => { + list.set_compute_root_constant(index, base_vertex as u32, 0); + list.set_compute_root_constant(index, base_instance, 1); + list.set_compute_root_constant(index, other, 2); + } + Pk::Transfer => (), + }, + super::RootElement::Table(descriptor) => match self.pass.kind { + Pk::Render => list.set_graphics_root_descriptor_table(index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(index, descriptor), + Pk::Transfer => (), + }, + super::RootElement::DynamicOffsetBuffer { kind, address } => { + match (self.pass.kind, kind) { + (Pk::Render, Bvk::Constant) => { + list.set_graphics_root_constant_buffer_view(index, address) + } + (Pk::Compute, Bvk::Constant) => { + list.set_compute_root_constant_buffer_view(index, address) + } + (Pk::Render, Bvk::ShaderResource) => { + list.set_graphics_root_shader_resource_view(index, address) + } + (Pk::Compute, Bvk::ShaderResource) => { + list.set_compute_root_shader_resource_view(index, address) + } + (Pk::Render, Bvk::UnorderedAccess) => { + list.set_graphics_root_unordered_access_view(index, address) + } + (Pk::Compute, Bvk::UnorderedAccess) => { + list.set_compute_root_unordered_access_view(index, address) + } + (Pk::Transfer, _) => (), + } + } + } + } + } + + fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) { + log::trace!("Reset signature {:?}", layout.signature); + if let Some(root_index) = layout.special_constants_root_index { + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + base_vertex: 0, + base_instance: 0, + other: 0, + }; + } + self.pass.layout = layout.clone(); + self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1; + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let list = loop { + if let Some(list) = self.free_lists.pop() { + let reset_result = list + .reset(self.allocator, d3d12::PipelineState::null()) + .into_result(); + if reset_result.is_ok() { + break Some(list); + } else { + unsafe { + list.destroy(); + } + } + } else { + break None; + } + }; + + let list = if let Some(list) = list { + list + } else { + self.device + .create_graphics_command_list( + d3d12::CmdListType::Direct, + self.allocator, + d3d12::PipelineState::null(), + 0, + ) + .into_device_result("Create command list")? + }; + + if let Some(label) = label { + let cwstr = conv::map_label(label); + unsafe { list.SetName(cwstr.as_ptr()) }; + } + + self.list = Some(list); + self.temp.clear(); + self.pass.clear(); + Ok(()) + } + unsafe fn discard_encoding(&mut self) { + if let Some(list) = self.list.take() { + if list.close().into_result().is_ok() { + self.free_lists.push(list); + } else { + unsafe { + list.destroy(); + } + } + } + } + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + let raw = self.list.take().unwrap(); + let closed = raw.close().into_result().is_ok(); + Ok(super::CommandBuffer { raw, closed }) + } + unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) { + for cmd_buf in command_buffers { + if cmd_buf.closed { + self.free_lists.push(cmd_buf.raw); + } else { + unsafe { + cmd_buf.raw.destroy(); + } + } + } + self.allocator.reset(); + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + self.temp.barriers.clear(); + + log::trace!("List {:p} buffer transitions", self.list.unwrap().as_ptr()); + for barrier in barriers { + log::trace!( + "\t{:p}: usage {:?}..{:?}", + barrier.buffer.resource.as_ptr(), + barrier.usage.start, + barrier.usage.end + ); + let s0 = conv::map_buffer_usage_to_state(barrier.usage.start); + let s1 = conv::map_buffer_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + } + }; + self.temp.barriers.push(raw); + } else if barrier.usage.start == crate::BufferUses::STORAGE_READ_WRITE { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + } + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + unsafe { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + }; + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + self.temp.barriers.clear(); + + log::trace!("List {:p} texture transitions", self.list.unwrap().as_ptr()); + for barrier in barriers { + log::trace!( + "\t{:p}: usage {:?}..{:?}, range {:?}", + barrier.texture.resource.as_ptr(), + barrier.usage.start, + barrier.usage.end, + barrier.range + ); + let s0 = conv::map_texture_usage_to_state(barrier.usage.start); + let s1 = conv::map_texture_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + } + }; + + let tex_mip_level_count = barrier.texture.mip_level_count; + let tex_array_layer_count = barrier.texture.array_layer_count(); + + if barrier.range.is_full_resource( + barrier.texture.format, + tex_mip_level_count, + tex_array_layer_count, + ) { + // Only one barrier if it affects the whole image. + self.temp.barriers.push(raw); + } else { + // Selected texture aspect is relevant if the texture format has both depth _and_ stencil aspects. + let planes = if barrier.texture.format.is_combined_depth_stencil_format() { + match barrier.range.aspect { + wgt::TextureAspect::All => 0..2, + wgt::TextureAspect::DepthOnly => 0..1, + wgt::TextureAspect::StencilOnly => 1..2, + } + } else { + match barrier.texture.format { + wgt::TextureFormat::Stencil8 => 1..2, + wgt::TextureFormat::Depth24Plus => 0..2, // TODO: investigate why tests fail if we set this to 0..1 + _ => 0..1, + } + }; + + for mip_level in barrier.range.mip_range(tex_mip_level_count) { + for array_layer in barrier.range.layer_range(tex_array_layer_count) { + for plane in planes.clone() { + unsafe { + raw.u.Transition_mut().Subresource = barrier + .texture + .calc_subresource(mip_level, array_layer, plane); + }; + self.temp.barriers.push(raw); + } + } + } + } + } else if barrier.usage.start == crate::TextureUses::STORAGE_READ_WRITE { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + } + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + unsafe { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + }; + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + let list = self.list.unwrap(); + let mut offset = range.start; + while offset < range.end { + let size = super::ZERO_BUFFER_SIZE.min(range.end - offset); + unsafe { + list.CopyBufferRegion( + buffer.resource.as_mut_ptr(), + offset, + self.shared.zero_buffer.as_mut_ptr(), + 0, + size, + ) + }; + offset += size; + } + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let list = self.list.unwrap(); + for r in regions { + unsafe { + list.CopyBufferRegion( + dst.resource.as_mut_ptr(), + r.dst_offset, + src.resource.as_mut_ptr(), + r.src_offset, + r.size.get(), + ) + }; + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let list = self.list.unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + + for r in regions { + let src_box = make_box(&r.src_base.origin, &r.size); + unsafe { + *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.src_base) + }; + unsafe { + *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.dst_base) + }; + + unsafe { + list.CopyTextureRegion( + &dst_location, + r.dst_base.origin.x, + r.dst_base.origin.y, + r.dst_base.origin.z, + &src_location, + &src_box, + ) + }; + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let list = self.list.unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + for r in regions { + let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); + unsafe { + *src_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(dst.format) + }; + unsafe { + *dst_location.u.SubresourceIndex_mut() = + dst.calc_subresource_for_copy(&r.texture_base) + }; + unsafe { + list.CopyTextureRegion( + &dst_location, + r.texture_base.origin.x, + r.texture_base.origin.y, + r.texture_base.origin.z, + &src_location, + &src_box, + ) + }; + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let list = self.list.unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: unsafe { mem::zeroed() }, + }; + for r in regions { + let src_box = make_box(&r.texture_base.origin, &r.size); + unsafe { + *src_location.u.SubresourceIndex_mut() = + src.calc_subresource_for_copy(&r.texture_base) + }; + unsafe { + *dst_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(src.format) + }; + unsafe { list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box) }; + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list + .unwrap() + .BeginQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + }; + } + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list + .unwrap() + .EndQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + }; + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list.unwrap().EndQuery( + set.raw.as_mut_ptr(), + d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + index, + ) + }; + } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range<u32>) { + // nothing to do here + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _stride: wgt::BufferSize, + ) { + unsafe { + self.list.unwrap().ResolveQueryData( + set.raw.as_mut_ptr(), + set.raw_ty, + range.start, + range.end - range.start, + buffer.resource.as_mut_ptr(), + offset, + ) + }; + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + unsafe { self.begin_pass(super::PassKind::Render, desc.label) }; + let mut color_views = [d3d12::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_ATTACHMENTS]; + for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { + if let Some(cat) = cat.as_ref() { + *rtv = cat.target.view.handle_rtv.unwrap().raw; + } else { + *rtv = self.null_rtv_handle.raw; + } + } + + let ds_view = match desc.depth_stencil_attachment { + None => ptr::null(), + Some(ref ds) => { + if ds.target.usage == crate::TextureUses::DEPTH_STENCIL_WRITE { + &ds.target.view.handle_dsv_rw.as_ref().unwrap().raw + } else { + &ds.target.view.handle_dsv_ro.as_ref().unwrap().raw + } + } + }; + + let list = self.list.unwrap(); + unsafe { + list.OMSetRenderTargets( + desc.color_attachments.len() as u32, + color_views.as_ptr(), + 0, + ds_view, + ) + }; + + self.pass.resolves.clear(); + for (rtv, cat) in color_views.iter().zip(desc.color_attachments.iter()) { + if let Some(cat) = cat.as_ref() { + if !cat.ops.contains(crate::AttachmentOps::LOAD) { + let value = [ + cat.clear_value.r as f32, + cat.clear_value.g as f32, + cat.clear_value.b as f32, + cat.clear_value.a as f32, + ]; + list.clear_render_target_view(*rtv, value, &[]); + } + if let Some(ref target) = cat.resolve_target { + self.pass.resolves.push(super::PassResolve { + src: cat.target.view.target_base, + dst: target.view.target_base, + format: target.view.raw_format, + }); + } + } + } + + if let Some(ref ds) = desc.depth_stencil_attachment { + let mut flags = d3d12::ClearFlags::empty(); + let aspects = ds.target.view.aspects; + if !ds.depth_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::DEPTH) + { + flags |= d3d12::ClearFlags::DEPTH; + } + if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::STENCIL) + { + flags |= d3d12::ClearFlags::STENCIL; + } + + if !ds_view.is_null() && !flags.is_empty() { + list.clear_depth_stencil_view( + unsafe { *ds_view }, + flags, + ds.clear_value.0, + ds.clear_value.1 as u8, + &[], + ); + } + } + + let raw_vp = d3d12_ty::D3D12_VIEWPORT { + TopLeftX: 0.0, + TopLeftY: 0.0, + Width: desc.extent.width as f32, + Height: desc.extent.height as f32, + MinDepth: 0.0, + MaxDepth: 1.0, + }; + let raw_rect = d3d12_ty::D3D12_RECT { + left: 0, + top: 0, + right: desc.extent.width as i32, + bottom: desc.extent.height as i32, + }; + unsafe { list.RSSetViewports(1, &raw_vp) }; + unsafe { list.RSSetScissorRects(1, &raw_rect) }; + } + + unsafe fn end_render_pass(&mut self) { + if !self.pass.resolves.is_empty() { + let list = self.list.unwrap(); + self.temp.barriers.clear(); + + // All the targets are expected to be in `COLOR_TARGET` state, + // but D3D12 has special source/destination states for the resolves. + for resolve in self.pass.resolves.iter() { + let mut barrier = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + //Note: this assumes `D3D12_RESOURCE_STATE_RENDER_TARGET`. + // If it's not the case, we can include the `TextureUses` in `PassResove`. + unsafe { + *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.src.0.as_mut_ptr(), + Subresource: resolve.src.1, + StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_SOURCE, + } + }; + self.temp.barriers.push(barrier); + unsafe { + *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.dst.0.as_mut_ptr(), + Subresource: resolve.dst.1, + StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_DEST, + } + }; + self.temp.barriers.push(barrier); + } + + if !self.temp.barriers.is_empty() { + profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); + unsafe { + list.ResourceBarrier( + self.temp.barriers.len() as u32, + self.temp.barriers.as_ptr(), + ) + }; + } + + for resolve in self.pass.resolves.iter() { + profiling::scope!("ID3D12GraphicsCommandList::ResolveSubresource"); + unsafe { + list.ResolveSubresource( + resolve.dst.0.as_mut_ptr(), + resolve.dst.1, + resolve.src.0.as_mut_ptr(), + resolve.src.1, + resolve.format, + ) + }; + } + + // Flip all the barriers to reverse, back into `COLOR_TARGET`. + for barrier in self.temp.barriers.iter_mut() { + let transition = unsafe { barrier.u.Transition_mut() }; + mem::swap(&mut transition.StateBefore, &mut transition.StateAfter); + } + if !self.temp.barriers.is_empty() { + profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); + unsafe { + list.ResourceBarrier( + self.temp.barriers.len() as u32, + self.temp.barriers.as_ptr(), + ) + }; + } + } + + unsafe { self.end_pass() }; + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + log::trace!("Set group[{}]", index); + let info = &layout.bind_group_infos[index as usize]; + let mut root_index = info.base_root_index as usize; + + // Bind CBV/SRC/UAV descriptor tables + if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { + log::trace!("\tBind element[{}] = view", root_index); + self.pass.root_elements[root_index] = + super::RootElement::Table(group.handle_views.unwrap().gpu); + root_index += 1; + } + + // Bind Sampler descriptor tables. + if info.tables.contains(super::TableTypes::SAMPLERS) { + log::trace!("\tBind element[{}] = sampler", root_index); + self.pass.root_elements[root_index] = + super::RootElement::Table(group.handle_samplers.unwrap().gpu); + root_index += 1; + } + + // Bind root descriptors + for ((&kind, &gpu_base), &offset) in info + .dynamic_buffers + .iter() + .zip(group.dynamic_buffers.iter()) + .zip(dynamic_offsets) + { + log::trace!("\tBind element[{}] = dynamic", root_index); + self.pass.root_elements[root_index] = super::RootElement::DynamicOffsetBuffer { + kind, + address: gpu_base + offset as d3d12::GpuAddress, + }; + root_index += 1; + } + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= (1 << root_index) - (1 << info.base_root_index); + } else { + // D3D12 requires full reset on signature change + self.reset_signature(&layout.shared); + }; + } + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + _stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + let info = layout.shared.root_constant_info.as_ref().unwrap(); + + self.pass.root_elements[info.root_index as usize] = super::RootElement::Constant; + + self.pass.constant_data[(offset as usize)..(offset as usize + data.len())] + .copy_from_slice(data); + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= 1 << info.root_index; + } else { + // D3D12 requires full reset on signature change + self.reset_signature(&layout.shared); + }; + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + let (wide_label, size) = self.temp.prepare_marker(label); + unsafe { + self.list + .unwrap() + .SetMarker(0, wide_label.as_ptr() as *const _, size) + }; + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + let (wide_label, size) = self.temp.prepare_marker(group_label); + unsafe { + self.list + .unwrap() + .BeginEvent(0, wide_label.as_ptr() as *const _, size) + }; + } + unsafe fn end_debug_marker(&mut self) { + unsafe { self.list.unwrap().EndEvent() } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + let list = self.list.unwrap(); + + if self.pass.layout.signature != pipeline.layout.signature { + // D3D12 requires full reset on signature change + list.set_graphics_root_signature(pipeline.layout.signature); + self.reset_signature(&pipeline.layout); + }; + + list.set_pipeline_state(pipeline.raw); + unsafe { list.IASetPrimitiveTopology(pipeline.topology) }; + + for (index, (vb, &stride)) in self + .pass + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_strides.iter()) + .enumerate() + { + if let Some(stride) = stride { + if vb.StrideInBytes != stride.get() { + vb.StrideInBytes = stride.get(); + self.pass.dirty_vertex_buffers |= 1 << index; + } + } + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + self.list.unwrap().set_index_buffer( + binding.resolve_address(), + binding.resolve_size() as u32, + auxil::dxgi::conv::map_index_format(format), + ); + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let vb = &mut self.pass.vertex_buffers[index as usize]; + vb.BufferLocation = binding.resolve_address(); + vb.SizeInBytes = binding.resolve_size() as u32; + self.pass.dirty_vertex_buffers |= 1 << index; + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let raw_vp = d3d12_ty::D3D12_VIEWPORT { + TopLeftX: rect.x, + TopLeftY: rect.y, + Width: rect.w, + Height: rect.h, + MinDepth: depth_range.start, + MaxDepth: depth_range.end, + }; + unsafe { self.list.unwrap().RSSetViewports(1, &raw_vp) }; + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + let raw_rect = d3d12_ty::D3D12_RECT { + left: rect.x as i32, + top: rect.y as i32, + right: (rect.x + rect.w) as i32, + bottom: (rect.y + rect.h) as i32, + }; + unsafe { self.list.unwrap().RSSetScissorRects(1, &raw_rect) }; + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + self.list.unwrap().set_stencil_reference(value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.list.unwrap().set_blend_factor(*color); + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { self.prepare_draw(start_vertex as i32, start_instance) }; + self.list + .unwrap() + .draw(vertex_count, instance_count, start_vertex, start_instance); + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { self.prepare_draw(base_vertex, start_instance) }; + self.list.unwrap().draw_indexed( + index_count, + instance_count, + start_index, + base_vertex, + start_instance, + ); + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ) + }; + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ) + }; + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe { self.begin_pass(super::PassKind::Compute, desc.label) }; + } + unsafe fn end_compute_pass(&mut self) { + unsafe { self.end_pass() }; + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + let list = self.list.unwrap(); + + if self.pass.layout.signature != pipeline.layout.signature { + // D3D12 requires full reset on signature change + list.set_compute_root_signature(pipeline.layout.signature); + self.reset_signature(&pipeline.layout); + }; + + list.set_pipeline_state(pipeline.raw); + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + self.prepare_dispatch(count); + self.list.unwrap().dispatch(count); + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + self.prepare_dispatch([0; 3]); + //TODO: update special constants indirectly + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.dispatch.as_mut_ptr(), + 1, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/conv.rs b/third_party/rust/wgpu-hal/src/dx12/conv.rs new file mode 100644 index 0000000000..7b39e98ad2 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/conv.rs @@ -0,0 +1,350 @@ +use std::iter; +use winapi::{ + shared::minwindef::BOOL, + um::{d3d12 as d3d12_ty, d3dcommon}, +}; + +pub fn map_buffer_usage_to_resource_flags( + usage: crate::BufferUses, +) -> d3d12_ty::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + if usage.contains(crate::BufferUses::STORAGE_READ_WRITE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + flags +} + +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> d3d12_ty::D3D12_RESOURCE_DIMENSION { + match dim { + wgt::TextureDimension::D1 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE1D, + wgt::TextureDimension::D2 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE2D, + wgt::TextureDimension::D3 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE3D, + } +} + +pub fn map_texture_usage_to_resource_flags( + usage: crate::TextureUses, +) -> d3d12_ty::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + + if usage.contains(crate::TextureUses::COLOR_TARGET) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + if usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + if !usage.contains(crate::TextureUses::RESOURCE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + } + if usage.contains(crate::TextureUses::STORAGE_READ_WRITE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + + flags +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE { + use wgt::AddressMode as Am; + match mode { + Am::Repeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_WRAP, + Am::MirrorRepeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR, + Am::ClampToEdge => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + Am::ClampToBorder => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_BORDER, + //Am::MirrorClamp => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, + } +} + +pub fn map_filter_mode(mode: wgt::FilterMode) -> d3d12_ty::D3D12_FILTER_TYPE { + match mode { + wgt::FilterMode::Nearest => d3d12_ty::D3D12_FILTER_TYPE_POINT, + wgt::FilterMode::Linear => d3d12_ty::D3D12_FILTER_TYPE_LINEAR, + } +} + +pub fn map_comparison(func: wgt::CompareFunction) -> d3d12_ty::D3D12_COMPARISON_FUNC { + use wgt::CompareFunction as Cf; + match func { + Cf::Never => d3d12_ty::D3D12_COMPARISON_FUNC_NEVER, + Cf::Less => d3d12_ty::D3D12_COMPARISON_FUNC_LESS, + Cf::LessEqual => d3d12_ty::D3D12_COMPARISON_FUNC_LESS_EQUAL, + Cf::Equal => d3d12_ty::D3D12_COMPARISON_FUNC_EQUAL, + Cf::GreaterEqual => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER_EQUAL, + Cf::Greater => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER, + Cf::NotEqual => d3d12_ty::D3D12_COMPARISON_FUNC_NOT_EQUAL, + Cf::Always => d3d12_ty::D3D12_COMPARISON_FUNC_ALWAYS, + } +} + +pub fn map_border_color(border_color: Option<wgt::SamplerBorderColor>) -> [f32; 4] { + use wgt::SamplerBorderColor as Sbc; + match border_color { + Some(Sbc::TransparentBlack) | Some(Sbc::Zero) | None => [0.0; 4], + Some(Sbc::OpaqueBlack) => [0.0, 0.0, 0.0, 1.0], + Some(Sbc::OpaqueWhite) => [1.0; 4], + } +} + +pub fn map_visibility(visibility: wgt::ShaderStages) -> d3d12::ShaderVisibility { + match visibility { + wgt::ShaderStages::VERTEX => d3d12::ShaderVisibility::VS, + wgt::ShaderStages::FRAGMENT => d3d12::ShaderVisibility::PS, + _ => d3d12::ShaderVisibility::All, + } +} + +pub fn map_binding_type(ty: &wgt::BindingType) -> d3d12::DescriptorRangeType { + use wgt::BindingType as Bt; + match *ty { + Bt::Sampler { .. } => d3d12::DescriptorRangeType::Sampler, + Bt::Buffer { + ty: wgt::BufferBindingType::Uniform, + .. + } => d3d12::DescriptorRangeType::CBV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: true }, + .. + } + | Bt::Texture { .. } => d3d12::DescriptorRangeType::SRV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: false }, + .. + } + | Bt::StorageTexture { .. } => d3d12::DescriptorRangeType::UAV, + } +} + +pub fn map_label(name: &str) -> Vec<u16> { + name.encode_utf16().chain(iter::once(0)).collect() +} + +pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> d3d12_ty::D3D12_RESOURCE_STATES { + use crate::BufferUses as Bu; + let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + + if usage.intersects(Bu::COPY_SRC) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Bu::COPY_DST) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Bu::INDEX) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + if usage.intersects(Bu::VERTEX | Bu::UNIFORM) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + if usage.intersects(Bu::STORAGE_READ_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } else if usage.intersects(Bu::STORAGE_READ) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Bu::INDIRECT) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + } + state +} + +pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12_ty::D3D12_RESOURCE_STATES { + use crate::TextureUses as Tu; + let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + //Note: `RESOLVE_SOURCE` and `RESOLVE_DEST` are not used here + //Note: `PRESENT` is the same as `COMMON` + if usage == crate::TextureUses::UNINITIALIZED { + return state; + } + + if usage.intersects(Tu::COPY_SRC) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Tu::COPY_DST) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Tu::RESOURCE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Tu::COLOR_TARGET) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET; + } + if usage.intersects(Tu::DEPTH_STENCIL_READ) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_READ; + } + if usage.intersects(Tu::DEPTH_STENCIL_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + if usage.intersects(Tu::STORAGE_READ | Tu::STORAGE_READ_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + state +} + +pub fn map_topology( + topology: wgt::PrimitiveTopology, +) -> ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE, + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, +) { + match topology { + wgt::PrimitiveTopology::PointList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + ), + wgt::PrimitiveTopology::LineList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINELIST, + ), + wgt::PrimitiveTopology::LineStrip => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + ), + wgt::PrimitiveTopology::TriangleList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + ), + wgt::PrimitiveTopology::TriangleStrip => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + ), + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE { + match mode { + wgt::PolygonMode::Point => { + log::error!("Point rasterization is not supported"); + d3d12_ty::D3D12_FILL_MODE_WIREFRAME + } + wgt::PolygonMode::Line => d3d12_ty::D3D12_FILL_MODE_WIREFRAME, + wgt::PolygonMode::Fill => d3d12_ty::D3D12_FILL_MODE_SOLID, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => d3d12_ty::D3D12_BLEND_ZERO, + Bf::One => d3d12_ty::D3D12_BLEND_ONE, + Bf::Src if is_alpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, + Bf::Src => d3d12_ty::D3D12_BLEND_SRC_COLOR, + Bf::OneMinusSrc if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, + Bf::OneMinusSrc => d3d12_ty::D3D12_BLEND_INV_SRC_COLOR, + Bf::Dst if is_alpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, + Bf::Dst => d3d12_ty::D3D12_BLEND_DEST_COLOR, + Bf::OneMinusDst if is_alpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, + Bf::OneMinusDst => d3d12_ty::D3D12_BLEND_INV_DEST_COLOR, + Bf::SrcAlpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, + Bf::OneMinusSrcAlpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, + Bf::DstAlpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, + Bf::OneMinusDstAlpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, + Bf::Constant => d3d12_ty::D3D12_BLEND_BLEND_FACTOR, + Bf::OneMinusConstant => d3d12_ty::D3D12_BLEND_INV_BLEND_FACTOR, + Bf::SrcAlphaSaturated => d3d12_ty::D3D12_BLEND_SRC_ALPHA_SAT, + //Bf::Src1Color if is_alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + //Bf::Src1Color => d3d12_ty::D3D12_BLEND_SRC1_COLOR, + //Bf::OneMinusSrc1Color if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + //Bf::OneMinusSrc1Color => d3d12_ty::D3D12_BLEND_INV_SRC1_COLOR, + //Bf::Src1Alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + //Bf::OneMinusSrc1Alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_blend_component( + component: &wgt::BlendComponent, + is_alpha: bool, +) -> ( + d3d12_ty::D3D12_BLEND_OP, + d3d12_ty::D3D12_BLEND, + d3d12_ty::D3D12_BLEND, +) { + let raw_op = match component.operation { + wgt::BlendOperation::Add => d3d12_ty::D3D12_BLEND_OP_ADD, + wgt::BlendOperation::Subtract => d3d12_ty::D3D12_BLEND_OP_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => d3d12_ty::D3D12_BLEND_OP_REV_SUBTRACT, + wgt::BlendOperation::Min => d3d12_ty::D3D12_BLEND_OP_MIN, + wgt::BlendOperation::Max => d3d12_ty::D3D12_BLEND_OP_MAX, + }; + let raw_src = map_blend_factor(component.src_factor, is_alpha); + let raw_dst = map_blend_factor(component.dst_factor, is_alpha); + (raw_op, raw_src, raw_dst) +} + +pub fn map_render_targets( + color_targets: &[Option<wgt::ColorTargetState>], +) -> [d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC; + d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] { + let dummy_target = d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: 0, + LogicOpEnable: 0, + SrcBlend: d3d12_ty::D3D12_BLEND_ZERO, + DestBlend: d3d12_ty::D3D12_BLEND_ZERO, + BlendOp: d3d12_ty::D3D12_BLEND_OP_ADD, + SrcBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, + DestBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, + BlendOpAlpha: d3d12_ty::D3D12_BLEND_OP_ADD, + LogicOp: d3d12_ty::D3D12_LOGIC_OP_CLEAR, + RenderTargetWriteMask: 0, + }; + let mut raw_targets = [dummy_target; d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + + for (raw, ct) in raw_targets.iter_mut().zip(color_targets.iter()) { + if let Some(ct) = ct.as_ref() { + raw.RenderTargetWriteMask = ct.write_mask.bits() as u8; + if let Some(ref blend) = ct.blend { + let (color_op, color_src, color_dst) = map_blend_component(&blend.color, false); + let (alpha_op, alpha_src, alpha_dst) = map_blend_component(&blend.alpha, true); + raw.BlendEnable = 1; + raw.BlendOp = color_op; + raw.SrcBlend = color_src; + raw.DestBlend = color_dst; + raw.BlendOpAlpha = alpha_op; + raw.SrcBlendAlpha = alpha_src; + raw.DestBlendAlpha = alpha_dst; + } + } + } + + raw_targets +} + +fn map_stencil_op(op: wgt::StencilOperation) -> d3d12_ty::D3D12_STENCIL_OP { + use wgt::StencilOperation as So; + match op { + So::Keep => d3d12_ty::D3D12_STENCIL_OP_KEEP, + So::Zero => d3d12_ty::D3D12_STENCIL_OP_ZERO, + So::Replace => d3d12_ty::D3D12_STENCIL_OP_REPLACE, + So::IncrementClamp => d3d12_ty::D3D12_STENCIL_OP_INCR_SAT, + So::IncrementWrap => d3d12_ty::D3D12_STENCIL_OP_INCR, + So::DecrementClamp => d3d12_ty::D3D12_STENCIL_OP_DECR_SAT, + So::DecrementWrap => d3d12_ty::D3D12_STENCIL_OP_DECR, + So::Invert => d3d12_ty::D3D12_STENCIL_OP_INVERT, + } +} + +fn map_stencil_face(face: &wgt::StencilFaceState) -> d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { + d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { + StencilFailOp: map_stencil_op(face.fail_op), + StencilDepthFailOp: map_stencil_op(face.depth_fail_op), + StencilPassOp: map_stencil_op(face.pass_op), + StencilFunc: map_comparison(face.compare), + } +} + +pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> d3d12_ty::D3D12_DEPTH_STENCIL_DESC { + d3d12_ty::D3D12_DEPTH_STENCIL_DESC { + DepthEnable: BOOL::from(ds.is_depth_enabled()), + DepthWriteMask: if ds.depth_write_enabled { + d3d12_ty::D3D12_DEPTH_WRITE_MASK_ALL + } else { + d3d12_ty::D3D12_DEPTH_WRITE_MASK_ZERO + }, + DepthFunc: map_comparison(ds.depth_compare), + StencilEnable: BOOL::from(ds.stencil.is_enabled()), + StencilReadMask: ds.stencil.read_mask as u8, + StencilWriteMask: ds.stencil.write_mask as u8, + FrontFace: map_stencil_face(&ds.stencil.front), + BackFace: map_stencil_face(&ds.stencil.back), + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/descriptor.rs b/third_party/rust/wgpu-hal/src/dx12/descriptor.rs new file mode 100644 index 0000000000..430e3a2f4b --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/descriptor.rs @@ -0,0 +1,311 @@ +use crate::auxil::dxgi::result::HResult as _; +use bit_set::BitSet; +use parking_lot::Mutex; +use range_alloc::RangeAllocator; +use std::fmt; + +const HEAP_SIZE_FIXED: usize = 64; + +#[derive(Copy, Clone)] +pub(super) struct DualHandle { + cpu: d3d12::CpuDescriptor, + pub gpu: d3d12::GpuDescriptor, + /// How large the block allocated to this handle is. + count: u64, +} + +impl fmt::Debug for DualHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DualHandle") + .field("cpu", &self.cpu.ptr) + .field("gpu", &self.gpu.ptr) + .field("count", &self.count) + .finish() + } +} + +type DescriptorIndex = u64; + +pub(super) struct GeneralHeap { + pub raw: d3d12::DescriptorHeap, + ty: d3d12::DescriptorHeapType, + handle_size: u64, + total_handles: u64, + start: DualHandle, + ranges: Mutex<RangeAllocator<DescriptorIndex>>, +} + +impl GeneralHeap { + pub(super) fn new( + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + total_handles: u64, + ) -> Result<Self, crate::DeviceError> { + let raw = { + profiling::scope!("ID3D12Device::CreateDescriptorHeap"); + device + .create_descriptor_heap( + total_handles as u32, + ty, + d3d12::DescriptorHeapFlags::SHADER_VISIBLE, + 0, + ) + .into_device_result("Descriptor heap creation")? + }; + + Ok(Self { + raw, + ty, + handle_size: device.get_descriptor_increment_size(ty) as u64, + total_handles, + start: DualHandle { + cpu: raw.start_cpu_descriptor(), + gpu: raw.start_gpu_descriptor(), + count: 0, + }, + ranges: Mutex::new(RangeAllocator::new(0..total_handles)), + }) + } + + pub(super) fn at(&self, index: DescriptorIndex, count: u64) -> DualHandle { + assert!(index < self.total_handles); + DualHandle { + cpu: self.cpu_descriptor_at(index), + gpu: self.gpu_descriptor_at(index), + count, + } + } + + fn cpu_descriptor_at(&self, index: u64) -> d3d12::CpuDescriptor { + d3d12::CpuDescriptor { + ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, + } + } + + fn gpu_descriptor_at(&self, index: u64) -> d3d12::GpuDescriptor { + d3d12::GpuDescriptor { + ptr: self.start.gpu.ptr + self.handle_size * index, + } + } + + pub(super) fn allocate_slice(&self, count: u64) -> Result<DescriptorIndex, crate::DeviceError> { + let range = self.ranges.lock().allocate_range(count).map_err(|err| { + log::error!("Unable to allocate descriptors: {:?}", err); + crate::DeviceError::OutOfMemory + })?; + Ok(range.start) + } + + /// Free handles previously given out by this `DescriptorHeapSlice`. + /// Do not use this with handles not given out by this `DescriptorHeapSlice`. + pub(crate) fn free_slice(&self, handle: DualHandle) { + let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size; + self.ranges.lock().free_range(start..start + handle.count); + } +} + +/// Fixed-size free-list allocator for CPU descriptors. +struct FixedSizeHeap { + raw: d3d12::DescriptorHeap, + /// Bit flag representation of available handles in the heap. + /// + /// 0 - Occupied + /// 1 - free + availability: u64, + handle_size: usize, + start: d3d12::CpuDescriptor, +} + +impl FixedSizeHeap { + fn new(device: d3d12::Device, ty: d3d12::DescriptorHeapType) -> Self { + let (heap, _hr) = device.create_descriptor_heap( + HEAP_SIZE_FIXED as _, + ty, + d3d12::DescriptorHeapFlags::empty(), + 0, + ); + + Self { + handle_size: device.get_descriptor_increment_size(ty) as _, + availability: !0, // all free! + start: heap.start_cpu_descriptor(), + raw: heap, + } + } + + fn alloc_handle(&mut self) -> d3d12::CpuDescriptor { + // Find first free slot. + let slot = self.availability.trailing_zeros() as usize; + assert!(slot < HEAP_SIZE_FIXED); + // Set the slot as occupied. + self.availability ^= 1 << slot; + + d3d12::CpuDescriptor { + ptr: self.start.ptr + self.handle_size * slot, + } + } + + fn free_handle(&mut self, handle: d3d12::CpuDescriptor) { + let slot = (handle.ptr - self.start.ptr) / self.handle_size; + assert!(slot < HEAP_SIZE_FIXED); + assert_eq!(self.availability & (1 << slot), 0); + self.availability ^= 1 << slot; + } + + fn is_full(&self) -> bool { + self.availability == 0 + } + + unsafe fn destroy(&self) { + unsafe { self.raw.destroy() }; + } +} + +#[derive(Clone, Copy)] +pub(super) struct Handle { + pub raw: d3d12::CpuDescriptor, + heap_index: usize, +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Handle") + .field("ptr", &self.raw.ptr) + .field("heap_index", &self.heap_index) + .finish() + } +} + +pub(super) struct CpuPool { + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + heaps: Vec<FixedSizeHeap>, + avaliable_heap_indices: BitSet, +} + +impl CpuPool { + pub(super) fn new(device: d3d12::Device, ty: d3d12::DescriptorHeapType) -> Self { + Self { + device, + ty, + heaps: Vec::new(), + avaliable_heap_indices: BitSet::new(), + } + } + + pub(super) fn alloc_handle(&mut self) -> Handle { + let heap_index = self + .avaliable_heap_indices + .iter() + .next() + .unwrap_or_else(|| { + // Allocate a new heap + let id = self.heaps.len(); + self.heaps.push(FixedSizeHeap::new(self.device, self.ty)); + self.avaliable_heap_indices.insert(id); + id + }); + + let heap = &mut self.heaps[heap_index]; + let handle = Handle { + raw: heap.alloc_handle(), + heap_index, + }; + if heap.is_full() { + self.avaliable_heap_indices.remove(heap_index); + } + + handle + } + + pub(super) fn free_handle(&mut self, handle: Handle) { + self.heaps[handle.heap_index].free_handle(handle.raw); + self.avaliable_heap_indices.insert(handle.heap_index); + } + + pub(super) unsafe fn destroy(&self) { + for heap in &self.heaps { + unsafe { heap.destroy() }; + } + } +} + +pub(super) struct CpuHeapInner { + pub raw: d3d12::DescriptorHeap, + pub stage: Vec<d3d12::CpuDescriptor>, +} + +pub(super) struct CpuHeap { + pub inner: Mutex<CpuHeapInner>, + start: d3d12::CpuDescriptor, + handle_size: u32, + total: u32, +} + +unsafe impl Send for CpuHeap {} +unsafe impl Sync for CpuHeap {} + +impl CpuHeap { + pub(super) fn new( + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + total: u32, + ) -> Result<Self, crate::DeviceError> { + let handle_size = device.get_descriptor_increment_size(ty); + let raw = device + .create_descriptor_heap(total, ty, d3d12::DescriptorHeapFlags::empty(), 0) + .into_device_result("CPU descriptor heap creation")?; + + Ok(Self { + inner: Mutex::new(CpuHeapInner { + raw, + stage: Vec::new(), + }), + start: raw.start_cpu_descriptor(), + handle_size, + total, + }) + } + + pub(super) fn at(&self, index: u32) -> d3d12::CpuDescriptor { + d3d12::CpuDescriptor { + ptr: self.start.ptr + (self.handle_size * index) as usize, + } + } + + pub(super) unsafe fn destroy(self) { + unsafe { self.inner.into_inner().raw.destroy() }; + } +} + +impl fmt::Debug for CpuHeap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CpuHeap") + .field("start", &self.start.ptr) + .field("handle_size", &self.handle_size) + .field("total", &self.total) + .finish() + } +} + +pub(super) unsafe fn upload( + device: d3d12::Device, + src: &CpuHeapInner, + dst: &GeneralHeap, + dummy_copy_counts: &[u32], +) -> Result<DualHandle, crate::DeviceError> { + let count = src.stage.len() as u32; + let index = dst.allocate_slice(count as u64)?; + unsafe { + device.CopyDescriptors( + 1, + &dst.cpu_descriptor_at(index), + &count, + count, + src.stage.as_ptr(), + dummy_copy_counts.as_ptr(), + dst.ty as u32, + ) + }; + Ok(dst.at(index, count as u64)) +} diff --git a/third_party/rust/wgpu-hal/src/dx12/device.rs b/third_party/rust/wgpu-hal/src/dx12/device.rs new file mode 100644 index 0000000000..7e14818572 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/device.rs @@ -0,0 +1,1590 @@ +use crate::auxil::{self, dxgi::result::HResult as _}; + +use super::{conv, descriptor, view}; +use parking_lot::Mutex; +use std::{ffi, mem, num::NonZeroU32, ptr, sync::Arc}; +use winapi::{ + shared::{dxgiformat, dxgitype, minwindef::BOOL, winerror}, + um::{d3d12 as d3d12_ty, synchapi, winbase}, + Interface, +}; + +// this has to match Naga's HLSL backend, and also needs to be null-terminated +const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; + +impl super::Device { + pub(super) fn new( + raw: d3d12::Device, + present_queue: d3d12::CommandQueue, + private_caps: super::PrivateCapabilities, + library: &Arc<d3d12::D3D12Lib>, + dx12_shader_compiler: wgt::Dx12Compiler, + ) -> Result<Self, crate::DeviceError> { + let mem_allocator = super::suballocation::create_allocator_wrapper(&raw)?; + + let dxc_container = match dx12_shader_compiler { + wgt::Dx12Compiler::Dxc { + dxil_path, + dxc_path, + } => super::shader_compilation::get_dxc_container(dxc_path, dxil_path)?, + wgt::Dx12Compiler::Fxc => None, + }; + + let mut idle_fence = d3d12::Fence::null(); + let hr = unsafe { + profiling::scope!("ID3D12Device::CreateFence"); + raw.CreateFence( + 0, + d3d12_ty::D3D12_FENCE_FLAG_NONE, + &d3d12_ty::ID3D12Fence::uuidof(), + idle_fence.mut_void(), + ) + }; + hr.into_device_result("Idle fence creation")?; + + let mut zero_buffer = d3d12::Resource::null(); + unsafe { + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: super::ZERO_BUFFER_SIZE, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: d3d12_ty::D3D12_RESOURCE_FLAG_NONE, + }; + + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match private_caps.memory_architecture { + super::MemoryArchitecture::Unified { .. } => d3d12_ty::D3D12_MEMORY_POOL_L0, + super::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + profiling::scope!("Zero Buffer Allocation"); + raw.CreateCommittedResource( + &heap_properties, + d3d12_ty::D3D12_HEAP_FLAG_NONE, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + zero_buffer.mut_void(), + ) + .into_device_result("Zero buffer creation")?; + + // Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` + // this resource is zeroed by default. + }; + + // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 + let capacity_views = 1_000_000; + let capacity_samplers = 2_048; + + let shared = super::DeviceShared { + zero_buffer, + cmd_signatures: super::CommandSignatures { + draw: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::draw()], + mem::size_of::<wgt::DrawIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (draw) signature creation")?, + draw_indexed: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::draw_indexed()], + mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (draw_indexed) signature creation")?, + dispatch: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::dispatch()], + mem::size_of::<wgt::DispatchIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (dispatch) signature creation")?, + }, + heap_views: descriptor::GeneralHeap::new( + raw, + d3d12::DescriptorHeapType::CbvSrvUav, + capacity_views, + )?, + heap_samplers: descriptor::GeneralHeap::new( + raw, + d3d12::DescriptorHeapType::Sampler, + capacity_samplers, + )?, + }; + + let mut rtv_pool = descriptor::CpuPool::new(raw, d3d12::DescriptorHeapType::Rtv); + let null_rtv_handle = rtv_pool.alloc_handle(); + // A null pResource is used to initialize a null descriptor, + // which guarantees D3D11-like null binding behavior (reading 0s, writes are discarded) + raw.create_render_target_view( + d3d12::WeakPtr::null(), + &d3d12::RenderTargetViewDesc::texture_2d( + winapi::shared::dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + 0, + 0, + ), + null_rtv_handle.raw, + ); + + Ok(super::Device { + raw, + present_queue, + idler: super::Idler { + fence: idle_fence, + event: d3d12::Event::create(false, false), + }, + private_caps, + shared: Arc::new(shared), + rtv_pool: Mutex::new(rtv_pool), + dsv_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::Dsv, + )), + srv_uav_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::CbvSrvUav, + )), + sampler_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::Sampler, + )), + library: Arc::clone(library), + #[cfg(feature = "renderdoc")] + render_doc: Default::default(), + null_rtv_handle, + mem_allocator, + dxc_container, + }) + } + + pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> { + let cur_value = self.idler.fence.get_value(); + if cur_value == !0 { + return Err(crate::DeviceError::Lost); + } + + let value = cur_value + 1; + log::info!("Waiting for idle with value {}", value); + self.present_queue.signal(self.idler.fence, value); + let hr = self + .idler + .fence + .set_event_on_completion(self.idler.event, value); + hr.into_device_result("Set event")?; + unsafe { synchapi::WaitForSingleObject(self.idler.event.0, winbase::INFINITE) }; + Ok(()) + } + + fn load_shader( + &self, + stage: &crate::ProgrammableStage<super::Api>, + layout: &super::PipelineLayout, + naga_stage: naga::ShaderStage, + ) -> Result<super::CompiledShader, crate::PipelineError> { + use naga::back::hlsl; + + let stage_bit = crate::auxil::map_naga_stage(naga_stage); + let module = &stage.module.naga.module; + //TODO: reuse the writer + let mut source = String::new(); + let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options); + let reflection_info = { + profiling::scope!("naga::back::hlsl::write"); + writer + .write(module, &stage.module.naga.info) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))? + }; + + let full_stage = format!( + "{}_{}\0", + naga_stage.to_hlsl_str(), + layout.naga_options.shader_model.to_str() + ); + + let ep_index = module + .entry_points + .iter() + .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + + let raw_ep = reflection_info.entry_point_names[ep_index] + .as_ref() + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{e}")))?; + + let source_name = stage + .module + .raw_name + .as_ref() + .and_then(|cstr| cstr.to_str().ok()) + .unwrap_or_default(); + + // Compile with DXC if available, otherwise fall back to FXC + let (result, log_level) = if let Some(ref dxc_container) = self.dxc_container { + super::shader_compilation::compile_dxc( + self, + &source, + source_name, + raw_ep, + stage_bit, + full_stage, + dxc_container, + ) + } else { + super::shader_compilation::compile_fxc( + self, + &source, + source_name, + &ffi::CString::new(raw_ep.as_str()).unwrap(), + stage_bit, + full_stage, + ) + }; + + log::log!( + log_level, + "Naga generated shader for {:?} at {:?}:\n{}", + raw_ep, + naga_stage, + source + ); + result + } + + pub fn raw_device(&self) -> &d3d12::Device { + &self.raw + } + + pub fn raw_queue(&self) -> &d3d12::CommandQueue { + &self.present_queue + } + + pub unsafe fn texture_from_raw( + resource: d3d12::Resource, + format: wgt::TextureFormat, + dimension: wgt::TextureDimension, + size: wgt::Extent3d, + mip_level_count: u32, + sample_count: u32, + ) -> super::Texture { + super::Texture { + resource, + format, + dimension, + size, + mip_level_count, + sample_count, + allocation: None, + } + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(mut self, queue: super::Queue) { + self.rtv_pool.lock().free_handle(self.null_rtv_handle); + unsafe { self.rtv_pool.into_inner().destroy() }; + unsafe { self.dsv_pool.into_inner().destroy() }; + unsafe { self.srv_uav_pool.into_inner().destroy() }; + unsafe { self.sampler_pool.into_inner().destroy() }; + unsafe { self.shared.destroy() }; + unsafe { self.idler.destroy() }; + self.mem_allocator = None; + unsafe { queue.raw.destroy() }; + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + let mut resource = d3d12::Resource::null(); + let mut size = desc.size; + if desc.usage.contains(crate::BufferUses::UNIFORM) { + let align_mask = d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1; + size = ((size - 1) | align_mask) + 1; + } + + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: conv::map_buffer_usage_to_resource_flags(desc.usage), + }; + + let (hr, allocation) = + super::suballocation::create_buffer_resource(self, desc, raw_desc, &mut resource)?; + + hr.into_device_result("Buffer creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { resource.SetName(cwstr.as_ptr()) }; + } + + Ok(super::Buffer { + resource, + size, + allocation, + }) + } + + unsafe fn destroy_buffer(&self, mut buffer: super::Buffer) { + unsafe { buffer.resource.destroy() }; + // Only happens when it's using the windows_rs feature and there's an allocation + if let Some(alloc) = buffer.allocation.take() { + super::suballocation::free_buffer_allocation( + alloc, + // SAFETY: for allocations to exist, the allocator must exist + unsafe { self.mem_allocator.as_ref().unwrap_unchecked() }, + ); + } + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + let mut ptr = ptr::null_mut(); + // TODO: 0 for subresource should be fine here until map and unmap buffer is subresource aware? + let hr = unsafe { (*buffer.resource).Map(0, ptr::null(), &mut ptr) }; + hr.into_device_result("Map buffer")?; + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize).cast::<u8>() }) + .unwrap(), + //TODO: double-check this. Documentation is a bit misleading - + // it implies that Map/Unmap is needed to invalidate/flush memory. + is_coherent: true, + }) + } + + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + unsafe { (*buffer.resource).Unmap(0, ptr::null()) }; + Ok(()) + } + + unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + use super::suballocation::create_texture_resource; + + let mut resource = d3d12::Resource::null(); + + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: conv::map_texture_dimension(desc.dimension), + Alignment: 0, + Width: desc.size.width as u64, + Height: desc.size.height, + DepthOrArraySize: desc.size.depth_or_array_layers as u16, + MipLevels: desc.mip_level_count as u16, + Format: auxil::dxgi::conv::map_texture_format_for_resource( + desc.format, + desc.usage, + !desc.view_formats.is_empty(), + self.private_caps.casting_fully_typed_format_supported, + ), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.sample_count, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_UNKNOWN, + Flags: conv::map_texture_usage_to_resource_flags(desc.usage), + }; + + let (hr, allocation) = create_texture_resource(self, desc, raw_desc, &mut resource)?; + + hr.into_device_result("Texture creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { resource.SetName(cwstr.as_ptr()) }; + } + + Ok(super::Texture { + resource, + format: desc.format, + dimension: desc.dimension, + size: desc.size, + mip_level_count: desc.mip_level_count, + sample_count: desc.sample_count, + allocation, + }) + } + + unsafe fn destroy_texture(&self, mut texture: super::Texture) { + unsafe { texture.resource.destroy() }; + if let Some(alloc) = texture.allocation.take() { + super::suballocation::free_texture_allocation( + alloc, + // SAFETY: for allocations to exist, the allocator must exist + unsafe { self.mem_allocator.as_ref().unwrap_unchecked() }, + ); + } + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + let view_desc = desc.to_internal(texture); + + Ok(super::TextureView { + raw_format: view_desc.rtv_dsv_format, + aspects: view_desc.aspects, + target_base: ( + texture.resource, + texture.calc_subresource(desc.range.base_mip_level, desc.range.base_array_layer, 0), + ), + handle_srv: if desc.usage.intersects(crate::TextureUses::RESOURCE) { + let raw_desc = unsafe { view_desc.to_srv() }; + raw_desc.map(|raw_desc| { + let handle = self.srv_uav_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateShaderResourceView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + handle + }) + } else { + None + }, + handle_uav: if desc.usage.intersects( + crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE, + ) { + let raw_desc = unsafe { view_desc.to_uav() }; + raw_desc.map(|raw_desc| { + let handle = self.srv_uav_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateUnorderedAccessView( + texture.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle.raw, + ) + }; + handle + }) + } else { + None + }, + handle_rtv: if desc.usage.intersects(crate::TextureUses::COLOR_TARGET) { + let raw_desc = unsafe { view_desc.to_rtv() }; + let handle = self.rtv_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateRenderTargetView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + handle_dsv_ro: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_READ) + { + let raw_desc = unsafe { view_desc.to_dsv(true) }; + let handle = self.dsv_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateDepthStencilView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + handle_dsv_rw: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) + { + let raw_desc = unsafe { view_desc.to_dsv(false) }; + let handle = self.dsv_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateDepthStencilView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + }) + } + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + if view.handle_srv.is_some() || view.handle_uav.is_some() { + let mut pool = self.srv_uav_pool.lock(); + if let Some(handle) = view.handle_srv { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_uav { + pool.free_handle(handle); + } + } + if let Some(handle) = view.handle_rtv { + self.rtv_pool.lock().free_handle(handle); + } + if view.handle_dsv_ro.is_some() || view.handle_dsv_rw.is_some() { + let mut pool = self.dsv_pool.lock(); + if let Some(handle) = view.handle_dsv_ro { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_dsv_rw { + pool.free_handle(handle); + } + } + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + let handle = self.sampler_pool.lock().alloc_handle(); + + let reduction = match desc.compare { + Some(_) => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, + None => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_STANDARD, + }; + let mut filter = conv::map_filter_mode(desc.min_filter) << d3d12_ty::D3D12_MIN_FILTER_SHIFT + | conv::map_filter_mode(desc.mag_filter) << d3d12_ty::D3D12_MAG_FILTER_SHIFT + | conv::map_filter_mode(desc.mipmap_filter) << d3d12_ty::D3D12_MIP_FILTER_SHIFT + | reduction << d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_SHIFT; + + if desc.anisotropy_clamp != 1 { + filter |= d3d12_ty::D3D12_FILTER_ANISOTROPIC; + }; + + let border_color = conv::map_border_color(desc.border_color); + + self.raw.create_sampler( + handle.raw, + filter, + [ + conv::map_address_mode(desc.address_modes[0]), + conv::map_address_mode(desc.address_modes[1]), + conv::map_address_mode(desc.address_modes[2]), + ], + 0.0, + desc.anisotropy_clamp as u32, + conv::map_comparison(desc.compare.unwrap_or(wgt::CompareFunction::Always)), + border_color, + desc.lod_clamp.clone(), + ); + + Ok(super::Sampler { handle }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + self.sampler_pool.lock().free_handle(sampler.handle); + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + let allocator = self + .raw + .create_command_allocator(d3d12::CmdListType::Direct) + .into_device_result("Command allocator creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { allocator.SetName(cwstr.as_ptr()) }; + } + + Ok(super::CommandEncoder { + allocator, + device: self.raw, + shared: Arc::clone(&self.shared), + null_rtv_handle: self.null_rtv_handle, + list: None, + free_lists: Vec::new(), + pass: super::PassState::new(), + temp: super::Temp::default(), + }) + } + unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) { + if let Some(list) = encoder.list { + list.close(); + unsafe { list.destroy() }; + } + for list in encoder.free_lists { + unsafe { list.destroy() }; + } + unsafe { encoder.allocator.destroy() }; + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); + for entry in desc.entries.iter() { + let count = entry.count.map_or(1, NonZeroU32::get); + match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => {} + wgt::BindingType::Buffer { .. } => num_buffer_views += count, + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + num_texture_views += count + } + wgt::BindingType::Sampler { .. } => num_samplers += count, + } + } + + let num_views = num_buffer_views + num_texture_views; + Ok(super::BindGroupLayout { + entries: desc.entries.to_vec(), + cpu_heap_views: if num_views != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + d3d12::DescriptorHeapType::CbvSrvUav, + num_views, + )?; + Some(heap) + } else { + None + }, + cpu_heap_samplers: if num_samplers != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + d3d12::DescriptorHeapType::Sampler, + num_samplers, + )?; + Some(heap) + } else { + None + }, + copy_counts: vec![1; num_views.max(num_samplers) as usize], + }) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + if let Some(cpu_heap) = bg_layout.cpu_heap_views { + unsafe { cpu_heap.destroy() }; + } + if let Some(cpu_heap) = bg_layout.cpu_heap_samplers { + unsafe { cpu_heap.destroy() }; + } + } + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + use naga::back::hlsl; + // Pipeline layouts are implemented as RootSignature for D3D12. + // + // Push Constants are implemented as root constants. + // + // Each descriptor set layout will be one table entry of the root signature. + // We have the additional restriction that SRV/CBV/UAV and samplers need to be + // separated, so each set layout will actually occupy up to 2 entries! + // SRV/CBV/UAV tables are added to the signature first, then Sampler tables, + // and finally dynamic uniform descriptors. + // + // Buffers with dynamic offsets are implemented as root descriptors. + // This is easier than trying to patch up the offset on the shader side. + // + // Root signature layout: + // Root Constants: Parameter=0, Space=0 + // ... + // (bind group [0]) - Space=0 + // View descriptor table, if any + // Sampler descriptor table, if any + // Root descriptors (for dynamic offset buffers) + // (bind group [1]) - Space=0 + // ... + // (bind group [2]) - Space=0 + // Special constant buffer: Space=0 + + //TODO: put lower bind group indices futher down the root signature. See: + // https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model + // Currently impossible because wgpu-core only re-binds the descriptor sets based + // on Vulkan-like layout compatibility rules. + + fn native_binding(bt: &hlsl::BindTarget) -> d3d12::Binding { + d3d12::Binding { + space: bt.space as u32, + register: bt.register, + } + } + + log::debug!( + "Creating Root Signature '{}'", + desc.label.unwrap_or_default() + ); + + let mut binding_map = hlsl::BindingMap::default(); + let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = ( + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + ); + let mut parameters = Vec::new(); + let mut push_constants_target = None; + let mut root_constant_info = None; + + let mut pc_start = u32::MAX; + let mut pc_end = u32::MIN; + + for pc in desc.push_constant_ranges.iter() { + pc_start = pc_start.min(pc.range.start); + pc_end = pc_end.max(pc.range.end); + } + + if pc_start != u32::MAX && pc_end != u32::MIN { + let parameter_index = parameters.len(); + let size = (pc_end - pc_start) / 4; + log::debug!( + "\tParam[{}] = push constant (count = {})", + parameter_index, + size, + ); + parameters.push(d3d12::RootParameter::constants( + d3d12::ShaderVisibility::All, + native_binding(&bind_cbv), + size, + )); + let binding = bind_cbv.clone(); + bind_cbv.register += 1; + root_constant_info = Some(super::RootConstantInfo { + root_index: parameter_index as u32, + range: (pc_start / 4)..(pc_end / 4), + }); + push_constants_target = Some(binding); + + bind_cbv.space += 1; + } + + // Collect the whole number of bindings we will create upfront. + // It allows us to preallocate enough storage to avoid reallocation, + // which could cause invalid pointers. + let total_non_dynamic_entries = desc + .bind_group_layouts + .iter() + .flat_map(|bgl| { + bgl.entries.iter().map(|entry| match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => 0, + _ => 1, + }) + }) + .sum(); + let mut ranges = Vec::with_capacity(total_non_dynamic_entries); + + let mut bind_group_infos = + arrayvec::ArrayVec::<super::BindGroupInfo, { crate::MAX_BIND_GROUPS }>::default(); + for (index, bgl) in desc.bind_group_layouts.iter().enumerate() { + let mut info = super::BindGroupInfo { + tables: super::TableTypes::empty(), + base_root_index: parameters.len() as u32, + dynamic_buffers: Vec::new(), + }; + + let mut visibility_view_static = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic = wgt::ShaderStages::empty(); + let mut visibility_sampler = wgt::ShaderStages::empty(); + for entry in bgl.entries.iter() { + match entry.ty { + wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility, + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => visibility_view_dynamic |= entry.visibility, + _ => visibility_view_static |= entry.visibility, + } + } + + // SRV/CBV/UAV descriptor tables + let mut range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => continue, + ref other => conv::map_binding_type(other), + }; + let bt = match range_ty { + d3d12::DescriptorRangeType::CBV => &mut bind_cbv, + d3d12::DescriptorRangeType::SRV => &mut bind_srv, + d3d12::DescriptorRangeType::UAV => &mut bind_uav, + d3d12::DescriptorRangeType::Sampler => continue, + }; + + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bt.clone() + }, + ); + ranges.push(d3d12::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native_binding(bt), + d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1); + } + if ranges.len() > range_base { + log::debug!( + "\tParam[{}] = views (vis = {:?}, count = {})", + parameters.len(), + visibility_view_static, + ranges.len() - range_base, + ); + parameters.push(d3d12::RootParameter::descriptor_table( + conv::map_visibility(visibility_view_static), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SRV_CBV_UAV; + } + + // Sampler descriptor tables + range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Sampler { .. } => d3d12::DescriptorRangeType::Sampler, + _ => continue, + }; + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bind_sampler.clone() + }, + ); + ranges.push(d3d12::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native_binding(&bind_sampler), + d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1); + } + if ranges.len() > range_base { + log::debug!( + "\tParam[{}] = samplers (vis = {:?}, count = {})", + parameters.len(), + visibility_sampler, + ranges.len() - range_base, + ); + parameters.push(d3d12::RootParameter::descriptor_table( + conv::map_visibility(visibility_sampler), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SAMPLERS; + } + + // Root (dynamic) descriptor tables + let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic); + for entry in bgl.entries.iter() { + let buffer_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + ty, + .. + } => ty, + _ => continue, + }; + + let (kind, parameter_ty, bt) = match buffer_ty { + wgt::BufferBindingType::Uniform => ( + super::BufferViewKind::Constant, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_CBV, + &mut bind_cbv, + ), + wgt::BufferBindingType::Storage { read_only: true } => ( + super::BufferViewKind::ShaderResource, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_SRV, + &mut bind_srv, + ), + wgt::BufferBindingType::Storage { read_only: false } => ( + super::BufferViewKind::UnorderedAccess, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_UAV, + &mut bind_uav, + ), + }; + + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bt.clone() + }, + ); + info.dynamic_buffers.push(kind); + + log::debug!( + "\tParam[{}] = dynamic {:?} (vis = {:?})", + parameters.len(), + buffer_ty, + dynamic_buffers_visibility, + ); + parameters.push(d3d12::RootParameter::descriptor( + parameter_ty, + dynamic_buffers_visibility, + native_binding(bt), + )); + + bt.register += entry.count.map_or(1, NonZeroU32::get); + } + + bind_group_infos.push(info); + } + + // Ensure that we didn't reallocate! + debug_assert_eq!(ranges.len(), total_non_dynamic_entries); + + let (special_constants_root_index, special_constants_binding) = if desc.flags.intersects( + crate::PipelineLayoutFlags::BASE_VERTEX_INSTANCE + | crate::PipelineLayoutFlags::NUM_WORK_GROUPS, + ) { + let parameter_index = parameters.len(); + log::debug!("\tParam[{}] = special", parameter_index); + parameters.push(d3d12::RootParameter::constants( + d3d12::ShaderVisibility::All, // really needed for VS and CS only + native_binding(&bind_cbv), + 3, // 0 = base vertex, 1 = base instance, 2 = other + )); + let binding = bind_cbv.clone(); + bind_cbv.register += 1; + (Some(parameter_index as u32), Some(binding)) + } else { + (None, None) + }; + + log::trace!("{:#?}", parameters); + log::trace!("Bindings {:#?}", binding_map); + + let (blob, error) = self + .library + .serialize_root_signature( + d3d12::RootSignatureVersion::V1_0, + ¶meters, + &[], + d3d12::RootSignatureFlags::ALLOW_IA_INPUT_LAYOUT, + ) + .map_err(|e| { + log::error!("Unable to find serialization function: {:?}", e); + crate::DeviceError::Lost + })? + .into_device_result("Root signature serialization")?; + + if !error.is_null() { + log::error!( + "Root signature serialization error: {:?}", + unsafe { error.as_c_str() }.to_str().unwrap() + ); + unsafe { error.destroy() }; + return Err(crate::DeviceError::Lost); + } + + let raw = self + .raw + .create_root_signature(blob, 0) + .into_device_result("Root signature creation")?; + unsafe { blob.destroy() }; + + log::debug!("\traw = {:?}", raw); + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::PipelineLayout { + shared: super::PipelineLayoutShared { + signature: raw, + total_root_elements: parameters.len() as super::RootIndex, + special_constants_root_index, + root_constant_info, + }, + bind_group_infos, + naga_options: hlsl::Options { + shader_model: match self.dxc_container { + // DXC + Some(_) => hlsl::ShaderModel::V6_0, + // FXC doesn't support SM 6.0 + None => hlsl::ShaderModel::V5_1, + }, + binding_map, + fake_missing_bindings: false, + special_constants_binding, + push_constants_target, + zero_initialize_workgroup_memory: true, + }, + }) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + unsafe { pipeline_layout.shared.signature.destroy() }; + } + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + let mut cpu_views = desc + .layout + .cpu_heap_views + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_views { + inner.stage.clear(); + } + let mut cpu_samplers = desc + .layout + .cpu_heap_samplers + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_samplers { + inner.stage.clear(); + } + let mut dynamic_buffers = Vec::new(); + + for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) { + match layout.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.buffers[start..end] { + dynamic_buffers.push(data.resolve_address()); + } + } + wgt::BindingType::Buffer { ty, .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.buffers[start..end] { + let gpu_address = data.resolve_address(); + let size = data.resolve_size() as u32; + let inner = cpu_views.as_mut().unwrap(); + let cpu_index = inner.stage.len() as u32; + let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); + match ty { + wgt::BufferBindingType::Uniform => { + let size_mask = + d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let raw_desc = d3d12_ty::D3D12_CONSTANT_BUFFER_VIEW_DESC { + BufferLocation: gpu_address, + SizeInBytes: ((size - 1) | size_mask) + 1, + }; + unsafe { self.raw.CreateConstantBufferView(&raw_desc, handle) }; + } + wgt::BufferBindingType::Storage { read_only: true } => { + let mut raw_desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + Shader4ComponentMapping: + view::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: d3d12_ty::D3D12_SRV_DIMENSION_BUFFER, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_SRV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + Flags: d3d12_ty::D3D12_BUFFER_SRV_FLAG_RAW, + } + }; + unsafe { + self.raw.CreateShaderResourceView( + data.buffer.resource.as_mut_ptr(), + &raw_desc, + handle, + ) + }; + } + wgt::BufferBindingType::Storage { read_only: false } => { + let mut raw_desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d12_ty::D3D12_UAV_DIMENSION_BUFFER, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_UAV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: d3d12_ty::D3D12_BUFFER_UAV_FLAG_RAW, + } + }; + unsafe { + self.raw.CreateUnorderedAccessView( + data.buffer.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle, + ) + }; + } + } + inner.stage.push(handle); + } + } + wgt::BindingType::Texture { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.textures[start..end] { + let handle = data.view.handle_srv.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + } + wgt::BindingType::StorageTexture { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.textures[start..end] { + let handle = data.view.handle_uav.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.samplers[start..end] { + cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); + } + } + } + } + + let handle_views = match cpu_views { + Some(inner) => { + let dual = unsafe { + descriptor::upload( + self.raw, + &inner, + &self.shared.heap_views, + &desc.layout.copy_counts, + ) + }?; + Some(dual) + } + None => None, + }; + let handle_samplers = match cpu_samplers { + Some(inner) => { + let dual = unsafe { + descriptor::upload( + self.raw, + &inner, + &self.shared.heap_samplers, + &desc.layout.copy_counts, + ) + }?; + Some(dual) + } + None => None, + }; + + Ok(super::BindGroup { + handle_views, + handle_samplers, + dynamic_buffers, + }) + } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + if let Some(dual) = group.handle_views { + self.shared.heap_views.free_slice(dual); + } + if let Some(dual) = group.handle_samplers { + self.shared.heap_samplers.free_slice(dual); + } + } + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + let raw_name = desc.label.and_then(|label| ffi::CString::new(label).ok()); + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga, raw_name }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) { + // just drop + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + let (topology_class, topology) = conv::map_topology(desc.primitive.topology); + let mut shader_stages = wgt::ShaderStages::VERTEX; + + let blob_vs = + self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_fs = match desc.fragment_stage { + Some(ref stage) => { + shader_stages |= wgt::ShaderStages::FRAGMENT; + Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)?) + } + None => None, + }; + + let mut vertex_strides = [None; crate::MAX_VERTEX_BUFFERS]; + let mut input_element_descs = Vec::new(); + for (i, (stride, vbuf)) in vertex_strides + .iter_mut() + .zip(desc.vertex_buffers) + .enumerate() + { + *stride = NonZeroU32::new(vbuf.array_stride as u32); + let (slot_class, step_rate) = match vbuf.step_mode { + wgt::VertexStepMode::Vertex => { + (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) + } + wgt::VertexStepMode::Instance => { + (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) + } + }; + for attribute in vbuf.attributes { + input_element_descs.push(d3d12_ty::D3D12_INPUT_ELEMENT_DESC { + SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _, + SemanticIndex: attribute.shader_location, + Format: auxil::dxgi::conv::map_vertex_format(attribute.format), + InputSlot: i as u32, + AlignedByteOffset: attribute.offset as u32, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate, + }); + } + } + + let mut rtv_formats = [dxgiformat::DXGI_FORMAT_UNKNOWN; + d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + for (rtv_format, ct) in rtv_formats.iter_mut().zip(desc.color_targets) { + if let Some(ct) = ct.as_ref() { + *rtv_format = auxil::dxgi::conv::map_texture_format(ct.format); + } + } + + let bias = desc + .depth_stencil + .as_ref() + .map(|ds| ds.bias) + .unwrap_or_default(); + + let raw_rasterizer = d3d12_ty::D3D12_RASTERIZER_DESC { + FillMode: conv::map_polygon_mode(desc.primitive.polygon_mode), + CullMode: match desc.primitive.cull_mode { + None => d3d12_ty::D3D12_CULL_MODE_NONE, + Some(wgt::Face::Front) => d3d12_ty::D3D12_CULL_MODE_FRONT, + Some(wgt::Face::Back) => d3d12_ty::D3D12_CULL_MODE_BACK, + }, + FrontCounterClockwise: match desc.primitive.front_face { + wgt::FrontFace::Cw => 0, + wgt::FrontFace::Ccw => 1, + }, + DepthBias: bias.constant, + DepthBiasClamp: bias.clamp, + SlopeScaledDepthBias: bias.slope_scale, + DepthClipEnable: BOOL::from(!desc.primitive.unclipped_depth), + MultisampleEnable: BOOL::from(desc.multisample.count > 1), + ForcedSampleCount: 0, + AntialiasedLineEnable: 0, + ConservativeRaster: if desc.primitive.conservative { + d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + } else { + d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + }, + }; + + let raw_desc = d3d12_ty::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: desc.layout.shared.signature.as_mut_ptr(), + VS: *blob_vs.create_native_shader(), + PS: match blob_fs { + Some(ref shader) => *shader.create_native_shader(), + None => *d3d12::Shader::null(), + }, + GS: *d3d12::Shader::null(), + DS: *d3d12::Shader::null(), + HS: *d3d12::Shader::null(), + StreamOutput: d3d12_ty::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }, + BlendState: d3d12_ty::D3D12_BLEND_DESC { + AlphaToCoverageEnable: BOOL::from(desc.multisample.alpha_to_coverage_enabled), + IndependentBlendEnable: 1, + RenderTarget: conv::map_render_targets(desc.color_targets), + }, + SampleMask: desc.multisample.mask as u32, + RasterizerState: raw_rasterizer, + DepthStencilState: match desc.depth_stencil { + Some(ref ds) => conv::map_depth_stencil(ds), + None => unsafe { mem::zeroed() }, + }, + InputLayout: d3d12_ty::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: if input_element_descs.is_empty() { + ptr::null() + } else { + input_element_descs.as_ptr() + }, + NumElements: input_element_descs.len() as u32, + }, + IBStripCutValue: match desc.primitive.strip_index_format { + Some(wgt::IndexFormat::Uint16) => { + d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF + } + Some(wgt::IndexFormat::Uint32) => { + d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + } + None => d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + }, + PrimitiveTopologyType: topology_class, + NumRenderTargets: desc.color_targets.len() as u32, + RTVFormats: rtv_formats, + DSVFormat: desc + .depth_stencil + .as_ref() + .map_or(dxgiformat::DXGI_FORMAT_UNKNOWN, |ds| { + auxil::dxgi::conv::map_texture_format(ds.format) + }), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.multisample.count, + Quality: 0, + }, + NodeMask: 0, + CachedPSO: d3d12_ty::D3D12_CACHED_PIPELINE_STATE { + pCachedBlob: ptr::null(), + CachedBlobSizeInBytes: 0, + }, + Flags: d3d12_ty::D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + let mut raw = d3d12::PipelineState::null(); + let hr = { + profiling::scope!("ID3D12Device::CreateGraphicsPipelineState"); + unsafe { + self.raw.CreateGraphicsPipelineState( + &raw_desc, + &d3d12_ty::ID3D12PipelineState::uuidof(), + raw.mut_void(), + ) + } + }; + + unsafe { blob_vs.destroy() }; + if let Some(blob_fs) = blob_fs { + unsafe { blob_fs.destroy() }; + }; + + hr.into_result() + .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.into_owned()))?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::RenderPipeline { + raw, + layout: desc.layout.shared.clone(), + topology, + vertex_strides, + }) + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + unsafe { pipeline.raw.destroy() }; + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + + let pair = { + profiling::scope!("ID3D12Device::CreateComputePipelineState"); + self.raw.create_compute_pipeline_state( + desc.layout.shared.signature, + blob_cs.create_native_shader(), + 0, + d3d12::CachedPSO::null(), + d3d12::PipelineStateFlags::empty(), + ) + }; + + unsafe { blob_cs.destroy() }; + + let raw = pair.into_result().map_err(|err| { + crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) + })?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::ComputePipeline { + raw, + layout: desc.layout.shared.clone(), + }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + unsafe { pipeline.raw.destroy() }; + } + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + let (heap_ty, raw_ty) = match desc.ty { + wgt::QueryType::Occlusion => ( + d3d12::QueryHeapType::Occlusion, + d3d12_ty::D3D12_QUERY_TYPE_BINARY_OCCLUSION, + ), + wgt::QueryType::PipelineStatistics(_) => ( + d3d12::QueryHeapType::PipelineStatistics, + d3d12_ty::D3D12_QUERY_TYPE_PIPELINE_STATISTICS, + ), + wgt::QueryType::Timestamp => ( + d3d12::QueryHeapType::Timestamp, + d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + ), + }; + + let raw = self + .raw + .create_query_heap(heap_ty, desc.count, 0) + .into_device_result("Query heap creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::QuerySet { raw, raw_ty }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + unsafe { set.raw.destroy() }; + } + + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + let mut raw = d3d12::Fence::null(); + let hr = unsafe { + self.raw.CreateFence( + 0, + d3d12_ty::D3D12_FENCE_FLAG_NONE, + &d3d12_ty::ID3D12Fence::uuidof(), + raw.mut_void(), + ) + }; + hr.into_device_result("Fence creation")?; + Ok(super::Fence { raw }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + unsafe { fence.raw.destroy() }; + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + Ok(unsafe { fence.raw.GetCompletedValue() }) + } + unsafe fn wait( + &self, + fence: &super::Fence, + value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + if unsafe { fence.raw.GetCompletedValue() } >= value { + return Ok(true); + } + let hr = fence.raw.set_event_on_completion(self.idler.event, value); + hr.into_device_result("Set event")?; + + match unsafe { synchapi::WaitForSingleObject(self.idler.event.0, timeout_ms) } { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::DeviceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::DeviceError::Lost) + } + } + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(feature = "renderdoc")] + { + unsafe { + self.render_doc + .start_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + } + } + #[cfg(not(feature = "renderdoc"))] + false + } + + unsafe fn stop_capture(&self) { + #[cfg(feature = "renderdoc")] + unsafe { + self.render_doc + .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + } + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/instance.rs b/third_party/rust/wgpu-hal/src/dx12/instance.rs new file mode 100644 index 0000000000..be7a3f7306 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/instance.rs @@ -0,0 +1,117 @@ +use winapi::shared::{dxgi1_5, minwindef}; + +use super::SurfaceTarget; +use crate::auxil::{self, dxgi::result::HResult as _}; +use std::{mem, sync::Arc}; + +impl Drop for super::Instance { + fn drop(&mut self) { + unsafe { self.factory.destroy() }; + crate::auxil::dxgi::exception::unregister_exception_handler(); + } +} + +impl crate::Instance<super::Api> for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + let lib_main = d3d12::D3D12Lib::new().map_err(|_| crate::InstanceError)?; + + if desc.flags.contains(crate::InstanceFlags::VALIDATION) { + // Enable debug layer + match lib_main.get_debug_interface() { + Ok(pair) => match pair.into_result() { + Ok(debug_controller) => { + debug_controller.enable_layer(); + unsafe { debug_controller.Release() }; + } + Err(err) => { + log::warn!("Unable to enable D3D12 debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for D3D12 not found: {:?}", err); + } + } + } + + // Create DXGIFactory4 + let (lib_dxgi, factory) = auxil::dxgi::factory::create_factory( + auxil::dxgi::factory::DxgiFactoryType::Factory4, + desc.flags, + )?; + + // Create IDXGIFactoryMedia + let factory_media = match lib_dxgi.create_factory_media() { + Ok(pair) => match pair.into_result() { + Ok(factory_media) => Some(factory_media), + Err(err) => { + log::error!("Failed to create IDXGIFactoryMedia: {}", err); + None + } + }, + Err(err) => { + log::info!("IDXGIFactory1 creation function not found: {:?}", err); + None + } + }; + + let mut supports_allow_tearing = false; + #[allow(trivial_casts)] + if let Some(factory5) = factory.as_factory5() { + let mut allow_tearing: minwindef::BOOL = minwindef::FALSE; + let hr = unsafe { + factory5.CheckFeatureSupport( + dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, + &mut allow_tearing as *mut _ as *mut _, + mem::size_of::<minwindef::BOOL>() as _, + ) + }; + + match hr.into_result() { + Err(err) => log::warn!("Unable to check for tearing support: {}", err), + Ok(()) => supports_allow_tearing = true, + } + } + + Ok(Self { + // The call to create_factory will only succeed if we get a factory4, so this is safe. + factory, + factory_media, + library: Arc::new(lib_main), + _lib_dxgi: lib_dxgi, + supports_allow_tearing, + flags: desc.flags, + dx12_shader_compiler: desc.dx12_shader_compiler.clone(), + }) + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<super::Surface, crate::InstanceError> { + match window_handle { + raw_window_handle::RawWindowHandle::Win32(handle) => Ok(super::Surface { + factory: self.factory, + factory_media: self.factory_media, + target: SurfaceTarget::WndHandle(handle.hwnd as *mut _), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: None, + }), + _ => Err(crate::InstanceError), + } + } + unsafe fn destroy_surface(&self, _surface: super::Surface) { + // just drop + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let adapters = auxil::dxgi::factory::enumerate_adapters(self.factory); + + adapters + .into_iter() + .filter_map(|raw| { + super::Adapter::expose(raw, &self.library, self.flags, &self.dx12_shader_compiler) + }) + .collect() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/mod.rs b/third_party/rust/wgpu-hal/src/dx12/mod.rs new file mode 100644 index 0000000000..6cdf3ffe64 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/mod.rs @@ -0,0 +1,902 @@ +/*! +# DirectX12 API internals. + +Generally the mapping is straightforwad. + +## Resource transitions + +D3D12 API matches WebGPU internal states very well. The only +caveat here is issuing a special UAV barrier whenever both source +and destination states match, and they are for storage sync. + +## Memory + +For now, all resources are created with "committed" memory. + +## Resource binding + +See ['Device::create_pipeline_layout`] documentation for the structure +of the root signature corresponding to WebGPU pipeline layout. + +Binding groups is mostly straightforward, with one big caveat: +all bindings have to be reset whenever the pipeline layout changes. +This is the rule of D3D12, and we can do nothing to help it. + +We detect this change at both [`crate::CommandEncoder::set_bind_group`] +and [`crate::CommandEncoder::set_render_pipeline`] with +[`crate::CommandEncoder::set_compute_pipeline`]. + +For this reason, in order avoid repeating the binding code, +we are binding everything in `CommandEncoder::update_root_elements`. +When the pipeline layout is changed, we reset all bindings. +Otherwise, we pass a range corresponding only to the current bind group. + +!*/ + +mod adapter; +mod command; +mod conv; +mod descriptor; +mod device; +mod instance; +mod shader_compilation; +mod suballocation; +mod types; +mod view; + +use crate::auxil::{self, dxgi::result::HResult as _}; + +use arrayvec::ArrayVec; +use parking_lot::Mutex; +use std::{ffi, fmt, mem, num::NonZeroU32, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_4, dxgitype, windef, winerror}, + um::{d3d12 as d3d12_ty, dcomp, synchapi, winbase, winnt}, + Interface as _, +}; + +#[derive(Clone)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = Texture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +// Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. +const MAX_ROOT_ELEMENTS: usize = 64; +const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10; + +pub struct Instance { + factory: d3d12::DxgiFactory, + factory_media: Option<d3d12::FactoryMedia>, + library: Arc<d3d12::D3D12Lib>, + supports_allow_tearing: bool, + _lib_dxgi: d3d12::DxgiLib, + flags: crate::InstanceFlags, + dx12_shader_compiler: wgt::Dx12Compiler, +} + +impl Instance { + pub unsafe fn create_surface_from_visual( + &self, + visual: *mut dcomp::IDCompositionVisual, + ) -> Surface { + Surface { + factory: self.factory, + factory_media: self.factory_media, + target: SurfaceTarget::Visual(unsafe { d3d12::WeakPtr::from_raw(visual) }), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: None, + } + } + + pub unsafe fn create_surface_from_surface_handle( + &self, + surface_handle: winnt::HANDLE, + ) -> Surface { + Surface { + factory: self.factory, + factory_media: self.factory_media, + target: SurfaceTarget::SurfaceHandle(surface_handle), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: None, + } + } +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +struct SwapChain { + raw: d3d12::WeakPtr<dxgi1_4::IDXGISwapChain3>, + // need to associate raw image pointers with the swapchain so they can be properly released + // when the swapchain is destroyed + resources: Vec<d3d12::Resource>, + waitable: winnt::HANDLE, + acquired_count: usize, + present_mode: wgt::PresentMode, + format: wgt::TextureFormat, + size: wgt::Extent3d, +} + +enum SurfaceTarget { + WndHandle(windef::HWND), + Visual(d3d12::WeakPtr<dcomp::IDCompositionVisual>), + SurfaceHandle(winnt::HANDLE), +} + +pub struct Surface { + factory: d3d12::DxgiFactory, + factory_media: Option<d3d12::FactoryMedia>, + target: SurfaceTarget, + supports_allow_tearing: bool, + swap_chain: Option<SwapChain>, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug, Clone, Copy)] +enum MemoryArchitecture { + Unified { + #[allow(unused)] + cache_coherent: bool, + }, + NonUnified, +} + +#[derive(Debug, Clone, Copy)] +struct PrivateCapabilities { + instance_flags: crate::InstanceFlags, + #[allow(unused)] + heterogeneous_resource_heaps: bool, + memory_architecture: MemoryArchitecture, + #[allow(unused)] // TODO: Exists until windows-rs is standard, then it can probably be removed? + heap_create_not_zeroed: bool, + casting_fully_typed_format_supported: bool, +} + +#[derive(Default)] +struct Workarounds { + // On WARP, temporary CPU descriptors are still used by the runtime + // after we call `CopyDescriptors`. + avoid_cpu_descriptor_overwrites: bool, +} + +pub struct Adapter { + raw: d3d12::DxgiAdapter, + device: d3d12::Device, + library: Arc<d3d12::D3D12Lib>, + private_caps: PrivateCapabilities, + presentation_timer: auxil::dxgi::time::PresentationTimer, + //Note: this isn't used right now, but we'll need it later. + #[allow(unused)] + workarounds: Workarounds, + dx12_shader_compiler: wgt::Dx12Compiler, +} + +unsafe impl Send for Adapter {} +unsafe impl Sync for Adapter {} + +/// Helper structure for waiting for GPU. +struct Idler { + fence: d3d12::Fence, + event: d3d12::Event, +} + +impl Idler { + unsafe fn destroy(self) { + unsafe { self.fence.destroy() }; + } +} + +struct CommandSignatures { + draw: d3d12::CommandSignature, + draw_indexed: d3d12::CommandSignature, + dispatch: d3d12::CommandSignature, +} + +impl CommandSignatures { + unsafe fn destroy(&self) { + unsafe { + self.draw.destroy(); + self.draw_indexed.destroy(); + self.dispatch.destroy(); + } + } +} + +struct DeviceShared { + zero_buffer: d3d12::Resource, + cmd_signatures: CommandSignatures, + heap_views: descriptor::GeneralHeap, + heap_samplers: descriptor::GeneralHeap, +} + +impl DeviceShared { + unsafe fn destroy(&self) { + unsafe { + self.zero_buffer.destroy(); + self.cmd_signatures.destroy(); + self.heap_views.raw.destroy(); + self.heap_samplers.raw.destroy(); + } + } +} + +pub struct Device { + raw: d3d12::Device, + present_queue: d3d12::CommandQueue, + idler: Idler, + private_caps: PrivateCapabilities, + shared: Arc<DeviceShared>, + // CPU only pools + rtv_pool: Mutex<descriptor::CpuPool>, + dsv_pool: Mutex<descriptor::CpuPool>, + srv_uav_pool: Mutex<descriptor::CpuPool>, + sampler_pool: Mutex<descriptor::CpuPool>, + // library + library: Arc<d3d12::D3D12Lib>, + #[cfg(feature = "renderdoc")] + render_doc: crate::auxil::renderdoc::RenderDoc, + null_rtv_handle: descriptor::Handle, + mem_allocator: Option<Mutex<suballocation::GpuAllocatorWrapper>>, + dxc_container: Option<shader_compilation::DxcContainer>, +} + +unsafe impl Send for Device {} +unsafe impl Sync for Device {} + +pub struct Queue { + raw: d3d12::CommandQueue, + temp_lists: Vec<d3d12::CommandList>, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +#[derive(Default)] +struct Temp { + marker: Vec<u16>, + barriers: Vec<d3d12_ty::D3D12_RESOURCE_BARRIER>, +} + +impl Temp { + fn clear(&mut self) { + self.marker.clear(); + self.barriers.clear(); + } +} + +struct PassResolve { + src: (d3d12::Resource, u32), + dst: (d3d12::Resource, u32), + format: d3d12::Format, +} + +#[derive(Clone, Copy)] +enum RootElement { + Empty, + Constant, + SpecialConstantBuffer { + base_vertex: i32, + base_instance: u32, + other: u32, + }, + /// Descriptor table. + Table(d3d12::GpuDescriptor), + /// Descriptor for a buffer that has dynamic offset. + DynamicOffsetBuffer { + kind: BufferViewKind, + address: d3d12::GpuAddress, + }, +} + +#[derive(Clone, Copy)] +enum PassKind { + Render, + Compute, + Transfer, +} + +struct PassState { + has_label: bool, + resolves: ArrayVec<PassResolve, { crate::MAX_COLOR_ATTACHMENTS }>, + layout: PipelineLayoutShared, + root_elements: [RootElement; MAX_ROOT_ELEMENTS], + constant_data: [u32; MAX_ROOT_ELEMENTS], + dirty_root_elements: u64, + vertex_buffers: [d3d12_ty::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: usize, + kind: PassKind, +} + +#[test] +fn test_dirty_mask() { + assert_eq!(MAX_ROOT_ELEMENTS, std::mem::size_of::<u64>() * 8); +} + +impl PassState { + fn new() -> Self { + PassState { + has_label: false, + resolves: ArrayVec::new(), + layout: PipelineLayoutShared { + signature: d3d12::RootSignature::null(), + total_root_elements: 0, + special_constants_root_index: None, + root_constant_info: None, + }, + root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], + constant_data: [0; MAX_ROOT_ELEMENTS], + dirty_root_elements: 0, + vertex_buffers: [unsafe { mem::zeroed() }; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: 0, + kind: PassKind::Transfer, + } + } + + fn clear(&mut self) { + // careful about heap allocations! + *self = Self::new(); + } +} + +pub struct CommandEncoder { + allocator: d3d12::CommandAllocator, + device: d3d12::Device, + shared: Arc<DeviceShared>, + null_rtv_handle: descriptor::Handle, + list: Option<d3d12::GraphicsCommandList>, + free_lists: Vec<d3d12::GraphicsCommandList>, + pass: PassState, + temp: Temp, +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("allocator", &self.allocator) + .field("device", &self.allocator) + .finish() + } +} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: d3d12::GraphicsCommandList, + closed: bool, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct Buffer { + resource: d3d12::Resource, + size: wgt::BufferAddress, + allocation: Option<suballocation::AllocationWrapper>, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl crate::BufferBinding<'_, Api> { + fn resolve_size(&self) -> wgt::BufferAddress { + match self.size { + Some(size) => size.get(), + None => self.buffer.size - self.offset, + } + } + + fn resolve_address(&self) -> wgt::BufferAddress { + self.buffer.resource.gpu_virtual_address() + self.offset + } +} + +#[derive(Debug)] +pub struct Texture { + resource: d3d12::Resource, + format: wgt::TextureFormat, + dimension: wgt::TextureDimension, + size: wgt::Extent3d, + mip_level_count: u32, + sample_count: u32, + allocation: Option<suballocation::AllocationWrapper>, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +impl Texture { + fn array_layer_count(&self) -> u32 { + match self.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D3 => 1, + wgt::TextureDimension::D2 => self.size.depth_or_array_layers, + } + } + + /// see https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice + fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 { + mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count + } + + fn calc_subresource_for_copy(&self, base: &crate::TextureCopyBase) -> u32 { + let plane = match base.aspect { + crate::FormatAspects::COLOR | crate::FormatAspects::DEPTH => 0, + crate::FormatAspects::STENCIL => 1, + _ => unreachable!(), + }; + self.calc_subresource(base.mip_level, base.array_layer, plane) + } +} + +#[derive(Debug)] +pub struct TextureView { + raw_format: d3d12::Format, + aspects: crate::FormatAspects, + target_base: (d3d12::Resource, u32), + handle_srv: Option<descriptor::Handle>, + handle_uav: Option<descriptor::Handle>, + handle_rtv: Option<descriptor::Handle>, + handle_dsv_ro: Option<descriptor::Handle>, + handle_dsv_rw: Option<descriptor::Handle>, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} + +#[derive(Debug)] +pub struct Sampler { + handle: descriptor::Handle, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +#[derive(Debug)] +pub struct QuerySet { + raw: d3d12::QueryHeap, + raw_ty: d3d12_ty::D3D12_QUERY_TYPE, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} + +#[derive(Debug)] +pub struct Fence { + raw: d3d12::Fence, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +pub struct BindGroupLayout { + /// Sorted list of entries. + entries: Vec<wgt::BindGroupLayoutEntry>, + cpu_heap_views: Option<descriptor::CpuHeap>, + cpu_heap_samplers: Option<descriptor::CpuHeap>, + copy_counts: Vec<u32>, // all 1's +} + +#[derive(Clone, Copy)] +enum BufferViewKind { + Constant, + ShaderResource, + UnorderedAccess, +} + +#[derive(Debug)] +pub struct BindGroup { + handle_views: Option<descriptor::DualHandle>, + handle_samplers: Option<descriptor::DualHandle>, + dynamic_buffers: Vec<d3d12::GpuAddress>, +} + +bitflags::bitflags! { + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct TableTypes: u8 { + const SRV_CBV_UAV = 1 << 0; + const SAMPLERS = 1 << 1; + } +} + +// Element (also known as parameter) index into the root signature. +type RootIndex = u32; + +struct BindGroupInfo { + base_root_index: RootIndex, + tables: TableTypes, + dynamic_buffers: Vec<BufferViewKind>, +} + +#[derive(Clone)] +struct RootConstantInfo { + root_index: RootIndex, + range: std::ops::Range<u32>, +} + +#[derive(Clone)] +struct PipelineLayoutShared { + signature: d3d12::RootSignature, + total_root_elements: RootIndex, + special_constants_root_index: Option<RootIndex>, + root_constant_info: Option<RootConstantInfo>, +} + +unsafe impl Send for PipelineLayoutShared {} +unsafe impl Sync for PipelineLayoutShared {} + +pub struct PipelineLayout { + shared: PipelineLayoutShared, + // Storing for each associated bind group, which tables we created + // in the root signature. This is required for binding descriptor sets. + bind_group_infos: ArrayVec<BindGroupInfo, { crate::MAX_BIND_GROUPS }>, + naga_options: naga::back::hlsl::Options, +} + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + raw_name: Option<ffi::CString>, +} + +pub(super) enum CompiledShader { + #[allow(unused)] + Dxc(Vec<u8>), + Fxc(d3d12::Blob), +} + +impl CompiledShader { + fn create_native_shader(&self) -> d3d12::Shader { + match *self { + CompiledShader::Dxc(ref shader) => d3d12::Shader::from_raw(shader), + CompiledShader::Fxc(shader) => d3d12::Shader::from_blob(shader), + } + } + + unsafe fn destroy(self) { + match self { + CompiledShader::Dxc(_) => {} + CompiledShader::Fxc(shader) => unsafe { + shader.destroy(); + }, + } + } +} + +pub struct RenderPipeline { + raw: d3d12::PipelineState, + layout: PipelineLayoutShared, + topology: d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, + vertex_strides: [Option<NonZeroU32>; crate::MAX_VERTEX_BUFFERS], +} + +unsafe impl Send for RenderPipeline {} +unsafe impl Sync for RenderPipeline {} + +pub struct ComputePipeline { + raw: d3d12::PipelineState, + layout: PipelineLayoutShared, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +impl SwapChain { + unsafe fn release_resources(self) -> d3d12::WeakPtr<dxgi1_4::IDXGISwapChain3> { + for resource in self.resources { + unsafe { resource.destroy() }; + } + self.raw + } + + unsafe fn wait( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<bool, crate::SurfaceError> { + let timeout_ms = match timeout { + Some(duration) => duration.as_millis() as u32, + None => winbase::INFINITE, + }; + match unsafe { synchapi::WaitForSingleObject(self.waitable, timeout_ms) } { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::SurfaceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::SurfaceError::Lost) + } + } + } +} + +impl crate::Surface<Api> for Surface { + unsafe fn configure( + &mut self, + device: &Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + // We always set ALLOW_TEARING on the swapchain no matter + // what kind of swapchain we want because ResizeBuffers + // cannot change if ALLOW_TEARING is applied to the swapchain. + if self.supports_allow_tearing { + flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + } + + let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format); + + let swap_chain = match self.swap_chain.take() { + //Note: this path doesn't properly re-initialize all of the things + Some(sc) => { + // can't have image resources in flight used by GPU + let _ = unsafe { device.wait_idle() }; + + let raw = unsafe { sc.release_resources() }; + let result = unsafe { + raw.ResizeBuffers( + config.swap_chain_size, + config.extent.width, + config.extent.height, + non_srgb_format, + flags, + ) + }; + if let Err(err) = result.into_result() { + log::error!("ResizeBuffers failed: {}", err); + return Err(crate::SurfaceError::Other("window is in use")); + } + raw + } + None => { + let desc = d3d12::SwapchainDesc { + alpha_mode: auxil::dxgi::conv::map_acomposite_alpha_mode( + config.composite_alpha_mode, + ), + width: config.extent.width, + height: config.extent.height, + format: non_srgb_format, + stereo: false, + sample: d3d12::SampleDesc { + count: 1, + quality: 0, + }, + buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, + buffer_count: config.swap_chain_size, + scaling: d3d12::Scaling::Stretch, + swap_effect: d3d12::SwapEffect::FlipDiscard, + flags, + }; + let swap_chain1 = match self.target { + SurfaceTarget::Visual(_) => { + profiling::scope!("IDXGIFactory4::CreateSwapChainForComposition"); + self.factory + .unwrap_factory2() + .create_swapchain_for_composition( + device.present_queue.as_mut_ptr() as *mut _, + &desc, + ) + .into_result() + } + SurfaceTarget::SurfaceHandle(handle) => { + profiling::scope!( + "IDXGIFactoryMedia::CreateSwapChainForCompositionSurfaceHandle" + ); + self.factory_media + .ok_or(crate::SurfaceError::Other("IDXGIFactoryMedia not found"))? + .create_swapchain_for_composition_surface_handle( + device.present_queue.as_mut_ptr() as *mut _, + handle, + &desc, + ) + .into_result() + } + SurfaceTarget::WndHandle(hwnd) => { + profiling::scope!("IDXGIFactory4::CreateSwapChainForHwnd"); + self.factory + .as_factory2() + .unwrap() + .create_swapchain_for_hwnd( + device.present_queue.as_mut_ptr() as *mut _, + hwnd, + &desc, + ) + .into_result() + } + }; + + let swap_chain1 = match swap_chain1 { + Ok(s) => s, + Err(err) => { + log::error!("SwapChain creation error: {}", err); + return Err(crate::SurfaceError::Other("swap chain creation")); + } + }; + + match self.target { + SurfaceTarget::WndHandle(_) | SurfaceTarget::SurfaceHandle(_) => {} + SurfaceTarget::Visual(visual) => { + if let Err(err) = + unsafe { visual.SetContent(swap_chain1.as_unknown()) }.into_result() + { + log::error!("Unable to SetContent: {}", err); + return Err(crate::SurfaceError::Other( + "IDCompositionVisual::SetContent", + )); + } + } + } + + match unsafe { swap_chain1.cast::<dxgi1_4::IDXGISwapChain3>() }.into_result() { + Ok(swap_chain3) => { + unsafe { swap_chain1.destroy() }; + swap_chain3 + } + Err(err) => { + log::error!("Unable to cast swap chain: {}", err); + return Err(crate::SurfaceError::Other("swap chain cast to 3")); + } + } + } + }; + + match self.target { + SurfaceTarget::WndHandle(wnd_handle) => { + // Disable automatic Alt+Enter handling by DXGI. + const DXGI_MWA_NO_WINDOW_CHANGES: u32 = 1; + const DXGI_MWA_NO_ALT_ENTER: u32 = 2; + unsafe { + self.factory.MakeWindowAssociation( + wnd_handle, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER, + ) + }; + } + SurfaceTarget::Visual(_) | SurfaceTarget::SurfaceHandle(_) => {} + } + + unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) }; + let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() }; + + let mut resources = vec![d3d12::Resource::null(); config.swap_chain_size as usize]; + for (i, res) in resources.iter_mut().enumerate() { + unsafe { + swap_chain.GetBuffer(i as _, &d3d12_ty::ID3D12Resource::uuidof(), res.mut_void()) + }; + } + + self.swap_chain = Some(SwapChain { + raw: swap_chain, + resources, + waitable, + acquired_count: 0, + present_mode: config.present_mode, + format: config.format, + size: config.extent, + }); + + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &Device) { + if let Some(mut sc) = self.swap_chain.take() { + unsafe { + let _ = sc.wait(None); + //TODO: this shouldn't be needed, + // but it complains that the queue is still used otherwise + let _ = device.wait_idle(); + let raw = sc.release_resources(); + raw.destroy(); + } + } + } + + unsafe fn acquire_texture( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> { + let sc = self.swap_chain.as_mut().unwrap(); + + unsafe { sc.wait(timeout) }?; + + let base_index = unsafe { sc.raw.GetCurrentBackBufferIndex() } as usize; + let index = (base_index + sc.acquired_count) % sc.resources.len(); + sc.acquired_count += 1; + + let texture = Texture { + resource: sc.resources[index], + format: sc.format, + dimension: wgt::TextureDimension::D2, + size: sc.size, + mip_level_count: 1, + sample_count: 1, + allocation: None, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&mut self, _texture: Texture) { + let sc = self.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; + } +} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&CommandBuffer], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + self.temp_lists.clear(); + for cmd_buf in command_buffers { + self.temp_lists.push(cmd_buf.raw.as_list()); + } + + { + profiling::scope!("ID3D12CommandQueue::ExecuteCommandLists"); + self.raw.execute_command_lists(&self.temp_lists); + } + + if let Some((fence, value)) = signal_fence { + self.raw + .signal(fence.raw, value) + .into_device_result("Signal fence")?; + } + Ok(()) + } + unsafe fn present( + &mut self, + surface: &mut Surface, + _texture: Texture, + ) -> Result<(), crate::SurfaceError> { + let sc = surface.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; + + let (interval, flags) = match sc.present_mode { + // We only allow immediate if ALLOW_TEARING is valid. + wgt::PresentMode::Immediate => (0, dxgi::DXGI_PRESENT_ALLOW_TEARING), + wgt::PresentMode::Mailbox => (0, 0), + wgt::PresentMode::Fifo => (1, 0), + m => unreachable!("Cannot make surface with present mode {m:?}"), + }; + + profiling::scope!("IDXGISwapchain3::Present"); + unsafe { sc.raw.Present(interval, flags) }; + + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + let mut frequency = 0u64; + unsafe { self.raw.GetTimestampFrequency(&mut frequency) }; + (1_000_000_000.0 / frequency as f64) as f32 + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs b/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs new file mode 100644 index 0000000000..9f9be7c409 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs @@ -0,0 +1,294 @@ +use std::ptr; + +pub(super) use dxc::{compile_dxc, get_dxc_container, DxcContainer}; +use winapi::um::d3dcompiler; + +use crate::auxil::dxgi::result::HResult; + +// This exists so that users who don't want to use dxc can disable the dxc_shader_compiler feature +// and not have to compile hassle_rs. +// Currently this will use Dxc if it is chosen as the dx12 compiler at `Instance` creation time, and will +// fallback to FXC if the Dxc libraries (dxil.dll and dxcompiler.dll) are not found, or if Fxc is chosen at' +// `Instance` creation time. + +pub(super) fn compile_fxc( + device: &super::Device, + source: &String, + source_name: &str, + raw_ep: &std::ffi::CString, + stage_bit: wgt::ShaderStages, + full_stage: String, +) -> ( + Result<super::CompiledShader, crate::PipelineError>, + log::Level, +) { + profiling::scope!("compile_fxc"); + let mut shader_data = d3d12::Blob::null(); + let mut compile_flags = d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS; + if device + .private_caps + .instance_flags + .contains(crate::InstanceFlags::DEBUG) + { + compile_flags |= d3dcompiler::D3DCOMPILE_DEBUG | d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION; + } + let mut error = d3d12::Blob::null(); + let hr = unsafe { + profiling::scope!("d3dcompiler::D3DCompile"); + d3dcompiler::D3DCompile( + source.as_ptr().cast(), + source.len(), + source_name.as_ptr().cast(), + ptr::null(), + ptr::null_mut(), + raw_ep.as_ptr(), + full_stage.as_ptr().cast(), + compile_flags, + 0, + shader_data.mut_void().cast(), + error.mut_void().cast(), + ) + }; + + match hr.into_result() { + Ok(()) => ( + Ok(super::CompiledShader::Fxc(shader_data)), + log::Level::Info, + ), + Err(e) => { + let mut full_msg = format!("FXC D3DCompile error ({e})"); + if !error.is_null() { + use std::fmt::Write as _; + let message = unsafe { + std::slice::from_raw_parts( + error.GetBufferPointer() as *const u8, + error.GetBufferSize(), + ) + }; + let _ = write!(full_msg, ": {}", String::from_utf8_lossy(message)); + unsafe { + error.destroy(); + } + } + ( + Err(crate::PipelineError::Linkage(stage_bit, full_msg)), + log::Level::Warn, + ) + } + } +} + +// The Dxc implementation is behind a feature flag so that users who don't want to use dxc can disable the feature. +#[cfg(feature = "dxc_shader_compiler")] +mod dxc { + use std::path::PathBuf; + + // Destructor order should be fine since _dxil and _dxc don't rely on each other. + pub(crate) struct DxcContainer { + compiler: hassle_rs::DxcCompiler, + library: hassle_rs::DxcLibrary, + validator: hassle_rs::DxcValidator, + // Has to be held onto for the lifetime of the device otherwise shaders will fail to compile. + _dxc: hassle_rs::Dxc, + // Also Has to be held onto for the lifetime of the device otherwise shaders will fail to validate. + _dxil: hassle_rs::Dxil, + } + + pub(crate) fn get_dxc_container( + dxc_path: Option<PathBuf>, + dxil_path: Option<PathBuf>, + ) -> Result<Option<DxcContainer>, crate::DeviceError> { + // Make sure that dxil.dll exists. + let dxil = match hassle_rs::Dxil::new(dxil_path) { + Ok(dxil) => dxil, + Err(e) => { + log::warn!("Failed to load dxil.dll. Defaulting to Fxc instead: {}", e); + return Ok(None); + } + }; + + // Needed for explicit validation. + let validator = dxil.create_validator()?; + + let dxc = match hassle_rs::Dxc::new(dxc_path) { + Ok(dxc) => dxc, + Err(e) => { + log::warn!( + "Failed to load dxcompiler.dll. Defaulting to Fxc instead: {}", + e + ); + return Ok(None); + } + }; + let compiler = dxc.create_compiler()?; + let library = dxc.create_library()?; + + Ok(Some(DxcContainer { + _dxc: dxc, + compiler, + library, + _dxil: dxil, + validator, + })) + } + + pub(crate) fn compile_dxc( + device: &crate::dx12::Device, + source: &str, + source_name: &str, + raw_ep: &str, + stage_bit: wgt::ShaderStages, + full_stage: String, + dxc_container: &DxcContainer, + ) -> ( + Result<crate::dx12::CompiledShader, crate::PipelineError>, + log::Level, + ) { + profiling::scope!("compile_dxc"); + let mut compile_flags = arrayvec::ArrayVec::<&str, 4>::new_const(); + compile_flags.push("-Ges"); // d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS + compile_flags.push("-Vd"); // Disable implicit validation to work around bugs when dxil.dll isn't in the local directory. + if device + .private_caps + .instance_flags + .contains(crate::InstanceFlags::DEBUG) + { + compile_flags.push("-Zi"); // d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION + compile_flags.push("-Od"); // d3dcompiler::D3DCOMPILE_DEBUG + } + + let blob = match dxc_container + .library + .create_blob_with_encoding_from_str(source) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("DXC blob error: {e}"))) + { + Ok(blob) => blob, + Err(e) => return (Err(e), log::Level::Error), + }; + + let compiled = dxc_container.compiler.compile( + &blob, + source_name, + raw_ep, + &full_stage, + &compile_flags, + None, + &[], + ); + + let (result, log_level) = match compiled { + Ok(dxc_result) => match dxc_result.get_result() { + Ok(dxc_blob) => { + // Validate the shader. + match dxc_container.validator.validate(dxc_blob) { + Ok(validated_blob) => ( + Ok(crate::dx12::CompiledShader::Dxc(validated_blob.to_vec())), + log::Level::Info, + ), + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!( + "DXC validation error: {:?}\n{:?}", + get_error_string_from_dxc_result(&dxc_container.library, &e.0) + .unwrap_or_default(), + e.1 + ), + )), + log::Level::Error, + ), + } + } + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!("DXC compile error: {e}"), + )), + log::Level::Error, + ), + }, + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!( + "DXC compile error: {:?}", + get_error_string_from_dxc_result(&dxc_container.library, &e.0) + .unwrap_or_default() + ), + )), + log::Level::Error, + ), + }; + + (result, log_level) + } + + impl From<hassle_rs::HassleError> for crate::DeviceError { + fn from(value: hassle_rs::HassleError) -> Self { + match value { + hassle_rs::HassleError::Win32Error(e) => { + // TODO: This returns an HRESULT, should we try and use the associated Windows error message? + log::error!("Win32 error: {e:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::LoadLibraryError { filename, inner } => { + log::error!("Failed to load dxc library {filename:?}. Inner error: {inner:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::LibLoadingError(e) => { + log::error!("Failed to load dxc library. {e:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::WindowsOnly(e) => { + log::error!("Signing with dxil.dll is only supported on Windows. {e:?}"); + crate::DeviceError::Lost + } + // `ValidationError` and `CompileError` should never happen in a context involving `DeviceError` + hassle_rs::HassleError::ValidationError(_e) => unimplemented!(), + hassle_rs::HassleError::CompileError(_e) => unimplemented!(), + } + } + } + + fn get_error_string_from_dxc_result( + library: &hassle_rs::DxcLibrary, + error: &hassle_rs::DxcOperationResult, + ) -> Result<String, hassle_rs::HassleError> { + error + .get_error_buffer() + .and_then(|error| library.get_blob_as_string(&hassle_rs::DxcBlob::from(error))) + } +} + +// These are stubs for when the `dxc_shader_compiler` feature is disabled. +#[cfg(not(feature = "dxc_shader_compiler"))] +mod dxc { + use std::path::PathBuf; + + pub(crate) struct DxcContainer {} + + pub(crate) fn get_dxc_container( + _dxc_path: Option<PathBuf>, + _dxil_path: Option<PathBuf>, + ) -> Result<Option<DxcContainer>, crate::DeviceError> { + // Falls back to Fxc and logs an error. + log::error!("DXC shader compiler was requested on Instance creation, but the DXC feature is disabled. Enable the `dxc_shader_compiler` feature on wgpu_hal to use DXC."); + Ok(None) + } + + // It shouldn't be possible that this gets called with the `dxc_shader_compiler` feature disabled. + pub(crate) fn compile_dxc( + _device: &crate::dx12::Device, + _source: &str, + _source_name: &str, + _raw_ep: &str, + _stage_bit: wgt::ShaderStages, + _full_stage: String, + _dxc_container: &DxcContainer, + ) -> ( + Result<crate::dx12::CompiledShader, crate::PipelineError>, + log::Level, + ) { + unimplemented!("Something went really wrong, please report this. Attempted to compile shader with DXC, but the DXC feature is disabled. Enable the `dxc_shader_compiler` feature on wgpu_hal to use DXC."); + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/suballocation.rs b/third_party/rust/wgpu-hal/src/dx12/suballocation.rs new file mode 100644 index 0000000000..01ca997f21 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/suballocation.rs @@ -0,0 +1,331 @@ +pub(crate) use allocation::{ + create_allocator_wrapper, create_buffer_resource, create_texture_resource, + free_buffer_allocation, free_texture_allocation, AllocationWrapper, GpuAllocatorWrapper, +}; + +// This exists to work around https://github.com/gfx-rs/wgpu/issues/3207 +// Currently this will work the older, slower way if the windows_rs feature is disabled, +// and will use the fast path of suballocating buffers and textures using gpu_allocator if +// the windows_rs feature is enabled. + +// This is the fast path using gpu_allocator to suballocate buffers and textures. +#[cfg(feature = "windows_rs")] +mod allocation { + use d3d12::WeakPtr; + use parking_lot::Mutex; + use std::ptr; + use wgt::assertions::StrictAssertUnwrapExt; + use winapi::{ + um::{ + d3d12::{self as d3d12_ty, ID3D12Resource}, + winnt::HRESULT, + }, + Interface, + }; + + use gpu_allocator::{ + d3d12::{AllocationCreateDesc, ToWinapi, ToWindows}, + MemoryLocation, + }; + + #[derive(Debug)] + pub(crate) struct GpuAllocatorWrapper { + pub(crate) allocator: gpu_allocator::d3d12::Allocator, + } + + #[derive(Debug)] + pub(crate) struct AllocationWrapper { + pub(crate) allocation: gpu_allocator::d3d12::Allocation, + } + + pub(crate) fn create_allocator_wrapper( + raw: &d3d12::Device, + ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> { + let device = raw.as_ptr(); + + match gpu_allocator::d3d12::Allocator::new(&gpu_allocator::d3d12::AllocatorCreateDesc { + device: device.as_windows().clone(), + debug_settings: Default::default(), + }) { + Ok(allocator) => Ok(Some(Mutex::new(GpuAllocatorWrapper { allocator }))), + Err(e) => { + log::error!("Failed to create d3d12 allocator, error: {}", e); + Err(e)? + } + } + } + + pub(crate) fn create_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + let location = match (is_cpu_read, is_cpu_write) { + (true, true) => MemoryLocation::CpuToGpu, + (true, false) => MemoryLocation::GpuToCpu, + (false, true) => MemoryLocation::CpuToGpu, + (false, false) => MemoryLocation::GpuOnly, + }; + + let name = desc.label.unwrap_or("Unlabeled buffer"); + + // SAFETY: allocator exists when the windows_rs feature is enabled + let mut allocator = unsafe { + device + .mem_allocator + .as_ref() + .strict_unwrap_unchecked() + .lock() + }; + + // let mut allocator = unsafe { device.mem_allocator.as_ref().unwrap_unchecked().lock() }; + let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( + allocator.allocator.device().as_winapi(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + let hr = unsafe { + device.raw.CreatePlacedResource( + allocation.heap().as_winapi() as *mut _, + allocation.offset(), + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, Some(AllocationWrapper { allocation }))) + } + + pub(crate) fn create_texture_resource( + device: &crate::dx12::Device, + desc: &crate::TextureDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let location = MemoryLocation::GpuOnly; + + let name = desc.label.unwrap_or("Unlabeled texture"); + + // SAFETY: allocator exists when the windows_rs feature is enabled + let mut allocator = unsafe { + device + .mem_allocator + .as_ref() + .strict_unwrap_unchecked() + .lock() + }; + let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( + allocator.allocator.device().as_winapi(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + let hr = unsafe { + device.raw.CreatePlacedResource( + allocation.heap().as_winapi() as *mut _, + allocation.offset(), + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), // clear value + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, Some(AllocationWrapper { allocation }))) + } + + pub(crate) fn free_buffer_allocation( + allocation: AllocationWrapper, + allocator: &Mutex<GpuAllocatorWrapper>, + ) { + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 buffer, {e}"), + }; + } + + pub(crate) fn free_texture_allocation( + allocation: AllocationWrapper, + allocator: &Mutex<GpuAllocatorWrapper>, + ) { + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 texture, {e}"), + }; + } + + #[cfg(feature = "windows_rs")] + impl From<gpu_allocator::AllocationError> for crate::DeviceError { + fn from(result: gpu_allocator::AllocationError) -> Self { + match result { + gpu_allocator::AllocationError::OutOfMemory => Self::OutOfMemory, + gpu_allocator::AllocationError::FailedToMap(e) => { + log::error!("DX12 gpu-allocator: Failed to map: {}", e); + Self::Lost + } + gpu_allocator::AllocationError::NoCompatibleMemoryTypeFound => { + log::error!("DX12 gpu-allocator: No Compatible Memory Type Found"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocationCreateDesc => { + log::error!("DX12 gpu-allocator: Invalid Allocation Creation Description"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocatorCreateDesc(e) => { + log::error!( + "DX12 gpu-allocator: Invalid Allocator Creation Description: {}", + e + ); + Self::Lost + } + gpu_allocator::AllocationError::Internal(e) => { + log::error!("DX12 gpu-allocator: Internal Error: {}", e); + Self::Lost + } + } + } + } +} + +// This is the older, slower path where it doesn't suballocate buffers. +// Tracking issue for when it can be removed: https://github.com/gfx-rs/wgpu/issues/3207 +#[cfg(not(feature = "windows_rs"))] +mod allocation { + use d3d12::WeakPtr; + use parking_lot::Mutex; + use std::ptr; + use winapi::{ + um::{ + d3d12::{self as d3d12_ty, ID3D12Resource}, + winnt::HRESULT, + }, + Interface, + }; + + const D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: u32 = d3d12_ty::D3D12_HEAP_FLAG_NONE; // TODO: find the exact value + + // Allocator isn't needed when not suballocating with gpu_allocator + #[derive(Debug)] + pub(crate) struct GpuAllocatorWrapper {} + + // Allocations aren't needed when not suballocating with gpu_allocator + #[derive(Debug)] + pub(crate) struct AllocationWrapper {} + + pub(crate) fn create_allocator_wrapper( + _raw: &d3d12::Device, + ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> { + Ok(None) + } + + pub(crate) fn create_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: if is_cpu_read { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK + } else if is_cpu_write { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + } else { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE + }, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified if !is_cpu_read && !is_cpu_write => { + d3d12_ty::D3D12_MEMORY_POOL_L1 + } + _ => d3d12_ty::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12_ty::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, None)) + } + + pub(crate) fn create_texture_resource( + device: &crate::dx12::Device, + _desc: &crate::TextureDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, + crate::dx12::MemoryArchitecture::Unified { .. } => d3d12_ty::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12_ty::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), // clear value + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, None)) + } + + pub(crate) fn free_buffer_allocation( + _allocation: AllocationWrapper, + _allocator: &Mutex<GpuAllocatorWrapper>, + ) { + // No-op when not using gpu-allocator + } + + pub(crate) fn free_texture_allocation( + _allocation: AllocationWrapper, + _allocator: &Mutex<GpuAllocatorWrapper>, + ) { + // No-op when not using gpu-allocator + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/types.rs b/third_party/rust/wgpu-hal/src/dx12/types.rs new file mode 100644 index 0000000000..dec4e71337 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/types.rs @@ -0,0 +1,34 @@ +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +winapi::ENUM! { + enum D3D12_VIEW_INSTANCING_TIER { + D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED = 0, + D3D12_VIEW_INSTANCING_TIER_1 = 1, + D3D12_VIEW_INSTANCING_TIER_2 = 2, + D3D12_VIEW_INSTANCING_TIER_3 = 3, + } +} + +winapi::ENUM! { + enum D3D12_COMMAND_LIST_SUPPORT_FLAGS { + D3D12_COMMAND_LIST_SUPPORT_FLAG_NONE = 0, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_DIRECT, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_BUNDLE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_COMPUTE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_COPY, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_DECODE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_PROCESS, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_ENCODE, + } +} + +winapi::STRUCT! { + struct D3D12_FEATURE_DATA_D3D12_OPTIONS3 { + CopyQueueTimestampQueriesSupported: winapi::shared::minwindef::BOOL, + CastingFullyTypedFormatSupported: winapi::shared::minwindef::BOOL, + WriteBufferImmediateSupportFlags: D3D12_COMMAND_LIST_SUPPORT_FLAGS, + ViewInstancingTier: D3D12_VIEW_INSTANCING_TIER, + BarycentricsSupported: winapi::shared::minwindef::BOOL, + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/view.rs b/third_party/rust/wgpu-hal/src/dx12/view.rs new file mode 100644 index 0000000000..e7a051b535 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/view.rs @@ -0,0 +1,380 @@ +use crate::auxil; +use std::mem; +use winapi::um::d3d12 as d3d12_ty; + +pub(crate) const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; + +pub(super) struct ViewDescriptor { + dimension: wgt::TextureViewDimension, + pub aspects: crate::FormatAspects, + pub rtv_dsv_format: d3d12::Format, + srv_uav_format: Option<d3d12::Format>, + multisampled: bool, + array_layer_base: u32, + array_layer_count: u32, + mip_level_base: u32, + mip_level_count: u32, +} + +impl crate::TextureViewDescriptor<'_> { + pub(super) fn to_internal(&self, texture: &super::Texture) -> ViewDescriptor { + let aspects = crate::FormatAspects::new(self.format, self.range.aspect); + + ViewDescriptor { + dimension: self.dimension, + aspects, + rtv_dsv_format: auxil::dxgi::conv::map_texture_format(self.format), + srv_uav_format: auxil::dxgi::conv::map_texture_format_for_srv_uav(self.format, aspects), + multisampled: texture.sample_count > 1, + mip_level_base: self.range.base_mip_level, + mip_level_count: self.range.mip_level_count.unwrap_or(!0), + array_layer_base: self.range.base_array_layer, + array_layer_count: self.range.array_layer_count.unwrap_or(!0), + } + } +} + +impl ViewDescriptor { + pub(crate) unsafe fn to_srv(&self) -> Option<d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC> { + let mut desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: self.srv_uav_format?, + ViewDimension: 0, + Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + ResourceMinLODClamp: 0.0, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_SRV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_SRV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::Cube if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBE; + unsafe { + *desc.u.TextureCube_mut() = d3d12_ty::D3D12_TEXCUBE_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + unsafe { + *desc.u.TextureCubeArray_mut() = d3d12_ty::D3D12_TEXCUBE_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + First2DArrayFace: self.array_layer_base, + NumCubes: if self.array_layer_count == !0 { + !0 + } else { + self.array_layer_count / 6 + }, + ResourceMinLODClamp: 0.0, + } + } + } + } + + Some(desc) + } + + pub(crate) unsafe fn to_uav(&self) -> Option<d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC> { + let mut desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: self.srv_uav_format?, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_UAV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_UAV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_UAV { + MipSlice: self.mip_level_base, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_UAV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_UAV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube UAV") + } + } + + Some(desc) + } + + pub(crate) unsafe fn to_rtv(&self) -> d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { + let mut desc = d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { + Format: self.rtv_dsv_format, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_RTV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_RTV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_RTV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_RTV { + MipSlice: self.mip_level_base, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_RTV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_RTV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_RTV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube RTV") + } + } + + desc + } + + pub(crate) unsafe fn to_dsv(&self, read_only: bool) -> d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { + let mut desc = d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { + Format: self.rtv_dsv_format, + ViewDimension: 0, + Flags: { + let mut flags = d3d12_ty::D3D12_DSV_FLAG_NONE; + if read_only { + if self.aspects.contains(crate::FormatAspects::DEPTH) { + flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_DEPTH; + } + if self.aspects.contains(crate::FormatAspects::STENCIL) { + flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_STENCIL; + } + } + flags + }, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_DSV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_DSV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_DSV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_DSV { + MipSlice: self.mip_level_base, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_DSV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_DSV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D3 + | wgt::TextureViewDimension::Cube + | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube or 3D RTV") + } + } + + desc + } +} |