Diffstat (limited to 'third_party/rust/gfx-backend-dx12')
-rw-r--r-- | third_party/rust/gfx-backend-dx12/.cargo-checksum.json | 1
-rw-r--r-- | third_party/rust/gfx-backend-dx12/Cargo.toml | 39
-rw-r--r-- | third_party/rust/gfx-backend-dx12/README.md | 13
-rw-r--r-- | third_party/rust/gfx-backend-dx12/shaders/blit.hlsl | 29
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/command.rs | 2867
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/conv.rs | 690
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/descriptors_cpu.rs | 305
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/device.rs | 3690
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/internal.rs | 244
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/lib.rs | 1495
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/pool.rs | 126
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/resource.rs | 882
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/root_constants.rs | 298
-rw-r--r-- | third_party/rust/gfx-backend-dx12/src/window.rs | 350 |
14 files changed, 11029 insertions, 0 deletions
diff --git a/third_party/rust/gfx-backend-dx12/.cargo-checksum.json b/third_party/rust/gfx-backend-dx12/.cargo-checksum.json new file mode 100644 index 0000000000..eb97a14294 --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"8112e72eb788c2e34c73a0b50b5a209c4d9cdb5c930cb24b3e54e1dcb7071440","README.md":"c20deff79020aaf97a47d61f7998e3271fd03efa22104866f01a08f789836ed4","shaders/blit.hlsl":"0c95317cd14b0ae71459d74c3e5658acac0baf8476525d2063c143e300180fe6","src/command.rs":"8e0f7e77f073219c16acf9dbdd9ee4c301973f35bb5319f0600894963c7e938c","src/conv.rs":"791cd2b20ed8dbaf017fae62210206474a08061f4ed78790eb561870d4502f4b","src/descriptors_cpu.rs":"3aea4aa606ddf2240aa96ca8df7d9c531cb41e04f3e9e67690e124d1571b33b8","src/device.rs":"8da0b7386b4956432fb4544f72c27f451efb53bf025bab972e37c20f1d2b0123","src/internal.rs":"6f45623b7c6ca293310d295c87810fe4f2b7343189a88ad92b8d882ffedbe76b","src/lib.rs":"7698d2ff4a7a9d5050abbc2c18865d08dd06ecd15f6348d0b368743e31eb9187","src/pool.rs":"bae7edc7ba6c92534394f132da094c9682f7b2938850eac59dadba522379c366","src/resource.rs":"3e401e39baea4572e5ba294482db720957a29a4c496ed92e17cfc00d6d8d3729","src/root_constants.rs":"ebb2e0e779794d74a776cd352fa09132820d39ef49c02ae9ce093f4ae54f2f42","src/window.rs":"1973dc0736edb32ea54acee52272f483c7384c0d2a36a49630e1289a4066db6a"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/gfx-backend-dx12/Cargo.toml b/third_party/rust/gfx-backend-dx12/Cargo.toml new file mode 100644 index 0000000000..893294e3d2 --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "gfx-backend-dx12" +version = "0.6.2" +description = "DirectX-12 API backend for gfx-rs" +homepage = "https://github.com/gfx-rs/gfx" +repository = "https://github.com/gfx-rs/gfx" +keywords = ["graphics", "gamedev"] +license = "MIT OR Apache-2.0" +authors = ["The Gfx-rs Developers"] +readme = "README.md" +documentation = "https://docs.rs/gfx-backend-dx12" +workspace = "../../.." +edition = "2018" + +[features] +default = [] + +[lib] +name = "gfx_backend_dx12" + +[dependencies] +arrayvec = "0.5" +auxil = { path = "../../auxil/auxil", version = "0.5", package = "gfx-auxil", features = ["spirv_cross"] } +hal = { path = "../../hal", version = "0.6", package = "gfx-hal" } +range-alloc = { path = "../../auxil/range-alloc", version = "0.1.1" } +bitflags = "1" +bit-set = "0.5" +native = { package = "d3d12", version = "0.3", features = ["libloading"] } +log = "0.4" +parking_lot = "0.11" +smallvec = "1" +spirv_cross = { version = "0.22", features = ["hlsl"] } +winapi = { version = "0.3", features = ["basetsd","d3d12","d3d12sdklayers","d3d12shader","d3dcommon","d3dcompiler","dxgi1_2","dxgi1_3","dxgi1_4","dxgi1_5","dxgi1_6","dxgidebug","dxgiformat","dxgitype","handleapi","minwindef","synchapi","unknwnbase","winbase","windef","winerror","winnt","winuser"] } +raw-window-handle = "0.3" + +# This forces docs.rs to build the crate on windows, otherwise the build fails +# and we get no docs at all. +[package.metadata.docs.rs] +default-target = "x86_64-pc-windows-msvc" diff --git a/third_party/rust/gfx-backend-dx12/README.md b/third_party/rust/gfx-backend-dx12/README.md new file mode 100644 index 0000000000..d387cbab9d --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/README.md @@ -0,0 +1,13 @@ +# gfx-backend-dx12 + +DX12 backend for gfx. 
+ +## Normalized Coordinates + +Render | Depth | Texture +-------|-------|-------- +![render_coordinates](../../../info/gl_render_coordinates.png) | ![depth_coordinates](../../../info/dx_depth_coordinates.png) | ![texture_coordinates](../../../info/dx_texture_coordinates.png) + +## Mirroring + +TODO diff --git a/third_party/rust/gfx-backend-dx12/shaders/blit.hlsl b/third_party/rust/gfx-backend-dx12/shaders/blit.hlsl new file mode 100644 index 0000000000..bd15200980 --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/shaders/blit.hlsl @@ -0,0 +1,29 @@ + +Texture2DArray BlitSource : register(t0); +SamplerState BlitSampler : register(s0); + +cbuffer Region : register(b0) { + float2 offset; + float2 extent; + float z; + float level; +}; + +struct VsOutput { + float4 pos: SV_POSITION; + float4 uv: TEXCOORD0; +}; + +// Create a screen filling triangle +VsOutput vs_blit_2d(uint id: SV_VertexID) { + float2 coord = float2((id << 1) & 2, id & 2); + VsOutput output = { + float4(float2(-1.0, 1.0) + coord * float2(2.0, -2.0), 0.0, 1.0), + float4(offset + coord * extent, z, level) + }; + return output; +} + +float4 ps_blit_2d(VsOutput input) : SV_TARGET { + return BlitSource.SampleLevel(BlitSampler, input.uv.xyz, input.uv.w); +} diff --git a/third_party/rust/gfx-backend-dx12/src/command.rs b/third_party/rust/gfx-backend-dx12/src/command.rs new file mode 100644 index 0000000000..a8d10bbaaa --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/src/command.rs @@ -0,0 +1,2867 @@ +use auxil::FastHashMap; +use hal::{ + buffer, command as com, format, format::Aspects, image, memory, pass, pso, query, DrawCount, + IndexCount, IndexType, InstanceCount, TaskCount, VertexCount, VertexOffset, WorkGroupCount, +}; + +use arrayvec::ArrayVec; +use smallvec::SmallVec; +use winapi::{ + ctypes, + shared::{dxgiformat, minwindef, winerror}, + um::{d3d12, d3dcommon}, + Interface, +}; + +use std::{borrow::Borrow, cmp, fmt, iter, mem, ops::Range, ptr, sync::Arc}; + +use crate::{ + conv, descriptors_cpu, device, internal, pool::PoolShared, resource as r, validate_line_width, + Backend, Device, Shared, MAX_DESCRIPTOR_SETS, MAX_VERTEX_BUFFERS, +}; + +// Fixed size of the root signature. +// Limited by D3D12. 
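The constant that follows sizes the virtualized root-signature storage. For orientation, D3D12 caps a root signature at 64 DWORDs, and each parameter kind has a fixed cost; a minimal sketch of that accounting (a hypothetical helper, not part of this crate — the costs match the D3D12 documentation and the element counts the `UserData` code below writes):

```rust
/// Hypothetical illustration of the D3D12 root-signature budget that
/// `ROOT_SIGNATURE_SIZE` models below.
enum RootParameter {
    /// Each 32-bit root constant costs one DWORD.
    Constants { count: u32 },
    /// A descriptor table costs one DWORD, regardless of table size.
    DescriptorTable,
    /// A root descriptor is a GPU virtual address: two DWORDs.
    RootDescriptor,
}

fn root_signature_cost(params: &[RootParameter]) -> u32 {
    params
        .iter()
        .map(|p| match *p {
            RootParameter::Constants { count } => count,
            RootParameter::DescriptorTable => 1,
            RootParameter::RootDescriptor => 2,
        })
        .sum()
}
// A layout fits only if root_signature_cost(&params) <= 64.
```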
+const ROOT_SIGNATURE_SIZE: usize = 64; + +const NULL_VERTEX_BUFFER_VIEW: d3d12::D3D12_VERTEX_BUFFER_VIEW = d3d12::D3D12_VERTEX_BUFFER_VIEW { + BufferLocation: 0, + SizeInBytes: 0, + StrideInBytes: 0, +}; + +fn get_rect(rect: &pso::Rect) -> d3d12::D3D12_RECT { + d3d12::D3D12_RECT { + left: rect.x as i32, + top: rect.y as i32, + right: (rect.x + rect.w) as i32, + bottom: (rect.y + rect.h) as i32, + } +} + +fn div(a: u32, b: u32) -> u32 { + (a + b - 1) / b +} + +fn up_align(x: u32, alignment: u32) -> u32 { + (x + alignment - 1) & !(alignment - 1) +} + +#[derive(Clone, Debug)] +struct AttachmentClear { + subpass_id: Option<pass::SubpassId>, + value: Option<com::ClearValue>, + stencil_value: Option<u32>, +} + +pub struct RenderPassCache { + render_pass: r::RenderPass, + framebuffer: r::Framebuffer, + target_rect: d3d12::D3D12_RECT, + attachment_clears: Vec<AttachmentClear>, + has_name: bool, +} + +impl fmt::Debug for RenderPassCache { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("RenderPassCache") + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum OcclusionQuery { + Binary(minwindef::UINT), + Precise(minwindef::UINT), +} + +/// Strongly-typed root signature element +/// +/// Could be removed for an unsafer variant to occupy less memory +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum RootElement { + /// Root constant in the signature + Constant(u32), + /// Descriptor table, storing table offset for the current descriptor heap + TableSrvCbvUav(u32), + /// Descriptor table, storing table offset for the current descriptor heap + TableSampler(u32), + /// Root descriptors take 2 DWORDs in the root signature. + DescriptorCbv { + buffer: u64, + }, + DescriptorPlaceholder, + /// Undefined value, implementation specific + Undefined, +} + +/// Virtual data storage for the current root signature memory. +struct UserData { + data: [RootElement; ROOT_SIGNATURE_SIZE], + dirty_mask: u64, +} + +impl Default for UserData { + fn default() -> Self { + UserData { + data: [RootElement::Undefined; ROOT_SIGNATURE_SIZE], + dirty_mask: 0, + } + } +} + +impl fmt::Debug for UserData { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("UserData") + .field("data", &&self.data[..]) + .field("dirty_mask", &self.dirty_mask) + .finish() + } +} + +impl UserData { + /// Write root constant values into the user data, overwriting virtual memory + /// range [offset..offset + data.len()]. Changes are marked as dirty. + fn set_constants(&mut self, offset: usize, data: &[u32]) { + assert!(offset + data.len() <= ROOT_SIGNATURE_SIZE); + // Each root constant occupies one DWORD + for (i, val) in data.iter().enumerate() { + self.data[offset + i] = RootElement::Constant(*val); + self.dirty_mask |= 1u64 << (offset + i); + } + } + + /// Write a SRV/CBV/UAV descriptor table into the user data, overwriting virtual + /// memory range [offset..offset + 1]. Changes are marked as dirty. + fn set_srv_cbv_uav_table(&mut self, offset: usize, table_start: u32) { + assert!(offset < ROOT_SIGNATURE_SIZE); + // A descriptor table occupies one DWORD + self.data[offset] = RootElement::TableSrvCbvUav(table_start); + self.dirty_mask |= 1u64 << offset; + } + + /// Write a sampler descriptor table into the user data, overwriting virtual + /// memory range [offset..offset + 1]. Changes are marked as dirty. 
+ fn set_sampler_table(&mut self, offset: usize, table_start: u32) { + assert!(offset < ROOT_SIGNATURE_SIZE); + // A descriptor table occupies one DWORD + self.data[offset] = RootElement::TableSampler(table_start); + self.dirty_mask |= 1u64 << offset; + } + + /// Write a CBV root descriptor into the user data, overwriting virtual + /// memory range [offset..offset + 2]. Changes are marked as dirty. + fn set_descriptor_cbv(&mut self, offset: usize, buffer: u64) { + assert!(offset + 1 < ROOT_SIGNATURE_SIZE); + // A root descriptor occupies two DWORDs + self.data[offset] = RootElement::DescriptorCbv { buffer }; + self.data[offset + 1] = RootElement::DescriptorPlaceholder; + self.dirty_mask |= 1u64 << offset; + self.dirty_mask |= 1u64 << offset + 1; + } + + fn is_dirty(&self) -> bool { + self.dirty_mask != 0 + } + + fn is_index_dirty(&self, i: u32) -> bool { + ((self.dirty_mask >> i) & 1) == 1 + } + + /// Mark all entries as dirty. + fn dirty_all(&mut self) { + self.dirty_mask = !0; + } + + /// Mark all entries as clear up to the given i. + fn clear_up_to(&mut self, i: u32) { + self.dirty_mask &= !((1 << i) - 1); + } +} + +#[derive(Debug, Default)] +struct PipelineCache { + // Bound pipeline and layout info. + // Changed on bind pipeline calls. + pipeline: Option<(native::PipelineState, Arc<r::PipelineShared>)>, + + // Virtualized root signature user data of the shaders + user_data: UserData, + + temp_constants: Vec<u32>, + + // Descriptor heap gpu handle offsets + srv_cbv_uav_start: u64, + sampler_start: u64, +} + +impl PipelineCache { + fn bind_descriptor_sets<'a, S, J>( + &mut self, + layout: &r::PipelineLayout, + first_set: usize, + sets: &[S], + offsets: J, + ) -> [native::DescriptorHeap; 2] + where + S: Borrow<r::DescriptorSet>, + J: IntoIterator, + J::Item: Borrow<com::DescriptorSetOffset>, + { + let mut offsets = offsets.into_iter().map(|offset| *offset.borrow() as u64); + + // 'Global' GPU descriptor heaps. + // All descriptors live in the same heaps. + let (srv_cbv_uav_start, sampler_start, heap_srv_cbv_uav, heap_sampler) = + if let Some(set_0) = sets.first() { + let set = set_0.borrow(); + ( + set.srv_cbv_uav_gpu_start().ptr, + set.sampler_gpu_start().ptr, + set.heap_srv_cbv_uav, + set.heap_samplers, + ) + } else { + return [native::DescriptorHeap::null(); 2]; + }; + + self.srv_cbv_uav_start = srv_cbv_uav_start; + self.sampler_start = sampler_start; + + for (set, element) in sets.iter().zip(layout.elements[first_set..].iter()) { + let set = set.borrow(); + let mut root_offset = element.table.offset; + + // Bind CBV/SRC/UAV descriptor tables + if let Some(gpu) = set.first_gpu_view { + assert!(element.table.ty.contains(r::SRV_CBV_UAV)); + // Cast is safe as offset **must** be in u32 range. Unable to + // create heaps with more descriptors. + let table_gpu_offset = (gpu.ptr - srv_cbv_uav_start) as u32; + self.user_data + .set_srv_cbv_uav_table(root_offset, table_gpu_offset); + root_offset += 1; + } + + // Bind Sampler descriptor tables. + if let Some(gpu) = set.first_gpu_sampler.get() { + assert!(element.table.ty.contains(r::SAMPLERS)); + + // Cast is safe as offset **must** be in u32 range. Unable to + // create heaps with more descriptors. + let table_gpu_offset = (gpu.ptr - sampler_start) as u32; + self.user_data + .set_sampler_table(root_offset, table_gpu_offset); + root_offset += 1; + } + + // Bind root descriptors + // TODO: slow, can we move the dynamic descriptors into toplevel somehow during initialization? + // Requires changes then in the descriptor update process. 
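Every `UserData::set_*` method above follows the same pattern: write the virtual root elements, then set one dirty bit per DWORD touched, so that `flush_user_data` further below only re-emits slots that actually changed. A stripped-down sketch of that bookkeeping (hypothetical, for illustration only; the loop that follows continues with binding the root descriptors themselves):

```rust
/// Stripped-down model of `UserData`'s dirty tracking: one bit per
/// root-signature DWORD, for up to 64 DWORDs (hypothetical sketch).
#[derive(Default)]
struct DirtyMask(u64);

impl DirtyMask {
    /// Mark a DWORD slot as needing a flush (`slot` must be < 64).
    fn mark(&mut self, slot: u32) {
        self.0 |= 1u64 << slot;
    }
    fn is_dirty(&self, slot: u32) -> bool {
        (self.0 >> slot) & 1 == 1
    }
    /// Clear every bit below `slot`, mirroring `clear_up_to` above.
    fn clear_up_to(&mut self, slot: u32) {
        self.0 &= !((1u64 << slot) - 1);
    }
}
```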
+ for binding in &set.binding_infos { + // It's not valid to modify the descriptor sets during recording -> access if safe. + let dynamic_descriptors = unsafe { &*binding.dynamic_descriptors.get() }; + for descriptor in dynamic_descriptors { + let gpu_offset = descriptor.gpu_buffer_location + offsets.next().unwrap(); + self.user_data.set_descriptor_cbv(root_offset, gpu_offset); + root_offset += 2; + } + } + } + + [heap_srv_cbv_uav, heap_sampler] + } + + fn flush_user_data<F, G, H>( + &mut self, + mut constants_update: F, + mut table_update: G, + mut descriptor_cbv_update: H, + ) where + F: FnMut(u32, &[u32]), + G: FnMut(u32, d3d12::D3D12_GPU_DESCRIPTOR_HANDLE), + H: FnMut(u32, d3d12::D3D12_GPU_VIRTUAL_ADDRESS), + { + let user_data = &mut self.user_data; + if !user_data.is_dirty() { + return; + } + + let shared = match self.pipeline { + Some((_, ref shared)) => shared, + None => return, + }; + + for (root_index, &root_offset) in shared.parameter_offsets.iter().enumerate() { + if !user_data.is_index_dirty(root_offset) { + continue; + } + match user_data.data[root_offset as usize] { + RootElement::Constant(_) => { + let c = &shared.constants[root_index]; + debug_assert_eq!(root_offset, c.range.start); + self.temp_constants.clear(); + self.temp_constants.extend( + user_data.data[c.range.start as usize..c.range.end as usize] + .iter() + .map(|ud| match *ud { + RootElement::Constant(v) => v, + _ => { + warn!( + "Unset or mismatching root constant at index {:?} ({:?})", + c, ud + ); + 0 + } + }), + ); + constants_update(root_index as u32, &self.temp_constants); + } + RootElement::TableSrvCbvUav(offset) => { + let gpu = d3d12::D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: self.srv_cbv_uav_start + offset as u64, + }; + table_update(root_index as u32, gpu); + } + RootElement::TableSampler(offset) => { + let gpu = d3d12::D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: self.sampler_start + offset as u64, + }; + table_update(root_index as u32, gpu); + } + RootElement::DescriptorCbv { buffer } => { + debug_assert!(user_data.is_index_dirty(root_offset + 1)); + debug_assert_eq!( + user_data.data[root_offset as usize + 1], + RootElement::DescriptorPlaceholder + ); + + descriptor_cbv_update(root_index as u32, buffer); + } + RootElement::DescriptorPlaceholder | RootElement::Undefined => { + error!( + "Undefined user data element in the root signature at {}", + root_offset + ); + } + } + } + + user_data.clear_up_to(shared.total_slots); + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum BindPoint { + Compute, + Graphics { + /// Internal pipelines used for blitting, copying, etc. + internal: bool, + }, +} + +#[derive(Clone, Debug)] +struct Copy { + footprint_offset: u64, + footprint: image::Extent, + row_pitch: u32, + img_subresource: u32, + img_offset: image::Offset, + buf_offset: image::Offset, + copy_extent: image::Extent, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +enum Phase { + Initial, + Recording, + Executable, + // Pending, // not useful, and we don't have mutable access + // to self in `submit()`, so we can't set it anyway. +} + +pub struct CommandBuffer { + //Note: this is going to be NULL instead of `Option` to avoid + // `unwrap()` on every operation. This is not idiomatic. 
+ pub(crate) raw: native::GraphicsCommandList, + allocator: Option<native::CommandAllocator>, + phase: Phase, + shared: Arc<Shared>, + pool_shared: Arc<PoolShared>, + begin_flags: com::CommandBufferFlags, + + /// Cache renderpasses for graphics operations + pass_cache: Option<RenderPassCache>, + cur_subpass: pass::SubpassId, + + /// Cache current graphics root signature and pipeline to minimize rebinding and support two + /// bindpoints. + gr_pipeline: PipelineCache, + /// Primitive topology of the currently bound graphics pipeline. + /// Caching required for internal graphics pipelines. + primitive_topology: d3d12::D3D12_PRIMITIVE_TOPOLOGY, + /// Cache current compute root signature and pipeline. + comp_pipeline: PipelineCache, + /// D3D12 only has one slot for both bindpoints. Need to rebind everything if we want to switch + /// between different bind points (i.e. calling draw or dispatch). + active_bindpoint: BindPoint, + /// Current descriptor heaps (CBV/SRV/UAV and Sampler). + /// Required for resetting due to internal descriptor heaps. + active_descriptor_heaps: [native::DescriptorHeap; 2], + + /// Active queries in the command buffer. + /// Queries must begin and end in the same command buffer, which allows us to track them. + /// The query pool type on `begin_query` must differ from all currently active queries. + /// Therefore, only one query per query type can be active at the same time. Binary and precise + /// occlusion queries share one queue type in Vulkan. + occlusion_query: Option<OcclusionQuery>, + pipeline_stats_query: Option<minwindef::UINT>, + + /// Cached vertex buffer views to bind. + /// `Stride` values are not known at `bind_vertex_buffers` time because they are only stored + /// inside the pipeline state. + vertex_bindings_remap: [Option<r::VertexBinding>; MAX_VERTEX_BUFFERS], + + vertex_buffer_views: [d3d12::D3D12_VERTEX_BUFFER_VIEW; MAX_VERTEX_BUFFERS], + + /// Re-used allocation for the image-buffer copies. + copies: Vec<Copy>, + + /// D3D12 only allows setting all viewports or all scissors at once, not partial updates. + /// So we must cache the implied state for these partial updates. + viewport_cache: ArrayVec< + [d3d12::D3D12_VIEWPORT; + d3d12::D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE as usize], + >, + scissor_cache: ArrayVec< + [d3d12::D3D12_RECT; + d3d12::D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE as usize], + >, + + /// HACK: RenderDoc workaround for temporary RTVs + rtv_pools: Vec<native::DescriptorHeap>, + /// Temporary GPU descriptor heaps (internal). + temporary_gpu_heaps: Vec<native::DescriptorHeap>, + /// Resources that need to stay alive until the end of the GPU execution. + retained_resources: Vec<native::Resource>, + + /// Temporary wide string for the marker. + temp_marker: Vec<u16>, + + /// Temporary transition barriers. + barriers: Vec<d3d12::D3D12_RESOURCE_BARRIER>, +} + +impl fmt::Debug for CommandBuffer { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("CommandBuffer") + } +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + +// Insertion point for subpasses. +enum BarrierPoint { + // Pre barriers are inserted at the beginning when switching into a new subpass. + Pre, + // Post barriers are applied after executing the user-defined commands.
+ Post, +} + +impl CommandBuffer { + pub(crate) fn new(shared: &Arc<Shared>, pool_shared: &Arc<PoolShared>) -> Self { + CommandBuffer { + raw: native::GraphicsCommandList::null(), + allocator: None, + shared: Arc::clone(shared), + pool_shared: Arc::clone(pool_shared), + phase: Phase::Initial, + begin_flags: com::CommandBufferFlags::empty(), + pass_cache: None, + cur_subpass: !0, + gr_pipeline: PipelineCache::default(), + primitive_topology: d3dcommon::D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, + comp_pipeline: PipelineCache::default(), + active_bindpoint: BindPoint::Graphics { internal: false }, + active_descriptor_heaps: [native::DescriptorHeap::null(); 2], + occlusion_query: None, + pipeline_stats_query: None, + vertex_bindings_remap: [None; MAX_VERTEX_BUFFERS], + vertex_buffer_views: [NULL_VERTEX_BUFFER_VIEW; MAX_VERTEX_BUFFERS], + copies: Vec::new(), + viewport_cache: ArrayVec::new(), + scissor_cache: ArrayVec::new(), + rtv_pools: Vec::new(), + temporary_gpu_heaps: Vec::new(), + retained_resources: Vec::new(), + temp_marker: Vec::new(), + barriers: Vec::new(), + } + } + + pub(crate) unsafe fn destroy( + self, + ) -> ( + Option<native::CommandAllocator>, + Option<native::GraphicsCommandList>, + ) { + let list = match self.phase { + Phase::Initial => None, + Phase::Recording => { + self.raw.close(); + Some(self.raw) + } + Phase::Executable => Some(self.raw), + }; + for heap in &self.rtv_pools { + heap.destroy(); + } + for heap in &self.temporary_gpu_heaps { + heap.destroy(); + } + for resource in &self.retained_resources { + resource.destroy(); + } + (self.allocator, list) + } + + pub(crate) unsafe fn as_raw_list(&self) -> *mut d3d12::ID3D12CommandList { + match self.phase { + Phase::Executable => (), + other => error!("Submitting a command buffer in {:?} state", other), + } + self.raw.as_mut_ptr() as *mut _ + } + + // Indicates that the pipeline slot has been overridden with an internal pipeline. + // + // This only invalidates the slot and the user data!
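Context for the helper below: D3D12 exposes a single pipeline-state slot shared by graphics and compute, so after an internal pipeline (blit, copy) runs, the user's pipeline and root signature must be restored lazily on the next draw or dispatch. A minimal sketch of that lazy switch, with hypothetical simplified types (the real logic lives in `set_graphics_bind_point`/`set_compute_bind_point` further down):

```rust
/// Hypothetical sketch of lazy bind-point restoration: a draw call only
/// re-sets pipeline state when the shared slot holds something else.
#[derive(Clone, Copy, PartialEq)]
enum Bind {
    Graphics { internal: bool },
    Compute,
}

struct BindTracker {
    active: Bind,
}

impl BindTracker {
    /// Returns whether the caller must rebind the graphics pipeline
    /// (and, when coming from an internal pass, the root signature).
    fn prepare_draw(&mut self) -> bool {
        let needs_rebind = self.active != (Bind::Graphics { internal: false });
        self.active = Bind::Graphics { internal: false };
        needs_rebind
    }
}
```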
+ fn set_internal_graphics_pipeline(&mut self) { + self.active_bindpoint = BindPoint::Graphics { internal: true }; + self.gr_pipeline.user_data.dirty_all(); + } + + fn bind_descriptor_heaps(&mut self) { + self.raw.set_descriptor_heaps(&self.active_descriptor_heaps); + } + + fn mark_bound_descriptor(&mut self, index: usize, set: &r::DescriptorSet) { + if !set.raw_name.is_empty() { + self.temp_marker.clear(); + self.temp_marker.push(0x20); // ' ' + self.temp_marker.push(0x30 + index as u16); // '1..9' + self.temp_marker.push(0x3A); // ':' + self.temp_marker.push(0x20); // ' ' + self.temp_marker.extend_from_slice(&set.raw_name); + unsafe { + self.raw.SetMarker( + 0, + self.temp_marker.as_ptr() as *const _, + self.temp_marker.len() as u32 * 2, + ) + }; + } + } + + unsafe fn insert_subpass_barriers(&mut self, insertion: BarrierPoint) { + let state = self.pass_cache.as_ref().unwrap(); + let proto_barriers = match state.render_pass.subpasses.get(self.cur_subpass as usize) { + Some(subpass) => match insertion { + BarrierPoint::Pre => &subpass.pre_barriers, + BarrierPoint::Post => &subpass.post_barriers, + }, + None => &state.render_pass.post_barriers, + }; + + self.barriers.clear(); + for barrier in proto_barriers { + let mut resource_barrier = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: barrier.flags, + u: mem::zeroed(), + }; + + *resource_barrier.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: state.framebuffer.attachments[barrier.attachment_id] + .resource + .as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: barrier.states.start, + StateAfter: barrier.states.end, + }; + + self.barriers.push(resource_barrier); + } + + self.flush_barriers(); + } + + fn bind_targets(&mut self) { + let state = self.pass_cache.as_ref().unwrap(); + let subpass = &state.render_pass.subpasses[self.cur_subpass as usize]; + + // collect render targets + let color_views = subpass + .color_attachments + .iter() + .map(|&(id, _)| state.framebuffer.attachments[id].handle_rtv.raw().unwrap()) + .collect::<Vec<_>>(); + let ds_view = match subpass.depth_stencil_attachment { + Some((id, _)) => state.framebuffer.attachments[id] + .handle_dsv + .as_ref() + .map(|handle| &handle.raw) + .unwrap() as *const _, + None => ptr::null(), + }; + // set render targets + unsafe { + self.raw.OMSetRenderTargets( + color_views.len() as _, + color_views.as_ptr(), + minwindef::FALSE, + ds_view, + ); + } + + // performs clears for all the attachments first used in this subpass + for (view, clear) in state + .framebuffer + .attachments + .iter() + .zip(state.attachment_clears.iter()) + { + if clear.subpass_id != Some(self.cur_subpass) { + continue; + } + + if let (Some(rtv), Some(cv)) = (view.handle_rtv.raw(), clear.value) { + self.clear_render_target_view(rtv, unsafe { cv.color }, &[state.target_rect]); + } + + if let Some(handle) = view.handle_dsv { + let depth = clear.value.map(|cv| unsafe { cv.depth_stencil.depth }); + let stencil = clear.stencil_value; + + if depth.is_some() || stencil.is_some() { + self.clear_depth_stencil_view(handle.raw, depth, stencil, &[state.target_rect]); + } + } + } + } + + fn resolve_attachments(&self) { + let state = self.pass_cache.as_ref().unwrap(); + let framebuffer = &state.framebuffer; + let subpass = &state.render_pass.subpasses[self.cur_subpass as usize]; + + for (&(src_attachment, _), &(dst_attachment, _)) in subpass + .color_attachments + .iter() + .zip(subpass.resolve_attachments.iter()) 
+ { + if dst_attachment == pass::ATTACHMENT_UNUSED { + continue; + } + + let resolve_src = state.framebuffer.attachments[src_attachment]; + let resolve_dst = state.framebuffer.attachments[dst_attachment]; + + // The number of layers of the render area are given on framebuffer creation. + for l in 0..framebuffer.layers { + // Attachtments only have a single mip level by specification. + let subresource_src = resolve_src.calc_subresource( + resolve_src.mip_levels.0 as _, + (resolve_src.layers.0 + l) as _, + ); + let subresource_dst = resolve_dst.calc_subresource( + resolve_dst.mip_levels.0 as _, + (resolve_dst.layers.0 + l) as _, + ); + + // TODO: take width and height of render area into account. + unsafe { + self.raw.ResolveSubresource( + resolve_dst.resource.as_mut_ptr(), + subresource_dst, + resolve_src.resource.as_mut_ptr(), + subresource_src, + resolve_dst.dxgi_format, + ); + } + } + } + } + + fn clear_render_target_view( + &self, + rtv: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE, + color: com::ClearColor, + rects: &[d3d12::D3D12_RECT], + ) { + let num_rects = rects.len() as _; + let rects = if num_rects > 0 { + rects.as_ptr() + } else { + ptr::null() + }; + + unsafe { + self.raw + .clone() + .ClearRenderTargetView(rtv, &color.float32, num_rects, rects); + } + } + + fn clear_depth_stencil_view( + &self, + dsv: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE, + depth: Option<f32>, + stencil: Option<u32>, + rects: &[d3d12::D3D12_RECT], + ) { + let mut flags = native::ClearFlags::empty(); + if depth.is_some() { + flags |= native::ClearFlags::DEPTH; + } + if stencil.is_some() { + flags |= native::ClearFlags::STENCIL; + } + + self.raw.clear_depth_stencil_view( + dsv, + flags, + depth.unwrap_or_default(), + stencil.unwrap_or_default() as _, + rects, + ); + } + + fn set_graphics_bind_point(&mut self) { + match self.active_bindpoint { + BindPoint::Compute => { + // Switch to graphics bind point + let &(pipeline, _) = self + .gr_pipeline + .pipeline + .as_ref() + .expect("No graphics pipeline bound"); + self.raw.set_pipeline_state(pipeline); + } + BindPoint::Graphics { internal: true } => { + // Switch to graphics bind point + let &(pipeline, ref shared) = self + .gr_pipeline + .pipeline + .as_ref() + .expect("No graphics pipeline bound"); + self.raw.set_pipeline_state(pipeline); + self.raw.set_graphics_root_signature(shared.signature); + self.bind_descriptor_heaps(); + } + BindPoint::Graphics { internal: false } => {} + } + + self.active_bindpoint = BindPoint::Graphics { internal: false }; + let cmd_buffer = &mut self.raw; + + // Flush root signature data + self.gr_pipeline.flush_user_data( + |slot, data| unsafe { + cmd_buffer.clone().SetGraphicsRoot32BitConstants( + slot, + data.len() as _, + data.as_ptr() as *const _, + 0, + ) + }, + |slot, gpu| cmd_buffer.set_graphics_root_descriptor_table(slot, gpu), + |slot, buffer| cmd_buffer.set_graphics_root_constant_buffer_view(slot, buffer), + ); + } + + fn set_compute_bind_point(&mut self) { + match self.active_bindpoint { + BindPoint::Graphics { internal } => { + // Switch to compute bind point + let &(pipeline, _) = self + .comp_pipeline + .pipeline + .as_ref() + .expect("No compute pipeline bound"); + + self.raw.set_pipeline_state(pipeline); + + self.active_bindpoint = BindPoint::Compute; + + if internal { + self.bind_descriptor_heaps(); + // Rebind the graphics root signature as we come from an internal graphics. + // Issuing a draw call afterwards would hide the information that we internally + // changed the graphics root signature. 
+ if let Some((_, ref shared)) = self.gr_pipeline.pipeline { + self.raw.set_graphics_root_signature(shared.signature); + } + } + } + BindPoint::Compute => {} // Nothing to do + } + + let cmd_buffer = &mut self.raw; + self.comp_pipeline.flush_user_data( + |slot, data| unsafe { + cmd_buffer.clone().SetComputeRoot32BitConstants( + slot, + data.len() as _, + data.as_ptr() as *const _, + 0, + ) + }, + |slot, gpu| cmd_buffer.set_compute_root_descriptor_table(slot, gpu), + |slot, buffer| cmd_buffer.set_compute_root_constant_buffer_view(slot, buffer), + ); + } + + fn transition_barrier( + transition: d3d12::D3D12_RESOURCE_TRANSITION_BARRIER, + ) -> d3d12::D3D12_RESOURCE_BARRIER { + let mut barrier = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + + *unsafe { barrier.u.Transition_mut() } = transition; + barrier + } + + fn dual_transition_barriers( + resource: native::Resource, + sub: u32, + states: Range<u32>, + ) -> (d3d12::D3D12_RESOURCE_BARRIER, d3d12::D3D12_RESOURCE_BARRIER) { + ( + Self::transition_barrier(d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resource.as_mut_ptr(), + Subresource: sub, + StateBefore: states.start, + StateAfter: states.end, + }), + Self::transition_barrier(d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resource.as_mut_ptr(), + Subresource: sub, + StateBefore: states.end, + StateAfter: states.start, + }), + ) + } + + fn split_buffer_copy(copies: &mut Vec<Copy>, r: &com::BufferImageCopy, image: &r::ImageBound) { + let buffer_width = if r.buffer_width == 0 { + r.image_extent.width + } else { + r.buffer_width + }; + let buffer_height = if r.buffer_height == 0 { + r.image_extent.height + } else { + r.buffer_height + }; + let desc = image.surface_type.desc(); + let bytes_per_block = desc.bits as u32 / 8; + let image_extent_aligned = image::Extent { + width: up_align(r.image_extent.width, desc.dim.0 as _), + height: up_align(r.image_extent.height, desc.dim.1 as _), + depth: r.image_extent.depth, + }; + let row_pitch = div(buffer_width, desc.dim.0 as _) * bytes_per_block; + let slice_pitch = div(buffer_height, desc.dim.1 as _) * row_pitch; + let is_pitch_aligned = row_pitch % d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT == 0; + + for layer in r.image_layers.layers.clone() { + let img_subresource = image.calc_subresource(r.image_layers.level as _, layer as _, 0); + let layer_relative = (layer - r.image_layers.layers.start) as u32; + let layer_offset = r.buffer_offset as u64 + + (layer_relative * slice_pitch * r.image_extent.depth) as u64; + let aligned_offset = + layer_offset & !(d3d12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64 - 1); + if layer_offset == aligned_offset && is_pitch_aligned { + // trivial case: everything is aligned, ready for copying + copies.push(Copy { + footprint_offset: aligned_offset, + footprint: image_extent_aligned, + row_pitch, + img_subresource, + img_offset: r.image_offset, + buf_offset: image::Offset::ZERO, + copy_extent: image_extent_aligned, + }); + } else if is_pitch_aligned { + // buffer offset is not aligned + let row_pitch_texels = row_pitch / bytes_per_block * desc.dim.0 as u32; + let gap = (layer_offset - aligned_offset) as i32; + let buf_offset = image::Offset { + x: (gap % row_pitch as i32) / bytes_per_block as i32 * desc.dim.0 as i32, + y: (gap % slice_pitch as i32) / row_pitch as i32 * desc.dim.1 as i32, + z: gap / slice_pitch as i32, + }; + let footprint = image::Extent { + width: buf_offset.x 
as u32 + image_extent_aligned.width, + height: buf_offset.y as u32 + image_extent_aligned.height, + depth: buf_offset.z as u32 + image_extent_aligned.depth, + }; + if r.image_extent.width + buf_offset.x as u32 <= row_pitch_texels { + // we can map it to the aligned one and adjust the offsets accordingly + copies.push(Copy { + footprint_offset: aligned_offset, + footprint, + row_pitch, + img_subresource, + img_offset: r.image_offset, + buf_offset, + copy_extent: image_extent_aligned, + }); + } else { + // split the copy region into 2 that suffice the previous condition + assert!(buf_offset.x as u32 <= row_pitch_texels); + let half = row_pitch_texels - buf_offset.x as u32; + assert!(half <= r.image_extent.width); + + copies.push(Copy { + footprint_offset: aligned_offset, + footprint: image::Extent { + width: row_pitch_texels, + ..footprint + }, + row_pitch, + img_subresource, + img_offset: r.image_offset, + buf_offset, + copy_extent: image::Extent { + width: half, + ..r.image_extent + }, + }); + copies.push(Copy { + footprint_offset: aligned_offset, + footprint: image::Extent { + width: image_extent_aligned.width - half, + height: footprint.height + desc.dim.1 as u32, + depth: footprint.depth, + }, + row_pitch, + img_subresource, + img_offset: image::Offset { + x: r.image_offset.x + half as i32, + ..r.image_offset + }, + buf_offset: image::Offset { + x: 0, + y: buf_offset.y + desc.dim.1 as i32, + z: buf_offset.z, + }, + copy_extent: image::Extent { + width: image_extent_aligned.width - half, + ..image_extent_aligned + }, + }); + } + } else { + // worst case: row by row copy + for z in 0..r.image_extent.depth { + for y in 0..image_extent_aligned.height / desc.dim.1 as u32 { + // an image row starts non-aligned + let row_offset = layer_offset + + z as u64 * slice_pitch as u64 + + y as u64 * row_pitch as u64; + let aligned_offset = row_offset + & !(d3d12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64 - 1); + let next_aligned_offset = + aligned_offset + d3d12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64; + let cut_row_texels = (next_aligned_offset - row_offset) + / bytes_per_block as u64 + * desc.dim.0 as u64; + let cut_width = + cmp::min(image_extent_aligned.width, cut_row_texels as image::Size); + let gap_texels = (row_offset - aligned_offset) as image::Size + / bytes_per_block as image::Size + * desc.dim.0 as image::Size; + // this is a conservative row pitch that should be compatible with both copies + let max_unaligned_pitch = + (r.image_extent.width + gap_texels) * bytes_per_block; + let row_pitch = (max_unaligned_pitch + | (d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)) + + 1; + + copies.push(Copy { + footprint_offset: aligned_offset, + footprint: image::Extent { + width: cut_width + gap_texels, + height: desc.dim.1 as _, + depth: 1, + }, + row_pitch, + img_subresource, + img_offset: image::Offset { + x: r.image_offset.x, + y: r.image_offset.y + desc.dim.1 as i32 * y as i32, + z: r.image_offset.z + z as i32, + }, + buf_offset: image::Offset { + x: gap_texels as i32, + y: 0, + z: 0, + }, + copy_extent: image::Extent { + width: cut_width, + height: desc.dim.1 as _, + depth: 1, + }, + }); + + // and if it crosses a pitch alignment - we copy the rest separately + if cut_width >= image_extent_aligned.width { + continue; + } + let leftover = image_extent_aligned.width - cut_width; + + copies.push(Copy { + footprint_offset: next_aligned_offset, + footprint: image::Extent { + width: leftover, + height: desc.dim.1 as _, + depth: 1, + }, + row_pitch, + img_subresource, + img_offset: 
image::Offset { + x: r.image_offset.x + cut_width as i32, + y: r.image_offset.y + y as i32 * desc.dim.1 as i32, + z: r.image_offset.z + z as i32, + }, + buf_offset: image::Offset::ZERO, + copy_extent: image::Extent { + width: leftover, + height: desc.dim.1 as _, + depth: 1, + }, + }); + } + } + } + } + } + + fn set_vertex_buffers(&mut self) { + let cmd_buffer = &mut self.raw; + let vbs_remap = &self.vertex_bindings_remap; + let vbs = &self.vertex_buffer_views; + let mut last_end_slot = 0; + loop { + let start_offset = match vbs_remap[last_end_slot..] + .iter() + .position(|remap| remap.is_some()) + { + Some(offset) => offset, + None => break, + }; + + let start_slot = last_end_slot + start_offset; + let buffers = vbs_remap[start_slot..] + .iter() + .take_while(|x| x.is_some()) + .filter_map(|mapping| { + let mapping = mapping.unwrap(); + let view = vbs[mapping.mapped_binding]; + // Skip bindings that don't make sense. Since this function is called eagerly, + // we expect it to be called with all the valid inputs prior to drawing. + view.SizeInBytes.checked_sub(mapping.offset).map(|size| { + d3d12::D3D12_VERTEX_BUFFER_VIEW { + BufferLocation: view.BufferLocation + mapping.offset as u64, + SizeInBytes: size, + StrideInBytes: mapping.stride, + } + }) + }) + .collect::<ArrayVec<[_; MAX_VERTEX_BUFFERS]>>(); + + if buffers.is_empty() { + last_end_slot = start_slot + 1; + } else { + let num_views = buffers.len(); + unsafe { + cmd_buffer.IASetVertexBuffers( + start_slot as _, + num_views as _, + buffers.as_ptr(), + ); + } + last_end_slot = start_slot + num_views; + } + } + } + + fn flip_barriers(&mut self) { + for barrier in self.barriers.iter_mut() { + if barrier.Type == d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION { + let transition = unsafe { barrier.u.Transition_mut() }; + mem::swap(&mut transition.StateBefore, &mut transition.StateAfter); + } + } + } + + fn _set_buffer_barrier( + &mut self, + target: &r::BufferBound, + state: d3d12::D3D12_RESOURCE_STATES, + ) { + let barrier = Self::transition_barrier(d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: target.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: d3d12::D3D12_RESOURCE_STATE_COMMON, + StateAfter: state, + }); + self.barriers.clear(); + self.barriers.push(barrier); + } + + fn fill_texture_barries( + &mut self, + target: &r::ImageBound, + states: Range<d3d12::D3D12_RESOURCE_STATES>, + range: &image::SubresourceRange, + ) { + if states.start == states.end { + return; + } + + let mut bar = Self::transition_barrier(d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: target.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: states.start, + StateAfter: states.end, + }); + let full_range = image::SubresourceRange { + aspects: range.aspects, + ..Default::default() + }; + let num_levels = range.resolve_level_count(target.mip_levels); + let num_layers = range.resolve_layer_count(target.kind.num_layers()); + + if *range == full_range { + // Only one barrier if it affects the whole image. + self.barriers.push(bar); + } else { + // Generate barrier for each layer/level combination. 
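The loop that follows expands a partial `SubresourceRange` into one barrier per mip/layer pair via the image's `calc_subresource`. For reference, D3D12 linearizes subresource indices mip-fastest, then array slice, then plane — the same formula as the `D3D12CalcSubresource` helper in the D3D12 headers (standalone sketch for illustration):

```rust
/// D3D12 subresource indexing (cf. `D3D12CalcSubresource`): mip levels
/// vary fastest, then array slices, then planes.
fn calc_subresource(
    mip_slice: u32,
    array_slice: u32,
    plane_slice: u32,
    mip_levels: u32,
    array_size: u32,
) -> u32 {
    mip_slice + array_slice * mip_levels + plane_slice * mip_levels * array_size
}
```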
+ for rel_level in 0..num_levels { + for rel_layer in 0..num_layers { + unsafe { + let transition_barrier = &mut *bar.u.Transition_mut(); + transition_barrier.Subresource = target.calc_subresource( + (range.level_start + rel_level) as _, + (range.layer_start + rel_layer) as _, + 0, + ); + } + self.barriers.push(bar); + } + } + } + } + + fn fill_marker(&mut self, name: &str) -> (*const ctypes::c_void, u32) { + self.temp_marker.clear(); + self.temp_marker.extend(name.encode_utf16()); + self.temp_marker.push(0); + ( + self.temp_marker.as_ptr() as *const _, + self.temp_marker.len() as u32 * 2, + ) + } + + unsafe fn flush_barriers(&self) { + if !self.barriers.is_empty() { + self.raw + .ResourceBarrier(self.barriers.len() as _, self.barriers.as_ptr()); + } + } +} + +impl com::CommandBuffer<Backend> for CommandBuffer { + unsafe fn begin( + &mut self, + flags: com::CommandBufferFlags, + _info: com::CommandBufferInheritanceInfo<Backend>, + ) { + // TODO: Implement flags and secondary command buffers (bundles). + // Note: we need to be ready for a situation where the whole + // command pool was reset. + self.reset(true); + self.phase = Phase::Recording; + self.begin_flags = flags; + let (allocator, list) = self.pool_shared.acquire(); + + assert!(self.allocator.is_none()); + assert_eq!(self.raw, native::GraphicsCommandList::null()); + self.allocator = Some(allocator); + self.raw = list; + } + + unsafe fn finish(&mut self) { + self.raw.close(); + assert_eq!(self.phase, Phase::Recording); + self.phase = Phase::Executable; + self.pool_shared + .release_allocator(self.allocator.take().unwrap()); + } + + unsafe fn reset(&mut self, release_resources: bool) { + if self.phase == Phase::Recording { + self.raw.close(); + } + if self.phase != Phase::Initial { + self.pool_shared.release_list(self.raw); + self.raw = native::GraphicsCommandList::null(); + } + if release_resources { + if let Some(allocator) = self.allocator.take() { + self.pool_shared.release_allocator(allocator); + } + } + self.phase = Phase::Initial; + + self.pass_cache = None; + self.cur_subpass = !0; + self.gr_pipeline = PipelineCache::default(); + self.primitive_topology = d3dcommon::D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + self.comp_pipeline = PipelineCache::default(); + self.active_bindpoint = BindPoint::Graphics { internal: false }; + self.active_descriptor_heaps = [native::DescriptorHeap::null(); 2]; + self.occlusion_query = None; + self.pipeline_stats_query = None; + self.vertex_bindings_remap = [None; MAX_VERTEX_BUFFERS]; + self.vertex_buffer_views = [NULL_VERTEX_BUFFER_VIEW; MAX_VERTEX_BUFFERS]; + for heap in self.rtv_pools.drain(..) { + heap.destroy(); + } + for heap in self.temporary_gpu_heaps.drain(..) { + heap.destroy(); + } + for resource in self.retained_resources.drain(..) { + resource.destroy(); + } + } + + unsafe fn begin_render_pass<T>( + &mut self, + render_pass: &r::RenderPass, + framebuffer: &r::Framebuffer, + target_rect: pso::Rect, + clear_values: T, + _first_subpass: com::SubpassContents, + ) where + T: IntoIterator, + T::Item: Borrow<com::ClearValue>, + { + assert_eq!(framebuffer.attachments.len(), render_pass.attachments.len()); + // Make sure that no subpass works with Present as intermediate layout. + // This wouldn't make much sense, and proceeding with this constraint + // allows the state transitions generated from subpass dependencies + // to ignore the layouts completely. 
+ assert!(!render_pass.subpasses.iter().any(|sp| sp + .color_attachments + .iter() + .chain(sp.depth_stencil_attachment.iter()) + .chain(sp.input_attachments.iter()) + .any(|aref| aref.1 == image::Layout::Present))); + + if !render_pass.raw_name.is_empty() { + let n = &render_pass.raw_name; + self.raw + .BeginEvent(0, n.as_ptr() as *const _, n.len() as u32 * 2); + } + + self.barriers.clear(); + let mut clear_iter = clear_values.into_iter(); + let mut attachment_clears = Vec::new(); + for (i, (view, attachment)) in framebuffer + .attachments + .iter() + .zip(render_pass.attachments.iter()) + .enumerate() + { + // for swapchain views, we consider the initial layout to always be `General` + let pass_start_state = + conv::map_image_resource_state(image::Access::empty(), attachment.layouts.start); + if view.is_swapchain() && pass_start_state != d3d12::D3D12_RESOURCE_STATE_COMMON { + let barrier = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: view.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: d3d12::D3D12_RESOURCE_STATE_COMMON, + StateAfter: pass_start_state, + }; + self.barriers.push(Self::transition_barrier(barrier)); + } + + let cv = if attachment.has_clears() { + Some(*clear_iter.next().unwrap().borrow()) + } else { + None + }; + + attachment_clears.push(AttachmentClear { + subpass_id: render_pass + .subpasses + .iter() + .position(|sp| sp.is_using(i)) + .map(|i| i as pass::SubpassId), + value: if attachment.ops.load == pass::AttachmentLoadOp::Clear { + assert!(cv.is_some()); + cv + } else { + None + }, + stencil_value: if attachment.stencil_ops.load == pass::AttachmentLoadOp::Clear { + Some(cv.unwrap().depth_stencil.stencil) + } else { + None + }, + }); + } + self.flush_barriers(); + + self.pass_cache = Some(RenderPassCache { + render_pass: render_pass.clone(), + framebuffer: framebuffer.clone(), + target_rect: get_rect(&target_rect), + attachment_clears, + has_name: !render_pass.raw_name.is_empty(), + }); + self.cur_subpass = 0; + self.insert_subpass_barriers(BarrierPoint::Pre); + self.bind_targets(); + } + + unsafe fn next_subpass(&mut self, _contents: com::SubpassContents) { + self.insert_subpass_barriers(BarrierPoint::Post); + self.resolve_attachments(); + + self.cur_subpass += 1; + self.insert_subpass_barriers(BarrierPoint::Pre); + self.bind_targets(); + } + + unsafe fn end_render_pass(&mut self) { + self.insert_subpass_barriers(BarrierPoint::Post); + self.resolve_attachments(); + + self.cur_subpass = !0; + self.insert_subpass_barriers(BarrierPoint::Pre); + + let pc = self.pass_cache.take().unwrap(); + self.barriers.clear(); + for (view, attachment) in pc + .framebuffer + .attachments + .iter() + .zip(pc.render_pass.attachments.iter()) + { + // for swapchain views, we consider the initial layout to always be `General` + let pass_end_state = + conv::map_image_resource_state(image::Access::empty(), attachment.layouts.end); + if view.is_swapchain() && pass_end_state != d3d12::D3D12_RESOURCE_STATE_COMMON { + let barrier = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: view.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: pass_end_state, + StateAfter: d3d12::D3D12_RESOURCE_STATE_COMMON, + }; + self.barriers.push(Self::transition_barrier(barrier)); + } + } + self.flush_barriers(); + + if pc.has_name { + self.raw.EndEvent(); + } + } + + unsafe fn pipeline_barrier<'a, T>( + &mut self, + stages: Range<pso::PipelineStage>, + _dependencies: memory::Dependencies, + 
barriers: T, + ) where + T: IntoIterator, + T::Item: Borrow<memory::Barrier<'a, Backend>>, + { + self.barriers.clear(); + + // transition barriers + for barrier in barriers { + match *barrier.borrow() { + memory::Barrier::AllBuffers(_) | memory::Barrier::AllImages(_) => { + // Aliasing barrier with NULL resource is the closest we can get to + // a global memory barrier in Vulkan. + // Was suggested by a Microsoft representative as well as some of the IHVs. + let mut bar = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *bar.u.UAV_mut() = d3d12::D3D12_RESOURCE_UAV_BARRIER { + pResource: ptr::null_mut(), + }; + self.barriers.push(bar); + } + memory::Barrier::Buffer { + ref states, + target, + ref families, + range: _, + } => { + // TODO: Implement queue family ownership transitions for dx12 + if let Some(f) = families { + if f.start.0 != f.end.0 { + unimplemented!("Queue family resource ownership transitions are not implemented for DX12 (attempted transition from queue family {} to {}", f.start.0, f.end.0); + } + } + + let state_src = conv::map_buffer_resource_state(states.start); + let state_dst = conv::map_buffer_resource_state(states.end); + + if state_src == state_dst { + continue; + } + + let target = target.expect_bound(); + let bar = Self::transition_barrier(d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: target.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: state_src, + StateAfter: state_dst, + }); + + self.barriers.push(bar); + } + memory::Barrier::Image { + ref states, + target, + ref families, + ref range, + } => { + // TODO: Implement queue family ownership transitions for dx12 + if let Some(f) = families { + if f.start.0 != f.end.0 { + unimplemented!("Queue family resource ownership transitions are not implemented for DX12 (attempted transition from queue family {} to {}", f.start.0, f.end.0); + } + } + + let state_src = conv::map_image_resource_state(states.start.0, states.start.1); + let state_dst = conv::map_image_resource_state(states.end.0, states.end.1); + + let target = target.expect_bound(); + + match target.place { + r::Place::Heap { .. } => { + self.fill_texture_barries(target, state_src..state_dst, range); + } + r::Place::Swapchain { .. } => {} //ignore + } + } + } + } + + let all_shader_stages = pso::PipelineStage::VERTEX_SHADER + | pso::PipelineStage::FRAGMENT_SHADER + | pso::PipelineStage::COMPUTE_SHADER + | pso::PipelineStage::GEOMETRY_SHADER + | pso::PipelineStage::HULL_SHADER + | pso::PipelineStage::DOMAIN_SHADER; + + // UAV barriers + // + // TODO: Currently always add a global UAV barrier. + // WAR only requires an execution barrier but D3D12 seems to need + // a UAV barrier for this according to docs. Can we make this better? 
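To summarize the lowering performed by `pipeline_barrier` up to this point: global `AllBuffers`/`AllImages` barriers become NULL-resource UAV barriers (the closest D3D12 analogue to a Vulkan global memory barrier), per-resource barriers become state transitions, and transitions whose before/after states match are dropped. A compact restatement as a hypothetical helper (the check that follows then adds one more global UAV barrier whenever overlapping shader stages are involved):

```rust
/// Hypothetical condensation of the barrier lowering above.
enum HalBarrier {
    /// `memory::Barrier::AllBuffers` / `AllImages`.
    AllResources,
    /// `memory::Barrier::Buffer` / `Image`; `changed` records whether the
    /// mapped D3D12 before/after states actually differ.
    PerResource { changed: bool },
}

fn lowered(b: HalBarrier) -> Option<&'static str> {
    match b {
        HalBarrier::AllResources => Some("UAV barrier with NULL resource"),
        HalBarrier::PerResource { changed: false } => None, // elided
        HalBarrier::PerResource { changed: true } => Some("transition barrier"),
    }
}
```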
+ if (stages.start & stages.end).intersects(all_shader_stages) { + let mut barrier = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *barrier.u.UAV_mut() = d3d12::D3D12_RESOURCE_UAV_BARRIER { + pResource: ptr::null_mut(), + }; + self.barriers.push(barrier); + } + + // Alias barriers + // + // TODO: Optimize, don't always add an alias barrier + if false { + let mut barrier = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_ALIASING, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *barrier.u.Aliasing_mut() = d3d12::D3D12_RESOURCE_ALIASING_BARRIER { + pResourceBefore: ptr::null_mut(), + pResourceAfter: ptr::null_mut(), + }; + self.barriers.push(barrier); + } + + self.flush_barriers(); + } + + unsafe fn clear_image<T>( + &mut self, + image: &r::Image, + layout: image::Layout, + value: com::ClearValue, + subresource_ranges: T, + ) where + T: IntoIterator, + T::Item: Borrow<image::SubresourceRange>, + { + let image = image.expect_bound(); + let base_state = conv::map_image_resource_state(image::Access::TRANSFER_WRITE, layout); + + for subresource_range in subresource_ranges { + let sub = subresource_range.borrow(); + if sub.level_start != 0 || image.mip_levels != 1 { + warn!("Clearing non-zero mipmap levels is not supported yet"); + } + let target_state = if sub.aspects.contains(Aspects::COLOR) { + d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET + } else { + d3d12::D3D12_RESOURCE_STATE_DEPTH_WRITE + }; + + self.barriers.clear(); + self.fill_texture_barries(image, base_state..target_state, sub); + self.flush_barriers(); + + for rel_layer in 0..sub.resolve_layer_count(image.kind.num_layers()) { + let layer = (sub.layer_start + rel_layer) as usize; + if sub.aspects.contains(Aspects::COLOR) { + let rtv = image.clear_cv[layer].raw; + self.clear_render_target_view(rtv, value.color, &[]); + } + if sub.aspects.contains(Aspects::DEPTH) { + let dsv = image.clear_dv[layer].raw; + self.clear_depth_stencil_view(dsv, Some(value.depth_stencil.depth), None, &[]); + } + if sub.aspects.contains(Aspects::STENCIL) { + let dsv = image.clear_sv[layer].raw; + self.clear_depth_stencil_view( + dsv, + None, + Some(value.depth_stencil.stencil as _), + &[], + ); + } + } + + self.flip_barriers(); + self.flush_barriers(); + } + } + + unsafe fn clear_attachments<T, U>(&mut self, clears: T, rects: U) + where + T: IntoIterator, + T::Item: Borrow<com::AttachmentClear>, + U: IntoIterator, + U::Item: Borrow<pso::ClearRect>, + { + let pass_cache = match self.pass_cache { + Some(ref cache) => cache, + None => panic!("`clear_attachments` can only be called inside a renderpass"), + }; + let sub_pass = &pass_cache.render_pass.subpasses[self.cur_subpass as usize]; + + let clear_rects: SmallVec<[pso::ClearRect; 4]> = rects + .into_iter() + .map(|rect| rect.borrow().clone()) + .collect(); + + let device = self.shared.service_pipes.device; + + for clear in clears { + match *clear.borrow() { + com::AttachmentClear::Color { index, value } => { + let attachment = { + let rtv_id = sub_pass.color_attachments[index]; + pass_cache.framebuffer.attachments[rtv_id.0] + }; + + let mut rtv_pool = descriptors_cpu::HeapLinear::new( + device, + native::DescriptorHeapType::Rtv, + clear_rects.len(), + ); + + for clear_rect in &clear_rects { + assert!(attachment.layers.0 + clear_rect.layers.end <= attachment.layers.1); + let rect = [get_rect(&clear_rect.rect)]; + + let view_info = device::ViewInfo { + 
resource: attachment.resource, + kind: attachment.kind, + caps: image::ViewCapabilities::empty(), + view_kind: image::ViewKind::D2Array, + format: attachment.dxgi_format, + component_mapping: device::IDENTITY_MAPPING, + levels: attachment.mip_levels.0..attachment.mip_levels.1, + layers: attachment.layers.0 + clear_rect.layers.start + ..attachment.layers.0 + clear_rect.layers.end, + }; + let rtv = rtv_pool.alloc_handle(); + Device::view_image_as_render_target_impl(device, rtv, &view_info).unwrap(); + self.clear_render_target_view(rtv, value.into(), &rect); + } + + rtv_pool.destroy(); + } + com::AttachmentClear::DepthStencil { depth, stencil } => { + let attachment = { + let dsv_id = sub_pass.depth_stencil_attachment.unwrap(); + pass_cache.framebuffer.attachments[dsv_id.0] + }; + + let mut dsv_pool = descriptors_cpu::HeapLinear::new( + device, + native::DescriptorHeapType::Dsv, + clear_rects.len(), + ); + + for clear_rect in &clear_rects { + assert!(attachment.layers.0 + clear_rect.layers.end <= attachment.layers.1); + let rect = [get_rect(&clear_rect.rect)]; + + let view_info = device::ViewInfo { + resource: attachment.resource, + kind: attachment.kind, + caps: image::ViewCapabilities::empty(), + view_kind: image::ViewKind::D2Array, + format: attachment.dxgi_format, + component_mapping: device::IDENTITY_MAPPING, + levels: attachment.mip_levels.0..attachment.mip_levels.1, + layers: attachment.layers.0 + clear_rect.layers.start + ..attachment.layers.0 + clear_rect.layers.end, + }; + let dsv = dsv_pool.alloc_handle(); + Device::view_image_as_depth_stencil_impl(device, dsv, &view_info).unwrap(); + self.clear_depth_stencil_view(dsv, depth, stencil, &rect); + } + + dsv_pool.destroy(); + } + } + } + } + + unsafe fn resolve_image<T>( + &mut self, + src: &r::Image, + _src_layout: image::Layout, + dst: &r::Image, + _dst_layout: image::Layout, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<com::ImageResolve>, + { + let src = src.expect_bound(); + let dst = dst.expect_bound(); + assert_eq!(src.descriptor.Format, dst.descriptor.Format); + + { + // Insert barrier for `COPY_DEST` to `RESOLVE_DEST` as we only expose + // `TRANSFER_WRITE` which is used for all copy commands. + let transition_barrier = + Self::transition_barrier(d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: dst.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, // TODO: only affected ranges + StateBefore: d3d12::D3D12_RESOURCE_STATE_COPY_DEST, + StateAfter: d3d12::D3D12_RESOURCE_STATE_RESOLVE_DEST, + }); + self.raw.ResourceBarrier(1, &transition_barrier); + } + + for region in regions { + let r = region.borrow(); + for layer in 0..r.extent.depth as u32 { + self.raw.ResolveSubresource( + src.resource.as_mut_ptr(), + src.calc_subresource( + r.src_subresource.level as _, + r.src_subresource.layers.start as u32 + layer, + 0, + ), + dst.resource.as_mut_ptr(), + dst.calc_subresource( + r.dst_subresource.level as _, + r.dst_subresource.layers.start as u32 + layer, + 0, + ), + src.descriptor.Format, + ); + } + } + + { + // Insert barrier for back transition from `RESOLVE_DEST` to `COPY_DEST`. 
+ let transition_barrier = + Self::transition_barrier(d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: dst.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, // TODO: only affected ranges + StateBefore: d3d12::D3D12_RESOURCE_STATE_RESOLVE_DEST, + StateAfter: d3d12::D3D12_RESOURCE_STATE_COPY_DEST, + }); + self.raw.ResourceBarrier(1, &transition_barrier); + } + } + + unsafe fn blit_image<T>( + &mut self, + src: &r::Image, + _src_layout: image::Layout, + dst: &r::Image, + _dst_layout: image::Layout, + filter: image::Filter, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<com::ImageBlit>, + { + let device = self.shared.service_pipes.device.clone(); + let src = src.expect_bound(); + let dst = dst.expect_bound(); + + // TODO: Resource barriers for src. + // TODO: depth or stencil images not supported so far + + // TODO: only supporting 2D images + match (src.kind, dst.kind) { + (image::Kind::D2(..), image::Kind::D2(..)) => {} + _ => unimplemented!(), + } + + // Descriptor heap for the current blit, only storing the src image + let (srv_heap, _) = device.create_descriptor_heap( + 1, + native::DescriptorHeapType::CbvSrvUav, + native::DescriptorHeapFlags::SHADER_VISIBLE, + 0, + ); + self.raw.set_descriptor_heaps(&[srv_heap]); + self.temporary_gpu_heaps.push(srv_heap); + + let srv_desc = Device::build_image_as_shader_resource_desc(&device::ViewInfo { + resource: src.resource, + kind: src.kind, + caps: src.view_caps, + view_kind: image::ViewKind::D2Array, // TODO + format: src.default_view_format.unwrap(), + component_mapping: device::IDENTITY_MAPPING, + levels: 0..src.descriptor.MipLevels as _, + layers: 0..src.kind.num_layers(), + }) + .unwrap(); + device.CreateShaderResourceView( + src.resource.as_mut_ptr(), + &srv_desc, + srv_heap.start_cpu_descriptor(), + ); + + let filter = match filter { + image::Filter::Nearest => d3d12::D3D12_FILTER_MIN_MAG_MIP_POINT, + image::Filter::Linear => d3d12::D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT, + }; + + struct Instance { + rtv: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE, + viewport: d3d12::D3D12_VIEWPORT, + data: internal::BlitData, + }; + let mut instances = FastHashMap::<internal::BlitKey, Vec<Instance>>::default(); + let mut barriers = Vec::new(); + + for region in regions { + let r = region.borrow(); + + let first_layer = r.dst_subresource.layers.start; + let num_layers = r.dst_subresource.layers.end - first_layer; + + // WORKAROUND: renderdoc crashes if we destroy the pool too early + let rtv_pool = Device::create_descriptor_heap_impl( + device, + native::DescriptorHeapType::Rtv, + false, + num_layers as _, + ); + self.rtv_pools.push(rtv_pool.raw.clone()); + + let key = match r.dst_subresource.aspects { + format::Aspects::COLOR => { + let format = dst.default_view_format.unwrap(); + // Create RTVs of the dst image for the miplevel of the current region + for i in 0..num_layers { + let mut desc = d3d12::D3D12_RENDER_TARGET_VIEW_DESC { + Format: format, + ViewDimension: d3d12::D3D12_RTV_DIMENSION_TEXTURE2DARRAY, + u: mem::zeroed(), + }; + + *desc.u.Texture2DArray_mut() = d3d12::D3D12_TEX2D_ARRAY_RTV { + MipSlice: r.dst_subresource.level as _, + FirstArraySlice: (i + first_layer) as u32, + ArraySize: 1, + PlaneSlice: 0, // TODO + }; + + let view = rtv_pool.at(i as _, 0).cpu; + device.CreateRenderTargetView(dst.resource.as_mut_ptr(), &desc, view); + } + + (format, filter) + } + _ => unimplemented!(), + }; + + // Take flipping into account + let viewport = d3d12::D3D12_VIEWPORT { + TopLeftX: 
cmp::min(r.dst_bounds.start.x, r.dst_bounds.end.x) as _, + TopLeftY: cmp::min(r.dst_bounds.start.y, r.dst_bounds.end.y) as _, + Width: (r.dst_bounds.end.x - r.dst_bounds.start.x).abs() as _, + Height: (r.dst_bounds.end.y - r.dst_bounds.start.y).abs() as _, + MinDepth: 0.0, + MaxDepth: 1.0, + }; + + let list = instances.entry(key).or_insert(Vec::new()); + + for i in 0..num_layers { + let src_layer = r.src_subresource.layers.start + i; + // Screen space triangle blitting + let data = { + // Image extents, layers are treated as depth + let (sx, dx) = if r.dst_bounds.start.x > r.dst_bounds.end.x { + ( + r.src_bounds.end.x, + r.src_bounds.start.x - r.src_bounds.end.x, + ) + } else { + ( + r.src_bounds.start.x, + r.src_bounds.end.x - r.src_bounds.start.x, + ) + }; + let (sy, dy) = if r.dst_bounds.start.y > r.dst_bounds.end.y { + ( + r.src_bounds.end.y, + r.src_bounds.start.y - r.src_bounds.end.y, + ) + } else { + ( + r.src_bounds.start.y, + r.src_bounds.end.y - r.src_bounds.start.y, + ) + }; + let image::Extent { width, height, .. } = + src.kind.level_extent(r.src_subresource.level); + + internal::BlitData { + src_offset: [sx as f32 / width as f32, sy as f32 / height as f32], + src_extent: [dx as f32 / width as f32, dy as f32 / height as f32], + layer: src_layer as f32, + level: r.src_subresource.level as _, + } + }; + + list.push(Instance { + rtv: rtv_pool.at(i as _, 0).cpu, + viewport, + data, + }); + + barriers.push(Self::transition_barrier( + d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: dst.resource.as_mut_ptr(), + Subresource: dst.calc_subresource( + r.dst_subresource.level as _, + (first_layer + i) as _, + 0, + ), + StateBefore: d3d12::D3D12_RESOURCE_STATE_COPY_DEST, + StateAfter: d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET, + }, + )); + } + } + + // pre barriers + self.raw + .ResourceBarrier(barriers.len() as _, barriers.as_ptr()); + // execute blits + self.set_internal_graphics_pipeline(); + for (key, list) in instances { + let blit = self.shared.service_pipes.get_blit_2d_color(key); + self.raw + .IASetPrimitiveTopology(d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + self.raw.set_pipeline_state(blit.pipeline); + self.raw.set_graphics_root_signature(blit.signature); + self.raw + .set_graphics_root_descriptor_table(0, srv_heap.start_gpu_descriptor()); + for inst in list { + let scissor = d3d12::D3D12_RECT { + left: inst.viewport.TopLeftX as _, + top: inst.viewport.TopLeftY as _, + right: (inst.viewport.TopLeftX + inst.viewport.Width) as _, + bottom: (inst.viewport.TopLeftY + inst.viewport.Height) as _, + }; + self.raw.RSSetViewports(1, &inst.viewport); + self.raw.RSSetScissorRects(1, &scissor); + self.raw.SetGraphicsRoot32BitConstants( + 1, + (mem::size_of::<internal::BlitData>() / 4) as _, + &inst.data as *const _ as *const _, + 0, + ); + self.raw + .OMSetRenderTargets(1, &inst.rtv, minwindef::TRUE, ptr::null()); + self.raw.draw(3, 1, 0, 0); + } + } + // post barriers + for bar in &mut barriers { + let transition = bar.u.Transition_mut(); + mem::swap(&mut transition.StateBefore, &mut transition.StateAfter); + } + self.raw + .ResourceBarrier(barriers.len() as _, barriers.as_ptr()); + + // Reset states + self.raw + .RSSetViewports(self.viewport_cache.len() as _, self.viewport_cache.as_ptr()); + self.raw + .RSSetScissorRects(self.scissor_cache.len() as _, self.scissor_cache.as_ptr()); + if self.primitive_topology != d3dcommon::D3D_PRIMITIVE_TOPOLOGY_UNDEFINED { + self.raw.IASetPrimitiveTopology(self.primitive_topology); + } + } + + unsafe fn bind_index_buffer( + &mut self, + 
buffer: &r::Buffer, + sub: buffer::SubRange, + ty: IndexType, + ) { + let buffer = buffer.expect_bound(); + let format = match ty { + IndexType::U16 => dxgiformat::DXGI_FORMAT_R16_UINT, + IndexType::U32 => dxgiformat::DXGI_FORMAT_R32_UINT, + }; + let location = buffer.resource.gpu_virtual_address(); + self.raw.set_index_buffer( + location + sub.offset, + sub.size_to(buffer.requirements.size) as u32, + format, + ); + } + + unsafe fn bind_vertex_buffers<I, T>(&mut self, first_binding: pso::BufferIndex, buffers: I) + where + I: IntoIterator<Item = (T, buffer::SubRange)>, + T: Borrow<r::Buffer>, + { + assert!(first_binding as usize <= MAX_VERTEX_BUFFERS); + + for (view, (buffer, sub)) in self.vertex_buffer_views[first_binding as _..] + .iter_mut() + .zip(buffers) + { + let b = buffer.borrow().expect_bound(); + let base = (*b.resource).GetGPUVirtualAddress(); + view.BufferLocation = base + sub.offset; + view.SizeInBytes = sub.size_to(b.requirements.size) as u32; + } + self.set_vertex_buffers(); + } + + unsafe fn set_viewports<T>(&mut self, first_viewport: u32, viewports: T) + where + T: IntoIterator, + T::Item: Borrow<pso::Viewport>, + { + let viewports = viewports + .into_iter() + .map(|viewport| { + let viewport = viewport.borrow(); + d3d12::D3D12_VIEWPORT { + TopLeftX: viewport.rect.x as _, + TopLeftY: viewport.rect.y as _, + Width: viewport.rect.w as _, + Height: viewport.rect.h as _, + MinDepth: viewport.depth.start, + MaxDepth: viewport.depth.end, + } + }) + .enumerate(); + + for (i, viewport) in viewports { + if i + first_viewport as usize >= self.viewport_cache.len() { + self.viewport_cache.push(viewport); + } else { + self.viewport_cache[i + first_viewport as usize] = viewport; + } + } + + self.raw + .RSSetViewports(self.viewport_cache.len() as _, self.viewport_cache.as_ptr()); + } + + unsafe fn set_scissors<T>(&mut self, first_scissor: u32, scissors: T) + where + T: IntoIterator, + T::Item: Borrow<pso::Rect>, + { + let rects = scissors + .into_iter() + .map(|rect| get_rect(rect.borrow())) + .enumerate(); + + for (i, rect) in rects { + if i + first_scissor as usize >= self.scissor_cache.len() { + self.scissor_cache.push(rect); + } else { + self.scissor_cache[i + first_scissor as usize] = rect; + } + } + + self.raw + .RSSetScissorRects(self.scissor_cache.len() as _, self.scissor_cache.as_ptr()) + } + + unsafe fn set_blend_constants(&mut self, color: pso::ColorValue) { + self.raw.set_blend_factor(color); + } + + unsafe fn set_stencil_reference(&mut self, faces: pso::Face, value: pso::StencilValue) { + assert!(!faces.is_empty()); + + if !faces.is_all() { + warn!( + "Stencil ref values set for both faces but only one was requested ({})", + faces.bits(), + ); + } + + self.raw.set_stencil_reference(value as _); + } + + unsafe fn set_stencil_read_mask(&mut self, _faces: pso::Face, _value: pso::StencilValue) { + //TODO: + // unimplemented!(); + } + + unsafe fn set_stencil_write_mask(&mut self, _faces: pso::Face, _value: pso::StencilValue) { + //TODO: + //unimplemented!(); + } + + unsafe fn set_depth_bounds(&mut self, bounds: Range<f32>) { + let (cmd_list1, hr) = self.raw.cast::<d3d12::ID3D12GraphicsCommandList1>(); + if winerror::SUCCEEDED(hr) { + cmd_list1.OMSetDepthBounds(bounds.start, bounds.end); + cmd_list1.destroy(); + } else { + warn!("Depth bounds test is not supported"); + } + } + + unsafe fn set_line_width(&mut self, width: f32) { + validate_line_width(width); + } + + unsafe fn set_depth_bias(&mut self, _depth_bias: pso::DepthBias) { + //TODO: + // unimplemented!() + } + + unsafe fn 
bind_graphics_pipeline(&mut self, pipeline: &r::GraphicsPipeline) { + match self.gr_pipeline.pipeline { + Some((_, ref shared)) if Arc::ptr_eq(shared, &pipeline.shared) => { + // Same root signature, nothing to do + } + _ => { + self.raw + .set_graphics_root_signature(pipeline.shared.signature); + // All slots need to be rebound internally on signature change. + self.gr_pipeline.user_data.dirty_all(); + } + } + self.raw.set_pipeline_state(pipeline.raw); + self.raw.IASetPrimitiveTopology(pipeline.topology); + self.primitive_topology = pipeline.topology; + + self.active_bindpoint = BindPoint::Graphics { internal: false }; + self.gr_pipeline.pipeline = Some((pipeline.raw, Arc::clone(&pipeline.shared))); + self.vertex_bindings_remap = pipeline.vertex_bindings; + + self.set_vertex_buffers(); + + if let Some(ref vp) = pipeline.baked_states.viewport { + self.set_viewports(0, iter::once(vp)); + } + if let Some(ref rect) = pipeline.baked_states.scissor { + self.set_scissors(0, iter::once(rect)); + } + if let Some(color) = pipeline.baked_states.blend_color { + self.set_blend_constants(color); + } + if let Some(ref bounds) = pipeline.baked_states.depth_bounds { + self.set_depth_bounds(bounds.clone()); + } + } + + unsafe fn bind_graphics_descriptor_sets<'a, I, J>( + &mut self, + layout: &r::PipelineLayout, + first_set: usize, + sets: I, + offsets: J, + ) where + I: IntoIterator, + I::Item: Borrow<r::DescriptorSet>, + J: IntoIterator, + J::Item: Borrow<com::DescriptorSetOffset>, + { + let set_array = sets + .into_iter() + .collect::<ArrayVec<[_; MAX_DESCRIPTOR_SETS]>>(); + self.active_descriptor_heaps = self + .gr_pipeline + .bind_descriptor_sets(layout, first_set, &set_array, offsets); + self.bind_descriptor_heaps(); + + for (i, set) in set_array.into_iter().enumerate() { + self.mark_bound_descriptor(first_set + i, set.borrow()); + } + } + + unsafe fn bind_compute_pipeline(&mut self, pipeline: &r::ComputePipeline) { + match self.comp_pipeline.pipeline { + Some((_, ref shared)) if Arc::ptr_eq(shared, &pipeline.shared) => { + // Same root signature, nothing to do + } + _ => { + self.raw + .set_compute_root_signature(pipeline.shared.signature); + // All slots need to be rebound internally on signature change. 
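+                // A root signature switch invalidates all root arguments in D3D12, so
+                // every cached slot is flagged dirty below, to be rebound when the
+                // compute bind point is next set (see `dispatch`).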
+                self.comp_pipeline.user_data.dirty_all();
+            }
+        }
+        self.raw.set_pipeline_state(pipeline.raw);
+
+        self.active_bindpoint = BindPoint::Compute;
+        self.comp_pipeline.pipeline = Some((pipeline.raw, Arc::clone(&pipeline.shared)));
+    }
+
+    unsafe fn bind_compute_descriptor_sets<I, J>(
+        &mut self,
+        layout: &r::PipelineLayout,
+        first_set: usize,
+        sets: I,
+        offsets: J,
+    ) where
+        I: IntoIterator,
+        I::Item: Borrow<r::DescriptorSet>,
+        J: IntoIterator,
+        J::Item: Borrow<com::DescriptorSetOffset>,
+    {
+        let set_array = sets
+            .into_iter()
+            .collect::<ArrayVec<[_; MAX_DESCRIPTOR_SETS]>>();
+        self.active_descriptor_heaps = self
+            .comp_pipeline
+            .bind_descriptor_sets(layout, first_set, &set_array, offsets);
+        self.bind_descriptor_heaps();
+
+        for (i, set) in set_array.into_iter().enumerate() {
+            self.mark_bound_descriptor(first_set + i, set.borrow());
+        }
+    }
+
+    unsafe fn dispatch(&mut self, count: WorkGroupCount) {
+        self.set_compute_bind_point();
+        self.raw.dispatch(count);
+    }
+
+    unsafe fn dispatch_indirect(&mut self, buffer: &r::Buffer, offset: buffer::Offset) {
+        let buffer = buffer.expect_bound();
+        self.set_compute_bind_point();
+        self.raw.ExecuteIndirect(
+            self.shared.signatures.dispatch.as_mut_ptr(),
+            1,
+            buffer.resource.as_mut_ptr(),
+            offset,
+            ptr::null_mut(),
+            0,
+        );
+    }
+
+    unsafe fn fill_buffer(&mut self, buffer: &r::Buffer, range: buffer::SubRange, data: u32) {
+        let buffer = buffer.expect_bound();
+        let bytes_per_unit = 4;
+        let start = range.offset as i32;
+        let end = range
+            .size
+            .map_or(buffer.requirements.size, |s| range.offset + s) as i32;
+        if start % 4 != 0 || end % 4 != 0 {
+            warn!("Fill buffer bounds have to be multiples of 4");
+        }
+        let rect = d3d12::D3D12_RECT {
+            left: start / bytes_per_unit,
+            top: 0,
+            right: end / bytes_per_unit,
+            bottom: 1,
+        };
+
+        let (pre_barrier, post_barrier) = Self::dual_transition_barriers(
+            buffer.resource,
+            d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
+            d3d12::D3D12_RESOURCE_STATE_COPY_DEST..d3d12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
+        );
+        self.raw.ResourceBarrier(1, &pre_barrier);
+
+        // Descriptor heap for the current clear, only storing the raw UAV of the
+        // destination buffer.
+        let device = self.shared.service_pipes.device.clone();
+        let (uav_heap, _) = device.create_descriptor_heap(
+            1,
+            native::DescriptorHeapType::CbvSrvUav,
+            native::DescriptorHeapFlags::SHADER_VISIBLE,
+            0,
+        );
+        let mut uav_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC {
+            Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS,
+            ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER,
+            u: mem::zeroed(),
+        };
+        *uav_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_UAV {
+            FirstElement: 0,
+            NumElements: (buffer.requirements.size / bytes_per_unit as u64) as u32,
+            StructureByteStride: 0,
+            CounterOffsetInBytes: 0,
+            Flags: d3d12::D3D12_BUFFER_UAV_FLAG_RAW,
+        };
+        device.CreateUnorderedAccessView(
+            buffer.resource.as_mut_ptr(),
+            ptr::null_mut(),
+            &uav_desc,
+            uav_heap.start_cpu_descriptor(),
+        );
+        self.raw.set_descriptor_heaps(&[uav_heap]);
+        self.temporary_gpu_heaps.push(uav_heap);
+
+        let cpu_descriptor = buffer
+            .clear_uav
+            .expect("Buffer needs to be created with usage `TRANSFER_DST`");
+
+        self.raw.ClearUnorderedAccessViewUint(
+            uav_heap.start_gpu_descriptor(),
+            cpu_descriptor.raw,
+            buffer.resource.as_mut_ptr(),
+            &[data; 4],
+            1,
+            &rect as *const _,
+        );
+
+        self.raw.ResourceBarrier(1, &post_barrier);
+    }
+
+    unsafe fn update_buffer(&mut self, _buffer: &r::Buffer, _offset: buffer::Offset, _data: &[u8]) {
+        unimplemented!()
+    }
+
+    unsafe fn copy_buffer<T>(&mut self,
src: &r::Buffer, dst: &r::Buffer, regions: T) + where + T: IntoIterator, + T::Item: Borrow<com::BufferCopy>, + { + let src = src.expect_bound(); + let dst = dst.expect_bound(); + + /* + let (pre_barrier, post_barrier) = Self::dual_transition_barriers( + dst.resource, + d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + d3d12::D3D12_RESOURCE_STATE_COPY_DEST, + ); + self.raw.ResourceBarrier(1, &pre_barrier); + */ + + // TODO: Optimization: Copy whole resource if possible + // copy each region + for region in regions { + let region = region.borrow(); + self.raw.CopyBufferRegion( + dst.resource.as_mut_ptr(), + region.dst as _, + src.resource.as_mut_ptr(), + region.src as _, + region.size as _, + ); + } + + //self.raw.ResourceBarrier(1, &post_barrier); + } + + unsafe fn copy_image<T>( + &mut self, + src: &r::Image, + _: image::Layout, + dst: &r::Image, + _: image::Layout, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<com::ImageCopy>, + { + let src = src.expect_bound(); + let dst = dst.expect_bound(); + let mut src_image = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + let mut dst_image = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + + let device = self.shared.service_pipes.device.clone(); + let src_desc = src.surface_type.desc(); + let dst_desc = dst.surface_type.desc(); + assert_eq!(src_desc.bits, dst_desc.bits); + //Note: Direct3D 10.1 enables copies between prestructured-typed textures + // and block-compressed textures of the same bit widths. + let do_alias = src.surface_type != dst.surface_type + && src_desc.is_compressed() == dst_desc.is_compressed(); + + if do_alias { + // D3D12 only permits changing the channel type for copies, + // similarly to how it allows the views to be created. 
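+            // To satisfy that restriction, the block below places a second resource
+            // over the same heap range as `src`, reusing `src`'s descriptor but with
+            // `dst`'s format, and brackets the copy with aliasing barriers.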
+ + // create an aliased resource to the source + let mut alias = native::Resource::null(); + let desc = d3d12::D3D12_RESOURCE_DESC { + Format: dst.descriptor.Format, + ..src.descriptor.clone() + }; + let (heap_ptr, heap_offset) = match src.place { + r::Place::Heap { raw, offset } => (raw.as_mut_ptr(), offset), + r::Place::Swapchain {} => { + error!("Unable to copy swapchain image, skipping"); + return; + } + }; + assert_eq!( + winerror::S_OK, + device.CreatePlacedResource( + heap_ptr, + heap_offset, + &desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12::ID3D12Resource::uuidof(), + alias.mut_void(), + ) + ); + src_image.pResource = alias.as_mut_ptr(); + self.retained_resources.push(alias); + + // signal the aliasing transition + let sub_barrier = d3d12::D3D12_RESOURCE_ALIASING_BARRIER { + pResourceBefore: src.resource.as_mut_ptr(), + pResourceAfter: src_image.pResource, + }; + let mut barrier = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_ALIASING, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *barrier.u.Aliasing_mut() = sub_barrier; + self.raw.ResourceBarrier(1, &barrier as *const _); + } + + for region in regions { + let r = region.borrow(); + debug_assert_eq!( + r.src_subresource.layers.len(), + r.dst_subresource.layers.len() + ); + let src_box = d3d12::D3D12_BOX { + left: r.src_offset.x as _, + top: r.src_offset.y as _, + right: (r.src_offset.x + r.extent.width as i32) as _, + bottom: (r.src_offset.y + r.extent.height as i32) as _, + front: r.src_offset.z as _, + back: (r.src_offset.z + r.extent.depth as i32) as _, + }; + + for (src_layer, dst_layer) in r + .src_subresource + .layers + .clone() + .zip(r.dst_subresource.layers.clone()) + { + *src_image.u.SubresourceIndex_mut() = + src.calc_subresource(r.src_subresource.level as _, src_layer as _, 0); + *dst_image.u.SubresourceIndex_mut() = + dst.calc_subresource(r.dst_subresource.level as _, dst_layer as _, 0); + self.raw.CopyTextureRegion( + &dst_image, + r.dst_offset.x as _, + r.dst_offset.y as _, + r.dst_offset.z as _, + &src_image, + &src_box, + ); + } + } + + if do_alias { + // signal the aliasing transition - back to the original + let sub_barrier = d3d12::D3D12_RESOURCE_ALIASING_BARRIER { + pResourceBefore: src_image.pResource, + pResourceAfter: src.resource.as_mut_ptr(), + }; + let mut barrier = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_ALIASING, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *barrier.u.Aliasing_mut() = sub_barrier; + self.raw.ResourceBarrier(1, &barrier as *const _); + } + } + + unsafe fn copy_buffer_to_image<T>( + &mut self, + buffer: &r::Buffer, + image: &r::Image, + _: image::Layout, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<com::BufferImageCopy>, + { + let buffer = buffer.expect_bound(); + let image = image.expect_bound(); + assert!(self.copies.is_empty()); + + for region in regions { + let r = region.borrow(); + Self::split_buffer_copy(&mut self.copies, r, image); + } + + if self.copies.is_empty() { + return; + } + + let mut src = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: buffer.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: mem::zeroed(), + }; + let mut dst = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: image.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + + for c in self.copies.drain(..) 
{ + let src_box = d3d12::D3D12_BOX { + left: c.buf_offset.x as u32, + top: c.buf_offset.y as u32, + right: c.buf_offset.x as u32 + c.copy_extent.width, + bottom: c.buf_offset.y as u32 + c.copy_extent.height, + front: c.buf_offset.z as u32, + back: c.buf_offset.z as u32 + c.copy_extent.depth, + }; + let footprint = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: c.footprint_offset, + Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT { + Format: image.descriptor.Format, + Width: c.footprint.width, + Height: c.footprint.height, + Depth: c.footprint.depth, + RowPitch: c.row_pitch, + }, + }; + *src.u.PlacedFootprint_mut() = footprint; + *dst.u.SubresourceIndex_mut() = c.img_subresource; + self.raw.CopyTextureRegion( + &dst, + c.img_offset.x as _, + c.img_offset.y as _, + c.img_offset.z as _, + &src, + &src_box, + ); + } + } + + unsafe fn copy_image_to_buffer<T>( + &mut self, + image: &r::Image, + _: image::Layout, + buffer: &r::Buffer, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<com::BufferImageCopy>, + { + let image = image.expect_bound(); + let buffer = buffer.expect_bound(); + assert!(self.copies.is_empty()); + + for region in regions { + let r = region.borrow(); + Self::split_buffer_copy(&mut self.copies, r, image); + } + + if self.copies.is_empty() { + return; + } + + let mut src = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: image.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + let mut dst = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: buffer.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: mem::zeroed(), + }; + + for c in self.copies.drain(..) { + let src_box = d3d12::D3D12_BOX { + left: c.img_offset.x as u32, + top: c.img_offset.y as u32, + right: c.img_offset.x as u32 + c.copy_extent.width, + bottom: c.img_offset.y as u32 + c.copy_extent.height, + front: c.img_offset.z as u32, + back: c.img_offset.z as u32 + c.copy_extent.depth, + }; + let footprint = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: c.footprint_offset, + Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT { + Format: image.descriptor.Format, + Width: c.footprint.width, + Height: c.footprint.height, + Depth: c.footprint.depth, + RowPitch: c.row_pitch, + }, + }; + *dst.u.PlacedFootprint_mut() = footprint; + *src.u.SubresourceIndex_mut() = c.img_subresource; + self.raw.CopyTextureRegion( + &dst, + c.buf_offset.x as _, + c.buf_offset.y as _, + c.buf_offset.z as _, + &src, + &src_box, + ); + } + } + + unsafe fn draw(&mut self, vertices: Range<VertexCount>, instances: Range<InstanceCount>) { + self.set_graphics_bind_point(); + self.raw.draw( + vertices.end - vertices.start, + instances.end - instances.start, + vertices.start, + instances.start, + ); + } + + unsafe fn draw_indexed( + &mut self, + indices: Range<IndexCount>, + base_vertex: VertexOffset, + instances: Range<InstanceCount>, + ) { + self.set_graphics_bind_point(); + self.raw.draw_indexed( + indices.end - indices.start, + instances.end - instances.start, + indices.start, + base_vertex, + instances.start, + ); + } + + unsafe fn draw_indirect( + &mut self, + buffer: &r::Buffer, + offset: buffer::Offset, + draw_count: DrawCount, + stride: u32, + ) { + assert_eq!(stride, 16); + let buffer = buffer.expect_bound(); + self.set_graphics_bind_point(); + self.raw.ExecuteIndirect( + self.shared.signatures.draw.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ); + } + + unsafe fn draw_indexed_indirect( + &mut 
self, + buffer: &r::Buffer, + offset: buffer::Offset, + draw_count: DrawCount, + stride: u32, + ) { + assert_eq!(stride, 20); + let buffer = buffer.expect_bound(); + self.set_graphics_bind_point(); + self.raw.ExecuteIndirect( + self.shared.signatures.draw_indexed.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ); + } + + unsafe fn draw_mesh_tasks(&mut self, _: TaskCount, _: TaskCount) { + unimplemented!() + } + + unsafe fn draw_mesh_tasks_indirect( + &mut self, + _: &r::Buffer, + _: buffer::Offset, + _: hal::DrawCount, + _: u32, + ) { + unimplemented!() + } + + unsafe fn draw_mesh_tasks_indirect_count( + &mut self, + _: &r::Buffer, + _: buffer::Offset, + _: &r::Buffer, + _: buffer::Offset, + _: DrawCount, + _: u32, + ) { + unimplemented!() + } + + unsafe fn draw_indirect_count( + &mut self, + buffer: &r::Buffer, + offset: buffer::Offset, + count_buffer: &r::Buffer, + count_buffer_offset: buffer::Offset, + max_draw_count: DrawCount, + stride: u32, + ) { + assert_eq!(stride, 16); + let buffer = buffer.expect_bound(); + let count_buffer = count_buffer.expect_bound(); + self.set_graphics_bind_point(); + self.raw.ExecuteIndirect( + self.shared.signatures.draw.as_mut_ptr(), + max_draw_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_buffer_offset, + ); + } + + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &r::Buffer, + offset: buffer::Offset, + count_buffer: &r::Buffer, + count_buffer_offset: buffer::Offset, + max_draw_count: DrawCount, + stride: u32, + ) { + assert_eq!(stride, 20); + let buffer = buffer.expect_bound(); + let count_buffer = count_buffer.expect_bound(); + self.set_graphics_bind_point(); + self.raw.ExecuteIndirect( + self.shared.signatures.draw_indexed.as_mut_ptr(), + max_draw_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_buffer_offset, + ); + } + + unsafe fn set_event(&mut self, _: &(), _: pso::PipelineStage) { + unimplemented!() + } + + unsafe fn reset_event(&mut self, _: &(), _: pso::PipelineStage) { + unimplemented!() + } + + unsafe fn wait_events<'a, I, J>(&mut self, _: I, _: Range<pso::PipelineStage>, _: J) + where + I: IntoIterator, + I::Item: Borrow<()>, + J: IntoIterator, + J::Item: Borrow<memory::Barrier<'a, Backend>>, + { + unimplemented!() + } + + unsafe fn begin_query(&mut self, query: query::Query<Backend>, flags: query::ControlFlags) { + let query_ty = match query.pool.ty { + native::QueryHeapType::Occlusion => { + if flags.contains(query::ControlFlags::PRECISE) { + self.occlusion_query = Some(OcclusionQuery::Precise(query.id)); + d3d12::D3D12_QUERY_TYPE_OCCLUSION + } else { + // Default to binary occlusion as it might be faster due to early depth/stencil + // tests. 
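+                    // A binary query only reports whether any samples passed (zero vs.
+                    // non-zero), which is all a non-PRECISE occlusion query guarantees.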
+                    self.occlusion_query = Some(OcclusionQuery::Binary(query.id));
+                    d3d12::D3D12_QUERY_TYPE_BINARY_OCCLUSION
+                }
+            }
+            native::QueryHeapType::Timestamp => panic!("Timestamp queries are issued via `write_timestamp`"),
+            native::QueryHeapType::PipelineStatistics => {
+                self.pipeline_stats_query = Some(query.id);
+                d3d12::D3D12_QUERY_TYPE_PIPELINE_STATISTICS
+            }
+            _ => unreachable!(),
+        };
+
+        self.raw
+            .BeginQuery(query.pool.raw.as_mut_ptr(), query_ty, query.id);
+    }
+
+    unsafe fn end_query(&mut self, query: query::Query<Backend>) {
+        let id = query.id;
+        let query_ty = match query.pool.ty {
+            native::QueryHeapType::Occlusion
+                if self.occlusion_query == Some(OcclusionQuery::Precise(id)) =>
+            {
+                self.occlusion_query = None;
+                d3d12::D3D12_QUERY_TYPE_OCCLUSION
+            }
+            native::QueryHeapType::Occlusion
+                if self.occlusion_query == Some(OcclusionQuery::Binary(id)) =>
+            {
+                self.occlusion_query = None;
+                d3d12::D3D12_QUERY_TYPE_BINARY_OCCLUSION
+            }
+            native::QueryHeapType::PipelineStatistics if self.pipeline_stats_query == Some(id) => {
+                self.pipeline_stats_query = None;
+                d3d12::D3D12_QUERY_TYPE_PIPELINE_STATISTICS
+            }
+            _ => panic!("Missing `begin_query` call for query: {:?}", query),
+        };
+
+        self.raw.EndQuery(query.pool.raw.as_mut_ptr(), query_ty, id);
+    }
+
+    unsafe fn reset_query_pool(&mut self, _pool: &r::QueryPool, _queries: Range<query::Id>) {
+        // Nothing to do here.
+        // vkCmdResetQueryPool sets the queries to `unavailable`, but the specification
+        // doesn't state an effect on the `active` state. Every query must be made
+        // inactive by the end of the command buffer, which can only be done with
+        // EndQuery. Therefore, every `begin_query` must be followed by an `end_query`;
+        // the values of queries that were merely reset are undefined.
+    }
+
+    unsafe fn copy_query_pool_results(
+        &mut self,
+        _pool: &r::QueryPool,
+        _queries: Range<query::Id>,
+        _buffer: &r::Buffer,
+        _offset: buffer::Offset,
+        _stride: buffer::Offset,
+        _flags: query::ResultFlags,
+    ) {
+        unimplemented!()
+    }
+
+    unsafe fn write_timestamp(&mut self, _: pso::PipelineStage, query: query::Query<Backend>) {
+        self.raw.EndQuery(
+            query.pool.raw.as_mut_ptr(),
+            d3d12::D3D12_QUERY_TYPE_TIMESTAMP,
+            query.id,
+        );
+    }
+
+    unsafe fn push_graphics_constants(
+        &mut self,
+        _layout: &r::PipelineLayout,
+        _stages: pso::ShaderStageFlags,
+        offset: u32,
+        constants: &[u32],
+    ) {
+        assert!(offset % 4 == 0);
+        self.gr_pipeline
+            .user_data
+            .set_constants(offset as usize / 4, constants);
+    }
+
+    unsafe fn push_compute_constants(
+        &mut self,
+        _layout: &r::PipelineLayout,
+        offset: u32,
+        constants: &[u32],
+    ) {
+        assert!(offset % 4 == 0);
+        self.comp_pipeline
+            .user_data
+            .set_constants(offset as usize / 4, constants);
+    }
+
+    unsafe fn execute_commands<'a, T, I>(&mut self, cmd_buffers: I)
+    where
+        T: 'a + Borrow<CommandBuffer>,
+        I: IntoIterator<Item = &'a T>,
+    {
+        for _cmd_buf in cmd_buffers {
+            error!("TODO: execute_commands");
+        }
+    }
+
+    unsafe fn insert_debug_marker(&mut self, name: &str, _color: u32) {
+        let (ptr, size) = self.fill_marker(name);
+        self.raw.SetMarker(0, ptr, size);
+    }
+    unsafe fn begin_debug_marker(&mut self, name: &str, _color: u32) {
+        let (ptr, size) = self.fill_marker(name);
+        self.raw.BeginEvent(0, ptr, size);
+    }
+    unsafe fn end_debug_marker(&mut self) {
+        self.raw.EndEvent();
+    }
+}
diff --git a/third_party/rust/gfx-backend-dx12/src/conv.rs b/third_party/rust/gfx-backend-dx12/src/conv.rs
new file mode 100644
index 0000000000..5181d9c866
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx12/src/conv.rs
@@ -0,0 +1,690 @@
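+//! Translation tables from gfx-hal types (formats, primitive topology, rasterizer,
+//! blend and depth-stencil state, samplers, resource states, shader stages) to their
+//! DXGI/D3D12 equivalents.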
+use crate::validate_line_width;
+
+use spirv_cross::spirv;
+use std::mem;
+
+use winapi::{
+    shared::{
+        basetsd::UINT8,
+        dxgiformat::*,
+        minwindef::{FALSE, INT, TRUE, UINT},
+    },
+    um::{d3d12::*, d3dcommon::*},
+};
+
+use auxil::ShaderStage;
+use hal::{
+    buffer,
+    format::{Format, ImageFeature, SurfaceType, Swizzle},
+    image, pso,
+};
+
+use native::ShaderVisibility;
+
+fn is_little_endian() -> bool {
+    unsafe { 1 == *(&1u32 as *const _ as *const u8) }
+}
+
+pub fn map_format(format: Format) -> Option<DXGI_FORMAT> {
+    use hal::format::Format::*;
+
+    // Handling packed formats according to the platform endianness.
+    let reverse = is_little_endian();
+    let format = match format {
+        Bgra4Unorm if !reverse => DXGI_FORMAT_B4G4R4A4_UNORM,
+        R5g6b5Unorm if reverse => DXGI_FORMAT_B5G6R5_UNORM,
+        B5g6r5Unorm if !reverse => DXGI_FORMAT_B5G6R5_UNORM,
+        B5g5r5a1Unorm if !reverse => DXGI_FORMAT_B5G5R5A1_UNORM,
+        A1r5g5b5Unorm if reverse => DXGI_FORMAT_B5G5R5A1_UNORM,
+        R8Unorm => DXGI_FORMAT_R8_UNORM,
+        R8Snorm => DXGI_FORMAT_R8_SNORM,
+        R8Uint => DXGI_FORMAT_R8_UINT,
+        R8Sint => DXGI_FORMAT_R8_SINT,
+        Rg8Unorm => DXGI_FORMAT_R8G8_UNORM,
+        Rg8Snorm => DXGI_FORMAT_R8G8_SNORM,
+        Rg8Uint => DXGI_FORMAT_R8G8_UINT,
+        Rg8Sint => DXGI_FORMAT_R8G8_SINT,
+        Rgba8Unorm => DXGI_FORMAT_R8G8B8A8_UNORM,
+        Rgba8Snorm => DXGI_FORMAT_R8G8B8A8_SNORM,
+        Rgba8Uint => DXGI_FORMAT_R8G8B8A8_UINT,
+        Rgba8Sint => DXGI_FORMAT_R8G8B8A8_SINT,
+        Rgba8Srgb => DXGI_FORMAT_R8G8B8A8_UNORM_SRGB,
+        Bgra8Unorm => DXGI_FORMAT_B8G8R8A8_UNORM,
+        Bgra8Srgb => DXGI_FORMAT_B8G8R8A8_UNORM_SRGB,
+        Abgr8Unorm if reverse => DXGI_FORMAT_R8G8B8A8_UNORM,
+        Abgr8Snorm if reverse => DXGI_FORMAT_R8G8B8A8_SNORM,
+        Abgr8Uint if reverse => DXGI_FORMAT_R8G8B8A8_UINT,
+        Abgr8Sint if reverse => DXGI_FORMAT_R8G8B8A8_SINT,
+        Abgr8Srgb if reverse => DXGI_FORMAT_R8G8B8A8_UNORM_SRGB,
+        A2b10g10r10Unorm if reverse => DXGI_FORMAT_R10G10B10A2_UNORM,
+        A2b10g10r10Uint if reverse => DXGI_FORMAT_R10G10B10A2_UINT,
+        R16Unorm => DXGI_FORMAT_R16_UNORM,
+        R16Snorm => DXGI_FORMAT_R16_SNORM,
+        R16Uint => DXGI_FORMAT_R16_UINT,
+        R16Sint => DXGI_FORMAT_R16_SINT,
+        R16Sfloat => DXGI_FORMAT_R16_FLOAT,
+        Rg16Unorm => DXGI_FORMAT_R16G16_UNORM,
+        Rg16Snorm => DXGI_FORMAT_R16G16_SNORM,
+        Rg16Uint => DXGI_FORMAT_R16G16_UINT,
+        Rg16Sint => DXGI_FORMAT_R16G16_SINT,
+        Rg16Sfloat => DXGI_FORMAT_R16G16_FLOAT,
+        Rgba16Unorm => DXGI_FORMAT_R16G16B16A16_UNORM,
+        Rgba16Snorm => DXGI_FORMAT_R16G16B16A16_SNORM,
+        Rgba16Uint => DXGI_FORMAT_R16G16B16A16_UINT,
+        Rgba16Sint => DXGI_FORMAT_R16G16B16A16_SINT,
+        Rgba16Sfloat => DXGI_FORMAT_R16G16B16A16_FLOAT,
+        R32Uint => DXGI_FORMAT_R32_UINT,
+        R32Sint => DXGI_FORMAT_R32_SINT,
+        R32Sfloat => DXGI_FORMAT_R32_FLOAT,
+        Rg32Uint => DXGI_FORMAT_R32G32_UINT,
+        Rg32Sint => DXGI_FORMAT_R32G32_SINT,
+        Rg32Sfloat => DXGI_FORMAT_R32G32_FLOAT,
+        Rgb32Uint => DXGI_FORMAT_R32G32B32_UINT,
+        Rgb32Sint => DXGI_FORMAT_R32G32B32_SINT,
+        Rgb32Sfloat => DXGI_FORMAT_R32G32B32_FLOAT,
+        Rgba32Uint => DXGI_FORMAT_R32G32B32A32_UINT,
+        Rgba32Sint => DXGI_FORMAT_R32G32B32A32_SINT,
+        Rgba32Sfloat => DXGI_FORMAT_R32G32B32A32_FLOAT,
+        B10g11r11Ufloat if reverse => DXGI_FORMAT_R11G11B10_FLOAT,
+        E5b9g9r9Ufloat if reverse => DXGI_FORMAT_R9G9B9E5_SHAREDEXP,
+        D16Unorm => DXGI_FORMAT_D16_UNORM,
+        D24UnormS8Uint => DXGI_FORMAT_D24_UNORM_S8_UINT,
+        X8D24Unorm if reverse => DXGI_FORMAT_D24_UNORM_S8_UINT,
+        D32Sfloat => DXGI_FORMAT_D32_FLOAT,
+        D32SfloatS8Uint => DXGI_FORMAT_D32_FLOAT_S8X24_UINT,
+        Bc1RgbUnorm | Bc1RgbaUnorm => DXGI_FORMAT_BC1_UNORM,
+        Bc1RgbSrgb | Bc1RgbaSrgb =>
DXGI_FORMAT_BC1_UNORM_SRGB, + Bc2Unorm => DXGI_FORMAT_BC2_UNORM, + Bc2Srgb => DXGI_FORMAT_BC2_UNORM_SRGB, + Bc3Unorm => DXGI_FORMAT_BC3_UNORM, + Bc3Srgb => DXGI_FORMAT_BC3_UNORM_SRGB, + Bc4Unorm => DXGI_FORMAT_BC4_UNORM, + Bc4Snorm => DXGI_FORMAT_BC4_SNORM, + Bc5Unorm => DXGI_FORMAT_BC5_UNORM, + Bc5Snorm => DXGI_FORMAT_BC5_SNORM, + Bc6hUfloat => DXGI_FORMAT_BC6H_UF16, + Bc6hSfloat => DXGI_FORMAT_BC6H_SF16, + Bc7Unorm => DXGI_FORMAT_BC7_UNORM, + Bc7Srgb => DXGI_FORMAT_BC7_UNORM_SRGB, + + _ => return None, + }; + + Some(format) +} + +pub fn map_format_shader_depth(surface: SurfaceType) -> Option<DXGI_FORMAT> { + match surface { + SurfaceType::D16 => Some(DXGI_FORMAT_R16_UNORM), + SurfaceType::X8D24 | SurfaceType::D24_S8 => Some(DXGI_FORMAT_R24_UNORM_X8_TYPELESS), + SurfaceType::D32 => Some(DXGI_FORMAT_R32_FLOAT), + SurfaceType::D32_S8 => Some(DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS), + _ => return None, + } +} + +pub fn map_format_shader_stencil(surface: SurfaceType) -> Option<DXGI_FORMAT> { + match surface { + SurfaceType::D24_S8 => Some(DXGI_FORMAT_X24_TYPELESS_G8_UINT), + SurfaceType::D32_S8 => Some(DXGI_FORMAT_X32_TYPELESS_G8X24_UINT), + _ => None, + } +} + +pub fn map_format_dsv(surface: SurfaceType) -> Option<DXGI_FORMAT> { + match surface { + SurfaceType::D16 => Some(DXGI_FORMAT_D16_UNORM), + SurfaceType::X8D24 | SurfaceType::D24_S8 => Some(DXGI_FORMAT_D24_UNORM_S8_UINT), + SurfaceType::D32 => Some(DXGI_FORMAT_D32_FLOAT), + SurfaceType::D32_S8 => Some(DXGI_FORMAT_D32_FLOAT_S8X24_UINT), + _ => None, + } +} + +pub fn map_format_nosrgb(format: Format) -> Option<DXGI_FORMAT> { + // NOTE: DXGI doesn't allow sRGB format on the swapchain, but + // creating RTV of swapchain buffers with sRGB works + match format { + Format::Bgra8Srgb => Some(DXGI_FORMAT_B8G8R8A8_UNORM), + Format::Rgba8Srgb => Some(DXGI_FORMAT_R8G8B8A8_UNORM), + _ => map_format(format), + } +} + +pub fn map_swizzle(swizzle: Swizzle) -> UINT { + use hal::format::Component::*; + + [swizzle.0, swizzle.1, swizzle.2, swizzle.3] + .iter() + .enumerate() + .fold( + D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES, + |mapping, (i, &component)| { + let value = match component { + R => D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + G => D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + B => D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + A => D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3, + Zero => D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + One => D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1, + }; + mapping | (value << D3D12_SHADER_COMPONENT_MAPPING_SHIFT as usize * i) + }, + ) +} + +pub fn swizzle_rg(swizzle: Swizzle) -> Swizzle { + use hal::format::Component as C; + fn map_component(c: C) -> C { + match c { + C::R => C::G, + C::G => C::R, + x => x, + } + } + Swizzle( + map_component(swizzle.0), + map_component(swizzle.1), + map_component(swizzle.2), + map_component(swizzle.3), + ) +} + +pub fn map_surface_type(st: SurfaceType) -> Option<DXGI_FORMAT> { + use hal::format::SurfaceType::*; + + assert_eq!(1, unsafe { *(&1u32 as *const _ as *const u8) }); + Some(match st { + R5_G6_B5 => DXGI_FORMAT_B5G6R5_UNORM, + A1_R5_G5_B5 => DXGI_FORMAT_B5G5R5A1_UNORM, + R8 => DXGI_FORMAT_R8_TYPELESS, + R8_G8 => DXGI_FORMAT_R8G8_TYPELESS, + R8_G8_B8_A8 => DXGI_FORMAT_R8G8B8A8_TYPELESS, + B8_G8_R8_A8 => DXGI_FORMAT_B8G8R8A8_TYPELESS, + A8_B8_G8_R8 => DXGI_FORMAT_R8G8B8A8_TYPELESS, + A2_B10_G10_R10 => DXGI_FORMAT_R10G10B10A2_TYPELESS, + R16 => DXGI_FORMAT_R16_TYPELESS, + R16_G16 => 
DXGI_FORMAT_R16G16_TYPELESS, + R16_G16_B16_A16 => DXGI_FORMAT_R16G16B16A16_TYPELESS, + R32 => DXGI_FORMAT_R32_TYPELESS, + R32_G32 => DXGI_FORMAT_R32G32_TYPELESS, + R32_G32_B32 => DXGI_FORMAT_R32G32B32_TYPELESS, + R32_G32_B32_A32 => DXGI_FORMAT_R32G32B32A32_TYPELESS, + B10_G11_R11 => DXGI_FORMAT_R11G11B10_FLOAT, + E5_B9_G9_R9 => DXGI_FORMAT_R9G9B9E5_SHAREDEXP, + D16 => DXGI_FORMAT_R16_TYPELESS, + X8D24 => DXGI_FORMAT_R24G8_TYPELESS, + D32 => DXGI_FORMAT_R32_TYPELESS, + D24_S8 => DXGI_FORMAT_R24G8_TYPELESS, + D32_S8 => DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, + _ => return None, + }) +} + +pub fn map_topology_type(primitive: pso::Primitive) -> D3D12_PRIMITIVE_TOPOLOGY_TYPE { + use hal::pso::Primitive::*; + match primitive { + PointList => D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, + LineList | LineStrip => D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + TriangleList | TriangleStrip => D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + PatchList(_) => D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH, + } +} + +pub fn map_topology(ia: &pso::InputAssemblerDesc) -> D3D12_PRIMITIVE_TOPOLOGY { + use hal::pso::Primitive::*; + match (ia.primitive, ia.with_adjacency) { + (PointList, false) => D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + (PointList, true) => panic!("Points can't have adjacency info"), + (LineList, false) => D3D_PRIMITIVE_TOPOLOGY_LINELIST, + (LineList, true) => D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, + (LineStrip, false) => D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + (LineStrip, true) => D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ, + (TriangleList, false) => D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + (TriangleList, true) => D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, + (TriangleStrip, false) => D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + (TriangleStrip, true) => D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ, + (PatchList(num), false) => { + assert!(num != 0); + D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + (num as u32) - 1 + } + (_, true) => panic!("Patches can't have adjacency info"), + } +} + +pub fn map_rasterizer(rasterizer: &pso::Rasterizer, multisample: bool) -> D3D12_RASTERIZER_DESC { + use hal::pso::FrontFace::*; + use hal::pso::PolygonMode::*; + + let bias = match rasterizer.depth_bias { + //TODO: support dynamic depth bias + Some(pso::State::Static(db)) => db, + Some(_) | None => pso::DepthBias::default(), + }; + + if let pso::State::Static(w) = rasterizer.line_width { + validate_line_width(w); + } + + D3D12_RASTERIZER_DESC { + FillMode: match rasterizer.polygon_mode { + Point => { + error!("Point rasterization is not supported"); + D3D12_FILL_MODE_WIREFRAME + } + Line => D3D12_FILL_MODE_WIREFRAME, + Fill => D3D12_FILL_MODE_SOLID, + }, + CullMode: match rasterizer.cull_face { + pso::Face::NONE => D3D12_CULL_MODE_NONE, + pso::Face::FRONT => D3D12_CULL_MODE_FRONT, + pso::Face::BACK => D3D12_CULL_MODE_BACK, + _ => panic!("Culling both front and back faces is not supported"), + }, + FrontCounterClockwise: match rasterizer.front_face { + Clockwise => FALSE, + CounterClockwise => TRUE, + }, + DepthBias: bias.const_factor as INT, + DepthBiasClamp: bias.clamp, + SlopeScaledDepthBias: bias.slope_factor, + DepthClipEnable: !rasterizer.depth_clamping as _, + MultisampleEnable: if multisample { TRUE } else { FALSE }, + ForcedSampleCount: 0, // TODO: currently not supported + AntialiasedLineEnable: FALSE, // TODO: currently not supported + ConservativeRaster: if rasterizer.conservative { + // TODO: check support + D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + } else { + D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + }, + } +} + +fn map_factor(factor: pso::Factor, 
is_alpha: bool) -> D3D12_BLEND { + use hal::pso::Factor::*; + match factor { + Zero => D3D12_BLEND_ZERO, + One => D3D12_BLEND_ONE, + SrcColor if is_alpha => D3D12_BLEND_SRC_ALPHA, + SrcColor => D3D12_BLEND_SRC_COLOR, + OneMinusSrcColor if is_alpha => D3D12_BLEND_INV_SRC_ALPHA, + OneMinusSrcColor => D3D12_BLEND_INV_SRC_COLOR, + DstColor if is_alpha => D3D12_BLEND_DEST_ALPHA, + DstColor => D3D12_BLEND_DEST_COLOR, + OneMinusDstColor if is_alpha => D3D12_BLEND_INV_DEST_ALPHA, + OneMinusDstColor => D3D12_BLEND_INV_DEST_COLOR, + SrcAlpha => D3D12_BLEND_SRC_ALPHA, + OneMinusSrcAlpha => D3D12_BLEND_INV_SRC_ALPHA, + DstAlpha => D3D12_BLEND_DEST_ALPHA, + OneMinusDstAlpha => D3D12_BLEND_INV_DEST_ALPHA, + ConstColor | ConstAlpha => D3D12_BLEND_BLEND_FACTOR, + OneMinusConstColor | OneMinusConstAlpha => D3D12_BLEND_INV_BLEND_FACTOR, + SrcAlphaSaturate => D3D12_BLEND_SRC_ALPHA_SAT, + Src1Color if is_alpha => D3D12_BLEND_SRC1_ALPHA, + Src1Color => D3D12_BLEND_SRC1_COLOR, + OneMinusSrc1Color if is_alpha => D3D12_BLEND_INV_SRC1_ALPHA, + OneMinusSrc1Color => D3D12_BLEND_INV_SRC1_COLOR, + Src1Alpha => D3D12_BLEND_SRC1_ALPHA, + OneMinusSrc1Alpha => D3D12_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_blend_op( + operation: pso::BlendOp, + is_alpha: bool, +) -> (D3D12_BLEND_OP, D3D12_BLEND, D3D12_BLEND) { + use hal::pso::BlendOp::*; + match operation { + Add { src, dst } => ( + D3D12_BLEND_OP_ADD, + map_factor(src, is_alpha), + map_factor(dst, is_alpha), + ), + Sub { src, dst } => ( + D3D12_BLEND_OP_SUBTRACT, + map_factor(src, is_alpha), + map_factor(dst, is_alpha), + ), + RevSub { src, dst } => ( + D3D12_BLEND_OP_REV_SUBTRACT, + map_factor(src, is_alpha), + map_factor(dst, is_alpha), + ), + Min => (D3D12_BLEND_OP_MIN, D3D12_BLEND_ZERO, D3D12_BLEND_ZERO), + Max => (D3D12_BLEND_OP_MAX, D3D12_BLEND_ZERO, D3D12_BLEND_ZERO), + } +} + +pub fn map_render_targets( + color_targets: &[pso::ColorBlendDesc], +) -> [D3D12_RENDER_TARGET_BLEND_DESC; D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] { + let dummy_target = D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: FALSE, + LogicOpEnable: FALSE, + SrcBlend: D3D12_BLEND_ZERO, + DestBlend: D3D12_BLEND_ZERO, + BlendOp: D3D12_BLEND_OP_ADD, + SrcBlendAlpha: D3D12_BLEND_ZERO, + DestBlendAlpha: D3D12_BLEND_ZERO, + BlendOpAlpha: D3D12_BLEND_OP_ADD, + LogicOp: D3D12_LOGIC_OP_CLEAR, + RenderTargetWriteMask: 0, + }; + let mut targets = [dummy_target; D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + + for (target, color_desc) in targets.iter_mut().zip(color_targets.iter()) { + target.RenderTargetWriteMask = color_desc.mask.bits() as UINT8; + if let Some(ref blend) = color_desc.blend { + let (color_op, color_src, color_dst) = map_blend_op(blend.color, false); + let (alpha_op, alpha_src, alpha_dst) = map_blend_op(blend.alpha, true); + target.BlendEnable = TRUE; + target.BlendOp = color_op; + target.SrcBlend = color_src; + target.DestBlend = color_dst; + target.BlendOpAlpha = alpha_op; + target.SrcBlendAlpha = alpha_src; + target.DestBlendAlpha = alpha_dst; + } + } + + targets +} + +pub fn map_depth_stencil(dsi: &pso::DepthStencilDesc) -> D3D12_DEPTH_STENCIL_DESC { + let (depth_on, depth_write, depth_func) = match dsi.depth { + Some(ref depth) => (TRUE, depth.write, map_comparison(depth.fun)), + None => unsafe { mem::zeroed() }, + }; + + let (stencil_on, front, back, read_mask, write_mask) = match dsi.stencil { + Some(ref stencil) => { + let read_masks = stencil.read_masks.static_or(pso::Sided::new(!0)); + let write_masks = stencil.write_masks.static_or(pso::Sided::new(!0)); + if 
read_masks.front != read_masks.back || write_masks.front != write_masks.back {
+                error!(
+                    "Different sides are specified for read ({:?}) and write ({:?}) stencil masks",
+                    read_masks, write_masks
+                );
+            }
+            (
+                TRUE,
+                map_stencil_side(&stencil.faces.front),
+                map_stencil_side(&stencil.faces.back),
+                read_masks.front,
+                write_masks.front,
+            )
+        }
+        None => unsafe { mem::zeroed() },
+    };
+
+    D3D12_DEPTH_STENCIL_DESC {
+        DepthEnable: depth_on,
+        DepthWriteMask: if depth_write {
+            D3D12_DEPTH_WRITE_MASK_ALL
+        } else {
+            D3D12_DEPTH_WRITE_MASK_ZERO
+        },
+        DepthFunc: depth_func,
+        StencilEnable: stencil_on,
+        StencilReadMask: read_mask as _,
+        StencilWriteMask: write_mask as _,
+        FrontFace: front,
+        BackFace: back,
+    }
+}
+
+pub fn map_comparison(func: pso::Comparison) -> D3D12_COMPARISON_FUNC {
+    use hal::pso::Comparison::*;
+    match func {
+        Never => D3D12_COMPARISON_FUNC_NEVER,
+        Less => D3D12_COMPARISON_FUNC_LESS,
+        LessEqual => D3D12_COMPARISON_FUNC_LESS_EQUAL,
+        Equal => D3D12_COMPARISON_FUNC_EQUAL,
+        GreaterEqual => D3D12_COMPARISON_FUNC_GREATER_EQUAL,
+        Greater => D3D12_COMPARISON_FUNC_GREATER,
+        NotEqual => D3D12_COMPARISON_FUNC_NOT_EQUAL,
+        Always => D3D12_COMPARISON_FUNC_ALWAYS,
+    }
+}
+
+fn map_stencil_op(op: pso::StencilOp) -> D3D12_STENCIL_OP {
+    use hal::pso::StencilOp::*;
+    match op {
+        Keep => D3D12_STENCIL_OP_KEEP,
+        Zero => D3D12_STENCIL_OP_ZERO,
+        Replace => D3D12_STENCIL_OP_REPLACE,
+        IncrementClamp => D3D12_STENCIL_OP_INCR_SAT,
+        IncrementWrap => D3D12_STENCIL_OP_INCR,
+        DecrementClamp => D3D12_STENCIL_OP_DECR_SAT,
+        DecrementWrap => D3D12_STENCIL_OP_DECR,
+        Invert => D3D12_STENCIL_OP_INVERT,
+    }
+}
+
+fn map_stencil_side(side: &pso::StencilFace) -> D3D12_DEPTH_STENCILOP_DESC {
+    D3D12_DEPTH_STENCILOP_DESC {
+        StencilFailOp: map_stencil_op(side.op_fail),
+        StencilDepthFailOp: map_stencil_op(side.op_depth_fail),
+        StencilPassOp: map_stencil_op(side.op_pass),
+        StencilFunc: map_comparison(side.fun),
+    }
+}
+
+pub fn map_wrap(wrap: image::WrapMode) -> D3D12_TEXTURE_ADDRESS_MODE {
+    use hal::image::WrapMode::*;
+    match wrap {
+        Tile => D3D12_TEXTURE_ADDRESS_MODE_WRAP,
+        Mirror => D3D12_TEXTURE_ADDRESS_MODE_MIRROR,
+        Clamp => D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
+        Border => D3D12_TEXTURE_ADDRESS_MODE_BORDER,
+        MirrorClamp => D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE,
+    }
+}
+
+fn map_filter_type(filter: image::Filter) -> D3D12_FILTER_TYPE {
+    match filter {
+        image::Filter::Nearest => D3D12_FILTER_TYPE_POINT,
+        image::Filter::Linear => D3D12_FILTER_TYPE_LINEAR,
+    }
+}
+
+pub fn map_filter(
+    mag_filter: image::Filter,
+    min_filter: image::Filter,
+    mip_filter: image::Filter,
+    reduction: D3D12_FILTER_REDUCTION_TYPE,
+    anisotropy_clamp: Option<u8>,
+) -> D3D12_FILTER {
+    let mag = map_filter_type(mag_filter);
+    let min = map_filter_type(min_filter);
+    let mip = map_filter_type(mip_filter);
+
+    (min & D3D12_FILTER_TYPE_MASK) << D3D12_MIN_FILTER_SHIFT
+        | (mag & D3D12_FILTER_TYPE_MASK) << D3D12_MAG_FILTER_SHIFT
+        | (mip & D3D12_FILTER_TYPE_MASK) << D3D12_MIP_FILTER_SHIFT
+        | (reduction & D3D12_FILTER_REDUCTION_TYPE_MASK) << D3D12_FILTER_REDUCTION_TYPE_SHIFT
+        | anisotropy_clamp
+            .map(|_| D3D12_FILTER_ANISOTROPIC)
+            .unwrap_or(0)
+}
+
+pub fn map_buffer_resource_state(access: buffer::Access) -> D3D12_RESOURCE_STATES {
+    use self::buffer::Access;
+    // Mutable states
+    if access.contains(Access::SHADER_WRITE) {
+        return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+    }
+    if access.contains(Access::TRANSFER_WRITE) {
+        // Resolve not relevant for buffers.
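+        // Buffers can never be resolve targets, so unlike images (see
+        // `map_image_resource_state` below) TRANSFER_WRITE maps directly to COPY_DEST.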
+        return D3D12_RESOURCE_STATE_COPY_DEST;
+    }
+
+    // Read-only states
+    let mut state = D3D12_RESOURCE_STATE_COMMON;
+
+    if access.contains(Access::TRANSFER_READ) {
+        state |= D3D12_RESOURCE_STATE_COPY_SOURCE;
+    }
+    if access.contains(Access::INDEX_BUFFER_READ) {
+        state |= D3D12_RESOURCE_STATE_INDEX_BUFFER;
+    }
+    if access.contains(Access::VERTEX_BUFFER_READ) || access.contains(Access::UNIFORM_READ) {
+        state |= D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER;
+    }
+    if access.contains(Access::INDIRECT_COMMAND_READ) {
+        state |= D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
+    }
+    if access.contains(Access::SHADER_READ) {
+        // SHADER_READ only allows SRV access
+        state |= D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE
+            | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
+    }
+
+    state
+}
+
+/// Derive the combination of read-only states from the access flags.
+fn derive_immutable_image_states(access: image::Access) -> D3D12_RESOURCE_STATES {
+    let mut state = D3D12_RESOURCE_STATE_COMMON;
+
+    if access.contains(image::Access::TRANSFER_READ) {
+        state |= D3D12_RESOURCE_STATE_COPY_SOURCE;
+    }
+    if access.contains(image::Access::INPUT_ATTACHMENT_READ) {
+        state |= D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
+    }
+    if access.contains(image::Access::DEPTH_STENCIL_ATTACHMENT_READ) {
+        state |= D3D12_RESOURCE_STATE_DEPTH_READ;
+    }
+    if access.contains(image::Access::SHADER_READ) {
+        state |= D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE
+            | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
+    }
+
+    state
+}
+
+/// Derive the mutable or read-only states from the access flags.
+fn derive_mutable_image_states(access: image::Access) -> D3D12_RESOURCE_STATES {
+    if access.contains(image::Access::SHADER_WRITE) {
+        return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+    }
+    if access.contains(image::Access::COLOR_ATTACHMENT_WRITE) {
+        return D3D12_RESOURCE_STATE_RENDER_TARGET;
+    }
+    if access.contains(image::Access::DEPTH_STENCIL_ATTACHMENT_WRITE) {
+        return D3D12_RESOURCE_STATE_DEPTH_WRITE;
+    }
+    if access.contains(image::Access::TRANSFER_WRITE) {
+        return D3D12_RESOURCE_STATE_COPY_DEST;
+    }
+
+    derive_immutable_image_states(access)
+}
+
+pub fn map_image_resource_state(
+    access: image::Access,
+    layout: image::Layout,
+) -> D3D12_RESOURCE_STATES {
+    match layout {
+        // the same as COMMON (general state)
+        image::Layout::Present => D3D12_RESOURCE_STATE_PRESENT,
+        image::Layout::ColorAttachmentOptimal => D3D12_RESOURCE_STATE_RENDER_TARGET,
+        image::Layout::DepthStencilAttachmentOptimal => D3D12_RESOURCE_STATE_DEPTH_WRITE,
+        // `TRANSFER_WRITE` requires special handling: it corresponds to both
+        // RESOLVE_DEST and COPY_DEST, but only one write state may be active at a
+        // time, so we do the required translation before the commands.
+        // We currently assume that `COPY_DEST` is a more common state than
+        // out-of-renderpass resolves. Resolve operations therefore insert a barrier
+        // before and after the command to transition from and back into `COPY_DEST`,
+        // keeping a consistent state for srcAccess.
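+        // `CommandBuffer::resolve_image` in command.rs implements exactly this round
+        // trip, transitioning COPY_DEST -> RESOLVE_DEST around ResolveSubresource and
+        // back again.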
+ image::Layout::TransferDstOptimal => D3D12_RESOURCE_STATE_COPY_DEST, + image::Layout::TransferSrcOptimal => D3D12_RESOURCE_STATE_COPY_SOURCE, + image::Layout::General => derive_mutable_image_states(access), + image::Layout::ShaderReadOnlyOptimal | image::Layout::DepthStencilReadOnlyOptimal => { + derive_immutable_image_states(access) + } + image::Layout::Undefined | image::Layout::Preinitialized => D3D12_RESOURCE_STATE_COMMON, + } +} + +pub fn map_shader_visibility(flags: pso::ShaderStageFlags) -> ShaderVisibility { + use hal::pso::ShaderStageFlags as Ssf; + + match flags { + Ssf::VERTEX => ShaderVisibility::VS, + Ssf::GEOMETRY => ShaderVisibility::GS, + Ssf::HULL => ShaderVisibility::HS, + Ssf::DOMAIN => ShaderVisibility::DS, + Ssf::FRAGMENT => ShaderVisibility::PS, + _ => ShaderVisibility::All, + } +} + +pub fn map_buffer_flags(usage: buffer::Usage) -> D3D12_RESOURCE_FLAGS { + let mut flags = D3D12_RESOURCE_FLAG_NONE; + + // TRANSFER_DST also used for clearing buffers, which is implemented via UAV clears. + if usage.contains(buffer::Usage::STORAGE) || usage.contains(buffer::Usage::TRANSFER_DST) { + flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + + flags +} + +pub fn map_image_flags(usage: image::Usage, features: ImageFeature) -> D3D12_RESOURCE_FLAGS { + use self::image::Usage; + let mut flags = D3D12_RESOURCE_FLAG_NONE; + + // Blit operations implemented via a graphics pipeline + if usage.contains(Usage::COLOR_ATTACHMENT) { + debug_assert!(features.contains(ImageFeature::COLOR_ATTACHMENT)); + flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + if usage.contains(Usage::DEPTH_STENCIL_ATTACHMENT) { + debug_assert!(features.contains(ImageFeature::DEPTH_STENCIL_ATTACHMENT)); + flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + } + if usage.contains(Usage::TRANSFER_DST) { + if features.contains(ImageFeature::COLOR_ATTACHMENT) { + flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET + }; + if features.contains(ImageFeature::DEPTH_STENCIL_ATTACHMENT) { + flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL + }; + } + if usage.contains(Usage::STORAGE) { + debug_assert!(features.contains(ImageFeature::STORAGE)); + flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + if !features.contains(ImageFeature::SAMPLED) { + flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + + flags +} + +pub fn map_execution_model(model: spirv::ExecutionModel) -> ShaderStage { + match model { + spirv::ExecutionModel::Vertex => ShaderStage::Vertex, + spirv::ExecutionModel::Fragment => ShaderStage::Fragment, + spirv::ExecutionModel::Geometry => ShaderStage::Geometry, + spirv::ExecutionModel::GlCompute => ShaderStage::Compute, + spirv::ExecutionModel::TessellationControl => ShaderStage::Hull, + spirv::ExecutionModel::TessellationEvaluation => ShaderStage::Domain, + spirv::ExecutionModel::Kernel => panic!("Kernel is not a valid execution model"), + } +} + +pub fn map_stage(stage: ShaderStage) -> spirv::ExecutionModel { + match stage { + ShaderStage::Vertex => spirv::ExecutionModel::Vertex, + ShaderStage::Fragment => spirv::ExecutionModel::Fragment, + ShaderStage::Geometry => spirv::ExecutionModel::Geometry, + ShaderStage::Compute => spirv::ExecutionModel::GlCompute, + ShaderStage::Hull => spirv::ExecutionModel::TessellationControl, + ShaderStage::Domain => spirv::ExecutionModel::TessellationEvaluation, + ShaderStage::Task | ShaderStage::Mesh => { + panic!("{:?} shader is not yet implemented in SPIRV-Cross", stage) + } + } +} diff --git a/third_party/rust/gfx-backend-dx12/src/descriptors_cpu.rs 
b/third_party/rust/gfx-backend-dx12/src/descriptors_cpu.rs new file mode 100644 index 0000000000..818b82f28c --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/src/descriptors_cpu.rs @@ -0,0 +1,305 @@ +use bit_set::BitSet; +use native::{CpuDescriptor, DescriptorHeapFlags, DescriptorHeapType}; +use std::fmt; + +// Linear stack allocator for CPU descriptor heaps. +pub struct HeapLinear { + handle_size: usize, + num: usize, + size: usize, + start: CpuDescriptor, + raw: native::DescriptorHeap, +} + +impl fmt::Debug for HeapLinear { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("HeapLinear") + } +} + +impl HeapLinear { + pub fn new(device: native::Device, ty: DescriptorHeapType, size: usize) -> Self { + let (heap, _hr) = + device.create_descriptor_heap(size as _, ty, DescriptorHeapFlags::empty(), 0); + + HeapLinear { + handle_size: device.get_descriptor_increment_size(ty) as _, + num: 0, + size, + start: heap.start_cpu_descriptor(), + raw: heap, + } + } + + pub fn alloc_handle(&mut self) -> CpuDescriptor { + assert!(!self.is_full()); + + let slot = self.num; + self.num += 1; + + CpuDescriptor { + ptr: self.start.ptr + self.handle_size * slot, + } + } + + pub fn is_full(&self) -> bool { + self.num >= self.size + } + + pub fn clear(&mut self) { + self.num = 0; + } + + pub unsafe fn destroy(&self) { + self.raw.destroy(); + } +} + +const HEAP_SIZE_FIXED: usize = 64; + +// Fixed-size free-list allocator for CPU descriptors. +struct Heap { + // Bit flag representation of available handles in the heap. + // + // 0 - Occupied + // 1 - free + availability: u64, + handle_size: usize, + start: CpuDescriptor, + raw: native::DescriptorHeap, +} + +impl fmt::Debug for Heap { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Heap") + } +} + +impl Heap { + fn new(device: native::Device, ty: DescriptorHeapType) -> Self { + let (heap, _hr) = device.create_descriptor_heap( + HEAP_SIZE_FIXED as _, + ty, + DescriptorHeapFlags::empty(), + 0, + ); + + Heap { + handle_size: device.get_descriptor_increment_size(ty) as _, + availability: !0, // all free! + start: heap.start_cpu_descriptor(), + raw: heap, + } + } + + pub fn alloc_handle(&mut self) -> CpuDescriptor { + // Find first free slot. + let slot = self.availability.trailing_zeros() as usize; + assert!(slot < HEAP_SIZE_FIXED); + // Set the slot as occupied. 
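+        // e.g. with `availability == 0b0100`, `trailing_zeros()` selects slot 2 and
+        // the XOR below flips that bit to 0, marking the slot as occupied.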
+        self.availability ^= 1 << slot;
+
+        CpuDescriptor {
+            ptr: self.start.ptr + self.handle_size * slot,
+        }
+    }
+
+    pub fn free_handle(&mut self, handle: CpuDescriptor) {
+        let slot = (handle.ptr - self.start.ptr) / self.handle_size;
+        assert!(slot < HEAP_SIZE_FIXED);
+        assert_eq!(self.availability & (1 << slot), 0);
+        self.availability ^= 1 << slot;
+    }
+
+    pub fn is_full(&self) -> bool {
+        self.availability == 0
+    }
+
+    pub unsafe fn destroy(&self) {
+        self.raw.destroy();
+    }
+}
+
+#[derive(Clone, Copy)]
+pub struct Handle {
+    pub raw: CpuDescriptor,
+    heap_index: usize,
+}
+
+pub struct DescriptorCpuPool {
+    device: native::Device,
+    ty: DescriptorHeapType,
+    heaps: Vec<Heap>,
+    available_heap_indices: BitSet,
+}
+
+impl fmt::Debug for DescriptorCpuPool {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.write_str("DescriptorCpuPool")
+    }
+}
+
+impl DescriptorCpuPool {
+    pub fn new(device: native::Device, ty: DescriptorHeapType) -> Self {
+        DescriptorCpuPool {
+            device,
+            ty,
+            heaps: Vec::new(),
+            available_heap_indices: BitSet::new(),
+        }
+    }
+
+    pub fn alloc_handle(&mut self) -> Handle {
+        let heap_index = self
+            .available_heap_indices
+            .iter()
+            .next()
+            .unwrap_or_else(|| {
+                // Allocate a new heap
+                let id = self.heaps.len();
+                self.heaps.push(Heap::new(self.device, self.ty));
+                self.available_heap_indices.insert(id);
+                id
+            });
+
+        let heap = &mut self.heaps[heap_index];
+        let handle = Handle {
+            raw: heap.alloc_handle(),
+            heap_index,
+        };
+        if heap.is_full() {
+            self.available_heap_indices.remove(heap_index);
+        }
+
+        handle
+    }
+
+    pub fn free_handle(&mut self, handle: Handle) {
+        self.heaps[handle.heap_index].free_handle(handle.raw);
+        self.available_heap_indices.insert(handle.heap_index);
+    }
+
+    pub unsafe fn destroy(&self) {
+        for heap in &self.heaps {
+            heap.destroy();
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct CopyAccumulator {
+    starts: Vec<CpuDescriptor>,
+    counts: Vec<u32>,
+}
+
+impl CopyAccumulator {
+    pub fn add(&mut self, start: CpuDescriptor, count: u32) {
+        self.starts.push(start);
+        self.counts.push(count);
+    }
+
+    pub fn clear(&mut self) {
+        self.starts.clear();
+        self.counts.clear();
+    }
+
+    fn total(&self) -> u32 {
+        self.counts.iter().sum()
+    }
+}
+
+#[derive(Default)]
+pub struct MultiCopyAccumulator {
+    pub src_views: CopyAccumulator,
+    pub src_samplers: CopyAccumulator,
+    pub dst_views: CopyAccumulator,
+    pub dst_samplers: CopyAccumulator,
+}
+
+impl MultiCopyAccumulator {
+    pub unsafe fn flush(&self, device: native::Device) {
+        use winapi::um::d3d12;
+        assert_eq!(self.src_views.total(), self.dst_views.total());
+        assert_eq!(self.src_samplers.total(), self.dst_samplers.total());
+
+        if !self.src_views.starts.is_empty() {
+            device.CopyDescriptors(
+                self.dst_views.starts.len() as u32,
+                self.dst_views.starts.as_ptr(),
+                self.dst_views.counts.as_ptr(),
+                self.src_views.starts.len() as u32,
+                self.src_views.starts.as_ptr(),
+                self.src_views.counts.as_ptr(),
+                d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
+            );
+        }
+        if !self.src_samplers.starts.is_empty() {
+            device.CopyDescriptors(
+                self.dst_samplers.starts.len() as u32,
+                self.dst_samplers.starts.as_ptr(),
+                self.dst_samplers.counts.as_ptr(),
+                self.src_samplers.starts.len() as u32,
+                self.src_samplers.starts.as_ptr(),
+                self.src_samplers.counts.as_ptr(),
+                d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
+            );
+        }
+    }
+}
+
+pub struct DescriptorUpdater {
+    heaps: Vec<HeapLinear>,
+    heap_index: usize,
+    reset_heap_index: usize,
+    avoid_overwrite: bool,
+}
+
+impl DescriptorUpdater {
+    pub fn
new(device: native::Device, avoid_overwrite: bool) -> Self { + DescriptorUpdater { + heaps: vec![Self::create_heap(device)], + heap_index: 0, + reset_heap_index: 0, + avoid_overwrite, + } + } + + pub unsafe fn destroy(&mut self) { + for heap in self.heaps.drain(..) { + heap.destroy(); + } + } + + pub fn reset(&mut self) { + if self.avoid_overwrite { + self.reset_heap_index = self.heap_index; + } else { + self.heap_index = 0; + for heap in self.heaps.iter_mut() { + heap.clear(); + } + } + } + + fn create_heap(device: native::Device) -> HeapLinear { + let size = 1 << 12; //arbitrary + HeapLinear::new(device, native::DescriptorHeapType::CbvSrvUav, size) + } + + pub fn alloc_handle(&mut self, device: native::Device) -> CpuDescriptor { + if self.heaps[self.heap_index].is_full() { + self.heap_index += 1; + if self.heap_index == self.heaps.len() { + self.heap_index = 0; + } + if self.heap_index == self.reset_heap_index { + let heap = Self::create_heap(device); + self.heaps.insert(self.heap_index, heap); + self.reset_heap_index += 1; + } else { + self.heaps[self.heap_index].clear(); + } + } + self.heaps[self.heap_index].alloc_handle() + } +} diff --git a/third_party/rust/gfx-backend-dx12/src/device.rs b/third_party/rust/gfx-backend-dx12/src/device.rs new file mode 100644 index 0000000000..61f48aea9b --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/src/device.rs @@ -0,0 +1,3690 @@ +use std::{ + borrow::Borrow, + collections::{hash_map::Entry, BTreeMap}, + ffi, iter, mem, + ops::Range, + ptr, slice, + sync::Arc, +}; + +use range_alloc::RangeAllocator; +use smallvec::SmallVec; +use spirv_cross::{hlsl, spirv, ErrorCode as SpirvErrorCode}; + +use winapi::{ + shared::{ + dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, + minwindef::{FALSE, TRUE, UINT}, + windef, winerror, + }, + um::{d3d12, d3dcompiler, synchapi, winbase, winnt}, + Interface, +}; + +use auxil::{spirv_cross_specialize_ast, ShaderStage}; +use hal::{ + buffer, device as d, format, + format::Aspects, + image, memory, + memory::Requirements, + pass, + pool::CommandPoolCreateFlags, + pso, + pso::VertexInputRate, + query, + queue::{CommandQueue as _, QueueFamilyId}, + window as w, +}; + +use crate::{ + command as cmd, conv, descriptors_cpu, pool::CommandPool, resource as r, root_constants, + root_constants::RootConstant, window::Swapchain, Backend as B, Device, MemoryGroup, + MAX_VERTEX_BUFFERS, NUM_HEAP_PROPERTIES, QUEUE_FAMILIES, +}; +use native::{PipelineStateSubobject, Subobject}; + +// Register space used for root constants. +const ROOT_CONSTANT_SPACE: u32 = 0; + +const MEM_TYPE_MASK: u32 = 0x7; +const MEM_TYPE_SHIFT: u32 = 3; + +const MEM_TYPE_UNIVERSAL_SHIFT: u32 = MEM_TYPE_SHIFT * MemoryGroup::Universal as u32; +const MEM_TYPE_BUFFER_SHIFT: u32 = MEM_TYPE_SHIFT * MemoryGroup::BufferOnly as u32; +const MEM_TYPE_IMAGE_SHIFT: u32 = MEM_TYPE_SHIFT * MemoryGroup::ImageOnly as u32; +const MEM_TYPE_TARGET_SHIFT: u32 = MEM_TYPE_SHIFT * MemoryGroup::TargetOnly as u32; + +pub const IDENTITY_MAPPING: UINT = 0x1688; // D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING + +fn wide_cstr(name: &str) -> Vec<u16> { + name.encode_utf16().chain(iter::once(0)).collect() +} + +/// Emit error during shader module creation. Used if we don't expect an error +/// but might panic due to an exception in SPIRV-Cross. 
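+/// (For example, a `set_decoration` call failing on an id that was just
+/// queried successfully would surface here.)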
+fn gen_unexpected_error(err: SpirvErrorCode) -> d::ShaderError {
+    let msg = match err {
+        SpirvErrorCode::CompilationError(msg) => msg,
+        SpirvErrorCode::Unhandled => "Unexpected error".into(),
+    };
+    d::ShaderError::CompilationFailed(msg)
+}
+
+/// Emit an error during shader module creation. Used when a SPIRV-Cross
+/// query command fails.
+fn gen_query_error(err: SpirvErrorCode) -> d::ShaderError {
+    let msg = match err {
+        SpirvErrorCode::CompilationError(msg) => msg,
+        SpirvErrorCode::Unhandled => "Unknown query error".into(),
+    };
+    d::ShaderError::CompilationFailed(msg)
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct ViewInfo {
+    pub(crate) resource: native::Resource,
+    pub(crate) kind: image::Kind,
+    pub(crate) caps: image::ViewCapabilities,
+    pub(crate) view_kind: image::ViewKind,
+    pub(crate) format: dxgiformat::DXGI_FORMAT,
+    pub(crate) component_mapping: UINT,
+    pub(crate) levels: Range<image::Level>,
+    pub(crate) layers: Range<image::Layer>,
+}
+
+pub(crate) enum CommandSignature {
+    Draw,
+    DrawIndexed,
+    Dispatch,
+}
+
+/// Compile a single shader entry point from an HLSL text shader
+pub(crate) fn compile_shader(
+    stage: ShaderStage,
+    shader_model: hlsl::ShaderModel,
+    features: &hal::Features,
+    entry: &str,
+    code: &[u8],
+) -> Result<native::Blob, d::ShaderError> {
+    let stage_str = match stage {
+        ShaderStage::Vertex => "vs",
+        ShaderStage::Fragment => "ps",
+        ShaderStage::Compute => "cs",
+        _ => unimplemented!(),
+    };
+    let model_str = match shader_model {
+        hlsl::ShaderModel::V5_0 => "5_0",
+        hlsl::ShaderModel::V5_1 => "5_1",
+        hlsl::ShaderModel::V6_0 => "6_0",
+        _ => unimplemented!(),
+    };
+    let full_stage = format!("{}_{}\0", stage_str, model_str);
+
+    let mut shader_data = native::Blob::null();
+    let mut error = native::Blob::null();
+    let entry = ffi::CString::new(entry).unwrap();
+    let mut compile_flags = d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS;
+    if cfg!(debug_assertions) {
+        compile_flags |= d3dcompiler::D3DCOMPILE_DEBUG;
+    }
+    if features.contains(hal::Features::UNSIZED_DESCRIPTOR_ARRAY) {
+        compile_flags |= d3dcompiler::D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES;
+    }
+    let hr = unsafe {
+        d3dcompiler::D3DCompile(
+            code.as_ptr() as *const _,
+            code.len(),
+            ptr::null(),
+            ptr::null(),
+            ptr::null_mut(),
+            entry.as_ptr() as *const _,
+            full_stage.as_ptr() as *const i8,
+            compile_flags,
+            0,
+            shader_data.mut_void() as *mut *mut _,
+            error.mut_void() as *mut *mut _,
+        )
+    };
+    if !winerror::SUCCEEDED(hr) {
+        error!("D3DCompile error {:x}", hr);
+        let message = unsafe {
+            let pointer = error.GetBufferPointer();
+            let size = error.GetBufferSize();
+            let slice = slice::from_raw_parts(pointer as *const u8, size as usize);
+            String::from_utf8_lossy(slice).into_owned()
+        };
+        unsafe {
+            error.destroy();
+        }
+        Err(d::ShaderError::CompilationFailed(message))
+    } else {
+        Ok(shader_data)
+    }
+}
+
+#[repr(C)]
+struct GraphicsPipelineStateSubobjectStream {
+    root_signature: PipelineStateSubobject<*mut d3d12::ID3D12RootSignature>,
+    vs: PipelineStateSubobject<d3d12::D3D12_SHADER_BYTECODE>,
+    ps: PipelineStateSubobject<d3d12::D3D12_SHADER_BYTECODE>,
+    ds: PipelineStateSubobject<d3d12::D3D12_SHADER_BYTECODE>,
+    hs: PipelineStateSubobject<d3d12::D3D12_SHADER_BYTECODE>,
+    gs: PipelineStateSubobject<d3d12::D3D12_SHADER_BYTECODE>,
+    stream_output: PipelineStateSubobject<d3d12::D3D12_STREAM_OUTPUT_DESC>,
+    blend: PipelineStateSubobject<d3d12::D3D12_BLEND_DESC>,
+    sample_mask: PipelineStateSubobject<UINT>,
+    rasterizer: PipelineStateSubobject<d3d12::D3D12_RASTERIZER_DESC>,
+
depth_stencil: PipelineStateSubobject<d3d12::D3D12_DEPTH_STENCIL_DESC1>, + input_layout: PipelineStateSubobject<d3d12::D3D12_INPUT_LAYOUT_DESC>, + ib_strip_cut_value: PipelineStateSubobject<d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE>, + primitive_topology: PipelineStateSubobject<d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE>, + render_target_formats: PipelineStateSubobject<d3d12::D3D12_RT_FORMAT_ARRAY>, + depth_stencil_format: PipelineStateSubobject<dxgiformat::DXGI_FORMAT>, + sample_desc: PipelineStateSubobject<dxgitype::DXGI_SAMPLE_DESC>, + node_mask: PipelineStateSubobject<UINT>, + cached_pso: PipelineStateSubobject<d3d12::D3D12_CACHED_PIPELINE_STATE>, + flags: PipelineStateSubobject<d3d12::D3D12_PIPELINE_STATE_FLAGS>, +} + +impl GraphicsPipelineStateSubobjectStream { + fn new( + pso_desc: &d3d12::D3D12_GRAPHICS_PIPELINE_STATE_DESC, + depth_bounds_test_enable: bool, + ) -> Self { + GraphicsPipelineStateSubobjectStream { + root_signature: PipelineStateSubobject::new( + Subobject::RootSignature, + pso_desc.pRootSignature, + ), + vs: PipelineStateSubobject::new(Subobject::VS, pso_desc.VS), + ps: PipelineStateSubobject::new(Subobject::PS, pso_desc.PS), + ds: PipelineStateSubobject::new(Subobject::DS, pso_desc.DS), + hs: PipelineStateSubobject::new(Subobject::HS, pso_desc.HS), + gs: PipelineStateSubobject::new(Subobject::GS, pso_desc.GS), + stream_output: PipelineStateSubobject::new( + Subobject::StreamOutput, + pso_desc.StreamOutput, + ), + blend: PipelineStateSubobject::new(Subobject::Blend, pso_desc.BlendState), + sample_mask: PipelineStateSubobject::new(Subobject::SampleMask, pso_desc.SampleMask), + rasterizer: PipelineStateSubobject::new( + Subobject::Rasterizer, + pso_desc.RasterizerState, + ), + depth_stencil: PipelineStateSubobject::new( + Subobject::DepthStencil1, + d3d12::D3D12_DEPTH_STENCIL_DESC1 { + DepthEnable: pso_desc.DepthStencilState.DepthEnable, + DepthWriteMask: pso_desc.DepthStencilState.DepthWriteMask, + DepthFunc: pso_desc.DepthStencilState.DepthFunc, + StencilEnable: pso_desc.DepthStencilState.StencilEnable, + StencilReadMask: pso_desc.DepthStencilState.StencilReadMask, + StencilWriteMask: pso_desc.DepthStencilState.StencilWriteMask, + FrontFace: pso_desc.DepthStencilState.FrontFace, + BackFace: pso_desc.DepthStencilState.BackFace, + DepthBoundsTestEnable: depth_bounds_test_enable as _, + }, + ), + input_layout: PipelineStateSubobject::new(Subobject::InputLayout, pso_desc.InputLayout), + ib_strip_cut_value: PipelineStateSubobject::new( + Subobject::IBStripCut, + pso_desc.IBStripCutValue, + ), + primitive_topology: PipelineStateSubobject::new( + Subobject::PrimitiveTopology, + pso_desc.PrimitiveTopologyType, + ), + render_target_formats: PipelineStateSubobject::new( + Subobject::RTFormats, + d3d12::D3D12_RT_FORMAT_ARRAY { + RTFormats: pso_desc.RTVFormats, + NumRenderTargets: pso_desc.NumRenderTargets, + }, + ), + depth_stencil_format: PipelineStateSubobject::new( + Subobject::DSFormat, + pso_desc.DSVFormat, + ), + sample_desc: PipelineStateSubobject::new(Subobject::SampleDesc, pso_desc.SampleDesc), + node_mask: PipelineStateSubobject::new(Subobject::NodeMask, pso_desc.NodeMask), + cached_pso: PipelineStateSubobject::new(Subobject::CachedPSO, pso_desc.CachedPSO), + flags: PipelineStateSubobject::new(Subobject::Flags, pso_desc.Flags), + } + } +} + +impl Device { + fn parse_spirv(raw_data: &[u32]) -> Result<spirv::Ast<hlsl::Target>, d::ShaderError> { + let module = spirv::Module::from_words(raw_data); + + spirv::Ast::parse(&module).map_err(|err| { + let msg = match err { + 
SpirvErrorCode::CompilationError(msg) => msg,
+                SpirvErrorCode::Unhandled => "Unknown parsing error".into(),
+            };
+            d::ShaderError::CompilationFailed(msg)
+        })
+    }
+
+    /// Introspects the input attributes of the given SPIR-V shader and returns an optional vertex semantic remapping.
+    ///
+    /// The returned hashmap has the attribute location as key and an optional remapping to a two-part semantic.
+    ///
+    /// e.g.
+    /// `2 -> None` means use the default semantic `TEXCOORD2`
+    /// `2 -> Some((0, 2))` means use the two-part semantic `TEXCOORD0_2`. This is how matrices are represented by spirv-cross.
+    ///
+    /// This is a temporary workaround for https://github.com/KhronosGroup/SPIRV-Cross/issues/1512.
+    ///
+    /// This workaround also exists under the same name in the DX11 backend.
+    pub(crate) fn introspect_spirv_vertex_semantic_remapping(
+        raw_data: &[u32],
+    ) -> Result<auxil::FastHashMap<u32, Option<(u32, u32)>>, hal::device::ShaderError> {
+        // This is inefficient as we already parse it once before. This is a temporary workaround only called
+        // on vertex shaders. If this becomes permanent or shows up in profiles, deduplicate these as a first course of action.
+        let ast = Self::parse_spirv(raw_data)?;
+
+        let mut map = auxil::FastHashMap::default();
+
+        let inputs = ast
+            .get_shader_resources()
+            .map_err(gen_query_error)?
+            .stage_inputs;
+        for input in inputs {
+            let idx = ast
+                .get_decoration(input.id, spirv::Decoration::Location)
+                .map_err(gen_query_error)?;
+
+            let ty = ast.get_type(input.type_id).map_err(gen_query_error)?;
+
+            match ty {
+                spirv::Type::Boolean { columns, .. }
+                | spirv::Type::Int { columns, .. }
+                | spirv::Type::UInt { columns, .. }
+                | spirv::Type::Half { columns, .. }
+                | spirv::Type::Float { columns, .. }
+                | spirv::Type::Double { columns, .. }
+                    if columns > 1 =>
+                {
+                    for col in 0..columns {
+                        if let Some(_) = map.insert(idx + col, Some((idx, col))) {
+                            return Err(hal::device::ShaderError::CompilationFailed(format!(
+                                "Shader has overlapping input attachments at location {}",
+                                idx
+                            )));
+                        }
+                    }
+                }
+                _ => {
+                    if let Some(_) = map.insert(idx, None) {
+                        return Err(hal::device::ShaderError::CompilationFailed(format!(
+                            "Shader has overlapping input attachments at location {}",
+                            idx
+                        )));
+                    }
+                }
+            }
+        }
+
+        Ok(map)
+    }
+
+    fn patch_spirv_resources(
+        ast: &mut spirv::Ast<hlsl::Target>,
+        layout: &r::PipelineLayout,
+    ) -> Result<(), d::ShaderError> {
+        // Shift the descriptor sets up by one space to make room for the root constants in "space0".
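+        // Illustrative example (assumed values): with any root constants present,
+        // `space_offset` becomes 1 below, so a SPIR-V binding declared with
+        // `set = 2` is re-decorated to land in HLSL register `space3`.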
+ let space_offset = if layout.shared.constants.is_empty() { + 0 + } else { + 1 + }; + let shader_resources = ast.get_shader_resources().map_err(gen_query_error)?; + + if space_offset != 0 { + for image in &shader_resources.separate_images { + let set = ast + .get_decoration(image.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)?; + ast.set_decoration( + image.id, + spirv::Decoration::DescriptorSet, + space_offset + set, + ) + .map_err(gen_unexpected_error)?; + } + } + + if space_offset != 0 { + for uniform_buffer in &shader_resources.uniform_buffers { + let set = ast + .get_decoration(uniform_buffer.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)?; + ast.set_decoration( + uniform_buffer.id, + spirv::Decoration::DescriptorSet, + space_offset + set, + ) + .map_err(gen_unexpected_error)?; + } + } + + for storage_buffer in &shader_resources.storage_buffers { + let set = ast + .get_decoration(storage_buffer.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)?; + let binding = ast + .get_decoration(storage_buffer.id, spirv::Decoration::Binding) + .map_err(gen_query_error)?; + if space_offset != 0 { + ast.set_decoration( + storage_buffer.id, + spirv::Decoration::DescriptorSet, + space_offset + set, + ) + .map_err(gen_unexpected_error)?; + } + if !layout.elements[set as usize] + .mutable_bindings + .contains(&binding) + { + ast.set_decoration(storage_buffer.id, spirv::Decoration::NonWritable, 1) + .map_err(gen_unexpected_error)? + } + } + + for image in &shader_resources.storage_images { + let set = ast + .get_decoration(image.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)?; + let binding = ast + .get_decoration(image.id, spirv::Decoration::Binding) + .map_err(gen_query_error)?; + if space_offset != 0 { + ast.set_decoration( + image.id, + spirv::Decoration::DescriptorSet, + space_offset + set, + ) + .map_err(gen_unexpected_error)?; + } + if !layout.elements[set as usize] + .mutable_bindings + .contains(&binding) + { + ast.set_decoration(image.id, spirv::Decoration::NonWritable, 1) + .map_err(gen_unexpected_error)? 
+ } + } + + if space_offset != 0 { + for sampler in &shader_resources.separate_samplers { + let set = ast + .get_decoration(sampler.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)?; + ast.set_decoration( + sampler.id, + spirv::Decoration::DescriptorSet, + space_offset + set, + ) + .map_err(gen_unexpected_error)?; + } + } + + if space_offset != 0 { + for image in &shader_resources.sampled_images { + let set = ast + .get_decoration(image.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)?; + ast.set_decoration( + image.id, + spirv::Decoration::DescriptorSet, + space_offset + set, + ) + .map_err(gen_unexpected_error)?; + } + } + + if space_offset != 0 { + for input in &shader_resources.subpass_inputs { + let set = ast + .get_decoration(input.id, spirv::Decoration::DescriptorSet) + .map_err(gen_query_error)?; + ast.set_decoration( + input.id, + spirv::Decoration::DescriptorSet, + space_offset + set, + ) + .map_err(gen_unexpected_error)?; + } + } + + Ok(()) + } + + fn translate_spirv( + ast: &mut spirv::Ast<hlsl::Target>, + shader_model: hlsl::ShaderModel, + layout: &r::PipelineLayout, + stage: ShaderStage, + features: &hal::Features, + entry_point: &str, + ) -> Result<String, d::ShaderError> { + let mut compile_options = hlsl::CompilerOptions::default(); + compile_options.shader_model = shader_model; + compile_options.vertex.invert_y = !features.contains(hal::Features::NDC_Y_UP); + compile_options.force_zero_initialized_variables = true; + compile_options.nonwritable_uav_texture_as_srv = true; + // these are technically incorrect, but better than panicking + compile_options.point_size_compat = true; + compile_options.point_coord_compat = true; + compile_options.entry_point = Some((entry_point.to_string(), conv::map_stage(stage))); + + let stage_flag = stage.to_flag(); + let root_constant_layout = layout + .shared + .constants + .iter() + .filter_map(|constant| { + if constant.stages.contains(stage_flag) { + Some(hlsl::RootConstant { + start: constant.range.start * 4, + end: constant.range.end * 4, + binding: constant.range.start, + space: 0, + }) + } else { + None + } + }) + .collect(); + ast.set_compiler_options(&compile_options) + .map_err(gen_unexpected_error)?; + ast.set_root_constant_layout(root_constant_layout) + .map_err(gen_unexpected_error)?; + ast.compile().map_err(|err| { + let msg = match err { + SpirvErrorCode::CompilationError(msg) => msg, + SpirvErrorCode::Unhandled => "Unknown compile error".into(), + }; + d::ShaderError::CompilationFailed(msg) + }) + } + + // Extract entry point from shader module on pipeline creation. + // Returns compiled shader blob and bool to indicate if the shader should be + // destroyed after pipeline creation + fn extract_entry_point( + stage: ShaderStage, + source: &pso::EntryPoint<B>, + layout: &r::PipelineLayout, + features: &hal::Features, + ) -> Result<(native::Blob, bool), d::ShaderError> { + match *source.module { + r::ShaderModule::Compiled(ref shaders) => { + // TODO: do we need to check for specialization constants? + // Use precompiled shader, ignore specialization or layout. 
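+                // (Note: in this pre-compiled path there is no SPIR-V to patch, so
+                // the layout-driven remapping in the `Spirv` branch below never applies.)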
+ shaders + .get(source.entry) + .map(|src| (*src, false)) + .ok_or(d::ShaderError::MissingEntryPoint(source.entry.into())) + } + r::ShaderModule::Spirv(ref raw_data) => { + let mut ast = Self::parse_spirv(raw_data)?; + spirv_cross_specialize_ast(&mut ast, &source.specialization)?; + Self::patch_spirv_resources(&mut ast, layout)?; + + let execution_model = conv::map_stage(stage); + let shader_model = hlsl::ShaderModel::V5_1; + let shader_code = Self::translate_spirv( + &mut ast, + shader_model, + layout, + stage, + features, + source.entry, + )?; + debug!("SPIRV-Cross generated shader:\n{}", shader_code); + + let real_name = ast + .get_cleansed_entry_point_name(source.entry, execution_model) + .map_err(gen_query_error)?; + + let stage = conv::map_execution_model(execution_model); + let shader = compile_shader( + stage, + shader_model, + features, + &real_name, + shader_code.as_bytes(), + )?; + Ok((shader, true)) + } + } + } + + /// Create a shader module from HLSL with a single entry point + pub fn create_shader_module_from_source( + &self, + stage: ShaderStage, + hlsl_entry: &str, + entry_point: &str, + code: &[u8], + ) -> Result<r::ShaderModule, d::ShaderError> { + let mut shader_map = BTreeMap::new(); + let blob = compile_shader( + stage, + hlsl::ShaderModel::V5_1, + &self.features, + hlsl_entry, + code, + )?; + shader_map.insert(entry_point.into(), blob); + Ok(r::ShaderModule::Compiled(shader_map)) + } + + pub(crate) fn create_command_signature( + device: native::Device, + ty: CommandSignature, + ) -> native::CommandSignature { + let (arg, stride) = match ty { + CommandSignature::Draw => (native::IndirectArgument::draw(), 16), + CommandSignature::DrawIndexed => (native::IndirectArgument::draw_indexed(), 20), + CommandSignature::Dispatch => (native::IndirectArgument::dispatch(), 12), + }; + + let (signature, hr) = + device.create_command_signature(native::RootSignature::null(), &[arg], stride, 0); + + if !winerror::SUCCEEDED(hr) { + error!("error on command signature creation: {:x}", hr); + } + signature + } + + pub(crate) fn create_descriptor_heap_impl( + device: native::Device, + heap_type: native::DescriptorHeapType, + shader_visible: bool, + capacity: usize, + ) -> r::DescriptorHeap { + assert_ne!(capacity, 0); + + let (heap, _hr) = device.create_descriptor_heap( + capacity as _, + heap_type, + if shader_visible { + native::DescriptorHeapFlags::SHADER_VISIBLE + } else { + native::DescriptorHeapFlags::empty() + }, + 0, + ); + + let descriptor_size = device.get_descriptor_increment_size(heap_type); + let cpu_handle = heap.start_cpu_descriptor(); + let gpu_handle = heap.start_gpu_descriptor(); + + r::DescriptorHeap { + raw: heap, + handle_size: descriptor_size as _, + total_handles: capacity as _, + start: r::DualHandle { + cpu: cpu_handle, + gpu: gpu_handle, + size: 0, + }, + } + } + + pub(crate) fn view_image_as_render_target_impl( + device: native::Device, + handle: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE, + info: &ViewInfo, + ) -> Result<(), image::ViewCreationError> { + #![allow(non_snake_case)] + + let mut desc = d3d12::D3D12_RENDER_TARGET_VIEW_DESC { + Format: info.format, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + let MipSlice = info.levels.start as _; + let FirstArraySlice = info.layers.start as _; + let ArraySize = (info.layers.end - info.layers.start) as _; + let is_msaa = info.kind.num_samples() > 1; + if info.levels.start + 1 != info.levels.end { + return Err(image::ViewCreationError::Level(info.levels.start)); + } + if info.layers.end > info.kind.num_layers() 
{ + return Err(image::ViewCreationError::Layer( + image::LayerError::OutOfBounds, + )); + } + + match info.view_kind { + image::ViewKind::D1 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE1D; + *unsafe { desc.u.Texture1D_mut() } = d3d12::D3D12_TEX1D_RTV { MipSlice } + } + image::ViewKind::D1Array => { + desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + *unsafe { desc.u.Texture1DArray_mut() } = d3d12::D3D12_TEX1D_ARRAY_RTV { + MipSlice, + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2 if is_msaa => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DMS; + *unsafe { desc.u.Texture2DMS_mut() } = d3d12::D3D12_TEX2DMS_RTV { + UnusedField_NothingToDefine: 0, + } + } + image::ViewKind::D2 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2D; + *unsafe { desc.u.Texture2D_mut() } = d3d12::D3D12_TEX2D_RTV { + MipSlice, + PlaneSlice: 0, //TODO + } + } + image::ViewKind::D2Array if is_msaa => { + desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + *unsafe { desc.u.Texture2DMSArray_mut() } = d3d12::D3D12_TEX2DMS_ARRAY_RTV { + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2Array => { + desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + *unsafe { desc.u.Texture2DArray_mut() } = d3d12::D3D12_TEX2D_ARRAY_RTV { + MipSlice, + FirstArraySlice, + ArraySize, + PlaneSlice: 0, //TODO + } + } + image::ViewKind::D3 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE3D; + *unsafe { desc.u.Texture3D_mut() } = d3d12::D3D12_TEX3D_RTV { + MipSlice, + FirstWSlice: 0, + WSize: info.kind.extent().depth as _, + } + } + image::ViewKind::Cube | image::ViewKind::CubeArray => { + desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + //TODO: double-check if any *6 are needed + *unsafe { desc.u.Texture2DArray_mut() } = d3d12::D3D12_TEX2D_ARRAY_RTV { + MipSlice, + FirstArraySlice, + ArraySize, + PlaneSlice: 0, //TODO + } + } + }; + + unsafe { + device.CreateRenderTargetView(info.resource.as_mut_ptr(), &desc, handle); + } + + Ok(()) + } + + fn view_image_as_render_target( + &self, + info: &ViewInfo, + ) -> Result<descriptors_cpu::Handle, image::ViewCreationError> { + let handle = self.rtv_pool.lock().alloc_handle(); + Self::view_image_as_render_target_impl(self.raw, handle.raw, info).map(|_| handle) + } + + pub(crate) fn view_image_as_depth_stencil_impl( + device: native::Device, + handle: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE, + info: &ViewInfo, + ) -> Result<(), image::ViewCreationError> { + #![allow(non_snake_case)] + + let mut desc = d3d12::D3D12_DEPTH_STENCIL_VIEW_DESC { + Format: info.format, + ViewDimension: 0, + Flags: 0, + u: unsafe { mem::zeroed() }, + }; + + let MipSlice = info.levels.start as _; + let FirstArraySlice = info.layers.start as _; + let ArraySize = (info.layers.end - info.layers.start) as _; + let is_msaa = info.kind.num_samples() > 1; + if info.levels.start + 1 != info.levels.end { + return Err(image::ViewCreationError::Level(info.levels.start)); + } + if info.layers.end > info.kind.num_layers() { + return Err(image::ViewCreationError::Layer( + image::LayerError::OutOfBounds, + )); + } + + match info.view_kind { + image::ViewKind::D1 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE1D; + *unsafe { desc.u.Texture1D_mut() } = d3d12::D3D12_TEX1D_DSV { MipSlice } + } + image::ViewKind::D1Array => { + 
desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + *unsafe { desc.u.Texture1DArray_mut() } = d3d12::D3D12_TEX1D_ARRAY_DSV { + MipSlice, + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2 if is_msaa => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2DMS; + *unsafe { desc.u.Texture2DMS_mut() } = d3d12::D3D12_TEX2DMS_DSV { + UnusedField_NothingToDefine: 0, + } + } + image::ViewKind::D2 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2D; + *unsafe { desc.u.Texture2D_mut() } = d3d12::D3D12_TEX2D_DSV { MipSlice } + } + image::ViewKind::D2Array if is_msaa => { + desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + *unsafe { desc.u.Texture2DMSArray_mut() } = d3d12::D3D12_TEX2DMS_ARRAY_DSV { + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2Array => { + desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + *unsafe { desc.u.Texture2DArray_mut() } = d3d12::D3D12_TEX2D_ARRAY_DSV { + MipSlice, + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D3 | image::ViewKind::Cube | image::ViewKind::CubeArray => { + unimplemented!() + } + }; + + unsafe { + device.CreateDepthStencilView(info.resource.as_mut_ptr(), &desc, handle); + } + + Ok(()) + } + + fn view_image_as_depth_stencil( + &self, + info: &ViewInfo, + ) -> Result<descriptors_cpu::Handle, image::ViewCreationError> { + let handle = self.dsv_pool.lock().alloc_handle(); + Self::view_image_as_depth_stencil_impl(self.raw, handle.raw, info).map(|_| handle) + } + + pub(crate) fn build_image_as_shader_resource_desc( + info: &ViewInfo, + ) -> Result<d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC, image::ViewCreationError> { + #![allow(non_snake_case)] + + let mut desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: info.format, + ViewDimension: 0, + Shader4ComponentMapping: info.component_mapping, + u: unsafe { mem::zeroed() }, + }; + + let MostDetailedMip = info.levels.start as _; + let MipLevels = (info.levels.end - info.levels.start) as _; + let FirstArraySlice = info.layers.start as _; + let ArraySize = (info.layers.end - info.layers.start) as _; + + if info.layers.end > info.kind.num_layers() { + return Err(image::ViewCreationError::Layer( + image::LayerError::OutOfBounds, + )); + } + let is_msaa = info.kind.num_samples() > 1; + let is_cube = info.caps.contains(image::ViewCapabilities::KIND_CUBE); + + match info.view_kind { + image::ViewKind::D1 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE1D; + *unsafe { desc.u.Texture1D_mut() } = d3d12::D3D12_TEX1D_SRV { + MostDetailedMip, + MipLevels, + ResourceMinLODClamp: 0.0, + } + } + image::ViewKind::D1Array => { + desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + *unsafe { desc.u.Texture1DArray_mut() } = d3d12::D3D12_TEX1D_ARRAY_SRV { + MostDetailedMip, + MipLevels, + FirstArraySlice, + ArraySize, + ResourceMinLODClamp: 0.0, + } + } + image::ViewKind::D2 if is_msaa => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DMS; + *unsafe { desc.u.Texture2DMS_mut() } = d3d12::D3D12_TEX2DMS_SRV { + UnusedField_NothingToDefine: 0, + } + } + image::ViewKind::D2 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2D; + *unsafe { desc.u.Texture2D_mut() } = d3d12::D3D12_TEX2D_SRV { + MostDetailedMip, + MipLevels, + PlaneSlice: 0, //TODO + ResourceMinLODClamp: 0.0, + } + } + image::ViewKind::D2Array if is_msaa => { + 
desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + *unsafe { desc.u.Texture2DMSArray_mut() } = d3d12::D3D12_TEX2DMS_ARRAY_SRV { + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2Array => { + desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + *unsafe { desc.u.Texture2DArray_mut() } = d3d12::D3D12_TEX2D_ARRAY_SRV { + MostDetailedMip, + MipLevels, + FirstArraySlice, + ArraySize, + PlaneSlice: 0, //TODO + ResourceMinLODClamp: 0.0, + } + } + image::ViewKind::D3 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE3D; + *unsafe { desc.u.Texture3D_mut() } = d3d12::D3D12_TEX3D_SRV { + MostDetailedMip, + MipLevels, + ResourceMinLODClamp: 0.0, + } + } + image::ViewKind::Cube if is_cube => { + desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURECUBE; + *unsafe { desc.u.TextureCube_mut() } = d3d12::D3D12_TEXCUBE_SRV { + MostDetailedMip, + MipLevels, + ResourceMinLODClamp: 0.0, + } + } + image::ViewKind::CubeArray if is_cube => { + assert_eq!(0, ArraySize % 6); + desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + *unsafe { desc.u.TextureCubeArray_mut() } = d3d12::D3D12_TEXCUBE_ARRAY_SRV { + MostDetailedMip, + MipLevels, + First2DArrayFace: FirstArraySlice, + NumCubes: ArraySize / 6, + ResourceMinLODClamp: 0.0, + } + } + image::ViewKind::Cube | image::ViewKind::CubeArray => { + error!( + "Cube views are not supported for the image, kind: {:?}", + info.kind + ); + return Err(image::ViewCreationError::BadKind(info.view_kind)); + } + } + + Ok(desc) + } + + fn view_image_as_shader_resource( + &self, + info: &ViewInfo, + ) -> Result<descriptors_cpu::Handle, image::ViewCreationError> { + let desc = Self::build_image_as_shader_resource_desc(&info)?; + let handle = self.srv_uav_pool.lock().alloc_handle(); + unsafe { + self.raw + .CreateShaderResourceView(info.resource.as_mut_ptr(), &desc, handle.raw); + } + + Ok(handle) + } + + fn view_image_as_storage( + &self, + info: &ViewInfo, + ) -> Result<descriptors_cpu::Handle, image::ViewCreationError> { + #![allow(non_snake_case)] + + // Cannot make a storage image over multiple mips + if info.levels.start + 1 != info.levels.end { + return Err(image::ViewCreationError::Unsupported); + } + + let mut desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: info.format, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + let MipSlice = info.levels.start as _; + let FirstArraySlice = info.layers.start as _; + let ArraySize = (info.layers.end - info.layers.start) as _; + + if info.layers.end > info.kind.num_layers() { + return Err(image::ViewCreationError::Layer( + image::LayerError::OutOfBounds, + )); + } + if info.kind.num_samples() > 1 { + error!("MSAA images can't be viewed as UAV"); + return Err(image::ViewCreationError::Unsupported); + } + + match info.view_kind { + image::ViewKind::D1 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE1D; + *unsafe { desc.u.Texture1D_mut() } = d3d12::D3D12_TEX1D_UAV { MipSlice } + } + image::ViewKind::D1Array => { + desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + *unsafe { desc.u.Texture1DArray_mut() } = d3d12::D3D12_TEX1D_ARRAY_UAV { + MipSlice, + FirstArraySlice, + ArraySize, + } + } + image::ViewKind::D2 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE2D; + *unsafe { desc.u.Texture2D_mut() } = d3d12::D3D12_TEX2D_UAV { + MipSlice, + PlaneSlice: 0, //TODO + } + } + image::ViewKind::D2Array => { + 
desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + *unsafe { desc.u.Texture2DArray_mut() } = d3d12::D3D12_TEX2D_ARRAY_UAV { + MipSlice, + FirstArraySlice, + ArraySize, + PlaneSlice: 0, //TODO + } + } + image::ViewKind::D3 => { + assert_eq!(info.layers, 0..1); + desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE3D; + *unsafe { desc.u.Texture3D_mut() } = d3d12::D3D12_TEX3D_UAV { + MipSlice, + FirstWSlice: 0, + WSize: info.kind.extent().depth as _, + } + } + image::ViewKind::Cube | image::ViewKind::CubeArray => { + error!("Cubic images can't be viewed as UAV"); + return Err(image::ViewCreationError::Unsupported); + } + } + + let handle = self.srv_uav_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateUnorderedAccessView( + info.resource.as_mut_ptr(), + ptr::null_mut(), + &desc, + handle.raw, + ); + } + + Ok(handle) + } + + pub(crate) fn create_raw_fence(&self, signalled: bool) -> native::Fence { + let mut handle = native::Fence::null(); + assert_eq!(winerror::S_OK, unsafe { + self.raw.CreateFence( + if signalled { 1 } else { 0 }, + d3d12::D3D12_FENCE_FLAG_NONE, + &d3d12::ID3D12Fence::uuidof(), + handle.mut_void(), + ) + }); + handle + } + + pub(crate) fn create_swapchain_impl( + &self, + config: &w::SwapchainConfig, + window_handle: windef::HWND, + factory: native::WeakPtr<dxgi1_4::IDXGIFactory4>, + ) -> Result< + ( + native::WeakPtr<dxgi1_4::IDXGISwapChain3>, + dxgiformat::DXGI_FORMAT, + ), + w::CreationError, + > { + let mut swap_chain1 = native::WeakPtr::<dxgi1_2::IDXGISwapChain1>::null(); + + //TODO: proper error type? + let non_srgb_format = conv::map_format_nosrgb(config.format).unwrap(); + + let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + if config.present_mode.contains(w::PresentMode::IMMEDIATE) { + flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + } + + // TODO: double-check values + let desc = dxgi1_2::DXGI_SWAP_CHAIN_DESC1 { + AlphaMode: dxgi1_2::DXGI_ALPHA_MODE_IGNORE, + BufferCount: config.image_count, + Width: config.extent.width, + Height: config.extent.height, + Format: non_srgb_format, + Flags: flags, + BufferUsage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Scaling: dxgi1_2::DXGI_SCALING_STRETCH, + Stereo: FALSE, + SwapEffect: dxgi::DXGI_SWAP_EFFECT_FLIP_DISCARD, + }; + + unsafe { + let hr = factory.CreateSwapChainForHwnd( + self.present_queue.as_mut_ptr() as *mut _, + window_handle, + &desc, + ptr::null(), + ptr::null_mut(), + swap_chain1.mut_void() as *mut *mut _, + ); + + if !winerror::SUCCEEDED(hr) { + error!("error on swapchain creation 0x{:x}", hr); + } + + let (swap_chain3, hr3) = swap_chain1.cast::<dxgi1_4::IDXGISwapChain3>(); + if !winerror::SUCCEEDED(hr3) { + error!("error on swapchain cast 0x{:x}", hr3); + } + + swap_chain1.destroy(); + Ok((swap_chain3, non_srgb_format)) + } + } + + pub(crate) fn wrap_swapchain( + &self, + inner: native::WeakPtr<dxgi1_4::IDXGISwapChain3>, + config: &w::SwapchainConfig, + ) -> Swapchain { + let waitable = unsafe { + inner.SetMaximumFrameLatency(config.image_count); + inner.GetFrameLatencyWaitableObject() + }; + + let rtv_desc = d3d12::D3D12_RENDER_TARGET_VIEW_DESC { + Format: conv::map_format(config.format).unwrap(), + ViewDimension: d3d12::D3D12_RTV_DIMENSION_TEXTURE2D, + ..unsafe { mem::zeroed() } + }; + let rtv_heap = Device::create_descriptor_heap_impl( + self.raw, + native::DescriptorHeapType::Rtv, + false, + config.image_count as _, + ); + + let mut resources = vec![native::Resource::null(); 
config.image_count as usize];
+        for (i, res) in resources.iter_mut().enumerate() {
+            let rtv_handle = rtv_heap.at(i as _, 0).cpu;
+            unsafe {
+                inner.GetBuffer(i as _, &d3d12::ID3D12Resource::uuidof(), res.mut_void());
+                self.raw
+                    .CreateRenderTargetView(res.as_mut_ptr(), &rtv_desc, rtv_handle);
+            }
+        }
+
+        Swapchain {
+            inner,
+            rtv_heap,
+            resources,
+            waitable,
+            usage: config.image_usage,
+            acquired_count: 0,
+        }
+    }
+}
+
+impl d::Device<B> for Device {
+    unsafe fn allocate_memory(
+        &self,
+        mem_type: hal::MemoryTypeId,
+        size: u64,
+    ) -> Result<r::Memory, d::AllocationError> {
+        let mem_type = mem_type.0;
+        let mem_base_id = mem_type % NUM_HEAP_PROPERTIES;
+        let heap_property = &self.heap_properties[mem_base_id];
+
+        let properties = d3d12::D3D12_HEAP_PROPERTIES {
+            Type: d3d12::D3D12_HEAP_TYPE_CUSTOM,
+            CPUPageProperty: heap_property.page_property,
+            MemoryPoolPreference: heap_property.memory_pool,
+            CreationNodeMask: 0,
+            VisibleNodeMask: 0,
+        };
+
+        // Exposed memory types are grouped according to their capabilities.
+        // See `MemoryGroup` for more details.
+        let mem_group = mem_type / NUM_HEAP_PROPERTIES;
+
+        let desc = d3d12::D3D12_HEAP_DESC {
+            SizeInBytes: size,
+            Properties: properties,
+            Alignment: d3d12::D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT as _, // TODO: not always..?
+            Flags: match mem_group {
+                0 => d3d12::D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES,
+                1 => d3d12::D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
+                2 => d3d12::D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES,
+                3 => d3d12::D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES,
+                _ => unreachable!(),
+            },
+        };
+
+        let mut heap = native::Heap::null();
+        let hr = self
+            .raw
+            .clone()
+            .CreateHeap(&desc, &d3d12::ID3D12Heap::uuidof(), heap.mut_void());
+        if hr == winerror::E_OUTOFMEMORY {
+            return Err(d::OutOfMemory::Device.into());
+        }
+        assert_eq!(winerror::S_OK, hr);
+
+        // The first memory heap of each group corresponds to the default heap, which can never
+        // be mapped.
+        // Devices supporting heap tier 1 can only create buffers on mem group 1 (ALLOW_ONLY_BUFFERS).
+        // Devices supporting heap tier 2 always expose only mem group 0 and don't have any further restrictions.
+        let is_mapable = mem_base_id != 0
+            && (mem_group == MemoryGroup::Universal as _
+                || mem_group == MemoryGroup::BufferOnly as _);
+
+        // Create a buffer resource covering the whole memory slice to be able to map the whole memory.
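+        // (Note: D3D12 exposes mapping only on resources, via `ID3D12Resource::Map`,
+        // not on heaps, so this placed buffer is what later makes `map_memory`
+        // possible for the allocation.)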
+ let resource = if is_mapable { + let mut resource = native::Resource::null(); + let desc = d3d12::D3D12_RESOURCE_DESC { + Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: d3d12::D3D12_RESOURCE_FLAG_NONE, + }; + + assert_eq!( + winerror::S_OK, + self.raw.clone().CreatePlacedResource( + heap.as_mut_ptr(), + 0, + &desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + ); + + Some(resource) + } else { + None + }; + + Ok(r::Memory { + heap, + type_id: mem_type, + size, + resource, + }) + } + + unsafe fn create_command_pool( + &self, + family: QueueFamilyId, + create_flags: CommandPoolCreateFlags, + ) -> Result<CommandPool, d::OutOfMemory> { + let list_type = QUEUE_FAMILIES[family.0].native_type(); + Ok(CommandPool::new( + self.raw, + list_type, + &self.shared, + create_flags, + )) + } + + unsafe fn destroy_command_pool(&self, _pool: CommandPool) {} + + unsafe fn create_render_pass<'a, IA, IS, ID>( + &self, + attachments: IA, + subpasses: IS, + dependencies: ID, + ) -> Result<r::RenderPass, d::OutOfMemory> + where + IA: IntoIterator, + IA::Item: Borrow<pass::Attachment>, + IS: IntoIterator, + IS::Item: Borrow<pass::SubpassDesc<'a>>, + ID: IntoIterator, + ID::Item: Borrow<pass::SubpassDependency>, + { + #[derive(Copy, Clone, Debug, PartialEq)] + enum SubState { + New(d3d12::D3D12_RESOURCE_STATES), + // Color attachment which will be resolved at the end of the subpass + Resolve(d3d12::D3D12_RESOURCE_STATES), + Preserve, + Undefined, + } + /// Temporary information about every sub-pass + struct SubInfo<'a> { + desc: pass::SubpassDesc<'a>, + /// States before the render-pass (in self.start) + /// and after the render-pass (in self.end). + external_dependencies: Range<image::Access>, + /// Counts the number of dependencies that need to be resolved + /// before starting this subpass. 
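+            /// (Drives the topological ordering of the subpasses further below.)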
+ unresolved_dependencies: u16, + } + struct AttachmentInfo { + sub_states: Vec<SubState>, + last_state: d3d12::D3D12_RESOURCE_STATES, + barrier_start_index: usize, + } + + let attachments = attachments + .into_iter() + .map(|attachment| attachment.borrow().clone()) + .collect::<SmallVec<[_; 5]>>(); + let mut sub_infos = subpasses + .into_iter() + .map(|desc| SubInfo { + desc: desc.borrow().clone(), + external_dependencies: image::Access::empty()..image::Access::empty(), + unresolved_dependencies: 0, + }) + .collect::<SmallVec<[_; 1]>>(); + let dependencies = dependencies.into_iter().collect::<SmallVec<[_; 2]>>(); + + let mut att_infos = (0..attachments.len()) + .map(|_| AttachmentInfo { + sub_states: vec![SubState::Undefined; sub_infos.len()], + last_state: d3d12::D3D12_RESOURCE_STATE_COMMON, // is to be overwritten + barrier_start_index: 0, + }) + .collect::<SmallVec<[_; 5]>>(); + + for dep in &dependencies { + let dep = dep.borrow(); + match dep.passes { + Range { + start: None, + end: None, + } => { + error!("Unexpected external-external dependency!"); + } + Range { + start: None, + end: Some(sid), + } => { + sub_infos[sid as usize].external_dependencies.start |= dep.accesses.start; + } + Range { + start: Some(sid), + end: None, + } => { + sub_infos[sid as usize].external_dependencies.end |= dep.accesses.end; + } + Range { + start: Some(from_sid), + end: Some(sid), + } => { + //Note: self-dependencies are ignored + if from_sid != sid { + sub_infos[sid as usize].unresolved_dependencies += 1; + } + } + } + } + + // Fill out subpass known layouts + for (sid, sub_info) in sub_infos.iter().enumerate() { + let sub = &sub_info.desc; + for (i, &(id, _layout)) in sub.colors.iter().enumerate() { + let target_state = d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET; + let state = match sub.resolves.get(i) { + Some(_) => SubState::Resolve(target_state), + None => SubState::New(target_state), + }; + let old = mem::replace(&mut att_infos[id].sub_states[sid], state); + debug_assert_eq!(SubState::Undefined, old); + } + for &(id, layout) in sub.depth_stencil { + let state = SubState::New(match layout { + image::Layout::DepthStencilAttachmentOptimal => { + d3d12::D3D12_RESOURCE_STATE_DEPTH_WRITE + } + image::Layout::DepthStencilReadOnlyOptimal => { + d3d12::D3D12_RESOURCE_STATE_DEPTH_READ + } + image::Layout::General => d3d12::D3D12_RESOURCE_STATE_DEPTH_WRITE, + _ => { + error!("Unexpected depth/stencil layout: {:?}", layout); + d3d12::D3D12_RESOURCE_STATE_COMMON + } + }); + let old = mem::replace(&mut att_infos[id].sub_states[sid], state); + debug_assert_eq!(SubState::Undefined, old); + } + for &(id, _layout) in sub.inputs { + let state = SubState::New(d3d12::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + let old = mem::replace(&mut att_infos[id].sub_states[sid], state); + debug_assert_eq!(SubState::Undefined, old); + } + for &(id, _layout) in sub.resolves { + let state = SubState::New(d3d12::D3D12_RESOURCE_STATE_RESOLVE_DEST); + let old = mem::replace(&mut att_infos[id].sub_states[sid], state); + debug_assert_eq!(SubState::Undefined, old); + } + for &id in sub.preserves { + let old = mem::replace(&mut att_infos[id].sub_states[sid], SubState::Preserve); + debug_assert_eq!(SubState::Undefined, old); + } + } + + let mut rp = r::RenderPass { + attachments: attachments.iter().cloned().collect(), + subpasses: Vec::new(), + post_barriers: Vec::new(), + raw_name: Vec::new(), + }; + + while let Some(sid) = sub_infos + .iter() + .position(|si| si.unresolved_dependencies == 0) + { + for dep in &dependencies { + let dep 
= dep.borrow(); + if dep.passes.start != dep.passes.end + && dep.passes.start == Some(sid as pass::SubpassId) + { + if let Some(other) = dep.passes.end { + sub_infos[other as usize].unresolved_dependencies -= 1; + } + } + } + + let si = &mut sub_infos[sid]; + si.unresolved_dependencies = !0; // mark as done + + // Subpass barriers + let mut pre_barriers = Vec::new(); + let mut post_barriers = Vec::new(); + for (att_id, (ai, att)) in att_infos.iter_mut().zip(attachments.iter()).enumerate() { + // Attachment wasn't used before, figure out the initial state + if ai.barrier_start_index == 0 { + //Note: the external dependencies are provided for all attachments that are + // first used in this sub-pass, so they may contain more states than we expect + // for this particular attachment. + ai.last_state = conv::map_image_resource_state( + si.external_dependencies.start, + att.layouts.start, + ); + } + // Barrier from previous subpass to current or following subpasses. + match ai.sub_states[sid] { + SubState::Preserve => { + ai.barrier_start_index = rp.subpasses.len() + 1; + } + SubState::New(state) if state != ai.last_state => { + let barrier = r::BarrierDesc::new(att_id, ai.last_state..state); + match rp.subpasses.get_mut(ai.barrier_start_index) { + Some(past_subpass) => { + let split = barrier.split(); + past_subpass.pre_barriers.push(split.start); + pre_barriers.push(split.end); + } + None => pre_barriers.push(barrier), + } + ai.last_state = state; + ai.barrier_start_index = rp.subpasses.len() + 1; + } + SubState::Resolve(state) => { + // 1. Standard pre barrier to update state from previous pass into desired substate. + if state != ai.last_state { + let barrier = r::BarrierDesc::new(att_id, ai.last_state..state); + match rp.subpasses.get_mut(ai.barrier_start_index) { + Some(past_subpass) => { + let split = barrier.split(); + past_subpass.pre_barriers.push(split.start); + pre_barriers.push(split.end); + } + None => pre_barriers.push(barrier), + } + } + + // 2. Post Barrier at the end of the subpass into RESOLVE_SOURCE. + let resolve_state = d3d12::D3D12_RESOURCE_STATE_RESOLVE_SOURCE; + let barrier = r::BarrierDesc::new(att_id, state..resolve_state); + post_barriers.push(barrier); + + ai.last_state = resolve_state; + ai.barrier_start_index = rp.subpasses.len() + 1; + } + SubState::Undefined | SubState::New(_) => {} + }; + } + + rp.subpasses.push(r::SubpassDesc { + color_attachments: si.desc.colors.iter().cloned().collect(), + depth_stencil_attachment: si.desc.depth_stencil.cloned(), + input_attachments: si.desc.inputs.iter().cloned().collect(), + resolve_attachments: si.desc.resolves.iter().cloned().collect(), + pre_barriers, + post_barriers, + }); + } + // if this fails, our graph has cycles + assert_eq!(rp.subpasses.len(), sub_infos.len()); + assert!(sub_infos.iter().all(|si| si.unresolved_dependencies == !0)); + + // take care of the post-pass transitions at the end of the renderpass. + for (att_id, (ai, att)) in att_infos.iter().zip(attachments.iter()).enumerate() { + let state_dst = if ai.barrier_start_index == 0 { + // attachment wasn't used in any sub-pass? 
+                continue;
+            } else {
+                let si = &sub_infos[ai.barrier_start_index - 1];
+                conv::map_image_resource_state(si.external_dependencies.end, att.layouts.end)
+            };
+            if state_dst == ai.last_state {
+                continue;
+            }
+            let barrier = r::BarrierDesc::new(att_id, ai.last_state..state_dst);
+            match rp.subpasses.get_mut(ai.barrier_start_index) {
+                Some(past_subpass) => {
+                    let split = barrier.split();
+                    past_subpass.pre_barriers.push(split.start);
+                    rp.post_barriers.push(split.end);
+                }
+                None => rp.post_barriers.push(barrier),
+            }
+        }
+
+        Ok(rp)
+    }
+
+    unsafe fn create_pipeline_layout<IS, IR>(
+        &self,
+        sets: IS,
+        push_constant_ranges: IR,
+    ) -> Result<r::PipelineLayout, d::OutOfMemory>
+    where
+        IS: IntoIterator,
+        IS::Item: Borrow<r::DescriptorSetLayout>,
+        IR: IntoIterator,
+        IR::Item: Borrow<(pso::ShaderStageFlags, Range<u32>)>,
+    {
+        // Pipeline layouts are implemented as RootSignature for D3D12.
+        //
+        // Push Constants are implemented as root constants.
+        //
+        // Each descriptor set layout will be one table entry of the root signature.
+        // We have the additional restriction that SRV/CBV/UAV and samplers need to be
+        // separated, so each set layout will actually occupy up to 2 entries!
+        // SRV/CBV/UAV tables are added to the signature first, then Sampler tables,
+        // and finally dynamic uniform descriptors.
+        //
+        // Dynamic uniform buffers are implemented as root descriptors.
+        // This allows us to handle the dynamic offsets properly, which would not be feasible
+        // with a combination of root constant and descriptor table.
+        //
+        // Root signature layout:
+        //     Root Constants: Register: Offset/4, Space: 0
+        //     ...
+        //     DescriptorTable0: Space: 1 (SrvCbvUav)
+        //     DescriptorTable0: Space: 1 (Sampler)
+        //     Root Descriptors 0
+        //     DescriptorTable1: Space: 2 (SrvCbvUav)
+        //     Root Descriptors 1
+        //         ...
+
+        let sets = sets.into_iter().collect::<Vec<_>>();
+
+        let mut root_offset = 0u32;
+        let root_constants = root_constants::split(push_constant_ranges)
+            .iter()
+            .map(|constant| {
+                assert!(constant.range.start <= constant.range.end);
+                root_offset += constant.range.end - constant.range.start;
+
+                RootConstant {
+                    stages: constant.stages,
+                    range: constant.range.start..constant.range.end,
+                }
+            })
+            .collect::<Vec<_>>();
+
+        info!(
+            "Creating a pipeline layout with {} sets and {} root constants",
+            sets.len(),
+            root_constants.len()
+        );
+
+        // Number of elements in the root signature.
+        // Guarantees that no re-allocation is done, and our pointers are valid.
+        let mut parameters = Vec::with_capacity(root_constants.len() + sets.len() * 2);
+        let mut parameter_offsets = Vec::with_capacity(parameters.capacity());
+
+        // Convert root signature descriptions into root signature parameters.
+        for root_constant in root_constants.iter() {
+            debug!(
+                "\tRoot constant set={} range {:?}",
+                ROOT_CONSTANT_SPACE, root_constant.range
+            );
+            parameter_offsets.push(root_constant.range.start);
+            parameters.push(native::RootParameter::constants(
+                conv::map_shader_visibility(root_constant.stages),
+                native::Binding {
+                    register: root_constant.range.start as _,
+                    space: ROOT_CONSTANT_SPACE,
+                },
+                (root_constant.range.end - root_constant.range.start) as _,
+            ));
+        }
+
+        // Offset of `spaceN` for descriptor tables. Root constants will be in
+        // `space0`.
+        // This has to match `patch_spirv_resources` logic.
+        let root_space_offset = if !root_constants.is_empty() { 1 } else { 0 };
+
+        // Collect the whole number of bindings we will create upfront.
+ // It allows us to preallocate enough storage to avoid reallocation, + // which could cause invalid pointers. + let total = sets + .iter() + .map(|desc_set| { + let mut sum = 0; + for binding in desc_set.borrow().bindings.iter() { + let content = r::DescriptorContent::from(binding.ty); + if !content.is_dynamic() { + sum += content.bits().count_ones() as usize; + } + } + sum + }) + .sum(); + let mut ranges = Vec::with_capacity(total); + + let elements = sets + .iter() + .enumerate() + .map(|(i, set)| { + let set = set.borrow(); + let space = (root_space_offset + i) as u32; + let mut table_type = r::SetTableTypes::empty(); + let root_table_offset = root_offset; + + //TODO: split between sampler and non-sampler tables + let visibility = conv::map_shader_visibility( + set.bindings + .iter() + .fold(pso::ShaderStageFlags::empty(), |u, bind| { + u | bind.stage_flags + }), + ); + + for bind in set.bindings.iter() { + debug!("\tRange {:?} at space={}", bind, space); + } + + let describe = |bind: &pso::DescriptorSetLayoutBinding, ty| { + native::DescriptorRange::new( + ty, + bind.count as _, + native::Binding { + register: bind.binding as _, + space, + }, + d3d12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + ) + }; + + let mut mutable_bindings = auxil::FastHashSet::default(); + + // SRV/CBV/UAV descriptor tables + let mut range_base = ranges.len(); + for bind in set.bindings.iter() { + let content = r::DescriptorContent::from(bind.ty); + if !content.is_dynamic() { + // Descriptor table ranges + if content.contains(r::DescriptorContent::CBV) { + ranges.push(describe(bind, native::DescriptorRangeType::CBV)); + } + if content.contains(r::DescriptorContent::SRV) { + ranges.push(describe(bind, native::DescriptorRangeType::SRV)); + } + if content.contains(r::DescriptorContent::UAV) { + ranges.push(describe(bind, native::DescriptorRangeType::UAV)); + mutable_bindings.insert(bind.binding); + } + } + } + if ranges.len() > range_base { + parameter_offsets.push(root_offset); + parameters.push(native::RootParameter::descriptor_table( + visibility, + &ranges[range_base..], + )); + table_type |= r::SRV_CBV_UAV; + root_offset += 1; + } + + // Sampler descriptor tables + range_base = ranges.len(); + for bind in set.bindings.iter() { + let content = r::DescriptorContent::from(bind.ty); + if content.contains(r::DescriptorContent::SAMPLER) { + ranges.push(describe(bind, native::DescriptorRangeType::Sampler)); + } + } + if ranges.len() > range_base { + parameter_offsets.push(root_offset); + parameters.push(native::RootParameter::descriptor_table( + visibility, + &ranges[range_base..], + )); + table_type |= r::SAMPLERS; + root_offset += 1; + } + + // Root (dynamic) descriptor tables + for bind in set.bindings.iter() { + let content = r::DescriptorContent::from(bind.ty); + if content.is_dynamic() { + let binding = native::Binding { + register: bind.binding as _, + space, + }; + + if content.contains(r::DescriptorContent::CBV) { + parameter_offsets.push(root_offset); + parameters + .push(native::RootParameter::cbv_descriptor(visibility, binding)); + root_offset += 2; // root CBV costs 2 words + } + if content.contains(r::DescriptorContent::SRV) { + parameter_offsets.push(root_offset); + parameters + .push(native::RootParameter::srv_descriptor(visibility, binding)); + root_offset += 2; // root SRV costs 2 words + } + if content.contains(r::DescriptorContent::UAV) { + parameter_offsets.push(root_offset); + parameters + .push(native::RootParameter::uav_descriptor(visibility, binding)); + root_offset += 2; // root UAV costs 2 
words + } + } + } + + r::RootElement { + table: r::RootTable { + ty: table_type, + offset: root_table_offset as _, + }, + mutable_bindings, + } + }) + .collect(); + + // Ensure that we didn't reallocate! + debug_assert_eq!(ranges.len(), total); + assert_eq!(parameters.len(), parameter_offsets.len()); + + // TODO: error handling + let (signature_raw, error) = match self.library.serialize_root_signature( + native::RootSignatureVersion::V1_0, + ¶meters, + &[], + native::RootSignatureFlags::ALLOW_IA_INPUT_LAYOUT, + ) { + Ok((pair, hr)) if winerror::SUCCEEDED(hr) => pair, + Ok((_, hr)) => panic!("Can't serialize root signature: {:?}", hr), + Err(e) => panic!("Can't find serialization function: {:?}", e), + }; + + if !error.is_null() { + error!( + "Root signature serialization error: {:?}", + error.as_c_str().to_str().unwrap() + ); + error.destroy(); + } + + // TODO: error handling + let (signature, _hr) = self.raw.create_root_signature(signature_raw, 0); + signature_raw.destroy(); + + Ok(r::PipelineLayout { + shared: Arc::new(r::PipelineShared { + signature, + constants: root_constants, + parameter_offsets, + total_slots: root_offset, + }), + elements, + }) + } + + unsafe fn create_pipeline_cache(&self, _data: Option<&[u8]>) -> Result<(), d::OutOfMemory> { + Ok(()) + } + + unsafe fn get_pipeline_cache_data(&self, _cache: &()) -> Result<Vec<u8>, d::OutOfMemory> { + //empty + Ok(Vec::new()) + } + + unsafe fn destroy_pipeline_cache(&self, _: ()) { + //empty + } + + unsafe fn merge_pipeline_caches<I>(&self, _: &(), _: I) -> Result<(), d::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<()>, + { + //empty + Ok(()) + } + + unsafe fn create_graphics_pipeline<'a>( + &self, + desc: &pso::GraphicsPipelineDesc<'a, B>, + _cache: Option<&()>, + ) -> Result<r::GraphicsPipeline, pso::CreationError> { + enum ShaderBc { + Owned(native::Blob), + Borrowed(native::Blob), + None, + } + let features = &self.features; + impl ShaderBc { + pub fn shader(&self) -> native::Shader { + match *self { + ShaderBc::Owned(ref bc) | ShaderBc::Borrowed(ref bc) => { + native::Shader::from_blob(*bc) + } + ShaderBc::None => native::Shader::null(), + } + } + } + + let build_shader = |stage: ShaderStage, source: Option<&pso::EntryPoint<'a, B>>| { + let source = match source { + Some(src) => src, + None => return Ok(ShaderBc::None), + }; + + match Self::extract_entry_point(stage, source, desc.layout, features) { + Ok((shader, true)) => Ok(ShaderBc::Owned(shader)), + Ok((shader, false)) => Ok(ShaderBc::Borrowed(shader)), + Err(err) => Err(pso::CreationError::Shader(err)), + } + }; + + let vertex_buffers: Vec<pso::VertexBufferDesc> = Vec::new(); + let attributes: Vec<pso::AttributeDesc> = Vec::new(); + let mesh_input_assembler = pso::InputAssemblerDesc::new(pso::Primitive::TriangleList); + let (vertex_buffers, attributes, input_assembler, vs, gs, hs, ds, _, _) = + match desc.primitive_assembler { + pso::PrimitiveAssemblerDesc::Vertex { + buffers, + attributes, + ref input_assembler, + ref vertex, + ref tessellation, + ref geometry, + } => { + let (hs, ds) = if let Some(ts) = tessellation { + (Some(&ts.0), Some(&ts.1)) + } else { + (None, None) + }; + + ( + buffers, + attributes, + input_assembler, + Some(vertex), + geometry.as_ref(), + hs, + ds, + None, + None, + ) + } + pso::PrimitiveAssemblerDesc::Mesh { ref task, ref mesh } => ( + &vertex_buffers[..], + &attributes[..], + &mesh_input_assembler, + None, + None, + None, + None, + task.as_ref(), + Some(mesh), + ), + }; + + let vertex_semantic_remapping = if let Some(ref vs) = vs 
{
+            // If we have a pre-compiled shader, we've lost the information we need to
+            // recover the remapping, so just pretend like this workaround never existed
+            // and hope for the best.
+            if let crate::resource::ShaderModule::Spirv(ref spv) = vs.module {
+                Some(
+                    Self::introspect_spirv_vertex_semantic_remapping(spv)
+                        .map_err(pso::CreationError::Shader)?,
+                )
+            } else {
+                None
+            }
+        } else {
+            None
+        };
+
+        let vs = build_shader(ShaderStage::Vertex, vs)?;
+        let gs = build_shader(ShaderStage::Geometry, gs)?;
+        // `hs` holds the hull (control) entry point and `ds` the domain (evaluation)
+        // one, so compile each with its matching stage.
+        let hs = build_shader(ShaderStage::Hull, hs)?;
+        let ds = build_shader(ShaderStage::Domain, ds)?;
+        let ps = build_shader(ShaderStage::Fragment, desc.fragment.as_ref())?;
+
+        // Rebind vertex buffers, see native.rs for more details.
+        let mut vertex_bindings = [None; MAX_VERTEX_BUFFERS];
+        let mut vertex_strides = [0; MAX_VERTEX_BUFFERS];
+
+        for buffer in vertex_buffers {
+            vertex_strides[buffer.binding as usize] = buffer.stride;
+        }
+        // Fill in identity mapping where we don't need to adjust anything.
+        for attrib in attributes {
+            let binding = attrib.binding as usize;
+            let stride = vertex_strides[attrib.binding as usize];
+            if attrib.element.offset < stride {
+                vertex_bindings[binding] = Some(r::VertexBinding {
+                    stride: vertex_strides[attrib.binding as usize],
+                    offset: 0,
+                    mapped_binding: binding,
+                });
+            }
+        }
+
+        // See [`introspect_spirv_vertex_semantic_remapping`] for details of why this is needed.
+        let semantics: Vec<_> = attributes
+            .iter()
+            .map(|attrib| {
+                let semantics = vertex_semantic_remapping
+                    .as_ref()
+                    .and_then(|map| map.get(&attrib.location));
+                match semantics {
+                    Some(Some((major, minor))) => {
+                        let name = std::borrow::Cow::Owned(format!("TEXCOORD{}_\0", major));
+                        let location = *minor;
+                        (name, location)
+                    }
+                    _ => {
+                        let name = std::borrow::Cow::Borrowed("TEXCOORD\0");
+                        let location = attrib.location;
+                        (name, location)
+                    }
+                }
+            })
+            .collect();
+
+        // Define input element descriptions
+        let input_element_descs = attributes
+            .iter()
+            .zip(semantics.iter())
+            .filter_map(|(attrib, (semantic_name, semantic_index))| {
+                let buffer_desc = match vertex_buffers
+                    .iter()
+                    .find(|buffer_desc| buffer_desc.binding == attrib.binding)
+                {
+                    Some(buffer_desc) => buffer_desc,
+                    None => {
+                        error!(
+                            "Couldn't find associated vertex buffer description {:?}",
+                            attrib.binding
+                        );
+                        return Some(Err(pso::CreationError::Other));
+                    }
+                };
+
+                let (slot_class, step_rate) = match buffer_desc.rate {
+                    VertexInputRate::Vertex => {
+                        (d3d12::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0)
+                    }
+                    VertexInputRate::Instance(divisor) => {
+                        (d3d12::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, divisor)
+                    }
+                };
+                let format = attrib.element.format;
+
+                // Check if we need to add a new remapping in case the offset is
+                // higher than the vertex stride.
+                // In this case we rebase the attribute to zero offset.
+                let binding = attrib.binding as usize;
+                let stride = vertex_strides[binding];
+                let offset = attrib.element.offset;
+                let (input_slot, offset) = if stride <= offset {
+                    // Number of input attributes may not exceed bindings, see limits.
+                    // We will always find at least one free binding.
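+                    // For example (hypothetical numbers): with a vertex stride of 16 and
+                    // an attribute offset of 24, the attribute is rebased onto a free
+                    // slot whose binding starts 24 bytes into the same buffer, so its
+                    // own offset becomes 0.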
+ let mapping = vertex_bindings.iter().position(Option::is_none).unwrap(); + vertex_bindings[mapping] = Some(r::VertexBinding { + stride: vertex_strides[binding], + offset: offset, + mapped_binding: binding, + }); + + (mapping, 0) + } else { + (binding, offset) + }; + + Some(Ok(d3d12::D3D12_INPUT_ELEMENT_DESC { + SemanticName: semantic_name.as_ptr() as *const _, // Semantic name used by SPIRV-Cross + SemanticIndex: *semantic_index, + Format: match conv::map_format(format) { + Some(fm) => fm, + None => { + error!("Unable to find DXGI format for {:?}", format); + return Some(Err(pso::CreationError::Other)); + } + }, + InputSlot: input_slot as _, + AlignedByteOffset: offset, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate as _, + })) + }) + .collect::<Result<Vec<_>, _>>()?; + + // TODO: check maximum number of rtvs + // Get associated subpass information + let pass = { + let subpass = &desc.subpass; + match subpass.main_pass.subpasses.get(subpass.index as usize) { + Some(subpass) => subpass, + None => return Err(pso::CreationError::InvalidSubpass(subpass.index)), + } + }; + + // Get color attachment formats from subpass + let (rtvs, num_rtvs) = { + let mut rtvs = [dxgiformat::DXGI_FORMAT_UNKNOWN; + d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + let mut num_rtvs = 0; + for (rtv, target) in rtvs.iter_mut().zip(pass.color_attachments.iter()) { + let format = desc.subpass.main_pass.attachments[target.0].format; + *rtv = format + .and_then(conv::map_format) + .unwrap_or(dxgiformat::DXGI_FORMAT_UNKNOWN); + num_rtvs += 1; + } + (rtvs, num_rtvs) + }; + + let sample_desc = dxgitype::DXGI_SAMPLE_DESC { + Count: match desc.multisampling { + Some(ref ms) => ms.rasterization_samples as _, + None => 1, + }, + Quality: 0, + }; + + // Setup pipeline description + let pso_desc = d3d12::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: desc.layout.shared.signature.as_mut_ptr(), + VS: *vs.shader(), + PS: *ps.shader(), + GS: *gs.shader(), + DS: *ds.shader(), + HS: *hs.shader(), + StreamOutput: d3d12::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }, + BlendState: d3d12::D3D12_BLEND_DESC { + AlphaToCoverageEnable: desc.multisampling.as_ref().map_or(FALSE, |ms| { + if ms.alpha_coverage { + TRUE + } else { + FALSE + } + }), + IndependentBlendEnable: TRUE, + RenderTarget: conv::map_render_targets(&desc.blender.targets), + }, + SampleMask: match desc.multisampling { + Some(ref ms) => ms.sample_mask as u32, + None => UINT::max_value(), + }, + RasterizerState: conv::map_rasterizer(&desc.rasterizer, desc.multisampling.is_some()), + DepthStencilState: conv::map_depth_stencil(&desc.depth_stencil), + InputLayout: d3d12::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: if input_element_descs.is_empty() { + ptr::null() + } else { + input_element_descs.as_ptr() + }, + NumElements: input_element_descs.len() as u32, + }, + IBStripCutValue: match input_assembler.restart_index { + Some(hal::IndexType::U16) => d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, + Some(hal::IndexType::U32) => d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF, + None => d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + }, + PrimitiveTopologyType: conv::map_topology_type(input_assembler.primitive), + NumRenderTargets: num_rtvs, + RTVFormats: rtvs, + DSVFormat: pass + .depth_stencil_attachment + .and_then(|att_ref| { + desc.subpass.main_pass.attachments[att_ref.0] + .format + .and_then(|f| 
conv::map_format_dsv(f.base_format().0))
+                })
+                .unwrap_or(dxgiformat::DXGI_FORMAT_UNKNOWN),
+            SampleDesc: sample_desc,
+            NodeMask: 0,
+            CachedPSO: d3d12::D3D12_CACHED_PIPELINE_STATE {
+                pCachedBlob: ptr::null(),
+                CachedBlobSizeInBytes: 0,
+            },
+            Flags: d3d12::D3D12_PIPELINE_STATE_FLAG_NONE,
+        };
+        let topology = conv::map_topology(input_assembler);
+
+        // Create PSO
+        let mut pipeline = native::PipelineState::null();
+        let hr = if desc.depth_stencil.depth_bounds {
+            // The DepthBoundsTestEnable option isn't available in the original D3D12_GRAPHICS_PIPELINE_STATE_DESC struct.
+            // Instead, we must use the newer subobject stream method.
+            let (device2, hr) = self.raw.cast::<d3d12::ID3D12Device2>();
+            if winerror::SUCCEEDED(hr) {
+                let mut pss_stream = GraphicsPipelineStateSubobjectStream::new(&pso_desc, true);
+                let pss_desc = d3d12::D3D12_PIPELINE_STATE_STREAM_DESC {
+                    SizeInBytes: mem::size_of_val(&pss_stream),
+                    pPipelineStateSubobjectStream: &mut pss_stream as *mut _ as _,
+                };
+                device2.CreatePipelineState(
+                    &pss_desc,
+                    &d3d12::ID3D12PipelineState::uuidof(),
+                    pipeline.mut_void(),
+                )
+            } else {
+                hr
+            }
+        } else {
+            self.raw.clone().CreateGraphicsPipelineState(
+                &pso_desc,
+                &d3d12::ID3D12PipelineState::uuidof(),
+                pipeline.mut_void(),
+            )
+        };
+
+        let destroy_shader = |shader: ShaderBc| {
+            if let ShaderBc::Owned(bc) = shader {
+                bc.destroy();
+            }
+        };
+
+        destroy_shader(vs);
+        destroy_shader(ps);
+        destroy_shader(gs);
+        destroy_shader(hs);
+        destroy_shader(ds);
+
+        if winerror::SUCCEEDED(hr) {
+            let mut baked_states = desc.baked_states.clone();
+            if !desc.depth_stencil.depth_bounds {
+                baked_states.depth_bounds = None;
+            }
+
+            Ok(r::GraphicsPipeline {
+                raw: pipeline,
+                shared: Arc::clone(&desc.layout.shared),
+                topology,
+                vertex_bindings,
+                baked_states,
+            })
+        } else {
+            error!("Failed to create graphics pipeline: {:x}", hr);
+            Err(pso::CreationError::Other)
+        }
+    }
+
+    unsafe fn create_compute_pipeline<'a>(
+        &self,
+        desc: &pso::ComputePipelineDesc<'a, B>,
+        _cache: Option<&()>,
+    ) -> Result<r::ComputePipeline, pso::CreationError> {
+        let (cs, cs_destroy) = Self::extract_entry_point(
+            ShaderStage::Compute,
+            &desc.shader,
+            desc.layout,
+            &self.features,
+        )
+        .map_err(pso::CreationError::Shader)?;
+
+        let (pipeline, hr) = self.raw.create_compute_pipeline_state(
+            desc.layout.shared.signature,
+            native::Shader::from_blob(cs),
+            0,
+            native::CachedPSO::null(),
+            native::PipelineStateFlags::empty(),
+        );
+
+        if cs_destroy {
+            cs.destroy();
+        }
+
+        if winerror::SUCCEEDED(hr) {
+            Ok(r::ComputePipeline {
+                raw: pipeline,
+                shared: Arc::clone(&desc.layout.shared),
+            })
+        } else {
+            error!("Failed to create compute pipeline: {:x}", hr);
+            Err(pso::CreationError::Other)
+        }
+    }
+
+    unsafe fn create_framebuffer<I>(
+        &self,
+        _renderpass: &r::RenderPass,
+        attachments: I,
+        extent: image::Extent,
+    ) -> Result<r::Framebuffer, d::OutOfMemory>
+    where
+        I: IntoIterator,
+        I::Item: Borrow<r::ImageView>,
+    {
+        Ok(r::Framebuffer {
+            attachments: attachments.into_iter().map(|att| *att.borrow()).collect(),
+            layers: extent.depth as _,
+        })
+    }
+
+    unsafe fn create_shader_module(
+        &self,
+        raw_data: &[u32],
+    ) -> Result<r::ShaderModule, d::ShaderError> {
+        Ok(r::ShaderModule::Spirv(raw_data.into()))
+    }
+
+    unsafe fn create_buffer(
+        &self,
+        mut size: u64,
+        usage: buffer::Usage,
+    ) -> Result<r::Buffer, buffer::CreationError> {
+        if usage.contains(buffer::Usage::UNIFORM) {
+            // Constant buffer view sizes need to be aligned.
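+            // For example (hypothetical size): a 300-byte uniform buffer request is
+            // padded to (300 + 255) & !255 = 512 bytes.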
+ // Coupled with the offset alignment we can enforce an aligned CBV size + // on descriptor updates. + let mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1; + size = (size + mask) & !mask; + } + if usage.contains(buffer::Usage::TRANSFER_DST) { + // minimum of 1 word for the clear UAV + size = size.max(4); + } + + let type_mask_shift = if self.private_caps.heterogeneous_resource_heaps { + MEM_TYPE_UNIVERSAL_SHIFT + } else { + MEM_TYPE_BUFFER_SHIFT + }; + + let requirements = memory::Requirements { + size, + alignment: d3d12::D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT as u64, + type_mask: MEM_TYPE_MASK << type_mask_shift, + }; + + Ok(r::Buffer::Unbound(r::BufferUnbound { + requirements, + usage, + name: None, + })) + } + + unsafe fn get_buffer_requirements(&self, buffer: &r::Buffer) -> Requirements { + match buffer { + r::Buffer::Unbound(b) => b.requirements, + r::Buffer::Bound(b) => b.requirements, + } + } + + unsafe fn bind_buffer_memory( + &self, + memory: &r::Memory, + offset: u64, + buffer: &mut r::Buffer, + ) -> Result<(), d::BindError> { + let buffer_unbound = buffer.expect_unbound(); + if buffer_unbound.requirements.type_mask & (1 << memory.type_id) == 0 { + error!( + "Bind memory failure: supported mask 0x{:x}, given id {}", + buffer_unbound.requirements.type_mask, memory.type_id + ); + return Err(d::BindError::WrongMemory); + } + if offset + buffer_unbound.requirements.size > memory.size { + return Err(d::BindError::OutOfBounds); + } + + let mut resource = native::Resource::null(); + let desc = d3d12::D3D12_RESOURCE_DESC { + Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: buffer_unbound.requirements.size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: conv::map_buffer_flags(buffer_unbound.usage), + }; + + assert_eq!( + winerror::S_OK, + self.raw.clone().CreatePlacedResource( + memory.heap.as_mut_ptr(), + offset, + &desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + ); + + if let Some(ref name) = buffer_unbound.name { + resource.SetName(name.as_ptr()); + } + + let clear_uav = if buffer_unbound.usage.contains(buffer::Usage::TRANSFER_DST) { + let handle = self.srv_uav_pool.lock().alloc_handle(); + let mut view_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + + *view_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_UAV { + FirstElement: 0, + NumElements: (buffer_unbound.requirements.size / 4) as _, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: d3d12::D3D12_BUFFER_UAV_FLAG_RAW, + }; + + self.raw.CreateUnorderedAccessView( + resource.as_mut_ptr(), + ptr::null_mut(), + &view_desc, + handle.raw, + ); + Some(handle) + } else { + None + }; + + *buffer = r::Buffer::Bound(r::BufferBound { + resource, + requirements: buffer_unbound.requirements, + clear_uav, + }); + + Ok(()) + } + + unsafe fn create_buffer_view( + &self, + buffer: &r::Buffer, + format: Option<format::Format>, + sub: buffer::SubRange, + ) -> Result<r::BufferView, buffer::ViewCreationError> { + let buffer = buffer.expect_bound(); + let buffer_features = { + let idx = format.map(|fmt| fmt as usize).unwrap_or(0); + self.format_properties + .resolve(idx) + .properties + .buffer_features + }; + let 
(format, format_desc) = match format.and_then(conv::map_format) { + Some(fmt) => (fmt, format.unwrap().surface_desc()), + None => return Err(buffer::ViewCreationError::UnsupportedFormat(format)), + }; + + let start = sub.offset; + let size = sub.size.unwrap_or(buffer.requirements.size - start); + + let bytes_per_texel = (format_desc.bits / 8) as u64; + // Check if it adheres to the texel buffer offset limit + assert_eq!(start % bytes_per_texel, 0); + let first_element = start / bytes_per_texel; + let num_elements = size / bytes_per_texel; // rounds down to next smaller size + + let handle_srv = if buffer_features.contains(format::BufferFeature::UNIFORM_TEXEL) { + let mut desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: format, + ViewDimension: d3d12::D3D12_SRV_DIMENSION_BUFFER, + Shader4ComponentMapping: IDENTITY_MAPPING, + u: mem::zeroed(), + }; + + *desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_SRV { + FirstElement: first_element, + NumElements: num_elements as _, + StructureByteStride: bytes_per_texel as _, + Flags: d3d12::D3D12_BUFFER_SRV_FLAG_NONE, + }; + + let handle = self.srv_uav_pool.lock().alloc_handle(); + self.raw.clone().CreateShaderResourceView( + buffer.resource.as_mut_ptr(), + &desc, + handle.raw, + ); + Some(handle) + } else { + None + }; + + let handle_uav = if buffer_features.intersects( + format::BufferFeature::STORAGE_TEXEL | format::BufferFeature::STORAGE_TEXEL_ATOMIC, + ) { + let mut desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: format, + ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + + *desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_UAV { + FirstElement: first_element, + NumElements: num_elements as _, + StructureByteStride: bytes_per_texel as _, + Flags: d3d12::D3D12_BUFFER_UAV_FLAG_NONE, + CounterOffsetInBytes: 0, + }; + + let handle = self.srv_uav_pool.lock().alloc_handle(); + self.raw.clone().CreateUnorderedAccessView( + buffer.resource.as_mut_ptr(), + ptr::null_mut(), + &desc, + handle.raw, + ); + Some(handle) + } else { + None + }; + + return Ok(r::BufferView { + handle_srv, + handle_uav, + }); + } + + unsafe fn create_image( + &self, + kind: image::Kind, + mip_levels: image::Level, + format: format::Format, + tiling: image::Tiling, + usage: image::Usage, + view_caps: image::ViewCapabilities, + ) -> Result<r::Image, image::CreationError> { + assert!(mip_levels <= kind.compute_num_levels()); + + let base_format = format.base_format(); + let format_desc = base_format.0.desc(); + let bytes_per_block = (format_desc.bits / 8) as _; + let block_dim = format_desc.dim; + let view_format = conv::map_format(format); + let extent = kind.extent(); + + let format_info = self.format_properties.resolve(format as usize); + let (layout, features) = match tiling { + image::Tiling::Optimal => ( + d3d12::D3D12_TEXTURE_LAYOUT_UNKNOWN, + format_info.properties.optimal_tiling, + ), + image::Tiling::Linear => ( + d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + format_info.properties.linear_tiling, + ), + }; + if format_info.sample_count_mask & kind.num_samples() == 0 { + return Err(image::CreationError::Samples(kind.num_samples())); + } + + let desc = d3d12::D3D12_RESOURCE_DESC { + Dimension: match kind { + image::Kind::D1(..) => d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE1D, + image::Kind::D2(..) => d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE2D, + image::Kind::D3(..) 
=> d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE3D, + }, + Alignment: 0, + Width: extent.width as _, + Height: extent.height as _, + DepthOrArraySize: if extent.depth > 1 { + extent.depth as _ + } else { + kind.num_layers() as _ + }, + MipLevels: mip_levels as _, + Format: if format_desc.is_compressed() { + view_format.unwrap() + } else { + match conv::map_surface_type(base_format.0) { + Some(format) => format, + None => return Err(image::CreationError::Format(format)), + } + }, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: kind.num_samples() as _, + Quality: 0, + }, + Layout: layout, + Flags: conv::map_image_flags(usage, features), + }; + + let alloc_info = self.raw.clone().GetResourceAllocationInfo(0, 1, &desc); + + // Image flags which require RT/DS heap due to internal implementation. + let target_flags = d3d12::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET + | d3d12::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + let type_mask_shift = if self.private_caps.heterogeneous_resource_heaps { + MEM_TYPE_UNIVERSAL_SHIFT + } else if desc.Flags & target_flags != 0 { + MEM_TYPE_TARGET_SHIFT + } else { + MEM_TYPE_IMAGE_SHIFT + }; + + Ok(r::Image::Unbound(r::ImageUnbound { + view_format, + dsv_format: conv::map_format_dsv(base_format.0), + desc, + requirements: memory::Requirements { + size: alloc_info.SizeInBytes, + alignment: alloc_info.Alignment, + type_mask: MEM_TYPE_MASK << type_mask_shift, + }, + format, + kind, + mip_levels, + usage, + tiling, + view_caps, + bytes_per_block, + block_dim, + name: None, + })) + } + + unsafe fn get_image_requirements(&self, image: &r::Image) -> Requirements { + match image { + r::Image::Bound(i) => i.requirements, + r::Image::Unbound(i) => i.requirements, + } + } + + unsafe fn get_image_subresource_footprint( + &self, + image: &r::Image, + sub: image::Subresource, + ) -> image::SubresourceFootprint { + let mut num_rows = 0; + let mut total_bytes = 0; + let _desc = match image { + r::Image::Bound(i) => i.descriptor, + r::Image::Unbound(i) => i.desc, + }; + let footprint = { + let mut footprint = mem::zeroed(); + self.raw.GetCopyableFootprints( + image.get_desc(), + image.calc_subresource(sub.level as _, sub.layer as _, 0), + 1, + 0, + &mut footprint, + &mut num_rows, + ptr::null_mut(), // row size in bytes + &mut total_bytes, + ); + footprint + }; + + let depth_pitch = (footprint.Footprint.RowPitch * num_rows) as buffer::Offset; + let array_pitch = footprint.Footprint.Depth as buffer::Offset * depth_pitch; + image::SubresourceFootprint { + slice: footprint.Offset..footprint.Offset + total_bytes, + row_pitch: footprint.Footprint.RowPitch as _, + depth_pitch, + array_pitch, + } + } + + unsafe fn bind_image_memory( + &self, + memory: &r::Memory, + offset: u64, + image: &mut r::Image, + ) -> Result<(), d::BindError> { + use self::image::Usage; + + let image_unbound = image.expect_unbound(); + if image_unbound.requirements.type_mask & (1 << memory.type_id) == 0 { + error!( + "Bind memory failure: supported mask 0x{:x}, given id {}", + image_unbound.requirements.type_mask, memory.type_id + ); + return Err(d::BindError::WrongMemory); + } + if offset + image_unbound.requirements.size > memory.size { + return Err(d::BindError::OutOfBounds); + } + + let mut resource = native::Resource::null(); + let num_layers = image_unbound.kind.num_layers(); + + assert_eq!( + winerror::S_OK, + self.raw.clone().CreatePlacedResource( + memory.heap.as_mut_ptr(), + offset, + &image_unbound.desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12::ID3D12Resource::uuidof(), + 
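+                // Placed resources live inside the bound heap at the given byte
+                // offset; the bounds were validated above, so creation is asserted
+                // to succeed.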
resource.mut_void(), + ) + ); + + if let Some(ref name) = image_unbound.name { + resource.SetName(name.as_ptr()); + } + + let info = ViewInfo { + resource, + kind: image_unbound.kind, + caps: image::ViewCapabilities::empty(), + view_kind: match image_unbound.kind { + image::Kind::D1(..) => image::ViewKind::D1Array, + image::Kind::D2(..) => image::ViewKind::D2Array, + image::Kind::D3(..) => image::ViewKind::D3, + }, + format: image_unbound.desc.Format, + component_mapping: IDENTITY_MAPPING, + levels: 0..1, + layers: 0..0, + }; + + //TODO: the clear_Xv is incomplete. We should support clearing images created without XXX_ATTACHMENT usage. + // for this, we need to check the format and force the `RENDER_TARGET` flag behind the user's back + // if the format supports being rendered into, allowing us to create clear_Xv + let format_properties = self + .format_properties + .resolve(image_unbound.format as usize) + .properties; + let props = match image_unbound.tiling { + image::Tiling::Optimal => format_properties.optimal_tiling, + image::Tiling::Linear => format_properties.linear_tiling, + }; + let can_clear_color = image_unbound + .usage + .intersects(Usage::TRANSFER_DST | Usage::COLOR_ATTACHMENT) + && props.contains(format::ImageFeature::COLOR_ATTACHMENT); + let can_clear_depth = image_unbound + .usage + .intersects(Usage::TRANSFER_DST | Usage::DEPTH_STENCIL_ATTACHMENT) + && props.contains(format::ImageFeature::DEPTH_STENCIL_ATTACHMENT); + let aspects = image_unbound.format.surface_desc().aspects; + + *image = r::Image::Bound(r::ImageBound { + resource: resource, + place: r::Place::Heap { + raw: memory.heap.clone(), + offset, + }, + surface_type: image_unbound.format.base_format().0, + kind: image_unbound.kind, + mip_levels: image_unbound.mip_levels, + usage: image_unbound.usage, + default_view_format: image_unbound.view_format, + view_caps: image_unbound.view_caps, + descriptor: image_unbound.desc, + clear_cv: if aspects.contains(Aspects::COLOR) && can_clear_color { + let format = image_unbound.view_format.unwrap(); + (0..num_layers) + .map(|layer| { + self.view_image_as_render_target(&ViewInfo { + format, + layers: layer..layer + 1, + ..info.clone() + }) + .unwrap() + }) + .collect() + } else { + Vec::new() + }, + clear_dv: if aspects.contains(Aspects::DEPTH) && can_clear_depth { + let format = image_unbound.dsv_format.unwrap(); + (0..num_layers) + .map(|layer| { + self.view_image_as_depth_stencil(&ViewInfo { + format, + layers: layer..layer + 1, + ..info.clone() + }) + .unwrap() + }) + .collect() + } else { + Vec::new() + }, + clear_sv: if aspects.contains(Aspects::STENCIL) && can_clear_depth { + let format = image_unbound.dsv_format.unwrap(); + (0..num_layers) + .map(|layer| { + self.view_image_as_depth_stencil(&ViewInfo { + format, + layers: layer..layer + 1, + ..info.clone() + }) + .unwrap() + }) + .collect() + } else { + Vec::new() + }, + requirements: image_unbound.requirements, + }); + + Ok(()) + } + + unsafe fn create_image_view( + &self, + image: &r::Image, + view_kind: image::ViewKind, + format: format::Format, + swizzle: format::Swizzle, + range: image::SubresourceRange, + ) -> Result<r::ImageView, image::ViewCreationError> { + let image = image.expect_bound(); + let is_array = image.kind.num_layers() > 1; + let mip_levels = ( + range.level_start, + range.level_start + range.resolve_level_count(image.mip_levels), + ); + let layers = ( + range.layer_start, + range.layer_start + range.resolve_layer_count(image.kind.num_layers()), + ); + let surface_format = format.base_format().0; + 
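+        // The (start, end) pairs above are resolved against the image's full range;
+        // e.g. (hypothetical numbers) `level_start` 1 with an unspecified level count
+        // on a 5-mip image resolves to levels 1..5.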
+ let info = ViewInfo { + resource: image.resource, + kind: image.kind, + caps: image.view_caps, + // D3D12 doesn't allow looking at a single slice of an array as a non-array + view_kind: if is_array && view_kind == image::ViewKind::D2 { + image::ViewKind::D2Array + } else if is_array && view_kind == image::ViewKind::D1 { + image::ViewKind::D1Array + } else { + view_kind + }, + format: conv::map_format(format).ok_or(image::ViewCreationError::BadFormat(format))?, + component_mapping: conv::map_swizzle(swizzle), + levels: mip_levels.0..mip_levels.1, + layers: layers.0..layers.1, + }; + + //Note: we allow RTV/DSV/SRV/UAV views to fail to be created here, + // because we don't know if the user will even need to use them. + + Ok(r::ImageView { + resource: image.resource, + handle_srv: if image + .usage + .intersects(image::Usage::SAMPLED | image::Usage::INPUT_ATTACHMENT) + { + let info = if range.aspects.contains(format::Aspects::DEPTH) { + conv::map_format_shader_depth(surface_format).map(|format| ViewInfo { + format, + ..info.clone() + }) + } else if range.aspects.contains(format::Aspects::STENCIL) { + // Vulkan/gfx expects stencil to be read from the R channel, + // while DX12 exposes it in "G" always. + let new_swizzle = conv::swizzle_rg(swizzle); + conv::map_format_shader_stencil(surface_format).map(|format| ViewInfo { + format, + component_mapping: conv::map_swizzle(new_swizzle), + ..info.clone() + }) + } else { + Some(info.clone()) + }; + if let Some(ref info) = info { + self.view_image_as_shader_resource(&info).ok() + } else { + None + } + } else { + None + }, + handle_rtv: if image.usage.contains(image::Usage::COLOR_ATTACHMENT) { + // This view is not necessarily going to be rendered to, even + // if the image supports that in general. + match self.view_image_as_render_target(&info) { + Ok(handle) => r::RenderTargetHandle::Pool(handle), + Err(_) => r::RenderTargetHandle::None, + } + } else { + r::RenderTargetHandle::None + }, + handle_uav: if image.usage.contains(image::Usage::STORAGE) { + self.view_image_as_storage(&info).ok() + } else { + None + }, + handle_dsv: if image.usage.contains(image::Usage::DEPTH_STENCIL_ATTACHMENT) { + match conv::map_format_dsv(surface_format) { + Some(dsv_format) => self + .view_image_as_depth_stencil(&ViewInfo { + format: dsv_format, + ..info + }) + .ok(), + None => None, + } + } else { + None + }, + dxgi_format: image.default_view_format.unwrap(), + num_levels: image.descriptor.MipLevels as image::Level, + mip_levels, + layers, + kind: info.kind, + }) + } + + unsafe fn create_sampler( + &self, + info: &image::SamplerDesc, + ) -> Result<r::Sampler, d::AllocationError> { + if !info.normalized { + warn!("Sampler with unnormalized coordinates is not supported!"); + } + let handle = match self.samplers.map.lock().entry(info.clone()) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let handle = self.samplers.pool.lock().alloc_handle(); + let info = e.key(); + let op = match info.comparison { + Some(_) => d3d12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, + None => d3d12::D3D12_FILTER_REDUCTION_TYPE_STANDARD, + }; + self.raw.create_sampler( + handle.raw, + conv::map_filter( + info.mag_filter, + info.min_filter, + info.mip_filter, + op, + info.anisotropy_clamp, + ), + [ + conv::map_wrap(info.wrap_mode.0), + conv::map_wrap(info.wrap_mode.1), + conv::map_wrap(info.wrap_mode.2), + ], + info.lod_bias.0, + info.anisotropy_clamp.map_or(0, |aniso| aniso as u32), + conv::map_comparison(info.comparison.unwrap_or(pso::Comparison::Always)), + info.border.into(), 
+ info.lod_range.start.0..info.lod_range.end.0, + ); + *e.insert(handle) + } + }; + Ok(r::Sampler { handle }) + } + + unsafe fn create_descriptor_pool<I>( + &self, + max_sets: usize, + descriptor_pools: I, + _flags: pso::DescriptorPoolCreateFlags, + ) -> Result<r::DescriptorPool, d::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<pso::DescriptorRangeDesc>, + { + // Descriptor pools are implemented as slices of the global descriptor heaps. + // A descriptor pool will occupy a contiguous space in each heap (CBV/SRV/UAV and Sampler) depending + // on the total requested amount of descriptors. + + let mut num_srv_cbv_uav = 0; + let mut num_samplers = 0; + + let descriptor_pools = descriptor_pools + .into_iter() + .map(|desc| *desc.borrow()) + .collect::<Vec<_>>(); + + info!("create_descriptor_pool with {} max sets", max_sets); + for desc in &descriptor_pools { + let content = r::DescriptorContent::from(desc.ty); + debug!("\tcontent {:?}", content); + if content.contains(r::DescriptorContent::CBV) { + num_srv_cbv_uav += desc.count; + } + if content.contains(r::DescriptorContent::SRV) { + num_srv_cbv_uav += desc.count; + } + if content.contains(r::DescriptorContent::UAV) { + num_srv_cbv_uav += desc.count; + } + if content.contains(r::DescriptorContent::SAMPLER) { + num_samplers += desc.count; + } + } + + info!( + "total {} views and {} samplers", + num_srv_cbv_uav, num_samplers + ); + + // Allocate slices of the global GPU descriptor heaps. + let heap_srv_cbv_uav = { + let view_heap = &self.heap_srv_cbv_uav.0; + + let range = match num_srv_cbv_uav { + 0 => 0..0, + _ => self + .heap_srv_cbv_uav + .1 + .lock() + .allocate_range(num_srv_cbv_uav as _) + .map_err(|e| { + warn!("View pool allocation error: {:?}", e); + d::OutOfMemory::Host + })?, + }; + + r::DescriptorHeapSlice { + heap: view_heap.raw.clone(), + handle_size: view_heap.handle_size as _, + range_allocator: RangeAllocator::new(range), + start: view_heap.start, + } + }; + + Ok(r::DescriptorPool { + heap_srv_cbv_uav, + heap_raw_sampler: self.samplers.heap.raw, + pools: descriptor_pools, + max_size: max_sets as _, + }) + } + + unsafe fn create_descriptor_set_layout<I, J>( + &self, + bindings: I, + _immutable_samplers: J, + ) -> Result<r::DescriptorSetLayout, d::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<pso::DescriptorSetLayoutBinding>, + J: IntoIterator, + J::Item: Borrow<r::Sampler>, + { + Ok(r::DescriptorSetLayout { + bindings: bindings.into_iter().map(|b| b.borrow().clone()).collect(), + }) + } + + unsafe fn write_descriptor_sets<'a, I, J>(&self, write_iter: I) + where + I: IntoIterator<Item = pso::DescriptorSetWrite<'a, B, J>>, + J: IntoIterator, + J::Item: Borrow<pso::Descriptor<'a, B>>, + { + let mut descriptor_updater = self.descriptor_updater.lock(); + descriptor_updater.reset(); + + let mut accum = descriptors_cpu::MultiCopyAccumulator::default(); + debug!("write_descriptor_sets"); + + for write in write_iter { + let mut offset = write.array_offset as u64; + let mut target_binding = write.binding as usize; + let mut bind_info = &write.set.binding_infos[target_binding]; + debug!( + "\t{:?} binding {} array offset {}", + bind_info, target_binding, offset + ); + let base_sampler_offset = write.set.sampler_offset(write.binding, write.array_offset); + trace!("\tsampler offset {}", base_sampler_offset); + let mut sampler_offset = base_sampler_offset; + let mut desc_samplers = write.set.sampler_origins.borrow_mut(); + + for descriptor in write.descriptors { + // spill over the writes onto the next binding + while 
offset >= bind_info.count { + assert_eq!(offset, bind_info.count); + target_binding += 1; + bind_info = &write.set.binding_infos[target_binding]; + offset = 0; + } + let mut src_cbv = None; + let mut src_srv = None; + let mut src_uav = None; + + match *descriptor.borrow() { + pso::Descriptor::Buffer(buffer, ref sub) => { + let buffer = buffer.expect_bound(); + + if bind_info.content.is_dynamic() { + // Root Descriptor + let buffer_address = (*buffer.resource).GetGPUVirtualAddress(); + // Descriptor sets need to be externally synchronized according to specification + let dynamic_descriptors = &mut *bind_info.dynamic_descriptors.get(); + dynamic_descriptors[offset as usize].gpu_buffer_location = + buffer_address + sub.offset; + } else { + // Descriptor table + let size = sub.size_to(buffer.requirements.size); + + if bind_info.content.contains(r::DescriptorContent::CBV) { + // Making the size field of buffer requirements for uniform + // buffers a multiple of 256 and setting the required offset + // alignment to 256 allows us to patch the size here. + // We can always enforce the size to be aligned to 256 for + // CBVs without going out-of-bounds. + let mask = + d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC { + BufferLocation: (*buffer.resource).GetGPUVirtualAddress() + + sub.offset, + SizeInBytes: (size as u32 + mask) as u32 & !mask, + }; + let handle = descriptor_updater.alloc_handle(self.raw); + self.raw.CreateConstantBufferView(&desc, handle); + src_cbv = Some(handle); + } + if bind_info.content.contains(r::DescriptorContent::SRV) { + assert_eq!(size % 4, 0); + let mut desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + Shader4ComponentMapping: IDENTITY_MAPPING, + ViewDimension: d3d12::D3D12_SRV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + *desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_SRV { + FirstElement: sub.offset as _, + NumElements: (size / 4) as _, + StructureByteStride: 0, + Flags: d3d12::D3D12_BUFFER_SRV_FLAG_RAW, + }; + let handle = descriptor_updater.alloc_handle(self.raw); + self.raw.CreateShaderResourceView( + buffer.resource.as_mut_ptr(), + &desc, + handle, + ); + src_srv = Some(handle); + } + if bind_info.content.contains(r::DescriptorContent::UAV) { + assert_eq!(size % 4, 0); + let mut desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + *desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_UAV { + FirstElement: sub.offset as _, + NumElements: (size / 4) as _, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: d3d12::D3D12_BUFFER_UAV_FLAG_RAW, + }; + let handle = descriptor_updater.alloc_handle(self.raw); + self.raw.CreateUnorderedAccessView( + buffer.resource.as_mut_ptr(), + ptr::null_mut(), + &desc, + handle, + ); + src_uav = Some(handle); + } + } + } + pso::Descriptor::Image(image, _layout) => { + if bind_info.content.contains(r::DescriptorContent::SRV) { + src_srv = image.handle_srv.map(|h| h.raw); + } + if bind_info.content.contains(r::DescriptorContent::UAV) { + src_uav = image.handle_uav.map(|h| h.raw); + } + } + pso::Descriptor::CombinedImageSampler(image, _layout, sampler) => { + src_srv = image.handle_srv.map(|h| h.raw); + desc_samplers[sampler_offset] = sampler.handle.raw; + sampler_offset += 1; + } + pso::Descriptor::Sampler(sampler) => { + desc_samplers[sampler_offset] = sampler.handle.raw; + sampler_offset += 1; + } + 
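+                        // Note: the sampler arms above only record raw handles into
+                        // `sampler_origins`; `update_samplers` (invoked below once
+                        // `sampler_offset` has advanced) re-copies them into the
+                        // shader-visible sampler heap.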
pso::Descriptor::TexelBuffer(buffer_view) => { + if bind_info.content.contains(r::DescriptorContent::SRV) { + let handle = buffer_view.handle_srv + .expect("SRV handle of the storage texel buffer is zero (not supported by specified format)"); + src_srv = Some(handle.raw); + } + if bind_info.content.contains(r::DescriptorContent::UAV) { + let handle = buffer_view.handle_uav + .expect("UAV handle of the storage texel buffer is zero (not supported by specified format)"); + src_uav = Some(handle.raw); + } + } + } + + if let Some(handle) = src_cbv { + trace!("\tcbv offset {}", offset); + accum.src_views.add(handle, 1); + accum + .dst_views + .add(bind_info.view_range.as_ref().unwrap().at(offset), 1); + } + if let Some(handle) = src_srv { + trace!("\tsrv offset {}", offset); + accum.src_views.add(handle, 1); + accum + .dst_views + .add(bind_info.view_range.as_ref().unwrap().at(offset), 1); + } + if let Some(handle) = src_uav { + let uav_offset = if bind_info.content.contains(r::DescriptorContent::SRV) { + bind_info.count + offset + } else { + offset + }; + trace!("\tuav offset {}", uav_offset); + accum.src_views.add(handle, 1); + accum + .dst_views + .add(bind_info.view_range.as_ref().unwrap().at(uav_offset), 1); + } + + offset += 1; + } + + if sampler_offset != base_sampler_offset { + drop(desc_samplers); + write + .set + .update_samplers(&self.samplers.heap, &self.samplers.origins, &mut accum); + } + } + + accum.flush(self.raw); + } + + unsafe fn copy_descriptor_sets<'a, I>(&self, copy_iter: I) + where + I: IntoIterator, + I::Item: Borrow<pso::DescriptorSetCopy<'a, B>>, + { + let mut accum = descriptors_cpu::MultiCopyAccumulator::default(); + + for copy_wrap in copy_iter { + let copy = copy_wrap.borrow(); + let src_info = ©.src_set.binding_infos[copy.src_binding as usize]; + let dst_info = ©.dst_set.binding_infos[copy.dst_binding as usize]; + + if let (Some(src_range), Some(dst_range)) = + (src_info.view_range.as_ref(), dst_info.view_range.as_ref()) + { + assert!(copy.src_array_offset + copy.count <= src_range.handle.size as usize); + assert!(copy.dst_array_offset + copy.count <= dst_range.handle.size as usize); + let count = copy.count as u32; + accum + .src_views + .add(src_range.at(copy.src_array_offset as _), count); + accum + .dst_views + .add(dst_range.at(copy.dst_array_offset as _), count); + + if (src_info.content & dst_info.content) + .contains(r::DescriptorContent::SRV | r::DescriptorContent::UAV) + { + assert!( + src_info.count as usize + copy.src_array_offset + copy.count + <= src_range.handle.size as usize + ); + assert!( + dst_info.count as usize + copy.dst_array_offset + copy.count + <= dst_range.handle.size as usize + ); + accum.src_views.add( + src_range.at(src_info.count + copy.src_array_offset as u64), + count, + ); + accum.dst_views.add( + dst_range.at(dst_info.count + copy.dst_array_offset as u64), + count, + ); + } + } + + if dst_info.content.contains(r::DescriptorContent::SAMPLER) { + let src_offset = copy + .src_set + .sampler_offset(copy.src_binding, copy.src_array_offset); + let dst_offset = copy + .dst_set + .sampler_offset(copy.dst_binding, copy.dst_array_offset); + let src_samplers = copy.src_set.sampler_origins.borrow(); + let mut dst_samplers = copy.dst_set.sampler_origins.borrow_mut(); + dst_samplers[dst_offset..dst_offset + copy.count] + .copy_from_slice(&src_samplers[src_offset..src_offset + copy.count]); + drop(dst_samplers); + + copy.dst_set.update_samplers( + &self.samplers.heap, + &self.samplers.origins, + &mut accum, + ); + } + } + + 
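+        // Flush the accumulated source/destination ranges with batched
+        // `CopyDescriptors` work on the device.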
accum.flush(self.raw.clone()); + } + + unsafe fn map_memory( + &self, + memory: &r::Memory, + segment: memory::Segment, + ) -> Result<*mut u8, d::MapError> { + let mem = memory + .resource + .expect("Memory not created with a memory type exposing `CPU_VISIBLE`"); + let mut ptr = ptr::null_mut(); + assert_eq!( + winerror::S_OK, + (*mem).Map(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }, &mut ptr) + ); + ptr = ptr.offset(segment.offset as isize); + Ok(ptr as *mut _) + } + + unsafe fn unmap_memory(&self, memory: &r::Memory) { + if let Some(mem) = memory.resource { + (*mem).Unmap(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }); + } + } + + unsafe fn flush_mapped_memory_ranges<'a, I>(&self, ranges: I) -> Result<(), d::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<(&'a r::Memory, memory::Segment)>, + { + for range in ranges { + let &(ref memory, ref segment) = range.borrow(); + if let Some(mem) = memory.resource { + // map and immediately unmap, hoping that dx12 drivers internally cache + // currently mapped buffers. + assert_eq!( + winerror::S_OK, + (*mem).Map(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }, ptr::null_mut()) + ); + + let start = segment.offset; + let end = segment.size.map_or(memory.size, |s| start + s); // TODO: only need to be end of current mapping + + (*mem).Unmap( + 0, + &d3d12::D3D12_RANGE { + Begin: start as _, + End: end as _, + }, + ); + } + } + + Ok(()) + } + + unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, ranges: I) -> Result<(), d::OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<(&'a r::Memory, memory::Segment)>, + { + for range in ranges { + let &(ref memory, ref segment) = range.borrow(); + if let Some(mem) = memory.resource { + let start = segment.offset; + let end = segment.size.map_or(memory.size, |s| start + s); // TODO: only need to be end of current mapping + + // map and immediately unmap, hoping that dx12 drivers internally cache + // currently mapped buffers. 
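+                // `Map` takes the *read* range (the bytes the CPU is about to read),
+                // which is what makes this the invalidation direction; `Unmap` below
+                // passes an empty written range.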
+ assert_eq!( + winerror::S_OK, + (*mem).Map( + 0, + &d3d12::D3D12_RANGE { + Begin: start as _, + End: end as _, + }, + ptr::null_mut(), + ) + ); + + (*mem).Unmap(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }); + } + } + + Ok(()) + } + + fn create_semaphore(&self) -> Result<r::Semaphore, d::OutOfMemory> { + let fence = self.create_fence(false)?; + Ok(r::Semaphore { raw: fence.raw }) + } + + fn create_fence(&self, signalled: bool) -> Result<r::Fence, d::OutOfMemory> { + Ok(r::Fence { + raw: self.create_raw_fence(signalled), + }) + } + + unsafe fn reset_fence(&self, fence: &r::Fence) -> Result<(), d::OutOfMemory> { + assert_eq!(winerror::S_OK, fence.raw.signal(0)); + Ok(()) + } + + unsafe fn wait_for_fences<I>( + &self, + fences: I, + wait: d::WaitFor, + timeout_ns: u64, + ) -> Result<bool, d::OomOrDeviceLost> + where + I: IntoIterator, + I::Item: Borrow<r::Fence>, + { + let fences = fences.into_iter().collect::<Vec<_>>(); + let mut events = self.events.lock(); + for _ in events.len()..fences.len() { + events.push(native::Event::create(false, false)); + } + + for (&event, fence) in events.iter().zip(fences.iter()) { + synchapi::ResetEvent(event.0); + assert_eq!( + winerror::S_OK, + fence.borrow().raw.set_event_on_completion(event, 1) + ); + } + + let all = match wait { + d::WaitFor::Any => FALSE, + d::WaitFor::All => TRUE, + }; + + let hr = { + // This block handles overflow when converting to u32 and always rounds up + // The Vulkan specification allows to wait more than specified + let timeout_ms = { + if timeout_ns > (<u32>::max_value() as u64) * 1_000_000 { + <u32>::max_value() + } else { + ((timeout_ns + 999_999) / 1_000_000) as u32 + } + }; + + synchapi::WaitForMultipleObjects( + fences.len() as u32, + events.as_ptr() as *const _, + all, + timeout_ms, + ) + }; + + const WAIT_OBJECT_LAST: u32 = winbase::WAIT_OBJECT_0 + winnt::MAXIMUM_WAIT_OBJECTS; + const WAIT_ABANDONED_LAST: u32 = winbase::WAIT_ABANDONED_0 + winnt::MAXIMUM_WAIT_OBJECTS; + match hr { + winbase::WAIT_OBJECT_0..=WAIT_OBJECT_LAST => Ok(true), + winbase::WAIT_ABANDONED_0..=WAIT_ABANDONED_LAST => Ok(true), //TODO? 
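+            // WAIT_ABANDONED is only documented for mutex handles; for the event
+            // handles used here it is treated as a successful wait (the `//TODO?`
+            // above marks this as unresolved).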
+ winerror::WAIT_TIMEOUT => Ok(false), + _ => panic!("Unexpected wait status 0x{:X}", hr), + } + } + + unsafe fn get_fence_status(&self, fence: &r::Fence) -> Result<bool, d::DeviceLost> { + match fence.raw.GetCompletedValue() { + 0 => Ok(false), + 1 => Ok(true), + _ => Err(d::DeviceLost), + } + } + + fn create_event(&self) -> Result<(), d::OutOfMemory> { + unimplemented!() + } + + unsafe fn get_event_status(&self, _event: &()) -> Result<bool, d::OomOrDeviceLost> { + unimplemented!() + } + + unsafe fn set_event(&self, _event: &()) -> Result<(), d::OutOfMemory> { + unimplemented!() + } + + unsafe fn reset_event(&self, _event: &()) -> Result<(), d::OutOfMemory> { + unimplemented!() + } + + unsafe fn free_memory(&self, memory: r::Memory) { + memory.heap.destroy(); + if let Some(buffer) = memory.resource { + buffer.destroy(); + } + } + + unsafe fn create_query_pool( + &self, + query_ty: query::Type, + count: query::Id, + ) -> Result<r::QueryPool, query::CreationError> { + let heap_ty = match query_ty { + query::Type::Occlusion => native::QueryHeapType::Occlusion, + query::Type::PipelineStatistics(_) => native::QueryHeapType::PipelineStatistics, + query::Type::Timestamp => native::QueryHeapType::Timestamp, + }; + + let (query_heap, hr) = self.raw.create_query_heap(heap_ty, count, 0); + assert_eq!(winerror::S_OK, hr); + + Ok(r::QueryPool { + raw: query_heap, + ty: heap_ty, + }) + } + + unsafe fn destroy_query_pool(&self, pool: r::QueryPool) { + pool.raw.destroy(); + } + + unsafe fn get_query_pool_results( + &self, + _pool: &r::QueryPool, + _queries: Range<query::Id>, + _data: &mut [u8], + _stride: buffer::Offset, + _flags: query::ResultFlags, + ) -> Result<bool, d::OomOrDeviceLost> { + unimplemented!() + } + + unsafe fn destroy_shader_module(&self, shader_lib: r::ShaderModule) { + if let r::ShaderModule::Compiled(shaders) = shader_lib { + for (_, blob) in shaders { + blob.destroy(); + } + } + } + + unsafe fn destroy_render_pass(&self, _rp: r::RenderPass) { + // Just drop + } + + unsafe fn destroy_pipeline_layout(&self, layout: r::PipelineLayout) { + layout.shared.signature.destroy(); + } + + unsafe fn destroy_graphics_pipeline(&self, pipeline: r::GraphicsPipeline) { + pipeline.raw.destroy(); + } + + unsafe fn destroy_compute_pipeline(&self, pipeline: r::ComputePipeline) { + pipeline.raw.destroy(); + } + + unsafe fn destroy_framebuffer(&self, _fb: r::Framebuffer) { + // Just drop + } + + unsafe fn destroy_buffer(&self, buffer: r::Buffer) { + match buffer { + r::Buffer::Bound(buffer) => { + if let Some(handle) = buffer.clear_uav { + self.srv_uav_pool.lock().free_handle(handle); + } + buffer.resource.destroy(); + } + r::Buffer::Unbound(_) => {} + } + } + + unsafe fn destroy_buffer_view(&self, view: r::BufferView) { + let mut pool = self.srv_uav_pool.lock(); + if let Some(handle) = view.handle_srv { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_uav { + pool.free_handle(handle); + } + } + + unsafe fn destroy_image(&self, image: r::Image) { + match image { + r::Image::Bound(image) => { + let mut dsv_pool = self.dsv_pool.lock(); + for handle in image.clear_cv { + self.rtv_pool.lock().free_handle(handle); + } + for handle in image.clear_dv { + dsv_pool.free_handle(handle); + } + for handle in image.clear_sv { + dsv_pool.free_handle(handle); + } + image.resource.destroy(); + } + r::Image::Unbound(_) => {} + } + } + + unsafe fn destroy_image_view(&self, view: r::ImageView) { + if let Some(handle) = view.handle_srv { + self.srv_uav_pool.lock().free_handle(handle); + } + if let 
Some(handle) = view.handle_uav { + self.srv_uav_pool.lock().free_handle(handle); + } + if let r::RenderTargetHandle::Pool(handle) = view.handle_rtv { + self.rtv_pool.lock().free_handle(handle); + } + if let Some(handle) = view.handle_dsv { + self.dsv_pool.lock().free_handle(handle); + } + } + + unsafe fn destroy_sampler(&self, _sampler: r::Sampler) { + // We don't destroy samplers, they are permanently cached + } + + unsafe fn destroy_descriptor_pool(&self, pool: r::DescriptorPool) { + let view_range = pool.heap_srv_cbv_uav.range_allocator.initial_range(); + if view_range.start < view_range.end { + self.heap_srv_cbv_uav + .1 + .lock() + .free_range(view_range.clone()); + } + } + + unsafe fn destroy_descriptor_set_layout(&self, _layout: r::DescriptorSetLayout) { + // Just drop + } + + unsafe fn destroy_fence(&self, fence: r::Fence) { + fence.raw.destroy(); + } + + unsafe fn destroy_semaphore(&self, semaphore: r::Semaphore) { + semaphore.raw.destroy(); + } + + unsafe fn destroy_event(&self, _event: ()) { + unimplemented!() + } + + fn wait_idle(&self) -> Result<(), d::OutOfMemory> { + for queue in &self.queues { + queue.wait_idle()?; + } + Ok(()) + } + + unsafe fn set_image_name(&self, image: &mut r::Image, name: &str) { + let cwstr = wide_cstr(name); + match *image { + r::Image::Unbound(ref mut image) => image.name = Some(cwstr), + r::Image::Bound(ref image) => { + image.resource.SetName(cwstr.as_ptr()); + } + } + } + + unsafe fn set_buffer_name(&self, buffer: &mut r::Buffer, name: &str) { + let cwstr = wide_cstr(name); + match *buffer { + r::Buffer::Unbound(ref mut buffer) => buffer.name = Some(cwstr), + r::Buffer::Bound(ref buffer) => { + buffer.resource.SetName(cwstr.as_ptr()); + } + } + } + + unsafe fn set_command_buffer_name(&self, command_buffer: &mut cmd::CommandBuffer, name: &str) { + let cwstr = wide_cstr(name); + command_buffer.raw.SetName(cwstr.as_ptr()); + } + + unsafe fn set_semaphore_name(&self, semaphore: &mut r::Semaphore, name: &str) { + let cwstr = wide_cstr(name); + semaphore.raw.SetName(cwstr.as_ptr()); + } + + unsafe fn set_fence_name(&self, fence: &mut r::Fence, name: &str) { + let cwstr = wide_cstr(name); + fence.raw.SetName(cwstr.as_ptr()); + } + + unsafe fn set_framebuffer_name(&self, _framebuffer: &mut r::Framebuffer, _name: &str) { + // ignored + } + + unsafe fn set_render_pass_name(&self, render_pass: &mut r::RenderPass, name: &str) { + render_pass.raw_name = wide_cstr(name); + } + + unsafe fn set_descriptor_set_name(&self, descriptor_set: &mut r::DescriptorSet, name: &str) { + descriptor_set.raw_name = wide_cstr(name); + } + + unsafe fn set_descriptor_set_layout_name( + &self, + _descriptor_set_layout: &mut r::DescriptorSetLayout, + _name: &str, + ) { + // ignored + } + + unsafe fn set_pipeline_layout_name(&self, pipeline_layout: &mut r::PipelineLayout, name: &str) { + let cwstr = wide_cstr(name); + pipeline_layout.shared.signature.SetName(cwstr.as_ptr()); + } + + unsafe fn set_compute_pipeline_name(&self, pipeline: &mut r::ComputePipeline, name: &str) { + let cwstr = wide_cstr(name); + pipeline.raw.SetName(cwstr.as_ptr()); + } + + unsafe fn set_graphics_pipeline_name(&self, pipeline: &mut r::GraphicsPipeline, name: &str) { + let cwstr = wide_cstr(name); + pipeline.raw.SetName(cwstr.as_ptr()); + } +} + +#[test] +fn test_identity_mapping() { + assert_eq!(conv::map_swizzle(format::Swizzle::NO), IDENTITY_MAPPING); +} diff --git a/third_party/rust/gfx-backend-dx12/src/internal.rs b/third_party/rust/gfx-backend-dx12/src/internal.rs new file mode 100644 index 
0000000000..9b563e007a --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/src/internal.rs @@ -0,0 +1,244 @@ +use auxil::FastHashMap; +use std::{ffi::CStr, mem, ptr, sync::Arc}; + +use parking_lot::Mutex; +use winapi::{ + shared::{ + dxgiformat, dxgitype, + minwindef::{FALSE, TRUE}, + winerror, + }, + um::d3d12::{self, *}, + Interface, +}; + +use native; + +#[derive(Clone, Debug)] +pub struct BlitPipe { + pub pipeline: native::PipelineState, + pub signature: native::RootSignature, +} + +impl BlitPipe { + pub unsafe fn destroy(&self) { + self.pipeline.destroy(); + self.signature.destroy(); + } +} + +// Information to pass to the shader +#[repr(C)] +#[derive(Debug)] +pub struct BlitData { + pub src_offset: [f32; 2], + pub src_extent: [f32; 2], + pub layer: f32, + pub level: f32, +} + +pub type BlitKey = (dxgiformat::DXGI_FORMAT, d3d12::D3D12_FILTER); +type BlitMap = FastHashMap<BlitKey, BlitPipe>; + +#[derive(Debug)] +pub(crate) struct ServicePipes { + pub(crate) device: native::Device, + library: Arc<native::D3D12Lib>, + blits_2d_color: Mutex<BlitMap>, +} + +impl ServicePipes { + pub fn new(device: native::Device, library: Arc<native::D3D12Lib>) -> Self { + ServicePipes { + device, + library, + blits_2d_color: Mutex::new(FastHashMap::default()), + } + } + + pub unsafe fn destroy(&self) { + let blits = self.blits_2d_color.lock(); + for (_, pipe) in &*blits { + pipe.destroy(); + } + } + + pub fn get_blit_2d_color(&self, key: BlitKey) -> BlitPipe { + let mut blits = self.blits_2d_color.lock(); + blits + .entry(key) + .or_insert_with(|| self.create_blit_2d_color(key)) + .clone() + } + + fn create_blit_2d_color(&self, (dst_format, filter): BlitKey) -> BlitPipe { + let descriptor_range = [native::DescriptorRange::new( + native::DescriptorRangeType::SRV, + 1, + native::Binding { + register: 0, + space: 0, + }, + 0, + )]; + + let root_parameters = [ + native::RootParameter::descriptor_table( + native::ShaderVisibility::All, + &descriptor_range, + ), + native::RootParameter::constants( + native::ShaderVisibility::All, + native::Binding { + register: 0, + space: 0, + }, + (mem::size_of::<BlitData>() / 4) as _, + ), + ]; + + let static_samplers = [native::StaticSampler::new( + native::ShaderVisibility::PS, + native::Binding { + register: 0, + space: 0, + }, + filter, + [ + d3d12::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + d3d12::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + d3d12::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + ], + 0.0, + 0, + d3d12::D3D12_COMPARISON_FUNC_ALWAYS, + native::StaticBorderColor::TransparentBlack, + 0.0..d3d12::D3D12_FLOAT32_MAX, + )]; + + let (signature_raw, error) = match self.library.serialize_root_signature( + native::RootSignatureVersion::V1_0, + &root_parameters, + &static_samplers, + native::RootSignatureFlags::empty(), + ) { + Ok((pair, hr)) if winerror::SUCCEEDED(hr) => pair, + Ok((_, hr)) => panic!("Can't serialize internal root signature: {:?}", hr), + Err(e) => panic!("Can't find serialization function: {:?}", e), + }; + + if !error.is_null() { + error!("D3D12SerializeRootSignature error: {:?}", unsafe { + error.as_c_str().to_str().unwrap() + }); + unsafe { error.destroy() }; + } + + let (signature, _hr) = self.device.create_root_signature(signature_raw, 0); + unsafe { + signature_raw.destroy(); + } + + let shader_src = include_bytes!("../shaders/blit.hlsl"); + // TODO: check results + let ((vs, _), _hr_vs) = native::Shader::compile( + shader_src, + unsafe { CStr::from_bytes_with_nul_unchecked(b"vs_5_0\0") }, + unsafe { CStr::from_bytes_with_nul_unchecked(b"vs_blit_2d\0") }, + 
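+            // The second element of the returned pair is presumably the error blob
+            // carrying compile diagnostics; like the HRESULT, it is currently
+            // ignored (see the TODO above).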
native::ShaderCompileFlags::empty(), + ); + let ((ps, _), _hr_ps) = native::Shader::compile( + shader_src, + unsafe { CStr::from_bytes_with_nul_unchecked(b"ps_5_0\0") }, + unsafe { CStr::from_bytes_with_nul_unchecked(b"ps_blit_2d\0") }, + native::ShaderCompileFlags::empty(), + ); + + let mut rtvs = [dxgiformat::DXGI_FORMAT_UNKNOWN; 8]; + rtvs[0] = dst_format; + + let dummy_target = D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: FALSE, + LogicOpEnable: FALSE, + SrcBlend: D3D12_BLEND_ZERO, + DestBlend: D3D12_BLEND_ZERO, + BlendOp: D3D12_BLEND_OP_ADD, + SrcBlendAlpha: D3D12_BLEND_ZERO, + DestBlendAlpha: D3D12_BLEND_ZERO, + BlendOpAlpha: D3D12_BLEND_OP_ADD, + LogicOp: D3D12_LOGIC_OP_CLEAR, + RenderTargetWriteMask: D3D12_COLOR_WRITE_ENABLE_ALL as _, + }; + let render_targets = [dummy_target; 8]; + + let pso_desc = d3d12::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: signature.as_mut_ptr(), + VS: *native::Shader::from_blob(vs), + PS: *native::Shader::from_blob(ps), + GS: *native::Shader::null(), + DS: *native::Shader::null(), + HS: *native::Shader::null(), + StreamOutput: d3d12::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }, + BlendState: d3d12::D3D12_BLEND_DESC { + AlphaToCoverageEnable: FALSE, + IndependentBlendEnable: FALSE, + RenderTarget: render_targets, + }, + SampleMask: !0, + RasterizerState: D3D12_RASTERIZER_DESC { + FillMode: D3D12_FILL_MODE_SOLID, + CullMode: D3D12_CULL_MODE_NONE, + FrontCounterClockwise: TRUE, + DepthBias: 0, + DepthBiasClamp: 0.0, + SlopeScaledDepthBias: 0.0, + DepthClipEnable: FALSE, + MultisampleEnable: FALSE, + ForcedSampleCount: 0, + AntialiasedLineEnable: FALSE, + ConservativeRaster: D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, + }, + DepthStencilState: unsafe { mem::zeroed() }, + InputLayout: d3d12::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: ptr::null(), + NumElements: 0, + }, + IBStripCutValue: d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + PrimitiveTopologyType: D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + NumRenderTargets: 1, + RTVFormats: rtvs, + DSVFormat: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + NodeMask: 0, + CachedPSO: d3d12::D3D12_CACHED_PIPELINE_STATE { + pCachedBlob: ptr::null(), + CachedBlobSizeInBytes: 0, + }, + Flags: d3d12::D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + let mut pipeline = native::PipelineState::null(); + let hr = unsafe { + self.device.CreateGraphicsPipelineState( + &pso_desc, + &d3d12::ID3D12PipelineState::uuidof(), + pipeline.mut_void(), + ) + }; + assert_eq!(hr, winerror::S_OK); + + BlitPipe { + pipeline, + signature, + } + } +} diff --git a/third_party/rust/gfx-backend-dx12/src/lib.rs b/third_party/rust/gfx-backend-dx12/src/lib.rs new file mode 100644 index 0000000000..907977263f --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/src/lib.rs @@ -0,0 +1,1495 @@ +/*! +# D3D12 backend internals. + +## Resource transitions + +Vulkan semantics for resource states doesn't exactly match D3D12. + +For regular images, whenever there is a specific layout used, +we map it to a corresponding D3D12 resource state. + +For the swapchain images, we consider them to be in COMMON state +everywhere except for render passes, where it's forcefully +transitioned into the render state. When transfers to/from are +requested, we transition them into and from the COPY_ states. 
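+
+For instance, a regular image in the `TransferSrcOptimal` layout is held in
+the COPY_SOURCE state, and one in `ShaderReadOnlyOptimal` in the
+shader-resource states.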
+
+For buffers and images in the General layout, we make a best effort of guessing
+the single mutable state based on the access flags. We can't reliably
+handle the case where multiple mutable access flags are used.
+*/
+
+#[macro_use]
+extern crate bitflags;
+#[macro_use]
+extern crate log;
+
+mod command;
+mod conv;
+mod descriptors_cpu;
+mod device;
+mod internal;
+mod pool;
+mod resource;
+mod root_constants;
+mod window;
+
+use auxil::FastHashMap;
+use hal::{
+    adapter, format as f, image, memory, pso::PipelineStage, queue as q, Features, Hints, Limits,
+};
+use range_alloc::RangeAllocator;
+
+use parking_lot::{Mutex, RwLock};
+use smallvec::SmallVec;
+use winapi::{
+    shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, minwindef::TRUE, winerror},
+    um::{d3d12, d3d12sdklayers, handleapi, synchapi, winbase},
+    Interface,
+};
+
+use std::{
+    borrow::Borrow,
+    ffi::OsString,
+    fmt,
+    mem,
+    os::windows::ffi::OsStringExt,
+    //TODO: use parking_lot
+    sync::Arc,
+};
+
+use self::descriptors_cpu::DescriptorCpuPool;
+
+#[derive(Debug)]
+pub(crate) struct HeapProperties {
+    pub page_property: d3d12::D3D12_CPU_PAGE_PROPERTY,
+    pub memory_pool: d3d12::D3D12_MEMORY_POOL,
+}
+
+// https://msdn.microsoft.com/de-de/library/windows/desktop/dn770377(v=vs.85).aspx
+// Only 16 input slots allowed.
+const MAX_VERTEX_BUFFERS: usize = 16;
+const MAX_DESCRIPTOR_SETS: usize = 8;
+
+const NUM_HEAP_PROPERTIES: usize = 3;
+
+pub type DescriptorIndex = u64;
+
+// Memory types are grouped according to the supported resources.
+// Grouping is done to circumvent the limitations of heap tier 1 devices.
+// Devices with Tier 1 will expose `BufferOnly`, `ImageOnly` and `TargetOnly`.
+// Devices with Tier 2 or higher will only expose `Universal`.
+enum MemoryGroup {
+    Universal = 0,
+    BufferOnly,
+    ImageOnly,
+    TargetOnly,
+
+    NumGroups,
+}
+
+// https://msdn.microsoft.com/de-de/library/windows/desktop/dn788678(v=vs.85).aspx
+static HEAPS_NUMA: [HeapProperties; NUM_HEAP_PROPERTIES] = [
+    // DEFAULT
+    HeapProperties {
+        page_property: d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE,
+        memory_pool: d3d12::D3D12_MEMORY_POOL_L1,
+    },
+    // UPLOAD
+    HeapProperties {
+        page_property: d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,
+        memory_pool: d3d12::D3D12_MEMORY_POOL_L0,
+    },
+    // READBACK
+    HeapProperties {
+        page_property: d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,
+        memory_pool: d3d12::D3D12_MEMORY_POOL_L0,
+    },
+];
+
+static HEAPS_UMA: [HeapProperties; NUM_HEAP_PROPERTIES] = [
+    // DEFAULT
+    HeapProperties {
+        page_property: d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE,
+        memory_pool: d3d12::D3D12_MEMORY_POOL_L0,
+    },
+    // UPLOAD
+    HeapProperties {
+        page_property: d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,
+        memory_pool: d3d12::D3D12_MEMORY_POOL_L0,
+    },
+    // READBACK
+    HeapProperties {
+        page_property: d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,
+        memory_pool: d3d12::D3D12_MEMORY_POOL_L0,
+    },
+];
+
+static HEAPS_CCUMA: [HeapProperties; NUM_HEAP_PROPERTIES] = [
+    // DEFAULT
+    HeapProperties {
+        page_property: d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE,
+        memory_pool: d3d12::D3D12_MEMORY_POOL_L0,
+    },
+    // UPLOAD
+    HeapProperties {
+        page_property: d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,
+        memory_pool: d3d12::D3D12_MEMORY_POOL_L0,
+    },
+    // READBACK
+    HeapProperties {
+        page_property: d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,
+        memory_pool: d3d12::D3D12_MEMORY_POOL_L0,
+    },
+];
+
+#[derive(Debug, Copy, Clone)]
+pub enum QueueFamily {
+    // Specially marked present queue.
+ // It's basically a normal 3D queue but D3D12 swapchain creation requires an + // associated queue, which we don't know on `create_swapchain`. + Present, + Normal(q::QueueType), +} + +const MAX_QUEUES: usize = 16; // infinite, to be fair + +impl q::QueueFamily for QueueFamily { + fn queue_type(&self) -> q::QueueType { + match *self { + QueueFamily::Present => q::QueueType::General, + QueueFamily::Normal(ty) => ty, + } + } + fn max_queues(&self) -> usize { + match *self { + QueueFamily::Present => 1, + QueueFamily::Normal(_) => MAX_QUEUES, + } + } + fn id(&self) -> q::QueueFamilyId { + // This must match the order exposed by `QUEUE_FAMILIES` + q::QueueFamilyId(match *self { + QueueFamily::Present => 0, + QueueFamily::Normal(q::QueueType::General) => 1, + QueueFamily::Normal(q::QueueType::Compute) => 2, + QueueFamily::Normal(q::QueueType::Transfer) => 3, + _ => unreachable!(), + }) + } +} + +impl QueueFamily { + fn native_type(&self) -> native::CmdListType { + use hal::queue::QueueFamily as _; + use native::CmdListType as Clt; + + let queue_type = self.queue_type(); + match queue_type { + q::QueueType::General | q::QueueType::Graphics => Clt::Direct, + q::QueueType::Compute => Clt::Compute, + q::QueueType::Transfer => Clt::Copy, + } + } +} + +static QUEUE_FAMILIES: [QueueFamily; 4] = [ + QueueFamily::Present, + QueueFamily::Normal(q::QueueType::General), + QueueFamily::Normal(q::QueueType::Compute), + QueueFamily::Normal(q::QueueType::Transfer), +]; + +#[derive(Default)] +struct Workarounds { + // On WARP, temporary CPU descriptors are still used by the runtime + // after we call `CopyDescriptors`. + avoid_cpu_descriptor_overwrites: bool, +} + +//Note: fields are dropped in the order of declaration, so we put the +// most owning fields last. +pub struct PhysicalDevice { + features: Features, + hints: Hints, + limits: Limits, + format_properties: Arc<FormatProperties>, + private_caps: Capabilities, + workarounds: Workarounds, + heap_properties: &'static [HeapProperties; NUM_HEAP_PROPERTIES], + memory_properties: adapter::MemoryProperties, + // Indicates that there is currently an active logical device. + // Opening the same adapter multiple times will return the same D3D12Device again. + is_open: Arc<Mutex<bool>>, + adapter: native::WeakPtr<dxgi1_2::IDXGIAdapter2>, + library: Arc<native::D3D12Lib>, +} + +impl fmt::Debug for PhysicalDevice { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("PhysicalDevice") + } +} + +unsafe impl Send for PhysicalDevice {} +unsafe impl Sync for PhysicalDevice {} + +impl adapter::PhysicalDevice<Backend> for PhysicalDevice { + unsafe fn open( + &self, + families: &[(&QueueFamily, &[q::QueuePriority])], + requested_features: Features, + ) -> Result<adapter::Gpu<Backend>, hal::device::CreationError> { + let mut open_guard = match self.is_open.try_lock() { + Some(inner) => inner, + None => return Err(hal::device::CreationError::TooManyObjects), + }; + + if !self.features().contains(requested_features) { + return Err(hal::device::CreationError::MissingFeature); + } + + let device_raw = match self + .library + .create_device(self.adapter, native::FeatureLevel::L11_0) + { + Ok((device, hr)) if winerror::SUCCEEDED(hr) => device, + Ok((_, hr)) => { + error!("error on device creation: {:x}", hr); + return Err(hal::device::CreationError::InitializationFailed); + } + Err(e) => panic!("device creation failed with {:?}", e), + }; + + // Always create the presentation queue in case we want to build a swapchain. 
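+        // (The raw handle is stored on the `Device`, and the `Present` queue
+        // family below clones it rather than creating a new queue.)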
+ let (present_queue, hr_queue) = device_raw.create_command_queue( + QueueFamily::Present.native_type(), + native::Priority::Normal, + native::CommandQueueFlags::empty(), + 0, + ); + if !winerror::SUCCEEDED(hr_queue) { + error!("error on queue creation: {:x}", hr_queue); + } + + let mut device = Device::new(device_raw, &self, present_queue); + device.features = requested_features; + + let queue_groups = families + .into_iter() + .map(|&(&family, priorities)| { + use hal::queue::QueueFamily as _; + let mut group = q::QueueGroup::new(family.id()); + + let create_idle_event = || native::Event::create(true, false); + + match family { + QueueFamily::Present => { + // Exactly **one** present queue! + // Number of queues need to be larger than 0 else it + // violates the specification. + let queue = CommandQueue { + raw: device.present_queue.clone(), + idle_fence: device.create_raw_fence(false), + idle_event: create_idle_event(), + }; + device.append_queue(queue.clone()); + group.add_queue(queue); + } + QueueFamily::Normal(_) => { + let list_type = family.native_type(); + for _ in 0..priorities.len() { + let (queue, hr_queue) = device_raw.create_command_queue( + list_type, + native::Priority::Normal, + native::CommandQueueFlags::empty(), + 0, + ); + + if winerror::SUCCEEDED(hr_queue) { + let queue = CommandQueue { + raw: queue, + idle_fence: device.create_raw_fence(false), + idle_event: create_idle_event(), + }; + device.append_queue(queue.clone()); + group.add_queue(queue); + } else { + error!("error on queue creation: {:x}", hr_queue); + } + } + } + } + + group + }) + .collect(); + + *open_guard = true; + + Ok(adapter::Gpu { + device, + queue_groups, + }) + } + + fn format_properties(&self, fmt: Option<f::Format>) -> f::Properties { + let idx = fmt.map(|fmt| fmt as usize).unwrap_or(0); + self.format_properties.resolve(idx).properties + } + + fn image_format_properties( + &self, + format: f::Format, + dimensions: u8, + tiling: image::Tiling, + usage: image::Usage, + view_caps: image::ViewCapabilities, + ) -> Option<image::FormatProperties> { + conv::map_format(format)?; //filter out unknown formats + let format_info = self.format_properties.resolve(format as usize); + + let supported_usage = { + use hal::image::Usage as U; + let props = match tiling { + image::Tiling::Optimal => format_info.properties.optimal_tiling, + image::Tiling::Linear => format_info.properties.linear_tiling, + }; + let mut flags = U::empty(); + // Note: these checks would have been nicer if we had explicit BLIT usage + if props.contains(f::ImageFeature::BLIT_SRC) { + flags |= U::TRANSFER_SRC; + } + if props.contains(f::ImageFeature::BLIT_DST) { + flags |= U::TRANSFER_DST; + } + if props.contains(f::ImageFeature::SAMPLED) { + flags |= U::SAMPLED; + } + if props.contains(f::ImageFeature::STORAGE) { + flags |= U::STORAGE; + } + if props.contains(f::ImageFeature::COLOR_ATTACHMENT) { + flags |= U::COLOR_ATTACHMENT; + } + if props.contains(f::ImageFeature::DEPTH_STENCIL_ATTACHMENT) { + flags |= U::DEPTH_STENCIL_ATTACHMENT; + } + flags + }; + if !supported_usage.contains(usage) { + return None; + } + + let max_resource_size = + (d3d12::D3D12_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM as usize) << 20; + Some(match tiling { + image::Tiling::Optimal => image::FormatProperties { + max_extent: match dimensions { + 1 => image::Extent { + width: d3d12::D3D12_REQ_TEXTURE1D_U_DIMENSION, + height: 1, + depth: 1, + }, + 2 => image::Extent { + width: d3d12::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION, + height: 
d3d12::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION, + depth: 1, + }, + 3 => image::Extent { + width: d3d12::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + height: d3d12::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + depth: d3d12::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + }, + _ => return None, + }, + max_levels: d3d12::D3D12_REQ_MIP_LEVELS as _, + max_layers: match dimensions { + 1 => d3d12::D3D12_REQ_TEXTURE1D_ARRAY_AXIS_DIMENSION as _, + 2 => d3d12::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION as _, + _ => return None, + }, + sample_count_mask: if dimensions == 2 + && !view_caps.contains(image::ViewCapabilities::KIND_CUBE) + && !usage.contains(image::Usage::STORAGE) + { + format_info.sample_count_mask + } else { + 0x1 + }, + max_resource_size, + }, + image::Tiling::Linear => image::FormatProperties { + max_extent: match dimensions { + 2 => image::Extent { + width: d3d12::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION, + height: d3d12::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION, + depth: 1, + }, + _ => return None, + }, + max_levels: 1, + max_layers: 1, + sample_count_mask: 0x1, + max_resource_size, + }, + }) + } + + fn memory_properties(&self) -> adapter::MemoryProperties { + self.memory_properties.clone() + } + + fn features(&self) -> Features { + self.features + } + + fn hints(&self) -> Hints { + self.hints + } + + fn limits(&self) -> Limits { + self.limits + } +} + +#[derive(Clone)] +pub struct CommandQueue { + pub(crate) raw: native::CommandQueue, + idle_fence: native::Fence, + idle_event: native::Event, +} + +impl fmt::Debug for CommandQueue { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("CommandQueue") + } +} + +impl CommandQueue { + unsafe fn destroy(&self) { + handleapi::CloseHandle(self.idle_event.0); + self.idle_fence.destroy(); + self.raw.destroy(); + } +} + +unsafe impl Send for CommandQueue {} +unsafe impl Sync for CommandQueue {} + +impl q::CommandQueue<Backend> for CommandQueue { + unsafe fn submit<'a, T, Ic, S, Iw, Is>( + &mut self, + submission: q::Submission<Ic, Iw, Is>, + fence: Option<&resource::Fence>, + ) where + T: 'a + Borrow<command::CommandBuffer>, + Ic: IntoIterator<Item = &'a T>, + S: 'a + Borrow<resource::Semaphore>, + Iw: IntoIterator<Item = (&'a S, PipelineStage)>, + Is: IntoIterator<Item = &'a S>, + { + // Reset idle fence and event + // That's safe here due to exclusive access to the queue + self.idle_fence.signal(0); + synchapi::ResetEvent(self.idle_event.0); + + // TODO: semaphores + let lists = submission + .command_buffers + .into_iter() + .map(|cmd_buf| cmd_buf.borrow().as_raw_list()) + .collect::<SmallVec<[_; 4]>>(); + self.raw + .ExecuteCommandLists(lists.len() as _, lists.as_ptr()); + + if let Some(fence) = fence { + assert_eq!(winerror::S_OK, self.raw.Signal(fence.raw.as_mut_ptr(), 1)); + } + } + + unsafe fn present( + &mut self, + surface: &mut window::Surface, + image: window::SwapchainImage, + _wait_semaphore: Option<&resource::Semaphore>, + ) -> Result<Option<hal::window::Suboptimal>, hal::window::PresentError> { + surface.present(image).map(|()| None) + } + + fn wait_idle(&self) -> Result<(), hal::device::OutOfMemory> { + self.raw.signal(self.idle_fence, 1); + assert_eq!( + winerror::S_OK, + self.idle_fence.set_event_on_completion(self.idle_event, 1) + ); + + unsafe { + synchapi::WaitForSingleObject(self.idle_event.0, winbase::INFINITE); + } + + Ok(()) + } +} + +#[derive(Debug, Clone, Copy)] +enum MemoryArchitecture { + NUMA, + UMA, + CacheCoherentUMA, +} + +#[derive(Debug, Clone, Copy)] +pub struct Capabilities { + heterogeneous_resource_heaps: 
bool, + memory_architecture: MemoryArchitecture, +} + +#[derive(Clone, Debug)] +struct CmdSignatures { + draw: native::CommandSignature, + draw_indexed: native::CommandSignature, + dispatch: native::CommandSignature, +} + +impl CmdSignatures { + unsafe fn destroy(&self) { + self.draw.destroy(); + self.draw_indexed.destroy(); + self.dispatch.destroy(); + } +} + +// Shared objects between command buffers, owned by the device. +#[derive(Debug)] +struct Shared { + pub signatures: CmdSignatures, + pub service_pipes: internal::ServicePipes, +} + +impl Shared { + unsafe fn destroy(&self) { + self.signatures.destroy(); + self.service_pipes.destroy(); + } +} + +pub struct SamplerStorage { + map: Mutex<FastHashMap<image::SamplerDesc, descriptors_cpu::Handle>>, + pool: Mutex<DescriptorCpuPool>, + heap: resource::DescriptorHeap, + origins: RwLock<resource::DescriptorOrigins>, +} + +impl SamplerStorage { + unsafe fn destroy(&mut self) { + self.pool.lock().destroy(); + self.heap.destroy(); + } +} + +pub struct Device { + raw: native::Device, + private_caps: Capabilities, + features: Features, + format_properties: Arc<FormatProperties>, + heap_properties: &'static [HeapProperties], + // CPU only pools + rtv_pool: Mutex<DescriptorCpuPool>, + dsv_pool: Mutex<DescriptorCpuPool>, + srv_uav_pool: Mutex<DescriptorCpuPool>, + descriptor_updater: Mutex<descriptors_cpu::DescriptorUpdater>, + // CPU/GPU descriptor heaps + heap_srv_cbv_uav: ( + resource::DescriptorHeap, + Mutex<RangeAllocator<DescriptorIndex>>, + ), + samplers: SamplerStorage, + events: Mutex<Vec<native::Event>>, + shared: Arc<Shared>, + // Present queue exposed by the `Present` queue family. + // Required for swapchain creation. Only a single queue supports presentation. + present_queue: native::CommandQueue, + // List of all queues created from this device, including present queue. + // Needed for `wait_idle`. + queues: Vec<CommandQueue>, + // Indicates that there is currently an active device. 
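+    // It shares the `Arc` with `PhysicalDevice::is_open` and is reset to
+    // `false` again when the device is dropped.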
+ open: Arc<Mutex<bool>>, + library: Arc<native::D3D12Lib>, +} + +impl fmt::Debug for Device { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Device") + } +} + +unsafe impl Send for Device {} //blocked by ComPtr +unsafe impl Sync for Device {} //blocked by ComPtr + +impl Device { + fn new( + device: native::Device, + physical_device: &PhysicalDevice, + present_queue: native::CommandQueue, + ) -> Self { + // Allocate descriptor heaps + let rtv_pool = DescriptorCpuPool::new(device, native::DescriptorHeapType::Rtv); + let dsv_pool = DescriptorCpuPool::new(device, native::DescriptorHeapType::Dsv); + let srv_uav_pool = DescriptorCpuPool::new(device, native::DescriptorHeapType::CbvSrvUav); + + // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 + let view_capacity = 1_000_000; + let heap_srv_cbv_uav = Self::create_descriptor_heap_impl( + device, + native::DescriptorHeapType::CbvSrvUav, + true, + view_capacity, + ); + let view_range_allocator = RangeAllocator::new(0..(view_capacity as u64)); + + let sampler_pool = DescriptorCpuPool::new(device, native::DescriptorHeapType::Sampler); + let heap_sampler = Self::create_descriptor_heap_impl( + device, + native::DescriptorHeapType::Sampler, + true, + 2_048, + ); + + let descriptor_updater = descriptors_cpu::DescriptorUpdater::new( + device, + physical_device.workarounds.avoid_cpu_descriptor_overwrites, + ); + + let draw_signature = Self::create_command_signature(device, device::CommandSignature::Draw); + let draw_indexed_signature = + Self::create_command_signature(device, device::CommandSignature::DrawIndexed); + let dispatch_signature = + Self::create_command_signature(device, device::CommandSignature::Dispatch); + + let signatures = CmdSignatures { + draw: draw_signature, + draw_indexed: draw_indexed_signature, + dispatch: dispatch_signature, + }; + let service_pipes = + internal::ServicePipes::new(device, Arc::clone(&physical_device.library)); + let shared = Shared { + signatures, + service_pipes, + }; + + Device { + raw: device, + library: Arc::clone(&physical_device.library), + private_caps: physical_device.private_caps, + features: Features::empty(), + format_properties: physical_device.format_properties.clone(), + heap_properties: physical_device.heap_properties, + rtv_pool: Mutex::new(rtv_pool), + dsv_pool: Mutex::new(dsv_pool), + srv_uav_pool: Mutex::new(srv_uav_pool), + descriptor_updater: Mutex::new(descriptor_updater), + heap_srv_cbv_uav: (heap_srv_cbv_uav, Mutex::new(view_range_allocator)), + samplers: SamplerStorage { + map: Mutex::default(), + pool: Mutex::new(sampler_pool), + heap: heap_sampler, + origins: RwLock::default(), + }, + events: Mutex::new(Vec::new()), + shared: Arc::new(shared), + present_queue, + queues: Vec::new(), + open: Arc::clone(&physical_device.is_open), + } + } + + fn append_queue(&mut self, queue: CommandQueue) { + self.queues.push(queue); + } + + /// Get the native d3d12 device. + /// + /// Required for FFI with libraries like RenderDoc. 
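+    ///
+    /// A hypothetical sketch of such a use (the `renderdoc` handle and its
+    /// method are illustrative, not part of this crate):
+    ///
+    /// ```ignore
+    /// let raw = unsafe { device.as_raw() };
+    /// renderdoc.start_frame_capture(raw as *mut _, std::ptr::null());
+    /// ```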
+ pub unsafe fn as_raw(&self) -> *mut d3d12::ID3D12Device { + self.raw.as_mut_ptr() + } +} + +impl Drop for Device { + fn drop(&mut self) { + *self.open.lock() = false; + + unsafe { + for queue in &mut self.queues { + let _ = q::CommandQueue::wait_idle(queue); + queue.destroy(); + } + + self.shared.destroy(); + self.heap_srv_cbv_uav.0.destroy(); + self.samplers.destroy(); + self.rtv_pool.lock().destroy(); + self.dsv_pool.lock().destroy(); + self.srv_uav_pool.lock().destroy(); + + self.descriptor_updater.lock().destroy(); + + // Debug tracking alive objects + let (debug_device, hr_debug) = self.raw.cast::<d3d12sdklayers::ID3D12DebugDevice>(); + if winerror::SUCCEEDED(hr_debug) { + debug_device.ReportLiveDeviceObjects(d3d12sdklayers::D3D12_RLDO_DETAIL); + debug_device.destroy(); + } + + self.raw.destroy(); + } + } +} + +#[derive(Debug)] +pub struct Instance { + pub(crate) factory: native::Factory4, + library: Arc<native::D3D12Lib>, + lib_dxgi: native::DxgiLib, +} + +impl Drop for Instance { + fn drop(&mut self) { + unsafe { + self.factory.destroy(); + } + } +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +impl hal::Instance<Backend> for Instance { + fn create(_: &str, _: u32) -> Result<Self, hal::UnsupportedBackend> { + let lib_main = match native::D3D12Lib::new() { + Ok(lib) => lib, + Err(_) => return Err(hal::UnsupportedBackend), + }; + + #[cfg(debug_assertions)] + { + // Enable debug layer + match lib_main.get_debug_interface() { + Ok((debug_controller, hr)) if winerror::SUCCEEDED(hr) => { + debug_controller.enable_layer(); + unsafe { + debug_controller.Release(); + } + } + _ => { + warn!("Unable to get D3D12 debug interface"); + } + } + } + + let lib_dxgi = native::DxgiLib::new().unwrap(); + + // The `DXGI_CREATE_FACTORY_DEBUG` flag is only allowed to be passed to + // `CreateDXGIFactory2` if the debug interface is actually available. So + // we check for whether it exists first. + let factory_flags = match lib_dxgi.get_debug_interface1() { + Ok((queue, hr)) if winerror::SUCCEEDED(hr) => { + unsafe { queue.destroy() }; + native::FactoryCreationFlags::DEBUG + } + _ => native::FactoryCreationFlags::empty(), + }; + + // Create DXGI factory + let factory = match lib_dxgi.create_factory2(factory_flags) { + Ok((factory, hr)) if winerror::SUCCEEDED(hr) => factory, + Ok((_, hr)) => { + info!("Failed on dxgi factory creation: {:?}", hr); + return Err(hal::UnsupportedBackend); + } + Err(_) => return Err(hal::UnsupportedBackend), + }; + + Ok(Instance { + factory, + library: Arc::new(lib_main), + lib_dxgi, + }) + } + + fn enumerate_adapters(&self) -> Vec<adapter::Adapter<Backend>> { + use self::memory::Properties; + + // Try to use high performance order by default (returns None on Windows < 1803) + let (use_f6, factory6) = unsafe { + let (f6, hr) = self.factory.cast::<dxgi1_6::IDXGIFactory6>(); + if winerror::SUCCEEDED(hr) { + // It's okay to decrement the refcount here because we + // have another reference to the factory already owned by `self`. 
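+                // (`destroy` just calls `Release` on the COM pointer, so the
+                // weak pointer stays usable for as long as `self.factory` lives.)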
+ f6.destroy(); + (true, f6) + } else { + (false, native::WeakPtr::null()) + } + }; + + // Enumerate adapters + let mut cur_index = 0; + let mut adapters = Vec::new(); + loop { + let adapter = if use_f6 { + let mut adapter2 = native::WeakPtr::<dxgi1_2::IDXGIAdapter2>::null(); + let hr = unsafe { + factory6.EnumAdapterByGpuPreference( + cur_index, + 2, // HIGH_PERFORMANCE + &dxgi1_2::IDXGIAdapter2::uuidof(), + adapter2.mut_void() as *mut *mut _, + ) + }; + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if !winerror::SUCCEEDED(hr) { + error!("Failed enumerating adapters: 0x{:x}", hr); + break; + } + + adapter2 + } else { + let mut adapter1 = native::WeakPtr::<dxgi::IDXGIAdapter1>::null(); + let hr1 = unsafe { + self.factory + .EnumAdapters1(cur_index, adapter1.mut_void() as *mut *mut _) + }; + + if hr1 == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + + let (adapter2, hr2) = unsafe { adapter1.cast::<dxgi1_2::IDXGIAdapter2>() }; + if !winerror::SUCCEEDED(hr2) { + error!("Failed casting to Adapter2: 0x{:x}", hr2); + break; + } + + unsafe { + adapter1.destroy(); + } + adapter2 + }; + + cur_index += 1; + + // Check for D3D12 support + // Create temporary device to get physical device information + let device = match self + .library + .create_device(adapter, native::FeatureLevel::L11_0) + { + Ok((device, hr)) if winerror::SUCCEEDED(hr) => device, + _ => continue, + }; + + // We have found a possible adapter + // acquire the device information + let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() }; + unsafe { + adapter.GetDesc2(&mut desc); + } + + let device_name = { + let len = desc.Description.iter().take_while(|&&c| c != 0).count(); + let name = <OsString as OsStringExt>::from_wide(&desc.Description[..len]); + name.to_string_lossy().into_owned() + }; + + let mut features_architecture: d3d12::D3D12_FEATURE_DATA_ARCHITECTURE = + unsafe { mem::zeroed() }; + assert_eq!(winerror::S_OK, unsafe { + device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_ARCHITECTURE, + &mut features_architecture as *mut _ as *mut _, + mem::size_of::<d3d12::D3D12_FEATURE_DATA_ARCHITECTURE>() as _, + ) + }); + + let mut workarounds = Workarounds::default(); + + let info = adapter::AdapterInfo { + name: device_name, + vendor: desc.VendorId as usize, + device: desc.DeviceId as usize, + device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + workarounds.avoid_cpu_descriptor_overwrites = true; + adapter::DeviceType::VirtualGpu + } else if features_architecture.CacheCoherentUMA == TRUE { + adapter::DeviceType::IntegratedGpu + } else { + adapter::DeviceType::DiscreteGpu + }, + }; + + let mut features: d3d12::D3D12_FEATURE_DATA_D3D12_OPTIONS = unsafe { mem::zeroed() }; + assert_eq!(winerror::S_OK, unsafe { + device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_D3D12_OPTIONS, + &mut features as *mut _ as *mut _, + mem::size_of::<d3d12::D3D12_FEATURE_DATA_D3D12_OPTIONS>() as _, + ) + }); + + let depth_bounds_test_supported = { + let mut features2: d3d12::D3D12_FEATURE_DATA_D3D12_OPTIONS2 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_D3D12_OPTIONS2, + &mut features2 as *mut _ as *mut _, + mem::size_of::<d3d12::D3D12_FEATURE_DATA_D3D12_OPTIONS2>() as _, + ) + }; + if hr == winerror::S_OK { + features2.DepthBoundsTestSupported != 0 + } else { + false + } + }; + + let heterogeneous_resource_heaps = + features.ResourceHeapTier != d3d12::D3D12_RESOURCE_HEAP_TIER_1; + + let uma = features_architecture.UMA == TRUE; + let cc_uma = 
                features_architecture.CacheCoherentUMA == TRUE;
+
+            let (memory_architecture, heap_properties) = match (uma, cc_uma) {
+                (true, true) => (MemoryArchitecture::CacheCoherentUMA, &HEAPS_CCUMA),
+                (true, false) => (MemoryArchitecture::UMA, &HEAPS_UMA),
+                (false, _) => (MemoryArchitecture::NUMA, &HEAPS_NUMA),
+            };
+
+            // https://msdn.microsoft.com/en-us/library/windows/desktop/dn788678(v=vs.85).aspx
+            let base_memory_types: [adapter::MemoryType; NUM_HEAP_PROPERTIES] =
+                match memory_architecture {
+                    MemoryArchitecture::NUMA => [
+                        // DEFAULT
+                        adapter::MemoryType {
+                            properties: Properties::DEVICE_LOCAL,
+                            heap_index: 0,
+                        },
+                        // UPLOAD
+                        adapter::MemoryType {
+                            properties: Properties::CPU_VISIBLE | Properties::COHERENT,
+                            heap_index: 1,
+                        },
+                        // READBACK
+                        adapter::MemoryType {
+                            properties: Properties::CPU_VISIBLE
+                                | Properties::COHERENT
+                                | Properties::CPU_CACHED,
+                            heap_index: 1,
+                        },
+                    ],
+                    MemoryArchitecture::UMA => [
+                        // DEFAULT
+                        adapter::MemoryType {
+                            properties: Properties::DEVICE_LOCAL,
+                            heap_index: 0,
+                        },
+                        // UPLOAD
+                        adapter::MemoryType {
+                            properties: Properties::DEVICE_LOCAL
+                                | Properties::CPU_VISIBLE
+                                | Properties::COHERENT,
+                            heap_index: 0,
+                        },
+                        // READBACK
+                        adapter::MemoryType {
+                            properties: Properties::DEVICE_LOCAL
+                                | Properties::CPU_VISIBLE
+                                | Properties::COHERENT
+                                | Properties::CPU_CACHED,
+                            heap_index: 0,
+                        },
+                    ],
+                    MemoryArchitecture::CacheCoherentUMA => [
+                        // DEFAULT
+                        adapter::MemoryType {
+                            properties: Properties::DEVICE_LOCAL,
+                            heap_index: 0,
+                        },
+                        // UPLOAD
+                        adapter::MemoryType {
+                            properties: Properties::DEVICE_LOCAL
+                                | Properties::CPU_VISIBLE
+                                | Properties::COHERENT
+                                | Properties::CPU_CACHED,
+                            heap_index: 0,
+                        },
+                        // READBACK
+                        adapter::MemoryType {
+                            properties: Properties::DEVICE_LOCAL
+                                | Properties::CPU_VISIBLE
+                                | Properties::COHERENT
+                                | Properties::CPU_CACHED,
+                            heap_index: 0,
+                        },
+                    ],
+                };
+
+            let memory_types = if heterogeneous_resource_heaps {
+                base_memory_types.to_vec()
+            } else {
+                // We multiply the base memory types depending on the resource usage:
+                //     0.. 3: Reserved for future use
+                //     4.. 6: Buffers
+                //     7.. 9: Images
+                //    10..12: Targets
+                //
+                // The supported memory types for a resource can be requested by asking for
+                // the memory requirements. Memory type indices are encoded as bitflags.
+                // `device::MEM_TYPE_MASK` (0b111) defines the bitmask for one base memory type group.
+                // The corresponding shift masks (`device::MEM_TYPE_BUFFER_SHIFT`,
+                // `device::MEM_TYPE_IMAGE_SHIFT`, `device::MEM_TYPE_TARGET_SHIFT`)
+                // denote the usage group.
+                let mut types = Vec::new();
+                for i in 0..MemoryGroup::NumGroups as _ {
+                    types.extend(base_memory_types.iter().map(|mem_type| {
+                        let mut ty = mem_type.clone();
+
+                        // Images and Targets are not host visible as we can't create
+                        // a corresponding buffer for mapping.
+                        if i == MemoryGroup::ImageOnly as _ || i == MemoryGroup::TargetOnly as _ {
+                            ty.properties.remove(Properties::CPU_VISIBLE);
+                            // Coherent and cached can only be on memory types that are cpu visible
+                            ty.properties.remove(Properties::COHERENT);
+                            ty.properties.remove(Properties::CPU_CACHED);
+                        }
+                        ty
+                    }));
+                }
+                types
+            };
+
+            let memory_heaps = {
+                // Get the IDXGIAdapter3 from the created device to query video memory information.
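+                // Note: `Budget` (queried below) is the OS's estimate of how much
+                // of a memory segment this process may use, not the segment's
+                // total physical size.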
+ let adapter_id = unsafe { device.GetAdapterLuid() }; + let adapter = { + let mut adapter = native::WeakPtr::<dxgi1_4::IDXGIAdapter3>::null(); + unsafe { + assert_eq!( + winerror::S_OK, + self.factory.EnumAdapterByLuid( + adapter_id, + &dxgi1_4::IDXGIAdapter3::uuidof(), + adapter.mut_void(), + ) + ); + } + adapter + }; + + let query_memory = |segment: dxgi1_4::DXGI_MEMORY_SEGMENT_GROUP| unsafe { + let mut mem_info: dxgi1_4::DXGI_QUERY_VIDEO_MEMORY_INFO = mem::zeroed(); + assert_eq!( + winerror::S_OK, + adapter.QueryVideoMemoryInfo(0, segment, &mut mem_info) + ); + mem_info.Budget + }; + + let mut heaps = vec![adapter::MemoryHeap { + size: query_memory(dxgi1_4::DXGI_MEMORY_SEGMENT_GROUP_LOCAL), + flags: memory::HeapFlags::DEVICE_LOCAL, + }]; + if let MemoryArchitecture::NUMA = memory_architecture { + heaps.push(adapter::MemoryHeap { + size: query_memory(dxgi1_4::DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL), + flags: memory::HeapFlags::empty(), + }); + } + heaps + }; + //TODO: find a way to get a tighter bound? + let sample_count_mask = 0x3F; + + // Theoretically vram limited, but in practice 2^20 is the limit + let tier3_practical_descriptor_limit = 1 << 20; + + let full_heap_count = match features.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => { + d3d12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1 + } + d3d12::D3D12_RESOURCE_BINDING_TIER_2 => { + d3d12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2 + } + d3d12::D3D12_RESOURCE_BINDING_TIER_3 => tier3_practical_descriptor_limit, + _ => unreachable!(), + } as _; + + let uav_limit = match features.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => 8, // conservative, is 64 on feature level 11.1 + d3d12::D3D12_RESOURCE_BINDING_TIER_2 => 64, + d3d12::D3D12_RESOURCE_BINDING_TIER_3 => tier3_practical_descriptor_limit, + _ => unreachable!(), + } as _; + + let physical_device = PhysicalDevice { + library: Arc::clone(&self.library), + adapter, + features: + // TODO: add more features, based on + // https://msdn.microsoft.com/de-de/library/windows/desktop/mt186615(v=vs.85).aspx + Features::ROBUST_BUFFER_ACCESS | + Features::IMAGE_CUBE_ARRAY | + Features::GEOMETRY_SHADER | + Features::TESSELLATION_SHADER | + Features::NON_FILL_POLYGON_MODE | + if depth_bounds_test_supported { Features::DEPTH_BOUNDS } else { Features::empty() } | + //logic_op: false, // Optional on feature level 11_0 + Features::MULTI_DRAW_INDIRECT | + Features::FORMAT_BC | + Features::INSTANCE_RATE | + Features::DEPTH_CLAMP | + Features::SAMPLER_MIP_LOD_BIAS | + Features::SAMPLER_BORDER_COLOR | + Features::MUTABLE_COMPARISON_SAMPLER | + Features::SAMPLER_ANISOTROPY | + Features::TEXTURE_DESCRIPTOR_ARRAY | + Features::SAMPLER_MIRROR_CLAMP_EDGE | + Features::NDC_Y_UP | + Features::SHADER_SAMPLED_IMAGE_ARRAY_DYNAMIC_INDEXING | + Features::SHADER_STORAGE_IMAGE_ARRAY_DYNAMIC_INDEXING | + Features::SAMPLED_TEXTURE_DESCRIPTOR_INDEXING | + Features::STORAGE_TEXTURE_DESCRIPTOR_INDEXING | + Features::UNSIZED_DESCRIPTOR_ARRAY | + Features::DRAW_INDIRECT_COUNT, + hints: + Hints::BASE_VERTEX_INSTANCE_DRAWING, + limits: Limits { + //TODO: verify all of these not linked to constants + max_bound_descriptor_sets: MAX_DESCRIPTOR_SETS as u16, + max_descriptor_set_uniform_buffers_dynamic: 8, + max_descriptor_set_storage_buffers_dynamic: 4, + max_descriptor_set_sampled_images: full_heap_count, + max_descriptor_set_storage_buffers: uav_limit, + max_descriptor_set_storage_images: uav_limit, + max_descriptor_set_uniform_buffers: full_heap_count, + 
max_descriptor_set_samplers: d3d12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE as _, + max_per_stage_descriptor_sampled_images: match features.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => 128, + d3d12::D3D12_RESOURCE_BINDING_TIER_2 + | d3d12::D3D12_RESOURCE_BINDING_TIER_3 + | _ => full_heap_count, + } as _, + max_per_stage_descriptor_samplers: match features.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => 16, + d3d12::D3D12_RESOURCE_BINDING_TIER_2 + | d3d12::D3D12_RESOURCE_BINDING_TIER_3 + | _ => d3d12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE + } as _, + max_per_stage_descriptor_storage_buffers: uav_limit, + max_per_stage_descriptor_storage_images: uav_limit, + max_per_stage_descriptor_uniform_buffers: match features.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 + | d3d12::D3D12_RESOURCE_BINDING_TIER_2 => d3d12::D3D12_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, + d3d12::D3D12_RESOURCE_BINDING_TIER_3 + | _ => full_heap_count as _, + } as _, + max_uniform_buffer_range: (d3d12::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16) as _, + max_storage_buffer_range: !0, + // Is actually 256, but need space for the descriptors in there, so leave at 128 to discourage explosions + max_push_constants_size: 128, + max_image_1d_size: d3d12::D3D12_REQ_TEXTURE1D_U_DIMENSION as _, + max_image_2d_size: d3d12::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION as _, + max_image_3d_size: d3d12::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION as _, + max_image_cube_size: d3d12::D3D12_REQ_TEXTURECUBE_DIMENSION as _, + max_image_array_layers: d3d12::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION as _, + max_texel_elements: 0, + max_patch_size: 0, + max_viewports: d3d12::D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE as _, + max_viewport_dimensions: [d3d12::D3D12_VIEWPORT_BOUNDS_MAX as _; 2], + max_framebuffer_extent: hal::image::Extent { //TODO + width: 4096, + height: 4096, + depth: 1, + }, + max_compute_work_group_count: [ + d3d12::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + d3d12::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + d3d12::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + ], + max_compute_work_group_invocations: d3d12::D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP as _, + max_compute_work_group_size: [ + d3d12::D3D12_CS_THREAD_GROUP_MAX_X, + d3d12::D3D12_CS_THREAD_GROUP_MAX_Y, + d3d12::D3D12_CS_THREAD_GROUP_MAX_Z, + ], + max_vertex_input_attributes: d3d12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT as _, + max_vertex_input_bindings: 31, //TODO + max_vertex_input_attribute_offset: 255, // TODO + max_vertex_input_binding_stride: d3d12::D3D12_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES as _, + max_vertex_output_components: 16, // TODO + min_texel_buffer_offset_alignment: 1, // TODO + min_uniform_buffer_offset_alignment: d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as _, + min_storage_buffer_offset_alignment: 4, // TODO + framebuffer_color_sample_counts: sample_count_mask, + framebuffer_depth_sample_counts: sample_count_mask, + framebuffer_stencil_sample_counts: sample_count_mask, + max_color_attachments: d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as _, + buffer_image_granularity: 1, + non_coherent_atom_size: 1, //TODO: confirm + max_sampler_anisotropy: 16., + optimal_buffer_copy_offset_alignment: d3d12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as _, + optimal_buffer_copy_pitch_alignment: d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as _, + min_vertex_input_binding_stride_alignment: 1, + .. 
Limits::default() //TODO + }, + format_properties: Arc::new(FormatProperties::new(device)), + private_caps: Capabilities { + heterogeneous_resource_heaps, + memory_architecture, + }, + workarounds, + heap_properties, + memory_properties: adapter::MemoryProperties { + memory_types, + memory_heaps, + }, + is_open: Arc::new(Mutex::new(false)), + }; + + let queue_families = QUEUE_FAMILIES.to_vec(); + + adapters.push(adapter::Adapter { + info, + physical_device, + queue_families, + }); + } + adapters + } + + unsafe fn create_surface( + &self, + has_handle: &impl raw_window_handle::HasRawWindowHandle, + ) -> Result<window::Surface, hal::window::InitError> { + match has_handle.raw_window_handle() { + raw_window_handle::RawWindowHandle::Windows(handle) => { + Ok(self.create_surface_from_hwnd(handle.hwnd)) + } + _ => Err(hal::window::InitError::UnsupportedWindowHandle), + } + } + + unsafe fn destroy_surface(&self, _surface: window::Surface) { + // TODO: Implement Surface cleanup + } +} + +#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)] +pub enum Backend {} +impl hal::Backend for Backend { + type Instance = Instance; + type PhysicalDevice = PhysicalDevice; + type Device = Device; + type Surface = window::Surface; + + type QueueFamily = QueueFamily; + type CommandQueue = CommandQueue; + type CommandBuffer = command::CommandBuffer; + + type Memory = resource::Memory; + type CommandPool = pool::CommandPool; + + type ShaderModule = resource::ShaderModule; + type RenderPass = resource::RenderPass; + type Framebuffer = resource::Framebuffer; + + type Buffer = resource::Buffer; + type BufferView = resource::BufferView; + type Image = resource::Image; + type ImageView = resource::ImageView; + type Sampler = resource::Sampler; + + type ComputePipeline = resource::ComputePipeline; + type GraphicsPipeline = resource::GraphicsPipeline; + type PipelineLayout = resource::PipelineLayout; + type PipelineCache = (); + type DescriptorSetLayout = resource::DescriptorSetLayout; + type DescriptorPool = resource::DescriptorPool; + type DescriptorSet = resource::DescriptorSet; + + type Fence = resource::Fence; + type Semaphore = resource::Semaphore; + type Event = (); + type QueryPool = resource::QueryPool; +} + +fn validate_line_width(width: f32) { + // Note from the Vulkan spec: + // > If the wide lines feature is not enabled, lineWidth must be 1.0 + // Simply assert and no-op because DX12 never exposes `Features::LINE_WIDTH` + assert_eq!(width, 1.0); +} + +#[derive(Clone, Copy, Debug, Default)] +struct FormatInfo { + properties: f::Properties, + sample_count_mask: u8, +} + +#[derive(Debug)] +pub struct FormatProperties { + info: Box<[Mutex<Option<FormatInfo>>]>, + device: native::Device, +} + +impl Drop for FormatProperties { + fn drop(&mut self) { + unsafe { + self.device.destroy(); + } + } +} + +impl FormatProperties { + fn new(device: native::Device) -> Self { + let mut buf = Vec::with_capacity(f::NUM_FORMATS); + buf.push(Mutex::new(Some(FormatInfo::default()))); + for _ in 1..f::NUM_FORMATS { + buf.push(Mutex::new(None)) + } + FormatProperties { + info: buf.into_boxed_slice(), + device, + } + } + + fn resolve(&self, idx: usize) -> FormatInfo { + let mut guard = self.info[idx].lock(); + if let Some(info) = *guard { + return info; + } + let format: f::Format = unsafe { mem::transmute(idx as u32) }; + let is_compressed = format.surface_desc().is_compressed(); + let dxgi_format = match conv::map_format(format) { + Some(format) => format, + None => { + let info = FormatInfo::default(); + *guard = Some(info); + return 
info; + } + }; + + let properties = { + let mut props = f::Properties::default(); + let mut data = d3d12::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: dxgi_format, + Support1: unsafe { mem::zeroed() }, + Support2: unsafe { mem::zeroed() }, + }; + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_FORMAT_SUPPORT, + &mut data as *mut _ as *mut _, + mem::size_of::<d3d12::D3D12_FEATURE_DATA_FORMAT_SUPPORT>() as _, + ) + }); + let can_buffer = 0 != data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_BUFFER; + let can_image = 0 + != data.Support1 + & (d3d12::D3D12_FORMAT_SUPPORT1_TEXTURE1D + | d3d12::D3D12_FORMAT_SUPPORT1_TEXTURE2D + | d3d12::D3D12_FORMAT_SUPPORT1_TEXTURE3D + | d3d12::D3D12_FORMAT_SUPPORT1_TEXTURECUBE); + let can_linear = can_image && !is_compressed; + if can_image { + props.optimal_tiling |= f::ImageFeature::SAMPLED | f::ImageFeature::BLIT_SRC; + } + if can_linear { + props.linear_tiling |= f::ImageFeature::SAMPLED | f::ImageFeature::BLIT_SRC; + } + if data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER != 0 { + props.buffer_features |= f::BufferFeature::VERTEX; + } + if data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE != 0 { + props.optimal_tiling |= f::ImageFeature::SAMPLED_LINEAR; + } + if data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_RENDER_TARGET != 0 { + props.optimal_tiling |= + f::ImageFeature::COLOR_ATTACHMENT | f::ImageFeature::BLIT_DST; + if can_linear { + props.linear_tiling |= + f::ImageFeature::COLOR_ATTACHMENT | f::ImageFeature::BLIT_DST; + } + } + if data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_BLENDABLE != 0 { + props.optimal_tiling |= f::ImageFeature::COLOR_ATTACHMENT_BLEND; + } + if data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL != 0 { + props.optimal_tiling |= f::ImageFeature::DEPTH_STENCIL_ATTACHMENT; + } + if data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_SHADER_LOAD != 0 { + //TODO: check d3d12::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD ? + if can_buffer { + props.buffer_features |= f::BufferFeature::UNIFORM_TEXEL; + } + } + if data.Support2 & d3d12::D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD != 0 { + //TODO: other atomic flags? 
+ if can_buffer { + props.buffer_features |= f::BufferFeature::STORAGE_TEXEL_ATOMIC; + } + if can_image { + props.optimal_tiling |= f::ImageFeature::STORAGE_ATOMIC; + } + } + if data.Support2 & d3d12::D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE != 0 { + if can_buffer { + props.buffer_features |= f::BufferFeature::STORAGE_TEXEL; + } + if can_image { + props.optimal_tiling |= f::ImageFeature::STORAGE; + } + } + //TODO: blits, linear tiling + props + }; + + let sample_count_mask = if is_compressed { + // just an optimization to avoid the queries + 1 + } else { + let mut mask = 0; + for i in 0..6 { + let mut data = d3d12::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS { + Format: dxgi_format, + SampleCount: 1 << i, + Flags: 0, + NumQualityLevels: 0, + }; + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &mut data as *mut _ as *mut _, + mem::size_of::<d3d12::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS>() as _, + ) + }); + if data.NumQualityLevels != 0 { + mask |= 1 << i; + } + } + mask + }; + + let info = FormatInfo { + properties, + sample_count_mask, + }; + *guard = Some(info); + info + } +} diff --git a/third_party/rust/gfx-backend-dx12/src/pool.rs b/third_party/rust/gfx-backend-dx12/src/pool.rs new file mode 100644 index 0000000000..2299dd18da --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/src/pool.rs @@ -0,0 +1,126 @@ +use std::{fmt, sync::Arc}; + +use parking_lot::Mutex; +use winapi::shared::winerror; + +use crate::{command::CommandBuffer, Backend, Shared}; +use hal::{command, pool}; + +pub struct PoolShared { + device: native::Device, + list_type: native::CmdListType, + allocators: Mutex<Vec<native::CommandAllocator>>, + lists: Mutex<Vec<native::GraphicsCommandList>>, +} + +impl fmt::Debug for PoolShared { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + // TODO: print out as struct + fmt.write_str("PoolShared") + } +} + +impl PoolShared { + pub fn acquire(&self) -> (native::CommandAllocator, native::GraphicsCommandList) { + let allocator = match self.allocators.lock().pop() { + Some(allocator) => allocator, + None => { + let (allocator, hr) = self.device.create_command_allocator(self.list_type); + assert_eq!( + winerror::S_OK, + hr, + "error on command allocator creation: {:x}", + hr + ); + allocator + } + }; + let list = match self.lists.lock().pop() { + Some(list) => { + list.reset(allocator, native::PipelineState::null()); + list + } + None => { + let (command_list, hr) = self.device.create_graphics_command_list( + self.list_type, + allocator, + native::PipelineState::null(), + 0, + ); + assert_eq!( + hr, + winerror::S_OK, + "error on command list creation: {:x}", + hr + ); + command_list + } + }; + (allocator, list) + } + + pub fn release_allocator(&self, allocator: native::CommandAllocator) { + self.allocators.lock().push(allocator); + } + + pub fn release_list(&self, list: native::GraphicsCommandList) { + //pre-condition: list must be closed + self.lists.lock().push(list); + } +} + +#[derive(Debug)] +pub struct CommandPool { + shared: Arc<Shared>, + pool_shared: Arc<PoolShared>, +} + +unsafe impl Send for CommandPool {} +unsafe impl Sync for CommandPool {} + +impl CommandPool { + pub(crate) fn new( + device: native::Device, + list_type: native::CmdListType, + shared: &Arc<Shared>, + _create_flags: pool::CommandPoolCreateFlags, + ) -> Self { + let pool_shared = Arc::new(PoolShared { + device, + list_type, + allocators: Mutex::default(), + lists: Mutex::default(), + }); + CommandPool { + 
            shared: Arc::clone(shared),
            pool_shared,
        }
    }
}

impl pool::CommandPool<Backend> for CommandPool {
    unsafe fn reset(&mut self, _release_resources: bool) {
        //do nothing. The allocated command buffers would not know
        // that this happened, but they should be ready to
        // process `begin` as if they are in `Initial` state.
    }

    unsafe fn allocate_one(&mut self, level: command::Level) -> CommandBuffer {
        // TODO: Implement secondary buffers
        assert_eq!(level, command::Level::Primary);
        CommandBuffer::new(&self.shared, &self.pool_shared)
    }

    unsafe fn free<I>(&mut self, cbufs: I)
    where
        I: IntoIterator<Item = CommandBuffer>,
    {
        let mut allocators = self.pool_shared.allocators.lock();
        let mut lists = self.pool_shared.lists.lock();
        for cbuf in cbufs {
            let (allocator, list) = cbuf.destroy();
            allocators.extend(allocator);
            lists.extend(list);
        }
    }
}
diff --git a/third_party/rust/gfx-backend-dx12/src/resource.rs b/third_party/rust/gfx-backend-dx12/src/resource.rs
new file mode 100644
index 0000000000..d44fa7de76
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx12/src/resource.rs
@@ -0,0 +1,882 @@
+use hal::{buffer, format, image, memory, pass, pso};
+use range_alloc::RangeAllocator;
+
+use parking_lot::RwLock;
+use winapi::{
+    shared::{dxgiformat::DXGI_FORMAT, minwindef::UINT},
+    um::d3d12,
+};
+
+use std::{
+    cell::{Cell, RefCell, UnsafeCell},
+    collections::BTreeMap,
+    fmt,
+    ops::Range,
+    slice,
+    sync::Arc,
+};
+
+use crate::{
+    descriptors_cpu::{Handle, MultiCopyAccumulator},
+    root_constants::RootConstant,
+    Backend, DescriptorIndex, MAX_VERTEX_BUFFERS,
+};
+
+// A `ShaderModule` is stored pre-compiled when the source comes from HLSL, or
+// when the SPIR-V module contains no specialization constants or push
+// constants; otherwise the raw SPIR-V is kept, because it needs to be
+// adjusted on pipeline creation.
+#[derive(Debug, Hash)]
+pub enum ShaderModule {
+    Compiled(BTreeMap<String, native::Blob>),
+    Spirv(Vec<u32>),
+}
+unsafe impl Send for ShaderModule {}
+unsafe impl Sync for ShaderModule {}
+
+#[derive(Clone, Debug, Hash)]
+pub struct BarrierDesc {
+    pub(crate) attachment_id: pass::AttachmentId,
+    pub(crate) states: Range<d3d12::D3D12_RESOURCE_STATES>,
+    pub(crate) flags: d3d12::D3D12_RESOURCE_BARRIER_FLAGS,
+}
+
+impl BarrierDesc {
+    pub(crate) fn new(
+        attachment_id: pass::AttachmentId,
+        states: Range<d3d12::D3D12_RESOURCE_STATES>,
+    ) -> Self {
+        BarrierDesc {
+            attachment_id,
+            states,
+            flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE,
+        }
+    }
+
+    pub(crate) fn split(self) -> Range<Self> {
+        BarrierDesc {
+            flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_BEGIN_ONLY,
+            ..self.clone()
+        }..BarrierDesc {
+            flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_END_ONLY,
+            ..self
+        }
+    }
+}
+
+#[derive(Clone, Debug, Hash)]
+pub struct SubpassDesc {
+    pub(crate) color_attachments: Vec<pass::AttachmentRef>,
+    pub(crate) depth_stencil_attachment: Option<pass::AttachmentRef>,
+    pub(crate) input_attachments: Vec<pass::AttachmentRef>,
+    pub(crate) resolve_attachments: Vec<pass::AttachmentRef>,
+    pub(crate) pre_barriers: Vec<BarrierDesc>,
+    pub(crate) post_barriers: Vec<BarrierDesc>,
+}
+
+impl SubpassDesc {
+    /// Check if an attachment is used by this sub-pass.
+    //Note: preserved attachments are not considered used.
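+    // E.g. a hypothetical `desc.is_using(2)` returns true iff attachment #2
+    // appears in any of the four attachment lists above.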
+ pub(crate) fn is_using(&self, at_id: pass::AttachmentId) -> bool { + self.color_attachments + .iter() + .chain(self.depth_stencil_attachment.iter()) + .chain(self.input_attachments.iter()) + .chain(self.resolve_attachments.iter()) + .any(|&(id, _)| id == at_id) + } +} + +#[derive(Clone, Debug, Hash)] +pub struct RenderPass { + pub(crate) attachments: Vec<pass::Attachment>, + pub(crate) subpasses: Vec<SubpassDesc>, + pub(crate) post_barriers: Vec<BarrierDesc>, + pub(crate) raw_name: Vec<u16>, +} + +// Indirection layer attribute -> remap -> binding. +// +// Required as vulkan allows attribute offsets larger than the stride. +// Storing the stride specified in the pipeline required for vertex buffer binding. +#[derive(Copy, Clone, Debug)] +pub struct VertexBinding { + // Map into the specified bindings on pipeline creation. + pub mapped_binding: usize, + pub stride: UINT, + // Additional offset to rebase the attributes. + pub offset: u32, +} + +#[derive(Debug)] +pub struct GraphicsPipeline { + pub(crate) raw: native::PipelineState, + pub(crate) shared: Arc<PipelineShared>, + pub(crate) topology: d3d12::D3D12_PRIMITIVE_TOPOLOGY, + pub(crate) vertex_bindings: [Option<VertexBinding>; MAX_VERTEX_BUFFERS], + pub(crate) baked_states: pso::BakedStates, +} +unsafe impl Send for GraphicsPipeline {} +unsafe impl Sync for GraphicsPipeline {} + +#[derive(Debug)] +pub struct ComputePipeline { + pub(crate) raw: native::PipelineState, + pub(crate) shared: Arc<PipelineShared>, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +bitflags! { + pub struct SetTableTypes: u8 { + const SRV_CBV_UAV = 0x1; + const SAMPLERS = 0x2; + } +} + +pub const SRV_CBV_UAV: SetTableTypes = SetTableTypes::SRV_CBV_UAV; +pub const SAMPLERS: SetTableTypes = SetTableTypes::SAMPLERS; + +pub type RootSignatureOffset = usize; + +#[derive(Debug, Hash)] +pub struct RootTable { + pub ty: SetTableTypes, + pub offset: RootSignatureOffset, +} + +#[derive(Debug)] +pub struct RootElement { + pub table: RootTable, + pub mutable_bindings: auxil::FastHashSet<pso::DescriptorBinding>, +} + +#[derive(Debug)] +pub struct PipelineShared { + pub(crate) signature: native::RootSignature, + /// Disjunct, sorted vector of root constant ranges. + pub(crate) constants: Vec<RootConstant>, + /// A root offset per parameter. + pub(crate) parameter_offsets: Vec<u32>, + /// Total number of root slots occupied by the pipeline. + pub(crate) total_slots: u32, +} + +unsafe impl Send for PipelineShared {} +unsafe impl Sync for PipelineShared {} + +#[derive(Debug)] +pub struct PipelineLayout { + pub(crate) shared: Arc<PipelineShared>, + // Storing for each associated descriptor set layout, which tables we created + // in the root signature. This is required for binding descriptor sets. + pub(crate) elements: Vec<RootElement>, +} + +#[derive(Debug, Clone)] +pub struct Framebuffer { + pub(crate) attachments: Vec<ImageView>, + // Number of layers in the render area. Required for subpass resolves. 
+ pub(crate) layers: image::Layer, +} + +#[derive(Clone, Debug)] +pub struct BufferUnbound { + pub(crate) requirements: memory::Requirements, + pub(crate) usage: buffer::Usage, + pub(crate) name: Option<Vec<u16>>, +} + +pub struct BufferBound { + pub(crate) resource: native::Resource, + pub(crate) requirements: memory::Requirements, + pub(crate) clear_uav: Option<Handle>, +} + +impl fmt::Debug for BufferBound { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("BufferBound") + } +} + +unsafe impl Send for BufferBound {} +unsafe impl Sync for BufferBound {} + +pub enum Buffer { + Unbound(BufferUnbound), + Bound(BufferBound), +} + +impl fmt::Debug for Buffer { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Buffer") + } +} + +impl Buffer { + pub(crate) fn expect_unbound(&self) -> &BufferUnbound { + match *self { + Buffer::Unbound(ref unbound) => unbound, + Buffer::Bound(_) => panic!("Expected unbound buffer"), + } + } + + pub(crate) fn expect_bound(&self) -> &BufferBound { + match *self { + Buffer::Unbound(_) => panic!("Expected bound buffer"), + Buffer::Bound(ref bound) => bound, + } + } +} + +#[derive(Copy, Clone)] +pub struct BufferView { + // Descriptor handle for uniform texel buffers. + pub(crate) handle_srv: Option<Handle>, + // Descriptor handle for storage texel buffers. + pub(crate) handle_uav: Option<Handle>, +} + +impl fmt::Debug for BufferView { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("BufferView") + } +} + +unsafe impl Send for BufferView {} +unsafe impl Sync for BufferView {} + +#[derive(Clone)] +pub enum Place { + Heap { raw: native::Heap, offset: u64 }, + Swapchain {}, +} + +#[derive(Clone)] +pub struct ImageBound { + pub(crate) resource: native::Resource, + pub(crate) place: Place, + pub(crate) surface_type: format::SurfaceType, + pub(crate) kind: image::Kind, + pub(crate) mip_levels: image::Level, + pub(crate) usage: image::Usage, + pub(crate) default_view_format: Option<DXGI_FORMAT>, + pub(crate) view_caps: image::ViewCapabilities, + pub(crate) descriptor: d3d12::D3D12_RESOURCE_DESC, + pub(crate) clear_cv: Vec<Handle>, + pub(crate) clear_dv: Vec<Handle>, + pub(crate) clear_sv: Vec<Handle>, + pub(crate) requirements: memory::Requirements, +} + +impl fmt::Debug for ImageBound { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("ImageBound") + } +} + +unsafe impl Send for ImageBound {} +unsafe impl Sync for ImageBound {} + +impl ImageBound { + pub fn calc_subresource(&self, mip_level: UINT, layer: UINT, plane: UINT) -> UINT { + mip_level + + (layer * self.descriptor.MipLevels as UINT) + + (plane * self.descriptor.MipLevels as UINT * self.kind.num_layers() as UINT) + } +} + +#[derive(Clone)] +pub struct ImageUnbound { + pub(crate) desc: d3d12::D3D12_RESOURCE_DESC, + pub(crate) view_format: Option<DXGI_FORMAT>, + pub(crate) dsv_format: Option<DXGI_FORMAT>, + pub(crate) requirements: memory::Requirements, + pub(crate) format: format::Format, + pub(crate) kind: image::Kind, + pub(crate) mip_levels: image::Level, + pub(crate) usage: image::Usage, + pub(crate) tiling: image::Tiling, + pub(crate) view_caps: image::ViewCapabilities, + //TODO: use hal::format::FormatDesc + pub(crate) bytes_per_block: u8, + // Dimension of a texel block (compressed formats). 
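+    // E.g. (4, 4) for the BC formats, (1, 1) for uncompressed formats.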
+ pub(crate) block_dim: (u8, u8), + pub(crate) name: Option<Vec<u16>>, +} + +impl fmt::Debug for ImageUnbound { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("ImageUnbound") + } +} + +impl ImageUnbound { + pub fn calc_subresource(&self, mip_level: UINT, layer: UINT, plane: UINT) -> UINT { + mip_level + + (layer * self.desc.MipLevels as UINT) + + (plane * self.desc.MipLevels as UINT * self.kind.num_layers() as UINT) + } +} + +#[derive(Clone)] +pub enum Image { + Unbound(ImageUnbound), + Bound(ImageBound), +} + +impl fmt::Debug for Image { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Image") + } +} + +impl Image { + pub(crate) fn expect_unbound(&self) -> &ImageUnbound { + match *self { + Image::Unbound(ref unbound) => unbound, + Image::Bound(_) => panic!("Expected unbound image"), + } + } + + pub(crate) fn expect_bound(&self) -> &ImageBound { + match *self { + Image::Unbound(_) => panic!("Expected bound image"), + Image::Bound(ref bound) => bound, + } + } + + pub fn get_desc(&self) -> &d3d12::D3D12_RESOURCE_DESC { + match self { + Image::Bound(i) => &i.descriptor, + Image::Unbound(i) => &i.desc, + } + } + + pub fn calc_subresource(&self, mip_level: UINT, layer: UINT, plane: UINT) -> UINT { + match self { + Image::Bound(i) => i.calc_subresource(mip_level, layer, plane), + Image::Unbound(i) => i.calc_subresource(mip_level, layer, plane), + } + } +} + +#[derive(Copy, Clone)] +pub enum RenderTargetHandle { + None, + Swapchain(native::CpuDescriptor), + Pool(Handle), +} + +impl RenderTargetHandle { + pub fn raw(&self) -> Option<native::CpuDescriptor> { + match *self { + RenderTargetHandle::None => None, + RenderTargetHandle::Swapchain(rtv) => Some(rtv), + RenderTargetHandle::Pool(ref handle) => Some(handle.raw), + } + } +} + +#[derive(Copy, Clone)] +pub struct ImageView { + pub(crate) resource: native::Resource, // weak-ptr owned by image. + pub(crate) handle_srv: Option<Handle>, + pub(crate) handle_rtv: RenderTargetHandle, + pub(crate) handle_dsv: Option<Handle>, + pub(crate) handle_uav: Option<Handle>, + // Required for attachment resolves. 
+ pub(crate) dxgi_format: DXGI_FORMAT, + pub(crate) num_levels: image::Level, + pub(crate) mip_levels: (image::Level, image::Level), + pub(crate) layers: (image::Layer, image::Layer), + pub(crate) kind: image::Kind, +} + +impl fmt::Debug for ImageView { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("ImageView") + } +} + +unsafe impl Send for ImageView {} +unsafe impl Sync for ImageView {} + +impl ImageView { + pub fn calc_subresource(&self, mip_level: UINT, layer: UINT) -> UINT { + mip_level + (layer * self.num_levels as UINT) + } + + pub fn is_swapchain(&self) -> bool { + match self.handle_rtv { + RenderTargetHandle::Swapchain(_) => true, + _ => false, + } + } +} + +pub struct Sampler { + pub(crate) handle: Handle, +} + +impl fmt::Debug for Sampler { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Sampler") + } +} + +#[derive(Debug)] +pub struct DescriptorSetLayout { + pub(crate) bindings: Vec<pso::DescriptorSetLayoutBinding>, +} + +#[derive(Debug)] +pub struct Fence { + pub(crate) raw: native::Fence, +} +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +#[derive(Debug)] +pub struct Semaphore { + pub(crate) raw: native::Fence, +} + +unsafe impl Send for Semaphore {} +unsafe impl Sync for Semaphore {} + +#[derive(Debug)] +pub struct Memory { + pub(crate) heap: native::Heap, + pub(crate) type_id: usize, + pub(crate) size: u64, + // Buffer containing the whole memory for mapping (only for host visible heaps) + pub(crate) resource: Option<native::Resource>, +} + +unsafe impl Send for Memory {} +unsafe impl Sync for Memory {} + +bitflags! { + /// A set of D3D12 descriptor types that need to be associated + /// with a single gfx-hal `DescriptorType`. + #[derive(Default)] + pub struct DescriptorContent: u8 { + const CBV = 0x1; + const SRV = 0x2; + const UAV = 0x4; + const SAMPLER = 0x8; + + /// Indicates if the descriptor is a dynamic uniform/storage buffer. + /// Important as dynamic buffers are implemented as root descriptors. 
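+        /// (Their GPU virtual address goes straight into the root signature,
+        /// so applying a dynamic offset never touches a descriptor heap.)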
+        const DYNAMIC = 0x10;
+
+        const VIEW = DescriptorContent::CBV.bits | DescriptorContent::SRV.bits | DescriptorContent::UAV.bits;
+    }
+}
+
+impl DescriptorContent {
+    pub fn is_dynamic(&self) -> bool {
+        self.contains(DescriptorContent::DYNAMIC)
+    }
+}
+
+impl From<pso::DescriptorType> for DescriptorContent {
+    fn from(ty: pso::DescriptorType) -> Self {
+        use hal::pso::{
+            BufferDescriptorFormat as Bdf, BufferDescriptorType as Bdt, DescriptorType as Dt,
+            ImageDescriptorType as Idt,
+        };
+
+        use DescriptorContent as Dc;
+
+        match ty {
+            Dt::Sampler => Dc::SAMPLER,
+            Dt::Image { ty } => match ty {
+                Idt::Storage { read_only: true } => Dc::SRV,
+                Idt::Storage { read_only: false } => Dc::SRV | Dc::UAV,
+                Idt::Sampled { with_sampler } => match with_sampler {
+                    true => Dc::SRV | Dc::SAMPLER,
+                    false => Dc::SRV,
+                },
+            },
+            Dt::Buffer { ty, format } => match ty {
+                Bdt::Storage { read_only: true } => match format {
+                    Bdf::Structured {
+                        dynamic_offset: true,
+                    } => Dc::SRV | Dc::DYNAMIC,
+                    Bdf::Structured {
+                        dynamic_offset: false,
+                    }
+                    | Bdf::Texel => Dc::SRV,
+                },
+                Bdt::Storage { read_only: false } => match format {
+                    Bdf::Structured {
+                        dynamic_offset: true,
+                    } => Dc::SRV | Dc::UAV | Dc::DYNAMIC,
+                    Bdf::Structured {
+                        dynamic_offset: false,
+                    }
+                    | Bdf::Texel => Dc::SRV | Dc::UAV,
+                },
+                Bdt::Uniform => match format {
+                    Bdf::Structured {
+                        dynamic_offset: true,
+                    } => Dc::CBV | Dc::DYNAMIC,
+                    Bdf::Structured {
+                        dynamic_offset: false,
+                    } => Dc::CBV,
+                    Bdf::Texel => Dc::SRV,
+                },
+            },
+            Dt::InputAttachment => Dc::SRV,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct DescriptorRange {
+    pub(crate) handle: DualHandle,
+    pub(crate) ty: pso::DescriptorType,
+    pub(crate) handle_size: u64,
+}
+
+impl DescriptorRange {
+    pub(crate) fn at(&self, index: DescriptorIndex) -> native::CpuDescriptor {
+        assert!(index < self.handle.size);
+        let ptr = self.handle.cpu.ptr + (self.handle_size * index) as usize;
+        native::CpuDescriptor { ptr }
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct DynamicDescriptor {
+    pub content: DescriptorContent,
+    pub gpu_buffer_location: u64,
+}
+
+#[derive(Debug, Default)]
+pub struct DescriptorBindingInfo {
+    pub(crate) count: u64,
+    pub(crate) view_range: Option<DescriptorRange>,
+    pub(crate) dynamic_descriptors: UnsafeCell<Vec<DynamicDescriptor>>,
+    pub(crate) content: DescriptorContent,
+}
+
+#[derive(Default)]
+pub struct DescriptorOrigins {
+    // For each index on the heap, this array stores the origin CPU handle.
+    origins: Vec<native::CpuDescriptor>,
+}
+
+impl DescriptorOrigins {
+    fn find(&self, other: &[native::CpuDescriptor]) -> Option<DescriptorIndex> {
+        //TODO: need a smarter algorithm here!
+        for i in other.len()..=self.origins.len() {
+            let base = i - other.len();
+            //TODO: use slice comparison when `CpuDescriptor` implements `PartialEq`.
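+            // This relies on `CpuDescriptor` being a plain struct with a single
+            // pointer-sized `ptr` field, so runs of descriptors can be compared
+            // as raw `usize` slices.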
+            if unsafe {
+                slice::from_raw_parts(&self.origins[base].ptr, other.len())
+                    == slice::from_raw_parts(&other[0].ptr, other.len())
+            } {
+                return Some(base as DescriptorIndex);
+            }
+        }
+        None
+    }
+
+    fn grow(&mut self, other: &[native::CpuDescriptor]) -> DescriptorIndex {
+        let base = self.origins.len() as DescriptorIndex;
+        self.origins.extend_from_slice(other);
+        base
+    }
+}
+
+pub struct DescriptorSet {
+    // Required for binding at command buffer recording time.
+    pub(crate) heap_srv_cbv_uav: native::DescriptorHeap,
+    pub(crate) heap_samplers: native::DescriptorHeap,
+    pub(crate) sampler_origins: RefCell<Box<[native::CpuDescriptor]>>,
+    pub(crate) binding_infos: Vec<DescriptorBindingInfo>,
+    pub(crate) first_gpu_sampler: Cell<Option<native::GpuDescriptor>>,
+    pub(crate) first_gpu_view: Option<native::GpuDescriptor>,
+    pub(crate) raw_name: Vec<u16>,
+}
+
+impl fmt::Debug for DescriptorSet {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.write_str("DescriptorSet")
+    }
+}
+
+// TODO: is this really safe?
+unsafe impl Send for DescriptorSet {}
+unsafe impl Sync for DescriptorSet {}
+
+impl DescriptorSet {
+    pub fn srv_cbv_uav_gpu_start(&self) -> native::GpuDescriptor {
+        self.heap_srv_cbv_uav.start_gpu_descriptor()
+    }
+
+    pub fn sampler_gpu_start(&self) -> native::GpuDescriptor {
+        self.heap_samplers.start_gpu_descriptor()
+    }
+
+    pub fn sampler_offset(&self, binding: u32, last_offset: usize) -> usize {
+        let mut offset = 0;
+        for bi in &self.binding_infos[..binding as usize] {
+            if bi.content.contains(DescriptorContent::SAMPLER) {
+                offset += bi.count as usize;
+            }
+        }
+        if self.binding_infos[binding as usize]
+            .content
+            .contains(DescriptorContent::SAMPLER)
+        {
+            offset += last_offset;
+        }
+        offset
+    }
+
+    pub fn update_samplers(
+        &self,
+        heap: &DescriptorHeap,
+        origins: &RwLock<DescriptorOrigins>,
+        accum: &mut MultiCopyAccumulator,
+    ) {
+        let desc_origins = self.sampler_origins.borrow();
+        let start_index = if let Some(index) = {
+            // an explicit variable limits the lifetime of this borrow
+            let borrow = origins.read();
+            borrow.find(&*desc_origins)
+        } {
+            Some(index)
+        } else if desc_origins.iter().any(|desc| desc.ptr == 0) {
+            // the set is incomplete, so don't try to build it
+            None
+        } else {
+            let base = origins.write().grow(&*desc_origins);
+            // copy the descriptors from their origins into the new location
+            accum
+                .dst_samplers
+                .add(heap.cpu_descriptor_at(base), desc_origins.len() as u32);
+            for &origin in desc_origins.iter() {
+                accum.src_samplers.add(origin, 1);
+            }
+            Some(base)
+        };
+
+        self.first_gpu_sampler
+            .set(start_index.map(|index| heap.gpu_descriptor_at(index)));
+    }
+}
+
+#[derive(Copy, Clone)]
+pub struct DualHandle {
+    pub(crate) cpu: native::CpuDescriptor,
+    pub(crate) gpu: native::GpuDescriptor,
+    /// How large the block allocated to this handle is.
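+    /// (The size is expressed as a descriptor count, not in bytes; see
+    /// `DescriptorHeapSlice::alloc_handles`, which stores the handle count here.)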
+    pub(crate) size: u64,
+}
+
+impl fmt::Debug for DualHandle {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.write_str("DualHandle")
+    }
+}
+
+pub struct DescriptorHeap {
+    pub(crate) raw: native::DescriptorHeap,
+    pub(crate) handle_size: u64,
+    pub(crate) total_handles: u64,
+    pub(crate) start: DualHandle,
+}
+
+impl fmt::Debug for DescriptorHeap {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.write_str("DescriptorHeap")
+    }
+}
+
+impl DescriptorHeap {
+    pub(crate) fn at(&self, index: DescriptorIndex, size: u64) -> DualHandle {
+        assert!(index < self.total_handles);
+        DualHandle {
+            cpu: self.cpu_descriptor_at(index),
+            gpu: self.gpu_descriptor_at(index),
+            size,
+        }
+    }
+
+    pub(crate) fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor {
+        native::CpuDescriptor {
+            ptr: self.start.cpu.ptr + (self.handle_size * index) as usize,
+        }
+    }
+
+    pub(crate) fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor {
+        native::GpuDescriptor {
+            ptr: self.start.gpu.ptr + self.handle_size * index,
+        }
+    }
+
+    pub(crate) unsafe fn destroy(&self) {
+        self.raw.destroy();
+    }
+}
+
+/// Slice of a descriptor heap, which is allocated for a pool.
+/// Pools will create descriptor sets inside this slice.
+#[derive(Debug)]
+pub struct DescriptorHeapSlice {
+    pub(crate) heap: native::DescriptorHeap, // Weak reference, owned by descriptor heap.
+    pub(crate) start: DualHandle,
+    pub(crate) handle_size: u64,
+    pub(crate) range_allocator: RangeAllocator<u64>,
+}
+
+impl DescriptorHeapSlice {
+    pub(crate) fn alloc_handles(&mut self, count: u64) -> Option<DualHandle> {
+        self.range_allocator
+            .allocate_range(count)
+            .ok()
+            .map(|range| DualHandle {
+                cpu: native::CpuDescriptor {
+                    ptr: self.start.cpu.ptr + (self.handle_size * range.start) as usize,
+                },
+                gpu: native::GpuDescriptor {
+                    ptr: self.start.gpu.ptr + (self.handle_size * range.start) as u64,
+                },
+                size: count,
+            })
+    }
+
+    /// Free handles previously given out by this `DescriptorHeapSlice`.
+    /// Do not use this with handles not given out by this `DescriptorHeapSlice`.
+    pub(crate) fn free_handles(&mut self, handle: DualHandle) {
+        let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size;
+        let handle_range = start..start + handle.size;
+        self.range_allocator.free_range(handle_range);
+    }
+
+    /// Clear the allocator.
+    pub(crate) fn clear(&mut self) {
+        self.range_allocator.reset();
+    }
+}
+
+#[derive(Debug)]
+pub struct DescriptorPool {
+    pub(crate) heap_raw_sampler: native::DescriptorHeap,
+    pub(crate) heap_srv_cbv_uav: DescriptorHeapSlice,
+    pub(crate) pools: Vec<pso::DescriptorRangeDesc>,
+    pub(crate) max_size: u64,
+}
+unsafe impl Send for DescriptorPool {}
+unsafe impl Sync for DescriptorPool {}
+
+impl pso::DescriptorPool<Backend> for DescriptorPool {
+    unsafe fn allocate_set(
+        &mut self,
+        layout: &DescriptorSetLayout,
+    ) -> Result<DescriptorSet, pso::AllocationError> {
+        let mut binding_infos = Vec::new();
+        let mut first_gpu_view = None;
+        let mut num_samplers = 0;
+
+        info!("allocate_set");
+        for binding in &layout.bindings {
+            // Add dummy bindings in case of out-of-range or sparse binding layout.
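+            // (Bindings may be declared sparsely, e.g. only #0 and #3, so any
+            // skipped indices are filled with default, empty infos first.)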
+            while binding_infos.len() <= binding.binding as usize {
+                binding_infos.push(DescriptorBindingInfo::default());
+            }
+            let content = DescriptorContent::from(binding.ty);
+            debug!("\tbinding {:?} with content {:?}", binding, content);
+
+            let (view_range, dynamic_descriptors) = if content.is_dynamic() {
+                let descriptor = DynamicDescriptor {
+                    content: content ^ DescriptorContent::DYNAMIC,
+                    gpu_buffer_location: 0,
+                };
+                (None, vec![descriptor; binding.count])
+            } else {
+                if content.contains(DescriptorContent::SAMPLER) {
+                    num_samplers += binding.count;
+                }
+
+                let view_range = if content.intersects(DescriptorContent::VIEW) {
+                    let count = if content.contains(DescriptorContent::SRV | DescriptorContent::UAV)
+                    {
+                        2 * binding.count as u64
+                    } else {
+                        binding.count as u64
+                    };
+                    debug!("\tview handles: {}", count);
+                    let handle = self
+                        .heap_srv_cbv_uav
+                        .alloc_handles(count)
+                        .ok_or(pso::AllocationError::OutOfPoolMemory)?;
+                    if first_gpu_view.is_none() {
+                        first_gpu_view = Some(handle.gpu);
+                    }
+                    Some(DescriptorRange {
+                        handle,
+                        ty: binding.ty,
+                        handle_size: self.heap_srv_cbv_uav.handle_size,
+                    })
+                } else {
+                    None
+                };
+
+                (view_range, Vec::new())
+            };
+
+            binding_infos[binding.binding as usize] = DescriptorBindingInfo {
+                count: binding.count as _,
+                view_range,
+                dynamic_descriptors: UnsafeCell::new(dynamic_descriptors),
+                content,
+            };
+        }
+
+        Ok(DescriptorSet {
+            heap_srv_cbv_uav: self.heap_srv_cbv_uav.heap,
+            heap_samplers: self.heap_raw_sampler,
+            sampler_origins: RefCell::new(
+                vec![native::CpuDescriptor { ptr: 0 }; num_samplers].into_boxed_slice(),
+            ),
+            binding_infos,
+            first_gpu_sampler: Cell::new(None),
+            first_gpu_view,
+            raw_name: Vec::new(),
+        })
+    }
+
+    unsafe fn free<I>(&mut self, descriptor_sets: I)
+    where
+        I: IntoIterator<Item = DescriptorSet>,
+    {
+        for descriptor_set in descriptor_sets {
+            for binding_info in descriptor_set.binding_infos {
+                if let Some(view_range) = binding_info.view_range {
+                    if binding_info.content.intersects(DescriptorContent::VIEW) {
+                        self.heap_srv_cbv_uav.free_handles(view_range.handle);
+                    }
+                }
+            }
+        }
+    }
+
+    unsafe fn reset(&mut self) {
+        self.heap_srv_cbv_uav.clear();
+    }
+}
+
+#[derive(Debug)]
+pub struct QueryPool {
+    pub(crate) raw: native::QueryHeap,
+    pub(crate) ty: native::QueryHeapType,
+}
+
+unsafe impl Send for QueryPool {}
+unsafe impl Sync for QueryPool {}
diff --git a/third_party/rust/gfx-backend-dx12/src/root_constants.rs b/third_party/rust/gfx-backend-dx12/src/root_constants.rs
new file mode 100644
index 0000000000..6625cb2691
--- /dev/null
+++ b/third_party/rust/gfx-backend-dx12/src/root_constants.rs
@@ -0,0 +1,298 @@
+//! Processing push constant ranges
+//!
+//! This module provides utility functions to make push constants root signature
+//! compatible. Root constants are non-overlapping; therefore, the push constant
+//! ranges passed at pipeline layout creation need to be `split` into disjunct
+//! ranges. The disjunct ranges can then be converted into root signature entries.
+
+use hal::pso;
+use std::{borrow::Borrow, cmp::Ordering, ops::Range};
+
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct RootConstant {
+    pub stages: pso::ShaderStageFlags,
+    pub range: Range<u32>,
+}
+
+impl RootConstant {
+    fn is_empty(&self) -> bool {
+        self.range.end <= self.range.start
+    }
+
+    // Divide a root constant into two separate ranges depending on the overlap
+    // with another root constant.
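+    // For example, dividing `0..20` at an `other` range starting at 8 yields
+    // `left = 0..8` and `right = 8..20` (illustrative values only).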
+    fn divide(self, other: &RootConstant) -> (RootConstant, RootConstant) {
+        assert!(self.range.start <= other.range.start);
+        let left = RootConstant {
+            stages: self.stages,
+            range: self.range.start..other.range.start,
+        };
+
+        let right = RootConstant {
+            stages: self.stages,
+            range: other.range.start..self.range.end,
+        };
+
+        (left, right)
+    }
+}
+
+impl PartialOrd for RootConstant {
+    fn partial_cmp(&self, other: &RootConstant) -> Option<Ordering> {
+        Some(
+            self.range
+                .start
+                .cmp(&other.range.start)
+                .then(self.range.end.cmp(&other.range.end))
+                .then(self.stages.cmp(&other.stages)),
+        )
+    }
+}
+
+impl Ord for RootConstant {
+    fn cmp(&self, other: &RootConstant) -> Ordering {
+        self.partial_cmp(other).unwrap()
+    }
+}
+
+pub fn split<I>(ranges: I) -> Vec<RootConstant>
+where
+    I: IntoIterator,
+    I::Item: Borrow<(pso::ShaderStageFlags, Range<u32>)>,
+{
+    // Frontier of unexplored root constant ranges, sorted in descending order
+    // (less element shifting for the Vec) with respect to the start of the ranges.
+    let mut ranges = into_vec(ranges);
+    ranges.sort_by(|a, b| b.cmp(a));
+
+    // Stores the resulting disjunct root constant ranges.
+    let mut disjunct = Vec::with_capacity(ranges.len());
+
+    while let Some(cur) = ranges.pop() {
+        // Run through all unexplored ranges. After each run the frontier will be
+        // re-sorted!
+        //
+        // Case 1: Single element remaining
+        //     Push it to the disjunct list, done.
+        // Case 2: At least two ranges, which possibly overlap
+        //     Divide the first range into a left set and right set, depending
+        //     on the overlap of the two ranges:
+        //     Range 1: |---- left ---||--- right ---|
+        //     Range 2:                |--------...
+        if let Some(mut next) = ranges.pop() {
+            let (left, mut right) = cur.divide(&next);
+            if !left.is_empty() {
+                // The left part is, by definition, disjunct to all other ranges.
+                // Push all remaining pieces onto the frontier, to be handled by
+                // the next iteration.
+                disjunct.push(left);
+                ranges.push(next);
+                if !right.is_empty() {
+                    ranges.push(right);
+                }
+            } else if !right.is_empty() {
+                // If the left part is empty, this means that both ranges have the
+                // same start value. The right segment is a candidate for a disjunct
+                // segment, but we haven't checked it against the other ranges so far.
+                // Therefore, we push it onto the frontier again, but with the
+                // stage flags from the overlapping segment added.
+                // The second range will be shrunk to be disjunct with the pushed
+                // segment, as we have already processed it.
+                // In the next iteration we will look again at the pushed right
+                // segment and compare it to other elements on the list until we
+                // have a small enough disjunct segment, which doesn't overlap
+                // with any part of the frontier.
+ right.stages |= next.stages; + next.range.start = right.range.end; + ranges.push(right); + if !next.is_empty() { + ranges.push(next); + } + } + } else { + disjunct.push(cur); + } + ranges.sort_by(|a, b| b.cmp(a)); + } + + disjunct +} + +fn into_vec<I>(ranges: I) -> Vec<RootConstant> +where + I: IntoIterator, + I::Item: Borrow<(pso::ShaderStageFlags, Range<u32>)>, +{ + ranges + .into_iter() + .map(|borrowable| { + let &(stages, ref range) = borrowable.borrow(); + debug_assert_eq!(range.start % 4, 0); + debug_assert_eq!(range.end % 4, 0); + RootConstant { + stages, + range: range.start / 4..range.end / 4, + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_single() { + let range = &[(pso::ShaderStageFlags::VERTEX, 0..12)]; + assert_eq!(into_vec(range), split(range)); + } + + #[test] + fn test_overlap_1() { + // Case: + // |----------| + // |------------| + let ranges = &[ + (pso::ShaderStageFlags::VERTEX, 0..12), + (pso::ShaderStageFlags::FRAGMENT, 8..16), + ]; + + let reference = vec![ + RootConstant { + stages: pso::ShaderStageFlags::VERTEX, + range: 0..2, + }, + RootConstant { + stages: pso::ShaderStageFlags::VERTEX | pso::ShaderStageFlags::FRAGMENT, + range: 2..3, + }, + RootConstant { + stages: pso::ShaderStageFlags::FRAGMENT, + range: 3..4, + }, + ]; + assert_eq!(reference, split(ranges)); + } + + #[test] + fn test_overlap_2() { + // Case: + // |-------------------| + // |------------| + let ranges = &[ + (pso::ShaderStageFlags::VERTEX, 0..20), + (pso::ShaderStageFlags::FRAGMENT, 8..16), + ]; + + let reference = vec![ + RootConstant { + stages: pso::ShaderStageFlags::VERTEX, + range: 0..2, + }, + RootConstant { + stages: pso::ShaderStageFlags::VERTEX | pso::ShaderStageFlags::FRAGMENT, + range: 2..4, + }, + RootConstant { + stages: pso::ShaderStageFlags::VERTEX, + range: 4..5, + }, + ]; + assert_eq!(reference, split(ranges)); + } + + #[test] + fn test_overlap_4() { + // Case: + // |--------------| + // |------------| + let ranges = &[ + (pso::ShaderStageFlags::VERTEX, 0..20), + (pso::ShaderStageFlags::FRAGMENT, 0..16), + ]; + + let reference = vec![ + RootConstant { + stages: pso::ShaderStageFlags::VERTEX | pso::ShaderStageFlags::FRAGMENT, + range: 0..4, + }, + RootConstant { + stages: pso::ShaderStageFlags::VERTEX, + range: 4..5, + }, + ]; + assert_eq!(reference, split(ranges)); + } + + #[test] + fn test_equal() { + // Case: + // |-----| + // |-----| + let ranges = &[ + (pso::ShaderStageFlags::VERTEX, 0..16), + (pso::ShaderStageFlags::FRAGMENT, 0..16), + ]; + + let reference = vec![RootConstant { + stages: pso::ShaderStageFlags::VERTEX | pso::ShaderStageFlags::FRAGMENT, + range: 0..4, + }]; + assert_eq!(reference, split(ranges)); + } + + #[test] + fn test_disjunct() { + // Case: + // |------| + // |------------| + let ranges = &[ + (pso::ShaderStageFlags::VERTEX, 0..12), + (pso::ShaderStageFlags::FRAGMENT, 12..16), + ]; + assert_eq!(into_vec(ranges), split(ranges)); + } + + #[test] + fn test_complex() { + let ranges = &[ + (pso::ShaderStageFlags::VERTEX, 8..40), + (pso::ShaderStageFlags::FRAGMENT, 0..20), + (pso::ShaderStageFlags::GEOMETRY, 24..40), + (pso::ShaderStageFlags::HULL, 16..28), + ]; + + let reference = vec![ + RootConstant { + stages: pso::ShaderStageFlags::FRAGMENT, + range: 0..2, + }, + RootConstant { + stages: pso::ShaderStageFlags::VERTEX | pso::ShaderStageFlags::FRAGMENT, + range: 2..4, + }, + RootConstant { + stages: pso::ShaderStageFlags::VERTEX + | pso::ShaderStageFlags::FRAGMENT + | pso::ShaderStageFlags::HULL, + range: 
4..5, + }, + RootConstant { + stages: pso::ShaderStageFlags::VERTEX | pso::ShaderStageFlags::HULL, + range: 5..6, + }, + RootConstant { + stages: pso::ShaderStageFlags::VERTEX + | pso::ShaderStageFlags::GEOMETRY + | pso::ShaderStageFlags::HULL, + range: 6..7, + }, + RootConstant { + stages: pso::ShaderStageFlags::VERTEX | pso::ShaderStageFlags::GEOMETRY, + range: 7..10, + }, + ]; + + assert_eq!(reference, split(ranges)); + } +} diff --git a/third_party/rust/gfx-backend-dx12/src/window.rs b/third_party/rust/gfx-backend-dx12/src/window.rs new file mode 100644 index 0000000000..c3de1a24be --- /dev/null +++ b/third_party/rust/gfx-backend-dx12/src/window.rs @@ -0,0 +1,350 @@ +use std::{borrow::Borrow, fmt, mem, os::raw::c_void}; + +use winapi::{ + shared::{ + dxgi, dxgi1_4, dxgi1_5, dxgitype, + minwindef::{BOOL, FALSE, TRUE}, + windef::{HWND, RECT}, + winerror, + }, + um::{d3d12, synchapi, winbase, winnt::HANDLE, winuser::GetClientRect}, +}; + +use crate::{conv, resource as r, Backend, Device, Instance, PhysicalDevice, QueueFamily}; +use hal::{device::Device as _, format as f, image as i, window as w}; + +impl Instance { + pub fn create_surface_from_hwnd(&self, hwnd: *mut c_void) -> Surface { + Surface { + factory: self.factory, + wnd_handle: hwnd as *mut _, + presentation: None, + } + } +} + +#[derive(Debug)] +struct Presentation { + swapchain: Swapchain, + format: f::Format, + size: w::Extent2D, + mode: w::PresentMode, +} + +pub struct Surface { + pub(crate) factory: native::WeakPtr<dxgi1_4::IDXGIFactory4>, + pub(crate) wnd_handle: HWND, + presentation: Option<Presentation>, +} + +impl fmt::Debug for Surface { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("Surface") + } +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +impl Surface { + pub(crate) unsafe fn present(&mut self, image: SwapchainImage) -> Result<(), w::PresentError> { + let present = self.presentation.as_mut().unwrap(); + let sc = &mut present.swapchain; + sc.acquired_count -= 1; + + let expected_index = sc.inner.GetCurrentBackBufferIndex() as w::SwapImageIndex; + if image.index != expected_index { + log::warn!( + "Expected frame {}, got frame {}", + expected_index, + image.index + ); + return Err(w::PresentError::OutOfDate); + } + + let (interval, flags) = match present.mode { + w::PresentMode::IMMEDIATE => (0, dxgi::DXGI_PRESENT_ALLOW_TEARING), + w::PresentMode::FIFO => (1, 0), + _ => (1, 0), // Surface was created with an unsupported present mode, fall back to FIFO + }; + + sc.inner.Present(interval, flags); + Ok(()) + } +} + +impl w::Surface<Backend> for Surface { + fn supports_queue_family(&self, queue_family: &QueueFamily) -> bool { + match queue_family { + &QueueFamily::Present => true, + _ => false, + } + } + + fn capabilities(&self, _physical_device: &PhysicalDevice) -> w::SurfaceCapabilities { + let current_extent = unsafe { + let mut rect: RECT = mem::zeroed(); + if GetClientRect(self.wnd_handle as *mut _, &mut rect as *mut RECT) == 0 { + panic!("GetClientRect failed"); + } + Some(w::Extent2D { + width: (rect.right - rect.left) as u32, + height: (rect.bottom - rect.top) as u32, + }) + }; + + let allow_tearing = unsafe { + let (f5, hr) = self.factory.cast::<dxgi1_5::IDXGIFactory5>(); + if winerror::SUCCEEDED(hr) { + let mut allow_tearing: BOOL = FALSE; + let hr = f5.CheckFeatureSupport( + dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, + &mut allow_tearing as *mut _ as *mut _, + mem::size_of::<BOOL>() as _, + ); + + f5.destroy(); + + winerror::SUCCEEDED(hr) && 
allow_tearing == TRUE + } else { + false + } + }; + + let mut present_modes = w::PresentMode::FIFO; + if allow_tearing { + present_modes |= w::PresentMode::IMMEDIATE; + } + + w::SurfaceCapabilities { + present_modes, + composite_alpha_modes: w::CompositeAlphaMode::OPAQUE, //TODO + image_count: 2..=16, // we currently use a flip effect which supports 2..=16 buffers + current_extent, + extents: w::Extent2D { + width: 16, + height: 16, + }..=w::Extent2D { + width: 4096, + height: 4096, + }, + max_image_layers: 1, + usage: i::Usage::COLOR_ATTACHMENT | i::Usage::TRANSFER_SRC | i::Usage::TRANSFER_DST, + } + } + + fn supported_formats(&self, _physical_device: &PhysicalDevice) -> Option<Vec<f::Format>> { + Some(vec![ + f::Format::Bgra8Srgb, + f::Format::Bgra8Unorm, + f::Format::Rgba8Srgb, + f::Format::Rgba8Unorm, + f::Format::A2b10g10r10Unorm, + f::Format::Rgba16Sfloat, + ]) + } +} + +#[derive(Debug)] +pub struct SwapchainImage { + index: w::SwapImageIndex, + image: r::Image, + view: r::ImageView, +} + +impl Borrow<r::Image> for SwapchainImage { + fn borrow(&self) -> &r::Image { + &self.image + } +} + +impl Borrow<r::ImageView> for SwapchainImage { + fn borrow(&self) -> &r::ImageView { + &self.view + } +} + +impl w::PresentationSurface<Backend> for Surface { + type SwapchainImage = SwapchainImage; + + unsafe fn configure_swapchain( + &mut self, + device: &Device, + config: w::SwapchainConfig, + ) -> Result<(), w::CreationError> { + assert!(i::Usage::COLOR_ATTACHMENT.contains(config.image_usage)); + + let swapchain = match self.presentation.take() { + Some(present) => { + if present.format == config.format && present.size == config.extent { + self.presentation = Some(present); + return Ok(()); + } + // can't have image resources in flight used by GPU + device.wait_idle().unwrap(); + + let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + if config.present_mode.contains(w::PresentMode::IMMEDIATE) { + flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + } + + let inner = present.swapchain.release_resources(); + let result = inner.ResizeBuffers( + config.image_count, + config.extent.width, + config.extent.height, + conv::map_format_nosrgb(config.format).unwrap(), + flags, + ); + if result != winerror::S_OK { + error!("ResizeBuffers failed with 0x{:x}", result as u32); + return Err(w::CreationError::WindowInUse(hal::device::WindowInUse)); + } + inner + } + None => { + let (swapchain, _) = + device.create_swapchain_impl(&config, self.wnd_handle, self.factory.clone())?; + swapchain + } + }; + + // Disable automatic Alt+Enter handling by DXGI. 
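+        // The DXGI_MWA_* flag values below match the definitions in dxgi.h;
+        // they are re-declared locally here, presumably because winapi does
+        // not expose them.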
+ const DXGI_MWA_NO_WINDOW_CHANGES: u32 = 1; + const DXGI_MWA_NO_ALT_ENTER: u32 = 2; + self.factory.MakeWindowAssociation( + self.wnd_handle, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER, + ); + + self.presentation = Some(Presentation { + swapchain: device.wrap_swapchain(swapchain, &config), + format: config.format, + size: config.extent, + mode: config.present_mode, + }); + Ok(()) + } + + unsafe fn unconfigure_swapchain(&mut self, device: &Device) { + if let Some(mut present) = self.presentation.take() { + let _ = present.swapchain.wait(winbase::INFINITE); + let _ = device.wait_idle(); //TODO: this shouldn't be needed, + // but it complains that the queue is still used otherwise + let inner = present.swapchain.release_resources(); + inner.destroy(); + } + } + + unsafe fn acquire_image( + &mut self, + timeout_ns: u64, + ) -> Result<(SwapchainImage, Option<w::Suboptimal>), w::AcquireError> { + let present = self.presentation.as_mut().unwrap(); + let sc = &mut present.swapchain; + + sc.wait((timeout_ns / 1_000_000) as u32)?; + + let base_index = sc.inner.GetCurrentBackBufferIndex() as usize; + let index = (base_index + sc.acquired_count) % sc.resources.len(); + sc.acquired_count += 1; + let resource = sc.resources[index]; + + let kind = i::Kind::D2(present.size.width, present.size.height, 1, 1); + let base_format = present.format.base_format(); + let dxgi_format = conv::map_format(present.format).unwrap(); + let rtv = sc.rtv_heap.at(index as _, 0).cpu; + + let descriptor = d3d12::D3D12_RESOURCE_DESC { + Dimension: d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE2D, + Alignment: 0, + Width: present.size.width as _, + Height: present.size.height as _, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgi_format, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12::D3D12_TEXTURE_LAYOUT_UNKNOWN, + Flags: d3d12::D3D12_RESOURCE_FLAG_NONE, //TODO? 
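+            // Note: this whole descriptor is reconstructed by hand because the
+            // swapchain buffers come from DXGI rather than `Device::create_image`.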
+        };
+
+        let image = r::ImageBound {
+            resource,
+            place: r::Place::Swapchain {},
+            surface_type: base_format.0,
+            kind,
+            mip_levels: 1,
+            usage: sc.usage,
+            default_view_format: None,
+            view_caps: i::ViewCapabilities::empty(),
+            descriptor,
+            clear_cv: Vec::new(), //TODO
+            clear_dv: Vec::new(),
+            clear_sv: Vec::new(),
+            requirements: hal::memory::Requirements {
+                size: 0,
+                alignment: 1,
+                type_mask: 0,
+            },
+        };
+
+        let swapchain_image = SwapchainImage {
+            index: index as _,
+            image: r::Image::Bound(image),
+            view: r::ImageView {
+                resource,
+                handle_srv: None,
+                handle_rtv: r::RenderTargetHandle::Swapchain(rtv),
+                handle_uav: None,
+                handle_dsv: None,
+                dxgi_format,
+                num_levels: 1,
+                mip_levels: (0, 1),
+                layers: (0, 1),
+                kind,
+            },
+        };
+
+        Ok((swapchain_image, None))
+    }
+}
+
+#[derive(Debug)]
+pub struct Swapchain {
+    pub(crate) inner: native::WeakPtr<dxgi1_4::IDXGISwapChain3>,
+    #[allow(dead_code)]
+    pub(crate) rtv_heap: r::DescriptorHeap,
+    // need to associate raw image pointers with the swapchain so they can be properly released
+    // when the swapchain is destroyed
+    pub(crate) resources: Vec<native::Resource>,
+    pub(crate) waitable: HANDLE,
+    pub(crate) usage: i::Usage,
+    pub(crate) acquired_count: usize,
+}
+
+impl Swapchain {
+    pub(crate) unsafe fn release_resources(self) -> native::WeakPtr<dxgi1_4::IDXGISwapChain3> {
+        for resource in &self.resources {
+            resource.destroy();
+        }
+        self.rtv_heap.destroy();
+        self.inner
+    }
+
+    pub(crate) fn wait(&mut self, timeout_ms: u32) -> Result<(), w::AcquireError> {
+        match unsafe { synchapi::WaitForSingleObject(self.waitable, timeout_ms) } {
+            winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => {
+                Err(w::AcquireError::DeviceLost(hal::device::DeviceLost))
+            }
+            winbase::WAIT_OBJECT_0 => Ok(()),
+            winerror::WAIT_TIMEOUT => Err(w::AcquireError::Timeout),
+            hr => panic!("Unexpected wait status 0x{:X}", hr),
+        }
+    }
+}
+
+unsafe impl Send for Swapchain {}
+unsafe impl Sync for Swapchain {}
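
For reference, a minimal sketch of how the `split` function from `root_constants.rs` above is meant to be used. It mirrors the module's own `test_overlap_1` case; the wrapper `fn example` and the import paths are illustrative assumptions, not part of the vendored crate.

```rust
use hal::pso::ShaderStageFlags;

use crate::root_constants::{split, RootConstant};

fn example() {
    // Two overlapping push-constant ranges, in bytes, as they would be
    // passed at pipeline layout creation time.
    let ranges = [
        (ShaderStageFlags::VERTEX, 0..12),
        (ShaderStageFlags::FRAGMENT, 8..16),
    ];

    // `split` rescales byte offsets into 32-bit constant indices (divide
    // by 4) and returns disjunct ranges with merged stage flags.
    let disjunct = split(&ranges);
    assert_eq!(
        disjunct,
        vec![
            RootConstant {
                stages: ShaderStageFlags::VERTEX,
                range: 0..2,
            },
            RootConstant {
                stages: ShaderStageFlags::VERTEX | ShaderStageFlags::FRAGMENT,
                range: 2..3,
            },
            RootConstant {
                stages: ShaderStageFlags::FRAGMENT,
                range: 3..4,
            },
        ]
    );
}
```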