diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/gfx-backend-metal/src | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1. (refs: upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/gfx-backend-metal/src')
-rw-r--r-- | third_party/rust/gfx-backend-metal/src/command.rs | 4933 | ||||
-rw-r--r-- | third_party/rust/gfx-backend-metal/src/conversions.rs | 1242 | ||||
-rw-r--r-- | third_party/rust/gfx-backend-metal/src/device.rs | 3262 | ||||
-rw-r--r-- | third_party/rust/gfx-backend-metal/src/internal.rs | 522 | ||||
-rw-r--r-- | third_party/rust/gfx-backend-metal/src/lib.rs | 1126 | ||||
-rw-r--r-- | third_party/rust/gfx-backend-metal/src/native.rs | 1089 | ||||
-rw-r--r-- | third_party/rust/gfx-backend-metal/src/soft.rs | 524 | ||||
-rw-r--r-- | third_party/rust/gfx-backend-metal/src/window.rs | 286 |
8 files changed, 12984 insertions, 0 deletions
diff --git a/third_party/rust/gfx-backend-metal/src/command.rs b/third_party/rust/gfx-backend-metal/src/command.rs new file mode 100644 index 0000000000..8c4334e2b1 --- /dev/null +++ b/third_party/rust/gfx-backend-metal/src/command.rs @@ -0,0 +1,4933 @@ +use crate::{ + conversions as conv, + internal::{BlitVertex, ClearKey, ClearVertex}, + native, soft, window, AsNative, Backend, BufferPtr, OnlineRecording, PrivateDisabilities, + ResourceIndex, ResourcePtr, SamplerPtr, Shared, TexturePtr, MAX_BOUND_DESCRIPTOR_SETS, + MAX_COLOR_ATTACHMENTS, +}; + +use hal::{ + buffer, command as com, + device::OutOfMemory, + format::{Aspects, FormatDesc}, + image as i, memory, + pass::AttachmentLoadOp, + pso, query, + window::{PresentError, Suboptimal}, + DrawCount, IndexCount, IndexType, InstanceCount, TaskCount, VertexCount, VertexOffset, + WorkGroupCount, +}; + +use arrayvec::ArrayVec; +use auxil::{FastHashMap, ShaderStage}; +use block::ConcreteBlock; +use cocoa_foundation::foundation::{NSRange, NSUInteger}; +use copyless::VecHelper; +#[cfg(feature = "dispatch")] +use dispatch; +use foreign_types::ForeignType; +use metal::{self, MTLIndexType, MTLPrimitiveType, MTLScissorRect, MTLSize, MTLViewport}; +use objc::rc::autoreleasepool; +use parking_lot::Mutex; + +#[cfg(feature = "dispatch")] +use std::fmt; +use std::{ + borrow::Borrow, + cell::RefCell, + iter, mem, + ops::{Deref, Range}, + ptr, slice, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + thread, time, +}; + +const INTERNAL_LABELS: bool = cfg!(debug_assertions); +const WORD_SIZE: usize = 4; +const WORD_ALIGNMENT: u64 = WORD_SIZE as _; +/// Number of frames to average when reporting the performance counters. 
const COUNTERS_REPORT_WINDOW: usize = 0;

/// Wrapper whose `Debug` output is always the literal `<hidden>`,
/// so the wrapped value needs no `Debug` implementation of its own.
#[cfg(feature = "dispatch")]
struct NoDebug<T>(T);
#[cfg(feature = "dispatch")]
impl<T> fmt::Debug for NoDebug<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "<hidden>")
    }
}

/// A Metal command queue together with the bookkeeping of a "virtual pool"
/// of command buffers spawned from it.
#[derive(Debug)]
pub struct QueueInner {
    /// The underlying Metal queue.
    raw: metal::CommandQueue,
    /// `start` is incremented by `spawn` and decremented by `release`;
    /// `end` is the pool size given to `new` (or 64 when none was given).
    reserve: Range<usize>,
    /// When set, command buffers are created with the retaining constructor
    /// (`new_command_buffer`) instead of the unretained-references one.
    debug_retain_references: bool,
}

/// Marker returned by `QueueInner::spawn` that is expected to be handed back
/// to `QueueInner::release`, which clears `active`.
#[must_use]
#[derive(Debug)]
pub struct Token {
    active: bool,
}

impl Drop for Token {
    fn drop(&mut self) {
        // poor man's linear type...
        // Dropping a still-active token is reported in debug builds only,
        // and not while the thread is already unwinding from a panic.
        if !thread::panicking() {
            debug_assert!(!self.active);
        }
    }
}

impl QueueInner {
    /// Creates the queue on `device`.
    ///
    /// With `Some(count)` the Metal queue is created with that maximum command
    /// buffer count and buffers are spawned with unretained references; with
    /// `None` a default queue is created, the reserve is `0..64`, and buffers
    /// retain their references.
    pub(crate) fn new(device: &metal::DeviceRef, pool_size: Option<usize>) -> Self {
        match pool_size {
            Some(count) => QueueInner {
                raw: device.new_command_queue_with_max_command_buffer_count(count as u64),
                reserve: 0..count,
                debug_retain_references: false,
            },
            None => QueueInner {
                raw: device.new_command_queue(),
                reserve: 0..64,
                debug_retain_references: true,
            },
        }
    }

    /// Spawns a command buffer from a virtual pool.
    ///
    /// Returns an owned command buffer plus the `Token` that has to be
    /// given back through `release`.
    pub(crate) fn spawn(&mut self) -> (metal::CommandBuffer, Token) {
        self.reserve.start += 1;
        // The borrowed buffer is turned into an owned one inside an autorelease pool.
        let cmd_buf = autoreleasepool(|| self.spawn_temp().to_owned());
        (cmd_buf, Token { active: true })
    }

    /// Creates a borrowed command buffer without touching the reserve counter.
    pub(crate) fn spawn_temp(&self) -> &metal::CommandBufferRef {
        if self.debug_retain_references {
            self.raw.new_command_buffer()
        } else {
            self.raw.new_command_buffer_with_unretained_references()
        }
    }

    /// Returns a command buffer to a virtual pool.
    pub(crate) fn release(&mut self, mut token: Token) {
        // Deactivate the token so that its `Drop` check passes.
        token.active = false;
        self.reserve.start -= 1;
    }

    /// Block until GPU is idle.
    ///
    /// Implemented by committing an otherwise empty command buffer and
    /// waiting for it to complete.
    pub(crate) fn wait_idle(queue: &Mutex<Self>) {
        debug!("waiting for idle");
        // note: we deliberately don't hold the Mutex lock while waiting,
        // since the completion handlers need to access it.
        let (cmd_buf, token) = queue.lock().spawn();
        if INTERNAL_LABELS {
            cmd_buf.set_label("empty");
        }
        cmd_buf.commit();
        cmd_buf.wait_until_completed();
        queue.lock().release(token);
    }
}

/// Command buffers whose commit is postponed until every event in
/// `wait_events` has been observed as set (see `QueueBlocker::triage`).
#[derive(Debug)]
pub struct BlockedSubmission {
    wait_events: Vec<Arc<AtomicBool>>,
    command_buffers: Vec<metal::CommandBuffer>,
}

/// Class responsible for keeping the state of submissions between the
/// requested user submission that is blocked by a host event, and
/// setting the event itself on the host.
#[derive(Debug, Default)]
pub struct QueueBlocker {
    submissions: Vec<BlockedSubmission>,
}

impl QueueBlocker {
    /// Commits `cmd_buffer` right away when nothing is blocked; otherwise
    /// queues an owned handle behind the most recent blocked submission.
    fn submit_impl(&mut self, cmd_buffer: &metal::CommandBufferRef) {
        match self.submissions.last_mut() {
            Some(blocked) => blocked.command_buffers.push(cmd_buffer.to_owned()),
            None => cmd_buffer.commit(),
        }
    }

    /// Re-checks the oldest blocked submission: events that are already set
    /// are dropped from its wait list, and once the list is empty the
    /// submission is removed and its command buffers are committed.
    /// At most one submission is unblocked per call.
    pub(crate) fn triage(&mut self) {
        // clean up the relevant blocks
        let done = {
            let blocked = match self.submissions.first_mut() {
                Some(blocked) => blocked,
                None => return,
            };
            blocked.wait_events.retain(|ev| !ev.load(Ordering::Acquire));
            blocked.wait_events.is_empty()
        };

        // execute unblocked command buffers
        if done {
            let blocked = self.submissions.remove(0);
            for cmd_buf in blocked.command_buffers {
                cmd_buf.commit();
            }
        }
    }
}

/// Free list of render pass descriptors available for re-use.
#[derive(Debug, Default)]
struct RenderPassDescriptorCache {
    spare_descriptors: Vec<metal::RenderPassDescriptor>,
}

// NOTE(review): the invariant that makes these impls sound is not visible
// in this part of the file -- confirm before relying on it.
#[cfg(feature = "dispatch")]
unsafe impl Send for RenderPassDescriptorCache {}
#[cfg(feature = "dispatch")]
unsafe impl Sync for RenderPassDescriptorCache {}

impl RenderPassDescriptorCache {
    /// Pops a spare descriptor, or creates a new one with the shared
    /// visibility result buffer already attached.
    fn alloc(&mut self, shared: &Shared) -> metal::RenderPassDescriptor {
        if let Some(rp_desc) = self.spare_descriptors.pop() {
            rp_desc
        } else {
            let rp_desc = metal::RenderPassDescriptor::new();
            rp_desc.set_visibility_result_buffer(Some(&shared.visibility.buffer));
            rp_desc.to_owned()
        }
    }

    /// Clears the attachment textures and slices of `rp_desc` and stores it
    /// for a later `alloc`. The visibility result buffer is left untouched.
    fn free(&mut self, rp_desc: metal::RenderPassDescriptor) {
        rp_desc.set_render_target_array_length(0);
        for i in 0..MAX_COLOR_ATTACHMENTS {
            let desc = rp_desc.color_attachments().object_at(i as _).unwrap();
            desc.set_texture(None);
            desc.set_resolve_texture(None);
            desc.set_slice(0);
        }
        if let Some(desc) = rp_desc.depth_attachment() {
            desc.set_texture(None);
            desc.set_slice(0);
        }
        if let Some(desc) = rp_desc.stencil_attachment() {
            desc.set_texture(None);
            desc.set_slice(0);
        }
        self.spare_descriptors.push(rp_desc);
    }
}

/// State shared between a command pool and the command buffers that hold
/// a reference to it.
#[derive(Debug)]
struct PoolShared {
    online_recording: OnlineRecording,
    render_pass_descriptors: Mutex<RenderPassDescriptorCache>,
    /// Only populated for `OnlineRecording::Remote` (see `CommandPool::new`).
    #[cfg(feature = "dispatch")]
    dispatch_queue: Option<NoDebug<dispatch::Queue>>,
}

type CommandBufferInnerPtr = Arc<RefCell<CommandBufferInner>>;

#[derive(Debug)]
pub struct CommandPool {
    shared: Arc<Shared>,
    allocated: Vec<CommandBufferInnerPtr>,
    pool_shared: Arc<PoolShared>,
}

// NOTE(review): justification for these impls is not visible here -- confirm.
unsafe impl Send for CommandPool {}
unsafe impl Sync for CommandPool {}

impl CommandPool {
    /// Creates an empty pool for the given recording mode.
    ///
    /// With the `dispatch` feature, a global dispatch queue of the requested
    /// priority is obtained for `OnlineRecording::Remote`; the other modes
    /// get no dispatch queue.
    pub(crate) fn new(shared: &Arc<Shared>, online_recording: OnlineRecording) -> Self {
        let pool_shared = PoolShared {
            #[cfg(feature = "dispatch")]
            dispatch_queue: match online_recording {
                OnlineRecording::Immediate | OnlineRecording::Deferred => None,
                OnlineRecording::Remote(ref priority) => {
                    Some(NoDebug(dispatch::Queue::global(priority.clone())))
                }
            },
            online_recording,
            render_pass_descriptors: Mutex::new(RenderPassDescriptorCache::default()),
        };
        CommandPool {
            shared: Arc::clone(shared),
            allocated: Vec::new(),
            pool_shared: Arc::new(pool_shared),
        }
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    shared: Arc<Shared>,
    pool_shared: Arc<PoolShared>,
    inner: CommandBufferInnerPtr,
    state: State,
    temp: Temp,
    pub name: String,
}

// NOTE(review): justification for these impls is not visible here -- confirm.
unsafe impl Send for CommandBuffer {}
unsafe impl Sync for CommandBuffer {}

/// Scratch containers owned by a command buffer.
// NOTE(review): presumably kept around to re-use their allocations -- confirm at the use sites.
#[derive(Debug)]
struct Temp {
    clear_vertices: Vec<ClearVertex>,
    blit_vertices: FastHashMap<(Aspects, i::Level), Vec<BlitVertex>>,
    clear_values: Vec<Option<com::ClearValue>>,
}

type VertexBufferMaybeVec = Vec<Option<(pso::VertexBufferDesc, pso::ElemOffset)>>;

/// The currently bound graphics pipeline along with the pieces of its
/// description that are consulted at draw/bind time.
#[derive(Debug)]
struct RenderPipelineState {
    raw: metal::RenderPipelineState,
    ds_desc: pso::DepthStencilDesc,
    vertex_buffers: VertexBufferMaybeVec,
    formats: native::SubpassFormats,
}

#[derive(Debug)]
struct SubpassInfo {
    descriptor: metal::RenderPassDescriptor,
    combined_aspects: Aspects,
    formats: native::SubpassFormats,
    operations: native::SubpassData<native::AttachmentOps>,
    sample_count: i::NumSamples,
}

/// Resources of one bound descriptor set that are re-issued as
/// `UseResource` commands for graphics and compute passes respectively.
#[derive(Debug, Default)]
struct DescriptorSetInfo {
    graphics_resources: Vec<(ResourcePtr, metal::MTLResourceUsage)>,
    compute_resources: Vec<(ResourcePtr, metal::MTLResourceUsage)>,
}

#[derive(Debug, Default)]
struct TargetState {
    aspects: Aspects,
    extent: i::Extent,
    formats: native::SubpassFormats,
    samples: i::NumSamples,
}

/// The current state of a command buffer. It's a mixed bag of states coming directly
/// from gfx-hal and inherited between Metal passes, states existing solely on Metal side,
/// and stuff that is half way here and there.
///
/// ## Vertex buffers
/// You may notice that vertex buffers are stored in two separate places: per pipeline, and
/// here in the state. These can't be merged together easily because at binding time we
/// want one input vertex buffer to potentially be bound to multiple entry points....
///
/// ## Depth-stencil desc
/// We have one coming from the current graphics pipeline, and one representing the
/// current Metal state.
#[derive(Debug)]
struct State {
    // -------- Hal states --------- //
    // Note: this could be `MTLViewport` but we have to patch the depth separately.
    viewport: Option<(pso::Rect, Range<f32>)>,
    scissors: Option<MTLScissorRect>,
    blend_color: Option<pso::ColorValue>,
    //TODO: move some of that state out, to avoid redundant allocations
    render_pso: Option<RenderPipelineState>,
    /// A flag to handle edge cases of Vulkan binding inheritance:
    /// we don't want to consider the current PSO bound for a new pass if it's not compatible.
    render_pso_is_compatible: bool,
    compute_pso: Option<metal::ComputePipelineState>,
    work_group_size: MTLSize,
    primitive_type: MTLPrimitiveType,
    rasterizer_state: Option<native::RasterizerState>,
    depth_bias: pso::DepthBias,
    stencil: native::StencilState<pso::StencilValue>,
    push_constants: Vec<u32>,
    visibility_query: (metal::MTLVisibilityResultMode, buffer::Offset),
    target: TargetState,
    pending_subpasses: Vec<SubpassInfo>,

    // -------- Metal states --------- //
    resources_vs: StageResources,
    resources_ps: StageResources,
    resources_cs: StageResources,
    descriptor_sets: ArrayVec<[DescriptorSetInfo; MAX_BOUND_DESCRIPTOR_SETS]>,
    index_buffer: Option<IndexBuffer<BufferPtr>>,
    vertex_buffers: Vec<Option<(BufferPtr, u64)>>,
    active_depth_stencil_desc: pso::DepthStencilDesc,
    active_scissor: MTLScissorRect,
}

impl State {
    /// Clears the recorded bindings and dynamic state.
    ///
    /// Fields not assigned below (for example `render_pso_is_compatible`,
    /// `work_group_size`, `primitive_type`, `visibility_query`, `target`,
    /// `active_depth_stencil_desc` and `active_scissor`) keep their values.
    fn reset(&mut self) {
        self.viewport = None;
        self.scissors = None;
        self.blend_color = None;
        self.render_pso = None;
        self.compute_pso = None;
        self.rasterizer_state = None;
        self.depth_bias = pso::DepthBias::default();
        self.stencil = native::StencilState {
            reference_values: pso::Sided::new(0),
            read_masks: pso::Sided::new(!0),
            write_masks: pso::Sided::new(!0),
        };
        self.push_constants.clear();
        self.pending_subpasses.clear();
        self.resources_vs.clear();
        self.resources_ps.clear();
        self.resources_cs.clear();
        // The per-set entries themselves stay in place; only their lists are emptied.
        for ds in self.descriptor_sets.iter_mut() {
            ds.graphics_resources.clear();
            ds.compute_resources.clear();
        }
        self.index_buffer = None;
        self.vertex_buffers.clear();
    }

    /// Fits the scissor rectangle `sr` into `extent`.
    ///
    /// The origin is limited to the last pixel of the extent and the resulting
    /// width/height are never smaller than 1, even for a zero-sized extent.
    fn clamp_scissor(sr: MTLScissorRect, extent: i::Extent) -> MTLScissorRect {
        // sometimes there is not even an active render pass at this point
        let x = sr.x.min(extent.width.max(1) as u64 - 1);
        let y = sr.y.min(extent.height.max(1) as u64 - 1);
        //TODO: handle the zero scissor size sensibly
        MTLScissorRect {
            x,
            y,
            width: ((sr.x + sr.width).min(extent.width as u64) - x).max(1),
            height: ((sr.y + sr.height).min(extent.height as u64) - y).max(1),
        }
    }

    /// Produces the "bind pipeline" and "set rasterizer state" commands for
    /// the current graphics pipeline, or `(None, None)` when the pipeline is
    /// flagged as incompatible with the pass.
    fn make_pso_commands(
        &self,
    ) -> (
        Option<soft::RenderCommand<&soft::Ref>>,
        Option<soft::RenderCommand<&soft::Ref>>,
    ) {
        if self.render_pso_is_compatible {
            (
                self.render_pso
                    .as_ref()
                    .map(|ps| soft::RenderCommand::BindPipeline(&*ps.raw)),
                self.rasterizer_state
                    .clone()
                    .map(soft::RenderCommand::SetRasterizerState),
            )
        } else {
            // Note: this is technically valid, we should not warn.
            (None, None)
        }
    }

    /// Produces a `SetViewport` command if a viewport has been recorded.
    fn make_viewport_command(&self) -> Option<soft::RenderCommand<&soft::Ref>> {
        self.viewport
            .as_ref()
            .map(|&(rect, ref depth)| soft::RenderCommand::SetViewport(rect, depth.clone()))
    }

    /// Builds the sequence of commands that re-applies the recorded render
    /// state. Blend color is only emitted when `aspects` contains `COLOR`,
    /// depth bias only when it contains `DEPTH`. Scissor and depth-stencil
    /// state are not part of the sequence.
    fn make_render_commands(
        &self,
        aspects: Aspects,
    ) -> impl Iterator<Item = soft::RenderCommand<&soft::Ref>> {
        // Apply previously bound values for this command buffer
        let com_blend = if aspects.contains(Aspects::COLOR) {
            self.blend_color.map(soft::RenderCommand::SetBlendColor)
        } else {
            None
        };
        let com_depth_bias = if aspects.contains(Aspects::DEPTH) {
            Some(soft::RenderCommand::SetDepthBias(self.depth_bias))
        } else {
            None
        };
        let com_visibility = if self.visibility_query.0 != metal::MTLVisibilityResultMode::Disabled
        {
            Some(soft::RenderCommand::SetVisibilityResult(
                self.visibility_query.0,
                self.visibility_query.1,
            ))
        } else {
            None
        };
        let com_vp = self.make_viewport_command();
        let (com_pso, com_rast) = self.make_pso_commands();

        // Vertex and fragment resources are paired with their stage, in that order.
        let render_resources = iter::once(&self.resources_vs).chain(iter::once(&self.resources_ps));
        let push_constants = self.push_constants.as_slice();
        let com_resources = [ShaderStage::Vertex, ShaderStage::Fragment]
            .iter()
            .zip(render_resources)
            .flat_map(move |(&stage, resources)| {
                let com_buffers = soft::RenderCommand::BindBuffers {
                    stage,
                    index: 0,
                    buffers: (&resources.buffers[..], &resources.buffer_offsets[..]),
                };
                let com_textures = soft::RenderCommand::BindTextures {
                    stage,
                    index: 0,
                    textures: &resources.textures[..],
                };
                let com_samplers = soft::RenderCommand::BindSamplers {
                    stage,
                    index: 0,
                    samplers: &resources.samplers[..],
                };
                // Push constants are only bound for stages that have registered a slot.
                let com_push_constants =
                    resources
                        .push_constants
                        .map(|pc| soft::RenderCommand::BindBufferData {
                            stage,
                            index: pc.buffer_index as _,
                            words: &push_constants[..pc.count as usize],
                        });
                iter::once(com_buffers)
                    .chain(iter::once(com_textures))
                    .chain(iter::once(com_samplers))
                    .chain(com_push_constants)
            });
        let com_used_resources = self.descriptor_sets.iter().flat_map(|ds| {
            ds.graphics_resources
                .iter()
                .map(|&(resource, usage)| soft::RenderCommand::UseResource { resource, usage })
        });

        com_vp
            .into_iter()
            .chain(com_blend)
            .chain(com_depth_bias)
            .chain(com_visibility)
            .chain(com_pso)
            .chain(com_rast)
            //.chain(com_scissor) // done outside
            //.chain(com_ds) // done outside
            .chain(com_resources)
            .chain(com_used_resources)
    }

    /// Compute counterpart of `make_render_commands`: pipeline (if any),
    /// buffers, textures, samplers, push constants (if registered) and the
    /// `UseResource` commands of the bound descriptor sets.
    fn make_compute_commands(&self) -> impl Iterator<Item = soft::ComputeCommand<&soft::Ref>> {
        let resources = &self.resources_cs;
        let com_pso = self
            .compute_pso
            .as_ref()
            .map(|pso| soft::ComputeCommand::BindPipeline(&**pso));
        let com_buffers = soft::ComputeCommand::BindBuffers {
            index: 0,
            buffers: (&resources.buffers[..], &resources.buffer_offsets[..]),
        };
        let com_textures = soft::ComputeCommand::BindTextures {
            index: 0,
            textures: &resources.textures[..],
        };
        let com_samplers = soft::ComputeCommand::BindSamplers {
            index: 0,
            samplers: &resources.samplers[..],
        };
        let com_push_constants =
            resources
                .push_constants
                .map(|pc| soft::ComputeCommand::BindBufferData {
                    index: pc.buffer_index as _,
                    words: &self.push_constants[..pc.count as usize],
                });
        let com_used_resources = self.descriptor_sets.iter().flat_map(|ds| {
            ds.compute_resources
                .iter()
                .map(|&(resource, usage)| soft::ComputeCommand::UseResource { resource, usage })
        });

        com_pso
            .into_iter()
            .chain(iter::once(com_buffers))
            .chain(iter::once(com_textures))
            .chain(iter::once(com_samplers))
            .chain(com_push_constants)
            .chain(com_used_resources)
    }

    /// Writes the pipeline's vertex buffers into the vertex-stage buffer
    /// table so that they occupy the slots just below `end`, and returns the
    /// command binding that slot range. Returns `None` without side effects
    /// when no graphics pipeline is bound.
    ///
    /// The pipeline's vertex buffer list is walked front to back while the
    /// slots are filled from `end - 1` downwards.
    // NOTE(review): `end - rps.vertex_buffers.len()` relies on the caller
    // passing `end` at least as large as the list -- confirm at the call sites.
    fn set_vertex_buffers(&mut self, end: usize) -> Option<soft::RenderCommand<&soft::Ref>> {
        let rps = self.render_pso.as_ref()?;
        let start = end - rps.vertex_buffers.len();
        self.resources_vs.pre_allocate_buffers(end);

        for ((out_buffer, out_offset), vb_maybe) in self.resources_vs.buffers[..end]
            .iter_mut()
            .rev()
            .zip(self.resources_vs.buffer_offsets[..end].iter_mut().rev())
            .zip(&rps.vertex_buffers)
        {
            match vb_maybe {
                Some((ref vb, extra_offset)) => {
                    match self.vertex_buffers.get(vb.binding as usize) {
                        Some(&Some((buffer, base_offset))) => {
                            *out_buffer = Some(buffer);
                            *out_offset = *extra_offset as u64 + base_offset;
                        }
                        _ => {
                            // being unable to bind a buffer here is technically fine, since before this moment
                            // and actual rendering there might be more bind calls
                            *out_buffer = None;
                        }
                    }
                }
                None => {
                    *out_buffer = None;
                }
            }
        }

        Some(soft::RenderCommand::BindBuffers {
            stage: ShaderStage::Vertex,
            index: start as ResourceIndex,
            buffers: (
                &self.resources_vs.buffers[start..end],
                &self.resources_vs.buffer_offsets[start..end],
            ),
        })
    }

    /// Derives the depth-stencil description to activate from the bound
    /// pipeline and the current target, and records it as active.
    ///
    /// Returns `None` when no pipeline is bound or when the derived
    /// description equals the one already active.
    fn build_depth_stencil(&mut self) -> Option<pso::DepthStencilDesc> {
        let mut desc = match self.render_pso {
            Some(ref rp) => rp.ds_desc,
            None => return None,
        };

        // Drop the parts that the current target has no aspect for.
        if !self.target.aspects.contains(Aspects::DEPTH) {
            desc.depth = None;
        }
        if !self.target.aspects.contains(Aspects::STENCIL) {
            desc.stencil = None;
        }

        if let Some(ref mut stencil) = desc.stencil {
            // Reference values are always treated as dynamic, while dynamic
            // masks are replaced by the values currently stored in the state.
            stencil.reference_values = pso::State::Dynamic;
            if stencil.read_masks.is_dynamic() {
                stencil.read_masks = pso::State::Static(self.stencil.read_masks);
            }
            if stencil.write_masks.is_dynamic() {
                stencil.write_masks = pso::State::Static(self.stencil.write_masks);
            }
        }

        if desc == self.active_depth_stencil_desc {
            None
        } else {
            self.active_depth_stencil_desc = desc;
            Some(desc)
        }
    }

    /// Records the depth bias and returns the command that applies it.
    fn set_depth_bias<'a>(
        &mut self,
        depth_bias: &pso::DepthBias,
    ) -> soft::RenderCommand<&'a soft::Ref> {
        self.depth_bias = *depth_bias;
        soft::RenderCommand::SetDepthBias(*depth_bias)
    }

    /// Registers the vertex-stage push constant slot and returns the command
    /// binding the first `pc.count` words of the stored push constants to it.
    fn push_vs_constants(
        &mut self,
        pc: native::PushConstantInfo,
    ) -> soft::RenderCommand<&soft::Ref> {
        self.resources_vs.push_constants = Some(pc);
        soft::RenderCommand::BindBufferData {
            stage: ShaderStage::Vertex,
            index: pc.buffer_index,
            words: &self.push_constants[..pc.count as usize],
        }
    }

    /// Fragment-stage counterpart of `push_vs_constants`.
    fn push_ps_constants(
        &mut self,
        pc: native::PushConstantInfo,
    ) -> soft::RenderCommand<&soft::Ref> {
        self.resources_ps.push_constants = Some(pc);
        soft::RenderCommand::BindBufferData {
            stage: ShaderStage::Fragment,
            index: pc.buffer_index,
            words: &self.push_constants[..pc.count as usize],
        }
    }

    /// Compute-stage counterpart of `push_vs_constants`.
    fn push_cs_constants(
        &mut self,
        pc: native::PushConstantInfo,
    ) -> soft::ComputeCommand<&soft::Ref> {
        self.resources_cs.push_constants = Some(pc);
        soft::ComputeCommand::BindBufferData {
            index: pc.buffer_index,
            words: &self.push_constants[..pc.count as usize],
        }
    }

    /// Records the viewport and returns the command that applies it.
    ///
    /// When `disabilities.broken_viewport_near_depth` is set, the end of the
    /// depth range is replaced by `end - start`.
    fn set_viewport<'a>(
        &mut self,
        vp: &'a pso::Viewport,
        disabilities: PrivateDisabilities,
    ) -> soft::RenderCommand<&'a soft::Ref> {
        let depth = vp.depth.start..if disabilities.broken_viewport_near_depth {
            vp.depth.end - vp.depth.start
        } else {
            vp.depth.end
        };
        self.viewport = Some((vp.rect, depth.clone()));
        soft::RenderCommand::SetViewport(vp.rect, depth)
    }

    /// Makes `rect` the active scissor, returning a command only if it
    /// differs from the one already active.
    fn set_scissor<'a>(
        &mut self,
        rect: MTLScissorRect,
    ) -> Option<soft::RenderCommand<&'a soft::Ref>> {
        //TODO: https://github.com/gfx-rs/metal-rs/issues/183
        if self.active_scissor.x == rect.x
            && self.active_scissor.y == rect.y
            && self.active_scissor.width == rect.width
            && self.active_scissor.height == rect.height
        {
            None
        } else {
            self.active_scissor = rect;
            Some(soft::RenderCommand::SetScissor(rect))
        }
    }

    /// Records the scissor requested through the hal API (unclamped) and
    /// activates its version clamped to the current target extent.
    fn set_hal_scissor<'a>(
        &mut self,
        rect: pso::Rect,
    ) -> Option<soft::RenderCommand<&'a soft::Ref>> {
        let scissor = MTLScissorRect {
            x: rect.x as _,
            y: rect.y as _,
            width: rect.w as _,
            height: rect.h as _,
        };
        self.scissors = Some(scissor);
        let clamped = State::clamp_scissor(scissor, self.target.extent);
        self.set_scissor(clamped)
    }

    /// Re-clamps the recorded hal scissor (if any) against the current
    /// target extent and activates it.
    fn reset_scissor<'a>(&mut self) -> Option<soft::RenderCommand<&'a soft::Ref>> {
        self.scissors.and_then(|sr| {
            let clamped = State::clamp_scissor(sr, self.target.extent);
            self.set_scissor(clamped)
        })
    }

    /// Records the blend color and returns the command that applies it.
    fn set_blend_color<'a>(
        &mut self,
        color: &'a pso::ColorValue,
    ) -> soft::RenderCommand<&'a soft::Ref> {
        self.blend_color = Some(*color);
        soft::RenderCommand::SetBlendColor(*color)
    }

    /// Copies `constants` into the stored push constants.
    ///
    /// `offset` is in bytes and must be a multiple of the word size; `total`
    /// is the minimum length, in words, the storage is grown to beforehand.
    ///
    /// # Panics
    /// Panics if `offset` is not word-aligned, or if the destination range
    /// ends past the (possibly grown) storage.
    fn update_push_constants(&mut self, offset: u32, constants: &[u32], total: u32) {
        assert_eq!(offset % WORD_ALIGNMENT as u32, 0);
        // Convert the byte offset into a word index.
        let offset = (offset / WORD_ALIGNMENT as u32) as usize;
        let data = &mut self.push_constants;
        if data.len() < total as usize {
            data.resize(total as usize, 0);
        }
        data[offset..offset + constants.len()].copy_from_slice(constants);
    }

    /// Records the visibility query mode/offset and returns the command that
    /// applies it.
    fn set_visibility_query(
        &mut self,
        mode: metal::MTLVisibilityResultMode,
        offset: buffer::Offset,
    ) -> soft::RenderCommand<&soft::Ref> {
        self.visibility_query = (mode, offset);
        soft::RenderCommand::SetVisibilityResult(mode, offset)
    }

    /// Copies the buffers, textures and samplers of a descriptor set (taken
    /// from `data` at `pool_range`) into the per-stage resource tables.
    ///
    /// Each resource is written for every stage it is visible to after
    /// masking with `stage_filter`, at consecutive registers starting from
    /// `base_res_offsets`. Returns the register counters past the last
    /// written resource.
    ///
    /// The tables are indexed directly, so they must already be large enough
    /// (see `StageResources::pre_allocate`).
    fn bind_set(
        &mut self,
        stage_filter: pso::ShaderStageFlags,
        data: &native::DescriptorEmulatedPoolInner,
        base_res_offsets: &native::MultiStageResourceCounters,
        pool_range: &native::ResourceData<Range<native::PoolResourceIndex>>,
    ) -> native::MultiStageResourceCounters {
        let mut offsets = base_res_offsets.clone();
        let pool_range = pool_range.map(|r| r.start as usize..r.end as usize);

        for &(mut stages, value, offset) in &data.buffers[pool_range.buffers] {
            stages &= stage_filter;
            if stages.contains(pso::ShaderStageFlags::VERTEX) {
                let reg = offsets.vs.buffers as usize;
                self.resources_vs.buffers[reg] = value;
                self.resources_vs.buffer_offsets[reg] = offset;
                offsets.vs.buffers += 1;
            }
            if stages.contains(pso::ShaderStageFlags::FRAGMENT) {
                let reg = offsets.ps.buffers as usize;
                self.resources_ps.buffers[reg] = value;
                self.resources_ps.buffer_offsets[reg] = offset;
                offsets.ps.buffers += 1;
            }
            if stages.contains(pso::ShaderStageFlags::COMPUTE) {
                let reg = offsets.cs.buffers as usize;
                self.resources_cs.buffers[reg] = value;
                self.resources_cs.buffer_offsets[reg] = offset;
                offsets.cs.buffers += 1;
            }
        }
        for &(mut stages, value, _layout) in &data.textures[pool_range.textures] {
            stages &= stage_filter;
            if stages.contains(pso::ShaderStageFlags::VERTEX) {
                self.resources_vs.textures[offsets.vs.textures as usize] = value;
                offsets.vs.textures += 1;
            }
            if stages.contains(pso::ShaderStageFlags::FRAGMENT) {
                self.resources_ps.textures[offsets.ps.textures as usize] = value;
                offsets.ps.textures += 1;
            }
            if stages.contains(pso::ShaderStageFlags::COMPUTE) {
                self.resources_cs.textures[offsets.cs.textures as usize] = value;
                offsets.cs.textures += 1;
            }
        }
        for &(mut stages, value) in &data.samplers[pool_range.samplers] {
            stages &= stage_filter;
            if stages.contains(pso::ShaderStageFlags::VERTEX) {
                self.resources_vs.samplers[offsets.vs.samplers as usize] = value;
                offsets.vs.samplers += 1;
            }
            if stages.contains(pso::ShaderStageFlags::FRAGMENT) {
                self.resources_ps.samplers[offsets.ps.samplers as usize] = value;
                offsets.ps.samplers += 1;
            }
            if stages.contains(pso::ShaderStageFlags::COMPUTE) {
                self.resources_cs.samplers[offsets.cs.samplers as usize] = value;
                offsets.cs.samplers += 1;
            }
        }

        offsets
    }
}

/// Resource binding tables of a single shader stage, indexed by register.
/// `buffers` and `buffer_offsets` are kept at the same length.
#[derive(Debug)]
struct StageResources {
    buffers: Vec<Option<BufferPtr>>,
    buffer_offsets: Vec<buffer::Offset>,
    textures: Vec<Option<TexturePtr>>,
    samplers: Vec<Option<SamplerPtr>>,
    push_constants: Option<native::PushConstantInfo>,
}

impl StageResources {
    /// Creates empty tables with no push constant slot registered.
    fn new() -> Self {
        StageResources {
            buffers: Vec::new(),
            buffer_offsets: Vec::new(),
            textures: Vec::new(),
            samplers: Vec::new(),
            push_constants: None,
        }
    }

    /// Empties every table and forgets the push constant slot.
    fn clear(&mut self) {
        self.buffers.clear();
        self.buffer_offsets.clear();
        self.textures.clear();
        self.samplers.clear();
        self.push_constants = None;
    }

    /// Grows (never shrinks) the buffer tables to hold at least `count`
    /// entries, filling new slots with `None` / offset 0.
    fn pre_allocate_buffers(&mut self, count: usize) {
        debug_assert_eq!(self.buffers.len(), self.buffer_offsets.len());
        if self.buffers.len() < count {
            self.buffers.resize(count, None);
            self.buffer_offsets.resize(count, 0);
        }
    }

    /// Grows (never shrinks) all tables to hold at least the number of
    /// entries given by `counters`.
    fn pre_allocate(&mut self, counters: &native::ResourceData<ResourceIndex>) {
        if self.textures.len() < counters.textures as usize {
            self.textures.resize(counters.textures as usize, None);
        }
        if self.samplers.len() < counters.samplers as usize {
            self.samplers.resize(counters.samplers as usize, None);
        }
        self.pre_allocate_buffers(counters.buffers as usize);
    }
}

/// Largest command list lengths seen so far per pass kind
/// (maintained by `EncodePass::update`).
#[cfg(feature = "dispatch")]
#[derive(Debug, Default)]
struct Capacity {
    render: usize,
    compute: usize,
    blit: usize,
}

//TODO: make sure to recycle the heap allocation of these commands.
/// A fully recorded pass waiting to be encoded on a dispatch queue.
/// Each variant carries its command list and a label; render and compute
/// passes also carry their owned resources, and render passes their descriptor.
#[cfg(feature = "dispatch")]
#[derive(Debug)]
enum EncodePass {
    Render(
        Vec<soft::RenderCommand<soft::Own>>,
        soft::Own,
        metal::RenderPassDescriptor,
        String,
    ),
    Compute(Vec<soft::ComputeCommand<soft::Own>>, soft::Own, String),
    Blit(Vec<soft::BlitCommand>, String),
}
// NOTE(review): justification for this impl is not visible here -- confirm.
#[cfg(feature = "dispatch")]
unsafe impl Send for EncodePass {}

/// Newtype around the shared command buffer so that it can be moved into
/// the dispatch closure.
#[cfg(feature = "dispatch")]
struct SharedCommandBuffer(Arc<Mutex<metal::CommandBuffer>>);
// NOTE(review): justification for this impl is not visible here -- confirm.
#[cfg(feature = "dispatch")]
unsafe impl Send for SharedCommandBuffer {}

#[cfg(feature = "dispatch")]
impl EncodePass {
    /// Submits the pass to `queue` for asynchronous encoding into the shared
    /// command buffer.
    ///
    /// The command buffer lock is only held while the encoder is created.
    /// A render pass hands its descriptor back to the pool's cache as soon
    /// as the encoder exists.
    fn schedule(
        self,
        queue: &dispatch::Queue,
        cmd_buffer_arc: &Arc<Mutex<metal::CommandBuffer>>,
        pool_shared_arc: &Arc<PoolShared>,
    ) {
        let cmd_buffer = SharedCommandBuffer(Arc::clone(cmd_buffer_arc));
        let pool_shared = Arc::clone(pool_shared_arc);
        queue.exec_async(move || match self {
            EncodePass::Render(list, resources, desc, label) => {
                let encoder = cmd_buffer
                    .0
                    .lock()
                    .new_render_command_encoder(&desc)
                    .to_owned();
                pool_shared.render_pass_descriptors.lock().free(desc);
                encoder.set_label(&label);
                for command in list {
                    exec_render(&encoder, command, &resources);
                }
                encoder.end_encoding();
            }
            EncodePass::Compute(list, resources, label) => {
                let encoder = cmd_buffer.0.lock().new_compute_command_encoder().to_owned();
                encoder.set_label(&label);
                for command in list {
                    exec_compute(&encoder, command, &resources);
                }
                encoder.end_encoding();
            }
            EncodePass::Blit(list, label) => {
                let encoder = cmd_buffer.0.lock().new_blit_command_encoder().to_owned();
                encoder.set_label(&label);
                for command in list {
                    exec_blit(&encoder, command);
                }
                encoder.end_encoding();
            }
        });
    }

    /// Raises the matching counter in `capacity` to this pass's command
    /// count if that is larger.
    fn update(&self, capacity: &mut Capacity) {
        match &self {
            EncodePass::Render(ref list, _, _, _) => {
                capacity.render = capacity.render.max(list.len())
            }
            EncodePass::Compute(ref list, _, _) => {
                capacity.compute = capacity.compute.max(list.len())
            }
            EncodePass::Blit(ref list, _) => capacity.blit = capacity.blit.max(list.len()),
        }
    }
}

/// Recorded commands of a deferred command buffer.
///
/// Every entry of `passes` holds the pass kind, the index range of its
/// commands inside the command list of that kind, and a label.
#[derive(Debug, Default)]
struct Journal {
    resources: soft::Own,
    passes: Vec<(soft::Pass, Range<usize>, String)>,
    render_commands: Vec<soft::RenderCommand<soft::Own>>,
    compute_commands: Vec<soft::ComputeCommand<soft::Own>>,
    blit_commands: Vec<soft::BlitCommand>,
}

impl Journal {
    /// Empties the journal, handing the descriptors of recorded render
    /// passes back to the pool's descriptor cache.
    fn clear(&mut self, pool_shared: &PoolShared) {
        self.resources.clear();
        self.render_commands.clear();
        self.compute_commands.clear();
        self.blit_commands.clear();

        let mut rp_desc_cache = pool_shared.render_pass_descriptors.lock();
        for (pass, _, _) in self.passes.drain(..) {
            if let soft::Pass::Render(desc) = pass {
                rp_desc_cache.free(desc);
            }
        }
    }

    /// Closes the last pass by setting the end of its range to the current
    /// length of the command list of its kind. Does nothing without passes.
    fn stop(&mut self) {
        match self.passes.last_mut() {
            None => {}
            Some(&mut (soft::Pass::Render(_), ref mut range, _)) => {
                range.end = self.render_commands.len();
            }
            Some(&mut (soft::Pass::Compute, ref mut range, _)) => {
                range.end = self.compute_commands.len();
            }
            Some(&mut (soft::Pass::Blit, ref mut range, _)) => {
                range.end = self.blit_commands.len();
            }
        };
    }

    /// Replays every recorded pass, in order, into `command_buf`, using one
    /// encoder per pass. Encoder labels are only set when non-empty.
    fn record(&self, command_buf: &metal::CommandBufferRef) {
        for (ref pass, ref range, ref label) in &self.passes {
            match *pass {
                soft::Pass::Render(ref desc) => {
                    let encoder = command_buf.new_render_command_encoder(desc);
                    if !label.is_empty() {
                        encoder.set_label(label);
                    }
                    for command in &self.render_commands[range.clone()] {
                        exec_render(&encoder, command, &self.resources);
                    }
                    encoder.end_encoding();
                }
                soft::Pass::Blit => {
                    let encoder = command_buf.new_blit_command_encoder();
                    if !label.is_empty() {
                        encoder.set_label(label);
                    }
                    for command in &self.blit_commands[range.clone()] {
                        exec_blit(&encoder, command);
                    }
                    encoder.end_encoding();
                }
                soft::Pass::Compute => {
                    let encoder = command_buf.new_compute_command_encoder();
                    if !label.is_empty() {
                        encoder.set_label(label);
                    }
                    for command in &self.compute_commands[range.clone()] {
                        exec_compute(&encoder, command, &self.resources);
                    }
                    encoder.end_encoding();
                }
            }
        }
    }

    /// Appends the contents of `other` to this journal.
    ///
    /// With `inherit_pass`, `other` must consist of exactly one pass and its
    /// render commands are folded into this journal's last (render) pass;
    /// otherwise the passes of `other` are appended with shifted ranges.
    ///
    /// # Panics
    /// With `inherit_pass`: if `other` does not have exactly one pass, if
    /// this journal has no pass, or if its last pass is not a render pass.
    fn extend(&mut self, other: &Self, inherit_pass: bool) {
        if inherit_pass {
            assert_eq!(other.passes.len(), 1);
            match *self.passes.last_mut().unwrap() {
                (soft::Pass::Render(_), ref mut range, _) => {
                    range.end += other.render_commands.len();
                }
                (soft::Pass::Compute, _, _) | (soft::Pass::Blit, _, _) => {
                    panic!("Only render passes can inherit")
                }
            }
        } else {
            for (pass, range, label) in &other.passes {
                // The command lists have not been extended yet, so the current
                // lengths are exactly where the commands of `other` will start.
                let offset = match *pass {
                    soft::Pass::Render(_) => self.render_commands.len(),
                    soft::Pass::Compute => self.compute_commands.len(),
                    soft::Pass::Blit => self.blit_commands.len(),
                };
                self.passes.alloc().init((
                    pass.clone(),
                    range.start + offset..range.end + offset,
                    label.clone(),
                ));
            }
        }

        // Note: journals contain 3 levels of stuff:
        // resources, commands, and passes
        // Each upper level points to the lower one with index
        // sub-ranges. In order to merge two journals, we need
        // to fix those indices of the one that goes on top.
        // This is referred here as "rebasing".
        for mut com in other.render_commands.iter().cloned() {
            self.resources.rebase_render(&mut com);
            self.render_commands.push(com);
        }
        for mut com in other.compute_commands.iter().cloned() {
            self.resources.rebase_compute(&mut com);
            self.compute_commands.push(com);
        }
        self.blit_commands.extend_from_slice(&other.blit_commands);

        // Must come after the rebasing above, which is done against the
        // resources as they were before the merge.
        self.resources.extend(&other.resources);
    }
}

/// Destination of recorded commands, one variant per recording mode.
#[derive(Debug)]
enum CommandSink {
    /// Commands are encoded straight into a Metal command buffer.
    Immediate {
        cmd_buffer: metal::CommandBuffer,
        token: Token,
        encoder_state: EncoderState,
        num_passes: usize,
        label: String,
    },
    /// Commands are stored in a `Journal`.
    Deferred {
        is_encoding: bool,
        is_inheriting: bool,
        journal: Journal,
        label: String,
    },
    /// Commands are collected per pass and handed to a dispatch queue.
    #[cfg(feature = "dispatch")]
    Remote {
        queue: NoDebug<dispatch::Queue>,
        cmd_buffer: Arc<Mutex<metal::CommandBuffer>>,
        pool_shared: Arc<PoolShared>,
        token: Token,
        pass: Option<EncodePass>,
        capacity: Capacity,
        label: String,
    },
}

/// A helper temporary object that consumes state-setting commands only
/// applicable to a render pass currently encoded.
+enum PreRender<'a> { + Immediate(&'a metal::RenderCommandEncoderRef), + Deferred( + &'a mut soft::Own, + &'a mut Vec<soft::RenderCommand<soft::Own>>, + ), + Void, +} + +impl<'a> PreRender<'a> { + fn is_void(&self) -> bool { + match *self { + PreRender::Void => true, + _ => false, + } + } + + fn issue(&mut self, command: soft::RenderCommand<&soft::Ref>) { + match *self { + PreRender::Immediate(encoder) => exec_render(encoder, command, &&soft::Ref), + PreRender::Deferred(ref mut resources, ref mut list) => { + list.alloc().init(resources.own_render(command)); + } + PreRender::Void => (), + } + } + + fn issue_many<'b, I>(&mut self, commands: I) + where + I: Iterator<Item = soft::RenderCommand<&'b soft::Ref>>, + { + match *self { + PreRender::Immediate(encoder) => { + for com in commands { + exec_render(encoder, com, &&soft::Ref); + } + } + PreRender::Deferred(ref mut resources, ref mut list) => { + list.extend(commands.map(|com| resources.own_render(com))) + } + PreRender::Void => {} + } + } +} + +/// A helper temporary object that consumes state-setting commands only +/// applicable to a compute pass currently encoded. 
+enum PreCompute<'a> { + Immediate(&'a metal::ComputeCommandEncoderRef), + Deferred( + &'a mut soft::Own, + &'a mut Vec<soft::ComputeCommand<soft::Own>>, + ), + Void, +} + +impl<'a> PreCompute<'a> { + fn issue<'b>(&mut self, command: soft::ComputeCommand<&'b soft::Ref>) { + match *self { + PreCompute::Immediate(encoder) => exec_compute(encoder, command, &&soft::Ref), + PreCompute::Deferred(ref mut resources, ref mut list) => { + list.alloc().init(resources.own_compute(command)); + } + PreCompute::Void => (), + } + } + + fn issue_many<'b, I>(&mut self, commands: I) + where + I: Iterator<Item = soft::ComputeCommand<&'b soft::Ref>>, + { + match *self { + PreCompute::Immediate(encoder) => { + for com in commands { + exec_compute(encoder, com, &&soft::Ref); + } + } + PreCompute::Deferred(ref mut resources, ref mut list) => { + list.extend(commands.map(|com| resources.own_compute(com))) + } + PreCompute::Void => {} + } + } +} + +impl CommandSink { + fn label(&mut self, label: &str) -> &Self { + match self { + CommandSink::Immediate { label: l, .. } | CommandSink::Deferred { label: l, .. } => { + *l = label.to_string() + } + #[cfg(feature = "dispatch")] + CommandSink::Remote { label: l, .. } => *l = label.to_string(), + } + self + } + + fn stop_encoding(&mut self) { + match *self { + CommandSink::Immediate { + ref mut encoder_state, + .. + } => { + encoder_state.end(); + } + CommandSink::Deferred { + ref mut is_encoding, + ref mut journal, + .. + } => { + *is_encoding = false; + journal.stop(); + } + #[cfg(feature = "dispatch")] + CommandSink::Remote { + queue: NoDebug(ref queue), + ref cmd_buffer, + ref mut pass, + ref mut capacity, + ref pool_shared, + .. + } => { + if let Some(pass) = pass.take() { + pass.update(capacity); + pass.schedule(queue, cmd_buffer, pool_shared); + } + } + } + } + + /// Start issuing pre-render commands. 
Those can be rejected, so the caller is responsible + /// for updating the state cache accordingly, so that it's set upon the start of a next pass. + fn pre_render(&mut self) -> PreRender { + match *self { + CommandSink::Immediate { + encoder_state: EncoderState::Render(ref encoder), + .. + } => PreRender::Immediate(encoder), + CommandSink::Deferred { + is_encoding: true, + ref mut journal, + .. + } => match journal.passes.last() { + Some(&(soft::Pass::Render(_), _, _)) => { + PreRender::Deferred(&mut journal.resources, &mut journal.render_commands) + } + _ => PreRender::Void, + }, + #[cfg(feature = "dispatch")] + CommandSink::Remote { + pass: Some(EncodePass::Render(ref mut list, ref mut resources, _, _)), + .. + } => PreRender::Deferred(resources, list), + _ => PreRender::Void, + } + } + + /// Switch the active encoder to render by starting a render pass. + fn switch_render( + &mut self, + descriptor: metal::RenderPassDescriptor, + pool_shared: &Arc<PoolShared>, + ) -> PreRender { + //assert!(AutoReleasePool::is_active()); + self.stop_encoding(); + + match *self { + CommandSink::Immediate { + ref cmd_buffer, + ref mut encoder_state, + ref mut num_passes, + ref label, + .. + } => { + *num_passes += 1; + let encoder = cmd_buffer.new_render_command_encoder(&descriptor); + pool_shared.render_pass_descriptors.lock().free(descriptor); + if !label.is_empty() { + encoder.set_label(label); + } + *encoder_state = EncoderState::Render(encoder.to_owned()); + PreRender::Immediate(encoder) + } + CommandSink::Deferred { + ref mut is_encoding, + ref mut journal, + is_inheriting, + ref label, + .. + } => { + assert!(!is_inheriting); + *is_encoding = true; + journal.passes.alloc().init(( + soft::Pass::Render(descriptor), + journal.render_commands.len()..0, + label.clone(), + )); + PreRender::Deferred(&mut journal.resources, &mut journal.render_commands) + } + #[cfg(feature = "dispatch")] + CommandSink::Remote { + ref mut pass, + ref capacity, + ref label, + .. 
+ } => { + let list = Vec::with_capacity(capacity.render); + *pass = Some(EncodePass::Render( + list, + soft::Own::default(), + descriptor, + label.clone(), + )); + match *pass { + Some(EncodePass::Render(ref mut list, ref mut resources, _, _)) => { + PreRender::Deferred(resources, list) + } + _ => unreachable!(), + } + } + } + } + + fn quick_render<'a, I>( + &mut self, + label: &str, + descriptor: metal::RenderPassDescriptor, + pool_shared: &Arc<PoolShared>, + commands: I, + ) where + I: Iterator<Item = soft::RenderCommand<&'a soft::Ref>>, + { + { + let mut pre = self.switch_render(descriptor, pool_shared); + if !label.is_empty() { + if let PreRender::Immediate(encoder) = pre { + encoder.set_label(label); + } + } + pre.issue_many(commands); + } + self.stop_encoding(); + } + + /// Issue provided blit commands. This function doesn't expect an active blit pass, + /// it will automatically start one when needed. + fn blit_commands<I>(&mut self, commands: I) + where + I: Iterator<Item = soft::BlitCommand>, + { + enum PreBlit<'b> { + Immediate(&'b metal::BlitCommandEncoderRef), + Deferred(&'b mut Vec<soft::BlitCommand>), + } + + let pre = match *self { + CommandSink::Immediate { + encoder_state: EncoderState::Blit(ref encoder), + .. + } => PreBlit::Immediate(encoder), + CommandSink::Immediate { + ref cmd_buffer, + ref mut encoder_state, + ref mut num_passes, + .. + } => { + *num_passes += 1; + encoder_state.end(); + let encoder = cmd_buffer.new_blit_command_encoder(); + *encoder_state = EncoderState::Blit(encoder.to_owned()); + PreBlit::Immediate(encoder) + } + CommandSink::Deferred { + ref mut is_encoding, + is_inheriting, + ref mut journal, + ref label, + .. 
+ } => { + assert!(!is_inheriting); + *is_encoding = true; + if let Some(&(soft::Pass::Blit, _, _)) = journal.passes.last() { + } else { + journal.stop(); + journal.passes.alloc().init(( + soft::Pass::Blit, + journal.blit_commands.len()..0, + label.clone(), + )); + } + PreBlit::Deferred(&mut journal.blit_commands) + } + #[cfg(feature = "dispatch")] + CommandSink::Remote { + pass: Some(EncodePass::Blit(ref mut list, _)), + .. + } => PreBlit::Deferred(list), + #[cfg(feature = "dispatch")] + CommandSink::Remote { + queue: NoDebug(ref queue), + ref cmd_buffer, + ref mut pass, + ref mut capacity, + ref label, + ref pool_shared, + .. + } => { + if let Some(pass) = pass.take() { + pass.update(capacity); + pass.schedule(queue, cmd_buffer, pool_shared); + } + let list = Vec::with_capacity(capacity.blit); + *pass = Some(EncodePass::Blit(list, label.clone())); + match *pass { + Some(EncodePass::Blit(ref mut list, _)) => PreBlit::Deferred(list), + _ => unreachable!(), + } + } + }; + + match pre { + PreBlit::Immediate(encoder) => { + for com in commands { + exec_blit(encoder, com); + } + } + PreBlit::Deferred(list) => { + list.extend(commands); + } + } + } + + /// Start issuing pre-compute commands. Those can be rejected, so the caller is responsible + /// for updating the state cache accordingly, so that it's set upon the start of a next pass. + fn pre_compute(&mut self) -> PreCompute { + match *self { + CommandSink::Immediate { + encoder_state: EncoderState::Compute(ref encoder), + .. + } => PreCompute::Immediate(encoder), + CommandSink::Deferred { + is_encoding: true, + is_inheriting: false, + ref mut journal, + .. + } => match journal.passes.last() { + Some(&(soft::Pass::Compute, _, _)) => { + PreCompute::Deferred(&mut journal.resources, &mut journal.compute_commands) + } + _ => PreCompute::Void, + }, + #[cfg(feature = "dispatch")] + CommandSink::Remote { + pass: Some(EncodePass::Compute(ref mut list, ref mut resources, _)), + .. 
+ } => PreCompute::Deferred(resources, list), + _ => PreCompute::Void, + } + } + + /// Switch the active encoder to compute. + /// Second returned value is `true` if the switch has just happened. + fn switch_compute(&mut self) -> (PreCompute, bool) { + match *self { + CommandSink::Immediate { + encoder_state: EncoderState::Compute(ref encoder), + .. + } => (PreCompute::Immediate(encoder), false), + CommandSink::Immediate { + ref cmd_buffer, + ref mut encoder_state, + ref mut num_passes, + .. + } => { + *num_passes += 1; + encoder_state.end(); + let encoder = cmd_buffer.new_compute_command_encoder(); + *encoder_state = EncoderState::Compute(encoder.to_owned()); + (PreCompute::Immediate(encoder), true) + } + CommandSink::Deferred { + ref mut is_encoding, + is_inheriting, + ref mut journal, + ref label, + .. + } => { + assert!(!is_inheriting); + *is_encoding = true; + let switch = if let Some(&(soft::Pass::Compute, _, _)) = journal.passes.last() { + false + } else { + journal.stop(); + journal.passes.alloc().init(( + soft::Pass::Compute, + journal.compute_commands.len()..0, + label.clone(), + )); + true + }; + ( + PreCompute::Deferred(&mut journal.resources, &mut journal.compute_commands), + switch, + ) + } + #[cfg(feature = "dispatch")] + CommandSink::Remote { + pass: Some(EncodePass::Compute(ref mut list, ref mut resources, _)), + .. + } => (PreCompute::Deferred(resources, list), false), + #[cfg(feature = "dispatch")] + CommandSink::Remote { + queue: NoDebug(ref queue), + ref cmd_buffer, + ref mut pass, + ref mut capacity, + ref label, + ref pool_shared, + .. 
+ } => { + if let Some(pass) = pass.take() { + pass.update(capacity); + pass.schedule(queue, cmd_buffer, pool_shared); + } + let list = Vec::with_capacity(capacity.compute); + *pass = Some(EncodePass::Compute( + list, + soft::Own::default(), + label.clone(), + )); + match *pass { + Some(EncodePass::Compute(ref mut list, ref mut resources, _)) => { + (PreCompute::Deferred(resources, list), true) + } + _ => unreachable!(), + } + } + } + } + + fn quick_compute<'a, I>(&mut self, label: &str, commands: I) + where + I: Iterator<Item = soft::ComputeCommand<&'a soft::Ref>>, + { + { + let (mut pre, switch) = self.switch_compute(); + pre.issue_many(commands); + if switch && !label.is_empty() { + if let PreCompute::Immediate(encoder) = pre { + encoder.set_label(label); + } + } + } + self.stop_encoding(); + } +} + +#[derive(Clone, Debug)] +pub struct IndexBuffer<B> { + buffer: B, + offset: u32, + stride: u32, +} + +/// This is an inner mutable part of the command buffer that is +/// accessible by the owning command pool for one single reason: +/// to reset it. +#[derive(Debug)] +pub struct CommandBufferInner { + sink: Option<CommandSink>, + level: com::Level, + backup_journal: Option<Journal>, + #[cfg(feature = "dispatch")] + backup_capacity: Option<Capacity>, + retained_buffers: Vec<metal::Buffer>, + retained_textures: Vec<metal::Texture>, + active_visibility_queries: Vec<query::Id>, + events: Vec<(Arc<AtomicBool>, bool)>, + host_events: Vec<Arc<AtomicBool>>, +} + +impl Drop for CommandBufferInner { + fn drop(&mut self) { + if self.sink.is_some() { + error!("Command buffer not released properly!"); + } + } +} + +impl CommandBufferInner { + fn reset(&mut self, shared: &Shared, pool_shared: &PoolShared, release: bool) { + match self.sink.take() { + Some(CommandSink::Immediate { + token, + mut encoder_state, + .. + }) => { + encoder_state.end(); + shared.queue.lock().release(token); + } + Some(CommandSink::Deferred { mut journal, .. 
}) => { + if !release { + journal.clear(pool_shared); + self.backup_journal = Some(journal); + } + } + #[cfg(feature = "dispatch")] + Some(CommandSink::Remote { + token, capacity, .. + }) => { + shared.queue.lock().release(token); + if !release { + self.backup_capacity = Some(capacity); + } + } + None => {} + }; + self.retained_buffers.clear(); + self.retained_textures.clear(); + self.active_visibility_queries.clear(); + self.events.clear(); + } + + fn sink(&mut self) -> &mut CommandSink { + self.sink.as_mut().unwrap() + } +} + +#[derive(Debug)] +enum EncoderState { + None, + Blit(metal::BlitCommandEncoder), + Render(metal::RenderCommandEncoder), + Compute(metal::ComputeCommandEncoder), +} + +impl EncoderState { + fn end(&mut self) { + match mem::replace(self, EncoderState::None) { + EncoderState::None => {} + EncoderState::Render(ref encoder) => { + encoder.end_encoding(); + } + EncoderState::Blit(ref encoder) => { + encoder.end_encoding(); + } + EncoderState::Compute(ref encoder) => { + encoder.end_encoding(); + } + } + } +} + +fn div(a: u32, b: u32) -> u32 { + (a + b - 1) / b +} + +fn compute_pitches(region: &com::BufferImageCopy, fd: FormatDesc, extent: &MTLSize) -> (u32, u32) { + let buffer_width = if region.buffer_width == 0 { + extent.width as u32 + } else { + region.buffer_width + }; + let buffer_height = if region.buffer_height == 0 { + extent.height as u32 + } else { + region.buffer_height + }; + let row_pitch = div(buffer_width, fd.dim.0 as _) * (fd.bits / 8) as u32; + let slice_pitch = div(buffer_height, fd.dim.1 as _) * row_pitch; + (row_pitch, slice_pitch) +} + +fn exec_render<R, C>(encoder: &metal::RenderCommandEncoderRef, command: C, resources: &R) +where + R: soft::Resources, + R::Data: Borrow<[u32]>, + R::BufferArray: soft::AsSlice<Option<BufferPtr>, R> + soft::AsSlice<buffer::Offset, R>, + R::TextureArray: soft::AsSlice<Option<TexturePtr>, R>, + R::SamplerArray: soft::AsSlice<Option<SamplerPtr>, R>, + R::DepthStencil: 
Borrow<metal::DepthStencilStateRef>, + R::RenderPipeline: Borrow<metal::RenderPipelineStateRef>, + C: Borrow<soft::RenderCommand<R>>, +{ + use crate::soft::RenderCommand as Cmd; + match *command.borrow() { + Cmd::SetViewport(ref rect, ref depth) => { + encoder.set_viewport(MTLViewport { + originX: rect.x as _, + originY: rect.y as _, + width: rect.w as _, + height: rect.h as _, + znear: depth.start as _, + zfar: depth.end as _, + }); + } + Cmd::SetScissor(scissor) => { + encoder.set_scissor_rect(scissor); + } + Cmd::SetBlendColor(color) => { + encoder.set_blend_color(color[0], color[1], color[2], color[3]); + } + Cmd::SetDepthBias(depth_bias) => { + encoder.set_depth_bias( + depth_bias.const_factor, + depth_bias.slope_factor, + depth_bias.clamp, + ); + } + Cmd::SetDepthStencilState(ref depth_stencil) => { + encoder.set_depth_stencil_state(depth_stencil.borrow()); + } + Cmd::SetStencilReferenceValues(sided) => { + encoder.set_stencil_front_back_reference_value(sided.front, sided.back); + } + Cmd::SetRasterizerState(ref rs) => { + encoder.set_front_facing_winding(rs.front_winding); + encoder.set_cull_mode(rs.cull_mode); + encoder.set_triangle_fill_mode(rs.fill_mode); + if let Some(depth_clip) = rs.depth_clip { + encoder.set_depth_clip_mode(depth_clip); + } + } + Cmd::SetVisibilityResult(mode, offset) => { + encoder.set_visibility_result_mode(mode, offset); + } + Cmd::BindBuffer { + stage, + index, + buffer, + offset, + } => { + let native = Some(buffer.as_native()); + match stage { + ShaderStage::Vertex => encoder.set_vertex_buffer(index as _, native, offset as _), + ShaderStage::Fragment => { + encoder.set_fragment_buffer(index as _, native, offset as _) + } + _ => unreachable!(), + } + } + Cmd::BindBuffers { + stage, + index, + ref buffers, + } => { + use crate::soft::AsSlice; + let values: &[Option<BufferPtr>] = buffers.as_slice(resources); + if !values.is_empty() { + let data = unsafe { + // convert `BufferPtr` -> `&metal::BufferRef` + mem::transmute(values) + }; 
+ let offsets = buffers.as_slice(resources); + match stage { + ShaderStage::Vertex => encoder.set_vertex_buffers(index as _, data, offsets), + ShaderStage::Fragment => { + encoder.set_fragment_buffers(index as _, data, offsets) + } + _ => unreachable!(), + } + } + } + Cmd::BindBufferData { + stage, + index, + ref words, + } => { + let slice = words.borrow(); + match stage { + ShaderStage::Vertex => encoder.set_vertex_bytes( + index as _, + (slice.len() * WORD_SIZE) as u64, + slice.as_ptr() as _, + ), + ShaderStage::Fragment => encoder.set_fragment_bytes( + index as _, + (slice.len() * WORD_SIZE) as u64, + slice.as_ptr() as _, + ), + _ => unreachable!(), + } + } + Cmd::BindTextures { + stage, + index, + ref textures, + } => { + use crate::soft::AsSlice; + let values = textures.as_slice(resources); + if !values.is_empty() { + let data = unsafe { + // convert `TexturePtr` -> `&metal::TextureRef` + mem::transmute(values) + }; + match stage { + ShaderStage::Vertex => encoder.set_vertex_textures(index as _, data), + ShaderStage::Fragment => encoder.set_fragment_textures(index as _, data), + _ => unreachable!(), + } + } + } + Cmd::BindSamplers { + stage, + index, + ref samplers, + } => { + use crate::soft::AsSlice; + let values = samplers.as_slice(resources); + if !values.is_empty() { + let data = unsafe { + // convert `SamplerPtr` -> `&metal::SamplerStateRef` + mem::transmute(values) + }; + match stage { + ShaderStage::Vertex => encoder.set_vertex_sampler_states(index as _, data), + ShaderStage::Fragment => encoder.set_fragment_sampler_states(index as _, data), + _ => unreachable!(), + } + } + } + Cmd::BindPipeline(ref pipeline_state) => { + encoder.set_render_pipeline_state(pipeline_state.borrow()); + } + Cmd::UseResource { resource, usage } => { + encoder.use_resource(resource.as_native(), usage); + } + Cmd::Draw { + primitive_type, + ref vertices, + ref instances, + } => { + if instances.end == 1 { + encoder.draw_primitives( + primitive_type, + vertices.start as _, + 
(vertices.end - vertices.start) as _, + ); + } else if instances.start == 0 { + encoder.draw_primitives_instanced( + primitive_type, + vertices.start as _, + (vertices.end - vertices.start) as _, + instances.end as _, + ); + } else { + encoder.draw_primitives_instanced_base_instance( + primitive_type, + vertices.start as _, + (vertices.end - vertices.start) as _, + (instances.end - instances.start) as _, + instances.start as _, + ); + } + } + Cmd::DrawIndexed { + primitive_type, + ref index, + ref indices, + base_vertex, + ref instances, + } => { + let index_count = (indices.end - indices.start) as _; + let index_type = match index.stride { + 2 => MTLIndexType::UInt16, + 4 => MTLIndexType::UInt32, + _ => unreachable!(), + }; + let offset = (index.offset + indices.start * index.stride) as u64; + let index_buffer = index.buffer.as_native(); + if base_vertex == 0 && instances.end == 1 { + encoder.draw_indexed_primitives( + primitive_type, + index_count, + index_type, + index_buffer, + offset, + ); + } else if base_vertex == 0 && instances.start == 0 { + encoder.draw_indexed_primitives_instanced( + primitive_type, + index_count, + index_type, + index_buffer, + offset, + instances.end as _, + ); + } else { + encoder.draw_indexed_primitives_instanced_base_instance( + primitive_type, + index_count, + index_type, + index_buffer, + offset, + (instances.end - instances.start) as _, + base_vertex as _, + instances.start as _, + ); + } + } + Cmd::DrawIndirect { + primitive_type, + buffer, + offset, + } => { + encoder.draw_primitives_indirect(primitive_type, buffer.as_native(), offset); + } + Cmd::DrawIndexedIndirect { + primitive_type, + ref index, + buffer, + offset, + } => { + let index_type = match index.stride { + 2 => MTLIndexType::UInt16, + 4 => MTLIndexType::UInt32, + _ => unreachable!(), + }; + encoder.draw_indexed_primitives_indirect( + primitive_type, + index_type, + index.buffer.as_native(), + index.offset as u64, + buffer.as_native(), + offset, + ); + } + } +} + 
+fn exec_blit<C>(encoder: &metal::BlitCommandEncoderRef, command: C) +where + C: Borrow<soft::BlitCommand>, +{ + use crate::soft::BlitCommand as Cmd; + match *command.borrow() { + Cmd::FillBuffer { + dst, + ref range, + value, + } => { + encoder.fill_buffer( + dst.as_native(), + NSRange { + location: range.start, + length: range.end - range.start, + }, + value, + ); + } + Cmd::CopyBuffer { + src, + dst, + ref region, + } => { + encoder.copy_from_buffer( + src.as_native(), + region.src as NSUInteger, + dst.as_native(), + region.dst as NSUInteger, + region.size as NSUInteger, + ); + } + Cmd::CopyImage { + src, + dst, + ref region, + } => { + let size = conv::map_extent(region.extent); + let src_offset = conv::map_offset(region.src_offset); + let dst_offset = conv::map_offset(region.dst_offset); + let layers = region + .src_subresource + .layers + .clone() + .zip(region.dst_subresource.layers.clone()); + for (src_layer, dst_layer) in layers { + encoder.copy_from_texture( + src.as_native(), + src_layer as _, + region.src_subresource.level as _, + src_offset, + size, + dst.as_native(), + dst_layer as _, + region.dst_subresource.level as _, + dst_offset, + ); + } + } + Cmd::CopyBufferToImage { + src, + dst, + dst_desc, + ref region, + } => { + let extent = conv::map_extent(region.image_extent); + let origin = conv::map_offset(region.image_offset); + let (row_pitch, slice_pitch) = compute_pitches(®ion, dst_desc, &extent); + let r = ®ion.image_layers; + + for layer in r.layers.clone() { + let offset = region.buffer_offset + + slice_pitch as NSUInteger * (layer - r.layers.start) as NSUInteger; + encoder.copy_from_buffer_to_texture( + src.as_native(), + offset as NSUInteger, + row_pitch as NSUInteger, + slice_pitch as NSUInteger, + extent, + dst.as_native(), + layer as NSUInteger, + r.level as NSUInteger, + origin, + metal::MTLBlitOption::empty(), + ); + } + } + Cmd::CopyImageToBuffer { + src, + src_desc, + dst, + ref region, + } => { + let extent = 
conv::map_extent(region.image_extent); + let origin = conv::map_offset(region.image_offset); + let (row_pitch, slice_pitch) = compute_pitches(®ion, src_desc, &extent); + let r = ®ion.image_layers; + + for layer in r.layers.clone() { + let offset = region.buffer_offset + + slice_pitch as NSUInteger * (layer - r.layers.start) as NSUInteger; + encoder.copy_from_texture_to_buffer( + src.as_native(), + layer as NSUInteger, + r.level as NSUInteger, + origin, + extent, + dst.as_native(), + offset as NSUInteger, + row_pitch as NSUInteger, + slice_pitch as NSUInteger, + metal::MTLBlitOption::empty(), + ); + } + } + } +} + +fn exec_compute<R, C>(encoder: &metal::ComputeCommandEncoderRef, command: C, resources: &R) +where + R: soft::Resources, + R::Data: Borrow<[u32]>, + R::BufferArray: soft::AsSlice<Option<BufferPtr>, R> + soft::AsSlice<buffer::Offset, R>, + R::TextureArray: soft::AsSlice<Option<TexturePtr>, R>, + R::SamplerArray: soft::AsSlice<Option<SamplerPtr>, R>, + R::ComputePipeline: Borrow<metal::ComputePipelineStateRef>, + C: Borrow<soft::ComputeCommand<R>>, +{ + use crate::soft::ComputeCommand as Cmd; + match *command.borrow() { + Cmd::BindBuffer { + index, + buffer, + offset, + } => { + let native = Some(buffer.as_native()); + encoder.set_buffer(index as _, native, offset); + } + Cmd::BindBuffers { index, ref buffers } => { + use crate::soft::AsSlice; + let values: &[Option<BufferPtr>] = buffers.as_slice(resources); + if !values.is_empty() { + let data = unsafe { + // convert `BufferPtr` -> `&metal::BufferRef` + mem::transmute(values) + }; + let offsets = buffers.as_slice(resources); + encoder.set_buffers(index as _, data, offsets); + } + } + Cmd::BindBufferData { ref words, index } => { + let slice = words.borrow(); + encoder.set_bytes( + index as _, + (slice.len() * WORD_SIZE) as u64, + slice.as_ptr() as _, + ); + } + Cmd::BindTextures { + index, + ref textures, + } => { + use crate::soft::AsSlice; + let values = textures.as_slice(resources); + if 
!values.is_empty() { + let data = unsafe { + // convert `TexturePtr` -> `&metal::TextureRef` + mem::transmute(values) + }; + encoder.set_textures(index as _, data); + } + } + Cmd::BindSamplers { + index, + ref samplers, + } => { + use crate::soft::AsSlice; + let values = samplers.as_slice(resources); + if !values.is_empty() { + let data = unsafe { + // convert `SamplerPtr` -> `&metal::SamplerStateRef` + mem::transmute(values) + }; + encoder.set_sampler_states(index as _, data); + } + } + Cmd::BindPipeline(ref pipeline) => { + encoder.set_compute_pipeline_state(pipeline.borrow()); + } + Cmd::UseResource { resource, usage } => { + encoder.use_resource(resource.as_native(), usage); + } + Cmd::Dispatch { wg_size, wg_count } => { + encoder.dispatch_thread_groups(wg_count, wg_size); + } + Cmd::DispatchIndirect { + wg_size, + buffer, + offset, + } => { + encoder.dispatch_thread_groups_indirect(buffer.as_native(), offset, wg_size); + } + } +} + +#[derive(Default, Debug)] +struct PerformanceCounters { + immediate_command_buffers: usize, + deferred_command_buffers: usize, + remote_command_buffers: usize, + signal_command_buffers: usize, + frame_wait_duration: time::Duration, + frame_wait_count: usize, + frame: usize, +} + +#[derive(Debug)] +pub struct CommandQueue { + shared: Arc<Shared>, + retained_buffers: Vec<metal::Buffer>, + retained_textures: Vec<metal::Texture>, + active_visibility_queries: Vec<query::Id>, + perf_counters: Option<PerformanceCounters>, + /// If true, we combine deferred command buffers together into one giant + /// command buffer per submission, including the signalling logic. + pub stitch_deferred: bool, + /// Hack around the Metal System Trace logic that ignores empty command buffers entirely. 
+ pub insert_dummy_encoders: bool, +} + +unsafe impl Send for CommandQueue {} +unsafe impl Sync for CommandQueue {} + +impl CommandQueue { + pub(crate) fn new(shared: Arc<Shared>) -> Self { + CommandQueue { + shared, + retained_buffers: Vec::new(), + retained_textures: Vec::new(), + active_visibility_queries: Vec::new(), + perf_counters: if COUNTERS_REPORT_WINDOW != 0 { + Some(PerformanceCounters::default()) + } else { + None + }, + stitch_deferred: true, + insert_dummy_encoders: false, + } + } + + /// This is a hack around Metal System Trace logic that ignores empty command buffers entirely. + fn record_empty(&self, command_buf: &metal::CommandBufferRef) { + if self.insert_dummy_encoders { + command_buf.new_blit_command_encoder().end_encoding(); + } + } + + fn wait<'a, T, I>(&mut self, wait_semaphores: I) + where + T: 'a + Borrow<native::Semaphore>, + I: IntoIterator<Item = &'a T>, + { + for semaphore in wait_semaphores { + let sem = semaphore.borrow(); + if let Some(ref system) = sem.system { + system.wait(!0); + } + } + } +} + +impl hal::queue::CommandQueue<Backend> for CommandQueue { + unsafe fn submit<'a, T, Ic, S, Iw, Is>( + &mut self, + hal::queue::Submission { + command_buffers, + wait_semaphores, + signal_semaphores, + }: hal::queue::Submission<Ic, Iw, Is>, + fence: Option<&native::Fence>, + ) where + T: 'a + Borrow<CommandBuffer>, + Ic: IntoIterator<Item = &'a T>, + S: 'a + Borrow<native::Semaphore>, + Iw: IntoIterator<Item = (&'a S, pso::PipelineStage)>, + Is: IntoIterator<Item = &'a S>, + { + debug!("submitting with fence {:?}", fence); + self.wait(wait_semaphores.into_iter().map(|(s, _)| s)); + + let system_semaphores = signal_semaphores + .into_iter() + .filter_map(|sem| sem.borrow().system.clone()) + .collect::<Vec<_>>(); + + #[allow(unused_mut)] + let (mut num_immediate, mut num_deferred, mut num_remote) = (0, 0, 0); + let mut event_commands = Vec::new(); + let do_signal = fence.is_some() || !system_semaphores.is_empty(); + + autoreleasepool(|| { + 
// for command buffers + let mut cmd_queue = self.shared.queue.lock(); + let mut blocker = self.shared.queue_blocker.lock(); + let mut deferred_cmd_buffer = None::<&metal::CommandBufferRef>; + let mut release_sinks = Vec::new(); + + for cmd_buffer in command_buffers { + let mut inner = cmd_buffer.borrow().inner.borrow_mut(); + let CommandBufferInner { + ref sink, + ref mut retained_buffers, + ref mut retained_textures, + ref mut active_visibility_queries, + ref events, + ref host_events, + .. + } = *inner; + + //TODO: split event commands into immediate/blocked submissions? + event_commands.extend_from_slice(events); + // wait for anything not previously fired + let wait_events = host_events + .iter() + .filter(|event| { + event_commands + .iter() + .rfind(|ev| Arc::ptr_eq(event, &ev.0)) + .map_or(true, |ev| !ev.1) + }) + .cloned() + .collect::<Vec<_>>(); + if !wait_events.is_empty() { + blocker.submissions.push(BlockedSubmission { + wait_events, + command_buffers: Vec::new(), + }); + } + + match *sink { + Some(CommandSink::Immediate { + ref cmd_buffer, + ref token, + num_passes, + .. + }) => { + num_immediate += 1; + trace!("\timmediate {:?} with {} passes", token, num_passes); + self.retained_buffers.extend(retained_buffers.drain(..)); + self.retained_textures.extend(retained_textures.drain(..)); + self.active_visibility_queries + .extend(active_visibility_queries.drain(..)); + if num_passes != 0 { + // flush the deferred recording, if any + if let Some(cb) = deferred_cmd_buffer.take() { + blocker.submit_impl(cb); + } + blocker.submit_impl(cmd_buffer); + } + // destroy the sink with the associated command buffer + release_sinks.extend(inner.sink.take()); + } + Some(CommandSink::Deferred { ref journal, .. 
}) => { + num_deferred += 1; + trace!("\tdeferred with {} passes", journal.passes.len()); + self.active_visibility_queries + .extend_from_slice(active_visibility_queries); + if !journal.passes.is_empty() { + let cmd_buffer = deferred_cmd_buffer.take().unwrap_or_else(|| { + let cmd_buffer = cmd_queue.spawn_temp(); + cmd_buffer.enqueue(); + if INTERNAL_LABELS { + cmd_buffer.set_label("deferred"); + } + cmd_buffer + }); + journal.record(&*cmd_buffer); + if self.stitch_deferred { + deferred_cmd_buffer = Some(cmd_buffer); + } else { + blocker.submit_impl(cmd_buffer); + } + } + } + #[cfg(feature = "dispatch")] + Some(CommandSink::Remote { + queue: NoDebug(ref queue), + ref cmd_buffer, + ref token, + .. + }) => { + num_remote += 1; + trace!("\tremote {:?}", token); + cmd_buffer.lock().enqueue(); + let shared_cb = SharedCommandBuffer(Arc::clone(cmd_buffer)); + //TODO: make this compatible with events + queue.exec_sync(move || { + shared_cb.0.lock().commit(); + }); + } + None => panic!("Command buffer not recorded for submission"), + } + } + + if do_signal || !event_commands.is_empty() || !self.active_visibility_queries.is_empty() + { + //Note: there is quite a bit copying here + let free_buffers = self.retained_buffers.drain(..).collect::<Vec<_>>(); + let free_textures = self.retained_textures.drain(..).collect::<Vec<_>>(); + let visibility = if self.active_visibility_queries.is_empty() { + None + } else { + let queries = self.active_visibility_queries.drain(..).collect::<Vec<_>>(); + Some((Arc::clone(&self.shared), queries)) + }; + + let block = ConcreteBlock::new(move |_cb: *mut ()| { + // signal the semaphores + for semaphore in &system_semaphores { + semaphore.signal(); + } + // process events + for &(ref atomic, value) in &event_commands { + atomic.store(value, Ordering::Release); + } + // free all the manually retained resources + let _ = free_buffers; + let _ = free_textures; + // update visibility queries + if let Some((ref shared, ref queries)) = visibility { + 
let vis = &shared.visibility; + let availability_ptr = (vis.buffer.contents() as *mut u8) + .offset(vis.availability_offset as isize) + as *mut u32; + for &q in queries { + *availability_ptr.offset(q as isize) = 1; + } + //HACK: the lock is needed to wake up, but it doesn't hold the checked data + let _ = vis.allocator.lock(); + vis.condvar.notify_all(); + } + }) + .copy(); + + let cmd_buffer = deferred_cmd_buffer.take().unwrap_or_else(|| { + let cmd_buffer = cmd_queue.spawn_temp(); + if INTERNAL_LABELS { + cmd_buffer.set_label("signal"); + } + self.record_empty(cmd_buffer); + cmd_buffer + }); + let () = msg_send![cmd_buffer, addCompletedHandler: block.deref() as *const _]; + blocker.submit_impl(cmd_buffer); + + if let Some(fence) = fence { + debug!( + "\tmarking fence ptr {:?} as pending", + fence.0.raw() as *const _ + ); + *fence.0.lock() = native::FenceInner::PendingSubmission(cmd_buffer.to_owned()); + } + } else if let Some(cmd_buffer) = deferred_cmd_buffer { + blocker.submit_impl(cmd_buffer); + } + + for sink in release_sinks { + if let CommandSink::Immediate { token, .. 
} = sink { + cmd_queue.release(token); + } + } + }); + + debug!( + "\t{} immediate, {} deferred, and {} remote command buffers", + num_immediate, num_deferred, num_remote + ); + if let Some(ref mut counters) = self.perf_counters { + counters.immediate_command_buffers += num_immediate; + counters.deferred_command_buffers += num_deferred; + counters.remote_command_buffers += num_remote; + if do_signal { + counters.signal_command_buffers += 1; + } + } + } + + unsafe fn present( + &mut self, + _surface: &mut window::Surface, + image: window::SwapchainImage, + wait_semaphore: Option<&native::Semaphore>, + ) -> Result<Option<Suboptimal>, PresentError> { + self.wait(wait_semaphore); + + let queue = self.shared.queue.lock(); + let drawable = image.into_drawable(); + autoreleasepool(|| { + let command_buffer = queue.raw.new_command_buffer(); + if INTERNAL_LABELS { + command_buffer.set_label("present"); + } + self.record_empty(command_buffer); + + command_buffer.present_drawable(&drawable); + command_buffer.commit(); + }); + Ok(None) + } + + fn wait_idle(&self) -> Result<(), OutOfMemory> { + QueueInner::wait_idle(&self.shared.queue); + Ok(()) + } +} + +fn assign_sides( + this: &mut pso::Sided<pso::StencilValue>, + faces: pso::Face, + value: pso::StencilValue, +) { + if faces.contains(pso::Face::FRONT) { + this.front = value; + } + if faces.contains(pso::Face::BACK) { + this.back = value; + } +} + +impl hal::pool::CommandPool<Backend> for CommandPool { + unsafe fn reset(&mut self, release_resources: bool) { + for cmd_buffer in &self.allocated { + cmd_buffer + .borrow_mut() + .reset(&self.shared, &self.pool_shared, release_resources); + } + } + + unsafe fn allocate_one(&mut self, level: com::Level) -> CommandBuffer { + //TODO: fail with OOM if we allocate more actual command buffers + // than our mega-queue supports. 
+ let inner = Arc::new(RefCell::new(CommandBufferInner { + sink: None, + level, + backup_journal: None, + #[cfg(feature = "dispatch")] + backup_capacity: None, + retained_buffers: Vec::new(), + retained_textures: Vec::new(), + active_visibility_queries: Vec::new(), + events: Vec::new(), + host_events: Vec::new(), + })); + self.allocated.push(Arc::clone(&inner)); + + CommandBuffer { + shared: Arc::clone(&self.shared), + pool_shared: Arc::clone(&self.pool_shared), + inner, + state: State { + viewport: None, + scissors: None, + blend_color: None, + render_pso: None, + render_pso_is_compatible: false, + compute_pso: None, + work_group_size: MTLSize { + width: 0, + height: 0, + depth: 0, + }, + primitive_type: MTLPrimitiveType::Point, + resources_vs: StageResources::new(), + resources_ps: StageResources::new(), + resources_cs: StageResources::new(), + index_buffer: None, + rasterizer_state: None, + depth_bias: pso::DepthBias::default(), + stencil: native::StencilState { + reference_values: pso::Sided::new(0), + read_masks: pso::Sided::new(!0), + write_masks: pso::Sided::new(!0), + }, + push_constants: Vec::new(), + vertex_buffers: Vec::new(), + target: TargetState::default(), + visibility_query: (metal::MTLVisibilityResultMode::Disabled, 0), + pending_subpasses: Vec::new(), + descriptor_sets: (0..MAX_BOUND_DESCRIPTOR_SETS) + .map(|_| DescriptorSetInfo::default()) + .collect(), + active_depth_stencil_desc: pso::DepthStencilDesc::default(), + active_scissor: MTLScissorRect { + x: 0, + y: 0, + width: 0, + height: 0, + }, + }, + temp: Temp { + clear_vertices: Vec::new(), + blit_vertices: FastHashMap::default(), + clear_values: Vec::new(), + }, + name: String::new(), + } + } + + /// Free command buffers which are allocated from this pool. 
+ unsafe fn free<I>(&mut self, cmd_buffers: I) + where + I: IntoIterator<Item = CommandBuffer>, + { + use hal::command::CommandBuffer as _; + for mut cmd_buf in cmd_buffers { + cmd_buf.reset(true); + match self + .allocated + .iter_mut() + .position(|b| Arc::ptr_eq(b, &cmd_buf.inner)) + { + Some(index) => { + self.allocated.swap_remove(index); + } + None => error!("Unable to free a command buffer!"), + } + } + } +} + +impl CommandBuffer { + fn update_depth_stencil(&mut self) { + let mut inner = self.inner.borrow_mut(); + let mut pre = inner.sink().pre_render(); + if !pre.is_void() { + let ds_store = &self.shared.service_pipes.depth_stencil_states; + if let Some(desc) = self.state.build_depth_stencil() { + let state = &**ds_store.get(desc, &self.shared.device); + pre.issue(soft::RenderCommand::SetDepthStencilState(state)); + } + } + } +} + +impl com::CommandBuffer<Backend> for CommandBuffer { + unsafe fn begin( + &mut self, + flags: com::CommandBufferFlags, + info: com::CommandBufferInheritanceInfo<Backend>, + ) { + self.reset(false); + + let mut inner = self.inner.borrow_mut(); + let can_immediate = inner.level == com::Level::Primary + && flags.contains(com::CommandBufferFlags::ONE_TIME_SUBMIT); + let sink = match self.pool_shared.online_recording { + OnlineRecording::Immediate if can_immediate => { + let (cmd_buffer, token) = self.shared.queue.lock().spawn(); + if !self.name.is_empty() { + cmd_buffer.set_label(&self.name); + } + CommandSink::Immediate { + cmd_buffer, + token, + encoder_state: EncoderState::None, + num_passes: 0, + label: String::new(), + } + } + #[cfg(feature = "dispatch")] + OnlineRecording::Remote(_) if can_immediate => { + let (cmd_buffer, token) = self.shared.queue.lock().spawn(); + if !self.name.is_empty() { + cmd_buffer.set_label(&self.name); + } + CommandSink::Remote { + queue: NoDebug(dispatch::Queue::with_target_queue( + "gfx-metal", + dispatch::QueueAttribute::Serial, + &self.pool_shared.dispatch_queue.as_ref().unwrap().0, + )), + 
cmd_buffer: Arc::new(Mutex::new(cmd_buffer)), + token, + pass: None, + capacity: inner.backup_capacity.take().unwrap_or_default(), + label: String::new(), + pool_shared: Arc::clone(&self.pool_shared), + } + } + _ => CommandSink::Deferred { + is_encoding: false, + is_inheriting: info.subpass.is_some(), + journal: inner.backup_journal.take().unwrap_or_default(), + label: String::new(), + }, + }; + inner.sink = Some(sink); + + if let Some(framebuffer) = info.framebuffer { + self.state.target.extent = framebuffer.extent; + } + if let Some(sp) = info.subpass { + let subpass = &sp.main_pass.subpasses[sp.index as usize]; + self.state.target.formats = subpass.attachments.map(|at| (at.format, at.channel)); + self.state.target.aspects = Aspects::empty(); + if !subpass.attachments.colors.is_empty() { + self.state.target.aspects |= Aspects::COLOR; + } + if let Some(ref at) = subpass.attachments.depth_stencil { + let rat = &sp.main_pass.attachments[at.id]; + let aspects = rat.format.unwrap().surface_desc().aspects; + self.state.target.aspects |= aspects; + } + + match inner.sink { + Some(CommandSink::Deferred { + ref mut is_encoding, + ref mut journal, + ref label, + .. 
+ }) => { + *is_encoding = true; + let pass_desc = self + .pool_shared + .render_pass_descriptors + .lock() + .alloc(&self.shared); + journal.passes.alloc().init(( + soft::Pass::Render(pass_desc), + 0..0, + label.clone(), + )); + } + _ => { + warn!("Unexpected inheritance info on a primary command buffer"); + } + } + } + } + + unsafe fn finish(&mut self) { + self.inner.borrow_mut().sink().stop_encoding(); + } + + unsafe fn reset(&mut self, release_resources: bool) { + self.state.reset(); + self.inner + .borrow_mut() + .reset(&self.shared, &self.pool_shared, release_resources); + } + + unsafe fn pipeline_barrier<'a, T>( + &mut self, + _stages: Range<pso::PipelineStage>, + _dependencies: memory::Dependencies, + _barriers: T, + ) where + T: IntoIterator, + T::Item: Borrow<memory::Barrier<'a, Backend>>, + { + } + + unsafe fn fill_buffer(&mut self, buffer: &native::Buffer, sub: buffer::SubRange, data: u32) { + let (raw, base_range) = buffer.as_bound(); + let mut inner = self.inner.borrow_mut(); + + let start = base_range.start + sub.offset; + assert_eq!(start % WORD_ALIGNMENT, 0); + + let end = sub.size.map_or(base_range.end, |s| { + assert_eq!(s % WORD_ALIGNMENT, 0); + start + s + }); + + if (data & 0xFF) * 0x0101_0101 == data { + let command = soft::BlitCommand::FillBuffer { + dst: AsNative::from(raw), + range: start..end, + value: data as u8, + }; + inner.sink().blit_commands(iter::once(command)); + } else { + let pso = &*self.shared.service_pipes.fill_buffer; + let length = (end - start) / WORD_ALIGNMENT; + let value_and_length = [data, length as _]; + + // TODO: Consider writing multiple values per thread in shader + let threads_per_threadgroup = pso.thread_execution_width(); + let threadgroups = (length + threads_per_threadgroup - 1) / threads_per_threadgroup; + + let wg_count = MTLSize { + width: threadgroups, + height: 1, + depth: 1, + }; + let wg_size = MTLSize { + width: threads_per_threadgroup, + height: 1, + depth: 1, + }; + + let commands = [ + 
soft::ComputeCommand::BindPipeline(pso), + soft::ComputeCommand::BindBuffer { + index: 0, + buffer: AsNative::from(raw), + offset: start, + }, + soft::ComputeCommand::BindBufferData { + index: 1, + words: &value_and_length[..], + }, + soft::ComputeCommand::Dispatch { wg_size, wg_count }, + ]; + + inner + .sink() + .quick_compute("fill_buffer", commands.iter().cloned()); + } + } + + unsafe fn update_buffer(&mut self, dst: &native::Buffer, offset: buffer::Offset, data: &[u8]) { + let (dst_raw, dst_range) = dst.as_bound(); + assert!(dst_range.start + offset + data.len() as buffer::Offset <= dst_range.end); + + let src = self.shared.device.lock().new_buffer_with_data( + data.as_ptr() as _, + data.len() as _, + metal::MTLResourceOptions::CPUCacheModeWriteCombined, + ); + if INTERNAL_LABELS { + src.set_label("update_buffer"); + } + + let mut inner = self.inner.borrow_mut(); + { + let command = soft::BlitCommand::CopyBuffer { + src: AsNative::from(src.as_ref()), + dst: AsNative::from(dst_raw), + region: com::BufferCopy { + src: 0, + dst: dst_range.start + offset, + size: data.len() as _, + }, + }; + + inner.sink().blit_commands(iter::once(command)); + } + + inner.retained_buffers.push(src); + } + + unsafe fn clear_image<T>( + &mut self, + image: &native::Image, + _layout: i::Layout, + value: com::ClearValue, + subresource_ranges: T, + ) where + T: IntoIterator, + T::Item: Borrow<i::SubresourceRange>, + { + let CommandBufferInner { + ref mut retained_textures, + ref mut sink, + .. 
+ } = *self.inner.borrow_mut(); + + let clear_color = image.shader_channel.interpret(value.color); + let base_extent = image.kind.extent(); + let is_layered = !self.shared.disabilities.broken_layered_clear_image; + + autoreleasepool(|| { + let raw = image.like.as_texture(); + for subresource_range in subresource_ranges { + let sub = subresource_range.borrow(); + let num_layers = sub.resolve_layer_count(image.kind.num_layers()); + let num_levels = sub.resolve_level_count(image.mip_levels); + let layers = if is_layered { + 0..1 + } else { + sub.layer_start..sub.layer_start + num_layers + }; + let texture = if is_layered && sub.layer_start > 0 { + // aliasing is necessary for bulk-clearing all layers starting with 0 + let tex = raw.new_texture_view_from_slice( + image.mtl_format, + image.mtl_type, + NSRange { + location: 0, + length: raw.mipmap_level_count(), + }, + NSRange { + location: sub.layer_start as _, + length: num_layers as _, + }, + ); + retained_textures.push(tex); + retained_textures.last().unwrap() + } else { + raw + }; + + for layer in layers { + for level in sub.level_start..sub.level_start + num_levels { + let descriptor = self + .pool_shared + .render_pass_descriptors + .lock() + .alloc(&self.shared); + if base_extent.depth > 1 { + assert_eq!((sub.layer_start, num_layers), (0, 1)); + let depth = base_extent.at_level(level).depth as u64; + descriptor.set_render_target_array_length(depth); + } else if is_layered { + descriptor.set_render_target_array_length(num_layers as u64); + }; + + if image.format_desc.aspects.contains(Aspects::COLOR) { + let attachment = descriptor.color_attachments().object_at(0).unwrap(); + attachment.set_texture(Some(texture)); + attachment.set_level(level as _); + if !is_layered { + attachment.set_slice(layer as _); + } + attachment.set_store_action(metal::MTLStoreAction::Store); + if sub.aspects.contains(Aspects::COLOR) { + attachment.set_load_action(metal::MTLLoadAction::Clear); + 
attachment.set_clear_color(clear_color.clone()); + } else { + attachment.set_load_action(metal::MTLLoadAction::Load); + } + } else { + assert!(!sub.aspects.contains(Aspects::COLOR)); + }; + + if image.format_desc.aspects.contains(Aspects::DEPTH) { + let attachment = descriptor.depth_attachment().unwrap(); + attachment.set_texture(Some(texture)); + attachment.set_level(level as _); + if !is_layered { + attachment.set_slice(layer as _); + } + attachment.set_store_action(metal::MTLStoreAction::Store); + if sub.aspects.contains(Aspects::DEPTH) { + attachment.set_load_action(metal::MTLLoadAction::Clear); + attachment.set_clear_depth(value.depth_stencil.depth as _); + } else { + attachment.set_load_action(metal::MTLLoadAction::Load); + } + } else { + assert!(!sub.aspects.contains(Aspects::DEPTH)); + }; + + if image.format_desc.aspects.contains(Aspects::STENCIL) { + let attachment = descriptor.stencil_attachment().unwrap(); + attachment.set_texture(Some(texture)); + attachment.set_level(level as _); + if !is_layered { + attachment.set_slice(layer as _); + } + attachment.set_store_action(metal::MTLStoreAction::Store); + if sub.aspects.contains(Aspects::STENCIL) { + attachment.set_load_action(metal::MTLLoadAction::Clear); + attachment.set_clear_stencil(value.depth_stencil.stencil); + } else { + attachment.set_load_action(metal::MTLLoadAction::Load); + } + } else { + assert!(!sub.aspects.contains(Aspects::STENCIL)); + }; + + sink.as_mut().unwrap().quick_render( + "clear_image", + descriptor, + &self.pool_shared, + iter::empty(), + ); + } + } + } + }); + } + + unsafe fn clear_attachments<T, U>(&mut self, clears: T, rects: U) + where + T: IntoIterator, + T::Item: Borrow<com::AttachmentClear>, + U: IntoIterator, + U::Item: Borrow<pso::ClearRect>, + { + // gather vertices/polygons + let ext = self.state.target.extent; + let vertices = &mut self.temp.clear_vertices; + vertices.clear(); + + for rect in rects { + let r = rect.borrow(); + for layer in r.layers.clone() { + let data = 
[ + [r.rect.x, r.rect.y], + [r.rect.x, r.rect.y + r.rect.h], + [r.rect.x + r.rect.w, r.rect.y + r.rect.h], + [r.rect.x + r.rect.w, r.rect.y], + ]; + // now use the hard-coded index array to add 6 vertices to the list + //TODO: could use instancing here + // - with triangle strips + // - with half of the data supplied per instance + + for &index in &[0usize, 1, 2, 2, 3, 0] { + let d = data[index]; + vertices.alloc().init(ClearVertex { + pos: [ + d[0] as f32 / ext.width as f32, + d[1] as f32 / ext.height as f32, + 0.0, //TODO: depth Z + layer as f32, + ], + }); + } + } + } + + let mut vertex_is_dirty = true; + let mut inner = self.inner.borrow_mut(); + let clear_pipes = &self.shared.service_pipes.clears; + let ds_store = &self.shared.service_pipes.depth_stencil_states; + let ds_state; + + // issue a PSO+color switch and a draw for each requested clear + let mut key = ClearKey { + framebuffer_aspects: self.state.target.aspects, + color_formats: [metal::MTLPixelFormat::Invalid; MAX_COLOR_ATTACHMENTS], + depth_stencil_format: self + .state + .target + .formats + .depth_stencil + .map_or(metal::MTLPixelFormat::Invalid, |(format, _)| format), + sample_count: self.state.target.samples, + target_index: None, + }; + for (out, &(mtl_format, _)) in key + .color_formats + .iter_mut() + .zip(&self.state.target.formats.colors) + { + *out = mtl_format; + } + + for clear in clears { + let pso; // has to live at least as long as all the commands + let depth_stencil; + let raw_value; + + let (com_clear, target_index) = match *clear.borrow() { + com::AttachmentClear::Color { index, value } => { + let channel = self.state.target.formats.colors[index].1; + //Note: technically we should be able to derive the Channel from the + // `value` variant, but this is blocked by the portability that is + // always passing the attachment clears as `ClearColor::Sfloat` atm. 
+ raw_value = com::ClearColor::from(value); + let com = soft::RenderCommand::BindBufferData { + stage: ShaderStage::Fragment, + index: 0, + words: slice::from_raw_parts( + raw_value.float32.as_ptr() as *const u32, + mem::size_of::<com::ClearColor>() / WORD_SIZE, + ), + }; + (com, Some((index as u8, channel))) + } + com::AttachmentClear::DepthStencil { depth, stencil } => { + let mut aspects = Aspects::empty(); + if let Some(value) = depth { + for v in vertices.iter_mut() { + v.pos[2] = value; + } + vertex_is_dirty = true; + aspects |= Aspects::DEPTH; + } + if stencil.is_some() { + //TODO: soft::RenderCommand::SetStencilReference + aspects |= Aspects::STENCIL; + } + depth_stencil = ds_store.get_write(aspects); + let com = soft::RenderCommand::SetDepthStencilState(&**depth_stencil); + (com, None) + } + }; + + key.target_index = target_index; + pso = clear_pipes.get( + key, + &self.shared.service_pipes.library, + &self.shared.device, + &self.shared.private_caps, + ); + + let com_pso = iter::once(soft::RenderCommand::BindPipeline(&**pso)); + let com_rast = iter::once(soft::RenderCommand::SetRasterizerState( + native::RasterizerState::default(), + )); + + let com_vertex = if vertex_is_dirty { + vertex_is_dirty = false; + Some(soft::RenderCommand::BindBufferData { + stage: ShaderStage::Vertex, + index: 0, + words: slice::from_raw_parts( + vertices.as_ptr() as *const u32, + vertices.len() * mem::size_of::<ClearVertex>() / WORD_SIZE, + ), + }) + } else { + None + }; + + let rect = pso::Rect { + x: 0, + y: ext.height as _, + w: ext.width as _, + h: -(ext.height as i16), + }; + let com_viewport = iter::once(soft::RenderCommand::SetViewport(rect, 0.0..1.0)); + let com_scissor = self.state.set_scissor(MTLScissorRect { + x: 0, + y: 0, + width: ext.width as _, + height: ext.height as _, + }); + + let com_draw = iter::once(soft::RenderCommand::Draw { + primitive_type: MTLPrimitiveType::Triangle, + vertices: 0..vertices.len() as _, + instances: 0..1, + }); + + let commands = 
iter::once(com_clear) + .chain(com_pso) + .chain(com_rast) + .chain(com_viewport) + .chain(com_scissor) + .chain(com_vertex) + .chain(com_draw); + + inner.sink().pre_render().issue_many(commands); + } + + // reset all the affected states + let device_lock = &self.shared.device; + let com_ds = match self.state.build_depth_stencil() { + Some(desc) => { + ds_state = ds_store.get(desc, device_lock); + Some(soft::RenderCommand::SetDepthStencilState(&**ds_state)) + } + None => None, + }; + + let com_scissor = self.state.reset_scissor(); + let com_viewport = self.state.make_viewport_command(); + let (com_pso, com_rast) = self.state.make_pso_commands(); + + let com_vs = match ( + self.state.resources_vs.buffers.first(), + self.state.resources_vs.buffer_offsets.first(), + ) { + (Some(&Some(buffer)), Some(&offset)) => Some(soft::RenderCommand::BindBuffer { + stage: ShaderStage::Vertex, + index: 0, + buffer, + offset, + }), + _ => None, + }; + let com_ps = match ( + self.state.resources_ps.buffers.first(), + self.state.resources_ps.buffer_offsets.first(), + ) { + (Some(&Some(buffer)), Some(&offset)) => Some(soft::RenderCommand::BindBuffer { + stage: ShaderStage::Fragment, + index: 0, + buffer, + offset, + }), + _ => None, + }; + + let commands = com_pso + .into_iter() + .chain(com_rast) + .chain(com_viewport) + .chain(com_scissor) + .chain(com_ds) + .chain(com_vs) + .chain(com_ps); + + inner.sink().pre_render().issue_many(commands); + + vertices.clear(); + } + + unsafe fn resolve_image<T>( + &mut self, + _src: &native::Image, + _src_layout: i::Layout, + _dst: &native::Image, + _dst_layout: i::Layout, + _regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<com::ImageResolve>, + { + unimplemented!() + } + + unsafe fn blit_image<T>( + &mut self, + src: &native::Image, + _src_layout: i::Layout, + dst: &native::Image, + _dst_layout: i::Layout, + filter: i::Filter, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<com::ImageBlit>, + { + let CommandBufferInner { + 
ref mut retained_textures, + ref mut sink, + .. + } = *self.inner.borrow_mut(); + + let src_cubish = src.view_cube_as_2d(); + let dst_cubish = dst.view_cube_as_2d(); + let dst_layers = dst.kind.num_layers(); + + let vertices = &mut self.temp.blit_vertices; + vertices.clear(); + + let sampler = self.shared.service_pipes.sampler_states.get(filter); + let ds_state; + let key_mtl_type = match dst_cubish { + Some(_) => metal::MTLTextureType::D2Array, + None => dst.mtl_type, + }; + let key = ( + key_mtl_type, + dst.mtl_format, + src.format_desc.aspects, + dst.shader_channel, + ); + let pso = self.shared.service_pipes.blits.get( + key, + &self.shared.service_pipes.library, + &self.shared.device, + &self.shared.private_caps, + ); + + for region in regions { + let r = region.borrow(); + + // layer count must be equal in both subresources + debug_assert_eq!( + r.src_subresource.layers.len(), + r.dst_subresource.layers.len() + ); + debug_assert_eq!(r.src_subresource.aspects, r.dst_subresource.aspects); + debug_assert!(src.format_desc.aspects.contains(r.src_subresource.aspects)); + debug_assert!(dst.format_desc.aspects.contains(r.dst_subresource.aspects)); + + let se = src.kind.extent().at_level(r.src_subresource.level); + let de = dst.kind.extent().at_level(r.dst_subresource.level); + //TODO: support 3D textures + if se.depth != 1 || de.depth != 1 { + warn!( + "3D image blits are not supported properly yet: {:?} -> {:?}", + se, de + ); + } + + let layers = r + .src_subresource + .layers + .clone() + .zip(r.dst_subresource.layers.clone()); + let list = vertices + .entry((r.dst_subresource.aspects, r.dst_subresource.level)) + .or_insert_with(Vec::new); + + for (src_layer, dst_layer) in layers { + // this helper array defines unique data for quad vertices + let data = [ + [ + r.src_bounds.start.x, + r.src_bounds.start.y, + r.dst_bounds.start.x, + r.dst_bounds.start.y, + ], + [ + r.src_bounds.start.x, + r.src_bounds.end.y, + r.dst_bounds.start.x, + r.dst_bounds.end.y, + ], + [ + 
r.src_bounds.end.x, + r.src_bounds.end.y, + r.dst_bounds.end.x, + r.dst_bounds.end.y, + ], + [ + r.src_bounds.end.x, + r.src_bounds.start.y, + r.dst_bounds.end.x, + r.dst_bounds.start.y, + ], + ]; + // now use the hard-coded index array to add 6 vertices to the list + //TODO: could use instancing here + // - with triangle strips + // - with half of the data supplied per instance + + for &index in &[0usize, 1, 2, 2, 3, 0] { + let d = data[index]; + list.alloc().init(BlitVertex { + uv: [ + d[0] as f32 / se.width as f32, + d[1] as f32 / se.height as f32, + src_layer as f32, + r.src_subresource.level as f32, + ], + pos: [ + d[2] as f32 / de.width as f32, + d[3] as f32 / de.height as f32, + 0.0, + dst_layer as f32, + ], + }); + } + } + } + + // Note: we don't bother to restore any render states here, since we are currently + // outside of a render pass, and the state will be reset automatically once + // we enter the next pass. + + let src_native = AsNative::from(match src_cubish { + Some(ref tex) => tex.as_ref(), + None => src.like.as_texture(), + }); + let prelude = [ + soft::RenderCommand::BindPipeline(&**pso), + soft::RenderCommand::BindSamplers { + stage: ShaderStage::Fragment, + index: 0, + samplers: &[Some(AsNative::from(sampler))][..], + }, + soft::RenderCommand::BindTextures { + stage: ShaderStage::Fragment, + index: 0, + textures: &[Some(src_native)][..], + }, + ]; + + let com_ds = if src + .format_desc + .aspects + .intersects(Aspects::DEPTH | Aspects::STENCIL) + { + ds_state = self + .shared + .service_pipes + .depth_stencil_states + .get_write(src.format_desc.aspects); + Some(soft::RenderCommand::SetDepthStencilState(&**ds_state)) + } else { + None + }; + + let layered_rendering = self.shared.private_caps.layered_rendering; + let pool_shared = &self.pool_shared; + let shared = &self.shared; + autoreleasepool(|| { + let dst_new = match dst_cubish { + Some(ref tex) => tex.as_ref(), + None => dst.like.as_texture(), + }; + + for ((aspects, level), list) in 
vertices.drain() { + let descriptor = pool_shared.render_pass_descriptors.lock().alloc(shared); + if layered_rendering { + descriptor.set_render_target_array_length(dst_layers as _); + } + + if aspects.contains(Aspects::COLOR) { + let att = descriptor.color_attachments().object_at(0).unwrap(); + att.set_texture(Some(dst_new)); + att.set_level(level as _); + } + if aspects.contains(Aspects::DEPTH) { + let att = descriptor.depth_attachment().unwrap(); + att.set_texture(Some(dst_new)); + att.set_level(level as _); + } + if aspects.contains(Aspects::STENCIL) { + let att = descriptor.stencil_attachment().unwrap(); + att.set_texture(Some(dst_new)); + att.set_level(level as _); + } + + let ext = dst.kind.extent().at_level(level); + //Note: flipping Y coordinate of the destination here + let rect = pso::Rect { + x: 0, + y: ext.height as _, + w: ext.width as _, + h: -(ext.height as i16), + }; + + let extra = [ + soft::RenderCommand::SetViewport(rect, 0.0..1.0), + soft::RenderCommand::SetScissor(MTLScissorRect { + x: 0, + y: 0, + width: ext.width as _, + height: ext.height as _, + }), + soft::RenderCommand::BindBufferData { + stage: ShaderStage::Vertex, + index: 0, + words: slice::from_raw_parts( + list.as_ptr() as *const u32, + list.len() * mem::size_of::<BlitVertex>() / WORD_SIZE, + ), + }, + soft::RenderCommand::Draw { + primitive_type: MTLPrimitiveType::Triangle, + vertices: 0..list.len() as _, + instances: 0..1, + }, + ]; + + let commands = prelude.iter().chain(&com_ds).chain(&extra).cloned(); + + sink.as_mut().unwrap().quick_render( + "blit_image", + descriptor, + pool_shared, + commands, + ); + } + }); + + retained_textures.extend(src_cubish); + retained_textures.extend(dst_cubish); + } + + unsafe fn bind_index_buffer( + &mut self, + buffer: &native::Buffer, + sub: buffer::SubRange, + ty: IndexType, + ) { + let (raw, range) = buffer.as_bound(); + assert!(range.start + sub.offset + sub.size.unwrap_or(0) <= range.end); // conservative + self.state.index_buffer = 
Some(IndexBuffer { + buffer: AsNative::from(raw), + offset: (range.start + sub.offset) as _, + stride: match ty { + IndexType::U16 => 2, + IndexType::U32 => 4, + }, + }); + } + + unsafe fn bind_vertex_buffers<I, T>(&mut self, first_binding: pso::BufferIndex, buffers: I) + where + I: IntoIterator<Item = (T, buffer::SubRange)>, + T: Borrow<native::Buffer>, + { + if self.state.vertex_buffers.len() <= first_binding as usize { + self.state + .vertex_buffers + .resize(first_binding as usize + 1, None); + } + for (i, (buffer, sub)) in buffers.into_iter().enumerate() { + let b = buffer.borrow(); + let (raw, range) = b.as_bound(); + let buffer_ptr = AsNative::from(raw); + let index = first_binding as usize + i; + self.state + .vertex_buffers + .entry(index) + .set(Some((buffer_ptr, range.start + sub.offset))); + } + + if let Some(command) = self + .state + .set_vertex_buffers(self.shared.private_caps.max_buffers_per_stage as usize) + { + self.inner.borrow_mut().sink().pre_render().issue(command); + } + } + + unsafe fn set_viewports<T>(&mut self, first_viewport: u32, vps: T) + where + T: IntoIterator, + T::Item: Borrow<pso::Viewport>, + { + // macOS_GPUFamily1_v3 supports >1 viewport, todo + if first_viewport != 0 { + panic!("First viewport != 0; Metal supports only one viewport"); + } + let mut vps = vps.into_iter(); + let vp_borrowable = vps + .next() + .expect("No viewport provided, Metal supports exactly one"); + let vp = vp_borrowable.borrow(); + if vps.next().is_some() { + // TODO should we panic here or set buffer in an erroneous state? 
+ panic!("More than one viewport set; Metal supports only one viewport"); + } + + let com = self.state.set_viewport(vp, self.shared.disabilities); + self.inner.borrow_mut().sink().pre_render().issue(com); + } + + unsafe fn set_scissors<T>(&mut self, first_scissor: u32, rects: T) + where + T: IntoIterator, + T::Item: Borrow<pso::Rect>, + { + // macOS_GPUFamily1_v3 supports >1 scissor/viewport, todo + if first_scissor != 0 { + panic!("First scissor != 0; Metal supports only one viewport"); + } + let mut rects = rects.into_iter(); + let rect_borrowable = rects + .next() + .expect("No scissor provided, Metal supports exactly one"); + let rect = rect_borrowable.borrow(); + if rects.next().is_some() { + panic!("More than one scissor set; Metal supports only one viewport"); + } + + if let Some(com) = self.state.set_hal_scissor(*rect) { + self.inner.borrow_mut().sink().pre_render().issue(com); + } + } + + unsafe fn set_blend_constants(&mut self, color: pso::ColorValue) { + let com = self.state.set_blend_color(&color); + self.inner.borrow_mut().sink().pre_render().issue(com); + } + + unsafe fn set_depth_bounds(&mut self, _: Range<f32>) { + warn!("Depth bounds test is not supported"); + } + + unsafe fn set_line_width(&mut self, width: f32) { + // Note from the Vulkan spec: + // > If the wide lines feature is not enabled, lineWidth must be 1.0 + // Simply assert and no-op because Metal never exposes `Features::LINE_WIDTH` + assert_eq!(width, 1.0); + } + + unsafe fn set_depth_bias(&mut self, depth_bias: pso::DepthBias) { + let com = self.state.set_depth_bias(&depth_bias); + self.inner.borrow_mut().sink().pre_render().issue(com); + } + + unsafe fn set_stencil_reference(&mut self, faces: pso::Face, value: pso::StencilValue) { + assign_sides(&mut self.state.stencil.reference_values, faces, value); + let com = + soft::RenderCommand::SetStencilReferenceValues(self.state.stencil.reference_values); + self.inner.borrow_mut().sink().pre_render().issue(com); + } + + unsafe fn 
/// Starts recording a render pass.
///
/// Builds one render pass descriptor per subpass of `render_pass`, queues
/// them in `self.state.pending_subpasses`, labels the sink with the pass name
/// and immediately enters the first subpass via `next_subpass`.
///
/// * `render_pass` - attachment and subpass description.
/// * `framebuffer` - provides the attachment textures and the target extent.
/// * `_render_area` - ignored by this implementation.
/// * `clear_values` - consumed in order, one per attachment for which
///   `has_clears()` is true.
/// * `first_subpass_contents` - forwarded to `next_subpass`.
///
/// # Panics
///
/// Panics (through `unwrap`) if an attachment is loaded with
/// `AttachmentLoadOp::Clear` but no clear value was recorded for it, or if
/// the depth/stencil attachment has no format.
unsafe fn begin_render_pass<T>(
    &mut self,
    render_pass: &native::RenderPass,
    framebuffer: &native::Framebuffer,
    _render_area: pso::Rect,
    clear_values: T,
    first_subpass_contents: com::SubpassContents,
) where
    T: IntoIterator,
    T::Item: Borrow<com::ClearValue>,
{
    // fill out temporary clear values per attachment
    // NOTE(review): `resize` only adds or truncates entries; slots that
    // already exist keep their previous contents - confirm that is intended.
    self.temp
        .clear_values
        .resize(render_pass.attachments.len(), None);
    // Pair every attachment that reports clears with the next provided value.
    for ((out_val, _), in_val) in self
        .temp
        .clear_values
        .iter_mut()
        .zip(&render_pass.attachments)
        .filter(|(_, rat)| rat.has_clears())
        .zip(clear_values)
    {
        *out_val = Some(*in_val.borrow());
    }

    self.state.pending_subpasses.clear();
    self.state.target.extent = framebuffer.extent;

    //Note: we stack the subpasses in the opposite order
    // (`next_subpass` pops from the end, so the first subpass must be pushed last)
    for subpass in render_pass.subpasses.iter().rev() {
        // Accumulated over all attachments referenced by this subpass.
        let mut combined_aspects = Aspects::empty();
        let mut sample_count = 0;
        let descriptor = autoreleasepool(|| {
            let descriptor = self
                .pool_shared
                .render_pass_descriptors
                .lock()
                .alloc(&self.shared);
            if self.shared.private_caps.layered_rendering {
                descriptor.set_render_target_array_length(framebuffer.extent.depth as _);
            }

            // Color attachments: slot `i` of the descriptor mirrors entry `i`
            // of the subpass color list.
            for (i, at) in subpass.attachments.colors.iter().enumerate() {
                let rat = &render_pass.attachments[at.id];
                let texture = framebuffer.attachments[at.id].as_ref();
                let desc = descriptor.color_attachments().object_at(i as _).unwrap();

                combined_aspects |= Aspects::COLOR;
                sample_count = sample_count.max(rat.samples);
                desc.set_texture(Some(texture));

                // The load action is only configured when this subpass is
                // flagged with `LOAD` for the attachment.
                if at.ops.contains(native::AttachmentOps::LOAD) {
                    desc.set_load_action(conv::map_load_operation(rat.ops.load));
                    if rat.ops.load == AttachmentLoadOp::Clear {
                        let raw = self.temp.clear_values[at.id].unwrap().color;
                        desc.set_clear_color(at.channel.interpret(raw));
                    }
                }
                // A resolve target takes precedence over the plain store path.
                if let Some(id) = at.resolve_id {
                    let resolve = &framebuffer.attachments[id];
                    //Note: the selection of levels and slices is already handled by `ImageView`
                    desc.set_resolve_texture(Some(resolve));
                    desc.set_store_action(conv::map_resolved_store_operation(rat.ops.store));
                } else if at.ops.contains(native::AttachmentOps::STORE) {
                    desc.set_store_action(conv::map_store_operation(rat.ops.store));
                }
            }

            // Depth and stencil share one attachment but are configured as
            // two separate descriptor slots, depending on the format aspects.
            if let Some(ref at) = subpass.attachments.depth_stencil {
                let rat = &render_pass.attachments[at.id];
                let texture = framebuffer.attachments[at.id].as_ref();
                let aspects = rat.format.unwrap().surface_desc().aspects;
                sample_count = sample_count.max(rat.samples);
                combined_aspects |= aspects;

                if aspects.contains(Aspects::DEPTH) {
                    let desc = descriptor.depth_attachment().unwrap();
                    desc.set_texture(Some(texture));

                    if at.ops.contains(native::AttachmentOps::LOAD) {
                        desc.set_load_action(conv::map_load_operation(rat.ops.load));
                        if rat.ops.load == AttachmentLoadOp::Clear {
                            let raw = self.temp.clear_values[at.id].unwrap().depth_stencil;
                            desc.set_clear_depth(raw.depth as f64);
                        }
                    }
                    if at.ops.contains(native::AttachmentOps::STORE) {
                        desc.set_store_action(conv::map_store_operation(rat.ops.store));
                    }
                }
                if aspects.contains(Aspects::STENCIL) {
                    let desc = descriptor.stencil_attachment().unwrap();
                    desc.set_texture(Some(texture));

                    // The stencil aspect uses `stencil_ops`, not `ops`.
                    if at.ops.contains(native::AttachmentOps::LOAD) {
                        desc.set_load_action(conv::map_load_operation(rat.stencil_ops.load));
                        if rat.stencil_ops.load == AttachmentLoadOp::Clear {
                            let raw = self.temp.clear_values[at.id].unwrap().depth_stencil;
                            desc.set_clear_stencil(raw.stencil);
                        }
                    }
                    if at.ops.contains(native::AttachmentOps::STORE) {
                        desc.set_store_action(conv::map_store_operation(rat.stencil_ops.store));
                    }
                }
            }

            descriptor
        });

        self.state.pending_subpasses.alloc().init(SubpassInfo {
            descriptor,
            combined_aspects,
            formats: subpass.attachments.map(|at| (at.format, at.channel)),
            operations: subpass.attachments.map(|at| at.ops),
            sample_count,
        });
    }

    self.inner.borrow_mut().sink().label(&render_pass.name);
    self.next_subpass(first_subpass_contents);
}
/// Binds a graphics pipeline and re-issues the state that depends on it.
///
/// The method:
/// 1. copies static stencil masks/reference values from the pipeline into
///    `self.state.stencil`,
/// 2. records whether the pipeline matches the current target (attachment
///    formats and sample count) in `render_pso_is_compatible`,
/// 3. updates the cached `render_pso`, skipping the update when the raw
///    pipeline pointer is unchanged,
/// 4. issues bind commands only when the pipeline is compatible,
/// 5. applies static depth bias and any baked viewport, scissor and blend
///    color.
unsafe fn bind_graphics_pipeline(&mut self, pipeline: &native::GraphicsPipeline) {
    let mut inner = self.inner.borrow_mut();
    let mut pre = inner.sink().pre_render();

    // Only `State::Static` values are taken from the pipeline here; other
    // variants leave the previously recorded state untouched.
    if let Some(ref stencil) = pipeline.depth_stencil_desc.stencil {
        if let pso::State::Static(value) = stencil.read_masks {
            self.state.stencil.read_masks = value;
        }
        if let pso::State::Static(value) = stencil.write_masks {
            self.state.stencil.write_masks = value;
        }
        if let pso::State::Static(value) = stencil.reference_values {
            self.state.stencil.reference_values = value;
            pre.issue(soft::RenderCommand::SetStencilReferenceValues(value));
        }
    }

    self.state.render_pso_is_compatible = pipeline.attachment_formats
        == self.state.target.formats
        && self.state.target.samples == pipeline.samples;
    // `set_pipeline` is false only when the very same raw pipeline object is
    // already cached.
    let set_pipeline = match self.state.render_pso {
        Some(ref ps) if ps.raw.as_ptr() == pipeline.raw.as_ptr() => false,
        Some(ref mut ps) => {
            // Reuse the existing cache entry (and its vertex buffer storage).
            ps.raw = pipeline.raw.to_owned();
            ps.vertex_buffers.clear();
            ps.vertex_buffers
                .extend(pipeline.vertex_buffers.iter().cloned().map(Some));
            ps.ds_desc = pipeline.depth_stencil_desc;
            ps.formats = pipeline.attachment_formats.clone();
            true
        }
        None => {
            self.state.render_pso = Some(RenderPipelineState {
                raw: pipeline.raw.to_owned(),
                ds_desc: pipeline.depth_stencil_desc,
                vertex_buffers: pipeline.vertex_buffers.iter().cloned().map(Some).collect(),
                formats: pipeline.attachment_formats.clone(),
            });
            true
        }
    };

    if self.state.render_pso_is_compatible {
        if set_pipeline {
            self.state.rasterizer_state = pipeline.rasterizer_state.clone();
            self.state.primitive_type = pipeline.primitive_type;

            pre.issue(soft::RenderCommand::BindPipeline(&*pipeline.raw));
            if let Some(ref rs) = pipeline.rasterizer_state {
                pre.issue(soft::RenderCommand::SetRasterizerState(rs.clone()))
            }
            // re-bind vertex buffers
            if let Some(command) = self
                .state
                .set_vertex_buffers(self.shared.private_caps.max_buffers_per_stage as usize)
            {
                pre.issue(command);
            }
            // re-bind push constants
            if let Some(pc) = pipeline.vs_pc_info {
                if Some(pc) != self.state.resources_vs.push_constants {
                    // if we don't have enough constants, then binding will follow
                    if pc.count as usize <= self.state.push_constants.len() {
                        pre.issue(self.state.push_vs_constants(pc));
                    }
                }
            }
            // Same check as for the vertex stage, written as one condition.
            if let Some(pc) = pipeline.ps_pc_info {
                if Some(pc) != self.state.resources_ps.push_constants
                    && pc.count as usize <= self.state.push_constants.len()
                {
                    pre.issue(self.state.push_ps_constants(pc));
                }
            }
        } else {
            // Same raw pipeline as before: the cached state must already match.
            debug_assert_eq!(self.state.rasterizer_state, pipeline.rasterizer_state);
            debug_assert_eq!(self.state.primitive_type, pipeline.primitive_type);
        }

        if let Some(desc) = self.state.build_depth_stencil() {
            let ds_store = &self.shared.service_pipes.depth_stencil_states;
            let state = &**ds_store.get(desc, &self.shared.device);
            pre.issue(soft::RenderCommand::SetDepthStencilState(state));
        }
    } else {
        // This may be tricky: we expect either another pipeline to be bound
        // (thus overwriting these), or a new render pass started (thus using these).
        self.state.rasterizer_state = pipeline.rasterizer_state.clone();
        self.state.primitive_type = pipeline.primitive_type;
    }

    // The remaining state is applied regardless of compatibility.
    if let pso::State::Static(value) = pipeline.depth_bias {
        self.state.depth_bias = value;
        pre.issue(soft::RenderCommand::SetDepthBias(value));
    }

    if let Some(ref vp) = pipeline.baked_states.viewport {
        pre.issue(self.state.set_viewport(vp, self.shared.disabilities));
    }
    if let Some(rect) = pipeline.baked_states.scissor {
        // `set_hal_scissor` may return nothing, in which case no command is issued.
        if let Some(com) = self.state.set_hal_scissor(rect) {
            pre.issue(com);
        }
    }
    if let Some(ref color) = pipeline.baked_states.blend_color {
        pre.issue(self.state.set_blend_color(color));
    }
}
pso::ShaderStageFlags::FRAGMENT, + &*pool.read(), + &info.offsets, + resources, + ); + bind_range.vs.expand(end_offsets.vs); + bind_range.ps.expand(end_offsets.ps); + + for (dyn_data, offset) in info + .dynamic_buffers + .iter() + .zip(dynamic_offset_iter.by_ref()) + { + if dyn_data.vs != !0 { + self.state.resources_vs.buffer_offsets[dyn_data.vs as usize] += + *offset.borrow() as buffer::Offset; + } + if dyn_data.ps != !0 { + self.state.resources_ps.buffer_offsets[dyn_data.ps as usize] += + *offset.borrow() as buffer::Offset; + } + } + } + native::DescriptorSet::ArgumentBuffer { + ref raw, + raw_offset, + ref pool, + ref range, + stage_flags, + .. + } => { + //Note: this is incompatible with the binding scheme below + if stage_flags.contains(pso::ShaderStageFlags::VERTEX) { + let index = info.offsets.vs.buffers; + self.state.resources_vs.buffers[index as usize] = + Some(AsNative::from(raw.as_ref())); + self.state.resources_vs.buffer_offsets[index as usize] = raw_offset; + pre.issue(soft::RenderCommand::BindBuffer { + stage: ShaderStage::Vertex, + index, + buffer: AsNative::from(raw.as_ref()), + offset: raw_offset, + }); + } + if stage_flags.contains(pso::ShaderStageFlags::FRAGMENT) { + let index = info.offsets.ps.buffers; + self.state.resources_ps.buffers[index as usize] = + Some(AsNative::from(raw.as_ref())); + self.state.resources_ps.buffer_offsets[index as usize] = raw_offset; + pre.issue(soft::RenderCommand::BindBuffer { + stage: ShaderStage::Fragment, + index, + buffer: AsNative::from(raw.as_ref()), + offset: raw_offset, + }); + } + if stage_flags + .intersects(pso::ShaderStageFlags::VERTEX | pso::ShaderStageFlags::FRAGMENT) + { + let graphics_resources = &mut self.state.descriptor_sets + [first_set + set_offset] + .graphics_resources; + graphics_resources.clear(); + graphics_resources.extend( + pool.read().resources[range.start as usize..range.end as usize] + .iter() + .filter_map(|ur| { + ptr::NonNull::new(ur.ptr).map(|res| (res, ur.usage)) + }), + ); + 
/// Binds descriptor sets for the compute stage.
///
/// * `pipe_layout` - supplies per-set resource offsets (`infos`) and the
///   total compute resource counts (`total.cs`).
/// * `first_set` - index of the first set slot being bound.
/// * `sets` - descriptor sets, matched in order with
///   `pipe_layout.infos[first_set..]`.
/// * `dynamic_offsets` - consumed in order by the dynamic buffers of the
///   emulated sets.
///
/// Emulated sets are written into the `resources_cs` cache and bound in bulk
/// at the end; argument-buffer sets are bound immediately as a single buffer
/// followed by `UseResource` commands.
///
/// # Panics
///
/// Panics if `first_set` is out of bounds for `pipe_layout.infos`.
unsafe fn bind_compute_descriptor_sets<I, J>(
    &mut self,
    pipe_layout: &native::PipelineLayout,
    first_set: usize,
    sets: I,
    dynamic_offsets: J,
) where
    I: IntoIterator,
    I::Item: Borrow<native::DescriptorSet>,
    J: IntoIterator,
    J::Item: Borrow<com::DescriptorSetOffset>,
{
    self.state.resources_cs.pre_allocate(&pipe_layout.total.cs);

    let mut dynamic_offset_iter = dynamic_offsets.into_iter();
    let mut inner = self.inner.borrow_mut();
    let mut pre = inner.sink().pre_compute();
    // Start with empty ranges located at the first set's offsets; they are
    // widened as emulated sets are bound.
    let mut bind_range = pipe_layout.infos[first_set].offsets.cs.map(|&i| i..i);

    for (set_offset, (info, desc_set)) in
        pipe_layout.infos[first_set..].iter().zip(sets).enumerate()
    {
        let res_offset = &info.offsets.cs;
        match *desc_set.borrow() {
            native::DescriptorSet::Emulated {
                ref pool,
                layouts: _,
                ref resources,
            } => {
                let end_offsets = self.state.bind_set(
                    pso::ShaderStageFlags::COMPUTE,
                    &*pool.read(),
                    &info.offsets,
                    resources,
                );
                bind_range.expand(end_offsets.cs);

                // `by_ref` keeps the shared iterator alive so that the next
                // set continues where this one stopped.
                for (dyn_data, offset) in info
                    .dynamic_buffers
                    .iter()
                    .zip(dynamic_offset_iter.by_ref())
                {
                    // `!0` marks a dynamic buffer that has no compute slot.
                    if dyn_data.cs != !0 {
                        // presumably `bind_set` has just written the base
                        // offset this adds to - verify against `bind_set`.
                        self.state.resources_cs.buffer_offsets[dyn_data.cs as usize] +=
                            *offset.borrow() as buffer::Offset;
                    }
                }
            }
            native::DescriptorSet::ArgumentBuffer {
                ref raw,
                raw_offset,
                ref pool,
                ref range,
                stage_flags,
                ..
            } => {
                if stage_flags.contains(pso::ShaderStageFlags::COMPUTE) {
                    // The argument buffer occupies the set's first buffer slot
                    // and is bound right away, outside of `bind_range`.
                    let index = res_offset.buffers;
                    self.state.resources_cs.buffers[index as usize] =
                        Some(AsNative::from(raw.as_ref()));
                    self.state.resources_cs.buffer_offsets[index as usize] = raw_offset;
                    pre.issue(soft::ComputeCommand::BindBuffer {
                        index,
                        buffer: AsNative::from(raw.as_ref()),
                        offset: raw_offset,
                    });

                    // Remember the non-null resources of this set and emit a
                    // `UseResource` command for each of them.
                    let compute_resources = &mut self.state.descriptor_sets
                        [first_set + set_offset]
                        .compute_resources;
                    compute_resources.clear();
                    compute_resources.extend(
                        pool.read().resources[range.start as usize..range.end as usize]
                            .iter()
                            .filter_map(|ur| {
                                ptr::NonNull::new(ur.ptr).map(|res| (res, ur.usage))
                            }),
                    );
                    pre.issue_many(compute_resources.iter().map(|&(resource, usage)| {
                        soft::ComputeCommand::UseResource { resource, usage }
                    }));
                }
            }
        }
    }

    // now bind all the affected resources
    // (each kind is skipped when its accumulated range is empty)
    let cache = &mut self.state.resources_cs;
    if bind_range.textures.start != bind_range.textures.end {
        pre.issue(soft::ComputeCommand::BindTextures {
            index: bind_range.textures.start,
            textures: &cache.textures
                [bind_range.textures.start as usize..bind_range.textures.end as usize],
        });
    }
    if bind_range.samplers.start != bind_range.samplers.end {
        pre.issue(soft::ComputeCommand::BindSamplers {
            index: bind_range.samplers.start,
            samplers: &cache.samplers
                [bind_range.samplers.start as usize..bind_range.samplers.end as usize],
        });
    }
    if bind_range.buffers.start != bind_range.buffers.end {
        pre.issue(soft::ComputeCommand::BindBuffers {
            index: bind_range.buffers.start,
            buffers: {
                // Buffers and their offsets are passed as two parallel slices.
                let range = bind_range.buffers.start as usize..bind_range.buffers.end as usize;
                (&cache.buffers[range.clone()], &cache.buffer_offsets[range])
            },
        });
    }
}
/// Copies `regions` from `src` to `dst`.
///
/// Regions whose size, source offset and destination offset are all
/// multiples of `WORD_SIZE` become blit commands. Every other region is
/// routed through the `copy_buffer` service compute pipeline.
///
/// Region offsets are relative to the buffers; the bound range start of each
/// buffer (from `as_bound`) is added here.
///
/// # Panics
///
/// Panics if a region that takes the compute path has a size that does not
/// fit in 32 bits.
unsafe fn copy_buffer<T>(&mut self, src: &native::Buffer, dst: &native::Buffer, regions: T)
where
    T: IntoIterator,
    T::Item: Borrow<com::BufferCopy>,
{
    let pso = &*self.shared.service_pipes.copy_buffer;
    let wg_size = MTLSize {
        width: pso.thread_execution_width(),
        height: 1,
        depth: 1,
    };

    let (src_raw, src_range) = src.as_bound();
    let (dst_raw, dst_range) = dst.as_bound();

    // Owns the per-dispatch constants; each entry is boxed so that its
    // address stays the same when the vector grows.
    let mut compute_datas = Vec::new();
    let mut inner = self.inner.borrow_mut();
    let mut blit_commands = Vec::new();
    let mut compute_commands = vec![
        //TODO: get rid of heap
        soft::ComputeCommand::BindPipeline(pso),
    ];

    for region in regions {
        let r = region.borrow();
        if r.size % WORD_SIZE as u64 == 0
            && r.src % WORD_SIZE as u64 == 0
            && r.dst % WORD_SIZE as u64 == 0
        {
            blit_commands.alloc().init(soft::BlitCommand::CopyBuffer {
                src: AsNative::from(src_raw),
                dst: AsNative::from(dst_raw),
                region: com::BufferCopy {
                    src: r.src + src_range.start,
                    dst: r.dst + dst_range.start,
                    size: r.size,
                },
            });
        } else {
            // not natively supported, going through a compute shader
            // The size is passed to the shader as a `u32`.
            assert_eq!(0, r.size >> 32);
            // Round both offsets down to a word boundary; the remainders are
            // packed into one word (source in the low bits, destination
            // shifted left by 16).
            let src_aligned = r.src & !(WORD_SIZE as u64 - 1);
            let dst_aligned = r.dst & !(WORD_SIZE as u64 - 1);
            let offsets = (r.src - src_aligned) | ((r.dst - dst_aligned) << 16);
            let size_and_offsets = [r.size as u32, offsets as u32];
            compute_datas.push(Box::new(size_and_offsets));

            // Ceiling division of the byte size by the work group width.
            let wg_count = MTLSize {
                width: (r.size + wg_size.width - 1) / wg_size.width,
                height: 1,
                depth: 1,
            };

            // Binding layout used here: 0 = destination, 1 = source,
            // 2 = inline `[size, offsets]` data.
            compute_commands
                .alloc()
                .init(soft::ComputeCommand::BindBuffer {
                    index: 0,
                    buffer: AsNative::from(dst_raw),
                    offset: dst_aligned + dst_range.start,
                });
            compute_commands
                .alloc()
                .init(soft::ComputeCommand::BindBuffer {
                    index: 1,
                    buffer: AsNative::from(src_raw),
                    offset: src_aligned + src_range.start,
                });
            compute_commands
                .alloc()
                .init(soft::ComputeCommand::BindBufferData {
                    index: 2,
                    // Rust doesn't see that compute_datas will not lose this
                    // item and the boxed contents can't be moved otherwise.
                    // The transmute only changes the lifetime of the slice;
                    // `compute_datas` must outlive the `quick_compute` call below.
                    words: mem::transmute(&compute_datas.last().unwrap()[..]),
                });
            compute_commands
                .alloc()
                .init(soft::ComputeCommand::Dispatch { wg_size, wg_count });
        }
    }

    let sink = inner.sink();
    if !blit_commands.is_empty() {
        sink.blit_commands(blit_commands.into_iter());
    }
    if compute_commands.len() > 1 {
        // first is bind PSO
        sink.quick_compute("copy_buffer", compute_commands.into_iter());
    }
}
+ } = *self.inner.borrow_mut(); + + let new_dst = if src.mtl_format == dst.mtl_format { + dst_raw + } else { + assert_eq!(src.format_desc.bits, dst.format_desc.bits); + let tex = dst_raw.new_texture_view(src.mtl_format); + retained_textures.push(tex); + retained_textures.last().unwrap() + }; + + let commands = regions.into_iter().filter_map(|region| { + let r = region.borrow(); + if r.extent.is_empty() { + None + } else { + Some(soft::BlitCommand::CopyImage { + src: AsNative::from(src_raw.as_ref()), + dst: AsNative::from(new_dst.as_ref()), + region: r.clone(), + }) + } + }); + + sink.as_mut().unwrap().blit_commands(commands); + } + (&native::ImageLike::Buffer(ref src_buffer), &native::ImageLike::Texture(_)) => { + let src_extent = src.kind.extent(); + self.copy_buffer_to_image( + src_buffer, + dst, + dst_layout, + regions.into_iter().map(|region| { + let r = region.borrow(); + com::BufferImageCopy { + buffer_offset: src.byte_offset(r.src_offset), + buffer_width: src_extent.width, + buffer_height: src_extent.height, + image_layers: r.dst_subresource.clone(), + image_offset: r.dst_offset, + image_extent: r.extent, + } + }), + ) + } + (&native::ImageLike::Texture(_), &native::ImageLike::Buffer(ref dst_buffer)) => { + let dst_extent = dst.kind.extent(); + self.copy_image_to_buffer( + src, + src_layout, + dst_buffer, + regions.into_iter().map(|region| { + let r = region.borrow(); + com::BufferImageCopy { + buffer_offset: dst.byte_offset(r.dst_offset), + buffer_width: dst_extent.width, + buffer_height: dst_extent.height, + image_layers: r.src_subresource.clone(), + image_offset: r.src_offset, + image_extent: r.extent, + } + }), + ) + } + ( + &native::ImageLike::Buffer(ref src_buffer), + &native::ImageLike::Buffer(ref dst_buffer), + ) => self.copy_buffer( + src_buffer, + dst_buffer, + regions.into_iter().map(|region| { + let r = region.borrow(); + com::BufferCopy { + src: src.byte_offset(r.src_offset), + dst: dst.byte_offset(r.dst_offset), + size: 
src.byte_extent(r.extent), + } + }), + ), + } + } + + unsafe fn copy_buffer_to_image<T>( + &mut self, + src: &native::Buffer, + dst: &native::Image, + _dst_layout: i::Layout, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<com::BufferImageCopy>, + T::IntoIter: ExactSizeIterator, + { + match dst.like { + native::ImageLike::Unbound { .. } => { + panic!("Unexpected Image::Unbound"); + } + native::ImageLike::Texture(ref dst_raw) => { + let (src_raw, src_range) = src.as_bound(); + let commands = regions.into_iter().filter_map(|region| { + let r = region.borrow(); + if r.image_extent.is_empty() { + None + } else { + Some(soft::BlitCommand::CopyBufferToImage { + src: AsNative::from(src_raw), + dst: AsNative::from(dst_raw.as_ref()), + dst_desc: dst.format_desc, + region: com::BufferImageCopy { + buffer_offset: r.buffer_offset + src_range.start, + ..r.clone() + }, + }) + } + }); + self.inner.borrow_mut().sink().blit_commands(commands); + } + native::ImageLike::Buffer(ref dst_buffer) => self.copy_buffer( + src, + dst_buffer, + regions.into_iter().map(|region| { + let r = region.borrow(); + com::BufferCopy { + src: r.buffer_offset, + dst: dst.byte_offset(r.image_offset), + size: dst.byte_extent(r.image_extent), + } + }), + ), + } + } + + unsafe fn copy_image_to_buffer<T>( + &mut self, + src: &native::Image, + _src_layout: i::Layout, + dst: &native::Buffer, + regions: T, + ) where + T: IntoIterator, + T::Item: Borrow<com::BufferImageCopy>, + T::IntoIter: ExactSizeIterator, + { + match src.like { + native::ImageLike::Unbound { .. 
} => { + panic!("Unexpected Image::Unbound"); + } + native::ImageLike::Texture(ref src_raw) => { + let (dst_raw, dst_range) = dst.as_bound(); + let commands = regions.into_iter().filter_map(|region| { + let r = region.borrow(); + if r.image_extent.is_empty() { + None + } else { + Some(soft::BlitCommand::CopyImageToBuffer { + src: AsNative::from(src_raw.as_ref()), + src_desc: src.format_desc, + dst: AsNative::from(dst_raw), + region: com::BufferImageCopy { + buffer_offset: r.buffer_offset + dst_range.start, + ..r.clone() + }, + }) + } + }); + self.inner.borrow_mut().sink().blit_commands(commands); + } + native::ImageLike::Buffer(ref src_buffer) => self.copy_buffer( + src_buffer, + dst, + regions.into_iter().map(|region| { + let r = region.borrow(); + com::BufferCopy { + src: src.byte_offset(r.image_offset), + dst: r.buffer_offset, + size: src.byte_extent(r.image_extent), + } + }), + ), + } + } + + unsafe fn draw(&mut self, vertices: Range<VertexCount>, instances: Range<InstanceCount>) { + debug_assert!(self.state.render_pso_is_compatible); + if instances.start == instances.end { + return; + } + + let command = soft::RenderCommand::Draw { + primitive_type: self.state.primitive_type, + vertices, + instances, + }; + self.inner.borrow_mut().sink().pre_render().issue(command); + } + + unsafe fn draw_indexed( + &mut self, + indices: Range<IndexCount>, + base_vertex: VertexOffset, + instances: Range<InstanceCount>, + ) { + debug_assert!(self.state.render_pso_is_compatible); + if instances.start == instances.end { + return; + } + + let command = soft::RenderCommand::DrawIndexed { + primitive_type: self.state.primitive_type, + index: self + .state + .index_buffer + .clone() + .expect("must bind index buffer"), + indices, + base_vertex, + instances, + }; + self.inner.borrow_mut().sink().pre_render().issue(command); + } + + unsafe fn draw_indirect( + &mut self, + buffer: &native::Buffer, + offset: buffer::Offset, + count: DrawCount, + stride: u32, + ) { + assert_eq!(offset % 
WORD_ALIGNMENT, 0); + assert_eq!(stride % WORD_ALIGNMENT as u32, 0); + debug_assert!(self.state.render_pso_is_compatible); + let (raw, range) = buffer.as_bound(); + + let commands = (0..count).map(|i| soft::RenderCommand::DrawIndirect { + primitive_type: self.state.primitive_type, + buffer: AsNative::from(raw), + offset: range.start + offset + (i * stride) as buffer::Offset, + }); + + self.inner + .borrow_mut() + .sink() + .pre_render() + .issue_many(commands); + } + + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &native::Buffer, + offset: buffer::Offset, + count: DrawCount, + stride: u32, + ) { + assert_eq!(offset % WORD_ALIGNMENT, 0); + assert_eq!(stride % WORD_ALIGNMENT as u32, 0); + debug_assert!(self.state.render_pso_is_compatible); + let (raw, range) = buffer.as_bound(); + + let commands = (0..count).map(|i| soft::RenderCommand::DrawIndexedIndirect { + primitive_type: self.state.primitive_type, + index: self + .state + .index_buffer + .clone() + .expect("must bind index buffer"), + buffer: AsNative::from(raw), + offset: range.start + offset + (i * stride) as buffer::Offset, + }); + + self.inner + .borrow_mut() + .sink() + .pre_render() + .issue_many(commands); + } + + unsafe fn draw_indirect_count( + &mut self, + _buffer: &native::Buffer, + _offset: buffer::Offset, + _count_buffer: &native::Buffer, + _count_buffer_offset: buffer::Offset, + _max_draw_count: u32, + _stride: u32, + ) { + unimplemented!() + } + + unsafe fn draw_indexed_indirect_count( + &mut self, + _buffer: &native::Buffer, + _offset: buffer::Offset, + _count_buffer: &native::Buffer, + _count_buffer_offset: buffer::Offset, + _max_draw_count: u32, + _stride: u32, + ) { + unimplemented!() + } + + unsafe fn draw_mesh_tasks(&mut self, _: TaskCount, _: TaskCount) { + unimplemented!() + } + + unsafe fn draw_mesh_tasks_indirect( + &mut self, + _: &native::Buffer, + _: buffer::Offset, + _: DrawCount, + _: u32, + ) { + unimplemented!() + } + + unsafe fn draw_mesh_tasks_indirect_count( + &mut 
self, + _: &native::Buffer, + _: buffer::Offset, + _: &native::Buffer, + _: buffer::Offset, + _: u32, + _: u32, + ) { + unimplemented!() + } + + unsafe fn set_event(&mut self, event: &native::Event, _: pso::PipelineStage) { + self.inner + .borrow_mut() + .events + .push((Arc::clone(&event.0), true)); + } + + unsafe fn reset_event(&mut self, event: &native::Event, _: pso::PipelineStage) { + self.inner + .borrow_mut() + .events + .push((Arc::clone(&event.0), false)); + } + + unsafe fn wait_events<'a, I, J>( + &mut self, + events: I, + stages: Range<pso::PipelineStage>, + barriers: J, + ) where + I: IntoIterator, + I::Item: Borrow<native::Event>, + J: IntoIterator, + J::Item: Borrow<memory::Barrier<'a, Backend>>, + { + let mut need_barrier = false; + + for event in events { + let mut inner = self.inner.borrow_mut(); + let event = &event.borrow().0; + let is_local = inner + .events + .iter() + .rfind(|ev| Arc::ptr_eq(&ev.0, event)) + .map_or(false, |ev| ev.1); + if is_local { + need_barrier = true; + } else { + inner.host_events.push(Arc::clone(event)); + } + } + + if need_barrier { + self.pipeline_barrier(stages, memory::Dependencies::empty(), barriers); + } + } + + unsafe fn begin_query(&mut self, query: query::Query<Backend>, flags: query::ControlFlags) { + match query.pool { + native::QueryPool::Occlusion(ref pool_range) => { + debug_assert!(pool_range.start + query.id < pool_range.end); + let offset = (query.id + pool_range.start) as buffer::Offset + * mem::size_of::<u64>() as buffer::Offset; + let mode = if flags.contains(query::ControlFlags::PRECISE) { + metal::MTLVisibilityResultMode::Counting + } else { + metal::MTLVisibilityResultMode::Boolean + }; + + let com = self.state.set_visibility_query(mode, offset); + self.inner.borrow_mut().sink().pre_render().issue(com); + } + native::QueryPool::Timestamp => {} + } + } + + unsafe fn end_query(&mut self, query: query::Query<Backend>) { + match query.pool { + native::QueryPool::Occlusion(ref pool_range) => { + let 
mut inner = self.inner.borrow_mut(); + debug_assert!(pool_range.start + query.id < pool_range.end); + inner + .active_visibility_queries + .push(pool_range.start + query.id); + + let com = self + .state + .set_visibility_query(metal::MTLVisibilityResultMode::Disabled, 0); + inner.sink().pre_render().issue(com); + } + native::QueryPool::Timestamp => {} + } + } + + unsafe fn reset_query_pool(&mut self, pool: &native::QueryPool, queries: Range<query::Id>) { + let visibility = &self.shared.visibility; + match *pool { + native::QueryPool::Occlusion(ref pool_range) => { + let mut inner = self.inner.borrow_mut(); + debug_assert!(pool_range.start + queries.end <= pool_range.end); + inner.active_visibility_queries.retain(|&id| { + id < pool_range.start + queries.start || id >= pool_range.start + queries.end + }); + + let size_data = mem::size_of::<u64>() as buffer::Offset; + let offset_data = pool_range.start as buffer::Offset * size_data; + let command_data = soft::BlitCommand::FillBuffer { + dst: AsNative::from(visibility.buffer.as_ref()), + range: offset_data + queries.start as buffer::Offset * size_data + ..offset_data + queries.end as buffer::Offset * size_data, + value: 0, + }; + + let size_meta = mem::size_of::<u32>() as buffer::Offset; + let offset_meta = + visibility.availability_offset + pool_range.start as buffer::Offset * size_meta; + let command_meta = soft::BlitCommand::FillBuffer { + dst: AsNative::from(visibility.buffer.as_ref()), + range: offset_meta + queries.start as buffer::Offset * size_meta + ..offset_meta + queries.end as buffer::Offset * size_meta, + value: 0, + }; + + let commands = iter::once(command_data).chain(iter::once(command_meta)); + inner.sink().blit_commands(commands); + } + native::QueryPool::Timestamp => {} + } + } + + unsafe fn copy_query_pool_results( + &mut self, + pool: &native::QueryPool, + queries: Range<query::Id>, + buffer: &native::Buffer, + offset: buffer::Offset, + stride: buffer::Offset, + flags: query::ResultFlags, + ) { + 
let (raw, range) = buffer.as_bound(); + match *pool { + native::QueryPool::Occlusion(ref pool_range) => { + let visibility = &self.shared.visibility; + let size_data = mem::size_of::<u64>() as buffer::Offset; + let size_meta = mem::size_of::<u32>() as buffer::Offset; + + if stride == size_data + && flags.contains(query::ResultFlags::BITS_64) + && !flags.contains(query::ResultFlags::WITH_AVAILABILITY) + { + // if stride is matching, copy everything in one go + let com = soft::BlitCommand::CopyBuffer { + src: AsNative::from(visibility.buffer.as_ref()), + dst: AsNative::from(raw), + region: com::BufferCopy { + src: (pool_range.start + queries.start) as buffer::Offset * size_data, + dst: range.start + offset, + size: (queries.end - queries.start) as buffer::Offset * size_data, + }, + }; + self.inner + .borrow_mut() + .sink() + .blit_commands(iter::once(com)); + } else { + // copy parts of individual entries + let size_payload = if flags.contains(query::ResultFlags::BITS_64) { + mem::size_of::<u64>() as buffer::Offset + } else { + mem::size_of::<u32>() as buffer::Offset + }; + let commands = (0..queries.end - queries.start).flat_map(|i| { + let absolute_index = + (pool_range.start + queries.start + i) as buffer::Offset; + let dst_offset = range.start + offset + i as buffer::Offset * stride; + let com_data = soft::BlitCommand::CopyBuffer { + src: AsNative::from(visibility.buffer.as_ref()), + dst: AsNative::from(raw), + region: com::BufferCopy { + src: absolute_index * size_data, + dst: dst_offset, + size: size_payload, + }, + }; + + let (com_avail, com_pad) = if flags.contains( + query::ResultFlags::WITH_AVAILABILITY | query::ResultFlags::WAIT, + ) { + // Technically waiting is a no-op on a single queue. However, + // the client expects the availability to be set regardless. 
+ let com = soft::BlitCommand::FillBuffer { + dst: AsNative::from(raw), + range: dst_offset + size_payload..dst_offset + 2 * size_payload, + value: !0, + }; + (Some(com), None) + } else if flags.contains(query::ResultFlags::WITH_AVAILABILITY) { + let com_avail = soft::BlitCommand::CopyBuffer { + src: AsNative::from(visibility.buffer.as_ref()), + dst: AsNative::from(raw), + region: com::BufferCopy { + src: visibility.availability_offset + + absolute_index * size_meta, + dst: dst_offset + size_payload, + size: size_meta, + }, + }; + // An extra padding is required if the client expects 64 bits availability without a wait + let com_pad = if flags.contains(query::ResultFlags::BITS_64) { + Some(soft::BlitCommand::FillBuffer { + dst: AsNative::from(raw), + range: dst_offset + size_payload + size_meta + ..dst_offset + 2 * size_payload, + value: 0, + }) + } else { + None + }; + (Some(com_avail), com_pad) + } else { + (None, None) + }; + + iter::once(com_data).chain(com_avail).chain(com_pad) + }); + self.inner.borrow_mut().sink().blit_commands(commands); + } + } + native::QueryPool::Timestamp => { + let start = range.start + offset + queries.start as buffer::Offset * stride; + let end = range.start + offset + (queries.end - 1) as buffer::Offset * stride + 4; + let command = soft::BlitCommand::FillBuffer { + dst: AsNative::from(raw), + range: start..end, + value: 0, + }; + self.inner + .borrow_mut() + .sink() + .blit_commands(iter::once(command)); + } + } + } + + /** No-op: per the comment in the body, timestamps are unsupported on Metal. Both arguments are ignored. */ unsafe fn write_timestamp(&mut self, _: pso::PipelineStage, _: query::Query<Backend>) { + // nothing to do, timestamps are unsupported on Metal + } + + /** Writes `constants` at `offset` into the cached push-constant state via `update_push_constants`, then, if `stages` intersects `GRAPHICS`, re-issues the constants through the sink's `pre_render()` for the vertex and/or fragment stage contained in `stages`. Panics (`unwrap`) when a requested stage has no entry in `layout.push_constants` (`vs` / `ps` is `None`). */ unsafe fn push_graphics_constants( + &mut self, + layout: &native::PipelineLayout, + stages: pso::ShaderStageFlags, + offset: u32, + constants: &[u32], + ) { + self.state + .update_push_constants(offset, constants, layout.total_push_constants); + if stages.intersects(pso::ShaderStageFlags::GRAPHICS) { + let mut inner = self.inner.borrow_mut(); + let mut pre = 
inner.sink().pre_render(); + // Note: the whole range is re-uploaded, which may be inefficient + if stages.contains(pso::ShaderStageFlags::VERTEX) { + let pc = layout.push_constants.vs.unwrap(); + pre.issue(self.state.push_vs_constants(pc)); + } + if stages.contains(pso::ShaderStageFlags::FRAGMENT) { + let pc = layout.push_constants.ps.unwrap(); + pre.issue(self.state.push_ps_constants(pc)); + } + } + } + + /** Writes `constants` at `offset` into the cached push-constant state via `update_push_constants`, then re-issues the compute push constants through the sink's `pre_compute()`. Panics (`unwrap`) if `layout.push_constants.cs` is `None`. */ unsafe fn push_compute_constants( + &mut self, + layout: &native::PipelineLayout, + offset: u32, + constants: &[u32], + ) { + self.state + .update_push_constants(offset, constants, layout.total_push_constants); + let pc = layout.push_constants.cs.unwrap(); + + // Note: the whole range is re-uploaded, which may be inefficient + self.inner + .borrow_mut() + .sink() + .pre_compute() + .issue(self.state.push_cs_constants(pc)); + } + + /** Executes secondary command buffers inside this one. For each buffer in `cmd_buffers`: (1) its sink must be `CommandSink::Deferred`, otherwise this panics with "Unexpected secondary sink!"; (2) every non-empty graphics/compute resource list of its descriptor sets replaces the corresponding list in `self.state`; (3) its `events` are appended to ours; (4) its journal is replayed according to our own sink. `Immediate`: when `is_inheriting`, the render commands are executed on the active encoder (panics if it is not a `Render` encoder), otherwise the current encoder is ended, `num_passes` grows by the journal's pass count and the journal is recorded into `cmd_buffer`. `Deferred`: the journal is appended with `journal.extend`. `Remote` (feature `dispatch`): `unimplemented!()`. */ unsafe fn execute_commands<'a, T, I>(&mut self, cmd_buffers: I) + where + T: 'a + Borrow<CommandBuffer>, + I: IntoIterator<Item = &'a T>, + { + for cmd_buffer in cmd_buffers { + let outer_borrowed = cmd_buffer.borrow(); + let inner_borrowed = outer_borrowed.inner.borrow_mut(); + + let (exec_journal, is_inheriting) = match inner_borrowed.sink { + Some(CommandSink::Deferred { + ref journal, + is_inheriting, + .. + }) => (journal, is_inheriting), + _ => panic!("Unexpected secondary sink!"), + }; + + for (a, b) in self + .state + .descriptor_sets + .iter_mut() + .zip(&outer_borrowed.state.descriptor_sets) + { + if !b.graphics_resources.is_empty() { + a.graphics_resources.clear(); + a.graphics_resources + .extend_from_slice(&b.graphics_resources); + } + if !b.compute_resources.is_empty() { + a.compute_resources.clear(); + a.compute_resources.extend_from_slice(&b.compute_resources); + } + } + + let mut inner_self = self.inner.borrow_mut(); + inner_self.events.extend_from_slice(&inner_borrowed.events); + + match *inner_self.sink() { + CommandSink::Immediate { + ref mut cmd_buffer, + ref mut encoder_state, + ref mut num_passes, + .. 
+ } => { + if is_inheriting { + let encoder = match encoder_state { + EncoderState::Render(ref encoder) => encoder, + _ => panic!("Expected Render encoder!"), + }; + for command in &exec_journal.render_commands { + exec_render(encoder, command, &exec_journal.resources); + } + } else { + encoder_state.end(); + *num_passes += exec_journal.passes.len(); + exec_journal.record(cmd_buffer); + } + } + CommandSink::Deferred { + ref mut journal, .. + } => { + journal.extend(exec_journal, is_inheriting); + } + #[cfg(feature = "dispatch")] + CommandSink::Remote { .. } => unimplemented!(), + } + } + } + + /** Not implemented yet (the body is only a TODO): the call does nothing and both arguments are ignored. */ unsafe fn insert_debug_marker(&mut self, _name: &str, _color: u32) { + //TODO + } + /** Not implemented yet (the body is only a TODO): the call does nothing and both arguments are ignored. */ unsafe fn begin_debug_marker(&mut self, _name: &str, _color: u32) { + //TODO + } + /** Not implemented yet (the body is only a TODO): the call does nothing. */ unsafe fn end_debug_marker(&mut self) { + //TODO + } +} diff --git a/third_party/rust/gfx-backend-metal/src/conversions.rs b/third_party/rust/gfx-backend-metal/src/conversions.rs new file mode 100644 index 0000000000..02be631931 --- /dev/null +++ b/third_party/rust/gfx-backend-metal/src/conversions.rs @@ -0,0 +1,1242 @@ +use hal; + +use crate::PrivateCapabilities; + +use hal::{ + format::{Format, Properties, Swizzle}, + image, pass, pso, + pso::{Comparison, StencilOp}, + IndexType, +}; +use metal::*; + +impl PrivateCapabilities { + /** Maps a HAL `Format` to the Metal pixel format used for it, or returns `None` when no arm matches. Many arms are guarded by capability flags on `self` (`format_b5`, `format_min_srgb_channels`, `format_depth16unorm`, `format_depth24_stencil8`, `format_bc`, `format_eac_etc`, `format_astc`), so such a format is only reported when its guard holds. */ pub fn map_format(&self, format: Format) -> Option<MTLPixelFormat> { + use self::hal::format::Format as f; + use metal::MTLPixelFormat::*; + Some(match format { + f::R5g6b5Unorm if self.format_b5 => B5G6R5Unorm, + f::R5g5b5a1Unorm if self.format_b5 => A1BGR5Unorm, + f::A1r5g5b5Unorm if self.format_b5 => BGR5A1Unorm, + f::Rgba4Unorm if self.format_b5 => ABGR4Unorm, + f::R8Srgb if self.format_min_srgb_channels <= 1 => R8Unorm_sRGB, + f::Rg8Srgb if self.format_min_srgb_channels <= 2 => RG8Unorm_sRGB, + f::Rgba8Srgb if self.format_min_srgb_channels <= 4 => RGBA8Unorm_sRGB, + f::Bgra8Srgb if self.format_min_srgb_channels <= 4 => BGRA8Unorm_sRGB, + f::D16Unorm if self.format_depth16unorm => 
Depth16Unorm, + f::D24UnormS8Uint if self.format_depth24_stencil8 => Depth24Unorm_Stencil8, + f::D32Sfloat => Depth32Float, + f::D32SfloatS8Uint => Depth32Float_Stencil8, + f::R8Unorm => R8Unorm, + f::R8Snorm => R8Snorm, + f::R8Uint => R8Uint, + f::R8Sint => R8Sint, + f::Rg8Unorm => RG8Unorm, + f::Rg8Snorm => RG8Snorm, + f::Rg8Uint => RG8Uint, + f::Rg8Sint => RG8Sint, + f::Rgba8Unorm => RGBA8Unorm, + f::Rgba8Snorm => RGBA8Snorm, + f::Rgba8Uint => RGBA8Uint, + f::Rgba8Sint => RGBA8Sint, + f::Bgra8Unorm => BGRA8Unorm, + f::R16Unorm => R16Unorm, + f::R16Snorm => R16Snorm, + f::R16Uint => R16Uint, + f::R16Sint => R16Sint, + f::R16Sfloat => R16Float, + f::Rg16Unorm => RG16Unorm, + f::Rg16Snorm => RG16Snorm, + f::Rg16Uint => RG16Uint, + f::Rg16Sint => RG16Sint, + f::Rg16Sfloat => RG16Float, + f::Rgba16Unorm => RGBA16Unorm, + f::Rgba16Snorm => RGBA16Snorm, + f::Rgba16Uint => RGBA16Uint, + f::Rgba16Sint => RGBA16Sint, + f::Rgba16Sfloat => RGBA16Float, + f::A2r10g10b10Unorm => BGR10A2Unorm, + f::A2b10g10r10Unorm => RGB10A2Unorm, + f::B10g11r11Ufloat => RG11B10Float, + f::E5b9g9r9Ufloat => RGB9E5Float, + f::R32Uint => R32Uint, + f::R32Sint => R32Sint, + f::R32Sfloat => R32Float, + f::Rg32Uint => RG32Uint, + f::Rg32Sint => RG32Sint, + f::Rg32Sfloat => RG32Float, + f::Rgba32Uint => RGBA32Uint, + f::Rgba32Sint => RGBA32Sint, + f::Rgba32Sfloat => RGBA32Float, + f::Bc1RgbaUnorm if self.format_bc => BC1_RGBA, + f::Bc1RgbaSrgb if self.format_bc => BC1_RGBA_sRGB, + f::Bc1RgbUnorm if self.format_bc => BC1_RGBA, //TODO? + f::Bc1RgbSrgb if self.format_bc => BC1_RGBA_sRGB, //TODO? 
+ f::Bc2Unorm if self.format_bc => BC2_RGBA, + f::Bc2Srgb if self.format_bc => BC2_RGBA_sRGB, + f::Bc3Unorm if self.format_bc => BC3_RGBA, + f::Bc3Srgb if self.format_bc => BC3_RGBA_sRGB, + f::Bc4Unorm if self.format_bc => BC4_RUnorm, + f::Bc4Snorm if self.format_bc => BC4_RSnorm, + f::Bc5Unorm if self.format_bc => BC5_RGUnorm, + f::Bc5Snorm if self.format_bc => BC5_RGSnorm, + f::Bc6hUfloat if self.format_bc => BC6H_RGBUfloat, + f::Bc6hSfloat if self.format_bc => BC6H_RGBFloat, + f::Bc7Unorm if self.format_bc => BC7_RGBAUnorm, + f::Bc7Srgb if self.format_bc => BC7_RGBAUnorm_sRGB, + f::EacR11Unorm if self.format_eac_etc => EAC_R11Unorm, + f::EacR11Snorm if self.format_eac_etc => EAC_R11Snorm, + f::EacR11g11Unorm if self.format_eac_etc => EAC_RG11Unorm, + f::EacR11g11Snorm if self.format_eac_etc => EAC_RG11Snorm, + f::Etc2R8g8b8Unorm if self.format_eac_etc => ETC2_RGB8, + f::Etc2R8g8b8Srgb if self.format_eac_etc => ETC2_RGB8_sRGB, + f::Etc2R8g8b8a1Unorm if self.format_eac_etc => ETC2_RGB8A1, + f::Etc2R8g8b8a1Srgb if self.format_eac_etc => ETC2_RGB8A1_sRGB, + f::Astc4x4Unorm if self.format_astc => ASTC_4x4_LDR, + f::Astc4x4Srgb if self.format_astc => ASTC_4x4_sRGB, + f::Astc5x4Unorm if self.format_astc => ASTC_5x4_LDR, + f::Astc5x4Srgb if self.format_astc => ASTC_5x4_sRGB, + f::Astc5x5Unorm if self.format_astc => ASTC_5x5_LDR, + f::Astc5x5Srgb if self.format_astc => ASTC_5x5_sRGB, + f::Astc6x5Unorm if self.format_astc => ASTC_6x5_LDR, + f::Astc6x5Srgb if self.format_astc => ASTC_6x5_sRGB, + f::Astc6x6Unorm if self.format_astc => ASTC_6x6_LDR, + f::Astc6x6Srgb if self.format_astc => ASTC_6x6_sRGB, + f::Astc8x5Unorm if self.format_astc => ASTC_8x5_LDR, + f::Astc8x5Srgb if self.format_astc => ASTC_8x5_sRGB, + f::Astc8x6Unorm if self.format_astc => ASTC_8x6_LDR, + f::Astc8x6Srgb if self.format_astc => ASTC_8x6_sRGB, + f::Astc8x8Unorm if self.format_astc => ASTC_8x8_LDR, + f::Astc8x8Srgb if self.format_astc => ASTC_8x8_sRGB, + f::Astc10x5Unorm if self.format_astc => 
ASTC_10x5_LDR, + f::Astc10x5Srgb if self.format_astc => ASTC_10x5_sRGB, + f::Astc10x6Unorm if self.format_astc => ASTC_10x6_LDR, + f::Astc10x6Srgb if self.format_astc => ASTC_10x6_sRGB, + f::Astc10x8Unorm if self.format_astc => ASTC_10x8_LDR, + f::Astc10x8Srgb if self.format_astc => ASTC_10x8_sRGB, + f::Astc10x10Unorm if self.format_astc => ASTC_10x10_LDR, + f::Astc10x10Srgb if self.format_astc => ASTC_10x10_sRGB, + f::Astc12x10Unorm if self.format_astc => ASTC_12x10_LDR, + f::Astc12x10Srgb if self.format_astc => ASTC_12x10_sRGB, + f::Astc12x12Unorm if self.format_astc => ASTC_12x12_LDR, + f::Astc12x12Srgb if self.format_astc => ASTC_12x12_sRGB, + // Not supported: + // a8Unorm + // agbr4Unorm + // pvrtc_rgb_2bpp + // pvrtc_rgb_2bpp_srgb + // pvrtc_rgb_4bpp + // pvrtc_rgb_4bpp_srgb + // pvrtc_rgba_2bpp + // pvrtc_rgba_2bpp_srgb + // pvrtc_rgba_4bpp + // pvrtc_rgba_4bpp_srgb + // eac_rgba8 + // eac_rgba8_srgb + // gbgr422 + // bgrg422 + // stencil8 (float-version) + // x32_stencil8 (float-version) + // x24_stencil8 (float-version) + // bgra10_xr + // bgra10_xr_srgb + // bgr10_xr + // bgr10_xr_srgb + _ => return None, + }) + } + + /** Like `map_format`, but first tries to absorb `swizzle` into the choice of pixel format for a handful of (format, swizzle) pairs, e.g. `Rgba8Unorm` with a B,G,R,A swizzle becomes `BGRA8Unorm`. Any other pair falls back to `map_format(format)`; before doing so an error is logged unless the swizzle is `Swizzle::NO`, or is R,G,B,One on a format whose base format reports zero alpha bits. The swizzle is not otherwise applied in the fallback path. */ pub fn map_format_with_swizzle( + &self, + format: Format, + swizzle: Swizzle, + ) -> Option<MTLPixelFormat> { + use self::hal::format::{Component::*, Format::*}; + use metal::MTLPixelFormat as Pf; + match (format, swizzle) { + (R8Unorm, Swizzle(Zero, Zero, Zero, R)) => Some(Pf::A8Unorm), + (Rgba8Unorm, Swizzle(B, G, R, A)) => Some(Pf::BGRA8Unorm), + (Bgra8Unorm, Swizzle(B, G, R, A)) => Some(Pf::RGBA8Unorm), + (Bgra8Srgb, Swizzle(B, G, R, A)) => Some(Pf::RGBA8Unorm_sRGB), + (B5g6r5Unorm, Swizzle(B, G, R, A)) if self.format_b5 => Some(Pf::B5G6R5Unorm), + _ => { + let bits = format.base_format().0.describe_bits(); + if swizzle != Swizzle::NO && !(bits.alpha == 0 && swizzle == Swizzle(R, G, B, One)) + { + error!("Unsupported swizzle {:?} for format {:?}", swizzle, format); + } + self.map_format(format) + } + } + } + + /** Reports the `Properties` advertised for `format`. Matches on `self.map_format(format)`; most arms are additionally guarded by capability flags on `self`, and the first arm whose pattern and guard both hold wins. A format with no matching arm gets `buffer_features: Bf::VERTEX` when `map_vertex_format(format)` is `Some`, and `Properties::default()` otherwise. NOTE(review): `map_format` as visible here never returns `A8Unorm`, `Stencil8` or `RGB10A2Uint`, so those arms look unreachable - confirm. NOTE(review): the `RG8Uint` / `RG8Sint` arms advertise `SAMPLED_LINEAR` and no `STORAGE`, unlike the other integer formats in this table - confirm against the Metal feature set tables. */ pub fn 
map_format_properties(&self, format: Format) -> Properties { + use self::hal::format::{BufferFeature as Bf, ImageFeature as If}; + use metal::MTLPixelFormat::*; + + let buffer_features = Bf::all(); + let color_if = If::SAMPLED | If::BLIT_SRC | If::BLIT_DST; + let compressed_if = color_if | If::SAMPLED_LINEAR; + let depth_if = color_if | If::DEPTH_STENCIL_ATTACHMENT; + + match self.map_format(format) { + Some(A8Unorm) => Properties { + optimal_tiling: compressed_if, + buffer_features, + ..Properties::default() + }, + Some(R8Unorm) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(R8Unorm_sRGB) if self.format_any8_unorm_srgb_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(R8Unorm_sRGB) if self.format_any8_unorm_srgb_no_write => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(R8Snorm) if self.format_any8_snorm_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(R8Uint) => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(R8Sint) => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(R16Unorm) if self.format_r16_norm_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(R16Snorm) if 
self.format_r16_norm_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(R16Uint) => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(R16Sint) => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(R16Float) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG8Unorm) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG8Unorm_sRGB) if self.format_any8_unorm_srgb_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG8Unorm_sRGB) if self.format_any8_unorm_srgb_no_write => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG8Snorm) if self.format_any8_snorm_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG8Uint) => Properties { + optimal_tiling: color_if | If::SAMPLED_LINEAR | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RG8Sint) => Properties { + optimal_tiling: color_if | If::SAMPLED_LINEAR | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(B5G6R5Unorm) if self.format_b5 => Properties { 
+ optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(A1BGR5Unorm) if self.format_b5 => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(ABGR4Unorm) if self.format_b5 => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(BGR5A1Unorm) if self.format_b5 => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(R32Uint) if self.format_r32_all => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(R32Uint) if self.format_r32_no_write => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(R32Sint) if self.format_r32_all => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(R32Sint) if self.format_r32_no_write => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(R32Float) if self.format_r32float_no_write_no_filter => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(R32Float) if self.format_r32float_no_filter => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(R32Float) if self.format_r32float_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | 
If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG16Unorm) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG16Snorm) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG16Float) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA8Unorm) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA8Unorm_sRGB) if self.format_rgba8_srgb_no_write => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA8Unorm_sRGB) if self.format_rgba8_srgb_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA8Snorm) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA8Uint) => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RGBA8Sint) => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(BGRA8Unorm) => Properties { + optimal_tiling: color_if + | 
If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(BGRA8Unorm_sRGB) if self.format_rgba8_srgb_no_write => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(BGRA8Unorm_sRGB) if self.format_rgba8_srgb_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGB10A2Unorm) if self.format_rgb10a2_unorm_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGB10A2Unorm) if self.format_rgb10a2_unorm_no_write => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGB10A2Uint) if self.format_rgb10a2_uint_color => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RGB10A2Uint) if self.format_rgb10a2_uint_color_write => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RG11B10Float) if self.format_rg11b10_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG11B10Float) if self.format_rg11b10_no_write => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGB9E5Float) if self.format_rgb9e5_all => Properties { + optimal_tiling: 
color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGB9E5Float) if self.format_rgb9e5_filter_only => Properties { + optimal_tiling: compressed_if, + buffer_features, + ..Properties::default() + }, + Some(RGB9E5Float) if self.format_rgb9e5_no_write => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG32Uint) if self.format_rg32_color => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RG32Sint) if self.format_rg32_color => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RG32Uint) if self.format_rg32_color_write => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT | If::STORAGE, + buffer_features, + ..Properties::default() + }, + Some(RG32Sint) if self.format_rg32_color_write => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT | If::STORAGE, + buffer_features, + ..Properties::default() + }, + Some(RG32Float) if self.format_rg32float_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG32Float) if self.format_rg32float_color_blend => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RG32Float) if self.format_rg32float_no_filter => Properties { + optimal_tiling: color_if + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA16Unorm) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | 
If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA16Snorm) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA16Uint) => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RGBA16Sint) => Properties { + optimal_tiling: color_if | If::STORAGE | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RGBA16Float) => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA32Uint) if self.format_rgba32int_color => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RGBA32Uint) if self.format_rgba32int_color_write => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT | If::STORAGE, + buffer_features, + ..Properties::default() + }, + Some(RGBA32Sint) if self.format_rgba32int_color => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RGBA32Sint) if self.format_rgba32int_color_write => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT | If::STORAGE, + buffer_features, + ..Properties::default() + }, + Some(RGBA32Float) if self.format_rgba32float_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + buffer_features, + ..Properties::default() + }, + Some(RGBA32Float) if self.format_rgba32float_color => Properties { + optimal_tiling: color_if | If::COLOR_ATTACHMENT, + buffer_features, + ..Properties::default() + }, + Some(RGBA32Float) if self.format_rgba32float_color_write => Properties { + 
optimal_tiling: color_if | If::COLOR_ATTACHMENT | If::STORAGE, + buffer_features, + ..Properties::default() + }, + Some(EAC_R11Unorm) if self.format_eac_etc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(EAC_R11Snorm) if self.format_eac_etc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(EAC_RG11Unorm) if self.format_eac_etc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(EAC_RG11Snorm) if self.format_eac_etc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ETC2_RGB8) if self.format_eac_etc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ETC2_RGB8_sRGB) if self.format_eac_etc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ETC2_RGB8A1) if self.format_eac_etc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ETC2_RGB8A1_sRGB) if self.format_eac_etc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_4x4_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_4x4_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_5x4_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_5x4_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_5x5_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_5x5_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_6x5_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_6x5_sRGB) if 
self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_6x6_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_6x6_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_8x5_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_8x5_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_8x6_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_8x6_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_8x8_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_8x8_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_10x5_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_10x5_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_10x6_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_10x6_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_10x8_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_10x8_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_10x10_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_10x10_sRGB) if self.format_astc => Properties { + 
optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_12x10_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_12x10_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_12x12_LDR) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(ASTC_12x12_sRGB) if self.format_astc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC1_RGBA) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC1_RGBA_sRGB) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC2_RGBA) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC2_RGBA_sRGB) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC3_RGBA) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC3_RGBA_sRGB) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC4_RUnorm) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC4_RSnorm) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC5_RGUnorm) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC5_RGSnorm) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC6H_RGBUfloat) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC6H_RGBFloat) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC7_RGBAUnorm) if 
self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(BC7_RGBAUnorm_sRGB) if self.format_bc => Properties { + optimal_tiling: compressed_if, + ..Properties::default() + }, + Some(Depth16Unorm) if self.format_depth16unorm => Properties { + optimal_tiling: depth_if | If::SAMPLED_LINEAR, + ..Properties::default() + }, + Some(Depth32Float) if self.format_depth32float_filter => Properties { + optimal_tiling: depth_if | If::SAMPLED_LINEAR, + ..Properties::default() + }, + Some(Depth32Float) if self.format_depth32float_none => Properties { + optimal_tiling: depth_if, + ..Properties::default() + }, + Some(Stencil8) => Properties { + ..Properties::default() + }, + Some(Depth24Unorm_Stencil8) if self.format_depth24_stencil8 => Properties { + optimal_tiling: depth_if, + ..Properties::default() + }, + Some(Depth32Float_Stencil8) if self.format_depth32_stencil8_filter => Properties { + optimal_tiling: depth_if | If::SAMPLED_LINEAR, + ..Properties::default() + }, + Some(Depth32Float_Stencil8) if self.format_depth32_stencil8_none => Properties { + optimal_tiling: depth_if, + ..Properties::default() + }, + Some(BGR10A2Unorm) if self.format_bgr10a2_all => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::STORAGE + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + ..Properties::default() + }, + Some(BGR10A2Unorm) if self.format_bgr10a2_no_write => Properties { + optimal_tiling: color_if + | If::SAMPLED_LINEAR + | If::COLOR_ATTACHMENT + | If::COLOR_ATTACHMENT_BLEND, + ..Properties::default() + }, + _ if map_vertex_format(format).is_some() => Properties { + buffer_features: Bf::VERTEX, + ..Properties::default() + }, + _ => Properties::default(), + } + } +} + +pub fn map_load_operation(operation: pass::AttachmentLoadOp) -> MTLLoadAction { + use self::pass::AttachmentLoadOp::*; + + match operation { + Load => MTLLoadAction::Load, + Clear => MTLLoadAction::Clear, + DontCare => MTLLoadAction::DontCare, + } +} + 
+pub fn map_store_operation(operation: pass::AttachmentStoreOp) -> MTLStoreAction { + use self::pass::AttachmentStoreOp::*; + + match operation { + Store => MTLStoreAction::Store, + DontCare => MTLStoreAction::DontCare, + } +} + +pub fn map_resolved_store_operation(operation: pass::AttachmentStoreOp) -> MTLStoreAction { + use self::pass::AttachmentStoreOp::*; + + match operation { + Store => MTLStoreAction::StoreAndMultisampleResolve, + DontCare => MTLStoreAction::MultisampleResolve, + } +} + +pub fn map_write_mask(mask: pso::ColorMask) -> MTLColorWriteMask { + let mut mtl_mask = MTLColorWriteMask::empty(); + + if mask.contains(pso::ColorMask::RED) { + mtl_mask |= MTLColorWriteMask::Red; + } + if mask.contains(pso::ColorMask::GREEN) { + mtl_mask |= MTLColorWriteMask::Green; + } + if mask.contains(pso::ColorMask::BLUE) { + mtl_mask |= MTLColorWriteMask::Blue; + } + if mask.contains(pso::ColorMask::ALPHA) { + mtl_mask |= MTLColorWriteMask::Alpha; + } + + mtl_mask +} + +fn map_factor(factor: pso::Factor) -> MTLBlendFactor { + use self::hal::pso::Factor::*; + + match factor { + Zero => MTLBlendFactor::Zero, + One => MTLBlendFactor::One, + SrcColor => MTLBlendFactor::SourceColor, + OneMinusSrcColor => MTLBlendFactor::OneMinusSourceColor, + DstColor => MTLBlendFactor::DestinationColor, + OneMinusDstColor => MTLBlendFactor::OneMinusDestinationColor, + SrcAlpha => MTLBlendFactor::SourceAlpha, + OneMinusSrcAlpha => MTLBlendFactor::OneMinusSourceAlpha, + DstAlpha => MTLBlendFactor::DestinationAlpha, + OneMinusDstAlpha => MTLBlendFactor::OneMinusDestinationAlpha, + ConstColor => MTLBlendFactor::BlendColor, + OneMinusConstColor => MTLBlendFactor::OneMinusBlendColor, + ConstAlpha => MTLBlendFactor::BlendAlpha, + OneMinusConstAlpha => MTLBlendFactor::OneMinusBlendAlpha, + SrcAlphaSaturate => MTLBlendFactor::SourceAlphaSaturated, + Src1Color => MTLBlendFactor::Source1Color, + OneMinusSrc1Color => MTLBlendFactor::OneMinusSource1Color, + Src1Alpha => MTLBlendFactor::Source1Alpha, + 
OneMinusSrc1Alpha => MTLBlendFactor::OneMinusSource1Alpha, + } +} + +pub fn map_blend_op( + operation: pso::BlendOp, +) -> (MTLBlendOperation, MTLBlendFactor, MTLBlendFactor) { + use self::hal::pso::BlendOp::*; + + match operation { + Add { src, dst } => (MTLBlendOperation::Add, map_factor(src), map_factor(dst)), + Sub { src, dst } => ( + MTLBlendOperation::Subtract, + map_factor(src), + map_factor(dst), + ), + RevSub { src, dst } => ( + MTLBlendOperation::ReverseSubtract, + map_factor(src), + map_factor(dst), + ), + Min => ( + MTLBlendOperation::Min, + MTLBlendFactor::Zero, + MTLBlendFactor::Zero, + ), + Max => ( + MTLBlendOperation::Max, + MTLBlendFactor::Zero, + MTLBlendFactor::Zero, + ), + } +} + +pub fn map_vertex_format(format: Format) -> Option<MTLVertexFormat> { + use self::hal::format::Format as f; + use metal::MTLVertexFormat::*; + Some(match format { + f::R8Unorm => UCharNormalized, + f::R8Snorm => CharNormalized, + f::R8Uint => UChar, + f::R8Sint => Char, + f::Rg8Unorm => UChar2Normalized, + f::Rg8Snorm => Char2Normalized, + f::Rg8Uint => UChar2, + f::Rg8Sint => Char2, + f::Rgb8Unorm => UChar3Normalized, + f::Rgb8Snorm => Char3Normalized, + f::Rgb8Uint => UChar3, + f::Rgb8Sint => Char3, + f::Rgba8Unorm => UChar4Normalized, + f::Rgba8Snorm => Char4Normalized, + f::Rgba8Uint => UChar4, + f::Rgba8Sint => Char4, + f::Bgra8Unorm => UChar4Normalized_BGRA, + f::R16Unorm => UShortNormalized, + f::R16Snorm => ShortNormalized, + f::R16Uint => UShort, + f::R16Sint => Short, + f::R16Sfloat => Half, + f::Rg16Unorm => UShort2Normalized, + f::Rg16Snorm => Short2Normalized, + f::Rg16Uint => UShort2, + f::Rg16Sint => Short2, + f::Rg16Sfloat => Half2, + f::Rgb16Unorm => UShort3Normalized, + f::Rgb16Snorm => Short3Normalized, + f::Rgb16Uint => UShort3, + f::Rgb16Sint => Short3, + f::Rgb16Sfloat => Half3, + f::Rgba16Unorm => UShort4Normalized, + f::Rgba16Snorm => Short4Normalized, + f::Rgba16Uint => UShort4, + f::Rgba16Sint => Short4, + f::Rgba16Sfloat => Half4, + 
f::R32Uint => UInt, + f::R32Sint => Int, + f::R32Sfloat => Float, + f::Rg32Uint => UInt2, + f::Rg32Sint => Int2, + f::Rg32Sfloat => Float2, + f::Rgb32Uint => UInt3, + f::Rgb32Sint => Int3, + f::Rgb32Sfloat => Float3, + f::Rgba32Uint => UInt4, + f::Rgba32Sint => Int4, + f::Rgba32Sfloat => Float4, + _ => return None, + }) +} + +pub fn resource_options_from_storage_and_cache( + storage: MTLStorageMode, + cache: MTLCPUCacheMode, +) -> MTLResourceOptions { + MTLResourceOptions::from_bits( + ((storage as u64) << MTLResourceStorageModeShift) + | ((cache as u64) << MTLResourceCPUCacheModeShift), + ) + .unwrap() +} + +pub fn map_texture_usage( + usage: image::Usage, + tiling: image::Tiling, + view_caps: image::ViewCapabilities, +) -> MTLTextureUsage { + use self::hal::image::Usage as U; + + let mut texture_usage = MTLTextureUsage::Unknown; + // We have to view the texture with a different format + // in `clear_image` and `copy_image` destinations. + if view_caps.contains(image::ViewCapabilities::MUTABLE_FORMAT) + || usage.contains(U::TRANSFER_DST) + { + texture_usage |= MTLTextureUsage::PixelFormatView; + } + + if usage.intersects(U::COLOR_ATTACHMENT | U::DEPTH_STENCIL_ATTACHMENT) { + texture_usage |= MTLTextureUsage::RenderTarget; + } + if usage.intersects(U::SAMPLED | U::INPUT_ATTACHMENT) { + texture_usage |= MTLTextureUsage::ShaderRead; + } + if usage.intersects(U::STORAGE) { + texture_usage |= MTLTextureUsage::ShaderRead | MTLTextureUsage::ShaderWrite; + } + + // Note: for blitting, we do actual rendering, so we add more flags for TRANSFER_* usage + if usage.contains(U::TRANSFER_DST) && tiling == image::Tiling::Optimal { + texture_usage |= MTLTextureUsage::RenderTarget; + } + if usage.contains(U::TRANSFER_SRC) { + texture_usage |= MTLTextureUsage::ShaderRead; + } + + texture_usage +} + +pub fn map_texture_type(view_kind: image::ViewKind) -> MTLTextureType { + use self::hal::image::ViewKind as Vk; + match view_kind { + Vk::D1 => MTLTextureType::D1, + Vk::D1Array => 
MTLTextureType::D1Array, + Vk::D2 => MTLTextureType::D2, + Vk::D2Array => MTLTextureType::D2Array, + Vk::D3 => MTLTextureType::D3, + Vk::Cube => MTLTextureType::Cube, + Vk::CubeArray => MTLTextureType::CubeArray, + } +} + +pub fn _map_index_type(index_type: IndexType) -> MTLIndexType { + match index_type { + IndexType::U16 => MTLIndexType::UInt16, + IndexType::U32 => MTLIndexType::UInt32, + } +} + +pub fn map_compare_function(fun: Comparison) -> MTLCompareFunction { + match fun { + Comparison::Never => MTLCompareFunction::Never, + Comparison::Less => MTLCompareFunction::Less, + Comparison::LessEqual => MTLCompareFunction::LessEqual, + Comparison::Equal => MTLCompareFunction::Equal, + Comparison::GreaterEqual => MTLCompareFunction::GreaterEqual, + Comparison::Greater => MTLCompareFunction::Greater, + Comparison::NotEqual => MTLCompareFunction::NotEqual, + Comparison::Always => MTLCompareFunction::Always, + } +} + +pub fn map_filter(filter: image::Filter) -> MTLSamplerMinMagFilter { + match filter { + image::Filter::Nearest => MTLSamplerMinMagFilter::Nearest, + image::Filter::Linear => MTLSamplerMinMagFilter::Linear, + } +} + +pub fn map_wrap_mode(wrap: image::WrapMode) -> MTLSamplerAddressMode { + match wrap { + image::WrapMode::Tile => MTLSamplerAddressMode::Repeat, + image::WrapMode::Mirror => MTLSamplerAddressMode::MirrorRepeat, + image::WrapMode::Clamp => MTLSamplerAddressMode::ClampToEdge, + image::WrapMode::Border => MTLSamplerAddressMode::ClampToBorderColor, + image::WrapMode::MirrorClamp => MTLSamplerAddressMode::MirrorClampToEdge, + } +} + +pub fn map_border_color(border_color: image::BorderColor) -> MTLSamplerBorderColor { + match border_color { + image::BorderColor::TransparentBlack => MTLSamplerBorderColor::TransparentBlack, + image::BorderColor::OpaqueBlack => MTLSamplerBorderColor::OpaqueBlack, + image::BorderColor::OpaqueWhite => MTLSamplerBorderColor::OpaqueWhite, + } +} + +pub fn map_extent(extent: image::Extent) -> MTLSize { + MTLSize { + width: 
extent.width as _, + height: extent.height as _, + depth: extent.depth as _, + } +} + +pub fn map_offset(offset: image::Offset) -> MTLOrigin { + MTLOrigin { + x: offset.x as _, + y: offset.y as _, + z: offset.z as _, + } +} + +pub fn map_stencil_op(op: StencilOp) -> MTLStencilOperation { + match op { + StencilOp::Keep => MTLStencilOperation::Keep, + StencilOp::Zero => MTLStencilOperation::Zero, + StencilOp::Replace => MTLStencilOperation::Replace, + StencilOp::IncrementClamp => MTLStencilOperation::IncrementClamp, + StencilOp::IncrementWrap => MTLStencilOperation::IncrementWrap, + StencilOp::DecrementClamp => MTLStencilOperation::DecrementClamp, + StencilOp::DecrementWrap => MTLStencilOperation::DecrementWrap, + StencilOp::Invert => MTLStencilOperation::Invert, + } +} + +pub fn map_winding(face: pso::FrontFace) -> MTLWinding { + match face { + pso::FrontFace::Clockwise => MTLWinding::Clockwise, + pso::FrontFace::CounterClockwise => MTLWinding::CounterClockwise, + } +} + +pub fn map_polygon_mode(mode: pso::PolygonMode) -> MTLTriangleFillMode { + match mode { + pso::PolygonMode::Point => { + warn!("Unable to fill with points"); + MTLTriangleFillMode::Lines + } + pso::PolygonMode::Line => MTLTriangleFillMode::Lines, + pso::PolygonMode::Fill => MTLTriangleFillMode::Fill, + } +} + +pub fn map_cull_face(face: pso::Face) -> Option<MTLCullMode> { + match face { + pso::Face::NONE => Some(MTLCullMode::None), + pso::Face::FRONT => Some(MTLCullMode::Front), + pso::Face::BACK => Some(MTLCullMode::Back), + _ => None, + } +} diff --git a/third_party/rust/gfx-backend-metal/src/device.rs b/third_party/rust/gfx-backend-metal/src/device.rs new file mode 100644 index 0000000000..dcf7df6220 --- /dev/null +++ b/third_party/rust/gfx-backend-metal/src/device.rs @@ -0,0 +1,3262 @@ +use crate::{ + command, conversions as conv, + internal::{Channel, FastStorageMap}, + native as n, AsNative, Backend, OnlineRecording, QueueFamily, ResourceIndex, Shared, + VisibilityShared, 
MAX_BOUND_DESCRIPTOR_SETS, MAX_COLOR_ATTACHMENTS, +}; + +use arrayvec::ArrayVec; +use auxil::{spirv_cross_specialize_ast, FastHashMap, ShaderStage}; +use cocoa_foundation::foundation::{NSRange, NSUInteger}; +use copyless::VecHelper; +use foreign_types::{ForeignType, ForeignTypeRef}; +use hal::{ + adapter, buffer, + device::{ + AllocationError, BindError, CreationError as DeviceCreationError, DeviceLost, MapError, + OomOrDeviceLost, OutOfMemory, ShaderError, + }, + format, image, memory, + memory::Properties, + pass, + pool::CommandPoolCreateFlags, + pso, + pso::VertexInputRate, + query, + queue::{QueueFamilyId, QueueGroup, QueuePriority}, +}; +use metal::{ + CaptureManager, MTLCPUCacheMode, MTLLanguageVersion, MTLPrimitiveTopologyClass, + MTLPrimitiveType, MTLResourceOptions, MTLSamplerMipFilter, MTLStorageMode, MTLTextureType, + MTLVertexStepFunction, +}; +use objc::{ + rc::autoreleasepool, + runtime::{Object, BOOL, NO}, +}; +use parking_lot::Mutex; +use spirv_cross::{msl, spirv, ErrorCode as SpirvErrorCode}; + +use std::{ + borrow::Borrow, + cmp, + collections::hash_map::Entry, + collections::BTreeMap, + iter, mem, + ops::Range, + ptr, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + thread, time, +}; + +const PUSH_CONSTANTS_DESC_SET: u32 = !0; +const PUSH_CONSTANTS_DESC_BINDING: u32 = 0; +const STRIDE_GRANULARITY: pso::ElemStride = 4; //TODO: work around? +const SHADER_STAGE_COUNT: usize = 3; + +/// Emit error during shader module creation. Used if we don't expect an error +/// but might panic due to an exception in SPIRV-Cross. 
fn gen_unexpected_error(err: SpirvErrorCode) -> ShaderError {
    // Keep the SPIRV-Cross message when there is one; otherwise fall back to a
    // generic text.
    ShaderError::CompilationFailed(match err {
        SpirvErrorCode::CompilationError(text) => text,
        SpirvErrorCode::Unhandled => "Unexpected error".into(),
    })
}

/// Reasons why `get_final_function` can fail.
#[derive(Clone, Debug)]
enum FunctionError {
    /// The library has no function for the requested entry point name.
    InvalidEntryPoint,
    /// A function constant marked as required has no matching specialization
    /// constant.
    MissingRequiredSpecialization,
    /// The library rejected the function lookup with the supplied constant values.
    BadSpecialization,
}

/// Looks up `entry` in `library` and, when function specialization is enabled,
/// re-fetches it with the values from `specialization` applied.
///
/// The unspecialized function is returned as-is when `function_specialization`
/// is `false` (an error is logged if specialization data was supplied anyway)
/// or when the function declares no function constants.
fn get_final_function(
    library: &metal::LibraryRef,
    entry: &str,
    specialization: &pso::Specialization,
    function_specialization: bool,
) -> Result<metal::Function, FunctionError> {
    type MTLFunctionConstant = Object;

    let plain_function = match library.get_function(entry, None) {
        Ok(function) => function,
        Err(e) => {
            error!("Function retrieval error {:?}", e);
            return Err(FunctionError::InvalidEntryPoint);
        }
    };

    if !function_specialization {
        let has_specialization =
            !(specialization.data.is_empty() && specialization.constants.is_empty());
        if has_specialization {
            error!("platform does not support specialization");
        }
        return Ok(plain_function);
    }

    let dictionary = plain_function.function_constants_dictionary();
    let constant_count: NSUInteger = unsafe { msg_send![dictionary, count] };
    if constant_count == 0 {
        return Ok(plain_function);
    }

    let all_values: *mut Object = unsafe { msg_send![dictionary, allValues] };
    let values = metal::FunctionConstantValues::new();

    for i in 0..constant_count {
        let constant: *mut MTLFunctionConstant =
            unsafe { msg_send![all_values, objectAtIndex: i] };
        let index: NSUInteger = unsafe { msg_send![constant, index] };
        let required: BOOL = unsafe { msg_send![constant, required] };

        // Find the user-supplied constant whose id matches this function constant.
        let provided = specialization
            .constants
            .iter()
            .find(|c| c.id as NSUInteger == index);

        if let Some(c) = provided {
            unsafe {
                // The value is passed as a pointer to the start of its byte range.
                let ptr = &specialization.data[c.range.start as usize] as *const u8 as *const _;
                let ty: metal::MTLDataType = msg_send![constant, type];
                values.set_constant_value_at_index(c.id as NSUInteger, ty, ptr);
            }
        } else if required != NO {
            //TODO: get name
            error!("Missing required specialization constant id {}", index);
            return Err(FunctionError::MissingRequiredSpecialization);
        }
    }

    library.get_function(entry, Some(values)).map_err(|e| {
        error!("Specialized function retrieval error {:?}", e);
        FunctionError::BadSpecialization
    })
}

impl VisibilityShared {
    /// Returns `true` when every query id in `queries`, relative to `pool_base`,
    /// has a non-zero `u32` availability word in the shared buffer.
    fn are_available(&self, pool_base: query::Id, queries: &Range<query::Id>) -> bool {
        unsafe {
            let contents = self.buffer.contents() as *mut u8;
            // The availability words start `availability_offset` bytes into the buffer.
            let availability_start =
                contents.offset(self.availability_offset as isize) as *mut u32;
            let pool_flags = availability_start.offset(pool_base as isize);
            for id in queries.clone() {
                if *pool_flags.offset(id as isize) == 0 {
                    return false;
                }
            }
            true
        }
    }
}

/// Logical device of the Metal backend.
#[derive(Debug)]
pub struct Device {
    /// Backend state shared through an `Arc`.
    pub(crate) shared: Arc<Shared>,
    invalidation_queue: command::QueueInner,
    /// Memory types this device was created with.
    memory_types: Vec<adapter::MemoryType>,
    /// Feature set recorded for this device.
    features: hal::Features,
    pub online_recording: OnlineRecording,
}
unsafe impl Send for Device {}
unsafe impl Sync for Device {}

impl Drop for Device {
    /// Ends the default capture scope, if any, and stops the Metal capture when
    /// the `auto-capture` feature is enabled; does nothing otherwise.
    fn drop(&mut self) {
        if !cfg!(feature = "auto-capture") {
            return;
        }
        info!("Metal capture stop");
        let capture_manager = CaptureManager::shared();
        if let Some(scope) = capture_manager.default_capture_scope() {
            scope.end_scope();
        }
        capture_manager.stop_capture();
    }
}

+bitflags! { + /// Memory type bits. 
+ struct MemoryTypes: u32 { + const PRIVATE = 1<<0; + const SHARED = 1<<1; + const MANAGED_UPLOAD = 1<<2; + const MANAGED_DOWNLOAD = 1<<3; + } +} + +impl MemoryTypes { + fn describe(index: usize) -> (MTLStorageMode, MTLCPUCacheMode) { + match Self::from_bits(1 << index).unwrap() { + Self::PRIVATE => (MTLStorageMode::Private, MTLCPUCacheMode::DefaultCache), + Self::SHARED => (MTLStorageMode::Shared, MTLCPUCacheMode::DefaultCache), + Self::MANAGED_UPLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::WriteCombined), + Self::MANAGED_DOWNLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::DefaultCache), + _ => unreachable!(), + } + } +} + +#[derive(Debug)] +pub struct PhysicalDevice { + pub(crate) shared: Arc<Shared>, + memory_types: Vec<adapter::MemoryType>, +} +unsafe impl Send for PhysicalDevice {} +unsafe impl Sync for PhysicalDevice {} + +impl PhysicalDevice { + pub(crate) fn new(shared: Arc<Shared>) -> Self { + let memory_types = if shared.private_caps.os_is_mac { + vec![ + adapter::MemoryType { + // PRIVATE + properties: Properties::DEVICE_LOCAL, + heap_index: 0, + }, + adapter::MemoryType { + // SHARED + properties: Properties::CPU_VISIBLE | Properties::COHERENT, + heap_index: 1, + }, + adapter::MemoryType { + // MANAGED_UPLOAD + properties: Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE, + heap_index: 1, + }, + adapter::MemoryType { + // MANAGED_DOWNLOAD + properties: Properties::DEVICE_LOCAL + | Properties::CPU_VISIBLE + | Properties::CPU_CACHED, + heap_index: 1, + }, + ] + } else { + vec![ + adapter::MemoryType { + // PRIVATE + properties: Properties::DEVICE_LOCAL, + heap_index: 0, + }, + adapter::MemoryType { + // SHARED + properties: Properties::CPU_VISIBLE | Properties::COHERENT, + heap_index: 1, + }, + ] + }; + PhysicalDevice { + shared: shared.clone(), + memory_types, + } + } + + /// Return true if the specified format-swizzle pair is supported natively. 
+ pub fn supports_swizzle(&self, format: format::Format, swizzle: format::Swizzle) -> bool { + self.shared + .private_caps + .map_format_with_swizzle(format, swizzle) + .is_some() + } +} + +impl adapter::PhysicalDevice<Backend> for PhysicalDevice { + unsafe fn open( + &self, + families: &[(&QueueFamily, &[QueuePriority])], + requested_features: hal::Features, + ) -> Result<adapter::Gpu<Backend>, DeviceCreationError> { + use hal::queue::QueueFamily as _; + + // TODO: Query supported features by feature set rather than hard coding in the supported + // features. https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf + if !self.features().contains(requested_features) { + warn!( + "Features missing: {:?}", + requested_features - self.features() + ); + return Err(DeviceCreationError::MissingFeature); + } + + let device = self.shared.device.lock(); + + if cfg!(feature = "auto-capture") { + info!("Metal capture start"); + let shared_capture_manager = CaptureManager::shared(); + let default_capture_scope = + shared_capture_manager.new_capture_scope_with_device(&*device); + shared_capture_manager.set_default_capture_scope(&default_capture_scope); + shared_capture_manager.start_capture_with_scope(&default_capture_scope); + default_capture_scope.begin_scope(); + } + + assert_eq!(families.len(), 1); + assert_eq!(families[0].1.len(), 1); + let mut queue_group = QueueGroup::new(families[0].0.id()); + for _ in 0..self.shared.private_caps.exposed_queues { + queue_group.add_queue(command::CommandQueue::new(self.shared.clone())); + } + + let device = Device { + shared: self.shared.clone(), + invalidation_queue: command::QueueInner::new(&*device, Some(1)), + memory_types: self.memory_types.clone(), + features: requested_features, + online_recording: OnlineRecording::default(), + }; + + Ok(adapter::Gpu { + device, + queue_groups: vec![queue_group], + }) + } + + fn format_properties(&self, format: Option<format::Format>) -> format::Properties { + match format { + Some(format) 
=> self.shared.private_caps.map_format_properties(format), + None => format::Properties { + linear_tiling: format::ImageFeature::empty(), + optimal_tiling: format::ImageFeature::empty(), + buffer_features: format::BufferFeature::empty(), + }, + } + } + + fn image_format_properties( + &self, + format: format::Format, + dimensions: u8, + tiling: image::Tiling, + usage: image::Usage, + view_caps: image::ViewCapabilities, + ) -> Option<image::FormatProperties> { + if let image::Tiling::Linear = tiling { + let format_desc = format.surface_desc(); + let host_usage = image::Usage::TRANSFER_SRC | image::Usage::TRANSFER_DST; + if dimensions != 2 + || !view_caps.is_empty() + || !host_usage.contains(usage) + || format_desc.aspects != format::Aspects::COLOR + || format_desc.is_compressed() + { + return None; + } + } + if dimensions == 1 + && usage + .intersects(image::Usage::COLOR_ATTACHMENT | image::Usage::DEPTH_STENCIL_ATTACHMENT) + { + // MTLRenderPassDescriptor texture must not be MTLTextureType1D + return None; + } + if dimensions == 3 && view_caps.contains(image::ViewCapabilities::KIND_2D_ARRAY) { + // Can't create 2D/2DArray views of 3D textures + return None; + } + let max_dimension = if dimensions == 3 { + self.shared.private_caps.max_texture_3d_size as _ + } else { + self.shared.private_caps.max_texture_size as _ + }; + + let max_extent = image::Extent { + width: max_dimension, + height: if dimensions >= 2 { max_dimension } else { 1 }, + depth: if dimensions >= 3 { max_dimension } else { 1 }, + }; + + self.shared + .private_caps + .map_format(format) + .map(|_| image::FormatProperties { + max_extent, + max_levels: if dimensions == 1 { 1 } else { 12 }, + // 3D images enforce a single layer + max_layers: if dimensions == 3 { + 1 + } else { + self.shared.private_caps.max_texture_layers as _ + }, + sample_count_mask: self.shared.private_caps.sample_count_mask as _, + //TODO: buffers and textures have separate limits + // Max buffer size is determined by feature set + // 
Max texture size does not appear to be documented publicly + max_resource_size: self.shared.private_caps.max_buffer_size as _, + }) + } + + fn memory_properties(&self) -> adapter::MemoryProperties { + adapter::MemoryProperties { + memory_heaps: vec![ + adapter::MemoryHeap { + size: !0, //TODO: private memory limits + flags: memory::HeapFlags::DEVICE_LOCAL, + }, + adapter::MemoryHeap { + size: self.shared.private_caps.max_buffer_size, + flags: memory::HeapFlags::empty(), + }, + ], + memory_types: self.memory_types.to_vec(), + } + } + + fn features(&self) -> hal::Features { + use hal::Features as F; + F::empty() + | F::FULL_DRAW_INDEX_U32 + | if self.shared.private_caps.texture_cube_array { + F::IMAGE_CUBE_ARRAY + } else { + F::empty() + } + | F::INDEPENDENT_BLENDING + | if self.shared.private_caps.dual_source_blending { + F::DUAL_SRC_BLENDING + } else { + F::empty() + } + | F::DRAW_INDIRECT_FIRST_INSTANCE + | F::DEPTH_CLAMP + //| F::DEPTH_BOUNDS + | F::SAMPLER_ANISOTROPY + | F::FORMAT_BC + | F::PRECISE_OCCLUSION_QUERY + | F::SHADER_STORAGE_BUFFER_ARRAY_DYNAMIC_INDEXING + | F::VERTEX_STORES_AND_ATOMICS + | F::FRAGMENT_STORES_AND_ATOMICS + | F::INSTANCE_RATE + | F::SEPARATE_STENCIL_REF_VALUES + | if self.shared.private_caps.expose_line_mode { + F::NON_FILL_POLYGON_MODE + } else { + F::empty() + } + | F::SHADER_CLIP_DISTANCE + | if self.shared.private_caps.msl_version >= metal::MTLLanguageVersion::V2_0 { + F::TEXTURE_DESCRIPTOR_ARRAY | + F::SHADER_SAMPLED_IMAGE_ARRAY_DYNAMIC_INDEXING | + F::SAMPLED_TEXTURE_DESCRIPTOR_INDEXING | + F::STORAGE_TEXTURE_DESCRIPTOR_INDEXING + } else { + F::empty() + } + //| F::SAMPLER_MIRROR_CLAMP_EDGE + | if self.shared.private_caps.sampler_clamp_to_border { + F::SAMPLER_BORDER_COLOR + } else { + F::empty() + } + | if self.shared.private_caps.mutable_comparison_samplers { + F::MUTABLE_COMPARISON_SAMPLER + } else { + F::empty() + } + | F::NDC_Y_UP + } + + fn hints(&self) -> hal::Hints { + if 
self.shared.private_caps.base_vertex_instance_drawing { + hal::Hints::BASE_VERTEX_INSTANCE_DRAWING + } else { + hal::Hints::empty() + } + } + + fn limits(&self) -> hal::Limits { + let pc = &self.shared.private_caps; + let device = self.shared.device.lock(); + hal::Limits { + max_image_1d_size: pc.max_texture_size as _, + max_image_2d_size: pc.max_texture_size as _, + max_image_3d_size: pc.max_texture_3d_size as _, + max_image_cube_size: pc.max_texture_size as _, + max_image_array_layers: pc.max_texture_layers as _, + max_texel_elements: (pc.max_texture_size * pc.max_texture_size) as usize, + max_uniform_buffer_range: pc.max_buffer_size, + max_storage_buffer_range: pc.max_buffer_size, + // "Maximum length of an inlined constant data buffer, per graphics or compute function" + max_push_constants_size: 0x1000, + max_sampler_allocation_count: !0, + max_bound_descriptor_sets: MAX_BOUND_DESCRIPTOR_SETS as _, + max_descriptor_set_samplers: pc.max_samplers_per_stage as usize * SHADER_STAGE_COUNT, + max_descriptor_set_uniform_buffers: pc.max_buffers_per_stage as usize + * SHADER_STAGE_COUNT, + max_descriptor_set_uniform_buffers_dynamic: 8 * SHADER_STAGE_COUNT, + max_descriptor_set_storage_buffers: pc.max_buffers_per_stage as usize + * SHADER_STAGE_COUNT, + max_descriptor_set_storage_buffers_dynamic: 4 * SHADER_STAGE_COUNT, + max_descriptor_set_sampled_images: pc + .max_textures_per_stage + .min(pc.max_samplers_per_stage) + as usize + * SHADER_STAGE_COUNT, + max_descriptor_set_storage_images: pc.max_textures_per_stage as usize + * SHADER_STAGE_COUNT, + max_descriptor_set_input_attachments: pc.max_textures_per_stage as usize + * SHADER_STAGE_COUNT, + max_fragment_input_components: pc.max_fragment_input_components as usize, + max_framebuffer_layers: 2048, // TODO: Determine is this is the correct value + max_memory_allocation_count: 4096, // TODO: Determine is this is the correct value + + max_per_stage_descriptor_samplers: pc.max_samplers_per_stage as usize, + 
max_per_stage_descriptor_uniform_buffers: pc.max_buffers_per_stage as usize, + max_per_stage_descriptor_storage_buffers: pc.max_buffers_per_stage as usize, + max_per_stage_descriptor_sampled_images: pc + .max_textures_per_stage + .min(pc.max_samplers_per_stage) + as usize, + max_per_stage_descriptor_storage_images: pc.max_textures_per_stage as usize, + max_per_stage_descriptor_input_attachments: pc.max_textures_per_stage as usize, //TODO + max_per_stage_resources: 0x100, //TODO + + max_patch_size: 0, // No tessellation + + // Note: The maximum number of supported viewports and scissor rectangles varies by device. + // TODO: read from Metal Feature Sets. + max_viewports: 1, + max_viewport_dimensions: [pc.max_texture_size as _; 2], + max_framebuffer_extent: hal::image::Extent { + //TODO + width: pc.max_texture_size as _, + height: pc.max_texture_size as _, + depth: pc.max_texture_layers as _, + }, + min_memory_map_alignment: 4, + + optimal_buffer_copy_offset_alignment: pc.buffer_alignment, + optimal_buffer_copy_pitch_alignment: 4, + min_texel_buffer_offset_alignment: pc.buffer_alignment, + min_uniform_buffer_offset_alignment: pc.buffer_alignment, + min_storage_buffer_offset_alignment: pc.buffer_alignment, + + max_compute_work_group_count: [!0; 3], // really undefined + max_compute_work_group_size: { + let size = device.max_threads_per_threadgroup(); + [size.width as u32, size.height as u32, size.depth as u32] + }, + max_compute_shared_memory_size: pc.max_total_threadgroup_memory as usize, + + max_vertex_input_attributes: 31, + max_vertex_input_bindings: 31, + max_vertex_input_attribute_offset: 255, // TODO + max_vertex_input_binding_stride: 256, // TODO + max_vertex_output_components: pc.max_fragment_input_components as usize, + + framebuffer_color_sample_counts: 0b101, // TODO + framebuffer_depth_sample_counts: 0b101, // TODO + framebuffer_stencil_sample_counts: 0b101, // TODO + max_color_attachments: pc.max_color_render_targets as usize, + + 
buffer_image_granularity: 1, + // Note: we issue Metal buffer-to-buffer copies on memory flush/invalidate, + // and those need to operate on sizes being multiples of 4. + non_coherent_atom_size: 4, + max_sampler_anisotropy: 16., + min_vertex_input_binding_stride_alignment: STRIDE_GRANULARITY as u64, + + ..hal::Limits::default() // TODO! + } + } +} + +pub struct LanguageVersion { + pub major: u8, + pub minor: u8, +} + +impl LanguageVersion { + pub fn new(major: u8, minor: u8) -> Self { + LanguageVersion { major, minor } + } +} + +impl Device { + fn _is_heap_coherent(&self, heap: &n::MemoryHeap) -> bool { + match *heap { + n::MemoryHeap::Private => false, + n::MemoryHeap::Public(memory_type, _) => self.memory_types[memory_type.0] + .properties + .contains(Properties::COHERENT), + n::MemoryHeap::Native(ref heap) => heap.storage_mode() == MTLStorageMode::Shared, + } + } + + fn compile_shader_library_cross( + device: &Mutex<metal::Device>, + raw_data: &[u32], + compiler_options: &msl::CompilerOptions, + msl_version: MTLLanguageVersion, + specialization: &pso::Specialization, + ) -> Result<n::ModuleInfo, ShaderError> { + let module = spirv::Module::from_words(raw_data); + + // now parse again using the new overrides + let mut ast = spirv::Ast::<msl::Target>::parse(&module).map_err(|err| { + ShaderError::CompilationFailed(match err { + SpirvErrorCode::CompilationError(msg) => msg, + SpirvErrorCode::Unhandled => "Unexpected parse error".into(), + }) + })?; + + spirv_cross_specialize_ast(&mut ast, specialization)?; + + ast.set_compiler_options(compiler_options) + .map_err(gen_unexpected_error)?; + + let entry_points = ast.get_entry_points().map_err(|err| { + ShaderError::CompilationFailed(match err { + SpirvErrorCode::CompilationError(msg) => msg, + SpirvErrorCode::Unhandled => "Unexpected entry point error".into(), + }) + })?; + + let shader_code = ast.compile().map_err(|err| { + ShaderError::CompilationFailed(match err { + SpirvErrorCode::CompilationError(msg) => msg, + 
SpirvErrorCode::Unhandled => "Unknown compile error".into(), + }) + })?; + + let mut entry_point_map = n::EntryPointMap::default(); + for entry_point in entry_points { + info!("Entry point {:?}", entry_point); + let cleansed = ast + .get_cleansed_entry_point_name(&entry_point.name, entry_point.execution_model) + .map_err(|err| { + ShaderError::CompilationFailed(match err { + SpirvErrorCode::CompilationError(msg) => msg, + SpirvErrorCode::Unhandled => "Unknown compile error".into(), + }) + })?; + entry_point_map.insert( + entry_point.name, + spirv::EntryPoint { + name: cleansed, + ..entry_point + }, + ); + } + + let rasterization_enabled = ast + .is_rasterization_enabled() + .map_err(|_| ShaderError::CompilationFailed("Unknown compile error".into()))?; + + // done + debug!("SPIRV-Cross generated shader:\n{}", shader_code); + + let options = metal::CompileOptions::new(); + options.set_language_version(msl_version); + + let library = device + .lock() + .new_library_with_source(shader_code.as_ref(), &options) + .map_err(|err| ShaderError::CompilationFailed(err.into()))?; + + Ok(n::ModuleInfo { + library, + entry_point_map, + rasterization_enabled, + }) + } + + #[cfg(feature = "naga")] + fn compile_shader_library_naga( + device: &Mutex<metal::Device>, + module: &naga::Module, + naga_options: &naga::back::msl::Options, + ) -> Result<n::ModuleInfo, ShaderError> { + let source = naga::back::msl::write_string(module, naga_options) + .map_err(|e| ShaderError::CompilationFailed(format!("{:?}", e)))?; + + let mut entry_point_map = n::EntryPointMap::default(); + for (&(stage, ref name), ep) in module.entry_points.iter() { + entry_point_map.insert( + name.clone(), + spirv::EntryPoint { + //TODO: fill that information by Naga + name: format!("{}{:?}", name, stage), + execution_model: match stage { + naga::ShaderStage::Vertex => spirv::ExecutionModel::Vertex, + naga::ShaderStage::Fragment => spirv::ExecutionModel::Fragment, + naga::ShaderStage::Compute => 
/// Produces the Metal library and function for one shader entry point.
///
/// The compiler options are taken from `layout` (the point-size variant when
/// `primitive_class` is `Point`) and extended with the requested entry point.
///
/// * With a `pipeline_cache`, the module info is looked up (or created) in a
///   two-level map keyed first by the compiler options and then by the SPIR-V
///   words. Only the SPIRV-Cross path is used here.
/// * Without a cache, Naga is tried first (when the `naga` feature is enabled
///   and the module carries a parsed Naga module); SPIRV-Cross is the fallback.
///
/// Returns `(library, function, work group size, rasterization enabled)`.
/// The work group size is all zeroes when the entry point is not present in the
/// compiled module's entry point map.
///
/// # Errors
///
/// * `CreationError::UnsupportedPipeline` for any stage other than vertex,
///   fragment or compute.
/// * `CreationError::Other` when compilation fails on the uncached path, or
///   when the final function cannot be obtained from the library.
///
/// # Panics
///
/// On the cached path the result of `compile_shader_library_cross` is
/// `unwrap()`ed inside the creation closure, so a compilation failure there
/// panics instead of returning an error.
fn load_shader(
    &self,
    ep: &pso::EntryPoint<Backend>,
    layout: &n::PipelineLayout,
    primitive_class: MTLPrimitiveTopologyClass,
    pipeline_cache: Option<&n::PipelineCache>,
    stage: ShaderStage,
) -> Result<(metal::Library, metal::Function, metal::MTLSize, bool), pso::CreationError> {
    let device = &self.shared.device;
    let msl_version = self.shared.private_caps.msl_version;
    // Declared up front (uninitialized) so that the value borrowed by `info`
    // below outlives the `match` arm that initializes it.
    let module_map;
    let (info_owned, info_guard);

    // Work on a private copy of the layout's options: the entry point is
    // specific to this call.
    let compiler_options = &mut match primitive_class {
        MTLPrimitiveTopologyClass::Point => layout.shader_compiler_options_point.clone(),
        _ => layout.shader_compiler_options.clone(),
    };
    compiler_options.entry_point = Some((
        ep.entry.to_string(),
        match stage {
            ShaderStage::Vertex => spirv::ExecutionModel::Vertex,
            ShaderStage::Fragment => spirv::ExecutionModel::Fragment,
            ShaderStage::Compute => spirv::ExecutionModel::GlCompute,
            _ => return Err(pso::CreationError::UnsupportedPipeline),
        },
    ));

    let data = &ep.module.spv;
    let info = match pipeline_cache {
        Some(cache) => {
            // First level: keyed by the full compiler options.
            module_map = cache
                .modules
                .get_or_create_with(compiler_options, FastStorageMap::default);
            // Second level: keyed by the SPIR-V words.
            // NOTE(review): `ep.specialization` feeds the compilation but is not
            // part of either key — confirm that entries cannot be shared between
            // different specializations of the same module.
            info_guard = module_map.get_or_create_with(data, || {
                Self::compile_shader_library_cross(
                    device,
                    data,
                    compiler_options,
                    msl_version,
                    &ep.specialization,
                )
                // NOTE(review): panics on a shader compilation error.
                .unwrap()
            });
            &*info_guard
        }
        None => {
            // Start from an error so that the SPIRV-Cross fallback below runs
            // whenever Naga is unavailable or fails.
            let mut result = Err(ShaderError::CompilationFailed(String::new()));
            #[cfg(feature = "naga")]
            if let Some(ref module) = ep.module.naga {
                result =
                    Self::compile_shader_library_naga(device, module, &layout.naga_options);
                if let Err(ShaderError::CompilationFailed(ref msg)) = result {
                    warn!("Naga: {:?}", msg);
                }
            }
            if result.is_err() {
                result = Self::compile_shader_library_cross(
                    device,
                    data,
                    compiler_options,
                    msl_version,
                    &ep.specialization,
                );
            }
            info_owned = result.map_err(|e| {
                error!("Error compiling the shader {:?}", e);
                pso::CreationError::Other
            })?;
            &info_owned
        }
    };

    let lib = info.library.clone();
    // Resolve the name the function has inside the compiled library, together
    // with the work group size recorded for it.
    let (name, wg_size) = match info.entry_point_map.get(ep.entry) {
        Some(p) => (
            p.name.as_str(),
            metal::MTLSize {
                width: p.work_group_size.x as _,
                height: p.work_group_size.y as _,
                depth: p.work_group_size.z as _,
            },
        ),
        // this can only happen if the shader came directly from the user
        None => (
            ep.entry,
            metal::MTLSize {
                width: 0,
                height: 0,
                depth: 0,
            },
        ),
    };
    let mtl_function = get_final_function(
        &lib,
        name,
        &ep.specialization,
        self.shared.private_caps.function_specialization,
    )
    .map_err(|e| {
        error!("Invalid shader entry point '{}': {:?}", name, e);
        pso::CreationError::Other
    })?;

    Ok((lib, mtl_function, wg_size, info.rasterization_enabled))
}
descriptor.set_min_filter(conv::map_filter(info.min_filter)); + descriptor.set_mag_filter(conv::map_filter(info.mag_filter)); + descriptor.set_mip_filter(match info.mip_filter { + // Note: this shouldn't be required, but Metal appears to be confused when mipmaps + // are provided even with trivial LOD bias. + image::Filter::Nearest if info.lod_range.end.0 < 0.5 => { + MTLSamplerMipFilter::NotMipmapped + } + image::Filter::Nearest => MTLSamplerMipFilter::Nearest, + image::Filter::Linear => MTLSamplerMipFilter::Linear, + }); + + if let Some(aniso) = info.anisotropy_clamp { + descriptor.set_max_anisotropy(aniso as _); + } + + let (s, t, r) = info.wrap_mode; + descriptor.set_address_mode_s(conv::map_wrap_mode(s)); + descriptor.set_address_mode_t(conv::map_wrap_mode(t)); + descriptor.set_address_mode_r(conv::map_wrap_mode(r)); + + let lod_bias = info.lod_bias.0; + if lod_bias != 0.0 { + if self.features.contains(hal::Features::SAMPLER_MIP_LOD_BIAS) { + unsafe { + descriptor.set_lod_bias(lod_bias); + } + } else { + error!("Lod bias {:?} is not supported", info.lod_bias); + } + } + descriptor.set_lod_min_clamp(info.lod_range.start.0); + descriptor.set_lod_max_clamp(info.lod_range.end.0); + + // TODO: Clarify minimum macOS version with Apple (43707452) + if (caps.os_is_mac && caps.has_version_at_least(10, 13)) + || (!caps.os_is_mac && caps.has_version_at_least(9, 0)) + { + descriptor.set_lod_average(true); // optimization + } + + if let Some(fun) = info.comparison { + if !caps.mutable_comparison_samplers { + return None; + } + descriptor.set_compare_function(conv::map_compare_function(fun)); + } + if [r, s, t].iter().any(|&am| am == image::WrapMode::Border) { + descriptor.set_border_color(conv::map_border_color(info.border)); + } + + if caps.argument_buffers { + descriptor.set_support_argument_buffers(true); + } + + Some(descriptor) + } + + fn make_sampler_data(info: &image::SamplerDesc) -> msl::SamplerData { + fn map_address(wrap: image::WrapMode) -> msl::SamplerAddress { 
+ match wrap { + image::WrapMode::Tile => msl::SamplerAddress::Repeat, + image::WrapMode::Mirror => msl::SamplerAddress::MirroredRepeat, + image::WrapMode::Clamp => msl::SamplerAddress::ClampToEdge, + image::WrapMode::Border => msl::SamplerAddress::ClampToBorder, + image::WrapMode::MirrorClamp => { + unimplemented!("https://github.com/grovesNL/spirv_cross/issues/138") + } + } + } + + let lods = info.lod_range.start.0..info.lod_range.end.0; + msl::SamplerData { + coord: if info.normalized { + msl::SamplerCoord::Normalized + } else { + msl::SamplerCoord::Pixel + }, + min_filter: match info.min_filter { + image::Filter::Nearest => msl::SamplerFilter::Nearest, + image::Filter::Linear => msl::SamplerFilter::Linear, + }, + mag_filter: match info.mag_filter { + image::Filter::Nearest => msl::SamplerFilter::Nearest, + image::Filter::Linear => msl::SamplerFilter::Linear, + }, + mip_filter: match info.min_filter { + image::Filter::Nearest if info.lod_range.end.0 < 0.5 => msl::SamplerMipFilter::None, + image::Filter::Nearest => msl::SamplerMipFilter::Nearest, + image::Filter::Linear => msl::SamplerMipFilter::Linear, + }, + s_address: map_address(info.wrap_mode.0), + t_address: map_address(info.wrap_mode.1), + r_address: map_address(info.wrap_mode.2), + compare_func: match info.comparison { + Some(func) => unsafe { mem::transmute(conv::map_compare_function(func) as u32) }, + None => msl::SamplerCompareFunc::Always, + }, + border_color: match info.border { + image::BorderColor::TransparentBlack => msl::SamplerBorderColor::TransparentBlack, + image::BorderColor::OpaqueBlack => msl::SamplerBorderColor::OpaqueBlack, + image::BorderColor::OpaqueWhite => msl::SamplerBorderColor::OpaqueWhite, + }, + lod_clamp_min: lods.start.into(), + lod_clamp_max: lods.end.into(), + max_anisotropy: info.anisotropy_clamp.map_or(0, |aniso| aniso as i32), + planes: 0, + resolution: msl::FormatResolution::_444, + chroma_filter: msl::SamplerFilter::Nearest, + x_chroma_offset: 
msl::ChromaLocation::CositedEven, + y_chroma_offset: msl::ChromaLocation::CositedEven, + swizzle: [ + msl::ComponentSwizzle::Identity, + msl::ComponentSwizzle::Identity, + msl::ComponentSwizzle::Identity, + msl::ComponentSwizzle::Identity, + ], + ycbcr_conversion_enable: false, + ycbcr_model: msl::SamplerYCbCrModelConversion::RgbIdentity, + ycbcr_range: msl::SamplerYCbCrRange::ItuFull, + bpc: 8, + } + } +} + +impl hal::device::Device<Backend> for Device { + unsafe fn create_command_pool( + &self, + _family: QueueFamilyId, + _flags: CommandPoolCreateFlags, + ) -> Result<command::CommandPool, OutOfMemory> { + Ok(command::CommandPool::new( + &self.shared, + self.online_recording.clone(), + )) + } + + unsafe fn destroy_command_pool(&self, mut pool: command::CommandPool) { + use hal::pool::CommandPool as _; + pool.reset(false); + } + + unsafe fn create_render_pass<'a, IA, IS, ID>( + &self, + attachments: IA, + subpasses: IS, + _dependencies: ID, + ) -> Result<n::RenderPass, OutOfMemory> + where + IA: IntoIterator, + IA::Item: Borrow<pass::Attachment>, + IS: IntoIterator, + IS::Item: Borrow<pass::SubpassDesc<'a>>, + ID: IntoIterator, + ID::Item: Borrow<pass::SubpassDependency>, + { + let attachments: Vec<pass::Attachment> = attachments + .into_iter() + .map(|at| at.borrow().clone()) + .collect(); + + let mut subpasses: Vec<n::Subpass> = subpasses + .into_iter() + .map(|sp| { + let sub = sp.borrow(); + let mut colors: ArrayVec<[_; MAX_COLOR_ATTACHMENTS]> = sub + .colors + .iter() + .map(|&(id, _)| { + let hal_format = attachments[id].format.expect("No format!"); + n::AttachmentInfo { + id, + resolve_id: None, + ops: n::AttachmentOps::empty(), + format: self + .shared + .private_caps + .map_format(hal_format) + .expect("Unable to map color format!"), + channel: Channel::from(hal_format.base_format().1), + } + }) + .collect(); + for (color, &(resolve_id, _)) in colors.iter_mut().zip(sub.resolves.iter()) { + if resolve_id != pass::ATTACHMENT_UNUSED { + color.resolve_id = 
/// Builds a pipeline layout: assigns per-stage Metal resource slots to push
/// constants and descriptor set bindings, and bakes those assignments into the
/// shader compiler options.
///
/// The work happens in three phases, tracked independently for the vertex,
/// fragment and compute stages (in that order in every 3-element array here):
///
/// 1. Push constants: for each stage that has any, the next free buffer slot is
///    reserved and registered as a binding override.
/// 2. Descriptor sets: each set records the counters at which it starts
///    (`offsets`), then every binding takes the next free buffer/texture/sampler
///    slots of each stage it is visible in.
/// 3. The totals are checked against the device's per-stage limits.
///
/// An id of `!0` in a `msl::ResourceBinding` marks "no slot of this kind".
///
/// # Panics
///
/// Panics if the accumulated buffer, texture or sampler count of any stage
/// exceeds the corresponding `private_caps` limit. In debug builds it also
/// panics if a push constant range does not end on a multiple of 4.
unsafe fn create_pipeline_layout<IS, IR>(
    &self,
    set_layouts: IS,
    push_constant_ranges: IR,
) -> Result<n::PipelineLayout, OutOfMemory>
where
    IS: IntoIterator,
    IS::Item: Borrow<n::DescriptorSetLayout>,
    IR: IntoIterator,
    IR::Item: Borrow<(pso::ShaderStageFlags, Range<u32>)>,
{
    // (stage flag, SPIR-V execution model, running slot counters) per stage.
    let mut stage_infos = [
        (
            pso::ShaderStageFlags::VERTEX,
            spirv::ExecutionModel::Vertex,
            n::ResourceData::<ResourceIndex>::new(),
        ),
        (
            pso::ShaderStageFlags::FRAGMENT,
            spirv::ExecutionModel::Fragment,
            n::ResourceData::<ResourceIndex>::new(),
        ),
        (
            pso::ShaderStageFlags::COMPUTE,
            spirv::ExecutionModel::GlCompute,
            n::ResourceData::<ResourceIndex>::new(),
        ),
    ];
    let mut res_overrides = BTreeMap::new();
    let mut const_samplers = BTreeMap::new();
    let mut infos = Vec::new();

    // First, place the push constants
    let mut pc_buffers = [None; 3];
    // Per-stage push constant size, counted in 4-byte words.
    let mut pc_limits = [0u32; 3];
    for pcr in push_constant_ranges {
        let (flags, range) = pcr.borrow();
        for (limit, &(stage_bit, _, _)) in pc_limits.iter_mut().zip(&stage_infos) {
            if flags.contains(stage_bit) {
                debug_assert_eq!(range.end % 4, 0);
                *limit = (range.end / 4).max(*limit);
            }
        }
    }

    const LIMIT_MASK: u32 = 3;
    // round up the limits alignment to 4, so that it matches MTL compiler logic
    //TODO: figure out what and how exactly does the alignment. Clearly, it's not
    // straightforward, given that value of 2 stays non-aligned.
    for limit in &mut pc_limits {
        // Only counts above 3 words are rounded up to a multiple of 4.
        if *limit > LIMIT_MASK {
            *limit = (*limit + LIMIT_MASK) & !LIMIT_MASK;
        }
    }

    for ((limit, ref mut buffer_index), &mut (_, stage, ref mut counters)) in pc_limits
        .iter()
        .zip(pc_buffers.iter_mut())
        .zip(stage_infos.iter_mut())
    {
        // handle the push constant buffer assignment and shader overrides
        if *limit != 0 {
            let index = counters.buffers;
            **buffer_index = Some(index);
            counters.buffers += 1;

            res_overrides.insert(
                msl::ResourceBindingLocation {
                    stage,
                    desc_set: PUSH_CONSTANTS_DESC_SET,
                    binding: PUSH_CONSTANTS_DESC_BINDING,
                },
                msl::ResourceBinding {
                    buffer_id: index as _,
                    texture_id: !0,
                    sampler_id: !0,
                },
            );
        }
    }

    // Second, place the resources described by the descriptor set layouts
    for (set_index, set_layout) in set_layouts.into_iter().enumerate() {
        // remember where the resources for this set start at each shader stage
        let mut dynamic_buffers = Vec::new();
        let offsets = n::MultiStageResourceCounters {
            vs: stage_infos[0].2.clone(),
            ps: stage_infos[1].2.clone(),
            cs: stage_infos[2].2.clone(),
        };
        match *set_layout.borrow() {
            n::DescriptorSetLayout::Emulated {
                layouts: ref desc_layouts,
                ref immutable_samplers,
                total: _,
            } => {
                // Immutable samplers are handed to the shader compiler as
                // constant samplers, keyed by (set, binding).
                for (&binding, data) in immutable_samplers.iter() {
                    //TODO: array support?
                    const_samplers.insert(
                        msl::SamplerLocation {
                            desc_set: set_index as u32,
                            binding,
                        },
                        data.clone(),
                    );
                }
                for layout in desc_layouts.iter() {
                    // For dynamic buffers, remember the buffer slot each stage
                    // is about to assign (`!0` where the stage is not used).
                    // This must be read before `counters.add` below advances it.
                    if layout
                        .content
                        .contains(n::DescriptorContent::DYNAMIC_BUFFER)
                    {
                        dynamic_buffers.alloc().init(n::MultiStageData {
                            vs: if layout.stages.contains(pso::ShaderStageFlags::VERTEX) {
                                stage_infos[0].2.buffers
                            } else {
                                !0
                            },
                            ps: if layout.stages.contains(pso::ShaderStageFlags::FRAGMENT) {
                                stage_infos[1].2.buffers
                            } else {
                                !0
                            },
                            cs: if layout.stages.contains(pso::ShaderStageFlags::COMPUTE) {
                                stage_infos[2].2.buffers
                            } else {
                                !0
                            },
                        });
                    }
                    for &mut (stage_bit, stage, ref mut counters) in stage_infos.iter_mut() {
                        if !layout.stages.contains(stage_bit) {
                            continue;
                        }
                        // Take the current counter value for every kind of
                        // resource this binding contains.
                        let res = msl::ResourceBinding {
                            buffer_id: if layout.content.contains(n::DescriptorContent::BUFFER)
                            {
                                counters.buffers as _
                            } else {
                                !0
                            },
                            texture_id: if layout
                                .content
                                .contains(n::DescriptorContent::TEXTURE)
                            {
                                counters.textures as _
                            } else {
                                !0
                            },
                            sampler_id: if layout
                                .content
                                .contains(n::DescriptorContent::SAMPLER)
                            {
                                counters.samplers as _
                            } else {
                                !0
                            },
                        };
                        counters.add(layout.content);
                        // Every array element advances the counters, but only
                        // the first one registers the binding override.
                        if layout.array_index == 0 {
                            let location = msl::ResourceBindingLocation {
                                stage,
                                desc_set: set_index as _,
                                binding: layout.binding,
                            };
                            res_overrides.insert(location, res);
                        }
                    }
                }
            }
            n::DescriptorSetLayout::ArgumentBuffer {
                ref bindings,
                stage_flags,
                ..
            } => {
                for &mut (stage_bit, stage, ref mut counters) in stage_infos.iter_mut() {
                    let has_stage = stage_flags.contains(stage_bit);
                    // The argument buffer itself occupies one buffer slot in
                    // each stage that uses the set; other stages get `!0`.
                    res_overrides.insert(
                        msl::ResourceBindingLocation {
                            stage,
                            desc_set: set_index as _,
                            binding: msl::ARGUMENT_BUFFER_BINDING,
                        },
                        msl::ResourceBinding {
                            buffer_id: if has_stage { counters.buffers } else { !0 },
                            texture_id: !0,
                            sampler_id: !0,
                        },
                    );
                    if has_stage {
                        // Individual bindings reuse the resource ids stored in
                        // the set layout.
                        res_overrides.extend(bindings.iter().map(|(&binding, arg)| {
                            let key = msl::ResourceBindingLocation {
                                stage,
                                desc_set: set_index as _,
                                binding,
                            };
                            (key, arg.res.clone())
                        }));
                        counters.buffers += 1;
                    }
                }
            }
        }

        infos.alloc().init(n::DescriptorSetInfo {
            offsets,
            dynamic_buffers,
        });
    }

    // Finally, make sure we fit the limits
    for &(_, _, ref counters) in stage_infos.iter() {
        assert!(counters.buffers <= self.shared.private_caps.max_buffers_per_stage);
        assert!(counters.textures <= self.shared.private_caps.max_textures_per_stage);
        assert!(counters.samplers <= self.shared.private_caps.max_samplers_per_stage);
    }

    // Mirror the binding overrides into Naga's option format.
    #[cfg(feature = "naga")]
    let naga_options = {
        // Note: inside this block `msl` refers to Naga's MSL backend.
        use naga::back::msl;
        // `!0` means "no slot"; anything else is narrowed to `u8`.
        fn res_index(id: u32) -> Option<u8> {
            if id == !0 {
                None
            } else {
                Some(id as _)
            }
        }
        msl::Options {
            lang_version: match self.shared.private_caps.msl_version {
                MTLLanguageVersion::V1_0 => (1, 0),
                MTLLanguageVersion::V1_1 => (1, 1),
                MTLLanguageVersion::V1_2 => (1, 2),
                MTLLanguageVersion::V2_0 => (2, 0),
                MTLLanguageVersion::V2_1 => (2, 1),
                MTLLanguageVersion::V2_2 => (2, 2),
            },
            spirv_cross_compatibility: true,
            binding_map: res_overrides
                .iter()
                .map(|(loc, binding)| {
                    let source = msl::BindSource {
                        stage: match loc.stage {
                            spirv::ExecutionModel::Vertex => naga::ShaderStage::Vertex,
                            spirv::ExecutionModel::Fragment => naga::ShaderStage::Fragment,
                            spirv::ExecutionModel::GlCompute => naga::ShaderStage::Compute,
                            other => panic!("Unexpected stage: {:?}", other),
                        },
                        group: loc.desc_set,
                        binding: loc.binding,
                    };
                    let target = msl::BindTarget {
                        buffer: res_index(binding.buffer_id),
                        texture: res_index(binding.texture_id),
                        sampler: res_index(binding.sampler_id),
                        mutable: false, //TODO
                    };
                    (source, target)
                })
                .collect(),
        }
    };

    let mut shader_compiler_options = msl::CompilerOptions::default();
    shader_compiler_options.version = match self.shared.private_caps.msl_version {
        MTLLanguageVersion::V1_0 => msl::Version::V1_0,
        MTLLanguageVersion::V1_1 => msl::Version::V1_1,
        MTLLanguageVersion::V1_2 => msl::Version::V1_2,
        MTLLanguageVersion::V2_0 => msl::Version::V2_0,
        MTLLanguageVersion::V2_1 => msl::Version::V2_1,
        MTLLanguageVersion::V2_2 => msl::Version::V2_2,
    };
    shader_compiler_options.enable_point_size_builtin = false;
    shader_compiler_options.vertex.invert_y = !self.features.contains(hal::Features::NDC_Y_UP);
    shader_compiler_options.resource_binding_overrides = res_overrides;
    shader_compiler_options.const_samplers = const_samplers;
    shader_compiler_options.enable_argument_buffers = self.shared.private_caps.argument_buffers;
    shader_compiler_options.force_zero_initialized_variables = true;
    shader_compiler_options.force_native_arrays = true;
    // Second copy of the options, identical except that the point size
    // builtin is enabled.
    let mut shader_compiler_options_point = shader_compiler_options.clone();
    shader_compiler_options_point.enable_point_size_builtin = true;

    Ok(n::PipelineLayout {
        shader_compiler_options,
        shader_compiler_options_point,
        #[cfg(feature = "naga")]
        naga_options,
        infos,
        total: n::MultiStageResourceCounters {
            vs: stage_infos[0].2.clone(),
            ps: stage_infos[1].2.clone(),
            cs: stage_infos[2].2.clone(),
        },
        push_constants: n::MultiStageData {
            vs: pc_buffers[0].map(|buffer_index| n::PushConstantInfo {
                count: pc_limits[0],
                buffer_index,
            }),
            ps: pc_buffers[1].map(|buffer_index| n::PushConstantInfo {
                count: pc_limits[1],
                buffer_index,
            }),
            cs: pc_buffers[2].map(|buffer_index| n::PushConstantInfo {
                count: pc_limits[2],
                buffer_index,
            }),
        },
        total_push_constants: pc_limits[0].max(pc_limits[1]).max(pc_limits[2]),
    })
}
(&main_pass.attachments, &main_pass.subpasses[index as usize]) + }; + + let (desc_vertex_buffers, attributes, input_assembler, vs, gs, hs, ds) = + match pipeline_desc.primitive_assembler { + pso::PrimitiveAssemblerDesc::Vertex { + buffers, + attributes, + ref input_assembler, + ref vertex, + ref tessellation, + ref geometry, + } => { + let (hs, ds) = if let Some(ts) = tessellation { + (Some(&ts.0), Some(&ts.1)) + } else { + (None, None) + }; + + ( + buffers, + attributes, + input_assembler, + vertex, + geometry, + hs, + ds, + ) + } + pso::PrimitiveAssemblerDesc::Mesh { .. } => { + return Err(pso::CreationError::UnsupportedPipeline) + } + }; + + let (primitive_class, primitive_type) = match input_assembler.primitive { + pso::Primitive::PointList => { + (MTLPrimitiveTopologyClass::Point, MTLPrimitiveType::Point) + } + pso::Primitive::LineList => (MTLPrimitiveTopologyClass::Line, MTLPrimitiveType::Line), + pso::Primitive::LineStrip => { + (MTLPrimitiveTopologyClass::Line, MTLPrimitiveType::LineStrip) + } + pso::Primitive::TriangleList => ( + MTLPrimitiveTopologyClass::Triangle, + MTLPrimitiveType::Triangle, + ), + pso::Primitive::TriangleStrip => ( + MTLPrimitiveTopologyClass::Triangle, + MTLPrimitiveType::TriangleStrip, + ), + pso::Primitive::PatchList(_) => ( + MTLPrimitiveTopologyClass::Unspecified, + MTLPrimitiveType::Point, + ), + }; + if self.shared.private_caps.layered_rendering { + pipeline.set_input_primitive_topology(primitive_class); + } + + // Vertex shader + let (vs_lib, vs_function, _, enable_rasterization) = self.load_shader( + vs, + pipeline_layout, + primitive_class, + cache, + ShaderStage::Vertex, + )?; + pipeline.set_vertex_function(Some(&vs_function)); + + // Fragment shader + let fs_function; + let fs_lib = match pipeline_desc.fragment { + Some(ref ep) => { + let (lib, fun, _, _) = self.load_shader( + ep, + pipeline_layout, + primitive_class, + cache, + ShaderStage::Fragment, + )?; + fs_function = fun; + 
pipeline.set_fragment_function(Some(&fs_function)); + Some(lib) + } + None => { + // TODO: This is a workaround for what appears to be a Metal validation bug + // A pixel format is required even though no attachments are provided + if subpass.attachments.colors.is_empty() + && subpass.attachments.depth_stencil.is_none() + { + pipeline.set_depth_attachment_pixel_format(metal::MTLPixelFormat::Depth32Float); + } + None + } + }; + + // Other shaders + if hs.is_some() { + return Err(pso::CreationError::Shader(ShaderError::UnsupportedStage( + pso::ShaderStageFlags::HULL, + ))); + } + if ds.is_some() { + return Err(pso::CreationError::Shader(ShaderError::UnsupportedStage( + pso::ShaderStageFlags::DOMAIN, + ))); + } + if gs.is_some() { + return Err(pso::CreationError::Shader(ShaderError::UnsupportedStage( + pso::ShaderStageFlags::GEOMETRY, + ))); + } + + pipeline.set_rasterization_enabled(enable_rasterization); + + // Assign target formats + let blend_targets = pipeline_desc + .blender + .targets + .iter() + .chain(iter::repeat(&pso::ColorBlendDesc::EMPTY)); + for (i, (at, color_desc)) in subpass + .attachments + .colors + .iter() + .zip(blend_targets) + .enumerate() + { + let desc = pipeline + .color_attachments() + .object_at(i as u64) + .expect("too many color attachments"); + + desc.set_pixel_format(at.format); + desc.set_write_mask(conv::map_write_mask(color_desc.mask)); + + if let Some(ref blend) = color_desc.blend { + desc.set_blending_enabled(true); + let (color_op, color_src, color_dst) = conv::map_blend_op(blend.color); + let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_op(blend.alpha); + + desc.set_rgb_blend_operation(color_op); + desc.set_source_rgb_blend_factor(color_src); + desc.set_destination_rgb_blend_factor(color_dst); + + desc.set_alpha_blend_operation(alpha_op); + desc.set_source_alpha_blend_factor(alpha_src); + desc.set_destination_alpha_blend_factor(alpha_dst); + } + } + if let Some(ref at) = subpass.attachments.depth_stencil { + let orig_format 
= rp_attachments[at.id].format.unwrap(); + if orig_format.is_depth() { + pipeline.set_depth_attachment_pixel_format(at.format); + } + if orig_format.is_stencil() { + pipeline.set_stencil_attachment_pixel_format(at.format); + } + } + + // Vertex buffers + let vertex_descriptor = metal::VertexDescriptor::new(); + let mut vertex_buffers: n::VertexBufferVec = Vec::new(); + trace!("Vertex attribute remapping started"); + + for &pso::AttributeDesc { + location, + binding, + element, + } in attributes + { + let original = desc_vertex_buffers + .iter() + .find(|vb| vb.binding == binding) + .expect("no associated vertex buffer found"); + // handle wrapping offsets + let elem_size = element.format.surface_desc().bits as pso::ElemOffset / 8; + let (cut_offset, base_offset) = + if original.stride == 0 || element.offset + elem_size <= original.stride { + (element.offset, 0) + } else { + let remainder = element.offset % original.stride; + if remainder + elem_size <= original.stride { + (remainder, element.offset - remainder) + } else { + (0, element.offset) + } + }; + let relative_index = vertex_buffers + .iter() + .position(|(ref vb, offset)| vb.binding == binding && base_offset == *offset) + .unwrap_or_else(|| { + vertex_buffers.alloc().init((original.clone(), base_offset)); + vertex_buffers.len() - 1 + }); + let mtl_buffer_index = self.shared.private_caps.max_buffers_per_stage + - 1 + - (relative_index as ResourceIndex); + if mtl_buffer_index < pipeline_layout.total.vs.buffers { + error!("Attribute offset {} exceeds the stride {}, and there is no room for replacement.", + element.offset, original.stride); + return Err(pso::CreationError::Other); + } + trace!("\tAttribute[{}] is mapped to vertex buffer[{}] with binding {} and offsets {} + {}", + location, binding, mtl_buffer_index, base_offset, cut_offset); + // pass the refined data to Metal + let mtl_attribute_desc = vertex_descriptor + .attributes() + .object_at(location as u64) + .expect("too many vertex attributes"); + 
let mtl_vertex_format = + conv::map_vertex_format(element.format).expect("unsupported vertex format"); + mtl_attribute_desc.set_format(mtl_vertex_format); + mtl_attribute_desc.set_buffer_index(mtl_buffer_index as _); + mtl_attribute_desc.set_offset(cut_offset as _); + } + + for (i, (vb, _)) in vertex_buffers.iter().enumerate() { + let mtl_buffer_desc = vertex_descriptor + .layouts() + .object_at(self.shared.private_caps.max_buffers_per_stage as u64 - 1 - i as u64) + .expect("too many vertex descriptor layouts"); + if vb.stride % STRIDE_GRANULARITY != 0 { + error!( + "Stride ({}) must be a multiple of {}", + vb.stride, STRIDE_GRANULARITY + ); + return Err(pso::CreationError::Other); + } + if vb.stride != 0 { + mtl_buffer_desc.set_stride(vb.stride as u64); + match vb.rate { + VertexInputRate::Vertex => { + mtl_buffer_desc.set_step_function(MTLVertexStepFunction::PerVertex); + } + VertexInputRate::Instance(divisor) => { + mtl_buffer_desc.set_step_function(MTLVertexStepFunction::PerInstance); + mtl_buffer_desc.set_step_rate(divisor as u64); + } + } + } else { + mtl_buffer_desc.set_stride(256); // big enough to fit all the elements + mtl_buffer_desc.set_step_function(MTLVertexStepFunction::PerInstance); + mtl_buffer_desc.set_step_rate(!0); + } + } + if !vertex_buffers.is_empty() { + pipeline.set_vertex_descriptor(Some(&vertex_descriptor)); + } + + if let pso::State::Static(w) = pipeline_desc.rasterizer.line_width { + if w != 1.0 { + warn!("Unsupported line width: {:?}", w); + } + } + + let rasterizer_state = Some(n::RasterizerState { + front_winding: conv::map_winding(pipeline_desc.rasterizer.front_face), + fill_mode: conv::map_polygon_mode(pipeline_desc.rasterizer.polygon_mode), + cull_mode: match conv::map_cull_face(pipeline_desc.rasterizer.cull_face) { + Some(mode) => mode, + None => { + //TODO - Metal validation fails with + // RasterizationEnabled is false but the vertex shader's return type is not void + error!("Culling both sides is not yet supported"); + 
//pipeline.set_rasterization_enabled(false); + metal::MTLCullMode::None + } + }, + depth_clip: if self.shared.private_caps.depth_clip_mode { + Some(if pipeline_desc.rasterizer.depth_clamping { + metal::MTLDepthClipMode::Clamp + } else { + metal::MTLDepthClipMode::Clip + }) + } else { + None + }, + }); + let depth_bias = pipeline_desc + .rasterizer + .depth_bias + .unwrap_or(pso::State::Static(pso::DepthBias::default())); + + // prepare the depth-stencil state now + let device = self.shared.device.lock(); + self.shared + .service_pipes + .depth_stencil_states + .prepare(&pipeline_desc.depth_stencil, &*device); + + let samples = if let Some(multisampling) = &pipeline_desc.multisampling { + pipeline.set_sample_count(multisampling.rasterization_samples as u64); + pipeline.set_alpha_to_coverage_enabled(multisampling.alpha_coverage); + pipeline.set_alpha_to_one_enabled(multisampling.alpha_to_one); + // TODO: sample_mask + // TODO: sample_shading + multisampling.rasterization_samples + } else { + 1 + }; + + device + .new_render_pipeline_state(&pipeline) + .map(|raw| n::GraphicsPipeline { + vs_lib, + fs_lib, + raw, + primitive_type, + vs_pc_info: pipeline_desc.layout.push_constants.vs, + ps_pc_info: pipeline_desc.layout.push_constants.ps, + rasterizer_state, + depth_bias, + depth_stencil_desc: pipeline_desc.depth_stencil.clone(), + baked_states: pipeline_desc.baked_states.clone(), + vertex_buffers, + attachment_formats: subpass.attachments.map(|at| (at.format, at.channel)), + samples, + }) + .map_err(|err| { + error!("PSO creation failed: {}", err); + pso::CreationError::Other + }) + } + + unsafe fn create_compute_pipeline<'a>( + &self, + pipeline_desc: &pso::ComputePipelineDesc<'a, Backend>, + cache: Option<&n::PipelineCache>, + ) -> Result<n::ComputePipeline, pso::CreationError> { + debug!("create_compute_pipeline {:?}", pipeline_desc); + let pipeline = metal::ComputePipelineDescriptor::new(); + + let (cs_lib, cs_function, work_group_size, _) = self.load_shader( + 
&pipeline_desc.shader, + &pipeline_desc.layout, + MTLPrimitiveTopologyClass::Unspecified, + cache, + ShaderStage::Compute, + )?; + pipeline.set_compute_function(Some(&cs_function)); + + self.shared + .device + .lock() + .new_compute_pipeline_state(&pipeline) + .map(|raw| n::ComputePipeline { + cs_lib, + raw, + work_group_size, + pc_info: pipeline_desc.layout.push_constants.cs, + }) + .map_err(|err| { + error!("PSO creation failed: {}", err); + pso::CreationError::Other + }) + } + + unsafe fn create_framebuffer<I>( + &self, + _render_pass: &n::RenderPass, + attachments: I, + extent: image::Extent, + ) -> Result<n::Framebuffer, OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<n::ImageView>, + { + Ok(n::Framebuffer { + extent, + attachments: attachments + .into_iter() + .map(|at| at.borrow().texture.clone()) + .collect(), + }) + } + + unsafe fn create_shader_module( + &self, + raw_data: &[u32], + ) -> Result<n::ShaderModule, ShaderError> { + //TODO: we can probably at least parse here and save the `Ast` + Ok(n::ShaderModule { + spv: raw_data.to_vec(), + #[cfg(feature = "naga")] + naga: match naga::front::spv::Parser::new(raw_data.iter().cloned(), &Default::default()) + .parse() + { + Ok(module) => match naga::proc::Validator::new().validate(&module) { + Ok(()) => Some(module), + Err(e) => { + warn!("Naga validation failed: {:?}", e); + None + } + }, + Err(e) => { + warn!("Naga parsing failed: {:?}", e); + None + } + }, + }) + } + + unsafe fn create_sampler( + &self, + info: &image::SamplerDesc, + ) -> Result<n::Sampler, AllocationError> { + Ok(n::Sampler { + raw: match self.make_sampler_descriptor(&info) { + Some(ref descriptor) => Some(self.shared.device.lock().new_sampler(descriptor)), + None => None, + }, + data: Self::make_sampler_data(&info), + }) + } + + unsafe fn destroy_sampler(&self, _sampler: n::Sampler) {} + + unsafe fn map_memory( + &self, + memory: &n::Memory, + segment: memory::Segment, + ) -> Result<*mut u8, MapError> { + let range = 
memory.resolve(&segment); + debug!("map_memory of size {} at {:?}", memory.size, range); + + let base_ptr = match memory.heap { + n::MemoryHeap::Public(_, ref cpu_buffer) => cpu_buffer.contents() as *mut u8, + n::MemoryHeap::Native(_) | n::MemoryHeap::Private => panic!("Unable to map memory!"), + }; + Ok(base_ptr.offset(range.start as _)) + } + + unsafe fn unmap_memory(&self, memory: &n::Memory) { + debug!("unmap_memory of size {}", memory.size); + } + + unsafe fn flush_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<(&'a n::Memory, memory::Segment)>, + { + debug!("flush_mapped_memory_ranges"); + for item in iter { + let (memory, ref segment) = *item.borrow(); + let range = memory.resolve(segment); + debug!("\trange {:?}", range); + + match memory.heap { + n::MemoryHeap::Native(_) => unimplemented!(), + n::MemoryHeap::Public(mt, ref cpu_buffer) + if 1 << mt.0 != MemoryTypes::SHARED.bits() as usize => + { + cpu_buffer.did_modify_range(NSRange { + location: range.start as _, + length: (range.end - range.start) as _, + }); + } + n::MemoryHeap::Public(..) 
=> continue, + n::MemoryHeap::Private => panic!("Can't map private memory!"), + }; + } + + Ok(()) + } + + unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<(&'a n::Memory, memory::Segment)>, + { + let mut num_syncs = 0; + debug!("invalidate_mapped_memory_ranges"); + + // temporary command buffer to copy the contents from + // the given buffers into the allocated CPU-visible buffers + // Note: using a separate internal queue in order to avoid a stall + let cmd_buffer = self.invalidation_queue.spawn_temp(); + autoreleasepool(|| { + let encoder = cmd_buffer.new_blit_command_encoder(); + + for item in iter { + let (memory, ref segment) = *item.borrow(); + let range = memory.resolve(segment); + debug!("\trange {:?}", range); + + match memory.heap { + n::MemoryHeap::Native(_) => unimplemented!(), + n::MemoryHeap::Public(mt, ref cpu_buffer) + if 1 << mt.0 != MemoryTypes::SHARED.bits() as usize => + { + num_syncs += 1; + encoder.synchronize_resource(cpu_buffer); + } + n::MemoryHeap::Public(..) => continue, + n::MemoryHeap::Private => panic!("Can't map private memory!"), + }; + } + encoder.end_encoding(); + }); + + if num_syncs != 0 { + debug!("\twaiting..."); + cmd_buffer.set_label("invalidate_mapped_memory_ranges"); + cmd_buffer.commit(); + cmd_buffer.wait_until_completed(); + } + + Ok(()) + } + + fn create_semaphore(&self) -> Result<n::Semaphore, OutOfMemory> { + Ok(n::Semaphore { + // Semaphore synchronization between command buffers of the same queue + // is useless, don't bother even creating one. 
+ system: if self.shared.private_caps.exposed_queues > 1 { + Some(n::SystemSemaphore::new()) + } else { + None + }, + }) + } + + unsafe fn create_descriptor_pool<I>( + &self, + max_sets: usize, + descriptor_ranges: I, + _flags: pso::DescriptorPoolCreateFlags, + ) -> Result<n::DescriptorPool, OutOfMemory> + where + I: IntoIterator, + I::Item: Borrow<pso::DescriptorRangeDesc>, + { + if self.shared.private_caps.argument_buffers { + let mut arguments = n::ArgumentArray::default(); + for desc_range in descriptor_ranges { + let dr = desc_range.borrow(); + let content = n::DescriptorContent::from(dr.ty); + let usage = n::ArgumentArray::describe_usage(dr.ty); + if content.contains(n::DescriptorContent::BUFFER) { + arguments.push(metal::MTLDataType::Pointer, dr.count, usage); + } + if content.contains(n::DescriptorContent::TEXTURE) { + arguments.push(metal::MTLDataType::Texture, dr.count, usage); + } + if content.contains(n::DescriptorContent::SAMPLER) { + arguments.push(metal::MTLDataType::Sampler, dr.count, usage); + } + } + + let device = self.shared.device.lock(); + let (array_ref, total_resources) = arguments.build(); + let encoder = device.new_argument_encoder(array_ref); + + let alignment = self.shared.private_caps.buffer_alignment; + let total_size = encoder.encoded_length() + (max_sets as u64) * alignment; + let raw = device.new_buffer(total_size, MTLResourceOptions::empty()); + + Ok(n::DescriptorPool::new_argument( + raw, + total_size, + alignment, + total_resources, + )) + } else { + let mut counters = n::ResourceData::<n::PoolResourceIndex>::new(); + for desc_range in descriptor_ranges { + let dr = desc_range.borrow(); + counters.add_many( + n::DescriptorContent::from(dr.ty), + dr.count as pso::DescriptorBinding, + ); + } + Ok(n::DescriptorPool::new_emulated(counters)) + } + } + + unsafe fn create_descriptor_set_layout<I, J>( + &self, + binding_iter: I, + immutable_samplers: J, + ) -> Result<n::DescriptorSetLayout, OutOfMemory> + where + I: IntoIterator, + 
I::Item: Borrow<pso::DescriptorSetLayoutBinding>, + J: IntoIterator, + J::Item: Borrow<n::Sampler>, + { + if self.shared.private_caps.argument_buffers { + let mut stage_flags = pso::ShaderStageFlags::empty(); + let mut arguments = n::ArgumentArray::default(); + let mut bindings = FastHashMap::default(); + for desc in binding_iter { + let desc = desc.borrow(); + //TODO: have the API providing the dimensions and MSAA flag + // for textures in an argument buffer + match desc.ty { + pso::DescriptorType::Buffer { + format: + pso::BufferDescriptorFormat::Structured { + dynamic_offset: true, + }, + .. + } => { + //TODO: apply the offsets somehow at the binding time + error!("Dynamic offsets are not yet supported in argument buffers!"); + } + pso::DescriptorType::Image { + ty: pso::ImageDescriptorType::Storage { .. }, + } + | pso::DescriptorType::Buffer { + ty: pso::BufferDescriptorType::Storage { .. }, + format: pso::BufferDescriptorFormat::Texel, + } => { + //TODO: bind storage buffers and images separately + error!("Storage images are not yet supported in argument buffers!"); + } + _ => {} + } + + stage_flags |= desc.stage_flags; + let content = n::DescriptorContent::from(desc.ty); + let usage = n::ArgumentArray::describe_usage(desc.ty); + let res = msl::ResourceBinding { + buffer_id: if content.contains(n::DescriptorContent::BUFFER) { + arguments.push(metal::MTLDataType::Pointer, desc.count, usage) as u32 + } else { + !0 + }, + texture_id: if content.contains(n::DescriptorContent::TEXTURE) { + arguments.push(metal::MTLDataType::Texture, desc.count, usage) as u32 + } else { + !0 + }, + sampler_id: if content.contains(n::DescriptorContent::SAMPLER) { + arguments.push(metal::MTLDataType::Sampler, desc.count, usage) as u32 + } else { + !0 + }, + }; + let res_offset = res.buffer_id.min(res.texture_id).min(res.sampler_id); + bindings.insert( + desc.binding, + n::ArgumentLayout { + res, + res_offset, + count: desc.count, + usage, + content, + }, + ); + } + + let (array_ref, 
arg_total) = arguments.build(); + let encoder = self.shared.device.lock().new_argument_encoder(array_ref); + + Ok(n::DescriptorSetLayout::ArgumentBuffer { + encoder, + stage_flags, + bindings: Arc::new(bindings), + total: arg_total as n::PoolResourceIndex, + }) + } else { + struct TempSampler { + data: msl::SamplerData, + binding: pso::DescriptorBinding, + array_index: pso::DescriptorArrayIndex, + }; + let mut immutable_sampler_iter = immutable_samplers.into_iter(); + let mut tmp_samplers = Vec::new(); + let mut desc_layouts = Vec::new(); + let mut total = n::ResourceData::new(); + + for set_layout_binding in binding_iter { + let slb = set_layout_binding.borrow(); + let mut content = n::DescriptorContent::from(slb.ty); + total.add(content); + + if slb.immutable_samplers { + tmp_samplers.extend( + immutable_sampler_iter + .by_ref() + .take(slb.count) + .enumerate() + .map(|(array_index, sm)| TempSampler { + data: sm.borrow().data.clone(), + binding: slb.binding, + array_index, + }), + ); + content |= n::DescriptorContent::IMMUTABLE_SAMPLER; + } + + desc_layouts.extend((0..slb.count).map(|array_index| n::DescriptorLayout { + content, + stages: slb.stage_flags, + binding: slb.binding, + array_index, + })); + } + + desc_layouts.sort_by_key(|dl| (dl.binding, dl.array_index)); + tmp_samplers.sort_by_key(|ts| (ts.binding, ts.array_index)); + // From here on, we assume that `desc_layouts` has at most a single item for + // a (binding, array_index) pair. 
To achieve that, we deduplicate the array now + desc_layouts.dedup_by(|a, b| { + if (a.binding, a.array_index) == (b.binding, b.array_index) { + debug_assert!(!b.stages.intersects(a.stages)); + debug_assert_eq!(a.content, b.content); //TODO: double check if this can be demanded + b.stages |= a.stages; //`b` is here to stay + true + } else { + false + } + }); + + Ok(n::DescriptorSetLayout::Emulated { + layouts: Arc::new(desc_layouts), + total, + immutable_samplers: tmp_samplers + .into_iter() + .map(|ts| (ts.binding, ts.data)) + .collect(), + }) + } + } + + unsafe fn write_descriptor_sets<'a, I, J>(&self, write_iter: I) + where + I: IntoIterator<Item = pso::DescriptorSetWrite<'a, Backend, J>>, + J: IntoIterator, + J::Item: Borrow<pso::Descriptor<'a, Backend>>, + { + debug!("write_descriptor_sets"); + for write in write_iter { + match *write.set { + n::DescriptorSet::Emulated { + ref pool, + ref layouts, + ref resources, + } => { + let mut counters = resources.map(|r| r.start); + let mut start = None; //TODO: can pre-compute this + for (i, layout) in layouts.iter().enumerate() { + if layout.binding == write.binding + && layout.array_index == write.array_offset + { + start = Some(i); + break; + } + counters.add(layout.content); + } + let mut data = pool.write(); + + for (layout, descriptor) in + layouts[start.unwrap()..].iter().zip(write.descriptors) + { + trace!("\t{:?}", layout); + match *descriptor.borrow() { + pso::Descriptor::Sampler(sam) => { + debug_assert!(!layout + .content + .contains(n::DescriptorContent::IMMUTABLE_SAMPLER)); + data.samplers[counters.samplers as usize] = ( + layout.stages, + Some(AsNative::from(sam.raw.as_ref().unwrap().as_ref())), + ); + } + pso::Descriptor::Image(view, il) => { + data.textures[counters.textures as usize] = ( + layout.stages, + Some(AsNative::from(view.texture.as_ref())), + il, + ); + } + pso::Descriptor::CombinedImageSampler(view, il, sam) => { + if !layout + .content + .contains(n::DescriptorContent::IMMUTABLE_SAMPLER) + 
{ + data.samplers[counters.samplers as usize] = ( + layout.stages, + Some(AsNative::from(sam.raw.as_ref().unwrap().as_ref())), + ); + } + data.textures[counters.textures as usize] = ( + layout.stages, + Some(AsNative::from(view.texture.as_ref())), + il, + ); + } + pso::Descriptor::TexelBuffer(view) => { + data.textures[counters.textures as usize] = ( + layout.stages, + Some(AsNative::from(view.raw.as_ref())), + image::Layout::General, + ); + } + pso::Descriptor::Buffer(buf, ref sub) => { + let (raw, range) = buf.as_bound(); + debug_assert!( + range.start + sub.offset + sub.size.unwrap_or(0) <= range.end + ); + data.buffers[counters.buffers as usize] = ( + layout.stages, + Some(AsNative::from(raw)), + range.start + sub.offset, + ); + } + } + counters.add(layout.content); + } + } + n::DescriptorSet::ArgumentBuffer { + ref raw, + raw_offset, + ref pool, + ref range, + ref encoder, + ref bindings, + .. + } => { + debug_assert!(self.shared.private_caps.argument_buffers); + + encoder.set_argument_buffer(raw, raw_offset); + let mut arg_index = { + let binding = &bindings[&write.binding]; + debug_assert!((write.array_offset as usize) < binding.count); + (binding.res_offset as NSUInteger) + (write.array_offset as NSUInteger) + }; + + for (data, descriptor) in pool.write().resources + [range.start as usize + arg_index as usize..range.end as usize] + .iter_mut() + .zip(write.descriptors) + { + match *descriptor.borrow() { + pso::Descriptor::Sampler(sampler) => { + debug_assert!(!bindings[&write.binding] + .content + .contains(n::DescriptorContent::IMMUTABLE_SAMPLER)); + encoder.set_sampler_state(arg_index, sampler.raw.as_ref().unwrap()); + arg_index += 1; + } + pso::Descriptor::Image(image, _layout) => { + let tex_ref = image.texture.as_ref(); + encoder.set_texture(arg_index, tex_ref); + data.ptr = (&**tex_ref).as_ptr(); + arg_index += 1; + } + pso::Descriptor::CombinedImageSampler(image, _il, sampler) => { + let binding = &bindings[&write.binding]; + if !binding + .content + 
.contains(n::DescriptorContent::IMMUTABLE_SAMPLER) + { + //TODO: supporting arrays of combined image-samplers can be tricky. + // We need to scan both sampler and image sections of the encoder + // at the same time. + assert!( + arg_index + < (binding.res_offset as NSUInteger) + + (binding.count as NSUInteger) + ); + encoder.set_sampler_state( + arg_index + binding.count as NSUInteger, + sampler.raw.as_ref().unwrap(), + ); + } + let tex_ref = image.texture.as_ref(); + encoder.set_texture(arg_index, tex_ref); + data.ptr = (&**tex_ref).as_ptr(); + } + pso::Descriptor::TexelBuffer(view) => { + encoder.set_texture(arg_index, &view.raw); + data.ptr = (&**view.raw).as_ptr(); + arg_index += 1; + } + pso::Descriptor::Buffer(buffer, ref sub) => { + let (buf_raw, buf_range) = buffer.as_bound(); + encoder.set_buffer( + arg_index, + buf_raw, + buf_range.start + sub.offset, + ); + data.ptr = (&**buf_raw).as_ptr(); + arg_index += 1; + } + } + } + } + } + } + } + + unsafe fn copy_descriptor_sets<'a, I>(&self, copies: I) + where + I: IntoIterator, + I::Item: Borrow<pso::DescriptorSetCopy<'a, Backend>>, + { + for _copy in copies { + unimplemented!() + } + } + + unsafe fn destroy_descriptor_pool(&self, _pool: n::DescriptorPool) {} + + unsafe fn destroy_descriptor_set_layout(&self, _layout: n::DescriptorSetLayout) {} + + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: n::PipelineLayout) {} + + unsafe fn destroy_shader_module(&self, _module: n::ShaderModule) {} + + unsafe fn destroy_render_pass(&self, _pass: n::RenderPass) {} + + unsafe fn destroy_graphics_pipeline(&self, _pipeline: n::GraphicsPipeline) {} + + unsafe fn destroy_compute_pipeline(&self, _pipeline: n::ComputePipeline) {} + + unsafe fn destroy_framebuffer(&self, _buffer: n::Framebuffer) {} + + unsafe fn destroy_semaphore(&self, _semaphore: n::Semaphore) {} + + unsafe fn allocate_memory( + &self, + memory_type: hal::MemoryTypeId, + size: u64, + ) -> Result<n::Memory, AllocationError> { + let (storage, cache) = 
MemoryTypes::describe(memory_type.0); + let device = self.shared.device.lock(); + debug!("allocate_memory type {:?} of size {}", memory_type, size); + + // Heaps cannot be used for CPU coherent resources + //TEMP: MacOS supports Private only, iOS and tvOS can do private/shared + let heap = if self.shared.private_caps.resource_heaps + && storage != MTLStorageMode::Shared + && false + { + let descriptor = metal::HeapDescriptor::new(); + descriptor.set_storage_mode(storage); + descriptor.set_cpu_cache_mode(cache); + descriptor.set_size(size); + let heap_raw = device.new_heap(&descriptor); + n::MemoryHeap::Native(heap_raw) + } else if storage == MTLStorageMode::Private { + n::MemoryHeap::Private + } else { + let options = conv::resource_options_from_storage_and_cache(storage, cache); + let cpu_buffer = device.new_buffer(size, options); + debug!("\tbacked by cpu buffer {:?}", cpu_buffer.as_ptr()); + n::MemoryHeap::Public(memory_type, cpu_buffer) + }; + + Ok(n::Memory::new(heap, size)) + } + + unsafe fn free_memory(&self, memory: n::Memory) { + debug!("free_memory of size {}", memory.size); + if let n::MemoryHeap::Public(_, ref cpu_buffer) = memory.heap { + debug!("\tbacked by cpu buffer {:?}", cpu_buffer.as_ptr()); + } + } + + unsafe fn create_buffer( + &self, + size: u64, + usage: buffer::Usage, + ) -> Result<n::Buffer, buffer::CreationError> { + debug!("create_buffer of size {} and usage {:?}", size, usage); + Ok(n::Buffer::Unbound { + usage, + size, + name: String::new(), + }) + } + + unsafe fn get_buffer_requirements(&self, buffer: &n::Buffer) -> memory::Requirements { + let (size, usage) = match *buffer { + n::Buffer::Unbound { size, usage, .. } => (size, usage), + n::Buffer::Bound { .. 
} => panic!("Unexpected Buffer::Bound"), + }; + let mut max_size = size; + let mut max_alignment = self.shared.private_caps.buffer_alignment; + + if self.shared.private_caps.resource_heaps { + // We don't know what memory type the user will try to allocate the buffer with, so we test them + // all and take the most stringent requirements. + for (i, _mt) in self.memory_types.iter().enumerate() { + let (storage, cache) = MemoryTypes::describe(i); + let options = conv::resource_options_from_storage_and_cache(storage, cache); + let requirements = self + .shared + .device + .lock() + .heap_buffer_size_and_align(size, options); + max_size = cmp::max(max_size, requirements.size); + max_alignment = cmp::max(max_alignment, requirements.align); + } + } + + // based on Metal validation error for view creation: + // failed assertion `BytesPerRow of a buffer-backed texture with pixelFormat(XXX) must be aligned to 256 bytes` + const SIZE_MASK: u64 = 0xFF; + let supports_texel_view = + usage.intersects(buffer::Usage::UNIFORM_TEXEL | buffer::Usage::STORAGE_TEXEL); + + memory::Requirements { + size: (max_size + SIZE_MASK) & !SIZE_MASK, + alignment: max_alignment, + type_mask: if !supports_texel_view || self.shared.private_caps.shared_textures { + MemoryTypes::all().bits() + } else { + (MemoryTypes::all() ^ MemoryTypes::SHARED).bits() + }, + } + } + + unsafe fn bind_buffer_memory( + &self, + memory: &n::Memory, + offset: u64, + buffer: &mut n::Buffer, + ) -> Result<(), BindError> { + let (size, name) = match buffer { + n::Buffer::Unbound { size, name, .. } => (*size, name), + n::Buffer::Bound { .. } => panic!("Unexpected Buffer::Bound"), + }; + debug!("bind_buffer_memory of size {} at offset {}", size, offset); + *buffer = match memory.heap { + n::MemoryHeap::Native(ref heap) => { + let options = conv::resource_options_from_storage_and_cache( + heap.storage_mode(), + heap.cpu_cache_mode(), + ); + let raw = heap.new_buffer(size, options).unwrap_or_else(|| { + // TODO: disable hazard tracking? 
+ self.shared.device.lock().new_buffer(size, options) + }); + raw.set_label(name); + n::Buffer::Bound { + raw, + options, + range: 0..size, //TODO? + } + } + n::MemoryHeap::Public(mt, ref cpu_buffer) => { + debug!( + "\tmapped to public heap with address {:?}", + cpu_buffer.as_ptr() + ); + let (storage, cache) = MemoryTypes::describe(mt.0); + let options = conv::resource_options_from_storage_and_cache(storage, cache); + if offset == 0x0 && size == cpu_buffer.length() { + cpu_buffer.set_label(name); + } else if self.shared.private_caps.supports_debug_markers { + cpu_buffer.add_debug_marker( + name, + NSRange { + location: offset, + length: size, + }, + ); + } + n::Buffer::Bound { + raw: cpu_buffer.clone(), + options, + range: offset..offset + size, + } + } + n::MemoryHeap::Private => { + //TODO: check for aliasing + let options = MTLResourceOptions::StorageModePrivate + | MTLResourceOptions::CPUCacheModeDefaultCache; + let raw = self.shared.device.lock().new_buffer(size, options); + raw.set_label(name); + n::Buffer::Bound { + raw, + options, + range: 0..size, + } + } + }; + + Ok(()) + } + + unsafe fn destroy_buffer(&self, buffer: n::Buffer) { + if let n::Buffer::Bound { raw, range, .. } = buffer { + debug!( + "destroy_buffer {:?} occupying memory {:?}", + raw.as_ptr(), + range + ); + } + } + + unsafe fn create_buffer_view( + &self, + buffer: &n::Buffer, + format_maybe: Option<format::Format>, + sub: buffer::SubRange, + ) -> Result<n::BufferView, buffer::ViewCreationError> { + let (raw, base_range, options) = match *buffer { + n::Buffer::Bound { + ref raw, + ref range, + options, + } => (raw, range, options), + n::Buffer::Unbound { .. 
} => panic!("Unexpected Buffer::Unbound"), + }; + let start = base_range.start + sub.offset; + let size_rough = sub.size.unwrap_or(base_range.end - start); + let format = match format_maybe { + Some(fmt) => fmt, + None => { + return Err(buffer::ViewCreationError::UnsupportedFormat(format_maybe)); + } + }; + let format_desc = format.surface_desc(); + if format_desc.aspects != format::Aspects::COLOR || format_desc.is_compressed() { + // Validator says "Linear texture: cannot create compressed, depth, or stencil textures" + return Err(buffer::ViewCreationError::UnsupportedFormat(format_maybe)); + } + + //Note: we rely on SPIRV-Cross to use the proper 2D texel indexing here + let texel_count = size_rough * 8 / format_desc.bits as u64; + let col_count = cmp::min(texel_count, self.shared.private_caps.max_texture_size); + let row_count = (texel_count + self.shared.private_caps.max_texture_size - 1) + / self.shared.private_caps.max_texture_size; + let mtl_format = self + .shared + .private_caps + .map_format(format) + .ok_or(buffer::ViewCreationError::UnsupportedFormat(format_maybe))?; + + let descriptor = metal::TextureDescriptor::new(); + descriptor.set_texture_type(MTLTextureType::D2); + descriptor.set_width(col_count); + descriptor.set_height(row_count); + descriptor.set_mipmap_level_count(1); + descriptor.set_pixel_format(mtl_format); + descriptor.set_resource_options(options); + descriptor.set_storage_mode(raw.storage_mode()); + descriptor.set_usage(metal::MTLTextureUsage::ShaderRead); + + let align_mask = self.shared.private_caps.buffer_alignment - 1; + let stride = (col_count * (format_desc.bits as u64 / 8) + align_mask) & !align_mask; + + Ok(n::BufferView { + raw: raw.new_texture_from_contents(&descriptor, start, stride), + }) + } + + unsafe fn destroy_buffer_view(&self, _view: n::BufferView) { + //nothing to do + } + + unsafe fn create_image( + &self, + kind: image::Kind, + mip_levels: image::Level, + format: format::Format, + tiling: image::Tiling, + usage: 
image::Usage, + view_caps: image::ViewCapabilities, + ) -> Result<n::Image, image::CreationError> { + debug!( + "create_image {:?} with {} mips of {:?} {:?} and usage {:?} with {:?}", + kind, mip_levels, format, tiling, usage, view_caps + ); + + let is_cube = view_caps.contains(image::ViewCapabilities::KIND_CUBE); + let mtl_format = self + .shared + .private_caps + .map_format(format) + .ok_or_else(|| image::CreationError::Format(format))?; + + let descriptor = metal::TextureDescriptor::new(); + + let (mtl_type, num_layers) = match kind { + image::Kind::D1(_, 1) => { + assert!(!is_cube); + (MTLTextureType::D1, None) + } + image::Kind::D1(_, layers) => { + assert!(!is_cube); + (MTLTextureType::D1Array, Some(layers)) + } + image::Kind::D2(_, _, layers, 1) => { + if is_cube && layers > 6 { + assert_eq!(layers % 6, 0); + (MTLTextureType::CubeArray, Some(layers / 6)) + } else if is_cube { + assert_eq!(layers, 6); + (MTLTextureType::Cube, None) + } else if layers > 1 { + (MTLTextureType::D2Array, Some(layers)) + } else { + (MTLTextureType::D2, None) + } + } + image::Kind::D2(_, _, 1, samples) if !is_cube => { + descriptor.set_sample_count(samples as u64); + (MTLTextureType::D2Multisample, None) + } + image::Kind::D2(..) => { + error!( + "Multi-sampled array textures or cubes are not supported: {:?}", + kind + ); + return Err(image::CreationError::Kind); + } + image::Kind::D3(..) 
=> { + assert!(!is_cube); + if view_caps.contains(image::ViewCapabilities::KIND_2D_ARRAY) { + warn!("Unable to support 2D array views of 3D textures"); + } + (MTLTextureType::D3, None) + } + }; + + descriptor.set_texture_type(mtl_type); + if let Some(count) = num_layers { + descriptor.set_array_length(count as u64); + } + let extent = kind.extent(); + descriptor.set_width(extent.width as u64); + descriptor.set_height(extent.height as u64); + descriptor.set_depth(extent.depth as u64); + descriptor.set_mipmap_level_count(mip_levels as u64); + descriptor.set_pixel_format(mtl_format); + descriptor.set_usage(conv::map_texture_usage(usage, tiling, view_caps)); + + let base = format.base_format(); + let format_desc = base.0.desc(); + let mip_sizes = (0..mip_levels) + .map(|level| { + let pitches = n::Image::pitches_impl(extent.at_level(level), format_desc); + num_layers.unwrap_or(1) as buffer::Offset * pitches[3] + }) + .collect(); + + let host_usage = image::Usage::TRANSFER_SRC | image::Usage::TRANSFER_DST; + let host_visible = mtl_type == MTLTextureType::D2 + && mip_levels == 1 + && num_layers.is_none() + && format_desc.aspects.contains(format::Aspects::COLOR) + && tiling == image::Tiling::Linear + && host_usage.contains(usage); + + Ok(n::Image { + like: n::ImageLike::Unbound { + descriptor, + mip_sizes, + host_visible, + name: String::new(), + }, + kind, + mip_levels, + format_desc, + shader_channel: base.1.into(), + mtl_format, + mtl_type, + }) + } + + unsafe fn get_image_requirements(&self, image: &n::Image) -> memory::Requirements { + let (descriptor, mip_sizes, host_visible) = match image.like { + n::ImageLike::Unbound { + ref descriptor, + ref mip_sizes, + host_visible, + .. + } => (descriptor, mip_sizes, host_visible), + n::ImageLike::Texture(..) | n::ImageLike::Buffer(..) 
=> { + panic!("Expected Image::Unbound") + } + }; + + if self.shared.private_caps.resource_heaps { + // We don't know what memory type the user will try to allocate the image with, so we test them + // all and take the most stringent requirements. Note we don't check Shared because heaps can't use it + let mut max_size = 0; + let mut max_alignment = 0; + let types = if host_visible { + MemoryTypes::all() + } else { + MemoryTypes::PRIVATE + }; + for (i, _) in self.memory_types.iter().enumerate() { + if !types.contains(MemoryTypes::from_bits(1 << i).unwrap()) { + continue; + } + let (storage, cache_mode) = MemoryTypes::describe(i); + descriptor.set_storage_mode(storage); + descriptor.set_cpu_cache_mode(cache_mode); + + let requirements = self + .shared + .device + .lock() + .heap_texture_size_and_align(descriptor); + max_size = cmp::max(max_size, requirements.size); + max_alignment = cmp::max(max_alignment, requirements.align); + } + memory::Requirements { + size: max_size, + alignment: max_alignment, + type_mask: types.bits(), + } + } else if host_visible { + assert_eq!(mip_sizes.len(), 1); + let mask = self.shared.private_caps.buffer_alignment - 1; + memory::Requirements { + size: (mip_sizes[0] + mask) & !mask, + alignment: self.shared.private_caps.buffer_alignment, + type_mask: MemoryTypes::all().bits(), + } + } else { + memory::Requirements { + size: mip_sizes.iter().sum(), + alignment: 4, + type_mask: MemoryTypes::PRIVATE.bits(), + } + } + } + + unsafe fn get_image_subresource_footprint( + &self, + image: &n::Image, + sub: image::Subresource, + ) -> image::SubresourceFootprint { + let num_layers = image.kind.num_layers() as buffer::Offset; + let level_offset = (0..sub.level).fold(0, |offset, level| { + let pitches = image.pitches(level); + offset + num_layers * pitches[3] + }); + let pitches = image.pitches(sub.level); + let layer_offset = level_offset + sub.layer as buffer::Offset * pitches[3]; + image::SubresourceFootprint { + slice: layer_offset..layer_offset + 
pitches[3], + row_pitch: pitches[1] as _, + depth_pitch: pitches[2] as _, + array_pitch: pitches[3] as _, + } + } + + unsafe fn bind_image_memory( + &self, + memory: &n::Memory, + offset: u64, + image: &mut n::Image, + ) -> Result<(), BindError> { + let like = { + let (descriptor, mip_sizes, name) = match image.like { + n::ImageLike::Unbound { + ref descriptor, + ref mip_sizes, + ref name, + .. + } => (descriptor, mip_sizes, name), + n::ImageLike::Texture(..) | n::ImageLike::Buffer(..) => { + panic!("Expected Image::Unbound") + } + }; + + match memory.heap { + n::MemoryHeap::Native(ref heap) => { + let resource_options = conv::resource_options_from_storage_and_cache( + heap.storage_mode(), + heap.cpu_cache_mode(), + ); + descriptor.set_resource_options(resource_options); + n::ImageLike::Texture(heap.new_texture(descriptor).unwrap_or_else(|| { + // TODO: disable hazard tracking? + let texture = self.shared.device.lock().new_texture(&descriptor); + texture.set_label(name); + texture + })) + } + n::MemoryHeap::Public(_memory_type, ref cpu_buffer) => { + assert_eq!(mip_sizes.len(), 1); + if offset == 0x0 && cpu_buffer.length() == mip_sizes[0] { + cpu_buffer.set_label(name); + } else if self.shared.private_caps.supports_debug_markers { + cpu_buffer.add_debug_marker( + name, + NSRange { + location: offset, + length: mip_sizes[0], + }, + ); + } + n::ImageLike::Buffer(n::Buffer::Bound { + raw: cpu_buffer.clone(), + range: offset..offset + mip_sizes[0] as u64, + options: MTLResourceOptions::StorageModeShared, + }) + } + n::MemoryHeap::Private => { + descriptor.set_storage_mode(MTLStorageMode::Private); + let texture = self.shared.device.lock().new_texture(descriptor); + texture.set_label(name); + n::ImageLike::Texture(texture) + } + } + }; + + Ok(image.like = like) + } + + unsafe fn destroy_image(&self, _image: n::Image) { + //nothing to do + } + + unsafe fn create_image_view( + &self, + image: &n::Image, + kind: image::ViewKind, + format: format::Format, + swizzle: 
format::Swizzle, + range: image::SubresourceRange, + ) -> Result<n::ImageView, image::ViewCreationError> { + let mtl_format = match self + .shared + .private_caps + .map_format_with_swizzle(format, swizzle) + { + Some(f) => f, + None => { + error!("failed to swizzle format {:?} with {:?}", format, swizzle); + return Err(image::ViewCreationError::BadFormat(format)); + } + }; + let raw = image.like.as_texture(); + let full_range = image::SubresourceRange { + aspects: image.format_desc.aspects, + ..Default::default() + }; + let mtl_type = if image.mtl_type == MTLTextureType::D2Multisample { + if kind != image::ViewKind::D2 { + error!("Requested {:?} for MSAA texture", kind); + } + image.mtl_type + } else { + conv::map_texture_type(kind) + }; + + let texture = if mtl_format == image.mtl_format + && mtl_type == image.mtl_type + && swizzle == format::Swizzle::NO + && range == full_range + { + // Some images are marked as framebuffer-only, and we can't create aliases of them. + // Also helps working around Metal bugs with aliased array textures. 
+ raw.to_owned() + } else { + raw.new_texture_view_from_slice( + mtl_format, + mtl_type, + NSRange { + location: range.level_start as _, + length: range.resolve_level_count(image.mip_levels) as _, + }, + NSRange { + location: range.layer_start as _, + length: range.resolve_layer_count(image.kind.num_layers()) as _, + }, + ) + }; + + Ok(n::ImageView { + texture, + mtl_format, + }) + } + + unsafe fn destroy_image_view(&self, _view: n::ImageView) {} + + fn create_fence(&self, signaled: bool) -> Result<n::Fence, OutOfMemory> { + let mutex = Mutex::new(n::FenceInner::Idle { signaled }); + debug!( + "Creating fence ptr {:?} with signal={}", + unsafe { mutex.raw() } as *const _, + signaled + ); + Ok(n::Fence(mutex)) + } + + unsafe fn reset_fence(&self, fence: &n::Fence) -> Result<(), OutOfMemory> { + debug!("Resetting fence ptr {:?}", fence.0.raw() as *const _); + *fence.0.lock() = n::FenceInner::Idle { signaled: false }; + Ok(()) + } + + /// Blocks until `fence` is signaled or `timeout_ns` nanoseconds have elapsed. + /// + /// Returns `Ok(true)` once the pending command buffer has completed and `Ok(false)` + /// on timeout. An idle fence is never waited on: its current `signaled` state is + /// returned immediately (with a warning if it is not signaled). A timeout of `!0` + /// waits for completion without a deadline; otherwise the command buffer status is + /// polled, sleeping one millisecond between checks. + unsafe fn wait_for_fence( + &self, + fence: &n::Fence, + timeout_ns: u64, + ) -> Result<bool, OomOrDeviceLost> { + unsafe fn to_ns(duration: time::Duration) -> u64 { + duration.as_secs() * 1_000_000_000 + duration.subsec_nanos() as u64 + } + + debug!("wait_for_fence {:?} for {} ns", fence, timeout_ns); + match *fence.0.lock() { + n::FenceInner::Idle { signaled } => { + if !signaled { + warn!( + "Fence ptr {:?} is not pending, waiting not possible", + fence.0.raw() as *const _ + ); + } + Ok(signaled) + } + n::FenceInner::PendingSubmission(ref cmd_buf) => { + if timeout_ns == !0 { + cmd_buf.wait_until_completed(); + return Ok(true); + } + let start = time::Instant::now(); + loop { + if let metal::MTLCommandBufferStatus::Completed = cmd_buf.status() { + return Ok(true); + } + if to_ns(start.elapsed()) >= timeout_ns { + return Ok(false); + } + thread::sleep(time::Duration::from_millis(1)); + self.shared.queue_blocker.lock().triage(); + } + } + } + } + + unsafe fn get_fence_status(&self, fence: &n::Fence) -> Result<bool, DeviceLost> { + 
Ok(match *fence.0.lock() { + n::FenceInner::Idle { signaled } => signaled, + n::FenceInner::PendingSubmission(ref cmd_buf) => match cmd_buf.status() { + metal::MTLCommandBufferStatus::Completed => true, + _ => false, + }, + }) + } + + unsafe fn destroy_fence(&self, _fence: n::Fence) { + //empty + } + + fn create_event(&self) -> Result<n::Event, OutOfMemory> { + Ok(n::Event(Arc::new(AtomicBool::new(false)))) + } + + unsafe fn get_event_status(&self, event: &n::Event) -> Result<bool, OomOrDeviceLost> { + Ok(event.0.load(Ordering::Acquire)) + } + + unsafe fn set_event(&self, event: &n::Event) -> Result<(), OutOfMemory> { + event.0.store(true, Ordering::Release); + self.shared.queue_blocker.lock().triage(); + Ok(()) + } + + unsafe fn reset_event(&self, event: &n::Event) -> Result<(), OutOfMemory> { + Ok(event.0.store(false, Ordering::Release)) + } + + unsafe fn destroy_event(&self, _event: n::Event) { + //empty + } + + unsafe fn create_query_pool( + &self, + ty: query::Type, + count: query::Id, + ) -> Result<n::QueryPool, query::CreationError> { + match ty { + query::Type::Occlusion => { + let range = self + .shared + .visibility + .allocator + .lock() + .allocate_range(count) + .map_err(|_| { + error!("Not enough space to allocate an occlusion query pool"); + OutOfMemory::Host + })?; + Ok(n::QueryPool::Occlusion(range)) + } + query::Type::Timestamp => { + warn!("Timestamp queries are not really useful yet"); + Ok(n::QueryPool::Timestamp) + } + query::Type::PipelineStatistics(..) 
=> Err(query::CreationError::Unsupported(ty)), + } + } + + unsafe fn destroy_query_pool(&self, pool: n::QueryPool) { + match pool { + n::QueryPool::Occlusion(range) => { + self.shared.visibility.allocator.lock().free_range(range); + } + n::QueryPool::Timestamp => {} + } + } + + /// Copies the results of `queries` (indices relative to the start of `pool`) into `data`. + /// + /// Entry `i` is written at byte offset `i * stride`. With `BITS_64` the value is stored + /// as a `u64`, otherwise it is truncated to `u32`; with `WITH_AVAILABILITY` the + /// availability word follows the value at the same width. With `WAIT` the call blocks + /// until all requested queries are available. Timestamp pools only zero `data`. + /// + /// Returns whether the requested results were available. + /// + /// # Safety + /// + /// `data` must be large enough for every requested entry: neither the bulk-copy path + /// nor the per-entry stores are checked against the end of `data`. + unsafe fn get_query_pool_results( + &self, + pool: &n::QueryPool, + queries: Range<query::Id>, + data: &mut [u8], + stride: buffer::Offset, + flags: query::ResultFlags, + ) -> Result<bool, OomOrDeviceLost> { + let is_ready = match *pool { + n::QueryPool::Occlusion(ref pool_range) => { + let visibility = &self.shared.visibility; + let is_ready = if flags.contains(query::ResultFlags::WAIT) { + let mut guard = visibility.allocator.lock(); + while !visibility.are_available(pool_range.start, &queries) { + visibility.condvar.wait(&mut guard); + } + true + } else { + visibility.are_available(pool_range.start, &queries) + }; + + let size_data = mem::size_of::<u64>() as buffer::Offset; + if stride == size_data + && flags.contains(query::ResultFlags::BITS_64) + && !flags.contains(query::ResultFlags::WITH_AVAILABILITY) + { + // if stride is matching, copy everything in one go + ptr::copy_nonoverlapping( + (visibility.buffer.contents() as *const u8).offset( + (pool_range.start + queries.start) as isize * size_data as isize, + ), + data.as_mut_ptr(), + stride as usize * (queries.end - queries.start) as usize, + ); + } else { + // copy parts of individual entries + for i in 0..queries.end - queries.start { + let absolute_index = (pool_range.start + queries.start + i) as isize; + let value = + *(visibility.buffer.contents() as *const u64).offset(absolute_index); + let base = (visibility.buffer.contents() as *const u8) + .offset(visibility.availability_offset as isize); + let availability = *(base as *const u32).offset(absolute_index); + let data_ptr = data[i as usize * stride as usize..].as_mut_ptr(); + // `data` is a byte slice with no alignment guarantee, so the results + // are stored with unaligned writes instead of dereferencing a cast pointer. + if flags.contains(query::ResultFlags::BITS_64) { + ptr::write_unaligned(data_ptr as *mut u64, value); + if 
flags.contains(query::ResultFlags::WITH_AVAILABILITY) { + ptr::write_unaligned((data_ptr as *mut u64).offset(1), availability as u64); + } + } else { + ptr::write_unaligned(data_ptr as *mut u32, value as u32); + if flags.contains(query::ResultFlags::WITH_AVAILABILITY) { + ptr::write_unaligned((data_ptr as *mut u32).offset(1), availability); + } + } + } + } + + is_ready + } + n::QueryPool::Timestamp => { + for d in data.iter_mut() { + *d = 0; + } + true + } + }; + + Ok(is_ready) + } + + fn wait_idle(&self) -> Result<(), OutOfMemory> { + command::QueueInner::wait_idle(&self.shared.queue); + Ok(()) + } + + unsafe fn set_image_name(&self, image: &mut n::Image, name: &str) { + match image { + n::Image { + like: n::ImageLike::Buffer(ref mut buf), + .. + } => self.set_buffer_name(buf, name), + n::Image { + like: n::ImageLike::Texture(ref tex), + .. + } => tex.set_label(name), + n::Image { + like: + n::ImageLike::Unbound { + name: ref mut unbound_name, + .. + }, + .. + } => { + *unbound_name = name.to_string(); + } + }; + } + + unsafe fn set_buffer_name(&self, buffer: &mut n::Buffer, name: &str) { + match buffer { + n::Buffer::Unbound { + name: ref mut unbound_name, + .. + } => { + *unbound_name = name.to_string(); + } + n::Buffer::Bound { + ref raw, ref range, .. 
+ } => { + if self.shared.private_caps.supports_debug_markers { + raw.add_debug_marker( + name, + NSRange { + location: range.start, + length: range.end - range.start, + }, + ); + } + } + } + } + + unsafe fn set_command_buffer_name( + &self, + command_buffer: &mut command::CommandBuffer, + name: &str, + ) { + command_buffer.name = name.to_string(); + } + + unsafe fn set_semaphore_name(&self, _semaphore: &mut n::Semaphore, _name: &str) {} + + unsafe fn set_fence_name(&self, _fence: &mut n::Fence, _name: &str) {} + + unsafe fn set_framebuffer_name(&self, _framebuffer: &mut n::Framebuffer, _name: &str) {} + + unsafe fn set_render_pass_name(&self, render_pass: &mut n::RenderPass, name: &str) { + render_pass.name = name.to_string(); + } + + unsafe fn set_descriptor_set_name(&self, _descriptor_set: &mut n::DescriptorSet, _name: &str) { + // TODO + } + + unsafe fn set_descriptor_set_layout_name( + &self, + _descriptor_set_layout: &mut n::DescriptorSetLayout, + _name: &str, + ) { + // TODO + } + + unsafe fn set_pipeline_layout_name( + &self, + _pipeline_layout: &mut n::PipelineLayout, + _name: &str, + ) { + // TODO + } + + unsafe fn set_compute_pipeline_name( + &self, + compute_pipeline: &mut n::ComputePipeline, + name: &str, + ) { + if self.shared.private_caps.supports_debug_markers { + compute_pipeline.raw.set_label(name); + } + } + + unsafe fn set_graphics_pipeline_name( + &self, + graphics_pipeline: &mut n::GraphicsPipeline, + name: &str, + ) { + if self.shared.private_caps.supports_debug_markers { + graphics_pipeline.raw.set_label(name); + } + } +} + +#[test] +fn test_send_sync() { + fn foo<T: Send + Sync>() {} + foo::<Device>() +} diff --git a/third_party/rust/gfx-backend-metal/src/internal.rs b/third_party/rust/gfx-backend-metal/src/internal.rs new file mode 100644 index 0000000000..e9c345e2fc --- /dev/null +++ b/third_party/rust/gfx-backend-metal/src/internal.rs @@ -0,0 +1,522 @@ +use crate::{conversions as conv, PrivateCapabilities, MAX_COLOR_ATTACHMENTS}; + +use 
auxil::FastHashMap; +use hal::{ + command::ClearColor, + format::{Aspects, ChannelType}, + image::{Filter, NumSamples}, + pso, +}; + +use metal; +use parking_lot::{Mutex, RawRwLock}; +use storage_map::{StorageMap, StorageMapGuard}; + +use std::mem; + +pub type FastStorageMap<K, V> = StorageMap<RawRwLock, FastHashMap<K, V>>; +pub type FastStorageGuard<'a, V> = StorageMapGuard<'a, RawRwLock, V>; + +#[derive(Clone, Debug)] +pub struct ClearVertex { + pub pos: [f32; 4], +} + +#[derive(Clone, Debug)] +pub struct BlitVertex { + pub uv: [f32; 4], + pub pos: [f32; 4], +} + +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] +pub enum Channel { + Float, + Int, + Uint, +} + +impl From<ChannelType> for Channel { + fn from(channel_type: ChannelType) -> Self { + match channel_type { + ChannelType::Unorm + | ChannelType::Snorm + | ChannelType::Ufloat + | ChannelType::Sfloat + | ChannelType::Uscaled + | ChannelType::Sscaled + | ChannelType::Srgb => Channel::Float, + ChannelType::Uint => Channel::Uint, + ChannelType::Sint => Channel::Int, + } + } +} + +impl Channel { + pub fn interpret(self, raw: ClearColor) -> metal::MTLClearColor { + unsafe { + match self { + Channel::Float => metal::MTLClearColor::new( + raw.float32[0] as _, + raw.float32[1] as _, + raw.float32[2] as _, + raw.float32[3] as _, + ), + Channel::Int => metal::MTLClearColor::new( + raw.sint32[0] as _, + raw.sint32[1] as _, + raw.sint32[2] as _, + raw.sint32[3] as _, + ), + Channel::Uint => metal::MTLClearColor::new( + raw.uint32[0] as _, + raw.uint32[1] as _, + raw.uint32[2] as _, + raw.uint32[3] as _, + ), + } + } + } +} + +#[derive(Debug)] +pub struct SamplerStates { + nearest: metal::SamplerState, + linear: metal::SamplerState, +} + +impl SamplerStates { + fn new(device: &metal::DeviceRef) -> Self { + let desc = metal::SamplerDescriptor::new(); + desc.set_min_filter(metal::MTLSamplerMinMagFilter::Nearest); + desc.set_mag_filter(metal::MTLSamplerMinMagFilter::Nearest); + 
desc.set_mip_filter(metal::MTLSamplerMipFilter::Nearest); + let nearest = device.new_sampler(&desc); + desc.set_min_filter(metal::MTLSamplerMinMagFilter::Linear); + desc.set_mag_filter(metal::MTLSamplerMinMagFilter::Linear); + let linear = device.new_sampler(&desc); + + SamplerStates { nearest, linear } + } + + pub fn get(&self, filter: Filter) -> &metal::SamplerStateRef { + match filter { + Filter::Nearest => &self.nearest, + Filter::Linear => &self.linear, + } + } +} + +#[derive(Debug)] +pub struct DepthStencilStates { + map: FastStorageMap<pso::DepthStencilDesc, metal::DepthStencilState>, + write_none: pso::DepthStencilDesc, + write_depth: pso::DepthStencilDesc, + write_stencil: pso::DepthStencilDesc, + write_all: pso::DepthStencilDesc, +} + +impl DepthStencilStates { + fn new(device: &metal::DeviceRef) -> Self { + let write_none = pso::DepthStencilDesc { + depth: None, + depth_bounds: false, + stencil: None, + }; + let write_depth = pso::DepthStencilDesc { + depth: Some(pso::DepthTest { + fun: pso::Comparison::Always, + write: true, + }), + depth_bounds: false, + stencil: None, + }; + let face = pso::StencilFace { + fun: pso::Comparison::Always, + op_fail: pso::StencilOp::Replace, + op_depth_fail: pso::StencilOp::Replace, + op_pass: pso::StencilOp::Replace, + }; + let write_stencil = pso::DepthStencilDesc { + depth: None, + depth_bounds: false, + stencil: Some(pso::StencilTest { + faces: pso::Sided::new(face), + ..pso::StencilTest::default() + }), + }; + let write_all = pso::DepthStencilDesc { + depth: Some(pso::DepthTest { + fun: pso::Comparison::Always, + write: true, + }), + depth_bounds: false, + stencil: Some(pso::StencilTest { + faces: pso::Sided::new(face), + ..pso::StencilTest::default() + }), + }; + + let map = FastStorageMap::default(); + for desc in &[&write_none, &write_depth, &write_stencil, &write_all] { + map.get_or_create_with(*desc, || { + let raw_desc = Self::create_desc(desc).unwrap(); + device.new_depth_stencil_state(&raw_desc) + }); + } + + 
DepthStencilStates { + map, + write_none, + write_depth, + write_stencil, + write_all, + } + } + + pub fn get_write(&self, aspects: Aspects) -> FastStorageGuard<metal::DepthStencilState> { + let key = if aspects.contains(Aspects::DEPTH | Aspects::STENCIL) { + &self.write_all + } else if aspects.contains(Aspects::DEPTH) { + &self.write_depth + } else if aspects.contains(Aspects::STENCIL) { + &self.write_stencil + } else { + &self.write_none + }; + self.map.get_or_create_with(key, || unreachable!()) + } + + pub fn prepare(&self, desc: &pso::DepthStencilDesc, device: &metal::DeviceRef) { + self.map.prepare_maybe(desc, || { + Self::create_desc(desc).map(|raw_desc| device.new_depth_stencil_state(&raw_desc)) + }); + } + + // TODO: avoid locking for writes every time + pub fn get( + &self, + desc: pso::DepthStencilDesc, + device: &Mutex<metal::Device>, + ) -> FastStorageGuard<metal::DepthStencilState> { + self.map.get_or_create_with(&desc, || { + let raw_desc = Self::create_desc(&desc).expect("Incomplete descriptor provided"); + device.lock().new_depth_stencil_state(&raw_desc) + }) + } + + fn create_stencil( + face: &pso::StencilFace, + read_mask: pso::StencilValue, + write_mask: pso::StencilValue, + ) -> metal::StencilDescriptor { + let desc = metal::StencilDescriptor::new(); + desc.set_stencil_compare_function(conv::map_compare_function(face.fun)); + desc.set_read_mask(read_mask); + desc.set_write_mask(write_mask); + desc.set_stencil_failure_operation(conv::map_stencil_op(face.op_fail)); + desc.set_depth_failure_operation(conv::map_stencil_op(face.op_depth_fail)); + desc.set_depth_stencil_pass_operation(conv::map_stencil_op(face.op_pass)); + desc + } + + fn create_desc(desc: &pso::DepthStencilDesc) -> Option<metal::DepthStencilDescriptor> { + let raw = metal::DepthStencilDescriptor::new(); + + if let Some(ref stencil) = desc.stencil { + let read_masks = match stencil.read_masks { + pso::State::Static(value) => value, + pso::State::Dynamic => return None, + }; + let 
write_masks = match stencil.write_masks { + pso::State::Static(value) => value, + pso::State::Dynamic => return None, + }; + let front_desc = + Self::create_stencil(&stencil.faces.front, read_masks.front, write_masks.front); + raw.set_front_face_stencil(Some(&front_desc)); + let back_desc = if stencil.faces.front == stencil.faces.back + && read_masks.front == read_masks.back + && write_masks.front == write_masks.back + { + front_desc + } else { + Self::create_stencil(&stencil.faces.back, read_masks.back, write_masks.back) + }; + raw.set_back_face_stencil(Some(&back_desc)); + } + + if let Some(ref depth) = desc.depth { + raw.set_depth_compare_function(conv::map_compare_function(depth.fun)); + raw.set_depth_write_enabled(depth.write); + } + + Some(raw) + } +} + +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] +pub struct ClearKey { + pub framebuffer_aspects: Aspects, + pub color_formats: [metal::MTLPixelFormat; MAX_COLOR_ATTACHMENTS], + pub depth_stencil_format: metal::MTLPixelFormat, + pub sample_count: NumSamples, + pub target_index: Option<(u8, Channel)>, +} + +#[derive(Debug)] +pub struct ImageClearPipes { + map: FastStorageMap<ClearKey, metal::RenderPipelineState>, +} + +impl ImageClearPipes { + pub(crate) fn get( + &self, + key: ClearKey, + library: &Mutex<metal::Library>, + device: &Mutex<metal::Device>, + private_caps: &PrivateCapabilities, + ) -> FastStorageGuard<metal::RenderPipelineState> { + self.map.get_or_create_with(&key, || { + Self::create(key, &*library.lock(), &*device.lock(), private_caps) + }) + } + + fn create( + key: ClearKey, + library: &metal::LibraryRef, + device: &metal::DeviceRef, + private_caps: &PrivateCapabilities, + ) -> metal::RenderPipelineState { + let pipeline = metal::RenderPipelineDescriptor::new(); + if private_caps.layered_rendering { + pipeline.set_input_primitive_topology(metal::MTLPrimitiveTopologyClass::Triangle); + } + + let vs_clear = library.get_function("vs_clear", None).unwrap(); + 
pipeline.set_vertex_function(Some(&vs_clear)); + + if key.framebuffer_aspects.contains(Aspects::COLOR) { + for (i, &format) in key.color_formats.iter().enumerate() { + pipeline + .color_attachments() + .object_at(i as u64) + .unwrap() + .set_pixel_format(format); + } + } + if key.framebuffer_aspects.contains(Aspects::DEPTH) { + pipeline.set_depth_attachment_pixel_format(key.depth_stencil_format); + } + if key.framebuffer_aspects.contains(Aspects::STENCIL) { + pipeline.set_stencil_attachment_pixel_format(key.depth_stencil_format); + } + if key.sample_count > 1 { + pipeline.set_sample_count(key.sample_count as u64); + } + + if let Some((index, channel)) = key.target_index { + assert!(key.framebuffer_aspects.contains(Aspects::COLOR)); + let s_channel = match channel { + Channel::Float => "float", + Channel::Int => "int", + Channel::Uint => "uint", + }; + let ps_name = format!("ps_clear{}_{}", index, s_channel); + let ps_fun = library.get_function(&ps_name, None).unwrap(); + pipeline.set_fragment_function(Some(&ps_fun)); + } + + // Vertex buffers + let vertex_descriptor = metal::VertexDescriptor::new(); + let mtl_buffer_desc = vertex_descriptor.layouts().object_at(0).unwrap(); + mtl_buffer_desc.set_stride(mem::size_of::<ClearVertex>() as _); + for i in 0..1 { + let mtl_attribute_desc = vertex_descriptor + .attributes() + .object_at(i) + .expect("too many vertex attributes"); + mtl_attribute_desc.set_buffer_index(0); + mtl_attribute_desc.set_offset(i * mem::size_of::<[f32; 4]>() as u64); + mtl_attribute_desc.set_format(metal::MTLVertexFormat::Float4); + } + pipeline.set_vertex_descriptor(Some(&vertex_descriptor)); + + device.new_render_pipeline_state(&pipeline).unwrap() + } +} + +pub type BlitKey = ( + metal::MTLTextureType, + metal::MTLPixelFormat, + Aspects, + Channel, +); + +#[derive(Debug)] +pub struct ImageBlitPipes { + map: FastStorageMap<BlitKey, metal::RenderPipelineState>, +} + +impl ImageBlitPipes { + pub(crate) fn get( + &self, + key: BlitKey, + library: 
&Mutex<metal::Library>, + device: &Mutex<metal::Device>, + private_caps: &PrivateCapabilities, + ) -> FastStorageGuard<metal::RenderPipelineState> { + self.map.get_or_create_with(&key, || { + Self::create(key, &*library.lock(), &*device.lock(), private_caps) + }) + } + + fn create( + key: BlitKey, + library: &metal::LibraryRef, + device: &metal::DeviceRef, + private_caps: &PrivateCapabilities, + ) -> metal::RenderPipelineState { + use metal::MTLTextureType as Tt; + + let pipeline = metal::RenderPipelineDescriptor::new(); + if private_caps.layered_rendering { + pipeline.set_input_primitive_topology(metal::MTLPrimitiveTopologyClass::Triangle); + } + + let s_type = match key.0 { + Tt::D1 => "1d", + Tt::D1Array => "1d_array", + Tt::D2 => "2d", + Tt::D2Array => "2d_array", + Tt::D3 => "3d", + Tt::D2Multisample => panic!("Can't blit MSAA surfaces"), + Tt::Cube | Tt::CubeArray => unimplemented!(), + }; + let s_channel = if key.2.contains(Aspects::COLOR) { + match key.3 { + Channel::Float => "float", + Channel::Int => "int", + Channel::Uint => "uint", + } + } else { + "depth" //TODO: stencil + }; + let ps_name = format!("ps_blit_{}_{}", s_type, s_channel); + + let vs_blit = library.get_function("vs_blit", None).unwrap(); + let ps_blit = library.get_function(&ps_name, None).unwrap(); + pipeline.set_vertex_function(Some(&vs_blit)); + pipeline.set_fragment_function(Some(&ps_blit)); + + if key.2.contains(Aspects::COLOR) { + pipeline + .color_attachments() + .object_at(0) + .unwrap() + .set_pixel_format(key.1); + } + if key.2.contains(Aspects::DEPTH) { + pipeline.set_depth_attachment_pixel_format(key.1); + } + if key.2.contains(Aspects::STENCIL) { + pipeline.set_stencil_attachment_pixel_format(key.1); + } + + // Vertex buffers + let vertex_descriptor = metal::VertexDescriptor::new(); + let mtl_buffer_desc = vertex_descriptor.layouts().object_at(0).unwrap(); + mtl_buffer_desc.set_stride(mem::size_of::<BlitVertex>() as _); + for i in 0..2 { + let mtl_attribute_desc = 
vertex_descriptor + .attributes() + .object_at(i) + .expect("too many vertex attributes"); + mtl_attribute_desc.set_buffer_index(0); + mtl_attribute_desc.set_offset(i * mem::size_of::<[f32; 4]>() as u64); + mtl_attribute_desc.set_format(metal::MTLVertexFormat::Float4); + } + pipeline.set_vertex_descriptor(Some(&vertex_descriptor)); + + device.new_render_pipeline_state(&pipeline).unwrap() + } +} + +#[derive(Debug)] +pub struct ServicePipes { + pub library: Mutex<metal::Library>, + pub sampler_states: SamplerStates, + pub depth_stencil_states: DepthStencilStates, + pub clears: ImageClearPipes, + pub blits: ImageBlitPipes, + pub copy_buffer: metal::ComputePipelineState, + pub fill_buffer: metal::ComputePipelineState, +} + +impl ServicePipes { + pub fn new(device: &metal::DeviceRef) -> Self { + let data = if cfg!(target_os = "macos") { + &include_bytes!("./../shaders/gfx-shaders-macos.metallib")[..] + } else if cfg!(target_arch = "aarch64") { + &include_bytes!("./../shaders/gfx-shaders-ios.metallib")[..] + } else { + &include_bytes!("./../shaders/gfx-shaders-ios-simulator.metallib")[..] 
+ }; + let library = device.new_library_with_data(data).unwrap(); + + let copy_buffer = Self::create_copy_buffer(&library, device); + let fill_buffer = Self::create_fill_buffer(&library, device); + + ServicePipes { + library: Mutex::new(library), + sampler_states: SamplerStates::new(device), + depth_stencil_states: DepthStencilStates::new(device), + clears: ImageClearPipes { + map: FastStorageMap::default(), + }, + blits: ImageBlitPipes { + map: FastStorageMap::default(), + }, + copy_buffer, + fill_buffer, + } + } + + fn create_copy_buffer( + library: &metal::LibraryRef, + device: &metal::DeviceRef, + ) -> metal::ComputePipelineState { + let pipeline = metal::ComputePipelineDescriptor::new(); + + let cs_copy_buffer = library.get_function("cs_copy_buffer", None).unwrap(); + pipeline.set_compute_function(Some(&cs_copy_buffer)); + pipeline.set_thread_group_size_is_multiple_of_thread_execution_width(true); + + /*TODO: check MacOS version + if let Some(buffers) = pipeline.buffers() { + buffers.object_at(0).unwrap().set_mutability(metal::MTLMutability::Mutable); + buffers.object_at(1).unwrap().set_mutability(metal::MTLMutability::Immutable); + buffers.object_at(2).unwrap().set_mutability(metal::MTLMutability::Immutable); + }*/ + + device.new_compute_pipeline_state(&pipeline).unwrap() + } + + fn create_fill_buffer( + library: &metal::LibraryRef, + device: &metal::DeviceRef, + ) -> metal::ComputePipelineState { + let pipeline = metal::ComputePipelineDescriptor::new(); + + let cs_fill_buffer = library.get_function("cs_fill_buffer", None).unwrap(); + pipeline.set_compute_function(Some(&cs_fill_buffer)); + pipeline.set_thread_group_size_is_multiple_of_thread_execution_width(true); + + /*TODO: check MacOS version + if let Some(buffers) = pipeline.buffers() { + buffers.object_at(0).unwrap().set_mutability(metal::MTLMutability::Mutable); + buffers.object_at(1).unwrap().set_mutability(metal::MTLMutability::Immutable); + }*/ + + 
device.new_compute_pipeline_state(&pipeline).unwrap() + } +} diff --git a/third_party/rust/gfx-backend-metal/src/lib.rs b/third_party/rust/gfx-backend-metal/src/lib.rs new file mode 100644 index 0000000000..5c51cb91a7 --- /dev/null +++ b/third_party/rust/gfx-backend-metal/src/lib.rs @@ -0,0 +1,1126 @@ +/*! +# Metal backend internals. + +## Pipeline Layout + +In Metal, push constants, vertex buffers, and resources in the descriptor sets +are all placed together in the native resource bindings, which work similarly to D3D11: +there are tables of textures, buffers, and samplers. + +We put push constants first (if any) in the table, followed by descriptor set 0 +resources, followed by other descriptor sets. The vertex buffers are bound at the very +end of the VS buffer table. + +When argument buffers are supported, each descriptor set becomes a buffer binding, +but the general placement rule is the same. + +## Command recording + +One-time-submit primary command buffers are recorded "live" into `MTLCommandBuffer`. +Special care is taken with the recording state: active bindings are restored at the +start of any render or compute pass. + +Multi-submit and secondary command buffers are recorded as "soft" commands into +`Journal`. Actual native recording is done at either `submit` or `execute_commands` +respectively. When that happens, we `enqueue` the command buffer at the start +of recording, which allows the driver to work on pass translation at the same time +as we are recording the following passes. + +## Memory + +In general, "Shared" storage is used for CPU-coherent memory. "Managed" is used for +non-coherent CPU-visible memory. Finally, "Private" storage backs device-local +memory types. + +Metal doesn't have CPU-visible memory for textures. We only allow RGBA8 2D textures +to be allocated from CPU-visible memory, and only for the purpose of transfer operations, which is +the minimum required by Vulkan. In fact, these become just glorified staging buffers. 
+ +## Events + +Events are represented by just an atomic bool. When recording, a command buffer keeps +track of all events set or reset. Signalling within a command buffer is therefore a +matter of simply checking that local list. When making a submission, used events are +also accumulated temporarily, so that we can change their values in the completion +handler of the last command buffer. We also check this list in order to resolve events +fired in one command buffer and waited on in another one within the same submission. + +Waiting for an event from a different submission is accomplished similarly to waiting +for the host. We block all the submissions until the host blockers are resolved, and +these are checked at certain points like setting an event by the device, or waiting +for a fence. +!*/ + +#[macro_use] +extern crate bitflags; +#[macro_use] +extern crate objc; +#[macro_use] +extern crate log; + +use hal::{ + adapter::{Adapter, AdapterInfo, DeviceType}, + queue::{QueueFamilyId, QueueType}, +}; +use range_alloc::RangeAllocator; + +use cocoa_foundation::foundation::NSInteger; +#[cfg(feature = "dispatch")] +use dispatch; +use foreign_types::ForeignTypeRef; +use lazy_static::lazy_static; +use metal::MTLFeatureSet; +use metal::MTLLanguageVersion; +use metal::{CGFloat, CGSize, CoreAnimationLayer, CoreAnimationLayerRef}; +use objc::{ + declare::ClassDecl, + runtime::{Class, Object, Sel, BOOL, YES}, +}; +use parking_lot::{Condvar, Mutex}; + +use std::mem; +use std::os::raw::c_void; +use std::ptr::NonNull; +use std::sync::Arc; + +mod command; +mod conversions; +mod device; +mod internal; +mod native; +mod soft; +mod window; + +pub use crate::command::CommandPool; +pub use crate::device::{Device, LanguageVersion, PhysicalDevice}; +pub use crate::window::{AcquireMode, Surface}; + +pub type GraphicsCommandPool = CommandPool; + +//TODO: investigate why exactly using `u8` here is slower (~5% total). +/// A type representing Metal binding's resource index. 
+type ResourceIndex = u32; + +#[repr(C)] +#[derive(Clone, Copy, Debug, Default)] +pub struct CGPoint { + pub x: CGFloat, + pub y: CGFloat, +} + +impl CGPoint { + #[inline] + pub fn new(x: CGFloat, y: CGFloat) -> CGPoint { + CGPoint { x, y } + } +} + +#[repr(C)] +#[derive(Clone, Copy, Debug, Default)] +pub struct CGRect { + pub origin: CGPoint, + pub size: CGSize, +} + +impl CGRect { + #[inline] + pub fn new(origin: CGPoint, size: CGSize) -> CGRect { + CGRect { origin, size } + } +} + +/// Method of recording one-time-submit command buffers. +#[derive(Clone, Debug, Hash, PartialEq)] +pub enum OnlineRecording { + /// Record natively on-the-fly. + Immediate, + /// Store commands and only start recording at submission time. + Deferred, + #[cfg(feature = "dispatch")] + /// Start recording asynchronously upon finishing each pass. + Remote(dispatch::QueuePriority), +} + +impl Default for OnlineRecording { + fn default() -> Self { + OnlineRecording::Immediate + } +} + +const MAX_ACTIVE_COMMAND_BUFFERS: usize = 1 << 14; +const MAX_VISIBILITY_QUERIES: usize = 1 << 14; +const MAX_COLOR_ATTACHMENTS: usize = 8; +const MAX_BOUND_DESCRIPTOR_SETS: usize = 8; + +#[derive(Debug, Clone, Copy)] +pub struct QueueFamily {} + +impl hal::queue::QueueFamily for QueueFamily { + fn queue_type(&self) -> QueueType { + QueueType::General + } + fn max_queues(&self) -> usize { + 1 + } + fn id(&self) -> QueueFamilyId { + QueueFamilyId(0) + } +} + +#[derive(Debug)] +struct VisibilityShared { + /// Availability buffer is in shared memory, it has N double words for + /// query results followed by N words for the availability. 
+ buffer: metal::Buffer, + allocator: Mutex<RangeAllocator<hal::query::Id>>, + availability_offset: hal::buffer::Offset, + condvar: Condvar, +} + +#[derive(Debug)] +struct Shared { + device: Mutex<metal::Device>, + queue: Mutex<command::QueueInner>, + queue_blocker: Mutex<command::QueueBlocker>, + service_pipes: internal::ServicePipes, + disabilities: PrivateDisabilities, + private_caps: PrivateCapabilities, + visibility: VisibilityShared, +} + +unsafe impl Send for Shared {} +unsafe impl Sync for Shared {} + +impl Shared { + fn new(device: metal::Device, experiments: &Experiments) -> Self { + let private_caps = PrivateCapabilities::new(&device, experiments); + debug!("{:#?}", private_caps); + + let visibility = VisibilityShared { + buffer: device.new_buffer( + MAX_VISIBILITY_QUERIES as u64 + * (mem::size_of::<u64>() + mem::size_of::<u32>()) as u64, + metal::MTLResourceOptions::StorageModeShared, + ), + allocator: Mutex::new(RangeAllocator::new( + 0..MAX_VISIBILITY_QUERIES as hal::query::Id, + )), + availability_offset: (MAX_VISIBILITY_QUERIES * mem::size_of::<u64>()) + as hal::buffer::Offset, + condvar: Condvar::new(), + }; + Shared { + queue: Mutex::new(command::QueueInner::new( + &device, + Some(MAX_ACTIVE_COMMAND_BUFFERS), + )), + queue_blocker: Mutex::new(command::QueueBlocker::default()), + service_pipes: internal::ServicePipes::new(&device), + disabilities: PrivateDisabilities { + broken_viewport_near_depth: device.name().starts_with("Intel") + && !device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v4), + broken_layered_clear_image: device.name().starts_with("Intel"), + }, + private_caps, + device: Mutex::new(device), + visibility, + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct Experiments { + pub argument_buffers: bool, +} + +#[derive(Debug)] +pub struct Instance { + pub experiments: Experiments, + gfx_managed_metal_layer_delegate: GfxManagedMetalLayerDelegate, +} + +impl hal::Instance<Backend> for Instance { + fn create(_: &str, _: 
u32) -> Result<Self, hal::UnsupportedBackend> { + Ok(Instance { + experiments: Experiments::default(), + gfx_managed_metal_layer_delegate: GfxManagedMetalLayerDelegate::new(), + }) + } + + fn enumerate_adapters(&self) -> Vec<Adapter<Backend>> { + let devices = metal::Device::all(); + let mut adapters: Vec<Adapter<Backend>> = devices + .into_iter() + .map(|dev| { + let name = dev.name().into(); + let shared = Shared::new(dev, &self.experiments); + let physical_device = device::PhysicalDevice::new(Arc::new(shared)); + Adapter { + info: AdapterInfo { + name, + vendor: 0, + device: 0, + device_type: if physical_device.shared.private_caps.low_power { + DeviceType::IntegratedGpu + } else { + DeviceType::DiscreteGpu + }, + }, + physical_device, + queue_families: vec![QueueFamily {}], + } + }) + .collect(); + adapters.sort_by_key(|adapt| { + ( + adapt.physical_device.shared.private_caps.low_power, + adapt.physical_device.shared.private_caps.headless, + ) + }); + adapters + } + + unsafe fn create_surface( + &self, + has_handle: &impl raw_window_handle::HasRawWindowHandle, + ) -> Result<Surface, hal::window::InitError> { + match has_handle.raw_window_handle() { + #[cfg(target_os = "ios")] + raw_window_handle::RawWindowHandle::IOS(handle) => { + Ok(self.create_surface_from_uiview(handle.ui_view)) + } + #[cfg(target_os = "macos")] + raw_window_handle::RawWindowHandle::MacOS(handle) => { + Ok(self.create_surface_from_nsview(handle.ns_view)) + } + _ => Err(hal::window::InitError::UnsupportedWindowHandle), + } + } + + unsafe fn destroy_surface(&self, surface: Surface) { + surface.dispose(); + } +} + +lazy_static! 
{ + static ref GFX_MANAGED_METAL_LAYER_DELEGATE_CLASS: &'static Class = unsafe { + let mut decl = ClassDecl::new("GfxManagedMetalLayerDelegate", class!(NSObject)).unwrap(); + decl.add_method( + sel!(layer:shouldInheritContentsScale:fromWindow:), + layer_should_inherit_contents_scale_from_window + as extern "C" fn(&Object, Sel, *mut Object, CGFloat, *mut Object) -> BOOL, + ); + decl.register() + }; +} + +extern "C" fn layer_should_inherit_contents_scale_from_window( + _: &Object, + _: Sel, + _layer: *mut Object, + _new_scale: CGFloat, + _from_window: *mut Object, +) -> BOOL { + return YES; +} + +#[derive(Debug)] +struct GfxManagedMetalLayerDelegate(*mut Object); + +impl GfxManagedMetalLayerDelegate { + pub fn new() -> Self { + unsafe { + let mut delegate: *mut Object = + msg_send![*GFX_MANAGED_METAL_LAYER_DELEGATE_CLASS, alloc]; + delegate = msg_send![delegate, init]; + Self(delegate) + } + } +} + +impl Drop for GfxManagedMetalLayerDelegate { + fn drop(&mut self) { + unsafe { + let () = msg_send![self.0, release]; + } + } +} + +unsafe impl Send for GfxManagedMetalLayerDelegate {} +unsafe impl Sync for GfxManagedMetalLayerDelegate {} + +impl Instance { + #[cfg(target_os = "ios")] + unsafe fn create_from_uiview(&self, uiview: *mut c_void) -> Surface { + let view: cocoa_foundation::base::id = mem::transmute(uiview); + if view.is_null() { + panic!("window does not have a valid contentView"); + } + + let main_layer: *mut Object = msg_send![view, layer]; + let class = class!(CAMetalLayer); + let is_valid_layer: BOOL = msg_send![main_layer, isKindOfClass: class]; + let render_layer = if is_valid_layer == YES { + mem::transmute::<_, &CoreAnimationLayerRef>(main_layer).to_owned() + } else { + // If the main layer is not a CAMetalLayer, we create a CAMetalLayer sublayer and use it instead. + // Unlike on macOS, we cannot replace the main view as UIView does not allow it (when NSView does). 
+ let new_layer: CoreAnimationLayer = msg_send![class, new]; + let bounds: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![new_layer.as_ref(), setFrame: bounds]; + let () = msg_send![main_layer, addSublayer: new_layer.as_ref()]; + new_layer + }; + + let window: cocoa_foundation::base::id = msg_send![view, window]; + if !window.is_null() { + let screen: cocoa_foundation::base::id = msg_send![window, screen]; + assert!(!screen.is_null(), "window is not attached to a screen"); + + let scale_factor: CGFloat = msg_send![screen, nativeScale]; + let () = msg_send![view, setContentScaleFactor: scale_factor]; + } + + let _: *mut c_void = msg_send![view, retain]; + Surface::new(NonNull::new(view), render_layer) + } + + #[cfg(target_os = "macos")] + unsafe fn create_from_nsview(&self, nsview: *mut c_void) -> Surface { + let view: cocoa_foundation::base::id = mem::transmute(nsview); + if view.is_null() { + panic!("window does not have a valid contentView"); + } + + let class = class!(CAMetalLayer); + // Deprecated! Clients should use `create_surface_from_layer` instead. 
+ let is_actually_layer: BOOL = msg_send![view, isKindOfClass: class]; + if is_actually_layer == YES { + return self.create_from_layer(mem::transmute(view)); + } + + let existing: *mut Object = msg_send![view, layer]; + let use_current = if existing.is_null() { + false + } else { + let result: BOOL = msg_send![existing, isKindOfClass: class]; + result == YES + }; + + let render_layer: CoreAnimationLayer = if use_current { + mem::transmute::<_, &CoreAnimationLayerRef>(existing).to_owned() + } else { + let layer: CoreAnimationLayer = msg_send![class, new]; + let () = msg_send![view, setLayer: layer.as_ref()]; + let () = msg_send![view, setWantsLayer: YES]; + let bounds: CGRect = msg_send![view, bounds]; + let () = msg_send![layer.as_ref(), setBounds: bounds]; + + let window: cocoa_foundation::base::id = msg_send![view, window]; + if !window.is_null() { + let scale_factor: CGFloat = msg_send![window, backingScaleFactor]; + let () = msg_send![layer, setContentsScale: scale_factor]; + } + let () = msg_send![layer, setDelegate: self.gfx_managed_metal_layer_delegate.0]; + layer + }; + + let _: *mut c_void = msg_send![view, retain]; + Surface::new(NonNull::new(view), render_layer) + } + + unsafe fn create_from_layer(&self, layer: &CoreAnimationLayerRef) -> Surface { + let class = class!(CAMetalLayer); + let proper_kind: BOOL = msg_send![layer, isKindOfClass: class]; + assert_eq!(proper_kind, YES); + Surface::new(None, layer.to_owned()) + } + + pub fn create_surface_from_layer(&self, layer: &CoreAnimationLayerRef) -> Surface { + unsafe { self.create_from_layer(layer) } + } + + #[cfg(target_os = "macos")] + pub fn create_surface_from_nsview(&self, nsview: *mut c_void) -> Surface { + unsafe { self.create_from_nsview(nsview) } + } + + #[cfg(target_os = "ios")] + pub fn create_surface_from_uiview(&self, uiview: *mut c_void) -> Surface { + unsafe { self.create_from_uiview(uiview) } + } +} + +#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)] +pub enum Backend {} +impl 
hal::Backend for Backend { + type Instance = Instance; + type PhysicalDevice = device::PhysicalDevice; + type Device = device::Device; + type Surface = Surface; + + type QueueFamily = QueueFamily; + type CommandQueue = command::CommandQueue; + type CommandBuffer = command::CommandBuffer; + + type Memory = native::Memory; + type CommandPool = command::CommandPool; + + type ShaderModule = native::ShaderModule; + type RenderPass = native::RenderPass; + type Framebuffer = native::Framebuffer; + + type Buffer = native::Buffer; + type BufferView = native::BufferView; + type Image = native::Image; + type ImageView = native::ImageView; + type Sampler = native::Sampler; + + type ComputePipeline = native::ComputePipeline; + type GraphicsPipeline = native::GraphicsPipeline; + type PipelineCache = native::PipelineCache; + type PipelineLayout = native::PipelineLayout; + type DescriptorSetLayout = native::DescriptorSetLayout; + type DescriptorPool = native::DescriptorPool; + type DescriptorSet = native::DescriptorSet; + + type Fence = native::Fence; + type Semaphore = native::Semaphore; + type Event = native::Event; + type QueryPool = native::QueryPool; +} + +const RESOURCE_HEAP_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::iOS_GPUFamily3_v2, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const ARGUMENT_BUFFER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::iOS_GPUFamily2_v4, + MTLFeatureSet::iOS_GPUFamily3_v3, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const MUTABLE_COMPARISON_SAMPLER_SUPPORT: &[MTLFeatureSet] = &[ + 
MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const SAMPLER_CLAMP_TO_BORDER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::macOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const ASTC_PIXEL_FORMAT_FEATURES: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, +]; + +const ANY8_UNORM_SRGB_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::tvOS_GPUFamily2_v1, +]; + +const ANY8_SNORM_RESOLVE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const RGBA8_SRGB: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::tvOS_GPUFamily2_v1, +]; + +const RGB10A2UNORM_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const RGB10A2UINT_COLOR_WRITE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + 
MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const RG11B10FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const RGB9E5FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, +]; + +const BGR10A2_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::iOS_GPUFamily2_v4, + MTLFeatureSet::iOS_GPUFamily3_v3, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const BASE_INSTANCE_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const BASE_VERTEX_INSTANCE_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const TEXTURE_CUBE_ARRAY_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const DUAL_SOURCE_BLEND_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::iOS_GPUFamily2_v4, + MTLFeatureSet::iOS_GPUFamily3_v3, + MTLFeatureSet::iOS_GPUFamily4_v1, + 
MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const LAYERED_RENDERING_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const FUNCTION_SPECIALIZATION_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::iOS_GPUFamily3_v2, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const DEPTH_CLIP_MODE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +#[derive(Clone, Debug)] +struct PrivateCapabilities { + pub os_is_mac: bool, + os_version: (u32, u32), + msl_version: metal::MTLLanguageVersion, + exposed_queues: usize, + // if TRUE, we'll report `NON_FILL_POLYGON_MODE` feature without the points support + expose_line_mode: bool, + resource_heaps: bool, + argument_buffers: bool, + shared_textures: bool, + mutable_comparison_samplers: bool, + sampler_clamp_to_border: bool, + base_instance: bool, + base_vertex_instance_drawing: bool, + dual_source_blending: bool, + low_power: bool, + headless: bool, + layered_rendering: bool, + function_specialization: bool, + depth_clip_mode: bool, + texture_cube_array: bool, + format_depth24_stencil8: bool, + format_depth32_stencil8_filter: bool, + format_depth32_stencil8_none: bool, + format_min_srgb_channels: u8, + format_b5: bool, + format_bc: bool, + format_eac_etc: bool, + format_astc: bool, + format_any8_unorm_srgb_all: bool, + format_any8_unorm_srgb_no_write: bool, + format_any8_snorm_all: bool, + format_r16_norm_all: bool, + 
format_r32_all: bool, + format_r32_no_write: bool, + format_r32float_no_write_no_filter: bool, + format_r32float_no_filter: bool, + format_r32float_all: bool, + format_rgba8_srgb_all: bool, + format_rgba8_srgb_no_write: bool, + format_rgb10a2_unorm_all: bool, + format_rgb10a2_unorm_no_write: bool, + format_rgb10a2_uint_color: bool, + format_rgb10a2_uint_color_write: bool, + format_rg11b10_all: bool, + format_rg11b10_no_write: bool, + format_rgb9e5_all: bool, + format_rgb9e5_no_write: bool, + format_rgb9e5_filter_only: bool, + format_rg32_color: bool, + format_rg32_color_write: bool, + format_rg32float_all: bool, + format_rg32float_color_blend: bool, + format_rg32float_no_filter: bool, + format_rgba32int_color: bool, + format_rgba32int_color_write: bool, + format_rgba32float_color: bool, + format_rgba32float_color_write: bool, + format_rgba32float_all: bool, + format_depth16unorm: bool, + format_depth32float_filter: bool, + format_depth32float_none: bool, + format_bgr10a2_all: bool, + format_bgr10a2_no_write: bool, + max_buffers_per_stage: ResourceIndex, + max_textures_per_stage: ResourceIndex, + max_samplers_per_stage: ResourceIndex, + buffer_alignment: u64, + max_buffer_size: u64, + max_texture_size: u64, + max_texture_3d_size: u64, + max_texture_layers: u64, + max_fragment_input_components: u64, + max_color_render_targets: u8, + max_total_threadgroup_memory: u32, + sample_count_mask: u8, + supports_debug_markers: bool, +} + +impl PrivateCapabilities { + fn version_at_least(major: u32, minor: u32, needed_major: u32, needed_minor: u32) -> bool { + major > needed_major || (major == needed_major && minor >= needed_minor) + } + + fn supports_any(raw: &metal::DeviceRef, features_sets: &[MTLFeatureSet]) -> bool { + features_sets + .iter() + .cloned() + .any(|x| raw.supports_feature_set(x)) + } + + fn new(device: &metal::Device, experiments: &Experiments) -> Self { + #[repr(C)] + #[derive(Clone, Copy, Debug)] + struct NSOperatingSystemVersion { + major: NSInteger, + 
minor: NSInteger, + patch: NSInteger, + } + + let version: NSOperatingSystemVersion = unsafe { + let process_info: *mut Object = msg_send![class!(NSProcessInfo), processInfo]; + msg_send![process_info, operatingSystemVersion] + }; + + let major = version.major as u32; + let minor = version.minor as u32; + let os_is_mac = device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1); + + let mut sample_count_mask: u8 = 1 | 4; // 1 and 4 samples are supported on all devices + if device.supports_sample_count(2) { + sample_count_mask |= 2; + } + if device.supports_sample_count(8) { + sample_count_mask |= 8; + } + + PrivateCapabilities { + os_is_mac, + os_version: (major as u32, minor as u32), + msl_version: if os_is_mac { + if Self::version_at_least(major, minor, 10, 15) { + MTLLanguageVersion::V2_2 + } else if Self::version_at_least(major, minor, 10, 14) { + MTLLanguageVersion::V2_1 + } else if Self::version_at_least(major, minor, 10, 13) { + MTLLanguageVersion::V2_0 + } else if Self::version_at_least(major, minor, 10, 12) { + MTLLanguageVersion::V1_2 + } else if Self::version_at_least(major, minor, 10, 11) { + MTLLanguageVersion::V1_1 + } else { + MTLLanguageVersion::V1_0 + } + } else if Self::version_at_least(major, minor, 13, 0) { + MTLLanguageVersion::V2_2 + } else if Self::version_at_least(major, minor, 12, 0) { + MTLLanguageVersion::V2_1 + } else if Self::version_at_least(major, minor, 11, 0) { + MTLLanguageVersion::V2_0 + } else if Self::version_at_least(major, minor, 10, 0) { + MTLLanguageVersion::V1_2 + } else if Self::version_at_least(major, minor, 9, 0) { + MTLLanguageVersion::V1_1 + } else { + MTLLanguageVersion::V1_0 + }, + exposed_queues: 1, + expose_line_mode: true, + resource_heaps: Self::supports_any(&device, RESOURCE_HEAP_SUPPORT), + argument_buffers: experiments.argument_buffers + && Self::supports_any(&device, ARGUMENT_BUFFER_SUPPORT), + shared_textures: !os_is_mac, + mutable_comparison_samplers: Self::supports_any( + &device, + 
MUTABLE_COMPARISON_SAMPLER_SUPPORT, + ), + sampler_clamp_to_border: Self::supports_any(&device, SAMPLER_CLAMP_TO_BORDER_SUPPORT), + base_instance: Self::supports_any(&device, BASE_INSTANCE_SUPPORT), + base_vertex_instance_drawing: Self::supports_any(&device, BASE_VERTEX_INSTANCE_SUPPORT), + dual_source_blending: Self::supports_any(&device, DUAL_SOURCE_BLEND_SUPPORT), + low_power: !os_is_mac || device.is_low_power(), + headless: os_is_mac && device.is_headless(), + layered_rendering: Self::supports_any(&device, LAYERED_RENDERING_SUPPORT), + function_specialization: Self::supports_any(&device, FUNCTION_SPECIALIZATION_SUPPORT), + depth_clip_mode: Self::supports_any(&device, DEPTH_CLIP_MODE), + texture_cube_array: Self::supports_any(&device, TEXTURE_CUBE_ARRAY_SUPPORT), + format_depth24_stencil8: os_is_mac && device.d24_s8_supported(), + format_depth32_stencil8_filter: os_is_mac, + format_depth32_stencil8_none: !os_is_mac, + format_min_srgb_channels: if os_is_mac { 4 } else { 1 }, + format_b5: !os_is_mac, + format_bc: os_is_mac, + format_eac_etc: !os_is_mac, + format_astc: Self::supports_any(&device, ASTC_PIXEL_FORMAT_FEATURES), + format_any8_unorm_srgb_all: Self::supports_any(&device, ANY8_UNORM_SRGB_ALL), + format_any8_unorm_srgb_no_write: !Self::supports_any(&device, ANY8_UNORM_SRGB_ALL) + && !os_is_mac, + format_any8_snorm_all: Self::supports_any(&device, ANY8_SNORM_RESOLVE), + format_r16_norm_all: os_is_mac, + format_r32_all: !Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ), + format_r32_no_write: Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ), + format_r32float_no_write_no_filter: Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ) && !os_is_mac, + format_r32float_no_filter: !Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + 
MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ) && !os_is_mac, + format_r32float_all: os_is_mac, + format_rgba8_srgb_all: Self::supports_any(&device, RGBA8_SRGB), + format_rgba8_srgb_no_write: !Self::supports_any(&device, RGBA8_SRGB), + format_rgb10a2_unorm_all: Self::supports_any(&device, RGB10A2UNORM_ALL), + format_rgb10a2_unorm_no_write: !Self::supports_any(&device, RGB10A2UNORM_ALL), + format_rgb10a2_uint_color: !Self::supports_any(&device, RGB10A2UINT_COLOR_WRITE), + format_rgb10a2_uint_color_write: Self::supports_any(&device, RGB10A2UINT_COLOR_WRITE), + format_rg11b10_all: Self::supports_any(&device, RG11B10FLOAT_ALL), + format_rg11b10_no_write: !Self::supports_any(&device, RG11B10FLOAT_ALL), + format_rgb9e5_all: Self::supports_any(&device, RGB9E5FLOAT_ALL), + format_rgb9e5_no_write: !Self::supports_any(&device, RGB9E5FLOAT_ALL) && !os_is_mac, + format_rgb9e5_filter_only: os_is_mac, + format_rg32_color: Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ), + format_rg32_color_write: !Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ), + format_rg32float_all: os_is_mac, + format_rg32float_color_blend: Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ), + format_rg32float_no_filter: !os_is_mac + && !Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ), + format_rgba32int_color: Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ), + format_rgba32int_color_write: !Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ), + format_rgba32float_color: Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ), + 
format_rgba32float_color_write: !Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v1, + MTLFeatureSet::iOS_GPUFamily2_v1, + ], + ) && !os_is_mac, + format_rgba32float_all: os_is_mac, + format_depth16unorm: device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v2), + format_depth32float_filter: device + .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1), + format_depth32float_none: !device + .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1), + format_bgr10a2_all: Self::supports_any(&device, BGR10A2_ALL), + format_bgr10a2_no_write: !device + .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v3), + max_buffers_per_stage: 31, + max_textures_per_stage: if os_is_mac { 128 } else { 31 }, + max_samplers_per_stage: 16, + buffer_alignment: if os_is_mac { 256 } else { 64 }, + max_buffer_size: if device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v2) { + 1 << 30 // 1GB on macOS 1.2 and up + } else { + 1 << 28 // 256MB otherwise + }, + max_texture_size: if Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 16384 + } else if Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily1_v2, + MTLFeatureSet::iOS_GPUFamily2_v2, + MTLFeatureSet::tvOS_GPUFamily1_v1, + ], + ) { + 8192 + } else { + 4096 + }, + max_texture_3d_size: 2048, + max_texture_layers: 2048, + max_fragment_input_components: if os_is_mac { 128 } else { 60 }, + max_color_render_targets: if Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, + ], + ) { + 8 + } else { + 4 + }, + max_total_threadgroup_memory: if Self::supports_any( + &device, + &[ + 
MTLFeatureSet::iOS_GPUFamily4_v2, + MTLFeatureSet::iOS_GPUFamily5_v1, + ], + ) { + 64 << 10 + } else if Self::supports_any( + &device, + &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::macOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily2_v1, + ], + ) { + 32 << 10 + } else { + 16 << 10 + }, + sample_count_mask, + supports_debug_markers: Self::supports_any( + &device, + &[ + MTLFeatureSet::macOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily2_v1, + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::iOS_GPUFamily3_v2, + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::tvOS_GPUFamily2_v1, + ], + ), + } + } + + fn has_version_at_least(&self, needed_major: u32, needed_minor: u32) -> bool { + let (major, minor) = self.os_version; + Self::version_at_least(major, minor, needed_major, needed_minor) + } +} + +#[derive(Clone, Copy, Debug)] +struct PrivateDisabilities { + /// Near depth is not respected properly on some Intel GPUs. + broken_viewport_near_depth: bool, + /// Multi-target clears don't appear to work properly on Intel GPUs. 
+ broken_layered_clear_image: bool, +} + +trait AsNative { + type Native; + fn from(native: &Self::Native) -> Self; + fn as_native(&self) -> &Self::Native; +} + +pub type BufferPtr = NonNull<metal::MTLBuffer>; +pub type TexturePtr = NonNull<metal::MTLTexture>; +pub type SamplerPtr = NonNull<metal::MTLSamplerState>; +pub type ResourcePtr = NonNull<metal::MTLResource>; + +//TODO: make this a generic struct with a single generic implementation + +impl AsNative for BufferPtr { + type Native = metal::BufferRef; + #[inline] + fn from(native: &metal::BufferRef) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &metal::BufferRef { + unsafe { metal::BufferRef::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for TexturePtr { + type Native = metal::TextureRef; + #[inline] + fn from(native: &metal::TextureRef) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &metal::TextureRef { + unsafe { metal::TextureRef::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for SamplerPtr { + type Native = metal::SamplerStateRef; + #[inline] + fn from(native: &metal::SamplerStateRef) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &metal::SamplerStateRef { + unsafe { metal::SamplerStateRef::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for ResourcePtr { + type Native = metal::ResourceRef; + #[inline] + fn from(native: &metal::ResourceRef) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &metal::ResourceRef { + unsafe { metal::ResourceRef::from_ptr(self.as_ptr()) } + } +} diff --git a/third_party/rust/gfx-backend-metal/src/native.rs b/third_party/rust/gfx-backend-metal/src/native.rs new file mode 100644 index 0000000000..c25e8cabe6 --- /dev/null +++ b/third_party/rust/gfx-backend-metal/src/native.rs @@ -0,0 +1,1089 @@ +use crate::{ + internal::{Channel, FastStorageMap}, + 
Backend, BufferPtr, ResourceIndex, SamplerPtr, TexturePtr, MAX_COLOR_ATTACHMENTS, +}; + +use auxil::FastHashMap; +use hal::{ + buffer, + format::FormatDesc, + image, + memory::Segment, + pass::{Attachment, AttachmentId}, + pso, MemoryTypeId, +}; +use range_alloc::RangeAllocator; + +use arrayvec::ArrayVec; +use cocoa_foundation::foundation::NSRange; +use metal; +use parking_lot::{Mutex, RwLock}; +use spirv_cross::{msl, spirv}; + +use std::{ + fmt, + ops::Range, + os::raw::{c_long, c_void}, + ptr, + sync::{atomic::AtomicBool, Arc}, +}; + +pub type EntryPointMap = FastHashMap<String, spirv::EntryPoint>; +/// An index of a resource within descriptor pool. +pub type PoolResourceIndex = u32; + +pub struct ShaderModule { + pub(crate) spv: Vec<u32>, + #[cfg(feature = "naga")] + pub(crate) naga: Option<naga::Module>, +} + +impl fmt::Debug for ShaderModule { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "ShaderModule(words = {})", self.spv.len()) + } +} + +bitflags! { + /// Subpass attachment operations. 
+ pub struct AttachmentOps: u8 { + const LOAD = 0x1; + const STORE = 0x2; + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SubpassData<T> { + pub colors: ArrayVec<[T; MAX_COLOR_ATTACHMENTS]>, + pub depth_stencil: Option<T>, +} + +impl<T> Default for SubpassData<T> { + fn default() -> Self { + SubpassData { + colors: ArrayVec::new(), + depth_stencil: None, + } + } +} + +impl<T> SubpassData<T> { + pub fn map<V, F: Fn(&T) -> V>(&self, fun: F) -> SubpassData<V> { + SubpassData { + colors: self.colors.iter().map(&fun).collect(), + depth_stencil: self.depth_stencil.as_ref().map(fun), + } + } +} + +pub type SubpassFormats = SubpassData<(metal::MTLPixelFormat, Channel)>; + +#[derive(Debug)] +pub struct AttachmentInfo { + pub id: AttachmentId, + pub resolve_id: Option<AttachmentId>, + pub ops: AttachmentOps, + pub format: metal::MTLPixelFormat, + pub channel: Channel, +} + +#[derive(Debug)] +pub struct Subpass { + pub attachments: SubpassData<AttachmentInfo>, + pub inputs: Vec<AttachmentId>, +} + +#[derive(Debug)] +pub struct RenderPass { + pub(crate) attachments: Vec<Attachment>, + pub(crate) subpasses: Vec<Subpass>, + pub(crate) name: String, +} + +#[derive(Debug)] +pub struct Framebuffer { + pub(crate) extent: image::Extent, + pub(crate) attachments: Vec<metal::Texture>, +} + +unsafe impl Send for Framebuffer {} +unsafe impl Sync for Framebuffer {} + +#[derive(Clone, Debug)] +pub struct ResourceData<T> { + pub buffers: T, + pub textures: T, + pub samplers: T, +} + +impl<T> ResourceData<T> { + pub fn map<V, F: Fn(&T) -> V>(&self, fun: F) -> ResourceData<V> { + ResourceData { + buffers: fun(&self.buffers), + textures: fun(&self.textures), + samplers: fun(&self.samplers), + } + } +} + +impl<T: Copy + Ord> ResourceData<Range<T>> { + pub fn expand(&mut self, point: ResourceData<T>) { + //TODO: modify `start` as well? 
+ self.buffers.end = self.buffers.end.max(point.buffers); + self.textures.end = self.textures.end.max(point.textures); + self.samplers.end = self.samplers.end.max(point.samplers); + } +} + +impl ResourceData<PoolResourceIndex> { + pub fn new() -> Self { + ResourceData { + buffers: 0, + textures: 0, + samplers: 0, + } + } + + #[inline] + pub fn add_many(&mut self, content: DescriptorContent, count: PoolResourceIndex) { + if content.contains(DescriptorContent::BUFFER) { + self.buffers += count; + } + if content.contains(DescriptorContent::TEXTURE) { + self.textures += count; + } + if content.contains(DescriptorContent::SAMPLER) { + self.samplers += count; + } + } + #[inline] + pub fn add(&mut self, content: DescriptorContent) { + self.add_many(content, 1) + } +} + +#[derive(Clone, Debug)] +pub struct MultiStageData<T> { + pub vs: T, + pub ps: T, + pub cs: T, +} + +pub type MultiStageResourceCounters = MultiStageData<ResourceData<ResourceIndex>>; + +#[derive(Debug)] +pub struct DescriptorSetInfo { + pub offsets: MultiStageResourceCounters, + pub dynamic_buffers: Vec<MultiStageData<PoolResourceIndex>>, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct PushConstantInfo { + pub count: u32, + pub buffer_index: ResourceIndex, +} + +#[derive(Debug)] +pub struct PipelineLayout { + pub(crate) shader_compiler_options: msl::CompilerOptions, + pub(crate) shader_compiler_options_point: msl::CompilerOptions, + #[cfg(feature = "naga")] + pub(crate) naga_options: naga::back::msl::Options, + pub(crate) infos: Vec<DescriptorSetInfo>, + pub(crate) total: MultiStageResourceCounters, + pub(crate) push_constants: MultiStageData<Option<PushConstantInfo>>, + pub(crate) total_push_constants: u32, +} + +#[derive(Clone, Debug)] +pub struct ModuleInfo { + pub library: metal::Library, + pub entry_point_map: EntryPointMap, + pub rasterization_enabled: bool, +} + +pub struct PipelineCache { + pub(crate) modules: FastStorageMap<msl::CompilerOptions, FastStorageMap<Vec<u32>, 
ModuleInfo>>, +} + +impl fmt::Debug for PipelineCache { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "PipelineCache") + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct RasterizerState { + //TODO: more states + pub front_winding: metal::MTLWinding, + pub fill_mode: metal::MTLTriangleFillMode, + pub cull_mode: metal::MTLCullMode, + pub depth_clip: Option<metal::MTLDepthClipMode>, +} + +impl Default for RasterizerState { + fn default() -> Self { + RasterizerState { + front_winding: metal::MTLWinding::Clockwise, + fill_mode: metal::MTLTriangleFillMode::Fill, + cull_mode: metal::MTLCullMode::None, + depth_clip: None, + } + } +} + +#[derive(Debug)] +pub struct StencilState<T: Clone> { + pub reference_values: pso::Sided<T>, + pub read_masks: pso::Sided<T>, + pub write_masks: pso::Sided<T>, +} + +pub type VertexBufferVec = Vec<(pso::VertexBufferDesc, pso::ElemOffset)>; + +#[derive(Debug)] +pub struct GraphicsPipeline { + // we hold the compiled libraries here for now + // TODO: move to some cache in `Device` + pub(crate) vs_lib: metal::Library, + pub(crate) fs_lib: Option<metal::Library>, + pub(crate) raw: metal::RenderPipelineState, + pub(crate) primitive_type: metal::MTLPrimitiveType, + pub(crate) vs_pc_info: Option<PushConstantInfo>, + pub(crate) ps_pc_info: Option<PushConstantInfo>, + pub(crate) rasterizer_state: Option<RasterizerState>, + pub(crate) depth_bias: pso::State<pso::DepthBias>, + pub(crate) depth_stencil_desc: pso::DepthStencilDesc, + pub(crate) baked_states: pso::BakedStates, + /// The mapping from Metal vertex buffers to Vulkan ones. + /// This is needed because Vulkan allows attribute offsets to exceed the strides, + /// while Metal does not. Thus, we register extra vertex buffer bindings with + /// adjusted offsets to cover this use case. 
+ pub(crate) vertex_buffers: VertexBufferVec, + /// Tracked attachment formats + pub(crate) attachment_formats: SubpassFormats, + pub(crate) samples: image::NumSamples, +} + +unsafe impl Send for GraphicsPipeline {} +unsafe impl Sync for GraphicsPipeline {} + +#[derive(Debug)] +pub struct ComputePipeline { + pub(crate) cs_lib: metal::Library, + pub(crate) raw: metal::ComputePipelineState, + pub(crate) work_group_size: metal::MTLSize, + pub(crate) pc_info: Option<PushConstantInfo>, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +#[derive(Debug)] +pub enum ImageLike { + /// This image has not yet been bound to memory. + Unbound { + descriptor: metal::TextureDescriptor, + mip_sizes: Vec<buffer::Offset>, + host_visible: bool, + name: String, + }, + /// This is a linearly tiled HOST-visible image, which is represented by a buffer. + Buffer(Buffer), + /// This is a regular image represented by a texture. + Texture(metal::Texture), +} + +impl ImageLike { + pub fn as_texture(&self) -> &metal::TextureRef { + match *self { + ImageLike::Unbound { .. } | ImageLike::Buffer(..) 
=> panic!("Expected bound image!"), + ImageLike::Texture(ref tex) => tex, + } + } +} + +#[derive(Debug)] +pub struct Image { + pub(crate) like: ImageLike, + pub(crate) kind: image::Kind, + pub(crate) mip_levels: image::Level, + pub(crate) format_desc: FormatDesc, + pub(crate) shader_channel: Channel, + pub(crate) mtl_format: metal::MTLPixelFormat, + pub(crate) mtl_type: metal::MTLTextureType, +} + +impl Image { + pub(crate) fn pitches_impl( + extent: image::Extent, + format_desc: FormatDesc, + ) -> [buffer::Offset; 4] { + let bytes_per_texel = format_desc.bits as image::Size >> 3; + let row_pitch = extent.width * bytes_per_texel; + let depth_pitch = extent.height * row_pitch; + let array_pitch = extent.depth * depth_pitch; + [ + bytes_per_texel as _, + row_pitch as _, + depth_pitch as _, + array_pitch as _, + ] + } + pub(crate) fn pitches(&self, level: image::Level) -> [buffer::Offset; 4] { + let extent = self.kind.extent().at_level(level); + Self::pitches_impl(extent, self.format_desc) + } + pub(crate) fn byte_offset(&self, offset: image::Offset) -> buffer::Offset { + let pitches = Self::pitches_impl(self.kind.extent(), self.format_desc); + pitches[0] * offset.x as buffer::Offset + + pitches[1] * offset.y as buffer::Offset + + pitches[2] * offset.z as buffer::Offset + } + pub(crate) fn byte_extent(&self, extent: image::Extent) -> buffer::Offset { + let bytes_per_texel = self.format_desc.bits as image::Size >> 3; + (bytes_per_texel * extent.width * extent.height * extent.depth) as _ + } + /// View this cube texture as a 2D array. 
+ pub(crate) fn view_cube_as_2d(&self) -> Option<metal::Texture> { + match self.mtl_type { + metal::MTLTextureType::Cube | metal::MTLTextureType::CubeArray => { + let raw = self.like.as_texture(); + Some(raw.new_texture_view_from_slice( + self.mtl_format, + metal::MTLTextureType::D2Array, + NSRange { + location: 0, + length: raw.mipmap_level_count(), + }, + NSRange { + location: 0, + length: self.kind.num_layers() as _, + }, + )) + } + _ => None, + } + } +} + +unsafe impl Send for Image {} +unsafe impl Sync for Image {} + +#[derive(Debug)] +pub struct BufferView { + pub(crate) raw: metal::Texture, +} + +unsafe impl Send for BufferView {} +unsafe impl Sync for BufferView {} + +#[derive(Debug)] +pub struct ImageView { + pub(crate) texture: metal::Texture, + pub(crate) mtl_format: metal::MTLPixelFormat, +} + +unsafe impl Send for ImageView {} +unsafe impl Sync for ImageView {} + +#[derive(Debug)] +pub struct Sampler { + pub(crate) raw: Option<metal::SamplerState>, + pub(crate) data: msl::SamplerData, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +#[derive(Clone, Debug)] +pub struct Semaphore { + pub(crate) system: Option<SystemSemaphore>, +} + +#[derive(Debug)] +pub enum Buffer { + Unbound { + size: u64, + usage: buffer::Usage, + name: String, + }, + Bound { + raw: metal::Buffer, + range: Range<u64>, + options: metal::MTLResourceOptions, + }, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl Buffer { + //TODO: consider returning `AsNative`? + pub fn as_bound(&self) -> (&metal::BufferRef, &Range<u64>) { + match *self { + Buffer::Unbound { .. } => panic!("Expected bound buffer!"), + Buffer::Bound { + ref raw, ref range, .. 
+ } => (raw, range), + } + } +} + +#[derive(Debug)] +pub struct DescriptorEmulatedPoolInner { + pub(crate) samplers: Vec<(pso::ShaderStageFlags, Option<SamplerPtr>)>, + pub(crate) textures: Vec<(pso::ShaderStageFlags, Option<TexturePtr>, image::Layout)>, + pub(crate) buffers: Vec<(pso::ShaderStageFlags, Option<BufferPtr>, buffer::Offset)>, +} + +#[derive(Debug)] +pub struct DescriptorArgumentPoolInner { + pub(crate) resources: Vec<UsedResource>, +} + +#[derive(Debug)] +pub enum DescriptorPool { + Emulated { + inner: Arc<RwLock<DescriptorEmulatedPoolInner>>, + allocators: ResourceData<RangeAllocator<PoolResourceIndex>>, + }, + ArgumentBuffer { + raw: metal::Buffer, + raw_allocator: RangeAllocator<buffer::Offset>, + alignment: buffer::Offset, + inner: Arc<RwLock<DescriptorArgumentPoolInner>>, + res_allocator: RangeAllocator<PoolResourceIndex>, + }, +} +//TODO: re-evaluate Send/Sync here +unsafe impl Send for DescriptorPool {} +unsafe impl Sync for DescriptorPool {} + +impl DescriptorPool { + pub(crate) fn new_emulated(counters: ResourceData<PoolResourceIndex>) -> Self { + let inner = DescriptorEmulatedPoolInner { + samplers: vec![Default::default(); counters.samplers as usize], + textures: vec![Default::default(); counters.textures as usize], + buffers: vec![Default::default(); counters.buffers as usize], + }; + DescriptorPool::Emulated { + inner: Arc::new(RwLock::new(inner)), + allocators: ResourceData { + samplers: RangeAllocator::new(0..counters.samplers), + textures: RangeAllocator::new(0..counters.textures), + buffers: RangeAllocator::new(0..counters.buffers), + }, + } + } + + pub(crate) fn new_argument( + raw: metal::Buffer, + total_bytes: buffer::Offset, + alignment: buffer::Offset, + total_resources: usize, + ) -> Self { + let default = UsedResource { + ptr: ptr::null_mut(), + usage: metal::MTLResourceUsage::empty(), + }; + DescriptorPool::ArgumentBuffer { + raw, + raw_allocator: RangeAllocator::new(0..total_bytes), + alignment, + inner: 
Arc::new(RwLock::new(DescriptorArgumentPoolInner { + resources: vec![default; total_resources], + })), + res_allocator: RangeAllocator::new(0..total_resources as PoolResourceIndex), + } + } + + fn report_available(&self) { + match *self { + DescriptorPool::Emulated { ref allocators, .. } => { + trace!( + "\tavailable {} samplers, {} textures, and {} buffers", + allocators.samplers.total_available(), + allocators.textures.total_available(), + allocators.buffers.total_available(), + ); + } + DescriptorPool::ArgumentBuffer { + ref raw_allocator, + ref res_allocator, + .. + } => { + trace!( + "\tavailable {} bytes for {} resources", + raw_allocator.total_available(), + res_allocator.total_available(), + ); + } + } + } +} + +impl pso::DescriptorPool<Backend> for DescriptorPool { + unsafe fn allocate_set( + &mut self, + set_layout: &DescriptorSetLayout, + ) -> Result<DescriptorSet, pso::AllocationError> { + self.report_available(); + match *self { + DescriptorPool::Emulated { + ref inner, + ref mut allocators, + } => { + debug!("pool: allocate_set"); + let (layouts, total, immutable_samplers) = match *set_layout { + DescriptorSetLayout::Emulated { + ref layouts, + ref total, + ref immutable_samplers, + } => (layouts, total, immutable_samplers), + _ => return Err(pso::AllocationError::IncompatibleLayout), + }; + + // try to allocate the ranges from the pool + let sampler_range = if total.samplers != 0 { + match allocators.samplers.allocate_range(total.samplers as _) { + Ok(range) => { + // fill out the stages for immutable samplers + let mut data = inner.write(); + let mut offset = range.start as usize; + for layout in layouts.iter() { + if layout.content.contains(DescriptorContent::SAMPLER) { + if immutable_samplers.contains_key(&layout.binding) { + data.samplers[offset] = (layout.stages, None); + } + offset += 1; + } + } + range + } + Err(e) => { + return Err(if e.fragmented_free_length >= total.samplers { + pso::AllocationError::FragmentedPool + } else { + 
pso::AllocationError::OutOfPoolMemory + }); + } + } + } else { + 0..0 + }; + let texture_range = if total.textures != 0 { + match allocators.textures.allocate_range(total.textures as _) { + Ok(range) => range, + Err(e) => { + if sampler_range.end != 0 { + allocators.samplers.free_range(sampler_range); + } + return Err(if e.fragmented_free_length >= total.samplers { + pso::AllocationError::FragmentedPool + } else { + pso::AllocationError::OutOfPoolMemory + }); + } + } + } else { + 0..0 + }; + let buffer_range = if total.buffers != 0 { + match allocators.buffers.allocate_range(total.buffers as _) { + Ok(range) => range, + Err(e) => { + if sampler_range.end != 0 { + allocators.samplers.free_range(sampler_range); + } + if texture_range.end != 0 { + allocators.textures.free_range(texture_range); + } + return Err(if e.fragmented_free_length >= total.samplers { + pso::AllocationError::FragmentedPool + } else { + pso::AllocationError::OutOfPoolMemory + }); + } + } + } else { + 0..0 + }; + + let resources = ResourceData { + buffers: buffer_range, + textures: texture_range, + samplers: sampler_range, + }; + + Ok(DescriptorSet::Emulated { + pool: Arc::clone(inner), + layouts: Arc::clone(layouts), + resources, + }) + } + DescriptorPool::ArgumentBuffer { + ref raw, + ref mut raw_allocator, + alignment, + ref inner, + ref mut res_allocator, + } => { + let (encoder, stage_flags, bindings, total) = match *set_layout { + DescriptorSetLayout::ArgumentBuffer { + ref encoder, + stage_flags, + ref bindings, + total, + .. 
+ } => (encoder, stage_flags, bindings, total), + _ => return Err(pso::AllocationError::IncompatibleLayout), + }; + let range = res_allocator + .allocate_range(total as PoolResourceIndex) + .map_err(|_| pso::AllocationError::OutOfPoolMemory)?; + + let raw_range = raw_allocator + .allocate_range(encoder.encoded_length() + alignment) + .expect("Argument encoding length is inconsistent!"); + let raw_offset = (raw_range.start + alignment - 1) & !(alignment - 1); + + let mut data = inner.write(); + for arg in bindings.values() { + if arg.res.buffer_id != !0 || arg.res.texture_id != !0 { + let pos = (range.start + arg.res_offset) as usize; + for ur in data.resources[pos..pos + arg.count].iter_mut() { + ur.usage = arg.usage; + } + } + } + + Ok(DescriptorSet::ArgumentBuffer { + raw: raw.clone(), + raw_offset, + pool: Arc::clone(inner), + range, + encoder: encoder.clone(), + bindings: Arc::clone(bindings), + stage_flags, + }) + } + } + } + + unsafe fn free<I>(&mut self, descriptor_sets: I) + where + I: IntoIterator<Item = DescriptorSet>, + { + match self { + DescriptorPool::Emulated { + ref inner, + ref mut allocators, + } => { + debug!("pool: free_sets"); + let mut data = inner.write(); + for descriptor_set in descriptor_sets { + match descriptor_set { + DescriptorSet::Emulated { resources, .. 
} => { + debug!("\t{:?} resources", resources); + for sampler in &mut data.samplers + [resources.samplers.start as usize..resources.samplers.end as usize] + { + sampler.1 = None; + } + if resources.samplers.start != resources.samplers.end { + allocators.samplers.free_range(resources.samplers); + } + for image in &mut data.textures + [resources.textures.start as usize..resources.textures.end as usize] + { + image.1 = None; + } + if resources.textures.start != resources.textures.end { + allocators.textures.free_range(resources.textures); + } + for buffer in &mut data.buffers + [resources.buffers.start as usize..resources.buffers.end as usize] + { + buffer.1 = None; + } + if resources.buffers.start != resources.buffers.end { + allocators.buffers.free_range(resources.buffers); + } + } + DescriptorSet::ArgumentBuffer { .. } => panic!( + "Tried to free a DescriptorSet not given out by this DescriptorPool!" + ), + } + } + } + DescriptorPool::ArgumentBuffer { + ref mut raw_allocator, + ref mut res_allocator, + ref inner, + .. + } => { + let mut data = inner.write(); + for descriptor_set in descriptor_sets { + match descriptor_set { + DescriptorSet::Emulated { .. } => panic!( + "Tried to free a DescriptorSet not given out by this DescriptorPool!" + ), + DescriptorSet::ArgumentBuffer { + raw_offset, + range, + encoder, + .. 
+ } => { + for ur in + data.resources[range.start as usize..range.end as usize].iter_mut() + { + ur.ptr = ptr::null_mut(); + ur.usage = metal::MTLResourceUsage::empty(); + } + + let handle_range = raw_offset..raw_offset + encoder.encoded_length(); + raw_allocator.free_range(handle_range); + res_allocator.free_range(range); + } + } + } + } + } + self.report_available(); + } + + unsafe fn reset(&mut self) { + match *self { + DescriptorPool::Emulated { + ref inner, + ref mut allocators, + } => { + debug!("pool: reset"); + if allocators.samplers.is_empty() + && allocators.textures.is_empty() + && allocators.buffers.is_empty() + { + return; // spare the locking + } + let mut data = inner.write(); + + for range in allocators.samplers.allocated_ranges() { + for sampler in &mut data.samplers[range.start as usize..range.end as usize] { + sampler.1 = None; + } + } + for range in allocators.textures.allocated_ranges() { + for texture in &mut data.textures[range.start as usize..range.end as usize] { + texture.1 = None; + } + } + for range in allocators.buffers.allocated_ranges() { + for buffer in &mut data.buffers[range.start as usize..range.end as usize] { + buffer.1 = None; + } + } + + allocators.samplers.reset(); + allocators.textures.reset(); + allocators.buffers.reset(); + } + DescriptorPool::ArgumentBuffer { + ref mut raw_allocator, + ref mut res_allocator, + .. + } => { + raw_allocator.reset(); + res_allocator.reset(); + } + } + } +} + +bitflags! { + /// Descriptor content flags. 
    pub struct DescriptorContent: u8 {
        const BUFFER = 1<<0;
        const DYNAMIC_BUFFER = 1<<1;
        const TEXTURE = 1<<2;
        const SAMPLER = 1<<3;
        const IMMUTABLE_SAMPLER = 1<<4;
    }
}

impl From<pso::DescriptorType> for DescriptorContent {
    /// Translates a hal descriptor type into the resource classes it occupies.
    ///
    /// Combined image/samplers yield `TEXTURE | SAMPLER`, structured buffers with
    /// a dynamic offset yield `BUFFER | DYNAMIC_BUFFER`, and texel buffers and
    /// input attachments are counted as `TEXTURE`.
    fn from(ty: pso::DescriptorType) -> Self {
        match ty {
            pso::DescriptorType::Sampler => DescriptorContent::SAMPLER,
            pso::DescriptorType::Image { ty } => match ty {
                pso::ImageDescriptorType::Sampled { with_sampler: true } => {
                    DescriptorContent::TEXTURE | DescriptorContent::SAMPLER
                }
                _ => DescriptorContent::TEXTURE,
            },
            pso::DescriptorType::Buffer { format, .. } => match format {
                pso::BufferDescriptorFormat::Structured { dynamic_offset } => {
                    match dynamic_offset {
                        true => DescriptorContent::BUFFER | DescriptorContent::DYNAMIC_BUFFER,
                        false => DescriptorContent::BUFFER,
                    }
                }
                pso::BufferDescriptorFormat::Texel => DescriptorContent::TEXTURE,
            },
            pso::DescriptorType::InputAttachment => DescriptorContent::TEXTURE,
        }
    }
}

// Note: this structure is iterated often, so it makes sense to keep it dense
/// One descriptor of an emulated set layout.
#[derive(Debug)]
pub struct DescriptorLayout {
    pub content: DescriptorContent,
    pub stages: pso::ShaderStageFlags,
    pub binding: pso::DescriptorBinding,
    pub array_index: pso::DescriptorArrayIndex,
}

/// One binding of an argument-buffer set layout.
#[derive(Debug)]
pub struct ArgumentLayout {
    pub(crate) res: msl::ResourceBinding,
    // added to the start of the set's resource range to locate this binding's entries
    pub(crate) res_offset: PoolResourceIndex,
    pub(crate) count: pso::DescriptorArrayIndex,
    pub(crate) usage: metal::MTLResourceUsage,
    pub(crate) content: DescriptorContent,
}

/// A descriptor set layout, in either the emulated or the argument-buffer flavor.
#[derive(Debug)]
pub enum DescriptorSetLayout {
    Emulated {
        layouts: Arc<Vec<DescriptorLayout>>,
        total: ResourceData<PoolResourceIndex>,
        immutable_samplers: FastHashMap<pso::DescriptorBinding, msl::SamplerData>,
    },
    ArgumentBuffer {
        encoder: metal::ArgumentEncoder,
        stage_flags: pso::ShaderStageFlags,
        bindings: Arc<FastHashMap<pso::DescriptorBinding, ArgumentLayout>>,
        total: PoolResourceIndex,
    },
}
unsafe impl Send for DescriptorSetLayout {}
unsafe impl Sync for DescriptorSetLayout {}

/// A raw Metal resource pointer together with the usage it is bound with.
#[derive(Clone, Debug)]
pub struct UsedResource {
    pub(crate) ptr: *mut metal::MTLResource,
    pub(crate) usage: metal::MTLResourceUsage,
}

/// A descriptor set handed out by a [`DescriptorPool`] of the matching variant.
#[derive(Debug)]
pub enum DescriptorSet {
    Emulated {
        pool: Arc<RwLock<DescriptorEmulatedPoolInner>>,
        layouts: Arc<Vec<DescriptorLayout>>,
        resources: ResourceData<Range<PoolResourceIndex>>,
    },
    ArgumentBuffer {
        raw: metal::Buffer,
        raw_offset: buffer::Offset,
        pool: Arc<RwLock<DescriptorArgumentPoolInner>>,
        range: Range<PoolResourceIndex>,
        encoder: metal::ArgumentEncoder,
        bindings: Arc<FastHashMap<pso::DescriptorBinding, ArgumentLayout>>,
        stage_flags: pso::ShaderStageFlags,
    },
}
unsafe impl Send for DescriptorSet {}
unsafe impl Sync for DescriptorSet {}

/// A memory allocation: the heap it lives in and its size in bytes.
#[derive(Debug)]
pub struct Memory {
    pub(crate) heap: MemoryHeap,
    pub(crate) size: u64,
}

impl Memory {
    /// Wraps `heap` together with its `size`.
    pub(crate) fn new(heap: MemoryHeap, size: u64) -> Self {
        Memory { heap, size }
    }

    /// Turns `range` into an absolute byte range; a segment without a size
    /// extends to the end of the allocation.
    pub(crate) fn resolve(&self, range: &Segment) -> Range<u64> {
        range.offset..range.size.map_or(self.size, |s| range.offset + s)
    }
}

unsafe impl Send for Memory {}
unsafe impl Sync for Memory {}

/// The backing of a [`Memory`] allocation.
#[derive(Debug)]
pub(crate) enum MemoryHeap {
    Private,
    Public(MemoryTypeId, metal::Buffer),
    Native(metal::Heap),
}

/// Accumulates argument descriptors and the running argument index.
#[derive(Default)]
pub(crate) struct ArgumentArray {
    arguments: Vec<metal::ArgumentDescriptor>,
    position: usize,
}

impl ArgumentArray {
    /// Maps a hal descriptor type to the Metal resource usage it is bound with.
    pub fn describe_usage(ty: pso::DescriptorType) -> metal::MTLResourceUsage {
        use hal::pso::DescriptorType as Dt;
        use metal::MTLResourceUsage;

        match ty {
            Dt::Sampler => MTLResourceUsage::empty(),
            Dt::Image { ty } => match ty {
                pso::ImageDescriptorType::Sampled { .. } => MTLResourceUsage::Sample,
                pso::ImageDescriptorType::Storage { read_only: true } => MTLResourceUsage::Read,
                pso::ImageDescriptorType::Storage { ..
} => MTLResourceUsage::Write, + }, + Dt::Buffer { ty, format } => match ty { + pso::BufferDescriptorType::Storage { read_only: true } => MTLResourceUsage::Read, + pso::BufferDescriptorType::Storage { .. } => MTLResourceUsage::Write, + pso::BufferDescriptorType::Uniform => match format { + pso::BufferDescriptorFormat::Structured { .. } => MTLResourceUsage::Read, + pso::BufferDescriptorFormat::Texel => MTLResourceUsage::Sample, + }, + }, + Dt::InputAttachment => MTLResourceUsage::Sample, + } + } + + pub fn push( + &mut self, + ty: metal::MTLDataType, + count: usize, + usage: metal::MTLResourceUsage, + ) -> usize { + use metal::{MTLArgumentAccess, MTLResourceUsage}; + + let pos = self.position; + self.position += count; + let access = if usage == MTLResourceUsage::Write { + MTLArgumentAccess::ReadWrite + } else { + MTLArgumentAccess::ReadOnly + }; + + let arg = metal::ArgumentDescriptor::new(); + arg.set_array_length(count as u64); + arg.set_index(pos as u64); + arg.set_access(access); + arg.set_data_type(ty); + self.arguments.push(arg.to_owned()); + + pos + } + + pub fn build<'a>(self) -> (&'a metal::ArrayRef<metal::ArgumentDescriptor>, usize) { + ( + metal::Array::from_owned_slice(&self.arguments), + self.position, + ) + } +} + +#[derive(Debug)] +pub enum QueryPool { + Occlusion(Range<u32>), + Timestamp, +} + +#[derive(Debug)] +pub enum FenceInner { + Idle { signaled: bool }, + PendingSubmission(metal::CommandBuffer), +} + +#[derive(Debug)] +pub struct Fence(pub(crate) Mutex<FenceInner>); + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +//TODO: review the atomic ordering +#[derive(Debug)] +pub struct Event(pub(crate) Arc<AtomicBool>); + +extern "C" { + fn dispatch_semaphore_wait(semaphore: *mut c_void, timeout: u64) -> c_long; + fn dispatch_semaphore_signal(semaphore: *mut c_void) -> c_long; + fn dispatch_semaphore_create(value: c_long) -> *mut c_void; + fn dispatch_release(object: *mut c_void); +} + +#[cfg(feature = "signpost")] +extern "C" { + fn 
kdebug_signpost(code: u32, arg1: usize, arg2: usize, arg3: usize, arg4: usize); + fn kdebug_signpost_start(code: u32, arg1: usize, arg2: usize, arg3: usize, arg4: usize); + fn kdebug_signpost_end(code: u32, arg1: usize, arg2: usize, arg3: usize, arg4: usize); +} + +#[derive(Clone, Debug)] +pub struct SystemSemaphore(*mut c_void); +unsafe impl Send for SystemSemaphore {} +unsafe impl Sync for SystemSemaphore {} + +impl Drop for SystemSemaphore { + fn drop(&mut self) { + unsafe { dispatch_release(self.0) } + } +} +impl SystemSemaphore { + pub(crate) fn new() -> Self { + SystemSemaphore(unsafe { dispatch_semaphore_create(1) }) + } + pub(crate) fn signal(&self) { + unsafe { + dispatch_semaphore_signal(self.0); + } + } + pub(crate) fn wait(&self, timeout: u64) { + unsafe { + dispatch_semaphore_wait(self.0, timeout); + } + } +} + +#[derive(Clone, Debug)] +pub struct Signpost { + code: u32, + args: [usize; 4], +} + +impl Drop for Signpost { + fn drop(&mut self) { + #[cfg(feature = "signpost")] + unsafe { + kdebug_signpost_end( + self.code, + self.args[0], + self.args[1], + self.args[2], + self.args[3], + ); + } + } +} + +#[allow(dead_code)] +impl Signpost { + pub(crate) fn new(code: u32, args: [usize; 4]) -> Self { + #[cfg(feature = "signpost")] + unsafe { + kdebug_signpost_start(code, args[0], args[1], args[2], args[3]); + } + Signpost { code, args } + } + pub(crate) fn place(code: u32, args: [usize; 4]) { + #[cfg(feature = "signpost")] + unsafe { + kdebug_signpost(code, args[0], args[1], args[2], args[3]); + } + #[cfg(not(feature = "signpost"))] + let _ = (code, args); + } +} diff --git a/third_party/rust/gfx-backend-metal/src/soft.rs b/third_party/rust/gfx-backend-metal/src/soft.rs new file mode 100644 index 0000000000..278df1f272 --- /dev/null +++ b/third_party/rust/gfx-backend-metal/src/soft.rs @@ -0,0 +1,524 @@ +use crate::{ + command::IndexBuffer, native::RasterizerState, BufferPtr, ResourceIndex, ResourcePtr, + SamplerPtr, TexturePtr, +}; + +use 
auxil::ShaderStage;
use hal;
use metal;

use std::{fmt::Debug, ops::Range};

/// Index into the resource vectors of [`Own`].
pub type CacheResourceIndex = u32;

/// Selects how the payloads of recorded commands are represented.
///
/// The two implementors in this file are [`Own`] (owned data and index ranges)
/// and `&Ref` (borrowed slices and references).
pub trait Resources: Debug {
    type Data: Debug;
    type BufferArray: Debug;
    type TextureArray: Debug;
    type SamplerArray: Debug;
    type DepthStencil: Debug;
    type RenderPipeline: Debug;
    type ComputePipeline: Debug;
}

/// Owned storage for the resource arrays referenced by owned commands.
/// Commands refer to their entries through `Range<CacheResourceIndex>`.
#[derive(Clone, Debug, Default)]
pub struct Own {
    pub buffers: Vec<Option<BufferPtr>>,
    pub buffer_offsets: Vec<hal::buffer::Offset>,
    pub textures: Vec<Option<TexturePtr>>,
    pub samplers: Vec<Option<SamplerPtr>>,
}

impl Resources for Own {
    type Data = Vec<u32>;
    type BufferArray = Range<CacheResourceIndex>;
    type TextureArray = Range<CacheResourceIndex>;
    type SamplerArray = Range<CacheResourceIndex>;
    type DepthStencil = metal::DepthStencilState;
    type RenderPipeline = metal::RenderPipelineState;
    type ComputePipeline = metal::ComputePipelineState;
}

/// Marker type; `&'a Ref` is the borrowed flavor of [`Resources`].
#[derive(Debug)]
pub struct Ref;
impl<'a> Resources for &'a Ref {
    type Data = &'a [u32];
    // buffers and their offsets travel together as a pair of slices
    type BufferArray = (&'a [Option<BufferPtr>], &'a [hal::buffer::Offset]);
    type TextureArray = &'a [Option<TexturePtr>];
    type SamplerArray = &'a [Option<SamplerPtr>];
    type DepthStencil = &'a metal::DepthStencilStateRef;
    type RenderPipeline = &'a metal::RenderPipelineStateRef;
    type ComputePipeline = &'a metal::ComputePipelineStateRef;
}

//TODO: Remove `Clone` from here, blocked by arguments of `quick_render` and
// `quick_compute` which currently use `cloned()` iteration.
+#[derive(Clone, Debug)] +pub enum RenderCommand<R: Resources> { + SetViewport(hal::pso::Rect, Range<f32>), + SetScissor(metal::MTLScissorRect), + SetBlendColor(hal::pso::ColorValue), + SetDepthBias(hal::pso::DepthBias), + SetDepthStencilState(R::DepthStencil), + SetStencilReferenceValues(hal::pso::Sided<hal::pso::StencilValue>), + SetRasterizerState(RasterizerState), + SetVisibilityResult(metal::MTLVisibilityResultMode, hal::buffer::Offset), + BindBuffer { + stage: ShaderStage, + index: ResourceIndex, + buffer: BufferPtr, + offset: hal::buffer::Offset, + }, + BindBuffers { + stage: ShaderStage, + index: ResourceIndex, + buffers: R::BufferArray, + }, + BindBufferData { + stage: ShaderStage, + index: ResourceIndex, + words: R::Data, + }, + BindTextures { + stage: ShaderStage, + index: ResourceIndex, + textures: R::TextureArray, + }, + BindSamplers { + stage: ShaderStage, + index: ResourceIndex, + samplers: R::SamplerArray, + }, + BindPipeline(R::RenderPipeline), + UseResource { + resource: ResourcePtr, + usage: metal::MTLResourceUsage, + }, + Draw { + primitive_type: metal::MTLPrimitiveType, + vertices: Range<hal::VertexCount>, + instances: Range<hal::InstanceCount>, + }, + DrawIndexed { + primitive_type: metal::MTLPrimitiveType, + index: IndexBuffer<BufferPtr>, + indices: Range<hal::IndexCount>, + base_vertex: hal::VertexOffset, + instances: Range<hal::InstanceCount>, + }, + DrawIndirect { + primitive_type: metal::MTLPrimitiveType, + buffer: BufferPtr, + offset: hal::buffer::Offset, + }, + DrawIndexedIndirect { + primitive_type: metal::MTLPrimitiveType, + index: IndexBuffer<BufferPtr>, + buffer: BufferPtr, + offset: hal::buffer::Offset, + }, +} + +#[derive(Clone, Debug)] +pub enum BlitCommand { + FillBuffer { + dst: BufferPtr, + range: Range<hal::buffer::Offset>, + value: u8, + }, + CopyBuffer { + src: BufferPtr, + dst: BufferPtr, + region: hal::command::BufferCopy, + }, + CopyImage { + src: TexturePtr, + dst: TexturePtr, + region: hal::command::ImageCopy, + }, + 
CopyBufferToImage { + src: BufferPtr, + dst: TexturePtr, + dst_desc: hal::format::FormatDesc, + region: hal::command::BufferImageCopy, + }, + CopyImageToBuffer { + src: TexturePtr, + src_desc: hal::format::FormatDesc, + dst: BufferPtr, + region: hal::command::BufferImageCopy, + }, +} + +#[derive(Clone, Debug)] +pub enum ComputeCommand<R: Resources> { + BindBuffer { + index: ResourceIndex, + buffer: BufferPtr, + offset: hal::buffer::Offset, + }, + BindBuffers { + index: ResourceIndex, + buffers: R::BufferArray, + }, + BindBufferData { + index: ResourceIndex, + words: R::Data, + }, + BindTextures { + index: ResourceIndex, + textures: R::TextureArray, + }, + BindSamplers { + index: ResourceIndex, + samplers: R::SamplerArray, + }, + BindPipeline(R::ComputePipeline), + UseResource { + resource: ResourcePtr, + usage: metal::MTLResourceUsage, + }, + Dispatch { + wg_size: metal::MTLSize, + wg_count: metal::MTLSize, + }, + DispatchIndirect { + wg_size: metal::MTLSize, + buffer: BufferPtr, + offset: hal::buffer::Offset, + }, +} + +#[derive(Clone, Debug)] +pub enum Pass { + Render(metal::RenderPassDescriptor), + Blit, + Compute, +} + +impl Own { + pub fn clear(&mut self) { + self.buffers.clear(); + self.buffer_offsets.clear(); + self.textures.clear(); + self.samplers.clear(); + } + + pub fn own_render(&mut self, com: RenderCommand<&Ref>) -> RenderCommand<Self> { + use self::RenderCommand::*; + match com { + SetViewport(rect, depth) => SetViewport(rect, depth), + SetScissor(rect) => SetScissor(rect), + SetBlendColor(color) => SetBlendColor(color), + SetDepthBias(bias) => SetDepthBias(bias), + SetDepthStencilState(state) => SetDepthStencilState(state.to_owned()), + SetStencilReferenceValues(sided) => SetStencilReferenceValues(sided), + SetRasterizerState(ref state) => SetRasterizerState(state.clone()), + SetVisibilityResult(mode, offset) => SetVisibilityResult(mode, offset), + BindBuffer { + stage, + index, + buffer, + offset, + } => BindBuffer { + stage, + index, + buffer, + 
offset,
            },
            BindBuffers {
                stage,
                index,
                buffers: (buffers, offsets),
            } => BindBuffers {
                stage,
                index,
                buffers: {
                    let start = self.buffers.len() as CacheResourceIndex;
                    self.buffers.extend_from_slice(buffers);
                    self.buffer_offsets.extend_from_slice(offsets);
                    start..self.buffers.len() as CacheResourceIndex
                },
            },
            BindBufferData {
                stage,
                index,
                words,
            } => BindBufferData {
                stage,
                index,
                words: words.to_vec(),
            },
            BindTextures {
                stage,
                index,
                textures,
            } => BindTextures {
                stage,
                index,
                textures: {
                    let start = self.textures.len() as CacheResourceIndex;
                    self.textures.extend_from_slice(textures);
                    start..self.textures.len() as CacheResourceIndex
                },
            },
            BindSamplers {
                stage,
                index,
                samplers,
            } => BindSamplers {
                stage,
                index,
                samplers: {
                    let start = self.samplers.len() as CacheResourceIndex;
                    self.samplers.extend_from_slice(samplers);
                    start..self.samplers.len() as CacheResourceIndex
                },
            },
            BindPipeline(pso) => BindPipeline(pso.to_owned()),
            UseResource { resource, usage } => UseResource { resource, usage },
            Draw {
                primitive_type,
                vertices,
                instances,
            } => Draw {
                primitive_type,
                vertices,
                instances,
            },
            DrawIndexed {
                primitive_type,
                index,
                indices,
                base_vertex,
                instances,
            } => DrawIndexed {
                primitive_type,
                index,
                indices,
                base_vertex,
                instances,
            },
            DrawIndirect {
                primitive_type,
                buffer,
                offset,
            } => DrawIndirect {
                primitive_type,
                buffer,
                offset,
            },
            DrawIndexedIndirect {
                primitive_type,
                index,
                buffer,
                offset,
            } => DrawIndexedIndirect {
                primitive_type,
                index,
                buffer,
                offset,
            },
        }
    }

    /// Converts a borrowed compute command into one that owns its data.
    ///
    /// The slices carried by `BindBuffers`, `BindTextures` and `BindSamplers`
    /// are appended to the arrays of `self` and replaced by the index range
    /// they occupy there. `BindBufferData` words are copied with `to_vec`,
    /// the pipeline of `BindPipeline` is taken with `to_owned`, and all other
    /// commands are rebuilt field by field.
    pub fn own_compute(&mut self, com: ComputeCommand<&Ref>) -> ComputeCommand<Self> {
        use self::ComputeCommand::*;
        match com {
            BindBuffer {
                index,
                buffer,
                offset,
            } => BindBuffer {
                index,
                buffer,
                offset,
            },
            BindBuffers {
                index,
                buffers: (buffers, offsets),
            } => {
                // The range is measured on `buffers` only; the offsets are
                // appended alongside so the same range can index both arrays.
                // NOTE(review): this presumes both slices have equal length.
                let first = self.buffers.len() as CacheResourceIndex;
                self.buffers.extend_from_slice(buffers);
                self.buffer_offsets.extend_from_slice(offsets);
                let last = self.buffers.len() as CacheResourceIndex;
                BindBuffers {
                    index,
                    buffers: first..last,
                }
            }
            BindBufferData { index, words } => BindBufferData {
                index,
                words: words.to_vec(),
            },
            BindTextures { index, textures } => {
                let first = self.textures.len() as CacheResourceIndex;
                self.textures.extend_from_slice(textures);
                let last = self.textures.len() as CacheResourceIndex;
                BindTextures {
                    index,
                    textures: first..last,
                }
            }
            BindSamplers { index, samplers } => {
                let first = self.samplers.len() as CacheResourceIndex;
                self.samplers.extend_from_slice(samplers);
                let last = self.samplers.len() as CacheResourceIndex;
                BindSamplers {
                    index,
                    samplers: first..last,
                }
            }
            BindPipeline(pso) => BindPipeline(pso.to_owned()),
            UseResource { resource, usage } => UseResource { resource, usage },
            Dispatch { wg_size, wg_count } => Dispatch { wg_size, wg_count },
            DispatchIndirect {
                wg_size,
                buffer,
                offset,
            } => DispatchIndirect {
                wg_size,
                buffer,
                offset,
            },
        }
    }

    /// Shifts every resource range stored in an owned render command by the
    /// current length of the matching array in `self`.
    ///
    /// Commands that carry no resource range are left untouched.
    /// NOTE(review): presumably called on commands recorded against another
    /// cache right before that cache is appended with `extend` - confirm
    /// against the callers.
    pub fn rebase_render(&self, com: &mut RenderCommand<Own>) {
        use self::RenderCommand::*;
        match com {
            BindBuffers { buffers, .. } => {
                let shift = self.buffers.len() as CacheResourceIndex;
                buffers.start += shift;
                buffers.end += shift;
            }
            BindTextures { textures, .. } => {
                let shift = self.textures.len() as CacheResourceIndex;
                textures.start += shift;
                textures.end += shift;
            }
            BindSamplers { samplers, .. } => {
                let shift = self.samplers.len() as CacheResourceIndex;
                samplers.start += shift;
                samplers.end += shift;
            }
            // Listed explicitly (no `_`) so that adding a variant forces a
            // decision here.
            SetViewport(..)
            | SetScissor(..)
            | SetBlendColor(..)
            | SetDepthBias(..)
            | SetDepthStencilState(..)
            | SetStencilReferenceValues(..)
            | SetRasterizerState(..)
            | SetVisibilityResult(..)
            | BindBuffer { .. }
            | BindBufferData { .. }
            | BindPipeline(..)
            | UseResource { .. }
            | Draw { .. }
            | DrawIndexed { .. }
            | DrawIndirect { .. }
            | DrawIndexedIndirect { .. } => {}
        }
    }

    /// Compute-command counterpart of `rebase_render`: shifts the buffer,
    /// texture and sampler ranges by the current array lengths of `self` and
    /// leaves all other commands untouched.
    pub fn rebase_compute(&self, com: &mut ComputeCommand<Own>) {
        use self::ComputeCommand::*;
        match com {
            BindBuffers { buffers, .. } => {
                let shift = self.buffers.len() as CacheResourceIndex;
                buffers.start += shift;
                buffers.end += shift;
            }
            BindTextures { textures, .. } => {
                let shift = self.textures.len() as CacheResourceIndex;
                textures.start += shift;
                textures.end += shift;
            }
            BindSamplers { samplers, .. } => {
                let shift = self.samplers.len() as CacheResourceIndex;
                samplers.start += shift;
                samplers.end += shift;
            }
            BindBuffer { .. }
            | BindBufferData { .. }
            | BindPipeline(..)
            | UseResource { .. }
            | Dispatch { .. }
            | DispatchIndirect { .. } => {}
        }
    }

    /// Appends all buffers, buffer offsets, textures and samplers of `other`
    /// to the corresponding arrays of `self`.
    pub fn extend(&mut self, other: &Self) {
        self.buffers.extend(other.buffers.iter().cloned());
        self.buffer_offsets
            .extend(other.buffer_offsets.iter().cloned());
        self.textures.extend(other.textures.iter().cloned());
        self.samplers.extend(other.samplers.iter().cloned());
    }
}

/// This is a helper trait that allows us to unify owned and non-owned handling
/// of the context-dependent data, such as resource arrays.
+pub trait AsSlice<T, R> { + fn as_slice<'a>(&'a self, resources: &'a R) -> &'a [T]; +} +impl<'b, T> AsSlice<Option<T>, &'b Ref> for &'b [Option<T>] { + #[inline(always)] + fn as_slice<'a>(&'a self, _: &'a &'b Ref) -> &'a [Option<T>] { + self + } +} +impl<'b> AsSlice<Option<BufferPtr>, &'b Ref> + for (&'b [Option<BufferPtr>], &'b [hal::buffer::Offset]) +{ + #[inline(always)] + fn as_slice<'a>(&'a self, _: &'a &'b Ref) -> &'a [Option<BufferPtr>] { + self.0 + } +} +impl<'b> AsSlice<hal::buffer::Offset, &'b Ref> + for (&'b [Option<BufferPtr>], &'b [hal::buffer::Offset]) +{ + #[inline(always)] + fn as_slice<'a>(&'a self, _: &'a &'b Ref) -> &'a [hal::buffer::Offset] { + self.1 + } +} +impl AsSlice<Option<BufferPtr>, Own> for Range<CacheResourceIndex> { + #[inline(always)] + fn as_slice<'a>(&'a self, resources: &'a Own) -> &'a [Option<BufferPtr>] { + &resources.buffers[self.start as usize..self.end as usize] + } +} +impl AsSlice<hal::buffer::Offset, Own> for Range<CacheResourceIndex> { + #[inline(always)] + fn as_slice<'a>(&'a self, resources: &'a Own) -> &'a [hal::buffer::Offset] { + &resources.buffer_offsets[self.start as usize..self.end as usize] + } +} +impl AsSlice<Option<TexturePtr>, Own> for Range<CacheResourceIndex> { + #[inline(always)] + fn as_slice<'a>(&'a self, resources: &'a Own) -> &'a [Option<TexturePtr>] { + &resources.textures[self.start as usize..self.end as usize] + } +} +impl AsSlice<Option<SamplerPtr>, Own> for Range<CacheResourceIndex> { + #[inline(always)] + fn as_slice<'a>(&'a self, resources: &'a Own) -> &'a [Option<SamplerPtr>] { + &resources.samplers[self.start as usize..self.end as usize] + } +} + +fn _test_command_sizes( + render: RenderCommand<&Ref>, + blit: BlitCommand, + compute: ComputeCommand<&Ref>, +) { + use std::mem::transmute; + let _ = unsafe { + ( + transmute::<_, [usize; 6]>(render), + transmute::<_, [usize; 9]>(blit), + transmute::<_, [usize; 7]>(compute), + ) + }; +} diff --git a/third_party/rust/gfx-backend-metal/src/window.rs 
b/third_party/rust/gfx-backend-metal/src/window.rs new file mode 100644 index 0000000000..a7040ff8c1 --- /dev/null +++ b/third_party/rust/gfx-backend-metal/src/window.rs @@ -0,0 +1,286 @@ +use crate::{ + device::{Device, PhysicalDevice}, + internal::Channel, + native, Backend, QueueFamily, Shared, +}; + +use hal::{format, image, window as w}; + +use crate::CGRect; +use metal::{CGFloat, CGSize, CoreAnimationDrawable}; +use objc::rc::autoreleasepool; +use objc::runtime::Object; +use parking_lot::Mutex; + +use std::borrow::Borrow; +use std::ptr::NonNull; +use std::thread; + +#[derive(Debug)] +pub struct Surface { + view: Option<NonNull<Object>>, + render_layer: Mutex<metal::CoreAnimationLayer>, + swapchain_format: metal::MTLPixelFormat, + swapchain_format_desc: format::FormatDesc, + main_thread_id: thread::ThreadId, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +impl Surface { + pub fn new(view: Option<NonNull<Object>>, layer: metal::CoreAnimationLayer) -> Self { + Surface { + view, + render_layer: Mutex::new(layer), + swapchain_format: metal::MTLPixelFormat::Invalid, + swapchain_format_desc: format::FormatDesc { + bits: 0, + dim: (0, 0), + packed: false, + aspects: format::Aspects::empty(), + }, + main_thread_id: thread::current().id(), + } + } + + pub(crate) fn dispose(self) { + if let Some(view) = self.view { + let () = unsafe { msg_send![view.as_ptr(), release] }; + } + } + + fn configure(&self, shared: &Shared, config: &w::SwapchainConfig) -> metal::MTLPixelFormat { + info!("build swapchain {:?}", config); + + let caps = &shared.private_caps; + let mtl_format = caps + .map_format(config.format) + .expect("unsupported backbuffer format"); + + let render_layer = self.render_layer.lock(); + let framebuffer_only = config.image_usage == image::Usage::COLOR_ATTACHMENT; + let display_sync = config.present_mode != w::PresentMode::IMMEDIATE; + let is_mac = caps.os_is_mac; + let can_set_next_drawable_timeout = if is_mac { + 
caps.has_version_at_least(10, 13) + } else { + caps.has_version_at_least(11, 0) + }; + let can_set_display_sync = is_mac && caps.has_version_at_least(10, 13); + let drawable_size = CGSize::new(config.extent.width as f64, config.extent.height as f64); + + let device_raw = shared.device.lock(); + unsafe { + // On iOS, unless the user supplies a view with a CAMetalLayer, we + // create one as a sublayer. However, when the view changes size, + // its sublayers are not automatically resized, and we must resize + // it here. The drawable size and the layer size don't correlate + #[cfg(target_os = "ios")] + { + if let Some(view) = self.view { + let main_layer: *mut Object = msg_send![view.as_ptr(), layer]; + let bounds: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![*render_layer, setFrame: bounds]; + } + } + render_layer.set_device(&*device_raw); + render_layer.set_pixel_format(mtl_format); + render_layer.set_framebuffer_only(framebuffer_only as _); + + // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) + let () = msg_send![*render_layer, setMaximumDrawableCount: config.image_count as u64]; + + render_layer.set_drawable_size(drawable_size); + if can_set_next_drawable_timeout { + let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; + } + if can_set_display_sync { + let () = msg_send![*render_layer, setDisplaySyncEnabled: display_sync]; + } + }; + + mtl_format + } + + fn dimensions(&self) -> w::Extent2D { + let (size, scale): (CGSize, CGFloat) = match self.view { + Some(view) if !cfg!(target_os = "macos") => unsafe { + let bounds: CGRect = msg_send![view.as_ptr(), bounds]; + let window: Option<NonNull<Object>> = msg_send![view.as_ptr(), window]; + let screen = window.and_then(|window| -> Option<NonNull<Object>> { + msg_send![window.as_ptr(), screen] + }); + match screen { + Some(screen) => { + let screen_space: *mut Object = msg_send![screen.as_ptr(), coordinateSpace]; + let rect: CGRect = 
msg_send![view.as_ptr(), convertRect:bounds toCoordinateSpace:screen_space]; + let scale_factor: CGFloat = msg_send![screen.as_ptr(), nativeScale]; + (rect.size, scale_factor) + } + None => (bounds.size, 1.0), + } + }, + _ => unsafe { + let render_layer_borrow = self.render_layer.lock(); + let render_layer = render_layer_borrow.as_ref(); + let bounds: CGRect = msg_send![render_layer, bounds]; + let contents_scale: CGFloat = msg_send![render_layer, contentsScale]; + (bounds.size, contents_scale) + }, + }; + w::Extent2D { + width: (size.width * scale) as u32, + height: (size.height * scale) as u32, + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub enum AcquireMode { + Wait, + Oldest, +} + +impl Default for AcquireMode { + fn default() -> Self { + AcquireMode::Oldest + } +} + +#[derive(Debug)] +pub struct SwapchainImage { + image: native::Image, + view: native::ImageView, + drawable: metal::CoreAnimationDrawable, +} + +unsafe impl Send for SwapchainImage {} +unsafe impl Sync for SwapchainImage {} + +impl SwapchainImage { + pub(crate) fn into_drawable(self) -> CoreAnimationDrawable { + self.drawable + } +} + +impl Borrow<native::Image> for SwapchainImage { + fn borrow(&self) -> &native::Image { + &self.image + } +} + +impl Borrow<native::ImageView> for SwapchainImage { + fn borrow(&self) -> &native::ImageView { + &self.view + } +} + +impl w::Surface<Backend> for Surface { + fn supports_queue_family(&self, _queue_family: &QueueFamily) -> bool { + // we only expose one family atm, so it's compatible + true + } + + fn capabilities(&self, physical_device: &PhysicalDevice) -> w::SurfaceCapabilities { + let current_extent = if self.main_thread_id == thread::current().id() { + Some(self.dimensions()) + } else { + warn!("Unable to get the current view dimensions on a non-main thread"); + None + }; + + let device_caps = &physical_device.shared.private_caps; + + let can_set_maximum_drawables_count = + device_caps.os_is_mac || device_caps.has_version_at_least(11, 2); + let 
can_set_display_sync = + device_caps.os_is_mac && device_caps.has_version_at_least(10, 13); + + w::SurfaceCapabilities { + present_modes: if can_set_display_sync { + w::PresentMode::FIFO | w::PresentMode::IMMEDIATE + } else { + w::PresentMode::FIFO + }, + composite_alpha_modes: w::CompositeAlphaMode::OPAQUE, //TODO + //Note: this is hardcoded in `CAMetalLayer` documentation + image_count: if can_set_maximum_drawables_count { + 2..=3 + } else { + // 3 is the default in `CAMetalLayer` documentation + // iOS 10.3 was tested to use 3 on iphone5s + 3..=3 + }, + current_extent, + extents: w::Extent2D { + width: 4, + height: 4, + }..=w::Extent2D { + width: 4096, + height: 4096, + }, + max_image_layers: 1, + usage: image::Usage::COLOR_ATTACHMENT + | image::Usage::SAMPLED + | image::Usage::TRANSFER_SRC + | image::Usage::TRANSFER_DST, + } + } + + fn supported_formats(&self, _physical_device: &PhysicalDevice) -> Option<Vec<format::Format>> { + Some(vec![ + format::Format::Bgra8Unorm, + format::Format::Bgra8Srgb, + format::Format::Rgba16Sfloat, + ]) + } +} + +impl w::PresentationSurface<Backend> for Surface { + type SwapchainImage = SwapchainImage; + + unsafe fn configure_swapchain( + &mut self, + device: &Device, + config: w::SwapchainConfig, + ) -> Result<(), w::CreationError> { + assert!(image::Usage::COLOR_ATTACHMENT.contains(config.image_usage)); + self.swapchain_format = self.configure(&device.shared, &config); + Ok(()) + } + + unsafe fn unconfigure_swapchain(&mut self, _device: &Device) { + self.swapchain_format = metal::MTLPixelFormat::Invalid; + } + + unsafe fn acquire_image( + &mut self, + _timeout_ns: u64, //TODO: use the timeout + ) -> Result<(Self::SwapchainImage, Option<w::Suboptimal>), w::AcquireError> { + let render_layer = self.render_layer.lock(); + let (drawable, texture) = autoreleasepool(|| { + let drawable = render_layer.next_drawable().unwrap(); + (drawable.to_owned(), drawable.texture().to_owned()) + }); + let size = render_layer.drawable_size(); + + 
let sc_image = SwapchainImage { + image: native::Image { + like: native::ImageLike::Texture(texture.clone()), + kind: image::Kind::D2(size.width as u32, size.height as u32, 1, 1), + mip_levels: 1, + format_desc: self.swapchain_format_desc, + shader_channel: Channel::Float, + mtl_format: self.swapchain_format, + mtl_type: metal::MTLTextureType::D2, + }, + view: native::ImageView { + texture, + mtl_format: self.swapchain_format, + }, + drawable, + }; + Ok((sc_image, None)) + } +} |