/*!
# Metal API internals.

## Pipeline Layout

In Metal, push constants, vertex buffers, and resources in the bind groups
are all placed together in the native resource bindings, which work similarly
to D3D11: there are tables of textures, buffers, and samplers.

We put push constants first (if any) in the table, followed by bind group 0
resources, followed by other bind groups. The vertex buffers are bound at the
very end of the VS buffer table.
!*/
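// Worked example of the scheme above (hypothetical counts, for illustration
// only): with one push-constant range, two buffers in bind group 0, and one
// buffer in bind group 1, a vertex stage's buffer argument table would be
// assigned roughly as:
//
//   [0]      push constants (an internal buffer)
//   [1], [2] bind group 0 buffers
//   [3]      bind group 1 buffer
//   [N-v..N] the v vertex buffers, bound at the end of the N-entry table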
// `MTLFeatureSet` is superseded by `MTLGpuFamily`.
// However, `MTLGpuFamily` is only supported starting with macOS 10.15, whereas our minimum target is macOS 10.13.
// See https://github.com/gpuweb/gpuweb/issues/1069 for the minimum spec.
// TODO: Eventually all deprecated features should be abstracted and use the new API when available.
#[allow(deprecated)]
mod adapter;
mod command;
mod conv;
mod device;
mod surface;
mod time;

use std::{
    fmt, iter, ops,
    ptr::NonNull,
    sync::{atomic, Arc},
    thread,
};

use arrayvec::ArrayVec;
use bitflags::bitflags;
use metal::foreign_types::ForeignTypeRef as _;
use parking_lot::{Mutex, RwLock};

#[derive(Clone, Debug)]
pub struct Api;

type ResourceIndex = u32;

impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;

    type AccelerationStructure = AccelerationStructure;
}

pub struct Instance {
    managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate,
}

impl Instance {
    pub fn create_surface_from_layer(&self, layer: &metal::MetalLayerRef) -> Surface {
        unsafe { Surface::from_layer(layer) }
    }
}

impl crate::Instance<Api> for Instance {
    unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> {
        profiling::scope!("Init Metal Backend");
        // We do not enable metal validation based on the validation flags as it affects the entire
        // process. Instead, we enable the validation inside the test harness itself in tests/src/native.rs.
        Ok(Instance {
            managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate::new(),
        })
    }

    unsafe fn create_surface(
        &self,
        _display_handle: raw_window_handle::RawDisplayHandle,
        window_handle: raw_window_handle::RawWindowHandle,
    ) -> Result<Surface, crate::InstanceError> {
        match window_handle {
            #[cfg(target_os = "ios")]
            raw_window_handle::RawWindowHandle::UiKit(handle) => {
                let _ = &self.managed_metal_layer_delegate;
                Ok(unsafe { Surface::from_view(handle.ui_view.as_ptr(), None) })
            }
            #[cfg(target_os = "macos")]
            raw_window_handle::RawWindowHandle::AppKit(handle) => Ok(unsafe {
                Surface::from_view(
                    handle.ns_view.as_ptr(),
                    Some(&self.managed_metal_layer_delegate),
                )
            }),
            _ => Err(crate::InstanceError::new(format!(
                "window handle {window_handle:?} is not a Metal-compatible handle"
            ))),
        }
    }

    unsafe fn destroy_surface(&self, surface: Surface) {
        unsafe { surface.dispose() };
    }

    unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<Api>> {
        let devices = metal::Device::all();
        let mut adapters: Vec<crate::ExposedAdapter<Api>> = devices
            .into_iter()
            .map(|dev| {
                let name = dev.name().into();
                let shared = AdapterShared::new(dev);
                crate::ExposedAdapter {
                    info: wgt::AdapterInfo {
                        name,
                        vendor: 0,
                        device: 0,
                        device_type: shared.private_caps.device_type(),
                        driver: String::new(),
                        driver_info: String::new(),
                        backend: wgt::Backend::Metal,
                    },
                    features: shared.private_caps.features(),
                    capabilities: shared.private_caps.capabilities(),
                    adapter: Adapter::new(Arc::new(shared)),
                }
            })
            .collect();
        adapters.sort_by_key(|ad| {
            (
                ad.adapter.shared.private_caps.low_power,
                ad.adapter.shared.private_caps.headless,
            )
        });
        adapters
    }
}
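// A minimal sketch of how a caller typically drives this instance API
// (assumed caller-side code, not part of this module):
//
//     let instance = unsafe { Instance::init(&desc)? };
//     let surface = unsafe { instance.create_surface(display, window)? };
//     let mut adapters = unsafe { instance.enumerate_adapters() };
//     // `sort_by_key` orders `false` before `true`, so discrete
//     // (non-low-power), non-headless adapters come first.
//     let preferred = adapters.swap_remove(0);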
bitflags!(
    /// Similar to `MTLCounterSamplingPoint`, but a bit more abstracted for our purposes.
    #[derive(Debug, Copy, Clone)]
    pub struct TimestampQuerySupport: u32 {
        /// On creating Metal encoders.
        const STAGE_BOUNDARIES = 1 << 1;
        /// Within existing draw encoders.
        const ON_RENDER_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 2);
        /// Within existing dispatch encoders.
        const ON_COMPUTE_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 3);
        /// Within existing blit encoders.
        const ON_BLIT_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 4);

        /// Within any wgpu render/compute pass.
        const INSIDE_WGPU_PASSES = Self::ON_RENDER_ENCODER.bits() | Self::ON_COMPUTE_ENCODER.bits();
    }
);
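// Worked example of the bit math above: ON_RENDER_ENCODER = 0b0110 and
// ON_COMPUTE_ENCODER = 0b1010, so INSIDE_WGPU_PASSES = 0b1110. Every `ON_*`
// flag therefore implies STAGE_BOUNDARIES (bit 1), and a check like
// `support.contains(TimestampQuerySupport::INSIDE_WGPU_PASSES)` requires
// timestamp sampling to work inside both render and compute encoders.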
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    family_check: bool,
    msl_version: metal::MTLLanguageVersion,
    fragment_rw_storage: bool,
    read_write_texture_tier: metal::MTLReadWriteTextureTier,
    msaa_desktop: bool,
    msaa_apple3: bool,
    msaa_apple7: bool,
    resource_heaps: bool,
    argument_buffers: bool,
    shared_textures: bool,
    mutable_comparison_samplers: bool,
    sampler_clamp_to_border: bool,
    indirect_draw_dispatch: bool,
    base_vertex_first_instance_drawing: bool,
    dual_source_blending: bool,
    low_power: bool,
    headless: bool,
    layered_rendering: bool,
    function_specialization: bool,
    depth_clip_mode: bool,
    texture_cube_array: bool,
    supports_float_filtering: bool,
    format_depth24_stencil8: bool,
    format_depth32_stencil8_filter: bool,
    format_depth32_stencil8_none: bool,
    format_min_srgb_channels: u8,
    format_b5: bool,
    format_bc: bool,
    format_eac_etc: bool,
    format_astc: bool,
    format_astc_hdr: bool,
    format_any8_unorm_srgb_all: bool,
    format_any8_unorm_srgb_no_write: bool,
    format_any8_snorm_all: bool,
    format_r16_norm_all: bool,
    format_r32_all: bool,
    format_r32_no_write: bool,
    format_r32float_no_write_no_filter: bool,
    format_r32float_no_filter: bool,
    format_r32float_all: bool,
    format_rgba8_srgb_all: bool,
    format_rgba8_srgb_no_write: bool,
    format_rgb10a2_unorm_all: bool,
    format_rgb10a2_unorm_no_write: bool,
    format_rgb10a2_uint_write: bool,
    format_rg11b10_all: bool,
    format_rg11b10_no_write: bool,
    format_rgb9e5_all: bool,
    format_rgb9e5_no_write: bool,
    format_rgb9e5_filter_only: bool,
    format_rg32_color: bool,
    format_rg32_color_write: bool,
    format_rg32float_all: bool,
    format_rg32float_color_blend: bool,
    format_rg32float_no_filter: bool,
    format_rgba32int_color: bool,
    format_rgba32int_color_write: bool,
    format_rgba32float_color: bool,
    format_rgba32float_color_write: bool,
    format_rgba32float_all: bool,
    format_depth16unorm: bool,
    format_depth32float_filter: bool,
    format_depth32float_none: bool,
    format_bgr10a2_all: bool,
    format_bgr10a2_no_write: bool,
    max_buffers_per_stage: ResourceIndex,
    max_vertex_buffers: ResourceIndex,
    max_textures_per_stage: ResourceIndex,
    max_samplers_per_stage: ResourceIndex,
    buffer_alignment: u64,
    max_buffer_size: u64,
    max_texture_size: u64,
    max_texture_3d_size: u64,
    max_texture_layers: u64,
    max_fragment_input_components: u64,
    max_color_render_targets: u8,
    max_color_attachment_bytes_per_sample: u8,
    max_varying_components: u32,
    max_threads_per_group: u32,
    max_total_threadgroup_memory: u32,
    sample_count_mask: crate::TextureFormatCapabilities,
    supports_debug_markers: bool,
    supports_binary_archives: bool,
    supports_capture_manager: bool,
    can_set_maximum_drawables_count: bool,
    can_set_display_sync: bool,
    can_set_next_drawable_timeout: bool,
    supports_arrays_of_textures: bool,
    supports_arrays_of_textures_write: bool,
    supports_mutability: bool,
    supports_depth_clip_control: bool,
    supports_preserve_invariance: bool,
    supports_shader_primitive_index: bool,
    has_unified_memory: Option<bool>,
    timestamp_query_support: TimestampQuerySupport,
}

#[derive(Clone, Debug)]
struct PrivateDisabilities {
    /// Near depth is not respected properly on some Intel GPUs.
    broken_viewport_near_depth: bool,
    /// Multi-target clears don't appear to work properly on Intel GPUs.
    #[allow(dead_code)]
    broken_layered_clear_image: bool,
}

#[derive(Debug, Default)]
struct Settings {
    retain_command_buffer_references: bool,
}

struct AdapterShared {
    device: Mutex<metal::Device>,
    disabilities: PrivateDisabilities,
    private_caps: PrivateCapabilities,
    settings: Settings,
    presentation_timer: time::PresentationTimer,
}

unsafe impl Send for AdapterShared {}
unsafe impl Sync for AdapterShared {}

impl AdapterShared {
    fn new(device: metal::Device) -> Self {
        let private_caps = PrivateCapabilities::new(&device);
        log::debug!("{:#?}", private_caps);

        Self {
            disabilities: PrivateDisabilities::new(&device),
            private_caps,
            device: Mutex::new(device),
            settings: Settings::default(),
            presentation_timer: time::PresentationTimer::new(),
        }
    }
}

pub struct Adapter {
    shared: Arc<AdapterShared>,
}

pub struct Queue {
    raw: Arc<Mutex<metal::CommandQueue>>,
    timestamp_period: f32,
}

unsafe impl Send for Queue {}
unsafe impl Sync for Queue {}

impl Queue {
    pub unsafe fn queue_from_raw(raw: metal::CommandQueue, timestamp_period: f32) -> Self {
        Self {
            raw: Arc::new(Mutex::new(raw)),
            timestamp_period,
        }
    }
}

pub struct Device {
    shared: Arc<AdapterShared>,
    features: wgt::Features,
}

pub struct Surface {
    view: Option<NonNull<objc::runtime::Object>>,
    render_layer: Mutex<metal::MetalLayer>,
    swapchain_format: RwLock<Option<wgt::TextureFormat>>,
    extent: RwLock<wgt::Extent3d>,
    main_thread_id: thread::ThreadId,
    // Useful for UI-intensive applications that are sensitive to
    // window resizing.
    pub present_with_transaction: bool,
}

unsafe impl Send for Surface {}
unsafe impl Sync for Surface {}

#[derive(Debug)]
pub struct SurfaceTexture {
    texture: Texture,
    drawable: metal::MetalDrawable,
    present_with_transaction: bool,
}

impl std::borrow::Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

unsafe impl Send for SurfaceTexture {}
unsafe impl Sync for SurfaceTexture {}

impl crate::Queue<Api> for Queue {
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        _surface_textures: &[&SurfaceTexture],
        signal_fence: Option<(&mut Fence, crate::FenceValue)>,
    ) -> Result<(), crate::DeviceError> {
        objc::rc::autoreleasepool(|| {
            let extra_command_buffer = match signal_fence {
                Some((fence, value)) => {
                    let completed_value = Arc::clone(&fence.completed_value);
                    let block = block::ConcreteBlock::new(move |_cmd_buf| {
                        completed_value.store(value, atomic::Ordering::Release);
                    })
                    .copy();

                    let raw = match command_buffers.last() {
                        Some(&cmd_buf) => cmd_buf.raw.to_owned(),
                        None => {
                            let queue = self.raw.lock();
                            queue
                                .new_command_buffer_with_unretained_references()
                                .to_owned()
                        }
                    };
                    raw.set_label("(wgpu internal) Signal");
                    raw.add_completed_handler(&block);

                    fence.maintain();
                    fence.pending_command_buffers.push((value, raw.to_owned()));
                    // only return an extra one if it's extra
                    match command_buffers.last() {
                        Some(_) => None,
                        None => Some(raw),
                    }
                }
                None => None,
            };

            for cmd_buffer in command_buffers {
                cmd_buffer.raw.commit();
            }

            if let Some(raw) = extra_command_buffer {
                raw.commit();
            }
        });
        Ok(())
    }

    unsafe fn present(
        &self,
        _surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let queue = &self.raw.lock();
        objc::rc::autoreleasepool(|| {
            let command_buffer = queue.new_command_buffer();
            command_buffer.set_label("(wgpu internal) Present");

            // https://developer.apple.com/documentation/quartzcore/cametallayer/1478157-presentswithtransaction?language=objc
            if !texture.present_with_transaction {
                command_buffer.present_drawable(&texture.drawable);
            }

            command_buffer.commit();

            if texture.present_with_transaction {
                command_buffer.wait_until_scheduled();
                texture.drawable.present();
            }
        });
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.timestamp_period
    }
}
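// A minimal sketch of the submit/fence contract implemented above (assumed
// caller-side code; `create_fence` comes from the `crate::Device` trait):
//
//     let mut fence = unsafe { device.create_fence()? };
//     unsafe { queue.submit(&[&cmd_buf], &[], Some((&mut fence, 1)))? };
//     // Once the GPU finishes the batch, the completed handler stores 1
//     // into `fence.completed_value` with Release ordering, which
//     // `Fence::get_latest` (further below) observes with Acquire.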
#[derive(Debug)]
pub struct Buffer {
    raw: metal::Buffer,
    size: wgt::BufferAddress,
}

unsafe impl Send for Buffer {}
unsafe impl Sync for Buffer {}

impl Buffer {
    fn as_raw(&self) -> BufferPtr {
        unsafe { NonNull::new_unchecked(self.raw.as_ptr()) }
    }
}

#[derive(Debug)]
pub struct Texture {
    raw: metal::Texture,
    format: wgt::TextureFormat,
    raw_type: metal::MTLTextureType,
    array_layers: u32,
    mip_levels: u32,
    copy_size: crate::CopyExtent,
}

unsafe impl Send for Texture {}
unsafe impl Sync for Texture {}

#[derive(Debug)]
pub struct TextureView {
    raw: metal::Texture,
    aspects: crate::FormatAspects,
}

unsafe impl Send for TextureView {}
unsafe impl Sync for TextureView {}

impl TextureView {
    fn as_raw(&self) -> TexturePtr {
        unsafe { NonNull::new_unchecked(self.raw.as_ptr()) }
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: metal::SamplerState,
}

unsafe impl Send for Sampler {}
unsafe impl Sync for Sampler {}

impl Sampler {
    fn as_raw(&self) -> SamplerPtr {
        unsafe { NonNull::new_unchecked(self.raw.as_ptr()) }
    }
}

#[derive(Debug)]
pub struct BindGroupLayout {
    /// Sorted list of BGL entries.
    entries: Arc<[wgt::BindGroupLayoutEntry]>,
}

#[derive(Clone, Debug, Default)]
struct ResourceData<T> {
    buffers: T,
    textures: T,
    samplers: T,
}

#[derive(Clone, Debug, Default)]
struct MultiStageData<T> {
    vs: T,
    fs: T,
    cs: T,
}

const NAGA_STAGES: MultiStageData<naga::ShaderStage> = MultiStageData {
    vs: naga::ShaderStage::Vertex,
    fs: naga::ShaderStage::Fragment,
    cs: naga::ShaderStage::Compute,
};

impl<T> ops::Index<naga::ShaderStage> for MultiStageData<T> {
    type Output = T;
    fn index(&self, stage: naga::ShaderStage) -> &T {
        match stage {
            naga::ShaderStage::Vertex => &self.vs,
            naga::ShaderStage::Fragment => &self.fs,
            naga::ShaderStage::Compute => &self.cs,
        }
    }
}

impl<T> MultiStageData<T> {
    fn map_ref<Y>(&self, fun: impl Fn(&T) -> Y) -> MultiStageData<Y> {
        MultiStageData {
            vs: fun(&self.vs),
            fs: fun(&self.fs),
            cs: fun(&self.cs),
        }
    }
    fn map<Y>(self, fun: impl Fn(T) -> Y) -> MultiStageData<Y> {
        MultiStageData {
            vs: fun(self.vs),
            fs: fun(self.fs),
            cs: fun(self.cs),
        }
    }
    fn iter<'a>(&'a self) -> impl Iterator<Item = &'a T> {
        iter::once(&self.vs)
            .chain(iter::once(&self.fs))
            .chain(iter::once(&self.cs))
    }
    fn iter_mut<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> {
        iter::once(&mut self.vs)
            .chain(iter::once(&mut self.fs))
            .chain(iter::once(&mut self.cs))
    }
}

type MultiStageResourceCounters = MultiStageData<ResourceData<ResourceIndex>>;
type MultiStageResources = MultiStageData<naga::back::msl::EntryPointResources>;

#[derive(Debug)]
struct BindGroupLayoutInfo {
    base_resource_indices: MultiStageResourceCounters,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct PushConstantsInfo {
    count: u32,
    buffer_index: ResourceIndex,
}

#[derive(Debug)]
pub struct PipelineLayout {
    bind_group_infos: ArrayVec<BindGroupLayoutInfo, { crate::MAX_BIND_GROUPS }>,
    push_constants_infos: MultiStageData<Option<PushConstantsInfo>>,
    total_counters: MultiStageResourceCounters,
    total_push_constants: u32,
    per_stage_map: MultiStageResources,
}

trait AsNative {
    type Native;
    fn from(native: &Self::Native) -> Self;
    fn as_native(&self) -> &Self::Native;
}

type BufferPtr = NonNull<metal::MTLBuffer>;
type TexturePtr = NonNull<metal::MTLTexture>;
type SamplerPtr = NonNull<metal::MTLSamplerState>;

impl AsNative for BufferPtr {
    type Native = metal::BufferRef;
    #[inline]
    fn from(native: &Self::Native) -> Self {
        unsafe { NonNull::new_unchecked(native.as_ptr()) }
    }
    #[inline]
    fn as_native(&self) -> &Self::Native {
        unsafe { Self::Native::from_ptr(self.as_ptr()) }
    }
}

impl AsNative for TexturePtr {
    type Native = metal::TextureRef;
    #[inline]
    fn from(native: &Self::Native) -> Self {
        unsafe { NonNull::new_unchecked(native.as_ptr()) }
    }
    #[inline]
    fn as_native(&self) -> &Self::Native {
        unsafe { Self::Native::from_ptr(self.as_ptr()) }
    }
}
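// Example of the round trip the `AsNative` impls provide (a sketch):
//
//     let ptr: BufferPtr = AsNative::from(&*buffer.raw); // strip to a NonNull
//     let native: &metal::BufferRef = ptr.as_native();   // reborrow for FFI
//
// Storing plain `NonNull` pointers instead of retained `metal::Buffer`
// objects keeps bind groups cheap to build and clone; keeping the underlying
// objects alive is the caller's responsibility under the wgpu-hal contract.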
impl AsNative for SamplerPtr {
    type Native = metal::SamplerStateRef;
    #[inline]
    fn from(native: &Self::Native) -> Self {
        unsafe { NonNull::new_unchecked(native.as_ptr()) }
    }
    #[inline]
    fn as_native(&self) -> &Self::Native {
        unsafe { Self::Native::from_ptr(self.as_ptr()) }
    }
}

#[derive(Debug)]
struct BufferResource {
    ptr: BufferPtr,
    offset: wgt::BufferAddress,
    dynamic_index: Option<u32>,

    /// The buffer's size, if it is a [`Storage`] binding. Otherwise `None`.
    ///
    /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can
    /// hold WGSL runtime-sized arrays. When one does, we must pass its size to
    /// shader entry points to implement bounds checks and WGSL's `arrayLength`
    /// function. See [`device::CompiledShader::sized_bindings`] for details.
    ///
    /// [`Storage`]: wgt::BufferBindingType::Storage
    binding_size: Option<wgt::BufferSize>,

    binding_location: u32,
}

#[derive(Debug, Default)]
pub struct BindGroup {
    counters: MultiStageResourceCounters,
    buffers: Vec<BufferResource>,
    samplers: Vec<SamplerPtr>,
    textures: Vec<TexturePtr>,
}

unsafe impl Send for BindGroup {}
unsafe impl Sync for BindGroup {}

#[derive(Debug)]
pub struct ShaderModule {
    naga: crate::NagaShader,
    runtime_checks: bool,
}

#[derive(Debug, Default)]
struct PipelineStageInfo {
    push_constants: Option<PushConstantsInfo>,

    /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes.
    ///
    /// See [`device::CompiledShader::sized_bindings`] for more details.
    sizes_slot: Option<naga::back::msl::Slot>,

    /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays.
    ///
    /// See [`device::CompiledShader::sized_bindings`] for more details.
    sized_bindings: Vec<naga::ResourceBinding>,
}

impl PipelineStageInfo {
    fn clear(&mut self) {
        self.push_constants = None;
        self.sizes_slot = None;
        self.sized_bindings.clear();
    }

    fn assign_from(&mut self, other: &Self) {
        self.push_constants = other.push_constants;
        self.sizes_slot = other.sizes_slot;
        self.sized_bindings.clear();
        self.sized_bindings.extend_from_slice(&other.sized_bindings);
    }
}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: metal::RenderPipelineState,
    #[allow(dead_code)]
    vs_lib: metal::Library,
    #[allow(dead_code)]
    fs_lib: Option<metal::Library>,
    vs_info: PipelineStageInfo,
    fs_info: Option<PipelineStageInfo>,
    raw_primitive_type: metal::MTLPrimitiveType,
    raw_triangle_fill_mode: metal::MTLTriangleFillMode,
    raw_front_winding: metal::MTLWinding,
    raw_cull_mode: metal::MTLCullMode,
    raw_depth_clip_mode: Option<metal::MTLDepthClipMode>,
    depth_stencil: Option<(metal::DepthStencilState, wgt::DepthBiasState)>,
}

unsafe impl Send for RenderPipeline {}
unsafe impl Sync for RenderPipeline {}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: metal::ComputePipelineState,
    #[allow(dead_code)]
    cs_lib: metal::Library,
    cs_info: PipelineStageInfo,
    work_group_size: metal::MTLSize,
    work_group_memory_sizes: Vec<u32>,
}

unsafe impl Send for ComputePipeline {}
unsafe impl Sync for ComputePipeline {}

#[derive(Debug, Clone)]
pub struct QuerySet {
    raw_buffer: metal::Buffer,
    // Metal has a custom buffer for counters.
    counter_sample_buffer: Option<metal::CounterSampleBuffer>,
    ty: wgt::QueryType,
}

unsafe impl Send for QuerySet {}
unsafe impl Sync for QuerySet {}

#[derive(Debug)]
pub struct Fence {
    completed_value: Arc<atomic::AtomicU64>,
    /// The pending fence values have to be ascending.
    pending_command_buffers: Vec<(crate::FenceValue, metal::CommandBuffer)>,
}
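// Why the ascending invariant matters (worked example with hypothetical
// states): given pending values [(1, done), (2, running), (3, done)],
// `get_latest` below would report 3. That is only sound because values are
// pushed in ascending order and Metal completes command buffers from a
// single queue in submission order, so a later value completing implies all
// earlier ones have completed as well.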
unsafe impl Send for Fence {}
unsafe impl Sync for Fence {}

impl Fence {
    fn get_latest(&self) -> crate::FenceValue {
        let mut max_value = self.completed_value.load(atomic::Ordering::Acquire);
        for &(value, ref cmd_buf) in self.pending_command_buffers.iter() {
            if cmd_buf.status() == metal::MTLCommandBufferStatus::Completed {
                max_value = value;
            }
        }
        max_value
    }

    fn maintain(&mut self) {
        let latest = self.get_latest();
        self.pending_command_buffers
            .retain(|&(value, _)| value > latest);
    }
}

struct IndexState {
    buffer_ptr: BufferPtr,
    offset: wgt::BufferAddress,
    stride: wgt::BufferAddress,
    raw_type: metal::MTLIndexType,
}

#[derive(Default)]
struct Temp {
    binding_sizes: Vec<u32>,
}

struct CommandState {
    blit: Option<metal::BlitCommandEncoder>,
    render: Option<metal::RenderCommandEncoder>,
    compute: Option<metal::ComputeCommandEncoder>,
    raw_primitive_type: metal::MTLPrimitiveType,
    index: Option<IndexState>,
    raw_wg_size: metal::MTLSize,
    stage_infos: MultiStageData<PipelineStageInfo>,

    /// Sizes of currently bound [`wgt::BufferBindingType::Storage`] buffers.
    ///
    /// Specifically:
    ///
    /// - The keys are [`ResourceBinding`] values (that is, the WGSL `@group`
    ///   and `@binding` attributes) for `var<storage>` global variables in the
    ///   current module that contain runtime-sized arrays.
    ///
    /// - The values are the actual sizes of the buffers currently bound to
    ///   provide those globals' contents, which are needed to implement bounds
    ///   checks and the WGSL `arrayLength` function.
    ///
    /// For each stage `S` in `stage_infos`, we consult this to find the sizes
    /// of the buffers listed in [`stage_infos.S.sized_bindings`], which we must
    /// pass to the entry point.
    ///
    /// See [`device::CompiledShader::sized_bindings`] for more details.
    ///
    /// [`ResourceBinding`]: naga::ResourceBinding
    storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>,

    work_group_memory_sizes: Vec<u32>,
    push_constants: Vec<u32>,

    /// Timer queries that should be executed when the next pass starts.
    pending_timer_queries: Vec<(QuerySet, u32)>,
}

pub struct CommandEncoder {
    shared: Arc<AdapterShared>,
    raw_queue: Arc<Mutex<metal::CommandQueue>>,
    raw_cmd_buf: Option<metal::CommandBuffer>,
    state: CommandState,
    temp: Temp,
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw_queue", &self.raw_queue)
            .field("raw_cmd_buf", &self.raw_cmd_buf)
            .finish()
    }
}

unsafe impl Send for CommandEncoder {}
unsafe impl Sync for CommandEncoder {}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: metal::CommandBuffer,
}

unsafe impl Send for CommandBuffer {}
unsafe impl Sync for CommandBuffer {}

#[derive(Debug)]
pub struct AccelerationStructure;
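// An illustrative sanity check of the `MultiStageData` helpers used
// throughout this backend (a minimal sketch, not an exhaustive test suite).
#[cfg(test)]
mod multi_stage_data_tests {
    use super::*;

    #[test]
    fn maps_and_indexes_per_stage() {
        let counters = MultiStageData {
            vs: 1u32,
            fs: 2,
            cs: 3,
        };
        // `map_ref` applies the closure to each stage independently.
        let doubled = counters.map_ref(|&n| n * 2);
        assert_eq!(doubled[naga::ShaderStage::Vertex], 2);
        assert_eq!(doubled[naga::ShaderStage::Fragment], 4);
        assert_eq!(doubled[naga::ShaderStage::Compute], 6);
        // `iter` visits the stages in vs, fs, cs order.
        assert_eq!(doubled.iter().copied().collect::<Vec<_>>(), vec![2, 4, 6]);
    }
}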