diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/wgpu-hal/src/metal | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/wgpu-hal/src/metal')
-rw-r--r-- | third_party/rust/wgpu-hal/src/metal/adapter.rs | 1137 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/metal/command.rs | 1254 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/metal/conv.rs | 322 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/metal/device.rs | 1251 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/metal/mod.rs | 852 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/metal/surface.rs | 280 | ||||
-rw-r--r-- | third_party/rust/wgpu-hal/src/metal/time.rs | 38 |
7 files changed, 5134 insertions, 0 deletions
diff --git a/third_party/rust/wgpu-hal/src/metal/adapter.rs b/third_party/rust/wgpu-hal/src/metal/adapter.rs new file mode 100644 index 0000000000..a946ce5819 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/adapter.rs @@ -0,0 +1,1137 @@ +use metal::{MTLFeatureSet, MTLGPUFamily, MTLLanguageVersion, MTLReadWriteTextureTier}; +use objc::{class, msg_send, sel, sel_impl}; +use parking_lot::Mutex; +use wgt::{AstcBlock, AstcChannel}; + +use std::{sync::Arc, thread}; + +use super::TimestampQuerySupport; + +const MAX_COMMAND_BUFFERS: u64 = 2048; + +unsafe impl Send for super::Adapter {} +unsafe impl Sync for super::Adapter {} + +impl super::Adapter { + pub(super) fn new(shared: Arc<super::AdapterShared>) -> Self { + Self { shared } + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let queue = self + .shared + .device + .lock() + .new_command_queue_with_max_command_buffer_count(MAX_COMMAND_BUFFERS); + + // Acquiring the meaning of timestamp ticks is hard with Metal! + // The only thing there is is a method correlating cpu & gpu timestamps (`device.sample_timestamps`). + // Users are supposed to call this method twice and calculate the difference, + // see "Converting GPU Timestamps into CPU Time": + // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time + // Not only does this mean we get an approximate value, this is as also *very slow*! + // Chromium opted to solve this using a linear regression that they stop at some point + // https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/dawn/src/dawn/native/metal/DeviceMTL.mm;drc=76be2f9f117654f3fe4faa477b0445114fccedda;bpv=0;bpt=1;l=46 + // Generally, the assumption is that timestamp values aren't changing over time, after all all other APIs provide stable values. + // + // We should do as Chromium does for the general case, but this requires quite some state tracking + // and doesn't even provide perfectly accurate values, especially at the start of the application when + // we didn't have the chance to sample a lot of values just yet. + // + // So instead, we're doing the dangerous but easy thing and use our "knowledge" of timestamps + // conversions on different devices, after all Metal isn't supported on that many ;) + // Based on: + // * https://github.com/gfx-rs/wgpu/pull/2528 + // * https://github.com/gpuweb/gpuweb/issues/1325#issuecomment-761041326 + let timestamp_period = if self.shared.device.lock().name().starts_with("Intel") { + 83.333 + } else { + // Known for Apple Silicon (at least M1 & M2, iPad Pro 2018) and AMD GPUs. + 1.0 + }; + + Ok(crate::OpenDevice { + device: super::Device { + shared: Arc::clone(&self.shared), + features, + }, + queue: super::Queue { + raw: Arc::new(Mutex::new(queue)), + timestamp_period, + }, + }) + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + use wgt::TextureFormat as Tf; + + let pc = &self.shared.private_caps; + // Affected formats documented at: + // https://developer.apple.com/documentation/metal/mtlreadwritetexturetier/mtlreadwritetexturetier1?language=objc + // https://developer.apple.com/documentation/metal/mtlreadwritetexturetier/mtlreadwritetexturetier2?language=objc + let (read_write_tier1_if, read_write_tier2_if) = match pc.read_write_texture_tier { + metal::MTLReadWriteTextureTier::TierNone => (Tfc::empty(), Tfc::empty()), + metal::MTLReadWriteTextureTier::Tier1 => (Tfc::STORAGE_READ_WRITE, Tfc::empty()), + metal::MTLReadWriteTextureTier::Tier2 => { + (Tfc::STORAGE_READ_WRITE, Tfc::STORAGE_READ_WRITE) + } + }; + let msaa_count = pc.sample_count_mask; + + let msaa_resolve_desktop_if = if pc.msaa_desktop { + Tfc::MULTISAMPLE_RESOLVE + } else { + Tfc::empty() + }; + let msaa_resolve_apple3x_if = if pc.msaa_desktop | pc.msaa_apple3 { + Tfc::MULTISAMPLE_RESOLVE + } else { + Tfc::empty() + }; + let is_not_apple1x = super::PrivateCapabilities::supports_any( + self.shared.device.lock().as_ref(), + &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + ], + ); + + // Metal defined pixel format capabilities + let all_caps = Tfc::SAMPLED_LINEAR + | Tfc::STORAGE + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | Tfc::MULTISAMPLE_RESOLVE; + + let extra = match format { + Tf::R8Unorm | Tf::R16Float | Tf::Rgba8Unorm | Tf::Rgba16Float => { + read_write_tier2_if | all_caps + } + Tf::R8Snorm | Tf::Rg8Snorm | Tf::Rgba8Snorm => { + let mut flags = all_caps; + flags.set(Tfc::MULTISAMPLE_RESOLVE, is_not_apple1x); + flags + } + Tf::R8Uint + | Tf::R8Sint + | Tf::R16Uint + | Tf::R16Sint + | Tf::Rgba8Uint + | Tf::Rgba8Sint + | Tf::Rgba16Uint + | Tf::Rgba16Sint => { + read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::R16Unorm + | Tf::R16Snorm + | Tf::Rg16Unorm + | Tf::Rg16Snorm + | Tf::Rgba16Unorm + | Tf::Rgba16Snorm => { + Tfc::SAMPLED_LINEAR + | Tfc::STORAGE + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | msaa_resolve_desktop_if + } + Tf::Rg8Unorm | Tf::Rg16Float | Tf::Bgra8Unorm => all_caps, + Tf::Rg8Uint | Tf::Rg8Sint => Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count, + Tf::R32Uint | Tf::R32Sint => { + read_write_tier1_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::R32Float => { + let flags = if pc.format_r32float_all { + all_caps + } else { + Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | Tfc::COLOR_ATTACHMENT_BLEND | msaa_count + }; + read_write_tier1_if | flags + } + Tf::Rg16Uint | Tf::Rg16Sint => Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count, + Tf::Rgba8UnormSrgb | Tf::Bgra8UnormSrgb => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rgba8_srgb_all); + flags + } + Tf::Rgb10a2Uint => { + let mut flags = Tfc::COLOR_ATTACHMENT | msaa_count; + flags.set(Tfc::STORAGE, pc.format_rgb10a2_uint_write); + flags + } + Tf::Rgb10a2Unorm => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rgb10a2_unorm_all); + flags + } + Tf::Rg11b10Float => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rg11b10_all); + flags + } + Tf::Rg32Uint | Tf::Rg32Sint => Tfc::COLOR_ATTACHMENT | Tfc::STORAGE | msaa_count, + Tf::Rg32Float => { + if pc.format_rg32float_all { + all_caps + } else { + Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | Tfc::COLOR_ATTACHMENT_BLEND | msaa_count + } + } + Tf::Rgba32Uint | Tf::Rgba32Sint => { + read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::Rgba32Float => { + let mut flags = read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT; + if pc.format_rgba32float_all { + flags |= all_caps + } else if pc.msaa_apple7 { + flags |= msaa_count + }; + flags + } + Tf::Stencil8 => { + all_caps | Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if + } + Tf::Depth16Unorm => { + let mut flags = + Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if; + if pc.format_depth16unorm { + flags |= Tfc::SAMPLED_LINEAR + } + flags + } + Tf::Depth32Float | Tf::Depth32FloatStencil8 => { + let mut flags = + Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if; + if pc.format_depth32float_filter { + flags |= Tfc::SAMPLED_LINEAR + } + flags + } + Tf::Depth24Plus | Tf::Depth24PlusStencil8 => { + let mut flags = Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count; + if pc.format_depth24_stencil8 { + flags |= Tfc::SAMPLED_LINEAR | Tfc::MULTISAMPLE_RESOLVE + } else { + flags |= msaa_resolve_apple3x_if; + if pc.format_depth32float_filter { + flags |= Tfc::SAMPLED_LINEAR + } + } + flags + } + Tf::NV12 => return Tfc::empty(), + Tf::Rgb9e5Ufloat => { + if pc.msaa_apple3 { + all_caps + } else if pc.msaa_desktop { + Tfc::SAMPLED_LINEAR + } else { + Tfc::SAMPLED_LINEAR + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | Tfc::MULTISAMPLE_RESOLVE + } + } + Tf::Bc1RgbaUnorm + | Tf::Bc1RgbaUnormSrgb + | Tf::Bc2RgbaUnorm + | Tf::Bc2RgbaUnormSrgb + | Tf::Bc3RgbaUnorm + | Tf::Bc3RgbaUnormSrgb + | Tf::Bc4RUnorm + | Tf::Bc4RSnorm + | Tf::Bc5RgUnorm + | Tf::Bc5RgSnorm + | Tf::Bc6hRgbUfloat + | Tf::Bc6hRgbFloat + | Tf::Bc7RgbaUnorm + | Tf::Bc7RgbaUnormSrgb => { + if pc.format_bc { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | Tf::EacRg11Unorm + | Tf::EacRg11Snorm => { + if pc.format_eac_etc { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + Tf::Astc { + block: _, + channel: _, + } => { + if pc.format_astc || pc.format_astc_hdr { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + }; + + Tfc::COPY_SRC | Tfc::COPY_DST | Tfc::SAMPLED | extra + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + let current_extent = if surface.main_thread_id == thread::current().id() { + Some(surface.dimensions()) + } else { + log::warn!("Unable to get the current view dimensions on a non-main thread"); + None + }; + + let mut formats = vec![ + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Rgba16Float, + ]; + if self.shared.private_caps.format_rgb10a2_unorm_all { + formats.push(wgt::TextureFormat::Rgb10a2Unorm); + } + + let pc = &self.shared.private_caps; + Some(crate::SurfaceCapabilities { + formats, + // We use this here to govern the maximum number of drawables + 1. + // See https://developer.apple.com/documentation/quartzcore/cametallayer/2938720-maximumdrawablecount + maximum_frame_latency: if pc.can_set_maximum_drawables_count { + 1..=2 + } else { + // 3 is the default value for maximum drawables in `CAMetalLayer` documentation + // iOS 10.3 was tested to use 3 on iphone5s + 2..=2 + }, + present_modes: if pc.can_set_display_sync { + vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate] + } else { + vec![wgt::PresentMode::Fifo] + }, + composite_alpha_modes: vec![ + wgt::CompositeAlphaMode::Opaque, + wgt::CompositeAlphaMode::PostMultiplied, + ], + + current_extent, + usage: crate::TextureUses::COLOR_TARGET + | crate::TextureUses::COPY_SRC + | crate::TextureUses::COPY_DST, + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + let timestamp = self.shared.presentation_timer.get_timestamp_ns(); + + wgt::PresentationTimestamp(timestamp) + } +} + +const RESOURCE_HEAP_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v3, +]; + +const ARGUMENT_BUFFER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v3, +]; + +const MUTABLE_COMPARISON_SAMPLER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const SAMPLER_CLAMP_TO_BORDER_SUPPORT: &[MTLFeatureSet] = &[MTLFeatureSet::macOS_GPUFamily1_v2]; + +const ASTC_PIXEL_FORMAT_FEATURES: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, +]; + +const ANY8_UNORM_SRGB_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, +]; + +const ANY8_SNORM_RESOLVE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGBA8_SRGB: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, +]; + +const RGB10A2UNORM_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGB10A2UINT_WRITE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RG11B10FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGB9E5FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, +]; + +const BGR10A2_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +/// "Indirect draw & dispatch arguments" in the Metal feature set tables +const INDIRECT_DRAW_DISPATCH_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +/// "Base vertex/instance drawing" in the Metal feature set tables +/// +/// in our terms, `base_vertex` and `first_instance` must be 0 +const BASE_VERTEX_FIRST_INSTANCE_SUPPORT: &[MTLFeatureSet] = INDIRECT_DRAW_DISPATCH_SUPPORT; + +const TEXTURE_CUBE_ARRAY_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const DUAL_SOURCE_BLEND_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v2, +]; + +const LAYERED_RENDERING_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const FUNCTION_SPECIALIZATION_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v2, +]; + +const DEPTH_CLIP_MODE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const OS_NOT_SUPPORT: (usize, usize) = (10000, 0); + +impl super::PrivateCapabilities { + fn supports_any(raw: &metal::DeviceRef, features_sets: &[MTLFeatureSet]) -> bool { + features_sets + .iter() + .cloned() + .any(|x| raw.supports_feature_set(x)) + } + + pub fn new(device: &metal::Device) -> Self { + #[repr(C)] + #[derive(Clone, Copy, Debug)] + #[allow(clippy::upper_case_acronyms)] + struct NSOperatingSystemVersion { + major: usize, + minor: usize, + patch: usize, + } + + impl NSOperatingSystemVersion { + fn at_least( + &self, + mac_version: (usize, usize), + ios_version: (usize, usize), + is_mac: bool, + ) -> bool { + if is_mac { + self.major > mac_version.0 + || (self.major == mac_version.0 && self.minor >= mac_version.1) + } else { + self.major > ios_version.0 + || (self.major == ios_version.0 && self.minor >= ios_version.1) + } + } + } + + let version: NSOperatingSystemVersion = unsafe { + let process_info: *mut objc::runtime::Object = + msg_send![class!(NSProcessInfo), processInfo]; + msg_send![process_info, operatingSystemVersion] + }; + + let os_is_mac = device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1); + // Metal was first introduced in OS X 10.11 and iOS 8. The current version number of visionOS is 1.0.0. Additionally, + // on the Simulator, Apple only provides the Apple2 GPU capability, and the Apple2+ GPU capability covers the capabilities of Apple2. + // Therefore, the following conditions can be used to determine if it is visionOS. + // https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator + let os_is_xr = version.major < 8 && device.supports_family(MTLGPUFamily::Apple2); + let family_check = os_is_xr || version.at_least((10, 15), (13, 0), os_is_mac); + + let mut sample_count_mask = crate::TextureFormatCapabilities::MULTISAMPLE_X4; // 1 and 4 samples are supported on all devices + if device.supports_texture_sample_count(2) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X2; + } + if device.supports_texture_sample_count(8) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X8; + } + if device.supports_texture_sample_count(16) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X16; + } + + let rw_texture_tier = if version.at_least((10, 13), (11, 0), os_is_mac) { + device.read_write_texture_support() + } else if version.at_least((10, 12), OS_NOT_SUPPORT, os_is_mac) { + if Self::supports_any(device, &[MTLFeatureSet::macOS_ReadWriteTextureTier2]) { + MTLReadWriteTextureTier::Tier2 + } else { + MTLReadWriteTextureTier::Tier1 + } + } else { + MTLReadWriteTextureTier::TierNone + }; + + let mut timestamp_query_support = TimestampQuerySupport::empty(); + if version.at_least((11, 0), (14, 0), os_is_mac) + && device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary) + { + // If we don't support at stage boundary, don't support anything else. + timestamp_query_support.insert(TimestampQuerySupport::STAGE_BOUNDARIES); + + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary) { + timestamp_query_support.insert(TimestampQuerySupport::ON_RENDER_ENCODER); + } + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary) + { + timestamp_query_support.insert(TimestampQuerySupport::ON_COMPUTE_ENCODER); + } + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtBlitBoundary) { + timestamp_query_support.insert(TimestampQuerySupport::ON_BLIT_ENCODER); + } + // `TimestampQuerySupport::INSIDE_WGPU_PASSES` emerges from the other flags. + } + + Self { + family_check, + msl_version: if os_is_xr || version.at_least((12, 0), (15, 0), os_is_mac) { + MTLLanguageVersion::V2_4 + } else if version.at_least((11, 0), (14, 0), os_is_mac) { + MTLLanguageVersion::V2_3 + } else if version.at_least((10, 15), (13, 0), os_is_mac) { + MTLLanguageVersion::V2_2 + } else if version.at_least((10, 14), (12, 0), os_is_mac) { + MTLLanguageVersion::V2_1 + } else if version.at_least((10, 13), (11, 0), os_is_mac) { + MTLLanguageVersion::V2_0 + } else if version.at_least((10, 12), (10, 0), os_is_mac) { + MTLLanguageVersion::V1_2 + } else if version.at_least((10, 11), (9, 0), os_is_mac) { + MTLLanguageVersion::V1_1 + } else { + MTLLanguageVersion::V1_0 + }, + // macOS 10.11 doesn't support read-write resources + fragment_rw_storage: version.at_least((10, 12), (8, 0), os_is_mac), + read_write_texture_tier: rw_texture_tier, + msaa_desktop: os_is_mac, + msaa_apple3: if family_check { + device.supports_family(MTLGPUFamily::Apple3) + } else { + device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily3_v4) + }, + msaa_apple7: family_check && device.supports_family(MTLGPUFamily::Apple7), + resource_heaps: Self::supports_any(device, RESOURCE_HEAP_SUPPORT), + argument_buffers: Self::supports_any(device, ARGUMENT_BUFFER_SUPPORT), + shared_textures: !os_is_mac, + mutable_comparison_samplers: Self::supports_any( + device, + MUTABLE_COMPARISON_SAMPLER_SUPPORT, + ), + sampler_clamp_to_border: Self::supports_any(device, SAMPLER_CLAMP_TO_BORDER_SUPPORT), + indirect_draw_dispatch: Self::supports_any(device, INDIRECT_DRAW_DISPATCH_SUPPORT), + base_vertex_first_instance_drawing: Self::supports_any( + device, + BASE_VERTEX_FIRST_INSTANCE_SUPPORT, + ), + dual_source_blending: Self::supports_any(device, DUAL_SOURCE_BLEND_SUPPORT), + low_power: !os_is_mac || device.is_low_power(), + headless: os_is_mac && device.is_headless(), + layered_rendering: Self::supports_any(device, LAYERED_RENDERING_SUPPORT), + function_specialization: Self::supports_any(device, FUNCTION_SPECIALIZATION_SUPPORT), + depth_clip_mode: Self::supports_any(device, DEPTH_CLIP_MODE), + texture_cube_array: Self::supports_any(device, TEXTURE_CUBE_ARRAY_SUPPORT), + supports_float_filtering: os_is_mac + || (version.at_least((11, 0), (14, 0), os_is_mac) + && device.supports_32bit_float_filtering()), + format_depth24_stencil8: os_is_mac && device.d24_s8_supported(), + format_depth32_stencil8_filter: os_is_mac, + format_depth32_stencil8_none: !os_is_mac, + format_min_srgb_channels: if os_is_mac { 4 } else { 1 }, + format_b5: !os_is_mac, + format_bc: os_is_mac, + format_eac_etc: !os_is_mac + // M1 in macOS supports EAC/ETC2 + || (family_check && device.supports_family(MTLGPUFamily::Apple7)), + // A8(Apple2) and later always support ASTC pixel formats + format_astc: (family_check && device.supports_family(MTLGPUFamily::Apple2)) + || Self::supports_any(device, ASTC_PIXEL_FORMAT_FEATURES), + // A13(Apple6) M1(Apple7) and later always support HDR ASTC pixel formats + format_astc_hdr: family_check && device.supports_family(MTLGPUFamily::Apple6), + format_any8_unorm_srgb_all: Self::supports_any(device, ANY8_UNORM_SRGB_ALL), + format_any8_unorm_srgb_no_write: !Self::supports_any(device, ANY8_UNORM_SRGB_ALL) + && !os_is_mac, + format_any8_snorm_all: Self::supports_any(device, ANY8_SNORM_RESOLVE), + format_r16_norm_all: os_is_mac, + // No devices support r32's all capabilities + format_r32_all: false, + // All devices support r32's write capability + format_r32_no_write: false, + // iOS support r32float's write capability, macOS support r32float's all capabilities + format_r32float_no_write_no_filter: false, + // Only iOS doesn't support r32float's filter capability + format_r32float_no_filter: !os_is_mac, + format_r32float_all: os_is_mac, + format_rgba8_srgb_all: Self::supports_any(device, RGBA8_SRGB), + format_rgba8_srgb_no_write: !Self::supports_any(device, RGBA8_SRGB), + format_rgb10a2_unorm_all: Self::supports_any(device, RGB10A2UNORM_ALL), + format_rgb10a2_unorm_no_write: !Self::supports_any(device, RGB10A2UNORM_ALL), + format_rgb10a2_uint_write: Self::supports_any(device, RGB10A2UINT_WRITE), + format_rg11b10_all: Self::supports_any(device, RG11B10FLOAT_ALL), + format_rg11b10_no_write: !Self::supports_any(device, RG11B10FLOAT_ALL), + format_rgb9e5_all: Self::supports_any(device, RGB9E5FLOAT_ALL), + format_rgb9e5_no_write: !Self::supports_any(device, RGB9E5FLOAT_ALL) && !os_is_mac, + format_rgb9e5_filter_only: os_is_mac, + format_rg32_color: true, + format_rg32_color_write: true, + // Only macOS support rg32float's all capabilities + format_rg32float_all: os_is_mac, + // All devices support rg32float's color + blend capabilities + format_rg32float_color_blend: true, + // Only iOS doesn't support rg32float's filter + format_rg32float_no_filter: !os_is_mac, + format_rgba32int_color: true, + // All devices support rgba32uint and rgba32sint's color + write capabilities + format_rgba32int_color_write: true, + format_rgba32float_color: true, + // All devices support rgba32float's color + write capabilities + format_rgba32float_color_write: true, + // Only macOS support rgba32float's all capabilities + format_rgba32float_all: os_is_mac, + format_depth16unorm: Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v3, + MTLFeatureSet::macOS_GPUFamily1_v2, + ], + ), + format_depth32float_filter: os_is_mac, + format_depth32float_none: !os_is_mac, + format_bgr10a2_all: Self::supports_any(device, BGR10A2_ALL), + format_bgr10a2_no_write: !Self::supports_any(device, BGR10A2_ALL), + max_buffers_per_stage: 31, + max_vertex_buffers: 31.min(crate::MAX_VERTEX_BUFFERS as u32), + max_textures_per_stage: if os_is_mac + || (family_check && device.supports_family(MTLGPUFamily::Apple6)) + { + 128 + } else if family_check && device.supports_family(MTLGPUFamily::Apple4) { + 96 + } else { + 31 + }, + max_samplers_per_stage: 16, + buffer_alignment: if os_is_mac || os_is_xr { 256 } else { 64 }, + max_buffer_size: if version.at_least((10, 14), (12, 0), os_is_mac) { + // maxBufferLength available on macOS 10.14+ and iOS 12.0+ + let buffer_size: metal::NSInteger = + unsafe { msg_send![device.as_ref(), maxBufferLength] }; + buffer_size as _ + } else if os_is_mac { + 1 << 30 // 1GB on macOS 10.11 and up + } else { + 1 << 28 // 256MB on iOS 8.0+ + }, + max_texture_size: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 16384 + } else { + 8192 + }, + max_texture_3d_size: 2048, + max_texture_layers: 2048, + max_fragment_input_components: if os_is_mac + || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1) + { + 124 + } else { + 60 + }, + max_color_render_targets: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 8 + } else { + 4 + }, + max_varying_components: if device + .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1) + { + 124 + } else { + 60 + }, + max_threads_per_group: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily4_v2, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 1024 + } else { + 512 + }, + max_total_threadgroup_memory: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::macOS_GPUFamily1_v2, + ], + ) { + 32 << 10 + } else { + 16 << 10 + }, + sample_count_mask, + supports_debug_markers: Self::supports_any( + device, + &[ + MTLFeatureSet::macOS_GPUFamily1_v2, + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + ], + ), + supports_binary_archives: family_check + && (device.supports_family(MTLGPUFamily::Apple3) + || device.supports_family(MTLGPUFamily::Mac1)), + supports_capture_manager: version.at_least((10, 13), (11, 0), os_is_mac), + can_set_maximum_drawables_count: version.at_least((10, 14), (11, 2), os_is_mac), + can_set_display_sync: version.at_least((10, 13), OS_NOT_SUPPORT, os_is_mac), + can_set_next_drawable_timeout: version.at_least((10, 13), (11, 0), os_is_mac), + supports_arrays_of_textures: Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v2, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v3, + ], + ), + supports_arrays_of_textures_write: family_check + && (device.supports_family(MTLGPUFamily::Apple6) + || device.supports_family(MTLGPUFamily::Mac1) + || device.supports_family(MTLGPUFamily::MacCatalyst1)), + supports_mutability: version.at_least((10, 13), (11, 0), os_is_mac), + //Depth clipping is supported on all macOS GPU families and iOS family 4 and later + supports_depth_clip_control: os_is_mac + || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1), + supports_preserve_invariance: version.at_least((11, 0), (13, 0), os_is_mac), + // Metal 2.2 on mac, 2.3 on iOS. + supports_shader_primitive_index: version.at_least((10, 15), (14, 0), os_is_mac), + has_unified_memory: if version.at_least((10, 15), (13, 0), os_is_mac) { + Some(device.has_unified_memory()) + } else { + None + }, + timestamp_query_support, + } + } + + pub fn device_type(&self) -> wgt::DeviceType { + if self.has_unified_memory.unwrap_or(self.low_power) { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + } + } + + pub fn features(&self) -> wgt::Features { + use wgt::Features as F; + + let mut features = F::empty() + | F::MAPPABLE_PRIMARY_BUFFERS + | F::VERTEX_WRITABLE_STORAGE + | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | F::PUSH_CONSTANTS + | F::POLYGON_MODE_LINE + | F::CLEAR_TEXTURE + | F::TEXTURE_FORMAT_16BIT_NORM + | F::SHADER_F16 + | F::DEPTH32FLOAT_STENCIL8 + | F::BGRA8UNORM_STORAGE; + + features.set(F::FLOAT32_FILTERABLE, self.supports_float_filtering); + features.set( + F::INDIRECT_FIRST_INSTANCE | F::MULTI_DRAW_INDIRECT, + self.indirect_draw_dispatch, + ); + features.set( + F::TIMESTAMP_QUERY, + self.timestamp_query_support + .contains(TimestampQuerySupport::STAGE_BOUNDARIES), + ); + features.set( + F::TIMESTAMP_QUERY_INSIDE_PASSES, + self.timestamp_query_support + .contains(TimestampQuerySupport::INSIDE_WGPU_PASSES), + ); + features.set( + F::DUAL_SOURCE_BLENDING, + self.msl_version >= MTLLanguageVersion::V1_2 && self.dual_source_blending, + ); + features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); + features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); + features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc); + features.set(F::TEXTURE_COMPRESSION_ETC2, self.format_eac_etc); + + features.set(F::DEPTH_CLIP_CONTROL, self.supports_depth_clip_control); + features.set( + F::SHADER_PRIMITIVE_INDEX, + self.supports_shader_primitive_index, + ); + + features.set( + F::TEXTURE_BINDING_ARRAY + | F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + self.msl_version >= MTLLanguageVersion::V2_0 && self.supports_arrays_of_textures, + ); + //// XXX: this is technically not true, as read-only storage images can be used in arrays + //// on precisely the same conditions that sampled textures can. But texel fetch from a + //// sampled texture is a thing; should we bother introducing another feature flag? + if self.msl_version >= MTLLanguageVersion::V2_2 + && self.supports_arrays_of_textures + && self.supports_arrays_of_textures_write + { + features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY); + } + + features.set( + F::ADDRESS_MODE_CLAMP_TO_BORDER, + self.sampler_clamp_to_border, + ); + features.set(F::ADDRESS_MODE_CLAMP_TO_ZERO, true); + + features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); + features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); + + features + } + + pub fn capabilities(&self) -> crate::Capabilities { + let mut downlevel = wgt::DownlevelCapabilities::default(); + downlevel.flags.set( + wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE, + self.fragment_rw_storage, + ); + downlevel.flags.set( + wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES, + self.texture_cube_array, + ); + // TODO: separate the mutable comparisons from immutable ones + downlevel.flags.set( + wgt::DownlevelFlags::COMPARISON_SAMPLERS, + self.mutable_comparison_samplers, + ); + downlevel.flags.set( + wgt::DownlevelFlags::INDIRECT_EXECUTION, + self.indirect_draw_dispatch, + ); + // TODO: add another flag for `first_instance` + downlevel.flags.set( + wgt::DownlevelFlags::BASE_VERTEX, + self.base_vertex_first_instance_drawing, + ); + downlevel + .flags + .set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, true); + + let base = wgt::Limits::default(); + crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: self.max_texture_size as u32, + max_texture_dimension_2d: self.max_texture_size as u32, + max_texture_dimension_3d: self.max_texture_3d_size as u32, + max_texture_array_layers: self.max_texture_layers as u32, + max_bind_groups: 8, + max_bindings_per_bind_group: 65535, + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: self.max_textures_per_stage, + max_samplers_per_shader_stage: self.max_samplers_per_stage, + max_storage_buffers_per_shader_stage: self.max_buffers_per_stage, + max_storage_textures_per_shader_stage: self.max_textures_per_stage, + max_uniform_buffers_per_shader_stage: self.max_buffers_per_stage, + max_uniform_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, + max_storage_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, + max_vertex_buffers: self.max_vertex_buffers, + max_vertex_attributes: 31, + max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, + max_push_constant_size: 0x1000, + min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, + min_storage_buffer_offset_alignment: self.buffer_alignment as u32, + max_inter_stage_shader_components: self.max_varying_components, + max_compute_workgroup_storage_size: self.max_total_threadgroup_memory, + max_compute_invocations_per_workgroup: self.max_threads_per_group, + max_compute_workgroup_size_x: self.max_threads_per_group, + max_compute_workgroup_size_y: self.max_threads_per_group, + max_compute_workgroup_size_z: self.max_threads_per_group, + max_compute_workgroups_per_dimension: 0xFFFF, + max_buffer_size: self.max_buffer_size, + max_non_sampler_bindings: std::u32::MAX, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(), + }, + downlevel, + } + } + + pub fn map_format(&self, format: wgt::TextureFormat) -> metal::MTLPixelFormat { + use metal::MTLPixelFormat::*; + use wgt::TextureFormat as Tf; + match format { + Tf::R8Unorm => R8Unorm, + Tf::R8Snorm => R8Snorm, + Tf::R8Uint => R8Uint, + Tf::R8Sint => R8Sint, + Tf::R16Uint => R16Uint, + Tf::R16Sint => R16Sint, + Tf::R16Unorm => R16Unorm, + Tf::R16Snorm => R16Snorm, + Tf::R16Float => R16Float, + Tf::Rg8Unorm => RG8Unorm, + Tf::Rg8Snorm => RG8Snorm, + Tf::Rg8Uint => RG8Uint, + Tf::Rg8Sint => RG8Sint, + Tf::Rg16Unorm => RG16Unorm, + Tf::Rg16Snorm => RG16Snorm, + Tf::R32Uint => R32Uint, + Tf::R32Sint => R32Sint, + Tf::R32Float => R32Float, + Tf::Rg16Uint => RG16Uint, + Tf::Rg16Sint => RG16Sint, + Tf::Rg16Float => RG16Float, + Tf::Rgba8Unorm => RGBA8Unorm, + Tf::Rgba8UnormSrgb => RGBA8Unorm_sRGB, + Tf::Bgra8UnormSrgb => BGRA8Unorm_sRGB, + Tf::Rgba8Snorm => RGBA8Snorm, + Tf::Bgra8Unorm => BGRA8Unorm, + Tf::Rgba8Uint => RGBA8Uint, + Tf::Rgba8Sint => RGBA8Sint, + Tf::Rgb10a2Uint => RGB10A2Uint, + Tf::Rgb10a2Unorm => RGB10A2Unorm, + Tf::Rg11b10Float => RG11B10Float, + Tf::Rg32Uint => RG32Uint, + Tf::Rg32Sint => RG32Sint, + Tf::Rg32Float => RG32Float, + Tf::Rgba16Uint => RGBA16Uint, + Tf::Rgba16Sint => RGBA16Sint, + Tf::Rgba16Unorm => RGBA16Unorm, + Tf::Rgba16Snorm => RGBA16Snorm, + Tf::Rgba16Float => RGBA16Float, + Tf::Rgba32Uint => RGBA32Uint, + Tf::Rgba32Sint => RGBA32Sint, + Tf::Rgba32Float => RGBA32Float, + Tf::Stencil8 => Stencil8, + Tf::Depth16Unorm => Depth16Unorm, + Tf::Depth32Float => Depth32Float, + Tf::Depth32FloatStencil8 => Depth32Float_Stencil8, + Tf::Depth24Plus => { + if self.format_depth24_stencil8 { + Depth24Unorm_Stencil8 + } else { + Depth32Float + } + } + Tf::Depth24PlusStencil8 => { + if self.format_depth24_stencil8 { + Depth24Unorm_Stencil8 + } else { + Depth32Float_Stencil8 + } + } + Tf::NV12 => unreachable!(), + Tf::Rgb9e5Ufloat => RGB9E5Float, + Tf::Bc1RgbaUnorm => BC1_RGBA, + Tf::Bc1RgbaUnormSrgb => BC1_RGBA_sRGB, + Tf::Bc2RgbaUnorm => BC2_RGBA, + Tf::Bc2RgbaUnormSrgb => BC2_RGBA_sRGB, + Tf::Bc3RgbaUnorm => BC3_RGBA, + Tf::Bc3RgbaUnormSrgb => BC3_RGBA_sRGB, + Tf::Bc4RUnorm => BC4_RUnorm, + Tf::Bc4RSnorm => BC4_RSnorm, + Tf::Bc5RgUnorm => BC5_RGUnorm, + Tf::Bc5RgSnorm => BC5_RGSnorm, + Tf::Bc6hRgbFloat => BC6H_RGBFloat, + Tf::Bc6hRgbUfloat => BC6H_RGBUfloat, + Tf::Bc7RgbaUnorm => BC7_RGBAUnorm, + Tf::Bc7RgbaUnormSrgb => BC7_RGBAUnorm_sRGB, + Tf::Etc2Rgb8Unorm => ETC2_RGB8, + Tf::Etc2Rgb8UnormSrgb => ETC2_RGB8_sRGB, + Tf::Etc2Rgb8A1Unorm => ETC2_RGB8A1, + Tf::Etc2Rgb8A1UnormSrgb => ETC2_RGB8A1_sRGB, + Tf::Etc2Rgba8Unorm => EAC_RGBA8, + Tf::Etc2Rgba8UnormSrgb => EAC_RGBA8_sRGB, + Tf::EacR11Unorm => EAC_R11Unorm, + Tf::EacR11Snorm => EAC_R11Snorm, + Tf::EacRg11Unorm => EAC_RG11Unorm, + Tf::EacRg11Snorm => EAC_RG11Snorm, + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm => match block { + AstcBlock::B4x4 => ASTC_4x4_LDR, + AstcBlock::B5x4 => ASTC_5x4_LDR, + AstcBlock::B5x5 => ASTC_5x5_LDR, + AstcBlock::B6x5 => ASTC_6x5_LDR, + AstcBlock::B6x6 => ASTC_6x6_LDR, + AstcBlock::B8x5 => ASTC_8x5_LDR, + AstcBlock::B8x6 => ASTC_8x6_LDR, + AstcBlock::B8x8 => ASTC_8x8_LDR, + AstcBlock::B10x5 => ASTC_10x5_LDR, + AstcBlock::B10x6 => ASTC_10x6_LDR, + AstcBlock::B10x8 => ASTC_10x8_LDR, + AstcBlock::B10x10 => ASTC_10x10_LDR, + AstcBlock::B12x10 => ASTC_12x10_LDR, + AstcBlock::B12x12 => ASTC_12x12_LDR, + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => ASTC_4x4_sRGB, + AstcBlock::B5x4 => ASTC_5x4_sRGB, + AstcBlock::B5x5 => ASTC_5x5_sRGB, + AstcBlock::B6x5 => ASTC_6x5_sRGB, + AstcBlock::B6x6 => ASTC_6x6_sRGB, + AstcBlock::B8x5 => ASTC_8x5_sRGB, + AstcBlock::B8x6 => ASTC_8x6_sRGB, + AstcBlock::B8x8 => ASTC_8x8_sRGB, + AstcBlock::B10x5 => ASTC_10x5_sRGB, + AstcBlock::B10x6 => ASTC_10x6_sRGB, + AstcBlock::B10x8 => ASTC_10x8_sRGB, + AstcBlock::B10x10 => ASTC_10x10_sRGB, + AstcBlock::B12x10 => ASTC_12x10_sRGB, + AstcBlock::B12x12 => ASTC_12x12_sRGB, + }, + AstcChannel::Hdr => match block { + AstcBlock::B4x4 => ASTC_4x4_HDR, + AstcBlock::B5x4 => ASTC_5x4_HDR, + AstcBlock::B5x5 => ASTC_5x5_HDR, + AstcBlock::B6x5 => ASTC_6x5_HDR, + AstcBlock::B6x6 => ASTC_6x6_HDR, + AstcBlock::B8x5 => ASTC_8x5_HDR, + AstcBlock::B8x6 => ASTC_8x6_HDR, + AstcBlock::B8x8 => ASTC_8x8_HDR, + AstcBlock::B10x5 => ASTC_10x5_HDR, + AstcBlock::B10x6 => ASTC_10x6_HDR, + AstcBlock::B10x8 => ASTC_10x8_HDR, + AstcBlock::B10x10 => ASTC_10x10_HDR, + AstcBlock::B12x10 => ASTC_12x10_HDR, + AstcBlock::B12x12 => ASTC_12x12_HDR, + }, + }, + } + } + + pub fn map_view_format( + &self, + format: wgt::TextureFormat, + aspects: crate::FormatAspects, + ) -> metal::MTLPixelFormat { + use crate::FormatAspects as Fa; + use metal::MTLPixelFormat::*; + use wgt::TextureFormat as Tf; + match (format, aspects) { + // map combined depth-stencil format to their stencil-only format + // see https://developer.apple.com/library/archive/documentation/Miscellaneous/Conceptual/MetalProgrammingGuide/WhatsNewiniOS10tvOS10andOSX1012/WhatsNewiniOS10tvOS10andOSX1012.html#//apple_ref/doc/uid/TP40014221-CH14-DontLinkElementID_77 + (Tf::Depth24PlusStencil8, Fa::STENCIL) => { + if self.format_depth24_stencil8 { + X24_Stencil8 + } else { + X32_Stencil8 + } + } + (Tf::Depth32FloatStencil8, Fa::STENCIL) => X32_Stencil8, + + _ => self.map_format(format), + } + } +} + +impl super::PrivateDisabilities { + pub fn new(device: &metal::Device) -> Self { + let is_intel = device.name().starts_with("Intel"); + Self { + broken_viewport_near_depth: is_intel + && !device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v4), + broken_layered_clear_image: is_intel, + } + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/command.rs b/third_party/rust/wgpu-hal/src/metal/command.rs new file mode 100644 index 0000000000..6f1a0d9c2f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/command.rs @@ -0,0 +1,1254 @@ +use super::{conv, AsNative, TimestampQuerySupport}; +use crate::CommandEncoder as _; +use std::{borrow::Cow, mem, ops::Range}; + +// has to match `Temp::binding_sizes` +const WORD_SIZE: usize = 4; + +impl Default for super::CommandState { + fn default() -> Self { + Self { + blit: None, + render: None, + compute: None, + raw_primitive_type: metal::MTLPrimitiveType::Point, + index: None, + raw_wg_size: metal::MTLSize::new(0, 0, 0), + stage_infos: Default::default(), + storage_buffer_length_map: Default::default(), + work_group_memory_sizes: Vec::new(), + push_constants: Vec::new(), + pending_timer_queries: Vec::new(), + } + } +} + +impl super::CommandEncoder { + fn enter_blit(&mut self) -> &metal::BlitCommandEncoderRef { + if self.state.blit.is_none() { + debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); + let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); + + // Take care of pending timer queries. + // If we can't use `sample_counters_in_buffer` we have to create a dummy blit encoder! + // + // There is a known bug in Metal where blit encoders won't write timestamps if they don't have a blit operation. + // See https://github.com/gpuweb/gpuweb/issues/2046#issuecomment-1205793680 & https://source.chromium.org/chromium/chromium/src/+/006c4eb70c96229834bbaf271290f40418144cd3:third_party/dawn/src/dawn/native/metal/BackendMTL.mm;l=350 + // + // To make things worse: + // * what counts as a blit operation is a bit unclear, experimenting seemed to indicate that resolve_counters doesn't count. + // * in some cases (when?) using `set_start_of_encoder_sample_index` doesn't work, so we have to use `set_end_of_encoder_sample_index` instead + // + // All this means that pretty much the only *reliable* thing as of writing is to: + // * create a dummy blit encoder using set_end_of_encoder_sample_index + // * do a dummy write that is known to be not optimized out. + // * close the encoder since we used set_end_of_encoder_sample_index and don't want to get any extra stuff in there. + // * create another encoder for whatever we actually had in mind. + let supports_sample_counters_in_buffer = self + .shared + .private_caps + .timestamp_query_support + .contains(TimestampQuerySupport::ON_BLIT_ENCODER); + + if !self.state.pending_timer_queries.is_empty() && !supports_sample_counters_in_buffer { + objc::rc::autoreleasepool(|| { + let descriptor = metal::BlitPassDescriptor::new(); + let mut last_query = None; + for (i, (set, index)) in self.state.pending_timer_queries.drain(..).enumerate() + { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(i as _) + .unwrap(); + sba_descriptor + .set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + + // Here be dragons: + // As mentioned above, for some reasons using the start of the encoder won't yield any results sometimes! + sba_descriptor + .set_start_of_encoder_sample_index(metal::COUNTER_DONT_SAMPLE); + sba_descriptor.set_end_of_encoder_sample_index(index as _); + + last_query = Some((set, index)); + } + let encoder = cmd_buf.blit_command_encoder_with_descriptor(descriptor); + + // As explained above, we need to do some write: + // Conveniently, we have a buffer with every query set, that we can use for this for a dummy write, + // since we know that it is going to be overwritten again on timer resolve and HAL doesn't define its state before that. + let raw_range = metal::NSRange { + location: last_query.as_ref().unwrap().1 as u64 * crate::QUERY_SIZE, + length: 1, + }; + encoder.fill_buffer( + &last_query.as_ref().unwrap().0.raw_buffer, + raw_range, + 255, // Don't write 0, so it's easier to identify if something went wrong. + ); + + encoder.end_encoding(); + }); + } + + objc::rc::autoreleasepool(|| { + self.state.blit = Some(cmd_buf.new_blit_command_encoder().to_owned()); + }); + + let encoder = self.state.blit.as_ref().unwrap(); + + // UNTESTED: + // If the above described issue with empty blit encoder applies to `sample_counters_in_buffer` as well, we should use the same workaround instead! + for (set, index) in self.state.pending_timer_queries.drain(..) { + debug_assert!(supports_sample_counters_in_buffer); + encoder.sample_counters_in_buffer( + set.counter_sample_buffer.as_ref().unwrap(), + index as _, + true, + ) + } + } + self.state.blit.as_ref().unwrap() + } + + pub(super) fn leave_blit(&mut self) { + if let Some(encoder) = self.state.blit.take() { + encoder.end_encoding(); + } + } + + fn active_encoder(&mut self) -> Option<&metal::CommandEncoderRef> { + if let Some(ref encoder) = self.state.render { + Some(encoder) + } else if let Some(ref encoder) = self.state.compute { + Some(encoder) + } else if let Some(ref encoder) = self.state.blit { + Some(encoder) + } else { + None + } + } + + fn begin_pass(&mut self) { + self.state.reset(); + self.leave_blit(); + } +} + +impl super::CommandState { + fn reset(&mut self) { + self.storage_buffer_length_map.clear(); + self.stage_infos.vs.clear(); + self.stage_infos.fs.clear(); + self.stage_infos.cs.clear(); + self.work_group_memory_sizes.clear(); + self.push_constants.clear(); + } + + fn make_sizes_buffer_update<'a>( + &self, + stage: naga::ShaderStage, + result_sizes: &'a mut Vec<u32>, + ) -> Option<(u32, &'a [u32])> { + let stage_info = &self.stage_infos[stage]; + let slot = stage_info.sizes_slot?; + + result_sizes.clear(); + result_sizes.extend(stage_info.sized_bindings.iter().map(|br| { + self.storage_buffer_length_map + .get(br) + .map(|size| u32::try_from(size.get()).unwrap_or(u32::MAX)) + .unwrap_or_default() + })); + + if !result_sizes.is_empty() { + Some((slot as _, result_sizes)) + } else { + None + } + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let queue = &self.raw_queue.lock(); + let retain_references = self.shared.settings.retain_command_buffer_references; + let raw = objc::rc::autoreleasepool(move || { + let cmd_buf_ref = if retain_references { + queue.new_command_buffer() + } else { + queue.new_command_buffer_with_unretained_references() + }; + if let Some(label) = label { + cmd_buf_ref.set_label(label); + } + cmd_buf_ref.to_owned() + }); + + self.raw_cmd_buf = Some(raw); + + Ok(()) + } + + unsafe fn discard_encoding(&mut self) { + self.leave_blit(); + // when discarding, we don't have a guarantee that + // everything is in a good state, so check carefully + if let Some(encoder) = self.state.render.take() { + encoder.end_encoding(); + } + if let Some(encoder) = self.state.compute.take() { + encoder.end_encoding(); + } + self.raw_cmd_buf = None; + } + + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + // Handle pending timer query if any. + if !self.state.pending_timer_queries.is_empty() { + self.leave_blit(); + self.enter_blit(); + } + + self.leave_blit(); + debug_assert!(self.state.render.is_none()); + debug_assert!(self.state.compute.is_none()); + debug_assert!(self.state.pending_timer_queries.is_empty()); + + Ok(super::CommandBuffer { + raw: self.raw_cmd_buf.take().unwrap(), + }) + } + + unsafe fn reset_all<I>(&mut self, _cmd_bufs: I) + where + I: Iterator<Item = super::CommandBuffer>, + { + //do nothing + } + + unsafe fn transition_buffers<'a, T>(&mut self, _barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + } + + unsafe fn transition_textures<'a, T>(&mut self, _barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + let encoder = self.enter_blit(); + encoder.fill_buffer(&buffer.raw, conv::map_range(&range), 0); + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + encoder.copy_from_buffer( + &src.raw, + copy.src_offset, + &dst.raw, + copy.dst_offset, + copy.size.get(), + ); + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let dst_texture = if src.format != dst.format { + let raw_format = self.shared.private_caps.map_format(src.format); + Cow::Owned(objc::rc::autoreleasepool(|| { + dst.raw.new_texture_view(raw_format) + })) + } else { + Cow::Borrowed(&dst.raw) + }; + let encoder = self.enter_blit(); + for copy in regions { + let src_origin = conv::map_origin(©.src_base.origin); + let dst_origin = conv::map_origin(©.dst_base.origin); + // no clamping is done: Metal expects physical sizes here + let extent = conv::map_copy_extent(©.size); + encoder.copy_from_texture( + &src.raw, + copy.src_base.array_layer as u64, + copy.src_base.mip_level as u64, + src_origin, + extent, + &dst_texture, + copy.dst_base.array_layer as u64, + copy.dst_base.mip_level as u64, + dst_origin, + ); + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + let dst_origin = conv::map_origin(©.texture_base.origin); + // Metal expects buffer-texture copies in virtual sizes + let extent = copy + .texture_base + .max_copy_size(&dst.copy_size) + .min(©.size); + let bytes_per_row = copy.buffer_layout.bytes_per_row.unwrap_or(0) as u64; + let image_byte_stride = if extent.depth > 1 { + copy.buffer_layout + .rows_per_image + .map_or(0, |v| v as u64 * bytes_per_row) + } else { + // Don't pass a stride when updating a single layer, otherwise metal validation + // fails when updating a subset of the image due to the stride being larger than + // the amount of data to copy. + 0 + }; + encoder.copy_from_buffer_to_texture( + &src.raw, + copy.buffer_layout.offset, + bytes_per_row, + image_byte_stride, + conv::map_copy_extent(&extent), + &dst.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + dst_origin, + conv::get_blit_option(dst.format, copy.texture_base.aspect), + ); + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + let src_origin = conv::map_origin(©.texture_base.origin); + // Metal expects texture-buffer copies in virtual sizes + let extent = copy + .texture_base + .max_copy_size(&src.copy_size) + .min(©.size); + let bytes_per_row = copy.buffer_layout.bytes_per_row.unwrap_or(0) as u64; + let bytes_per_image = copy + .buffer_layout + .rows_per_image + .map_or(0, |v| v as u64 * bytes_per_row); + encoder.copy_from_texture_to_buffer( + &src.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + src_origin, + conv::map_copy_extent(&extent), + &dst.raw, + copy.buffer_layout.offset, + bytes_per_row, + bytes_per_image, + conv::get_blit_option(src.format, copy.texture_base.aspect), + ); + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + match set.ty { + wgt::QueryType::Occlusion => { + self.state + .render + .as_ref() + .unwrap() + .set_visibility_result_mode( + metal::MTLVisibilityResultMode::Boolean, + index as u64 * crate::QUERY_SIZE, + ); + } + _ => {} + } + } + unsafe fn end_query(&mut self, set: &super::QuerySet, _index: u32) { + match set.ty { + wgt::QueryType::Occlusion => { + self.state + .render + .as_ref() + .unwrap() + .set_visibility_result_mode(metal::MTLVisibilityResultMode::Disabled, 0); + } + _ => {} + } + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + let support = self.shared.private_caps.timestamp_query_support; + debug_assert!( + support.contains(TimestampQuerySupport::STAGE_BOUNDARIES), + "Timestamp queries are not supported" + ); + let sample_buffer = set.counter_sample_buffer.as_ref().unwrap(); + let with_barrier = true; + + // Try to use an existing encoder for timestamp query if possible. + // This works only if it's supported for the active encoder. + if let (true, Some(encoder)) = ( + support.contains(TimestampQuerySupport::ON_BLIT_ENCODER), + self.state.blit.as_ref(), + ) { + encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + } else if let (true, Some(encoder)) = ( + support.contains(TimestampQuerySupport::ON_RENDER_ENCODER), + self.state.render.as_ref(), + ) { + encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + } else if let (true, Some(encoder)) = ( + support.contains(TimestampQuerySupport::ON_COMPUTE_ENCODER), + self.state.compute.as_ref(), + ) { + encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + } else { + // If we're here it means we either have no encoder open, or it's not supported to sample within them. + // If this happens with render/compute open, this is an invalid usage! + debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); + + // But otherwise it means we'll put defer this to the next created encoder. + self.state.pending_timer_queries.push((set.clone(), index)); + + // Ensure we didn't already have a blit open. + self.leave_blit(); + }; + } + + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range<u32>) { + let encoder = self.enter_blit(); + let raw_range = metal::NSRange { + location: range.start as u64 * crate::QUERY_SIZE, + length: (range.end - range.start) as u64 * crate::QUERY_SIZE, + }; + encoder.fill_buffer(&set.raw_buffer, raw_range, 0); + } + + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _: wgt::BufferSize, // Metal doesn't support queries that are bigger than a single element are not supported + ) { + let encoder = self.enter_blit(); + match set.ty { + wgt::QueryType::Occlusion => { + let size = (range.end - range.start) as u64 * crate::QUERY_SIZE; + encoder.copy_from_buffer( + &set.raw_buffer, + range.start as u64 * crate::QUERY_SIZE, + &buffer.raw, + offset, + size, + ); + } + wgt::QueryType::Timestamp => { + encoder.resolve_counters( + set.counter_sample_buffer.as_ref().unwrap(), + metal::NSRange::new(range.start as u64, range.end as u64), + &buffer.raw, + offset, + ); + } + wgt::QueryType::PipelineStatistics(_) => todo!(), + } + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + self.begin_pass(); + self.state.index = None; + + assert!(self.state.blit.is_none()); + assert!(self.state.compute.is_none()); + assert!(self.state.render.is_none()); + + objc::rc::autoreleasepool(|| { + let descriptor = metal::RenderPassDescriptor::new(); + + for (i, at) in desc.color_attachments.iter().enumerate() { + if let Some(at) = at.as_ref() { + let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + if let Some(ref resolve) = at.resolve_target { + //Note: the selection of levels and slices is already handled by `TextureView` + at_descriptor.set_resolve_texture(Some(&resolve.view.raw)); + } + let load_action = if at.ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_color(conv::map_clear_color(&at.clear_value)); + metal::MTLLoadAction::Clear + }; + let store_action = conv::map_store_action( + at.ops.contains(crate::AttachmentOps::STORE), + at.resolve_target.is_some(), + ); + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + } + + if let Some(ref at) = desc.depth_stencil_attachment { + if at.target.view.aspects.contains(crate::FormatAspects::DEPTH) { + let at_descriptor = descriptor.depth_attachment().unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + + let load_action = if at.depth_ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_depth(at.clear_value.0 as f64); + metal::MTLLoadAction::Clear + }; + let store_action = if at.depth_ops.contains(crate::AttachmentOps::STORE) { + metal::MTLStoreAction::Store + } else { + metal::MTLStoreAction::DontCare + }; + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + if at + .target + .view + .aspects + .contains(crate::FormatAspects::STENCIL) + { + let at_descriptor = descriptor.stencil_attachment().unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + + let load_action = if at.stencil_ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_stencil(at.clear_value.1); + metal::MTLLoadAction::Clear + }; + let store_action = if at.stencil_ops.contains(crate::AttachmentOps::STORE) { + metal::MTLStoreAction::Store + } else { + metal::MTLStoreAction::DontCare + }; + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + } + + let mut sba_index = 0; + let mut next_sba_descriptor = || { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(sba_index) + .unwrap(); + + sba_descriptor.set_end_of_vertex_sample_index(metal::COUNTER_DONT_SAMPLE); + sba_descriptor.set_start_of_fragment_sample_index(metal::COUNTER_DONT_SAMPLE); + + sba_index += 1; + sba_descriptor + }; + + for (set, index) in self.state.pending_timer_queries.drain(..) { + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + sba_descriptor.set_start_of_vertex_sample_index(index as _); + sba_descriptor.set_end_of_fragment_sample_index(metal::COUNTER_DONT_SAMPLE); + } + + if let Some(ref timestamp_writes) = desc.timestamp_writes { + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer( + timestamp_writes + .query_set + .counter_sample_buffer + .as_ref() + .unwrap(), + ); + + sba_descriptor.set_start_of_vertex_sample_index( + timestamp_writes + .beginning_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + sba_descriptor.set_end_of_fragment_sample_index( + timestamp_writes + .end_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + } + + if let Some(occlusion_query_set) = desc.occlusion_query_set { + descriptor + .set_visibility_result_buffer(Some(occlusion_query_set.raw_buffer.as_ref())) + } + + let raw = self.raw_cmd_buf.as_ref().unwrap(); + let encoder = raw.new_render_command_encoder(descriptor); + if let Some(label) = desc.label { + encoder.set_label(label); + } + self.state.render = Some(encoder.to_owned()); + }); + } + + unsafe fn end_render_pass(&mut self) { + self.state.render.take().unwrap().end_encoding(); + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + group_index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let bg_info = &layout.bind_group_infos[group_index as usize]; + + if let Some(ref encoder) = self.state.render { + let mut changes_sizes_buffer = false; + for index in 0..group.counters.vs.buffers { + let buf = &group.buffers[index as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_vertex_buffer( + (bg_info.base_resource_indices.vs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Vertex, + &mut self.temp.binding_sizes, + ) { + encoder.set_vertex_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + changes_sizes_buffer = false; + for index in 0..group.counters.fs.buffers { + let buf = &group.buffers[(group.counters.vs.buffers + index) as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_fragment_buffer( + (bg_info.base_resource_indices.fs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Fragment, + &mut self.temp.binding_sizes, + ) { + encoder.set_fragment_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + for index in 0..group.counters.vs.samplers { + let res = group.samplers[index as usize]; + encoder.set_vertex_sampler_state( + (bg_info.base_resource_indices.vs.samplers + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.fs.samplers { + let res = group.samplers[(group.counters.vs.samplers + index) as usize]; + encoder.set_fragment_sampler_state( + (bg_info.base_resource_indices.fs.samplers + index) as u64, + Some(res.as_native()), + ); + } + + for index in 0..group.counters.vs.textures { + let res = group.textures[index as usize]; + encoder.set_vertex_texture( + (bg_info.base_resource_indices.vs.textures + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.fs.textures { + let res = group.textures[(group.counters.vs.textures + index) as usize]; + encoder.set_fragment_texture( + (bg_info.base_resource_indices.fs.textures + index) as u64, + Some(res.as_native()), + ); + } + } + + if let Some(ref encoder) = self.state.compute { + let index_base = super::ResourceData { + buffers: group.counters.vs.buffers + group.counters.fs.buffers, + samplers: group.counters.vs.samplers + group.counters.fs.samplers, + textures: group.counters.vs.textures + group.counters.fs.textures, + }; + + let mut changes_sizes_buffer = false; + for index in 0..group.counters.cs.buffers { + let buf = &group.buffers[(index_base.buffers + index) as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_buffer( + (bg_info.base_resource_indices.cs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Compute, + &mut self.temp.binding_sizes, + ) { + encoder.set_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + for index in 0..group.counters.cs.samplers { + let res = group.samplers[(index_base.samplers + index) as usize]; + encoder.set_sampler_state( + (bg_info.base_resource_indices.cs.samplers + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.cs.textures { + let res = group.textures[(index_base.textures + index) as usize]; + encoder.set_texture( + (bg_info.base_resource_indices.cs.textures + index) as u64, + Some(res.as_native()), + ); + } + } + } + + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ) { + let state_pc = &mut self.state.push_constants; + if state_pc.len() < layout.total_push_constants as usize { + state_pc.resize(layout.total_push_constants as usize, 0); + } + debug_assert_eq!(offset_bytes as usize % WORD_SIZE, 0); + + let offset_words = offset_bytes as usize / WORD_SIZE; + state_pc[offset_words..offset_words + data.len()].copy_from_slice(data); + + if stages.contains(wgt::ShaderStages::COMPUTE) { + self.state.compute.as_ref().unwrap().set_bytes( + layout.push_constants_infos.cs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::VERTEX) { + self.state.render.as_ref().unwrap().set_vertex_bytes( + layout.push_constants_infos.vs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::FRAGMENT) { + self.state.render.as_ref().unwrap().set_fragment_bytes( + layout.push_constants_infos.fs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + if let Some(encoder) = self.active_encoder() { + encoder.insert_debug_signpost(label); + } + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + if let Some(encoder) = self.active_encoder() { + encoder.push_debug_group(group_label); + } else if let Some(ref buf) = self.raw_cmd_buf { + buf.push_debug_group(group_label); + } + } + unsafe fn end_debug_marker(&mut self) { + if let Some(encoder) = self.active_encoder() { + encoder.pop_debug_group(); + } else if let Some(ref buf) = self.raw_cmd_buf { + buf.pop_debug_group(); + } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + self.state.raw_primitive_type = pipeline.raw_primitive_type; + self.state.stage_infos.vs.assign_from(&pipeline.vs_info); + match pipeline.fs_info { + Some(ref info) => self.state.stage_infos.fs.assign_from(info), + None => self.state.stage_infos.fs.clear(), + } + + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_render_pipeline_state(&pipeline.raw); + encoder.set_front_facing_winding(pipeline.raw_front_winding); + encoder.set_cull_mode(pipeline.raw_cull_mode); + encoder.set_triangle_fill_mode(pipeline.raw_triangle_fill_mode); + if let Some(depth_clip) = pipeline.raw_depth_clip_mode { + encoder.set_depth_clip_mode(depth_clip); + } + if let Some((ref state, bias)) = pipeline.depth_stencil { + encoder.set_depth_stencil_state(state); + encoder.set_depth_bias(bias.constant as f32, bias.slope_scale, bias.clamp); + } + + { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes) + { + encoder.set_vertex_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + if pipeline.fs_lib.is_some() { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Fragment, &mut self.temp.binding_sizes) + { + encoder.set_fragment_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + let (stride, raw_type) = match format { + wgt::IndexFormat::Uint16 => (2, metal::MTLIndexType::UInt16), + wgt::IndexFormat::Uint32 => (4, metal::MTLIndexType::UInt32), + }; + self.state.index = Some(super::IndexState { + buffer_ptr: AsNative::from(binding.buffer.raw.as_ref()), + offset: binding.offset, + stride, + raw_type, + }); + } + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_vertex_buffer(buffer_index, Some(&binding.buffer.raw), binding.offset); + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let zfar = if self.shared.disabilities.broken_viewport_near_depth { + depth_range.end - depth_range.start + } else { + depth_range.end + }; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_viewport(metal::MTLViewport { + originX: rect.x as _, + originY: rect.y as _, + width: rect.w as _, + height: rect.h as _, + znear: depth_range.start as _, + zfar: zfar as _, + }); + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + //TODO: support empty scissors by modifying the viewport + let scissor = metal::MTLScissorRect { + x: rect.x as _, + y: rect.y as _, + width: rect.w as _, + height: rect.h as _, + }; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_scissor_rect(scissor); + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_stencil_front_back_reference_value(value, value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_blend_color(color[0], color[1], color[2], color[3]); + } + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + if first_instance != 0 { + encoder.draw_primitives_instanced_base_instance( + self.state.raw_primitive_type, + first_vertex as _, + vertex_count as _, + instance_count as _, + first_instance as _, + ); + } else if instance_count != 1 { + encoder.draw_primitives_instanced( + self.state.raw_primitive_type, + first_vertex as _, + vertex_count as _, + instance_count as _, + ); + } else { + encoder.draw_primitives( + self.state.raw_primitive_type, + first_vertex as _, + vertex_count as _, + ); + } + } + + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + let index = self.state.index.as_ref().unwrap(); + let offset = index.offset + index.stride * first_index as wgt::BufferAddress; + if base_vertex != 0 || first_instance != 0 { + encoder.draw_indexed_primitives_instanced_base_instance( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + instance_count as _, + base_vertex as _, + first_instance as _, + ); + } else if instance_count != 1 { + encoder.draw_indexed_primitives_instanced( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + instance_count as _, + ); + } else { + encoder.draw_indexed_primitives( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + ); + } + } + + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + mut offset: wgt::BufferAddress, + draw_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + for _ in 0..draw_count { + encoder.draw_primitives_indirect(self.state.raw_primitive_type, &buffer.raw, offset); + offset += mem::size_of::<wgt::DrawIndirectArgs>() as wgt::BufferAddress; + } + } + + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + mut offset: wgt::BufferAddress, + draw_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + let index = self.state.index.as_ref().unwrap(); + for _ in 0..draw_count { + encoder.draw_indexed_primitives_indirect( + self.state.raw_primitive_type, + index.raw_type, + index.buffer_ptr.as_native(), + index.offset, + &buffer.raw, + offset, + ); + offset += mem::size_of::<wgt::DrawIndexedIndirectArgs>() as wgt::BufferAddress; + } + } + + unsafe fn draw_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + //TODO + } + unsafe fn draw_indexed_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + //TODO + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::Api>) { + self.begin_pass(); + + debug_assert!(self.state.blit.is_none()); + debug_assert!(self.state.compute.is_none()); + debug_assert!(self.state.render.is_none()); + + let raw = self.raw_cmd_buf.as_ref().unwrap(); + + objc::rc::autoreleasepool(|| { + // TimeStamp Queries and ComputePassDescriptor were both introduced in Metal 2.3 (macOS 11, iOS 14) + // and we currently only need ComputePassDescriptor for timestamp queries + let encoder = if self.shared.private_caps.timestamp_query_support.is_empty() { + raw.new_compute_command_encoder() + } else { + let descriptor = metal::ComputePassDescriptor::new(); + + let mut sba_index = 0; + let mut next_sba_descriptor = || { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(sba_index) + .unwrap(); + sba_index += 1; + sba_descriptor + }; + + for (set, index) in self.state.pending_timer_queries.drain(..) { + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + sba_descriptor.set_start_of_encoder_sample_index(index as _); + sba_descriptor.set_end_of_encoder_sample_index(metal::COUNTER_DONT_SAMPLE); + } + + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer( + timestamp_writes + .query_set + .counter_sample_buffer + .as_ref() + .unwrap(), + ); + + sba_descriptor.set_start_of_encoder_sample_index( + timestamp_writes + .beginning_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + sba_descriptor.set_end_of_encoder_sample_index( + timestamp_writes + .end_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + } + + raw.compute_command_encoder_with_descriptor(descriptor) + }; + + if let Some(label) = desc.label { + encoder.set_label(label); + } + + self.state.compute = Some(encoder.to_owned()); + }); + } + unsafe fn end_compute_pass(&mut self) { + self.state.compute.take().unwrap().end_encoding(); + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + self.state.raw_wg_size = pipeline.work_group_size; + self.state.stage_infos.cs.assign_from(&pipeline.cs_info); + + let encoder = self.state.compute.as_ref().unwrap(); + encoder.set_compute_pipeline_state(&pipeline.raw); + + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Compute, &mut self.temp.binding_sizes) + { + encoder.set_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + + // update the threadgroup memory sizes + while self.state.work_group_memory_sizes.len() < pipeline.work_group_memory_sizes.len() { + self.state.work_group_memory_sizes.push(0); + } + for (index, (cur_size, pipeline_size)) in self + .state + .work_group_memory_sizes + .iter_mut() + .zip(pipeline.work_group_memory_sizes.iter()) + .enumerate() + { + const ALIGN_MASK: u32 = 0xF; // must be a multiple of 16 bytes + let size = ((*pipeline_size - 1) | ALIGN_MASK) + 1; + if *cur_size != size { + *cur_size = size; + encoder.set_threadgroup_memory_length(index as _, size as _); + } + } + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + let encoder = self.state.compute.as_ref().unwrap(); + let raw_count = metal::MTLSize { + width: count[0] as u64, + height: count[1] as u64, + depth: count[2] as u64, + }; + encoder.dispatch_thread_groups(raw_count, self.state.raw_wg_size); + } + + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + let encoder = self.state.compute.as_ref().unwrap(); + encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size); + } + + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + _descriptor_count: u32, + _descriptors: T, + ) where + super::Api: 'a, + T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>, + { + unimplemented!() + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + _barriers: crate::AccelerationStructureBarrier, + ) { + unimplemented!() + } +} + +impl Drop for super::CommandEncoder { + fn drop(&mut self) { + // Metal raises an assert when a MTLCommandEncoder is deallocated without a call + // to endEncoding. This isn't documented in the general case at + // https://developer.apple.com/documentation/metal/mtlcommandencoder, but for the + // more-specific MTLComputeCommandEncoder it is stated as a requirement at + // https://developer.apple.com/documentation/metal/mtlcomputecommandencoder. It + // appears to be a requirement for all MTLCommandEncoder objects. Failing to call + // endEncoding causes a crash with the message 'Command encoder released without + // endEncoding'. To prevent this, we explicitiy call discard_encoding, which + // calls end_encoding on any still-held metal::CommandEncoders. + unsafe { + self.discard_encoding(); + } + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/conv.rs b/third_party/rust/wgpu-hal/src/metal/conv.rs new file mode 100644 index 0000000000..8f6439b50b --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/conv.rs @@ -0,0 +1,322 @@ +pub fn map_texture_usage( + format: wgt::TextureFormat, + usage: crate::TextureUses, +) -> metal::MTLTextureUsage { + use crate::TextureUses as Tu; + + let mut mtl_usage = metal::MTLTextureUsage::Unknown; + + mtl_usage.set( + metal::MTLTextureUsage::RenderTarget, + usage.intersects(Tu::COLOR_TARGET | Tu::DEPTH_STENCIL_READ | Tu::DEPTH_STENCIL_WRITE), + ); + mtl_usage.set( + metal::MTLTextureUsage::ShaderRead, + usage.intersects( + Tu::RESOURCE | Tu::DEPTH_STENCIL_READ | Tu::STORAGE_READ | Tu::STORAGE_READ_WRITE, + ), + ); + mtl_usage.set( + metal::MTLTextureUsage::ShaderWrite, + usage.intersects(Tu::STORAGE_READ_WRITE), + ); + // needed for combined depth/stencil formats since we might + // create a stencil-only view from them + mtl_usage.set( + metal::MTLTextureUsage::PixelFormatView, + format.is_combined_depth_stencil_format(), + ); + + mtl_usage +} + +pub fn map_texture_view_dimension(dim: wgt::TextureViewDimension) -> metal::MTLTextureType { + use metal::MTLTextureType::*; + use wgt::TextureViewDimension as Tvd; + match dim { + Tvd::D1 => D1, + Tvd::D2 => D2, + Tvd::D2Array => D2Array, + Tvd::D3 => D3, + Tvd::Cube => Cube, + Tvd::CubeArray => CubeArray, + } +} + +pub fn map_compare_function(fun: wgt::CompareFunction) -> metal::MTLCompareFunction { + use metal::MTLCompareFunction::*; + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => Never, + Cf::Less => Less, + Cf::LessEqual => LessEqual, + Cf::Equal => Equal, + Cf::GreaterEqual => GreaterEqual, + Cf::Greater => Greater, + Cf::NotEqual => NotEqual, + Cf::Always => Always, + } +} + +pub fn map_filter_mode(filter: wgt::FilterMode) -> metal::MTLSamplerMinMagFilter { + use metal::MTLSamplerMinMagFilter::*; + match filter { + wgt::FilterMode::Nearest => Nearest, + wgt::FilterMode::Linear => Linear, + } +} + +pub fn map_address_mode(address: wgt::AddressMode) -> metal::MTLSamplerAddressMode { + use metal::MTLSamplerAddressMode::*; + use wgt::AddressMode as Fm; + match address { + Fm::Repeat => Repeat, + Fm::MirrorRepeat => MirrorRepeat, + Fm::ClampToEdge => ClampToEdge, + Fm::ClampToBorder => ClampToBorderColor, + //Fm::MirrorClamp => MirrorClampToEdge, + } +} + +pub fn map_border_color(border_color: wgt::SamplerBorderColor) -> metal::MTLSamplerBorderColor { + use metal::MTLSamplerBorderColor::*; + match border_color { + wgt::SamplerBorderColor::TransparentBlack => TransparentBlack, + wgt::SamplerBorderColor::OpaqueBlack => OpaqueBlack, + wgt::SamplerBorderColor::OpaqueWhite => OpaqueWhite, + wgt::SamplerBorderColor::Zero => unreachable!(), + } +} + +pub fn map_primitive_topology( + topology: wgt::PrimitiveTopology, +) -> (metal::MTLPrimitiveTopologyClass, metal::MTLPrimitiveType) { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => ( + metal::MTLPrimitiveTopologyClass::Point, + metal::MTLPrimitiveType::Point, + ), + Pt::LineList => ( + metal::MTLPrimitiveTopologyClass::Line, + metal::MTLPrimitiveType::Line, + ), + Pt::LineStrip => ( + metal::MTLPrimitiveTopologyClass::Line, + metal::MTLPrimitiveType::LineStrip, + ), + Pt::TriangleList => ( + metal::MTLPrimitiveTopologyClass::Triangle, + metal::MTLPrimitiveType::Triangle, + ), + Pt::TriangleStrip => ( + metal::MTLPrimitiveTopologyClass::Triangle, + metal::MTLPrimitiveType::TriangleStrip, + ), + } +} + +pub fn map_color_write(mask: wgt::ColorWrites) -> metal::MTLColorWriteMask { + let mut raw_mask = metal::MTLColorWriteMask::empty(); + + if mask.contains(wgt::ColorWrites::RED) { + raw_mask |= metal::MTLColorWriteMask::Red; + } + if mask.contains(wgt::ColorWrites::GREEN) { + raw_mask |= metal::MTLColorWriteMask::Green; + } + if mask.contains(wgt::ColorWrites::BLUE) { + raw_mask |= metal::MTLColorWriteMask::Blue; + } + if mask.contains(wgt::ColorWrites::ALPHA) { + raw_mask |= metal::MTLColorWriteMask::Alpha; + } + + raw_mask +} + +pub fn map_blend_factor(factor: wgt::BlendFactor) -> metal::MTLBlendFactor { + use metal::MTLBlendFactor::*; + use wgt::BlendFactor as Bf; + + match factor { + Bf::Zero => Zero, + Bf::One => One, + Bf::Src => SourceColor, + Bf::OneMinusSrc => OneMinusSourceColor, + Bf::Dst => DestinationColor, + Bf::OneMinusDst => OneMinusDestinationColor, + Bf::SrcAlpha => SourceAlpha, + Bf::OneMinusSrcAlpha => OneMinusSourceAlpha, + Bf::DstAlpha => DestinationAlpha, + Bf::OneMinusDstAlpha => OneMinusDestinationAlpha, + Bf::Constant => BlendColor, + Bf::OneMinusConstant => OneMinusBlendColor, + Bf::SrcAlphaSaturated => SourceAlphaSaturated, + Bf::Src1 => Source1Color, + Bf::OneMinusSrc1 => OneMinusSource1Color, + Bf::Src1Alpha => Source1Alpha, + Bf::OneMinusSrc1Alpha => OneMinusSource1Alpha, + } +} + +pub fn map_blend_op(operation: wgt::BlendOperation) -> metal::MTLBlendOperation { + use metal::MTLBlendOperation::*; + use wgt::BlendOperation as Bo; + + match operation { + Bo::Add => Add, + Bo::Subtract => Subtract, + Bo::ReverseSubtract => ReverseSubtract, + Bo::Min => Min, + Bo::Max => Max, + } +} + +pub fn map_blend_component( + component: &wgt::BlendComponent, +) -> ( + metal::MTLBlendOperation, + metal::MTLBlendFactor, + metal::MTLBlendFactor, +) { + ( + map_blend_op(component.operation), + map_blend_factor(component.src_factor), + map_blend_factor(component.dst_factor), + ) +} + +pub fn map_vertex_format(format: wgt::VertexFormat) -> metal::MTLVertexFormat { + use metal::MTLVertexFormat::*; + use wgt::VertexFormat as Vf; + + match format { + Vf::Unorm8x2 => UChar2Normalized, + Vf::Snorm8x2 => Char2Normalized, + Vf::Uint8x2 => UChar2, + Vf::Sint8x2 => Char2, + Vf::Unorm8x4 => UChar4Normalized, + Vf::Snorm8x4 => Char4Normalized, + Vf::Uint8x4 => UChar4, + Vf::Sint8x4 => Char4, + Vf::Unorm16x2 => UShort2Normalized, + Vf::Snorm16x2 => Short2Normalized, + Vf::Uint16x2 => UShort2, + Vf::Sint16x2 => Short2, + Vf::Float16x2 => Half2, + Vf::Unorm16x4 => UShort4Normalized, + Vf::Snorm16x4 => Short4Normalized, + Vf::Uint16x4 => UShort4, + Vf::Sint16x4 => Short4, + Vf::Float16x4 => Half4, + Vf::Uint32 => UInt, + Vf::Sint32 => Int, + Vf::Float32 => Float, + Vf::Uint32x2 => UInt2, + Vf::Sint32x2 => Int2, + Vf::Float32x2 => Float2, + Vf::Uint32x3 => UInt3, + Vf::Sint32x3 => Int3, + Vf::Float32x3 => Float3, + Vf::Uint32x4 => UInt4, + Vf::Sint32x4 => Int4, + Vf::Float32x4 => Float4, + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + } +} + +pub fn map_step_mode(mode: wgt::VertexStepMode) -> metal::MTLVertexStepFunction { + match mode { + wgt::VertexStepMode::Vertex => metal::MTLVertexStepFunction::PerVertex, + wgt::VertexStepMode::Instance => metal::MTLVertexStepFunction::PerInstance, + } +} + +pub fn map_stencil_op(op: wgt::StencilOperation) -> metal::MTLStencilOperation { + use metal::MTLStencilOperation::*; + use wgt::StencilOperation as So; + + match op { + So::Keep => Keep, + So::Zero => Zero, + So::Replace => Replace, + So::IncrementClamp => IncrementClamp, + So::IncrementWrap => IncrementWrap, + So::DecrementClamp => DecrementClamp, + So::DecrementWrap => DecrementWrap, + So::Invert => Invert, + } +} + +pub fn map_winding(winding: wgt::FrontFace) -> metal::MTLWinding { + match winding { + wgt::FrontFace::Cw => metal::MTLWinding::Clockwise, + wgt::FrontFace::Ccw => metal::MTLWinding::CounterClockwise, + } +} + +pub fn map_cull_mode(face: Option<wgt::Face>) -> metal::MTLCullMode { + match face { + None => metal::MTLCullMode::None, + Some(wgt::Face::Front) => metal::MTLCullMode::Front, + Some(wgt::Face::Back) => metal::MTLCullMode::Back, + } +} + +pub fn map_range(range: &crate::MemoryRange) -> metal::NSRange { + metal::NSRange { + location: range.start, + length: range.end - range.start, + } +} + +pub fn map_copy_extent(extent: &crate::CopyExtent) -> metal::MTLSize { + metal::MTLSize { + width: extent.width as u64, + height: extent.height as u64, + depth: extent.depth as u64, + } +} + +pub fn map_origin(origin: &wgt::Origin3d) -> metal::MTLOrigin { + metal::MTLOrigin { + x: origin.x as u64, + y: origin.y as u64, + z: origin.z as u64, + } +} + +pub fn map_store_action(store: bool, resolve: bool) -> metal::MTLStoreAction { + use metal::MTLStoreAction::*; + match (store, resolve) { + (true, true) => StoreAndMultisampleResolve, + (false, true) => MultisampleResolve, + (true, false) => Store, + (false, false) => DontCare, + } +} + +pub fn map_clear_color(color: &wgt::Color) -> metal::MTLClearColor { + metal::MTLClearColor { + red: color.r, + green: color.g, + blue: color.b, + alpha: color.a, + } +} + +pub fn get_blit_option( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> metal::MTLBlitOption { + if format.is_combined_depth_stencil_format() { + match aspect { + crate::FormatAspects::DEPTH => metal::MTLBlitOption::DepthFromDepthStencil, + crate::FormatAspects::STENCIL => metal::MTLBlitOption::StencilFromDepthStencil, + _ => unreachable!(), + } + } else { + metal::MTLBlitOption::None + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/device.rs b/third_party/rust/wgpu-hal/src/metal/device.rs new file mode 100644 index 0000000000..d7fd06c8f3 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/device.rs @@ -0,0 +1,1251 @@ +use parking_lot::Mutex; +use std::{ + num::NonZeroU32, + ptr, + sync::{atomic, Arc}, + thread, time, +}; + +use super::conv; +use crate::auxil::map_naga_stage; + +type DeviceResult<T> = Result<T, crate::DeviceError>; + +struct CompiledShader { + library: metal::Library, + function: metal::Function, + wg_size: metal::MTLSize, + wg_memory_sizes: Vec<u32>, + + /// Bindings of WGSL `storage` globals that contain variable-sized arrays. + /// + /// In order to implement bounds checks and the `arrayLength` function for + /// WGSL runtime-sized arrays, we pass the entry point a struct with a + /// member for each global variable that contains such an array. That member + /// is a `u32` holding the variable's total size in bytes---which is simply + /// the size of the `Buffer` supplying that variable's contents for the + /// draw call. + sized_bindings: Vec<naga::ResourceBinding>, + + immutable_buffer_mask: usize, +} + +fn create_stencil_desc( + face: &wgt::StencilFaceState, + read_mask: u32, + write_mask: u32, +) -> metal::StencilDescriptor { + let desc = metal::StencilDescriptor::new(); + desc.set_stencil_compare_function(conv::map_compare_function(face.compare)); + desc.set_read_mask(read_mask); + desc.set_write_mask(write_mask); + desc.set_stencil_failure_operation(conv::map_stencil_op(face.fail_op)); + desc.set_depth_failure_operation(conv::map_stencil_op(face.depth_fail_op)); + desc.set_depth_stencil_pass_operation(conv::map_stencil_op(face.pass_op)); + desc +} + +fn create_depth_stencil_desc(state: &wgt::DepthStencilState) -> metal::DepthStencilDescriptor { + let desc = metal::DepthStencilDescriptor::new(); + desc.set_depth_compare_function(conv::map_compare_function(state.depth_compare)); + desc.set_depth_write_enabled(state.depth_write_enabled); + let s = &state.stencil; + if s.is_enabled() { + let front_desc = create_stencil_desc(&s.front, s.read_mask, s.write_mask); + desc.set_front_face_stencil(Some(&front_desc)); + let back_desc = create_stencil_desc(&s.back, s.read_mask, s.write_mask); + desc.set_back_face_stencil(Some(&back_desc)); + } + desc +} + +impl super::Device { + fn load_shader( + &self, + stage: &crate::ProgrammableStage<super::Api>, + layout: &super::PipelineLayout, + primitive_class: metal::MTLPrimitiveTopologyClass, + naga_stage: naga::ShaderStage, + ) -> Result<CompiledShader, crate::PipelineError> { + let stage_bit = map_naga_stage(naga_stage); + + let module = &stage.module.naga.module; + let ep_resources = &layout.per_stage_map[naga_stage]; + + let bounds_check_policy = if stage.module.runtime_checks { + naga::proc::BoundsCheckPolicy::ReadZeroSkipWrite + } else { + naga::proc::BoundsCheckPolicy::Unchecked + }; + + let options = naga::back::msl::Options { + lang_version: match self.shared.private_caps.msl_version { + metal::MTLLanguageVersion::V1_0 => (1, 0), + metal::MTLLanguageVersion::V1_1 => (1, 1), + metal::MTLLanguageVersion::V1_2 => (1, 2), + metal::MTLLanguageVersion::V2_0 => (2, 0), + metal::MTLLanguageVersion::V2_1 => (2, 1), + metal::MTLLanguageVersion::V2_2 => (2, 2), + metal::MTLLanguageVersion::V2_3 => (2, 3), + metal::MTLLanguageVersion::V2_4 => (2, 4), + }, + inline_samplers: Default::default(), + spirv_cross_compatibility: false, + fake_missing_bindings: false, + per_entry_point_map: naga::back::msl::EntryPointResourceMap::from([( + stage.entry_point.to_string(), + ep_resources.clone(), + )]), + bounds_check_policies: naga::proc::BoundsCheckPolicies { + index: bounds_check_policy, + buffer: bounds_check_policy, + image_load: bounds_check_policy, + image_store: naga::proc::BoundsCheckPolicy::Unchecked, + // TODO: support bounds checks on binding arrays + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }, + zero_initialize_workgroup_memory: true, + }; + + let pipeline_options = naga::back::msl::PipelineOptions { + allow_and_force_point_size: match primitive_class { + metal::MTLPrimitiveTopologyClass::Point => true, + _ => false, + }, + }; + + let (source, info) = naga::back::msl::write_string( + module, + &stage.module.naga.info, + &options, + &pipeline_options, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?; + + log::debug!( + "Naga generated shader for entry point '{}' and stage {:?}\n{}", + stage.entry_point, + naga_stage, + &source + ); + + let options = metal::CompileOptions::new(); + options.set_language_version(self.shared.private_caps.msl_version); + + if self.shared.private_caps.supports_preserve_invariance { + options.set_preserve_invariance(true); + } + + let library = self + .shared + .device + .lock() + .new_library_with_source(source.as_ref(), &options) + .map_err(|err| { + log::warn!("Naga generated shader:\n{}", source); + crate::PipelineError::Linkage(stage_bit, format!("Metal: {}", err)) + })?; + + let ep_index = module + .entry_points + .iter() + .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + let ep = &module.entry_points[ep_index]; + let ep_name = info.entry_point_names[ep_index] + .as_ref() + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{}", e)))?; + + let wg_size = metal::MTLSize { + width: ep.workgroup_size[0] as _, + height: ep.workgroup_size[1] as _, + depth: ep.workgroup_size[2] as _, + }; + + let function = library.get_function(ep_name, None).map_err(|e| { + log::error!("get_function: {:?}", e); + crate::PipelineError::EntryPoint(naga_stage) + })?; + + // collect sizes indices, immutable buffers, and work group memory sizes + let ep_info = &stage.module.naga.info.get_entry_point(ep_index); + let mut wg_memory_sizes = Vec::new(); + let mut sized_bindings = Vec::new(); + let mut immutable_buffer_mask = 0; + for (var_handle, var) in module.global_variables.iter() { + match var.space { + naga::AddressSpace::WorkGroup => { + if !ep_info[var_handle].is_empty() { + let size = module.types[var.ty].inner.size(module.to_ctx()); + wg_memory_sizes.push(size); + } + } + naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. } => { + let br = match var.binding { + Some(ref br) => br.clone(), + None => continue, + }; + let storage_access_store = match var.space { + naga::AddressSpace::Storage { access } => { + access.contains(naga::StorageAccess::STORE) + } + _ => false, + }; + + // check for an immutable buffer + if !ep_info[var_handle].is_empty() && !storage_access_store { + let slot = ep_resources.resources[&br].buffer.unwrap(); + immutable_buffer_mask |= 1 << slot; + } + + let mut dynamic_array_container_ty = var.ty; + if let naga::TypeInner::Struct { ref members, .. } = module.types[var.ty].inner + { + dynamic_array_container_ty = members.last().unwrap().ty; + } + if let naga::TypeInner::Array { + size: naga::ArraySize::Dynamic, + .. + } = module.types[dynamic_array_container_ty].inner + { + sized_bindings.push(br); + } + } + _ => {} + } + } + + Ok(CompiledShader { + library, + function, + wg_size, + wg_memory_sizes, + sized_bindings, + immutable_buffer_mask, + }) + } + + fn set_buffers_mutability( + buffers: &metal::PipelineBufferDescriptorArrayRef, + mut immutable_mask: usize, + ) { + while immutable_mask != 0 { + let slot = immutable_mask.trailing_zeros(); + immutable_mask ^= 1 << slot; + buffers + .object_at(slot as u64) + .unwrap() + .set_mutability(metal::MTLMutability::Immutable); + } + } + + pub unsafe fn texture_from_raw( + raw: metal::Texture, + format: wgt::TextureFormat, + raw_type: metal::MTLTextureType, + array_layers: u32, + mip_levels: u32, + copy_size: crate::CopyExtent, + ) -> super::Texture { + super::Texture { + raw, + format, + raw_type, + array_layers, + mip_levels, + copy_size, + } + } + + pub unsafe fn device_from_raw(raw: metal::Device, features: wgt::Features) -> super::Device { + super::Device { + shared: Arc::new(super::AdapterShared::new(raw)), + features, + } + } + + pub unsafe fn buffer_from_raw(raw: metal::Buffer, size: wgt::BufferAddress) -> super::Buffer { + super::Buffer { raw, size } + } + + pub fn raw_device(&self) -> &Mutex<metal::Device> { + &self.shared.device + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, _queue: super::Queue) {} + + unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<super::Buffer> { + let map_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let map_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let mut options = metal::MTLResourceOptions::empty(); + options |= if map_read || map_write { + // `crate::MemoryFlags::PREFER_COHERENT` is ignored here + metal::MTLResourceOptions::StorageModeShared + } else { + metal::MTLResourceOptions::StorageModePrivate + }; + options.set( + metal::MTLResourceOptions::CPUCacheModeWriteCombined, + map_write, + ); + + //TODO: HazardTrackingModeUntracked + + objc::rc::autoreleasepool(|| { + let raw = self.shared.device.lock().new_buffer(desc.size, options); + if let Some(label) = desc.label { + raw.set_label(label); + } + Ok(super::Buffer { + raw, + size: desc.size, + }) + }) + } + unsafe fn destroy_buffer(&self, _buffer: super::Buffer) {} + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> DeviceResult<crate::BufferMapping> { + let ptr = buffer.raw.contents() as *mut u8; + assert!(!ptr.is_null()); + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize) }).unwrap(), + is_coherent: true, + }) + } + + unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) -> DeviceResult<()> { + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> DeviceResult<super::Texture> { + use metal::foreign_types::ForeignType as _; + + let mtl_format = self.shared.private_caps.map_format(desc.format); + + objc::rc::autoreleasepool(|| { + let descriptor = metal::TextureDescriptor::new(); + + let mtl_type = match desc.dimension { + wgt::TextureDimension::D1 => metal::MTLTextureType::D1, + wgt::TextureDimension::D2 => { + if desc.sample_count > 1 { + descriptor.set_sample_count(desc.sample_count as u64); + metal::MTLTextureType::D2Multisample + } else if desc.size.depth_or_array_layers > 1 { + descriptor.set_array_length(desc.size.depth_or_array_layers as u64); + metal::MTLTextureType::D2Array + } else { + metal::MTLTextureType::D2 + } + } + wgt::TextureDimension::D3 => { + descriptor.set_depth(desc.size.depth_or_array_layers as u64); + metal::MTLTextureType::D3 + } + }; + + descriptor.set_texture_type(mtl_type); + descriptor.set_width(desc.size.width as u64); + descriptor.set_height(desc.size.height as u64); + descriptor.set_mipmap_level_count(desc.mip_level_count as u64); + descriptor.set_pixel_format(mtl_format); + descriptor.set_usage(conv::map_texture_usage(desc.format, desc.usage)); + descriptor.set_storage_mode(metal::MTLStorageMode::Private); + + let raw = self.shared.device.lock().new_texture(&descriptor); + if raw.as_ptr().is_null() { + return Err(crate::DeviceError::OutOfMemory); + } + if let Some(label) = desc.label { + raw.set_label(label); + } + + Ok(super::Texture { + raw, + format: desc.format, + raw_type: mtl_type, + mip_levels: desc.mip_level_count, + array_layers: desc.array_layer_count(), + copy_size: desc.copy_extent(), + }) + }) + } + + unsafe fn destroy_texture(&self, _texture: super::Texture) {} + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> DeviceResult<super::TextureView> { + let raw_type = if texture.raw_type == metal::MTLTextureType::D2Multisample { + texture.raw_type + } else { + conv::map_texture_view_dimension(desc.dimension) + }; + + let aspects = crate::FormatAspects::new(texture.format, desc.range.aspect); + + let raw_format = self + .shared + .private_caps + .map_view_format(desc.format, aspects); + + let format_equal = raw_format == self.shared.private_caps.map_format(texture.format); + let type_equal = raw_type == texture.raw_type; + let range_full_resource = + desc.range + .is_full_resource(desc.format, texture.mip_levels, texture.array_layers); + + let raw = if format_equal && type_equal && range_full_resource { + // Some images are marked as framebuffer-only, and we can't create aliases of them. + // Also helps working around Metal bugs with aliased array textures. + texture.raw.to_owned() + } else { + let mip_level_count = desc + .range + .mip_level_count + .unwrap_or(texture.mip_levels - desc.range.base_mip_level); + let array_layer_count = desc + .range + .array_layer_count + .unwrap_or(texture.array_layers - desc.range.base_array_layer); + + objc::rc::autoreleasepool(|| { + let raw = texture.raw.new_texture_view_from_slice( + raw_format, + raw_type, + metal::NSRange { + location: desc.range.base_mip_level as _, + length: mip_level_count as _, + }, + metal::NSRange { + location: desc.range.base_array_layer as _, + length: array_layer_count as _, + }, + ); + if let Some(label) = desc.label { + raw.set_label(label); + } + raw + }) + }; + + Ok(super::TextureView { raw, aspects }) + } + unsafe fn destroy_texture_view(&self, _view: super::TextureView) {} + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> DeviceResult<super::Sampler> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::SamplerDescriptor::new(); + + descriptor.set_min_filter(conv::map_filter_mode(desc.min_filter)); + descriptor.set_mag_filter(conv::map_filter_mode(desc.mag_filter)); + descriptor.set_mip_filter(match desc.mipmap_filter { + wgt::FilterMode::Nearest if desc.lod_clamp == (0.0..0.0) => { + metal::MTLSamplerMipFilter::NotMipmapped + } + wgt::FilterMode::Nearest => metal::MTLSamplerMipFilter::Nearest, + wgt::FilterMode::Linear => metal::MTLSamplerMipFilter::Linear, + }); + + let [s, t, r] = desc.address_modes; + descriptor.set_address_mode_s(conv::map_address_mode(s)); + descriptor.set_address_mode_t(conv::map_address_mode(t)); + descriptor.set_address_mode_r(conv::map_address_mode(r)); + + // Anisotropy is always supported on mac up to 16x + descriptor.set_max_anisotropy(desc.anisotropy_clamp as _); + + descriptor.set_lod_min_clamp(desc.lod_clamp.start); + descriptor.set_lod_max_clamp(desc.lod_clamp.end); + + if let Some(fun) = desc.compare { + descriptor.set_compare_function(conv::map_compare_function(fun)); + } + + if let Some(border_color) = desc.border_color { + if let wgt::SamplerBorderColor::Zero = border_color { + if s == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_s(metal::MTLSamplerAddressMode::ClampToZero); + } + + if t == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_t(metal::MTLSamplerAddressMode::ClampToZero); + } + + if r == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_r(metal::MTLSamplerAddressMode::ClampToZero); + } + } else { + descriptor.set_border_color(conv::map_border_color(border_color)); + } + } + + if let Some(label) = desc.label { + descriptor.set_label(label); + } + let raw = self.shared.device.lock().new_sampler(&descriptor); + + Ok(super::Sampler { raw }) + }) + } + unsafe fn destroy_sampler(&self, _sampler: super::Sampler) {} + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + Ok(super::CommandEncoder { + shared: Arc::clone(&self.shared), + raw_queue: Arc::clone(&desc.queue.raw), + raw_cmd_buf: None, + state: super::CommandState::default(), + temp: super::Temp::default(), + }) + } + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> DeviceResult<super::BindGroupLayout> { + Ok(super::BindGroupLayout { + entries: Arc::from(desc.entries), + }) + } + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {} + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> DeviceResult<super::PipelineLayout> { + #[derive(Debug)] + struct StageInfo { + stage: naga::ShaderStage, + counters: super::ResourceData<super::ResourceIndex>, + pc_buffer: Option<super::ResourceIndex>, + pc_limit: u32, + sizes_buffer: Option<super::ResourceIndex>, + sizes_count: u8, + resources: naga::back::msl::BindingMap, + } + + let mut stage_data = super::NAGA_STAGES.map(|stage| StageInfo { + stage, + counters: super::ResourceData::default(), + pc_buffer: None, + pc_limit: 0, + sizes_buffer: None, + sizes_count: 0, + resources: Default::default(), + }); + let mut bind_group_infos = arrayvec::ArrayVec::new(); + + // First, place the push constants + let mut total_push_constants = 0; + for info in stage_data.iter_mut() { + for pcr in desc.push_constant_ranges { + if pcr.stages.contains(map_naga_stage(info.stage)) { + debug_assert_eq!(pcr.range.end % 4, 0); + info.pc_limit = (pcr.range.end / 4).max(info.pc_limit); + } + } + + // round up the limits alignment to 4, so that it matches MTL compiler logic + const LIMIT_MASK: u32 = 3; + //TODO: figure out what and how exactly does the alignment. Clearly, it's not + // straightforward, given that value of 2 stays non-aligned. + if info.pc_limit > LIMIT_MASK { + info.pc_limit = (info.pc_limit + LIMIT_MASK) & !LIMIT_MASK; + } + + // handle the push constant buffer assignment and shader overrides + if info.pc_limit != 0 { + info.pc_buffer = Some(info.counters.buffers); + info.counters.buffers += 1; + } + + total_push_constants = total_push_constants.max(info.pc_limit); + } + + // Second, place the described resources + for (group_index, &bgl) in desc.bind_group_layouts.iter().enumerate() { + // remember where the resources for this set start at each shader stage + let base_resource_indices = stage_data.map_ref(|info| info.counters.clone()); + + for entry in bgl.entries.iter() { + if let wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + .. + } = entry.ty + { + for info in stage_data.iter_mut() { + if entry.visibility.contains(map_naga_stage(info.stage)) { + info.sizes_count += 1; + } + } + } + + for info in stage_data.iter_mut() { + if !entry.visibility.contains(map_naga_stage(info.stage)) { + continue; + } + + let mut target = naga::back::msl::BindTarget::default(); + let count = entry.count.map_or(1, NonZeroU32::get); + target.binding_array_size = entry.count.map(NonZeroU32::get); + match entry.ty { + wgt::BindingType::Buffer { ty, .. } => { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += count; + if let wgt::BufferBindingType::Storage { read_only } = ty { + target.mutable = !read_only; + } + } + wgt::BindingType::Sampler { .. } => { + target.sampler = Some(naga::back::msl::BindSamplerTarget::Resource( + info.counters.samplers as _, + )); + info.counters.samplers += count; + } + wgt::BindingType::Texture { .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += count; + } + wgt::BindingType::StorageTexture { access, .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += count; + target.mutable = match access { + wgt::StorageTextureAccess::ReadOnly => false, + wgt::StorageTextureAccess::WriteOnly => true, + wgt::StorageTextureAccess::ReadWrite => true, + }; + } + wgt::BindingType::AccelerationStructure => unimplemented!(), + } + + let br = naga::ResourceBinding { + group: group_index as u32, + binding: entry.binding, + }; + info.resources.insert(br, target); + } + } + + bind_group_infos.push(super::BindGroupLayoutInfo { + base_resource_indices, + }); + } + + // Finally, make sure we fit the limits + for info in stage_data.iter_mut() { + // handle the sizes buffer assignment and shader overrides + if info.sizes_count != 0 { + info.sizes_buffer = Some(info.counters.buffers); + info.counters.buffers += 1; + } + if info.counters.buffers > self.shared.private_caps.max_buffers_per_stage + || info.counters.textures > self.shared.private_caps.max_textures_per_stage + || info.counters.samplers > self.shared.private_caps.max_samplers_per_stage + { + log::error!("Resource limit exceeded: {:?}", info); + return Err(crate::DeviceError::OutOfMemory); + } + } + + let push_constants_infos = stage_data.map_ref(|info| { + info.pc_buffer.map(|buffer_index| super::PushConstantsInfo { + count: info.pc_limit, + buffer_index, + }) + }); + + let total_counters = stage_data.map_ref(|info| info.counters.clone()); + + let per_stage_map = stage_data.map(|info| naga::back::msl::EntryPointResources { + push_constant_buffer: info + .pc_buffer + .map(|buffer_index| buffer_index as naga::back::msl::Slot), + sizes_buffer: info + .sizes_buffer + .map(|buffer_index| buffer_index as naga::back::msl::Slot), + resources: info.resources, + }); + + Ok(super::PipelineLayout { + bind_group_infos, + push_constants_infos, + total_counters, + total_push_constants, + per_stage_map, + }) + } + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> DeviceResult<super::BindGroup> { + let mut bg = super::BindGroup::default(); + for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { + let stage_bit = map_naga_stage(stage); + let mut dynamic_offsets_count = 0u32; + for (entry, layout) in desc.entries.iter().zip(desc.layout.entries.iter()) { + let size = layout.count.map_or(1, |c| c.get()); + if let wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } = layout.ty + { + dynamic_offsets_count += size; + } + if !layout.visibility.contains(stage_bit) { + continue; + } + match layout.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.buffers + .extend(desc.buffers[start..end].iter().map(|source| { + // Given the restrictions on `BufferBinding::offset`, + // this should never be `None`. + let remaining_size = + wgt::BufferSize::new(source.buffer.size - source.offset); + let binding_size = match ty { + wgt::BufferBindingType::Storage { .. } => { + source.size.or(remaining_size) + } + _ => None, + }; + super::BufferResource { + ptr: source.buffer.as_raw(), + offset: source.offset, + dynamic_index: if has_dynamic_offset { + Some(dynamic_offsets_count - 1) + } else { + None + }, + binding_size, + binding_location: layout.binding, + } + })); + counter.buffers += 1; + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.samplers + .extend(desc.samplers[start..end].iter().map(|samp| samp.as_raw())); + counter.samplers += size; + } + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.textures.extend( + desc.textures[start..end] + .iter() + .map(|tex| tex.view.as_raw()), + ); + counter.textures += size; + } + wgt::BindingType::AccelerationStructure => unimplemented!(), + } + } + } + + Ok(bg) + } + + unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { + naga, + runtime_checks: desc.runtime_checks, + }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::RenderPipelineDescriptor::new(); + + let raw_triangle_fill_mode = match desc.primitive.polygon_mode { + wgt::PolygonMode::Fill => metal::MTLTriangleFillMode::Fill, + wgt::PolygonMode::Line => metal::MTLTriangleFillMode::Lines, + wgt::PolygonMode::Point => panic!( + "{:?} is not enabled for this backend", + wgt::Features::POLYGON_MODE_POINT + ), + }; + + let (primitive_class, raw_primitive_type) = + conv::map_primitive_topology(desc.primitive.topology); + + // Vertex shader + let (vs_lib, vs_info) = { + let vs = self.load_shader( + &desc.vertex_stage, + desc.layout, + primitive_class, + naga::ShaderStage::Vertex, + )?; + + descriptor.set_vertex_function(Some(&vs.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.vertex_buffers().unwrap(), + vs.immutable_buffer_mask, + ); + } + + let info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.vs, + sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer, + sized_bindings: vs.sized_bindings, + }; + + (vs.library, info) + }; + + // Fragment shader + let (fs_lib, fs_info) = match desc.fragment_stage { + Some(ref stage) => { + let fs = self.load_shader( + stage, + desc.layout, + primitive_class, + naga::ShaderStage::Fragment, + )?; + + descriptor.set_fragment_function(Some(&fs.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.fragment_buffers().unwrap(), + fs.immutable_buffer_mask, + ); + } + + let info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.fs, + sizes_slot: desc.layout.per_stage_map.fs.sizes_buffer, + sized_bindings: fs.sized_bindings, + }; + + (Some(fs.library), Some(info)) + } + None => { + // TODO: This is a workaround for what appears to be a Metal validation bug + // A pixel format is required even though no attachments are provided + if desc.color_targets.is_empty() && desc.depth_stencil.is_none() { + descriptor + .set_depth_attachment_pixel_format(metal::MTLPixelFormat::Depth32Float); + } + (None, None) + } + }; + + for (i, ct) in desc.color_targets.iter().enumerate() { + let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + let ct = if let Some(color_target) = ct.as_ref() { + color_target + } else { + at_descriptor.set_pixel_format(metal::MTLPixelFormat::Invalid); + continue; + }; + + let raw_format = self.shared.private_caps.map_format(ct.format); + at_descriptor.set_pixel_format(raw_format); + at_descriptor.set_write_mask(conv::map_color_write(ct.write_mask)); + + if let Some(ref blend) = ct.blend { + at_descriptor.set_blending_enabled(true); + let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); + let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); + + at_descriptor.set_rgb_blend_operation(color_op); + at_descriptor.set_source_rgb_blend_factor(color_src); + at_descriptor.set_destination_rgb_blend_factor(color_dst); + + at_descriptor.set_alpha_blend_operation(alpha_op); + at_descriptor.set_source_alpha_blend_factor(alpha_src); + at_descriptor.set_destination_alpha_blend_factor(alpha_dst); + } + } + + let depth_stencil = match desc.depth_stencil { + Some(ref ds) => { + let raw_format = self.shared.private_caps.map_format(ds.format); + let aspects = crate::FormatAspects::from(ds.format); + if aspects.contains(crate::FormatAspects::DEPTH) { + descriptor.set_depth_attachment_pixel_format(raw_format); + } + if aspects.contains(crate::FormatAspects::STENCIL) { + descriptor.set_stencil_attachment_pixel_format(raw_format); + } + + let ds_descriptor = create_depth_stencil_desc(ds); + let raw = self + .shared + .device + .lock() + .new_depth_stencil_state(&ds_descriptor); + Some((raw, ds.bias)) + } + None => None, + }; + + if desc.layout.total_counters.vs.buffers + (desc.vertex_buffers.len() as u32) + > self.shared.private_caps.max_vertex_buffers + { + let msg = format!( + "pipeline needs too many buffers in the vertex stage: {} vertex and {} layout", + desc.vertex_buffers.len(), + desc.layout.total_counters.vs.buffers + ); + return Err(crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX, + msg, + )); + } + + if !desc.vertex_buffers.is_empty() { + let vertex_descriptor = metal::VertexDescriptor::new(); + for (i, vb) in desc.vertex_buffers.iter().enumerate() { + let buffer_index = + self.shared.private_caps.max_vertex_buffers as u64 - 1 - i as u64; + let buffer_desc = vertex_descriptor.layouts().object_at(buffer_index).unwrap(); + + // Metal expects the stride to be the actual size of the attributes. + // The semantics of array_stride == 0 can be achieved by setting + // the step function to constant and rate to 0. + if vb.array_stride == 0 { + let stride = vb + .attributes + .iter() + .map(|attribute| attribute.offset + attribute.format.size()) + .max() + .unwrap_or(0); + buffer_desc.set_stride(wgt::math::align_to(stride, 4)); + buffer_desc.set_step_function(metal::MTLVertexStepFunction::Constant); + buffer_desc.set_step_rate(0); + } else { + buffer_desc.set_stride(vb.array_stride); + buffer_desc.set_step_function(conv::map_step_mode(vb.step_mode)); + } + + for at in vb.attributes { + let attribute_desc = vertex_descriptor + .attributes() + .object_at(at.shader_location as u64) + .unwrap(); + attribute_desc.set_format(conv::map_vertex_format(at.format)); + attribute_desc.set_buffer_index(buffer_index); + attribute_desc.set_offset(at.offset); + } + } + descriptor.set_vertex_descriptor(Some(vertex_descriptor)); + } + + if desc.multisample.count != 1 { + //TODO: handle sample mask + descriptor.set_sample_count(desc.multisample.count as u64); + descriptor + .set_alpha_to_coverage_enabled(desc.multisample.alpha_to_coverage_enabled); + //descriptor.set_alpha_to_one_enabled(desc.multisample.alpha_to_one_enabled); + } + + if let Some(name) = desc.label { + descriptor.set_label(name); + } + + let raw = self + .shared + .device + .lock() + .new_render_pipeline_state(&descriptor) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX | wgt::ShaderStages::FRAGMENT, + format!("new_render_pipeline_state: {:?}", e), + ) + })?; + + Ok(super::RenderPipeline { + raw, + vs_lib, + fs_lib, + vs_info, + fs_info, + raw_primitive_type, + raw_triangle_fill_mode, + raw_front_winding: conv::map_winding(desc.primitive.front_face), + raw_cull_mode: conv::map_cull_mode(desc.primitive.cull_mode), + raw_depth_clip_mode: if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + Some(if desc.primitive.unclipped_depth { + metal::MTLDepthClipMode::Clamp + } else { + metal::MTLDepthClipMode::Clip + }) + } else { + None + }, + depth_stencil, + }) + }) + } + unsafe fn destroy_render_pipeline(&self, _pipeline: super::RenderPipeline) {} + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::ComputePipelineDescriptor::new(); + + let cs = self.load_shader( + &desc.stage, + desc.layout, + metal::MTLPrimitiveTopologyClass::Unspecified, + naga::ShaderStage::Compute, + )?; + descriptor.set_compute_function(Some(&cs.function)); + + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.buffers().unwrap(), + cs.immutable_buffer_mask, + ); + } + + let cs_info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.cs, + sizes_slot: desc.layout.per_stage_map.cs.sizes_buffer, + sized_bindings: cs.sized_bindings, + }; + + if let Some(name) = desc.label { + descriptor.set_label(name); + } + + let raw = self + .shared + .device + .lock() + .new_compute_pipeline_state(&descriptor) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::COMPUTE, + format!("new_compute_pipeline_state: {:?}", e), + ) + })?; + + Ok(super::ComputePipeline { + raw, + cs_info, + cs_lib: cs.library, + work_group_size: cs.wg_size, + work_group_memory_sizes: cs.wg_memory_sizes, + }) + }) + } + unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> DeviceResult<super::QuerySet> { + objc::rc::autoreleasepool(|| { + match desc.ty { + wgt::QueryType::Occlusion => { + let size = desc.count as u64 * crate::QUERY_SIZE; + let options = metal::MTLResourceOptions::empty(); + //TODO: HazardTrackingModeUntracked + let raw_buffer = self.shared.device.lock().new_buffer(size, options); + if let Some(label) = desc.label { + raw_buffer.set_label(label); + } + Ok(super::QuerySet { + raw_buffer, + counter_sample_buffer: None, + ty: desc.ty, + }) + } + wgt::QueryType::Timestamp => { + let size = desc.count as u64 * crate::QUERY_SIZE; + let device = self.shared.device.lock(); + let destination_buffer = + device.new_buffer(size, metal::MTLResourceOptions::empty()); + + let csb_desc = metal::CounterSampleBufferDescriptor::new(); + csb_desc.set_storage_mode(metal::MTLStorageMode::Shared); + csb_desc.set_sample_count(desc.count as _); + if let Some(label) = desc.label { + csb_desc.set_label(label); + } + + let counter_sets = device.counter_sets(); + let timestamp_counter = + match counter_sets.iter().find(|cs| cs.name() == "timestamp") { + Some(counter) => counter, + None => { + log::error!("Failed to obtain timestamp counter set."); + return Err(crate::DeviceError::ResourceCreationFailed); + } + }; + csb_desc.set_counter_set(timestamp_counter); + + let counter_sample_buffer = + match device.new_counter_sample_buffer_with_descriptor(&csb_desc) { + Ok(buffer) => buffer, + Err(err) => { + log::error!("Failed to create counter sample buffer: {:?}", err); + return Err(crate::DeviceError::ResourceCreationFailed); + } + }; + + Ok(super::QuerySet { + raw_buffer: destination_buffer, + counter_sample_buffer: Some(counter_sample_buffer), + ty: desc.ty, + }) + } + _ => { + todo!() + } + } + }) + } + unsafe fn destroy_query_set(&self, _set: super::QuerySet) {} + + unsafe fn create_fence(&self) -> DeviceResult<super::Fence> { + Ok(super::Fence { + completed_value: Arc::new(atomic::AtomicU64::new(0)), + pending_command_buffers: Vec::new(), + }) + } + unsafe fn destroy_fence(&self, _fence: super::Fence) {} + unsafe fn get_fence_value(&self, fence: &super::Fence) -> DeviceResult<crate::FenceValue> { + let mut max_value = fence.completed_value.load(atomic::Ordering::Acquire); + for &(value, ref cmd_buf) in fence.pending_command_buffers.iter() { + if cmd_buf.status() == metal::MTLCommandBufferStatus::Completed { + max_value = value; + } + } + Ok(max_value) + } + unsafe fn wait( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> DeviceResult<bool> { + if wait_value <= fence.completed_value.load(atomic::Ordering::Acquire) { + return Ok(true); + } + + let cmd_buf = match fence + .pending_command_buffers + .iter() + .find(|&&(value, _)| value >= wait_value) + { + Some(&(_, ref cmd_buf)) => cmd_buf, + None => { + log::error!("No active command buffers for fence value {}", wait_value); + return Err(crate::DeviceError::Lost); + } + }; + + let start = time::Instant::now(); + loop { + if let metal::MTLCommandBufferStatus::Completed = cmd_buf.status() { + return Ok(true); + } + if start.elapsed().as_millis() >= timeout_ms as u128 { + return Ok(false); + } + thread::sleep(time::Duration::from_millis(1)); + } + } + + unsafe fn start_capture(&self) -> bool { + if !self.shared.private_caps.supports_capture_manager { + return false; + } + let device = self.shared.device.lock(); + let shared_capture_manager = metal::CaptureManager::shared(); + let default_capture_scope = shared_capture_manager.new_capture_scope_with_device(&device); + shared_capture_manager.set_default_capture_scope(&default_capture_scope); + shared_capture_manager.start_capture_with_scope(&default_capture_scope); + default_capture_scope.begin_scope(); + true + } + unsafe fn stop_capture(&self) { + let shared_capture_manager = metal::CaptureManager::shared(); + if let Some(default_capture_scope) = shared_capture_manager.default_capture_scope() { + default_capture_scope.end_scope(); + } + shared_capture_manager.stop_capture(); + } + + unsafe fn get_acceleration_structure_build_sizes( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<super::Api>, + ) -> crate::AccelerationStructureBuildSizes { + unimplemented!() + } + + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + unimplemented!() + } + + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result<super::AccelerationStructure, crate::DeviceError> { + unimplemented!() + } + + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + unimplemented!() + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/mod.rs b/third_party/rust/wgpu-hal/src/metal/mod.rs new file mode 100644 index 0000000000..298f60faac --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/mod.rs @@ -0,0 +1,852 @@ +/*! +# Metal API internals. + +## Pipeline Layout + +In Metal, push constants, vertex buffers, and resources in the bind groups +are all placed together in the native resource bindings, which work similarly to D3D11: +there are tables of textures, buffers, and samplers. + +We put push constants first (if any) in the table, followed by bind group 0 +resources, followed by other bind groups. The vertex buffers are bound at the very +end of the VS buffer table. + +!*/ + +// `MTLFeatureSet` is superseded by `MTLGpuFamily`. +// However, `MTLGpuFamily` is only supported starting MacOS 10.15, whereas our minimum target is MacOS 10.13, +// See https://github.com/gpuweb/gpuweb/issues/1069 for minimum spec. +// TODO: Eventually all deprecated features should be abstracted and use new api when available. +#[allow(deprecated)] +mod adapter; +mod command; +mod conv; +mod device; +mod surface; +mod time; + +use std::{ + fmt, iter, ops, + ptr::NonNull, + sync::{atomic, Arc}, + thread, +}; + +use arrayvec::ArrayVec; +use bitflags::bitflags; +use metal::foreign_types::ForeignTypeRef as _; +use parking_lot::{Mutex, RwLock}; + +#[derive(Clone, Debug)] +pub struct Api; + +type ResourceIndex = u32; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; +} + +pub struct Instance { + managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate, +} + +impl Instance { + pub fn create_surface_from_layer(&self, layer: &metal::MetalLayerRef) -> Surface { + unsafe { Surface::from_layer(layer) } + } +} + +impl crate::Instance<Api> for Instance { + unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + profiling::scope!("Init Metal Backend"); + // We do not enable metal validation based on the validation flags as it affects the entire + // process. Instead, we enable the validation inside the test harness itself in tests/src/native.rs. + Ok(Instance { + managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate::new(), + }) + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + match window_handle { + #[cfg(target_os = "ios")] + raw_window_handle::RawWindowHandle::UiKit(handle) => { + let _ = &self.managed_metal_layer_delegate; + Ok(unsafe { Surface::from_view(handle.ui_view.as_ptr(), None) }) + } + #[cfg(target_os = "macos")] + raw_window_handle::RawWindowHandle::AppKit(handle) => Ok(unsafe { + Surface::from_view( + handle.ns_view.as_ptr(), + Some(&self.managed_metal_layer_delegate), + ) + }), + _ => Err(crate::InstanceError::new(format!( + "window handle {window_handle:?} is not a Metal-compatible handle" + ))), + } + } + + unsafe fn destroy_surface(&self, surface: Surface) { + unsafe { surface.dispose() }; + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<Api>> { + let devices = metal::Device::all(); + let mut adapters: Vec<crate::ExposedAdapter<Api>> = devices + .into_iter() + .map(|dev| { + let name = dev.name().into(); + let shared = AdapterShared::new(dev); + crate::ExposedAdapter { + info: wgt::AdapterInfo { + name, + vendor: 0, + device: 0, + device_type: shared.private_caps.device_type(), + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Metal, + }, + features: shared.private_caps.features(), + capabilities: shared.private_caps.capabilities(), + adapter: Adapter::new(Arc::new(shared)), + } + }) + .collect(); + adapters.sort_by_key(|ad| { + ( + ad.adapter.shared.private_caps.low_power, + ad.adapter.shared.private_caps.headless, + ) + }); + adapters + } +} + +bitflags!( + /// Similar to `MTLCounterSamplingPoint`, but a bit higher abstracted for our purposes. + #[derive(Debug, Copy, Clone)] + pub struct TimestampQuerySupport: u32 { + /// On creating Metal encoders. + const STAGE_BOUNDARIES = 1 << 1; + /// Within existing draw encoders. + const ON_RENDER_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 2); + /// Within existing dispatch encoders. + const ON_COMPUTE_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 3); + /// Within existing blit encoders. + const ON_BLIT_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 4); + + /// Within any wgpu render/compute pass. + const INSIDE_WGPU_PASSES = Self::ON_RENDER_ENCODER.bits() | Self::ON_COMPUTE_ENCODER.bits(); + } +); + +#[allow(dead_code)] +#[derive(Clone, Debug)] +struct PrivateCapabilities { + family_check: bool, + msl_version: metal::MTLLanguageVersion, + fragment_rw_storage: bool, + read_write_texture_tier: metal::MTLReadWriteTextureTier, + msaa_desktop: bool, + msaa_apple3: bool, + msaa_apple7: bool, + resource_heaps: bool, + argument_buffers: bool, + shared_textures: bool, + mutable_comparison_samplers: bool, + sampler_clamp_to_border: bool, + indirect_draw_dispatch: bool, + base_vertex_first_instance_drawing: bool, + dual_source_blending: bool, + low_power: bool, + headless: bool, + layered_rendering: bool, + function_specialization: bool, + depth_clip_mode: bool, + texture_cube_array: bool, + supports_float_filtering: bool, + format_depth24_stencil8: bool, + format_depth32_stencil8_filter: bool, + format_depth32_stencil8_none: bool, + format_min_srgb_channels: u8, + format_b5: bool, + format_bc: bool, + format_eac_etc: bool, + format_astc: bool, + format_astc_hdr: bool, + format_any8_unorm_srgb_all: bool, + format_any8_unorm_srgb_no_write: bool, + format_any8_snorm_all: bool, + format_r16_norm_all: bool, + format_r32_all: bool, + format_r32_no_write: bool, + format_r32float_no_write_no_filter: bool, + format_r32float_no_filter: bool, + format_r32float_all: bool, + format_rgba8_srgb_all: bool, + format_rgba8_srgb_no_write: bool, + format_rgb10a2_unorm_all: bool, + format_rgb10a2_unorm_no_write: bool, + format_rgb10a2_uint_write: bool, + format_rg11b10_all: bool, + format_rg11b10_no_write: bool, + format_rgb9e5_all: bool, + format_rgb9e5_no_write: bool, + format_rgb9e5_filter_only: bool, + format_rg32_color: bool, + format_rg32_color_write: bool, + format_rg32float_all: bool, + format_rg32float_color_blend: bool, + format_rg32float_no_filter: bool, + format_rgba32int_color: bool, + format_rgba32int_color_write: bool, + format_rgba32float_color: bool, + format_rgba32float_color_write: bool, + format_rgba32float_all: bool, + format_depth16unorm: bool, + format_depth32float_filter: bool, + format_depth32float_none: bool, + format_bgr10a2_all: bool, + format_bgr10a2_no_write: bool, + max_buffers_per_stage: ResourceIndex, + max_vertex_buffers: ResourceIndex, + max_textures_per_stage: ResourceIndex, + max_samplers_per_stage: ResourceIndex, + buffer_alignment: u64, + max_buffer_size: u64, + max_texture_size: u64, + max_texture_3d_size: u64, + max_texture_layers: u64, + max_fragment_input_components: u64, + max_color_render_targets: u8, + max_varying_components: u32, + max_threads_per_group: u32, + max_total_threadgroup_memory: u32, + sample_count_mask: crate::TextureFormatCapabilities, + supports_debug_markers: bool, + supports_binary_archives: bool, + supports_capture_manager: bool, + can_set_maximum_drawables_count: bool, + can_set_display_sync: bool, + can_set_next_drawable_timeout: bool, + supports_arrays_of_textures: bool, + supports_arrays_of_textures_write: bool, + supports_mutability: bool, + supports_depth_clip_control: bool, + supports_preserve_invariance: bool, + supports_shader_primitive_index: bool, + has_unified_memory: Option<bool>, + timestamp_query_support: TimestampQuerySupport, +} + +#[derive(Clone, Debug)] +struct PrivateDisabilities { + /// Near depth is not respected properly on some Intel GPUs. + broken_viewport_near_depth: bool, + /// Multi-target clears don't appear to work properly on Intel GPUs. + #[allow(dead_code)] + broken_layered_clear_image: bool, +} + +#[derive(Debug, Default)] +struct Settings { + retain_command_buffer_references: bool, +} + +struct AdapterShared { + device: Mutex<metal::Device>, + disabilities: PrivateDisabilities, + private_caps: PrivateCapabilities, + settings: Settings, + presentation_timer: time::PresentationTimer, +} + +unsafe impl Send for AdapterShared {} +unsafe impl Sync for AdapterShared {} + +impl AdapterShared { + fn new(device: metal::Device) -> Self { + let private_caps = PrivateCapabilities::new(&device); + log::debug!("{:#?}", private_caps); + + Self { + disabilities: PrivateDisabilities::new(&device), + private_caps, + device: Mutex::new(device), + settings: Settings::default(), + presentation_timer: time::PresentationTimer::new(), + } + } +} + +pub struct Adapter { + shared: Arc<AdapterShared>, +} + +pub struct Queue { + raw: Arc<Mutex<metal::CommandQueue>>, + timestamp_period: f32, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +impl Queue { + pub unsafe fn queue_from_raw(raw: metal::CommandQueue, timestamp_period: f32) -> Self { + Self { + raw: Arc::new(Mutex::new(raw)), + timestamp_period, + } + } +} + +pub struct Device { + shared: Arc<AdapterShared>, + features: wgt::Features, +} + +pub struct Surface { + view: Option<NonNull<objc::runtime::Object>>, + render_layer: Mutex<metal::MetalLayer>, + swapchain_format: RwLock<Option<wgt::TextureFormat>>, + extent: RwLock<wgt::Extent3d>, + main_thread_id: thread::ThreadId, + // Useful for UI-intensive applications that are sensitive to + // window resizing. + pub present_with_transaction: bool, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug)] +pub struct SurfaceTexture { + texture: Texture, + drawable: metal::MetalDrawable, + present_with_transaction: bool, +} + +impl std::borrow::Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + &self.texture + } +} + +unsafe impl Send for SurfaceTexture {} +unsafe impl Sync for SurfaceTexture {} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &self, + command_buffers: &[&CommandBuffer], + _surface_textures: &[&SurfaceTexture], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + objc::rc::autoreleasepool(|| { + let extra_command_buffer = match signal_fence { + Some((fence, value)) => { + let completed_value = Arc::clone(&fence.completed_value); + let block = block::ConcreteBlock::new(move |_cmd_buf| { + completed_value.store(value, atomic::Ordering::Release); + }) + .copy(); + + let raw = match command_buffers.last() { + Some(&cmd_buf) => cmd_buf.raw.to_owned(), + None => { + let queue = self.raw.lock(); + queue + .new_command_buffer_with_unretained_references() + .to_owned() + } + }; + raw.set_label("(wgpu internal) Signal"); + raw.add_completed_handler(&block); + + fence.maintain(); + fence.pending_command_buffers.push((value, raw.to_owned())); + // only return an extra one if it's extra + match command_buffers.last() { + Some(_) => None, + None => Some(raw), + } + } + None => None, + }; + + for cmd_buffer in command_buffers { + cmd_buffer.raw.commit(); + } + + if let Some(raw) = extra_command_buffer { + raw.commit(); + } + }); + Ok(()) + } + unsafe fn present( + &self, + _surface: &Surface, + texture: SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + let queue = &self.raw.lock(); + objc::rc::autoreleasepool(|| { + let command_buffer = queue.new_command_buffer(); + command_buffer.set_label("(wgpu internal) Present"); + + // https://developer.apple.com/documentation/quartzcore/cametallayer/1478157-presentswithtransaction?language=objc + if !texture.present_with_transaction { + command_buffer.present_drawable(&texture.drawable); + } + + command_buffer.commit(); + + if texture.present_with_transaction { + command_buffer.wait_until_scheduled(); + texture.drawable.present(); + } + }); + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + self.timestamp_period + } +} + +#[derive(Debug)] +pub struct Buffer { + raw: metal::Buffer, + size: wgt::BufferAddress, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl Buffer { + fn as_raw(&self) -> BufferPtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct Texture { + raw: metal::Texture, + format: wgt::TextureFormat, + raw_type: metal::MTLTextureType, + array_layers: u32, + mip_levels: u32, + copy_size: crate::CopyExtent, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +#[derive(Debug)] +pub struct TextureView { + raw: metal::Texture, + aspects: crate::FormatAspects, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} + +impl TextureView { + fn as_raw(&self) -> TexturePtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct Sampler { + raw: metal::SamplerState, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +impl Sampler { + fn as_raw(&self) -> SamplerPtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct BindGroupLayout { + /// Sorted list of BGL entries. + entries: Arc<[wgt::BindGroupLayoutEntry]>, +} + +#[derive(Clone, Debug, Default)] +struct ResourceData<T> { + buffers: T, + textures: T, + samplers: T, +} + +#[derive(Clone, Debug, Default)] +struct MultiStageData<T> { + vs: T, + fs: T, + cs: T, +} + +const NAGA_STAGES: MultiStageData<naga::ShaderStage> = MultiStageData { + vs: naga::ShaderStage::Vertex, + fs: naga::ShaderStage::Fragment, + cs: naga::ShaderStage::Compute, +}; + +impl<T> ops::Index<naga::ShaderStage> for MultiStageData<T> { + type Output = T; + fn index(&self, stage: naga::ShaderStage) -> &T { + match stage { + naga::ShaderStage::Vertex => &self.vs, + naga::ShaderStage::Fragment => &self.fs, + naga::ShaderStage::Compute => &self.cs, + } + } +} + +impl<T> MultiStageData<T> { + fn map_ref<Y>(&self, fun: impl Fn(&T) -> Y) -> MultiStageData<Y> { + MultiStageData { + vs: fun(&self.vs), + fs: fun(&self.fs), + cs: fun(&self.cs), + } + } + fn map<Y>(self, fun: impl Fn(T) -> Y) -> MultiStageData<Y> { + MultiStageData { + vs: fun(self.vs), + fs: fun(self.fs), + cs: fun(self.cs), + } + } + fn iter<'a>(&'a self) -> impl Iterator<Item = &'a T> { + iter::once(&self.vs) + .chain(iter::once(&self.fs)) + .chain(iter::once(&self.cs)) + } + fn iter_mut<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> { + iter::once(&mut self.vs) + .chain(iter::once(&mut self.fs)) + .chain(iter::once(&mut self.cs)) + } +} + +type MultiStageResourceCounters = MultiStageData<ResourceData<ResourceIndex>>; +type MultiStageResources = MultiStageData<naga::back::msl::EntryPointResources>; + +#[derive(Debug)] +struct BindGroupLayoutInfo { + base_resource_indices: MultiStageResourceCounters, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PushConstantsInfo { + count: u32, + buffer_index: ResourceIndex, +} + +#[derive(Debug)] +pub struct PipelineLayout { + bind_group_infos: ArrayVec<BindGroupLayoutInfo, { crate::MAX_BIND_GROUPS }>, + push_constants_infos: MultiStageData<Option<PushConstantsInfo>>, + total_counters: MultiStageResourceCounters, + total_push_constants: u32, + per_stage_map: MultiStageResources, +} + +trait AsNative { + type Native; + fn from(native: &Self::Native) -> Self; + fn as_native(&self) -> &Self::Native; +} + +type BufferPtr = NonNull<metal::MTLBuffer>; +type TexturePtr = NonNull<metal::MTLTexture>; +type SamplerPtr = NonNull<metal::MTLSamplerState>; + +impl AsNative for BufferPtr { + type Native = metal::BufferRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for TexturePtr { + type Native = metal::TextureRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for SamplerPtr { + type Native = metal::SamplerStateRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +#[derive(Debug)] +struct BufferResource { + ptr: BufferPtr, + offset: wgt::BufferAddress, + dynamic_index: Option<u32>, + + /// The buffer's size, if it is a [`Storage`] binding. Otherwise `None`. + /// + /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can + /// hold WGSL runtime-sized arrays. When one does, we must pass its size to + /// shader entry points to implement bounds checks and WGSL's `arrayLength` + /// function. See [`device::CompiledShader::sized_bindings`] for details. + /// + /// [`Storage`]: wgt::BufferBindingType::Storage + binding_size: Option<wgt::BufferSize>, + + binding_location: u32, +} + +#[derive(Debug, Default)] +pub struct BindGroup { + counters: MultiStageResourceCounters, + buffers: Vec<BufferResource>, + samplers: Vec<SamplerPtr>, + textures: Vec<TexturePtr>, +} + +unsafe impl Send for BindGroup {} +unsafe impl Sync for BindGroup {} + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + runtime_checks: bool, +} + +#[derive(Debug, Default)] +struct PipelineStageInfo { + push_constants: Option<PushConstantsInfo>, + + /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + sizes_slot: Option<naga::back::msl::Slot>, + + /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + sized_bindings: Vec<naga::ResourceBinding>, +} + +impl PipelineStageInfo { + fn clear(&mut self) { + self.push_constants = None; + self.sizes_slot = None; + self.sized_bindings.clear(); + } + + fn assign_from(&mut self, other: &Self) { + self.push_constants = other.push_constants; + self.sizes_slot = other.sizes_slot; + self.sized_bindings.clear(); + self.sized_bindings.extend_from_slice(&other.sized_bindings); + } +} + +#[derive(Debug)] +pub struct RenderPipeline { + raw: metal::RenderPipelineState, + #[allow(dead_code)] + vs_lib: metal::Library, + #[allow(dead_code)] + fs_lib: Option<metal::Library>, + vs_info: PipelineStageInfo, + fs_info: Option<PipelineStageInfo>, + raw_primitive_type: metal::MTLPrimitiveType, + raw_triangle_fill_mode: metal::MTLTriangleFillMode, + raw_front_winding: metal::MTLWinding, + raw_cull_mode: metal::MTLCullMode, + raw_depth_clip_mode: Option<metal::MTLDepthClipMode>, + depth_stencil: Option<(metal::DepthStencilState, wgt::DepthBiasState)>, +} + +unsafe impl Send for RenderPipeline {} +unsafe impl Sync for RenderPipeline {} + +#[derive(Debug)] +pub struct ComputePipeline { + raw: metal::ComputePipelineState, + #[allow(dead_code)] + cs_lib: metal::Library, + cs_info: PipelineStageInfo, + work_group_size: metal::MTLSize, + work_group_memory_sizes: Vec<u32>, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +#[derive(Debug, Clone)] +pub struct QuerySet { + raw_buffer: metal::Buffer, + //Metal has a custom buffer for counters. + counter_sample_buffer: Option<metal::CounterSampleBuffer>, + ty: wgt::QueryType, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} + +#[derive(Debug)] +pub struct Fence { + completed_value: Arc<atomic::AtomicU64>, + /// The pending fence values have to be ascending. + pending_command_buffers: Vec<(crate::FenceValue, metal::CommandBuffer)>, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +impl Fence { + fn get_latest(&self) -> crate::FenceValue { + let mut max_value = self.completed_value.load(atomic::Ordering::Acquire); + for &(value, ref cmd_buf) in self.pending_command_buffers.iter() { + if cmd_buf.status() == metal::MTLCommandBufferStatus::Completed { + max_value = value; + } + } + max_value + } + + fn maintain(&mut self) { + let latest = self.get_latest(); + self.pending_command_buffers + .retain(|&(value, _)| value > latest); + } +} + +struct IndexState { + buffer_ptr: BufferPtr, + offset: wgt::BufferAddress, + stride: wgt::BufferAddress, + raw_type: metal::MTLIndexType, +} + +#[derive(Default)] +struct Temp { + binding_sizes: Vec<u32>, +} + +struct CommandState { + blit: Option<metal::BlitCommandEncoder>, + render: Option<metal::RenderCommandEncoder>, + compute: Option<metal::ComputeCommandEncoder>, + raw_primitive_type: metal::MTLPrimitiveType, + index: Option<IndexState>, + raw_wg_size: metal::MTLSize, + stage_infos: MultiStageData<PipelineStageInfo>, + + /// Sizes of currently bound [`wgt::BufferBindingType::Storage`] buffers. + /// + /// Specifically: + /// + /// - The keys are ['ResourceBinding`] values (that is, the WGSL `@group` + /// and `@binding` attributes) for `var<storage>` global variables in the + /// current module that contain runtime-sized arrays. + /// + /// - The values are the actual sizes of the buffers currently bound to + /// provide those globals' contents, which are needed to implement bounds + /// checks and the WGSL `arrayLength` function. + /// + /// For each stage `S` in `stage_infos`, we consult this to find the sizes + /// of the buffers listed in [`stage_infos.S.sized_bindings`], which we must + /// pass to the entry point. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + /// + /// [`ResourceBinding`]: naga::ResourceBinding + storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>, + + work_group_memory_sizes: Vec<u32>, + push_constants: Vec<u32>, + + /// Timer query that should be executed when the next pass starts. + pending_timer_queries: Vec<(QuerySet, u32)>, +} + +pub struct CommandEncoder { + shared: Arc<AdapterShared>, + raw_queue: Arc<Mutex<metal::CommandQueue>>, + raw_cmd_buf: Option<metal::CommandBuffer>, + state: CommandState, + temp: Temp, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("raw_queue", &self.raw_queue) + .field("raw_cmd_buf", &self.raw_cmd_buf) + .finish() + } +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: metal::CommandBuffer, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct AccelerationStructure; diff --git a/third_party/rust/wgpu-hal/src/metal/surface.rs b/third_party/rust/wgpu-hal/src/metal/surface.rs new file mode 100644 index 0000000000..a97eff0aae --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/surface.rs @@ -0,0 +1,280 @@ +#![allow(clippy::let_unit_value)] // `let () =` being used to constrain result type + +use std::{mem, os::raw::c_void, ptr::NonNull, sync::Once, thread}; + +use core_graphics_types::{ + base::CGFloat, + geometry::{CGRect, CGSize}, +}; +use objc::{ + class, + declare::ClassDecl, + msg_send, + rc::autoreleasepool, + runtime::{Class, Object, Sel, BOOL, NO, YES}, + sel, sel_impl, +}; +use parking_lot::{Mutex, RwLock}; + +#[cfg(target_os = "macos")] +#[cfg_attr(feature = "link", link(name = "QuartzCore", kind = "framework"))] +extern "C" { + #[allow(non_upper_case_globals)] + static kCAGravityTopLeft: *mut Object; +} + +extern "C" fn layer_should_inherit_contents_scale_from_window( + _: &Class, + _: Sel, + _layer: *mut Object, + _new_scale: CGFloat, + _from_window: *mut Object, +) -> BOOL { + YES +} + +static CAML_DELEGATE_REGISTER: Once = Once::new(); + +#[derive(Debug)] +pub struct HalManagedMetalLayerDelegate(&'static Class); + +impl HalManagedMetalLayerDelegate { + pub fn new() -> Self { + let class_name = format!("HalManagedMetalLayerDelegate@{:p}", &CAML_DELEGATE_REGISTER); + + CAML_DELEGATE_REGISTER.call_once(|| { + type Fun = extern "C" fn(&Class, Sel, *mut Object, CGFloat, *mut Object) -> BOOL; + let mut decl = ClassDecl::new(&class_name, class!(NSObject)).unwrap(); + #[allow(trivial_casts)] // false positive + unsafe { + decl.add_class_method( + sel!(layer:shouldInheritContentsScale:fromWindow:), + layer_should_inherit_contents_scale_from_window as Fun, + ); + } + decl.register(); + }); + Self(Class::get(&class_name).unwrap()) + } +} + +impl super::Surface { + fn new(view: Option<NonNull<Object>>, layer: metal::MetalLayer) -> Self { + Self { + view, + render_layer: Mutex::new(layer), + swapchain_format: RwLock::new(None), + extent: RwLock::new(wgt::Extent3d::default()), + main_thread_id: thread::current().id(), + present_with_transaction: false, + } + } + + pub unsafe fn dispose(self) { + if let Some(view) = self.view { + let () = msg_send![view.as_ptr(), release]; + } + } + + /// If not called on the main thread, this will panic. + #[allow(clippy::transmute_ptr_to_ref)] + pub unsafe fn from_view( + view: *mut c_void, + delegate: Option<&HalManagedMetalLayerDelegate>, + ) -> Self { + let view = view as *mut Object; + let render_layer = { + let layer = unsafe { Self::get_metal_layer(view, delegate) }; + unsafe { mem::transmute::<_, &metal::MetalLayerRef>(layer) } + } + .to_owned(); + let _: *mut c_void = msg_send![view, retain]; + Self::new(NonNull::new(view), render_layer) + } + + pub unsafe fn from_layer(layer: &metal::MetalLayerRef) -> Self { + let class = class!(CAMetalLayer); + let proper_kind: BOOL = msg_send![layer, isKindOfClass: class]; + assert_eq!(proper_kind, YES); + Self::new(None, layer.to_owned()) + } + + /// If not called on the main thread, this will panic. + pub(crate) unsafe fn get_metal_layer( + view: *mut Object, + delegate: Option<&HalManagedMetalLayerDelegate>, + ) -> *mut Object { + if view.is_null() { + panic!("window does not have a valid contentView"); + } + + let is_main_thread: BOOL = msg_send![class!(NSThread), isMainThread]; + if is_main_thread == NO { + panic!("get_metal_layer cannot be called in non-ui thread."); + } + + let main_layer: *mut Object = msg_send![view, layer]; + let class = class!(CAMetalLayer); + let is_valid_layer: BOOL = msg_send![main_layer, isKindOfClass: class]; + + if is_valid_layer == YES { + main_layer + } else { + // If the main layer is not a CAMetalLayer, we create a CAMetalLayer and use it. + let new_layer: *mut Object = msg_send![class, new]; + let frame: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![new_layer, setFrame: frame]; + #[cfg(target_os = "ios")] + { + // Unlike NSView, UIView does not allow to replace main layer. + let () = msg_send![main_layer, addSublayer: new_layer]; + // On iOS, "from_view" may be called before the application initialization is complete, + // `msg_send![view, window]` and `msg_send![window, screen]` will get null. + let screen: *mut Object = msg_send![class!(UIScreen), mainScreen]; + let scale_factor: CGFloat = msg_send![screen, nativeScale]; + let () = msg_send![view, setContentScaleFactor: scale_factor]; + }; + #[cfg(target_os = "macos")] + { + let () = msg_send![view, setLayer: new_layer]; + let () = msg_send![view, setWantsLayer: YES]; + let () = msg_send![new_layer, setContentsGravity: unsafe { kCAGravityTopLeft }]; + let window: *mut Object = msg_send![view, window]; + if !window.is_null() { + let scale_factor: CGFloat = msg_send![window, backingScaleFactor]; + let () = msg_send![new_layer, setContentsScale: scale_factor]; + } + }; + if let Some(delegate) = delegate { + let () = msg_send![new_layer, setDelegate: delegate.0]; + } + new_layer + } + } + + pub(super) fn dimensions(&self) -> wgt::Extent3d { + let (size, scale): (CGSize, CGFloat) = unsafe { + let render_layer_borrow = self.render_layer.lock(); + let render_layer = render_layer_borrow.as_ref(); + let bounds: CGRect = msg_send![render_layer, bounds]; + let contents_scale: CGFloat = msg_send![render_layer, contentsScale]; + (bounds.size, contents_scale) + }; + + wgt::Extent3d { + width: (size.width * scale) as u32, + height: (size.height * scale) as u32, + depth_or_array_layers: 1, + } + } +} + +impl crate::Surface<super::Api> for super::Surface { + unsafe fn configure( + &self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + log::debug!("build swapchain {:?}", config); + + let caps = &device.shared.private_caps; + *self.swapchain_format.write() = Some(config.format); + *self.extent.write() = config.extent; + + let render_layer = self.render_layer.lock(); + let framebuffer_only = config.usage == crate::TextureUses::COLOR_TARGET; + let display_sync = match config.present_mode { + wgt::PresentMode::Fifo => true, + wgt::PresentMode::Immediate => false, + m => unreachable!("Unsupported present mode: {m:?}"), + }; + let drawable_size = CGSize::new(config.extent.width as f64, config.extent.height as f64); + + match config.composite_alpha_mode { + wgt::CompositeAlphaMode::Opaque => render_layer.set_opaque(true), + wgt::CompositeAlphaMode::PostMultiplied => render_layer.set_opaque(false), + _ => (), + } + + let device_raw = device.shared.device.lock(); + // On iOS, unless the user supplies a view with a CAMetalLayer, we + // create one as a sublayer. However, when the view changes size, + // its sublayers are not automatically resized, and we must resize + // it here. The drawable size and the layer size don't correlate + #[cfg(target_os = "ios")] + { + if let Some(view) = self.view { + let main_layer: *mut Object = msg_send![view.as_ptr(), layer]; + let bounds: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![*render_layer, setFrame: bounds]; + } + } + render_layer.set_device(&device_raw); + render_layer.set_pixel_format(caps.map_format(config.format)); + render_layer.set_framebuffer_only(framebuffer_only); + render_layer.set_presents_with_transaction(self.present_with_transaction); + // opt-in to Metal EDR + // EDR potentially more power used in display and more bandwidth, memory footprint. + let wants_edr = config.format == wgt::TextureFormat::Rgba16Float; + if wants_edr != render_layer.wants_extended_dynamic_range_content() { + render_layer.set_wants_extended_dynamic_range_content(wants_edr); + } + + // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) + render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1); + render_layer.set_drawable_size(drawable_size); + if caps.can_set_next_drawable_timeout { + let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; + } + if caps.can_set_display_sync { + let () = msg_send![*render_layer, setDisplaySyncEnabled: display_sync]; + } + + Ok(()) + } + + unsafe fn unconfigure(&self, _device: &super::Device) { + *self.swapchain_format.write() = None; + } + + unsafe fn acquire_texture( + &self, + _timeout_ms: Option<std::time::Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let render_layer = self.render_layer.lock(); + let (drawable, texture) = match autoreleasepool(|| { + render_layer + .next_drawable() + .map(|drawable| (drawable.to_owned(), drawable.texture().to_owned())) + }) { + Some(pair) => pair, + None => return Ok(None), + }; + + let swapchain_format = self.swapchain_format.read().unwrap(); + let extent = self.extent.read(); + let suf_texture = super::SurfaceTexture { + texture: super::Texture { + raw: texture, + format: swapchain_format, + raw_type: metal::MTLTextureType::D2, + array_layers: 1, + mip_levels: 1, + copy_size: crate::CopyExtent { + width: extent.width, + height: extent.height, + depth: 1, + }, + }, + drawable, + present_with_transaction: self.present_with_transaction, + }; + + Ok(Some(crate::AcquiredSurfaceTexture { + texture: suf_texture, + suboptimal: false, + })) + } + + unsafe fn discard_texture(&self, _texture: super::SurfaceTexture) {} +} diff --git a/third_party/rust/wgpu-hal/src/metal/time.rs b/third_party/rust/wgpu-hal/src/metal/time.rs new file mode 100644 index 0000000000..5c6bec10cd --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/time.rs @@ -0,0 +1,38 @@ +//! Handling of global timestamps. + +#[repr(C)] +#[derive(Debug)] +struct MachTimebaseInfo { + numerator: u32, + denominator: u32, +} +extern "C" { + fn mach_timebase_info(out: *mut MachTimebaseInfo) -> u32; + fn mach_absolute_time() -> u64; +} + +/// A timer which uses mach_absolute_time to get its time. This is what the metal callbacks use. +#[derive(Debug)] +pub struct PresentationTimer { + scale: MachTimebaseInfo, +} +impl PresentationTimer { + /// Generates a new timer. + pub fn new() -> Self { + // Default to 1 / 1 in case the call to timebase_info fails. + let mut scale = MachTimebaseInfo { + numerator: 1, + denominator: 1, + }; + unsafe { mach_timebase_info(&mut scale) }; + + Self { scale } + } + + /// Gets the current time in nanoseconds. + pub fn get_timestamp_ns(&self) -> u128 { + let time = unsafe { mach_absolute_time() }; + + (time as u128 * self.scale.numerator as u128) / self.scale.denominator as u128 + } +} |