use crate::{
    command, conversions as conv,
    internal::{Channel, FastStorageMap},
    native as n, AsNative, Backend, OnlineRecording, QueueFamily, ResourceIndex, Shared,
    VisibilityShared, MAX_BOUND_DESCRIPTOR_SETS, MAX_COLOR_ATTACHMENTS,
};

use arrayvec::ArrayVec;
use auxil::{spirv_cross_specialize_ast, FastHashMap, ShaderStage};
use cocoa_foundation::foundation::{NSRange, NSUInteger};
use copyless::VecHelper;
use foreign_types::{ForeignType, ForeignTypeRef};
use hal::{
    adapter, buffer,
    device::{
        AllocationError, BindError, CreationError as DeviceCreationError, DeviceLost, MapError,
        OomOrDeviceLost, OutOfMemory, ShaderError,
    },
    format, image, memory,
    memory::Properties,
    pass,
    pool::CommandPoolCreateFlags,
    pso,
    pso::VertexInputRate,
    query,
    queue::{QueueFamilyId, QueueGroup, QueuePriority},
};
use metal::{
    CaptureManager, MTLCPUCacheMode, MTLLanguageVersion, MTLPrimitiveTopologyClass,
    MTLPrimitiveType, MTLResourceOptions, MTLSamplerMipFilter, MTLStorageMode, MTLTextureType,
    MTLVertexStepFunction,
};
use objc::{
    rc::autoreleasepool,
    runtime::{Object, BOOL, NO},
};
use parking_lot::Mutex;
use spirv_cross::{msl, spirv, ErrorCode as SpirvErrorCode};

use std::{
    borrow::Borrow,
    cmp,
    collections::hash_map::Entry,
    collections::BTreeMap,
    iter, mem,
    ops::Range,
    ptr,
    sync::{
        atomic::{AtomicBool, Ordering},
        Arc,
    },
    thread, time,
};

const PUSH_CONSTANTS_DESC_SET: u32 = !0;
const PUSH_CONSTANTS_DESC_BINDING: u32 = 0;

const STRIDE_GRANULARITY: pso::ElemStride = 4; //TODO: work around?
const SHADER_STAGE_COUNT: usize = 3;

/// Emit an error during shader module creation. Used when we don't expect an error
/// but might panic due to an exception in SPIRV-Cross.
fn gen_unexpected_error(err: SpirvErrorCode) -> ShaderError {
    let msg = match err {
        SpirvErrorCode::CompilationError(msg) => msg,
        SpirvErrorCode::Unhandled => "Unexpected error".into(),
    };
    ShaderError::CompilationFailed(msg)
}

#[derive(Clone, Debug)]
enum FunctionError {
    InvalidEntryPoint,
    MissingRequiredSpecialization,
    BadSpecialization,
}

fn get_final_function(
    library: &metal::LibraryRef,
    entry: &str,
    specialization: &pso::Specialization,
    function_specialization: bool,
) -> Result<metal::Function, FunctionError> {
    type MTLFunctionConstant = Object;

    let mut mtl_function = library.get_function(entry, None).map_err(|e| {
        error!("Function retrieval error {:?}", e);
        FunctionError::InvalidEntryPoint
    })?;

    if !function_specialization {
        if !specialization.data.is_empty() || !specialization.constants.is_empty() {
            error!("platform does not support specialization");
        }
        return Ok(mtl_function);
    }

    let dictionary = mtl_function.function_constants_dictionary();
    let count: NSUInteger = unsafe { msg_send![dictionary, count] };
    if count == 0 {
        return Ok(mtl_function);
    }

    let all_values: *mut Object = unsafe { msg_send![dictionary, allValues] };

    let constants = metal::FunctionConstantValues::new();
    for i in 0..count {
        let object: *mut MTLFunctionConstant = unsafe { msg_send![all_values, objectAtIndex: i] };
        let index: NSUInteger = unsafe { msg_send![object, index] };
        let required: BOOL = unsafe { msg_send![object, required] };
        match specialization
            .constants
            .iter()
            .find(|c| c.id as NSUInteger == index)
        {
            Some(c) => unsafe {
                let ptr = &specialization.data[c.range.start as usize] as *const u8 as *const _;
                let ty: metal::MTLDataType = msg_send![object, type];
                constants.set_constant_value_at_index(c.id as NSUInteger, ty, ptr);
            },
            None if required != NO => {
                //TODO: get name
                error!("Missing required specialization constant id {}", index);
                return Err(FunctionError::MissingRequiredSpecialization);
            }
            None => {}
        }
    }
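    // All requested specialization constant values have been collected into `constants`;
    // fetch the specialized variant of the entry point from the library.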
    mtl_function = library.get_function(entry, Some(constants)).map_err(|e| {
        error!("Specialized function retrieval error {:?}", e);
        FunctionError::BadSpecialization
    })?;

    Ok(mtl_function)
}

impl VisibilityShared {
    fn are_available(&self, pool_base: query::Id, queries: &Range<query::Id>) -> bool {
        unsafe {
            let availability_ptr = ((self.buffer.contents() as *mut u8)
                .offset(self.availability_offset as isize)
                as *mut u32)
                .offset(pool_base as isize);
            queries
                .clone()
                .all(|id| *availability_ptr.offset(id as isize) != 0)
        }
    }
}

#[derive(Debug)]
pub struct Device {
    pub(crate) shared: Arc<Shared>,
    invalidation_queue: command::QueueInner,
    memory_types: Vec<adapter::MemoryType>,
    features: hal::Features,
    pub online_recording: OnlineRecording,
}
unsafe impl Send for Device {}
unsafe impl Sync for Device {}

impl Drop for Device {
    fn drop(&mut self) {
        if cfg!(feature = "auto-capture") {
            info!("Metal capture stop");
            let shared_capture_manager = CaptureManager::shared();
            if let Some(default_capture_scope) = shared_capture_manager.default_capture_scope() {
                default_capture_scope.end_scope();
            }
            shared_capture_manager.stop_capture();
        }
    }
}

bitflags! {
    /// Memory type bits.
    struct MemoryTypes: u32 {
        const PRIVATE = 1<<0;
        const SHARED = 1<<1;
        const MANAGED_UPLOAD = 1<<2;
        const MANAGED_DOWNLOAD = 1<<3;
    }
}

impl MemoryTypes {
    fn describe(index: usize) -> (MTLStorageMode, MTLCPUCacheMode) {
        match Self::from_bits(1 << index).unwrap() {
            Self::PRIVATE => (MTLStorageMode::Private, MTLCPUCacheMode::DefaultCache),
            Self::SHARED => (MTLStorageMode::Shared, MTLCPUCacheMode::DefaultCache),
            Self::MANAGED_UPLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::WriteCombined),
            Self::MANAGED_DOWNLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::DefaultCache),
            _ => unreachable!(),
        }
    }
}

#[derive(Debug)]
pub struct PhysicalDevice {
    pub(crate) shared: Arc<Shared>,
    memory_types: Vec<adapter::MemoryType>,
}
unsafe impl Send for PhysicalDevice {}
unsafe impl Sync for PhysicalDevice {}

impl PhysicalDevice {
    pub(crate) fn new(shared: Arc<Shared>) -> Self {
        let memory_types = if shared.private_caps.os_is_mac {
            vec![
                adapter::MemoryType {
                    // PRIVATE
                    properties: Properties::DEVICE_LOCAL,
                    heap_index: 0,
                },
                adapter::MemoryType {
                    // SHARED
                    properties: Properties::CPU_VISIBLE | Properties::COHERENT,
                    heap_index: 1,
                },
                adapter::MemoryType {
                    // MANAGED_UPLOAD
                    properties: Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE,
                    heap_index: 1,
                },
                adapter::MemoryType {
                    // MANAGED_DOWNLOAD
                    properties: Properties::DEVICE_LOCAL
                        | Properties::CPU_VISIBLE
                        | Properties::CPU_CACHED,
                    heap_index: 1,
                },
            ]
        } else {
            vec![
                adapter::MemoryType {
                    // PRIVATE
                    properties: Properties::DEVICE_LOCAL,
                    heap_index: 0,
                },
                adapter::MemoryType {
                    // SHARED
                    properties: Properties::CPU_VISIBLE | Properties::COHERENT,
                    heap_index: 1,
                },
            ]
        };
        PhysicalDevice {
            shared: shared.clone(),
            memory_types,
        }
    }

    /// Return true if the specified format-swizzle pair is supported natively.
    pub fn supports_swizzle(&self, format: format::Format, swizzle: format::Swizzle) -> bool {
        self.shared
            .private_caps
            .map_format_with_swizzle(format, swizzle)
            .is_some()
    }
}

impl adapter::PhysicalDevice<Backend> for PhysicalDevice {
    unsafe fn open(
        &self,
        families: &[(&QueueFamily, &[QueuePriority])],
        requested_features: hal::Features,
    ) -> Result<adapter::Gpu<Backend>, DeviceCreationError> {
        use hal::queue::QueueFamily as _;

        // TODO: Query supported features by feature set rather than hard coding in the supported
        // features.
        // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
        if !self.features().contains(requested_features) {
            warn!(
                "Features missing: {:?}",
                requested_features - self.features()
            );
            return Err(DeviceCreationError::MissingFeature);
        }

        let device = self.shared.device.lock();

        if cfg!(feature = "auto-capture") {
            info!("Metal capture start");
            let shared_capture_manager = CaptureManager::shared();
            let default_capture_scope =
                shared_capture_manager.new_capture_scope_with_device(&*device);
            shared_capture_manager.set_default_capture_scope(&default_capture_scope);
            shared_capture_manager.start_capture_with_scope(&default_capture_scope);
            default_capture_scope.begin_scope();
        }

        assert_eq!(families.len(), 1);
        assert_eq!(families[0].1.len(), 1);
        let mut queue_group = QueueGroup::new(families[0].0.id());
        for _ in 0..self.shared.private_caps.exposed_queues {
            queue_group.add_queue(command::CommandQueue::new(self.shared.clone()));
        }

        let device = Device {
            shared: self.shared.clone(),
            invalidation_queue: command::QueueInner::new(&*device, Some(1)),
            memory_types: self.memory_types.clone(),
            features: requested_features,
            online_recording: OnlineRecording::default(),
        };

        Ok(adapter::Gpu {
            device,
            queue_groups: vec![queue_group],
        })
    }

    fn format_properties(&self, format: Option<format::Format>) -> format::Properties {
        match format {
            Some(format) => self.shared.private_caps.map_format_properties(format),
            None => format::Properties {
                linear_tiling: format::ImageFeature::empty(),
                optimal_tiling: format::ImageFeature::empty(),
                buffer_features: format::BufferFeature::empty(),
            },
        }
    }

    fn image_format_properties(
        &self,
        format: format::Format,
        dimensions: u8,
        tiling: image::Tiling,
        usage: image::Usage,
        view_caps: image::ViewCapabilities,
    ) -> Option<image::FormatProperties> {
        if let image::Tiling::Linear = tiling {
            let format_desc = format.surface_desc();
            let host_usage = image::Usage::TRANSFER_SRC | image::Usage::TRANSFER_DST;
            if dimensions != 2
                || !view_caps.is_empty()
                || !host_usage.contains(usage)
                || format_desc.aspects != format::Aspects::COLOR
                || format_desc.is_compressed()
            {
                return None;
            }
        }
        if dimensions == 1
            && usage
                .intersects(image::Usage::COLOR_ATTACHMENT | image::Usage::DEPTH_STENCIL_ATTACHMENT)
        {
            // MTLRenderPassDescriptor texture must not be MTLTextureType1D
            return None;
        }
        if dimensions == 3 && view_caps.contains(image::ViewCapabilities::KIND_2D_ARRAY) {
            // Can't create 2D/2DArray views of 3D textures
            return None;
        }
        let max_dimension = if dimensions == 3 {
            self.shared.private_caps.max_texture_3d_size as _
        } else {
            self.shared.private_caps.max_texture_size as _
        };

        let max_extent = image::Extent {
            width: max_dimension,
            height: if dimensions >= 2 { max_dimension } else { 1 },
            depth: if dimensions >= 3 { max_dimension } else { 1 },
        };

        self.shared
            .private_caps
            .map_format(format)
            .map(|_| image::FormatProperties {
                max_extent,
                max_levels: if dimensions == 1 { 1 } else { 12 },
                // 3D images enforce a single layer
                max_layers: if dimensions == 3 {
                    1
                } else {
                    self.shared.private_caps.max_texture_layers as _
                },
                sample_count_mask: self.shared.private_caps.sample_count_mask as _,
                //TODO: buffers and textures have separate limits
                // Max buffer size is determined by feature set
                // Max texture size does not appear to be documented publicly
                max_resource_size: self.shared.private_caps.max_buffer_size as _,
            })
    }

    fn memory_properties(&self) -> adapter::MemoryProperties {
        adapter::MemoryProperties {
            memory_heaps: vec![
                adapter::MemoryHeap {
                    size: !0, //TODO: private memory limits
                    flags: memory::HeapFlags::DEVICE_LOCAL,
                },
                adapter::MemoryHeap {
                    size:
                        self.shared.private_caps.max_buffer_size,
                    flags: memory::HeapFlags::empty(),
                },
            ],
            memory_types: self.memory_types.to_vec(),
        }
    }

    fn features(&self) -> hal::Features {
        use hal::Features as F;

        F::empty()
            | F::FULL_DRAW_INDEX_U32
            | if self.shared.private_caps.texture_cube_array {
                F::IMAGE_CUBE_ARRAY
            } else {
                F::empty()
            }
            | F::INDEPENDENT_BLENDING
            | if self.shared.private_caps.dual_source_blending {
                F::DUAL_SRC_BLENDING
            } else {
                F::empty()
            }
            | F::DRAW_INDIRECT_FIRST_INSTANCE
            | F::DEPTH_CLAMP
            //| F::DEPTH_BOUNDS
            | F::SAMPLER_ANISOTROPY
            | F::FORMAT_BC
            | F::PRECISE_OCCLUSION_QUERY
            | F::SHADER_STORAGE_BUFFER_ARRAY_DYNAMIC_INDEXING
            | F::VERTEX_STORES_AND_ATOMICS
            | F::FRAGMENT_STORES_AND_ATOMICS
            | F::INSTANCE_RATE
            | F::SEPARATE_STENCIL_REF_VALUES
            | if self.shared.private_caps.expose_line_mode {
                F::NON_FILL_POLYGON_MODE
            } else {
                F::empty()
            }
            | F::SHADER_CLIP_DISTANCE
            | if self.shared.private_caps.msl_version >= metal::MTLLanguageVersion::V2_0 {
                F::TEXTURE_DESCRIPTOR_ARRAY
                    | F::SHADER_SAMPLED_IMAGE_ARRAY_DYNAMIC_INDEXING
                    | F::SAMPLED_TEXTURE_DESCRIPTOR_INDEXING
                    | F::STORAGE_TEXTURE_DESCRIPTOR_INDEXING
            } else {
                F::empty()
            }
            //| F::SAMPLER_MIRROR_CLAMP_EDGE
            | if self.shared.private_caps.sampler_clamp_to_border {
                F::SAMPLER_BORDER_COLOR
            } else {
                F::empty()
            }
            | if self.shared.private_caps.mutable_comparison_samplers {
                F::MUTABLE_COMPARISON_SAMPLER
            } else {
                F::empty()
            }
            | F::NDC_Y_UP
    }

    fn hints(&self) -> hal::Hints {
        if self.shared.private_caps.base_vertex_instance_drawing {
            hal::Hints::BASE_VERTEX_INSTANCE_DRAWING
        } else {
            hal::Hints::empty()
        }
    }

    fn limits(&self) -> hal::Limits {
        let pc = &self.shared.private_caps;
        let device = self.shared.device.lock();
        hal::Limits {
            max_image_1d_size: pc.max_texture_size as _,
            max_image_2d_size: pc.max_texture_size as _,
            max_image_3d_size: pc.max_texture_3d_size as _,
            max_image_cube_size: pc.max_texture_size as _,
            max_image_array_layers: pc.max_texture_layers as _,
            max_texel_elements: (pc.max_texture_size * pc.max_texture_size) as usize,
            max_uniform_buffer_range: pc.max_buffer_size,
            max_storage_buffer_range: pc.max_buffer_size,
            // "Maximum length of an inlined constant data buffer, per graphics or compute function"
            max_push_constants_size: 0x1000,
            max_sampler_allocation_count: !0,
            max_bound_descriptor_sets: MAX_BOUND_DESCRIPTOR_SETS as _,
            max_descriptor_set_samplers: pc.max_samplers_per_stage as usize * SHADER_STAGE_COUNT,
            max_descriptor_set_uniform_buffers: pc.max_buffers_per_stage as usize
                * SHADER_STAGE_COUNT,
            max_descriptor_set_uniform_buffers_dynamic: 8 * SHADER_STAGE_COUNT,
            max_descriptor_set_storage_buffers: pc.max_buffers_per_stage as usize
                * SHADER_STAGE_COUNT,
            max_descriptor_set_storage_buffers_dynamic: 4 * SHADER_STAGE_COUNT,
            max_descriptor_set_sampled_images: pc
                .max_textures_per_stage
                .min(pc.max_samplers_per_stage) as usize
                * SHADER_STAGE_COUNT,
            max_descriptor_set_storage_images: pc.max_textures_per_stage as usize
                * SHADER_STAGE_COUNT,
            max_descriptor_set_input_attachments: pc.max_textures_per_stage as usize
                * SHADER_STAGE_COUNT,
            max_fragment_input_components: pc.max_fragment_input_components as usize,
            max_framebuffer_layers: 2048, // TODO: Determine if this is the correct value
            max_memory_allocation_count: 4096, // TODO: Determine if this is the correct value
            max_per_stage_descriptor_samplers: pc.max_samplers_per_stage as usize,
            max_per_stage_descriptor_uniform_buffers: pc.max_buffers_per_stage as usize,
            max_per_stage_descriptor_storage_buffers: pc.max_buffers_per_stage as usize,
            max_per_stage_descriptor_sampled_images: pc
                .max_textures_per_stage
                .min(pc.max_samplers_per_stage) as usize,
            max_per_stage_descriptor_storage_images: pc.max_textures_per_stage as usize,
            max_per_stage_descriptor_input_attachments: pc.max_textures_per_stage as usize, //TODO
            max_per_stage_resources: 0x100, //TODO
            max_patch_size: 0, // No tessellation

            // Note: The maximum number of supported viewports and scissor rectangles varies by device.
            // TODO: read from Metal Feature Sets.
            max_viewports: 1,
            max_viewport_dimensions: [pc.max_texture_size as _; 2],
            max_framebuffer_extent: hal::image::Extent {
                //TODO
                width: pc.max_texture_size as _,
                height: pc.max_texture_size as _,
                depth: pc.max_texture_layers as _,
            },
            min_memory_map_alignment: 4,

            optimal_buffer_copy_offset_alignment: pc.buffer_alignment,
            optimal_buffer_copy_pitch_alignment: 4,
            min_texel_buffer_offset_alignment: pc.buffer_alignment,
            min_uniform_buffer_offset_alignment: pc.buffer_alignment,
            min_storage_buffer_offset_alignment: pc.buffer_alignment,

            max_compute_work_group_count: [!0; 3], // really undefined
            max_compute_work_group_size: {
                let size = device.max_threads_per_threadgroup();
                [size.width as u32, size.height as u32, size.depth as u32]
            },
            max_compute_shared_memory_size: pc.max_total_threadgroup_memory as usize,

            max_vertex_input_attributes: 31,
            max_vertex_input_bindings: 31,
            max_vertex_input_attribute_offset: 255, // TODO
            max_vertex_input_binding_stride: 256,   // TODO
            max_vertex_output_components: pc.max_fragment_input_components as usize,

            framebuffer_color_sample_counts: 0b101,   // TODO
            framebuffer_depth_sample_counts: 0b101,   // TODO
            framebuffer_stencil_sample_counts: 0b101, // TODO
            max_color_attachments: pc.max_color_render_targets as usize,

            buffer_image_granularity: 1,
            // Note: we issue Metal buffer-to-buffer copies on memory flush/invalidate,
            // and those need to operate on sizes being multiples of 4.
            non_coherent_atom_size: 4,
            max_sampler_anisotropy: 16.,
            min_vertex_input_binding_stride_alignment: STRIDE_GRANULARITY as u64,

            ..hal::Limits::default() // TODO!
        }
    }
}

pub struct LanguageVersion {
    pub major: u8,
    pub minor: u8,
}

impl LanguageVersion {
    pub fn new(major: u8, minor: u8) -> Self {
        LanguageVersion { major, minor }
    }
}

impl Device {
    fn _is_heap_coherent(&self, heap: &n::MemoryHeap) -> bool {
        match *heap {
            n::MemoryHeap::Private => false,
            n::MemoryHeap::Public(memory_type, _) => self.memory_types[memory_type.0]
                .properties
                .contains(Properties::COHERENT),
            n::MemoryHeap::Native(ref heap) => heap.storage_mode() == MTLStorageMode::Shared,
        }
    }

    fn compile_shader_library_cross(
        device: &Mutex<metal::Device>,
        raw_data: &[u32],
        compiler_options: &msl::CompilerOptions,
        msl_version: MTLLanguageVersion,
        specialization: &pso::Specialization,
    ) -> Result<n::ModuleInfo, ShaderError> {
        let module = spirv::Module::from_words(raw_data);

        // now parse again using the new overrides
        let mut ast = spirv::Ast::<msl::Target>::parse(&module).map_err(|err| {
            ShaderError::CompilationFailed(match err {
                SpirvErrorCode::CompilationError(msg) => msg,
                SpirvErrorCode::Unhandled => "Unexpected parse error".into(),
            })
        })?;

        spirv_cross_specialize_ast(&mut ast, specialization)?;

        ast.set_compiler_options(compiler_options)
            .map_err(gen_unexpected_error)?;

        let entry_points = ast.get_entry_points().map_err(|err| {
            ShaderError::CompilationFailed(match err {
                SpirvErrorCode::CompilationError(msg) => msg,
                SpirvErrorCode::Unhandled => "Unexpected entry point error".into(),
            })
        })?;

        let shader_code = ast.compile().map_err(|err| {
            ShaderError::CompilationFailed(match err {
                SpirvErrorCode::CompilationError(msg) => msg,
                SpirvErrorCode::Unhandled => "Unknown compile error".into(),
            })
        })?;

        let mut entry_point_map = n::EntryPointMap::default();
        for entry_point in entry_points {
            info!("Entry point {:?}", entry_point);
            let cleansed = ast
                .get_cleansed_entry_point_name(&entry_point.name, entry_point.execution_model)
                .map_err(|err| {
                    ShaderError::CompilationFailed(match err {
                        SpirvErrorCode::CompilationError(msg) => msg,
                        SpirvErrorCode::Unhandled => "Unknown compile error".into(),
                    })
                })?;
            entry_point_map.insert(
                entry_point.name,
                spirv::EntryPoint {
                    name: cleansed,
                    ..entry_point
                },
            );
        }

        let rasterization_enabled = ast
            .is_rasterization_enabled()
            .map_err(|_| ShaderError::CompilationFailed("Unknown compile error".into()))?;

        // done
        debug!("SPIRV-Cross generated shader:\n{}", shader_code);

        let options = metal::CompileOptions::new();
        options.set_language_version(msl_version);

        let library = device
            .lock()
            .new_library_with_source(shader_code.as_ref(), &options)
            .map_err(|err| ShaderError::CompilationFailed(err.into()))?;

        Ok(n::ModuleInfo {
            library,
            entry_point_map,
            rasterization_enabled,
        })
    }

    #[cfg(feature = "naga")]
    fn compile_shader_library_naga(
        device: &Mutex<metal::Device>,
        module: &naga::Module,
        naga_options: &naga::back::msl::Options,
    ) -> Result<n::ModuleInfo, ShaderError> {
        let source = naga::back::msl::write_string(module, naga_options)
            .map_err(|e| ShaderError::CompilationFailed(format!("{:?}", e)))?;

        let mut entry_point_map = n::EntryPointMap::default();
        for (&(stage, ref name), ep) in module.entry_points.iter() {
            entry_point_map.insert(
                name.clone(),
                spirv::EntryPoint {
                    //TODO: fill that information by Naga
                    name: format!("{}{:?}", name, stage),
                    execution_model: match stage {
                        naga::ShaderStage::Vertex => spirv::ExecutionModel::Vertex,
                        naga::ShaderStage::Fragment => spirv::ExecutionModel::Fragment,
                        naga::ShaderStage::Compute => spirv::ExecutionModel::GlCompute,
                    },
                    work_group_size: spirv::WorkGroupSize {
                        x: ep.workgroup_size[0],
                        y: ep.workgroup_size[1],
                        z: ep.workgroup_size[2],
                    },
                },
            );
        }

        debug!("Naga generated shader:\n{}", source);

        let options = metal::CompileOptions::new();
        let
msl_version = match naga_options.lang_version { (1, 0) => MTLLanguageVersion::V1_0, (1, 1) => MTLLanguageVersion::V1_1, (1, 2) => MTLLanguageVersion::V1_2, (2, 0) => MTLLanguageVersion::V2_0, (2, 1) => MTLLanguageVersion::V2_1, (2, 2) => MTLLanguageVersion::V2_2, other => panic!("Unexpected language version {:?}", other), }; options.set_language_version(msl_version); let library = device .lock() .new_library_with_source(source.as_ref(), &options) .map_err(|err| ShaderError::CompilationFailed(err.into()))?; Ok(n::ModuleInfo { library, entry_point_map, rasterization_enabled: true, //TODO }) } fn load_shader( &self, ep: &pso::EntryPoint, layout: &n::PipelineLayout, primitive_class: MTLPrimitiveTopologyClass, pipeline_cache: Option<&n::PipelineCache>, stage: ShaderStage, ) -> Result<(metal::Library, metal::Function, metal::MTLSize, bool), pso::CreationError> { let device = &self.shared.device; let msl_version = self.shared.private_caps.msl_version; let module_map; let (info_owned, info_guard); let compiler_options = &mut match primitive_class { MTLPrimitiveTopologyClass::Point => layout.shader_compiler_options_point.clone(), _ => layout.shader_compiler_options.clone(), }; compiler_options.entry_point = Some(( ep.entry.to_string(), match stage { ShaderStage::Vertex => spirv::ExecutionModel::Vertex, ShaderStage::Fragment => spirv::ExecutionModel::Fragment, ShaderStage::Compute => spirv::ExecutionModel::GlCompute, _ => return Err(pso::CreationError::UnsupportedPipeline), }, )); let data = &ep.module.spv; let info = match pipeline_cache { Some(cache) => { module_map = cache .modules .get_or_create_with(compiler_options, FastStorageMap::default); info_guard = module_map.get_or_create_with(data, || { Self::compile_shader_library_cross( device, data, compiler_options, msl_version, &ep.specialization, ) .unwrap() }); &*info_guard } None => { let mut result = Err(ShaderError::CompilationFailed(String::new())); #[cfg(feature = "naga")] if let Some(ref module) = ep.module.naga { result = Self::compile_shader_library_naga(device, module, &layout.naga_options); if let Err(ShaderError::CompilationFailed(ref msg)) = result { warn!("Naga: {:?}", msg); } } if result.is_err() { result = Self::compile_shader_library_cross( device, data, compiler_options, msl_version, &ep.specialization, ); } info_owned = result.map_err(|e| { error!("Error compiling the shader {:?}", e); pso::CreationError::Other })?; &info_owned } }; let lib = info.library.clone(); let (name, wg_size) = match info.entry_point_map.get(ep.entry) { Some(p) => ( p.name.as_str(), metal::MTLSize { width: p.work_group_size.x as _, height: p.work_group_size.y as _, depth: p.work_group_size.z as _, }, ), // this can only happen if the shader came directly from the user None => ( ep.entry, metal::MTLSize { width: 0, height: 0, depth: 0, }, ), }; let mtl_function = get_final_function( &lib, name, &ep.specialization, self.shared.private_caps.function_specialization, ) .map_err(|e| { error!("Invalid shader entry point '{}': {:?}", name, e); pso::CreationError::Other })?; Ok((lib, mtl_function, wg_size, info.rasterization_enabled)) } fn make_sampler_descriptor( &self, info: &image::SamplerDesc, ) -> Option { let caps = &self.shared.private_caps; let descriptor = metal::SamplerDescriptor::new(); descriptor.set_normalized_coordinates(info.normalized); descriptor.set_min_filter(conv::map_filter(info.min_filter)); descriptor.set_mag_filter(conv::map_filter(info.mag_filter)); descriptor.set_mip_filter(match info.mip_filter { // Note: this shouldn't be required, 
but Metal appears to be confused when mipmaps // are provided even with trivial LOD bias. image::Filter::Nearest if info.lod_range.end.0 < 0.5 => { MTLSamplerMipFilter::NotMipmapped } image::Filter::Nearest => MTLSamplerMipFilter::Nearest, image::Filter::Linear => MTLSamplerMipFilter::Linear, }); if let Some(aniso) = info.anisotropy_clamp { descriptor.set_max_anisotropy(aniso as _); } let (s, t, r) = info.wrap_mode; descriptor.set_address_mode_s(conv::map_wrap_mode(s)); descriptor.set_address_mode_t(conv::map_wrap_mode(t)); descriptor.set_address_mode_r(conv::map_wrap_mode(r)); let lod_bias = info.lod_bias.0; if lod_bias != 0.0 { if self.features.contains(hal::Features::SAMPLER_MIP_LOD_BIAS) { unsafe { descriptor.set_lod_bias(lod_bias); } } else { error!("Lod bias {:?} is not supported", info.lod_bias); } } descriptor.set_lod_min_clamp(info.lod_range.start.0); descriptor.set_lod_max_clamp(info.lod_range.end.0); // TODO: Clarify minimum macOS version with Apple (43707452) if (caps.os_is_mac && caps.has_version_at_least(10, 13)) || (!caps.os_is_mac && caps.has_version_at_least(9, 0)) { descriptor.set_lod_average(true); // optimization } if let Some(fun) = info.comparison { if !caps.mutable_comparison_samplers { return None; } descriptor.set_compare_function(conv::map_compare_function(fun)); } if [r, s, t].iter().any(|&am| am == image::WrapMode::Border) { descriptor.set_border_color(conv::map_border_color(info.border)); } if caps.argument_buffers { descriptor.set_support_argument_buffers(true); } Some(descriptor) } fn make_sampler_data(info: &image::SamplerDesc) -> msl::SamplerData { fn map_address(wrap: image::WrapMode) -> msl::SamplerAddress { match wrap { image::WrapMode::Tile => msl::SamplerAddress::Repeat, image::WrapMode::Mirror => msl::SamplerAddress::MirroredRepeat, image::WrapMode::Clamp => msl::SamplerAddress::ClampToEdge, image::WrapMode::Border => msl::SamplerAddress::ClampToBorder, image::WrapMode::MirrorClamp => { unimplemented!("https://github.com/grovesNL/spirv_cross/issues/138") } } } let lods = info.lod_range.start.0..info.lod_range.end.0; msl::SamplerData { coord: if info.normalized { msl::SamplerCoord::Normalized } else { msl::SamplerCoord::Pixel }, min_filter: match info.min_filter { image::Filter::Nearest => msl::SamplerFilter::Nearest, image::Filter::Linear => msl::SamplerFilter::Linear, }, mag_filter: match info.mag_filter { image::Filter::Nearest => msl::SamplerFilter::Nearest, image::Filter::Linear => msl::SamplerFilter::Linear, }, mip_filter: match info.min_filter { image::Filter::Nearest if info.lod_range.end.0 < 0.5 => msl::SamplerMipFilter::None, image::Filter::Nearest => msl::SamplerMipFilter::Nearest, image::Filter::Linear => msl::SamplerMipFilter::Linear, }, s_address: map_address(info.wrap_mode.0), t_address: map_address(info.wrap_mode.1), r_address: map_address(info.wrap_mode.2), compare_func: match info.comparison { Some(func) => unsafe { mem::transmute(conv::map_compare_function(func) as u32) }, None => msl::SamplerCompareFunc::Always, }, border_color: match info.border { image::BorderColor::TransparentBlack => msl::SamplerBorderColor::TransparentBlack, image::BorderColor::OpaqueBlack => msl::SamplerBorderColor::OpaqueBlack, image::BorderColor::OpaqueWhite => msl::SamplerBorderColor::OpaqueWhite, }, lod_clamp_min: lods.start.into(), lod_clamp_max: lods.end.into(), max_anisotropy: info.anisotropy_clamp.map_or(0, |aniso| aniso as i32), planes: 0, resolution: msl::FormatResolution::_444, chroma_filter: msl::SamplerFilter::Nearest, x_chroma_offset: 
msl::ChromaLocation::CositedEven, y_chroma_offset: msl::ChromaLocation::CositedEven, swizzle: [ msl::ComponentSwizzle::Identity, msl::ComponentSwizzle::Identity, msl::ComponentSwizzle::Identity, msl::ComponentSwizzle::Identity, ], ycbcr_conversion_enable: false, ycbcr_model: msl::SamplerYCbCrModelConversion::RgbIdentity, ycbcr_range: msl::SamplerYCbCrRange::ItuFull, bpc: 8, } } } impl hal::device::Device for Device { unsafe fn create_command_pool( &self, _family: QueueFamilyId, _flags: CommandPoolCreateFlags, ) -> Result { Ok(command::CommandPool::new( &self.shared, self.online_recording.clone(), )) } unsafe fn destroy_command_pool(&self, mut pool: command::CommandPool) { use hal::pool::CommandPool as _; pool.reset(false); } unsafe fn create_render_pass<'a, IA, IS, ID>( &self, attachments: IA, subpasses: IS, _dependencies: ID, ) -> Result where IA: IntoIterator, IA::Item: Borrow, IS: IntoIterator, IS::Item: Borrow>, ID: IntoIterator, ID::Item: Borrow, { let attachments: Vec = attachments .into_iter() .map(|at| at.borrow().clone()) .collect(); let mut subpasses: Vec = subpasses .into_iter() .map(|sp| { let sub = sp.borrow(); let mut colors: ArrayVec<[_; MAX_COLOR_ATTACHMENTS]> = sub .colors .iter() .map(|&(id, _)| { let hal_format = attachments[id].format.expect("No format!"); n::AttachmentInfo { id, resolve_id: None, ops: n::AttachmentOps::empty(), format: self .shared .private_caps .map_format(hal_format) .expect("Unable to map color format!"), channel: Channel::from(hal_format.base_format().1), } }) .collect(); for (color, &(resolve_id, _)) in colors.iter_mut().zip(sub.resolves.iter()) { if resolve_id != pass::ATTACHMENT_UNUSED { color.resolve_id = Some(resolve_id); } } let depth_stencil = sub.depth_stencil.map(|&(id, _)| { let hal_format = attachments[id].format.expect("No format!"); n::AttachmentInfo { id, resolve_id: None, ops: n::AttachmentOps::empty(), format: self .shared .private_caps .map_format(hal_format) .expect("Unable to map depth-stencil format!"), channel: Channel::Float, } }); n::Subpass { attachments: n::SubpassData { colors, depth_stencil, }, inputs: sub.inputs.iter().map(|&(id, _)| id).collect(), } }) .collect(); // sprinkle load operations // an attachment receives LOAD flag on a subpass if it's the first sub-pass that uses it let mut use_mask = 0u64; for sub in subpasses.iter_mut() { for at in sub.attachments.colors.iter_mut() { if use_mask & 1 << at.id == 0 { at.ops |= n::AttachmentOps::LOAD; use_mask ^= 1 << at.id; } } if let Some(ref mut at) = sub.attachments.depth_stencil { if use_mask & 1 << at.id == 0 { at.ops |= n::AttachmentOps::LOAD; use_mask ^= 1 << at.id; } } } // sprinkle store operations // an attachment receives STORE flag on a subpass if it's the last sub-pass that uses it for sub in subpasses.iter_mut().rev() { for at in sub.attachments.colors.iter_mut() { if use_mask & 1 << at.id != 0 { at.ops |= n::AttachmentOps::STORE; use_mask ^= 1 << at.id; } } if let Some(ref mut at) = sub.attachments.depth_stencil { if use_mask & 1 << at.id != 0 { at.ops |= n::AttachmentOps::STORE; use_mask ^= 1 << at.id; } } } Ok(n::RenderPass { attachments, subpasses, name: String::new(), }) } unsafe fn create_pipeline_layout( &self, set_layouts: IS, push_constant_ranges: IR, ) -> Result where IS: IntoIterator, IS::Item: Borrow, IR: IntoIterator, IR::Item: Borrow<(pso::ShaderStageFlags, Range)>, { let mut stage_infos = [ ( pso::ShaderStageFlags::VERTEX, spirv::ExecutionModel::Vertex, n::ResourceData::::new(), ), ( pso::ShaderStageFlags::FRAGMENT, 
spirv::ExecutionModel::Fragment, n::ResourceData::::new(), ), ( pso::ShaderStageFlags::COMPUTE, spirv::ExecutionModel::GlCompute, n::ResourceData::::new(), ), ]; let mut res_overrides = BTreeMap::new(); let mut const_samplers = BTreeMap::new(); let mut infos = Vec::new(); // First, place the push constants let mut pc_buffers = [None; 3]; let mut pc_limits = [0u32; 3]; for pcr in push_constant_ranges { let (flags, range) = pcr.borrow(); for (limit, &(stage_bit, _, _)) in pc_limits.iter_mut().zip(&stage_infos) { if flags.contains(stage_bit) { debug_assert_eq!(range.end % 4, 0); *limit = (range.end / 4).max(*limit); } } } const LIMIT_MASK: u32 = 3; // round up the limits alignment to 4, so that it matches MTL compiler logic //TODO: figure out what and how exactly does the alignment. Clearly, it's not // straightforward, given that value of 2 stays non-aligned. for limit in &mut pc_limits { if *limit > LIMIT_MASK { *limit = (*limit + LIMIT_MASK) & !LIMIT_MASK; } } for ((limit, ref mut buffer_index), &mut (_, stage, ref mut counters)) in pc_limits .iter() .zip(pc_buffers.iter_mut()) .zip(stage_infos.iter_mut()) { // handle the push constant buffer assignment and shader overrides if *limit != 0 { let index = counters.buffers; **buffer_index = Some(index); counters.buffers += 1; res_overrides.insert( msl::ResourceBindingLocation { stage, desc_set: PUSH_CONSTANTS_DESC_SET, binding: PUSH_CONSTANTS_DESC_BINDING, }, msl::ResourceBinding { buffer_id: index as _, texture_id: !0, sampler_id: !0, }, ); } } // Second, place the descripted resources for (set_index, set_layout) in set_layouts.into_iter().enumerate() { // remember where the resources for this set start at each shader stage let mut dynamic_buffers = Vec::new(); let offsets = n::MultiStageResourceCounters { vs: stage_infos[0].2.clone(), ps: stage_infos[1].2.clone(), cs: stage_infos[2].2.clone(), }; match *set_layout.borrow() { n::DescriptorSetLayout::Emulated { layouts: ref desc_layouts, ref immutable_samplers, total: _, } => { for (&binding, data) in immutable_samplers.iter() { //TODO: array support? const_samplers.insert( msl::SamplerLocation { desc_set: set_index as u32, binding, }, data.clone(), ); } for layout in desc_layouts.iter() { if layout .content .contains(n::DescriptorContent::DYNAMIC_BUFFER) { dynamic_buffers.alloc().init(n::MultiStageData { vs: if layout.stages.contains(pso::ShaderStageFlags::VERTEX) { stage_infos[0].2.buffers } else { !0 }, ps: if layout.stages.contains(pso::ShaderStageFlags::FRAGMENT) { stage_infos[1].2.buffers } else { !0 }, cs: if layout.stages.contains(pso::ShaderStageFlags::COMPUTE) { stage_infos[2].2.buffers } else { !0 }, }); } for &mut (stage_bit, stage, ref mut counters) in stage_infos.iter_mut() { if !layout.stages.contains(stage_bit) { continue; } let res = msl::ResourceBinding { buffer_id: if layout.content.contains(n::DescriptorContent::BUFFER) { counters.buffers as _ } else { !0 }, texture_id: if layout .content .contains(n::DescriptorContent::TEXTURE) { counters.textures as _ } else { !0 }, sampler_id: if layout .content .contains(n::DescriptorContent::SAMPLER) { counters.samplers as _ } else { !0 }, }; counters.add(layout.content); if layout.array_index == 0 { let location = msl::ResourceBindingLocation { stage, desc_set: set_index as _, binding: layout.binding, }; res_overrides.insert(location, res); } } } } n::DescriptorSetLayout::ArgumentBuffer { ref bindings, stage_flags, .. 
} => { for &mut (stage_bit, stage, ref mut counters) in stage_infos.iter_mut() { let has_stage = stage_flags.contains(stage_bit); res_overrides.insert( msl::ResourceBindingLocation { stage, desc_set: set_index as _, binding: msl::ARGUMENT_BUFFER_BINDING, }, msl::ResourceBinding { buffer_id: if has_stage { counters.buffers } else { !0 }, texture_id: !0, sampler_id: !0, }, ); if has_stage { res_overrides.extend(bindings.iter().map(|(&binding, arg)| { let key = msl::ResourceBindingLocation { stage, desc_set: set_index as _, binding, }; (key, arg.res.clone()) })); counters.buffers += 1; } } } } infos.alloc().init(n::DescriptorSetInfo { offsets, dynamic_buffers, }); } // Finally, make sure we fit the limits for &(_, _, ref counters) in stage_infos.iter() { assert!(counters.buffers <= self.shared.private_caps.max_buffers_per_stage); assert!(counters.textures <= self.shared.private_caps.max_textures_per_stage); assert!(counters.samplers <= self.shared.private_caps.max_samplers_per_stage); } #[cfg(feature = "naga")] let naga_options = { use naga::back::msl; fn res_index(id: u32) -> Option { if id == !0 { None } else { Some(id as _) } } msl::Options { lang_version: match self.shared.private_caps.msl_version { MTLLanguageVersion::V1_0 => (1, 0), MTLLanguageVersion::V1_1 => (1, 1), MTLLanguageVersion::V1_2 => (1, 2), MTLLanguageVersion::V2_0 => (2, 0), MTLLanguageVersion::V2_1 => (2, 1), MTLLanguageVersion::V2_2 => (2, 2), }, spirv_cross_compatibility: true, binding_map: res_overrides .iter() .map(|(loc, binding)| { let source = msl::BindSource { stage: match loc.stage { spirv::ExecutionModel::Vertex => naga::ShaderStage::Vertex, spirv::ExecutionModel::Fragment => naga::ShaderStage::Fragment, spirv::ExecutionModel::GlCompute => naga::ShaderStage::Compute, other => panic!("Unexpected stage: {:?}", other), }, group: loc.desc_set, binding: loc.binding, }; let target = msl::BindTarget { buffer: res_index(binding.buffer_id), texture: res_index(binding.texture_id), sampler: res_index(binding.sampler_id), mutable: false, //TODO }; (source, target) }) .collect(), } }; let mut shader_compiler_options = msl::CompilerOptions::default(); shader_compiler_options.version = match self.shared.private_caps.msl_version { MTLLanguageVersion::V1_0 => msl::Version::V1_0, MTLLanguageVersion::V1_1 => msl::Version::V1_1, MTLLanguageVersion::V1_2 => msl::Version::V1_2, MTLLanguageVersion::V2_0 => msl::Version::V2_0, MTLLanguageVersion::V2_1 => msl::Version::V2_1, MTLLanguageVersion::V2_2 => msl::Version::V2_2, }; shader_compiler_options.enable_point_size_builtin = false; shader_compiler_options.vertex.invert_y = !self.features.contains(hal::Features::NDC_Y_UP); shader_compiler_options.resource_binding_overrides = res_overrides; shader_compiler_options.const_samplers = const_samplers; shader_compiler_options.enable_argument_buffers = self.shared.private_caps.argument_buffers; shader_compiler_options.force_zero_initialized_variables = true; shader_compiler_options.force_native_arrays = true; let mut shader_compiler_options_point = shader_compiler_options.clone(); shader_compiler_options_point.enable_point_size_builtin = true; Ok(n::PipelineLayout { shader_compiler_options, shader_compiler_options_point, #[cfg(feature = "naga")] naga_options, infos, total: n::MultiStageResourceCounters { vs: stage_infos[0].2.clone(), ps: stage_infos[1].2.clone(), cs: stage_infos[2].2.clone(), }, push_constants: n::MultiStageData { vs: pc_buffers[0].map(|buffer_index| n::PushConstantInfo { count: pc_limits[0], buffer_index, }), ps: 
pc_buffers[1].map(|buffer_index| n::PushConstantInfo { count: pc_limits[1], buffer_index, }), cs: pc_buffers[2].map(|buffer_index| n::PushConstantInfo { count: pc_limits[2], buffer_index, }), }, total_push_constants: pc_limits[0].max(pc_limits[1]).max(pc_limits[2]), }) } unsafe fn create_pipeline_cache( &self, _data: Option<&[u8]>, ) -> Result { Ok(n::PipelineCache { modules: FastStorageMap::default(), }) } unsafe fn get_pipeline_cache_data( &self, _cache: &n::PipelineCache, ) -> Result, OutOfMemory> { //empty Ok(Vec::new()) } unsafe fn destroy_pipeline_cache(&self, _cache: n::PipelineCache) { //drop } unsafe fn merge_pipeline_caches( &self, target: &n::PipelineCache, sources: I, ) -> Result<(), OutOfMemory> where I: IntoIterator, I::Item: Borrow, { let mut dst = target.modules.whole_write(); for source in sources { let src = source.borrow().modules.whole_write(); for (key, value) in src.iter() { let storage = dst .entry(key.clone()) .or_insert_with(FastStorageMap::default); let mut dst_module = storage.whole_write(); let src_module = value.whole_write(); for (key_module, value_module) in src_module.iter() { match dst_module.entry(key_module.clone()) { Entry::Vacant(em) => { em.insert(value_module.clone()); } Entry::Occupied(em) => { if em.get().library.as_ptr() != value_module.library.as_ptr() || em.get().entry_point_map != value_module.entry_point_map { warn!( "Merged module don't match, target: {:?}, source: {:?}", em.get(), value_module ); } } } } } } Ok(()) } unsafe fn create_graphics_pipeline<'a>( &self, pipeline_desc: &pso::GraphicsPipelineDesc<'a, Backend>, cache: Option<&n::PipelineCache>, ) -> Result { debug!("create_graphics_pipeline {:#?}", pipeline_desc); let pipeline = metal::RenderPipelineDescriptor::new(); let pipeline_layout = &pipeline_desc.layout; let (rp_attachments, subpass) = { let pass::Subpass { main_pass, index } = pipeline_desc.subpass; (&main_pass.attachments, &main_pass.subpasses[index as usize]) }; let (desc_vertex_buffers, attributes, input_assembler, vs, gs, hs, ds) = match pipeline_desc.primitive_assembler { pso::PrimitiveAssemblerDesc::Vertex { buffers, attributes, ref input_assembler, ref vertex, ref tessellation, ref geometry, } => { let (hs, ds) = if let Some(ts) = tessellation { (Some(&ts.0), Some(&ts.1)) } else { (None, None) }; ( buffers, attributes, input_assembler, vertex, geometry, hs, ds, ) } pso::PrimitiveAssemblerDesc::Mesh { .. 
} => { return Err(pso::CreationError::UnsupportedPipeline) } }; let (primitive_class, primitive_type) = match input_assembler.primitive { pso::Primitive::PointList => { (MTLPrimitiveTopologyClass::Point, MTLPrimitiveType::Point) } pso::Primitive::LineList => (MTLPrimitiveTopologyClass::Line, MTLPrimitiveType::Line), pso::Primitive::LineStrip => { (MTLPrimitiveTopologyClass::Line, MTLPrimitiveType::LineStrip) } pso::Primitive::TriangleList => ( MTLPrimitiveTopologyClass::Triangle, MTLPrimitiveType::Triangle, ), pso::Primitive::TriangleStrip => ( MTLPrimitiveTopologyClass::Triangle, MTLPrimitiveType::TriangleStrip, ), pso::Primitive::PatchList(_) => ( MTLPrimitiveTopologyClass::Unspecified, MTLPrimitiveType::Point, ), }; if self.shared.private_caps.layered_rendering { pipeline.set_input_primitive_topology(primitive_class); } // Vertex shader let (vs_lib, vs_function, _, enable_rasterization) = self.load_shader( vs, pipeline_layout, primitive_class, cache, ShaderStage::Vertex, )?; pipeline.set_vertex_function(Some(&vs_function)); // Fragment shader let fs_function; let fs_lib = match pipeline_desc.fragment { Some(ref ep) => { let (lib, fun, _, _) = self.load_shader( ep, pipeline_layout, primitive_class, cache, ShaderStage::Fragment, )?; fs_function = fun; pipeline.set_fragment_function(Some(&fs_function)); Some(lib) } None => { // TODO: This is a workaround for what appears to be a Metal validation bug // A pixel format is required even though no attachments are provided if subpass.attachments.colors.is_empty() && subpass.attachments.depth_stencil.is_none() { pipeline.set_depth_attachment_pixel_format(metal::MTLPixelFormat::Depth32Float); } None } }; // Other shaders if hs.is_some() { return Err(pso::CreationError::Shader(ShaderError::UnsupportedStage( pso::ShaderStageFlags::HULL, ))); } if ds.is_some() { return Err(pso::CreationError::Shader(ShaderError::UnsupportedStage( pso::ShaderStageFlags::DOMAIN, ))); } if gs.is_some() { return Err(pso::CreationError::Shader(ShaderError::UnsupportedStage( pso::ShaderStageFlags::GEOMETRY, ))); } pipeline.set_rasterization_enabled(enable_rasterization); // Assign target formats let blend_targets = pipeline_desc .blender .targets .iter() .chain(iter::repeat(&pso::ColorBlendDesc::EMPTY)); for (i, (at, color_desc)) in subpass .attachments .colors .iter() .zip(blend_targets) .enumerate() { let desc = pipeline .color_attachments() .object_at(i as u64) .expect("too many color attachments"); desc.set_pixel_format(at.format); desc.set_write_mask(conv::map_write_mask(color_desc.mask)); if let Some(ref blend) = color_desc.blend { desc.set_blending_enabled(true); let (color_op, color_src, color_dst) = conv::map_blend_op(blend.color); let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_op(blend.alpha); desc.set_rgb_blend_operation(color_op); desc.set_source_rgb_blend_factor(color_src); desc.set_destination_rgb_blend_factor(color_dst); desc.set_alpha_blend_operation(alpha_op); desc.set_source_alpha_blend_factor(alpha_src); desc.set_destination_alpha_blend_factor(alpha_dst); } } if let Some(ref at) = subpass.attachments.depth_stencil { let orig_format = rp_attachments[at.id].format.unwrap(); if orig_format.is_depth() { pipeline.set_depth_attachment_pixel_format(at.format); } if orig_format.is_stencil() { pipeline.set_stencil_attachment_pixel_format(at.format); } } // Vertex buffers let vertex_descriptor = metal::VertexDescriptor::new(); let mut vertex_buffers: n::VertexBufferVec = Vec::new(); trace!("Vertex attribute remapping started"); for &pso::AttributeDesc { 
location, binding, element, } in attributes { let original = desc_vertex_buffers .iter() .find(|vb| vb.binding == binding) .expect("no associated vertex buffer found"); // handle wrapping offsets let elem_size = element.format.surface_desc().bits as pso::ElemOffset / 8; let (cut_offset, base_offset) = if original.stride == 0 || element.offset + elem_size <= original.stride { (element.offset, 0) } else { let remainder = element.offset % original.stride; if remainder + elem_size <= original.stride { (remainder, element.offset - remainder) } else { (0, element.offset) } }; let relative_index = vertex_buffers .iter() .position(|(ref vb, offset)| vb.binding == binding && base_offset == *offset) .unwrap_or_else(|| { vertex_buffers.alloc().init((original.clone(), base_offset)); vertex_buffers.len() - 1 }); let mtl_buffer_index = self.shared.private_caps.max_buffers_per_stage - 1 - (relative_index as ResourceIndex); if mtl_buffer_index < pipeline_layout.total.vs.buffers { error!("Attribute offset {} exceeds the stride {}, and there is no room for replacement.", element.offset, original.stride); return Err(pso::CreationError::Other); } trace!("\tAttribute[{}] is mapped to vertex buffer[{}] with binding {} and offsets {} + {}", location, binding, mtl_buffer_index, base_offset, cut_offset); // pass the refined data to Metal let mtl_attribute_desc = vertex_descriptor .attributes() .object_at(location as u64) .expect("too many vertex attributes"); let mtl_vertex_format = conv::map_vertex_format(element.format).expect("unsupported vertex format"); mtl_attribute_desc.set_format(mtl_vertex_format); mtl_attribute_desc.set_buffer_index(mtl_buffer_index as _); mtl_attribute_desc.set_offset(cut_offset as _); } for (i, (vb, _)) in vertex_buffers.iter().enumerate() { let mtl_buffer_desc = vertex_descriptor .layouts() .object_at(self.shared.private_caps.max_buffers_per_stage as u64 - 1 - i as u64) .expect("too many vertex descriptor layouts"); if vb.stride % STRIDE_GRANULARITY != 0 { error!( "Stride ({}) must be a multiple of {}", vb.stride, STRIDE_GRANULARITY ); return Err(pso::CreationError::Other); } if vb.stride != 0 { mtl_buffer_desc.set_stride(vb.stride as u64); match vb.rate { VertexInputRate::Vertex => { mtl_buffer_desc.set_step_function(MTLVertexStepFunction::PerVertex); } VertexInputRate::Instance(divisor) => { mtl_buffer_desc.set_step_function(MTLVertexStepFunction::PerInstance); mtl_buffer_desc.set_step_rate(divisor as u64); } } } else { mtl_buffer_desc.set_stride(256); // big enough to fit all the elements mtl_buffer_desc.set_step_function(MTLVertexStepFunction::PerInstance); mtl_buffer_desc.set_step_rate(!0); } } if !vertex_buffers.is_empty() { pipeline.set_vertex_descriptor(Some(&vertex_descriptor)); } if let pso::State::Static(w) = pipeline_desc.rasterizer.line_width { if w != 1.0 { warn!("Unsupported line width: {:?}", w); } } let rasterizer_state = Some(n::RasterizerState { front_winding: conv::map_winding(pipeline_desc.rasterizer.front_face), fill_mode: conv::map_polygon_mode(pipeline_desc.rasterizer.polygon_mode), cull_mode: match conv::map_cull_face(pipeline_desc.rasterizer.cull_face) { Some(mode) => mode, None => { //TODO - Metal validation fails with // RasterizationEnabled is false but the vertex shader's return type is not void error!("Culling both sides is not yet supported"); //pipeline.set_rasterization_enabled(false); metal::MTLCullMode::None } }, depth_clip: if self.shared.private_caps.depth_clip_mode { Some(if pipeline_desc.rasterizer.depth_clamping { metal::MTLDepthClipMode::Clamp 
} else { metal::MTLDepthClipMode::Clip }) } else { None }, }); let depth_bias = pipeline_desc .rasterizer .depth_bias .unwrap_or(pso::State::Static(pso::DepthBias::default())); // prepare the depth-stencil state now let device = self.shared.device.lock(); self.shared .service_pipes .depth_stencil_states .prepare(&pipeline_desc.depth_stencil, &*device); let samples = if let Some(multisampling) = &pipeline_desc.multisampling { pipeline.set_sample_count(multisampling.rasterization_samples as u64); pipeline.set_alpha_to_coverage_enabled(multisampling.alpha_coverage); pipeline.set_alpha_to_one_enabled(multisampling.alpha_to_one); // TODO: sample_mask // TODO: sample_shading multisampling.rasterization_samples } else { 1 }; device .new_render_pipeline_state(&pipeline) .map(|raw| n::GraphicsPipeline { vs_lib, fs_lib, raw, primitive_type, vs_pc_info: pipeline_desc.layout.push_constants.vs, ps_pc_info: pipeline_desc.layout.push_constants.ps, rasterizer_state, depth_bias, depth_stencil_desc: pipeline_desc.depth_stencil.clone(), baked_states: pipeline_desc.baked_states.clone(), vertex_buffers, attachment_formats: subpass.attachments.map(|at| (at.format, at.channel)), samples, }) .map_err(|err| { error!("PSO creation failed: {}", err); pso::CreationError::Other }) } unsafe fn create_compute_pipeline<'a>( &self, pipeline_desc: &pso::ComputePipelineDesc<'a, Backend>, cache: Option<&n::PipelineCache>, ) -> Result { debug!("create_compute_pipeline {:?}", pipeline_desc); let pipeline = metal::ComputePipelineDescriptor::new(); let (cs_lib, cs_function, work_group_size, _) = self.load_shader( &pipeline_desc.shader, &pipeline_desc.layout, MTLPrimitiveTopologyClass::Unspecified, cache, ShaderStage::Compute, )?; pipeline.set_compute_function(Some(&cs_function)); self.shared .device .lock() .new_compute_pipeline_state(&pipeline) .map(|raw| n::ComputePipeline { cs_lib, raw, work_group_size, pc_info: pipeline_desc.layout.push_constants.cs, }) .map_err(|err| { error!("PSO creation failed: {}", err); pso::CreationError::Other }) } unsafe fn create_framebuffer( &self, _render_pass: &n::RenderPass, attachments: I, extent: image::Extent, ) -> Result where I: IntoIterator, I::Item: Borrow, { Ok(n::Framebuffer { extent, attachments: attachments .into_iter() .map(|at| at.borrow().texture.clone()) .collect(), }) } unsafe fn create_shader_module( &self, raw_data: &[u32], ) -> Result { //TODO: we can probably at least parse here and save the `Ast` Ok(n::ShaderModule { spv: raw_data.to_vec(), #[cfg(feature = "naga")] naga: match naga::front::spv::Parser::new(raw_data.iter().cloned(), &Default::default()) .parse() { Ok(module) => match naga::proc::Validator::new().validate(&module) { Ok(()) => Some(module), Err(e) => { warn!("Naga validation failed: {:?}", e); None } }, Err(e) => { warn!("Naga parsing failed: {:?}", e); None } }, }) } unsafe fn create_sampler( &self, info: &image::SamplerDesc, ) -> Result { Ok(n::Sampler { raw: match self.make_sampler_descriptor(&info) { Some(ref descriptor) => Some(self.shared.device.lock().new_sampler(descriptor)), None => None, }, data: Self::make_sampler_data(&info), }) } unsafe fn destroy_sampler(&self, _sampler: n::Sampler) {} unsafe fn map_memory( &self, memory: &n::Memory, segment: memory::Segment, ) -> Result<*mut u8, MapError> { let range = memory.resolve(&segment); debug!("map_memory of size {} at {:?}", memory.size, range); let base_ptr = match memory.heap { n::MemoryHeap::Public(_, ref cpu_buffer) => cpu_buffer.contents() as *mut u8, n::MemoryHeap::Native(_) | 
            n::MemoryHeap::Private => panic!("Unable to map memory!"),
        };
        Ok(base_ptr.offset(range.start as _))
    }

    unsafe fn unmap_memory(&self, memory: &n::Memory) {
        debug!("unmap_memory of size {}", memory.size);
    }

    unsafe fn flush_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), OutOfMemory>
    where
        I: IntoIterator,
        I::Item: Borrow<(&'a n::Memory, memory::Segment)>,
    {
        debug!("flush_mapped_memory_ranges");
        for item in iter {
            let (memory, ref segment) = *item.borrow();
            let range = memory.resolve(segment);
            debug!("\trange {:?}", range);

            match memory.heap {
                n::MemoryHeap::Native(_) => unimplemented!(),
                n::MemoryHeap::Public(mt, ref cpu_buffer)
                    if 1 << mt.0 != MemoryTypes::SHARED.bits() as usize =>
                {
                    cpu_buffer.did_modify_range(NSRange {
                        location: range.start as _,
                        length: (range.end - range.start) as _,
                    });
                }
                n::MemoryHeap::Public(..) => continue,
                n::MemoryHeap::Private => panic!("Can't map private memory!"),
            };
        }
        Ok(())
    }

    unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), OutOfMemory>
    where
        I: IntoIterator,
        I::Item: Borrow<(&'a n::Memory, memory::Segment)>,
    {
        let mut num_syncs = 0;
        debug!("invalidate_mapped_memory_ranges");

        // temporary command buffer to copy the contents from
        // the given buffers into the allocated CPU-visible buffers
        // Note: using a separate internal queue in order to avoid a stall
        let cmd_buffer = self.invalidation_queue.spawn_temp();
        autoreleasepool(|| {
            let encoder = cmd_buffer.new_blit_command_encoder();
            for item in iter {
                let (memory, ref segment) = *item.borrow();
                let range = memory.resolve(segment);
                debug!("\trange {:?}", range);

                match memory.heap {
                    n::MemoryHeap::Native(_) => unimplemented!(),
                    n::MemoryHeap::Public(mt, ref cpu_buffer)
                        if 1 << mt.0 != MemoryTypes::SHARED.bits() as usize =>
                    {
                        num_syncs += 1;
                        encoder.synchronize_resource(cpu_buffer);
                    }
                    n::MemoryHeap::Public(..) => continue,
                    n::MemoryHeap::Private => panic!("Can't map private memory!"),
                };
            }
            encoder.end_encoding();
        });

        if num_syncs != 0 {
            debug!("\twaiting...");
            cmd_buffer.set_label("invalidate_mapped_memory_ranges");
            cmd_buffer.commit();
            cmd_buffer.wait_until_completed();
        }

        Ok(())
    }

    fn create_semaphore(&self) -> Result<n::Semaphore, OutOfMemory> {
        Ok(n::Semaphore {
            // Semaphore synchronization between command buffers of the same queue
            // is useless, don't bother even creating one.
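            // Only back it with a real (system) semaphore when more than one queue is
            // exposed; within a single queue, submission order already provides ordering.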
system: if self.shared.private_caps.exposed_queues > 1 { Some(n::SystemSemaphore::new()) } else { None }, }) } unsafe fn create_descriptor_pool( &self, max_sets: usize, descriptor_ranges: I, _flags: pso::DescriptorPoolCreateFlags, ) -> Result where I: IntoIterator, I::Item: Borrow, { if self.shared.private_caps.argument_buffers { let mut arguments = n::ArgumentArray::default(); for desc_range in descriptor_ranges { let dr = desc_range.borrow(); let content = n::DescriptorContent::from(dr.ty); let usage = n::ArgumentArray::describe_usage(dr.ty); if content.contains(n::DescriptorContent::BUFFER) { arguments.push(metal::MTLDataType::Pointer, dr.count, usage); } if content.contains(n::DescriptorContent::TEXTURE) { arguments.push(metal::MTLDataType::Texture, dr.count, usage); } if content.contains(n::DescriptorContent::SAMPLER) { arguments.push(metal::MTLDataType::Sampler, dr.count, usage); } } let device = self.shared.device.lock(); let (array_ref, total_resources) = arguments.build(); let encoder = device.new_argument_encoder(array_ref); let alignment = self.shared.private_caps.buffer_alignment; let total_size = encoder.encoded_length() + (max_sets as u64) * alignment; let raw = device.new_buffer(total_size, MTLResourceOptions::empty()); Ok(n::DescriptorPool::new_argument( raw, total_size, alignment, total_resources, )) } else { let mut counters = n::ResourceData::::new(); for desc_range in descriptor_ranges { let dr = desc_range.borrow(); counters.add_many( n::DescriptorContent::from(dr.ty), dr.count as pso::DescriptorBinding, ); } Ok(n::DescriptorPool::new_emulated(counters)) } } unsafe fn create_descriptor_set_layout( &self, binding_iter: I, immutable_samplers: J, ) -> Result where I: IntoIterator, I::Item: Borrow, J: IntoIterator, J::Item: Borrow, { if self.shared.private_caps.argument_buffers { let mut stage_flags = pso::ShaderStageFlags::empty(); let mut arguments = n::ArgumentArray::default(); let mut bindings = FastHashMap::default(); for desc in binding_iter { let desc = desc.borrow(); //TODO: have the API providing the dimensions and MSAA flag // for textures in an argument buffer match desc.ty { pso::DescriptorType::Buffer { format: pso::BufferDescriptorFormat::Structured { dynamic_offset: true, }, .. } => { //TODO: apply the offsets somehow at the binding time error!("Dynamic offsets are not yet supported in argument buffers!"); } pso::DescriptorType::Image { ty: pso::ImageDescriptorType::Storage { .. }, } | pso::DescriptorType::Buffer { ty: pso::BufferDescriptorType::Storage { .. 
}, format: pso::BufferDescriptorFormat::Texel, } => { //TODO: bind storage buffers and images separately error!("Storage images are not yet supported in argument buffers!"); } _ => {} } stage_flags |= desc.stage_flags; let content = n::DescriptorContent::from(desc.ty); let usage = n::ArgumentArray::describe_usage(desc.ty); let res = msl::ResourceBinding { buffer_id: if content.contains(n::DescriptorContent::BUFFER) { arguments.push(metal::MTLDataType::Pointer, desc.count, usage) as u32 } else { !0 }, texture_id: if content.contains(n::DescriptorContent::TEXTURE) { arguments.push(metal::MTLDataType::Texture, desc.count, usage) as u32 } else { !0 }, sampler_id: if content.contains(n::DescriptorContent::SAMPLER) { arguments.push(metal::MTLDataType::Sampler, desc.count, usage) as u32 } else { !0 }, }; let res_offset = res.buffer_id.min(res.texture_id).min(res.sampler_id); bindings.insert( desc.binding, n::ArgumentLayout { res, res_offset, count: desc.count, usage, content, }, ); } let (array_ref, arg_total) = arguments.build(); let encoder = self.shared.device.lock().new_argument_encoder(array_ref); Ok(n::DescriptorSetLayout::ArgumentBuffer { encoder, stage_flags, bindings: Arc::new(bindings), total: arg_total as n::PoolResourceIndex, }) } else { struct TempSampler { data: msl::SamplerData, binding: pso::DescriptorBinding, array_index: pso::DescriptorArrayIndex, }; let mut immutable_sampler_iter = immutable_samplers.into_iter(); let mut tmp_samplers = Vec::new(); let mut desc_layouts = Vec::new(); let mut total = n::ResourceData::new(); for set_layout_binding in binding_iter { let slb = set_layout_binding.borrow(); let mut content = n::DescriptorContent::from(slb.ty); total.add(content); if slb.immutable_samplers { tmp_samplers.extend( immutable_sampler_iter .by_ref() .take(slb.count) .enumerate() .map(|(array_index, sm)| TempSampler { data: sm.borrow().data.clone(), binding: slb.binding, array_index, }), ); content |= n::DescriptorContent::IMMUTABLE_SAMPLER; } desc_layouts.extend((0..slb.count).map(|array_index| n::DescriptorLayout { content, stages: slb.stage_flags, binding: slb.binding, array_index, })); } desc_layouts.sort_by_key(|dl| (dl.binding, dl.array_index)); tmp_samplers.sort_by_key(|ts| (ts.binding, ts.array_index)); // From here on, we assume that `desc_layouts` has at most a single item for // a (binding, array_index) pair. 
            // To achieve that, we deduplicate the array now
            desc_layouts.dedup_by(|a, b| {
                if (a.binding, a.array_index) == (b.binding, b.array_index) {
                    debug_assert!(!b.stages.intersects(a.stages));
                    debug_assert_eq!(a.content, b.content); //TODO: double check if this can be demanded
                    b.stages |= a.stages; //`b` is here to stay
                    true
                } else {
                    false
                }
            });
            Ok(n::DescriptorSetLayout::Emulated {
                layouts: Arc::new(desc_layouts),
                total,
                immutable_samplers: tmp_samplers
                    .into_iter()
                    .map(|ts| (ts.binding, ts.data))
                    .collect(),
            })
        }
    }

    unsafe fn write_descriptor_sets<'a, I, J>(&self, write_iter: I)
    where
        I: IntoIterator<Item = pso::DescriptorSetWrite<'a, Backend, J>>,
        J: IntoIterator,
        J::Item: Borrow<pso::Descriptor<'a, Backend>>,
    {
        debug!("write_descriptor_sets");
        for write in write_iter {
            match *write.set {
                n::DescriptorSet::Emulated {
                    ref pool,
                    ref layouts,
                    ref resources,
                } => {
                    let mut counters = resources.map(|r| r.start);
                    let mut start = None; //TODO: can pre-compute this
                    for (i, layout) in layouts.iter().enumerate() {
                        if layout.binding == write.binding
                            && layout.array_index == write.array_offset
                        {
                            start = Some(i);
                            break;
                        }
                        counters.add(layout.content);
                    }
                    let mut data = pool.write();

                    for (layout, descriptor) in
                        layouts[start.unwrap()..].iter().zip(write.descriptors)
                    {
                        trace!("\t{:?}", layout);
                        match *descriptor.borrow() {
                            pso::Descriptor::Sampler(sam) => {
                                debug_assert!(!layout
                                    .content
                                    .contains(n::DescriptorContent::IMMUTABLE_SAMPLER));
                                data.samplers[counters.samplers as usize] = (
                                    layout.stages,
                                    Some(AsNative::from(sam.raw.as_ref().unwrap().as_ref())),
                                );
                            }
                            pso::Descriptor::Image(view, il) => {
                                data.textures[counters.textures as usize] = (
                                    layout.stages,
                                    Some(AsNative::from(view.texture.as_ref())),
                                    il,
                                );
                            }
                            pso::Descriptor::CombinedImageSampler(view, il, sam) => {
                                if !layout
                                    .content
                                    .contains(n::DescriptorContent::IMMUTABLE_SAMPLER)
                                {
                                    data.samplers[counters.samplers as usize] = (
                                        layout.stages,
                                        Some(AsNative::from(sam.raw.as_ref().unwrap().as_ref())),
                                    );
                                }
                                data.textures[counters.textures as usize] = (
                                    layout.stages,
                                    Some(AsNative::from(view.texture.as_ref())),
                                    il,
                                );
                            }
                            pso::Descriptor::TexelBuffer(view) => {
                                data.textures[counters.textures as usize] = (
                                    layout.stages,
                                    Some(AsNative::from(view.raw.as_ref())),
                                    image::Layout::General,
                                );
                            }
                            pso::Descriptor::Buffer(buf, ref sub) => {
                                let (raw, range) = buf.as_bound();
                                debug_assert!(
                                    range.start + sub.offset + sub.size.unwrap_or(0) <= range.end
                                );
                                data.buffers[counters.buffers as usize] = (
                                    layout.stages,
                                    Some(AsNative::from(raw)),
                                    range.start + sub.offset,
                                );
                            }
                        }
                        counters.add(layout.content);
                    }
                }
                n::DescriptorSet::ArgumentBuffer {
                    ref raw,
                    raw_offset,
                    ref pool,
                    ref range,
                    ref encoder,
                    ref bindings,
                    ..
} => { debug_assert!(self.shared.private_caps.argument_buffers); encoder.set_argument_buffer(raw, raw_offset); let mut arg_index = { let binding = &bindings[&write.binding]; debug_assert!((write.array_offset as usize) < binding.count); (binding.res_offset as NSUInteger) + (write.array_offset as NSUInteger) }; for (data, descriptor) in pool.write().resources [range.start as usize + arg_index as usize..range.end as usize] .iter_mut() .zip(write.descriptors) { match *descriptor.borrow() { pso::Descriptor::Sampler(sampler) => { debug_assert!(!bindings[&write.binding] .content .contains(n::DescriptorContent::IMMUTABLE_SAMPLER)); encoder.set_sampler_state(arg_index, sampler.raw.as_ref().unwrap()); arg_index += 1; } pso::Descriptor::Image(image, _layout) => { let tex_ref = image.texture.as_ref(); encoder.set_texture(arg_index, tex_ref); data.ptr = (&**tex_ref).as_ptr(); arg_index += 1; } pso::Descriptor::CombinedImageSampler(image, _il, sampler) => { let binding = &bindings[&write.binding]; if !binding .content .contains(n::DescriptorContent::IMMUTABLE_SAMPLER) { //TODO: supporting arrays of combined image-samplers can be tricky. // We need to scan both sampler and image sections of the encoder // at the same time. assert!( arg_index < (binding.res_offset as NSUInteger) + (binding.count as NSUInteger) ); encoder.set_sampler_state( arg_index + binding.count as NSUInteger, sampler.raw.as_ref().unwrap(), ); } let tex_ref = image.texture.as_ref(); encoder.set_texture(arg_index, tex_ref); data.ptr = (&**tex_ref).as_ptr(); } pso::Descriptor::TexelBuffer(view) => { encoder.set_texture(arg_index, &view.raw); data.ptr = (&**view.raw).as_ptr(); arg_index += 1; } pso::Descriptor::Buffer(buffer, ref sub) => { let (buf_raw, buf_range) = buffer.as_bound(); encoder.set_buffer( arg_index, buf_raw, buf_range.start + sub.offset, ); data.ptr = (&**buf_raw).as_ptr(); arg_index += 1; } } } } } } } unsafe fn copy_descriptor_sets<'a, I>(&self, copies: I) where I: IntoIterator, I::Item: Borrow>, { for _copy in copies { unimplemented!() } } unsafe fn destroy_descriptor_pool(&self, _pool: n::DescriptorPool) {} unsafe fn destroy_descriptor_set_layout(&self, _layout: n::DescriptorSetLayout) {} unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: n::PipelineLayout) {} unsafe fn destroy_shader_module(&self, _module: n::ShaderModule) {} unsafe fn destroy_render_pass(&self, _pass: n::RenderPass) {} unsafe fn destroy_graphics_pipeline(&self, _pipeline: n::GraphicsPipeline) {} unsafe fn destroy_compute_pipeline(&self, _pipeline: n::ComputePipeline) {} unsafe fn destroy_framebuffer(&self, _buffer: n::Framebuffer) {} unsafe fn destroy_semaphore(&self, _semaphore: n::Semaphore) {} unsafe fn allocate_memory( &self, memory_type: hal::MemoryTypeId, size: u64, ) -> Result { let (storage, cache) = MemoryTypes::describe(memory_type.0); let device = self.shared.device.lock(); debug!("allocate_memory type {:?} of size {}", memory_type, size); // Heaps cannot be used for CPU coherent resources //TEMP: MacOS supports Private only, iOS and tvOS can do private/shared let heap = if self.shared.private_caps.resource_heaps && storage != MTLStorageMode::Shared && false { let descriptor = metal::HeapDescriptor::new(); descriptor.set_storage_mode(storage); descriptor.set_cpu_cache_mode(cache); descriptor.set_size(size); let heap_raw = device.new_heap(&descriptor); n::MemoryHeap::Native(heap_raw) } else if storage == MTLStorageMode::Private { n::MemoryHeap::Private } else { let options = conv::resource_options_from_storage_and_cache(storage, 
cache); let cpu_buffer = device.new_buffer(size, options); debug!("\tbacked by cpu buffer {:?}", cpu_buffer.as_ptr()); n::MemoryHeap::Public(memory_type, cpu_buffer) }; Ok(n::Memory::new(heap, size)) } unsafe fn free_memory(&self, memory: n::Memory) { debug!("free_memory of size {}", memory.size); if let n::MemoryHeap::Public(_, ref cpu_buffer) = memory.heap { debug!("\tbacked by cpu buffer {:?}", cpu_buffer.as_ptr()); } } unsafe fn create_buffer( &self, size: u64, usage: buffer::Usage, ) -> Result { debug!("create_buffer of size {} and usage {:?}", size, usage); Ok(n::Buffer::Unbound { usage, size, name: String::new(), }) } unsafe fn get_buffer_requirements(&self, buffer: &n::Buffer) -> memory::Requirements { let (size, usage) = match *buffer { n::Buffer::Unbound { size, usage, .. } => (size, usage), n::Buffer::Bound { .. } => panic!("Unexpected Buffer::Bound"), }; let mut max_size = size; let mut max_alignment = self.shared.private_caps.buffer_alignment; if self.shared.private_caps.resource_heaps { // We don't know what memory type the user will try to allocate the buffer with, so we test them // all get the most stringent ones. for (i, _mt) in self.memory_types.iter().enumerate() { let (storage, cache) = MemoryTypes::describe(i); let options = conv::resource_options_from_storage_and_cache(storage, cache); let requirements = self .shared .device .lock() .heap_buffer_size_and_align(size, options); max_size = cmp::max(max_size, requirements.size); max_alignment = cmp::max(max_alignment, requirements.align); } } // based on Metal validation error for view creation: // failed assertion `BytesPerRow of a buffer-backed texture with pixelFormat(XXX) must be aligned to 256 bytes const SIZE_MASK: u64 = 0xFF; let supports_texel_view = usage.intersects(buffer::Usage::UNIFORM_TEXEL | buffer::Usage::STORAGE_TEXEL); memory::Requirements { size: (max_size + SIZE_MASK) & !SIZE_MASK, alignment: max_alignment, type_mask: if !supports_texel_view || self.shared.private_caps.shared_textures { MemoryTypes::all().bits() } else { (MemoryTypes::all() ^ MemoryTypes::SHARED).bits() }, } } unsafe fn bind_buffer_memory( &self, memory: &n::Memory, offset: u64, buffer: &mut n::Buffer, ) -> Result<(), BindError> { let (size, name) = match buffer { n::Buffer::Unbound { size, name, .. } => (*size, name), n::Buffer::Bound { .. } => panic!("Unexpected Buffer::Bound"), }; debug!("bind_buffer_memory of size {} at offset {}", size, offset); *buffer = match memory.heap { n::MemoryHeap::Native(ref heap) => { let options = conv::resource_options_from_storage_and_cache( heap.storage_mode(), heap.cpu_cache_mode(), ); let raw = heap.new_buffer(size, options).unwrap_or_else(|| { // TODO: disable hazard tracking? self.shared.device.lock().new_buffer(size, options) }); raw.set_label(name); n::Buffer::Bound { raw, options, range: 0..size, //TODO? 
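                // (Added note) The MTLBuffer created above is used exclusively by this
                // binding, so the recorded range covers the whole resource.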
} } n::MemoryHeap::Public(mt, ref cpu_buffer) => { debug!( "\tmapped to public heap with address {:?}", cpu_buffer.as_ptr() ); let (storage, cache) = MemoryTypes::describe(mt.0); let options = conv::resource_options_from_storage_and_cache(storage, cache); if offset == 0x0 && size == cpu_buffer.length() { cpu_buffer.set_label(name); } else if self.shared.private_caps.supports_debug_markers { cpu_buffer.add_debug_marker( name, NSRange { location: offset, length: size, }, ); } n::Buffer::Bound { raw: cpu_buffer.clone(), options, range: offset..offset + size, } } n::MemoryHeap::Private => { //TODO: check for aliasing let options = MTLResourceOptions::StorageModePrivate | MTLResourceOptions::CPUCacheModeDefaultCache; let raw = self.shared.device.lock().new_buffer(size, options); raw.set_label(name); n::Buffer::Bound { raw, options, range: 0..size, } } }; Ok(()) } unsafe fn destroy_buffer(&self, buffer: n::Buffer) { if let n::Buffer::Bound { raw, range, .. } = buffer { debug!( "destroy_buffer {:?} occupying memory {:?}", raw.as_ptr(), range ); } } unsafe fn create_buffer_view( &self, buffer: &n::Buffer, format_maybe: Option, sub: buffer::SubRange, ) -> Result { let (raw, base_range, options) = match *buffer { n::Buffer::Bound { ref raw, ref range, options, } => (raw, range, options), n::Buffer::Unbound { .. } => panic!("Unexpected Buffer::Unbound"), }; let start = base_range.start + sub.offset; let size_rough = sub.size.unwrap_or(base_range.end - start); let format = match format_maybe { Some(fmt) => fmt, None => { return Err(buffer::ViewCreationError::UnsupportedFormat(format_maybe)); } }; let format_desc = format.surface_desc(); if format_desc.aspects != format::Aspects::COLOR || format_desc.is_compressed() { // Vadlidator says "Linear texture: cannot create compressed, depth, or stencil textures" return Err(buffer::ViewCreationError::UnsupportedFormat(format_maybe)); } //Note: we rely on SPIRV-Cross to use the proper 2D texel indexing here let texel_count = size_rough * 8 / format_desc.bits as u64; let col_count = cmp::min(texel_count, self.shared.private_caps.max_texture_size); let row_count = (texel_count + self.shared.private_caps.max_texture_size - 1) / self.shared.private_caps.max_texture_size; let mtl_format = self .shared .private_caps .map_format(format) .ok_or(buffer::ViewCreationError::UnsupportedFormat(format_maybe))?; let descriptor = metal::TextureDescriptor::new(); descriptor.set_texture_type(MTLTextureType::D2); descriptor.set_width(col_count); descriptor.set_height(row_count); descriptor.set_mipmap_level_count(1); descriptor.set_pixel_format(mtl_format); descriptor.set_resource_options(options); descriptor.set_storage_mode(raw.storage_mode()); descriptor.set_usage(metal::MTLTextureUsage::ShaderRead); let align_mask = self.shared.private_caps.buffer_alignment - 1; let stride = (col_count * (format_desc.bits as u64 / 8) + align_mask) & !align_mask; Ok(n::BufferView { raw: raw.new_texture_from_contents(&descriptor, start, stride), }) } unsafe fn destroy_buffer_view(&self, _view: n::BufferView) { //nothing to do } unsafe fn create_image( &self, kind: image::Kind, mip_levels: image::Level, format: format::Format, tiling: image::Tiling, usage: image::Usage, view_caps: image::ViewCapabilities, ) -> Result { debug!( "create_image {:?} with {} mips of {:?} {:?} and usage {:?} with {:?}", kind, mip_levels, format, tiling, usage, view_caps ); let is_cube = view_caps.contains(image::ViewCapabilities::KIND_CUBE); let mtl_format = self .shared .private_caps .map_format(format) 
.ok_or_else(|| image::CreationError::Format(format))?; let descriptor = metal::TextureDescriptor::new(); let (mtl_type, num_layers) = match kind { image::Kind::D1(_, 1) => { assert!(!is_cube); (MTLTextureType::D1, None) } image::Kind::D1(_, layers) => { assert!(!is_cube); (MTLTextureType::D1Array, Some(layers)) } image::Kind::D2(_, _, layers, 1) => { if is_cube && layers > 6 { assert_eq!(layers % 6, 0); (MTLTextureType::CubeArray, Some(layers / 6)) } else if is_cube { assert_eq!(layers, 6); (MTLTextureType::Cube, None) } else if layers > 1 { (MTLTextureType::D2Array, Some(layers)) } else { (MTLTextureType::D2, None) } } image::Kind::D2(_, _, 1, samples) if !is_cube => { descriptor.set_sample_count(samples as u64); (MTLTextureType::D2Multisample, None) } image::Kind::D2(..) => { error!( "Multi-sampled array textures or cubes are not supported: {:?}", kind ); return Err(image::CreationError::Kind); } image::Kind::D3(..) => { assert!(!is_cube); if view_caps.contains(image::ViewCapabilities::KIND_2D_ARRAY) { warn!("Unable to support 2D array views of 3D textures"); } (MTLTextureType::D3, None) } }; descriptor.set_texture_type(mtl_type); if let Some(count) = num_layers { descriptor.set_array_length(count as u64); } let extent = kind.extent(); descriptor.set_width(extent.width as u64); descriptor.set_height(extent.height as u64); descriptor.set_depth(extent.depth as u64); descriptor.set_mipmap_level_count(mip_levels as u64); descriptor.set_pixel_format(mtl_format); descriptor.set_usage(conv::map_texture_usage(usage, tiling, view_caps)); let base = format.base_format(); let format_desc = base.0.desc(); let mip_sizes = (0..mip_levels) .map(|level| { let pitches = n::Image::pitches_impl(extent.at_level(level), format_desc); num_layers.unwrap_or(1) as buffer::Offset * pitches[3] }) .collect(); let host_usage = image::Usage::TRANSFER_SRC | image::Usage::TRANSFER_DST; let host_visible = mtl_type == MTLTextureType::D2 && mip_levels == 1 && num_layers.is_none() && format_desc.aspects.contains(format::Aspects::COLOR) && tiling == image::Tiling::Linear && host_usage.contains(usage); Ok(n::Image { like: n::ImageLike::Unbound { descriptor, mip_sizes, host_visible, name: String::new(), }, kind, mip_levels, format_desc, shader_channel: base.1.into(), mtl_format, mtl_type, }) } unsafe fn get_image_requirements(&self, image: &n::Image) -> memory::Requirements { let (descriptor, mip_sizes, host_visible) = match image.like { n::ImageLike::Unbound { ref descriptor, ref mip_sizes, host_visible, .. } => (descriptor, mip_sizes, host_visible), n::ImageLike::Texture(..) | n::ImageLike::Buffer(..) => { panic!("Expected Image::Unbound") } }; if self.shared.private_caps.resource_heaps { // We don't know what memory type the user will try to allocate the image with, so we test them // all get the most stringent ones. 
            // Note we don't check Shared because heaps can't use it
            let mut max_size = 0;
            let mut max_alignment = 0;
            let types = if host_visible {
                MemoryTypes::all()
            } else {
                MemoryTypes::PRIVATE
            };
            for (i, _) in self.memory_types.iter().enumerate() {
                if !types.contains(MemoryTypes::from_bits(1 << i).unwrap()) {
                    continue;
                }
                let (storage, cache_mode) = MemoryTypes::describe(i);
                descriptor.set_storage_mode(storage);
                descriptor.set_cpu_cache_mode(cache_mode);
                let requirements = self
                    .shared
                    .device
                    .lock()
                    .heap_texture_size_and_align(descriptor);
                max_size = cmp::max(max_size, requirements.size);
                max_alignment = cmp::max(max_alignment, requirements.align);
            }
            memory::Requirements {
                size: max_size,
                alignment: max_alignment,
                type_mask: types.bits(),
            }
        } else if host_visible {
            assert_eq!(mip_sizes.len(), 1);
            let mask = self.shared.private_caps.buffer_alignment - 1;
            memory::Requirements {
                size: (mip_sizes[0] + mask) & !mask,
                alignment: self.shared.private_caps.buffer_alignment,
                type_mask: MemoryTypes::all().bits(),
            }
        } else {
            memory::Requirements {
                size: mip_sizes.iter().sum(),
                alignment: 4,
                type_mask: MemoryTypes::PRIVATE.bits(),
            }
        }
    }

    unsafe fn get_image_subresource_footprint(
        &self,
        image: &n::Image,
        sub: image::Subresource,
    ) -> image::SubresourceFootprint {
        let num_layers = image.kind.num_layers() as buffer::Offset;
        let level_offset = (0..sub.level).fold(0, |offset, level| {
            let pitches = image.pitches(level);
            offset + num_layers * pitches[3]
        });
        let pitches = image.pitches(sub.level);
        let layer_offset = level_offset + sub.layer as buffer::Offset * pitches[3];
        image::SubresourceFootprint {
            slice: layer_offset..layer_offset + pitches[3],
            row_pitch: pitches[1] as _,
            depth_pitch: pitches[2] as _,
            array_pitch: pitches[3] as _,
        }
    }

    unsafe fn bind_image_memory(
        &self,
        memory: &n::Memory,
        offset: u64,
        image: &mut n::Image,
    ) -> Result<(), BindError> {
        let like = {
            let (descriptor, mip_sizes, name) = match image.like {
                n::ImageLike::Unbound {
                    ref descriptor,
                    ref mip_sizes,
                    ref name,
                    ..
                } => (descriptor, mip_sizes, name),
                n::ImageLike::Texture(..) | n::ImageLike::Buffer(..) => {
                    panic!("Expected Image::Unbound")
                }
            };
            match memory.heap {
                n::MemoryHeap::Native(ref heap) => {
                    let resource_options = conv::resource_options_from_storage_and_cache(
                        heap.storage_mode(),
                        heap.cpu_cache_mode(),
                    );
                    descriptor.set_resource_options(resource_options);
                    n::ImageLike::Texture(heap.new_texture(descriptor).unwrap_or_else(|| {
                        // TODO: disable hazard tracking?
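                        // (Added note) `Heap::new_texture` returns `None` when the heap
                        // cannot service this allocation; in that case we fall back to a
                        // plain device allocation below.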
let texture = self.shared.device.lock().new_texture(&descriptor); texture.set_label(name); texture })) } n::MemoryHeap::Public(_memory_type, ref cpu_buffer) => { assert_eq!(mip_sizes.len(), 1); if offset == 0x0 && cpu_buffer.length() == mip_sizes[0] { cpu_buffer.set_label(name); } else if self.shared.private_caps.supports_debug_markers { cpu_buffer.add_debug_marker( name, NSRange { location: offset, length: mip_sizes[0], }, ); } n::ImageLike::Buffer(n::Buffer::Bound { raw: cpu_buffer.clone(), range: offset..offset + mip_sizes[0] as u64, options: MTLResourceOptions::StorageModeShared, }) } n::MemoryHeap::Private => { descriptor.set_storage_mode(MTLStorageMode::Private); let texture = self.shared.device.lock().new_texture(descriptor); texture.set_label(name); n::ImageLike::Texture(texture) } } }; Ok(image.like = like) } unsafe fn destroy_image(&self, _image: n::Image) { //nothing to do } unsafe fn create_image_view( &self, image: &n::Image, kind: image::ViewKind, format: format::Format, swizzle: format::Swizzle, range: image::SubresourceRange, ) -> Result { let mtl_format = match self .shared .private_caps .map_format_with_swizzle(format, swizzle) { Some(f) => f, None => { error!("failed to swizzle format {:?} with {:?}", format, swizzle); return Err(image::ViewCreationError::BadFormat(format)); } }; let raw = image.like.as_texture(); let full_range = image::SubresourceRange { aspects: image.format_desc.aspects, ..Default::default() }; let mtl_type = if image.mtl_type == MTLTextureType::D2Multisample { if kind != image::ViewKind::D2 { error!("Requested {:?} for MSAA texture", kind); } image.mtl_type } else { conv::map_texture_type(kind) }; let texture = if mtl_format == image.mtl_format && mtl_type == image.mtl_type && swizzle == format::Swizzle::NO && range == full_range { // Some images are marked as framebuffer-only, and we can't create aliases of them. // Also helps working around Metal bugs with aliased array textures. 
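            // (Added note) In the identity case below the underlying MTLTexture is simply
            // retained and reused; only a mismatching format/type/swizzle/range goes through
            // `new_texture_view_from_slice`.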
raw.to_owned() } else { raw.new_texture_view_from_slice( mtl_format, mtl_type, NSRange { location: range.level_start as _, length: range.resolve_level_count(image.mip_levels) as _, }, NSRange { location: range.layer_start as _, length: range.resolve_layer_count(image.kind.num_layers()) as _, }, ) }; Ok(n::ImageView { texture, mtl_format, }) } unsafe fn destroy_image_view(&self, _view: n::ImageView) {} fn create_fence(&self, signaled: bool) -> Result { let mutex = Mutex::new(n::FenceInner::Idle { signaled }); debug!( "Creating fence ptr {:?} with signal={}", unsafe { mutex.raw() } as *const _, signaled ); Ok(n::Fence(mutex)) } unsafe fn reset_fence(&self, fence: &n::Fence) -> Result<(), OutOfMemory> { debug!("Resetting fence ptr {:?}", fence.0.raw() as *const _); *fence.0.lock() = n::FenceInner::Idle { signaled: false }; Ok(()) } unsafe fn wait_for_fence( &self, fence: &n::Fence, timeout_ns: u64, ) -> Result { unsafe fn to_ns(duration: time::Duration) -> u64 { duration.as_secs() * 1_000_000_000 + duration.subsec_nanos() as u64 } debug!("wait_for_fence {:?} for {} ms", fence, timeout_ns); match *fence.0.lock() { n::FenceInner::Idle { signaled } => { if !signaled { warn!( "Fence ptr {:?} is not pending, waiting not possible", fence.0.raw() as *const _ ); } Ok(signaled) } n::FenceInner::PendingSubmission(ref cmd_buf) => { if timeout_ns == !0 { cmd_buf.wait_until_completed(); return Ok(true); } let start = time::Instant::now(); loop { if let metal::MTLCommandBufferStatus::Completed = cmd_buf.status() { return Ok(true); } if to_ns(start.elapsed()) >= timeout_ns { return Ok(false); } thread::sleep(time::Duration::from_millis(1)); self.shared.queue_blocker.lock().triage(); } } } } unsafe fn get_fence_status(&self, fence: &n::Fence) -> Result { Ok(match *fence.0.lock() { n::FenceInner::Idle { signaled } => signaled, n::FenceInner::PendingSubmission(ref cmd_buf) => match cmd_buf.status() { metal::MTLCommandBufferStatus::Completed => true, _ => false, }, }) } unsafe fn destroy_fence(&self, _fence: n::Fence) { //empty } fn create_event(&self) -> Result { Ok(n::Event(Arc::new(AtomicBool::new(false)))) } unsafe fn get_event_status(&self, event: &n::Event) -> Result { Ok(event.0.load(Ordering::Acquire)) } unsafe fn set_event(&self, event: &n::Event) -> Result<(), OutOfMemory> { event.0.store(true, Ordering::Release); self.shared.queue_blocker.lock().triage(); Ok(()) } unsafe fn reset_event(&self, event: &n::Event) -> Result<(), OutOfMemory> { Ok(event.0.store(false, Ordering::Release)) } unsafe fn destroy_event(&self, _event: n::Event) { //empty } unsafe fn create_query_pool( &self, ty: query::Type, count: query::Id, ) -> Result { match ty { query::Type::Occlusion => { let range = self .shared .visibility .allocator .lock() .allocate_range(count) .map_err(|_| { error!("Not enough space to allocate an occlusion query pool"); OutOfMemory::Host })?; Ok(n::QueryPool::Occlusion(range)) } query::Type::Timestamp => { warn!("Timestamp queries are not really useful yet"); Ok(n::QueryPool::Timestamp) } query::Type::PipelineStatistics(..) 
=> Err(query::CreationError::Unsupported(ty)), } } unsafe fn destroy_query_pool(&self, pool: n::QueryPool) { match pool { n::QueryPool::Occlusion(range) => { self.shared.visibility.allocator.lock().free_range(range); } n::QueryPool::Timestamp => {} } } unsafe fn get_query_pool_results( &self, pool: &n::QueryPool, queries: Range, data: &mut [u8], stride: buffer::Offset, flags: query::ResultFlags, ) -> Result { let is_ready = match *pool { n::QueryPool::Occlusion(ref pool_range) => { let visibility = &self.shared.visibility; let is_ready = if flags.contains(query::ResultFlags::WAIT) { let mut guard = visibility.allocator.lock(); while !visibility.are_available(pool_range.start, &queries) { visibility.condvar.wait(&mut guard); } true } else { visibility.are_available(pool_range.start, &queries) }; let size_data = mem::size_of::() as buffer::Offset; if stride == size_data && flags.contains(query::ResultFlags::BITS_64) && !flags.contains(query::ResultFlags::WITH_AVAILABILITY) { // if stride is matching, copy everything in one go ptr::copy_nonoverlapping( (visibility.buffer.contents() as *const u8).offset( (pool_range.start + queries.start) as isize * size_data as isize, ), data.as_mut_ptr(), stride as usize * (queries.end - queries.start) as usize, ); } else { // copy parts of individual entries for i in 0..queries.end - queries.start { let absolute_index = (pool_range.start + queries.start + i) as isize; let value = *(visibility.buffer.contents() as *const u64).offset(absolute_index); let base = (visibility.buffer.contents() as *const u8) .offset(visibility.availability_offset as isize); let availability = *(base as *const u32).offset(absolute_index); let data_ptr = data[i as usize * stride as usize..].as_mut_ptr(); if flags.contains(query::ResultFlags::BITS_64) { *(data_ptr as *mut u64) = value; if flags.contains(query::ResultFlags::WITH_AVAILABILITY) { *(data_ptr as *mut u64).offset(1) = availability as u64; } } else { *(data_ptr as *mut u32) = value as u32; if flags.contains(query::ResultFlags::WITH_AVAILABILITY) { *(data_ptr as *mut u32).offset(1) = availability; } } } } is_ready } n::QueryPool::Timestamp => { for d in data.iter_mut() { *d = 0; } true } }; Ok(is_ready) } fn wait_idle(&self) -> Result<(), OutOfMemory> { command::QueueInner::wait_idle(&self.shared.queue); Ok(()) } unsafe fn set_image_name(&self, image: &mut n::Image, name: &str) { match image { n::Image { like: n::ImageLike::Buffer(ref mut buf), .. } => self.set_buffer_name(buf, name), n::Image { like: n::ImageLike::Texture(ref tex), .. } => tex.set_label(name), n::Image { like: n::ImageLike::Unbound { name: ref mut unbound_name, .. }, .. } => { *unbound_name = name.to_string(); } }; } unsafe fn set_buffer_name(&self, buffer: &mut n::Buffer, name: &str) { match buffer { n::Buffer::Unbound { name: ref mut unbound_name, .. } => { *unbound_name = name.to_string(); } n::Buffer::Bound { ref raw, ref range, .. 
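                // (Added note) A bound buffer may be sub-allocated from a shared
                // `MTLBuffer` (see `bind_buffer_memory`), so the arm below attaches the
                // name as a debug marker over just this buffer's range instead of
                // relabeling the whole underlying resource.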
} => { if self.shared.private_caps.supports_debug_markers { raw.add_debug_marker( name, NSRange { location: range.start, length: range.end - range.start, }, ); } } } } unsafe fn set_command_buffer_name( &self, command_buffer: &mut command::CommandBuffer, name: &str, ) { command_buffer.name = name.to_string(); } unsafe fn set_semaphore_name(&self, _semaphore: &mut n::Semaphore, _name: &str) {} unsafe fn set_fence_name(&self, _fence: &mut n::Fence, _name: &str) {} unsafe fn set_framebuffer_name(&self, _framebuffer: &mut n::Framebuffer, _name: &str) {} unsafe fn set_render_pass_name(&self, render_pass: &mut n::RenderPass, name: &str) { render_pass.name = name.to_string(); } unsafe fn set_descriptor_set_name(&self, _descriptor_set: &mut n::DescriptorSet, _name: &str) { // TODO } unsafe fn set_descriptor_set_layout_name( &self, _descriptor_set_layout: &mut n::DescriptorSetLayout, _name: &str, ) { // TODO } unsafe fn set_pipeline_layout_name( &self, _pipeline_layout: &mut n::PipelineLayout, _name: &str, ) { // TODO } unsafe fn set_compute_pipeline_name( &self, compute_pipeline: &mut n::ComputePipeline, name: &str, ) { if self.shared.private_caps.supports_debug_markers { compute_pipeline.raw.set_label(name); } } unsafe fn set_graphics_pipeline_name( &self, graphics_pipeline: &mut n::GraphicsPipeline, name: &str, ) { if self.shared.private_caps.supports_debug_markers { graphics_pipeline.raw.set_label(name); } } } #[test] fn test_send_sync() { fn foo() {} foo::() }
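// A minimal sanity-test sketch (added here; `test_memory_type_mapping` is a new,
// hypothetical name and not part of the original suite). It only checks that every
// declared `MemoryTypes` bit maps to a Metal storage/cache pair without panicking,
// and that the PRIVATE type stays device-local.
#[test]
fn test_memory_type_mapping() {
    for index in 0..MemoryTypes::all().bits().count_ones() as usize {
        let _ = MemoryTypes::describe(index);
    }
    match MemoryTypes::describe(0) {
        (MTLStorageMode::Private, MTLCPUCacheMode::DefaultCache) => {}
        _ => panic!("unexpected storage/cache mapping for MemoryTypes::PRIVATE"),
    }
}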