From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- .../examples/compute/compute-argument-buffer.metal | 14 ++ .../examples/compute/compute-argument-buffer.rs | 95 ++++++++++ .../rust/metal/examples/compute/embedded-lib.rs | 24 +++ third_party/rust/metal/examples/compute/main.rs | 194 +++++++++++++++++++++ .../rust/metal/examples/compute/shaders.metal | 10 ++ .../rust/metal/examples/compute/shaders.metallib | Bin 0 -> 3209 bytes 6 files changed, 337 insertions(+) create mode 100644 third_party/rust/metal/examples/compute/compute-argument-buffer.metal create mode 100644 third_party/rust/metal/examples/compute/compute-argument-buffer.rs create mode 100644 third_party/rust/metal/examples/compute/embedded-lib.rs create mode 100644 third_party/rust/metal/examples/compute/main.rs create mode 100644 third_party/rust/metal/examples/compute/shaders.metal create mode 100644 third_party/rust/metal/examples/compute/shaders.metallib (limited to 'third_party/rust/metal/examples/compute') diff --git a/third_party/rust/metal/examples/compute/compute-argument-buffer.metal b/third_party/rust/metal/examples/compute/compute-argument-buffer.metal new file mode 100644 index 0000000000..1dcc79daf5 --- /dev/null +++ b/third_party/rust/metal/examples/compute/compute-argument-buffer.metal @@ -0,0 +1,14 @@ +#include + +using namespace metal; + +struct SumInput { + device uint *data; + volatile device atomic_uint *sum; +}; + +kernel void sum(device SumInput& input [[ buffer(0) ]], + uint gid [[ thread_position_in_grid ]]) +{ + atomic_fetch_add_explicit(input.sum, input.data[gid], memory_order_relaxed); +} diff --git a/third_party/rust/metal/examples/compute/compute-argument-buffer.rs b/third_party/rust/metal/examples/compute/compute-argument-buffer.rs new file mode 100644 index 0000000000..97527091a3 --- /dev/null +++ b/third_party/rust/metal/examples/compute/compute-argument-buffer.rs @@ -0,0 +1,95 @@ +// Copyright 2017 GFX developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. + +use metal::*; +use objc::rc::autoreleasepool; +use std::mem; + +static LIBRARY_SRC: &str = include_str!("compute-argument-buffer.metal"); + +fn main() { + autoreleasepool(|| { + let device = Device::system_default().expect("no device found"); + let command_queue = device.new_command_queue(); + + let data = [ + 1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, + ]; + + let buffer = device.new_buffer_with_data( + unsafe { mem::transmute(data.as_ptr()) }, + (data.len() * mem::size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ); + + let sum = { + let data = [0u32]; + device.new_buffer_with_data( + unsafe { mem::transmute(data.as_ptr()) }, + (data.len() * mem::size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ) + }; + + let command_buffer = command_queue.new_command_buffer(); + let encoder = command_buffer.new_compute_command_encoder(); + + let library = device + .new_library_with_source(LIBRARY_SRC, &CompileOptions::new()) + .unwrap(); + let kernel = library.get_function("sum", None).unwrap(); + + let argument_encoder = kernel.new_argument_encoder(0); + let arg_buffer = device.new_buffer( + argument_encoder.encoded_length(), + MTLResourceOptions::empty(), + ); + argument_encoder.set_argument_buffer(&arg_buffer, 0); + argument_encoder.set_buffer(0, &buffer, 0); + argument_encoder.set_buffer(1, &sum, 0); + + let pipeline_state_descriptor = ComputePipelineDescriptor::new(); + pipeline_state_descriptor.set_compute_function(Some(&kernel)); + + let pipeline_state = device + .new_compute_pipeline_state_with_function( + pipeline_state_descriptor.compute_function().unwrap(), + ) + .unwrap(); + + encoder.set_compute_pipeline_state(&pipeline_state); + encoder.set_buffer(0, Some(&arg_buffer), 0); + + encoder.use_resource(&buffer, MTLResourceUsage::Read); + encoder.use_resource(&sum, MTLResourceUsage::Write); + + let width = 16; + + let thread_group_count = MTLSize { + width, + height: 1, + depth: 1, + }; + + let thread_group_size = MTLSize { + width: (data.len() as u64 + width) / width, + height: 1, + depth: 1, + }; + + encoder.dispatch_thread_groups(thread_group_count, thread_group_size); + encoder.end_encoding(); + command_buffer.commit(); + command_buffer.wait_until_completed(); + + let ptr = sum.contents() as *mut u32; + unsafe { + assert_eq!(465, *ptr); + } + }); +} diff --git a/third_party/rust/metal/examples/compute/embedded-lib.rs b/third_party/rust/metal/examples/compute/embedded-lib.rs new file mode 100644 index 0000000000..0fd193abe3 --- /dev/null +++ b/third_party/rust/metal/examples/compute/embedded-lib.rs @@ -0,0 +1,24 @@ +// Copyright 2017 GFX developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. + +use metal::*; +use objc::rc::autoreleasepool; + +fn main() { + let library_data = include_bytes!("shaders.metallib"); + + autoreleasepool(|| { + let device = Device::system_default().expect("no device found"); + + let library = device.new_library_with_data(&library_data[..]).unwrap(); + let kernel = library.get_function("sum", None).unwrap(); + + println!("Function name: {}", kernel.name()); + println!("Function type: {:?}", kernel.function_type()); + println!("OK"); + }); +} diff --git a/third_party/rust/metal/examples/compute/main.rs b/third_party/rust/metal/examples/compute/main.rs new file mode 100644 index 0000000000..b654a666f1 --- /dev/null +++ b/third_party/rust/metal/examples/compute/main.rs @@ -0,0 +1,194 @@ +use metal::*; +use objc::rc::autoreleasepool; +use std::path::PathBuf; + +const NUM_SAMPLES: u64 = 2; + +fn main() { + let num_elements = std::env::args() + .nth(1) + .map(|s| s.parse::().unwrap()) + .unwrap_or(64 * 64); + + autoreleasepool(|| { + let device = Device::system_default().expect("No device found"); + let mut cpu_start = 0; + let mut gpu_start = 0; + device.sample_timestamps(&mut cpu_start, &mut gpu_start); + + let counter_sample_buffer = create_counter_sample_buffer(&device); + let destination_buffer = device.new_buffer( + (std::mem::size_of::() * NUM_SAMPLES as usize) as u64, + MTLResourceOptions::StorageModeShared, + ); + + let counter_sampling_point = MTLCounterSamplingPoint::AtStageBoundary; + assert!(device.supports_counter_sampling(counter_sampling_point)); + + let command_queue = device.new_command_queue(); + let command_buffer = command_queue.new_command_buffer(); + + let compute_pass_descriptor = ComputePassDescriptor::new(); + handle_compute_pass_sample_buffer_attachment( + compute_pass_descriptor, + &counter_sample_buffer, + ); + let encoder = + command_buffer.compute_command_encoder_with_descriptor(compute_pass_descriptor); + + let pipeline_state = create_pipeline_state(&device); + encoder.set_compute_pipeline_state(&pipeline_state); + + let (buffer, sum) = create_input_and_output_buffers(&device, num_elements); + encoder.set_buffer(0, Some(&buffer), 0); + encoder.set_buffer(1, Some(&sum), 0); + + let num_threads = pipeline_state.thread_execution_width(); + + let thread_group_count = MTLSize { + width: ((num_elements as NSUInteger + num_threads) / num_threads), + height: 1, + depth: 1, + }; + + let thread_group_size = MTLSize { + width: num_threads, + height: 1, + depth: 1, + }; + + encoder.dispatch_thread_groups(thread_group_count, thread_group_size); + encoder.end_encoding(); + + resolve_samples_into_buffer(command_buffer, &counter_sample_buffer, &destination_buffer); + + command_buffer.commit(); + command_buffer.wait_until_completed(); + let mut cpu_end = 0; + let mut gpu_end = 0; + device.sample_timestamps(&mut cpu_end, &mut gpu_end); + + let ptr = sum.contents() as *mut u32; + println!("Compute shader sum: {}", unsafe { *ptr }); + + unsafe { + assert_eq!(num_elements, *ptr); + } + + handle_timestamps(&destination_buffer, cpu_start, cpu_end, gpu_start, gpu_end); + }); +} + +fn create_pipeline_state(device: &Device) -> ComputePipelineState { + let library_path = + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("examples/compute/shaders.metallib"); + let library = device.new_library_with_file(library_path).unwrap(); + let kernel = library.get_function("sum", None).unwrap(); + + let pipeline_state_descriptor = ComputePipelineDescriptor::new(); + pipeline_state_descriptor.set_compute_function(Some(&kernel)); + + device + .new_compute_pipeline_state_with_function( + pipeline_state_descriptor.compute_function().unwrap(), + ) + .unwrap() +} + +fn handle_compute_pass_sample_buffer_attachment( + compute_pass_descriptor: &ComputePassDescriptorRef, + counter_sample_buffer: &CounterSampleBufferRef, +) { + let sample_buffer_attachment_descriptor = compute_pass_descriptor + .sample_buffer_attachments() + .object_at(0) + .unwrap(); + + sample_buffer_attachment_descriptor.set_sample_buffer(counter_sample_buffer); + sample_buffer_attachment_descriptor.set_start_of_encoder_sample_index(0); + sample_buffer_attachment_descriptor.set_end_of_encoder_sample_index(1); +} + +fn resolve_samples_into_buffer( + command_buffer: &CommandBufferRef, + counter_sample_buffer: &CounterSampleBufferRef, + destination_buffer: &BufferRef, +) { + let blit_encoder = command_buffer.new_blit_command_encoder(); + blit_encoder.resolve_counters( + counter_sample_buffer, + crate::NSRange::new(0_u64, NUM_SAMPLES), + destination_buffer, + 0_u64, + ); + blit_encoder.end_encoding(); +} + +fn handle_timestamps( + resolved_sample_buffer: &BufferRef, + cpu_start: u64, + cpu_end: u64, + gpu_start: u64, + gpu_end: u64, +) { + let samples = unsafe { + std::slice::from_raw_parts( + resolved_sample_buffer.contents() as *const u64, + NUM_SAMPLES as usize, + ) + }; + let pass_start = samples[0]; + let pass_end = samples[1]; + + let cpu_time_span = cpu_end - cpu_start; + let gpu_time_span = gpu_end - gpu_start; + + let micros = microseconds_between_begin(pass_start, pass_end, gpu_time_span, cpu_time_span); + println!("Compute pass duration: {} µs", micros); +} + +fn create_counter_sample_buffer(device: &Device) -> CounterSampleBuffer { + let counter_sample_buffer_desc = metal::CounterSampleBufferDescriptor::new(); + counter_sample_buffer_desc.set_storage_mode(metal::MTLStorageMode::Shared); + counter_sample_buffer_desc.set_sample_count(NUM_SAMPLES); + let counter_sets = device.counter_sets(); + + let timestamp_counter = counter_sets.iter().find(|cs| cs.name() == "timestamp"); + + counter_sample_buffer_desc + .set_counter_set(timestamp_counter.expect("No timestamp counter found")); + + device + .new_counter_sample_buffer_with_descriptor(&counter_sample_buffer_desc) + .unwrap() +} + +fn create_input_and_output_buffers( + device: &Device, + num_elements: u32, +) -> (metal::Buffer, metal::Buffer) { + let data = vec![1u32; num_elements as usize]; + + let buffer = device.new_buffer_with_data( + unsafe { std::mem::transmute(data.as_ptr()) }, + (data.len() * std::mem::size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ); + + let sum = { + let data = [0u32]; + device.new_buffer_with_data( + unsafe { std::mem::transmute(data.as_ptr()) }, + (data.len() * std::mem::size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ) + }; + (buffer, sum) +} + +/// +fn microseconds_between_begin(begin: u64, end: u64, gpu_time_span: u64, cpu_time_span: u64) -> f64 { + let time_span = (end as f64) - (begin as f64); + let nanoseconds = time_span / (gpu_time_span as f64) * (cpu_time_span as f64); + nanoseconds / 1000.0 +} diff --git a/third_party/rust/metal/examples/compute/shaders.metal b/third_party/rust/metal/examples/compute/shaders.metal new file mode 100644 index 0000000000..51363a1d36 --- /dev/null +++ b/third_party/rust/metal/examples/compute/shaders.metal @@ -0,0 +1,10 @@ +#include + +using namespace metal; + +kernel void sum(device uint *data [[ buffer(0) ]], + volatile device atomic_uint *sum [[ buffer(1) ]], + uint gid [[ thread_position_in_grid ]]) +{ + atomic_fetch_add_explicit(sum, data[gid], memory_order_relaxed); +} diff --git a/third_party/rust/metal/examples/compute/shaders.metallib b/third_party/rust/metal/examples/compute/shaders.metallib new file mode 100644 index 0000000000..af7cb17240 Binary files /dev/null and b/third_party/rust/metal/examples/compute/shaders.metallib differ -- cgit v1.2.3