From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- third_party/rust/metal/examples/compute/main.rs | 194 ++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 third_party/rust/metal/examples/compute/main.rs (limited to 'third_party/rust/metal/examples/compute/main.rs') diff --git a/third_party/rust/metal/examples/compute/main.rs b/third_party/rust/metal/examples/compute/main.rs new file mode 100644 index 0000000000..b654a666f1 --- /dev/null +++ b/third_party/rust/metal/examples/compute/main.rs @@ -0,0 +1,194 @@ +use metal::*; +use objc::rc::autoreleasepool; +use std::path::PathBuf; + +const NUM_SAMPLES: u64 = 2; + +fn main() { + let num_elements = std::env::args() + .nth(1) + .map(|s| s.parse::().unwrap()) + .unwrap_or(64 * 64); + + autoreleasepool(|| { + let device = Device::system_default().expect("No device found"); + let mut cpu_start = 0; + let mut gpu_start = 0; + device.sample_timestamps(&mut cpu_start, &mut gpu_start); + + let counter_sample_buffer = create_counter_sample_buffer(&device); + let destination_buffer = device.new_buffer( + (std::mem::size_of::() * NUM_SAMPLES as usize) as u64, + MTLResourceOptions::StorageModeShared, + ); + + let counter_sampling_point = MTLCounterSamplingPoint::AtStageBoundary; + assert!(device.supports_counter_sampling(counter_sampling_point)); + + let command_queue = device.new_command_queue(); + let command_buffer = command_queue.new_command_buffer(); + + let compute_pass_descriptor = ComputePassDescriptor::new(); + handle_compute_pass_sample_buffer_attachment( + compute_pass_descriptor, + &counter_sample_buffer, + ); + let encoder = + command_buffer.compute_command_encoder_with_descriptor(compute_pass_descriptor); + + let pipeline_state = create_pipeline_state(&device); + encoder.set_compute_pipeline_state(&pipeline_state); + + let (buffer, sum) = create_input_and_output_buffers(&device, num_elements); + encoder.set_buffer(0, Some(&buffer), 0); + encoder.set_buffer(1, Some(&sum), 0); + + let num_threads = pipeline_state.thread_execution_width(); + + let thread_group_count = MTLSize { + width: ((num_elements as NSUInteger + num_threads) / num_threads), + height: 1, + depth: 1, + }; + + let thread_group_size = MTLSize { + width: num_threads, + height: 1, + depth: 1, + }; + + encoder.dispatch_thread_groups(thread_group_count, thread_group_size); + encoder.end_encoding(); + + resolve_samples_into_buffer(command_buffer, &counter_sample_buffer, &destination_buffer); + + command_buffer.commit(); + command_buffer.wait_until_completed(); + let mut cpu_end = 0; + let mut gpu_end = 0; + device.sample_timestamps(&mut cpu_end, &mut gpu_end); + + let ptr = sum.contents() as *mut u32; + println!("Compute shader sum: {}", unsafe { *ptr }); + + unsafe { + assert_eq!(num_elements, *ptr); + } + + handle_timestamps(&destination_buffer, cpu_start, cpu_end, gpu_start, gpu_end); + }); +} + +fn create_pipeline_state(device: &Device) -> ComputePipelineState { + let library_path = + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("examples/compute/shaders.metallib"); + let library = device.new_library_with_file(library_path).unwrap(); + let kernel = library.get_function("sum", None).unwrap(); + + let pipeline_state_descriptor = ComputePipelineDescriptor::new(); + pipeline_state_descriptor.set_compute_function(Some(&kernel)); + + device + .new_compute_pipeline_state_with_function( + pipeline_state_descriptor.compute_function().unwrap(), + ) + .unwrap() +} + +fn handle_compute_pass_sample_buffer_attachment( + compute_pass_descriptor: &ComputePassDescriptorRef, + counter_sample_buffer: &CounterSampleBufferRef, +) { + let sample_buffer_attachment_descriptor = compute_pass_descriptor + .sample_buffer_attachments() + .object_at(0) + .unwrap(); + + sample_buffer_attachment_descriptor.set_sample_buffer(counter_sample_buffer); + sample_buffer_attachment_descriptor.set_start_of_encoder_sample_index(0); + sample_buffer_attachment_descriptor.set_end_of_encoder_sample_index(1); +} + +fn resolve_samples_into_buffer( + command_buffer: &CommandBufferRef, + counter_sample_buffer: &CounterSampleBufferRef, + destination_buffer: &BufferRef, +) { + let blit_encoder = command_buffer.new_blit_command_encoder(); + blit_encoder.resolve_counters( + counter_sample_buffer, + crate::NSRange::new(0_u64, NUM_SAMPLES), + destination_buffer, + 0_u64, + ); + blit_encoder.end_encoding(); +} + +fn handle_timestamps( + resolved_sample_buffer: &BufferRef, + cpu_start: u64, + cpu_end: u64, + gpu_start: u64, + gpu_end: u64, +) { + let samples = unsafe { + std::slice::from_raw_parts( + resolved_sample_buffer.contents() as *const u64, + NUM_SAMPLES as usize, + ) + }; + let pass_start = samples[0]; + let pass_end = samples[1]; + + let cpu_time_span = cpu_end - cpu_start; + let gpu_time_span = gpu_end - gpu_start; + + let micros = microseconds_between_begin(pass_start, pass_end, gpu_time_span, cpu_time_span); + println!("Compute pass duration: {} µs", micros); +} + +fn create_counter_sample_buffer(device: &Device) -> CounterSampleBuffer { + let counter_sample_buffer_desc = metal::CounterSampleBufferDescriptor::new(); + counter_sample_buffer_desc.set_storage_mode(metal::MTLStorageMode::Shared); + counter_sample_buffer_desc.set_sample_count(NUM_SAMPLES); + let counter_sets = device.counter_sets(); + + let timestamp_counter = counter_sets.iter().find(|cs| cs.name() == "timestamp"); + + counter_sample_buffer_desc + .set_counter_set(timestamp_counter.expect("No timestamp counter found")); + + device + .new_counter_sample_buffer_with_descriptor(&counter_sample_buffer_desc) + .unwrap() +} + +fn create_input_and_output_buffers( + device: &Device, + num_elements: u32, +) -> (metal::Buffer, metal::Buffer) { + let data = vec![1u32; num_elements as usize]; + + let buffer = device.new_buffer_with_data( + unsafe { std::mem::transmute(data.as_ptr()) }, + (data.len() * std::mem::size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ); + + let sum = { + let data = [0u32]; + device.new_buffer_with_data( + unsafe { std::mem::transmute(data.as_ptr()) }, + (data.len() * std::mem::size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ) + }; + (buffer, sum) +} + +/// +fn microseconds_between_begin(begin: u64, end: u64, gpu_time_span: u64, cpu_time_span: u64) -> f64 { + let time_span = (end as f64) - (begin as f64); + let nanoseconds = time_span / (gpu_time_span as f64) * (cpu_time_span as f64); + nanoseconds / 1000.0 +} -- cgit v1.2.3