summaryrefslogtreecommitdiffstats
path: root/third_party/rust/metal/examples/compute/main.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/metal/examples/compute/main.rs
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/metal/examples/compute/main.rs')
-rw-r--r--third_party/rust/metal/examples/compute/main.rs194
1 files changed, 194 insertions, 0 deletions
diff --git a/third_party/rust/metal/examples/compute/main.rs b/third_party/rust/metal/examples/compute/main.rs
new file mode 100644
index 0000000000..b654a666f1
--- /dev/null
+++ b/third_party/rust/metal/examples/compute/main.rs
@@ -0,0 +1,194 @@
+use metal::*;
+use objc::rc::autoreleasepool;
+use std::path::PathBuf;
+
+const NUM_SAMPLES: u64 = 2;
+
+fn main() {
+ let num_elements = std::env::args()
+ .nth(1)
+ .map(|s| s.parse::<u32>().unwrap())
+ .unwrap_or(64 * 64);
+
+ autoreleasepool(|| {
+ let device = Device::system_default().expect("No device found");
+ let mut cpu_start = 0;
+ let mut gpu_start = 0;
+ device.sample_timestamps(&mut cpu_start, &mut gpu_start);
+
+ let counter_sample_buffer = create_counter_sample_buffer(&device);
+ let destination_buffer = device.new_buffer(
+ (std::mem::size_of::<u64>() * NUM_SAMPLES as usize) as u64,
+ MTLResourceOptions::StorageModeShared,
+ );
+
+ let counter_sampling_point = MTLCounterSamplingPoint::AtStageBoundary;
+ assert!(device.supports_counter_sampling(counter_sampling_point));
+
+ let command_queue = device.new_command_queue();
+ let command_buffer = command_queue.new_command_buffer();
+
+ let compute_pass_descriptor = ComputePassDescriptor::new();
+ handle_compute_pass_sample_buffer_attachment(
+ compute_pass_descriptor,
+ &counter_sample_buffer,
+ );
+ let encoder =
+ command_buffer.compute_command_encoder_with_descriptor(compute_pass_descriptor);
+
+ let pipeline_state = create_pipeline_state(&device);
+ encoder.set_compute_pipeline_state(&pipeline_state);
+
+ let (buffer, sum) = create_input_and_output_buffers(&device, num_elements);
+ encoder.set_buffer(0, Some(&buffer), 0);
+ encoder.set_buffer(1, Some(&sum), 0);
+
+ let num_threads = pipeline_state.thread_execution_width();
+
+ let thread_group_count = MTLSize {
+ width: ((num_elements as NSUInteger + num_threads) / num_threads),
+ height: 1,
+ depth: 1,
+ };
+
+ let thread_group_size = MTLSize {
+ width: num_threads,
+ height: 1,
+ depth: 1,
+ };
+
+ encoder.dispatch_thread_groups(thread_group_count, thread_group_size);
+ encoder.end_encoding();
+
+ resolve_samples_into_buffer(command_buffer, &counter_sample_buffer, &destination_buffer);
+
+ command_buffer.commit();
+ command_buffer.wait_until_completed();
+ let mut cpu_end = 0;
+ let mut gpu_end = 0;
+ device.sample_timestamps(&mut cpu_end, &mut gpu_end);
+
+ let ptr = sum.contents() as *mut u32;
+ println!("Compute shader sum: {}", unsafe { *ptr });
+
+ unsafe {
+ assert_eq!(num_elements, *ptr);
+ }
+
+ handle_timestamps(&destination_buffer, cpu_start, cpu_end, gpu_start, gpu_end);
+ });
+}
+
+fn create_pipeline_state(device: &Device) -> ComputePipelineState {
+ let library_path =
+ PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("examples/compute/shaders.metallib");
+ let library = device.new_library_with_file(library_path).unwrap();
+ let kernel = library.get_function("sum", None).unwrap();
+
+ let pipeline_state_descriptor = ComputePipelineDescriptor::new();
+ pipeline_state_descriptor.set_compute_function(Some(&kernel));
+
+ device
+ .new_compute_pipeline_state_with_function(
+ pipeline_state_descriptor.compute_function().unwrap(),
+ )
+ .unwrap()
+}
+
+fn handle_compute_pass_sample_buffer_attachment(
+ compute_pass_descriptor: &ComputePassDescriptorRef,
+ counter_sample_buffer: &CounterSampleBufferRef,
+) {
+ let sample_buffer_attachment_descriptor = compute_pass_descriptor
+ .sample_buffer_attachments()
+ .object_at(0)
+ .unwrap();
+
+ sample_buffer_attachment_descriptor.set_sample_buffer(counter_sample_buffer);
+ sample_buffer_attachment_descriptor.set_start_of_encoder_sample_index(0);
+ sample_buffer_attachment_descriptor.set_end_of_encoder_sample_index(1);
+}
+
+fn resolve_samples_into_buffer(
+ command_buffer: &CommandBufferRef,
+ counter_sample_buffer: &CounterSampleBufferRef,
+ destination_buffer: &BufferRef,
+) {
+ let blit_encoder = command_buffer.new_blit_command_encoder();
+ blit_encoder.resolve_counters(
+ counter_sample_buffer,
+ crate::NSRange::new(0_u64, NUM_SAMPLES),
+ destination_buffer,
+ 0_u64,
+ );
+ blit_encoder.end_encoding();
+}
+
+fn handle_timestamps(
+ resolved_sample_buffer: &BufferRef,
+ cpu_start: u64,
+ cpu_end: u64,
+ gpu_start: u64,
+ gpu_end: u64,
+) {
+ let samples = unsafe {
+ std::slice::from_raw_parts(
+ resolved_sample_buffer.contents() as *const u64,
+ NUM_SAMPLES as usize,
+ )
+ };
+ let pass_start = samples[0];
+ let pass_end = samples[1];
+
+ let cpu_time_span = cpu_end - cpu_start;
+ let gpu_time_span = gpu_end - gpu_start;
+
+ let micros = microseconds_between_begin(pass_start, pass_end, gpu_time_span, cpu_time_span);
+ println!("Compute pass duration: {} µs", micros);
+}
+
+fn create_counter_sample_buffer(device: &Device) -> CounterSampleBuffer {
+ let counter_sample_buffer_desc = metal::CounterSampleBufferDescriptor::new();
+ counter_sample_buffer_desc.set_storage_mode(metal::MTLStorageMode::Shared);
+ counter_sample_buffer_desc.set_sample_count(NUM_SAMPLES);
+ let counter_sets = device.counter_sets();
+
+ let timestamp_counter = counter_sets.iter().find(|cs| cs.name() == "timestamp");
+
+ counter_sample_buffer_desc
+ .set_counter_set(timestamp_counter.expect("No timestamp counter found"));
+
+ device
+ .new_counter_sample_buffer_with_descriptor(&counter_sample_buffer_desc)
+ .unwrap()
+}
+
+fn create_input_and_output_buffers(
+ device: &Device,
+ num_elements: u32,
+) -> (metal::Buffer, metal::Buffer) {
+ let data = vec![1u32; num_elements as usize];
+
+ let buffer = device.new_buffer_with_data(
+ unsafe { std::mem::transmute(data.as_ptr()) },
+ (data.len() * std::mem::size_of::<u32>()) as u64,
+ MTLResourceOptions::CPUCacheModeDefaultCache,
+ );
+
+ let sum = {
+ let data = [0u32];
+ device.new_buffer_with_data(
+ unsafe { std::mem::transmute(data.as_ptr()) },
+ (data.len() * std::mem::size_of::<u32>()) as u64,
+ MTLResourceOptions::CPUCacheModeDefaultCache,
+ )
+ };
+ (buffer, sum)
+}
+
+/// <https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time>
+fn microseconds_between_begin(begin: u64, end: u64, gpu_time_span: u64, cpu_time_span: u64) -> f64 {
+ let time_span = (end as f64) - (begin as f64);
+ let nanoseconds = time_span / (gpu_time_span as f64) * (cpu_time_span as f64);
+ nanoseconds / 1000.0
+}