1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
|
use metal::*;
use winit::{
event::{Event, WindowEvent},
event_loop::{ControlFlow, EventLoop},
platform::macos::WindowExtMacOS,
};
use cocoa::{appkit::NSView, base::id as cocoa_id};
use core_graphics_types::geometry::CGSize;
use objc::{rc::autoreleasepool, runtime::YES};
use std::mem;
// Declare the data structures needed to carry vertex layout to
// metal shading language(MSL) program. Use #[repr(C)], to make
// the data structure compatible with C++ type data structure
// for vertex defined in MSL program as MSL program is broadly
// based on C++
#[repr(C)]
#[derive(Debug)]
pub struct position(cty::c_float, cty::c_float);
#[repr(C)]
#[derive(Debug)]
pub struct color(cty::c_float, cty::c_float, cty::c_float);
#[repr(C)]
#[derive(Debug)]
pub struct AAPLVertex {
p: position,
c: color,
}
fn main() {
// Create a window for viewing the content
let event_loop = EventLoop::new();
let size = winit::dpi::LogicalSize::new(800, 600);
let window = winit::window::WindowBuilder::new()
.with_inner_size(size)
.with_title("Metal".to_string())
.build(&event_loop)
.unwrap();
// Set up the GPU device found in the system
let device = Device::system_default().expect("no device found");
println!("Your device is: {}", device.name(),);
// Scaffold required to sample the GPU and CPU timestamps
let mut cpu_start = 0;
let mut gpu_start = 0;
device.sample_timestamps(&mut cpu_start, &mut gpu_start);
let counter_sample_buffer = create_counter_sample_buffer(&device);
let destination_buffer = device.new_buffer(
(std::mem::size_of::<u64>() * 4 as usize) as u64,
MTLResourceOptions::StorageModeShared,
);
let counter_sampling_point = MTLCounterSamplingPoint::AtStageBoundary;
assert!(device.supports_counter_sampling(counter_sampling_point));
let binary_archive_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("examples/circle/binary_archive.metallib");
let binary_archive_url =
URL::new_with_string(&format!("file://{}", binary_archive_path.display()));
let binary_archive_descriptor = BinaryArchiveDescriptor::new();
if binary_archive_path.exists() {
binary_archive_descriptor.set_url(&binary_archive_url);
}
// Set up a binary archive to cache compiled shaders.
let binary_archive = device
.new_binary_archive_with_descriptor(&binary_archive_descriptor)
.unwrap();
let library_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("examples/circle/shaders.metallib");
// Use the metallib file generated out of .metal shader file
let library = device.new_library_with_file(library_path).unwrap();
// The render pipeline generated from the vertex and fragment shaders in the .metal shader file.
let pipeline_state = prepare_pipeline_state(&device, &library, &binary_archive);
// Serialize the binary archive to disk.
binary_archive
.serialize_to_url(&binary_archive_url)
.unwrap();
// Set the command queue used to pass commands to the device.
let command_queue = device.new_command_queue();
// Currently, MetalLayer is the only interface that provide
// layers to carry drawable texture from GPU rendaring through metal
// library to viewable windows.
let layer = MetalLayer::new();
layer.set_device(&device);
layer.set_pixel_format(MTLPixelFormat::BGRA8Unorm);
layer.set_presents_with_transaction(false);
unsafe {
let view = window.ns_view() as cocoa_id;
view.setWantsLayer(YES);
view.setLayer(mem::transmute(layer.as_ref()));
}
let draw_size = window.inner_size();
layer.set_drawable_size(CGSize::new(draw_size.width as f64, draw_size.height as f64));
let vbuf = {
let vertex_data = create_vertex_points_for_circle();
let vertex_data = vertex_data.as_slice();
device.new_buffer_with_data(
vertex_data.as_ptr() as *const _,
(vertex_data.len() * mem::size_of::<AAPLVertex>()) as u64,
MTLResourceOptions::CPUCacheModeDefaultCache | MTLResourceOptions::StorageModeManaged,
)
};
event_loop.run(move |event, _, control_flow| {
autoreleasepool(|| {
// ControlFlow::Wait pauses the event loop if no events are available to process.
// This is ideal for non-game applications that only update in response to user
// input, and uses significantly less power/CPU time than ControlFlow::Poll.
*control_flow = ControlFlow::Wait;
match event {
Event::WindowEvent {
event: WindowEvent::CloseRequested,
..
} => {
println!("The close button was pressed; stopping");
*control_flow = ControlFlow::Exit
}
Event::MainEventsCleared => {
// Queue a RedrawRequested event.
window.request_redraw();
}
Event::RedrawRequested(_) => {
// It's preferrable to render in this event rather than in MainEventsCleared, since
// rendering in here allows the program to gracefully handle redraws requested
// by the OS.
let drawable = match layer.next_drawable() {
Some(drawable) => drawable,
None => return,
};
// Create a new command buffer for each render pass to the current drawable
let command_buffer = command_queue.new_command_buffer();
// Obtain a renderPassDescriptor generated from the view's drawable textures.
let render_pass_descriptor = RenderPassDescriptor::new();
handle_render_pass_color_attachment(
&render_pass_descriptor,
drawable.texture(),
);
handle_render_pass_sample_buffer_attachment(
&render_pass_descriptor,
&counter_sample_buffer,
);
// Create a render command encoder.
let encoder =
command_buffer.new_render_command_encoder(&render_pass_descriptor);
encoder.set_render_pipeline_state(&pipeline_state);
// Pass in the parameter data.
encoder.set_vertex_buffer(0, Some(&vbuf), 0);
// Draw the triangles which will eventually form the circle.
encoder.draw_primitives(MTLPrimitiveType::TriangleStrip, 0, 1080);
encoder.end_encoding();
resolve_samples_into_buffer(
&command_buffer,
&counter_sample_buffer,
&destination_buffer,
);
// Schedule a present once the framebuffer is complete using the current drawable.
command_buffer.present_drawable(&drawable);
// Finalize rendering here & push the command buffer to the GPU.
command_buffer.commit();
command_buffer.wait_until_completed();
let mut cpu_end = 0;
let mut gpu_end = 0;
device.sample_timestamps(&mut cpu_end, &mut gpu_end);
handle_timestamps(&destination_buffer, cpu_start, cpu_end, gpu_start, gpu_end);
}
_ => (),
}
});
});
}
// If we want to draw a circle, we need to draw it out of the three primitive
// types available with metal framework. Triangle is used in this case to form
// the circle. If we consider a circle to be total of 360 degree at center, we
// can form small triangle with one point at origin and two points at the
// perimeter of the circle for each degree. Eventually, if we can take enough
// triangle virtices for total of 360 degree, the triangles together will
// form a circle. This function captures the triangle vertices for each degree
// and push the co-ordinates of the vertices to a rust vector
fn create_vertex_points_for_circle() -> Vec<AAPLVertex> {
let mut v: Vec<AAPLVertex> = Vec::new();
let origin_x: f32 = 0.0;
let origin_y: f32 = 0.0;
// Size of the circle
let circle_size = 0.8f32;
for i in 0..720 {
let y = i as f32;
// Get the X co-ordinate of each point on the perimeter of circle
let position_x: f32 = y.to_radians().cos() * 100.0;
let position_x: f32 = position_x.trunc() / 100.0;
// Set the size of the circle
let position_x: f32 = position_x * circle_size;
// Get the Y co-ordinate of each point on the perimeter of circle
let position_y: f32 = y.to_radians().sin() * 100.0;
let position_y: f32 = position_y.trunc() / 100.0;
// Set the size of the circle
let position_y: f32 = position_y * circle_size;
v.push(AAPLVertex {
p: position(position_x, position_y),
c: color(0.7, 0.3, 0.5),
});
if (i + 1) % 2 == 0 {
// For each two points on perimeter, push one point of origin
v.push(AAPLVertex {
p: position(origin_x, origin_y),
c: color(0.2, 0.7, 0.4),
});
}
}
v
}
fn handle_render_pass_sample_buffer_attachment(
descriptor: &RenderPassDescriptorRef,
counter_sample_buffer: &CounterSampleBufferRef,
) {
let sample_buffer_attachment_descriptor =
descriptor.sample_buffer_attachments().object_at(0).unwrap();
sample_buffer_attachment_descriptor.set_sample_buffer(&counter_sample_buffer);
sample_buffer_attachment_descriptor.set_start_of_vertex_sample_index(0 as NSUInteger);
sample_buffer_attachment_descriptor.set_end_of_vertex_sample_index(1 as NSUInteger);
sample_buffer_attachment_descriptor.set_start_of_fragment_sample_index(2 as NSUInteger);
sample_buffer_attachment_descriptor.set_end_of_fragment_sample_index(3 as NSUInteger);
}
fn handle_render_pass_color_attachment(descriptor: &RenderPassDescriptorRef, texture: &TextureRef) {
let color_attachment = descriptor.color_attachments().object_at(0).unwrap();
color_attachment.set_texture(Some(texture));
color_attachment.set_load_action(MTLLoadAction::Clear);
// Setting a background color
color_attachment.set_clear_color(MTLClearColor::new(0.5, 0.5, 0.8, 1.0));
color_attachment.set_store_action(MTLStoreAction::Store);
}
fn prepare_pipeline_state(
device: &Device,
library: &Library,
binary_archive: &BinaryArchive,
) -> RenderPipelineState {
let vert = library.get_function("vs", None).unwrap();
let frag = library.get_function("ps", None).unwrap();
let pipeline_state_descriptor = RenderPipelineDescriptor::new();
pipeline_state_descriptor.set_vertex_function(Some(&vert));
pipeline_state_descriptor.set_fragment_function(Some(&frag));
pipeline_state_descriptor
.color_attachments()
.object_at(0)
.unwrap()
.set_pixel_format(MTLPixelFormat::BGRA8Unorm);
// Set the binary archives to search for a cached pipeline in.
pipeline_state_descriptor.set_binary_archives(&[binary_archive]);
// Add the pipeline descriptor to the binary archive cache.
binary_archive
.add_render_pipeline_functions_with_descriptor(&pipeline_state_descriptor)
.unwrap();
device
.new_render_pipeline_state(&pipeline_state_descriptor)
.unwrap()
}
fn resolve_samples_into_buffer(
command_buffer: &CommandBufferRef,
counter_sample_buffer: &CounterSampleBufferRef,
destination_buffer: &BufferRef,
) {
let blit_encoder = command_buffer.new_blit_command_encoder();
blit_encoder.resolve_counters(
&counter_sample_buffer,
crate::NSRange::new(0_u64, 4),
&destination_buffer,
0_u64,
);
blit_encoder.end_encoding();
}
fn handle_timestamps(
resolved_sample_buffer: &BufferRef,
cpu_start: u64,
cpu_end: u64,
gpu_start: u64,
gpu_end: u64,
) {
let samples = unsafe {
std::slice::from_raw_parts(resolved_sample_buffer.contents() as *const u64, 4 as usize)
};
let vertex_pass_start = samples[0];
let vertex_pass_end = samples[1];
let fragment_pass_start = samples[2];
let fragment_pass_end = samples[3];
let cpu_time_span = cpu_end - cpu_start;
let gpu_time_span = gpu_end - gpu_start;
let vertex_micros = microseconds_between_begin(
vertex_pass_start,
vertex_pass_end,
gpu_time_span,
cpu_time_span,
);
let fragment_micros = microseconds_between_begin(
fragment_pass_start,
fragment_pass_end,
gpu_time_span,
cpu_time_span,
);
println!("Vertex pass duration: {:.2} µs", vertex_micros);
println!("Fragment pass duration: {:.2} µs\n", fragment_micros);
}
fn create_counter_sample_buffer(device: &Device) -> CounterSampleBuffer {
let counter_sample_buffer_desc = metal::CounterSampleBufferDescriptor::new();
counter_sample_buffer_desc.set_storage_mode(metal::MTLStorageMode::Shared);
counter_sample_buffer_desc.set_sample_count(4_u64);
counter_sample_buffer_desc.set_counter_set(&fetch_timestamp_counter_set(device));
device
.new_counter_sample_buffer_with_descriptor(&counter_sample_buffer_desc)
.unwrap()
}
fn fetch_timestamp_counter_set(device: &Device) -> metal::CounterSet {
let counter_sets = device.counter_sets();
let mut timestamp_counter = None;
for cs in counter_sets.iter() {
if cs.name() == "timestamp" {
timestamp_counter = Some(cs);
break;
}
}
timestamp_counter
.expect("No timestamp counter found")
.clone()
}
/// <https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time>
fn microseconds_between_begin(begin: u64, end: u64, gpu_time_span: u64, cpu_time_span: u64) -> f64 {
let time_span = (end as f64) - (begin as f64);
let nanoseconds = time_span / (gpu_time_span as f64) * (cpu_time_span as f64);
let microseconds = nanoseconds / 1000.0;
return microseconds;
}
|