diff options
Diffstat (limited to 'third_party/rust/wgpu-hal')
28 files changed, 1021 insertions, 189 deletions
diff --git a/third_party/rust/wgpu-hal/.cargo-checksum.json b/third_party/rust/wgpu-hal/.cargo-checksum.json index 2cba88cfdf..265725f165 100644 --- a/third_party/rust/wgpu-hal/.cargo-checksum.json +++ b/third_party/rust/wgpu-hal/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"332863a1b354637fc62ff5729e7c2f5089fa65db05d330ec268a7aab04bb3d42","LICENSE.APACHE":"a6cba85bc92e0cff7a450b1d873c0eaa2e9fc96bf472df0247a26bec77bf3ff9","LICENSE.MIT":"c7fea58d1cfe49634cd92e54fc10a9d871f4b275321a4cd8c09e449122caaeb4","README.md":"099ee611a911dc19330a61bffcde13663929a51b25ac528ee33ea796d695491e","build.rs":"c80bdc0152a00471eec6ed0dd0f7d55d0b975498a00ba05e94100c84ad639a49","examples/halmark/main.rs":"4604737f714943383c57feac2b8468ecf15e9e60c54a5303455e9953ec5c79fb","examples/halmark/shader.wgsl":"26c256ec36d6f0e9a1647431ca772766bee4382d64eaa718ba7b488dcfb6bcca","examples/raw-gles.em.html":"70fbe68394a1a4522192de1dcfaf7d399f60d7bdf5de70b708f9bb0417427546","examples/raw-gles.rs":"095113a1ba0851652a77aabfc8fa6ea7edcc2d09e91fd1e5009ead87d5998ea9","examples/ray-traced-triangle/main.rs":"955c2b8700c3b2daf14e9ef963ff499ed185b6f349dbc63caa422b2cf4942a1f","examples/ray-traced-triangle/shader.wgsl":"cc10caf92746724a71f6dd0dbc3a71e57b37c7d1d83278556805a535c0728a9d","src/auxil/dxgi/conv.rs":"760cd4eaa79b530368a30140b96bf73ac4fbdb4025eb95f0bed581638c8bb1cb","src/auxil/dxgi/exception.rs":"f0cfb5a0adcdc3b6db909601fee51ad51368f5da269bcd46e4dbea45a3bec4b1","src/auxil/dxgi/factory.rs":"5f861fbfe2f4cce08722a95283549b8f62b96f24a306d080d9f1730ae53501d8","src/auxil/dxgi/mod.rs":"a202564d9ac97530b16a234b87d180cd345aae705e082a9b1177dcde813645f9","src/auxil/dxgi/result.rs":"79fe5aa17a2b21a7f06b1b604200c3c3e73fca31e8193aab80b5b15e7e9818a0","src/auxil/dxgi/time.rs":"b6f966b250e9424d5d7e4065f2108cba87197c1e30baae6d87083055d1bc5a4b","src/auxil/mod.rs":"720ef2aae258733322a3274fd858f91effb8951dabaf7bbfd8a9a0be2d2dba97","src/auxil/renderdoc.rs":"c2f849f70f576b0c9b0d32dd155b6a6353f74dff59cbeeaa994a12789d047c0f","src/dx12/adapter.rs":"2a90a4222702e50fabfc64339ac2aa667e1ee2d2bf801c3e2e59a91e261c7a04","src/dx12/command.rs":"e0675560784214a18e078062cbd0965c21a35c99eecf0e697d1badb9c692db35","src/dx12/conv.rs":"94d35f117ae003b07049f3a0bc6c45a0ffda9fb8053233d39c173cfb1b644403","src/dx12/descriptor.rs":"e06eb08bee4c805fa76b6ab791893b5b563ee60de9c8f8d8e0e21ab97ade5664","src/dx12/device.rs":"6a38dabd4db9d7ca79bfcfbeb3483320ad88f2db6e93a7210051bbf81acb1bbd","src/dx12/instance.rs":"030a86ac810d5ebf151328bbc06b9b5a89788721aa963cb4e0f3ff943b2c8633","src/dx12/mod.rs":"87e7012a113554c976abdbecd10b2fe6aab3ba695c88dc77d6beb3880d96ba54","src/dx12/shader_compilation.rs":"5087adb8576e2d7751619dfdf8b37c573bb4e494290c594077ca3208cce1e746","src/dx12/suballocation.rs":"6939fc36223a15cc070c744d0418f9ac6fa2829d794af17cdea7c61eb5f8d2c0","src/dx12/types.rs":"9573736baaa0ef607367c3b72144556d24faf677a26bb8df49a4372a1348e06b","src/dx12/view.rs":"792772e9c87840dcd045b7381a03162eb4a501492a95ca586e77e81aed621c67","src/empty.rs":"0849e0b7210d33145b3fb1368ed08b13fbc2144b1ccc0a1af913896e916bbe46","src/gles/adapter.rs":"22fe404177d4abb077f35a5be54295f81eacb021d448ad01aa96450ade8b089a","src/gles/command.rs":"788505168ae1af312781c86d31d5bc289fcc4e2b3cb047fb41deb62542db1a95","src/gles/conv.rs":"5d15d3a33032d32ff99bc338fba0689fa54c76d0714e335fe48523d841df386f","src/gles/device.rs":"e126042653b0651a12684504e19c05db3fe333bde3ac408063675eef6c27754a","src/gles/egl.rs":"72feefbcd399e686b5c210a67151ef040e45e123ad371539274d1d52b3dd162d","src/gles/emscripten.rs":"19bb73a9d140645f3f32cd48b002151711a9b8456e213eab5f3a2be79239e147","src/gles/mod.rs":"b8999f76ad45e07312b291457100f12699ba6a2635c1f1913b0648e9a9394015","src/gles/queue.rs":"7f62e9c1cb670f66ac8bb0d62cd531b9c11b2a29f8ed28736c40e21071fbe25d","src/gles/shaders/clear.frag":"9133ed8ed97d3641fbb6b5f5ea894a3554c629ccc1b80a5fc9221d7293aa1954","src/gles/shaders/clear.vert":"a543768725f4121ff2e9e1fb5b00644931e9d6f2f946c0ef01968afb5a135abd","src/gles/shaders/srgb_present.frag":"dd9a43c339a2fa4ccf7f6a1854c6f400cabf271a7d5e9230768e9f39d47f3ff5","src/gles/shaders/srgb_present.vert":"6e85d489403d80b81cc94790730bb53b309dfc5eeede8f1ea3412a660f31d357","src/gles/web.rs":"000ed39eae4a6442ca16bdca68e1e61459e6815a5db8eeec53e0ee0f5f14ae66","src/gles/wgl.rs":"78bfe5bcdacfde8d8df0d6f3987048f8ad8a2c2c93df6d607cf5a16795d3bb8e","src/lib.rs":"b80b7d14f524f25875b1d7e7ee67cd6057bbe9944ea92b59d5d4d08d7b17580d","src/metal/adapter.rs":"e473a1857817030ee5c5358e8387daff66d1aca1f5133224cc844ea4b17d3e49","src/metal/command.rs":"61640f2cb7269f44487df2d911771fbe1f78484d44a5363a6ddfce7d77143b02","src/metal/conv.rs":"0bce6a8d0ccef16783475803d70d35e03ab7938c19374e22c9d253abe1f8b111","src/metal/device.rs":"1343ad60a423b81649a6eaad69de743a2247234a75ec48c0698973af67592e48","src/metal/mod.rs":"3e3f69ac864cb6a96c5b9df551a934bbc4bf08503430b9507561eef973daf57f","src/metal/surface.rs":"397dab6726eead96f0be9ecb6686874e691079da94c072921a6422e9b1284fb5","src/metal/time.rs":"c32d69f30e846dfcc0e39e01097fb80df63b2bebb6586143bb62494999850246","src/vulkan/adapter.rs":"ac4525114d37d6ffa31f2c240b2bb9e1d97d19cd6d400598c2a1153d9144bd0f","src/vulkan/command.rs":"82060e8040eea27b717ec676525139a9c7238074ad5ce6902f5174ac5c7aa048","src/vulkan/conv.rs":"7e6266e3a0b7d0b8d5d51362a0386a84bc047350eeac663b6352a94d5e5c0a87","src/vulkan/device.rs":"24abe83a2cbe8fa3377d0fb8d64fcaac3a3e6b0521ec28143fa8b7dee0c1afee","src/vulkan/instance.rs":"60742c3c1feee63eca0953165dae57c86b0d58e496ad374743ff83ae50a4e8f1","src/vulkan/mod.rs":"dfd8e079b7b783de3dfdfbf2349d7283c1b9108242d1ffbb4e072376901c7bd1"},"package":null}
\ No newline at end of file +{"files":{"Cargo.toml":"8c0a58e1b89cd82b644379eaaa71825499293fe0667192812e1cf41ecec6fd42","LICENSE.APACHE":"a6cba85bc92e0cff7a450b1d873c0eaa2e9fc96bf472df0247a26bec77bf3ff9","LICENSE.MIT":"c7fea58d1cfe49634cd92e54fc10a9d871f4b275321a4cd8c09e449122caaeb4","README.md":"b432248c4d5c85925ea2d741dc53069111619631a009994dbeaef980993d23a2","build.rs":"c80bdc0152a00471eec6ed0dd0f7d55d0b975498a00ba05e94100c84ad639a49","examples/halmark/main.rs":"c5debf7ff46b76fcdd4663c4cbd365d0015ac96c200488f6de34f914ba47d6e3","examples/halmark/shader.wgsl":"26c256ec36d6f0e9a1647431ca772766bee4382d64eaa718ba7b488dcfb6bcca","examples/raw-gles.em.html":"70fbe68394a1a4522192de1dcfaf7d399f60d7bdf5de70b708f9bb0417427546","examples/raw-gles.rs":"095113a1ba0851652a77aabfc8fa6ea7edcc2d09e91fd1e5009ead87d5998ea9","examples/ray-traced-triangle/main.rs":"5679fb4794966d9132b8d15471526f6adce58378bd319ae3f345494d2a216ddc","examples/ray-traced-triangle/shader.wgsl":"cc10caf92746724a71f6dd0dbc3a71e57b37c7d1d83278556805a535c0728a9d","src/auxil/dxgi/conv.rs":"134a1459339537a462448d5e1eb117931f450c6724bd9cb8388f39870c466eb3","src/auxil/dxgi/exception.rs":"f0cfb5a0adcdc3b6db909601fee51ad51368f5da269bcd46e4dbea45a3bec4b1","src/auxil/dxgi/factory.rs":"5f861fbfe2f4cce08722a95283549b8f62b96f24a306d080d9f1730ae53501d8","src/auxil/dxgi/mod.rs":"a202564d9ac97530b16a234b87d180cd345aae705e082a9b1177dcde813645f9","src/auxil/dxgi/result.rs":"79fe5aa17a2b21a7f06b1b604200c3c3e73fca31e8193aab80b5b15e7e9818a0","src/auxil/dxgi/time.rs":"b6f966b250e9424d5d7e4065f2108cba87197c1e30baae6d87083055d1bc5a4b","src/auxil/mod.rs":"720ef2aae258733322a3274fd858f91effb8951dabaf7bbfd8a9a0be2d2dba97","src/auxil/renderdoc.rs":"c2f849f70f576b0c9b0d32dd155b6a6353f74dff59cbeeaa994a12789d047c0f","src/dx12/adapter.rs":"757fe846d1114bcd2343b8de405869377e1b3930e7e70a4908f07dc5284e0fca","src/dx12/command.rs":"e0675560784214a18e078062cbd0965c21a35c99eecf0e697d1badb9c692db35","src/dx12/conv.rs":"8efa0ecfaaef2d0aaca4a17a4ed293d4921b82383cbce90bec1b2fef41cb3462","src/dx12/descriptor.rs":"e06eb08bee4c805fa76b6ab791893b5b563ee60de9c8f8d8e0e21ab97ade5664","src/dx12/device.rs":"30e1d9ad16a077fdcb8091eed7fe90ba8c51edb905b2908dcdb3c85113cb88a3","src/dx12/instance.rs":"030a86ac810d5ebf151328bbc06b9b5a89788721aa963cb4e0f3ff943b2c8633","src/dx12/mod.rs":"edaf24893edbd5390460a618e8db9e5f767d319efaa834b11f0c2cd1373b41f4","src/dx12/shader_compilation.rs":"5087adb8576e2d7751619dfdf8b37c573bb4e494290c594077ca3208cce1e746","src/dx12/suballocation.rs":"6939fc36223a15cc070c744d0418f9ac6fa2829d794af17cdea7c61eb5f8d2c0","src/dx12/types.rs":"b5140522eea491b6943ee9da235cdbfbd611f830d2719fd8812dfdd494ed16e2","src/dx12/view.rs":"792772e9c87840dcd045b7381a03162eb4a501492a95ca586e77e81aed621c67","src/empty.rs":"0849e0b7210d33145b3fb1368ed08b13fbc2144b1ccc0a1af913896e916bbe46","src/gles/adapter.rs":"90f0402e2b62b9311d247f3eba2ff956ef38e3799ca08355721730fb9a25e6d3","src/gles/command.rs":"788505168ae1af312781c86d31d5bc289fcc4e2b3cb047fb41deb62542db1a95","src/gles/conv.rs":"2e99a39fbef7a5bb8f2a24779bf5387601ba336de4cac9ff2bdac5b93f48a1d7","src/gles/device.rs":"64a8f202f123a4f2238e295023803724cd3095f2932462cc087a2d2212f0df3d","src/gles/egl.rs":"fed5b9c74ada548f90afcf138985514eadf7cc87859b013a3fa33cc7f72fb201","src/gles/emscripten.rs":"19bb73a9d140645f3f32cd48b002151711a9b8456e213eab5f3a2be79239e147","src/gles/mod.rs":"a54401494e4279bd56867fc621bfe3a8252fe82409cdc1594e7b22ee09f68fc0","src/gles/queue.rs":"1d4ea44a8e2ce47f9266260f1f384f02f870473c0411b77c5bab6794ee8484f5","src/gles/shaders/clear.frag":"9133ed8ed97d3641fbb6b5f5ea894a3554c629ccc1b80a5fc9221d7293aa1954","src/gles/shaders/clear.vert":"a543768725f4121ff2e9e1fb5b00644931e9d6f2f946c0ef01968afb5a135abd","src/gles/shaders/srgb_present.frag":"dd9a43c339a2fa4ccf7f6a1854c6f400cabf271a7d5e9230768e9f39d47f3ff5","src/gles/shaders/srgb_present.vert":"6e85d489403d80b81cc94790730bb53b309dfc5eeede8f1ea3412a660f31d357","src/gles/web.rs":"000ed39eae4a6442ca16bdca68e1e61459e6815a5db8eeec53e0ee0f5f14ae66","src/gles/wgl.rs":"59cd28ba85c6c3cbcbd82fd0e4225303a5f98e6a41b839400f9cce3a1b267af8","src/lib.rs":"88c8e93b1bea9eb31d2632f519c82e1c15b479e29b3700914a033efb59295634","src/metal/adapter.rs":"87636eaedbb18108c592aebf553ba3d84d052be567aa9306457b8cd9a2c6b0e7","src/metal/command.rs":"61640f2cb7269f44487df2d911771fbe1f78484d44a5363a6ddfce7d77143b02","src/metal/conv.rs":"a34a44e6affb594f3285f8d585b0fb33132aa7dc6a612f7c1aecaa400fe43265","src/metal/device.rs":"81ad3f5e4e5967cdb31a5e0f5e9404324b121408409b1422263bd9b0b793dd26","src/metal/mod.rs":"295223d5a06d9752d6911ec25ceb79c17dd93ced84c2c8a052957e6568e2e749","src/metal/surface.rs":"397dab6726eead96f0be9ecb6686874e691079da94c072921a6422e9b1284fb5","src/metal/time.rs":"c32d69f30e846dfcc0e39e01097fb80df63b2bebb6586143bb62494999850246","src/vulkan/adapter.rs":"3c82f3294c330c9b3b3b554ea7ef8601459c4f4d28a34700690230e3572c1d44","src/vulkan/command.rs":"a79df7f455c4061ab404c319e6602e6c1a3567443b50c20ddba6150fbdcc8826","src/vulkan/conv.rs":"47b8d7822703a5bd2b8c80118ee6ac1cd0b985758e2f929317fe4c42ce9a9bd9","src/vulkan/device.rs":"fefa6ebebb2d578323f01420fecd85f36e584efdc8a88a0c89ffc66c9d5524f9","src/vulkan/instance.rs":"60742c3c1feee63eca0953165dae57c86b0d58e496ad374743ff83ae50a4e8f1","src/vulkan/mod.rs":"d1aefbd87a166bd2103b418f096ba777d9bc7d19eb2d1363dd2f351639fc732a"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/Cargo.toml b/third_party/rust/wgpu-hal/Cargo.toml index f601949231..dc9fc7e827 100644 --- a/third_party/rust/wgpu-hal/Cargo.toml +++ b/third_party/rust/wgpu-hal/Cargo.toml @@ -13,7 +13,7 @@ edition = "2021" rust-version = "1.74" name = "wgpu-hal" -version = "0.19.0" +version = "0.20.0" authors = ["gfx-rs developers"] description = "WebGPU hardware abstraction layer" homepage = "https://wgpu.rs/" @@ -63,26 +63,25 @@ version = "0.13.1" optional = true [dependencies.naga] -version = "0.19.0" +version = "0.20.0" path = "../naga" -features = ["clone"] [dependencies.profiling] version = "1" default-features = false [dependencies.wgt] -version = "0.19.0" +version = "0.20.0" path = "../wgpu-types" package = "wgpu-types" [dev-dependencies] cfg-if = "1" env_logger = "0.11" -glam = "0.25.0" +glam = "0.27.0" [dev-dependencies.naga] -version = "0.19.0" +version = "0.20.0" path = "../naga" features = ["wgsl-in"] @@ -156,13 +155,15 @@ features = [ [target."cfg(any(target_os=\"macos\", target_os=\"ios\"))".dependencies] core-graphics-types = "0.1" -metal = "0.27.0" objc = "0.2.5" [target."cfg(any(target_os=\"macos\", target_os=\"ios\"))".dependencies.block] version = "0.1" optional = true +[target."cfg(any(target_os=\"macos\", target_os=\"ios\"))".dependencies.metal] +version = "0.28.0" + [target."cfg(not(target_arch = \"wasm32\"))".dependencies.ash] version = "0.37.3" optional = true @@ -172,7 +173,7 @@ version = "0.6" optional = true [target."cfg(not(target_arch = \"wasm32\"))".dependencies.gpu-descriptor] -version = "0.2" +version = "0.3" optional = true [target."cfg(not(target_arch = \"wasm32\"))".dependencies.khronos-egl] @@ -223,7 +224,7 @@ version = "0.5" optional = true [target."cfg(windows)".dependencies.d3d12] -version = "0.19.0" +version = "0.20.0" path = "../d3d12/" features = ["libloading"] optional = true diff --git a/third_party/rust/wgpu-hal/README.md b/third_party/rust/wgpu-hal/README.md index 588baa3cf5..bb5556b3d2 100644 --- a/third_party/rust/wgpu-hal/README.md +++ b/third_party/rust/wgpu-hal/README.md @@ -1,23 +1,120 @@ -*wgpu-hal* is an explicit low-level GPU abstraction powering *wgpu-core*. -It's a spiritual successor to [gfx-hal](https://github.com/gfx-rs/gfx), -but with reduced scope, and oriented towards WebGPU implementation goals. +# `wgpu_hal`: a cross-platform unsafe graphics abstraction -It has no overhead for validation or tracking, and the API translation overhead is kept to the bare minimum by the design of WebGPU. -This API can be used for resource-demanding applications and engines. +This crate defines a set of traits abstracting over modern graphics APIs, +with implementations ("backends") for Vulkan, Metal, Direct3D, and GL. -# Usage notes +`wgpu_hal` is a spiritual successor to +[gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and +oriented towards WebGPU implementation goals. It has no overhead for +validation or tracking, and the API translation overhead is kept to the bare +minimum by the design of WebGPU. This API can be used for resource-demanding +applications and engines. -All of the API is `unsafe`. Documenting the exact safety requirements for the -state and function arguments is desired, but will likely be incomplete while the library is in early development. +The `wgpu_hal` crate's main design choices: -The returned errors are only for cases that the user can't anticipate, -such as running out-of-memory, or losing the device. -For the counter-example, there is no error for mapping a buffer that's not mappable. -As the buffer creator, the user should already know if they can map it. +- Our traits are meant to be *portable*: proper use + should get equivalent results regardless of the backend. -The API accepts iterators in order to avoid forcing the user to store data in particular containers. The implementation doesn't guarantee that any of the iterators are drained, unless stated otherwise by the function documentation. -For this reason, we recommend that iterators don't do any mutating work. +- Our traits' contracts are *unsafe*: implementations perform minimal + validation, if any, and incorrect use will often cause undefined behavior. + This allows us to minimize the overhead we impose over the underlying + graphics system. If you need safety, the [`wgpu-core`] crate provides a + safe API for driving `wgpu_hal`, implementing all necessary validation, + resource state tracking, and so on. (Note that `wgpu-core` is designed for + use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for + `wgpu-core`.) Or, you can do your own validation. -# Debugging +- In the same vein, returned errors *only cover cases the user can't + anticipate*, like running out of memory or losing the device. Any errors + that the user could reasonably anticipate are their responsibility to + avoid. For example, `wgpu_hal` returns no error for mapping a buffer that's + not mappable: as the buffer creator, the user should already know if they + can map it. + +- We use *static dispatch*. The traits are not + generally object-safe. You must select a specific backend type + like [`vulkan::Api`] or [`metal::Api`], and then use that + according to the main traits, or call backend-specific methods. + +- We use *idiomatic Rust parameter passing*, + taking objects by reference, returning them by value, and so on, + unlike `wgpu-core`, which refers to objects by ID. + +- We map buffer contents *persistently*. This means that the buffer + can remain mapped on the CPU while the GPU reads or writes to it. + You must explicitly indicate when data might need to be + transferred between CPU and GPU, if `wgpu_hal` indicates that the + mapping is not coherent (that is, automatically synchronized + between the two devices). + +- You must record *explicit barriers* between different usages of a + resource. For example, if a buffer is written to by a compute + shader, and then used as and index buffer to a draw call, you + must use [`CommandEncoder::transition_buffers`] between those two + operations. + +- Pipeline layouts are *explicitly specified* when setting bind + group. Incompatible layouts disturb groups bound at higher indices. + +- The API *accepts collections as iterators*, to avoid forcing the user to + store data in particular containers. The implementation doesn't guarantee + that any of the iterators are drained, unless stated otherwise by the + function documentation. For this reason, we recommend that iterators don't + do any mutating work. + +Unfortunately, `wgpu_hal`'s safety requirements are not fully documented. +Ideally, all trait methods would have doc comments setting out the +requirements users must meet to ensure correct and portable behavior. If you +are aware of a specific requirement that a backend imposes that is not +ensured by the traits' documented rules, please file an issue. Or, if you are +a capable technical writer, please file a pull request! + +[`wgpu-core`]: https://crates.io/crates/wgpu-core +[`wgpu`]: https://crates.io/crates/wgpu +[`vulkan::Api`]: vulkan/struct.Api.html +[`metal::Api`]: metal/struct.Api.html + +## Primary backends + +The `wgpu_hal` crate has full-featured backends implemented on the following +platform graphics APIs: + +- Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's + Vulkan bindings. It's also available on macOS, if you install [MoltenVK]. + +- Metal on macOS, using the [`metal`] crate's bindings. + +- Direct3D 12 on Windows, using the [`d3d12`] crate's bindings. + +[`ash`]: https://crates.io/crates/ash +[MoltenVK]: https://github.com/KhronosGroup/MoltenVK +[`metal`]: https://crates.io/crates/metal +[`d3d12`]: ahttps://crates.io/crates/d3d12 + +## Secondary backends + +The `wgpu_hal` crate has a partial implementation based on the following +platform graphics API: + +- The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are + available. See the [`gles`] module documentation for details. + +[`gles`]: gles/index.html + +You can see what capabilities an adapter is missing by checking the +[`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available +from [`Instance::enumerate_adapters`]. + +The API is generally designed to fit the primary backends better than the +secondary backends, so the latter may impose more overhead. + +[tdc]: wgt::DownlevelCapabilities + +## Debugging + +Most of the information on the wiki [Debugging wgpu Applications][wiki-debug] +page still applies to this API, with the exception of API tracing/replay +functionality, which is only available in `wgpu-core`. + +[wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications -Most of the information in https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications still applies to this API, with an exception of API tracing/replay functionality, which is only available in *wgpu-core*. diff --git a/third_party/rust/wgpu-hal/examples/halmark/main.rs b/third_party/rust/wgpu-hal/examples/halmark/main.rs index c238f299e7..aef6919c8f 100644 --- a/third_party/rust/wgpu-hal/examples/halmark/main.rs +++ b/third_party/rust/wgpu-hal/examples/halmark/main.rs @@ -245,17 +245,22 @@ impl<A: hal::Api> Example<A> { .unwrap() }; + let constants = naga::back::PipelineConstants::default(); let pipeline_desc = hal::RenderPipelineDescriptor { label: None, layout: &pipeline_layout, vertex_stage: hal::ProgrammableStage { module: &shader, entry_point: "vs_main", + constants: &constants, + zero_initialize_workgroup_memory: true, }, vertex_buffers: &[], fragment_stage: Some(hal::ProgrammableStage { module: &shader, entry_point: "fs_main", + constants: &constants, + zero_initialize_workgroup_memory: true, }), primitive: wgt::PrimitiveState { topology: wgt::PrimitiveTopology::TriangleStrip, @@ -840,6 +845,7 @@ fn main() { } } ex.render(); + window.request_redraw(); } _ => { example.as_mut().unwrap().update(event); diff --git a/third_party/rust/wgpu-hal/examples/ray-traced-triangle/main.rs b/third_party/rust/wgpu-hal/examples/ray-traced-triangle/main.rs index c05feae820..3985cd60af 100644 --- a/third_party/rust/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/third_party/rust/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -371,6 +371,8 @@ impl<A: hal::Api> Example<A> { stage: hal::ProgrammableStage { module: &shader_module, entry_point: "main", + constants: &Default::default(), + zero_initialize_workgroup_memory: true, }, }) } diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs index 6af4b77bb3..e5162362f7 100644 --- a/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs @@ -261,6 +261,7 @@ pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT { Vf::Uint32x4 => DXGI_FORMAT_R32G32B32A32_UINT, Vf::Sint32x4 => DXGI_FORMAT_R32G32B32A32_SINT, Vf::Float32x4 => DXGI_FORMAT_R32G32B32A32_FLOAT, + Vf::Unorm10_10_10_2 => DXGI_FORMAT_R10G10B10A2_UNORM, Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), } } diff --git a/third_party/rust/wgpu-hal/src/dx12/adapter.rs b/third_party/rust/wgpu-hal/src/dx12/adapter.rs index b417a88a6f..faf25cc852 100644 --- a/third_party/rust/wgpu-hal/src/dx12/adapter.rs +++ b/third_party/rust/wgpu-hal/src/dx12/adapter.rs @@ -115,18 +115,6 @@ impl super::Adapter { ) }); - let mut shader_model_support: d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL = - d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL { - HighestShaderModel: d3d12_ty::D3D_SHADER_MODEL_6_0, - }; - assert_eq!(0, unsafe { - device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_SHADER_MODEL, - &mut shader_model_support as *mut _ as *mut _, - mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL>() as _, - ) - }); - let mut workarounds = super::Workarounds::default(); let info = wgt::AdapterInfo { @@ -181,6 +169,53 @@ impl super::Adapter { hr == 0 && features3.CastingFullyTypedFormatSupported != 0 }; + let shader_model = if dxc_container.is_none() { + naga::back::hlsl::ShaderModel::V5_1 + } else { + let mut versions = [ + crate::dx12::types::D3D_SHADER_MODEL_6_7, + crate::dx12::types::D3D_SHADER_MODEL_6_6, + crate::dx12::types::D3D_SHADER_MODEL_6_5, + crate::dx12::types::D3D_SHADER_MODEL_6_4, + crate::dx12::types::D3D_SHADER_MODEL_6_3, + crate::dx12::types::D3D_SHADER_MODEL_6_2, + crate::dx12::types::D3D_SHADER_MODEL_6_1, + crate::dx12::types::D3D_SHADER_MODEL_6_0, + crate::dx12::types::D3D_SHADER_MODEL_5_1, + ] + .iter(); + match loop { + if let Some(&sm) = versions.next() { + let mut sm = crate::dx12::types::D3D12_FEATURE_DATA_SHADER_MODEL { + HighestShaderModel: sm, + }; + if 0 == unsafe { + device.CheckFeatureSupport( + 7, // D3D12_FEATURE_SHADER_MODEL + &mut sm as *mut _ as *mut _, + mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_SHADER_MODEL>() + as _, + ) + } { + break sm.HighestShaderModel; + } + } else { + break crate::dx12::types::D3D_SHADER_MODEL_5_1; + } + } { + crate::dx12::types::D3D_SHADER_MODEL_5_1 => naga::back::hlsl::ShaderModel::V5_1, + crate::dx12::types::D3D_SHADER_MODEL_6_0 => naga::back::hlsl::ShaderModel::V6_0, + crate::dx12::types::D3D_SHADER_MODEL_6_1 => naga::back::hlsl::ShaderModel::V6_1, + crate::dx12::types::D3D_SHADER_MODEL_6_2 => naga::back::hlsl::ShaderModel::V6_2, + crate::dx12::types::D3D_SHADER_MODEL_6_3 => naga::back::hlsl::ShaderModel::V6_3, + crate::dx12::types::D3D_SHADER_MODEL_6_4 => naga::back::hlsl::ShaderModel::V6_4, + crate::dx12::types::D3D_SHADER_MODEL_6_5 => naga::back::hlsl::ShaderModel::V6_5, + crate::dx12::types::D3D_SHADER_MODEL_6_6 => naga::back::hlsl::ShaderModel::V6_6, + crate::dx12::types::D3D_SHADER_MODEL_6_7 => naga::back::hlsl::ShaderModel::V6_7, + _ => unreachable!(), + } + }; + let private_caps = super::PrivateCapabilities { instance_flags, heterogeneous_resource_heaps: options.ResourceHeapTier @@ -196,6 +231,7 @@ impl super::Adapter { casting_fully_typed_format_supported, // See https://github.com/gfx-rs/wgpu/issues/3552 suballocation_supported: !info.name.contains("Iris(R) Xe"), + shader_model, }; // Theoretically vram limited, but in practice 2^20 is the limit @@ -273,7 +309,7 @@ impl super::Adapter { wgt::Features::TEXTURE_BINDING_ARRAY | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, - shader_model_support.HighestShaderModel >= d3d12_ty::D3D_SHADER_MODEL_5_1, + shader_model >= naga::back::hlsl::ShaderModel::V5_1, ); let bgra8unorm_storage_supported = { @@ -295,21 +331,28 @@ impl super::Adapter { bgra8unorm_storage_supported, ); - // we must be using DXC because uint64_t was added with Shader Model 6 - // and FXC only supports up to 5.1 - let int64_shader_ops_supported = dxc_container.is_some() && { - let mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 = - unsafe { mem::zeroed() }; - let hr = unsafe { - device.CheckFeatureSupport( - d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1, - &mut features1 as *mut _ as *mut _, - mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _, - ) - }; - hr == 0 && features1.Int64ShaderOps != 0 + let mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 = unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1, + &mut features1 as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _, + ) }; - features.set(wgt::Features::SHADER_INT64, int64_shader_ops_supported); + + features.set( + wgt::Features::SHADER_INT64, + shader_model >= naga::back::hlsl::ShaderModel::V6_0 + && hr == 0 + && features1.Int64ShaderOps != 0, + ); + + features.set( + wgt::Features::SUBGROUP, + shader_model >= naga::back::hlsl::ShaderModel::V6_0 + && hr == 0 + && features1.WaveOps != 0, + ); // float32-filterable should always be available on d3d12 features.set(wgt::Features::FLOAT32_FILTERABLE, true); @@ -377,6 +420,8 @@ impl super::Adapter { .min(crate::MAX_VERTEX_BUFFERS as u32), max_vertex_attributes: d3d12_ty::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, max_vertex_buffer_array_stride: d3d12_ty::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, + min_subgroup_size: 4, // Not using `features1.WaveLaneCountMin` as it is unreliable + max_subgroup_size: 128, // The push constants are part of the root signature which // has a limit of 64 DWORDS (256 bytes), but other resources // also share the root signature: diff --git a/third_party/rust/wgpu-hal/src/dx12/conv.rs b/third_party/rust/wgpu-hal/src/dx12/conv.rs index 2b6c1d959e..b09ea76080 100644 --- a/third_party/rust/wgpu-hal/src/dx12/conv.rs +++ b/third_party/rust/wgpu-hal/src/dx12/conv.rs @@ -224,7 +224,7 @@ pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE { } /// D3D12 doesn't support passing factors ending in `_COLOR` for alpha blending -/// (see https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc). +/// (see <https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc>). /// Therefore this function takes an additional `is_alpha` argument /// which if set will return an equivalent `_ALPHA` factor. fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND { diff --git a/third_party/rust/wgpu-hal/src/dx12/device.rs b/third_party/rust/wgpu-hal/src/dx12/device.rs index 23bd409dc4..82075294ee 100644 --- a/third_party/rust/wgpu-hal/src/dx12/device.rs +++ b/third_party/rust/wgpu-hal/src/dx12/device.rs @@ -218,21 +218,39 @@ impl super::Device { use naga::back::hlsl; let stage_bit = crate::auxil::map_naga_stage(naga_stage); - let module = &stage.module.naga.module; + + let (module, info) = naga::back::pipeline_constants::process_overrides( + &stage.module.naga.module, + &stage.module.naga.info, + stage.constants, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))?; + + let needs_temp_options = stage.zero_initialize_workgroup_memory + != layout.naga_options.zero_initialize_workgroup_memory; + let mut temp_options; + let naga_options = if needs_temp_options { + temp_options = layout.naga_options.clone(); + temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory; + &temp_options + } else { + &layout.naga_options + }; + //TODO: reuse the writer let mut source = String::new(); - let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options); + let mut writer = hlsl::Writer::new(&mut source, naga_options); let reflection_info = { profiling::scope!("naga::back::hlsl::write"); writer - .write(module, &stage.module.naga.info) + .write(&module, &info) .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))? }; let full_stage = format!( "{}_{}\0", naga_stage.to_hlsl_str(), - layout.naga_options.shader_model.to_str() + naga_options.shader_model.to_str() ); let ep_index = module @@ -1062,12 +1080,7 @@ impl crate::Device for super::Device { }, bind_group_infos, naga_options: hlsl::Options { - shader_model: match self.dxc_container { - // DXC - Some(_) => hlsl::ShaderModel::V6_0, - // FXC doesn't support SM 6.0 - None => hlsl::ShaderModel::V5_1, - }, + shader_model: self.private_caps.shader_model, binding_map, fake_missing_bindings: false, special_constants_binding, diff --git a/third_party/rust/wgpu-hal/src/dx12/mod.rs b/third_party/rust/wgpu-hal/src/dx12/mod.rs index 4f958943ca..9f021bc241 100644 --- a/third_party/rust/wgpu-hal/src/dx12/mod.rs +++ b/third_party/rust/wgpu-hal/src/dx12/mod.rs @@ -195,6 +195,7 @@ struct PrivateCapabilities { heap_create_not_zeroed: bool, casting_fully_typed_format_supported: bool, suballocation_supported: bool, + shader_model: naga::back::hlsl::ShaderModel, } #[derive(Default)] @@ -439,7 +440,7 @@ impl Texture { } } - /// see https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice + /// see <https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice> fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 { mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count } diff --git a/third_party/rust/wgpu-hal/src/dx12/types.rs b/third_party/rust/wgpu-hal/src/dx12/types.rs index b4ad38324a..17b608b840 100644 --- a/third_party/rust/wgpu-hal/src/dx12/types.rs +++ b/third_party/rust/wgpu-hal/src/dx12/types.rs @@ -41,3 +41,25 @@ winapi::STRUCT! { BarycentricsSupported: winapi::shared::minwindef::BOOL, } } + +winapi::ENUM! { + enum D3D_SHADER_MODEL { + D3D_SHADER_MODEL_NONE = 0, + D3D_SHADER_MODEL_5_1 = 0x51, + D3D_SHADER_MODEL_6_0 = 0x60, + D3D_SHADER_MODEL_6_1 = 0x61, + D3D_SHADER_MODEL_6_2 = 0x62, + D3D_SHADER_MODEL_6_3 = 0x63, + D3D_SHADER_MODEL_6_4 = 0x64, + D3D_SHADER_MODEL_6_5 = 0x65, + D3D_SHADER_MODEL_6_6 = 0x66, + D3D_SHADER_MODEL_6_7 = 0x67, + D3D_HIGHEST_SHADER_MODEL = 0x67, + } +} + +winapi::STRUCT! { + struct D3D12_FEATURE_DATA_SHADER_MODEL { + HighestShaderModel: D3D_SHADER_MODEL, + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/adapter.rs b/third_party/rust/wgpu-hal/src/gles/adapter.rs index b9d044337c..052c77006b 100644 --- a/third_party/rust/wgpu-hal/src/gles/adapter.rs +++ b/third_party/rust/wgpu-hal/src/gles/adapter.rs @@ -104,7 +104,7 @@ impl super::Adapter { } } - fn make_info(vendor_orig: String, renderer_orig: String) -> wgt::AdapterInfo { + fn make_info(vendor_orig: String, renderer_orig: String, version: String) -> wgt::AdapterInfo { let vendor = vendor_orig.to_lowercase(); let renderer = renderer_orig.to_lowercase(); @@ -179,13 +179,33 @@ impl super::Adapter { 0 }; + let driver; + let driver_info; + if version.starts_with("WebGL ") || version.starts_with("OpenGL ") { + let es_sig = " ES"; + match version.find(es_sig) { + Some(pos) => { + driver = version[..pos + es_sig.len()].to_owned(); + driver_info = version[pos + es_sig.len() + 1..].to_owned(); + } + None => { + let pos = version.find(' ').unwrap(); + driver = version[..pos].to_owned(); + driver_info = version[pos + 1..].to_owned(); + } + } + } else { + driver = "OpenGL".to_owned(); + driver_info = version; + } + wgt::AdapterInfo { name: renderer_orig, vendor: vendor_id, device: 0, device_type: inferred_device_type, - driver: String::new(), - driver_info: String::new(), + driver, + driver_info, backend: wgt::Backend::Gl, } } @@ -507,8 +527,7 @@ impl super::Adapter { let has_etc = if cfg!(any(webgl, Emscripten)) { extensions.contains("WEBGL_compressed_texture_etc") } else { - // This is a required part of GLES3, but not part of Desktop GL at all. - es_ver.is_some() + es_ver.is_some() || extensions.contains("GL_ARB_ES3_compatibility") }; features.set(wgt::Features::TEXTURE_COMPRESSION_ETC2, has_etc); @@ -728,6 +747,8 @@ impl super::Adapter { } else { !0 }, + min_subgroup_size: 0, + max_subgroup_size: 0, max_push_constant_size: super::MAX_PUSH_CONSTANTS as u32 * 4, min_uniform_buffer_offset_alignment, min_storage_buffer_offset_alignment, @@ -825,7 +846,7 @@ impl super::Adapter { max_msaa_samples: max_samples, }), }, - info: Self::make_info(vendor, renderer), + info: Self::make_info(vendor, renderer, version), features, capabilities: crate::Capabilities { limits, diff --git a/third_party/rust/wgpu-hal/src/gles/conv.rs b/third_party/rust/wgpu-hal/src/gles/conv.rs index bde69b8629..a6c924f162 100644 --- a/third_party/rust/wgpu-hal/src/gles/conv.rs +++ b/third_party/rust/wgpu-hal/src/gles/conv.rs @@ -212,6 +212,7 @@ pub(super) fn describe_vertex_format(vertex_format: wgt::VertexFormat) -> super: Vf::Uint32x4 => (4, glow::UNSIGNED_INT, Vak::Integer), Vf::Sint32x4 => (4, glow::INT, Vak::Integer), Vf::Float32x4 => (4, glow::FLOAT, Vak::Float), + Vf::Unorm10_10_10_2 => (4, glow::UNSIGNED_INT_10_10_10_2, Vak::Float), Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), }; diff --git a/third_party/rust/wgpu-hal/src/gles/device.rs b/third_party/rust/wgpu-hal/src/gles/device.rs index 50c07f3ff0..a1e2736aa6 100644 --- a/third_party/rust/wgpu-hal/src/gles/device.rs +++ b/third_party/rust/wgpu-hal/src/gles/device.rs @@ -220,9 +220,17 @@ impl super::Device { multiview: context.multiview, }; - let shader = &stage.module.naga; - let entry_point_index = shader - .module + let (module, info) = naga::back::pipeline_constants::process_overrides( + &stage.module.naga.module, + &stage.module.naga.info, + stage.constants, + ) + .map_err(|e| { + let msg = format!("{e}"); + crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg) + })?; + + let entry_point_index = module .entry_points .iter() .position(|ep| ep.name.as_str() == stage.entry_point) @@ -247,11 +255,23 @@ impl super::Device { }; let mut output = String::new(); + let needs_temp_options = stage.zero_initialize_workgroup_memory + != context.layout.naga_options.zero_initialize_workgroup_memory; + let mut temp_options; + let naga_options = if needs_temp_options { + // We use a conditional here, as cloning the naga_options could be expensive + // That is, we want to avoid doing that unless we cannot avoid it + temp_options = context.layout.naga_options.clone(); + temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory; + &temp_options + } else { + &context.layout.naga_options + }; let mut writer = glsl::Writer::new( &mut output, - &shader.module, - &shader.info, - &context.layout.naga_options, + &module, + &info, + naga_options, &pipeline_options, policies, ) @@ -269,8 +289,8 @@ impl super::Device { context.consume_reflection( gl, - &shader.module, - shader.info.get_entry_point(entry_point_index), + &module, + info.get_entry_point(entry_point_index), reflection_info, naga_stage, program, @@ -297,6 +317,7 @@ impl super::Device { naga_stage: naga_stage.to_owned(), shader_id: stage.module.id, entry_point: stage.entry_point.to_owned(), + zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory, }); } let mut guard = self diff --git a/third_party/rust/wgpu-hal/src/gles/egl.rs b/third_party/rust/wgpu-hal/src/gles/egl.rs index b166f4f102..00ef70ba88 100644 --- a/third_party/rust/wgpu-hal/src/gles/egl.rs +++ b/third_party/rust/wgpu-hal/src/gles/egl.rs @@ -526,28 +526,51 @@ impl Inner { } let (config, supports_native_window) = choose_config(&egl, display, srgb_kind)?; - egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap(); + + let supports_opengl = if version >= (1, 4) { + let client_apis = egl + .query_string(Some(display), khronos_egl::CLIENT_APIS) + .unwrap() + .to_string_lossy(); + client_apis + .split(' ') + .any(|client_api| client_api == "OpenGL") + } else { + false + }; + egl.bind_api(if supports_opengl { + khronos_egl::OPENGL_API + } else { + khronos_egl::OPENGL_ES_API + }) + .unwrap(); let needs_robustness = true; let mut khr_context_flags = 0; let supports_khr_context = display_extensions.contains("EGL_KHR_create_context"); - //TODO: make it so `Device` == EGL Context - let mut context_attributes = vec![ - khronos_egl::CONTEXT_MAJOR_VERSION, - 3, // Request GLES 3.0 or higher - ]; - - if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { + let mut context_attributes = vec![]; + if supports_opengl { + context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); + context_attributes.push(3); context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); - context_attributes.push(match force_gles_minor_version { - wgt::Gles3MinorVersion::Version0 => 0, - wgt::Gles3MinorVersion::Version1 => 1, - wgt::Gles3MinorVersion::Version2 => 2, - _ => unreachable!(), - }); + context_attributes.push(3); + if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { + log::warn!("Ignoring specified GLES minor version as OpenGL is used"); + } + } else { + context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION); + context_attributes.push(3); // Request GLES 3.0 or higher + if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { + context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); + context_attributes.push(match force_gles_minor_version { + wgt::Gles3MinorVersion::Automatic => unreachable!(), + wgt::Gles3MinorVersion::Version0 => 0, + wgt::Gles3MinorVersion::Version1 => 1, + wgt::Gles3MinorVersion::Version2 => 2, + }); + } } - if flags.contains(wgt::InstanceFlags::DEBUG) { if version >= (1, 5) { log::debug!("\tEGL context: +debug"); @@ -577,8 +600,6 @@ impl Inner { // because it's for desktop GL only, not GLES. log::warn!("\tEGL context: -robust access"); } - - //TODO do we need `khronos_egl::CONTEXT_OPENGL_NOTIFICATION_STRATEGY_EXT`? } if khr_context_flags != 0 { context_attributes.push(EGL_CONTEXT_FLAGS_KHR); @@ -977,6 +998,7 @@ impl crate::Instance for Instance { srgb_kind: inner.srgb_kind, }) } + unsafe fn destroy_surface(&self, _surface: Surface) {} unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { @@ -993,6 +1015,12 @@ impl crate::Instance for Instance { }) }; + // In contrast to OpenGL ES, OpenGL requires explicitly enabling sRGB conversions, + // as otherwise the user has to do the sRGB conversion. + if !matches!(inner.srgb_kind, SrgbFrameBufferKind::None) { + unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) }; + } + if self.flags.contains(wgt::InstanceFlags::DEBUG) && gl.supports_debug() { log::debug!("Max label length: {}", unsafe { gl.get_parameter_i32(glow::MAX_LABEL_LENGTH) @@ -1106,6 +1134,13 @@ impl Surface { unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(sc.framebuffer)) }; + + if !matches!(self.srgb_kind, SrgbFrameBufferKind::None) { + // Disable sRGB conversions for `glBlitFramebuffer` as behavior does diverge between + // drivers and formats otherwise and we want to ensure no sRGB conversions happen. + unsafe { gl.disable(glow::FRAMEBUFFER_SRGB) }; + } + // Note the Y-flipping here. GL's presentation is not flipped, // but main rendering is. Therefore, we Y-flip the output positions // in the shader, and also this blit. @@ -1123,6 +1158,11 @@ impl Surface { glow::NEAREST, ) }; + + if !matches!(self.srgb_kind, SrgbFrameBufferKind::None) { + unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) }; + } + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; self.egl diff --git a/third_party/rust/wgpu-hal/src/gles/mod.rs b/third_party/rust/wgpu-hal/src/gles/mod.rs index 6f41f7c000..0fcb09be46 100644 --- a/third_party/rust/wgpu-hal/src/gles/mod.rs +++ b/third_party/rust/wgpu-hal/src/gles/mod.rs @@ -602,6 +602,7 @@ struct ProgramStage { naga_stage: naga::ShaderStage, shader_id: ShaderId, entry_point: String, + zero_initialize_workgroup_memory: bool, } #[derive(PartialEq, Eq, Hash)] diff --git a/third_party/rust/wgpu-hal/src/gles/queue.rs b/third_party/rust/wgpu-hal/src/gles/queue.rs index 29dfb79d04..7c728d3978 100644 --- a/third_party/rust/wgpu-hal/src/gles/queue.rs +++ b/third_party/rust/wgpu-hal/src/gles/queue.rs @@ -213,12 +213,27 @@ impl super::Queue { instance_count, ref first_instance_location, } => { - match base_vertex { - 0 => { - unsafe { - gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) - }; + let supports_full_instancing = self + .shared + .private_caps + .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING); + if supports_full_instancing { + unsafe { + gl.draw_elements_instanced_base_vertex_base_instance( + topology, + index_count as i32, + index_type, + index_offset as i32, + instance_count as i32, + base_vertex, + first_instance, + ) + } + } else { + unsafe { gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) }; + + if base_vertex == 0 { unsafe { // Don't use `gl.draw_elements`/`gl.draw_elements_base_vertex` for `instance_count == 1`. // Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `gl.draw_elements`/`gl.draw_elements_base_vertex`. @@ -231,41 +246,17 @@ impl super::Queue { instance_count as i32, ) } - } - _ => { - let supports_full_instancing = self - .shared - .private_caps - .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING); - - if supports_full_instancing { - unsafe { - gl.draw_elements_instanced_base_vertex_base_instance( - topology, - index_count as i32, - index_type, - index_offset as i32, - instance_count as i32, - base_vertex, - first_instance, - ) - } - } else { - unsafe { - gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) - }; - - // If we've gotten here, wgpu-core has already validated that this function exists via the DownlevelFlags::BASE_VERTEX feature. - unsafe { - gl.draw_elements_instanced_base_vertex( - topology, - index_count as _, - index_type, - index_offset as i32, - instance_count as i32, - base_vertex, - ) - } + } else { + // If we've gotten here, wgpu-core has already validated that this function exists via the DownlevelFlags::BASE_VERTEX feature. + unsafe { + gl.draw_elements_instanced_base_vertex( + topology, + index_count as _, + index_type, + index_offset as i32, + instance_count as i32, + base_vertex, + ) } } } diff --git a/third_party/rust/wgpu-hal/src/gles/wgl.rs b/third_party/rust/wgpu-hal/src/gles/wgl.rs index 2564892969..aae70478b4 100644 --- a/third_party/rust/wgpu-hal/src/gles/wgl.rs +++ b/third_party/rust/wgpu-hal/src/gles/wgl.rs @@ -507,6 +507,8 @@ impl crate::Instance for Instance { .supported_extensions() .contains("GL_ARB_framebuffer_sRGB"); + // In contrast to OpenGL ES, OpenGL requires explicitly enabling sRGB conversions, + // as otherwise the user has to do the sRGB conversion. if srgb_capable { unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) }; } diff --git a/third_party/rust/wgpu-hal/src/lib.rs b/third_party/rust/wgpu-hal/src/lib.rs index 79bd54e66e..d300ca30cc 100644 --- a/third_party/rust/wgpu-hal/src/lib.rs +++ b/third_party/rust/wgpu-hal/src/lib.rs @@ -1,17 +1,208 @@ -/*! This library describes the internal unsafe graphics abstraction API. - * It follows WebGPU for the most part, re-using wgpu-types, - * with the following deviations: - * - Fully unsafe: zero overhead, zero validation. - * - Compile-time backend selection via traits. - * - Objects are passed by references and returned by value. No IDs. - * - Mapping is persistent, with explicit synchronization. - * - Resource transitions are explicit. - * - All layouts are explicit. Binding model has compatibility. +/*! A cross-platform unsafe graphics abstraction. * - * General design direction is to follow the majority by the following weights: - * - wgpu-core: 1.5 - * - primary backends (Vulkan/Metal/DX12): 1.0 each - * - secondary backend (GLES): 0.5 + * This crate defines a set of traits abstracting over modern graphics APIs, + * with implementations ("backends") for Vulkan, Metal, Direct3D, and GL. + * + * `wgpu-hal` is a spiritual successor to + * [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and + * oriented towards WebGPU implementation goals. It has no overhead for + * validation or tracking, and the API translation overhead is kept to the bare + * minimum by the design of WebGPU. This API can be used for resource-demanding + * applications and engines. + * + * The `wgpu-hal` crate's main design choices: + * + * - Our traits are meant to be *portable*: proper use + * should get equivalent results regardless of the backend. + * + * - Our traits' contracts are *unsafe*: implementations perform minimal + * validation, if any, and incorrect use will often cause undefined behavior. + * This allows us to minimize the overhead we impose over the underlying + * graphics system. If you need safety, the [`wgpu-core`] crate provides a + * safe API for driving `wgpu-hal`, implementing all necessary validation, + * resource state tracking, and so on. (Note that `wgpu-core` is designed for + * use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for + * `wgpu-core`.) Or, you can do your own validation. + * + * - In the same vein, returned errors *only cover cases the user can't + * anticipate*, like running out of memory or losing the device. Any errors + * that the user could reasonably anticipate are their responsibility to + * avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's + * not mappable: as the buffer creator, the user should already know if they + * can map it. + * + * - We use *static dispatch*. The traits are not + * generally object-safe. You must select a specific backend type + * like [`vulkan::Api`] or [`metal::Api`], and then use that + * according to the main traits, or call backend-specific methods. + * + * - We use *idiomatic Rust parameter passing*, + * taking objects by reference, returning them by value, and so on, + * unlike `wgpu-core`, which refers to objects by ID. + * + * - We map buffer contents *persistently*. This means that the buffer + * can remain mapped on the CPU while the GPU reads or writes to it. + * You must explicitly indicate when data might need to be + * transferred between CPU and GPU, if `wgpu-hal` indicates that the + * mapping is not coherent (that is, automatically synchronized + * between the two devices). + * + * - You must record *explicit barriers* between different usages of a + * resource. For example, if a buffer is written to by a compute + * shader, and then used as and index buffer to a draw call, you + * must use [`CommandEncoder::transition_buffers`] between those two + * operations. + * + * - Pipeline layouts are *explicitly specified* when setting bind + * group. Incompatible layouts disturb groups bound at higher indices. + * + * - The API *accepts collections as iterators*, to avoid forcing the user to + * store data in particular containers. The implementation doesn't guarantee + * that any of the iterators are drained, unless stated otherwise by the + * function documentation. For this reason, we recommend that iterators don't + * do any mutating work. + * + * Unfortunately, `wgpu-hal`'s safety requirements are not fully documented. + * Ideally, all trait methods would have doc comments setting out the + * requirements users must meet to ensure correct and portable behavior. If you + * are aware of a specific requirement that a backend imposes that is not + * ensured by the traits' documented rules, please file an issue. Or, if you are + * a capable technical writer, please file a pull request! + * + * [`wgpu-core`]: https://crates.io/crates/wgpu-core + * [`wgpu`]: https://crates.io/crates/wgpu + * [`vulkan::Api`]: vulkan/struct.Api.html + * [`metal::Api`]: metal/struct.Api.html + * + * ## Primary backends + * + * The `wgpu-hal` crate has full-featured backends implemented on the following + * platform graphics APIs: + * + * - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's + * Vulkan bindings. It's also available on macOS, if you install [MoltenVK]. + * + * - Metal on macOS, using the [`metal`] crate's bindings. + * + * - Direct3D 12 on Windows, using the [`d3d12`] crate's bindings. + * + * [`ash`]: https://crates.io/crates/ash + * [MoltenVK]: https://github.com/KhronosGroup/MoltenVK + * [`metal`]: https://crates.io/crates/metal + * [`d3d12`]: ahttps://crates.io/crates/d3d12 + * + * ## Secondary backends + * + * The `wgpu-hal` crate has a partial implementation based on the following + * platform graphics API: + * + * - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are + * available. See the [`gles`] module documentation for details. + * + * [`gles`]: gles/index.html + * + * You can see what capabilities an adapter is missing by checking the + * [`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available + * from [`Instance::enumerate_adapters`]. + * + * The API is generally designed to fit the primary backends better than the + * secondary backends, so the latter may impose more overhead. + * + * [tdc]: wgt::DownlevelCapabilities + * + * ## Traits + * + * The `wgpu-hal` crate defines a handful of traits that together + * represent a cross-platform abstraction for modern GPU APIs. + * + * - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its + * own, only a collection of associated types. + * + * - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`] + * creates an instance value, which you can use to enumerate the adapters + * available on the system. For example, [`vulkan::Api::Instance::init`][Ii] + * returns an instance that can enumerate the Vulkan physical devices on your + * system. + * + * - [`Api::Adapter`] implements the [`Adapter`] trait, representing a + * particular device from a particular backend. For example, a Vulkan instance + * might have a Lavapipe software adapter and a GPU-based adapter. + * + * - [`Api::Device`] implements the [`Device`] trait, representing an active + * link to a device. You get a device value by calling [`Adapter::open`], and + * then use it to create buffers, textures, shader modules, and so on. + * + * - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit + * command buffers to a given device. + * + * - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you + * use to build buffers of commands to submit to a queue. This has all the + * methods for drawing and running compute shaders, which is presumably what + * you're here for. + * + * - [`Api::Surface`] implements the [`Surface`] trait, which represents a + * swapchain for presenting images on the screen, via interaction with the + * system's window manager. + * + * The [`Api`] trait has various other associated types like [`Api::Buffer`] and + * [`Api::Texture`] that represent resources the rest of the interface can + * operate on, but these generally do not have their own traits. + * + * [Ii]: Instance::init + * + * ## Validation is the calling code's responsibility, not `wgpu-hal`'s + * + * As much as possible, `wgpu-hal` traits place the burden of validation, + * resource tracking, and state tracking on the caller, not on the trait + * implementations themselves. Anything which can reasonably be handled in + * backend-independent code should be. A `wgpu_hal` backend's sole obligation is + * to provide portable behavior, and report conditions that the calling code + * can't reasonably anticipate, like device loss or running out of memory. + * + * The `wgpu` crate collection is intended for use in security-sensitive + * applications, like web browsers, where the API is available to untrusted + * code. This means that `wgpu-core`'s validation is not simply a service to + * developers, to be provided opportunistically when the performance costs are + * acceptable and the necessary data is ready at hand. Rather, `wgpu-core`'s + * validation must be exhaustive, to ensure that even malicious content cannot + * provoke and exploit undefined behavior in the platform's graphics API. + * + * Because graphics APIs' requirements are complex, the only practical way for + * `wgpu` to provide exhaustive validation is to comprehensively track the + * lifetime and state of all the resources in the system. Implementing this + * separately for each backend is infeasible; effort would be better spent + * making the cross-platform validation in `wgpu-core` legible and trustworthy. + * Fortunately, the requirements are largely similar across the various + * platforms, so cross-platform validation is practical. + * + * Some backends have specific requirements that aren't practical to foist off + * on the `wgpu-hal` user. For example, properly managing macOS Objective-C or + * Microsoft COM reference counts is best handled by using appropriate pointer + * types within the backend. + * + * A desire for "defense in depth" may suggest performing additional validation + * in `wgpu-hal` when the opportunity arises, but this must be done with + * caution. Even experienced contributors infer the expectations their changes + * must meet by considering not just requirements made explicit in types, tests, + * assertions, and comments, but also those implicit in the surrounding code. + * When one sees validation or state-tracking code in `wgpu-hal`, it is tempting + * to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry + * about it - that would be redundant!" The responsibility for exhaustive + * validation always rests with `wgpu-core`, regardless of what may or may not + * be checked in `wgpu-hal`. + * + * To this end, any "defense in depth" validation that does appear in `wgpu-hal` + * for requirements that `wgpu-core` should have enforced should report failure + * via the `unreachable!` macro, because problems detected at this stage always + * indicate a bug in `wgpu-core`. + * + * ## Debugging + * + * Most of the information on the wiki [Debugging wgpu Applications][wiki-debug] + * page still applies to this API, with the exception of API tracing/replay + * functionality, which is only available in `wgpu-core`. + * + * [wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications */ #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] @@ -198,6 +389,15 @@ pub trait Api: Clone + fmt::Debug + Sized { type Queue: Queue<A = Self>; type CommandEncoder: CommandEncoder<A = Self>; + + /// This API's command buffer type. + /// + /// The only thing you can do with `CommandBuffer`s is build them + /// with a [`CommandEncoder`] and then pass them to + /// [`Queue::submit`] for execution, or destroy them by passing + /// them to [`CommandEncoder::reset_all`]. + /// + /// [`CommandEncoder`]: Api::CommandEncoder type CommandBuffer: WasmNotSendSync + fmt::Debug; type Buffer: fmt::Debug + WasmNotSendSync + 'static; @@ -206,6 +406,24 @@ pub trait Api: Clone + fmt::Debug + Sized { type TextureView: fmt::Debug + WasmNotSendSync; type Sampler: fmt::Debug + WasmNotSendSync; type QuerySet: fmt::Debug + WasmNotSendSync; + + /// A value you can block on to wait for something to finish. + /// + /// A `Fence` holds a monotonically increasing [`FenceValue`]. You can call + /// [`Device::wait`] to block until a fence reaches or passes a value you + /// choose. [`Queue::submit`] can take a `Fence` and a [`FenceValue`] to + /// store in it when the submitted work is complete. + /// + /// Attempting to set a fence to a value less than its current value has no + /// effect. + /// + /// Waiting on a fence returns as soon as the fence reaches *or passes* the + /// requested value. This implies that, in order to reliably determine when + /// an operation has completed, operations must finish in order of + /// increasing fence values: if a higher-valued operation were to finish + /// before a lower-valued operation, then waiting for the fence to reach the + /// lower value could return before the lower-valued operation has actually + /// finished. type Fence: fmt::Debug + WasmNotSendSync; type BindGroupLayout: fmt::Debug + WasmNotSendSync; @@ -405,7 +623,25 @@ pub trait Device: WasmNotSendSync { &self, fence: &<Self::A as Api>::Fence, ) -> Result<FenceValue, DeviceError>; - /// Calling wait with a lower value than the current fence value will immediately return. + + /// Wait for `fence` to reach `value`. + /// + /// Operations like [`Queue::submit`] can accept a [`Fence`] and a + /// [`FenceValue`] to store in it, so you can use this `wait` function + /// to wait for a given queue submission to finish execution. + /// + /// The `value` argument must be a value that some actual operation you have + /// already presented to the device is going to store in `fence`. You cannot + /// wait for values yet to be submitted. (This restriction accommodates + /// implementations like the `vulkan` backend's [`FencePool`] that must + /// allocate a distinct synchronization object for each fence value one is + /// able to wait for.) + /// + /// Calling `wait` with a lower [`FenceValue`] than `fence`'s current value + /// returns immediately. + /// + /// [`Fence`]: Api::Fence + /// [`FencePool`]: vulkan/enum.Fence.html#variant.FencePool unsafe fn wait( &self, fence: &<Self::A as Api>::Fence, @@ -437,14 +673,48 @@ pub trait Device: WasmNotSendSync { pub trait Queue: WasmNotSendSync { type A: Api; - /// Submits the command buffers for execution on GPU. + /// Submit `command_buffers` for execution on GPU. + /// + /// If `signal_fence` is `Some(fence, value)`, update `fence` to `value` + /// when the operation is complete. See [`Fence`] for details. + /// + /// If two calls to `submit` on a single `Queue` occur in a particular order + /// (that is, they happen on the same thread, or on two threads that have + /// synchronized to establish an ordering), then the first submission's + /// commands all complete execution before any of the second submission's + /// commands begin. All results produced by one submission are visible to + /// the next. + /// + /// Within a submission, command buffers execute in the order in which they + /// appear in `command_buffers`. All results produced by one buffer are + /// visible to the next. + /// + /// If two calls to `submit` on a single `Queue` from different threads are + /// not synchronized to occur in a particular order, they must pass distinct + /// [`Fence`]s. As explained in the [`Fence`] documentation, waiting for + /// operations to complete is only trustworthy when operations finish in + /// order of increasing fence value, but submissions from different threads + /// cannot determine how to order the fence values if the submissions + /// themselves are unordered. If each thread uses a separate [`Fence`], this + /// problem does not arise. /// /// Valid usage: - /// - all of the command buffers were created from command pools - /// that are associated with this queue. - /// - all of the command buffers had `CommandBuffer::finish()` called. - /// - all surface textures that the command buffers write to must be - /// passed to the surface_textures argument. + /// + /// - All of the [`CommandBuffer`][cb]s were created from + /// [`CommandEncoder`][ce]s that are associated with this queue. + /// + /// - All of those [`CommandBuffer`][cb]s must remain alive until + /// the submitted commands have finished execution. (Since + /// command buffers must not outlive their encoders, this + /// implies that the encoders must remain alive as well.) + /// + /// - All of the [`SurfaceTexture`][st]s that the command buffers + /// write to appear in the `surface_textures` argument. + /// + /// [`Fence`]: Api::Fence + /// [cb]: Api::CommandBuffer + /// [ce]: Api::CommandEncoder + /// [st]: Api::SurfaceTexture unsafe fn submit( &self, command_buffers: &[&<Self::A as Api>::CommandBuffer], @@ -459,7 +729,12 @@ pub trait Queue: WasmNotSendSync { unsafe fn get_timestamp_period(&self) -> f32; } -/// Encoder and allocation pool for `CommandBuffer`. +/// Encoder and allocation pool for `CommandBuffer`s. +/// +/// A `CommandEncoder` not only constructs `CommandBuffer`s but also +/// acts as the allocation pool that owns the buffers' underlying +/// storage. Thus, `CommandBuffer`s must not outlive the +/// `CommandEncoder` that created them. /// /// The life cycle of a `CommandBuffer` is as follows: /// @@ -472,14 +747,17 @@ pub trait Queue: WasmNotSendSync { /// /// - Call methods like `copy_buffer_to_buffer`, `begin_render_pass`, /// etc. on a "recording" `CommandEncoder` to add commands to the -/// list. +/// list. (If an error occurs, you must call `discard_encoding`; see +/// below.) /// /// - Call `end_encoding` on a recording `CommandEncoder` to close the /// encoder and construct a fresh `CommandBuffer` consisting of the /// list of commands recorded up to that point. /// /// - Call `discard_encoding` on a recording `CommandEncoder` to drop -/// the commands recorded thus far and close the encoder. +/// the commands recorded thus far and close the encoder. This is +/// the only safe thing to do on a `CommandEncoder` if an error has +/// occurred while recording commands. /// /// - Call `reset_all` on a closed `CommandEncoder`, passing all the /// live `CommandBuffers` built from it. All the `CommandBuffer`s @@ -497,6 +775,10 @@ pub trait Queue: WasmNotSendSync { /// built it. /// /// - A `CommandEncoder` must not outlive its `Device`. +/// +/// It is the user's responsibility to meet this requirements. This +/// allows `CommandEncoder` implementations to keep their state +/// tracking to a minimum. pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { type A: Api; @@ -509,13 +791,20 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { /// This `CommandEncoder` must be in the "closed" state. unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>; - /// Discard the command list under construction, if any. + /// Discard the command list under construction. + /// + /// If an error has occurred while recording commands, this + /// is the only safe thing to do with the encoder. /// /// This puts this `CommandEncoder` in the "closed" state. /// /// # Safety /// /// This `CommandEncoder` must be in the "recording" state. + /// + /// Callers must not assume that implementations of this + /// function are idempotent, and thus should not call it + /// multiple times in a row. unsafe fn discard_encoding(&mut self); /// Return a fresh [`CommandBuffer`] holding the recorded commands. @@ -1318,6 +1607,13 @@ pub struct ProgrammableStage<'a, A: Api> { /// The name of the entry point in the compiled shader. There must be a function with this name /// in the shader. pub entry_point: &'a str, + /// Pipeline constants + pub constants: &'a naga::back::PipelineConstants, + /// Whether workgroup scoped memory will be initialized with zero values for this stage. + /// + /// This is required by the WebGPU spec, but may have overhead which can be avoided + /// for cross-platform applications + pub zero_initialize_workgroup_memory: bool, } // Rust gets confused about the impl requirements for `A` @@ -1326,6 +1622,8 @@ impl<A: Api> Clone for ProgrammableStage<'_, A> { Self { module: self.module, entry_point: self.entry_point, + constants: self.constants, + zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, } } } diff --git a/third_party/rust/wgpu-hal/src/metal/adapter.rs b/third_party/rust/wgpu-hal/src/metal/adapter.rs index 6211896838..cddba472bd 100644 --- a/third_party/rust/wgpu-hal/src/metal/adapter.rs +++ b/third_party/rust/wgpu-hal/src/metal/adapter.rs @@ -562,7 +562,11 @@ impl super::PrivateCapabilities { Self { family_check, - msl_version: if os_is_xr || version.at_least((12, 0), (15, 0), os_is_mac) { + msl_version: if os_is_xr || version.at_least((14, 0), (17, 0), os_is_mac) { + MTLLanguageVersion::V3_1 + } else if version.at_least((13, 0), (16, 0), os_is_mac) { + MTLLanguageVersion::V3_0 + } else if version.at_least((12, 0), (15, 0), os_is_mac) { MTLLanguageVersion::V2_4 } else if version.at_least((11, 0), (14, 0), os_is_mac) { MTLLanguageVersion::V2_3 @@ -809,6 +813,14 @@ impl super::PrivateCapabilities { None }, timestamp_query_support, + supports_simd_scoped_operations: family_check + && (device.supports_family(MTLGPUFamily::Metal3) + || device.supports_family(MTLGPUFamily::Mac2) + || device.supports_family(MTLGPUFamily::Apple7)), + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=5 + int64: family_check + && (device.supports_family(MTLGPUFamily::Apple3) + || device.supports_family(MTLGPUFamily::Metal3)), } } @@ -882,7 +894,7 @@ impl super::PrivateCapabilities { } features.set( F::SHADER_INT64, - self.msl_version >= MTLLanguageVersion::V2_3, + self.int64 && self.msl_version >= MTLLanguageVersion::V2_3, ); features.set( @@ -894,6 +906,10 @@ impl super::PrivateCapabilities { features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); + if self.supports_simd_scoped_operations { + features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER); + } + features } @@ -948,6 +964,8 @@ impl super::PrivateCapabilities { max_vertex_buffers: self.max_vertex_buffers, max_vertex_attributes: 31, max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, + min_subgroup_size: 4, + max_subgroup_size: 64, max_push_constant_size: 0x1000, min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, min_storage_buffer_offset_alignment: self.buffer_alignment as u32, diff --git a/third_party/rust/wgpu-hal/src/metal/conv.rs b/third_party/rust/wgpu-hal/src/metal/conv.rs index 8f6439b50b..6ebabee1a6 100644 --- a/third_party/rust/wgpu-hal/src/metal/conv.rs +++ b/third_party/rust/wgpu-hal/src/metal/conv.rs @@ -222,6 +222,7 @@ pub fn map_vertex_format(format: wgt::VertexFormat) -> metal::MTLVertexFormat { Vf::Uint32x4 => UInt4, Vf::Sint32x4 => Int4, Vf::Float32x4 => Float4, + Vf::Unorm10_10_10_2 => UInt1010102Normalized, Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), } } diff --git a/third_party/rust/wgpu-hal/src/metal/device.rs b/third_party/rust/wgpu-hal/src/metal/device.rs index 179429f5d7..2c8f5a2bfb 100644 --- a/third_party/rust/wgpu-hal/src/metal/device.rs +++ b/third_party/rust/wgpu-hal/src/metal/device.rs @@ -69,7 +69,13 @@ impl super::Device { ) -> Result<CompiledShader, crate::PipelineError> { let stage_bit = map_naga_stage(naga_stage); - let module = &stage.module.naga.module; + let (module, module_info) = naga::back::pipeline_constants::process_overrides( + &stage.module.naga.module, + &stage.module.naga.info, + stage.constants, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?; + let ep_resources = &layout.per_stage_map[naga_stage]; let bounds_check_policy = if stage.module.runtime_checks { @@ -88,6 +94,8 @@ impl super::Device { metal::MTLLanguageVersion::V2_2 => (2, 2), metal::MTLLanguageVersion::V2_3 => (2, 3), metal::MTLLanguageVersion::V2_4 => (2, 4), + metal::MTLLanguageVersion::V3_0 => (3, 0), + metal::MTLLanguageVersion::V3_1 => (3, 1), }, inline_samplers: Default::default(), spirv_cross_compatibility: false, @@ -104,7 +112,7 @@ impl super::Device { // TODO: support bounds checks on binding arrays binding_array: naga::proc::BoundsCheckPolicy::Unchecked, }, - zero_initialize_workgroup_memory: true, + zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory, }; let pipeline_options = naga::back::msl::PipelineOptions { @@ -114,13 +122,9 @@ impl super::Device { }, }; - let (source, info) = naga::back::msl::write_string( - module, - &stage.module.naga.info, - &options, - &pipeline_options, - ) - .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?; + let (source, info) = + naga::back::msl::write_string(&module, &module_info, &options, &pipeline_options) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?; log::debug!( "Naga generated shader for entry point '{}' and stage {:?}\n{}", @@ -168,7 +172,7 @@ impl super::Device { })?; // collect sizes indices, immutable buffers, and work group memory sizes - let ep_info = &stage.module.naga.info.get_entry_point(ep_index); + let ep_info = &module_info.get_entry_point(ep_index); let mut wg_memory_sizes = Vec::new(); let mut sized_bindings = Vec::new(); let mut immutable_buffer_mask = 0; diff --git a/third_party/rust/wgpu-hal/src/metal/mod.rs b/third_party/rust/wgpu-hal/src/metal/mod.rs index 6aeafb0f86..7d547cfe3c 100644 --- a/third_party/rust/wgpu-hal/src/metal/mod.rs +++ b/third_party/rust/wgpu-hal/src/metal/mod.rs @@ -269,6 +269,8 @@ struct PrivateCapabilities { supports_shader_primitive_index: bool, has_unified_memory: Option<bool>, timestamp_query_support: TimestampQuerySupport, + supports_simd_scoped_operations: bool, + int64: bool, } #[derive(Clone, Debug)] @@ -649,7 +651,7 @@ struct BufferResource { /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can /// hold WGSL runtime-sized arrays. When one does, we must pass its size to /// shader entry points to implement bounds checks and WGSL's `arrayLength` - /// function. See [`device::CompiledShader::sized_bindings`] for details. + /// function. See `device::CompiledShader::sized_bindings` for details. /// /// [`Storage`]: wgt::BufferBindingType::Storage binding_size: Option<wgt::BufferSize>, @@ -680,12 +682,12 @@ struct PipelineStageInfo { /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes. /// - /// See [`device::CompiledShader::sized_bindings`] for more details. + /// See `device::CompiledShader::sized_bindings` for more details. sizes_slot: Option<naga::back::msl::Slot>, /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays. /// - /// See [`device::CompiledShader::sized_bindings`] for more details. + /// See `device::CompiledShader::sized_bindings` for more details. sized_bindings: Vec<naga::ResourceBinding>, } @@ -801,7 +803,7 @@ struct CommandState { /// /// Specifically: /// - /// - The keys are ['ResourceBinding`] values (that is, the WGSL `@group` + /// - The keys are [`ResourceBinding`] values (that is, the WGSL `@group` /// and `@binding` attributes) for `var<storage>` global variables in the /// current module that contain runtime-sized arrays. /// @@ -813,7 +815,7 @@ struct CommandState { /// of the buffers listed in [`stage_infos.S.sized_bindings`], which we must /// pass to the entry point. /// - /// See [`device::CompiledShader::sized_bindings`] for more details. + /// See `device::CompiledShader::sized_bindings` for more details. /// /// [`ResourceBinding`]: naga::ResourceBinding storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>, diff --git a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs index 2665463792..21219361f4 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs @@ -35,6 +35,8 @@ fn indexing_features() -> wgt::Features { /// [`PhysicalDeviceFeatures::from_extensions_and_requested_features`] /// constructs an value of this type indicating which Vulkan features to /// enable, based on the `wgpu_types::Features` requested. +/// +/// [`Instance::expose_adapter`]: super::Instance::expose_adapter #[derive(Debug, Default)] pub struct PhysicalDeviceFeatures { /// Basic Vulkan 1.0 features. @@ -86,6 +88,9 @@ pub struct PhysicalDeviceFeatures { /// /// However, we do populate this when creating a device if /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`] is requested. + /// + /// [`Instance::expose_adapter`]: super::Instance::expose_adapter + /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`]: wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE buffer_device_address: Option<vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR>, /// Features provided by `VK_KHR_ray_query`, @@ -95,12 +100,17 @@ pub struct PhysicalDeviceFeatures { /// this from `vkGetPhysicalDeviceFeatures2`. /// /// However, we do populate this when creating a device if ray tracing is requested. + /// + /// [`Instance::expose_adapter`]: super::Instance::expose_adapter ray_query: Option<vk::PhysicalDeviceRayQueryFeaturesKHR>, /// Features provided by `VK_KHR_zero_initialize_workgroup_memory`, promoted /// to Vulkan 1.3. zero_initialize_workgroup_memory: Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>, + + /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3. + subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlFeatures>, } // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. @@ -148,6 +158,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.ray_query { info = info.push_next(feature); } + if let Some(ref mut feature) = self.subgroup_size_control { + info = info.push_next(feature); + } info } @@ -175,6 +188,7 @@ impl PhysicalDeviceFeatures { /// [`Features`]: wgt::Features /// [`DownlevelFlags`]: wgt::DownlevelFlags /// [`PrivateCapabilities`]: super::PrivateCapabilities + /// [`add_to_device_create_builder`]: PhysicalDeviceFeatures::add_to_device_create_builder /// [`DeviceCreateInfoBuilder`]: vk::DeviceCreateInfoBuilder /// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions fn from_extensions_and_requested_features( @@ -434,6 +448,17 @@ impl PhysicalDeviceFeatures { } else { None }, + subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3 + || enabled_extensions.contains(&vk::ExtSubgroupSizeControlFn::name()) + { + Some( + vk::PhysicalDeviceSubgroupSizeControlFeatures::builder() + .subgroup_size_control(true) + .build(), + ) + } else { + None + }, } } @@ -442,6 +467,9 @@ impl PhysicalDeviceFeatures { /// Given `self`, together with the instance and physical device it was /// built from, and a `caps` also built from those, determine which wgpu /// features and downlevel flags the device can support. + /// + /// [`Features`]: wgt::Features + /// [`DownlevelFlags`]: wgt::DownlevelFlags fn to_wgpu( &self, instance: &ash::Instance, @@ -638,6 +666,34 @@ impl PhysicalDeviceFeatures { ); } + if let Some(ref subgroup) = caps.subgroup { + if (caps.device_api_version >= vk::API_VERSION_1_3 + || caps.supports_extension(vk::ExtSubgroupSizeControlFn::name())) + && subgroup.supported_operations.contains( + vk::SubgroupFeatureFlags::BASIC + | vk::SubgroupFeatureFlags::VOTE + | vk::SubgroupFeatureFlags::ARITHMETIC + | vk::SubgroupFeatureFlags::BALLOT + | vk::SubgroupFeatureFlags::SHUFFLE + | vk::SubgroupFeatureFlags::SHUFFLE_RELATIVE, + ) + { + features.set( + F::SUBGROUP, + subgroup + .supported_stages + .contains(vk::ShaderStageFlags::COMPUTE | vk::ShaderStageFlags::FRAGMENT), + ); + features.set( + F::SUBGROUP_VERTEX, + subgroup + .supported_stages + .contains(vk::ShaderStageFlags::VERTEX), + ); + features.insert(F::SUBGROUP_BARRIER); + } + } + let supports_depth_format = |format| { supports_format( instance, @@ -773,6 +829,13 @@ pub struct PhysicalDeviceProperties { /// `VK_KHR_driver_properties` extension, promoted to Vulkan 1.2. driver: Option<vk::PhysicalDeviceDriverPropertiesKHR>, + /// Additional `vk::PhysicalDevice` properties from Vulkan 1.1. + subgroup: Option<vk::PhysicalDeviceSubgroupProperties>, + + /// Additional `vk::PhysicalDevice` properties from the + /// `VK_EXT_subgroup_size_control` extension, promoted to Vulkan 1.3. + subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlProperties>, + /// The device API version. /// /// Which is the version of Vulkan supported for device-level functionality. @@ -888,6 +951,11 @@ impl PhysicalDeviceProperties { if self.supports_extension(vk::ExtImageRobustnessFn::name()) { extensions.push(vk::ExtImageRobustnessFn::name()); } + + // Require `VK_EXT_subgroup_size_control` if the associated feature was requested + if requested_features.contains(wgt::Features::SUBGROUP) { + extensions.push(vk::ExtSubgroupSizeControlFn::name()); + } } // Optional `VK_KHR_swapchain_mutable_format` @@ -987,6 +1055,14 @@ impl PhysicalDeviceProperties { .min(crate::MAX_VERTEX_BUFFERS as u32), max_vertex_attributes: limits.max_vertex_input_attributes, max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride, + min_subgroup_size: self + .subgroup_size_control + .map(|subgroup_size| subgroup_size.min_subgroup_size) + .unwrap_or(0), + max_subgroup_size: self + .subgroup_size_control + .map(|subgroup_size| subgroup_size.max_subgroup_size) + .unwrap_or(0), max_push_constant_size: limits.max_push_constants_size, min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32, min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32, @@ -1042,6 +1118,9 @@ impl super::InstanceShared { let supports_driver_properties = capabilities.device_api_version >= vk::API_VERSION_1_2 || capabilities.supports_extension(vk::KhrDriverPropertiesFn::name()); + let supports_subgroup_size_control = capabilities.device_api_version + >= vk::API_VERSION_1_3 + || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name()); let supports_acceleration_structure = capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()); @@ -1075,6 +1154,20 @@ impl super::InstanceShared { builder = builder.push_next(next); } + if capabilities.device_api_version >= vk::API_VERSION_1_1 { + let next = capabilities + .subgroup + .insert(vk::PhysicalDeviceSubgroupProperties::default()); + builder = builder.push_next(next); + } + + if supports_subgroup_size_control { + let next = capabilities + .subgroup_size_control + .insert(vk::PhysicalDeviceSubgroupSizeControlProperties::default()); + builder = builder.push_next(next); + } + let mut properties2 = builder.build(); unsafe { get_device_properties.get_physical_device_properties2(phd, &mut properties2); @@ -1190,6 +1283,16 @@ impl super::InstanceShared { builder = builder.push_next(next); } + // `VK_EXT_subgroup_size_control` is promoted to 1.3 + if capabilities.device_api_version >= vk::API_VERSION_1_3 + || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name()) + { + let next = features + .subgroup_size_control + .insert(vk::PhysicalDeviceSubgroupSizeControlFeatures::default()); + builder = builder.push_next(next); + } + let mut features2 = builder.build(); unsafe { get_device_properties.get_physical_device_features2(phd, &mut features2); @@ -1382,6 +1485,9 @@ impl super::Instance { }), image_format_list: phd_capabilities.device_api_version >= vk::API_VERSION_1_2 || phd_capabilities.supports_extension(vk::KhrImageFormatListFn::name()), + subgroup_size_control: phd_features + .subgroup_size_control + .map_or(false, |ext| ext.subgroup_size_control == vk::TRUE), }; let capabilities = crate::Capabilities { limits: phd_capabilities.to_wgpu_limits(), @@ -1581,6 +1687,15 @@ impl super::Adapter { capabilities.push(spv::Capability::Geometry); } + if features.intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) { + capabilities.push(spv::Capability::GroupNonUniform); + capabilities.push(spv::Capability::GroupNonUniformVote); + capabilities.push(spv::Capability::GroupNonUniformArithmetic); + capabilities.push(spv::Capability::GroupNonUniformBallot); + capabilities.push(spv::Capability::GroupNonUniformShuffle); + capabilities.push(spv::Capability::GroupNonUniformShuffleRelative); + } + if features.intersects( wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, @@ -1616,7 +1731,13 @@ impl super::Adapter { true, // could check `super::Workarounds::SEPARATE_ENTRY_POINTS` ); spv::Options { - lang_version: (1, 0), + lang_version: if features + .intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) + { + (1, 3) + } else { + (1, 0) + }, flags, capabilities: Some(capabilities.iter().cloned().collect()), bounds_check_policies: naga::proc::BoundsCheckPolicies { diff --git a/third_party/rust/wgpu-hal/src/vulkan/command.rs b/third_party/rust/wgpu-hal/src/vulkan/command.rs index 43a2471954..ceb44dfbe6 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/command.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/command.rs @@ -104,6 +104,11 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn discard_encoding(&mut self) { + // Safe use requires this is not called in the "closed" state, so the buffer + // shouldn't be null. Assert this to make sure we're not pushing null + // buffers to the discard pile. + assert_ne!(self.active, vk::CommandBuffer::null()); + self.discarded.push(self.active); self.active = vk::CommandBuffer::null(); } diff --git a/third_party/rust/wgpu-hal/src/vulkan/conv.rs b/third_party/rust/wgpu-hal/src/vulkan/conv.rs index 8202c93aa3..fe284f32a9 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/conv.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/conv.rs @@ -399,6 +399,7 @@ pub fn map_vertex_format(vertex_format: wgt::VertexFormat) -> vk::Format { Vf::Float64x2 => vk::Format::R64G64_SFLOAT, Vf::Float64x3 => vk::Format::R64G64B64_SFLOAT, Vf::Float64x4 => vk::Format::R64G64B64A64_SFLOAT, + Vf::Unorm10_10_10_2 => vk::Format::A2B10G10R10_UNORM_PACK32, } } diff --git a/third_party/rust/wgpu-hal/src/vulkan/device.rs b/third_party/rust/wgpu-hal/src/vulkan/device.rs index 70028cc700..ec392533a0 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/device.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/device.rs @@ -2,6 +2,7 @@ use super::conv; use arrayvec::ArrayVec; use ash::{extensions::khr, vk}; +use naga::back::spv::ZeroInitializeWorkgroupMemoryMode; use parking_lot::Mutex; use std::{ @@ -737,7 +738,8 @@ impl super::Device { }; let needs_temp_options = !runtime_checks || !binding_map.is_empty() - || naga_shader.debug_source.is_some(); + || naga_shader.debug_source.is_some() + || !stage.zero_initialize_workgroup_memory; let mut temp_options; let options = if needs_temp_options { temp_options = self.naga_options.clone(); @@ -760,27 +762,40 @@ impl super::Device { file_name: debug.file_name.as_ref().as_ref(), }) } + if !stage.zero_initialize_workgroup_memory { + temp_options.zero_initialize_workgroup_memory = + ZeroInitializeWorkgroupMemoryMode::None; + } &temp_options } else { &self.naga_options }; + + let (module, info) = naga::back::pipeline_constants::process_overrides( + &naga_shader.module, + &naga_shader.info, + stage.constants, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?; + let spv = { profiling::scope!("naga::spv::write_vec"); - naga::back::spv::write_vec( - &naga_shader.module, - &naga_shader.info, - options, - Some(&pipeline_options), - ) + naga::back::spv::write_vec(&module, &info, options, Some(&pipeline_options)) } .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?; self.create_shader_module_impl(&spv)? } }; + let mut flags = vk::PipelineShaderStageCreateFlags::empty(); + if self.shared.private_caps.subgroup_size_control { + flags |= vk::PipelineShaderStageCreateFlags::ALLOW_VARYING_SUBGROUP_SIZE + } + let entry_point = CString::new(stage.entry_point).unwrap(); let create_info = vk::PipelineShaderStageCreateInfo::builder() + .flags(flags) .stage(conv::map_shader_stage(stage_flags)) .module(vk_module) .name(&entry_point) @@ -1587,6 +1602,7 @@ impl crate::Device for super::Device { .shared .workarounds .contains(super::Workarounds::SEPARATE_ENTRY_POINTS) + || !naga_shader.module.overrides.is_empty() { return Ok(super::ShaderModule::Intermediate { naga_shader, diff --git a/third_party/rust/wgpu-hal/src/vulkan/mod.rs b/third_party/rust/wgpu-hal/src/vulkan/mod.rs index 0cd385045c..d1ea82772e 100644 --- a/third_party/rust/wgpu-hal/src/vulkan/mod.rs +++ b/third_party/rust/wgpu-hal/src/vulkan/mod.rs @@ -238,6 +238,7 @@ struct PrivateCapabilities { robust_image_access2: bool, zero_initialize_workgroup_memory: bool, image_format_list: bool, + subgroup_size_control: bool, } bitflags::bitflags!( @@ -413,6 +414,15 @@ pub struct TextureView { attachment: FramebufferAttachment, } +impl TextureView { + /// # Safety + /// + /// - The image view handle must not be manually destroyed + pub unsafe fn raw_handle(&self) -> vk::ImageView { + self.raw + } +} + #[derive(Debug)] pub struct Sampler { raw: vk::Sampler, @@ -438,6 +448,7 @@ pub struct BindGroup { set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>, } +/// Miscellaneous allocation recycling pool for `CommandAllocator`. #[derive(Default)] struct Temp { marker: Vec<u8>, @@ -467,11 +478,31 @@ impl Temp { pub struct CommandEncoder { raw: vk::CommandPool, device: Arc<DeviceShared>, + + /// The current command buffer, if `self` is in the ["recording"] + /// state. + /// + /// ["recording"]: crate::CommandEncoder + /// + /// If non-`null`, the buffer is in the Vulkan "recording" state. active: vk::CommandBuffer, + + /// What kind of pass we are currently within: compute or render. bind_point: vk::PipelineBindPoint, + + /// Allocation recycling pool for this encoder. temp: Temp, + + /// A pool of available command buffers. + /// + /// These are all in the Vulkan "initial" state. free: Vec<vk::CommandBuffer>, + + /// A pool of discarded command buffers. + /// + /// These could be in any Vulkan state except "pending". discarded: Vec<vk::CommandBuffer>, + /// If this is true, the active renderpass enabled a debug span, /// and needs to be disabled on renderpass close. rpass_debug_marker_active: bool, @@ -481,6 +512,15 @@ pub struct CommandEncoder { end_of_pass_timer_query: Option<(vk::QueryPool, u32)>, } +impl CommandEncoder { + /// # Safety + /// + /// - The command buffer handle must not be manually destroyed + pub unsafe fn raw_handle(&self) -> vk::CommandBuffer { + self.active + } +} + impl fmt::Debug for CommandEncoder { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("CommandEncoder") @@ -519,9 +559,47 @@ pub struct QuerySet { raw: vk::QueryPool, } +/// The [`Api::Fence`] type for [`vulkan::Api`]. +/// +/// This is an `enum` because there are two possible implementations of +/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of +/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but +/// require non-1.0 features. +/// +/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if +/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`] +/// otherwise. +/// +/// [`Api::Fence`]: crate::Api::Fence +/// [`vulkan::Api`]: Api +/// [`Device::create_fence`]: crate::Device::create_fence +/// [`TimelineSemaphore`]: Fence::TimelineSemaphore +/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore +/// [`FencePool`]: Fence::FencePool #[derive(Debug)] pub enum Fence { + /// A Vulkan [timeline semaphore]. + /// + /// These are simpler to use than Vulkan fences, since timeline semaphores + /// work exactly the way [`wpgu_hal::Api::Fence`] is specified to work. + /// + /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores + /// [`wpgu_hal::Api::Fence`]: crate::Api::Fence TimelineSemaphore(vk::Semaphore), + + /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`]. + /// + /// The effective [`FenceValue`] of this variant is the greater of + /// `last_completed` and the maximum value associated with a signalled fence + /// in `active`. + /// + /// Fences are available in all versions of Vulkan, but since they only have + /// two states, "signaled" and "unsignaled", we need to use a separate fence + /// for each queue submission we might want to wait for, and remember which + /// [`FenceValue`] each one represents. + /// + /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences + /// [`FenceValue`]: crate::FenceValue FencePool { last_completed: crate::FenceValue, /// The pending fence values have to be ascending. @@ -531,21 +609,32 @@ pub enum Fence { } impl Fence { + /// Return the highest [`FenceValue`] among the signalled fences in `active`. + /// + /// As an optimization, assume that we already know that the fence has + /// reached `last_completed`, and don't bother checking fences whose values + /// are less than that: those fences remain in the `active` array only + /// because we haven't called `maintain` yet to clean them up. + /// + /// [`FenceValue`]: crate::FenceValue fn check_active( device: &ash::Device, - mut max_value: crate::FenceValue, + mut last_completed: crate::FenceValue, active: &[(crate::FenceValue, vk::Fence)], ) -> Result<crate::FenceValue, crate::DeviceError> { for &(value, raw) in active.iter() { unsafe { - if value > max_value && device.get_fence_status(raw)? { - max_value = value; + if value > last_completed && device.get_fence_status(raw)? { + last_completed = value; } } } - Ok(max_value) + Ok(last_completed) } + /// Return the highest signalled [`FenceValue`] for `self`. + /// + /// [`FenceValue`]: crate::FenceValue fn get_latest( &self, device: &ash::Device, @@ -566,6 +655,18 @@ impl Fence { } } + /// Trim the internal state of this [`Fence`]. + /// + /// This function has no externally visible effect, but you should call it + /// periodically to keep this fence's resource consumption under control. + /// + /// For fences using the [`FencePool`] implementation, this function + /// recycles fences that have been signaled. If you don't call this, + /// [`Queue::submit`] will just keep allocating a new Vulkan fence every + /// time it's called. + /// + /// [`FencePool`]: Fence::FencePool + /// [`Queue::submit`]: crate::Queue::submit fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> { match *self { Self::TimelineSemaphore(_) => {} |