diff options
Diffstat (limited to 'third_party/rust/wgpu-hal')
60 files changed, 35479 insertions, 0 deletions
diff --git a/third_party/rust/wgpu-hal/.cargo-checksum.json b/third_party/rust/wgpu-hal/.cargo-checksum.json new file mode 100644 index 0000000000..65fb8499cb --- /dev/null +++ b/third_party/rust/wgpu-hal/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"24691ea259ad568f29d8b25dff9720f0f30dbaaf47df1b7c223bb2e1dc2a943f","LICENSE.APACHE":"a6cba85bc92e0cff7a450b1d873c0eaa2e9fc96bf472df0247a26bec77bf3ff9","LICENSE.MIT":"c7fea58d1cfe49634cd92e54fc10a9d871f4b275321a4cd8c09e449122caaeb4","README.md":"099ee611a911dc19330a61bffcde13663929a51b25ac528ee33ea796d695491e","build.rs":"c80bdc0152a00471eec6ed0dd0f7d55d0b975498a00ba05e94100c84ad639a49","examples/halmark/main.rs":"4604737f714943383c57feac2b8468ecf15e9e60c54a5303455e9953ec5c79fb","examples/halmark/shader.wgsl":"26c256ec36d6f0e9a1647431ca772766bee4382d64eaa718ba7b488dcfb6bcca","examples/raw-gles.em.html":"70fbe68394a1a4522192de1dcfaf7d399f60d7bdf5de70b708f9bb0417427546","examples/raw-gles.rs":"095113a1ba0851652a77aabfc8fa6ea7edcc2d09e91fd1e5009ead87d5998ea9","examples/ray-traced-triangle/main.rs":"955c2b8700c3b2daf14e9ef963ff499ed185b6f349dbc63caa422b2cf4942a1f","examples/ray-traced-triangle/shader.wgsl":"cc10caf92746724a71f6dd0dbc3a71e57b37c7d1d83278556805a535c0728a9d","src/auxil/dxgi/conv.rs":"760cd4eaa79b530368a30140b96bf73ac4fbdb4025eb95f0bed581638c8bb1cb","src/auxil/dxgi/exception.rs":"f0cfb5a0adcdc3b6db909601fee51ad51368f5da269bcd46e4dbea45a3bec4b1","src/auxil/dxgi/factory.rs":"5f861fbfe2f4cce08722a95283549b8f62b96f24a306d080d9f1730ae53501d8","src/auxil/dxgi/mod.rs":"a202564d9ac97530b16a234b87d180cd345aae705e082a9b1177dcde813645f9","src/auxil/dxgi/result.rs":"20c8eb03d738062dff198feca6327addb9882ed0462be842c789eadf7dca0573","src/auxil/dxgi/time.rs":"b6f966b250e9424d5d7e4065f2108cba87197c1e30baae6d87083055d1bc5a4b","src/auxil/mod.rs":"720ef2aae258733322a3274fd858f91effb8951dabaf7bbfd8a9a0be2d2dba97","src/auxil/renderdoc.rs":"c2f849f70f576b0c9b0d32dd155b6a6353f74dff59cbeeaa994a12789d047c0f","src/dx12/adapt
er.rs":"5143d009ab75950df6f6e311ca07108dedd373b99029d0eac9b882e4880893ea","src/dx12/command.rs":"bb3cc2ff1e77c0e4434eef8cab57e9018a1d00738fda71b860cdfc4fe802c0a4","src/dx12/conv.rs":"94d35f117ae003b07049f3a0bc6c45a0ffda9fb8053233d39c173cfb1b644403","src/dx12/descriptor.rs":"e06eb08bee4c805fa76b6ab791893b5b563ee60de9c8f8d8e0e21ab97ade5664","src/dx12/device.rs":"2a72beac1496b1682700e07923e9ad6ce7271e5a88641bf6c6d0b9b893b46cd9","src/dx12/instance.rs":"351a4e0d526de8eafc74bf5f01a41da48efa39e0c66704a85da72e1140b159d4","src/dx12/mod.rs":"4ec20d1082f10c7429db0fcdc6261210a0ff1565e87f4ab799719dc00aa636e0","src/dx12/shader_compilation.rs":"419ce7fe4df2973845851fac045dab21157eec6b26a573012f22fa41fc130b5b","src/dx12/suballocation.rs":"6939fc36223a15cc070c744d0418f9ac6fa2829d794af17cdea7c61eb5f8d2c0","src/dx12/types.rs":"9573736baaa0ef607367c3b72144556d24faf677a26bb8df49a4372a1348e06b","src/dx12/view.rs":"792772e9c87840dcd045b7381a03162eb4a501492a95ca586e77e81aed621c67","src/empty.rs":"5c3a5e39d45b4522ff3496fe6ec3b4a7afd906b6095dff1cad113c826aa9ea62","src/gles/adapter.rs":"05dd64c42b8b8265cfa1913dfdb9d1d7730abc05d189ed48bb0aa190debd90f6","src/gles/command.rs":"7118e42376e403e0d13db007534529d0e0650ff938a327cbdb0d6c90bee876de","src/gles/conv.rs":"5d15d3a33032d32ff99bc338fba0689fa54c76d0714e335fe48523d841df386f","src/gles/device.rs":"087fcfaf796b3fba2e6d638bb9840df941dd89aae43fcd8f528baf7b9ad9bd05","src/gles/egl.rs":"5ae9499e56f48ebe1797533c091529e77494ef69e32ea23e08e9135ba63188d1","src/gles/emscripten.rs":"19bb73a9d140645f3f32cd48b002151711a9b8456e213eab5f3a2be79239e147","src/gles/mod.rs":"772cf714874d12d815f2b0cf3309fd970545c582e8c2bc56eb1b266b013f5afb","src/gles/queue.rs":"9159af1636e838462ec562f25bbcacd15bc0a7e63606a3352e04f1f39818c61b","src/gles/shaders/clear.frag":"9133ed8ed97d3641fbb6b5f5ea894a3554c629ccc1b80a5fc9221d7293aa1954","src/gles/shaders/clear.vert":"a543768725f4121ff2e9e1fb5b00644931e9d6f2f946c0ef01968afb5a135abd","src/gles/shaders/srgb_present.frag":"dd9a43c339a2f
a4ccf7f6a1854c6f400cabf271a7d5e9230768e9f39d47f3ff5","src/gles/shaders/srgb_present.vert":"6e85d489403d80b81cc94790730bb53b309dfc5eeede8f1ea3412a660f31d357","src/gles/web.rs":"d263695d45736d3c6ec3528c8c33fe6cf3767d3429a13a92d88b4fdc7b6340fb","src/gles/wgl.rs":"80351e261e2eaa47fff3ec4118d4ce781b24ab9a40072c8b3525baf09f041aca","src/lib.rs":"93873ebd663ed115a4bdd554eb5e33658658c89dd2fd4a5b33eda57417ab8d7d","src/metal/adapter.rs":"48747609f839dd9dbb5f6bc0a89f7f7017458e40dabc375efb07fbc93e36dfaa","src/metal/command.rs":"661b38a75d4f4cd1b0d6957f1f09db0743ec3a13bbafba9baa931894ee193f48","src/metal/conv.rs":"0bce6a8d0ccef16783475803d70d35e03ab7938c19374e22c9d253abe1f8b111","src/metal/device.rs":"c5deeecf475e0aa4b2027c656ea19207716f84b56cfa7c9132dca504d1abebfb","src/metal/mod.rs":"17665544754102ccf5f4bb1ccc0493ee8d2dbe45b22470bddaf9e609c24c0774","src/metal/surface.rs":"f2b9b65d4117db2b16c04469c573358eb65de104d5a72aa02da8483ee243cbd3","src/metal/time.rs":"c32d69f30e846dfcc0e39e01097fb80df63b2bebb6586143bb62494999850246","src/vulkan/adapter.rs":"a0f365b9d4fea8ec81b8f6211648a78ecf1e8442aaed3f41819b59ce1c66f05d","src/vulkan/command.rs":"e5a88eab59b3864cdf44ba2231270e16045505dc549b8b90251031de452ba826","src/vulkan/conv.rs":"7e6266e3a0b7d0b8d5d51362a0386a84bc047350eeac663b6352a94d5e5c0a87","src/vulkan/device.rs":"9824d597dbb51030bd337e80bb0f1eab6fdb6935fc87dfd8beae2c1f1048fbcf","src/vulkan/instance.rs":"fb583496865eb67b3997503ec58e8e2518fc88175aa3cc4c19b8022be267f1ec","src/vulkan/mod.rs":"5c873db859e740876e072bed752e76940dd97a35f3d532509a6357cb0fb9119b"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/Cargo.toml b/third_party/rust/wgpu-hal/Cargo.toml new file mode 100644 index 0000000000..88f96fb59b --- /dev/null +++ b/third_party/rust/wgpu-hal/Cargo.toml @@ -0,0 +1,250 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.70" +name = "wgpu-hal" +version = "0.19.0" +authors = ["gfx-rs developers"] +description = "WebGPU hardware abstraction layer" +homepage = "https://wgpu.rs/" +readme = "README.md" +keywords = ["graphics"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/gfx-rs/wgpu" + +[package.metadata.docs.rs] +features = [ + "vulkan", + "gles", + "renderdoc", +] +rustdoc-args = [ + "--cfg", + "docsrs", +] +targets = [ + "x86_64-unknown-linux-gnu", + "x86_64-apple-darwin", + "x86_64-pc-windows-msvc", + "wasm32-unknown-unknown", +] + +[lib] + +[[example]] +name = "halmark" + +[[example]] +name = "raw-gles" +required-features = ["gles"] + +[dependencies] +arrayvec = "0.7" +bitflags = "2" +log = "0.4" +once_cell = "1.19.0" +parking_lot = ">=0.11,<0.13" +raw-window-handle = "0.6" +rustc-hash = "1.1" +thiserror = "1" + +[dependencies.glow] +version = "0.13.1" +optional = true + +[dependencies.naga] +version = "0.19.0" +path = "../naga" +features = ["clone"] + +[dependencies.profiling] +version = "1" +default-features = false + +[dependencies.wgt] +version = "0.19.0" +path = "../wgpu-types" +package = "wgpu-types" + +[dev-dependencies] +cfg-if = "1" +env_logger = "0.10" +glam = "0.25.0" + +[dev-dependencies.naga] 
+version = "0.19.0" +path = "../naga" +features = ["wgsl-in"] + +[dev-dependencies.winit] +version = "0.29.10" +features = ["android-native-activity"] + +[build-dependencies] +cfg_aliases = "0.1" + +[features] +default = ["link"] +dx12 = [ + "naga/hlsl-out", + "d3d12", + "bit-set", + "libloading", + "range-alloc", + "winapi/std", + "winapi/winbase", + "winapi/d3d12", + "winapi/d3d12shader", + "winapi/d3d12sdklayers", + "winapi/dxgi1_6", +] +dxc_shader_compiler = ["hassle-rs"] +fragile-send-sync-non-atomic-wasm = ["wgt/fragile-send-sync-non-atomic-wasm"] +gles = [ + "naga/glsl-out", + "glow", + "glutin_wgl_sys", + "khronos-egl", + "libloading", +] +link = ["metal/link"] +metal = [ + "naga/msl-out", + "block", +] +renderdoc = [ + "libloading", + "renderdoc-sys", +] +vulkan = [ + "naga/spv-out", + "ash", + "gpu-alloc", + "gpu-descriptor", + "libloading", + "smallvec", +] +windows_rs = ["gpu-allocator"] + +[target."cfg(all(target_arch = \"wasm32\", not(target_os = \"emscripten\")))".dependencies] +js-sys = "0.3.67" +wasm-bindgen = "0.2.87" + +[target."cfg(all(target_arch = \"wasm32\", not(target_os = \"emscripten\")))".dependencies.web-sys] +version = "0.3.67" +features = [ + "Window", + "HtmlCanvasElement", + "WebGl2RenderingContext", + "OffscreenCanvas", +] + +[target."cfg(any(target_os=\"macos\", target_os=\"ios\"))".dependencies] +core-graphics-types = "0.1" +metal = "0.27.0" +objc = "0.2.5" + +[target."cfg(any(target_os=\"macos\", target_os=\"ios\"))".dependencies.block] +version = "0.1" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.ash] +version = "0.37.3" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.gpu-alloc] +version = "0.6" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.gpu-descriptor] +version = "0.2" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.khronos-egl] +version = "6" +features = ["dynamic"] +optional = true + 
+[target."cfg(not(target_arch = \"wasm32\"))".dependencies.libloading] +version = ">=0.7, <0.9" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.renderdoc-sys] +version = "1.0.0" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.smallvec] +version = "1" +features = ["union"] +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies] +glutin = "0.29.1" + +[target."cfg(target_os = \"android\")".dependencies] +android_system_properties = "0.1.1" + +[target."cfg(target_os = \"emscripten\")".dependencies.khronos-egl] +version = "6" +features = [ + "static", + "no-pkg-config", +] + +[target."cfg(target_os = \"emscripten\")".dependencies.libloading] +version = ">=0.7, <0.9" +optional = true + +[target."cfg(unix)".dependencies] +libc = "0.2" + +[target."cfg(windows)".dependencies.bit-set] +version = "0.5" +optional = true + +[target."cfg(windows)".dependencies.d3d12] +version = "0.19.0" +path = "../d3d12/" +features = ["libloading"] +optional = true + +[target."cfg(windows)".dependencies.glutin_wgl_sys] +version = "0.5" +optional = true + +[target."cfg(windows)".dependencies.gpu-allocator] +version = "0.25" +features = [ + "d3d12", + "public-winapi", +] +optional = true +default_features = false + +[target."cfg(windows)".dependencies.hassle-rs] +version = "0.11" +optional = true + +[target."cfg(windows)".dependencies.range-alloc] +version = "0.1" +optional = true + +[target."cfg(windows)".dependencies.winapi] +version = "0.3" +features = [ + "profileapi", + "libloaderapi", + "windef", + "winuser", + "dcomp", +] diff --git a/third_party/rust/wgpu-hal/LICENSE.APACHE b/third_party/rust/wgpu-hal/LICENSE.APACHE new file mode 100644 index 0000000000..d9a10c0d8e --- /dev/null +++ b/third_party/rust/wgpu-hal/LICENSE.APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/third_party/rust/wgpu-hal/LICENSE.MIT b/third_party/rust/wgpu-hal/LICENSE.MIT new file mode 100644 index 0000000000..4699691b8e --- /dev/null +++ b/third_party/rust/wgpu-hal/LICENSE.MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 The gfx-rs developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/rust/wgpu-hal/README.md b/third_party/rust/wgpu-hal/README.md new file mode 100644 index 0000000000..588baa3cf5 --- /dev/null +++ b/third_party/rust/wgpu-hal/README.md @@ -0,0 +1,23 @@ +*wgpu-hal* is an explicit low-level GPU abstraction powering *wgpu-core*. +It's a spiritual successor to [gfx-hal](https://github.com/gfx-rs/gfx), +but with reduced scope, and oriented towards WebGPU implementation goals. + +It has no overhead for validation or tracking, and the API translation overhead is kept to the bare minimum by the design of WebGPU. +This API can be used for resource-demanding applications and engines. 
+ +# Usage notes + +All of the API is `unsafe`. Documenting the exact safety requirements for the +state and function arguments is desired, but will likely be incomplete while the library is in early development. + +The returned errors are only for cases that the user can't anticipate, +such as running out-of-memory, or losing the device. +For the counter-example, there is no error for mapping a buffer that's not mappable. +As the buffer creator, the user should already know if they can map it. + +The API accepts iterators in order to avoid forcing the user to store data in particular containers. The implementation doesn't guarantee that any of the iterators are drained, unless stated otherwise by the function documentation. +For this reason, we recommend that iterators don't do any mutating work. + +# Debugging + +Most of the information in https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications still applies to this API, with an exception of API tracing/replay functionality, which is only available in *wgpu-core*. diff --git a/third_party/rust/wgpu-hal/build.rs b/third_party/rust/wgpu-hal/build.rs new file mode 100644 index 0000000000..7d17591605 --- /dev/null +++ b/third_party/rust/wgpu-hal/build.rs @@ -0,0 +1,15 @@ +fn main() { + cfg_aliases::cfg_aliases! 
{ + native: { not(target_arch = "wasm32") }, + send_sync: { any( + not(target_arch = "wasm32"), + all(feature = "fragile-send-sync-non-atomic-wasm", not(target_feature = "atomics")) + ) }, + webgl: { all(target_arch = "wasm32", not(target_os = "emscripten"), gles) }, + Emscripten: { all(target_os = "emscripten", gles) }, + dx12: { all(target_os = "windows", feature = "dx12") }, + gles: { all(feature = "gles") }, + metal: { all(any(target_os = "ios", target_os = "macos"), feature = "metal") }, + vulkan: { all(not(target_arch = "wasm32"), feature = "vulkan") } + } +} diff --git a/third_party/rust/wgpu-hal/examples/halmark/main.rs b/third_party/rust/wgpu-hal/examples/halmark/main.rs new file mode 100644 index 0000000000..c238f299e7 --- /dev/null +++ b/third_party/rust/wgpu-hal/examples/halmark/main.rs @@ -0,0 +1,852 @@ +//! This example shows basic usage of wgpu-hal by rendering +//! a ton of moving sprites, each with a separate texture and draw call. +extern crate wgpu_hal as hal; + +use hal::{ + Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, Surface as _, +}; +use raw_window_handle::{HasDisplayHandle, HasWindowHandle}; +use winit::{ + event::{ElementState, Event, KeyEvent, WindowEvent}, + event_loop::ControlFlow, + keyboard::{Key, NamedKey}, +}; + +use std::{ + borrow::{Borrow, Cow}, + iter, mem, ptr, + time::Instant, +}; + +const MAX_BUNNIES: usize = 1 << 20; +const BUNNY_SIZE: f32 = 0.15 * 256.0; +const GRAVITY: f32 = -9.8 * 100.0; +const MAX_VELOCITY: f32 = 750.0; +const COMMAND_BUFFER_PER_CONTEXT: usize = 100; +const DESIRED_MAX_LATENCY: u32 = 2; + +#[repr(C)] +#[derive(Clone, Copy)] +struct Globals { + mvp: [[f32; 4]; 4], + size: [f32; 2], + pad: [f32; 2], +} + +#[repr(C, align(256))] +#[derive(Clone, Copy)] +struct Locals { + position: [f32; 2], + velocity: [f32; 2], + color: u32, + _pad: u32, +} + +struct ExecutionContext<A: hal::Api> { + encoder: A::CommandEncoder, + fence: A::Fence, + fence_value: hal::FenceValue, + used_views: 
Vec<A::TextureView>, + used_cmd_bufs: Vec<A::CommandBuffer>, + frames_recorded: usize, +} + +impl<A: hal::Api> ExecutionContext<A> { + unsafe fn wait_and_clear(&mut self, device: &A::Device) { + device.wait(&self.fence, self.fence_value, !0).unwrap(); + self.encoder.reset_all(self.used_cmd_bufs.drain(..)); + for view in self.used_views.drain(..) { + device.destroy_texture_view(view); + } + self.frames_recorded = 0; + } +} + +#[allow(dead_code)] +struct Example<A: hal::Api> { + instance: A::Instance, + adapter: A::Adapter, + surface: A::Surface, + surface_format: wgt::TextureFormat, + device: A::Device, + queue: A::Queue, + global_group: A::BindGroup, + local_group: A::BindGroup, + global_group_layout: A::BindGroupLayout, + local_group_layout: A::BindGroupLayout, + pipeline_layout: A::PipelineLayout, + shader: A::ShaderModule, + pipeline: A::RenderPipeline, + bunnies: Vec<Locals>, + local_buffer: A::Buffer, + local_alignment: u32, + global_buffer: A::Buffer, + sampler: A::Sampler, + texture: A::Texture, + texture_view: A::TextureView, + contexts: Vec<ExecutionContext<A>>, + context_index: usize, + extent: [u32; 2], + start: Instant, +} + +impl<A: hal::Api> Example<A> { + fn init(window: &winit::window::Window) -> Result<Self, Box<dyn std::error::Error>> { + let instance_desc = hal::InstanceDescriptor { + name: "example", + flags: wgt::InstanceFlags::from_build_config().with_env(), + // Can't rely on having DXC available, so use FXC instead + dx12_shader_compiler: wgt::Dx12Compiler::Fxc, + gles_minor_version: wgt::Gles3MinorVersion::default(), + }; + let instance = unsafe { A::Instance::init(&instance_desc)? 
}; + let surface = { + let raw_window_handle = window.window_handle()?.as_raw(); + let raw_display_handle = window.display_handle()?.as_raw(); + + unsafe { + instance + .create_surface(raw_display_handle, raw_window_handle) + .unwrap() + } + }; + + let (adapter, capabilities) = unsafe { + let mut adapters = instance.enumerate_adapters(); + if adapters.is_empty() { + return Err("no adapters found".into()); + } + let exposed = adapters.swap_remove(0); + (exposed.adapter, exposed.capabilities) + }; + + let surface_caps = unsafe { adapter.surface_capabilities(&surface) } + .ok_or("failed to get surface capabilities")?; + log::info!("Surface caps: {:#?}", surface_caps); + + let hal::OpenDevice { device, queue } = unsafe { + adapter + .open(wgt::Features::empty(), &wgt::Limits::default()) + .unwrap() + }; + + let window_size: (u32, u32) = window.inner_size().into(); + let surface_config = hal::SurfaceConfiguration { + maximum_frame_latency: DESIRED_MAX_LATENCY.clamp( + *surface_caps.maximum_frame_latency.start(), + *surface_caps.maximum_frame_latency.end(), + ), + present_mode: wgt::PresentMode::Fifo, + composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, + format: wgt::TextureFormat::Bgra8UnormSrgb, + extent: wgt::Extent3d { + width: window_size.0, + height: window_size.1, + depth_or_array_layers: 1, + }, + usage: hal::TextureUses::COLOR_TARGET, + view_formats: vec![], + }; + unsafe { + surface.configure(&device, &surface_config).unwrap(); + }; + + let naga_shader = { + let shader_file = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("examples") + .join("halmark") + .join("shader.wgsl"); + let source = std::fs::read_to_string(shader_file).unwrap(); + let module = naga::front::wgsl::Frontend::new().parse(&source).unwrap(); + let info = naga::valid::Validator::new( + naga::valid::ValidationFlags::all(), + naga::valid::Capabilities::empty(), + ) + .validate(&module) + .unwrap(); + hal::NagaShader { + module: Cow::Owned(module), + info, + debug_source: None, 
+ } + }; + let shader_desc = hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }; + let shader = unsafe { + device + .create_shader_module(&shader_desc, hal::ShaderInput::Naga(naga_shader)) + .unwrap() + }; + + let global_bgl_desc = hal::BindGroupLayoutDescriptor { + label: None, + flags: hal::BindGroupLayoutFlags::empty(), + entries: &[ + wgt::BindGroupLayoutEntry { + binding: 0, + visibility: wgt::ShaderStages::VERTEX, + ty: wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: wgt::BufferSize::new(mem::size_of::<Globals>() as _), + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 1, + visibility: wgt::ShaderStages::FRAGMENT, + ty: wgt::BindingType::Texture { + sample_type: wgt::TextureSampleType::Float { filterable: true }, + view_dimension: wgt::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 2, + visibility: wgt::ShaderStages::FRAGMENT, + ty: wgt::BindingType::Sampler(wgt::SamplerBindingType::Filtering), + count: None, + }, + ], + }; + + let global_group_layout = + unsafe { device.create_bind_group_layout(&global_bgl_desc).unwrap() }; + + let local_bgl_desc = hal::BindGroupLayoutDescriptor { + label: None, + flags: hal::BindGroupLayoutFlags::empty(), + entries: &[wgt::BindGroupLayoutEntry { + binding: 0, + visibility: wgt::ShaderStages::VERTEX, + ty: wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: true, + min_binding_size: wgt::BufferSize::new(mem::size_of::<Locals>() as _), + }, + count: None, + }], + }; + let local_group_layout = + unsafe { device.create_bind_group_layout(&local_bgl_desc).unwrap() }; + + let pipeline_layout_desc = hal::PipelineLayoutDescriptor { + label: None, + flags: hal::PipelineLayoutFlags::empty(), + bind_group_layouts: &[&global_group_layout, &local_group_layout], + push_constant_ranges: &[], + }; + let pipeline_layout = unsafe { + 
device + .create_pipeline_layout(&pipeline_layout_desc) + .unwrap() + }; + + let pipeline_desc = hal::RenderPipelineDescriptor { + label: None, + layout: &pipeline_layout, + vertex_stage: hal::ProgrammableStage { + module: &shader, + entry_point: "vs_main", + }, + vertex_buffers: &[], + fragment_stage: Some(hal::ProgrammableStage { + module: &shader, + entry_point: "fs_main", + }), + primitive: wgt::PrimitiveState { + topology: wgt::PrimitiveTopology::TriangleStrip, + ..wgt::PrimitiveState::default() + }, + depth_stencil: None, + multisample: wgt::MultisampleState::default(), + color_targets: &[Some(wgt::ColorTargetState { + format: surface_config.format, + blend: Some(wgt::BlendState::ALPHA_BLENDING), + write_mask: wgt::ColorWrites::default(), + })], + multiview: None, + }; + let pipeline = unsafe { device.create_render_pipeline(&pipeline_desc).unwrap() }; + + let texture_data = [0xFFu8; 4]; + + let staging_buffer_desc = hal::BufferDescriptor { + label: Some("stage"), + size: texture_data.len() as wgt::BufferAddress, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }; + let staging_buffer = unsafe { device.create_buffer(&staging_buffer_desc).unwrap() }; + unsafe { + let mapping = device + .map_buffer(&staging_buffer, 0..staging_buffer_desc.size) + .unwrap(); + ptr::copy_nonoverlapping( + texture_data.as_ptr(), + mapping.ptr.as_ptr(), + texture_data.len(), + ); + device.unmap_buffer(&staging_buffer).unwrap(); + assert!(mapping.is_coherent); + } + + let texture_desc = hal::TextureDescriptor { + label: None, + size: wgt::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgt::TextureDimension::D2, + format: wgt::TextureFormat::Rgba8UnormSrgb, + usage: hal::TextureUses::COPY_DST | hal::TextureUses::RESOURCE, + memory_flags: hal::MemoryFlags::empty(), + view_formats: vec![], + }; + let texture = 
unsafe { device.create_texture(&texture_desc).unwrap() }; + + let cmd_encoder_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &queue, + }; + let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() }; + unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; + { + let buffer_barrier = hal::BufferBarrier { + buffer: &staging_buffer, + usage: hal::BufferUses::empty()..hal::BufferUses::COPY_SRC, + }; + let texture_barrier1 = hal::TextureBarrier { + texture: &texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COPY_DST, + }; + let texture_barrier2 = hal::TextureBarrier { + texture: &texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_DST..hal::TextureUses::RESOURCE, + }; + let copy = hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: 0, + bytes_per_row: Some(4), + rows_per_image: None, + }, + texture_base: hal::TextureCopyBase { + origin: wgt::Origin3d::ZERO, + mip_level: 0, + array_layer: 0, + aspect: hal::FormatAspects::COLOR, + }, + size: hal::CopyExtent { + width: 1, + height: 1, + depth: 1, + }, + }; + unsafe { + cmd_encoder.transition_buffers(iter::once(buffer_barrier)); + cmd_encoder.transition_textures(iter::once(texture_barrier1)); + cmd_encoder.copy_buffer_to_texture(&staging_buffer, &texture, iter::once(copy)); + cmd_encoder.transition_textures(iter::once(texture_barrier2)); + } + } + + let sampler_desc = hal::SamplerDescriptor { + label: None, + address_modes: [wgt::AddressMode::ClampToEdge; 3], + mag_filter: wgt::FilterMode::Linear, + min_filter: wgt::FilterMode::Nearest, + mipmap_filter: wgt::FilterMode::Nearest, + lod_clamp: 0.0..32.0, + compare: None, + anisotropy_clamp: 1, + border_color: None, + }; + let sampler = unsafe { device.create_sampler(&sampler_desc).unwrap() }; + + let globals = Globals { + // cgmath::ortho() projection + mvp: [ + [2.0 / window_size.0 as f32, 0.0, 
0.0, 0.0], + [0.0, 2.0 / window_size.1 as f32, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [-1.0, -1.0, 0.0, 1.0], + ], + size: [BUNNY_SIZE; 2], + pad: [0.0; 2], + }; + + let global_buffer_desc = hal::BufferDescriptor { + label: Some("global"), + size: mem::size_of::<Globals>() as wgt::BufferAddress, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + memory_flags: hal::MemoryFlags::PREFER_COHERENT, + }; + let global_buffer = unsafe { + let buffer = device.create_buffer(&global_buffer_desc).unwrap(); + let mapping = device + .map_buffer(&buffer, 0..global_buffer_desc.size) + .unwrap(); + ptr::copy_nonoverlapping( + &globals as *const Globals as *const u8, + mapping.ptr.as_ptr(), + mem::size_of::<Globals>(), + ); + device.unmap_buffer(&buffer).unwrap(); + assert!(mapping.is_coherent); + buffer + }; + + let local_alignment = wgt::math::align_to( + mem::size_of::<Locals>() as u32, + capabilities.limits.min_uniform_buffer_offset_alignment, + ); + let local_buffer_desc = hal::BufferDescriptor { + label: Some("local"), + size: (MAX_BUNNIES as wgt::BufferAddress) * (local_alignment as wgt::BufferAddress), + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + memory_flags: hal::MemoryFlags::PREFER_COHERENT, + }; + let local_buffer = unsafe { device.create_buffer(&local_buffer_desc).unwrap() }; + + let view_desc = hal::TextureViewDescriptor { + label: None, + format: texture_desc.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::RESOURCE, + range: wgt::ImageSubresourceRange::default(), + }; + let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; + + let global_group = { + let global_buffer_binding = hal::BufferBinding { + buffer: &global_buffer, + offset: 0, + size: None, + }; + let texture_binding = hal::TextureBinding { + view: &texture_view, + usage: hal::TextureUses::RESOURCE, + }; + let global_group_desc = hal::BindGroupDescriptor { + label: Some("global"), + layout: 
&global_group_layout, + buffers: &[global_buffer_binding], + samplers: &[&sampler], + textures: &[texture_binding], + acceleration_structures: &[], + entries: &[ + hal::BindGroupEntry { + binding: 0, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 1, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 2, + resource_index: 0, + count: 1, + }, + ], + }; + unsafe { device.create_bind_group(&global_group_desc).unwrap() } + }; + + let local_group = { + let local_buffer_binding = hal::BufferBinding { + buffer: &local_buffer, + offset: 0, + size: wgt::BufferSize::new(mem::size_of::<Locals>() as _), + }; + let local_group_desc = hal::BindGroupDescriptor { + label: Some("local"), + layout: &local_group_layout, + buffers: &[local_buffer_binding], + samplers: &[], + textures: &[], + acceleration_structures: &[], + entries: &[hal::BindGroupEntry { + binding: 0, + resource_index: 0, + count: 1, + }], + }; + unsafe { device.create_bind_group(&local_group_desc).unwrap() } + }; + + let init_fence_value = 1; + let fence = unsafe { + let mut fence = device.create_fence().unwrap(); + let init_cmd = cmd_encoder.end_encoding().unwrap(); + queue + .submit(&[&init_cmd], &[], Some((&mut fence, init_fence_value))) + .unwrap(); + device.wait(&fence, init_fence_value, !0).unwrap(); + device.destroy_buffer(staging_buffer); + cmd_encoder.reset_all(iter::once(init_cmd)); + fence + }; + + Ok(Example { + instance, + surface, + surface_format: surface_config.format, + adapter, + device, + queue, + pipeline_layout, + shader, + pipeline, + global_group, + local_group, + global_group_layout, + local_group_layout, + bunnies: Vec::new(), + local_buffer, + local_alignment, + global_buffer, + sampler, + texture, + texture_view, + contexts: vec![ExecutionContext { + encoder: cmd_encoder, + fence, + fence_value: init_fence_value + 1, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + }], + context_index: 0, + extent: [window_size.0, 
/// Shuts the example down: signals the active context's fence, waits on every
/// context, and then destroys the HAL objects held by this struct.
///
/// Takes `self` by value because each `destroy_*` call consumes its resource.
fn exit(mut self) {
    unsafe {
        {
            // Empty submission: no command buffers, only a fence signal for
            // the context that was recording most recently.
            let ctx = &mut self.contexts[self.context_index];
            self.queue
                .submit(&[], &[], Some((&mut ctx.fence, ctx.fence_value)))
                .unwrap();
        }

        // NOTE(review): `wait_and_clear` is defined outside this view;
        // presumably it blocks on the context's fence and recycles its
        // command buffers and views before the encoder is destroyed.
        for mut ctx in self.contexts {
            ctx.wait_and_clear(&self.device);
            self.device.destroy_command_encoder(ctx.encoder);
            self.device.destroy_fence(ctx.fence);
        }

        // Bind groups are released before the buffers, texture view and
        // sampler that `init` bound into them.
        self.device.destroy_bind_group(self.local_group);
        self.device.destroy_bind_group(self.global_group);
        self.device.destroy_buffer(self.local_buffer);
        self.device.destroy_buffer(self.global_buffer);
        self.device.destroy_texture_view(self.texture_view);
        self.device.destroy_texture(self.texture);
        self.device.destroy_sampler(self.sampler);
        self.device.destroy_shader_module(self.shader);
        // The pipeline goes before the layouts it was created from.
        self.device.destroy_render_pipeline(self.pipeline);
        self.device
            .destroy_bind_group_layout(self.local_group_layout);
        self.device
            .destroy_bind_group_layout(self.global_group_layout);
        self.device.destroy_pipeline_layout(self.pipeline_layout);

        // Surface, device/queue, then the adapter, in that order.
        self.surface.unconfigure(&self.device);
        self.device.exit(self.queue);
        self.instance.destroy_surface(self.surface);
        drop(self.adapter);
    }
}
+ } = event + { + let spawn_count = 64 + self.bunnies.len() / 2; + let elapsed = self.start.elapsed(); + let color = elapsed.as_nanos() as u32; + println!( + "Spawning {} bunnies, total at {}", + spawn_count, + self.bunnies.len() + spawn_count + ); + for i in 0..spawn_count { + let random = ((elapsed.as_nanos() * (i + 1) as u128) & 0xFF) as f32 / 255.0; + let speed = random * MAX_VELOCITY - (MAX_VELOCITY * 0.5); + self.bunnies.push(Locals { + position: [0.0, 0.5 * (self.extent[1] as f32)], + velocity: [speed, 0.0], + color, + _pad: 0, + }); + } + } + } + + fn render(&mut self) { + let delta = 0.01; + for bunny in self.bunnies.iter_mut() { + bunny.position[0] += bunny.velocity[0] * delta; + bunny.position[1] += bunny.velocity[1] * delta; + bunny.velocity[1] += GRAVITY * delta; + if (bunny.velocity[0] > 0.0 + && bunny.position[0] + 0.5 * BUNNY_SIZE > self.extent[0] as f32) + || (bunny.velocity[0] < 0.0 && bunny.position[0] - 0.5 * BUNNY_SIZE < 0.0) + { + bunny.velocity[0] *= -1.0; + } + if bunny.velocity[1] < 0.0 && bunny.position[1] < 0.5 * BUNNY_SIZE { + bunny.velocity[1] *= -1.0; + } + } + + if !self.bunnies.is_empty() { + let size = self.bunnies.len() * self.local_alignment as usize; + unsafe { + let mapping = self + .device + .map_buffer(&self.local_buffer, 0..size as wgt::BufferAddress) + .unwrap(); + ptr::copy_nonoverlapping( + self.bunnies.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + size, + ); + assert!(mapping.is_coherent); + self.device.unmap_buffer(&self.local_buffer).unwrap(); + } + } + + let ctx = &mut self.contexts[self.context_index]; + + let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture }; + + let target_barrier0 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COLOR_TARGET, + }; + unsafe { + ctx.encoder.begin_encoding(Some("frame")).unwrap(); + 
ctx.encoder.transition_textures(iter::once(target_barrier0)); + } + + let surface_view_desc = hal::TextureViewDescriptor { + label: None, + format: self.surface_format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COLOR_TARGET, + range: wgt::ImageSubresourceRange::default(), + }; + let surface_tex_view = unsafe { + self.device + .create_texture_view(surface_tex.borrow(), &surface_view_desc) + .unwrap() + }; + let pass_desc = hal::RenderPassDescriptor { + label: None, + extent: wgt::Extent3d { + width: self.extent[0], + height: self.extent[1], + depth_or_array_layers: 1, + }, + sample_count: 1, + color_attachments: &[Some(hal::ColorAttachment { + target: hal::Attachment { + view: &surface_tex_view, + usage: hal::TextureUses::COLOR_TARGET, + }, + resolve_target: None, + ops: hal::AttachmentOps::STORE, + clear_value: wgt::Color { + r: 0.1, + g: 0.2, + b: 0.3, + a: 1.0, + }, + })], + depth_stencil_attachment: None, + multiview: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + unsafe { + ctx.encoder.begin_render_pass(&pass_desc); + ctx.encoder.set_render_pipeline(&self.pipeline); + ctx.encoder + .set_bind_group(&self.pipeline_layout, 0, &self.global_group, &[]); + } + + for i in 0..self.bunnies.len() { + let offset = (i as wgt::DynamicOffset) * (self.local_alignment as wgt::DynamicOffset); + unsafe { + ctx.encoder + .set_bind_group(&self.pipeline_layout, 1, &self.local_group, &[offset]); + ctx.encoder.draw(0, 4, 0, 1); + } + } + + ctx.frames_recorded += 1; + let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT; + + let target_barrier1 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COLOR_TARGET..hal::TextureUses::PRESENT, + }; + unsafe { + ctx.encoder.end_render_pass(); + ctx.encoder.transition_textures(iter::once(target_barrier1)); + } + + unsafe { + let cmd_buf = ctx.encoder.end_encoding().unwrap(); + let fence_param = if do_fence 
{ + Some((&mut ctx.fence, ctx.fence_value)) + } else { + None + }; + self.queue + .submit(&[&cmd_buf], &[&surface_tex], fence_param) + .unwrap(); + self.queue.present(&self.surface, surface_tex).unwrap(); + ctx.used_cmd_bufs.push(cmd_buf); + ctx.used_views.push(surface_tex_view); + }; + + if do_fence { + log::debug!("Context switch from {}", self.context_index); + let old_fence_value = ctx.fence_value; + if self.contexts.len() == 1 { + let hal_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &self.queue, + }; + self.contexts.push(unsafe { + ExecutionContext { + encoder: self.device.create_command_encoder(&hal_desc).unwrap(), + fence: self.device.create_fence().unwrap(), + fence_value: 0, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + } + }); + } + self.context_index = (self.context_index + 1) % self.contexts.len(); + let next = &mut self.contexts[self.context_index]; + unsafe { + next.wait_and_clear(&self.device); + } + next.fence_value = old_fence_value + 1; + } + } +} + +cfg_if::cfg_if! 
{ + // Apple + Metal + if #[cfg(all(any(target_os = "macos", target_os = "ios"), feature = "metal"))] { + type Api = hal::api::Metal; + } + // Non-Wasm + Vulkan + else if #[cfg(all(not(target_arch = "wasm32"), feature = "vulkan"))] { + type Api = hal::api::Vulkan; + } + // Windows + DX12 + else if #[cfg(all(windows, feature = "dx12"))] { + type Api = hal::api::Dx12; + } + // Anything + GLES + else if #[cfg(feature = "gles")] { + type Api = hal::api::Gles; + } + // Fallback + else { + type Api = hal::api::Empty; + } +} + +fn main() { + env_logger::init(); + + let event_loop = winit::event_loop::EventLoop::new().unwrap(); + let window = winit::window::WindowBuilder::new() + .with_title("hal-bunnymark") + .build(&event_loop) + .unwrap(); + + let example_result = Example::<Api>::init(&window); + let mut example = Some(example_result.expect("Selected backend is not supported")); + + let mut last_frame_inst = Instant::now(); + let (mut frame_count, mut accum_time) = (0, 0.0); + + event_loop + .run(move |event, target| { + let _ = &window; // force ownership by the closure + target.set_control_flow(ControlFlow::Poll); + + match event { + Event::LoopExiting => { + example.take().unwrap().exit(); + } + Event::WindowEvent { event, .. } => match event { + WindowEvent::KeyboardInput { + event: + KeyEvent { + logical_key: Key::Named(NamedKey::Escape), + state: ElementState::Pressed, + .. + }, + .. 
// Data shared by every sprite in a frame (group 0, binding 0).
// The two trailing u32s pad the struct out to 80 bytes.
struct Globals {
    mvp: mat4x4<f32>,
    size: vec2<f32>,
    _pad0: u32,
    _pad1: u32,
};

// Per-sprite data (group 1, binding 0).
// `color` packs four 8-bit channels into one u32; see `vs_main`.
struct Locals {
    position: vec2<f32>,
    velocity: vec2<f32>,
    color: u32,
    _pad0: u32,
    _pad1: u32,
    _pad2: u32,
};

@group(0)
@binding(0)
var<uniform> globals: Globals;

@group(1)
@binding(0)
var<uniform> locals: Locals;

// Values passed from the vertex stage to the fragment stage.
struct VertexOutput {
    @builtin(position) position: vec4<f32>,
    @location(0) tex_coords: vec2<f32>,
    @location(1) color: vec4<f32>,
};

// Builds one corner of a sprite quad from the vertex index alone; no vertex
// buffers are read.
@vertex
fn vs_main(@builtin(vertex_index) vi: u32) -> VertexOutput {
    // Bit 0 of `vi` selects x in {0, 1}; bit 1 selects y in {0, 1}
    // (`vi & 2u` is 0 or 2, hence the 0.5 factor).
    let tc = vec2<f32>(f32(vi & 1u), 0.5 * f32(vi & 2u));
    // Scale the unit-square corner by the sprite size and place it at the
    // sprite position before applying `mvp`.
    let offset = vec2<f32>(tc.x * globals.size.x, tc.y * globals.size.y);
    let pos = globals.mvp * vec4<f32>(locals.position + offset, 0.0, 1.0);
    // Unpack the four bytes of `color` (lowest byte first) into 0..1 floats.
    let color = vec4<f32>((vec4<u32>(locals.color) >> vec4<u32>(0u, 8u, 16u, 24u)) & vec4<u32>(255u)) / 255.0;
    return VertexOutput(pos, tc, color);
}

@group(0)
@binding(1)
var tex: texture_2d<f32>;
@group(0)
@binding(2)
var sam: sampler;

// Modulates the texture, sampled at mip level 0, by the per-sprite color.
@fragment
fn fs_main(vertex: VertexOutput) -> @location(0) vec4<f32> {
    return vertex.color * textureSampleLevel(tex, sam, vertex.tex_coords, 0.0);
}
a/third_party/rust/wgpu-hal/examples/raw-gles.em.html b/third_party/rust/wgpu-hal/examples/raw-gles.em.html new file mode 100644 index 0000000000..f3587e8575 --- /dev/null +++ b/third_party/rust/wgpu-hal/examples/raw-gles.em.html @@ -0,0 +1,16 @@ +<html> + <head> + <meta charset="UTF-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + </head> + <body> + <canvas id="canvas" width="640" height="400"></canvas> + <script> + var Module = { + canvas: document.getElementById("canvas"), + preRun: [function() {ENV.RUST_LOG = "debug"}] + }; + </script> + <script src="raw-gles.js"></script> + </body> +</html>
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/examples/raw-gles.rs b/third_party/rust/wgpu-hal/examples/raw-gles.rs new file mode 100644 index 0000000000..342100e1cb --- /dev/null +++ b/third_party/rust/wgpu-hal/examples/raw-gles.rs @@ -0,0 +1,188 @@ +//! This example shows interop with raw GLES contexts - +//! the ability to hook up wgpu-hal to an existing context and draw into it. +//! +//! Emscripten build: +//! 1. install emsdk +//! 2. build this example with cargo: +//! EMCC_CFLAGS="-g -s ERROR_ON_UNDEFINED_SYMBOLS=0 --no-entry -s FULL_ES3=1" cargo build --example raw-gles --target wasm32-unknown-emscripten +//! 3. copy raw-gles.em.html into target directory and open it in browser: +//! cp wgpu-hal/examples/raw-gles.em.html target/wasm32-unknown-emscripten/debug/examples + +extern crate wgpu_hal as hal; + +#[cfg(not(any(windows, target_arch = "wasm32")))] +fn main() { + env_logger::init(); + println!("Initializing external GL context"); + + let event_loop = glutin::event_loop::EventLoop::new(); + let window_builder = glutin::window::WindowBuilder::new(); + let gl_context = unsafe { + glutin::ContextBuilder::new() + .with_gl(glutin::GlRequest::Specific(glutin::Api::OpenGlEs, (3, 0))) + .build_windowed(window_builder, &event_loop) + .unwrap() + .make_current() + .unwrap() + }; + let inner_size = gl_context.window().inner_size(); + + println!("Hooking up to wgpu-hal"); + let exposed = unsafe { + <hal::api::Gles as hal::Api>::Adapter::new_external(|name| { + gl_context.get_proc_address(name) + }) + } + .expect("GL adapter can't be initialized"); + + fill_screen(&exposed, inner_size.width, inner_size.height); + + println!("Showing the window"); + gl_context.swap_buffers().unwrap(); + + event_loop.run(move |event, _, control_flow| { + use glutin::{ + event::{Event, KeyboardInput, VirtualKeyCode, WindowEvent}, + event_loop::ControlFlow, + }; + *control_flow = ControlFlow::Wait; + + match event { + Event::LoopDestroyed => (), + Event::WindowEvent 
{ event, .. } => match event { + WindowEvent::CloseRequested + | WindowEvent::KeyboardInput { + input: + KeyboardInput { + virtual_keycode: Some(VirtualKeyCode::Escape), + .. + }, + .. + } => *control_flow = ControlFlow::Exit, + _ => (), + }, + _ => (), + } + }); +} + +#[cfg(target_os = "emscripten")] +fn main() { + env_logger::init(); + + println!("Initializing external GL context"); + let egl = khronos_egl::Instance::new(khronos_egl::Static); + let display = unsafe { egl.get_display(khronos_egl::DEFAULT_DISPLAY) }.unwrap(); + egl.initialize(display) + .expect("unable to initialize display"); + + let attributes = [ + khronos_egl::RED_SIZE, + 8, + khronos_egl::GREEN_SIZE, + 8, + khronos_egl::BLUE_SIZE, + 8, + khronos_egl::NONE, + ]; + + let config = egl + .choose_first_config(display, &attributes) + .unwrap() + .expect("unable to choose config"); + let surface = unsafe { + let window = std::ptr::null_mut::<std::ffi::c_void>(); + egl.create_window_surface(display, config, window, None) + } + .expect("unable to create surface"); + + let context_attributes = [khronos_egl::CONTEXT_CLIENT_VERSION, 3, khronos_egl::NONE]; + + let gl_context = egl + .create_context(display, config, None, &context_attributes) + .expect("unable to create context"); + egl.make_current(display, Some(surface), Some(surface), Some(gl_context)) + .expect("can't make context current"); + + println!("Hooking up to wgpu-hal"); + let exposed = unsafe { + <hal::api::Gles as hal::Api>::Adapter::new_external(|name| { + egl.get_proc_address(name) + .map_or(std::ptr::null(), |p| p as *const _) + }) + } + .expect("GL adapter can't be initialized"); + + fill_screen(&exposed, 640, 400); +} + +#[cfg(any(windows, all(target_arch = "wasm32", not(target_os = "emscripten"))))] +fn main() {} + +#[cfg(any(not(any(windows, target_arch = "wasm32")), target_os = "emscripten"))] +fn fill_screen(exposed: &hal::ExposedAdapter<hal::api::Gles>, width: u32, height: u32) { + use hal::{Adapter as _, CommandEncoder as _, 
Device as _, Queue as _}; + + let od = unsafe { + exposed + .adapter + .open(wgt::Features::empty(), &wgt::Limits::downlevel_defaults()) + } + .unwrap(); + + let format = wgt::TextureFormat::Rgba8UnormSrgb; + let texture = <hal::api::Gles as hal::Api>::Texture::default_framebuffer(format); + let view = unsafe { + od.device + .create_texture_view( + &texture, + &hal::TextureViewDescriptor { + label: None, + format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COLOR_TARGET, + range: wgt::ImageSubresourceRange::default(), + }, + ) + .unwrap() + }; + + println!("Filling the screen"); + let mut encoder = unsafe { + od.device + .create_command_encoder(&hal::CommandEncoderDescriptor { + label: None, + queue: &od.queue, + }) + .unwrap() + }; + let rp_desc = hal::RenderPassDescriptor { + label: None, + extent: wgt::Extent3d { + width, + height, + depth_or_array_layers: 1, + }, + sample_count: 1, + color_attachments: &[Some(hal::ColorAttachment { + target: hal::Attachment { + view: &view, + usage: hal::TextureUses::COLOR_TARGET, + }, + resolve_target: None, + ops: hal::AttachmentOps::STORE, + clear_value: wgt::Color::BLUE, + })], + depth_stencil_attachment: None, + multiview: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + unsafe { + encoder.begin_encoding(None).unwrap(); + encoder.begin_render_pass(&rp_desc); + encoder.end_render_pass(); + let cmd_buf = encoder.end_encoding().unwrap(); + od.queue.submit(&[&cmd_buf], &[], None).unwrap(); + } +} diff --git a/third_party/rust/wgpu-hal/examples/ray-traced-triangle/main.rs b/third_party/rust/wgpu-hal/examples/ray-traced-triangle/main.rs new file mode 100644 index 0000000000..c05feae820 --- /dev/null +++ b/third_party/rust/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -0,0 +1,1113 @@ +extern crate wgpu_hal as hal; + +use hal::{ + Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, Surface as _, +}; +use raw_window_handle::{HasDisplayHandle, HasWindowHandle}; 
+ +use glam::{Affine3A, Mat4, Vec3}; +use std::{ + borrow::{Borrow, Cow}, + iter, mem, ptr, + time::Instant, +}; +use winit::window::WindowButtons; + +const COMMAND_BUFFER_PER_CONTEXT: usize = 100; +const DESIRED_MAX_LATENCY: u32 = 2; + +/// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc) +/// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html) +#[derive(Clone)] +#[repr(C)] +struct AccelerationStructureInstance { + transform: [f32; 12], + custom_index_and_mask: u32, + shader_binding_table_record_offset_and_flags: u32, + acceleration_structure_reference: u64, +} + +impl std::fmt::Debug for AccelerationStructureInstance { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Instance") + .field("transform", &self.transform) + .field("custom_index()", &self.custom_index()) + .field("mask()", &self.mask()) + .field( + "shader_binding_table_record_offset()", + &self.shader_binding_table_record_offset(), + ) + .field("flags()", &self.flags()) + .field( + "acceleration_structure_reference", + &self.acceleration_structure_reference, + ) + .finish() + } +} + +#[allow(dead_code)] +impl AccelerationStructureInstance { + const LOW_24_MASK: u32 = 0x00ff_ffff; + const MAX_U24: u32 = (1u32 << 24u32) - 1u32; + + #[inline] + fn affine_to_rows(mat: &Affine3A) -> [f32; 12] { + let row_0 = mat.matrix3.row(0); + let row_1 = mat.matrix3.row(1); + let row_2 = mat.matrix3.row(2); + let translation = mat.translation; + [ + row_0.x, + row_0.y, + row_0.z, + translation.x, + row_1.x, + row_1.y, + row_1.z, + translation.y, + row_2.x, + row_2.y, + row_2.z, + translation.z, + ] + } + + #[inline] + fn rows_to_affine(rows: &[f32; 12]) -> Affine3A { + Affine3A::from_cols_array(&[ + rows[0], rows[3], rows[6], rows[9], rows[1], rows[4], rows[7], rows[10], rows[2], + rows[5], rows[8], rows[11], + ]) 
+ } + + pub fn transform_as_affine(&self) -> Affine3A { + Self::rows_to_affine(&self.transform) + } + pub fn set_transform(&mut self, transform: &Affine3A) { + self.transform = Self::affine_to_rows(transform); + } + + pub fn custom_index(&self) -> u32 { + self.custom_index_and_mask & Self::LOW_24_MASK + } + + pub fn mask(&self) -> u8 { + (self.custom_index_and_mask >> 24) as u8 + } + + pub fn shader_binding_table_record_offset(&self) -> u32 { + self.shader_binding_table_record_offset_and_flags & Self::LOW_24_MASK + } + + pub fn flags(&self) -> u8 { + (self.shader_binding_table_record_offset_and_flags >> 24) as u8 + } + + pub fn set_custom_index(&mut self, custom_index: u32) { + debug_assert!( + custom_index <= Self::MAX_U24, + "custom_index uses more than 24 bits! {custom_index} > {}", + Self::MAX_U24 + ); + self.custom_index_and_mask = + (custom_index & Self::LOW_24_MASK) | (self.custom_index_and_mask & !Self::LOW_24_MASK) + } + + pub fn set_mask(&mut self, mask: u8) { + self.custom_index_and_mask = + (self.custom_index_and_mask & Self::LOW_24_MASK) | (u32::from(mask) << 24) + } + + pub fn set_shader_binding_table_record_offset( + &mut self, + shader_binding_table_record_offset: u32, + ) { + debug_assert!(shader_binding_table_record_offset <= Self::MAX_U24, "shader_binding_table_record_offset uses more than 24 bits! 
{shader_binding_table_record_offset} > {}", Self::MAX_U24); + self.shader_binding_table_record_offset_and_flags = (shader_binding_table_record_offset + & Self::LOW_24_MASK) + | (self.shader_binding_table_record_offset_and_flags & !Self::LOW_24_MASK) + } + + pub fn set_flags(&mut self, flags: u8) { + self.shader_binding_table_record_offset_and_flags = + (self.shader_binding_table_record_offset_and_flags & Self::LOW_24_MASK) + | (u32::from(flags) << 24) + } + + pub fn new( + transform: &Affine3A, + custom_index: u32, + mask: u8, + shader_binding_table_record_offset: u32, + flags: u8, + acceleration_structure_reference: u64, + ) -> Self { + debug_assert!( + custom_index <= Self::MAX_U24, + "custom_index uses more than 24 bits! {custom_index} > {}", + Self::MAX_U24 + ); + debug_assert!( + shader_binding_table_record_offset <= Self::MAX_U24, + "shader_binding_table_record_offset uses more than 24 bits! {shader_binding_table_record_offset} > {}", Self::MAX_U24 + ); + AccelerationStructureInstance { + transform: Self::affine_to_rows(transform), + custom_index_and_mask: (custom_index & Self::MAX_U24) | (u32::from(mask) << 24), + shader_binding_table_record_offset_and_flags: (shader_binding_table_record_offset + & Self::MAX_U24) + | (u32::from(flags) << 24), + acceleration_structure_reference, + } + } +} + +struct ExecutionContext<A: hal::Api> { + encoder: A::CommandEncoder, + fence: A::Fence, + fence_value: hal::FenceValue, + used_views: Vec<A::TextureView>, + used_cmd_bufs: Vec<A::CommandBuffer>, + frames_recorded: usize, +} + +impl<A: hal::Api> ExecutionContext<A> { + unsafe fn wait_and_clear(&mut self, device: &A::Device) { + device.wait(&self.fence, self.fence_value, !0).unwrap(); + self.encoder.reset_all(self.used_cmd_bufs.drain(..)); + for view in self.used_views.drain(..) 
/// State of the ray-traced-triangle example: the HAL instance, device and
/// surface, plus every GPU object created in `init`.
///
/// Generic over the backend `A`, so each field uses that backend's associated
/// types.
#[allow(dead_code)]
struct Example<A: hal::Api> {
    instance: A::Instance,
    adapter: A::Adapter,
    surface: A::Surface,
    // Format the surface was configured with.
    surface_format: wgt::TextureFormat,
    device: A::Device,
    queue: A::Queue,

    // Per-context encoder/fence state, and the index of the one in use.
    contexts: Vec<ExecutionContext<A>>,
    context_index: usize,
    extent: [u32; 2],
    start: Instant,
    // Compute pipeline, bind group layout and shader module built in `init`.
    pipeline: A::ComputePipeline,
    bind_group: A::BindGroup,
    bgl: A::BindGroupLayout,
    shader_module: A::ShaderModule,
    texture_view: A::TextureView,
    uniform_buffer: A::Buffer,
    pipeline_layout: A::PipelineLayout,
    // Triangle geometry fed to the bottom-level acceleration structure.
    vertices_buffer: A::Buffer,
    indices_buffer: A::Buffer,
    texture: A::Texture,
    // CPU-side instance records; `instances_buffer` is presumably their GPU
    // copy (its creation is outside this view; confirm).
    instances: [AccelerationStructureInstance; 3],
    instances_buffer: A::Buffer,
    // Bottom-level and top-level acceleration structures.
    blas: A::AccelerationStructure,
    tlas: A::AccelerationStructure,
    scratch_buffer: A::Buffer,
    time: f32,
}
}; + let surface = { + let raw_window_handle = window.window_handle()?.as_raw(); + let raw_display_handle = window.display_handle()?.as_raw(); + + unsafe { + instance + .create_surface(raw_display_handle, raw_window_handle) + .unwrap() + } + }; + + let (adapter, features) = unsafe { + let mut adapters = instance.enumerate_adapters(); + if adapters.is_empty() { + panic!("No adapters found"); + } + let exposed = adapters.swap_remove(0); + dbg!(exposed.features); + (exposed.adapter, exposed.features) + }; + let surface_caps = unsafe { adapter.surface_capabilities(&surface) } + .expect("Surface doesn't support presentation"); + log::info!("Surface caps: {:#?}", surface_caps); + + let hal::OpenDevice { device, queue } = + unsafe { adapter.open(features, &wgt::Limits::default()).unwrap() }; + + let window_size: (u32, u32) = window.inner_size().into(); + dbg!(&surface_caps.formats); + let surface_format = if surface_caps + .formats + .contains(&wgt::TextureFormat::Rgba8Snorm) + { + wgt::TextureFormat::Rgba8Unorm + } else { + *surface_caps.formats.first().unwrap() + }; + let surface_config = hal::SurfaceConfiguration { + maximum_frame_latency: DESIRED_MAX_LATENCY + .max(*surface_caps.maximum_frame_latency.start()) + .min(*surface_caps.maximum_frame_latency.end()), + present_mode: wgt::PresentMode::Fifo, + composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, + format: surface_format, + extent: wgt::Extent3d { + width: window_size.0, + height: window_size.1, + depth_or_array_layers: 1, + }, + usage: hal::TextureUses::COLOR_TARGET | hal::TextureUses::COPY_DST, + view_formats: vec![surface_format], + }; + unsafe { + surface.configure(&device, &surface_config).unwrap(); + }; + + #[allow(dead_code)] + struct Uniforms { + view_inverse: glam::Mat4, + proj_inverse: glam::Mat4, + } + + let bgl_desc = hal::BindGroupLayoutDescriptor { + label: None, + flags: hal::BindGroupLayoutFlags::empty(), + entries: &[ + wgt::BindGroupLayoutEntry { + binding: 0, + visibility: 
wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: wgt::BufferSize::new(mem::size_of::<Uniforms>() as _), + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 1, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::StorageTexture { + access: wgt::StorageTextureAccess::WriteOnly, + format: wgt::TextureFormat::Rgba8Unorm, + view_dimension: wgt::TextureViewDimension::D2, + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 2, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::AccelerationStructure, + count: None, + }, + ], + }; + + let bgl = unsafe { device.create_bind_group_layout(&bgl_desc).unwrap() }; + + let naga_shader = { + let shader_file = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("examples") + .join("ray-traced-triangle") + .join("shader.wgsl"); + let source = std::fs::read_to_string(shader_file).unwrap(); + let module = naga::front::wgsl::Frontend::new().parse(&source).unwrap(); + let info = naga::valid::Validator::new( + naga::valid::ValidationFlags::all(), + naga::valid::Capabilities::RAY_QUERY, + ) + .validate(&module) + .unwrap(); + hal::NagaShader { + module: Cow::Owned(module), + info, + debug_source: None, + } + }; + let shader_desc = hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }; + let shader_module = unsafe { + device + .create_shader_module(&shader_desc, hal::ShaderInput::Naga(naga_shader)) + .unwrap() + }; + + let pipeline_layout_desc = hal::PipelineLayoutDescriptor { + label: None, + flags: hal::PipelineLayoutFlags::empty(), + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }; + let pipeline_layout = unsafe { + device + .create_pipeline_layout(&pipeline_layout_desc) + .unwrap() + }; + + let pipeline = unsafe { + device.create_compute_pipeline(&hal::ComputePipelineDescriptor { + label: Some("pipeline"), + layout: &pipeline_layout, + stage: 
hal::ProgrammableStage { + module: &shader_module, + entry_point: "main", + }, + }) + } + .unwrap(); + + let vertices: [f32; 9] = [1.0, 1.0, 0.0, -1.0, 1.0, 0.0, 0.0, -1.0, 0.0]; + + let vertices_size_in_bytes = vertices.len() * 4; + + let indices: [u32; 3] = [0, 1, 2]; + + let indices_size_in_bytes = indices.len() * 4; + + let vertices_buffer = unsafe { + let vertices_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("vertices buffer"), + size: vertices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&vertices_buffer, 0..vertices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + vertices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + vertices_size_in_bytes, + ); + device.unmap_buffer(&vertices_buffer).unwrap(); + assert!(mapping.is_coherent); + + vertices_buffer + }; + + let indices_buffer = unsafe { + let indices_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("indices buffer"), + size: indices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&indices_buffer, 0..indices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + indices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + indices_size_in_bytes, + ); + device.unmap_buffer(&indices_buffer).unwrap(); + assert!(mapping.is_coherent); + + indices_buffer + }; + + let blas_triangles = vec![hal::AccelerationStructureTriangles { + vertex_buffer: Some(&vertices_buffer), + first_vertex: 0, + vertex_format: wgt::VertexFormat::Float32x3, + vertex_count: vertices.len() as u32, + vertex_stride: 3 * 4, + indices: 
Some(hal::AccelerationStructureTriangleIndices { + buffer: Some(&indices_buffer), + format: wgt::IndexFormat::Uint32, + offset: 0, + count: indices.len() as u32, + }), + transform: None, + flags: hal::AccelerationStructureGeometryFlags::OPAQUE, + }]; + let blas_entries = hal::AccelerationStructureEntries::Triangles(blas_triangles); + + let mut tlas_entries = + hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { + buffer: None, + count: 3, + offset: 0, + }); + + let blas_sizes = unsafe { + device.get_acceleration_structure_build_sizes( + &hal::GetAccelerationStructureBuildSizesDescriptor { + entries: &blas_entries, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + }, + ) + }; + + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE + | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; + + let tlas_sizes = unsafe { + device.get_acceleration_structure_build_sizes( + &hal::GetAccelerationStructureBuildSizesDescriptor { + entries: &tlas_entries, + flags: tlas_flags, + }, + ) + }; + + let blas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("blas"), + size: blas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::BottomLevel, + }) + } + .unwrap(); + + let tlas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("tlas"), + size: tlas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::TopLevel, + }) + } + .unwrap(); + + let uniforms = { + let view = Mat4::look_at_rh(Vec3::new(0.0, 0.0, 2.5), Vec3::ZERO, Vec3::Y); + let proj = Mat4::perspective_rh(59.0_f32.to_radians(), 1.0, 0.001, 1000.0); + + Uniforms { + view_inverse: view.inverse(), + proj_inverse: proj.inverse(), + } + }; + + let uniforms_size = std::mem::size_of::<Uniforms>(); + + let uniform_buffer = unsafe { + let uniform_buffer = device + .create_buffer(&hal::BufferDescriptor { + 
label: Some("uniform buffer"), + size: uniforms_size as u64, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + memory_flags: hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&uniform_buffer, 0..uniforms_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + &uniforms as *const Uniforms as *const u8, + mapping.ptr.as_ptr(), + uniforms_size, + ); + device.unmap_buffer(&uniform_buffer).unwrap(); + assert!(mapping.is_coherent); + uniform_buffer + }; + + let texture_desc = hal::TextureDescriptor { + label: None, + size: wgt::Extent3d { + width: 512, + height: 512, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgt::TextureDimension::D2, + format: wgt::TextureFormat::Rgba8Unorm, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + memory_flags: hal::MemoryFlags::empty(), + view_formats: vec![wgt::TextureFormat::Rgba8Unorm], + }; + let texture = unsafe { device.create_texture(&texture_desc).unwrap() }; + + let view_desc = hal::TextureViewDescriptor { + label: None, + format: texture_desc.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + range: wgt::ImageSubresourceRange::default(), + }; + let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; + + let bind_group = { + let buffer_binding = hal::BufferBinding { + buffer: &uniform_buffer, + offset: 0, + size: None, + }; + let texture_binding = hal::TextureBinding { + view: &texture_view, + usage: hal::TextureUses::STORAGE_READ_WRITE, + }; + let group_desc = hal::BindGroupDescriptor { + label: Some("bind group"), + layout: &bgl, + buffers: &[buffer_binding], + samplers: &[], + textures: &[texture_binding], + acceleration_structures: &[&tlas], + entries: &[ + hal::BindGroupEntry { + binding: 0, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 1, + resource_index: 0, + 
count: 1, + }, + hal::BindGroupEntry { + binding: 2, + resource_index: 0, + count: 1, + }, + ], + }; + unsafe { device.create_bind_group(&group_desc).unwrap() } + }; + + let scratch_buffer = unsafe { + device + .create_buffer(&hal::BufferDescriptor { + label: Some("scratch buffer"), + size: blas_sizes + .build_scratch_size + .max(tlas_sizes.build_scratch_size), + usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, + memory_flags: hal::MemoryFlags::empty(), + }) + .unwrap() + }; + + let instances = [ + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 0.0, + y: 0.0, + z: 0.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: -1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + ]; + + let instances_buffer_size = + instances.len() * std::mem::size_of::<AccelerationStructureInstance>(); + + let instances_buffer = unsafe { + let instances_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("instances_buffer"), + size: instances_buffer_size as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + device.unmap_buffer(&instances_buffer).unwrap(); + assert!(mapping.is_coherent); + + instances_buffer + }; + + if let 
hal::AccelerationStructureEntries::Instances(ref mut i) = tlas_entries { + i.buffer = Some(&instances_buffer); + assert!( + instances.len() <= i.count as usize, + "Tlas allocation to small" + ); + } + + let cmd_encoder_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &queue, + }; + let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() }; + + unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; + + unsafe { + cmd_encoder.place_acceleration_structure_barrier(hal::AccelerationStructureBarrier { + usage: hal::AccelerationStructureUses::empty() + ..hal::AccelerationStructureUses::BUILD_OUTPUT, + }); + + cmd_encoder.build_acceleration_structures( + 1, + [hal::BuildAccelerationStructureDescriptor { + mode: hal::AccelerationStructureBuildMode::Build, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + destination_acceleration_structure: &blas, + scratch_buffer: &scratch_buffer, + entries: &blas_entries, + source_acceleration_structure: None, + scratch_buffer_offset: 0, + }], + ); + + let scratch_buffer_barrier = hal::BufferBarrier { + buffer: &scratch_buffer, + usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + cmd_encoder.transition_buffers(iter::once(scratch_buffer_barrier)); + + cmd_encoder.place_acceleration_structure_barrier(hal::AccelerationStructureBarrier { + usage: hal::AccelerationStructureUses::BUILD_OUTPUT + ..hal::AccelerationStructureUses::BUILD_INPUT, + }); + + cmd_encoder.build_acceleration_structures( + 1, + [hal::BuildAccelerationStructureDescriptor { + mode: hal::AccelerationStructureBuildMode::Build, + flags: tlas_flags, + destination_acceleration_structure: &tlas, + scratch_buffer: &scratch_buffer, + entries: &tlas_entries, + source_acceleration_structure: None, + scratch_buffer_offset: 0, + }], + ); + + cmd_encoder.place_acceleration_structure_barrier(hal::AccelerationStructureBarrier { + usage: 
    /// Records and submits one frame.
    ///
    /// Per call this:
    /// 1. acquires the next surface texture,
    /// 2. advances `self.time`, rotates instance 0 and re-uploads the instance buffer,
    /// 3. updates the TLAS in place from the instance buffer,
    /// 4. runs the compute pipeline that writes into `self.texture`,
    /// 5. copies `self.texture` into the surface texture and presents it,
    /// 6. optionally signals the context fence and rotates to the other execution context.
    ///
    /// Every fallible HAL call is `unwrap`ped, so any backend error panics.
    fn render(&mut self) {
        let ctx = &mut self.contexts[self.context_index];

        let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture };

        // Surface texture: uninitialized -> copy destination (it receives the ray-traced image).
        let target_barrier0 = hal::TextureBarrier {
            texture: surface_tex.borrow(),
            range: wgt::ImageSubresourceRange::default(),
            usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COPY_DST,
        };

        let instances_buffer_size =
            self.instances.len() * std::mem::size_of::<AccelerationStructureInstance>();

        // Same flag combination that `init` used when it first built the TLAS.
        let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE
            | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE;

        // Fixed time step per rendered frame (not wall-clock time).
        self.time += 1.0 / 60.0;

        // Only the first instance is animated: it spins around the Y axis.
        self.instances[0].set_transform(&Affine3A::from_rotation_y(self.time));

        // Re-upload the whole instance array through a CPU mapping of the instance buffer.
        unsafe {
            let mapping = self
                .device
                .map_buffer(&self.instances_buffer, 0..instances_buffer_size as u64)
                .unwrap();
            ptr::copy_nonoverlapping(
                self.instances.as_ptr() as *const u8,
                mapping.ptr.as_ptr(),
                instances_buffer_size,
            );
            self.device.unmap_buffer(&self.instances_buffer).unwrap();
            // No explicit flush is issued, so the mapping has to be coherent.
            assert!(mapping.is_coherent);
        }

        unsafe {
            ctx.encoder.begin_encoding(Some("frame")).unwrap();

            let instances = hal::AccelerationStructureInstances {
                buffer: Some(&self.instances_buffer),
                count: self.instances.len() as u32,
                offset: 0,
            };

            // TLAS: shader input -> build input, ahead of the in-place update below.
            ctx.encoder
                .place_acceleration_structure_barrier(hal::AccelerationStructureBarrier {
                    usage: hal::AccelerationStructureUses::SHADER_INPUT
                        ..hal::AccelerationStructureUses::BUILD_INPUT,
                });

            // `Update` mode with source == destination: the TLAS is updated in place.
            ctx.encoder.build_acceleration_structures(
                1,
                [hal::BuildAccelerationStructureDescriptor {
                    mode: hal::AccelerationStructureBuildMode::Update,
                    flags: tlas_flags,
                    destination_acceleration_structure: &self.tlas,
                    scratch_buffer: &self.scratch_buffer,
                    entries: &hal::AccelerationStructureEntries::Instances(instances),
                    source_acceleration_structure: Some(&self.tlas),
                    scratch_buffer_offset: 0,
                }],
            );

            // TLAS: build output -> shader input, so the compute pass can trace against it.
            ctx.encoder
                .place_acceleration_structure_barrier(hal::AccelerationStructureBarrier {
                    usage: hal::AccelerationStructureUses::BUILD_OUTPUT
                        ..hal::AccelerationStructureUses::SHADER_INPUT,
                });

            // NOTE(review): same scratch-buffer transition as in `init`; here it is recorded
            // after the build rather than between two builds — confirm this is intended.
            let scratch_buffer_barrier = hal::BufferBarrier {
                buffer: &self.scratch_buffer,
                usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT
                    ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT,
            };
            ctx.encoder
                .transition_buffers(iter::once(scratch_buffer_barrier));

            ctx.encoder.transition_textures(iter::once(target_barrier0));
        }

        let surface_view_desc = hal::TextureViewDescriptor {
            label: None,
            format: self.surface_format,
            dimension: wgt::TextureViewDimension::D2,
            usage: hal::TextureUses::COPY_DST,
            range: wgt::ImageSubresourceRange::default(),
        };
        // The view is not bound anywhere in this function; it is only stored in
        // `ctx.used_views` after submission.
        let surface_tex_view = unsafe {
            self.device
                .create_texture_view(surface_tex.borrow(), &surface_view_desc)
                .unwrap()
        };
        unsafe {
            ctx.encoder.begin_compute_pass(&hal::ComputePassDescriptor {
                label: None,
                timestamp_writes: None,
            });
            ctx.encoder.set_compute_pipeline(&self.pipeline);
            ctx.encoder
                .set_bind_group(&self.pipeline_layout, 0, &self.bind_group, &[]);
            // 512 / 8 workgroups per axis: the storage texture is created 512x512 in `init`
            // and shader.wgsl declares `@workgroup_size(8, 8)`.
            ctx.encoder.dispatch([512 / 8, 512 / 8, 1]);
        }

        ctx.frames_recorded += 1;
        // Signal the fence (and switch contexts below) only once this context has recorded
        // more than COMMAND_BUFFER_PER_CONTEXT frames.
        let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT;

        // Surface texture: copy destination -> presentable.
        let target_barrier1 = hal::TextureBarrier {
            texture: surface_tex.borrow(),
            range: wgt::ImageSubresourceRange::default(),
            usage: hal::TextureUses::COPY_DST..hal::TextureUses::PRESENT,
        };
        // Ray-traced texture: storage -> copy source for the blit to the surface.
        let target_barrier2 = hal::TextureBarrier {
            texture: &self.texture,
            range: wgt::ImageSubresourceRange::default(),
            usage: hal::TextureUses::STORAGE_READ_WRITE..hal::TextureUses::COPY_SRC,
        };
        // Ray-traced texture: back to storage so the next frame can write it again.
        let target_barrier3 = hal::TextureBarrier {
            texture: &self.texture,
            range: wgt::ImageSubresourceRange::default(),
            usage: hal::TextureUses::COPY_SRC..hal::TextureUses::STORAGE_READ_WRITE,
        };
        unsafe {
            ctx.encoder.end_compute_pass();
            ctx.encoder.transition_textures(iter::once(target_barrier2));
            // Full 512x512 colour copy, mip 0 / layer 0, from the storage texture to the surface.
            ctx.encoder.copy_texture_to_texture(
                &self.texture,
                hal::TextureUses::COPY_SRC,
                surface_tex.borrow(),
                std::iter::once(hal::TextureCopy {
                    src_base: hal::TextureCopyBase {
                        mip_level: 0,
                        array_layer: 0,
                        origin: wgt::Origin3d::ZERO,
                        aspect: hal::FormatAspects::COLOR,
                    },
                    dst_base: hal::TextureCopyBase {
                        mip_level: 0,
                        array_layer: 0,
                        origin: wgt::Origin3d::ZERO,
                        aspect: hal::FormatAspects::COLOR,
                    },
                    size: hal::CopyExtent {
                        width: 512,
                        height: 512,
                        depth: 1,
                    },
                }),
            );
            ctx.encoder.transition_textures(iter::once(target_barrier1));
            ctx.encoder.transition_textures(iter::once(target_barrier3));
        }

        unsafe {
            let cmd_buf = ctx.encoder.end_encoding().unwrap();
            let fence_param = if do_fence {
                Some((&mut ctx.fence, ctx.fence_value))
            } else {
                None
            };
            self.queue
                .submit(&[&cmd_buf], &[&surface_tex], fence_param)
                .unwrap();
            self.queue.present(&self.surface, surface_tex).unwrap();
            // Keep the command buffer and the view with the context instead of dropping them
            // here. Presumably `wait_and_clear` releases them once the GPU is done — verify
            // against `ExecutionContext`.
            ctx.used_cmd_bufs.push(cmd_buf);
            ctx.used_views.push(surface_tex_view);
        };

        if do_fence {
            log::info!("Context switch from {}", self.context_index);
            let old_fence_value = ctx.fence_value;
            // The second execution context is created lazily on the first switch.
            if self.contexts.len() == 1 {
                let hal_desc = hal::CommandEncoderDescriptor {
                    label: None,
                    queue: &self.queue,
                };
                self.contexts.push(unsafe {
                    ExecutionContext {
                        encoder: self.device.create_command_encoder(&hal_desc).unwrap(),
                        fence: self.device.create_fence().unwrap(),
                        fence_value: 0,
                        used_views: Vec::new(),
                        used_cmd_bufs: Vec::new(),
                        frames_recorded: 0,
                    }
                });
            }
            // Round-robin to the next context and make sure it is idle before reusing it.
            self.context_index = (self.context_index + 1) % self.contexts.len();
            let next = &mut self.contexts[self.context_index];
            unsafe {
                next.wait_and_clear(&self.device);
            }
            // The next context continues the fence sequence from the value just used.
            next.fence_value = old_fence_value + 1;
        }
    }
// Selects the HAL backend for this example at compile time. The first branch whose
// `cfg` predicate holds wins, so the order below is also the priority order.
cfg_if::cfg_if! {
    // Apple (macOS / iOS) + Metal
    if #[cfg(all(any(target_os = "macos", target_os = "ios"), feature = "metal"))] {
        type Api = hal::api::Metal;
    }
    // Any non-wasm target + Vulkan
    else if #[cfg(all(not(target_arch = "wasm32"), feature = "vulkan"))] {
        type Api = hal::api::Vulkan;
    }
    // Windows + DX12
    else if #[cfg(all(windows, feature = "dx12"))] {
        type Api = hal::api::Dx12;
    }
    // Anything + GLES
    else if #[cfg(feature = "gles")] {
        type Api = hal::api::Gles;
    }
    // Fallback: no backend feature matched
    else {
        type Api = hal::api::Empty;
    }
}
// Camera matrices uploaded by the host; the host fills both with inverted matrices
// (inverse view and inverse projection).
struct Uniforms {
    view_inv: mat4x4<f32>,
    proj_inv: mat4x4<f32>,
};
@group(0) @binding(0)
var<uniform> uniforms: Uniforms;

// Write-only storage image that receives one colour per invocation.
@group(0) @binding(1)
var output: texture_storage_2d<rgba8unorm, write>;

// Acceleration structure the rays are traced against.
@group(0) @binding(2)
var acc_struct: acceleration_structure;

// One invocation per output pixel: builds a camera ray, traces it with a ray query and
// stores either the hit's barycentric coordinates as a colour or opaque black.
@compute @workgroup_size(8, 8)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let target_size = textureDimensions(output);

    // Pixel centre -> [0, 1] texture coordinates -> [-1, 1] range.
    let pixel_center = vec2<f32>(global_id.xy) + vec2<f32>(0.5);
    let in_uv = pixel_center / vec2<f32>(target_size.xy);
    let d = in_uv * 2.0 - 1.0;

    // Ray origin: the view-space origin transformed by the inverse view matrix.
    let origin = (uniforms.view_inv * vec4<f32>(0.0, 0.0, 0.0, 1.0)).xyz;
    // Ray direction: un-project the pixel, normalize, then rotate with the inverse view
    // matrix (w = 0 so the translation part is ignored).
    let temp = uniforms.proj_inv * vec4<f32>(d.x, d.y, 1.0, 1.0);
    let direction = (uniforms.view_inv * vec4<f32>(normalize(temp.xyz), 0.0)).xyz;

    // NOTE(review): RayDesc arguments are presumably (flags, cull mask, t_min, t_max,
    // origin, direction) — confirm against the WGSL ray-query definition in naga.
    var rq: ray_query;
    rayQueryInitialize(&rq, acc_struct, RayDesc(0u, 0xFFu, 0.1, 200.0, origin, direction));
    rayQueryProceed(&rq);

    // Default colour when nothing is hit: opaque black.
    var color = vec4<f32>(0.0, 0.0, 0.0, 1.0);
    let intersection = rayQueryGetCommittedIntersection(&rq);
    if intersection.kind != RAY_QUERY_INTERSECTION_NONE {
        // RGB = (b.x, b.y, 1 - b.x - b.y): the three barycentric weights of the hit.
        color = vec4<f32>(intersection.barycentrics, 1.0 - intersection.barycentrics.x - intersection.barycentrics.y, 1.0);
    }

    textureStore(output, global_id.xy, color);
}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs new file mode 100644 index 0000000000..6af4b77bb3 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs @@ -0,0 +1,270 @@ +use std::{ffi::OsString, os::windows::ffi::OsStringExt}; +use winapi::shared::dxgiformat; + +// Helper to convert DXGI adapter name to a normal string +pub fn map_adapter_name(name: [u16; 128]) -> String { + let len = name.iter().take_while(|&&c| c != 0).count(); + let name = OsString::from_wide(&name[..len]); + name.to_string_lossy().into_owned() +} + +pub fn map_texture_format_failable(format: wgt::TextureFormat) -> Option<dxgiformat::DXGI_FORMAT> { + use wgt::TextureFormat as Tf; + use winapi::shared::dxgiformat::*; + + Some(match format { + Tf::R8Unorm => DXGI_FORMAT_R8_UNORM, + Tf::R8Snorm => DXGI_FORMAT_R8_SNORM, + Tf::R8Uint => DXGI_FORMAT_R8_UINT, + Tf::R8Sint => DXGI_FORMAT_R8_SINT, + Tf::R16Uint => DXGI_FORMAT_R16_UINT, + Tf::R16Sint => DXGI_FORMAT_R16_SINT, + Tf::R16Unorm => DXGI_FORMAT_R16_UNORM, + Tf::R16Snorm => DXGI_FORMAT_R16_SNORM, + Tf::R16Float => DXGI_FORMAT_R16_FLOAT, + Tf::Rg8Unorm => DXGI_FORMAT_R8G8_UNORM, + Tf::Rg8Snorm => DXGI_FORMAT_R8G8_SNORM, + Tf::Rg8Uint => DXGI_FORMAT_R8G8_UINT, + Tf::Rg8Sint => DXGI_FORMAT_R8G8_SINT, + Tf::Rg16Unorm => DXGI_FORMAT_R16G16_UNORM, + Tf::Rg16Snorm => DXGI_FORMAT_R16G16_SNORM, + Tf::R32Uint => DXGI_FORMAT_R32_UINT, + Tf::R32Sint => DXGI_FORMAT_R32_SINT, + Tf::R32Float => DXGI_FORMAT_R32_FLOAT, + Tf::Rg16Uint => DXGI_FORMAT_R16G16_UINT, + Tf::Rg16Sint => DXGI_FORMAT_R16G16_SINT, + Tf::Rg16Float => DXGI_FORMAT_R16G16_FLOAT, + Tf::Rgba8Unorm => DXGI_FORMAT_R8G8B8A8_UNORM, + Tf::Rgba8UnormSrgb => DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + Tf::Bgra8UnormSrgb => DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, + Tf::Rgba8Snorm => DXGI_FORMAT_R8G8B8A8_SNORM, + Tf::Bgra8Unorm => DXGI_FORMAT_B8G8R8A8_UNORM, + Tf::Rgba8Uint => DXGI_FORMAT_R8G8B8A8_UINT, + 
Tf::Rgba8Sint => DXGI_FORMAT_R8G8B8A8_SINT, + Tf::Rgb9e5Ufloat => DXGI_FORMAT_R9G9B9E5_SHAREDEXP, + Tf::Rgb10a2Uint => DXGI_FORMAT_R10G10B10A2_UINT, + Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, + Tf::Rg11b10Float => DXGI_FORMAT_R11G11B10_FLOAT, + Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT, + Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT, + Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT, + Tf::Rgba16Uint => DXGI_FORMAT_R16G16B16A16_UINT, + Tf::Rgba16Sint => DXGI_FORMAT_R16G16B16A16_SINT, + Tf::Rgba16Unorm => DXGI_FORMAT_R16G16B16A16_UNORM, + Tf::Rgba16Snorm => DXGI_FORMAT_R16G16B16A16_SNORM, + Tf::Rgba16Float => DXGI_FORMAT_R16G16B16A16_FLOAT, + Tf::Rgba32Uint => DXGI_FORMAT_R32G32B32A32_UINT, + Tf::Rgba32Sint => DXGI_FORMAT_R32G32B32A32_SINT, + Tf::Rgba32Float => DXGI_FORMAT_R32G32B32A32_FLOAT, + Tf::Stencil8 => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth16Unorm => DXGI_FORMAT_D16_UNORM, + Tf::Depth24Plus => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth24PlusStencil8 => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth32Float => DXGI_FORMAT_D32_FLOAT, + Tf::Depth32FloatStencil8 => DXGI_FORMAT_D32_FLOAT_S8X24_UINT, + Tf::NV12 => DXGI_FORMAT_NV12, + Tf::Bc1RgbaUnorm => DXGI_FORMAT_BC1_UNORM, + Tf::Bc1RgbaUnormSrgb => DXGI_FORMAT_BC1_UNORM_SRGB, + Tf::Bc2RgbaUnorm => DXGI_FORMAT_BC2_UNORM, + Tf::Bc2RgbaUnormSrgb => DXGI_FORMAT_BC2_UNORM_SRGB, + Tf::Bc3RgbaUnorm => DXGI_FORMAT_BC3_UNORM, + Tf::Bc3RgbaUnormSrgb => DXGI_FORMAT_BC3_UNORM_SRGB, + Tf::Bc4RUnorm => DXGI_FORMAT_BC4_UNORM, + Tf::Bc4RSnorm => DXGI_FORMAT_BC4_SNORM, + Tf::Bc5RgUnorm => DXGI_FORMAT_BC5_UNORM, + Tf::Bc5RgSnorm => DXGI_FORMAT_BC5_SNORM, + Tf::Bc6hRgbUfloat => DXGI_FORMAT_BC6H_UF16, + Tf::Bc6hRgbFloat => DXGI_FORMAT_BC6H_SF16, + Tf::Bc7RgbaUnorm => DXGI_FORMAT_BC7_UNORM, + Tf::Bc7RgbaUnormSrgb => DXGI_FORMAT_BC7_UNORM_SRGB, + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | 
Tf::EacRg11Unorm + | Tf::EacRg11Snorm + | Tf::Astc { + block: _, + channel: _, + } => return None, + }) +} + +pub fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match map_texture_format_failable(format) { + Some(f) => f, + None => unreachable!(), + } +} + +// Note: DXGI doesn't allow sRGB format on the swapchain, +// but creating RTV of swapchain buffers with sRGB works. +pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::TextureFormat::Bgra8UnormSrgb => dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, + wgt::TextureFormat::Rgba8UnormSrgb => dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + _ => map_texture_format(format), + } +} + +// SRV and UAV can't use the depth or typeless formats +// see https://microsoft.github.io/DirectX-Specs/d3d/PlanarDepthStencilDDISpec.html#view-creation +pub fn map_texture_format_for_srv_uav( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> Option<dxgiformat::DXGI_FORMAT> { + Some(match (format, aspect) { + (wgt::TextureFormat::Depth16Unorm, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R16_UNORM + } + (wgt::TextureFormat::Depth32Float, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R32_FLOAT + } + (wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS + } + ( + wgt::TextureFormat::Depth24Plus | wgt::TextureFormat::Depth24PlusStencil8, + crate::FormatAspects::DEPTH, + ) => dxgiformat::DXGI_FORMAT_R24_UNORM_X8_TYPELESS, + + (wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::STENCIL) => { + dxgiformat::DXGI_FORMAT_X32_TYPELESS_G8X24_UINT + } + ( + wgt::TextureFormat::Stencil8 | wgt::TextureFormat::Depth24PlusStencil8, + crate::FormatAspects::STENCIL, + ) => dxgiformat::DXGI_FORMAT_X24_TYPELESS_G8_UINT, + + (_, crate::FormatAspects::DEPTH) + | (_, crate::FormatAspects::STENCIL) + | (_, crate::FormatAspects::DEPTH_STENCIL) => 
return None, + + _ => map_texture_format(format), + }) +} + +// see https://microsoft.github.io/DirectX-Specs/d3d/PlanarDepthStencilDDISpec.html#planar-layout-for-staging-from-buffer +pub fn map_texture_format_for_copy( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> Option<dxgiformat::DXGI_FORMAT> { + Some(match (format, aspect) { + (wgt::TextureFormat::Depth16Unorm, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R16_UNORM + } + ( + wgt::TextureFormat::Depth32Float | wgt::TextureFormat::Depth32FloatStencil8, + crate::FormatAspects::DEPTH, + ) => dxgiformat::DXGI_FORMAT_R32_FLOAT, + + ( + wgt::TextureFormat::Stencil8 + | wgt::TextureFormat::Depth24PlusStencil8 + | wgt::TextureFormat::Depth32FloatStencil8, + crate::FormatAspects::STENCIL, + ) => dxgiformat::DXGI_FORMAT_R8_UINT, + + (format, crate::FormatAspects::COLOR) => map_texture_format(format), + + _ => return None, + }) +} + +pub fn map_texture_format_for_resource( + format: wgt::TextureFormat, + usage: crate::TextureUses, + has_view_formats: bool, + casting_fully_typed_format_supported: bool, +) -> dxgiformat::DXGI_FORMAT { + use wgt::TextureFormat as Tf; + use winapi::shared::dxgiformat::*; + + if casting_fully_typed_format_supported { + map_texture_format(format) + + // We might view this resource as srgb or non-srgb + } else if has_view_formats { + match format { + Tf::Rgba8Unorm | Tf::Rgba8UnormSrgb => DXGI_FORMAT_R8G8B8A8_TYPELESS, + Tf::Bgra8Unorm | Tf::Bgra8UnormSrgb => DXGI_FORMAT_B8G8R8A8_TYPELESS, + Tf::Bc1RgbaUnorm | Tf::Bc1RgbaUnormSrgb => DXGI_FORMAT_BC1_TYPELESS, + Tf::Bc2RgbaUnorm | Tf::Bc2RgbaUnormSrgb => DXGI_FORMAT_BC2_TYPELESS, + Tf::Bc3RgbaUnorm | Tf::Bc3RgbaUnormSrgb => DXGI_FORMAT_BC3_TYPELESS, + Tf::Bc7RgbaUnorm | Tf::Bc7RgbaUnormSrgb => DXGI_FORMAT_BC7_TYPELESS, + format => map_texture_format(format), + } + + // We might view this resource as SRV/UAV but also as DSV + } else if format.is_depth_stencil_format() + && usage.intersects( + 
crate::TextureUses::RESOURCE + | crate::TextureUses::STORAGE_READ + | crate::TextureUses::STORAGE_READ_WRITE, + ) + { + match format { + Tf::Depth16Unorm => DXGI_FORMAT_R16_TYPELESS, + Tf::Depth32Float => DXGI_FORMAT_R32_TYPELESS, + Tf::Depth32FloatStencil8 => DXGI_FORMAT_R32G8X24_TYPELESS, + Tf::Stencil8 | Tf::Depth24Plus | Tf::Depth24PlusStencil8 => DXGI_FORMAT_R24G8_TYPELESS, + _ => unreachable!(), + } + } else { + map_texture_format(format) + } +} + +pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::IndexFormat::Uint16 => dxgiformat::DXGI_FORMAT_R16_UINT, + wgt::IndexFormat::Uint32 => dxgiformat::DXGI_FORMAT_R32_UINT, + } +} + +pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT { + use wgt::VertexFormat as Vf; + use winapi::shared::dxgiformat::*; + + match format { + Vf::Unorm8x2 => DXGI_FORMAT_R8G8_UNORM, + Vf::Snorm8x2 => DXGI_FORMAT_R8G8_SNORM, + Vf::Uint8x2 => DXGI_FORMAT_R8G8_UINT, + Vf::Sint8x2 => DXGI_FORMAT_R8G8_SINT, + Vf::Unorm8x4 => DXGI_FORMAT_R8G8B8A8_UNORM, + Vf::Snorm8x4 => DXGI_FORMAT_R8G8B8A8_SNORM, + Vf::Uint8x4 => DXGI_FORMAT_R8G8B8A8_UINT, + Vf::Sint8x4 => DXGI_FORMAT_R8G8B8A8_SINT, + Vf::Unorm16x2 => DXGI_FORMAT_R16G16_UNORM, + Vf::Snorm16x2 => DXGI_FORMAT_R16G16_SNORM, + Vf::Uint16x2 => DXGI_FORMAT_R16G16_UINT, + Vf::Sint16x2 => DXGI_FORMAT_R16G16_SINT, + Vf::Float16x2 => DXGI_FORMAT_R16G16_FLOAT, + Vf::Unorm16x4 => DXGI_FORMAT_R16G16B16A16_UNORM, + Vf::Snorm16x4 => DXGI_FORMAT_R16G16B16A16_SNORM, + Vf::Uint16x4 => DXGI_FORMAT_R16G16B16A16_UINT, + Vf::Sint16x4 => DXGI_FORMAT_R16G16B16A16_SINT, + Vf::Float16x4 => DXGI_FORMAT_R16G16B16A16_FLOAT, + Vf::Uint32 => DXGI_FORMAT_R32_UINT, + Vf::Sint32 => DXGI_FORMAT_R32_SINT, + Vf::Float32 => DXGI_FORMAT_R32_FLOAT, + Vf::Uint32x2 => DXGI_FORMAT_R32G32_UINT, + Vf::Sint32x2 => DXGI_FORMAT_R32G32_SINT, + Vf::Float32x2 => DXGI_FORMAT_R32G32_FLOAT, + Vf::Uint32x3 => DXGI_FORMAT_R32G32B32_UINT, + Vf::Sint32x3 => 
/// Maps a composite alpha mode to a `d3d12::AlphaMode`.
///
/// The requested mode is not inspected (hence the `_mode` parameter name): every input
/// yields `d3d12::AlphaMode::Ignore`.
pub fn map_acomposite_alpha_mode(_mode: wgt::CompositeAlphaMode) -> d3d12::AlphaMode {
    d3d12::AlphaMode::Ignore
}
+static EXCEPTION_HANDLER_COUNT: Mutex<usize> = Mutex::const_new(parking_lot::RawMutex::INIT, 0); + +pub fn register_exception_handler() { + let mut count_guard = EXCEPTION_HANDLER_COUNT.lock(); + if *count_guard == 0 { + unsafe { + errhandlingapi::AddVectoredExceptionHandler(0, Some(output_debug_string_handler)) + }; + } + *count_guard += 1; +} + +pub fn unregister_exception_handler() { + let mut count_guard = EXCEPTION_HANDLER_COUNT.lock(); + if *count_guard == 1 { + unsafe { + errhandlingapi::RemoveVectoredExceptionHandler(output_debug_string_handler as *mut _) + }; + } + *count_guard -= 1; +} + +const MESSAGE_PREFIXES: &[(&str, log::Level)] = &[ + ("CORRUPTION", log::Level::Error), + ("ERROR", log::Level::Error), + ("WARNING", log::Level::Warn), + ("INFO", log::Level::Info), + ("MESSAGE", log::Level::Debug), +]; + +unsafe extern "system" fn output_debug_string_handler( + exception_info: *mut winnt::EXCEPTION_POINTERS, +) -> i32 { + // See https://stackoverflow.com/a/41480827 + let record = unsafe { &*(*exception_info).ExceptionRecord }; + if record.NumberParameters != 2 { + return excpt::EXCEPTION_CONTINUE_SEARCH; + } + let message = match record.ExceptionCode { + winnt::DBG_PRINTEXCEPTION_C => String::from_utf8_lossy(unsafe { + slice::from_raw_parts( + record.ExceptionInformation[1] as *const u8, + record.ExceptionInformation[0], + ) + }), + winnt::DBG_PRINTEXCEPTION_WIDE_C => Cow::Owned(String::from_utf16_lossy(unsafe { + slice::from_raw_parts( + record.ExceptionInformation[1] as *const u16, + record.ExceptionInformation[0], + ) + })), + _ => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + let message = match message.strip_prefix("D3D12 ") { + Some(msg) => msg + .trim_end_matches("\n\0") + .trim_end_matches("[ STATE_CREATION WARNING #0: UNKNOWN]"), + None => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + let (message, level) = match MESSAGE_PREFIXES + .iter() + .find(|&&(prefix, _)| message.starts_with(prefix)) + { + Some(&(prefix, level)) => 
(&message[prefix.len() + 2..], level), + None => (message, log::Level::Debug), + }; + + if level == log::Level::Warn && message.contains("#82") { + // This is are useless spammy warnings (#820, #821): + // "The application did not pass any clear value to resource creation" + return excpt::EXCEPTION_CONTINUE_SEARCH; + } + + if level == log::Level::Warn && message.contains("DRAW_EMPTY_SCISSOR_RECTANGLE") { + // This is normal, WebGPU allows passing empty scissor rectangles. + return excpt::EXCEPTION_CONTINUE_SEARCH; + } + + let _ = std::panic::catch_unwind(|| { + log::log!(level, "{}", message); + }); + + if cfg!(debug_assertions) && level == log::Level::Error { + // Set canary and continue + crate::VALIDATION_CANARY.add(message.to_string()); + } + + excpt::EXCEPTION_CONTINUE_EXECUTION +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/factory.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/factory.rs new file mode 100644 index 0000000000..38fdd17c89 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/factory.rs @@ -0,0 +1,261 @@ +use winapi::{ + shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, winerror}, + Interface, +}; + +use super::result::HResult as _; + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum DxgiFactoryType { + Factory2, + Factory4, + Factory6, +} + +fn should_keep_adapter(adapter: &dxgi::IDXGIAdapter1) -> bool { + let mut desc = unsafe { std::mem::zeroed() }; + unsafe { adapter.GetDesc1(&mut desc) }; + + // The Intel Haswell family of iGPUs had support for the D3D12 API but it was later + // removed due to a security vulnerability. + // + // We are explicitly filtering out all the devices in the family because we are now + // getting reports of device loss at a later time than at device creation time (`D3D12CreateDevice`). 
+ // + // See https://www.intel.com/content/www/us/en/support/articles/000057520/graphics.html + // This list of device IDs is from https://dgpu-docs.intel.com/devices/hardware-table.html + let haswell_device_ids = [ + 0x0422, 0x0426, 0x042A, 0x042B, 0x042E, 0x0C22, 0x0C26, 0x0C2A, 0x0C2B, 0x0C2E, 0x0A22, + 0x0A2A, 0x0A2B, 0x0D2A, 0x0D2B, 0x0D2E, 0x0A26, 0x0A2E, 0x0D22, 0x0D26, 0x0412, 0x0416, + 0x0D12, 0x041A, 0x041B, 0x0C12, 0x0C16, 0x0C1A, 0x0C1B, 0x0C1E, 0x0A12, 0x0A1A, 0x0A1B, + 0x0D16, 0x0D1A, 0x0D1B, 0x0D1E, 0x041E, 0x0A16, 0x0A1E, 0x0402, 0x0406, 0x040A, 0x040B, + 0x040E, 0x0C02, 0x0C06, 0x0C0A, 0x0C0B, 0x0C0E, 0x0A02, 0x0A06, 0x0A0A, 0x0A0B, 0x0A0E, + 0x0D02, 0x0D06, 0x0D0A, 0x0D0B, 0x0D0E, + ]; + if desc.VendorId == 0x8086 && haswell_device_ids.contains(&desc.DeviceId) { + return false; + } + + // If run completely headless, windows will show two different WARP adapters, one + // which is lying about being an integrated card. This is so that programs + // that ignore software adapters will actually run on headless/gpu-less machines. + // + // We don't want that and discorage that kind of filtering anyway, so we skip the integrated WARP. + if desc.VendorId == 5140 && (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) == 0 { + let adapter_name = super::conv::map_adapter_name(desc.Description); + if adapter_name.contains("Microsoft Basic Render Driver") { + return false; + } + } + + true +} + +pub fn enumerate_adapters(factory: d3d12::DxgiFactory) -> Vec<d3d12::DxgiAdapter> { + let mut adapters = Vec::with_capacity(8); + + for cur_index in 0.. 
{ + if let Some(factory6) = factory.as_factory6() { + profiling::scope!("IDXGIFactory6::EnumAdapterByGpuPreference"); + // We're already at dxgi1.6, we can grab IDXGIAdapter4 directly + let mut adapter4 = d3d12::ComPtr::<dxgi1_6::IDXGIAdapter4>::null(); + let hr = unsafe { + factory6.EnumAdapterByGpuPreference( + cur_index, + dxgi1_6::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + &dxgi1_6::IDXGIAdapter4::uuidof(), + adapter4.mut_void(), + ) + }; + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.into_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + if !should_keep_adapter(&adapter4) { + continue; + } + + adapters.push(d3d12::DxgiAdapter::Adapter4(adapter4)); + continue; + } + + profiling::scope!("IDXGIFactory1::EnumAdapters1"); + let mut adapter1 = d3d12::ComPtr::<dxgi::IDXGIAdapter1>::null(); + let hr = unsafe { factory.EnumAdapters1(cur_index, adapter1.mut_self()) }; + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.into_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + if !should_keep_adapter(&adapter1) { + continue; + } + + // Do the most aggressive casts first, skipping Adapter4 as we definitely don't have dxgi1_6. 
+ + // Adapter1 -> Adapter3 + unsafe { + match adapter1.cast::<dxgi1_4::IDXGIAdapter3>().into_result() { + Ok(adapter3) => { + adapters.push(d3d12::DxgiAdapter::Adapter3(adapter3)); + continue; + } + Err(err) => { + log::warn!("Failed casting Adapter1 to Adapter3: {}", err); + } + } + } + + // Adapter1 -> Adapter2 + unsafe { + match adapter1.cast::<dxgi1_2::IDXGIAdapter2>().into_result() { + Ok(adapter2) => { + adapters.push(d3d12::DxgiAdapter::Adapter2(adapter2)); + continue; + } + Err(err) => { + log::warn!("Failed casting Adapter1 to Adapter2: {}", err); + } + } + } + + adapters.push(d3d12::DxgiAdapter::Adapter1(adapter1)); + } + + adapters +} + +/// Tries to create a IDXGIFactory6, then a IDXGIFactory4, then a IDXGIFactory2, then a IDXGIFactory1, +/// returning the one that succeeds, or if the required_factory_type fails to be +/// created. +pub fn create_factory( + required_factory_type: DxgiFactoryType, + instance_flags: wgt::InstanceFlags, +) -> Result<(d3d12::DxgiLib, d3d12::DxgiFactory), crate::InstanceError> { + let lib_dxgi = d3d12::DxgiLib::new().map_err(|e| { + crate::InstanceError::with_source(String::from("failed to load dxgi.dll"), e) + })?; + + let mut factory_flags = d3d12::FactoryCreationFlags::empty(); + + if instance_flags.contains(wgt::InstanceFlags::VALIDATION) { + // The `DXGI_CREATE_FACTORY_DEBUG` flag is only allowed to be passed to + // `CreateDXGIFactory2` if the debug interface is actually available. So + // we check for whether it exists first. 
+ match lib_dxgi.get_debug_interface1() { + Ok(pair) => match pair.into_result() { + Ok(_debug_controller) => { + factory_flags |= d3d12::FactoryCreationFlags::DEBUG; + } + Err(err) => { + log::warn!("Unable to enable DXGI debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for DXGI not found: {:?}", err); + } + } + + // Intercept `OutputDebugString` calls + super::exception::register_exception_handler(); + } + + // Try to create IDXGIFactory4 + let factory4 = match lib_dxgi.create_factory2(factory_flags) { + Ok(pair) => match pair.into_result() { + Ok(factory) => Some(factory), + // We hard error here as we _should have_ been able to make a factory4 but couldn't. + Err(err) => { + // err is a Cow<str>, not an Error implementor + return Err(crate::InstanceError::new(format!( + "failed to create IDXGIFactory4: {err:?}" + ))); + } + }, + // If we require factory4, hard error. + Err(err) if required_factory_type == DxgiFactoryType::Factory4 => { + return Err(crate::InstanceError::with_source( + String::from("IDXGIFactory1 creation function not found"), + err, + )); + } + // If we don't print it to warn as all win7 will hit this case. + Err(err) => { + log::warn!("IDXGIFactory1 creation function not found: {err:?}"); + None + } + }; + + if let Some(factory4) = factory4 { + // Try to cast the IDXGIFactory4 into IDXGIFactory6 + let factory6 = unsafe { factory4.cast::<dxgi1_6::IDXGIFactory6>().into_result() }; + match factory6 { + Ok(factory6) => { + return Ok((lib_dxgi, d3d12::DxgiFactory::Factory6(factory6))); + } + // If we require factory6, hard error. + Err(err) if required_factory_type == DxgiFactoryType::Factory6 => { + // err is a Cow<str>, not an Error implementor + return Err(crate::InstanceError::new(format!( + "failed to cast IDXGIFactory4 to IDXGIFactory6: {err:?}" + ))); + } + // If we don't print it to warn. 
+ Err(err) => { + log::warn!("Failed to cast IDXGIFactory4 to IDXGIFactory6: {:?}", err); + return Ok((lib_dxgi, d3d12::DxgiFactory::Factory4(factory4))); + } + } + } + + // Try to create IDXGIFactory1 + let factory1 = match lib_dxgi.create_factory1() { + Ok(pair) => match pair.into_result() { + Ok(factory) => factory, + Err(err) => { + // err is a Cow<str>, not an Error implementor + return Err(crate::InstanceError::new(format!( + "failed to create IDXGIFactory1: {err:?}" + ))); + } + }, + // We always require at least factory1, so hard error + Err(err) => { + return Err(crate::InstanceError::with_source( + String::from("IDXGIFactory1 creation function not found"), + err, + )); + } + }; + + // Try to cast the IDXGIFactory1 into IDXGIFactory2 + let factory2 = unsafe { factory1.cast::<dxgi1_2::IDXGIFactory2>().into_result() }; + match factory2 { + Ok(factory2) => { + return Ok((lib_dxgi, d3d12::DxgiFactory::Factory2(factory2))); + } + // If we require factory2, hard error. + Err(err) if required_factory_type == DxgiFactoryType::Factory2 => { + // err is a Cow<str>, not an Error implementor + return Err(crate::InstanceError::new(format!( + "failed to cast IDXGIFactory1 to IDXGIFactory2: {err:?}" + ))); + } + // If we don't print it to warn. + Err(err) => { + log::warn!("Failed to cast IDXGIFactory1 to IDXGIFactory2: {:?}", err); + } + } + + // We tried to create 4 and 2, but only succeeded with 1. 
+ Ok((lib_dxgi, d3d12::DxgiFactory::Factory1(factory1))) +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/mod.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/mod.rs new file mode 100644 index 0000000000..559969633c --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/mod.rs @@ -0,0 +1,5 @@ +pub mod conv; +pub mod exception; +pub mod factory; +pub mod result; +pub mod time; diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/result.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/result.rs new file mode 100644 index 0000000000..db013d2dec --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/result.rs @@ -0,0 +1,42 @@ +use std::borrow::Cow; + +use winapi::shared::winerror; + +pub(crate) trait HResult<O> { + fn into_result(self) -> Result<O, Cow<'static, str>>; + fn into_device_result(self, description: &str) -> Result<O, crate::DeviceError>; +} +impl HResult<()> for i32 { + fn into_result(self) -> Result<(), Cow<'static, str>> { + if self >= 0 { + return Ok(()); + } + let description = match self { + winerror::E_UNEXPECTED => "unexpected", + winerror::E_NOTIMPL => "not implemented", + winerror::E_OUTOFMEMORY => "out of memory", + winerror::E_INVALIDARG => "invalid argument", + _ => return Err(Cow::Owned(format!("0x{:X}", self as u32))), + }; + Err(Cow::Borrowed(description)) + } + fn into_device_result(self, description: &str) -> Result<(), crate::DeviceError> { + self.into_result().map_err(|err| { + log::error!("{} failed: {}", description, err); + if self == winerror::E_OUTOFMEMORY { + crate::DeviceError::OutOfMemory + } else { + crate::DeviceError::Lost + } + }) + } +} + +impl<T> HResult<T> for (T, i32) { + fn into_result(self) -> Result<T, Cow<'static, str>> { + self.1.into_result().map(|()| self.0) + } + fn into_device_result(self, description: &str) -> Result<T, crate::DeviceError> { + self.1.into_device_result(description).map(|()| self.0) + } +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/time.rs 
b/third_party/rust/wgpu-hal/src/auxil/dxgi/time.rs new file mode 100644 index 0000000000..fd99c097d7 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/time.rs @@ -0,0 +1,94 @@ +#![allow(dead_code)] // IPresentationManager is unused currently + +use std::mem; + +use winapi::um::{ + profileapi::{QueryPerformanceCounter, QueryPerformanceFrequency}, + winnt::LARGE_INTEGER, +}; + +pub enum PresentationTimer { + /// DXGI uses QueryPerformanceCounter + Dxgi { + /// How many ticks of QPC per second + frequency: u64, + }, + /// IPresentationManager uses QueryInterruptTimePrecise + #[allow(non_snake_case)] + IPresentationManager { + fnQueryInterruptTimePrecise: unsafe extern "system" fn(*mut winapi::ctypes::c_ulonglong), + }, +} + +impl std::fmt::Debug for PresentationTimer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match *self { + Self::Dxgi { frequency } => f + .debug_struct("DXGI") + .field("frequency", &frequency) + .finish(), + Self::IPresentationManager { + fnQueryInterruptTimePrecise, + } => f + .debug_struct("IPresentationManager") + .field( + "QueryInterruptTimePrecise", + &(fnQueryInterruptTimePrecise as usize), + ) + .finish(), + } + } +} + +impl PresentationTimer { + /// Create a presentation timer using QueryPerformanceFrequency (what DXGI uses for presentation times) + pub fn new_dxgi() -> Self { + let mut frequency: LARGE_INTEGER = unsafe { mem::zeroed() }; + let success = unsafe { QueryPerformanceFrequency(&mut frequency) }; + assert_ne!(success, 0); + + Self::Dxgi { + frequency: unsafe { *frequency.QuadPart() } as u64, + } + } + + /// Create a presentation timer using QueryInterruptTimePrecise (what IPresentationManager uses for presentation times) + /// + /// Panics if QueryInterruptTimePrecise isn't found (below Win10) + pub fn new_ipresentation_manager() -> Self { + // We need to load this explicitly, as QueryInterruptTimePrecise is only available on Windows 10+ + // + // Docs say it's in kernel32.dll, but it's 
actually in kernelbase.dll. + let kernelbase = + libloading::os::windows::Library::open_already_loaded("kernelbase.dll").unwrap(); + // No concerns about lifetimes here as kernelbase is always there. + let ptr = unsafe { kernelbase.get(b"QueryInterruptTimePrecise").unwrap() }; + Self::IPresentationManager { + fnQueryInterruptTimePrecise: *ptr, + } + } + + /// Gets the current time in nanoseconds. + pub fn get_timestamp_ns(&self) -> u128 { + // Always do u128 math _after_ hitting the timing function. + match *self { + PresentationTimer::Dxgi { frequency } => { + let mut counter: LARGE_INTEGER = unsafe { mem::zeroed() }; + let success = unsafe { QueryPerformanceCounter(&mut counter) }; + assert_ne!(success, 0); + + // counter * (1_000_000_000 / freq) but re-ordered to make more precise + (unsafe { *counter.QuadPart() } as u128 * 1_000_000_000) / frequency as u128 + } + PresentationTimer::IPresentationManager { + fnQueryInterruptTimePrecise, + } => { + let mut counter = 0; + unsafe { fnQueryInterruptTimePrecise(&mut counter) }; + + // QueryInterruptTimePrecise uses units of 100ns for its tick. 
+ counter as u128 * 100 + } + } + } +} diff --git a/third_party/rust/wgpu-hal/src/auxil/mod.rs b/third_party/rust/wgpu-hal/src/auxil/mod.rs new file mode 100644 index 0000000000..f70a8bbe03 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/mod.rs @@ -0,0 +1,138 @@ +#[cfg(dx12)] +pub(super) mod dxgi; + +#[cfg(all(native, feature = "renderdoc"))] +pub(super) mod renderdoc; + +pub mod db { + pub mod amd { + pub const VENDOR: u32 = 0x1002; + } + pub mod apple { + pub const VENDOR: u32 = 0x106B; + } + pub mod arm { + pub const VENDOR: u32 = 0x13B5; + } + pub mod broadcom { + pub const VENDOR: u32 = 0x14E4; + } + pub mod imgtec { + pub const VENDOR: u32 = 0x1010; + } + pub mod intel { + pub const VENDOR: u32 = 0x8086; + pub const DEVICE_KABY_LAKE_MASK: u32 = 0x5900; + pub const DEVICE_SKY_LAKE_MASK: u32 = 0x1900; + } + pub mod mesa { + // Mesa does not actually have a PCI vendor id. + // + // To match Vulkan, we use the VkVendorId for Mesa in the gles backend so that lavapipe (Vulkan) and + // llvmpipe (OpenGL) have the same vendor id. + pub const VENDOR: u32 = 0x10005; + } + pub mod nvidia { + pub const VENDOR: u32 = 0x10DE; + } + pub mod qualcomm { + pub const VENDOR: u32 = 0x5143; + } +} + +/// Maximum binding size for the shaders that only support `i32` indexing. +/// Interestingly, the index itself can't reach that high, because the minimum +/// element size is 4 bytes, but the compiler toolchain still computes the +/// offset at some intermediate point, internally, as i32. 
+pub const MAX_I32_BINDING_SIZE: u32 = 1 << 31; + +pub fn map_naga_stage(stage: naga::ShaderStage) -> wgt::ShaderStages { + match stage { + naga::ShaderStage::Vertex => wgt::ShaderStages::VERTEX, + naga::ShaderStage::Fragment => wgt::ShaderStages::FRAGMENT, + naga::ShaderStage::Compute => wgt::ShaderStages::COMPUTE, + } +} + +impl crate::CopyExtent { + pub fn map_extent_to_copy_size(extent: &wgt::Extent3d, dim: wgt::TextureDimension) -> Self { + Self { + width: extent.width, + height: extent.height, + depth: match dim { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => 1, + wgt::TextureDimension::D3 => extent.depth_or_array_layers, + }, + } + } + + pub fn min(&self, other: &Self) -> Self { + Self { + width: self.width.min(other.width), + height: self.height.min(other.height), + depth: self.depth.min(other.depth), + } + } + + // Get the copy size at a specific mipmap level. This doesn't make most sense, + // since the copy extents are provided *for* a mipmap level to start with. + // But backends use `CopyExtent` more sparingly, and this piece is shared. 
/// Borrow the leading C string stored in `bytes`.
///
/// The returned `CStr` starts at the first element of `bytes` and ends at the
/// first zero element. When `bytes` holds no zero element at all, `None` is
/// returned.
///
/// This can be removed when `CStr::from_bytes_until_nul` is stabilized.
/// ([#95027](https://github.com/rust-lang/rust/issues/95027))
#[allow(dead_code)]
pub(crate) fn cstr_from_bytes_until_nul(bytes: &[std::os::raw::c_char]) -> Option<&std::ffi::CStr> {
    let has_terminator = bytes.iter().any(|&byte| byte == 0);
    if !has_terminator {
        return None;
    }

    // SAFETY (for `CStr::from_ptr`):
    // - A terminator was found above, so the scan stops inside the slice.
    // - Everything up to that terminator is readable because the slice covers it.
    // - The bytes cannot change while the result lives, as it borrows the slice.
    Some(unsafe { std::ffi::CStr::from_ptr(bytes.as_ptr()) })
}
renderdoc_result { + Ok(lib) => lib, + Err(e) => { + return RenderDoc::NotAvailable { + reason: format!( + "Unable to load renderdoc library '{renderdoc_filename}': {e:?}" + ), + } + } + }; + + let get_api: libloading::Symbol<GetApiFn> = + match unsafe { renderdoc_lib.get(b"RENDERDOC_GetAPI\0") } { + Ok(api) => api, + Err(e) => { + return RenderDoc::NotAvailable { + reason: format!( + "Unable to get RENDERDOC_GetAPI from renderdoc library '{renderdoc_filename}': {e:?}" + ), + } + } + }; + let mut obj = ptr::null_mut(); + match unsafe { get_api(10401, &mut obj) } { + 1 => RenderDoc::Available { + api: RenderDocApi { + api: unsafe { *(obj as *mut renderdoc_sys::RENDERDOC_API_1_4_1) }, + lib: renderdoc_lib, + }, + }, + return_value => RenderDoc::NotAvailable { + reason: format!( + "Unable to get API from renderdoc library '{renderdoc_filename}': {return_value}" + ), + }, + } + } +} + +impl Default for RenderDoc { + fn default() -> Self { + if !cfg!(debug_assertions) { + return RenderDoc::NotAvailable { + reason: "RenderDoc support is only enabled with 'debug_assertions'".into(), + }; + } + unsafe { Self::new() } + } +} +/// A implementation specific handle +pub type Handle = *mut os::raw::c_void; + +impl RenderDoc { + /// Start a RenderDoc frame capture + pub unsafe fn start_frame_capture(&self, device_handle: Handle, window_handle: Handle) -> bool { + match *self { + Self::Available { api: ref entry } => { + unsafe { entry.api.StartFrameCapture.unwrap()(device_handle, window_handle) }; + true + } + Self::NotAvailable { ref reason } => { + log::warn!("Could not start RenderDoc frame capture: {}", reason); + false + } + } + } + + /// End a RenderDoc frame capture + pub unsafe fn end_frame_capture(&self, device_handle: Handle, window_handle: Handle) { + match *self { + Self::Available { api: ref entry } => { + unsafe { entry.api.EndFrameCapture.unwrap()(device_handle, window_handle) }; + } + Self::NotAvailable { ref reason } => { + log::warn!("Could not end RenderDoc 
frame capture: {}", reason) + } + }; + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/adapter.rs b/third_party/rust/wgpu-hal/src/dx12/adapter.rs new file mode 100644 index 0000000000..f6027014d2 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/adapter.rs @@ -0,0 +1,643 @@ +use crate::{ + auxil::{self, dxgi::result::HResult as _}, + dx12::{shader_compilation, SurfaceTarget}, +}; +use parking_lot::Mutex; +use std::{mem, ptr, sync::Arc, thread}; +use winapi::{ + shared::{ + dxgi, dxgi1_2, dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, minwindef::DWORD, windef, winerror, + }, + um::{d3d12 as d3d12_ty, d3d12sdklayers, winuser}, +}; + +impl Drop for super::Adapter { + fn drop(&mut self) { + // Debug tracking alive objects + if !thread::panicking() + && self + .private_caps + .instance_flags + .contains(wgt::InstanceFlags::VALIDATION) + { + unsafe { + self.report_live_objects(); + } + } + } +} + +impl super::Adapter { + pub unsafe fn report_live_objects(&self) { + if let Ok(debug_device) = unsafe { + self.raw + .cast::<d3d12sdklayers::ID3D12DebugDevice>() + .into_result() + } { + unsafe { + debug_device.ReportLiveDeviceObjects( + d3d12sdklayers::D3D12_RLDO_SUMMARY | d3d12sdklayers::D3D12_RLDO_IGNORE_INTERNAL, + ) + }; + } + } + + pub fn raw_adapter(&self) -> &d3d12::DxgiAdapter { + &self.raw + } + + #[allow(trivial_casts)] + pub(super) fn expose( + adapter: d3d12::DxgiAdapter, + library: &Arc<d3d12::D3D12Lib>, + instance_flags: wgt::InstanceFlags, + dxc_container: Option<Arc<shader_compilation::DxcContainer>>, + ) -> Option<crate::ExposedAdapter<super::Api>> { + // Create the device so that we can get the capabilities. 
+ let device = { + profiling::scope!("ID3D12Device::create_device"); + match library.create_device(&adapter, d3d12::FeatureLevel::L11_0) { + Ok(pair) => match pair.into_result() { + Ok(device) => device, + Err(err) => { + log::warn!("Device creation failed: {}", err); + return None; + } + }, + Err(err) => { + log::warn!("Device creation function is not found: {:?}", err); + return None; + } + } + }; + + profiling::scope!("feature queries"); + + // Detect the highest supported feature level. + let d3d_feature_level = [ + d3d12::FeatureLevel::L12_1, + d3d12::FeatureLevel::L12_0, + d3d12::FeatureLevel::L11_1, + d3d12::FeatureLevel::L11_0, + ]; + let mut device_levels: d3d12_ty::D3D12_FEATURE_DATA_FEATURE_LEVELS = + unsafe { mem::zeroed() }; + device_levels.NumFeatureLevels = d3d_feature_level.len() as u32; + device_levels.pFeatureLevelsRequested = d3d_feature_level.as_ptr().cast(); + unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FEATURE_LEVELS, + &mut device_levels as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FEATURE_LEVELS>() as _, + ) + }; + // This cast should never fail because we only requested feature levels that are already in the enum. + let max_feature_level = + d3d12::FeatureLevel::try_from(device_levels.MaxSupportedFeatureLevel) + .expect("Unexpected feature level"); + + // We have found a possible adapter. + // Acquire the device information. 
+ let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() }; + unsafe { + adapter.unwrap_adapter2().GetDesc2(&mut desc); + } + + let device_name = auxil::dxgi::conv::map_adapter_name(desc.Description); + + let mut features_architecture: d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE = + unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_ARCHITECTURE, + &mut features_architecture as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE>() as _, + ) + }); + + let mut shader_model_support: d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL = + d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL { + HighestShaderModel: d3d12_ty::D3D_SHADER_MODEL_6_0, + }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_SHADER_MODEL, + &mut shader_model_support as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL>() as _, + ) + }); + + let mut workarounds = super::Workarounds::default(); + + let info = wgt::AdapterInfo { + backend: wgt::Backend::Dx12, + name: device_name, + vendor: desc.VendorId, + device: desc.DeviceId, + device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + workarounds.avoid_cpu_descriptor_overwrites = true; + wgt::DeviceType::Cpu + } else if features_architecture.UMA != 0 { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + }, + driver: String::new(), + driver_info: String::new(), + }; + + let mut options: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS = unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS, + &mut options as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS>() as _, + ) + }); + + let _depth_bounds_test_supported = { + let mut features2: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + 
d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS2, + &mut features2 as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2>() as _, + ) + }; + hr == 0 && features2.DepthBoundsTestSupported != 0 + }; + + let casting_fully_typed_format_supported = { + let mut features3: crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + 21, // D3D12_FEATURE_D3D12_OPTIONS3 + &mut features3 as *mut _ as *mut _, + mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3>() as _, + ) + }; + hr == 0 && features3.CastingFullyTypedFormatSupported != 0 + }; + + let private_caps = super::PrivateCapabilities { + instance_flags, + heterogeneous_resource_heaps: options.ResourceHeapTier + != d3d12_ty::D3D12_RESOURCE_HEAP_TIER_1, + memory_architecture: if features_architecture.UMA != 0 { + super::MemoryArchitecture::Unified { + cache_coherent: features_architecture.CacheCoherentUMA != 0, + } + } else { + super::MemoryArchitecture::NonUnified + }, + heap_create_not_zeroed: false, //TODO: winapi support for Options7 + casting_fully_typed_format_supported, + // See https://github.com/gfx-rs/wgpu/issues/3552 + suballocation_supported: !info.name.contains("Iris(R) Xe"), + }; + + // Theoretically vram limited, but in practice 2^20 is the limit + let tier3_practical_descriptor_limit = 1 << 20; + + let (full_heap_count, uav_count) = match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => { + let uav_count = match max_feature_level { + d3d12::FeatureLevel::L11_0 => 8, + _ => 64, + }; + + ( + d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + uav_count, + ) + } + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_2 => ( + d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2, + 64, + ), + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_3 => ( + tier3_practical_descriptor_limit, + tier3_practical_descriptor_limit, + ), + other => { + log::warn!("Unknown resource binding 
tier {}", other); + ( + d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + 8, + ) + } + }; + + let mut features = wgt::Features::empty() + | wgt::Features::DEPTH_CLIP_CONTROL + | wgt::Features::DEPTH32FLOAT_STENCIL8 + | wgt::Features::INDIRECT_FIRST_INSTANCE + | wgt::Features::MAPPABLE_PRIMARY_BUFFERS + | wgt::Features::MULTI_DRAW_INDIRECT + | wgt::Features::MULTI_DRAW_INDIRECT_COUNT + | wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER + | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO + | wgt::Features::POLYGON_MODE_LINE + | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | wgt::Features::TIMESTAMP_QUERY + | wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES + | wgt::Features::TEXTURE_COMPRESSION_BC + | wgt::Features::CLEAR_TEXTURE + | wgt::Features::TEXTURE_FORMAT_16BIT_NORM + | wgt::Features::PUSH_CONSTANTS + | wgt::Features::SHADER_PRIMITIVE_INDEX + | wgt::Features::RG11B10UFLOAT_RENDERABLE + | wgt::Features::DUAL_SOURCE_BLENDING + | wgt::Features::TEXTURE_FORMAT_NV12; + + //TODO: in order to expose this, we need to run a compute shader + // that extract the necessary statistics out of the D3D12 result. + // Alternatively, we could allocate a buffer for the query set, + // write the results there, and issue a bunch of copy commands. 
+ //| wgt::Features::PIPELINE_STATISTICS_QUERY + + if max_feature_level as u32 >= d3d12::FeatureLevel::L11_1 as u32 { + features |= wgt::Features::VERTEX_WRITABLE_STORAGE; + } + + features.set( + wgt::Features::CONSERVATIVE_RASTERIZATION, + options.ConservativeRasterizationTier + != d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED, + ); + + features.set( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + shader_model_support.HighestShaderModel >= d3d12_ty::D3D_SHADER_MODEL_5_1, + ); + + let bgra8unorm_storage_supported = { + let mut bgra8unorm_info: d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT = + unsafe { mem::zeroed() }; + bgra8unorm_info.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + let hr = unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + &mut bgra8unorm_info as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>() as _, + ) + }; + hr == 0 + && (bgra8unorm_info.Support2 & d3d12_ty::D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE != 0) + }; + features.set( + wgt::Features::BGRA8UNORM_STORAGE, + bgra8unorm_storage_supported, + ); + + // float32-filterable should always be available on d3d12 + features.set(wgt::Features::FLOAT32_FILTERABLE, true); + + // TODO: Determine if IPresentationManager is supported + let presentation_timer = auxil::dxgi::time::PresentationTimer::new_dxgi(); + + let base = wgt::Limits::default(); + + let mut downlevel = wgt::DownlevelCapabilities::default(); + // https://github.com/gfx-rs/wgpu/issues/2471 + downlevel.flags -= + wgt::DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW; + + Some(crate::ExposedAdapter { + adapter: super::Adapter { + raw: adapter, + device, + library: Arc::clone(library), + private_caps, + presentation_timer, + workarounds, + dxc_container, + }, + info, + features, + 
capabilities: crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: d3d12_ty::D3D12_REQ_TEXTURE1D_U_DIMENSION, + max_texture_dimension_2d: d3d12_ty::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION + .min(d3d12_ty::D3D12_REQ_TEXTURECUBE_DIMENSION), + max_texture_dimension_3d: d3d12_ty::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + max_texture_array_layers: d3d12_ty::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION, + max_bind_groups: crate::MAX_BIND_GROUPS as u32, + max_bindings_per_bind_group: 65535, + // dynamic offsets take a root constant, so we expose the minimum here + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 128, + _ => full_heap_count, + }, + max_samplers_per_shader_stage: match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 16, + _ => d3d12_ty::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, + }, + // these both account towards `uav_count`, but we can't express the limit as a sum + // of the two, so we divide it by 4 to account for the worst case scenario + // (2 shader stages, with both using 16 storage textures and 16 storage buffers) + max_storage_buffers_per_shader_stage: uav_count / 4, + max_storage_textures_per_shader_stage: uav_count / 4, + max_uniform_buffers_per_shader_stage: full_heap_count, + max_uniform_buffer_binding_size: + d3d12_ty::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16, + max_storage_buffer_binding_size: crate::auxil::MAX_I32_BINDING_SIZE, + max_vertex_buffers: d3d12_ty::D3D12_VS_INPUT_REGISTER_COUNT + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: d3d12_ty::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, + max_vertex_buffer_array_stride: d3d12_ty::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, + // The push constants
are part of the root signature which + // has a limit of 64 DWORDS (256 bytes), but other resources + // also share the root signature: + // + // - push constants consume a `DWORD` for each `4 bytes` of data + // - If a bind group has buffers it will consume a `DWORD` + // for the descriptor table + // - If a bind group has samplers it will consume a `DWORD` + // for the descriptor table + // - Each dynamic buffer will consume `2 DWORDs` for the + // root descriptor + // - The special constants buffer count as constants + // + // Since we can't know beforehand all root signatures that + // will be created, the max size to be used for push + // constants needs to be set to a reasonable number instead. + // + // Source: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits#memory-limits-and-costs + max_push_constant_size: 128, + min_uniform_buffer_offset_alignment: + d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, + min_storage_buffer_offset_alignment: 4, + max_inter_stage_shader_components: base.max_inter_stage_shader_components, + max_compute_workgroup_storage_size: base.max_compute_workgroup_storage_size, //TODO? + max_compute_invocations_per_workgroup: + d3d12_ty::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP, + max_compute_workgroup_size_x: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_X, + max_compute_workgroup_size_y: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Y, + max_compute_workgroup_size_z: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Z, + max_compute_workgroups_per_dimension: + d3d12_ty::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + // Dx12 does not expose a maximum buffer size in the API. + // This limit is chosen to avoid potential issues with drivers should they internally + // store buffer sizes using 32 bit ints (a situation we have already encountered with vulkan). 
+ max_buffer_size: i32::MAX as u64, + max_non_sampler_bindings: 1_000_000, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new( + d3d12_ty::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64, + ) + .unwrap(), + buffer_copy_pitch: wgt::BufferSize::new( + d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64, + ) + .unwrap(), + }, + downlevel, + }, + }) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + _features: wgt::Features, + limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let queue = { + profiling::scope!("ID3D12Device::CreateCommandQueue"); + self.device + .create_command_queue( + d3d12::CmdListType::Direct, + d3d12::Priority::Normal, + d3d12::CommandQueueFlags::empty(), + 0, + ) + .into_device_result("Queue creation")? + }; + + let device = super::Device::new( + self.device.clone(), + queue.clone(), + limits, + self.private_caps, + &self.library, + self.dxc_container.clone(), + )?; + Ok(crate::OpenDevice { + device, + queue: super::Queue { + raw: queue, + temp_lists: Mutex::new(Vec::new()), + }, + }) + } + + #[allow(trivial_casts)] + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + + let raw_format = match auxil::dxgi::conv::map_texture_format_failable(format) { + Some(f) => f, + None => return Tfc::empty(), + }; + let srv_uav_format = if format.is_combined_depth_stencil_format() { + auxil::dxgi::conv::map_texture_format_for_srv_uav( + format, + // use the depth aspect here as opposed to stencil since it has more capabilities + crate::FormatAspects::DEPTH, + ) + } else { + auxil::dxgi::conv::map_texture_format_for_srv_uav( + format, + crate::FormatAspects::from(format), + ) + } + .unwrap(); + + let mut data = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: raw_format, + Support1: unsafe { mem::zeroed() }, + Support2: unsafe { 
mem::zeroed() }, + }; + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + &mut data as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>() as _, + ) + }); + + // Because we use a different format for SRV and UAV views of depth textures, we need to check + // the features that use SRV/UAVs using the no-depth format. + let mut data_srv_uav = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: srv_uav_format, + Support1: d3d12_ty::D3D12_FORMAT_SUPPORT1_NONE, + Support2: d3d12_ty::D3D12_FORMAT_SUPPORT2_NONE, + }; + if raw_format != srv_uav_format { + // Only recheck if we're using a different format + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + ptr::addr_of_mut!(data_srv_uav).cast(), + DWORD::try_from(mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>()) + .unwrap(), + ) + }); + } else { + // Same format, just copy over.
+ data_srv_uav = data; + } + + let mut caps = Tfc::COPY_SRC | Tfc::COPY_DST; + let is_texture = data.Support1 + & (d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE1D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE2D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE3D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURECUBE) + != 0; + // SRVs use srv_uav_format + caps.set( + Tfc::SAMPLED, + is_texture && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_LOAD != 0, + ); + caps.set( + Tfc::SAMPLED_LINEAR, + data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT_BLEND, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_BLENDABLE != 0, + ); + caps.set( + Tfc::DEPTH_STENCIL_ATTACHMENT, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL != 0, + ); + // UAVs use srv_uav_format + caps.set( + Tfc::STORAGE, + data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW + != 0, + ); + caps.set( + Tfc::STORAGE_READ_WRITE, + data_srv_uav.Support2 & d3d12_ty::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD != 0, + ); + + // We load via UAV/SRV so use srv_uav_format + let no_msaa_load = caps.contains(Tfc::SAMPLED) + && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD == 0; + + let no_msaa_target = data.Support1 + & (d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET + | d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) + != 0 + && data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET == 0; + + caps.set( + Tfc::MULTISAMPLE_RESOLVE, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE != 0, + ); + + let mut ms_levels = d3d12_ty::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS { + Format: raw_format, + SampleCount: 0, + Flags: d3d12_ty::D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, + NumQualityLevels: 0, + }; + + let mut set_sample_count = |sc: u32, tfc: 
Tfc| { + ms_levels.SampleCount = sc; + + if unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + <*mut _>::cast(&mut ms_levels), + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS>() as _, + ) + } == winerror::S_OK + && ms_levels.NumQualityLevels != 0 + { + caps.set(tfc, !no_msaa_load && !no_msaa_target); + } + }; + + set_sample_count(2, Tfc::MULTISAMPLE_X2); + set_sample_count(4, Tfc::MULTISAMPLE_X4); + set_sample_count(8, Tfc::MULTISAMPLE_X8); + set_sample_count(16, Tfc::MULTISAMPLE_X16); + + caps + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + let current_extent = { + match surface.target { + SurfaceTarget::WndHandle(wnd_handle) => { + let mut rect: windef::RECT = unsafe { mem::zeroed() }; + if unsafe { winuser::GetClientRect(wnd_handle, &mut rect) } != 0 { + Some(wgt::Extent3d { + width: (rect.right - rect.left) as u32, + height: (rect.bottom - rect.top) as u32, + depth_or_array_layers: 1, + }) + } else { + log::warn!("Unable to get the window client rect"); + None + } + } + SurfaceTarget::Visual(_) + | SurfaceTarget::SurfaceHandle(_) + | SurfaceTarget::SwapChainPanel(_) => None, + } + }; + + let mut present_modes = vec![wgt::PresentMode::Mailbox, wgt::PresentMode::Fifo]; + if surface.supports_allow_tearing { + present_modes.push(wgt::PresentMode::Immediate); + } + + Some(crate::SurfaceCapabilities { + formats: vec![ + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Rgba8UnormSrgb, + wgt::TextureFormat::Rgba8Unorm, + wgt::TextureFormat::Rgb10a2Unorm, + wgt::TextureFormat::Rgba16Float, + ], + // See https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency + maximum_frame_latency: 1..=16, + current_extent, + usage: crate::TextureUses::COLOR_TARGET + | crate::TextureUses::COPY_SRC + | crate::TextureUses::COPY_DST, + present_modes, + 
composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp(self.presentation_timer.get_timestamp_ns()) + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/command.rs b/third_party/rust/wgpu-hal/src/dx12/command.rs new file mode 100644 index 0000000000..f527898d90 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/command.rs @@ -0,0 +1,1220 @@ +use crate::auxil::{self, dxgi::result::HResult as _}; + +use super::conv; +use std::{mem, ops::Range, ptr}; +use winapi::um::d3d12 as d3d12_ty; + +fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12_ty::D3D12_BOX { + d3d12_ty::D3D12_BOX { + left: origin.x, + top: origin.y, + right: origin.x + size.width, + bottom: origin.y + size.height, + front: origin.z, + back: origin.z + size.depth, + } +} + +impl crate::BufferTextureCopy { + fn to_subresource_footprint( + &self, + format: wgt::TextureFormat, + ) -> d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + let (block_width, _) = format.block_dimensions(); + d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: self.buffer_layout.offset, + Footprint: d3d12_ty::D3D12_SUBRESOURCE_FOOTPRINT { + Format: auxil::dxgi::conv::map_texture_format_for_copy( + format, + self.texture_base.aspect, + ) + .unwrap(), + Width: self.size.width, + Height: self.size.height, + Depth: self.size.depth, + RowPitch: { + let actual = self.buffer_layout.bytes_per_row.unwrap_or_else(|| { + // this may happen for single-line updates + let block_size = format + .block_copy_size(Some(self.texture_base.aspect.map())) + .unwrap(); + (self.size.width / block_width) * block_size + }); + wgt::math::align_to(actual, d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + }, + }, + } + } +} + +impl super::Temp { + fn prepare_marker(&mut self, marker: &str) -> (&[u16], u32) { + self.marker.clear(); + self.marker.extend(marker.encode_utf16()); + self.marker.push(0); + (&self.marker, 
self.marker.len() as u32 * 2) + } +} + +impl super::CommandEncoder { + unsafe fn begin_pass(&mut self, kind: super::PassKind, label: crate::Label) { + let list = self.list.as_ref().unwrap(); + self.pass.kind = kind; + if let Some(label) = label { + let (wide_label, size) = self.temp.prepare_marker(label); + unsafe { list.BeginEvent(0, wide_label.as_ptr() as *const _, size) }; + self.pass.has_label = true; + } + self.pass.dirty_root_elements = 0; + self.pass.dirty_vertex_buffers = 0; + list.set_descriptor_heaps(&[ + self.shared.heap_views.raw.clone(), + self.shared.heap_samplers.raw.clone(), + ]); + } + + unsafe fn end_pass(&mut self) { + let list = self.list.as_ref().unwrap(); + list.set_descriptor_heaps(&[]); + if self.pass.has_label { + unsafe { list.EndEvent() }; + } + self.pass.clear(); + } + + unsafe fn prepare_draw(&mut self, first_vertex: i32, first_instance: u32) { + while self.pass.dirty_vertex_buffers != 0 { + let list = self.list.as_ref().unwrap(); + let index = self.pass.dirty_vertex_buffers.trailing_zeros(); + self.pass.dirty_vertex_buffers ^= 1 << index; + unsafe { + list.IASetVertexBuffers( + index, + 1, + self.pass.vertex_buffers.as_ptr().offset(index as isize), + ); + } + } + if let Some(root_index) = self.pass.layout.special_constants_root_index { + let needs_update = match self.pass.root_elements[root_index as usize] { + super::RootElement::SpecialConstantBuffer { + first_vertex: other_vertex, + first_instance: other_instance, + other: _, + } => first_vertex != other_vertex || first_instance != other_instance, + _ => true, + }; + if needs_update { + self.pass.dirty_root_elements |= 1 << root_index; + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + first_vertex, + first_instance, + other: 0, + }; + } + } + self.update_root_elements(); + } + + fn prepare_dispatch(&mut self, count: [u32; 3]) { + if let Some(root_index) = self.pass.layout.special_constants_root_index { + let needs_update = match 
self.pass.root_elements[root_index as usize] { + super::RootElement::SpecialConstantBuffer { + first_vertex, + first_instance, + other, + } => [first_vertex as u32, first_instance, other] != count, + _ => true, + }; + if needs_update { + self.pass.dirty_root_elements |= 1 << root_index; + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + first_vertex: count[0] as i32, + first_instance: count[1], + other: count[2], + }; + } + } + self.update_root_elements(); + } + + //Note: we have to call this lazily before draw calls. Otherwise, D3D complains + // about the root parameters being incompatible with root signature. + fn update_root_elements(&mut self) { + use super::{BufferViewKind as Bvk, PassKind as Pk}; + + while self.pass.dirty_root_elements != 0 { + let list = self.list.as_ref().unwrap(); + let index = self.pass.dirty_root_elements.trailing_zeros(); + self.pass.dirty_root_elements ^= 1 << index; + + match self.pass.root_elements[index as usize] { + super::RootElement::Empty => log::error!("Root index {} is not bound", index), + super::RootElement::Constant => { + let info = self.pass.layout.root_constant_info.as_ref().unwrap(); + + for offset in info.range.clone() { + let val = self.pass.constant_data[offset as usize]; + match self.pass.kind { + Pk::Render => list.set_graphics_root_constant(index, val, offset), + Pk::Compute => list.set_compute_root_constant(index, val, offset), + Pk::Transfer => (), + } + } + } + super::RootElement::SpecialConstantBuffer { + first_vertex, + first_instance, + other, + } => match self.pass.kind { + Pk::Render => { + list.set_graphics_root_constant(index, first_vertex as u32, 0); + list.set_graphics_root_constant(index, first_instance, 1); + } + Pk::Compute => { + list.set_compute_root_constant(index, first_vertex as u32, 0); + list.set_compute_root_constant(index, first_instance, 1); + list.set_compute_root_constant(index, other, 2); + } + Pk::Transfer => (), + }, + 
super::RootElement::Table(descriptor) => match self.pass.kind { + Pk::Render => list.set_graphics_root_descriptor_table(index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(index, descriptor), + Pk::Transfer => (), + }, + super::RootElement::DynamicOffsetBuffer { kind, address } => { + match (self.pass.kind, kind) { + (Pk::Render, Bvk::Constant) => { + list.set_graphics_root_constant_buffer_view(index, address) + } + (Pk::Compute, Bvk::Constant) => { + list.set_compute_root_constant_buffer_view(index, address) + } + (Pk::Render, Bvk::ShaderResource) => { + list.set_graphics_root_shader_resource_view(index, address) + } + (Pk::Compute, Bvk::ShaderResource) => { + list.set_compute_root_shader_resource_view(index, address) + } + (Pk::Render, Bvk::UnorderedAccess) => { + list.set_graphics_root_unordered_access_view(index, address) + } + (Pk::Compute, Bvk::UnorderedAccess) => { + list.set_compute_root_unordered_access_view(index, address) + } + (Pk::Transfer, _) => (), + } + } + } + } + } + + fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) { + log::trace!("Reset signature {:?}", layout.signature); + if let Some(root_index) = layout.special_constants_root_index { + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + first_vertex: 0, + first_instance: 0, + other: 0, + }; + } + self.pass.layout = layout.clone(); + self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1; + } + + fn write_pass_end_timestamp_if_requested(&mut self) { + if let Some((query_set_raw, index)) = self.end_of_pass_timer_query.take() { + use crate::CommandEncoder as _; + unsafe { + self.write_timestamp( + &crate::dx12::QuerySet { + raw: query_set_raw, + raw_ty: d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + }, + index, + ); + } + } + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let 
list = loop { + if let Some(list) = self.free_lists.pop() { + let reset_result = list + .reset(&self.allocator, d3d12::PipelineState::null()) + .into_result(); + if reset_result.is_ok() { + break Some(list); + } + } else { + break None; + } + }; + + let list = if let Some(list) = list { + list + } else { + self.device + .create_graphics_command_list( + d3d12::CmdListType::Direct, + &self.allocator, + d3d12::PipelineState::null(), + 0, + ) + .into_device_result("Create command list")? + }; + + if let Some(label) = label { + let cwstr = conv::map_label(label); + unsafe { list.SetName(cwstr.as_ptr()) }; + } + + self.list = Some(list); + self.temp.clear(); + self.pass.clear(); + Ok(()) + } + unsafe fn discard_encoding(&mut self) { + if let Some(list) = self.list.take() { + if list.close().into_result().is_ok() { + self.free_lists.push(list); + } + } + } + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + let raw = self.list.take().unwrap(); + raw.close() + .into_device_result("GraphicsCommandList::close")?; + Ok(super::CommandBuffer { raw }) + } + unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) { + for cmd_buf in command_buffers { + self.free_lists.push(cmd_buf.raw); + } + self.allocator.reset(); + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + self.temp.barriers.clear(); + + log::trace!( + "List {:p} buffer transitions", + self.list.as_ref().unwrap().as_ptr() + ); + for barrier in barriers { + log::trace!( + "\t{:p}: usage {:?}..{:?}", + barrier.buffer.resource.as_ptr(), + barrier.usage.start, + barrier.usage.end + ); + let s0 = conv::map_buffer_usage_to_state(barrier.usage.start); + let s1 = conv::map_buffer_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: 
d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + } + }; + self.temp.barriers.push(raw); + } else if barrier.usage.start == crate::BufferUses::STORAGE_READ_WRITE { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + } + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + unsafe { + self.list + .as_ref() + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + }; + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + self.temp.barriers.clear(); + + log::trace!( + "List {:p} texture transitions", + self.list.as_ref().unwrap().as_ptr() + ); + for barrier in barriers { + log::trace!( + "\t{:p}: usage {:?}..{:?}, range {:?}", + barrier.texture.resource.as_ptr(), + barrier.usage.start, + barrier.usage.end, + barrier.range + ); + let s0 = conv::map_texture_usage_to_state(barrier.usage.start); + let s1 = conv::map_texture_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + 
StateAfter: s1, + } + }; + + let tex_mip_level_count = barrier.texture.mip_level_count; + let tex_array_layer_count = barrier.texture.array_layer_count(); + + if barrier.range.is_full_resource( + barrier.texture.format, + tex_mip_level_count, + tex_array_layer_count, + ) { + // Only one barrier if it affects the whole image. + self.temp.barriers.push(raw); + } else { + // Selected texture aspect is relevant if the texture format has both depth _and_ stencil aspects. + let planes = if barrier.texture.format.is_combined_depth_stencil_format() { + match barrier.range.aspect { + wgt::TextureAspect::All => 0..2, + wgt::TextureAspect::DepthOnly => 0..1, + wgt::TextureAspect::StencilOnly => 1..2, + _ => unreachable!(), + } + } else if let Some(planes) = barrier.texture.format.planes() { + match barrier.range.aspect { + wgt::TextureAspect::All => 0..planes, + wgt::TextureAspect::Plane0 => 0..1, + wgt::TextureAspect::Plane1 => 1..2, + wgt::TextureAspect::Plane2 => 2..3, + _ => unreachable!(), + } + } else { + match barrier.texture.format { + wgt::TextureFormat::Stencil8 => 1..2, + wgt::TextureFormat::Depth24Plus => 0..2, // TODO: investigate why tests fail if we set this to 0..1 + _ => 0..1, + } + }; + + for mip_level in barrier.range.mip_range(tex_mip_level_count) { + for array_layer in barrier.range.layer_range(tex_array_layer_count) { + for plane in planes.clone() { + unsafe { + raw.u.Transition_mut().Subresource = barrier + .texture + .calc_subresource(mip_level, array_layer, plane); + }; + self.temp.barriers.push(raw); + } + } + } + } + } else if barrier.usage.start == crate::TextureUses::STORAGE_READ_WRITE { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + } + }; + self.temp.barriers.push(raw); + } + } + + 
if !self.temp.barriers.is_empty() { + unsafe { + self.list + .as_ref() + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + }; + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + let list = self.list.as_ref().unwrap(); + let mut offset = range.start; + while offset < range.end { + let size = super::ZERO_BUFFER_SIZE.min(range.end - offset); + unsafe { + list.CopyBufferRegion( + buffer.resource.as_mut_ptr(), + offset, + self.shared.zero_buffer.as_mut_ptr(), + 0, + size, + ) + }; + offset += size; + } + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let list = self.list.as_ref().unwrap(); + for r in regions { + unsafe { + list.CopyBufferRegion( + dst.resource.as_mut_ptr(), + r.dst_offset, + src.resource.as_mut_ptr(), + r.src_offset, + r.size.get(), + ) + }; + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let list = self.list.as_ref().unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + + for r in regions { + let src_box = make_box(&r.src_base.origin, &r.size); + unsafe { + *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.src_base) + }; + unsafe { + *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.dst_base) + }; + + unsafe { + list.CopyTextureRegion( + &dst_location, + 
r.dst_base.origin.x, + r.dst_base.origin.y, + r.dst_base.origin.z, + &src_location, + &src_box, + ) + }; + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let list = self.list.as_ref().unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + for r in regions { + let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); + unsafe { + *src_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(dst.format) + }; + unsafe { + *dst_location.u.SubresourceIndex_mut() = + dst.calc_subresource_for_copy(&r.texture_base) + }; + unsafe { + list.CopyTextureRegion( + &dst_location, + r.texture_base.origin.x, + r.texture_base.origin.y, + r.texture_base.origin.z, + &src_location, + &src_box, + ) + }; + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let list = self.list.as_ref().unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: unsafe { mem::zeroed() }, + }; + for r in regions { + let src_box = make_box(&r.texture_base.origin, &r.size); + unsafe { + *src_location.u.SubresourceIndex_mut() = + 
src.calc_subresource_for_copy(&r.texture_base) + }; + unsafe { + *dst_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(src.format) + }; + unsafe { list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box) }; + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list + .as_ref() + .unwrap() + .BeginQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + }; + } + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list + .as_ref() + .unwrap() + .EndQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + }; + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list.as_ref().unwrap().EndQuery( + set.raw.as_mut_ptr(), + d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + index, + ) + }; + } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range<u32>) { + // nothing to do here + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _stride: wgt::BufferSize, + ) { + unsafe { + self.list.as_ref().unwrap().ResolveQueryData( + set.raw.as_mut_ptr(), + set.raw_ty, + range.start, + range.end - range.start, + buffer.resource.as_mut_ptr(), + offset, + ) + }; + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + unsafe { self.begin_pass(super::PassKind::Render, desc.label) }; + + // Start timestamp if any (before all other commands but after debug marker) + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { + if let Some(index) = timestamp_writes.beginning_of_pass_write_index { + unsafe { + self.write_timestamp(timestamp_writes.query_set, index); + } + } + self.end_of_pass_timer_query = timestamp_writes + .end_of_pass_write_index + .map(|index| (timestamp_writes.query_set.raw.clone(), index)); + } + + let mut color_views = [d3d12::CpuDescriptor { ptr: 0 }; 
crate::MAX_COLOR_ATTACHMENTS]; + for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { + if let Some(cat) = cat.as_ref() { + *rtv = cat.target.view.handle_rtv.unwrap().raw; + } else { + *rtv = self.null_rtv_handle.raw; + } + } + + let ds_view = match desc.depth_stencil_attachment { + None => ptr::null(), + Some(ref ds) => { + if ds.target.usage == crate::TextureUses::DEPTH_STENCIL_WRITE { + &ds.target.view.handle_dsv_rw.as_ref().unwrap().raw + } else { + &ds.target.view.handle_dsv_ro.as_ref().unwrap().raw + } + } + }; + + let list = self.list.as_ref().unwrap(); + unsafe { + list.OMSetRenderTargets( + desc.color_attachments.len() as u32, + color_views.as_ptr(), + 0, + ds_view, + ) + }; + + self.pass.resolves.clear(); + for (rtv, cat) in color_views.iter().zip(desc.color_attachments.iter()) { + if let Some(cat) = cat.as_ref() { + if !cat.ops.contains(crate::AttachmentOps::LOAD) { + let value = [ + cat.clear_value.r as f32, + cat.clear_value.g as f32, + cat.clear_value.b as f32, + cat.clear_value.a as f32, + ]; + list.clear_render_target_view(*rtv, value, &[]); + } + if let Some(ref target) = cat.resolve_target { + self.pass.resolves.push(super::PassResolve { + src: cat.target.view.target_base.clone(), + dst: target.view.target_base.clone(), + format: target.view.raw_format, + }); + } + } + } + + if let Some(ref ds) = desc.depth_stencil_attachment { + let mut flags = d3d12::ClearFlags::empty(); + let aspects = ds.target.view.aspects; + if !ds.depth_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::DEPTH) + { + flags |= d3d12::ClearFlags::DEPTH; + } + if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::STENCIL) + { + flags |= d3d12::ClearFlags::STENCIL; + } + + if !ds_view.is_null() && !flags.is_empty() { + list.clear_depth_stencil_view( + unsafe { *ds_view }, + flags, + ds.clear_value.0, + ds.clear_value.1 as u8, + &[], + ); + } + } + + let raw_vp = 
d3d12_ty::D3D12_VIEWPORT { + TopLeftX: 0.0, + TopLeftY: 0.0, + Width: desc.extent.width as f32, + Height: desc.extent.height as f32, + MinDepth: 0.0, + MaxDepth: 1.0, + }; + let raw_rect = d3d12_ty::D3D12_RECT { + left: 0, + top: 0, + right: desc.extent.width as i32, + bottom: desc.extent.height as i32, + }; + unsafe { list.RSSetViewports(1, &raw_vp) }; + unsafe { list.RSSetScissorRects(1, &raw_rect) }; + } + + unsafe fn end_render_pass(&mut self) { + if !self.pass.resolves.is_empty() { + let list = self.list.as_ref().unwrap(); + self.temp.barriers.clear(); + + // All the targets are expected to be in `COLOR_TARGET` state, + // but D3D12 has special source/destination states for the resolves. + for resolve in self.pass.resolves.iter() { + let mut barrier = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + //Note: this assumes `D3D12_RESOURCE_STATE_RENDER_TARGET`. + // If it's not the case, we can include the `TextureUses` in `PassResove`. 
+ unsafe { + *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.src.0.as_mut_ptr(), + Subresource: resolve.src.1, + StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_SOURCE, + } + }; + self.temp.barriers.push(barrier); + unsafe { + *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.dst.0.as_mut_ptr(), + Subresource: resolve.dst.1, + StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_DEST, + } + }; + self.temp.barriers.push(barrier); + } + + if !self.temp.barriers.is_empty() { + profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); + unsafe { + list.ResourceBarrier( + self.temp.barriers.len() as u32, + self.temp.barriers.as_ptr(), + ) + }; + } + + for resolve in self.pass.resolves.iter() { + profiling::scope!("ID3D12GraphicsCommandList::ResolveSubresource"); + unsafe { + list.ResolveSubresource( + resolve.dst.0.as_mut_ptr(), + resolve.dst.1, + resolve.src.0.as_mut_ptr(), + resolve.src.1, + resolve.format, + ) + }; + } + + // Flip all the barriers to reverse, back into `COLOR_TARGET`. 
+ for barrier in self.temp.barriers.iter_mut() { + let transition = unsafe { barrier.u.Transition_mut() }; + mem::swap(&mut transition.StateBefore, &mut transition.StateAfter); + } + if !self.temp.barriers.is_empty() { + profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); + unsafe { + list.ResourceBarrier( + self.temp.barriers.len() as u32, + self.temp.barriers.as_ptr(), + ) + }; + } + } + + self.write_pass_end_timestamp_if_requested(); + + unsafe { self.end_pass() }; + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + log::trace!("Set group[{}]", index); + let info = &layout.bind_group_infos[index as usize]; + let mut root_index = info.base_root_index as usize; + + // Bind CBV/SRC/UAV descriptor tables + if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { + log::trace!("\tBind element[{}] = view", root_index); + self.pass.root_elements[root_index] = + super::RootElement::Table(group.handle_views.unwrap().gpu); + root_index += 1; + } + + // Bind Sampler descriptor tables. 
+ if info.tables.contains(super::TableTypes::SAMPLERS) { + log::trace!("\tBind element[{}] = sampler", root_index); + self.pass.root_elements[root_index] = + super::RootElement::Table(group.handle_samplers.unwrap().gpu); + root_index += 1; + } + + // Bind root descriptors + for ((&kind, &gpu_base), &offset) in info + .dynamic_buffers + .iter() + .zip(group.dynamic_buffers.iter()) + .zip(dynamic_offsets) + { + log::trace!("\tBind element[{}] = dynamic", root_index); + self.pass.root_elements[root_index] = super::RootElement::DynamicOffsetBuffer { + kind, + address: gpu_base + offset as d3d12::GpuAddress, + }; + root_index += 1; + } + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= (1 << root_index) - (1 << info.base_root_index); + } else { + // D3D12 requires full reset on signature change + self.reset_signature(&layout.shared); + }; + } + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + _stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ) { + let offset_words = offset_bytes as usize / 4; + + let info = layout.shared.root_constant_info.as_ref().unwrap(); + + self.pass.root_elements[info.root_index as usize] = super::RootElement::Constant; + + self.pass.constant_data[offset_words..(offset_words + data.len())].copy_from_slice(data); + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= 1 << info.root_index; + } else { + // D3D12 requires full reset on signature change + self.reset_signature(&layout.shared); + }; + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + let (wide_label, size) = self.temp.prepare_marker(label); + unsafe { + self.list + .as_ref() + .unwrap() + .SetMarker(0, wide_label.as_ptr() as *const _, size) + }; + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + let (wide_label, size) = self.temp.prepare_marker(group_label); + unsafe { + self.list + .as_ref() + .unwrap() + .BeginEvent(0, 
wide_label.as_ptr() as *const _, size) + }; + } + unsafe fn end_debug_marker(&mut self) { + unsafe { self.list.as_ref().unwrap().EndEvent() } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + let list = self.list.as_ref().unwrap().clone(); + + if self.pass.layout.signature != pipeline.layout.signature { + // D3D12 requires full reset on signature change + list.set_graphics_root_signature(&pipeline.layout.signature); + self.reset_signature(&pipeline.layout); + }; + + list.set_pipeline_state(&pipeline.raw); + unsafe { list.IASetPrimitiveTopology(pipeline.topology) }; + + for (index, (vb, &stride)) in self + .pass + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_strides.iter()) + .enumerate() + { + if let Some(stride) = stride { + if vb.StrideInBytes != stride.get() { + vb.StrideInBytes = stride.get(); + self.pass.dirty_vertex_buffers |= 1 << index; + } + } + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + self.list.as_ref().unwrap().set_index_buffer( + binding.resolve_address(), + binding.resolve_size() as u32, + auxil::dxgi::conv::map_index_format(format), + ); + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let vb = &mut self.pass.vertex_buffers[index as usize]; + vb.BufferLocation = binding.resolve_address(); + vb.SizeInBytes = binding.resolve_size() as u32; + self.pass.dirty_vertex_buffers |= 1 << index; + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let raw_vp = d3d12_ty::D3D12_VIEWPORT { + TopLeftX: rect.x, + TopLeftY: rect.y, + Width: rect.w, + Height: rect.h, + MinDepth: depth_range.start, + MaxDepth: depth_range.end, + }; + unsafe { self.list.as_ref().unwrap().RSSetViewports(1, &raw_vp) }; + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + let raw_rect = d3d12_ty::D3D12_RECT { + 
left: rect.x as i32, + top: rect.y as i32, + right: (rect.x + rect.w) as i32, + bottom: (rect.y + rect.h) as i32, + }; + unsafe { self.list.as_ref().unwrap().RSSetScissorRects(1, &raw_rect) }; + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + self.list.as_ref().unwrap().set_stencil_reference(value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.list.as_ref().unwrap().set_blend_factor(*color); + } + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ) { + unsafe { self.prepare_draw(first_vertex as i32, first_instance) }; + self.list.as_ref().unwrap().draw( + vertex_count, + instance_count, + first_vertex, + first_instance, + ); + } + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ) { + unsafe { self.prepare_draw(base_vertex, first_instance) }; + self.list.as_ref().unwrap().draw_indexed( + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + ); + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.as_ref().unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.as_ref().unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, 
+ max_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.as_ref().unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ) + }; + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.as_ref().unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ) + }; + } + + // compute + + unsafe fn begin_compute_pass<'a>( + &mut self, + desc: &crate::ComputePassDescriptor<'a, super::Api>, + ) { + unsafe { self.begin_pass(super::PassKind::Compute, desc.label) }; + + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { + if let Some(index) = timestamp_writes.beginning_of_pass_write_index { + unsafe { + self.write_timestamp(timestamp_writes.query_set, index); + } + } + self.end_of_pass_timer_query = timestamp_writes + .end_of_pass_write_index + .map(|index| (timestamp_writes.query_set.raw.clone(), index)); + } + } + unsafe fn end_compute_pass(&mut self) { + self.write_pass_end_timestamp_if_requested(); + unsafe { self.end_pass() }; + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + let list = self.list.as_ref().unwrap().clone(); + + if self.pass.layout.signature != pipeline.layout.signature { + // D3D12 requires full reset on signature change + list.set_compute_root_signature(&pipeline.layout.signature); + self.reset_signature(&pipeline.layout); + }; + + list.set_pipeline_state(&pipeline.raw); + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + self.prepare_dispatch(count); + self.list.as_ref().unwrap().dispatch(count); + } + 
unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + self.prepare_dispatch([0; 3]); + //TODO: update special constants indirectly + unsafe { + self.list.as_ref().unwrap().ExecuteIndirect( + self.shared.cmd_signatures.dispatch.as_mut_ptr(), + 1, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } + + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + _descriptor_count: u32, + _descriptors: T, + ) where + super::Api: 'a, + T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>, + { + // Implement using `BuildRaytracingAccelerationStructure`: + // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure + todo!() + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + _barriers: crate::AccelerationStructureBarrier, + ) { + todo!() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/conv.rs b/third_party/rust/wgpu-hal/src/dx12/conv.rs new file mode 100644 index 0000000000..2b6c1d959e --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/conv.rs @@ -0,0 +1,355 @@ +use std::iter; +use winapi::{ + shared::minwindef::BOOL, + um::{d3d12 as d3d12_ty, d3dcommon}, +}; + +pub fn map_buffer_usage_to_resource_flags( + usage: crate::BufferUses, +) -> d3d12_ty::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + if usage.contains(crate::BufferUses::STORAGE_READ_WRITE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + flags +} + +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> d3d12_ty::D3D12_RESOURCE_DIMENSION { + match dim { + wgt::TextureDimension::D1 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE1D, + wgt::TextureDimension::D2 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE2D, + wgt::TextureDimension::D3 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE3D, + } +} + +pub fn map_texture_usage_to_resource_flags( + usage: crate::TextureUses, +) -> d3d12_ty::D3D12_RESOURCE_FLAGS { + let mut 
flags = 0; + + if usage.contains(crate::TextureUses::COLOR_TARGET) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + if usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + if !usage.contains(crate::TextureUses::RESOURCE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + } + if usage.contains(crate::TextureUses::STORAGE_READ_WRITE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + + flags +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE { + use wgt::AddressMode as Am; + match mode { + Am::Repeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_WRAP, + Am::MirrorRepeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR, + Am::ClampToEdge => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + Am::ClampToBorder => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_BORDER, + //Am::MirrorClamp => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, + } +} + +pub fn map_filter_mode(mode: wgt::FilterMode) -> d3d12_ty::D3D12_FILTER_TYPE { + match mode { + wgt::FilterMode::Nearest => d3d12_ty::D3D12_FILTER_TYPE_POINT, + wgt::FilterMode::Linear => d3d12_ty::D3D12_FILTER_TYPE_LINEAR, + } +} + +pub fn map_comparison(func: wgt::CompareFunction) -> d3d12_ty::D3D12_COMPARISON_FUNC { + use wgt::CompareFunction as Cf; + match func { + Cf::Never => d3d12_ty::D3D12_COMPARISON_FUNC_NEVER, + Cf::Less => d3d12_ty::D3D12_COMPARISON_FUNC_LESS, + Cf::LessEqual => d3d12_ty::D3D12_COMPARISON_FUNC_LESS_EQUAL, + Cf::Equal => d3d12_ty::D3D12_COMPARISON_FUNC_EQUAL, + Cf::GreaterEqual => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER_EQUAL, + Cf::Greater => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER, + Cf::NotEqual => d3d12_ty::D3D12_COMPARISON_FUNC_NOT_EQUAL, + Cf::Always => d3d12_ty::D3D12_COMPARISON_FUNC_ALWAYS, + } +} + +pub fn map_border_color(border_color: Option<wgt::SamplerBorderColor>) -> [f32; 4] { + use 
wgt::SamplerBorderColor as Sbc; + match border_color { + Some(Sbc::TransparentBlack) | Some(Sbc::Zero) | None => [0.0; 4], + Some(Sbc::OpaqueBlack) => [0.0, 0.0, 0.0, 1.0], + Some(Sbc::OpaqueWhite) => [1.0; 4], + } +} + +pub fn map_visibility(visibility: wgt::ShaderStages) -> d3d12::ShaderVisibility { + match visibility { + wgt::ShaderStages::VERTEX => d3d12::ShaderVisibility::VS, + wgt::ShaderStages::FRAGMENT => d3d12::ShaderVisibility::PS, + _ => d3d12::ShaderVisibility::All, + } +} + +pub fn map_binding_type(ty: &wgt::BindingType) -> d3d12::DescriptorRangeType { + use wgt::BindingType as Bt; + match *ty { + Bt::Sampler { .. } => d3d12::DescriptorRangeType::Sampler, + Bt::Buffer { + ty: wgt::BufferBindingType::Uniform, + .. + } => d3d12::DescriptorRangeType::CBV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: true }, + .. + } + | Bt::Texture { .. } => d3d12::DescriptorRangeType::SRV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: false }, + .. + } + | Bt::StorageTexture { .. 
} => d3d12::DescriptorRangeType::UAV, + Bt::AccelerationStructure => todo!(), + } +} + +pub fn map_label(name: &str) -> Vec<u16> { + name.encode_utf16().chain(iter::once(0)).collect() +} + +pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> d3d12_ty::D3D12_RESOURCE_STATES { + use crate::BufferUses as Bu; + let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + + if usage.intersects(Bu::COPY_SRC) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Bu::COPY_DST) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Bu::INDEX) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + if usage.intersects(Bu::VERTEX | Bu::UNIFORM) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + if usage.intersects(Bu::STORAGE_READ_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } else if usage.intersects(Bu::STORAGE_READ) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Bu::INDIRECT) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + } + state +} + +pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12_ty::D3D12_RESOURCE_STATES { + use crate::TextureUses as Tu; + let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + //Note: `RESOLVE_SOURCE` and `RESOLVE_DEST` are not used here + //Note: `PRESENT` is the same as `COMMON` + if usage == crate::TextureUses::UNINITIALIZED { + return state; + } + + if usage.intersects(Tu::COPY_SRC) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Tu::COPY_DST) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Tu::RESOURCE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Tu::COLOR_TARGET) { + state |= 
d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET; + } + if usage.intersects(Tu::DEPTH_STENCIL_READ) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_READ; + } + if usage.intersects(Tu::DEPTH_STENCIL_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + if usage.intersects(Tu::STORAGE_READ | Tu::STORAGE_READ_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + state +} + +pub fn map_topology( + topology: wgt::PrimitiveTopology, +) -> ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE, + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, +) { + match topology { + wgt::PrimitiveTopology::PointList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + ), + wgt::PrimitiveTopology::LineList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINELIST, + ), + wgt::PrimitiveTopology::LineStrip => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + ), + wgt::PrimitiveTopology::TriangleList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + ), + wgt::PrimitiveTopology::TriangleStrip => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + ), + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE { + match mode { + wgt::PolygonMode::Fill => d3d12_ty::D3D12_FILL_MODE_SOLID, + wgt::PolygonMode::Line => d3d12_ty::D3D12_FILL_MODE_WIREFRAME, + wgt::PolygonMode::Point => panic!( + "{:?} is not enabled for this backend", + wgt::Features::POLYGON_MODE_POINT + ), + } +} + +/// D3D12 doesn't support passing factors ending in `_COLOR` for alpha blending +/// (see https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc). +/// Therefore this function takes an additional `is_alpha` argument +/// which if set will return an equivalent `_ALPHA` factor. 
+fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => d3d12_ty::D3D12_BLEND_ZERO, + Bf::One => d3d12_ty::D3D12_BLEND_ONE, + Bf::Src if is_alpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, + Bf::Src => d3d12_ty::D3D12_BLEND_SRC_COLOR, + Bf::OneMinusSrc if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, + Bf::OneMinusSrc => d3d12_ty::D3D12_BLEND_INV_SRC_COLOR, + Bf::Dst if is_alpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, + Bf::Dst => d3d12_ty::D3D12_BLEND_DEST_COLOR, + Bf::OneMinusDst if is_alpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, + Bf::OneMinusDst => d3d12_ty::D3D12_BLEND_INV_DEST_COLOR, + Bf::SrcAlpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, + Bf::OneMinusSrcAlpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, + Bf::DstAlpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, + Bf::OneMinusDstAlpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, + Bf::Constant => d3d12_ty::D3D12_BLEND_BLEND_FACTOR, + Bf::OneMinusConstant => d3d12_ty::D3D12_BLEND_INV_BLEND_FACTOR, + Bf::SrcAlphaSaturated => d3d12_ty::D3D12_BLEND_SRC_ALPHA_SAT, + Bf::Src1 if is_alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + Bf::Src1 => d3d12_ty::D3D12_BLEND_SRC1_COLOR, + Bf::OneMinusSrc1 if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + Bf::OneMinusSrc1 => d3d12_ty::D3D12_BLEND_INV_SRC1_COLOR, + Bf::Src1Alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + Bf::OneMinusSrc1Alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_blend_component( + component: &wgt::BlendComponent, + is_alpha: bool, +) -> ( + d3d12_ty::D3D12_BLEND_OP, + d3d12_ty::D3D12_BLEND, + d3d12_ty::D3D12_BLEND, +) { + let raw_op = match component.operation { + wgt::BlendOperation::Add => d3d12_ty::D3D12_BLEND_OP_ADD, + wgt::BlendOperation::Subtract => d3d12_ty::D3D12_BLEND_OP_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => d3d12_ty::D3D12_BLEND_OP_REV_SUBTRACT, + wgt::BlendOperation::Min => d3d12_ty::D3D12_BLEND_OP_MIN, + wgt::BlendOperation::Max => 
d3d12_ty::D3D12_BLEND_OP_MAX, + }; + let raw_src = map_blend_factor(component.src_factor, is_alpha); + let raw_dst = map_blend_factor(component.dst_factor, is_alpha); + (raw_op, raw_src, raw_dst) +} + +pub fn map_render_targets( + color_targets: &[Option<wgt::ColorTargetState>], +) -> [d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC; + d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] { + let dummy_target = d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: 0, + LogicOpEnable: 0, + SrcBlend: d3d12_ty::D3D12_BLEND_ZERO, + DestBlend: d3d12_ty::D3D12_BLEND_ZERO, + BlendOp: d3d12_ty::D3D12_BLEND_OP_ADD, + SrcBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, + DestBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, + BlendOpAlpha: d3d12_ty::D3D12_BLEND_OP_ADD, + LogicOp: d3d12_ty::D3D12_LOGIC_OP_CLEAR, + RenderTargetWriteMask: 0, + }; + let mut raw_targets = [dummy_target; d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + + for (raw, ct) in raw_targets.iter_mut().zip(color_targets.iter()) { + if let Some(ct) = ct.as_ref() { + raw.RenderTargetWriteMask = ct.write_mask.bits() as u8; + if let Some(ref blend) = ct.blend { + let (color_op, color_src, color_dst) = map_blend_component(&blend.color, false); + let (alpha_op, alpha_src, alpha_dst) = map_blend_component(&blend.alpha, true); + raw.BlendEnable = 1; + raw.BlendOp = color_op; + raw.SrcBlend = color_src; + raw.DestBlend = color_dst; + raw.BlendOpAlpha = alpha_op; + raw.SrcBlendAlpha = alpha_src; + raw.DestBlendAlpha = alpha_dst; + } + } + } + + raw_targets +} + +fn map_stencil_op(op: wgt::StencilOperation) -> d3d12_ty::D3D12_STENCIL_OP { + use wgt::StencilOperation as So; + match op { + So::Keep => d3d12_ty::D3D12_STENCIL_OP_KEEP, + So::Zero => d3d12_ty::D3D12_STENCIL_OP_ZERO, + So::Replace => d3d12_ty::D3D12_STENCIL_OP_REPLACE, + So::IncrementClamp => d3d12_ty::D3D12_STENCIL_OP_INCR_SAT, + So::IncrementWrap => d3d12_ty::D3D12_STENCIL_OP_INCR, + So::DecrementClamp => d3d12_ty::D3D12_STENCIL_OP_DECR_SAT, + 
So::DecrementWrap => d3d12_ty::D3D12_STENCIL_OP_DECR, + So::Invert => d3d12_ty::D3D12_STENCIL_OP_INVERT, + } +} + +fn map_stencil_face(face: &wgt::StencilFaceState) -> d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { + d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { + StencilFailOp: map_stencil_op(face.fail_op), + StencilDepthFailOp: map_stencil_op(face.depth_fail_op), + StencilPassOp: map_stencil_op(face.pass_op), + StencilFunc: map_comparison(face.compare), + } +} + +pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> d3d12_ty::D3D12_DEPTH_STENCIL_DESC { + d3d12_ty::D3D12_DEPTH_STENCIL_DESC { + DepthEnable: BOOL::from(ds.is_depth_enabled()), + DepthWriteMask: if ds.depth_write_enabled { + d3d12_ty::D3D12_DEPTH_WRITE_MASK_ALL + } else { + d3d12_ty::D3D12_DEPTH_WRITE_MASK_ZERO + }, + DepthFunc: map_comparison(ds.depth_compare), + StencilEnable: BOOL::from(ds.stencil.is_enabled()), + StencilReadMask: ds.stencil.read_mask as u8, + StencilWriteMask: ds.stencil.write_mask as u8, + FrontFace: map_stencil_face(&ds.stencil.front), + BackFace: map_stencil_face(&ds.stencil.back), + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/descriptor.rs b/third_party/rust/wgpu-hal/src/dx12/descriptor.rs new file mode 100644 index 0000000000..6f7afe8071 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/descriptor.rs @@ -0,0 +1,312 @@ +use super::null_comptr_check; +use crate::auxil::dxgi::result::HResult as _; +use bit_set::BitSet; +use parking_lot::Mutex; +use range_alloc::RangeAllocator; +use std::fmt; + +const HEAP_SIZE_FIXED: usize = 64; + +#[derive(Copy, Clone)] +pub(super) struct DualHandle { + cpu: d3d12::CpuDescriptor, + pub gpu: d3d12::GpuDescriptor, + /// How large the block allocated to this handle is. 
+ count: u64, +} + +impl fmt::Debug for DualHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DualHandle") + .field("cpu", &self.cpu.ptr) + .field("gpu", &self.gpu.ptr) + .field("count", &self.count) + .finish() + } +} + +type DescriptorIndex = u64; + +pub(super) struct GeneralHeap { + pub raw: d3d12::DescriptorHeap, + ty: d3d12::DescriptorHeapType, + handle_size: u64, + total_handles: u64, + start: DualHandle, + ranges: Mutex<RangeAllocator<DescriptorIndex>>, +} + +impl GeneralHeap { + pub(super) fn new( + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + total_handles: u64, + ) -> Result<Self, crate::DeviceError> { + let raw = { + profiling::scope!("ID3D12Device::CreateDescriptorHeap"); + device + .create_descriptor_heap( + total_handles as u32, + ty, + d3d12::DescriptorHeapFlags::SHADER_VISIBLE, + 0, + ) + .into_device_result("Descriptor heap creation")? + }; + + null_comptr_check(&raw)?; + + Ok(Self { + raw: raw.clone(), + ty, + handle_size: device.get_descriptor_increment_size(ty) as u64, + total_handles, + start: DualHandle { + cpu: raw.start_cpu_descriptor(), + gpu: raw.start_gpu_descriptor(), + count: 0, + }, + ranges: Mutex::new(RangeAllocator::new(0..total_handles)), + }) + } + + pub(super) fn at(&self, index: DescriptorIndex, count: u64) -> DualHandle { + assert!(index < self.total_handles); + DualHandle { + cpu: self.cpu_descriptor_at(index), + gpu: self.gpu_descriptor_at(index), + count, + } + } + + fn cpu_descriptor_at(&self, index: u64) -> d3d12::CpuDescriptor { + d3d12::CpuDescriptor { + ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, + } + } + + fn gpu_descriptor_at(&self, index: u64) -> d3d12::GpuDescriptor { + d3d12::GpuDescriptor { + ptr: self.start.gpu.ptr + self.handle_size * index, + } + } + + pub(super) fn allocate_slice(&self, count: u64) -> Result<DescriptorIndex, crate::DeviceError> { + let range = self.ranges.lock().allocate_range(count).map_err(|err| { + log::error!("Unable to 
allocate descriptors: {:?}", err); + crate::DeviceError::OutOfMemory + })?; + Ok(range.start) + } + + /// Free handles previously given out by this `DescriptorHeapSlice`. + /// Do not use this with handles not given out by this `DescriptorHeapSlice`. + pub(crate) fn free_slice(&self, handle: DualHandle) { + let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size; + self.ranges.lock().free_range(start..start + handle.count); + } +} + +/// Fixed-size free-list allocator for CPU descriptors. +struct FixedSizeHeap { + _raw: d3d12::DescriptorHeap, + /// Bit flag representation of available handles in the heap. + /// + /// 0 - Occupied + /// 1 - free + availability: u64, + handle_size: usize, + start: d3d12::CpuDescriptor, +} + +impl FixedSizeHeap { + fn new( + device: &d3d12::Device, + ty: d3d12::DescriptorHeapType, + ) -> Result<Self, crate::DeviceError> { + let heap = device + .create_descriptor_heap( + HEAP_SIZE_FIXED as _, + ty, + d3d12::DescriptorHeapFlags::empty(), + 0, + ) + .into_device_result("Descriptor heap creation")?; + + null_comptr_check(&heap)?; + + Ok(Self { + handle_size: device.get_descriptor_increment_size(ty) as _, + availability: !0, // all free! + start: heap.start_cpu_descriptor(), + _raw: heap, + }) + } + + fn alloc_handle(&mut self) -> Result<d3d12::CpuDescriptor, crate::DeviceError> { + // Find first free slot. + let slot = self.availability.trailing_zeros() as usize; + if slot >= HEAP_SIZE_FIXED { + log::error!("Failed to allocate a handle form a fixed size heap"); + return Err(crate::DeviceError::OutOfMemory); + } + // Set the slot as occupied. 
+ self.availability ^= 1 << slot; + + Ok(d3d12::CpuDescriptor { + ptr: self.start.ptr + self.handle_size * slot, + }) + } + + fn free_handle(&mut self, handle: d3d12::CpuDescriptor) { + let slot = (handle.ptr - self.start.ptr) / self.handle_size; + assert!(slot < HEAP_SIZE_FIXED); + assert_eq!(self.availability & (1 << slot), 0); + self.availability ^= 1 << slot; + } + + fn is_full(&self) -> bool { + self.availability == 0 + } +} + +#[derive(Clone, Copy)] +pub(super) struct Handle { + pub raw: d3d12::CpuDescriptor, + heap_index: usize, +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Handle") + .field("ptr", &self.raw.ptr) + .field("heap_index", &self.heap_index) + .finish() + } +} + +pub(super) struct CpuPool { + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + heaps: Vec<FixedSizeHeap>, + available_heap_indices: BitSet, +} + +impl CpuPool { + pub(super) fn new(device: d3d12::Device, ty: d3d12::DescriptorHeapType) -> Self { + Self { + device, + ty, + heaps: Vec::new(), + available_heap_indices: BitSet::new(), + } + } + + pub(super) fn alloc_handle(&mut self) -> Result<Handle, crate::DeviceError> { + let heap_index = self + .available_heap_indices + .iter() + .next() + .unwrap_or(self.heaps.len()); + + // Allocate a new heap + if heap_index == self.heaps.len() { + self.heaps.push(FixedSizeHeap::new(&self.device, self.ty)?); + self.available_heap_indices.insert(heap_index); + } + + let heap = &mut self.heaps[heap_index]; + let handle = Handle { + raw: heap.alloc_handle()?, + heap_index, + }; + if heap.is_full() { + self.available_heap_indices.remove(heap_index); + } + + Ok(handle) + } + + pub(super) fn free_handle(&mut self, handle: Handle) { + self.heaps[handle.heap_index].free_handle(handle.raw); + self.available_heap_indices.insert(handle.heap_index); + } +} + +pub(super) struct CpuHeapInner { + pub _raw: d3d12::DescriptorHeap, + pub stage: Vec<d3d12::CpuDescriptor>, +} + +pub(super) struct 
CpuHeap { + pub inner: Mutex<CpuHeapInner>, + start: d3d12::CpuDescriptor, + handle_size: u32, + total: u32, +} + +unsafe impl Send for CpuHeap {} +unsafe impl Sync for CpuHeap {} + +impl CpuHeap { + pub(super) fn new( + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + total: u32, + ) -> Result<Self, crate::DeviceError> { + let handle_size = device.get_descriptor_increment_size(ty); + let raw = device + .create_descriptor_heap(total, ty, d3d12::DescriptorHeapFlags::empty(), 0) + .into_device_result("CPU descriptor heap creation")?; + + null_comptr_check(&raw)?; + + Ok(Self { + inner: Mutex::new(CpuHeapInner { + _raw: raw.clone(), + stage: Vec::new(), + }), + start: raw.start_cpu_descriptor(), + handle_size, + total, + }) + } + + pub(super) fn at(&self, index: u32) -> d3d12::CpuDescriptor { + d3d12::CpuDescriptor { + ptr: self.start.ptr + (self.handle_size * index) as usize, + } + } +} + +impl fmt::Debug for CpuHeap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CpuHeap") + .field("start", &self.start.ptr) + .field("handle_size", &self.handle_size) + .field("total", &self.total) + .finish() + } +} + +pub(super) unsafe fn upload( + device: d3d12::Device, + src: &CpuHeapInner, + dst: &GeneralHeap, + dummy_copy_counts: &[u32], +) -> Result<DualHandle, crate::DeviceError> { + let count = src.stage.len() as u32; + let index = dst.allocate_slice(count as u64)?; + unsafe { + device.CopyDescriptors( + 1, + &dst.cpu_descriptor_at(index), + &count, + count, + src.stage.as_ptr(), + dummy_copy_counts.as_ptr(), + dst.ty as u32, + ) + }; + Ok(dst.at(index, count as u64)) +} diff --git a/third_party/rust/wgpu-hal/src/dx12/device.rs b/third_party/rust/wgpu-hal/src/dx12/device.rs new file mode 100644 index 0000000000..2507c125f8 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/device.rs @@ -0,0 +1,1694 @@ +use crate::{ + auxil::{self, dxgi::result::HResult as _}, + dx12::shader_compilation, + DeviceError, +}; +use d3d12::ComPtr; + 
+use super::{conv, descriptor, null_comptr_check, view}; +use parking_lot::Mutex; +use std::{ + ffi, mem, + num::NonZeroU32, + ptr, + sync::Arc, + time::{Duration, Instant}, +}; +use winapi::{ + shared::{dxgiformat, dxgitype, minwindef::BOOL, winerror}, + um::{d3d12 as d3d12_ty, synchapi, winbase}, + Interface, +}; + +// this has to match Naga's HLSL backend, and also needs to be null-terminated +const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; + +impl super::Device { + pub(super) fn new( + raw: d3d12::Device, + present_queue: d3d12::CommandQueue, + limits: &wgt::Limits, + private_caps: super::PrivateCapabilities, + library: &Arc<d3d12::D3D12Lib>, + dxc_container: Option<Arc<shader_compilation::DxcContainer>>, + ) -> Result<Self, DeviceError> { + let mem_allocator = if private_caps.suballocation_supported { + super::suballocation::create_allocator_wrapper(&raw)? + } else { + None + }; + + let mut idle_fence = d3d12::Fence::null(); + let hr = unsafe { + profiling::scope!("ID3D12Device::CreateFence"); + raw.CreateFence( + 0, + d3d12_ty::D3D12_FENCE_FLAG_NONE, + &d3d12_ty::ID3D12Fence::uuidof(), + idle_fence.mut_void(), + ) + }; + hr.into_device_result("Idle fence creation")?; + + null_comptr_check(&idle_fence)?; + + let mut zero_buffer = d3d12::Resource::null(); + unsafe { + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: super::ZERO_BUFFER_SIZE, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: d3d12_ty::D3D12_RESOURCE_FLAG_NONE, + }; + + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match private_caps.memory_architecture { + super::MemoryArchitecture::Unified { .. 
} => d3d12_ty::D3D12_MEMORY_POOL_L0, + super::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + profiling::scope!("Zero Buffer Allocation"); + raw.CreateCommittedResource( + &heap_properties, + d3d12_ty::D3D12_HEAP_FLAG_NONE, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + zero_buffer.mut_void(), + ) + .into_device_result("Zero buffer creation")?; + + null_comptr_check(&zero_buffer)?; + + // Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` + // this resource is zeroed by default. + }; + + // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 + let capacity_views = limits.max_non_sampler_bindings as u64; + let capacity_samplers = 2_048; + + let shared = super::DeviceShared { + zero_buffer, + cmd_signatures: super::CommandSignatures { + draw: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::draw()], + mem::size_of::<wgt::DrawIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (draw) signature creation")?, + draw_indexed: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::draw_indexed()], + mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (draw_indexed) signature creation")?, + dispatch: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::dispatch()], + mem::size_of::<wgt::DispatchIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (dispatch) signature creation")?, + }, + heap_views: descriptor::GeneralHeap::new( + raw.clone(), + d3d12::DescriptorHeapType::CbvSrvUav, + capacity_views, + )?, + heap_samplers: descriptor::GeneralHeap::new( + raw.clone(), + d3d12::DescriptorHeapType::Sampler, + capacity_samplers, + )?, + }; + + let mut rtv_pool = descriptor::CpuPool::new(raw.clone(), d3d12::DescriptorHeapType::Rtv); + let 
null_rtv_handle = rtv_pool.alloc_handle()?; + // A null pResource is used to initialize a null descriptor, + // which guarantees D3D11-like null binding behavior (reading 0s, writes are discarded) + raw.create_render_target_view( + ComPtr::null(), + &d3d12::RenderTargetViewDesc::texture_2d( + winapi::shared::dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + 0, + 0, + ), + null_rtv_handle.raw, + ); + + Ok(super::Device { + raw: raw.clone(), + present_queue, + idler: super::Idler { + fence: idle_fence, + event: d3d12::Event::create(false, false), + }, + private_caps, + shared: Arc::new(shared), + rtv_pool: Mutex::new(rtv_pool), + dsv_pool: Mutex::new(descriptor::CpuPool::new( + raw.clone(), + d3d12::DescriptorHeapType::Dsv, + )), + srv_uav_pool: Mutex::new(descriptor::CpuPool::new( + raw.clone(), + d3d12::DescriptorHeapType::CbvSrvUav, + )), + sampler_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::Sampler, + )), + library: Arc::clone(library), + #[cfg(feature = "renderdoc")] + render_doc: Default::default(), + null_rtv_handle, + mem_allocator, + dxc_container, + }) + } + + // Blocks until the dedicated present queue is finished with all of its work. + // + // Once this method completes, the surface is able to be resized or deleted. 
+ pub(super) unsafe fn wait_for_present_queue_idle(&self) -> Result<(), DeviceError> { + let cur_value = self.idler.fence.get_value(); + if cur_value == !0 { + return Err(DeviceError::Lost); + } + + let value = cur_value + 1; + log::debug!("Waiting for idle with value {}", value); + self.present_queue.signal(&self.idler.fence, value); + let hr = self + .idler + .fence + .set_event_on_completion(self.idler.event, value); + hr.into_device_result("Set event")?; + unsafe { synchapi::WaitForSingleObject(self.idler.event.0, winbase::INFINITE) }; + Ok(()) + } + + fn load_shader( + &self, + stage: &crate::ProgrammableStage<super::Api>, + layout: &super::PipelineLayout, + naga_stage: naga::ShaderStage, + ) -> Result<super::CompiledShader, crate::PipelineError> { + use naga::back::hlsl; + + let stage_bit = crate::auxil::map_naga_stage(naga_stage); + let module = &stage.module.naga.module; + //TODO: reuse the writer + let mut source = String::new(); + let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options); + let reflection_info = { + profiling::scope!("naga::back::hlsl::write"); + writer + .write(module, &stage.module.naga.info) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))? 
+ }; + + let full_stage = format!( + "{}_{}\0", + naga_stage.to_hlsl_str(), + layout.naga_options.shader_model.to_str() + ); + + let ep_index = module + .entry_points + .iter() + .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + + let raw_ep = reflection_info.entry_point_names[ep_index] + .as_ref() + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{e}")))?; + + let source_name = stage + .module + .raw_name + .as_ref() + .and_then(|cstr| cstr.to_str().ok()) + .unwrap_or_default(); + + // Compile with DXC if available, otherwise fall back to FXC + let (result, log_level) = if let Some(ref dxc_container) = self.dxc_container { + super::shader_compilation::compile_dxc( + self, + &source, + source_name, + raw_ep, + stage_bit, + full_stage, + dxc_container, + ) + } else { + super::shader_compilation::compile_fxc( + self, + &source, + source_name, + &ffi::CString::new(raw_ep.as_str()).unwrap(), + stage_bit, + full_stage, + ) + }; + + log::log!( + log_level, + "Naga generated shader for {:?} at {:?}:\n{}", + raw_ep, + naga_stage, + source + ); + result + } + + pub fn raw_device(&self) -> &d3d12::Device { + &self.raw + } + + pub fn raw_queue(&self) -> &d3d12::CommandQueue { + &self.present_queue + } + + pub unsafe fn texture_from_raw( + resource: d3d12::Resource, + format: wgt::TextureFormat, + dimension: wgt::TextureDimension, + size: wgt::Extent3d, + mip_level_count: u32, + sample_count: u32, + ) -> super::Texture { + super::Texture { + resource, + format, + dimension, + size, + mip_level_count, + sample_count, + allocation: None, + } + } + + pub unsafe fn buffer_from_raw( + resource: d3d12::Resource, + size: wgt::BufferAddress, + ) -> super::Buffer { + super::Buffer { + resource, + size, + allocation: None, + } + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(mut self, _queue: super::Queue) { + self.rtv_pool.lock().free_handle(self.null_rtv_handle); 
+ self.mem_allocator = None; + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, DeviceError> { + let mut resource = d3d12::Resource::null(); + let mut size = desc.size; + if desc.usage.contains(crate::BufferUses::UNIFORM) { + let align_mask = d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1; + size = ((size - 1) | align_mask) + 1; + } + + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: conv::map_buffer_usage_to_resource_flags(desc.usage), + }; + + let (hr, allocation) = + super::suballocation::create_buffer_resource(self, desc, raw_desc, &mut resource)?; + + hr.into_device_result("Buffer creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { resource.SetName(cwstr.as_ptr()) }; + } + + Ok(super::Buffer { + resource, + size, + allocation, + }) + } + + unsafe fn destroy_buffer(&self, mut buffer: super::Buffer) { + // Only happens when it's using the windows_rs feature and there's an allocation + if let Some(alloc) = buffer.allocation.take() { + super::suballocation::free_buffer_allocation( + alloc, + // SAFETY: for allocations to exist, the allocator must exist + unsafe { self.mem_allocator.as_ref().unwrap_unchecked() }, + ); + } + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, DeviceError> { + let mut ptr = ptr::null_mut(); + // TODO: 0 for subresource should be fine here until map and unmap buffer is subresource aware? 
+ let hr = unsafe { (*buffer.resource).Map(0, ptr::null(), &mut ptr) }; + hr.into_device_result("Map buffer")?; + + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize).cast::<u8>() }) + .unwrap(), + //TODO: double-check this. Documentation is a bit misleading - + // it implies that Map/Unmap is needed to invalidate/flush memory. + is_coherent: true, + }) + } + + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), DeviceError> { + unsafe { (*buffer.resource).Unmap(0, ptr::null()) }; + Ok(()) + } + + unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, DeviceError> { + use super::suballocation::create_texture_resource; + + let mut resource = d3d12::Resource::null(); + + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: conv::map_texture_dimension(desc.dimension), + Alignment: 0, + Width: desc.size.width as u64, + Height: desc.size.height, + DepthOrArraySize: desc.size.depth_or_array_layers as u16, + MipLevels: desc.mip_level_count as u16, + Format: auxil::dxgi::conv::map_texture_format_for_resource( + desc.format, + desc.usage, + !desc.view_formats.is_empty(), + self.private_caps.casting_fully_typed_format_supported, + ), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.sample_count, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_UNKNOWN, + Flags: conv::map_texture_usage_to_resource_flags(desc.usage), + }; + + let (hr, allocation) = create_texture_resource(self, desc, raw_desc, &mut resource)?; + + hr.into_device_result("Texture creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { resource.SetName(cwstr.as_ptr()) }; + } + + Ok(super::Texture { + resource, + format: desc.format, + dimension: desc.dimension, + size: 
desc.size, + mip_level_count: desc.mip_level_count, + sample_count: desc.sample_count, + allocation, + }) + } + + unsafe fn destroy_texture(&self, mut texture: super::Texture) { + if let Some(alloc) = texture.allocation.take() { + super::suballocation::free_texture_allocation( + alloc, + // SAFETY: for allocations to exist, the allocator must exist + unsafe { self.mem_allocator.as_ref().unwrap_unchecked() }, + ); + } + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, DeviceError> { + let view_desc = desc.to_internal(texture); + + Ok(super::TextureView { + raw_format: view_desc.rtv_dsv_format, + aspects: view_desc.aspects, + target_base: ( + texture.resource.clone(), + texture.calc_subresource(desc.range.base_mip_level, desc.range.base_array_layer, 0), + ), + handle_srv: if desc.usage.intersects(crate::TextureUses::RESOURCE) { + match unsafe { view_desc.to_srv() } { + Some(raw_desc) => { + let handle = self.srv_uav_pool.lock().alloc_handle()?; + unsafe { + self.raw.CreateShaderResourceView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } + None => None, + } + } else { + None + }, + handle_uav: if desc.usage.intersects( + crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE, + ) { + match unsafe { view_desc.to_uav() } { + Some(raw_desc) => { + let handle = self.srv_uav_pool.lock().alloc_handle()?; + unsafe { + self.raw.CreateUnorderedAccessView( + texture.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle.raw, + ); + } + Some(handle) + } + None => None, + } + } else { + None + }, + handle_rtv: if desc.usage.intersects(crate::TextureUses::COLOR_TARGET) { + let raw_desc = unsafe { view_desc.to_rtv() }; + let handle = self.rtv_pool.lock().alloc_handle()?; + unsafe { + self.raw.CreateRenderTargetView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None 
+ }, + handle_dsv_ro: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_READ) + { + let raw_desc = unsafe { view_desc.to_dsv(true) }; + let handle = self.dsv_pool.lock().alloc_handle()?; + unsafe { + self.raw.CreateDepthStencilView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + handle_dsv_rw: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) + { + let raw_desc = unsafe { view_desc.to_dsv(false) }; + let handle = self.dsv_pool.lock().alloc_handle()?; + unsafe { + self.raw.CreateDepthStencilView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + }) + } + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + if view.handle_srv.is_some() || view.handle_uav.is_some() { + let mut pool = self.srv_uav_pool.lock(); + if let Some(handle) = view.handle_srv { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_uav { + pool.free_handle(handle); + } + } + if let Some(handle) = view.handle_rtv { + self.rtv_pool.lock().free_handle(handle); + } + if view.handle_dsv_ro.is_some() || view.handle_dsv_rw.is_some() { + let mut pool = self.dsv_pool.lock(); + if let Some(handle) = view.handle_dsv_ro { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_dsv_rw { + pool.free_handle(handle); + } + } + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, DeviceError> { + let handle = self.sampler_pool.lock().alloc_handle()?; + + let reduction = match desc.compare { + Some(_) => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, + None => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_STANDARD, + }; + let mut filter = conv::map_filter_mode(desc.min_filter) << d3d12_ty::D3D12_MIN_FILTER_SHIFT + | conv::map_filter_mode(desc.mag_filter) << d3d12_ty::D3D12_MAG_FILTER_SHIFT + | conv::map_filter_mode(desc.mipmap_filter) << d3d12_ty::D3D12_MIP_FILTER_SHIFT + 
| reduction << d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_SHIFT; + + if desc.anisotropy_clamp != 1 { + filter |= d3d12_ty::D3D12_FILTER_ANISOTROPIC; + }; + + let border_color = conv::map_border_color(desc.border_color); + + self.raw.create_sampler( + handle.raw, + filter, + [ + conv::map_address_mode(desc.address_modes[0]), + conv::map_address_mode(desc.address_modes[1]), + conv::map_address_mode(desc.address_modes[2]), + ], + 0.0, + desc.anisotropy_clamp as u32, + conv::map_comparison(desc.compare.unwrap_or(wgt::CompareFunction::Always)), + border_color, + desc.lod_clamp.clone(), + ); + + Ok(super::Sampler { handle }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + self.sampler_pool.lock().free_handle(sampler.handle); + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, DeviceError> { + let allocator = self + .raw + .create_command_allocator(d3d12::CmdListType::Direct) + .into_device_result("Command allocator creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { allocator.SetName(cwstr.as_ptr()) }; + } + + Ok(super::CommandEncoder { + allocator, + device: self.raw.clone(), + shared: Arc::clone(&self.shared), + null_rtv_handle: self.null_rtv_handle, + list: None, + free_lists: Vec::new(), + pass: super::PassState::new(), + temp: super::Temp::default(), + end_of_pass_timer_query: None, + }) + } + unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) { + if let Some(list) = encoder.list { + list.close(); + } + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, DeviceError> { + let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); + for entry in desc.entries.iter() { + let count = entry.count.map_or(1, NonZeroU32::get); + match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + 
.. + } => {} + wgt::BindingType::Buffer { .. } => num_buffer_views += count, + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + num_texture_views += count + } + wgt::BindingType::Sampler { .. } => num_samplers += count, + wgt::BindingType::AccelerationStructure => todo!(), + } + } + + let num_views = num_buffer_views + num_texture_views; + Ok(super::BindGroupLayout { + entries: desc.entries.to_vec(), + cpu_heap_views: if num_views != 0 { + let heap = descriptor::CpuHeap::new( + self.raw.clone(), + d3d12::DescriptorHeapType::CbvSrvUav, + num_views, + )?; + Some(heap) + } else { + None + }, + cpu_heap_samplers: if num_samplers != 0 { + let heap = descriptor::CpuHeap::new( + self.raw.clone(), + d3d12::DescriptorHeapType::Sampler, + num_samplers, + )?; + Some(heap) + } else { + None + }, + copy_counts: vec![1; num_views.max(num_samplers) as usize], + }) + } + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {} + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, DeviceError> { + use naga::back::hlsl; + // Pipeline layouts are implemented as RootSignature for D3D12. + // + // Push Constants are implemented as root constants. + // + // Each descriptor set layout will be one table entry of the root signature. + // We have the additional restriction that SRV/CBV/UAV and samplers need to be + // separated, so each set layout will actually occupy up to 2 entries! + // SRV/CBV/UAV tables are added to the signature first, then Sampler tables, + // and finally dynamic uniform descriptors. + // + // Buffers with dynamic offsets are implemented as root descriptors. + // This is easier than trying to patch up the offset on the shader side. + // + // Root signature layout: + // Root Constants: Parameter=0, Space=0 + // ... 
+ // (bind group [0]) - Space=0 + // View descriptor table, if any + // Sampler descriptor table, if any + // Root descriptors (for dynamic offset buffers) + // (bind group [1]) - Space=0 + // ... + // (bind group [2]) - Space=0 + // Special constant buffer: Space=0 + + //TODO: put lower bind group indices further down the root signature. See: + // https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model + // Currently impossible because wgpu-core only re-binds the descriptor sets based + // on Vulkan-like layout compatibility rules. + + fn native_binding(bt: &hlsl::BindTarget) -> d3d12::Binding { + d3d12::Binding { + space: bt.space as u32, + register: bt.register, + } + } + + log::debug!( + "Creating Root Signature '{}'", + desc.label.unwrap_or_default() + ); + + let mut binding_map = hlsl::BindingMap::default(); + let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = ( + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + ); + let mut parameters = Vec::new(); + let mut push_constants_target = None; + let mut root_constant_info = None; + + let mut pc_start = u32::MAX; + let mut pc_end = u32::MIN; + + for pc in desc.push_constant_ranges.iter() { + pc_start = pc_start.min(pc.range.start); + pc_end = pc_end.max(pc.range.end); + } + + if pc_start != u32::MAX && pc_end != u32::MIN { + let parameter_index = parameters.len(); + let size = (pc_end - pc_start) / 4; + log::debug!( + "\tParam[{}] = push constant (count = {})", + parameter_index, + size, + ); + parameters.push(d3d12::RootParameter::constants( + d3d12::ShaderVisibility::All, + native_binding(&bind_cbv), + size, + )); + let binding = bind_cbv.clone(); + bind_cbv.register += 1; + root_constant_info = Some(super::RootConstantInfo { + root_index: parameter_index as u32, + range: (pc_start / 4)..(pc_end / 4), + }); + push_constants_target = Some(binding); + + bind_cbv.space += 1; + } + + // Collect the 
whole number of bindings we will create upfront. + // It allows us to preallocate enough storage to avoid reallocation, + // which could cause invalid pointers. + let total_non_dynamic_entries = desc + .bind_group_layouts + .iter() + .flat_map(|bgl| { + bgl.entries.iter().map(|entry| match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => 0, + _ => 1, + }) + }) + .sum(); + let mut ranges = Vec::with_capacity(total_non_dynamic_entries); + + let mut bind_group_infos = + arrayvec::ArrayVec::<super::BindGroupInfo, { crate::MAX_BIND_GROUPS }>::default(); + for (index, bgl) in desc.bind_group_layouts.iter().enumerate() { + let mut info = super::BindGroupInfo { + tables: super::TableTypes::empty(), + base_root_index: parameters.len() as u32, + dynamic_buffers: Vec::new(), + }; + + let mut visibility_view_static = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic = wgt::ShaderStages::empty(); + let mut visibility_sampler = wgt::ShaderStages::empty(); + for entry in bgl.entries.iter() { + match entry.ty { + wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility, + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => visibility_view_dynamic |= entry.visibility, + _ => visibility_view_static |= entry.visibility, + } + } + + // SRV/CBV/UAV descriptor tables + let mut range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. 
+ } => continue, + ref other => conv::map_binding_type(other), + }; + let bt = match range_ty { + d3d12::DescriptorRangeType::CBV => &mut bind_cbv, + d3d12::DescriptorRangeType::SRV => &mut bind_srv, + d3d12::DescriptorRangeType::UAV => &mut bind_uav, + d3d12::DescriptorRangeType::Sampler => continue, + }; + + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bt.clone() + }, + ); + ranges.push(d3d12::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native_binding(bt), + d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1); + } + if ranges.len() > range_base { + log::debug!( + "\tParam[{}] = views (vis = {:?}, count = {})", + parameters.len(), + visibility_view_static, + ranges.len() - range_base, + ); + parameters.push(d3d12::RootParameter::descriptor_table( + conv::map_visibility(visibility_view_static), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SRV_CBV_UAV; + } + + // Sampler descriptor tables + range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Sampler { .. 
} => d3d12::DescriptorRangeType::Sampler, + _ => continue, + }; + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bind_sampler.clone() + }, + ); + ranges.push(d3d12::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native_binding(&bind_sampler), + d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1); + } + if ranges.len() > range_base { + log::debug!( + "\tParam[{}] = samplers (vis = {:?}, count = {})", + parameters.len(), + visibility_sampler, + ranges.len() - range_base, + ); + parameters.push(d3d12::RootParameter::descriptor_table( + conv::map_visibility(visibility_sampler), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SAMPLERS; + } + + // Root (dynamic) descriptor tables + let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic); + for entry in bgl.entries.iter() { + let buffer_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + ty, + .. 
+ } => ty, + _ => continue, + }; + + let (kind, parameter_ty, bt) = match buffer_ty { + wgt::BufferBindingType::Uniform => ( + super::BufferViewKind::Constant, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_CBV, + &mut bind_cbv, + ), + wgt::BufferBindingType::Storage { read_only: true } => ( + super::BufferViewKind::ShaderResource, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_SRV, + &mut bind_srv, + ), + wgt::BufferBindingType::Storage { read_only: false } => ( + super::BufferViewKind::UnorderedAccess, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_UAV, + &mut bind_uav, + ), + }; + + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bt.clone() + }, + ); + info.dynamic_buffers.push(kind); + + log::debug!( + "\tParam[{}] = dynamic {:?} (vis = {:?})", + parameters.len(), + buffer_ty, + dynamic_buffers_visibility, + ); + parameters.push(d3d12::RootParameter::descriptor( + parameter_ty, + dynamic_buffers_visibility, + native_binding(bt), + )); + + bt.register += entry.count.map_or(1, NonZeroU32::get); + } + + bind_group_infos.push(info); + } + + // Ensure that we didn't reallocate! 
+ debug_assert_eq!(ranges.len(), total_non_dynamic_entries); + + let (special_constants_root_index, special_constants_binding) = if desc.flags.intersects( + crate::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE + | crate::PipelineLayoutFlags::NUM_WORK_GROUPS, + ) { + let parameter_index = parameters.len(); + log::debug!("\tParam[{}] = special", parameter_index); + parameters.push(d3d12::RootParameter::constants( + d3d12::ShaderVisibility::All, // really needed for VS and CS only + native_binding(&bind_cbv), + 3, // 0 = first_vertex, 1 = first_instance, 2 = other + )); + let binding = bind_cbv.clone(); + bind_cbv.register += 1; + (Some(parameter_index as u32), Some(binding)) + } else { + (None, None) + }; + + log::trace!("{:#?}", parameters); + log::trace!("Bindings {:#?}", binding_map); + + let (blob, error) = self + .library + .serialize_root_signature( + d3d12::RootSignatureVersion::V1_0, + &parameters, + &[], + d3d12::RootSignatureFlags::ALLOW_IA_INPUT_LAYOUT, + ) + .map_err(|e| { + log::error!("Unable to find serialization function: {:?}", e); + DeviceError::Lost + })? 
+ .into_device_result("Root signature serialization")?; + + if !error.is_null() { + log::error!( + "Root signature serialization error: {:?}", + unsafe { error.as_c_str() }.to_str().unwrap() + ); + return Err(DeviceError::Lost); + } + + let raw = self + .raw + .create_root_signature(blob, 0) + .into_device_result("Root signature creation")?; + + log::debug!("\traw = {:?}", raw); + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::PipelineLayout { + shared: super::PipelineLayoutShared { + signature: raw, + total_root_elements: parameters.len() as super::RootIndex, + special_constants_root_index, + root_constant_info, + }, + bind_group_infos, + naga_options: hlsl::Options { + shader_model: match self.dxc_container { + // DXC + Some(_) => hlsl::ShaderModel::V6_0, + // FXC doesn't support SM 6.0 + None => hlsl::ShaderModel::V5_1, + }, + binding_map, + fake_missing_bindings: false, + special_constants_binding, + push_constants_target, + zero_initialize_workgroup_memory: true, + }, + }) + } + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, DeviceError> { + let mut cpu_views = desc + .layout + .cpu_heap_views + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_views { + inner.stage.clear(); + } + let mut cpu_samplers = desc + .layout + .cpu_heap_samplers + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_samplers { + inner.stage.clear(); + } + let mut dynamic_buffers = Vec::new(); + + for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) { + match layout.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. 
+ } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.buffers[start..end] { + dynamic_buffers.push(data.resolve_address()); + } + } + wgt::BindingType::Buffer { ty, .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.buffers[start..end] { + let gpu_address = data.resolve_address(); + let size = data.resolve_size() as u32; + let inner = cpu_views.as_mut().unwrap(); + let cpu_index = inner.stage.len() as u32; + let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); + match ty { + wgt::BufferBindingType::Uniform => { + let size_mask = + d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let raw_desc = d3d12_ty::D3D12_CONSTANT_BUFFER_VIEW_DESC { + BufferLocation: gpu_address, + SizeInBytes: ((size - 1) | size_mask) + 1, + }; + unsafe { self.raw.CreateConstantBufferView(&raw_desc, handle) }; + } + wgt::BufferBindingType::Storage { read_only: true } => { + let mut raw_desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + Shader4ComponentMapping: + view::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: d3d12_ty::D3D12_SRV_DIMENSION_BUFFER, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_SRV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + Flags: d3d12_ty::D3D12_BUFFER_SRV_FLAG_RAW, + } + }; + unsafe { + self.raw.CreateShaderResourceView( + data.buffer.resource.as_mut_ptr(), + &raw_desc, + handle, + ) + }; + } + wgt::BufferBindingType::Storage { read_only: false } => { + let mut raw_desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d12_ty::D3D12_UAV_DIMENSION_BUFFER, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_UAV { + FirstElement: data.offset / 4, + 
NumElements: size / 4, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: d3d12_ty::D3D12_BUFFER_UAV_FLAG_RAW, + } + }; + unsafe { + self.raw.CreateUnorderedAccessView( + data.buffer.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle, + ) + }; + } + } + inner.stage.push(handle); + } + } + wgt::BindingType::Texture { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.textures[start..end] { + let handle = data.view.handle_srv.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + } + wgt::BindingType::StorageTexture { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.textures[start..end] { + let handle = data.view.handle_uav.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.samplers[start..end] { + cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); + } + } + wgt::BindingType::AccelerationStructure => todo!(), + } + } + + let handle_views = match cpu_views { + Some(inner) => { + let dual = unsafe { + descriptor::upload( + self.raw.clone(), + &inner, + &self.shared.heap_views, + &desc.layout.copy_counts, + ) + }?; + Some(dual) + } + None => None, + }; + let handle_samplers = match cpu_samplers { + Some(inner) => { + let dual = unsafe { + descriptor::upload( + self.raw.clone(), + &inner, + &self.shared.heap_samplers, + &desc.layout.copy_counts, + ) + }?; + Some(dual) + } + None => None, + }; + + Ok(super::BindGroup { + handle_views, + handle_samplers, + dynamic_buffers, + }) + } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + if let Some(dual) = group.handle_views { + self.shared.heap_views.free_slice(dual); + } + if let Some(dual) = group.handle_samplers { + self.shared.heap_samplers.free_slice(dual); + } + 
} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + let raw_name = desc.label.and_then(|label| ffi::CString::new(label).ok()); + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga, raw_name }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) { + // just drop + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + let (topology_class, topology) = conv::map_topology(desc.primitive.topology); + let mut shader_stages = wgt::ShaderStages::VERTEX; + + let blob_vs = + self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_fs = match desc.fragment_stage { + Some(ref stage) => { + shader_stages |= wgt::ShaderStages::FRAGMENT; + Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)?) 
+ } + None => None, + }; + + let mut vertex_strides = [None; crate::MAX_VERTEX_BUFFERS]; + let mut input_element_descs = Vec::new(); + for (i, (stride, vbuf)) in vertex_strides + .iter_mut() + .zip(desc.vertex_buffers) + .enumerate() + { + *stride = NonZeroU32::new(vbuf.array_stride as u32); + let (slot_class, step_rate) = match vbuf.step_mode { + wgt::VertexStepMode::Vertex => { + (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) + } + wgt::VertexStepMode::Instance => { + (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) + } + }; + for attribute in vbuf.attributes { + input_element_descs.push(d3d12_ty::D3D12_INPUT_ELEMENT_DESC { + SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _, + SemanticIndex: attribute.shader_location, + Format: auxil::dxgi::conv::map_vertex_format(attribute.format), + InputSlot: i as u32, + AlignedByteOffset: attribute.offset as u32, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate, + }); + } + } + + let mut rtv_formats = [dxgiformat::DXGI_FORMAT_UNKNOWN; + d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + for (rtv_format, ct) in rtv_formats.iter_mut().zip(desc.color_targets) { + if let Some(ct) = ct.as_ref() { + *rtv_format = auxil::dxgi::conv::map_texture_format(ct.format); + } + } + + let bias = desc + .depth_stencil + .as_ref() + .map(|ds| ds.bias) + .unwrap_or_default(); + + let raw_rasterizer = d3d12_ty::D3D12_RASTERIZER_DESC { + FillMode: conv::map_polygon_mode(desc.primitive.polygon_mode), + CullMode: match desc.primitive.cull_mode { + None => d3d12_ty::D3D12_CULL_MODE_NONE, + Some(wgt::Face::Front) => d3d12_ty::D3D12_CULL_MODE_FRONT, + Some(wgt::Face::Back) => d3d12_ty::D3D12_CULL_MODE_BACK, + }, + FrontCounterClockwise: match desc.primitive.front_face { + wgt::FrontFace::Cw => 0, + wgt::FrontFace::Ccw => 1, + }, + DepthBias: bias.constant, + DepthBiasClamp: bias.clamp, + SlopeScaledDepthBias: bias.slope_scale, + DepthClipEnable: BOOL::from(!desc.primitive.unclipped_depth), + 
MultisampleEnable: BOOL::from(desc.multisample.count > 1), + ForcedSampleCount: 0, + AntialiasedLineEnable: 0, + ConservativeRaster: if desc.primitive.conservative { + d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + } else { + d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + }, + }; + + let raw_desc = d3d12_ty::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: desc.layout.shared.signature.as_mut_ptr(), + VS: *blob_vs.create_native_shader(), + PS: match blob_fs { + Some(ref shader) => *shader.create_native_shader(), + None => *d3d12::Shader::null(), + }, + GS: *d3d12::Shader::null(), + DS: *d3d12::Shader::null(), + HS: *d3d12::Shader::null(), + StreamOutput: d3d12_ty::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }, + BlendState: d3d12_ty::D3D12_BLEND_DESC { + AlphaToCoverageEnable: BOOL::from(desc.multisample.alpha_to_coverage_enabled), + IndependentBlendEnable: 1, + RenderTarget: conv::map_render_targets(desc.color_targets), + }, + SampleMask: desc.multisample.mask as u32, + RasterizerState: raw_rasterizer, + DepthStencilState: match desc.depth_stencil { + Some(ref ds) => conv::map_depth_stencil(ds), + None => unsafe { mem::zeroed() }, + }, + InputLayout: d3d12_ty::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: if input_element_descs.is_empty() { + ptr::null() + } else { + input_element_descs.as_ptr() + }, + NumElements: input_element_descs.len() as u32, + }, + IBStripCutValue: match desc.primitive.strip_index_format { + Some(wgt::IndexFormat::Uint16) => { + d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF + } + Some(wgt::IndexFormat::Uint32) => { + d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + } + None => d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + }, + PrimitiveTopologyType: topology_class, + NumRenderTargets: desc.color_targets.len() as u32, + RTVFormats: rtv_formats, + DSVFormat: desc + .depth_stencil + .as_ref() + 
.map_or(dxgiformat::DXGI_FORMAT_UNKNOWN, |ds| { + auxil::dxgi::conv::map_texture_format(ds.format) + }), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.multisample.count, + Quality: 0, + }, + NodeMask: 0, + CachedPSO: d3d12_ty::D3D12_CACHED_PIPELINE_STATE { + pCachedBlob: ptr::null(), + CachedBlobSizeInBytes: 0, + }, + Flags: d3d12_ty::D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + let mut raw = d3d12::PipelineState::null(); + let hr = { + profiling::scope!("ID3D12Device::CreateGraphicsPipelineState"); + unsafe { + self.raw.CreateGraphicsPipelineState( + &raw_desc, + &d3d12_ty::ID3D12PipelineState::uuidof(), + raw.mut_void(), + ) + } + }; + + unsafe { blob_vs.destroy() }; + if let Some(blob_fs) = blob_fs { + unsafe { blob_fs.destroy() }; + }; + + hr.into_result() + .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.into_owned()))?; + + null_comptr_check(&raw)?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::RenderPipeline { + raw, + layout: desc.layout.shared.clone(), + topology, + vertex_strides, + }) + } + unsafe fn destroy_render_pipeline(&self, _pipeline: super::RenderPipeline) {} + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + + let pair = { + profiling::scope!("ID3D12Device::CreateComputePipelineState"); + self.raw.create_compute_pipeline_state( + &desc.layout.shared.signature, + blob_cs.create_native_shader(), + 0, + d3d12::CachedPSO::null(), + d3d12::PipelineStateFlags::empty(), + ) + }; + + unsafe { blob_cs.destroy() }; + + let raw = pair.into_result().map_err(|err| { + crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) + })?; + + null_comptr_check(&raw)?; + + if let Some(name) = desc.label { + let cwstr = 
conv::map_label(name); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::ComputePipeline { + raw, + layout: desc.layout.shared.clone(), + }) + } + unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, DeviceError> { + let (heap_ty, raw_ty) = match desc.ty { + wgt::QueryType::Occlusion => ( + d3d12::QueryHeapType::Occlusion, + d3d12_ty::D3D12_QUERY_TYPE_BINARY_OCCLUSION, + ), + wgt::QueryType::PipelineStatistics(_) => ( + d3d12::QueryHeapType::PipelineStatistics, + d3d12_ty::D3D12_QUERY_TYPE_PIPELINE_STATISTICS, + ), + wgt::QueryType::Timestamp => ( + d3d12::QueryHeapType::Timestamp, + d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + ), + }; + + let raw = self + .raw + .create_query_heap(heap_ty, desc.count, 0) + .into_device_result("Query heap creation")?; + + null_comptr_check(&raw)?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::QuerySet { raw, raw_ty }) + } + unsafe fn destroy_query_set(&self, _set: super::QuerySet) {} + + unsafe fn create_fence(&self) -> Result<super::Fence, DeviceError> { + let mut raw = d3d12::Fence::null(); + let hr = unsafe { + self.raw.CreateFence( + 0, + d3d12_ty::D3D12_FENCE_FLAG_SHARED, + &d3d12_ty::ID3D12Fence::uuidof(), + raw.mut_void(), + ) + }; + hr.into_device_result("Fence creation")?; + null_comptr_check(&raw)?; + + Ok(super::Fence { raw }) + } + unsafe fn destroy_fence(&self, _fence: super::Fence) {} + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, DeviceError> { + Ok(unsafe { fence.raw.GetCompletedValue() }) + } + unsafe fn wait( + &self, + fence: &super::Fence, + value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, DeviceError> { + let timeout_duration = Duration::from_millis(timeout_ms as u64); + + // We first check if the fence has 
already reached the value we're waiting for. + let mut fence_value = unsafe { fence.raw.GetCompletedValue() }; + if fence_value >= value { + return Ok(true); + } + + fence + .raw + .set_event_on_completion(self.idler.event, value) + .into_device_result("Set event")?; + + let start_time = Instant::now(); + + // We need to loop to get correct behavior when timeouts are involved. + // + // wait(0): + // - We set the event from the fence value 0. + // - WaitForSingleObject times out, we return false. + // + // wait(1): + // - We set the event from the fence value 1. + // - WaitForSingleObject returns. However we do not know if the fence value is 0 or 1, + // just that _something_ triggered the event. We check the fence value, and if it is + // 1, we return true. Otherwise, we loop and wait again. + loop { + let elapsed = start_time.elapsed(); + + // We need to explicitly use checked_sub. Overflow with duration panics, and if the + // timing works out just right, we can get a negative remaining wait duration. + // + // This happens when a previous iteration WaitForSingleObject succeeded with a previous fence value, + // right before the timeout would have been hit. 
+ let remaining_wait_duration = match timeout_duration.checked_sub(elapsed) { + Some(remaining) => remaining, + None => { + log::trace!("Timeout elapsed inbetween waits!"); + break Ok(false); + } + }; + + log::trace!( + "Waiting for fence value {} for {:?}", + value, + remaining_wait_duration + ); + + match unsafe { + synchapi::WaitForSingleObject( + self.idler.event.0, + remaining_wait_duration.as_millis().try_into().unwrap(), + ) + } { + winbase::WAIT_OBJECT_0 => {} + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => { + log::error!("Wait failed!"); + break Err(DeviceError::Lost); + } + winerror::WAIT_TIMEOUT => { + log::trace!("Wait timed out!"); + break Ok(false); + } + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + break Err(DeviceError::Lost); + } + }; + + fence_value = unsafe { fence.raw.GetCompletedValue() }; + log::trace!("Wait complete! Fence actual value: {}", fence_value); + + if fence_value >= value { + break Ok(true); + } + } + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(feature = "renderdoc")] + { + unsafe { + self.render_doc + .start_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + } + } + #[cfg(not(feature = "renderdoc"))] + false + } + + unsafe fn stop_capture(&self) { + #[cfg(feature = "renderdoc")] + unsafe { + self.render_doc + .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + } + } + + unsafe fn get_acceleration_structure_build_sizes<'a>( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + ) -> crate::AccelerationStructureBuildSizes { + // Implement using `GetRaytracingAccelerationStructurePrebuildInfo`: + // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#getraytracingaccelerationstructureprebuildinfo + todo!() + } + + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + // Implement using `GetGPUVirtualAddress`: + // 
https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12resource-getgpuvirtualaddress + todo!() + } + + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result<super::AccelerationStructure, DeviceError> { + // Create a D3D12 resource as per-usual. + todo!() + } + + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + // Destroy a D3D12 resource as per-usual. + todo!() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/instance.rs b/third_party/rust/wgpu-hal/src/dx12/instance.rs new file mode 100644 index 0000000000..020809328e --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/instance.rs @@ -0,0 +1,158 @@ +use parking_lot::RwLock; +use winapi::shared::{dxgi1_5, minwindef}; + +use super::SurfaceTarget; +use crate::auxil::{self, dxgi::result::HResult as _}; +use std::{mem, sync::Arc}; + +impl Drop for super::Instance { + fn drop(&mut self) { + if self.flags.contains(wgt::InstanceFlags::VALIDATION) { + crate::auxil::dxgi::exception::unregister_exception_handler(); + } + } +} + +impl crate::Instance<super::Api> for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + profiling::scope!("Init DX12 Backend"); + let lib_main = d3d12::D3D12Lib::new().map_err(|e| { + crate::InstanceError::with_source(String::from("failed to load d3d12.dll"), e) + })?; + + if desc + .flags + .intersects(wgt::InstanceFlags::VALIDATION | wgt::InstanceFlags::GPU_BASED_VALIDATION) + { + // Enable debug layer + match lib_main.get_debug_interface() { + Ok(pair) => match pair.into_result() { + Ok(debug_controller) => { + if desc.flags.intersects(wgt::InstanceFlags::VALIDATION) { + debug_controller.enable_layer(); + } + if desc + .flags + .intersects(wgt::InstanceFlags::GPU_BASED_VALIDATION) + { + #[allow(clippy::collapsible_if)] + if !debug_controller.enable_gpu_based_validation() { + 
log::warn!("Failed to enable GPU-based validation"); + } + } + } + Err(err) => { + log::warn!("Unable to enable D3D12 debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for D3D12 not found: {:?}", err); + } + } + } + + // Create DXGIFactory4 + let (lib_dxgi, factory) = auxil::dxgi::factory::create_factory( + auxil::dxgi::factory::DxgiFactoryType::Factory4, + desc.flags, + )?; + + // Create IDXGIFactoryMedia + let factory_media = match lib_dxgi.create_factory_media() { + Ok(pair) => match pair.into_result() { + Ok(factory_media) => Some(factory_media), + Err(err) => { + log::error!("Failed to create IDXGIFactoryMedia: {}", err); + None + } + }, + Err(err) => { + log::warn!("IDXGIFactory1 creation function not found: {:?}", err); + None + } + }; + + let mut supports_allow_tearing = false; + #[allow(trivial_casts)] + if let Some(factory5) = factory.as_factory5() { + let mut allow_tearing: minwindef::BOOL = minwindef::FALSE; + let hr = unsafe { + factory5.CheckFeatureSupport( + dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, + &mut allow_tearing as *mut _ as *mut _, + mem::size_of::<minwindef::BOOL>() as _, + ) + }; + + match hr.into_result() { + Err(err) => log::warn!("Unable to check for tearing support: {}", err), + // A successful HRESULT only means the query ran; the actual answer is the + // BOOL that `CheckFeatureSupport` wrote into `allow_tearing`. + Ok(()) => supports_allow_tearing = allow_tearing != minwindef::FALSE, + } + } + + // Initialize DXC shader compiler + let dxc_container = match desc.dx12_shader_compiler.clone() { + wgt::Dx12Compiler::Dxc { + dxil_path, + dxc_path, + } => { + let container = super::shader_compilation::get_dxc_container(dxc_path, dxil_path) + .map_err(|e| { + crate::InstanceError::with_source(String::from("Failed to load DXC"), e) + })?; + + container.map(Arc::new) + } + wgt::Dx12Compiler::Fxc => None, + }; + + match dxc_container { + Some(_) => log::debug!("Using DXC for shader compilation"), + None => log::debug!("Using FXC for shader compilation"), + } + + Ok(Self { + // The call to create_factory will only succeed if we get a factory4, so this is safe. 
+ factory, + factory_media, + library: Arc::new(lib_main), + _lib_dxgi: lib_dxgi, + supports_allow_tearing, + flags: desc.flags, + dxc_container, + }) + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<super::Surface, crate::InstanceError> { + match window_handle { + raw_window_handle::RawWindowHandle::Win32(handle) => Ok(super::Surface { + factory: self.factory.clone(), + factory_media: self.factory_media.clone(), + target: SurfaceTarget::WndHandle(handle.hwnd.get() as *mut _), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: RwLock::new(None), + }), + _ => Err(crate::InstanceError::new(format!( + "window handle {window_handle:?} is not a Win32 handle" + ))), + } + } + unsafe fn destroy_surface(&self, _surface: super::Surface) { + // just drop + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let adapters = auxil::dxgi::factory::enumerate_adapters(self.factory.clone()); + + adapters + .into_iter() + .filter_map(|raw| { + super::Adapter::expose(raw, &self.library, self.flags, self.dxc_container.clone()) + }) + .collect() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/mod.rs b/third_party/rust/wgpu-hal/src/dx12/mod.rs new file mode 100644 index 0000000000..053b880689 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/mod.rs @@ -0,0 +1,956 @@ +/*! +# DirectX12 API internals. + +Generally the mapping is straightforward. + +## Resource transitions + +D3D12 API matches WebGPU internal states very well. The only +caveat here is issuing a special UAV barrier whenever both source +and destination states match, and they are for storage sync. + +## Memory + +For now, all resources are created with "committed" memory. + +## Resource binding + +See [`Device::create_pipeline_layout`] documentation for the structure +of the root signature corresponding to WebGPU pipeline layout. 
+ +Binding groups is mostly straightforward, with one big caveat: +all bindings have to be reset whenever the pipeline layout changes. +This is the rule of D3D12, and we can do nothing to help it. + +We detect this change at both [`crate::CommandEncoder::set_bind_group`] +and [`crate::CommandEncoder::set_render_pipeline`] with +[`crate::CommandEncoder::set_compute_pipeline`]. + +For this reason, in order to avoid repeating the binding code, +we are binding everything in `CommandEncoder::update_root_elements`. +When the pipeline layout is changed, we reset all bindings. +Otherwise, we pass a range corresponding only to the current bind group. + +!*/ + +mod adapter; +mod command; +mod conv; +mod descriptor; +mod device; +mod instance; +mod shader_compilation; +mod suballocation; +mod types; +mod view; + +use crate::auxil::{self, dxgi::result::HResult as _}; + +use arrayvec::ArrayVec; +use parking_lot::{Mutex, RwLock}; +use std::{ffi, fmt, mem, num::NonZeroU32, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_4, dxgitype, windef, winerror}, + um::{d3d12 as d3d12_ty, dcomp, synchapi, winbase, winnt}, + Interface as _, +}; + +#[derive(Clone, Debug)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = Texture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; +} + +// Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. 
+const MAX_ROOT_ELEMENTS: usize = 64; +const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10; + +pub struct Instance { + factory: d3d12::DxgiFactory, + factory_media: Option<d3d12::FactoryMedia>, + library: Arc<d3d12::D3D12Lib>, + supports_allow_tearing: bool, + _lib_dxgi: d3d12::DxgiLib, + flags: wgt::InstanceFlags, + dxc_container: Option<Arc<shader_compilation::DxcContainer>>, +} + +impl Instance { + pub unsafe fn create_surface_from_visual( + &self, + visual: *mut dcomp::IDCompositionVisual, + ) -> Surface { + Surface { + factory: self.factory.clone(), + factory_media: self.factory_media.clone(), + target: SurfaceTarget::Visual(unsafe { d3d12::ComPtr::from_raw(visual) }), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: RwLock::new(None), + } + } + + pub unsafe fn create_surface_from_surface_handle( + &self, + surface_handle: winnt::HANDLE, + ) -> Surface { + Surface { + factory: self.factory.clone(), + factory_media: self.factory_media.clone(), + target: SurfaceTarget::SurfaceHandle(surface_handle), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: RwLock::new(None), + } + } + + pub unsafe fn create_surface_from_swap_chain_panel( + &self, + swap_chain_panel: *mut types::ISwapChainPanelNative, + ) -> Surface { + Surface { + factory: self.factory.clone(), + factory_media: self.factory_media.clone(), + target: SurfaceTarget::SwapChainPanel(unsafe { + d3d12::ComPtr::from_raw(swap_chain_panel) + }), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: RwLock::new(None), + } + } +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +struct SwapChain { + raw: d3d12::ComPtr<dxgi1_4::IDXGISwapChain3>, + // need to associate raw image pointers with the swapchain so they can be properly released + // when the swapchain is destroyed + resources: Vec<d3d12::Resource>, + waitable: winnt::HANDLE, + acquired_count: usize, + present_mode: wgt::PresentMode, + format: wgt::TextureFormat, + size: 
wgt::Extent3d, +} + +enum SurfaceTarget { + WndHandle(windef::HWND), + Visual(d3d12::ComPtr<dcomp::IDCompositionVisual>), + SurfaceHandle(winnt::HANDLE), + SwapChainPanel(d3d12::ComPtr<types::ISwapChainPanelNative>), +} + +pub struct Surface { + factory: d3d12::DxgiFactory, + factory_media: Option<d3d12::FactoryMedia>, + target: SurfaceTarget, + supports_allow_tearing: bool, + swap_chain: RwLock<Option<SwapChain>>, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug, Clone, Copy)] +enum MemoryArchitecture { + Unified { + #[allow(unused)] + cache_coherent: bool, + }, + NonUnified, +} + +#[derive(Debug, Clone, Copy)] +struct PrivateCapabilities { + instance_flags: wgt::InstanceFlags, + #[allow(unused)] + heterogeneous_resource_heaps: bool, + memory_architecture: MemoryArchitecture, + #[allow(unused)] // TODO: Exists until windows-rs is standard, then it can probably be removed? + heap_create_not_zeroed: bool, + casting_fully_typed_format_supported: bool, + suballocation_supported: bool, +} + +#[derive(Default)] +struct Workarounds { + // On WARP, temporary CPU descriptors are still used by the runtime + // after we call `CopyDescriptors`. + avoid_cpu_descriptor_overwrites: bool, +} + +pub struct Adapter { + raw: d3d12::DxgiAdapter, + device: d3d12::Device, + library: Arc<d3d12::D3D12Lib>, + private_caps: PrivateCapabilities, + presentation_timer: auxil::dxgi::time::PresentationTimer, + //Note: this isn't used right now, but we'll need it later. + #[allow(unused)] + workarounds: Workarounds, + dxc_container: Option<Arc<shader_compilation::DxcContainer>>, +} + +unsafe impl Send for Adapter {} +unsafe impl Sync for Adapter {} + +/// Helper structure for waiting for GPU. 
+struct Idler { + fence: d3d12::Fence, + event: d3d12::Event, +} + +struct CommandSignatures { + draw: d3d12::CommandSignature, + draw_indexed: d3d12::CommandSignature, + dispatch: d3d12::CommandSignature, +} + +struct DeviceShared { + zero_buffer: d3d12::Resource, + cmd_signatures: CommandSignatures, + heap_views: descriptor::GeneralHeap, + heap_samplers: descriptor::GeneralHeap, +} + +pub struct Device { + raw: d3d12::Device, + present_queue: d3d12::CommandQueue, + idler: Idler, + private_caps: PrivateCapabilities, + shared: Arc<DeviceShared>, + // CPU only pools + rtv_pool: Mutex<descriptor::CpuPool>, + dsv_pool: Mutex<descriptor::CpuPool>, + srv_uav_pool: Mutex<descriptor::CpuPool>, + sampler_pool: Mutex<descriptor::CpuPool>, + // library + library: Arc<d3d12::D3D12Lib>, + #[cfg(feature = "renderdoc")] + render_doc: crate::auxil::renderdoc::RenderDoc, + null_rtv_handle: descriptor::Handle, + mem_allocator: Option<Mutex<suballocation::GpuAllocatorWrapper>>, + dxc_container: Option<Arc<shader_compilation::DxcContainer>>, +} + +unsafe impl Send for Device {} +unsafe impl Sync for Device {} + +pub struct Queue { + raw: d3d12::CommandQueue, + temp_lists: Mutex<Vec<d3d12::CommandList>>, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +#[derive(Default)] +struct Temp { + marker: Vec<u16>, + barriers: Vec<d3d12_ty::D3D12_RESOURCE_BARRIER>, +} + +impl Temp { + fn clear(&mut self) { + self.marker.clear(); + self.barriers.clear(); + } +} + +struct PassResolve { + src: (d3d12::Resource, u32), + dst: (d3d12::Resource, u32), + format: d3d12::Format, +} + +#[derive(Clone, Copy)] +enum RootElement { + Empty, + Constant, + SpecialConstantBuffer { + first_vertex: i32, + first_instance: u32, + other: u32, + }, + /// Descriptor table. + Table(d3d12::GpuDescriptor), + /// Descriptor for a buffer that has dynamic offset. 
+ DynamicOffsetBuffer { + kind: BufferViewKind, + address: d3d12::GpuAddress, + }, +} + +#[derive(Clone, Copy)] +enum PassKind { + Render, + Compute, + Transfer, +} + +struct PassState { + has_label: bool, + resolves: ArrayVec<PassResolve, { crate::MAX_COLOR_ATTACHMENTS }>, + layout: PipelineLayoutShared, + root_elements: [RootElement; MAX_ROOT_ELEMENTS], + constant_data: [u32; MAX_ROOT_ELEMENTS], + dirty_root_elements: u64, + vertex_buffers: [d3d12_ty::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: usize, + kind: PassKind, +} + +#[test] +fn test_dirty_mask() { + assert_eq!(MAX_ROOT_ELEMENTS, std::mem::size_of::<u64>() * 8); +} + +impl PassState { + fn new() -> Self { + PassState { + has_label: false, + resolves: ArrayVec::new(), + layout: PipelineLayoutShared { + signature: d3d12::RootSignature::null(), + total_root_elements: 0, + special_constants_root_index: None, + root_constant_info: None, + }, + root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], + constant_data: [0; MAX_ROOT_ELEMENTS], + dirty_root_elements: 0, + vertex_buffers: [unsafe { mem::zeroed() }; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: 0, + kind: PassKind::Transfer, + } + } + + fn clear(&mut self) { + // careful about heap allocations! + *self = Self::new(); + } +} + +pub struct CommandEncoder { + allocator: d3d12::CommandAllocator, + device: d3d12::Device, + shared: Arc<DeviceShared>, + null_rtv_handle: descriptor::Handle, + list: Option<d3d12::GraphicsCommandList>, + free_lists: Vec<d3d12::GraphicsCommandList>, + pass: PassState, + temp: Temp, + + /// If set, the end of the next render/compute pass will write a timestamp at + /// the given pool & location. 
+ end_of_pass_timer_query: Option<(d3d12::QueryHeap, u32)>, +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} + +// Hand-written `Debug`: only the allocator and device handles are printed; +// the remaining encoder state is omitted from the output. +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("allocator", &self.allocator) + .field("device", &self.device) + .finish() + } +} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: d3d12::GraphicsCommandList, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct Buffer { + resource: d3d12::Resource, + size: wgt::BufferAddress, + allocation: Option<suballocation::AllocationWrapper>, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl crate::BufferBinding<'_, Api> { + fn resolve_size(&self) -> wgt::BufferAddress { + match self.size { + Some(size) => size.get(), + None => self.buffer.size - self.offset, + } + } + + fn resolve_address(&self) -> wgt::BufferAddress { + self.buffer.resource.gpu_virtual_address() + self.offset + } +} + +#[derive(Debug)] +pub struct Texture { + resource: d3d12::Resource, + format: wgt::TextureFormat, + dimension: wgt::TextureDimension, + size: wgt::Extent3d, + mip_level_count: u32, + sample_count: u32, + allocation: Option<suballocation::AllocationWrapper>, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +impl Texture { + fn array_layer_count(&self) -> u32 { + match self.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D3 => 1, + wgt::TextureDimension::D2 => self.size.depth_or_array_layers, + } + } + + /// see https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice + fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 { + mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count + } + + fn calc_subresource_for_copy(&self, base: &crate::TextureCopyBase) -> u32 { + let plane = match 
base.aspect { + crate::FormatAspects::COLOR | crate::FormatAspects::DEPTH => 0, + crate::FormatAspects::STENCIL => 1, + _ => unreachable!(), + }; + self.calc_subresource(base.mip_level, base.array_layer, plane) + } +} + +#[derive(Debug)] +pub struct TextureView { + raw_format: d3d12::Format, + aspects: crate::FormatAspects, + /// only used by resolve + target_base: (d3d12::Resource, u32), + handle_srv: Option<descriptor::Handle>, + handle_uav: Option<descriptor::Handle>, + handle_rtv: Option<descriptor::Handle>, + handle_dsv_ro: Option<descriptor::Handle>, + handle_dsv_rw: Option<descriptor::Handle>, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} + +#[derive(Debug)] +pub struct Sampler { + handle: descriptor::Handle, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +#[derive(Debug)] +pub struct QuerySet { + raw: d3d12::QueryHeap, + raw_ty: d3d12_ty::D3D12_QUERY_TYPE, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} + +#[derive(Debug)] +pub struct Fence { + raw: d3d12::Fence, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +impl Fence { + pub fn raw_fence(&self) -> &d3d12::Fence { + &self.raw + } +} + +#[derive(Debug)] +pub struct BindGroupLayout { + /// Sorted list of entries. + entries: Vec<wgt::BindGroupLayoutEntry>, + cpu_heap_views: Option<descriptor::CpuHeap>, + cpu_heap_samplers: Option<descriptor::CpuHeap>, + copy_counts: Vec<u32>, // all 1's +} + +#[derive(Debug, Clone, Copy)] +enum BufferViewKind { + Constant, + ShaderResource, + UnorderedAccess, +} + +#[derive(Debug)] +pub struct BindGroup { + handle_views: Option<descriptor::DualHandle>, + handle_samplers: Option<descriptor::DualHandle>, + dynamic_buffers: Vec<d3d12::GpuAddress>, +} + +bitflags::bitflags! 
{ + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct TableTypes: u8 { + const SRV_CBV_UAV = 1 << 0; + const SAMPLERS = 1 << 1; + } +} + +// Element (also known as parameter) index into the root signature. +type RootIndex = u32; + +#[derive(Debug)] +struct BindGroupInfo { + base_root_index: RootIndex, + tables: TableTypes, + dynamic_buffers: Vec<BufferViewKind>, +} + +#[derive(Debug, Clone)] +struct RootConstantInfo { + root_index: RootIndex, + range: std::ops::Range<u32>, +} + +#[derive(Debug, Clone)] +struct PipelineLayoutShared { + signature: d3d12::RootSignature, + total_root_elements: RootIndex, + special_constants_root_index: Option<RootIndex>, + root_constant_info: Option<RootConstantInfo>, +} + +unsafe impl Send for PipelineLayoutShared {} +unsafe impl Sync for PipelineLayoutShared {} + +#[derive(Debug)] +pub struct PipelineLayout { + shared: PipelineLayoutShared, + // Storing for each associated bind group, which tables we created + // in the root signature. This is required for binding descriptor sets. 
+ bind_group_infos: ArrayVec<BindGroupInfo, { crate::MAX_BIND_GROUPS }>, + naga_options: naga::back::hlsl::Options, +} + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + raw_name: Option<ffi::CString>, +} + +pub(super) enum CompiledShader { + #[allow(unused)] + Dxc(Vec<u8>), + Fxc(d3d12::Blob), +} + +impl CompiledShader { + fn create_native_shader(&self) -> d3d12::Shader { + match *self { + CompiledShader::Dxc(ref shader) => d3d12::Shader::from_raw(shader), + CompiledShader::Fxc(ref shader) => d3d12::Shader::from_blob(shader), + } + } + + unsafe fn destroy(self) {} +} + +#[derive(Debug)] +pub struct RenderPipeline { + raw: d3d12::PipelineState, + layout: PipelineLayoutShared, + topology: d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, + vertex_strides: [Option<NonZeroU32>; crate::MAX_VERTEX_BUFFERS], +} + +unsafe impl Send for RenderPipeline {} +unsafe impl Sync for RenderPipeline {} + +#[derive(Debug)] +pub struct ComputePipeline { + raw: d3d12::PipelineState, + layout: PipelineLayoutShared, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +#[derive(Debug)] +pub struct AccelerationStructure {} + +impl SwapChain { + unsafe fn release_resources(self) -> d3d12::ComPtr<dxgi1_4::IDXGISwapChain3> { + self.raw + } + + unsafe fn wait( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<bool, crate::SurfaceError> { + let timeout_ms = match timeout { + Some(duration) => duration.as_millis() as u32, + None => winbase::INFINITE, + }; + match unsafe { synchapi::WaitForSingleObject(self.waitable, timeout_ms) } { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::SurfaceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::SurfaceError::Lost) + } + } + } +} + +impl crate::Surface<Api> for Surface { + unsafe fn configure( + &self, + device: &Device, + config: 
&crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + // We always set ALLOW_TEARING on the swapchain no matter + // what kind of swapchain we want because ResizeBuffers + // cannot change the swapchain's ALLOW_TEARING flag. + // + // This does not change the behavior of the swapchain, just + // allow present calls to use tearing. + if self.supports_allow_tearing { + flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + } + + // While `configure`s contract ensures that no work on the GPU's main queues + // are in flight, we still need to wait for the present queue to be idle. + unsafe { device.wait_for_present_queue_idle() }?; + + let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format); + + // The range for `SetMaximumFrameLatency` is 1-16 so the maximum latency requested should be 15 because we add 1. + // https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency + debug_assert!(config.maximum_frame_latency <= 15); + + // Nvidia recommends to use 1-2 more buffers than the maximum latency + // https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/ + // For high latency extra buffers seems excessive, so go with a minimum of 3 and beyond that add 1. 
+ let swap_chain_buffer = (config.maximum_frame_latency + 1).min(16); + + let swap_chain = match self.swap_chain.write().take() { + //Note: this path doesn't properly re-initialize all of the things + Some(sc) => { + let raw = unsafe { sc.release_resources() }; + let result = unsafe { + raw.ResizeBuffers( + swap_chain_buffer, + config.extent.width, + config.extent.height, + non_srgb_format, + flags, + ) + }; + if let Err(err) = result.into_result() { + log::error!("ResizeBuffers failed: {}", err); + return Err(crate::SurfaceError::Other("window is in use")); + } + raw + } + None => { + let desc = d3d12::SwapchainDesc { + alpha_mode: auxil::dxgi::conv::map_acomposite_alpha_mode( + config.composite_alpha_mode, + ), + width: config.extent.width, + height: config.extent.height, + format: non_srgb_format, + stereo: false, + sample: d3d12::SampleDesc { + count: 1, + quality: 0, + }, + buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, + buffer_count: swap_chain_buffer, + scaling: d3d12::Scaling::Stretch, + swap_effect: d3d12::SwapEffect::FlipDiscard, + flags, + }; + let swap_chain1 = match self.target { + SurfaceTarget::Visual(_) | SurfaceTarget::SwapChainPanel(_) => { + profiling::scope!("IDXGIFactory4::CreateSwapChainForComposition"); + self.factory + .unwrap_factory2() + .create_swapchain_for_composition( + device.present_queue.as_mut_ptr() as *mut _, + &desc, + ) + .into_result() + } + SurfaceTarget::SurfaceHandle(handle) => { + profiling::scope!( + "IDXGIFactoryMedia::CreateSwapChainForCompositionSurfaceHandle" + ); + self.factory_media + .clone() + .ok_or(crate::SurfaceError::Other("IDXGIFactoryMedia not found"))? 
+ .create_swapchain_for_composition_surface_handle( + device.present_queue.as_mut_ptr() as *mut _, + handle, + &desc, + ) + .into_result() + } + SurfaceTarget::WndHandle(hwnd) => { + profiling::scope!("IDXGIFactory4::CreateSwapChainForHwnd"); + self.factory + .as_factory2() + .unwrap() + .create_swapchain_for_hwnd( + device.present_queue.as_mut_ptr() as *mut _, + hwnd, + &desc, + ) + .into_result() + } + }; + + let swap_chain1 = match swap_chain1 { + Ok(s) => s, + Err(err) => { + log::error!("SwapChain creation error: {}", err); + return Err(crate::SurfaceError::Other("swap chain creation")); + } + }; + + match &self.target { + &SurfaceTarget::WndHandle(_) | &SurfaceTarget::SurfaceHandle(_) => {} + &SurfaceTarget::Visual(ref visual) => { + if let Err(err) = + unsafe { visual.SetContent(swap_chain1.as_unknown()) }.into_result() + { + log::error!("Unable to SetContent: {}", err); + return Err(crate::SurfaceError::Other( + "IDCompositionVisual::SetContent", + )); + } + } + &SurfaceTarget::SwapChainPanel(ref swap_chain_panel) => { + if let Err(err) = + unsafe { swap_chain_panel.SetSwapChain(swap_chain1.as_ptr()) } + .into_result() + { + log::error!("Unable to SetSwapChain: {}", err); + return Err(crate::SurfaceError::Other( + "ISwapChainPanelNative::SetSwapChain", + )); + } + } + } + + match unsafe { swap_chain1.cast::<dxgi1_4::IDXGISwapChain3>() }.into_result() { + Ok(swap_chain3) => swap_chain3, + Err(err) => { + log::error!("Unable to cast swap chain: {}", err); + return Err(crate::SurfaceError::Other("swap chain cast to 3")); + } + } + } + }; + + match self.target { + SurfaceTarget::WndHandle(wnd_handle) => { + // Disable automatic Alt+Enter handling by DXGI. 
+ const DXGI_MWA_NO_WINDOW_CHANGES: u32 = 1; + const DXGI_MWA_NO_ALT_ENTER: u32 = 2; + unsafe { + self.factory.MakeWindowAssociation( + wnd_handle, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER, + ) + }; + } + SurfaceTarget::Visual(_) + | SurfaceTarget::SurfaceHandle(_) + | SurfaceTarget::SwapChainPanel(_) => {} + } + + unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) }; + let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() }; + + let mut resources = Vec::with_capacity(swap_chain_buffer as usize); + for i in 0..swap_chain_buffer { + let mut resource = d3d12::Resource::null(); + unsafe { + swap_chain.GetBuffer(i, &d3d12_ty::ID3D12Resource::uuidof(), resource.mut_void()) + }; + resources.push(resource); + } + + let mut swapchain = self.swap_chain.write(); + *swapchain = Some(SwapChain { + raw: swap_chain, + resources, + waitable, + acquired_count: 0, + present_mode: config.present_mode, + format: config.format, + size: config.extent, + }); + + Ok(()) + } + + unsafe fn unconfigure(&self, device: &Device) { + if let Some(sc) = self.swap_chain.write().take() { + unsafe { + // While `unconfigure`s contract ensures that no work on the GPU's main queues + // are in flight, we still need to wait for the present queue to be idle. + + // The major failure mode of this function is device loss, + // which if we have lost the device, we should just continue + // cleaning up, without error. 
+ let _ = device.wait_for_present_queue_idle(); + + let _raw = sc.release_resources(); + } + } + } + + unsafe fn acquire_texture( + &self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> { + let mut swapchain = self.swap_chain.write(); + let sc = swapchain.as_mut().unwrap(); + + unsafe { sc.wait(timeout) }?; + + let base_index = unsafe { sc.raw.GetCurrentBackBufferIndex() } as usize; + let index = (base_index + sc.acquired_count) % sc.resources.len(); + sc.acquired_count += 1; + + let texture = Texture { + resource: sc.resources[index].clone(), + format: sc.format, + dimension: wgt::TextureDimension::D2, + size: sc.size, + mip_level_count: 1, + sample_count: 1, + allocation: None, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&self, _texture: Texture) { + let mut swapchain = self.swap_chain.write(); + let sc = swapchain.as_mut().unwrap(); + sc.acquired_count -= 1; + } +} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &self, + command_buffers: &[&CommandBuffer], + _surface_textures: &[&Texture], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + let mut temp_lists = self.temp_lists.lock(); + temp_lists.clear(); + for cmd_buf in command_buffers { + temp_lists.push(cmd_buf.raw.as_list()); + } + + { + profiling::scope!("ID3D12CommandQueue::ExecuteCommandLists"); + self.raw.execute_command_lists(&temp_lists); + } + + if let Some((fence, value)) = signal_fence { + self.raw + .signal(&fence.raw, value) + .into_device_result("Signal fence")?; + } + + // Note the lack of synchronization here between the main Direct queue + // and the dedicated presentation queue. This is automatically handled + // by the D3D runtime by detecting uses of resources derived from the + // swapchain. This automatic detection is why you cannot use a swapchain + // as an UAV in D3D12. 
+ + Ok(()) + } + unsafe fn present( + &self, + surface: &Surface, + _texture: Texture, + ) -> Result<(), crate::SurfaceError> { + let mut swapchain = surface.swap_chain.write(); + let sc = swapchain.as_mut().unwrap(); + sc.acquired_count -= 1; + + let (interval, flags) = match sc.present_mode { + // We only allow immediate if ALLOW_TEARING is valid. + wgt::PresentMode::Immediate => (0, dxgi::DXGI_PRESENT_ALLOW_TEARING), + wgt::PresentMode::Mailbox => (0, 0), + wgt::PresentMode::Fifo => (1, 0), + m => unreachable!("Cannot make surface with present mode {m:?}"), + }; + + profiling::scope!("IDXGISwapchain3::Present"); + unsafe { sc.raw.Present(interval, flags) }; + + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + let mut frequency = 0u64; + unsafe { self.raw.GetTimestampFrequency(&mut frequency) }; + (1_000_000_000.0 / frequency as f64) as f32 + } +} + +/// A shorthand for producing a `ResourceCreationFailed` error if a ComPtr is null. +#[inline] +pub fn null_comptr_check<T: winapi::Interface>( + ptr: &d3d12::ComPtr<T>, +) -> Result<(), crate::DeviceError> { + if d3d12::ComPtr::is_null(ptr) { + return Err(crate::DeviceError::ResourceCreationFailed); + } + + Ok(()) +} diff --git a/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs b/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs new file mode 100644 index 0000000000..df040dba15 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs @@ -0,0 +1,294 @@ +use std::ptr; + +pub(super) use dxc::{compile_dxc, get_dxc_container, DxcContainer}; +use winapi::um::d3dcompiler; + +use crate::auxil::dxgi::result::HResult; + +// This exists so that users who don't want to use dxc can disable the dxc_shader_compiler feature +// and not have to compile hassle_rs. 
+// Currently this will use Dxc if it is chosen as the dx12 compiler at `Instance` creation time, and will +// fallback to FXC if the Dxc libraries (dxil.dll and dxcompiler.dll) are not found, or if Fxc is chosen at' +// `Instance` creation time. + +pub(super) fn compile_fxc( + device: &super::Device, + source: &String, + source_name: &str, + raw_ep: &std::ffi::CString, + stage_bit: wgt::ShaderStages, + full_stage: String, +) -> ( + Result<super::CompiledShader, crate::PipelineError>, + log::Level, +) { + profiling::scope!("compile_fxc"); + let mut shader_data = d3d12::Blob::null(); + let mut compile_flags = d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS; + if device + .private_caps + .instance_flags + .contains(wgt::InstanceFlags::DEBUG) + { + compile_flags |= d3dcompiler::D3DCOMPILE_DEBUG | d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION; + } + let mut error = d3d12::Blob::null(); + let hr = unsafe { + profiling::scope!("d3dcompiler::D3DCompile"); + d3dcompiler::D3DCompile( + source.as_ptr().cast(), + source.len(), + source_name.as_ptr().cast(), + ptr::null(), + ptr::null_mut(), + raw_ep.as_ptr(), + full_stage.as_ptr().cast(), + compile_flags, + 0, + shader_data.mut_void().cast(), + error.mut_void().cast(), + ) + }; + + match hr.into_result() { + Ok(()) => ( + Ok(super::CompiledShader::Fxc(shader_data)), + log::Level::Info, + ), + Err(e) => { + let mut full_msg = format!("FXC D3DCompile error ({e})"); + if !error.is_null() { + use std::fmt::Write as _; + let message = unsafe { + std::slice::from_raw_parts( + error.GetBufferPointer() as *const u8, + error.GetBufferSize(), + ) + }; + let _ = write!(full_msg, ": {}", String::from_utf8_lossy(message)); + } + ( + Err(crate::PipelineError::Linkage(stage_bit, full_msg)), + log::Level::Warn, + ) + } + } +} + +// The Dxc implementation is behind a feature flag so that users who don't want to use dxc can disable the feature. 
+#[cfg(feature = "dxc_shader_compiler")] +mod dxc { + use std::path::PathBuf; + + // Destructor order should be fine since _dxil and _dxc don't rely on each other. + pub(crate) struct DxcContainer { + compiler: hassle_rs::DxcCompiler, + library: hassle_rs::DxcLibrary, + validator: hassle_rs::DxcValidator, + // Has to be held onto for the lifetime of the device otherwise shaders will fail to compile. + _dxc: hassle_rs::Dxc, + // Also Has to be held onto for the lifetime of the device otherwise shaders will fail to validate. + _dxil: hassle_rs::Dxil, + } + + pub(crate) fn get_dxc_container( + dxc_path: Option<PathBuf>, + dxil_path: Option<PathBuf>, + ) -> Result<Option<DxcContainer>, crate::DeviceError> { + // Make sure that dxil.dll exists. + let dxil = match hassle_rs::Dxil::new(dxil_path) { + Ok(dxil) => dxil, + Err(e) => { + log::warn!("Failed to load dxil.dll. Defaulting to FXC instead: {}", e); + return Ok(None); + } + }; + + // Needed for explicit validation. + let validator = dxil.create_validator()?; + + let dxc = match hassle_rs::Dxc::new(dxc_path) { + Ok(dxc) => dxc, + Err(e) => { + log::warn!( + "Failed to load dxcompiler.dll. 
Defaulting to FXC instead: {}", + e + ); + return Ok(None); + } + }; + let compiler = dxc.create_compiler()?; + let library = dxc.create_library()?; + + Ok(Some(DxcContainer { + _dxc: dxc, + compiler, + library, + _dxil: dxil, + validator, + })) + } + + pub(crate) fn compile_dxc( + device: &crate::dx12::Device, + source: &str, + source_name: &str, + raw_ep: &str, + stage_bit: wgt::ShaderStages, + full_stage: String, + dxc_container: &DxcContainer, + ) -> ( + Result<crate::dx12::CompiledShader, crate::PipelineError>, + log::Level, + ) { + profiling::scope!("compile_dxc"); + let mut compile_flags = arrayvec::ArrayVec::<&str, 6>::new_const(); + compile_flags.push("-Ges"); // d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS + compile_flags.push("-Vd"); // Disable implicit validation to work around bugs when dxil.dll isn't in the local directory. + compile_flags.push("-HV"); // Use HLSL 2018, Naga doesn't supported 2021 yet. + compile_flags.push("2018"); + + if device + .private_caps + .instance_flags + .contains(wgt::InstanceFlags::DEBUG) + { + compile_flags.push("-Zi"); // d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION + compile_flags.push("-Od"); // d3dcompiler::D3DCOMPILE_DEBUG + } + + let blob = match dxc_container + .library + .create_blob_with_encoding_from_str(source) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("DXC blob error: {e}"))) + { + Ok(blob) => blob, + Err(e) => return (Err(e), log::Level::Error), + }; + + let compiled = dxc_container.compiler.compile( + &blob, + source_name, + raw_ep, + &full_stage, + &compile_flags, + None, + &[], + ); + + let (result, log_level) = match compiled { + Ok(dxc_result) => match dxc_result.get_result() { + Ok(dxc_blob) => { + // Validate the shader. 
+ match dxc_container.validator.validate(dxc_blob) { + Ok(validated_blob) => ( + Ok(crate::dx12::CompiledShader::Dxc(validated_blob.to_vec())), + log::Level::Info, + ), + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!( + "DXC validation error: {:?}\n{:?}", + get_error_string_from_dxc_result(&dxc_container.library, &e.0) + .unwrap_or_default(), + e.1 + ), + )), + log::Level::Error, + ), + } + } + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!("DXC compile error: {e}"), + )), + log::Level::Error, + ), + }, + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!( + "DXC compile error: {:?}", + get_error_string_from_dxc_result(&dxc_container.library, &e.0) + .unwrap_or_default() + ), + )), + log::Level::Error, + ), + }; + + (result, log_level) + } + + impl From<hassle_rs::HassleError> for crate::DeviceError { + fn from(value: hassle_rs::HassleError) -> Self { + match value { + hassle_rs::HassleError::Win32Error(e) => { + // TODO: This returns an HRESULT, should we try and use the associated Windows error message? + log::error!("Win32 error: {e:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::LoadLibraryError { filename, inner } => { + log::error!("Failed to load dxc library {filename:?}. Inner error: {inner:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::LibLoadingError(e) => { + log::error!("Failed to load dxc library. {e:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::WindowsOnly(e) => { + log::error!("Signing with dxil.dll is only supported on Windows. 
{e:?}"); + crate::DeviceError::Lost + } + // `ValidationError` and `CompileError` should never happen in a context involving `DeviceError` + hassle_rs::HassleError::ValidationError(_e) => unimplemented!(), + hassle_rs::HassleError::CompileError(_e) => unimplemented!(), + } + } + } + + fn get_error_string_from_dxc_result( + library: &hassle_rs::DxcLibrary, + error: &hassle_rs::DxcOperationResult, + ) -> Result<String, hassle_rs::HassleError> { + error + .get_error_buffer() + .and_then(|error| library.get_blob_as_string(&hassle_rs::DxcBlob::from(error))) + } +} + +// These are stubs for when the `dxc_shader_compiler` feature is disabled. +#[cfg(not(feature = "dxc_shader_compiler"))] +mod dxc { + use std::path::PathBuf; + + pub(crate) struct DxcContainer {} + + pub(crate) fn get_dxc_container( + _dxc_path: Option<PathBuf>, + _dxil_path: Option<PathBuf>, + ) -> Result<Option<DxcContainer>, crate::DeviceError> { + // Falls back to Fxc and logs an error. + log::error!("DXC shader compiler was requested on Instance creation, but the DXC feature is disabled. Enable the `dxc_shader_compiler` feature on wgpu_hal to use DXC."); + Ok(None) + } + + // It shouldn't be possible that this gets called with the `dxc_shader_compiler` feature disabled. + pub(crate) fn compile_dxc( + _device: &crate::dx12::Device, + _source: &str, + _source_name: &str, + _raw_ep: &str, + _stage_bit: wgt::ShaderStages, + _full_stage: String, + _dxc_container: &DxcContainer, + ) -> ( + Result<crate::dx12::CompiledShader, crate::PipelineError>, + log::Level, + ) { + unimplemented!("Something went really wrong, please report this. Attempted to compile shader with DXC, but the DXC feature is disabled. 
Enable the `dxc_shader_compiler` feature on wgpu_hal to use DXC."); + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/suballocation.rs b/third_party/rust/wgpu-hal/src/dx12/suballocation.rs new file mode 100644 index 0000000000..47a398be53 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/suballocation.rs @@ -0,0 +1,363 @@ +pub(crate) use allocation::{ + create_allocator_wrapper, create_buffer_resource, create_texture_resource, + free_buffer_allocation, free_texture_allocation, AllocationWrapper, GpuAllocatorWrapper, +}; + +#[cfg(not(feature = "windows_rs"))] +use committed as allocation; +#[cfg(feature = "windows_rs")] +use placed as allocation; + +// This exists to work around https://github.com/gfx-rs/wgpu/issues/3207 +// Currently this will work the older, slower way if the windows_rs feature is disabled, +// and will use the fast path of suballocating buffers and textures using gpu_allocator if +// the windows_rs feature is enabled. + +// This is the fast path using gpu_allocator to suballocate buffers and textures. 
+#[cfg(feature = "windows_rs")] +mod placed { + use crate::dx12::null_comptr_check; + use d3d12::ComPtr; + use parking_lot::Mutex; + use std::ptr; + use wgt::assertions::StrictAssertUnwrapExt; + use winapi::{ + um::{ + d3d12::{self as d3d12_ty, ID3D12Resource}, + winnt::HRESULT, + }, + Interface, + }; + + use gpu_allocator::{ + d3d12::{AllocationCreateDesc, ToWinapi, ToWindows}, + MemoryLocation, + }; + + #[derive(Debug)] + pub(crate) struct GpuAllocatorWrapper { + pub(crate) allocator: gpu_allocator::d3d12::Allocator, + } + + #[derive(Debug)] + pub(crate) struct AllocationWrapper { + pub(crate) allocation: gpu_allocator::d3d12::Allocation, + } + + pub(crate) fn create_allocator_wrapper( + raw: &d3d12::Device, + ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> { + let device = raw.as_ptr(); + + match gpu_allocator::d3d12::Allocator::new(&gpu_allocator::d3d12::AllocatorCreateDesc { + device: gpu_allocator::d3d12::ID3D12DeviceVersion::Device(device.as_windows().clone()), + debug_settings: Default::default(), + allocation_sizes: gpu_allocator::AllocationSizes::default(), + }) { + Ok(allocator) => Ok(Some(Mutex::new(GpuAllocatorWrapper { allocator }))), + Err(e) => { + log::error!("Failed to create d3d12 allocator, error: {}", e); + Err(e)? + } + } + } + + pub(crate) fn create_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut ComPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + // It's a workaround for Intel Xe drivers. 
+ if !device.private_caps.suballocation_supported { + return super::committed::create_buffer_resource(device, desc, raw_desc, resource) + .map(|(hr, _)| (hr, None)); + } + + let location = match (is_cpu_read, is_cpu_write) { + (true, true) => MemoryLocation::CpuToGpu, + (true, false) => MemoryLocation::GpuToCpu, + (false, true) => MemoryLocation::CpuToGpu, + (false, false) => MemoryLocation::GpuOnly, + }; + + let name = desc.label.unwrap_or("Unlabeled buffer"); + + // SAFETY: allocator exists when the windows_rs feature is enabled + let mut allocator = unsafe { + device + .mem_allocator + .as_ref() + .strict_unwrap_unchecked() + .lock() + }; + + // let mut allocator = unsafe { device.mem_allocator.as_ref().unwrap_unchecked().lock() }; + let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( + allocator.allocator.device().as_winapi(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + let hr = unsafe { + device.raw.CreatePlacedResource( + allocation.heap().as_winapi() as *mut _, + allocation.offset(), + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + null_comptr_check(resource)?; + + Ok((hr, Some(AllocationWrapper { allocation }))) + } + + pub(crate) fn create_texture_resource( + device: &crate::dx12::Device, + desc: &crate::TextureDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut ComPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + // It's a workaround for Intel Xe drivers. 
+ if !device.private_caps.suballocation_supported { + return super::committed::create_texture_resource(device, desc, raw_desc, resource) + .map(|(hr, _)| (hr, None)); + } + + let location = MemoryLocation::GpuOnly; + + let name = desc.label.unwrap_or("Unlabeled texture"); + + // SAFETY: allocator exists when the windows_rs feature is enabled + let mut allocator = unsafe { + device + .mem_allocator + .as_ref() + .strict_unwrap_unchecked() + .lock() + }; + let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( + allocator.allocator.device().as_winapi(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + let hr = unsafe { + device.raw.CreatePlacedResource( + allocation.heap().as_winapi() as *mut _, + allocation.offset(), + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), // clear value + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + null_comptr_check(resource)?; + + Ok((hr, Some(AllocationWrapper { allocation }))) + } + + pub(crate) fn free_buffer_allocation( + allocation: AllocationWrapper, + allocator: &Mutex<GpuAllocatorWrapper>, + ) { + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 buffer, {e}"), + }; + } + + pub(crate) fn free_texture_allocation( + allocation: AllocationWrapper, + allocator: &Mutex<GpuAllocatorWrapper>, + ) { + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 texture, {e}"), + }; + } + + impl From<gpu_allocator::AllocationError> for crate::DeviceError { + fn from(result: gpu_allocator::AllocationError) -> Self { + match result { + gpu_allocator::AllocationError::OutOfMemory => Self::OutOfMemory, + gpu_allocator::AllocationError::FailedToMap(e) => { + log::error!("DX12 gpu-allocator: Failed to map: {}", e); + Self::Lost + } 
+ gpu_allocator::AllocationError::NoCompatibleMemoryTypeFound => { + log::error!("DX12 gpu-allocator: No Compatible Memory Type Found"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocationCreateDesc => { + log::error!("DX12 gpu-allocator: Invalid Allocation Creation Description"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocatorCreateDesc(e) => { + log::error!( + "DX12 gpu-allocator: Invalid Allocator Creation Description: {}", + e + ); + Self::Lost + } + gpu_allocator::AllocationError::Internal(e) => { + log::error!("DX12 gpu-allocator: Internal Error: {}", e); + Self::Lost + } + gpu_allocator::AllocationError::BarrierLayoutNeedsDevice10 => todo!(), + } + } + } +} + +// This is the older, slower path where it doesn't suballocate buffers. +// Tracking issue for when it can be removed: https://github.com/gfx-rs/wgpu/issues/3207 +mod committed { + use crate::dx12::null_comptr_check; + use d3d12::ComPtr; + use parking_lot::Mutex; + use std::ptr; + use winapi::{ + um::{ + d3d12::{self as d3d12_ty, ID3D12Resource}, + winnt::HRESULT, + }, + Interface, + }; + + // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_heap_flags + const D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: d3d12_ty::D3D12_HEAP_FLAGS = 0x1000; + + // Allocator isn't needed when not suballocating with gpu_allocator + #[derive(Debug)] + pub(crate) struct GpuAllocatorWrapper {} + + // Allocations aren't needed when not suballocating with gpu_allocator + #[derive(Debug)] + pub(crate) struct AllocationWrapper {} + + #[allow(unused)] + pub(crate) fn create_allocator_wrapper( + _raw: &d3d12::Device, + ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> { + Ok(None) + } + + pub(crate) fn create_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut ComPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let 
is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: if is_cpu_read { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK + } else if is_cpu_write { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + } else { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE + }, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified if !is_cpu_read && !is_cpu_write => { + d3d12_ty::D3D12_MEMORY_POOL_L1 + } + _ => d3d12_ty::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12_ty::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + null_comptr_check(resource)?; + + Ok((hr, None)) + } + + pub(crate) fn create_texture_resource( + device: &crate::dx12::Device, + _desc: &crate::TextureDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut ComPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, + crate::dx12::MemoryArchitecture::Unified { .. 
} => d3d12_ty::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12_ty::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), // clear value + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + null_comptr_check(resource)?; + + Ok((hr, None)) + } + + #[allow(unused)] + pub(crate) fn free_buffer_allocation( + _allocation: AllocationWrapper, + _allocator: &Mutex<GpuAllocatorWrapper>, + ) { + // No-op when not using gpu-allocator + } + + #[allow(unused)] + pub(crate) fn free_texture_allocation( + _allocation: AllocationWrapper, + _allocator: &Mutex<GpuAllocatorWrapper>, + ) { + // No-op when not using gpu-allocator + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/types.rs b/third_party/rust/wgpu-hal/src/dx12/types.rs new file mode 100644 index 0000000000..b4ad38324a --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/types.rs @@ -0,0 +1,43 @@ +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +// use here so that the recursive RIDL macro can find the crate +use winapi::um::unknwnbase::{IUnknown, IUnknownVtbl}; +use winapi::RIDL; + +RIDL! {#[uuid(0x63aad0b8, 0x7c24, 0x40ff, 0x85, 0xa8, 0x64, 0x0d, 0x94, 0x4c, 0xc3, 0x25)] +interface ISwapChainPanelNative(ISwapChainPanelNativeVtbl): IUnknown(IUnknownVtbl) { + fn SetSwapChain(swapChain: *const winapi::shared::dxgi1_2::IDXGISwapChain1,) -> winapi::um::winnt::HRESULT, +}} + +winapi::ENUM! { + enum D3D12_VIEW_INSTANCING_TIER { + D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED = 0, + D3D12_VIEW_INSTANCING_TIER_1 = 1, + D3D12_VIEW_INSTANCING_TIER_2 = 2, + D3D12_VIEW_INSTANCING_TIER_3 = 3, + } +} + +winapi::ENUM! 
{ + enum D3D12_COMMAND_LIST_SUPPORT_FLAGS { + D3D12_COMMAND_LIST_SUPPORT_FLAG_NONE = 0, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_DIRECT, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_BUNDLE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_COMPUTE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_COPY, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_DECODE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_PROCESS, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_ENCODE, + } +} + +winapi::STRUCT! { + struct D3D12_FEATURE_DATA_D3D12_OPTIONS3 { + CopyQueueTimestampQueriesSupported: winapi::shared::minwindef::BOOL, + CastingFullyTypedFormatSupported: winapi::shared::minwindef::BOOL, + WriteBufferImmediateSupportFlags: D3D12_COMMAND_LIST_SUPPORT_FLAGS, + ViewInstancingTier: D3D12_VIEW_INSTANCING_TIER, + BarycentricsSupported: winapi::shared::minwindef::BOOL, + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/view.rs b/third_party/rust/wgpu-hal/src/dx12/view.rs new file mode 100644 index 0000000000..ae8e5814a8 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/view.rs @@ -0,0 +1,389 @@ +use crate::auxil; +use std::mem; +use winapi::um::d3d12 as d3d12_ty; + +pub(crate) const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; + +pub(super) struct ViewDescriptor { + dimension: wgt::TextureViewDimension, + pub aspects: crate::FormatAspects, + pub rtv_dsv_format: d3d12::Format, + srv_uav_format: Option<d3d12::Format>, + multisampled: bool, + array_layer_base: u32, + array_layer_count: u32, + mip_level_base: u32, + mip_level_count: u32, +} + +impl crate::TextureViewDescriptor<'_> { + pub(super) fn to_internal(&self, texture: &super::Texture) -> ViewDescriptor { + let aspects = crate::FormatAspects::new(texture.format, self.range.aspect); + + ViewDescriptor { + dimension: self.dimension, + aspects, + rtv_dsv_format: auxil::dxgi::conv::map_texture_format(self.format), + srv_uav_format: auxil::dxgi::conv::map_texture_format_for_srv_uav(self.format, aspects), + multisampled: texture.sample_count > 1, + mip_level_base: 
self.range.base_mip_level, + mip_level_count: self.range.mip_level_count.unwrap_or(!0), + array_layer_base: self.range.base_array_layer, + array_layer_count: self.range.array_layer_count.unwrap_or(!0), + } + } +} + +fn aspects_to_plane(aspects: crate::FormatAspects) -> u32 { + match aspects { + crate::FormatAspects::STENCIL => 1, + crate::FormatAspects::PLANE_1 => 1, + crate::FormatAspects::PLANE_2 => 2, + _ => 0, + } +} + +impl ViewDescriptor { + pub(crate) unsafe fn to_srv(&self) -> Option<d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC> { + let mut desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: self.srv_uav_format?, + ViewDimension: 0, + Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + ResourceMinLODClamp: 0.0, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_SRV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + 
PlaneSlice: aspects_to_plane(self.aspects), + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_SRV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: aspects_to_plane(self.aspects), + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::Cube if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBE; + unsafe { + *desc.u.TextureCube_mut() = d3d12_ty::D3D12_TEXCUBE_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + unsafe { + *desc.u.TextureCubeArray_mut() = d3d12_ty::D3D12_TEXCUBE_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + First2DArrayFace: self.array_layer_base, + NumCubes: if self.array_layer_count == !0 { + !0 + } else { + self.array_layer_count / 6 + }, + ResourceMinLODClamp: 0.0, + } + } + } + } + + Some(desc) + } 
+ + pub(crate) unsafe fn to_uav(&self) -> Option<d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC> { + let mut desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: self.srv_uav_format?, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_UAV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_UAV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_UAV { + MipSlice: self.mip_level_base, + PlaneSlice: aspects_to_plane(self.aspects), + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_UAV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: aspects_to_plane(self.aspects), + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_UAV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube UAV") + } + } + + Some(desc) + } + + pub(crate) unsafe fn to_rtv(&self) -> d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { + let mut desc = 
d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { + Format: self.rtv_dsv_format, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_RTV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_RTV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_RTV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_RTV { + MipSlice: self.mip_level_base, + PlaneSlice: aspects_to_plane(self.aspects), + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_RTV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_RTV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: aspects_to_plane(self.aspects), + } + } + } + wgt::TextureViewDimension::D3 => { + 
desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_RTV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube RTV") + } + } + + desc + } + + pub(crate) unsafe fn to_dsv(&self, read_only: bool) -> d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { + let mut desc = d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { + Format: self.rtv_dsv_format, + ViewDimension: 0, + Flags: { + let mut flags = d3d12_ty::D3D12_DSV_FLAG_NONE; + if read_only { + if self.aspects.contains(crate::FormatAspects::DEPTH) { + flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_DEPTH; + } + if self.aspects.contains(crate::FormatAspects::STENCIL) { + flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_STENCIL; + } + } + flags + }, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_DSV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_DSV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_DSV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_DSV { + MipSlice: self.mip_level_base, + } + } + 
} + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_DSV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_DSV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D3 + | wgt::TextureViewDimension::Cube + | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube or 3D RTV") + } + } + + desc + } +} diff --git a/third_party/rust/wgpu-hal/src/empty.rs b/third_party/rust/wgpu-hal/src/empty.rs new file mode 100644 index 0000000000..d58e779b96 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/empty.rs @@ -0,0 +1,450 @@ +#![allow(unused_variables)] + +use std::ops::Range; + +#[derive(Clone, Debug)] +pub struct Api; +pub struct Context; +#[derive(Debug)] +pub struct Encoder; +#[derive(Debug)] +pub struct Resource; + +type DeviceResult<T> = Result<T, crate::DeviceError>; + +impl crate::Api for Api { + type Instance = Context; + type Surface = Context; + type Adapter = Context; + type Device = Context; + + type Queue = Context; + type CommandEncoder = Encoder; + type CommandBuffer = Resource; + + type Buffer = Resource; + type Texture = Resource; + type SurfaceTexture = Resource; + type TextureView = Resource; + type Sampler = Resource; + type QuerySet = Resource; + type Fence = Resource; + type AccelerationStructure = Resource; + + type BindGroupLayout = Resource; + type BindGroup = Resource; + type PipelineLayout = Resource; + type ShaderModule = Resource; + type RenderPipeline = Resource; + type 
ComputePipeline = Resource; +} + +impl crate::Instance<Api> for Context { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + Ok(Context) + } + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + _window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Context, crate::InstanceError> { + Ok(Context) + } + unsafe fn destroy_surface(&self, surface: Context) {} + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<Api>> { + Vec::new() + } +} + +impl crate::Surface<Api> for Context { + unsafe fn configure( + &self, + device: &Context, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + Ok(()) + } + + unsafe fn unconfigure(&self, device: &Context) {} + + unsafe fn acquire_texture( + &self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> { + Ok(None) + } + unsafe fn discard_texture(&self, texture: Resource) {} +} + +impl crate::Adapter<Api> for Context { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> DeviceResult<crate::OpenDevice<Api>> { + Err(crate::DeviceError::Lost) + } + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + crate::TextureFormatCapabilities::empty() + } + + unsafe fn surface_capabilities(&self, surface: &Context) -> Option<crate::SurfaceCapabilities> { + None + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp::INVALID_TIMESTAMP + } +} + +impl crate::Queue<Api> for Context { + unsafe fn submit( + &self, + command_buffers: &[&Resource], + surface_textures: &[&Resource], + signal_fence: Option<(&mut Resource, crate::FenceValue)>, + ) -> DeviceResult<()> { + Ok(()) + } + unsafe fn present( + &self, + surface: &Context, + texture: Resource, + ) -> Result<(), crate::SurfaceError> { + 
Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + 1.0 + } +} + +impl crate::Device<Api> for Context { + unsafe fn exit(self, queue: Context) {} + unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_buffer(&self, buffer: Resource) {} + unsafe fn map_buffer( + &self, + buffer: &Resource, + range: crate::MemoryRange, + ) -> DeviceResult<crate::BufferMapping> { + Err(crate::DeviceError::Lost) + } + unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {} + + unsafe fn create_texture(&self, desc: &crate::TextureDescriptor) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_texture(&self, texture: Resource) {} + unsafe fn create_texture_view( + &self, + texture: &Resource, + desc: &crate::TextureViewDescriptor, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_texture_view(&self, view: Resource) {} + unsafe fn create_sampler(&self, desc: &crate::SamplerDescriptor) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_sampler(&self, sampler: Resource) {} + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<Api>, + ) -> DeviceResult<Encoder> { + Ok(Encoder) + } + unsafe fn destroy_command_encoder(&self, encoder: Encoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<Api>, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} + unsafe fn create_bind_group( + &self, + desc: 
&crate::BindGroupDescriptor<Api>, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_bind_group(&self, group: Resource) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<Resource, crate::ShaderError> { + Ok(Resource) + } + unsafe fn destroy_shader_module(&self, module: Resource) {} + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<Api>, + ) -> Result<Resource, crate::PipelineError> { + Ok(Resource) + } + unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {} + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<Api>, + ) -> Result<Resource, crate::PipelineError> { + Ok(Resource) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: Resource) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_query_set(&self, set: Resource) {} + unsafe fn create_fence(&self) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_fence(&self, fence: Resource) {} + unsafe fn get_fence_value(&self, fence: &Resource) -> DeviceResult<crate::FenceValue> { + Ok(0) + } + unsafe fn wait( + &self, + fence: &Resource, + value: crate::FenceValue, + timeout_ms: u32, + ) -> DeviceResult<bool> { + Ok(true) + } + + unsafe fn start_capture(&self) -> bool { + false + } + unsafe fn stop_capture(&self) {} + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn get_acceleration_structure_build_sizes<'a>( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, Api>, + ) -> crate::AccelerationStructureBuildSizes { + Default::default() + } + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &Resource, + ) -> wgt::BufferAddress { + 
Default::default() + } + unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: Resource) {} +} + +impl crate::CommandEncoder<Api> for Encoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> DeviceResult<()> { + Ok(()) + } + unsafe fn discard_encoding(&mut self) {} + unsafe fn end_encoding(&mut self) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn reset_all<I>(&mut self, command_buffers: I) {} + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, Api>>, + { + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, Api>>, + { + } + + unsafe fn clear_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange) {} + + unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + #[cfg(webgl)] + unsafe fn copy_external_image_to_texture<T>( + &mut self, + src: &wgt::ImageCopyExternalImage, + dst: &Resource, + dst_premultiplication: bool, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + dst: &Resource, + regions: T, + ) { + } + + unsafe fn copy_buffer_to_texture<T>(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + dst: &Resource, + regions: T, + ) { + } + + unsafe fn begin_query(&mut self, set: &Resource, index: u32) {} + unsafe fn end_query(&mut self, set: &Resource, index: u32) {} + unsafe fn write_timestamp(&mut self, set: &Resource, index: u32) {} + unsafe fn reset_queries(&mut self, set: &Resource, range: Range<u32>) {} + unsafe fn copy_query_results( + &mut self, + set: &Resource, + range: Range<u32>, + buffer: &Resource, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + } + + // render + + unsafe 
fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<Api>) {} + unsafe fn end_render_pass(&mut self) {} + + unsafe fn set_bind_group( + &mut self, + layout: &Resource, + index: u32, + group: &Resource, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + } + unsafe fn set_push_constants( + &mut self, + layout: &Resource, + stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ) { + } + + unsafe fn insert_debug_marker(&mut self, label: &str) {} + unsafe fn begin_debug_marker(&mut self, group_label: &str) {} + unsafe fn end_debug_marker(&mut self) {} + + unsafe fn set_render_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, Api>, + format: wgt::IndexFormat, + ) { + } + unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: crate::BufferBinding<'a, Api>) { + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) {} + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) {} + unsafe fn set_stencil_reference(&mut self, value: u32) {} + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) {} + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + 
count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<Api>) {} + unsafe fn end_compute_pass(&mut self) {} + + unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn dispatch(&mut self, count: [u32; 3]) {} + unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} + + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + _descriptor_count: u32, + descriptors: T, + ) where + Api: 'a, + T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, Api>>, + { + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + _barriers: crate::AccelerationStructureBarrier, + ) { + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/adapter.rs b/third_party/rust/wgpu-hal/src/gles/adapter.rs new file mode 100644 index 0000000000..afa4023797 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/adapter.rs @@ -0,0 +1,1218 @@ +use glow::HasContext; +use parking_lot::Mutex; +use std::sync::{atomic::AtomicU8, Arc}; +use wgt::AstcChannel; + +use crate::auxil::db; + +// https://webgl2fundamentals.org/webgl/lessons/webgl-data-textures.html + +const GL_UNMASKED_VENDOR_WEBGL: u32 = 0x9245; +const GL_UNMASKED_RENDERER_WEBGL: u32 = 0x9246; + +impl super::Adapter { + /// Note that this function is intentionally lenient in regards to parsing, + /// and will try to recover at least the first two version numbers without + /// resulting in an `Err`. 
+ /// # Notes + /// `WebGL 2` version returned as `OpenGL ES 3.0` + fn parse_version(mut src: &str) -> Result<(u8, u8), crate::InstanceError> { + let webgl_sig = "WebGL "; + // According to the WebGL specification + // VERSION WebGL<space>1.0<space><vendor-specific information> + // SHADING_LANGUAGE_VERSION WebGL<space>GLSL<space>ES<space>1.0<space><vendor-specific information> + let is_webgl = src.starts_with(webgl_sig); + if is_webgl { + let pos = src.rfind(webgl_sig).unwrap_or(0); + src = &src[pos + webgl_sig.len()..]; + } else { + let es_sig = " ES "; + match src.rfind(es_sig) { + Some(pos) => { + src = &src[pos + es_sig.len()..]; + } + None => { + return Err(crate::InstanceError::new(format!( + "OpenGL version {src:?} does not contain 'ES'" + ))); + } + } + }; + + let glsl_es_sig = "GLSL ES "; + let is_glsl = match src.find(glsl_es_sig) { + Some(pos) => { + src = &src[pos + glsl_es_sig.len()..]; + true + } + None => false, + }; + + Self::parse_full_version(src).map(|(major, minor)| { + ( + // Return WebGL 2.0 version as OpenGL ES 3.0 + if is_webgl && !is_glsl { + major + 1 + } else { + major + }, + minor, + ) + }) + } + + /// According to the OpenGL specification, the version information is + /// expected to follow the following syntax: + /// + /// ~~~bnf + /// <major> ::= <number> + /// <minor> ::= <number> + /// <revision> ::= <number> + /// <vendor-info> ::= <string> + /// <release> ::= <major> "." <minor> ["." <release>] + /// <version> ::= <release> [" " <vendor-info>] + /// ~~~ + /// + /// Note that this function is intentionally lenient in regards to parsing, + /// and will try to recover at least the first two version numbers without + /// resulting in an `Err`. 
+ pub(super) fn parse_full_version(src: &str) -> Result<(u8, u8), crate::InstanceError> { + let (version, _vendor_info) = match src.find(' ') { + Some(i) => (&src[..i], src[i + 1..].to_string()), + None => (src, String::new()), + }; + + // TODO: make this even more lenient so that we can also accept + // `<major> "." <minor> [<???>]` + let mut it = version.split('.'); + let major = it.next().and_then(|s| s.parse().ok()); + let minor = it.next().and_then(|s| { + let trimmed = if s.starts_with('0') { + "0" + } else { + s.trim_end_matches('0') + }; + trimmed.parse().ok() + }); + + match (major, minor) { + (Some(major), Some(minor)) => Ok((major, minor)), + _ => Err(crate::InstanceError::new(format!( + "unable to extract OpenGL version from {version:?}" + ))), + } + } + + fn make_info(vendor_orig: String, renderer_orig: String) -> wgt::AdapterInfo { + let vendor = vendor_orig.to_lowercase(); + let renderer = renderer_orig.to_lowercase(); + + // opengl has no way to discern device_type, so we can try to infer it from the renderer string + let strings_that_imply_integrated = [ + " xpress", // space here is on purpose so we don't match express + "amd renoir", + "radeon hd 4200", + "radeon hd 4250", + "radeon hd 4290", + "radeon hd 4270", + "radeon hd 4225", + "radeon hd 3100", + "radeon hd 3200", + "radeon hd 3000", + "radeon hd 3300", + "radeon(tm) r4 graphics", + "radeon(tm) r5 graphics", + "radeon(tm) r6 graphics", + "radeon(tm) r7 graphics", + "radeon r7 graphics", + "nforce", // all nvidia nforce are integrated + "tegra", // all nvidia tegra are integrated + "shield", // all nvidia shield are integrated + "igp", + "mali", + "intel", + "v3d", + "apple m", // all apple m are integrated + ]; + let strings_that_imply_cpu = ["mesa offscreen", "swiftshader", "llvmpipe"]; + + //TODO: handle Intel Iris XE as discreet + let inferred_device_type = if vendor.contains("qualcomm") + || vendor.contains("intel") + || strings_that_imply_integrated + .iter() + .any(|&s| 
renderer.contains(s)) + { + wgt::DeviceType::IntegratedGpu + } else if strings_that_imply_cpu.iter().any(|&s| renderer.contains(s)) { + wgt::DeviceType::Cpu + } else { + // At this point the Device type is Unknown. + // It's most likely DiscreteGpu, but we do not know for sure. + // Use "Other" to avoid possibly making incorrect assumptions. + // Note that if this same device is available under some other API (ex: Vulkan), + // It will mostly likely get a different device type (probably DiscreteGpu). + wgt::DeviceType::Other + }; + + // source: Sascha Willems at Vulkan + let vendor_id = if vendor.contains("amd") { + db::amd::VENDOR + } else if vendor.contains("imgtec") { + db::imgtec::VENDOR + } else if vendor.contains("nvidia") { + db::nvidia::VENDOR + } else if vendor.contains("arm") { + db::arm::VENDOR + } else if vendor.contains("qualcomm") { + db::qualcomm::VENDOR + } else if vendor.contains("intel") { + db::intel::VENDOR + } else if vendor.contains("broadcom") { + db::broadcom::VENDOR + } else if vendor.contains("mesa") { + db::mesa::VENDOR + } else if vendor.contains("apple") { + db::apple::VENDOR + } else { + 0 + }; + + wgt::AdapterInfo { + name: renderer_orig, + vendor: vendor_id, + device: 0, + device_type: inferred_device_type, + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Gl, + } + } + + pub(super) unsafe fn expose( + context: super::AdapterContext, + ) -> Option<crate::ExposedAdapter<super::Api>> { + let gl = context.lock(); + let extensions = gl.supported_extensions(); + + let (vendor_const, renderer_const) = if extensions.contains("WEBGL_debug_renderer_info") { + // emscripten doesn't enable "WEBGL_debug_renderer_info" extension by default. so, we do it manually. 
+ // See https://github.com/gfx-rs/wgpu/issues/3245 for context + #[cfg(Emscripten)] + if unsafe { super::emscripten::enable_extension("WEBGL_debug_renderer_info\0") } { + (GL_UNMASKED_VENDOR_WEBGL, GL_UNMASKED_RENDERER_WEBGL) + } else { + (glow::VENDOR, glow::RENDERER) + } + // glow already enables WEBGL_debug_renderer_info on wasm32-unknown-unknown target by default. + #[cfg(not(Emscripten))] + (GL_UNMASKED_VENDOR_WEBGL, GL_UNMASKED_RENDERER_WEBGL) + } else { + (glow::VENDOR, glow::RENDERER) + }; + + let vendor = unsafe { gl.get_parameter_string(vendor_const) }; + let renderer = unsafe { gl.get_parameter_string(renderer_const) }; + let version = unsafe { gl.get_parameter_string(glow::VERSION) }; + log::debug!("Vendor: {}", vendor); + log::debug!("Renderer: {}", renderer); + log::debug!("Version: {}", version); + + let full_ver = Self::parse_full_version(&version).ok(); + let es_ver = full_ver.map_or_else(|| Self::parse_version(&version).ok(), |_| None); + + if let Some(full_ver) = full_ver { + let core_profile = (full_ver >= (3, 2)).then(|| unsafe { + gl.get_parameter_i32(glow::CONTEXT_PROFILE_MASK) + & glow::CONTEXT_CORE_PROFILE_BIT as i32 + != 0 + }); + log::trace!( + "Profile: {}", + core_profile + .map(|core_profile| if core_profile { + "Core" + } else { + "Compatibility" + }) + .unwrap_or("Legacy") + ); + } + + if es_ver.is_none() && full_ver.is_none() { + log::warn!("Unable to parse OpenGL version"); + return None; + } + + if let Some(es_ver) = es_ver { + if es_ver < (3, 0) { + log::warn!( + "Returned GLES context is {}.{}, when 3.0+ was requested", + es_ver.0, + es_ver.1 + ); + return None; + } + } + + if let Some(full_ver) = full_ver { + if full_ver < (3, 3) { + log::warn!( + "Returned GL context is {}.{}, when 3.3+ is needed", + full_ver.0, + full_ver.1 + ); + return None; + } + } + + let shading_language_version = { + let sl_version = unsafe { gl.get_parameter_string(glow::SHADING_LANGUAGE_VERSION) }; + log::debug!("SL version: {}", &sl_version); + if 
full_ver.is_some() { + let (sl_major, sl_minor) = Self::parse_full_version(&sl_version).ok()?; + let mut value = sl_major as u16 * 100 + sl_minor as u16 * 10; + // Naga doesn't think it supports GL 460+, so we cap it at 450 + if value > 450 { + value = 450; + } + naga::back::glsl::Version::Desktop(value) + } else { + let (sl_major, sl_minor) = Self::parse_version(&sl_version).ok()?; + let value = sl_major as u16 * 100 + sl_minor as u16 * 10; + naga::back::glsl::Version::Embedded { + version: value, + is_webgl: cfg!(any(webgl, Emscripten)), + } + } + }; + + log::debug!("Supported GL Extensions: {:#?}", extensions); + + let supported = |(req_es_major, req_es_minor), (req_full_major, req_full_minor)| { + let es_supported = es_ver + .map(|es_ver| es_ver >= (req_es_major, req_es_minor)) + .unwrap_or_default(); + + let full_supported = full_ver + .map(|full_ver| full_ver >= (req_full_major, req_full_minor)) + .unwrap_or_default(); + + es_supported || full_supported + }; + + let supports_storage = + supported((3, 1), (4, 3)) || extensions.contains("GL_ARB_shader_storage_buffer_object"); + let supports_compute = + supported((3, 1), (4, 3)) || extensions.contains("GL_ARB_compute_shader"); + let supports_work_group_params = supports_compute; + + // ANGLE provides renderer strings like: "ANGLE (Apple, Apple M1 Pro, OpenGL 4.1)" + let is_angle = renderer.contains("ANGLE"); + + let vertex_shader_storage_blocks = if supports_storage { + let value = + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_SHADER_STORAGE_BLOCKS) } as u32); + + if value == 0 && extensions.contains("GL_ARB_shader_storage_buffer_object") { + // The driver for AMD Radeon HD 5870 returns zero here, so assume the value matches the compute shader storage block count. + // Windows doesn't recognize `GL_MAX_VERTEX_ATTRIB_STRIDE`. 
+ let new = (unsafe { gl.get_parameter_i32(glow::MAX_COMPUTE_SHADER_STORAGE_BLOCKS) } + as u32); + log::warn!("Max vertex shader storage blocks is zero, but GL_ARB_shader_storage_buffer_object is specified. Assuming the compute value {new}"); + new + } else { + value + } + } else { + 0 + }; + let fragment_shader_storage_blocks = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_FRAGMENT_SHADER_STORAGE_BLOCKS) } as u32) + } else { + 0 + }; + let vertex_shader_storage_textures = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_IMAGE_UNIFORMS) } as u32) + } else { + 0 + }; + let fragment_shader_storage_textures = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_FRAGMENT_IMAGE_UNIFORMS) } as u32) + } else { + 0 + }; + let max_storage_block_size = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_SHADER_STORAGE_BLOCK_SIZE) } as u32) + } else { + 0 + }; + let max_element_index = unsafe { gl.get_parameter_i32(glow::MAX_ELEMENT_INDEX) } as u32; + + // WORKAROUND: In order to work around an issue with GL on RPI4 and similar, we ignore a + // zero vertex ssbo count if there are vertex sstos. (more info: + // https://github.com/gfx-rs/wgpu/pull/1607#issuecomment-874938961) The hardware does not + // want us to write to these SSBOs, but GLES cannot express that. We detect this case and + // disable writing to SSBOs. + let vertex_ssbo_false_zero = + vertex_shader_storage_blocks == 0 && vertex_shader_storage_textures != 0; + if vertex_ssbo_false_zero { + // We only care about fragment here as the 0 is a lie. + log::warn!("Max vertex shader SSBO == 0 and SSTO != 0. 
Interpreting as false zero."); + } + + let max_storage_buffers_per_shader_stage = if vertex_shader_storage_blocks == 0 { + fragment_shader_storage_blocks + } else { + vertex_shader_storage_blocks.min(fragment_shader_storage_blocks) + }; + let max_storage_textures_per_shader_stage = if vertex_shader_storage_textures == 0 { + fragment_shader_storage_textures + } else { + vertex_shader_storage_textures.min(fragment_shader_storage_textures) + }; + + let mut downlevel_flags = wgt::DownlevelFlags::empty() + | wgt::DownlevelFlags::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES + | wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES + | wgt::DownlevelFlags::COMPARISON_SAMPLERS + | wgt::DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW; + downlevel_flags.set(wgt::DownlevelFlags::COMPUTE_SHADERS, supports_compute); + downlevel_flags.set( + wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE, + max_storage_block_size != 0, + ); + downlevel_flags.set( + wgt::DownlevelFlags::INDIRECT_EXECUTION, + supported((3, 1), (4, 3)) || extensions.contains("GL_ARB_multi_draw_indirect"), + ); + downlevel_flags.set(wgt::DownlevelFlags::BASE_VERTEX, supported((3, 2), (3, 2))); + downlevel_flags.set( + wgt::DownlevelFlags::INDEPENDENT_BLEND, + supported((3, 2), (4, 0)) || extensions.contains("GL_EXT_draw_buffers_indexed"), + ); + downlevel_flags.set( + wgt::DownlevelFlags::VERTEX_STORAGE, + max_storage_block_size != 0 + && max_storage_buffers_per_shader_stage != 0 + && (vertex_shader_storage_blocks != 0 || vertex_ssbo_false_zero), + ); + downlevel_flags.set(wgt::DownlevelFlags::FRAGMENT_STORAGE, supports_storage); + if extensions.contains("EXT_texture_filter_anisotropic") + || extensions.contains("GL_EXT_texture_filter_anisotropic") + { + let max_aniso = + unsafe { gl.get_parameter_i32(glow::MAX_TEXTURE_MAX_ANISOTROPY_EXT) } as u32; + downlevel_flags.set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, max_aniso >= 16); + } + downlevel_flags.set( + 
wgt::DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED, + !(cfg!(any(webgl, Emscripten)) || is_angle), + ); + // see https://registry.khronos.org/webgl/specs/latest/2.0/#BUFFER_OBJECT_BINDING + downlevel_flags.set( + wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER, + !cfg!(any(webgl, Emscripten)), + ); + downlevel_flags.set( + wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES, + !cfg!(any(webgl, Emscripten)), + ); + downlevel_flags.set( + wgt::DownlevelFlags::FULL_DRAW_INDEX_UINT32, + max_element_index == u32::MAX, + ); + downlevel_flags.set( + wgt::DownlevelFlags::MULTISAMPLED_SHADING, + supported((3, 2), (4, 0)) || extensions.contains("OES_sample_variables"), + ); + let query_buffers = extensions.contains("GL_ARB_query_buffer_object") + || extensions.contains("GL_AMD_query_buffer_object"); + if query_buffers { + downlevel_flags.set(wgt::DownlevelFlags::NONBLOCKING_QUERY_RESOLVE, true); + } + + let mut features = wgt::Features::empty() + | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | wgt::Features::CLEAR_TEXTURE + | wgt::Features::PUSH_CONSTANTS; + features.set( + wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO, + extensions.contains("GL_EXT_texture_border_clamp") + || extensions.contains("GL_ARB_texture_border_clamp"), + ); + features.set( + wgt::Features::DEPTH_CLIP_CONTROL, + extensions.contains("GL_EXT_depth_clamp") || extensions.contains("GL_ARB_depth_clamp"), + ); + features.set( + wgt::Features::VERTEX_WRITABLE_STORAGE, + downlevel_flags.contains(wgt::DownlevelFlags::VERTEX_STORAGE) + && vertex_shader_storage_textures != 0, + ); + features.set( + wgt::Features::MULTIVIEW, + extensions.contains("OVR_multiview2") || extensions.contains("GL_OVR_multiview2"), + ); + features.set( + wgt::Features::DUAL_SOURCE_BLENDING, + extensions.contains("GL_EXT_blend_func_extended") + || extensions.contains("GL_ARB_blend_func_extended"), + ); + features.set( + wgt::Features::SHADER_PRIMITIVE_INDEX, + 
supported((3, 2), (3, 2)) + || extensions.contains("OES_geometry_shader") + || extensions.contains("GL_ARB_geometry_shader4"), + ); + features.set( + wgt::Features::SHADER_EARLY_DEPTH_TEST, + supported((3, 1), (4, 2)) || extensions.contains("GL_ARB_shader_image_load_store"), + ); + features.set(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT, true); + if extensions.contains("GL_ARB_timer_query") { + features.set(wgt::Features::TIMESTAMP_QUERY, true); + features.set(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES, true); + } + let gl_bcn_exts = [ + "GL_EXT_texture_compression_s3tc", + "GL_EXT_texture_compression_rgtc", + "GL_ARB_texture_compression_bptc", + ]; + let gles_bcn_exts = [ + "GL_EXT_texture_compression_s3tc_srgb", + "GL_EXT_texture_compression_rgtc", + "GL_EXT_texture_compression_bptc", + ]; + let webgl_bcn_exts = [ + "WEBGL_compressed_texture_s3tc", + "WEBGL_compressed_texture_s3tc_srgb", + "EXT_texture_compression_rgtc", + "EXT_texture_compression_bptc", + ]; + let bcn_exts = if cfg!(any(webgl, Emscripten)) { + &webgl_bcn_exts[..] + } else if es_ver.is_some() { + &gles_bcn_exts[..] + } else { + &gl_bcn_exts[..] + }; + features.set( + wgt::Features::TEXTURE_COMPRESSION_BC, + bcn_exts.iter().all(|&ext| extensions.contains(ext)), + ); + let has_etc = if cfg!(any(webgl, Emscripten)) { + extensions.contains("WEBGL_compressed_texture_etc") + } else { + // This is a required part of GLES3, but not part of Desktop GL at all. 
+ es_ver.is_some() + }; + features.set(wgt::Features::TEXTURE_COMPRESSION_ETC2, has_etc); + + // `OES_texture_compression_astc` provides 2D + 3D, LDR + HDR support + if extensions.contains("WEBGL_compressed_texture_astc") + || extensions.contains("GL_OES_texture_compression_astc") + { + #[cfg(webgl)] + { + if context + .glow_context + .compressed_texture_astc_supports_ldr_profile() + { + features.insert(wgt::Features::TEXTURE_COMPRESSION_ASTC); + } + if context + .glow_context + .compressed_texture_astc_supports_hdr_profile() + { + features.insert(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR); + } + } + + #[cfg(any(native, Emscripten))] + { + features.insert(wgt::Features::TEXTURE_COMPRESSION_ASTC); + features.insert(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR); + } + } else { + features.set( + wgt::Features::TEXTURE_COMPRESSION_ASTC, + extensions.contains("GL_KHR_texture_compression_astc_ldr"), + ); + features.set( + wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR, + extensions.contains("GL_KHR_texture_compression_astc_hdr"), + ); + } + + features.set( + wgt::Features::FLOAT32_FILTERABLE, + extensions.contains("GL_ARB_color_buffer_float") + || extensions.contains("GL_EXT_color_buffer_float") + || extensions.contains("OES_texture_float_linear"), + ); + + if es_ver.is_none() { + features |= wgt::Features::POLYGON_MODE_LINE | wgt::Features::POLYGON_MODE_POINT; + } + + // We *might* be able to emulate bgra8unorm-storage but currently don't attempt to. 
+ + let mut private_caps = super::PrivateCapabilities::empty(); + private_caps.set( + super::PrivateCapabilities::BUFFER_ALLOCATION, + extensions.contains("GL_EXT_buffer_storage") + || extensions.contains("GL_ARB_buffer_storage"), + ); + private_caps.set( + super::PrivateCapabilities::SHADER_BINDING_LAYOUT, + supports_compute, + ); + private_caps.set( + super::PrivateCapabilities::SHADER_TEXTURE_SHADOW_LOD, + extensions.contains("GL_EXT_texture_shadow_lod"), + ); + private_caps.set( + super::PrivateCapabilities::MEMORY_BARRIERS, + supported((3, 1), (4, 2)), + ); + private_caps.set( + super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT, + supported((3, 1), (4, 3)) || extensions.contains("GL_ARB_vertex_attrib_binding"), + ); + private_caps.set( + super::PrivateCapabilities::INDEX_BUFFER_ROLE_CHANGE, + !cfg!(any(webgl, Emscripten)), + ); + private_caps.set( + super::PrivateCapabilities::GET_BUFFER_SUB_DATA, + cfg!(any(webgl, Emscripten)) || full_ver.is_some(), + ); + let color_buffer_float = extensions.contains("GL_EXT_color_buffer_float") + || extensions.contains("GL_ARB_color_buffer_float") + || extensions.contains("EXT_color_buffer_float"); + let color_buffer_half_float = extensions.contains("GL_EXT_color_buffer_half_float") + || extensions.contains("GL_ARB_half_float_pixel"); + private_caps.set( + super::PrivateCapabilities::COLOR_BUFFER_HALF_FLOAT, + color_buffer_half_float || color_buffer_float, + ); + private_caps.set( + super::PrivateCapabilities::COLOR_BUFFER_FLOAT, + color_buffer_float, + ); + private_caps.set(super::PrivateCapabilities::QUERY_BUFFERS, query_buffers); + private_caps.set(super::PrivateCapabilities::QUERY_64BIT, full_ver.is_some()); + private_caps.set( + super::PrivateCapabilities::TEXTURE_STORAGE, + supported((3, 0), (4, 2)), + ); + private_caps.set(super::PrivateCapabilities::DEBUG_FNS, gl.supports_debug()); + private_caps.set( + super::PrivateCapabilities::INVALIDATE_FRAMEBUFFER, + supported((3, 0), (4, 3)), + ); + if let Some(full_ver) = 
full_ver { + let supported = + full_ver >= (4, 2) && extensions.contains("GL_ARB_shader_draw_parameters"); + private_caps.set( + super::PrivateCapabilities::FULLY_FEATURED_INSTANCING, + supported, + ); + // Desktop 4.2 and greater specify the first instance parameter. + // + // For all other versions, the behavior is undefined. + // + // We only support indirect first instance when we also have ARB_shader_draw_parameters as + // that's the only way to get gl_InstanceID to work correctly. + features.set(wgt::Features::INDIRECT_FIRST_INSTANCE, supported); + } + + let max_texture_size = unsafe { gl.get_parameter_i32(glow::MAX_TEXTURE_SIZE) } as u32; + let max_texture_3d_size = unsafe { gl.get_parameter_i32(glow::MAX_3D_TEXTURE_SIZE) } as u32; + + let min_uniform_buffer_offset_alignment = + (unsafe { gl.get_parameter_i32(glow::UNIFORM_BUFFER_OFFSET_ALIGNMENT) } as u32); + let min_storage_buffer_offset_alignment = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT) } as u32) + } else { + 256 + }; + let max_uniform_buffers_per_shader_stage = + unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_UNIFORM_BLOCKS) } + .min(unsafe { gl.get_parameter_i32(glow::MAX_FRAGMENT_UNIFORM_BLOCKS) }) + as u32; + + let max_compute_workgroups_per_dimension = if supports_work_group_params { + unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 0) } + .min(unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 1) }) + .min(unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 2) }) + as u32 + } else { + 0 + }; + + let limits = wgt::Limits { + max_texture_dimension_1d: max_texture_size, + max_texture_dimension_2d: max_texture_size, + max_texture_dimension_3d: max_texture_3d_size, + max_texture_array_layers: unsafe { + gl.get_parameter_i32(glow::MAX_ARRAY_TEXTURE_LAYERS) + } as u32, + max_bind_groups: crate::MAX_BIND_GROUPS as u32, + max_bindings_per_bind_group: 65535, + 
max_dynamic_uniform_buffers_per_pipeline_layout: max_uniform_buffers_per_shader_stage, + max_dynamic_storage_buffers_per_pipeline_layout: max_storage_buffers_per_shader_stage, + max_sampled_textures_per_shader_stage: super::MAX_TEXTURE_SLOTS as u32, + max_samplers_per_shader_stage: super::MAX_SAMPLERS as u32, + max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage, + max_uniform_buffers_per_shader_stage, + max_uniform_buffer_binding_size: unsafe { + gl.get_parameter_i32(glow::MAX_UNIFORM_BLOCK_SIZE) + } as u32, + max_storage_buffer_binding_size: if supports_storage { + unsafe { gl.get_parameter_i32(glow::MAX_SHADER_STORAGE_BLOCK_SIZE) } + } else { + 0 + } as u32, + max_vertex_buffers: if private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIB_BINDINGS) } as u32) + } else { + 16 // should this be different? + } + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIBS) } + as u32) + .min(super::MAX_VERTEX_ATTRIBUTES as u32), + max_vertex_buffer_array_stride: if private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + if let Some(full_ver) = full_ver { + if full_ver >= (4, 4) { + // We can query `GL_MAX_VERTEX_ATTRIB_STRIDE` in OpenGL 4.4+ + let value = + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIB_STRIDE) }) + as u32; + + if value == 0 { + // This should be at least 2048, but the driver for AMD Radeon HD 5870 on + // Windows doesn't recognize `GL_MAX_VERTEX_ATTRIB_STRIDE`. + + log::warn!("Max vertex attribute stride is 0. Assuming it is 2048"); + 2048 + } else { + value + } + } else { + log::warn!("Max vertex attribute stride unknown. 
Assuming it is 2048"); + 2048 + } + } else { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIB_STRIDE) }) as u32 + } + } else { + !0 + }, + max_push_constant_size: super::MAX_PUSH_CONSTANTS as u32 * 4, + min_uniform_buffer_offset_alignment, + min_storage_buffer_offset_alignment, + max_inter_stage_shader_components: unsafe { + gl.get_parameter_i32(glow::MAX_VARYING_COMPONENTS) + } as u32, + max_compute_workgroup_storage_size: if supports_work_group_params { + (unsafe { gl.get_parameter_i32(glow::MAX_COMPUTE_SHARED_MEMORY_SIZE) } as u32) + } else { + 0 + }, + max_compute_invocations_per_workgroup: if supports_work_group_params { + (unsafe { gl.get_parameter_i32(glow::MAX_COMPUTE_WORK_GROUP_INVOCATIONS) } as u32) + } else { + 0 + }, + max_compute_workgroup_size_x: if supports_work_group_params { + (unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 0) } + as u32) + } else { + 0 + }, + max_compute_workgroup_size_y: if supports_work_group_params { + (unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 1) } + as u32) + } else { + 0 + }, + max_compute_workgroup_size_z: if supports_work_group_params { + (unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 2) } + as u32) + } else { + 0 + }, + max_compute_workgroups_per_dimension, + max_buffer_size: i32::MAX as u64, + max_non_sampler_bindings: std::u32::MAX, + }; + + let mut workarounds = super::Workarounds::empty(); + + workarounds.set( + super::Workarounds::EMULATE_BUFFER_MAP, + cfg!(any(webgl, Emscripten)), + ); + + let r = renderer.to_lowercase(); + // Check for Mesa sRGB clear bug. See + // [`super::PrivateCapabilities::MESA_I915_SRGB_SHADER_CLEAR`]. + if context.is_owned() + && r.contains("mesa") + && r.contains("intel") + && r.split(&[' ', '(', ')'][..]) + .any(|substr| substr.len() == 3 && substr.chars().nth(2) == Some('l')) + { + log::warn!( + "Detected skylake derivative running on mesa i915. 
Clears to srgb textures will \ + use manual shader clears." + ); + workarounds.set(super::Workarounds::MESA_I915_SRGB_SHADER_CLEAR, true); + } + + let downlevel_defaults = wgt::DownlevelLimits {}; + + // Drop the GL guard so we can move the context into AdapterShared + // ( on Wasm the gl handle is just a ref so we tell clippy to allow + // dropping the ref ) + #[cfg_attr(target_arch = "wasm32", allow(dropping_references))] + drop(gl); + + Some(crate::ExposedAdapter { + adapter: super::Adapter { + shared: Arc::new(super::AdapterShared { + context, + private_caps, + workarounds, + features, + shading_language_version, + next_shader_id: Default::default(), + program_cache: Default::default(), + es: es_ver.is_some(), + }), + }, + info: Self::make_info(vendor, renderer), + features, + capabilities: crate::Capabilities { + limits, + downlevel: wgt::DownlevelCapabilities { + flags: downlevel_flags, + limits: downlevel_defaults, + shader_model: wgt::ShaderModel::Sm5, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(4).unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(), + }, + }, + }) + } + + unsafe fn compile_shader( + source: &str, + gl: &glow::Context, + shader_type: u32, + es: bool, + ) -> Option<glow::Shader> { + let source = if es { + format!("#version 300 es\nprecision lowp float;\n{source}") + } else { + format!("#version 130\n{source}") + }; + let shader = unsafe { gl.create_shader(shader_type) }.expect("Could not create shader"); + unsafe { gl.shader_source(shader, &source) }; + unsafe { gl.compile_shader(shader) }; + + if !unsafe { gl.get_shader_compile_status(shader) } { + let msg = unsafe { gl.get_shader_info_log(shader) }; + if !msg.is_empty() { + log::error!("\tShader compile error: {}", msg); + } + unsafe { gl.delete_shader(shader) }; + None + } else { + Some(shader) + } + } + + unsafe fn create_shader_clear_program( + gl: &glow::Context, + es: bool, + ) -> Option<(glow::Program, glow::UniformLocation)> { + let 
program = unsafe { gl.create_program() }.expect("Could not create shader program"); + let vertex = unsafe { + Self::compile_shader( + include_str!("./shaders/clear.vert"), + gl, + glow::VERTEX_SHADER, + es, + )? + }; + let fragment = unsafe { + Self::compile_shader( + include_str!("./shaders/clear.frag"), + gl, + glow::FRAGMENT_SHADER, + es, + )? + }; + unsafe { gl.attach_shader(program, vertex) }; + unsafe { gl.attach_shader(program, fragment) }; + unsafe { gl.link_program(program) }; + + let linked_ok = unsafe { gl.get_program_link_status(program) }; + let msg = unsafe { gl.get_program_info_log(program) }; + if !msg.is_empty() { + log::warn!("Shader link error: {}", msg); + } + if !linked_ok { + return None; + } + + let color_uniform_location = unsafe { gl.get_uniform_location(program, "color") } + .expect("Could not find color uniform in shader clear shader"); + unsafe { gl.delete_shader(vertex) }; + unsafe { gl.delete_shader(fragment) }; + + Some((program, color_uniform_location)) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let gl = &self.shared.context.lock(); + unsafe { gl.pixel_store_i32(glow::UNPACK_ALIGNMENT, 1) }; + unsafe { gl.pixel_store_i32(glow::PACK_ALIGNMENT, 1) }; + let main_vao = + unsafe { gl.create_vertex_array() }.map_err(|_| crate::DeviceError::OutOfMemory)?; + unsafe { gl.bind_vertex_array(Some(main_vao)) }; + + let zero_buffer = + unsafe { gl.create_buffer() }.map_err(|_| crate::DeviceError::OutOfMemory)?; + unsafe { gl.bind_buffer(glow::COPY_READ_BUFFER, Some(zero_buffer)) }; + let zeroes = vec![0u8; super::ZERO_BUFFER_SIZE]; + unsafe { gl.buffer_data_u8_slice(glow::COPY_READ_BUFFER, &zeroes, glow::STATIC_DRAW) }; + + // Compile the shader program we use for doing manual clears to work around Mesa fastclear + // bug. 
+ + let (shader_clear_program, shader_clear_program_color_uniform_location) = unsafe { + Self::create_shader_clear_program(gl, self.shared.es) + .ok_or(crate::DeviceError::ResourceCreationFailed)? + }; + + Ok(crate::OpenDevice { + device: super::Device { + shared: Arc::clone(&self.shared), + main_vao, + #[cfg(all(native, feature = "renderdoc"))] + render_doc: Default::default(), + }, + queue: super::Queue { + shared: Arc::clone(&self.shared), + features, + draw_fbo: unsafe { gl.create_framebuffer() } + .map_err(|_| crate::DeviceError::OutOfMemory)?, + copy_fbo: unsafe { gl.create_framebuffer() } + .map_err(|_| crate::DeviceError::OutOfMemory)?, + shader_clear_program, + shader_clear_program_color_uniform_location, + zero_buffer, + temp_query_results: Mutex::new(Vec::new()), + draw_buffer_count: AtomicU8::new(1), + current_index_buffer: Mutex::new(None), + }, + }) + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + use wgt::TextureFormat as Tf; + + let sample_count = { + let max_samples = unsafe { + self.shared + .context + .lock() + .get_parameter_i32(glow::MAX_SAMPLES) + }; + if max_samples >= 16 { + Tfc::MULTISAMPLE_X2 + | Tfc::MULTISAMPLE_X4 + | Tfc::MULTISAMPLE_X8 + | Tfc::MULTISAMPLE_X16 + } else if max_samples >= 8 { + Tfc::MULTISAMPLE_X2 | Tfc::MULTISAMPLE_X4 | Tfc::MULTISAMPLE_X8 + } else { + // The lowest supported level in GLE3.0/WebGL2 is 4X + // (see GL_MAX_SAMPLES in https://registry.khronos.org/OpenGL-Refpages/es3.0/html/glGet.xhtml). + // On some platforms, like iOS Safari, `get_parameter_i32(MAX_SAMPLES)` returns 0, + // so we always fall back to supporting 4x here. + Tfc::MULTISAMPLE_X2 | Tfc::MULTISAMPLE_X4 + } + }; + + // Base types are pulled from the table in the OpenGLES 3.0 spec in section 3.8. + // + // The storage types are based on table 8.26, in section + // "TEXTURE IMAGE LOADS AND STORES" of OpenGLES-3.2 spec. 
+ let empty = Tfc::empty(); + let base = Tfc::COPY_SRC | Tfc::COPY_DST; + let unfilterable = base | Tfc::SAMPLED; + let depth = base | Tfc::SAMPLED | sample_count | Tfc::DEPTH_STENCIL_ATTACHMENT; + let filterable = unfilterable | Tfc::SAMPLED_LINEAR; + let renderable = + unfilterable | Tfc::COLOR_ATTACHMENT | sample_count | Tfc::MULTISAMPLE_RESOLVE; + let filterable_renderable = filterable | renderable | Tfc::COLOR_ATTACHMENT_BLEND; + let storage = base | Tfc::STORAGE | Tfc::STORAGE_READ_WRITE; + + let feature_fn = |f, caps| { + if self.shared.features.contains(f) { + caps + } else { + empty + } + }; + + let bcn_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_BC, filterable); + let etc2_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_ETC2, filterable); + let astc_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_ASTC, filterable); + let astc_hdr_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR, filterable); + + let private_caps_fn = |f, caps| { + if self.shared.private_caps.contains(f) { + caps + } else { + empty + } + }; + + let half_float_renderable = private_caps_fn( + super::PrivateCapabilities::COLOR_BUFFER_HALF_FLOAT, + Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | sample_count + | Tfc::MULTISAMPLE_RESOLVE, + ); + + let float_renderable = private_caps_fn( + super::PrivateCapabilities::COLOR_BUFFER_FLOAT, + Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | sample_count + | Tfc::MULTISAMPLE_RESOLVE, + ); + + let texture_float_linear = feature_fn(wgt::Features::FLOAT32_FILTERABLE, filterable); + + match format { + Tf::R8Unorm => filterable_renderable, + Tf::R8Snorm => filterable, + Tf::R8Uint => renderable, + Tf::R8Sint => renderable, + Tf::R16Uint => renderable, + Tf::R16Sint => renderable, + Tf::R16Unorm => empty, + Tf::R16Snorm => empty, + Tf::R16Float => filterable | half_float_renderable, + Tf::Rg8Unorm => filterable_renderable, + Tf::Rg8Snorm => filterable, + Tf::Rg8Uint => renderable, + 
Tf::Rg8Sint => renderable, + Tf::R32Uint => renderable | storage, + Tf::R32Sint => renderable | storage, + Tf::R32Float => unfilterable | storage | float_renderable | texture_float_linear, + Tf::Rg16Uint => renderable, + Tf::Rg16Sint => renderable, + Tf::Rg16Unorm => empty, + Tf::Rg16Snorm => empty, + Tf::Rg16Float => filterable | half_float_renderable, + Tf::Rgba8Unorm => filterable_renderable | storage, + Tf::Rgba8UnormSrgb => filterable_renderable, + Tf::Bgra8Unorm | Tf::Bgra8UnormSrgb => filterable_renderable, + Tf::Rgba8Snorm => filterable | storage, + Tf::Rgba8Uint => renderable | storage, + Tf::Rgba8Sint => renderable | storage, + Tf::Rgb10a2Uint => renderable, + Tf::Rgb10a2Unorm => filterable_renderable, + Tf::Rg11b10Float => filterable | float_renderable, + Tf::Rg32Uint => renderable, + Tf::Rg32Sint => renderable, + Tf::Rg32Float => unfilterable | float_renderable | texture_float_linear, + Tf::Rgba16Uint => renderable | storage, + Tf::Rgba16Sint => renderable | storage, + Tf::Rgba16Unorm => empty, + Tf::Rgba16Snorm => empty, + Tf::Rgba16Float => filterable | storage | half_float_renderable, + Tf::Rgba32Uint => renderable | storage, + Tf::Rgba32Sint => renderable | storage, + Tf::Rgba32Float => unfilterable | storage | float_renderable | texture_float_linear, + Tf::Stencil8 + | Tf::Depth16Unorm + | Tf::Depth32Float + | Tf::Depth32FloatStencil8 + | Tf::Depth24Plus + | Tf::Depth24PlusStencil8 => depth, + Tf::NV12 => empty, + Tf::Rgb9e5Ufloat => filterable, + Tf::Bc1RgbaUnorm + | Tf::Bc1RgbaUnormSrgb + | Tf::Bc2RgbaUnorm + | Tf::Bc2RgbaUnormSrgb + | Tf::Bc3RgbaUnorm + | Tf::Bc3RgbaUnormSrgb + | Tf::Bc4RUnorm + | Tf::Bc4RSnorm + | Tf::Bc5RgUnorm + | Tf::Bc5RgSnorm + | Tf::Bc6hRgbFloat + | Tf::Bc6hRgbUfloat + | Tf::Bc7RgbaUnorm + | Tf::Bc7RgbaUnormSrgb => bcn_features, + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | 
Tf::EacRg11Unorm + | Tf::EacRg11Snorm => etc2_features, + Tf::Astc { + block: _, + channel: AstcChannel::Unorm | AstcChannel::UnormSrgb, + } => astc_features, + Tf::Astc { + block: _, + channel: AstcChannel::Hdr, + } => astc_hdr_features, + } + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + if surface.presentable { + let mut formats = vec![ + wgt::TextureFormat::Rgba8Unorm, + #[cfg(native)] + wgt::TextureFormat::Bgra8Unorm, + ]; + if surface.supports_srgb() { + formats.extend([ + wgt::TextureFormat::Rgba8UnormSrgb, + #[cfg(native)] + wgt::TextureFormat::Bgra8UnormSrgb, + ]) + } + if self + .shared + .private_caps + .contains(super::PrivateCapabilities::COLOR_BUFFER_HALF_FLOAT) + { + formats.push(wgt::TextureFormat::Rgba16Float) + } + + Some(crate::SurfaceCapabilities { + formats, + present_modes: if cfg!(windows) { + vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate] + } else { + vec![wgt::PresentMode::Fifo] //TODO + }, + composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO + maximum_frame_latency: 2..=2, //TODO, unused currently + current_extent: None, + usage: crate::TextureUses::COLOR_TARGET, + }) + } else { + None + } + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp::INVALID_TIMESTAMP + } +} + +impl super::AdapterShared { + pub(super) unsafe fn get_buffer_sub_data( + &self, + gl: &glow::Context, + target: u32, + offset: i32, + dst_data: &mut [u8], + ) { + if self + .private_caps + .contains(super::PrivateCapabilities::GET_BUFFER_SUB_DATA) + { + unsafe { gl.get_buffer_sub_data(target, offset, dst_data) }; + } else { + log::error!("Fake map"); + let length = dst_data.len(); + let buffer_mapping = + unsafe { gl.map_buffer_range(target, offset, length as _, glow::MAP_READ_BIT) }; + + unsafe { std::ptr::copy_nonoverlapping(buffer_mapping, dst_data.as_mut_ptr(), length) }; + + unsafe { gl.unmap_buffer(target) 
}; + } + } +} + +#[cfg(send_sync)] +unsafe impl Sync for super::Adapter {} +#[cfg(send_sync)] +unsafe impl Send for super::Adapter {} + +#[cfg(test)] +mod tests { + use super::super::Adapter; + + #[test] + fn test_version_parse() { + Adapter::parse_version("1").unwrap_err(); + Adapter::parse_version("1.").unwrap_err(); + Adapter::parse_version("1 h3l1o. W0rld").unwrap_err(); + Adapter::parse_version("1. h3l1o. W0rld").unwrap_err(); + Adapter::parse_version("1.2.3").unwrap_err(); + + assert_eq!(Adapter::parse_version("OpenGL ES 3.1").unwrap(), (3, 1)); + assert_eq!( + Adapter::parse_version("OpenGL ES 2.0 Google Nexus").unwrap(), + (2, 0) + ); + assert_eq!(Adapter::parse_version("GLSL ES 1.1").unwrap(), (1, 1)); + assert_eq!( + Adapter::parse_version("OpenGL ES GLSL ES 3.20").unwrap(), + (3, 2) + ); + assert_eq!( + // WebGL 2.0 should parse as OpenGL ES 3.0 + Adapter::parse_version("WebGL 2.0 (OpenGL ES 3.0 Chromium)").unwrap(), + (3, 0) + ); + assert_eq!( + Adapter::parse_version("WebGL GLSL ES 3.00 (OpenGL ES GLSL ES 3.0 Chromium)").unwrap(), + (3, 0) + ); + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/command.rs b/third_party/rust/wgpu-hal/src/gles/command.rs new file mode 100644 index 0000000000..926122e4ad --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/command.rs @@ -0,0 +1,1187 @@ +use super::{conv, Command as C}; +use arrayvec::ArrayVec; +use std::{mem, ops::Range}; + +#[derive(Clone, Copy, Debug, Default)] +struct TextureSlotDesc { + tex_target: super::BindTarget, + sampler_index: Option<u8>, +} + +pub(super) struct State { + topology: u32, + primitive: super::PrimitiveState, + index_format: wgt::IndexFormat, + index_offset: wgt::BufferAddress, + vertex_buffers: + [(super::VertexBufferDesc, Option<super::BufferBinding>); crate::MAX_VERTEX_BUFFERS], + vertex_attributes: ArrayVec<super::AttributeDesc, { super::MAX_VERTEX_ATTRIBUTES }>, + color_targets: ArrayVec<super::ColorTargetDesc, { crate::MAX_COLOR_ATTACHMENTS }>, + stencil: 
super::StencilState, + depth_bias: wgt::DepthBiasState, + alpha_to_coverage_enabled: bool, + samplers: [Option<glow::Sampler>; super::MAX_SAMPLERS], + texture_slots: [TextureSlotDesc; super::MAX_TEXTURE_SLOTS], + render_size: wgt::Extent3d, + resolve_attachments: ArrayVec<(u32, super::TextureView), { crate::MAX_COLOR_ATTACHMENTS }>, + invalidate_attachments: ArrayVec<u32, { crate::MAX_COLOR_ATTACHMENTS + 2 }>, + has_pass_label: bool, + instance_vbuf_mask: usize, + dirty_vbuf_mask: usize, + active_first_instance: u32, + first_instance_location: Option<glow::UniformLocation>, + push_constant_descs: ArrayVec<super::PushConstantDesc, { super::MAX_PUSH_CONSTANT_COMMANDS }>, + // The current state of the push constant data block. + current_push_constant_data: [u32; super::MAX_PUSH_CONSTANTS], + end_of_pass_timestamp: Option<glow::Query>, +} + +impl Default for State { + fn default() -> Self { + Self { + topology: Default::default(), + primitive: Default::default(), + index_format: Default::default(), + index_offset: Default::default(), + vertex_buffers: Default::default(), + vertex_attributes: Default::default(), + color_targets: Default::default(), + stencil: Default::default(), + depth_bias: Default::default(), + alpha_to_coverage_enabled: Default::default(), + samplers: Default::default(), + texture_slots: Default::default(), + render_size: Default::default(), + resolve_attachments: Default::default(), + invalidate_attachments: Default::default(), + has_pass_label: Default::default(), + instance_vbuf_mask: Default::default(), + dirty_vbuf_mask: Default::default(), + active_first_instance: Default::default(), + first_instance_location: Default::default(), + push_constant_descs: Default::default(), + current_push_constant_data: [0; super::MAX_PUSH_CONSTANTS], + end_of_pass_timestamp: Default::default(), + } + } +} + +impl super::CommandBuffer { + fn clear(&mut self) { + self.label = None; + self.commands.clear(); + self.data_bytes.clear(); + self.queries.clear(); + } + 
+ fn add_marker(&mut self, marker: &str) -> Range<u32> { + let start = self.data_bytes.len() as u32; + self.data_bytes.extend(marker.as_bytes()); + start..self.data_bytes.len() as u32 + } + + fn add_push_constant_data(&mut self, data: &[u32]) -> Range<u32> { + let data_raw = unsafe { + std::slice::from_raw_parts(data.as_ptr() as *const _, mem::size_of_val(data)) + }; + let start = self.data_bytes.len(); + assert!(start < u32::MAX as usize); + self.data_bytes.extend_from_slice(data_raw); + let end = self.data_bytes.len(); + assert!(end < u32::MAX as usize); + (start as u32)..(end as u32) + } +} + +impl super::CommandEncoder { + fn rebind_stencil_func(&mut self) { + fn make(s: &super::StencilSide, face: u32) -> C { + C::SetStencilFunc { + face, + function: s.function, + reference: s.reference, + read_mask: s.mask_read, + } + } + + let s = &self.state.stencil; + if s.front.function == s.back.function + && s.front.mask_read == s.back.mask_read + && s.front.reference == s.back.reference + { + self.cmd_buffer + .commands + .push(make(&s.front, glow::FRONT_AND_BACK)); + } else { + self.cmd_buffer.commands.push(make(&s.front, glow::FRONT)); + self.cmd_buffer.commands.push(make(&s.back, glow::BACK)); + } + } + + fn rebind_vertex_data(&mut self, first_instance: u32) { + if self + .private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + for (index, pair) in self.state.vertex_buffers.iter().enumerate() { + if self.state.dirty_vbuf_mask & (1 << index) == 0 { + continue; + } + let (buffer_desc, vb) = match *pair { + // Not all dirty bindings are necessarily filled. Some may be unused. 
+ (_, None) => continue, + (ref vb_desc, Some(ref vb)) => (vb_desc.clone(), vb), + }; + let instance_offset = match buffer_desc.step { + wgt::VertexStepMode::Vertex => 0, + wgt::VertexStepMode::Instance => first_instance * buffer_desc.stride, + }; + + self.cmd_buffer.commands.push(C::SetVertexBuffer { + index: index as u32, + buffer: super::BufferBinding { + raw: vb.raw, + offset: vb.offset + instance_offset as wgt::BufferAddress, + }, + buffer_desc, + }); + self.state.dirty_vbuf_mask ^= 1 << index; + } + } else { + let mut vbuf_mask = 0; + for attribute in self.state.vertex_attributes.iter() { + if self.state.dirty_vbuf_mask & (1 << attribute.buffer_index) == 0 { + continue; + } + let (buffer_desc, vb) = + match self.state.vertex_buffers[attribute.buffer_index as usize] { + // Not all dirty bindings are necessarily filled. Some may be unused. + (_, None) => continue, + (ref vb_desc, Some(ref vb)) => (vb_desc.clone(), vb), + }; + + let mut attribute_desc = attribute.clone(); + attribute_desc.offset += vb.offset as u32; + if buffer_desc.step == wgt::VertexStepMode::Instance { + attribute_desc.offset += buffer_desc.stride * first_instance; + } + + self.cmd_buffer.commands.push(C::SetVertexAttribute { + buffer: Some(vb.raw), + buffer_desc, + attribute_desc, + }); + vbuf_mask |= 1 << attribute.buffer_index; + } + self.state.dirty_vbuf_mask ^= vbuf_mask; + } + } + + fn rebind_sampler_states(&mut self, dirty_textures: u32, dirty_samplers: u32) { + for (texture_index, slot) in self.state.texture_slots.iter().enumerate() { + if dirty_textures & (1 << texture_index) != 0 + || slot + .sampler_index + .map_or(false, |si| dirty_samplers & (1 << si) != 0) + { + let sampler = slot + .sampler_index + .and_then(|si| self.state.samplers[si as usize]); + self.cmd_buffer + .commands + .push(C::BindSampler(texture_index as u32, sampler)); + } + } + } + + fn prepare_draw(&mut self, first_instance: u32) { + // If we support fully featured instancing, we want to bind everything as normal 
+ // and let the draw call sort it out. + let emulated_first_instance_value = if self + .private_caps + .contains(super::PrivateCapabilities::FULLY_FEATURED_INSTANCING) + { + 0 + } else { + first_instance + }; + + if emulated_first_instance_value != self.state.active_first_instance { + // rebind all per-instance buffers on first-instance change + self.state.dirty_vbuf_mask |= self.state.instance_vbuf_mask; + self.state.active_first_instance = emulated_first_instance_value; + } + if self.state.dirty_vbuf_mask != 0 { + self.rebind_vertex_data(emulated_first_instance_value); + } + } + + #[allow(clippy::clone_on_copy)] // False positive when cloning glow::UniformLocation + fn set_pipeline_inner(&mut self, inner: &super::PipelineInner) { + self.cmd_buffer.commands.push(C::SetProgram(inner.program)); + + self.state.first_instance_location = inner.first_instance_location.clone(); + self.state.push_constant_descs = inner.push_constant_descs.clone(); + + // rebind textures, if needed + let mut dirty_textures = 0u32; + for (texture_index, (slot, &sampler_index)) in self + .state + .texture_slots + .iter_mut() + .zip(inner.sampler_map.iter()) + .enumerate() + { + if slot.sampler_index != sampler_index { + slot.sampler_index = sampler_index; + dirty_textures |= 1 << texture_index; + } + } + if dirty_textures != 0 { + self.rebind_sampler_states(dirty_textures, 0); + } + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + self.state = State::default(); + self.cmd_buffer.label = label.map(str::to_string); + Ok(()) + } + unsafe fn discard_encoding(&mut self) { + self.cmd_buffer.clear(); + } + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + Ok(mem::take(&mut self.cmd_buffer)) + } + unsafe fn reset_all<I>(&mut self, _command_buffers: I) { + //TODO: could re-use the allocations in all these command buffers + } + + unsafe fn 
transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + if !self + .private_caps + .contains(super::PrivateCapabilities::MEMORY_BARRIERS) + { + return; + } + for bar in barriers { + // GLES only synchronizes storage -> anything explicitly + if !bar + .usage + .start + .contains(crate::BufferUses::STORAGE_READ_WRITE) + { + continue; + } + self.cmd_buffer + .commands + .push(C::BufferBarrier(bar.buffer.raw.unwrap(), bar.usage.end)); + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + if !self + .private_caps + .contains(super::PrivateCapabilities::MEMORY_BARRIERS) + { + return; + } + + let mut combined_usage = crate::TextureUses::empty(); + for bar in barriers { + // GLES only synchronizes storage -> anything explicitly + if !bar + .usage + .start + .contains(crate::TextureUses::STORAGE_READ_WRITE) + { + continue; + } + // unlike buffers, there is no need for a concrete texture + // object to be bound anywhere for a barrier + combined_usage |= bar.usage.end; + } + + if !combined_usage.is_empty() { + self.cmd_buffer + .commands + .push(C::TextureBarrier(combined_usage)); + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + self.cmd_buffer.commands.push(C::ClearBuffer { + dst: buffer.clone(), + dst_target: buffer.target, + range, + }); + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let (src_target, dst_target) = if src.target == dst.target { + (glow::COPY_READ_BUFFER, glow::COPY_WRITE_BUFFER) + } else { + (src.target, dst.target) + }; + for copy in regions { + self.cmd_buffer.commands.push(C::CopyBufferToBuffer { + src: src.clone(), + src_target, + dst: dst.clone(), + dst_target, + copy, + }) + } + } + + #[cfg(webgl)] + unsafe fn 
copy_external_image_to_texture<T>( + &mut self, + src: &wgt::ImageCopyExternalImage, + dst: &super::Texture, + dst_premultiplication: bool, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let (dst_raw, dst_target) = dst.inner.as_native(); + for copy in regions { + self.cmd_buffer + .commands + .push(C::CopyExternalImageToTexture { + src: src.clone(), + dst: dst_raw, + dst_target, + dst_format: dst.format, + dst_premultiplication, + copy, + }) + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let (src_raw, src_target) = src.inner.as_native(); + let (dst_raw, dst_target) = dst.inner.as_native(); + for mut copy in regions { + copy.clamp_size_to_virtual(&src.copy_size, &dst.copy_size); + self.cmd_buffer.commands.push(C::CopyTextureToTexture { + src: src_raw, + src_target, + dst: dst_raw, + dst_target, + copy, + }) + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let (dst_raw, dst_target) = dst.inner.as_native(); + + for mut copy in regions { + copy.clamp_size_to_virtual(&dst.copy_size); + self.cmd_buffer.commands.push(C::CopyBufferToTexture { + src: src.clone(), + src_target: src.target, + dst: dst_raw, + dst_target, + dst_format: dst.format, + copy, + }) + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let (src_raw, src_target) = src.inner.as_native(); + for mut copy in regions { + copy.clamp_size_to_virtual(&src.copy_size); + self.cmd_buffer.commands.push(C::CopyTextureToBuffer { + src: src_raw, + src_target, + src_format: src.format, + dst: dst.clone(), + dst_target: 
dst.target, + copy, + }) + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + let query = set.queries[index as usize]; + self.cmd_buffer + .commands + .push(C::BeginQuery(query, set.target)); + } + unsafe fn end_query(&mut self, set: &super::QuerySet, _index: u32) { + self.cmd_buffer.commands.push(C::EndQuery(set.target)); + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + let query = set.queries[index as usize]; + self.cmd_buffer.commands.push(C::TimestampQuery(query)); + } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range<u32>) { + //TODO: what do we do here? + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _stride: wgt::BufferSize, + ) { + let start = self.cmd_buffer.queries.len(); + self.cmd_buffer + .queries + .extend_from_slice(&set.queries[range.start as usize..range.end as usize]); + let query_range = start as u32..self.cmd_buffer.queries.len() as u32; + self.cmd_buffer.commands.push(C::CopyQueryResults { + query_range, + dst: buffer.clone(), + dst_target: buffer.target, + dst_offset: offset, + }); + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + debug_assert!(self.state.end_of_pass_timestamp.is_none()); + if let Some(ref t) = desc.timestamp_writes { + if let Some(index) = t.beginning_of_pass_write_index { + unsafe { self.write_timestamp(t.query_set, index) } + } + self.state.end_of_pass_timestamp = t + .end_of_pass_write_index + .map(|index| t.query_set.queries[index as usize]); + } + + self.state.render_size = desc.extent; + self.state.resolve_attachments.clear(); + self.state.invalidate_attachments.clear(); + if let Some(label) = desc.label { + let range = self.cmd_buffer.add_marker(label); + self.cmd_buffer.commands.push(C::PushDebugGroup(range)); + self.state.has_pass_label = true; + } + + let 
rendering_to_external_framebuffer = desc + .color_attachments + .iter() + .filter_map(|at| at.as_ref()) + .any(|at| match at.target.view.inner { + #[cfg(webgl)] + super::TextureInner::ExternalFramebuffer { .. } => true, + _ => false, + }); + + if rendering_to_external_framebuffer && desc.color_attachments.len() != 1 { + panic!("Multiple render attachments with external framebuffers are not supported."); + } + + // `COLOR_ATTACHMENT0` to `COLOR_ATTACHMENT31` gives 32 possible color attachments. + assert!(desc.color_attachments.len() <= 32); + + match desc + .color_attachments + .first() + .filter(|at| at.is_some()) + .and_then(|at| at.as_ref().map(|at| &at.target.view.inner)) + { + // default framebuffer (provided externally) + Some(&super::TextureInner::DefaultRenderbuffer) => { + self.cmd_buffer + .commands + .push(C::ResetFramebuffer { is_default: true }); + } + _ => { + // set the framebuffer + self.cmd_buffer + .commands + .push(C::ResetFramebuffer { is_default: false }); + + for (i, cat) in desc.color_attachments.iter().enumerate() { + if let Some(cat) = cat.as_ref() { + let attachment = glow::COLOR_ATTACHMENT0 + i as u32; + self.cmd_buffer.commands.push(C::BindAttachment { + attachment, + view: cat.target.view.clone(), + }); + if let Some(ref rat) = cat.resolve_target { + self.state + .resolve_attachments + .push((attachment, rat.view.clone())); + } + if !cat.ops.contains(crate::AttachmentOps::STORE) { + self.state.invalidate_attachments.push(attachment); + } + } + } + if let Some(ref dsat) = desc.depth_stencil_attachment { + let aspects = dsat.target.view.aspects; + let attachment = match aspects { + crate::FormatAspects::DEPTH => glow::DEPTH_ATTACHMENT, + crate::FormatAspects::STENCIL => glow::STENCIL_ATTACHMENT, + _ => glow::DEPTH_STENCIL_ATTACHMENT, + }; + self.cmd_buffer.commands.push(C::BindAttachment { + attachment, + view: dsat.target.view.clone(), + }); + if aspects.contains(crate::FormatAspects::DEPTH) + && 
!dsat.depth_ops.contains(crate::AttachmentOps::STORE) + { + self.state + .invalidate_attachments + .push(glow::DEPTH_ATTACHMENT); + } + if aspects.contains(crate::FormatAspects::STENCIL) + && !dsat.stencil_ops.contains(crate::AttachmentOps::STORE) + { + self.state + .invalidate_attachments + .push(glow::STENCIL_ATTACHMENT); + } + } + } + } + + let rect = crate::Rect { + x: 0, + y: 0, + w: desc.extent.width as i32, + h: desc.extent.height as i32, + }; + self.cmd_buffer.commands.push(C::SetScissor(rect.clone())); + self.cmd_buffer.commands.push(C::SetViewport { + rect, + depth: 0.0..1.0, + }); + + // issue the clears + for (i, cat) in desc + .color_attachments + .iter() + .filter_map(|at| at.as_ref()) + .enumerate() + { + if !cat.ops.contains(crate::AttachmentOps::LOAD) { + let c = &cat.clear_value; + self.cmd_buffer.commands.push( + match cat.target.view.format.sample_type(None, None).unwrap() { + wgt::TextureSampleType::Float { .. } => C::ClearColorF { + draw_buffer: i as u32, + color: [c.r as f32, c.g as f32, c.b as f32, c.a as f32], + is_srgb: cat.target.view.format.is_srgb(), + }, + wgt::TextureSampleType::Uint => C::ClearColorU( + i as u32, + [c.r as u32, c.g as u32, c.b as u32, c.a as u32], + ), + wgt::TextureSampleType::Sint => C::ClearColorI( + i as u32, + [c.r as i32, c.g as i32, c.b as i32, c.a as i32], + ), + wgt::TextureSampleType::Depth => unreachable!(), + }, + ); + } + } + + if !rendering_to_external_framebuffer { + // set the draw buffers and states + self.cmd_buffer + .commands + .push(C::SetDrawColorBuffers(desc.color_attachments.len() as u8)); + } + + if let Some(ref dsat) = desc.depth_stencil_attachment { + let clear_depth = !dsat.depth_ops.contains(crate::AttachmentOps::LOAD); + let clear_stencil = !dsat.stencil_ops.contains(crate::AttachmentOps::LOAD); + + if clear_depth && clear_stencil { + self.cmd_buffer.commands.push(C::ClearDepthAndStencil( + dsat.clear_value.0, + dsat.clear_value.1, + )); + } else if clear_depth { + self.cmd_buffer + 
.commands + .push(C::ClearDepth(dsat.clear_value.0)); + } else if clear_stencil { + self.cmd_buffer + .commands + .push(C::ClearStencil(dsat.clear_value.1)); + } + } + } + unsafe fn end_render_pass(&mut self) { + for (attachment, dst) in self.state.resolve_attachments.drain(..) { + self.cmd_buffer.commands.push(C::ResolveAttachment { + attachment, + dst, + size: self.state.render_size, + }); + } + if !self.state.invalidate_attachments.is_empty() { + self.cmd_buffer.commands.push(C::InvalidateAttachments( + self.state.invalidate_attachments.clone(), + )); + self.state.invalidate_attachments.clear(); + } + if self.state.has_pass_label { + self.cmd_buffer.commands.push(C::PopDebugGroup); + self.state.has_pass_label = false; + } + self.state.instance_vbuf_mask = 0; + self.state.dirty_vbuf_mask = 0; + self.state.active_first_instance = 0; + self.state.color_targets.clear(); + for vat in &self.state.vertex_attributes { + self.cmd_buffer + .commands + .push(C::UnsetVertexAttribute(vat.location)); + } + self.state.vertex_attributes.clear(); + self.state.primitive = super::PrimitiveState::default(); + + if let Some(query) = self.state.end_of_pass_timestamp.take() { + self.cmd_buffer.commands.push(C::TimestampQuery(query)); + } + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let mut do_index = 0; + let mut dirty_textures = 0u32; + let mut dirty_samplers = 0u32; + let group_info = &layout.group_infos[index as usize]; + + for (binding_layout, raw_binding) in group_info.entries.iter().zip(group.contents.iter()) { + let slot = group_info.binding_to_slot[binding_layout.binding as usize] as u32; + match *raw_binding { + super::RawBinding::Buffer { + raw, + offset: base_offset, + size, + } => { + let mut offset = base_offset; + let target = match binding_layout.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + min_binding_size: _, + } => { + if 
has_dynamic_offset { + offset += dynamic_offsets[do_index] as i32; + do_index += 1; + } + match ty { + wgt::BufferBindingType::Uniform => glow::UNIFORM_BUFFER, + wgt::BufferBindingType::Storage { .. } => { + glow::SHADER_STORAGE_BUFFER + } + } + } + _ => unreachable!(), + }; + self.cmd_buffer.commands.push(C::BindBuffer { + target, + slot, + buffer: raw, + offset, + size, + }); + } + super::RawBinding::Sampler(sampler) => { + dirty_samplers |= 1 << slot; + self.state.samplers[slot as usize] = Some(sampler); + } + super::RawBinding::Texture { + raw, + target, + aspects, + ref mip_levels, + } => { + dirty_textures |= 1 << slot; + self.state.texture_slots[slot as usize].tex_target = target; + self.cmd_buffer.commands.push(C::BindTexture { + slot, + texture: raw, + target, + aspects, + mip_levels: mip_levels.clone(), + }); + } + super::RawBinding::Image(ref binding) => { + self.cmd_buffer.commands.push(C::BindImage { + slot, + binding: binding.clone(), + }); + } + } + } + + self.rebind_sampler_states(dirty_textures, dirty_samplers); + } + + unsafe fn set_push_constants( + &mut self, + _layout: &super::PipelineLayout, + _stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ) { + // There is nothing preventing the user from trying to update a single value within + // a vector or matrix in the set_push_constant call, as to the user, all of this is + // just memory. However OpenGL does not allow partial uniform updates. + // + // As such, we locally keep a copy of the current state of the push constant memory + // block. If the user tries to update a single value, we have the data to update the entirety + // of the uniform. 
+ let start_words = offset_bytes / 4; + let end_words = start_words + data.len() as u32; + self.state.current_push_constant_data[start_words as usize..end_words as usize] + .copy_from_slice(data); + + // We iterate over the uniform list as there may be multiple uniforms that need + // updating from the same push constant memory (one for each shader stage). + // + // Additionally, any statically unused uniform descs will have been removed from this list + // by OpenGL, so the uniform list is not contiguous. + for uniform in self.state.push_constant_descs.iter().cloned() { + let uniform_size_words = uniform.size_bytes / 4; + let uniform_start_words = uniform.offset / 4; + let uniform_end_words = uniform_start_words + uniform_size_words; + + // Is true if any word within the uniform binding was updated + let needs_updating = + start_words < uniform_end_words || uniform_start_words <= end_words; + + if needs_updating { + let uniform_data = &self.state.current_push_constant_data + [uniform_start_words as usize..uniform_end_words as usize]; + + let range = self.cmd_buffer.add_push_constant_data(uniform_data); + + self.cmd_buffer.commands.push(C::SetPushConstants { + uniform, + offset: range.start, + }); + } + } + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + let range = self.cmd_buffer.add_marker(label); + self.cmd_buffer.commands.push(C::InsertDebugMarker(range)); + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + let range = self.cmd_buffer.add_marker(group_label); + self.cmd_buffer.commands.push(C::PushDebugGroup(range)); + } + unsafe fn end_debug_marker(&mut self) { + self.cmd_buffer.commands.push(C::PopDebugGroup); + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + self.state.topology = conv::map_primitive_topology(pipeline.primitive.topology); + + if self + .private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + for vat in pipeline.vertex_attributes.iter() { + let vb = 
&pipeline.vertex_buffers[vat.buffer_index as usize]; + // set the layout + self.cmd_buffer.commands.push(C::SetVertexAttribute { + buffer: None, + buffer_desc: vb.clone(), + attribute_desc: vat.clone(), + }); + } + } else { + for vat in &self.state.vertex_attributes { + self.cmd_buffer + .commands + .push(C::UnsetVertexAttribute(vat.location)); + } + self.state.vertex_attributes.clear(); + + self.state.dirty_vbuf_mask = 0; + // copy vertex attributes + for vat in pipeline.vertex_attributes.iter() { + //Note: we can invalidate more carefully here. + self.state.dirty_vbuf_mask |= 1 << vat.buffer_index; + self.state.vertex_attributes.push(vat.clone()); + } + } + + self.state.instance_vbuf_mask = 0; + // copy vertex state + for (index, (&mut (ref mut state_desc, _), pipe_desc)) in self + .state + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_buffers.iter()) + .enumerate() + { + if pipe_desc.step == wgt::VertexStepMode::Instance { + self.state.instance_vbuf_mask |= 1 << index; + } + if state_desc != pipe_desc { + self.state.dirty_vbuf_mask |= 1 << index; + *state_desc = pipe_desc.clone(); + } + } + + self.set_pipeline_inner(&pipeline.inner); + + // set primitive state + let prim_state = conv::map_primitive_state(&pipeline.primitive); + if prim_state != self.state.primitive { + self.cmd_buffer + .commands + .push(C::SetPrimitive(prim_state.clone())); + self.state.primitive = prim_state; + } + + // set depth/stencil states + let mut aspects = crate::FormatAspects::empty(); + if pipeline.depth_bias != self.state.depth_bias { + self.state.depth_bias = pipeline.depth_bias; + self.cmd_buffer + .commands + .push(C::SetDepthBias(pipeline.depth_bias)); + } + if let Some(ref depth) = pipeline.depth { + aspects |= crate::FormatAspects::DEPTH; + self.cmd_buffer.commands.push(C::SetDepth(depth.clone())); + } + if let Some(ref stencil) = pipeline.stencil { + aspects |= crate::FormatAspects::STENCIL; + self.state.stencil = stencil.clone(); + self.rebind_stencil_func(); + if 
stencil.front.ops == stencil.back.ops + && stencil.front.mask_write == stencil.back.mask_write + { + self.cmd_buffer.commands.push(C::SetStencilOps { + face: glow::FRONT_AND_BACK, + write_mask: stencil.front.mask_write, + ops: stencil.front.ops.clone(), + }); + } else { + self.cmd_buffer.commands.push(C::SetStencilOps { + face: glow::FRONT, + write_mask: stencil.front.mask_write, + ops: stencil.front.ops.clone(), + }); + self.cmd_buffer.commands.push(C::SetStencilOps { + face: glow::BACK, + write_mask: stencil.back.mask_write, + ops: stencil.back.ops.clone(), + }); + } + } + self.cmd_buffer + .commands + .push(C::ConfigureDepthStencil(aspects)); + + // set multisampling state + if pipeline.alpha_to_coverage_enabled != self.state.alpha_to_coverage_enabled { + self.state.alpha_to_coverage_enabled = pipeline.alpha_to_coverage_enabled; + self.cmd_buffer + .commands + .push(C::SetAlphaToCoverage(pipeline.alpha_to_coverage_enabled)); + } + + // set blend states + if self.state.color_targets[..] != pipeline.color_targets[..] 
{ + if pipeline + .color_targets + .iter() + .skip(1) + .any(|ct| *ct != pipeline.color_targets[0]) + { + for (index, ct) in pipeline.color_targets.iter().enumerate() { + self.cmd_buffer.commands.push(C::SetColorTarget { + draw_buffer_index: Some(index as u32), + desc: ct.clone(), + }); + } + } else { + self.cmd_buffer.commands.push(C::SetColorTarget { + draw_buffer_index: None, + desc: pipeline.color_targets.first().cloned().unwrap_or_default(), + }); + } + } + self.state.color_targets.clear(); + for ct in pipeline.color_targets.iter() { + self.state.color_targets.push(ct.clone()); + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + self.state.index_offset = binding.offset; + self.state.index_format = format; + self.cmd_buffer + .commands + .push(C::SetIndexBuffer(binding.buffer.raw.unwrap())); + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + self.state.dirty_vbuf_mask |= 1 << index; + let (_, ref mut vb) = self.state.vertex_buffers[index as usize]; + *vb = Some(super::BufferBinding { + raw: binding.buffer.raw.unwrap(), + offset: binding.offset, + }); + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth: Range<f32>) { + self.cmd_buffer.commands.push(C::SetViewport { + rect: crate::Rect { + x: rect.x as i32, + y: rect.y as i32, + w: rect.w as i32, + h: rect.h as i32, + }, + depth, + }); + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + self.cmd_buffer.commands.push(C::SetScissor(crate::Rect { + x: rect.x as i32, + y: rect.y as i32, + w: rect.w as i32, + h: rect.h as i32, + })); + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + self.state.stencil.front.reference = value; + self.state.stencil.back.reference = value; + self.rebind_stencil_func(); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + 
self.cmd_buffer.commands.push(C::SetBlendConstant(*color)); + } + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ) { + self.prepare_draw(first_instance); + #[allow(clippy::clone_on_copy)] // False positive when cloning glow::UniformLocation + self.cmd_buffer.commands.push(C::Draw { + topology: self.state.topology, + first_vertex, + vertex_count, + first_instance, + instance_count, + first_instance_location: self.state.first_instance_location.clone(), + }); + } + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ) { + self.prepare_draw(first_instance); + let (index_size, index_type) = match self.state.index_format { + wgt::IndexFormat::Uint16 => (2, glow::UNSIGNED_SHORT), + wgt::IndexFormat::Uint32 => (4, glow::UNSIGNED_INT), + }; + let index_offset = self.state.index_offset + index_size * first_index as wgt::BufferAddress; + #[allow(clippy::clone_on_copy)] // False positive when cloning glow::UniformLocation + self.cmd_buffer.commands.push(C::DrawIndexed { + topology: self.state.topology, + index_type, + index_offset, + index_count, + base_vertex, + first_instance, + instance_count, + first_instance_location: self.state.first_instance_location.clone(), + }); + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + self.prepare_draw(0); + for draw in 0..draw_count as wgt::BufferAddress { + let indirect_offset = + offset + draw * mem::size_of::<wgt::DrawIndirectArgs>() as wgt::BufferAddress; + #[allow(clippy::clone_on_copy)] // False positive when cloning glow::UniformLocation + self.cmd_buffer.commands.push(C::DrawIndirect { + topology: self.state.topology, + indirect_buf: buffer.raw.unwrap(), + indirect_offset, + first_instance_location: self.state.first_instance_location.clone(), + }); + } + } + unsafe fn draw_indexed_indirect( + 
&mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + self.prepare_draw(0); + let index_type = match self.state.index_format { + wgt::IndexFormat::Uint16 => glow::UNSIGNED_SHORT, + wgt::IndexFormat::Uint32 => glow::UNSIGNED_INT, + }; + for draw in 0..draw_count as wgt::BufferAddress { + let indirect_offset = offset + + draw * mem::size_of::<wgt::DrawIndexedIndirectArgs>() as wgt::BufferAddress; + #[allow(clippy::clone_on_copy)] // False positive when cloning glow::UniformLocation + self.cmd_buffer.commands.push(C::DrawIndexedIndirect { + topology: self.state.topology, + index_type, + indirect_buf: buffer.raw.unwrap(), + indirect_offset, + first_instance_location: self.state.first_instance_location.clone(), + }); + } + } + unsafe fn draw_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + unreachable!() + } + unsafe fn draw_indexed_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + unreachable!() + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::Api>) { + debug_assert!(self.state.end_of_pass_timestamp.is_none()); + if let Some(ref t) = desc.timestamp_writes { + if let Some(index) = t.beginning_of_pass_write_index { + unsafe { self.write_timestamp(t.query_set, index) } + } + self.state.end_of_pass_timestamp = t + .end_of_pass_write_index + .map(|index| t.query_set.queries[index as usize]); + } + + if let Some(label) = desc.label { + let range = self.cmd_buffer.add_marker(label); + self.cmd_buffer.commands.push(C::PushDebugGroup(range)); + self.state.has_pass_label = true; + } + } + unsafe fn end_compute_pass(&mut self) { + if self.state.has_pass_label { + self.cmd_buffer.commands.push(C::PopDebugGroup); + 
self.state.has_pass_label = false; + } + + if let Some(query) = self.state.end_of_pass_timestamp.take() { + self.cmd_buffer.commands.push(C::TimestampQuery(query)); + } + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + self.set_pipeline_inner(&pipeline.inner); + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + self.cmd_buffer.commands.push(C::Dispatch(count)); + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + self.cmd_buffer.commands.push(C::DispatchIndirect { + indirect_buf: buffer.raw.unwrap(), + indirect_offset: offset, + }); + } + + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + _descriptor_count: u32, + _descriptors: T, + ) where + super::Api: 'a, + T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>, + { + unimplemented!() + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + _barriers: crate::AccelerationStructureBarrier, + ) { + unimplemented!() + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/conv.rs b/third_party/rust/wgpu-hal/src/gles/conv.rs new file mode 100644 index 0000000000..bde69b8629 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/conv.rs @@ -0,0 +1,415 @@ +impl super::AdapterShared { + pub(super) fn describe_texture_format( + &self, + texture_format: wgt::TextureFormat, + ) -> super::TextureFormatDesc { + use wgt::TextureFormat as Tf; + use wgt::{AstcBlock, AstcChannel}; + + let (internal, external, data_type) = match texture_format { + Tf::R8Unorm => (glow::R8, glow::RED, glow::UNSIGNED_BYTE), + Tf::R8Snorm => (glow::R8_SNORM, glow::RED, glow::BYTE), + Tf::R8Uint => (glow::R8UI, glow::RED_INTEGER, glow::UNSIGNED_BYTE), + Tf::R8Sint => (glow::R8I, glow::RED_INTEGER, glow::BYTE), + Tf::R16Uint => (glow::R16UI, glow::RED_INTEGER, glow::UNSIGNED_SHORT), + Tf::R16Sint => (glow::R16I, glow::RED_INTEGER, glow::SHORT), + Tf::R16Unorm => (glow::R16, glow::RED, 
glow::UNSIGNED_SHORT), + Tf::R16Snorm => (glow::R16_SNORM, glow::RED, glow::SHORT), + Tf::R16Float => (glow::R16F, glow::RED, glow::HALF_FLOAT), + Tf::Rg8Unorm => (glow::RG8, glow::RG, glow::UNSIGNED_BYTE), + Tf::Rg8Snorm => (glow::RG8_SNORM, glow::RG, glow::BYTE), + Tf::Rg8Uint => (glow::RG8UI, glow::RG_INTEGER, glow::UNSIGNED_BYTE), + Tf::Rg8Sint => (glow::RG8I, glow::RG_INTEGER, glow::BYTE), + Tf::R32Uint => (glow::R32UI, glow::RED_INTEGER, glow::UNSIGNED_INT), + Tf::R32Sint => (glow::R32I, glow::RED_INTEGER, glow::INT), + Tf::R32Float => (glow::R32F, glow::RED, glow::FLOAT), + Tf::Rg16Uint => (glow::RG16UI, glow::RG_INTEGER, glow::UNSIGNED_SHORT), + Tf::Rg16Sint => (glow::RG16I, glow::RG_INTEGER, glow::SHORT), + Tf::Rg16Unorm => (glow::RG16, glow::RG, glow::UNSIGNED_SHORT), + Tf::Rg16Snorm => (glow::RG16_SNORM, glow::RG, glow::SHORT), + Tf::Rg16Float => (glow::RG16F, glow::RG, glow::HALF_FLOAT), + Tf::Rgba8Unorm => (glow::RGBA8, glow::RGBA, glow::UNSIGNED_BYTE), + Tf::Rgba8UnormSrgb => (glow::SRGB8_ALPHA8, glow::RGBA, glow::UNSIGNED_BYTE), + Tf::Bgra8UnormSrgb => (glow::SRGB8_ALPHA8, glow::BGRA, glow::UNSIGNED_BYTE), //TODO? + Tf::Rgba8Snorm => (glow::RGBA8_SNORM, glow::RGBA, glow::BYTE), + Tf::Bgra8Unorm => (glow::RGBA8, glow::BGRA, glow::UNSIGNED_BYTE), //TODO? 
+ Tf::Rgba8Uint => (glow::RGBA8UI, glow::RGBA_INTEGER, glow::UNSIGNED_BYTE), + Tf::Rgba8Sint => (glow::RGBA8I, glow::RGBA_INTEGER, glow::BYTE), + Tf::Rgb10a2Uint => ( + glow::RGB10_A2UI, + glow::RGBA_INTEGER, + glow::UNSIGNED_INT_2_10_10_10_REV, + ), + Tf::Rgb10a2Unorm => ( + glow::RGB10_A2, + glow::RGBA, + glow::UNSIGNED_INT_2_10_10_10_REV, + ), + Tf::Rg11b10Float => ( + glow::R11F_G11F_B10F, + glow::RGB, + glow::UNSIGNED_INT_10F_11F_11F_REV, + ), + Tf::Rg32Uint => (glow::RG32UI, glow::RG_INTEGER, glow::UNSIGNED_INT), + Tf::Rg32Sint => (glow::RG32I, glow::RG_INTEGER, glow::INT), + Tf::Rg32Float => (glow::RG32F, glow::RG, glow::FLOAT), + Tf::Rgba16Uint => (glow::RGBA16UI, glow::RGBA_INTEGER, glow::UNSIGNED_SHORT), + Tf::Rgba16Sint => (glow::RGBA16I, glow::RGBA_INTEGER, glow::SHORT), + Tf::Rgba16Unorm => (glow::RGBA16, glow::RGBA, glow::UNSIGNED_SHORT), + Tf::Rgba16Snorm => (glow::RGBA16_SNORM, glow::RGBA, glow::SHORT), + Tf::Rgba16Float => (glow::RGBA16F, glow::RGBA, glow::HALF_FLOAT), + Tf::Rgba32Uint => (glow::RGBA32UI, glow::RGBA_INTEGER, glow::UNSIGNED_INT), + Tf::Rgba32Sint => (glow::RGBA32I, glow::RGBA_INTEGER, glow::INT), + Tf::Rgba32Float => (glow::RGBA32F, glow::RGBA, glow::FLOAT), + Tf::Stencil8 => ( + glow::STENCIL_INDEX8, + glow::STENCIL_INDEX, + glow::UNSIGNED_BYTE, + ), + Tf::Depth16Unorm => ( + glow::DEPTH_COMPONENT16, + glow::DEPTH_COMPONENT, + glow::UNSIGNED_SHORT, + ), + Tf::Depth32Float => (glow::DEPTH_COMPONENT32F, glow::DEPTH_COMPONENT, glow::FLOAT), + Tf::Depth32FloatStencil8 => ( + glow::DEPTH32F_STENCIL8, + glow::DEPTH_STENCIL, + glow::FLOAT_32_UNSIGNED_INT_24_8_REV, + ), + Tf::Depth24Plus => ( + glow::DEPTH_COMPONENT24, + glow::DEPTH_COMPONENT, + glow::UNSIGNED_INT, + ), + Tf::Depth24PlusStencil8 => ( + glow::DEPTH24_STENCIL8, + glow::DEPTH_STENCIL, + glow::UNSIGNED_INT_24_8, + ), + Tf::NV12 => unreachable!(), + Tf::Rgb9e5Ufloat => (glow::RGB9_E5, glow::RGB, glow::UNSIGNED_INT_5_9_9_9_REV), + Tf::Bc1RgbaUnorm => 
(glow::COMPRESSED_RGBA_S3TC_DXT1_EXT, glow::RGBA, 0), + Tf::Bc1RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, glow::RGBA, 0), + Tf::Bc2RgbaUnorm => (glow::COMPRESSED_RGBA_S3TC_DXT3_EXT, glow::RGBA, 0), + Tf::Bc2RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, glow::RGBA, 0), + Tf::Bc3RgbaUnorm => (glow::COMPRESSED_RGBA_S3TC_DXT5_EXT, glow::RGBA, 0), + Tf::Bc3RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, glow::RGBA, 0), + Tf::Bc4RUnorm => (glow::COMPRESSED_RED_RGTC1, glow::RED, 0), + Tf::Bc4RSnorm => (glow::COMPRESSED_SIGNED_RED_RGTC1, glow::RED, 0), + Tf::Bc5RgUnorm => (glow::COMPRESSED_RG_RGTC2, glow::RG, 0), + Tf::Bc5RgSnorm => (glow::COMPRESSED_SIGNED_RG_RGTC2, glow::RG, 0), + Tf::Bc6hRgbUfloat => (glow::COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, glow::RGB, 0), + Tf::Bc6hRgbFloat => (glow::COMPRESSED_RGB_BPTC_SIGNED_FLOAT, glow::RGB, 0), + Tf::Bc7RgbaUnorm => (glow::COMPRESSED_RGBA_BPTC_UNORM, glow::RGBA, 0), + Tf::Bc7RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_BPTC_UNORM, glow::RGBA, 0), + Tf::Etc2Rgb8Unorm => (glow::COMPRESSED_RGB8_ETC2, glow::RGB, 0), + Tf::Etc2Rgb8UnormSrgb => (glow::COMPRESSED_SRGB8_ETC2, glow::RGB, 0), + Tf::Etc2Rgb8A1Unorm => ( + glow::COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, + glow::RGBA, + 0, + ), + Tf::Etc2Rgb8A1UnormSrgb => ( + glow::COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2, + glow::RGBA, + 0, + ), + Tf::Etc2Rgba8Unorm => (glow::COMPRESSED_RGBA8_ETC2_EAC, glow::RGBA, 0), + Tf::Etc2Rgba8UnormSrgb => (glow::COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, glow::RGBA, 0), + Tf::EacR11Unorm => (glow::COMPRESSED_R11_EAC, glow::RED, 0), + Tf::EacR11Snorm => (glow::COMPRESSED_SIGNED_R11_EAC, glow::RED, 0), + Tf::EacRg11Unorm => (glow::COMPRESSED_RG11_EAC, glow::RG, 0), + Tf::EacRg11Snorm => (glow::COMPRESSED_SIGNED_RG11_EAC, glow::RG, 0), + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm | AstcChannel::Hdr => match block { + AstcBlock::B4x4 => (glow::COMPRESSED_RGBA_ASTC_4x4_KHR, glow::RGBA, 0), + 
AstcBlock::B5x4 => (glow::COMPRESSED_RGBA_ASTC_5x4_KHR, glow::RGBA, 0), + AstcBlock::B5x5 => (glow::COMPRESSED_RGBA_ASTC_5x5_KHR, glow::RGBA, 0), + AstcBlock::B6x5 => (glow::COMPRESSED_RGBA_ASTC_6x5_KHR, glow::RGBA, 0), + AstcBlock::B6x6 => (glow::COMPRESSED_RGBA_ASTC_6x6_KHR, glow::RGBA, 0), + AstcBlock::B8x5 => (glow::COMPRESSED_RGBA_ASTC_8x5_KHR, glow::RGBA, 0), + AstcBlock::B8x6 => (glow::COMPRESSED_RGBA_ASTC_8x6_KHR, glow::RGBA, 0), + AstcBlock::B8x8 => (glow::COMPRESSED_RGBA_ASTC_8x8_KHR, glow::RGBA, 0), + AstcBlock::B10x5 => (glow::COMPRESSED_RGBA_ASTC_10x5_KHR, glow::RGBA, 0), + AstcBlock::B10x6 => (glow::COMPRESSED_RGBA_ASTC_10x6_KHR, glow::RGBA, 0), + AstcBlock::B10x8 => (glow::COMPRESSED_RGBA_ASTC_10x8_KHR, glow::RGBA, 0), + AstcBlock::B10x10 => (glow::COMPRESSED_RGBA_ASTC_10x10_KHR, glow::RGBA, 0), + AstcBlock::B12x10 => (glow::COMPRESSED_RGBA_ASTC_12x10_KHR, glow::RGBA, 0), + AstcBlock::B12x12 => (glow::COMPRESSED_RGBA_ASTC_12x12_KHR, glow::RGBA, 0), + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, glow::RGBA, 0), + AstcBlock::B5x4 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, glow::RGBA, 0), + AstcBlock::B5x5 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, glow::RGBA, 0), + AstcBlock::B6x5 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, glow::RGBA, 0), + AstcBlock::B6x6 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, glow::RGBA, 0), + AstcBlock::B8x5 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, glow::RGBA, 0), + AstcBlock::B8x6 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, glow::RGBA, 0), + AstcBlock::B8x8 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, glow::RGBA, 0), + AstcBlock::B10x5 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, glow::RGBA, 0) + } + AstcBlock::B10x6 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, glow::RGBA, 0) + } + AstcBlock::B10x8 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, glow::RGBA, 0) + } + AstcBlock::B10x10 => { + 
(glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, glow::RGBA, 0) + } + AstcBlock::B12x10 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, glow::RGBA, 0) + } + AstcBlock::B12x12 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, glow::RGBA, 0) + } + }, + }, + }; + + super::TextureFormatDesc { + internal, + external, + data_type, + } + } +} + +pub(super) fn describe_vertex_format(vertex_format: wgt::VertexFormat) -> super::VertexFormatDesc { + use super::VertexAttribKind as Vak; + use wgt::VertexFormat as Vf; + + let (element_count, element_format, attrib_kind) = match vertex_format { + Vf::Unorm8x2 => (2, glow::UNSIGNED_BYTE, Vak::Float), + Vf::Snorm8x2 => (2, glow::BYTE, Vak::Float), + Vf::Uint8x2 => (2, glow::UNSIGNED_BYTE, Vak::Integer), + Vf::Sint8x2 => (2, glow::BYTE, Vak::Integer), + Vf::Unorm8x4 => (4, glow::UNSIGNED_BYTE, Vak::Float), + Vf::Snorm8x4 => (4, glow::BYTE, Vak::Float), + Vf::Uint8x4 => (4, glow::UNSIGNED_BYTE, Vak::Integer), + Vf::Sint8x4 => (4, glow::BYTE, Vak::Integer), + Vf::Unorm16x2 => (2, glow::UNSIGNED_SHORT, Vak::Float), + Vf::Snorm16x2 => (2, glow::SHORT, Vak::Float), + Vf::Uint16x2 => (2, glow::UNSIGNED_SHORT, Vak::Integer), + Vf::Sint16x2 => (2, glow::SHORT, Vak::Integer), + Vf::Float16x2 => (2, glow::HALF_FLOAT, Vak::Float), + Vf::Unorm16x4 => (4, glow::UNSIGNED_SHORT, Vak::Float), + Vf::Snorm16x4 => (4, glow::SHORT, Vak::Float), + Vf::Uint16x4 => (4, glow::UNSIGNED_SHORT, Vak::Integer), + Vf::Sint16x4 => (4, glow::SHORT, Vak::Integer), + Vf::Float16x4 => (4, glow::HALF_FLOAT, Vak::Float), + Vf::Uint32 => (1, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32 => (1, glow::INT, Vak::Integer), + Vf::Float32 => (1, glow::FLOAT, Vak::Float), + Vf::Uint32x2 => (2, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32x2 => (2, glow::INT, Vak::Integer), + Vf::Float32x2 => (2, glow::FLOAT, Vak::Float), + Vf::Uint32x3 => (3, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32x3 => (3, glow::INT, Vak::Integer), + Vf::Float32x3 => (3, glow::FLOAT, 
Vak::Float), + Vf::Uint32x4 => (4, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32x4 => (4, glow::INT, Vak::Integer), + Vf::Float32x4 => (4, glow::FLOAT, Vak::Float), + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + }; + + super::VertexFormatDesc { + element_count, + element_format, + attrib_kind, + } +} + +pub fn map_filter_modes( + min: wgt::FilterMode, + mag: wgt::FilterMode, + mip: wgt::FilterMode, +) -> (u32, u32) { + use wgt::FilterMode as Fm; + + let mag_filter = match mag { + Fm::Nearest => glow::NEAREST, + Fm::Linear => glow::LINEAR, + }; + + let min_filter = match (min, mip) { + (Fm::Nearest, Fm::Nearest) => glow::NEAREST_MIPMAP_NEAREST, + (Fm::Nearest, Fm::Linear) => glow::NEAREST_MIPMAP_LINEAR, + (Fm::Linear, Fm::Nearest) => glow::LINEAR_MIPMAP_NEAREST, + (Fm::Linear, Fm::Linear) => glow::LINEAR_MIPMAP_LINEAR, + }; + + (min_filter, mag_filter) +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> u32 { + match mode { + wgt::AddressMode::Repeat => glow::REPEAT, + wgt::AddressMode::MirrorRepeat => glow::MIRRORED_REPEAT, + wgt::AddressMode::ClampToEdge => glow::CLAMP_TO_EDGE, + wgt::AddressMode::ClampToBorder => glow::CLAMP_TO_BORDER, + //wgt::AddressMode::MirrorClamp => glow::MIRROR_CLAMP_TO_EDGE, + } +} + +pub fn map_compare_func(fun: wgt::CompareFunction) -> u32 { + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => glow::NEVER, + Cf::Less => glow::LESS, + Cf::LessEqual => glow::LEQUAL, + Cf::Equal => glow::EQUAL, + Cf::GreaterEqual => glow::GEQUAL, + Cf::Greater => glow::GREATER, + Cf::NotEqual => glow::NOTEQUAL, + Cf::Always => glow::ALWAYS, + } +} + +pub fn map_primitive_topology(topology: wgt::PrimitiveTopology) -> u32 { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => glow::POINTS, + Pt::LineList => glow::LINES, + Pt::LineStrip => glow::LINE_STRIP, + Pt::TriangleList => glow::TRIANGLES, + Pt::TriangleStrip => glow::TRIANGLE_STRIP, + } +} + +pub(super) fn 
map_primitive_state(state: &wgt::PrimitiveState) -> super::PrimitiveState { + super::PrimitiveState { + //Note: we are flipping the front face, so that + // the Y-flip in the generated GLSL keeps the same visibility. + // See `naga::back::glsl::WriterFlags::ADJUST_COORDINATE_SPACE`. + front_face: match state.front_face { + wgt::FrontFace::Cw => glow::CCW, + wgt::FrontFace::Ccw => glow::CW, + }, + cull_face: match state.cull_mode { + Some(wgt::Face::Front) => glow::FRONT, + Some(wgt::Face::Back) => glow::BACK, + None => 0, + }, + unclipped_depth: state.unclipped_depth, + polygon_mode: match state.polygon_mode { + wgt::PolygonMode::Fill => glow::FILL, + wgt::PolygonMode::Line => glow::LINE, + wgt::PolygonMode::Point => glow::POINT, + }, + } +} + +pub fn _map_view_dimension(dim: wgt::TextureViewDimension) -> u32 { + use wgt::TextureViewDimension as Tvd; + match dim { + Tvd::D1 | Tvd::D2 => glow::TEXTURE_2D, + Tvd::D2Array => glow::TEXTURE_2D_ARRAY, + Tvd::Cube => glow::TEXTURE_CUBE_MAP, + Tvd::CubeArray => glow::TEXTURE_CUBE_MAP_ARRAY, + Tvd::D3 => glow::TEXTURE_3D, + } +} + +fn map_stencil_op(operation: wgt::StencilOperation) -> u32 { + use wgt::StencilOperation as So; + match operation { + So::Keep => glow::KEEP, + So::Zero => glow::ZERO, + So::Replace => glow::REPLACE, + So::Invert => glow::INVERT, + So::IncrementClamp => glow::INCR, + So::DecrementClamp => glow::DECR, + So::IncrementWrap => glow::INCR_WRAP, + So::DecrementWrap => glow::DECR_WRAP, + } +} + +fn map_stencil_ops(face: &wgt::StencilFaceState) -> super::StencilOps { + super::StencilOps { + pass: map_stencil_op(face.pass_op), + fail: map_stencil_op(face.fail_op), + depth_fail: map_stencil_op(face.depth_fail_op), + } +} + +pub(super) fn map_stencil(state: &wgt::StencilState) -> super::StencilState { + super::StencilState { + front: super::StencilSide { + function: map_compare_func(state.front.compare), + mask_read: state.read_mask, + mask_write: state.write_mask, + reference: 0, + ops: 
map_stencil_ops(&state.front), + }, + back: super::StencilSide { + function: map_compare_func(state.back.compare), + mask_read: state.read_mask, + mask_write: state.write_mask, + reference: 0, + ops: map_stencil_ops(&state.back), + }, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor) -> u32 { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => glow::ZERO, + Bf::One => glow::ONE, + Bf::Src => glow::SRC_COLOR, + Bf::OneMinusSrc => glow::ONE_MINUS_SRC_COLOR, + Bf::Dst => glow::DST_COLOR, + Bf::OneMinusDst => glow::ONE_MINUS_DST_COLOR, + Bf::SrcAlpha => glow::SRC_ALPHA, + Bf::OneMinusSrcAlpha => glow::ONE_MINUS_SRC_ALPHA, + Bf::DstAlpha => glow::DST_ALPHA, + Bf::OneMinusDstAlpha => glow::ONE_MINUS_DST_ALPHA, + Bf::Constant => glow::CONSTANT_COLOR, + Bf::OneMinusConstant => glow::ONE_MINUS_CONSTANT_COLOR, + Bf::SrcAlphaSaturated => glow::SRC_ALPHA_SATURATE, + Bf::Src1 => glow::SRC1_COLOR, + Bf::OneMinusSrc1 => glow::ONE_MINUS_SRC1_COLOR, + Bf::Src1Alpha => glow::SRC1_ALPHA, + Bf::OneMinusSrc1Alpha => glow::ONE_MINUS_SRC1_ALPHA, + } +} + +fn map_blend_component(component: &wgt::BlendComponent) -> super::BlendComponent { + super::BlendComponent { + src: map_blend_factor(component.src_factor), + dst: map_blend_factor(component.dst_factor), + equation: match component.operation { + wgt::BlendOperation::Add => glow::FUNC_ADD, + wgt::BlendOperation::Subtract => glow::FUNC_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => glow::FUNC_REVERSE_SUBTRACT, + wgt::BlendOperation::Min => glow::MIN, + wgt::BlendOperation::Max => glow::MAX, + }, + } +} + +pub(super) fn map_blend(blend: &wgt::BlendState) -> super::BlendDesc { + super::BlendDesc { + color: map_blend_component(&blend.color), + alpha: map_blend_component(&blend.alpha), + } +} + +pub(super) fn map_storage_access(access: wgt::StorageTextureAccess) -> u32 { + match access { + wgt::StorageTextureAccess::ReadOnly => glow::READ_ONLY, + wgt::StorageTextureAccess::WriteOnly => glow::WRITE_ONLY, + 
wgt::StorageTextureAccess::ReadWrite => glow::READ_WRITE, + } +} + +pub(super) fn is_layered_target(target: u32) -> bool { + match target { + glow::TEXTURE_2D | glow::TEXTURE_CUBE_MAP => false, + glow::TEXTURE_2D_ARRAY | glow::TEXTURE_CUBE_MAP_ARRAY | glow::TEXTURE_3D => true, + _ => unreachable!(), + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/device.rs b/third_party/rust/wgpu-hal/src/gles/device.rs new file mode 100644 index 0000000000..d0abe2c169 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/device.rs @@ -0,0 +1,1503 @@ +use super::{conv, PrivateCapabilities}; +use crate::auxil::map_naga_stage; +use glow::HasContext; +use std::{ + cmp::max, + convert::TryInto, + ptr, + sync::{Arc, Mutex}, +}; + +use arrayvec::ArrayVec; +#[cfg(native)] +use std::mem; +use std::sync::atomic::Ordering; + +type ShaderStage<'a> = ( + naga::ShaderStage, + &'a crate::ProgrammableStage<'a, super::Api>, +); +type NameBindingMap = rustc_hash::FxHashMap<String, (super::BindingRegister, u8)>; + +struct CompilationContext<'a> { + layout: &'a super::PipelineLayout, + sampler_map: &'a mut super::SamplerBindMap, + name_binding_map: &'a mut NameBindingMap, + push_constant_items: &'a mut Vec<naga::back::glsl::PushConstantItem>, + multiview: Option<std::num::NonZeroU32>, +} + +impl CompilationContext<'_> { + fn consume_reflection( + self, + gl: &glow::Context, + module: &naga::Module, + ep_info: &naga::valid::FunctionInfo, + reflection_info: naga::back::glsl::ReflectionInfo, + naga_stage: naga::ShaderStage, + program: glow::Program, + ) { + for (handle, var) in module.global_variables.iter() { + if ep_info[handle].is_empty() { + continue; + } + let register = match var.space { + naga::AddressSpace::Uniform => super::BindingRegister::UniformBuffers, + naga::AddressSpace::Storage { .. 
} => super::BindingRegister::StorageBuffers, + _ => continue, + }; + + let br = var.binding.as_ref().unwrap(); + let slot = self.layout.get_slot(br); + + let name = match reflection_info.uniforms.get(&handle) { + Some(name) => name.clone(), + None => continue, + }; + log::trace!( + "Rebind buffer: {:?} -> {}, register={:?}, slot={}", + var.name.as_ref(), + &name, + register, + slot + ); + self.name_binding_map.insert(name, (register, slot)); + } + + for (name, mapping) in reflection_info.texture_mapping { + let var = &module.global_variables[mapping.texture]; + let register = match module.types[var.ty].inner { + naga::TypeInner::Image { + class: naga::ImageClass::Storage { .. }, + .. + } => super::BindingRegister::Images, + _ => super::BindingRegister::Textures, + }; + + let tex_br = var.binding.as_ref().unwrap(); + let texture_linear_index = self.layout.get_slot(tex_br); + + self.name_binding_map + .insert(name, (register, texture_linear_index)); + if let Some(sampler_handle) = mapping.sampler { + let sam_br = module.global_variables[sampler_handle] + .binding + .as_ref() + .unwrap(); + let sampler_linear_index = self.layout.get_slot(sam_br); + self.sampler_map[texture_linear_index as usize] = Some(sampler_linear_index); + } + } + + for (name, location) in reflection_info.varying { + match naga_stage { + naga::ShaderStage::Vertex => { + assert_eq!(location.index, 0); + unsafe { gl.bind_attrib_location(program, location.location, &name) } + } + naga::ShaderStage::Fragment => { + assert_eq!(location.index, 0); + unsafe { gl.bind_frag_data_location(program, location.location, &name) } + } + naga::ShaderStage::Compute => {} + } + } + + *self.push_constant_items = reflection_info.push_constant_items; + } +} + +impl super::Device { + /// # Safety + /// + /// - `name` must be created respecting `desc` + /// - `name` must be a texture + /// - If `drop_guard` is [`None`], wgpu-hal will take ownership of the texture. 
If `drop_guard` is + /// [`Some`], the texture must be valid until the drop implementation + /// of the drop guard is called. + #[cfg(any(native, Emscripten))] + pub unsafe fn texture_from_raw( + &self, + name: std::num::NonZeroU32, + desc: &crate::TextureDescriptor, + drop_guard: Option<crate::DropGuard>, + ) -> super::Texture { + super::Texture { + inner: super::TextureInner::Texture { + raw: glow::NativeTexture(name), + target: super::Texture::get_info_from_desc(desc), + }, + drop_guard, + mip_level_count: desc.mip_level_count, + array_layer_count: desc.array_layer_count(), + format: desc.format, + format_desc: self.shared.describe_texture_format(desc.format), + copy_size: desc.copy_extent(), + } + } + + /// # Safety + /// + /// - `name` must be created respecting `desc` + /// - `name` must be a renderbuffer + /// - If `drop_guard` is [`None`], wgpu-hal will take ownership of the renderbuffer. If `drop_guard` is + /// [`Some`], the renderbuffer must be valid until the drop implementation + /// of the drop guard is called. 
#[cfg(any(native, Emscripten))]
    pub unsafe fn texture_from_raw_renderbuffer(
        &self,
        name: std::num::NonZeroU32,
        desc: &crate::TextureDescriptor,
        drop_guard: Option<crate::DropGuard>,
    ) -> super::Texture {
        // Wrap the raw GL name as a renderbuffer; every other field is derived from `desc`.
        super::Texture {
            inner: super::TextureInner::Renderbuffer {
                raw: glow::NativeRenderbuffer(name),
            },
            drop_guard,
            mip_level_count: desc.mip_level_count,
            array_layer_count: desc.array_layer_count(),
            format: desc.format,
            format_desc: self.shared.describe_texture_format(desc.format),
            copy_size: desc.copy_extent(),
        }
    }

    /// Creates a GL shader object for `naga_stage`, uploads `shader` as its source and
    /// compiles it.
    ///
    /// On native targets with debug support the shader object is labelled with `label`.
    ///
    /// # Errors
    ///
    /// Returns `PipelineError::Linkage` carrying the stage and the driver message when the
    /// shader object cannot be created or when compilation fails. A shader that fails to
    /// compile is deleted before returning.
    unsafe fn compile_shader(
        gl: &glow::Context,
        shader: &str,
        naga_stage: naga::ShaderStage,
        #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>,
    ) -> Result<glow::Shader, crate::PipelineError> {
        let target = match naga_stage {
            naga::ShaderStage::Vertex => glow::VERTEX_SHADER,
            naga::ShaderStage::Fragment => glow::FRAGMENT_SHADER,
            naga::ShaderStage::Compute => glow::COMPUTE_SHADER,
        };

        // Report a failed shader-object creation through the `Result` instead of panicking;
        // callers already handle `PipelineError`.
        let raw = unsafe { gl.create_shader(target) }
            .map_err(|msg| crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg))?;
        #[cfg(native)]
        if gl.supports_debug() {
            //TODO: remove all transmutes from `object_label`
            // https://github.com/grovesNL/glow/issues/186
            let name = unsafe { mem::transmute(raw) };
            unsafe { gl.object_label(glow::SHADER, name, label) };
        }

        unsafe { gl.shader_source(raw, shader) };
        unsafe { gl.compile_shader(raw) };

        log::debug!("\tCompiled shader {:?}", raw);

        let compiled_ok = unsafe { gl.get_shader_compile_status(raw) };
        let msg = unsafe { gl.get_shader_info_log(raw) };
        if !compiled_ok {
            log::error!("\tShader compilation failed: {}", msg);
            unsafe { gl.delete_shader(raw) };
            return Err(crate::PipelineError::Linkage(
                map_naga_stage(naga_stage),
                msg,
            ));
        }

        // A successful compile may still produce warnings in the info log.
        if !msg.is_empty() {
            log::warn!("\tCompile: {}", msg);
        }
        Ok(raw)
    }

    fn create_shader(
        gl: &glow::Context,
        naga_stage: naga::ShaderStage,
        stage: &crate::ProgrammableStage<super::Api>,
        context: CompilationContext,
        program:
glow::Program, + ) -> Result<glow::Shader, crate::PipelineError> { + use naga::back::glsl; + let pipeline_options = glsl::PipelineOptions { + shader_stage: naga_stage, + entry_point: stage.entry_point.to_string(), + multiview: context.multiview, + }; + + let shader = &stage.module.naga; + let entry_point_index = shader + .module + .entry_points + .iter() + .position(|ep| ep.name.as_str() == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + + use naga::proc::BoundsCheckPolicy; + // The image bounds checks require the TEXTURE_LEVELS feature available in GL core 4.3+. + let version = gl.version(); + let image_check = if !version.is_embedded && (version.major, version.minor) >= (4, 3) { + BoundsCheckPolicy::ReadZeroSkipWrite + } else { + BoundsCheckPolicy::Unchecked + }; + + // Other bounds check are either provided by glsl or not implemented yet. + let policies = naga::proc::BoundsCheckPolicies { + index: BoundsCheckPolicy::Unchecked, + buffer: BoundsCheckPolicy::Unchecked, + image_load: image_check, + image_store: BoundsCheckPolicy::Unchecked, + binding_array: BoundsCheckPolicy::Unchecked, + }; + + let mut output = String::new(); + let mut writer = glsl::Writer::new( + &mut output, + &shader.module, + &shader.info, + &context.layout.naga_options, + &pipeline_options, + policies, + ) + .map_err(|e| { + let msg = format!("{e}"); + crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg) + })?; + + let reflection_info = writer.write().map_err(|e| { + let msg = format!("{e}"); + crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg) + })?; + + log::debug!("Naga generated shader:\n{}", output); + + context.consume_reflection( + gl, + &shader.module, + shader.info.get_entry_point(entry_point_index), + reflection_info, + naga_stage, + program, + ); + + unsafe { Self::compile_shader(gl, &output, naga_stage, stage.module.label.as_deref()) } + } + + unsafe fn create_pipeline<'a>( + &self, + gl: &glow::Context, + shaders: 
ArrayVec<ShaderStage<'a>, { crate::MAX_CONCURRENT_SHADER_STAGES }>, + layout: &super::PipelineLayout, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + multiview: Option<std::num::NonZeroU32>, + ) -> Result<Arc<super::PipelineInner>, crate::PipelineError> { + let mut program_stages = ArrayVec::new(); + let mut group_to_binding_to_slot = Vec::with_capacity(layout.group_infos.len()); + for group in &*layout.group_infos { + group_to_binding_to_slot.push(group.binding_to_slot.clone()); + } + for &(naga_stage, stage) in &shaders { + program_stages.push(super::ProgramStage { + naga_stage: naga_stage.to_owned(), + shader_id: stage.module.id, + entry_point: stage.entry_point.to_owned(), + }); + } + let mut guard = self + .shared + .program_cache + .try_lock() + .expect("Couldn't acquire program_cache lock"); + // This guard ensures that we can't accidentally destroy a program whilst we're about to reuse it + // The only place that destroys a pipeline is also locking on `program_cache` + let program = guard + .entry(super::ProgramCacheKey { + stages: program_stages, + group_to_binding_to_slot: group_to_binding_to_slot.into_boxed_slice(), + }) + .or_insert_with(|| unsafe { + Self::create_program( + gl, + shaders, + layout, + label, + multiview, + self.shared.shading_language_version, + self.shared.private_caps, + ) + }) + .to_owned()?; + drop(guard); + + Ok(program) + } + + unsafe fn create_program<'a>( + gl: &glow::Context, + shaders: ArrayVec<ShaderStage<'a>, { crate::MAX_CONCURRENT_SHADER_STAGES }>, + layout: &super::PipelineLayout, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + multiview: Option<std::num::NonZeroU32>, + glsl_version: naga::back::glsl::Version, + private_caps: PrivateCapabilities, + ) -> Result<Arc<super::PipelineInner>, crate::PipelineError> { + let glsl_version = match glsl_version { + naga::back::glsl::Version::Embedded { version, .. 
} => format!("{version} es"), + naga::back::glsl::Version::Desktop(version) => format!("{version}"), + }; + let program = unsafe { gl.create_program() }.unwrap(); + #[cfg(native)] + if let Some(label) = label { + if private_caps.contains(PrivateCapabilities::DEBUG_FNS) { + let name = unsafe { mem::transmute(program) }; + unsafe { gl.object_label(glow::PROGRAM, name, Some(label)) }; + } + } + + let mut name_binding_map = NameBindingMap::default(); + let mut push_constant_items = ArrayVec::<_, { crate::MAX_CONCURRENT_SHADER_STAGES }>::new(); + let mut sampler_map = [None; super::MAX_TEXTURE_SLOTS]; + let mut has_stages = wgt::ShaderStages::empty(); + let mut shaders_to_delete = ArrayVec::<_, { crate::MAX_CONCURRENT_SHADER_STAGES }>::new(); + + for &(naga_stage, stage) in &shaders { + has_stages |= map_naga_stage(naga_stage); + let pc_item = { + push_constant_items.push(Vec::new()); + push_constant_items.last_mut().unwrap() + }; + let context = CompilationContext { + layout, + sampler_map: &mut sampler_map, + name_binding_map: &mut name_binding_map, + push_constant_items: pc_item, + multiview, + }; + + let shader = Self::create_shader(gl, naga_stage, stage, context, program)?; + shaders_to_delete.push(shader); + } + + // Create empty fragment shader if only vertex shader is present + if has_stages == wgt::ShaderStages::VERTEX { + let shader_src = format!("#version {glsl_version}\n void main(void) {{}}",); + log::info!("Only vertex shader is present. 
Creating an empty fragment shader",); + let shader = unsafe { + Self::compile_shader( + gl, + &shader_src, + naga::ShaderStage::Fragment, + Some("(wgpu internal) dummy fragment shader"), + ) + }?; + shaders_to_delete.push(shader); + } + + for &shader in shaders_to_delete.iter() { + unsafe { gl.attach_shader(program, shader) }; + } + unsafe { gl.link_program(program) }; + + for shader in shaders_to_delete { + unsafe { gl.delete_shader(shader) }; + } + + log::debug!("\tLinked program {:?}", program); + + let linked_ok = unsafe { gl.get_program_link_status(program) }; + let msg = unsafe { gl.get_program_info_log(program) }; + if !linked_ok { + return Err(crate::PipelineError::Linkage(has_stages, msg)); + } + if !msg.is_empty() { + log::warn!("\tLink: {}", msg); + } + + if !private_caps.contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT) { + // This remapping is only needed if we aren't able to put the binding layout + // in the shader. We can't remap storage buffers this way. + unsafe { gl.use_program(Some(program)) }; + for (ref name, (register, slot)) in name_binding_map { + log::trace!("Get binding {:?} from program {:?}", name, program); + match register { + super::BindingRegister::UniformBuffers => { + let index = unsafe { gl.get_uniform_block_index(program, name) }.unwrap(); + log::trace!("\tBinding slot {slot} to block index {index}"); + unsafe { gl.uniform_block_binding(program, index, slot as _) }; + } + super::BindingRegister::StorageBuffers => { + let index = + unsafe { gl.get_shader_storage_block_index(program, name) }.unwrap(); + log::error!( + "Unable to re-map shader storage block {} to {}", + name, + index + ); + return Err(crate::DeviceError::Lost.into()); + } + super::BindingRegister::Textures | super::BindingRegister::Images => { + let location = unsafe { gl.get_uniform_location(program, name) }; + unsafe { gl.uniform_1_i32(location.as_ref(), slot as _) }; + } + } + } + } + + let mut uniforms = ArrayVec::new(); + + for (stage_idx, 
stage_items) in push_constant_items.into_iter().enumerate() {
            for item in stage_items {
                let naga_module = &shaders[stage_idx].1.module.naga.module;
                let type_inner = &naga_module.types[item.ty].inner;

                let location = unsafe { gl.get_uniform_location(program, &item.access_path) };

                log::trace!(
                    "push constant item: name={}, ty={:?}, offset={}, location={:?}",
                    item.access_path,
                    type_inner,
                    item.offset,
                    location,
                );

                if let Some(location) = location {
                    uniforms.push(super::PushConstantDesc {
                        location,
                        offset: item.offset,
                        size_bytes: type_inner.size(naga_module.to_ctx()),
                        ty: type_inner.clone(),
                    });
                }
            }
        }

        let first_instance_location = if has_stages.contains(wgt::ShaderStages::VERTEX) {
            // If this returns none (the uniform isn't active), that's fine, we just won't set it.
            unsafe { gl.get_uniform_location(program, naga::back::glsl::FIRST_INSTANCE_BINDING) }
        } else {
            None
        };

        Ok(Arc::new(super::PipelineInner {
            program,
            sampler_map,
            first_instance_location,
            push_constant_descs: uniforms,
        }))
    }
}

impl crate::Device<super::Api> for super::Device {
    /// Deletes the GL objects owned by the device and its queue: the main vertex array, the
    /// draw and copy framebuffers, and the zero buffer.
    unsafe fn exit(self, queue: super::Queue) {
        let gl = &self.shared.context.lock();
        unsafe { gl.delete_vertex_array(self.main_vao) };
        unsafe { gl.delete_framebuffer(queue.draw_fbo) };
        unsafe { gl.delete_framebuffer(queue.copy_fbo) };
        unsafe { gl.delete_buffer(queue.zero_buffer) };
    }

    /// Creates a buffer described by `desc`.
    ///
    /// Buffers with `INDEX` usage target `ELEMENT_ARRAY_BUFFER`, all others `ARRAY_BUFFER`.
    /// When mapping has to be emulated (the `EMULATE_BUFFER_MAP` workaround is set or
    /// `BUFFER_ALLOCATION` is unavailable), a `MAP_WRITE` buffer is backed purely by a
    /// zero-filled host vector and no GL buffer is created; a `MAP_READ` buffer gets both a
    /// GL buffer and a host-side shadow vector.
    ///
    /// # Errors
    ///
    /// Returns `DeviceError::OutOfMemory` when `desc.size` does not fit the integer type
    /// needed for the allocation, or when the GL buffer object cannot be created.
    unsafe fn create_buffer(
        &self,
        desc: &crate::BufferDescriptor,
    ) -> Result<super::Buffer, crate::DeviceError> {
        let target = if desc.usage.contains(crate::BufferUses::INDEX) {
            glow::ELEMENT_ARRAY_BUFFER
        } else {
            glow::ARRAY_BUFFER
        };

        let emulate_map = self
            .shared
            .workarounds
            .contains(super::Workarounds::EMULATE_BUFFER_MAP)
            || !self
                .shared
                .private_caps
                .contains(super::PrivateCapabilities::BUFFER_ALLOCATION);

        if emulate_map && desc.usage.intersects(crate::BufferUses::MAP_WRITE) {
            // Checked conversion: a plain `as usize` would silently truncate the size on
            // targets where `usize` is narrower than `wgt::BufferAddress`.
            let host_size: usize = desc
                .size
                .try_into()
                .map_err(|_| crate::DeviceError::OutOfMemory)?;
            return Ok(super::Buffer {
                raw: None,
                target,
                size: desc.size,
                map_flags: 0,
                data: Some(Arc::new(Mutex::new(vec![0; host_size]))),
            });
        }

        // Validate the size before touching GL so that a failed conversion cannot leave a
        // freshly created buffer object leaked and still bound to `target`.
        let raw_size = desc
            .size
            .try_into()
            .map_err(|_| crate::DeviceError::OutOfMemory)?;

        let gl = &self.shared.context.lock();

        let is_host_visible = desc
            .usage
            .intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE);
        let is_coherent = desc
            .memory_flags
            .contains(crate::MemoryFlags::PREFER_COHERENT);

        let mut map_flags = 0;
        if desc.usage.contains(crate::BufferUses::MAP_READ) {
            map_flags |= glow::MAP_READ_BIT;
        }
        if desc.usage.contains(crate::BufferUses::MAP_WRITE) {
            map_flags |= glow::MAP_WRITE_BIT;
        }

        let raw = Some(unsafe { gl.create_buffer() }.map_err(|_| crate::DeviceError::OutOfMemory)?);
        unsafe { gl.bind_buffer(target, raw) };

        if self
            .shared
            .private_caps
            .contains(super::PrivateCapabilities::BUFFER_ALLOCATION)
        {
            if is_host_visible {
                map_flags |= glow::MAP_PERSISTENT_BIT;
                if is_coherent {
                    map_flags |= glow::MAP_COHERENT_BIT;
                }
            }
            // TODO: may also be required for other calls involving `buffer_sub_data_u8_slice` (e.g. copy buffer to buffer and clear buffer)
            if desc.usage.intersects(crate::BufferUses::QUERY_RESOLVE) {
                map_flags |= glow::DYNAMIC_STORAGE_BIT;
            }
            unsafe { gl.buffer_storage(target, raw_size, None, map_flags) };
        } else {
            assert!(!is_coherent);
            let usage = if is_host_visible {
                if desc.usage.contains(crate::BufferUses::MAP_READ) {
                    glow::STREAM_READ
                } else {
                    glow::DYNAMIC_DRAW
                }
            } else {
                // Even if the usage doesn't contain SRC_READ, we update it internally at least once
                // Some vendors take usage very literally and STATIC_DRAW will freeze us with an empty buffer
                // https://github.com/gfx-rs/wgpu/issues/3371
                glow::DYNAMIC_DRAW
            };
            unsafe { gl.buffer_data_size(target, raw_size, usage) };
        }

        unsafe { gl.bind_buffer(target, None) };

        if !is_coherent && desc.usage.contains(crate::BufferUses::MAP_WRITE) {
            map_flags |= glow::MAP_FLUSH_EXPLICIT_BIT;
        }
        //TODO: do we need `glow::MAP_UNSYNCHRONIZED_BIT`?

        #[cfg(native)]
        if let Some(label) = desc.label {
            if self
                .shared
                .private_caps
                .contains(PrivateCapabilities::DEBUG_FNS)
            {
                let name = unsafe { mem::transmute(raw) };
                unsafe { gl.object_label(glow::BUFFER, name, Some(label)) };
            }
        }

        // Host-side shadow copy used when reads have to be emulated.
        let data = if emulate_map && desc.usage.contains(crate::BufferUses::MAP_READ) {
            Some(Arc::new(Mutex::new(vec![0; desc.size as usize])))
        } else {
            None
        };

        Ok(super::Buffer {
            raw,
            target,
            size: desc.size,
            map_flags,
            data,
        })
    }

    /// Deletes the GL buffer object, if the buffer has one.
    unsafe fn destroy_buffer(&self, buffer: super::Buffer) {
        if let Some(raw) = buffer.raw {
            let gl = &self.shared.context.lock();
            unsafe { gl.delete_buffer(raw) };
        }
    }

    unsafe fn map_buffer(
        &self,
        buffer: &super::Buffer,
        range: crate::MemoryRange,
    ) -> Result<crate::BufferMapping, crate::DeviceError> {
        let is_coherent = buffer.map_flags & glow::MAP_COHERENT_BIT != 0;
        let ptr = match buffer.raw {
            None => {
                let mut vec = buffer.data.as_ref().unwrap().lock().unwrap();
                let slice = &mut
vec.as_mut_slice()[range.start as usize..range.end as usize]; + slice.as_mut_ptr() + } + Some(raw) => { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + let ptr = if let Some(ref map_read_allocation) = buffer.data { + let mut guard = map_read_allocation.lock().unwrap(); + let slice = guard.as_mut_slice(); + unsafe { self.shared.get_buffer_sub_data(gl, buffer.target, 0, slice) }; + slice.as_mut_ptr() + } else { + unsafe { + gl.map_buffer_range( + buffer.target, + range.start as i32, + (range.end - range.start) as i32, + buffer.map_flags, + ) + } + }; + unsafe { gl.bind_buffer(buffer.target, None) }; + ptr + } + }; + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(ptr).ok_or(crate::DeviceError::Lost)?, + is_coherent, + }) + } + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + if let Some(raw) = buffer.raw { + if buffer.data.is_none() { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + unsafe { gl.unmap_buffer(buffer.target) }; + unsafe { gl.bind_buffer(buffer.target, None) }; + } + } + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + if let Some(raw) = buffer.raw { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + for range in ranges { + unsafe { + gl.flush_mapped_buffer_range( + buffer.target, + range.start as i32, + (range.end - range.start) as i32, + ) + }; + } + } + } + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) { + //TODO: do we need to do anything? 
+ } + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + let gl = &self.shared.context.lock(); + + let render_usage = crate::TextureUses::COLOR_TARGET + | crate::TextureUses::DEPTH_STENCIL_WRITE + | crate::TextureUses::DEPTH_STENCIL_READ; + let format_desc = self.shared.describe_texture_format(desc.format); + + let inner = if render_usage.contains(desc.usage) + && desc.dimension == wgt::TextureDimension::D2 + && desc.size.depth_or_array_layers == 1 + { + let raw = unsafe { gl.create_renderbuffer().unwrap() }; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, Some(raw)) }; + if desc.sample_count > 1 { + unsafe { + gl.renderbuffer_storage_multisample( + glow::RENDERBUFFER, + desc.sample_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + ) + }; + } else { + unsafe { + gl.renderbuffer_storage( + glow::RENDERBUFFER, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + ) + }; + } + + #[cfg(native)] + if let Some(label) = desc.label { + if self + .shared + .private_caps + .contains(PrivateCapabilities::DEBUG_FNS) + { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::RENDERBUFFER, name, Some(label)) }; + } + } + + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) }; + super::TextureInner::Renderbuffer { raw } + } else { + let raw = unsafe { gl.create_texture().unwrap() }; + let target = super::Texture::get_info_from_desc(desc); + + unsafe { gl.bind_texture(target, Some(raw)) }; + //Note: this has to be done before defining the storage! 
+ match desc.format.sample_type(None, Some(self.shared.features)) { + Some( + wgt::TextureSampleType::Float { filterable: false } + | wgt::TextureSampleType::Uint + | wgt::TextureSampleType::Sint, + ) => { + // reset default filtering mode + unsafe { + gl.tex_parameter_i32(target, glow::TEXTURE_MIN_FILTER, glow::NEAREST as i32) + }; + unsafe { + gl.tex_parameter_i32(target, glow::TEXTURE_MAG_FILTER, glow::NEAREST as i32) + }; + } + _ => {} + } + + if conv::is_layered_target(target) { + unsafe { + if self + .shared + .private_caps + .contains(PrivateCapabilities::TEXTURE_STORAGE) + { + gl.tex_storage_3d( + target, + desc.mip_level_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + desc.size.depth_or_array_layers as i32, + ) + } else if target == glow::TEXTURE_3D { + let mut width = desc.size.width; + let mut height = desc.size.width; + let mut depth = desc.size.depth_or_array_layers; + for i in 0..desc.mip_level_count { + gl.tex_image_3d( + target, + i as i32, + format_desc.internal as i32, + width as i32, + height as i32, + depth as i32, + 0, + format_desc.external, + format_desc.data_type, + None, + ); + width = max(1, width / 2); + height = max(1, height / 2); + depth = max(1, depth / 2); + } + } else { + let mut width = desc.size.width; + let mut height = desc.size.width; + for i in 0..desc.mip_level_count { + gl.tex_image_3d( + target, + i as i32, + format_desc.internal as i32, + width as i32, + height as i32, + desc.size.depth_or_array_layers as i32, + 0, + format_desc.external, + format_desc.data_type, + None, + ); + width = max(1, width / 2); + height = max(1, height / 2); + } + } + }; + } else if desc.sample_count > 1 { + unsafe { + gl.tex_storage_2d_multisample( + target, + desc.sample_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + true, + ) + }; + } else { + unsafe { + if self + .shared + .private_caps + .contains(PrivateCapabilities::TEXTURE_STORAGE) + { + 
gl.tex_storage_2d( + target, + desc.mip_level_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + ) + } else if target == glow::TEXTURE_CUBE_MAP { + let mut width = desc.size.width; + let mut height = desc.size.width; + for i in 0..desc.mip_level_count { + for face in [ + glow::TEXTURE_CUBE_MAP_POSITIVE_X, + glow::TEXTURE_CUBE_MAP_NEGATIVE_X, + glow::TEXTURE_CUBE_MAP_POSITIVE_Y, + glow::TEXTURE_CUBE_MAP_NEGATIVE_Y, + glow::TEXTURE_CUBE_MAP_POSITIVE_Z, + glow::TEXTURE_CUBE_MAP_NEGATIVE_Z, + ] { + gl.tex_image_2d( + face, + i as i32, + format_desc.internal as i32, + width as i32, + height as i32, + 0, + format_desc.external, + format_desc.data_type, + None, + ); + } + width = max(1, width / 2); + height = max(1, height / 2); + } + } else { + let mut width = desc.size.width; + let mut height = desc.size.width; + for i in 0..desc.mip_level_count { + gl.tex_image_2d( + target, + i as i32, + format_desc.internal as i32, + width as i32, + height as i32, + 0, + format_desc.external, + format_desc.data_type, + None, + ); + width = max(1, width / 2); + height = max(1, height / 2); + } + } + }; + } + + #[cfg(native)] + if let Some(label) = desc.label { + if self + .shared + .private_caps + .contains(PrivateCapabilities::DEBUG_FNS) + { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::TEXTURE, name, Some(label)) }; + } + } + + unsafe { gl.bind_texture(target, None) }; + super::TextureInner::Texture { raw, target } + }; + + Ok(super::Texture { + inner, + drop_guard: None, + mip_level_count: desc.mip_level_count, + array_layer_count: desc.array_layer_count(), + format: desc.format, + format_desc, + copy_size: desc.copy_extent(), + }) + } + unsafe fn destroy_texture(&self, texture: super::Texture) { + if texture.drop_guard.is_none() { + let gl = &self.shared.context.lock(); + match texture.inner { + super::TextureInner::Renderbuffer { raw, .. 
} => { + unsafe { gl.delete_renderbuffer(raw) }; + } + super::TextureInner::DefaultRenderbuffer => {} + super::TextureInner::Texture { raw, .. } => { + unsafe { gl.delete_texture(raw) }; + } + #[cfg(webgl)] + super::TextureInner::ExternalFramebuffer { .. } => {} + } + } + + // For clarity, we explicitly drop the drop guard. Although this has no real semantic effect as the + // end of the scope will drop the drop guard since this function takes ownership of the texture. + drop(texture.drop_guard); + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + Ok(super::TextureView { + //TODO: use `conv::map_view_dimension(desc.dimension)`? + inner: texture.inner.clone(), + aspects: crate::FormatAspects::new(texture.format, desc.range.aspect), + mip_levels: desc.range.mip_range(texture.mip_level_count), + array_layers: desc.range.layer_range(texture.array_layer_count), + format: texture.format, + }) + } + unsafe fn destroy_texture_view(&self, _view: super::TextureView) {} + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + let gl = &self.shared.context.lock(); + + let raw = unsafe { gl.create_sampler().unwrap() }; + + let (min, mag) = + conv::map_filter_modes(desc.min_filter, desc.mag_filter, desc.mipmap_filter); + + unsafe { gl.sampler_parameter_i32(raw, glow::TEXTURE_MIN_FILTER, min as i32) }; + unsafe { gl.sampler_parameter_i32(raw, glow::TEXTURE_MAG_FILTER, mag as i32) }; + + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_WRAP_S, + conv::map_address_mode(desc.address_modes[0]) as i32, + ) + }; + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_WRAP_T, + conv::map_address_mode(desc.address_modes[1]) as i32, + ) + }; + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_WRAP_R, + conv::map_address_mode(desc.address_modes[2]) as i32, + ) + }; + + if 
let Some(border_color) = desc.border_color { + let border = match border_color { + wgt::SamplerBorderColor::TransparentBlack | wgt::SamplerBorderColor::Zero => { + [0.0; 4] + } + wgt::SamplerBorderColor::OpaqueBlack => [0.0, 0.0, 0.0, 1.0], + wgt::SamplerBorderColor::OpaqueWhite => [1.0; 4], + }; + unsafe { gl.sampler_parameter_f32_slice(raw, glow::TEXTURE_BORDER_COLOR, &border) }; + } + + unsafe { gl.sampler_parameter_f32(raw, glow::TEXTURE_MIN_LOD, desc.lod_clamp.start) }; + unsafe { gl.sampler_parameter_f32(raw, glow::TEXTURE_MAX_LOD, desc.lod_clamp.end) }; + + // If clamp is not 1, we know anisotropy is supported up to 16x + if desc.anisotropy_clamp != 1 { + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_MAX_ANISOTROPY, + desc.anisotropy_clamp as i32, + ) + }; + } + + //set_param_float(glow::TEXTURE_LOD_BIAS, info.lod_bias.0); + + if let Some(compare) = desc.compare { + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_COMPARE_MODE, + glow::COMPARE_REF_TO_TEXTURE as i32, + ) + }; + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_COMPARE_FUNC, + conv::map_compare_func(compare) as i32, + ) + }; + } + + #[cfg(native)] + if let Some(label) = desc.label { + if self + .shared + .private_caps + .contains(PrivateCapabilities::DEBUG_FNS) + { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::SAMPLER, name, Some(label)) }; + } + } + + Ok(super::Sampler { raw }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + let gl = &self.shared.context.lock(); + unsafe { gl.delete_sampler(sampler.raw) }; + } + + unsafe fn create_command_encoder( + &self, + _desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + Ok(super::CommandEncoder { + cmd_buffer: super::CommandBuffer::default(), + state: Default::default(), + private_caps: self.shared.private_caps, + }) + } + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + 
    /// Wraps the descriptor's entries in a shared slice; no GL call is made.
    unsafe fn create_bind_group_layout(
        &self,
        desc: &crate::BindGroupLayoutDescriptor,
    ) -> Result<super::BindGroupLayout, crate::DeviceError> {
        Ok(super::BindGroupLayout {
            entries: Arc::from(desc.entries),
        })
    }
    /// No-op: a bind group layout owns no GL object.
    unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {}

    /// Builds the pipeline layout: per-group binding-to-slot tables plus the
    /// naga GLSL writer options (writer flags and binding map).
    ///
    /// Slots are handed out from five running `u8` counters (samplers,
    /// textures, storage images, uniform buffers, storage buffers) that keep
    /// counting across all bind groups of the layout.
    ///
    /// # Panics
    ///
    /// Hits `unimplemented!()` for acceleration-structure bindings.
    unsafe fn create_pipeline_layout(
        &self,
        desc: &crate::PipelineLayoutDescriptor<super::Api>,
    ) -> Result<super::PipelineLayout, crate::DeviceError> {
        use naga::back::glsl;

        let mut group_infos = Vec::with_capacity(desc.bind_group_layouts.len());
        let mut num_samplers = 0u8;
        let mut num_textures = 0u8;
        let mut num_images = 0u8;
        let mut num_uniform_buffers = 0u8;
        let mut num_storage_buffers = 0u8;

        let mut writer_flags = glsl::WriterFlags::ADJUST_COORDINATE_SPACE;
        writer_flags.set(
            glsl::WriterFlags::TEXTURE_SHADOW_LOD,
            self.shared
                .private_caps
                .contains(super::PrivateCapabilities::SHADER_TEXTURE_SHADOW_LOD),
        );
        writer_flags.set(
            glsl::WriterFlags::DRAW_PARAMETERS,
            self.shared
                .private_caps
                .contains(super::PrivateCapabilities::FULLY_FEATURED_INSTANCING),
        );
        // We always force point size to be written and it will be ignored by the driver if it's not a point list primitive.
        // https://github.com/gfx-rs/wgpu/pull/3440/files#r1095726950
        writer_flags.set(glsl::WriterFlags::FORCE_POINT_SIZE, true);
        let mut binding_map = glsl::BindingMap::default();

        for (group_index, bg_layout) in desc.bind_group_layouts.iter().enumerate() {
            // create a vector with the size enough to hold all the bindings, filled with `!0`
            // NOTE(review): the length is taken from the *last* entry's binding index, so this
            // presumably relies on the entries being sorted by binding - confirm with callers.
            let mut binding_to_slot = vec![
                !0;
                bg_layout
                    .entries
                    .last()
                    .map_or(0, |b| b.binding as usize + 1)
            ]
            .into_boxed_slice();

            for entry in bg_layout.entries.iter() {
                // Pick the running counter that matches the resource class of this entry.
                let counter = match entry.ty {
                    wgt::BindingType::Sampler { .. } => &mut num_samplers,
                    wgt::BindingType::Texture { .. } => &mut num_textures,
                    wgt::BindingType::StorageTexture { .. } => &mut num_images,
                    wgt::BindingType::Buffer {
                        ty: wgt::BufferBindingType::Uniform,
                        ..
                    } => &mut num_uniform_buffers,
                    wgt::BindingType::Buffer {
                        ty: wgt::BufferBindingType::Storage { .. },
                        ..
                    } => &mut num_storage_buffers,
                    wgt::BindingType::AccelerationStructure => unimplemented!(),
                };

                binding_to_slot[entry.binding as usize] = *counter;
                let br = naga::ResourceBinding {
                    group: group_index as u32,
                    binding: entry.binding,
                };
                binding_map.insert(br, *counter);
                // A binding array consumes `count` consecutive slots, anything else one slot.
                *counter += entry.count.map_or(1, |c| c.get() as u8);
            }

            group_infos.push(super::BindGroupLayoutInfo {
                entries: Arc::clone(&bg_layout.entries),
                binding_to_slot,
            });
        }

        Ok(super::PipelineLayout {
            group_infos: group_infos.into_boxed_slice(),
            naga_options: glsl::Options {
                version: self.shared.shading_language_version,
                writer_flags,
                binding_map,
                zero_initialize_workgroup_memory: true,
            },
        })
    }
    /// No-op: a pipeline layout owns no GL object.
    unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {}

    /// Resolves every bind group entry to the raw GL binding it refers to.
    ///
    /// Entries are paired positionally with the layout's entries, and the
    /// layout entry's type decides which resource array `resource_index`
    /// indexes into.
    ///
    /// # Panics
    ///
    /// Panics if a bound buffer has no raw GL buffer (`raw` is `None`), and
    /// hits `unimplemented!()` for acceleration-structure bindings.
    unsafe fn create_bind_group(
        &self,
        desc: &crate::BindGroupDescriptor<super::Api>,
    ) -> Result<super::BindGroup, crate::DeviceError> {
        let mut contents = Vec::new();

        for (entry, layout) in desc.entries.iter().zip(desc.layout.entries.iter()) {
            let binding = match layout.ty {
                wgt::BindingType::Buffer { .. } => {
                    let bb = &desc.buffers[entry.resource_index as usize];
                    super::RawBinding::Buffer {
                        raw: bb.buffer.raw.unwrap(),
                        offset: bb.offset as i32,
                        // Without an explicit size the binding covers the rest of the buffer.
                        size: match bb.size {
                            Some(s) => s.get() as i32,
                            None => (bb.buffer.size - bb.offset) as i32,
                        },
                    }
                }
                wgt::BindingType::Sampler { .. } => {
                    let sampler = desc.samplers[entry.resource_index as usize];
                    super::RawBinding::Sampler(sampler.raw)
                }
                wgt::BindingType::Texture { .. } => {
                    let view = desc.textures[entry.resource_index as usize].view;
                    if view.array_layers.start != 0 {
                        log::error!("Unable to create a sampled texture binding for non-zero array layer.\n{}",
                            "This is an implementation problem of wgpu-hal/gles backend.")
                    }
                    let (raw, target) = view.inner.as_native();
                    super::RawBinding::Texture {
                        raw,
                        target,
                        aspects: view.aspects,
                        mip_levels: view.mip_levels.clone(),
                    }
                }
                wgt::BindingType::StorageTexture {
                    access,
                    format,
                    view_dimension,
                } => {
                    let view = desc.textures[entry.resource_index as usize].view;
                    let format_desc = self.shared.describe_texture_format(format);
                    let (raw, _target) = view.inner.as_native();
                    super::RawBinding::Image(super::ImageBinding {
                        raw,
                        mip_level: view.mip_levels.start,
                        // Array views bind all layers (`None`); other views bind one layer.
                        array_layer: match view_dimension {
                            wgt::TextureViewDimension::D2Array
                            | wgt::TextureViewDimension::CubeArray => None,
                            _ => Some(view.array_layers.start),
                        },
                        access: conv::map_storage_access(access),
                        format: format_desc.internal,
                    })
                }
                wgt::BindingType::AccelerationStructure => unimplemented!(),
            };
            contents.push(binding);
        }

        Ok(super::BindGroup {
            contents: contents.into_boxed_slice(),
        })
    }
    /// No-op: a bind group owns no GL object.
    unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {}

    /// Stores the naga module together with its label and a fresh shader id.
    ///
    /// # Panics
    ///
    /// Panics when handed SPIR-V input.
    unsafe fn create_shader_module(
        &self,
        desc: &crate::ShaderModuleDescriptor,
        shader: crate::ShaderInput,
    ) -> Result<super::ShaderModule, crate::ShaderError> {
        Ok(super::ShaderModule {
            naga: match shader {
                crate::ShaderInput::SpirV(_) => {
                    panic!("`Features::SPIRV_SHADER_PASSTHROUGH` is not enabled")
                }
                crate::ShaderInput::Naga(naga) => naga,
            },
            label: desc.label.map(|str| str.to_string()),
            id: self.shared.next_shader_id.fetch_add(1, Ordering::Relaxed),
        })
    }
    /// No-op: a shader module owns no GL object.
    unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {}

    /// Creates the program for the vertex (and optional fragment) stage and
    /// translates the vertex, color-target and depth/stencil state of `desc`
    /// into the backend's own descriptors.
    unsafe fn create_render_pipeline(
        &self,
        desc: &crate::RenderPipelineDescriptor<super::Api>,
    ) -> Result<super::RenderPipeline, crate::PipelineError> {
        let gl = &self.shared.context.lock();
        let mut shaders = ArrayVec::new();
        shaders.push((naga::ShaderStage::Vertex, &desc.vertex_stage));
        if let Some(ref fs) = desc.fragment_stage {
            shaders.push((naga::ShaderStage::Fragment, fs));
        }
        let inner =
            unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, desc.multiview) }?;

        // Flatten the per-buffer attribute lists; each attribute remembers its buffer index.
        let (vertex_buffers, vertex_attributes) = {
            let mut buffers = Vec::new();
            let mut attributes = Vec::new();
            for (index, vb_layout) in desc.vertex_buffers.iter().enumerate() {
                buffers.push(super::VertexBufferDesc {
                    step: vb_layout.step_mode,
                    stride: vb_layout.array_stride as u32,
                });
                for vat in vb_layout.attributes.iter() {
                    let format_desc = conv::describe_vertex_format(vat.format);
                    attributes.push(super::AttributeDesc {
                        location: vat.shader_location,
                        offset: vat.offset as u32,
                        buffer_index: index as u32,
                        format_desc,
                    });
                }
            }
            (buffers.into_boxed_slice(), attributes.into_boxed_slice())
        };

        // `None` color targets are skipped, so the resulting slice is compacted.
        let color_targets = {
            let mut targets = Vec::new();
            for ct in desc.color_targets.iter().filter_map(|at| at.as_ref()) {
                targets.push(super::ColorTargetDesc {
                    mask: ct.write_mask,
                    blend: ct.blend.as_ref().map(conv::map_blend),
                });
            }
            //Note: if any of the states are different, and `INDEPENDENT_BLEND` flag
            // is not exposed, then this pipeline will not bind correctly.
            targets.into_boxed_slice()
        };

        Ok(super::RenderPipeline {
            inner,
            primitive: desc.primitive,
            vertex_buffers,
            vertex_attributes,
            color_targets,
            depth: desc.depth_stencil.as_ref().map(|ds| super::DepthState {
                function: conv::map_compare_func(ds.depth_compare),
                mask: ds.depth_write_enabled,
            }),
            depth_bias: desc
                .depth_stencil
                .as_ref()
                .map(|ds| ds.bias)
                .unwrap_or_default(),
            stencil: desc
                .depth_stencil
                .as_ref()
                .map(|ds| conv::map_stencil(&ds.stencil)),
            alpha_to_coverage_enabled: desc.multisample.alpha_to_coverage_enabled,
        })
    }
    /// Deletes the pipeline's GL program once this pipeline is its last user.
    ///
    /// The cache entries for that program are removed as well; cached `Err`
    /// entries are dropped by the same `retain` pass.
    unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) {
        let mut program_cache = self.shared.program_cache.lock();
        // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache`
        // This is safe to assume as long as:
        // - `RenderPipeline` can't be cloned
        // - The only place that we can get a new reference is during `program_cache.lock()`
        if Arc::strong_count(&pipeline.inner) == 2 {
            program_cache.retain(|_, v| match *v {
                Ok(ref p) => p.program != pipeline.inner.program,
                Err(_) => false,
            });
            let gl = &self.shared.context.lock();
            unsafe { gl.delete_program(pipeline.inner.program) };
        }
    }

    /// Creates the program for a single compute stage.
    unsafe fn create_compute_pipeline(
        &self,
        desc: &crate::ComputePipelineDescriptor<super::Api>,
    ) -> Result<super::ComputePipeline, crate::PipelineError> {
        let gl = &self.shared.context.lock();
        let mut shaders = ArrayVec::new();
        shaders.push((naga::ShaderStage::Compute, &desc.stage));
        let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, None) }?;

        Ok(super::ComputePipeline { inner })
    }
    /// Deletes the pipeline's GL program once this pipeline is its last user.
    ///
    /// The cache entries for that program are removed as well; cached `Err`
    /// entries are dropped by the same `retain` pass.
    unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) {
        let mut program_cache = self.shared.program_cache.lock();
        // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache`
        // This is safe to assume as long as:
        // - `ComputePipeline` can't be cloned
        // - The only place that we can get a new reference is during `program_cache.lock()`
        if Arc::strong_count(&pipeline.inner) == 2 {
            program_cache.retain(|_, v| match *v {
                Ok(ref p) => p.program != pipeline.inner.program,
                Err(_) => false,
            });
            let gl = &self.shared.context.lock();
            unsafe { gl.delete_program(pipeline.inner.program) };
        }
    }

    /// Creates `desc.count` GL query objects.
    ///
    /// A failing `create_query` is reported as `DeviceError::OutOfMemory`.
    #[cfg_attr(target_arch = "wasm32", allow(unused))]
    unsafe fn create_query_set(
        &self,
        desc: &wgt::QuerySetDescriptor<crate::Label>,
    ) -> Result<super::QuerySet, crate::DeviceError> {
        let gl = &self.shared.context.lock();

        let mut queries = Vec::with_capacity(desc.count as usize);
        for _ in 0..desc.count {
            let query =
                unsafe { gl.create_query() }.map_err(|_| crate::DeviceError::OutOfMemory)?;

            // We aren't really able to, in general, label queries.
            //
            // We could take a timestamp here to "initialize" the query,
            // but that's a bit of a hack, and we don't want to insert
            // random timestamps into the command stream if we don't have to.
+ + queries.push(query); + } + + Ok(super::QuerySet { + queries: queries.into_boxed_slice(), + target: match desc.ty { + wgt::QueryType::Occlusion => glow::ANY_SAMPLES_PASSED_CONSERVATIVE, + wgt::QueryType::Timestamp => glow::TIMESTAMP, + _ => unimplemented!(), + }, + }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + let gl = &self.shared.context.lock(); + for &query in set.queries.iter() { + unsafe { gl.delete_query(query) }; + } + } + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + Ok(super::Fence { + last_completed: 0, + pending: Vec::new(), + }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + let gl = &self.shared.context.lock(); + for (_, sync) in fence.pending { + unsafe { gl.delete_sync(sync) }; + } + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + #[cfg_attr(target_arch = "wasm32", allow(clippy::needless_borrow))] + Ok(fence.get_latest(&self.shared.context.lock())) + } + unsafe fn wait( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + if fence.last_completed < wait_value { + let gl = &self.shared.context.lock(); + let timeout_ns = if cfg!(any(webgl, Emscripten)) { + 0 + } else { + (timeout_ms as u64 * 1_000_000).min(!0u32 as u64) + }; + if let Some(&(_, sync)) = fence + .pending + .iter() + .find(|&&(value, _)| value >= wait_value) + { + return match unsafe { + gl.client_wait_sync(sync, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns as i32) + } { + // for some reason firefox returns WAIT_FAILED, to investigate + #[cfg(any(webgl, Emscripten))] + glow::WAIT_FAILED => { + log::warn!("wait failed!"); + Ok(false) + } + glow::TIMEOUT_EXPIRED => Ok(false), + glow::CONDITION_SATISFIED | glow::ALREADY_SIGNALED => Ok(true), + _ => Err(crate::DeviceError::Lost), + }; + } + } + Ok(true) + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(all(native, 
feature = "renderdoc"))] + return unsafe { + self.render_doc + .start_frame_capture(self.shared.context.raw_context(), ptr::null_mut()) + }; + #[allow(unreachable_code)] + false + } + unsafe fn stop_capture(&self) { + #[cfg(all(native, feature = "renderdoc"))] + unsafe { + self.render_doc + .end_frame_capture(ptr::null_mut(), ptr::null_mut()) + } + } + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result<(), crate::DeviceError> { + unimplemented!() + } + unsafe fn get_acceleration_structure_build_sizes<'a>( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + ) -> crate::AccelerationStructureBuildSizes { + unimplemented!() + } + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &(), + ) -> wgt::BufferAddress { + unimplemented!() + } + unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: ()) {} +} + +#[cfg(send_sync)] +unsafe impl Sync for super::Device {} +#[cfg(send_sync)] +unsafe impl Send for super::Device {} diff --git a/third_party/rust/wgpu-hal/src/gles/egl.rs b/third_party/rust/wgpu-hal/src/gles/egl.rs new file mode 100644 index 0000000000..aa985d8121 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/egl.rs @@ -0,0 +1,1380 @@ +use glow::HasContext; +use parking_lot::{Mutex, MutexGuard, RwLock}; + +use std::{ffi, os::raw, ptr, rc::Rc, sync::Arc, time::Duration}; + +/// The amount of time to wait while trying to obtain a lock to the adapter context +const CONTEXT_LOCK_TIMEOUT_SECS: u64 = 1; + +const EGL_CONTEXT_FLAGS_KHR: i32 = 0x30FC; +const EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR: i32 = 0x0001; +const EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT: i32 = 0x30BF; +const EGL_PLATFORM_WAYLAND_KHR: u32 = 0x31D8; +const EGL_PLATFORM_X11_KHR: u32 = 0x31D5; +const EGL_PLATFORM_ANGLE_ANGLE: u32 = 0x3202; +const EGL_PLATFORM_ANGLE_NATIVE_PLATFORM_TYPE_ANGLE: u32 = 0x348F; +const EGL_PLATFORM_ANGLE_DEBUG_LAYERS_ENABLED: u32 = 
0x3451; +const EGL_PLATFORM_SURFACELESS_MESA: u32 = 0x31DD; +const EGL_GL_COLORSPACE_KHR: u32 = 0x309D; +const EGL_GL_COLORSPACE_SRGB_KHR: u32 = 0x3089; + +type XOpenDisplayFun = + unsafe extern "system" fn(display_name: *const raw::c_char) -> *mut raw::c_void; + +type XCloseDisplayFun = unsafe extern "system" fn(display: *mut raw::c_void) -> raw::c_int; + +type WlDisplayConnectFun = + unsafe extern "system" fn(display_name: *const raw::c_char) -> *mut raw::c_void; + +type WlDisplayDisconnectFun = unsafe extern "system" fn(display: *const raw::c_void); + +#[cfg(not(Emscripten))] +type EglInstance = khronos_egl::DynamicInstance<khronos_egl::EGL1_4>; + +#[cfg(Emscripten)] +type EglInstance = khronos_egl::Instance<khronos_egl::Static>; + +type WlEglWindowCreateFun = unsafe extern "system" fn( + surface: *const raw::c_void, + width: raw::c_int, + height: raw::c_int, +) -> *mut raw::c_void; + +type WlEglWindowResizeFun = unsafe extern "system" fn( + window: *const raw::c_void, + width: raw::c_int, + height: raw::c_int, + dx: raw::c_int, + dy: raw::c_int, +); + +type WlEglWindowDestroyFun = unsafe extern "system" fn(window: *const raw::c_void); + +#[cfg(target_os = "android")] +extern "C" { + pub fn ANativeWindow_setBuffersGeometry( + window: *mut raw::c_void, + width: i32, + height: i32, + format: i32, + ) -> i32; +} + +type EglLabel = *const raw::c_void; + +#[allow(clippy::upper_case_acronyms)] +type EGLDEBUGPROCKHR = Option< + unsafe extern "system" fn( + error: khronos_egl::Enum, + command: *const raw::c_char, + message_type: u32, + thread_label: EglLabel, + object_label: EglLabel, + message: *const raw::c_char, + ), +>; + +const EGL_DEBUG_MSG_CRITICAL_KHR: u32 = 0x33B9; +const EGL_DEBUG_MSG_ERROR_KHR: u32 = 0x33BA; +const EGL_DEBUG_MSG_WARN_KHR: u32 = 0x33BB; +const EGL_DEBUG_MSG_INFO_KHR: u32 = 0x33BC; + +type EglDebugMessageControlFun = unsafe extern "system" fn( + proc: EGLDEBUGPROCKHR, + attrib_list: *const khronos_egl::Attrib, +) -> raw::c_int; + +unsafe extern 
"system" fn egl_debug_proc( + error: khronos_egl::Enum, + command_raw: *const raw::c_char, + message_type: u32, + _thread_label: EglLabel, + _object_label: EglLabel, + message_raw: *const raw::c_char, +) { + let log_severity = match message_type { + EGL_DEBUG_MSG_CRITICAL_KHR | EGL_DEBUG_MSG_ERROR_KHR => log::Level::Error, + EGL_DEBUG_MSG_WARN_KHR => log::Level::Warn, + EGL_DEBUG_MSG_INFO_KHR => log::Level::Info, + _ => log::Level::Debug, + }; + let command = unsafe { ffi::CStr::from_ptr(command_raw) }.to_string_lossy(); + let message = if message_raw.is_null() { + "".into() + } else { + unsafe { ffi::CStr::from_ptr(message_raw) }.to_string_lossy() + }; + + log::log!( + log_severity, + "EGL '{}' code 0x{:x}: {}", + command, + error, + message, + ); +} + +/// A simple wrapper around an X11 or Wayland display handle. +/// Since the logic in this file doesn't actually need to directly +/// persist a wayland connection handle, the only load-bearing +/// enum variant is the X11 variant +#[derive(Debug)] +enum DisplayRef { + X11(ptr::NonNull<raw::c_void>), + Wayland, +} + +impl DisplayRef { + /// Convenience for getting the underlying pointer + fn as_ptr(&self) -> *mut raw::c_void { + match *self { + Self::X11(ptr) => ptr.as_ptr(), + Self::Wayland => unreachable!(), + } + } +} + +/// DisplayOwner ties the lifetime of the system display handle +/// to that of the loaded library. 
+/// It implements Drop to ensure that the display handle is closed +/// prior to unloading the library so that we don't leak the +/// associated file descriptors +#[derive(Debug)] +struct DisplayOwner { + library: libloading::Library, + display: DisplayRef, +} + +impl Drop for DisplayOwner { + fn drop(&mut self) { + match self.display { + DisplayRef::X11(ptr) => unsafe { + let func: libloading::Symbol<XCloseDisplayFun> = + self.library.get(b"XCloseDisplay").unwrap(); + func(ptr.as_ptr()); + }, + DisplayRef::Wayland => {} + } + } +} + +fn open_x_display() -> Option<DisplayOwner> { + log::debug!("Loading X11 library to get the current display"); + unsafe { + let library = libloading::Library::new("libX11.so").ok()?; + let func: libloading::Symbol<XOpenDisplayFun> = library.get(b"XOpenDisplay").unwrap(); + let result = func(ptr::null()); + ptr::NonNull::new(result).map(|ptr| DisplayOwner { + display: DisplayRef::X11(ptr), + library, + }) + } +} + +unsafe fn find_library(paths: &[&str]) -> Option<libloading::Library> { + for path in paths { + match unsafe { libloading::Library::new(path) } { + Ok(lib) => return Some(lib), + _ => continue, + }; + } + None +} + +fn test_wayland_display() -> Option<DisplayOwner> { + /* We try to connect and disconnect here to simply ensure there + * is an active wayland display available. + */ + log::debug!("Loading Wayland library to get the current display"); + let library = unsafe { + let client_library = find_library(&["libwayland-client.so.0", "libwayland-client.so"])?; + let wl_display_connect: libloading::Symbol<WlDisplayConnectFun> = + client_library.get(b"wl_display_connect").unwrap(); + let wl_display_disconnect: libloading::Symbol<WlDisplayDisconnectFun> = + client_library.get(b"wl_display_disconnect").unwrap(); + let display = ptr::NonNull::new(wl_display_connect(ptr::null()))?; + wl_display_disconnect(display.as_ptr()); + find_library(&["libwayland-egl.so.1", "libwayland-egl.so"])? 
+ }; + Some(DisplayOwner { + library, + display: DisplayRef::Wayland, + }) +} + +#[derive(Clone, Copy, Debug)] +enum SrgbFrameBufferKind { + /// No support for SRGB surface + None, + /// Using EGL 1.5's support for colorspaces + Core, + /// Using EGL_KHR_gl_colorspace + Khr, +} + +/// Choose GLES framebuffer configuration. +fn choose_config( + egl: &EglInstance, + display: khronos_egl::Display, + srgb_kind: SrgbFrameBufferKind, +) -> Result<(khronos_egl::Config, bool), crate::InstanceError> { + //TODO: EGL_SLOW_CONFIG + let tiers = [ + ( + "off-screen", + &[ + khronos_egl::SURFACE_TYPE, + khronos_egl::PBUFFER_BIT, + khronos_egl::RENDERABLE_TYPE, + khronos_egl::OPENGL_ES2_BIT, + ][..], + ), + ( + "presentation", + &[khronos_egl::SURFACE_TYPE, khronos_egl::WINDOW_BIT][..], + ), + #[cfg(not(target_os = "android"))] + ( + "native-render", + &[khronos_egl::NATIVE_RENDERABLE, khronos_egl::TRUE as _][..], + ), + ]; + + let mut attributes = Vec::with_capacity(9); + for tier_max in (0..tiers.len()).rev() { + let name = tiers[tier_max].0; + log::debug!("\tTrying {}", name); + + attributes.clear(); + for &(_, tier_attr) in tiers[..=tier_max].iter() { + attributes.extend_from_slice(tier_attr); + } + // make sure the Alpha is enough to support sRGB + match srgb_kind { + SrgbFrameBufferKind::None => {} + _ => { + attributes.push(khronos_egl::ALPHA_SIZE); + attributes.push(8); + } + } + attributes.push(khronos_egl::NONE); + + match egl.choose_first_config(display, &attributes) { + Ok(Some(config)) => { + if tier_max == 1 { + //Note: this has been confirmed to malfunction on Intel+NV laptops, + // but also on Angle. + log::warn!("EGL says it can present to the window but not natively",); + } + // Android emulator can't natively present either. 
+ let tier_threshold = if cfg!(target_os = "android") || cfg!(windows) { + 1 + } else { + 2 + }; + return Ok((config, tier_max >= tier_threshold)); + } + Ok(None) => { + log::warn!("No config found!"); + } + Err(e) => { + log::error!("error in choose_first_config: {:?}", e); + } + } + } + + // TODO: include diagnostic details that are currently logged + Err(crate::InstanceError::new(String::from( + "unable to find an acceptable EGL framebuffer configuration", + ))) +} + +#[derive(Clone, Debug)] +struct EglContext { + instance: Arc<EglInstance>, + version: (i32, i32), + display: khronos_egl::Display, + raw: khronos_egl::Context, + pbuffer: Option<khronos_egl::Surface>, +} + +impl EglContext { + fn make_current(&self) { + self.instance + .make_current(self.display, self.pbuffer, self.pbuffer, Some(self.raw)) + .unwrap(); + } + fn unmake_current(&self) { + self.instance + .make_current(self.display, None, None, None) + .unwrap(); + } +} + +/// A wrapper around a [`glow::Context`] and the required EGL context that uses locking to guarantee +/// exclusive access when shared with multiple threads. +pub struct AdapterContext { + glow: Mutex<glow::Context>, + egl: Option<EglContext>, +} + +unsafe impl Sync for AdapterContext {} +unsafe impl Send for AdapterContext {} + +impl AdapterContext { + pub fn is_owned(&self) -> bool { + self.egl.is_some() + } + + /// Returns the EGL instance. + /// + /// This provides access to EGL functions and the ability to load GL and EGL extension functions. + pub fn egl_instance(&self) -> Option<&EglInstance> { + self.egl.as_ref().map(|egl| &*egl.instance) + } + + /// Returns the EGLDisplay corresponding to the adapter context. + /// + /// Returns [`None`] if the adapter was externally created. + pub fn raw_display(&self) -> Option<&khronos_egl::Display> { + self.egl.as_ref().map(|egl| &egl.display) + } + + /// Returns the EGL version the adapter context was created with. + /// + /// Returns [`None`] if the adapter was externally created. 
+ pub fn egl_version(&self) -> Option<(i32, i32)> { + self.egl.as_ref().map(|egl| egl.version) + } + + pub fn raw_context(&self) -> *mut raw::c_void { + match self.egl { + Some(ref egl) => egl.raw.as_ptr(), + None => ptr::null_mut(), + } + } +} + +struct EglContextLock<'a> { + instance: &'a Arc<EglInstance>, + display: khronos_egl::Display, +} + +/// A guard containing a lock to an [`AdapterContext`] +pub struct AdapterContextLock<'a> { + glow: MutexGuard<'a, glow::Context>, + egl: Option<EglContextLock<'a>>, +} + +impl<'a> std::ops::Deref for AdapterContextLock<'a> { + type Target = glow::Context; + + fn deref(&self) -> &Self::Target { + &self.glow + } +} + +impl<'a> Drop for AdapterContextLock<'a> { + fn drop(&mut self) { + if let Some(egl) = self.egl.take() { + egl.instance + .make_current(egl.display, None, None, None) + .unwrap(); + } + } +} + +impl AdapterContext { + /// Get's the [`glow::Context`] without waiting for a lock + /// + /// # Safety + /// + /// This should only be called when you have manually made sure that the current thread has made + /// the EGL context current and that no other thread also has the EGL context current. + /// Additionally, you must manually make the EGL context **not** current after you are done with + /// it, so that future calls to `lock()` will not fail. + /// + /// > **Note:** Calling this function **will** still lock the [`glow::Context`] which adds an + /// > extra safe-guard against accidental concurrent access to the context. + pub unsafe fn get_without_egl_lock(&self) -> MutexGuard<glow::Context> { + self.glow + .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) + .expect("Could not lock adapter context. This is most-likely a deadlock.") + } + + /// Obtain a lock to the EGL context and get handle to the [`glow::Context`] that can be used to + /// do rendering. + #[track_caller] + pub fn lock<'a>(&'a self) -> AdapterContextLock<'a> { + let glow = self + .glow + // Don't lock forever. 
If it takes longer than 1 second to get the lock we've got a + // deadlock and should panic to show where we got stuck + .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) + .expect("Could not lock adapter context. This is most-likely a deadlock."); + + let egl = self.egl.as_ref().map(|egl| { + egl.make_current(); + EglContextLock { + instance: &egl.instance, + display: egl.display, + } + }); + + AdapterContextLock { glow, egl } + } +} + +#[derive(Debug)] +struct Inner { + /// Note: the context contains a dummy pbuffer (1x1). + /// Required for `eglMakeCurrent` on platforms that doesn't supports `EGL_KHR_surfaceless_context`. + egl: EglContext, + #[allow(unused)] + version: (i32, i32), + supports_native_window: bool, + config: khronos_egl::Config, + #[cfg_attr(Emscripten, allow(dead_code))] + wl_display: Option<*mut raw::c_void>, + #[cfg_attr(Emscripten, allow(dead_code))] + force_gles_minor_version: wgt::Gles3MinorVersion, + /// Method by which the framebuffer should support srgb + srgb_kind: SrgbFrameBufferKind, +} + +impl Inner { + fn create( + flags: wgt::InstanceFlags, + egl: Arc<EglInstance>, + display: khronos_egl::Display, + force_gles_minor_version: wgt::Gles3MinorVersion, + ) -> Result<Self, crate::InstanceError> { + let version = egl.initialize(display).map_err(|e| { + crate::InstanceError::with_source( + String::from("failed to initialize EGL display connection"), + e, + ) + })?; + let vendor = egl + .query_string(Some(display), khronos_egl::VENDOR) + .unwrap(); + let display_extensions = egl + .query_string(Some(display), khronos_egl::EXTENSIONS) + .unwrap() + .to_string_lossy(); + log::debug!("Display vendor {:?}, version {:?}", vendor, version,); + log::debug!( + "Display extensions: {:#?}", + display_extensions.split_whitespace().collect::<Vec<_>>() + ); + + let srgb_kind = if version >= (1, 5) { + log::debug!("\tEGL surface: +srgb"); + SrgbFrameBufferKind::Core + } else if display_extensions.contains("EGL_KHR_gl_colorspace") { + 
log::debug!("\tEGL surface: +srgb khr"); + SrgbFrameBufferKind::Khr + } else { + log::warn!("\tEGL surface: -srgb"); + SrgbFrameBufferKind::None + }; + + if log::max_level() >= log::LevelFilter::Trace { + log::trace!("Configurations:"); + let config_count = egl.get_config_count(display).unwrap(); + let mut configurations = Vec::with_capacity(config_count); + egl.get_configs(display, &mut configurations).unwrap(); + for &config in configurations.iter() { + log::trace!("\tCONFORMANT=0x{:X}, RENDERABLE=0x{:X}, NATIVE_RENDERABLE=0x{:X}, SURFACE_TYPE=0x{:X}, ALPHA_SIZE={}", + egl.get_config_attrib(display, config, khronos_egl::CONFORMANT).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::RENDERABLE_TYPE).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::NATIVE_RENDERABLE).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::SURFACE_TYPE).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::ALPHA_SIZE).unwrap(), + ); + } + } + + let (config, supports_native_window) = choose_config(&egl, display, srgb_kind)?; + egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap(); + + let needs_robustness = true; + let mut khr_context_flags = 0; + let supports_khr_context = display_extensions.contains("EGL_KHR_create_context"); + + //TODO: make it so `Device` == EGL Context + let mut context_attributes = vec![ + khronos_egl::CONTEXT_MAJOR_VERSION, + 3, // Request GLES 3.0 or higher + ]; + + if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic { + context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION); + context_attributes.push(match force_gles_minor_version { + wgt::Gles3MinorVersion::Version0 => 0, + wgt::Gles3MinorVersion::Version1 => 1, + wgt::Gles3MinorVersion::Version2 => 2, + _ => unreachable!(), + }); + } + + if flags.contains(wgt::InstanceFlags::DEBUG) { + if version >= (1, 5) { + log::debug!("\tEGL context: +debug"); + context_attributes.push(khronos_egl::CONTEXT_OPENGL_DEBUG); + 
context_attributes.push(khronos_egl::TRUE as _); + } else if supports_khr_context { + log::debug!("\tEGL context: +debug KHR"); + khr_context_flags |= EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR; + } else { + log::debug!("\tEGL context: -debug"); + } + } + if needs_robustness { + //Note: the core version can fail if robustness is not supported + // (regardless of whether the extension is supported!). + // In fact, Angle does precisely that awful behavior, so we don't try it there. + if version >= (1, 5) && !display_extensions.contains("EGL_ANGLE_") { + log::debug!("\tEGL context: +robust access"); + context_attributes.push(khronos_egl::CONTEXT_OPENGL_ROBUST_ACCESS); + context_attributes.push(khronos_egl::TRUE as _); + } else if display_extensions.contains("EGL_EXT_create_context_robustness") { + log::debug!("\tEGL context: +robust access EXT"); + context_attributes.push(EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT); + context_attributes.push(khronos_egl::TRUE as _); + } else { + //Note: we aren't trying `EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR` + // because it's for desktop GL only, not GLES. + log::warn!("\tEGL context: -robust access"); + } + + //TODO do we need `khronos_egl::CONTEXT_OPENGL_NOTIFICATION_STRATEGY_EXT`? + } + if khr_context_flags != 0 { + context_attributes.push(EGL_CONTEXT_FLAGS_KHR); + context_attributes.push(khr_context_flags); + } + context_attributes.push(khronos_egl::NONE); + let context = match egl.create_context(display, config, None, &context_attributes) { + Ok(context) => context, + Err(e) => { + return Err(crate::InstanceError::with_source( + String::from("unable to create GLES 3.x context"), + e, + )); + } + }; + + // Testing if context can be binded without surface + // and creating dummy pbuffer surface if not. 
+ let pbuffer = if version >= (1, 5) + || display_extensions.contains("EGL_KHR_surfaceless_context") + || cfg!(Emscripten) + { + log::debug!("\tEGL context: +surfaceless"); + None + } else { + let attributes = [ + khronos_egl::WIDTH, + 1, + khronos_egl::HEIGHT, + 1, + khronos_egl::NONE, + ]; + egl.create_pbuffer_surface(display, config, &attributes) + .map(Some) + .map_err(|e| { + crate::InstanceError::with_source( + String::from("error in create_pbuffer_surface"), + e, + ) + })? + }; + + Ok(Self { + egl: EglContext { + instance: egl, + display, + raw: context, + pbuffer, + version, + }, + version, + supports_native_window, + config, + wl_display: None, + srgb_kind, + force_gles_minor_version, + }) + } +} + +impl Drop for Inner { + fn drop(&mut self) { + if let Err(e) = self + .egl + .instance + .destroy_context(self.egl.display, self.egl.raw) + { + log::warn!("Error in destroy_context: {:?}", e); + } + if let Err(e) = self.egl.instance.terminate(self.egl.display) { + log::warn!("Error in terminate: {:?}", e); + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +enum WindowKind { + Wayland, + X11, + AngleX11, + Unknown, +} + +#[derive(Clone, Debug)] +struct WindowSystemInterface { + display_owner: Option<Rc<DisplayOwner>>, + kind: WindowKind, +} + +pub struct Instance { + wsi: WindowSystemInterface, + flags: wgt::InstanceFlags, + inner: Mutex<Inner>, +} + +impl Instance { + pub fn raw_display(&self) -> khronos_egl::Display { + self.inner + .try_lock() + .expect("Could not lock instance. This is most-likely a deadlock.") + .egl + .display + } + + /// Returns the version of the EGL display. + pub fn egl_version(&self) -> (i32, i32) { + self.inner + .try_lock() + .expect("Could not lock instance. This is most-likely a deadlock.") + .version + } + + pub fn egl_config(&self) -> khronos_egl::Config { + self.inner + .try_lock() + .expect("Could not lock instance. 
This is most-likely a deadlock.") + .config + } +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +impl crate::Instance<super::Api> for Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + profiling::scope!("Init OpenGL (EGL) Backend"); + #[cfg(Emscripten)] + let egl_result: Result<EglInstance, khronos_egl::Error> = + Ok(khronos_egl::Instance::new(khronos_egl::Static)); + + #[cfg(not(Emscripten))] + let egl_result = if cfg!(windows) { + unsafe { + khronos_egl::DynamicInstance::<khronos_egl::EGL1_4>::load_required_from_filename( + "libEGL.dll", + ) + } + } else if cfg!(any(target_os = "macos", target_os = "ios")) { + unsafe { + khronos_egl::DynamicInstance::<khronos_egl::EGL1_4>::load_required_from_filename( + "libEGL.dylib", + ) + } + } else { + unsafe { khronos_egl::DynamicInstance::<khronos_egl::EGL1_4>::load_required() } + }; + let egl = match egl_result { + Ok(egl) => Arc::new(egl), + Err(e) => { + return Err(crate::InstanceError::with_source( + String::from("unable to open libEGL"), + e, + )); + } + }; + + let client_extensions = egl.query_string(None, khronos_egl::EXTENSIONS); + + let client_ext_str = match client_extensions { + Ok(ext) => ext.to_string_lossy().into_owned(), + Err(_) => String::new(), + }; + log::debug!( + "Client extensions: {:#?}", + client_ext_str.split_whitespace().collect::<Vec<_>>() + ); + + let wayland_library = if client_ext_str.contains("EGL_EXT_platform_wayland") { + test_wayland_display() + } else { + None + }; + let x11_display_library = if client_ext_str.contains("EGL_EXT_platform_x11") { + open_x_display() + } else { + None + }; + let angle_x11_display_library = if client_ext_str.contains("EGL_ANGLE_platform_angle") { + open_x_display() + } else { + None + }; + + #[cfg(not(Emscripten))] + let egl1_5 = egl.upcast::<khronos_egl::EGL1_5>(); + + #[cfg(Emscripten)] + let egl1_5: Option<&Arc<EglInstance>> = Some(&egl); + + let (display, display_owner, 
wsi_kind) = + if let (Some(library), Some(egl)) = (wayland_library, egl1_5) { + log::info!("Using Wayland platform"); + let display_attributes = [khronos_egl::ATTRIB_NONE]; + let display = unsafe { + egl.get_platform_display( + EGL_PLATFORM_WAYLAND_KHR, + khronos_egl::DEFAULT_DISPLAY, + &display_attributes, + ) + } + .unwrap(); + (display, Some(Rc::new(library)), WindowKind::Wayland) + } else if let (Some(display_owner), Some(egl)) = (x11_display_library, egl1_5) { + log::info!("Using X11 platform"); + let display_attributes = [khronos_egl::ATTRIB_NONE]; + let display = unsafe { + egl.get_platform_display( + EGL_PLATFORM_X11_KHR, + display_owner.display.as_ptr(), + &display_attributes, + ) + } + .unwrap(); + (display, Some(Rc::new(display_owner)), WindowKind::X11) + } else if let (Some(display_owner), Some(egl)) = (angle_x11_display_library, egl1_5) { + log::info!("Using Angle platform with X11"); + let display_attributes = [ + EGL_PLATFORM_ANGLE_NATIVE_PLATFORM_TYPE_ANGLE as khronos_egl::Attrib, + EGL_PLATFORM_X11_KHR as khronos_egl::Attrib, + EGL_PLATFORM_ANGLE_DEBUG_LAYERS_ENABLED as khronos_egl::Attrib, + usize::from(desc.flags.contains(wgt::InstanceFlags::VALIDATION)), + khronos_egl::ATTRIB_NONE, + ]; + let display = unsafe { + egl.get_platform_display( + EGL_PLATFORM_ANGLE_ANGLE, + display_owner.display.as_ptr(), + &display_attributes, + ) + } + .unwrap(); + (display, Some(Rc::new(display_owner)), WindowKind::AngleX11) + } else if client_ext_str.contains("EGL_MESA_platform_surfaceless") { + log::warn!("No windowing system present. Using surfaceless platform"); + let egl = egl1_5.expect("Failed to get EGL 1.5 for surfaceless"); + let display = unsafe { + egl.get_platform_display( + EGL_PLATFORM_SURFACELESS_MESA, + std::ptr::null_mut(), + &[khronos_egl::ATTRIB_NONE], + ) + } + .unwrap(); + + (display, None, WindowKind::Unknown) + } else { + log::warn!("EGL_MESA_platform_surfaceless not available. 
Using default platform"); + let display = unsafe { egl.get_display(khronos_egl::DEFAULT_DISPLAY) }.unwrap(); + (display, None, WindowKind::Unknown) + }; + + if desc.flags.contains(wgt::InstanceFlags::VALIDATION) + && client_ext_str.contains("EGL_KHR_debug") + { + log::debug!("Enabling EGL debug output"); + let function: EglDebugMessageControlFun = { + let addr = egl.get_proc_address("eglDebugMessageControlKHR").unwrap(); + unsafe { std::mem::transmute(addr) } + }; + let attributes = [ + EGL_DEBUG_MSG_CRITICAL_KHR as khronos_egl::Attrib, + 1, + EGL_DEBUG_MSG_ERROR_KHR as khronos_egl::Attrib, + 1, + EGL_DEBUG_MSG_WARN_KHR as khronos_egl::Attrib, + 1, + EGL_DEBUG_MSG_INFO_KHR as khronos_egl::Attrib, + 1, + khronos_egl::ATTRIB_NONE, + ]; + unsafe { (function)(Some(egl_debug_proc), attributes.as_ptr()) }; + } + + let inner = Inner::create(desc.flags, egl, display, desc.gles_minor_version)?; + + Ok(Instance { + wsi: WindowSystemInterface { + display_owner, + kind: wsi_kind, + }, + flags: desc.flags, + inner: Mutex::new(inner), + }) + } + + #[cfg_attr(target_os = "macos", allow(unused, unused_mut, unreachable_code))] + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + use raw_window_handle::RawWindowHandle as Rwh; + + #[cfg_attr(any(target_os = "android", Emscripten), allow(unused_mut))] + let mut inner = self.inner.lock(); + + match (window_handle, display_handle) { + (Rwh::Xlib(_), _) => {} + (Rwh::Xcb(_), _) => {} + (Rwh::Win32(_), _) => {} + (Rwh::AppKit(_), _) => {} + #[cfg(target_os = "android")] + (Rwh::AndroidNdk(handle), _) => { + let format = inner + .egl + .instance + .get_config_attrib( + inner.egl.display, + inner.config, + khronos_egl::NATIVE_VISUAL_ID, + ) + .unwrap(); + + let ret = unsafe { + ANativeWindow_setBuffersGeometry(handle.a_native_window.as_ptr(), 0, 0, format) + }; + + if ret != 0 { + return 
Err(crate::InstanceError::new(format!( + "error {ret} returned from ANativeWindow_setBuffersGeometry", + ))); + } + } + #[cfg(not(Emscripten))] + (Rwh::Wayland(_), raw_window_handle::RawDisplayHandle::Wayland(display_handle)) => { + if inner + .wl_display + .map(|ptr| ptr != display_handle.display.as_ptr()) + .unwrap_or(true) + { + /* Wayland displays are not sharable between surfaces so if the + * surface we receive from this handle is from a different + * display, we must re-initialize the context. + * + * See gfx-rs/gfx#3545 + */ + log::warn!("Re-initializing Gles context due to Wayland window"); + + use std::ops::DerefMut; + let display_attributes = [khronos_egl::ATTRIB_NONE]; + + let display = unsafe { + inner + .egl + .instance + .upcast::<khronos_egl::EGL1_5>() + .unwrap() + .get_platform_display( + EGL_PLATFORM_WAYLAND_KHR, + display_handle.display.as_ptr(), + &display_attributes, + ) + } + .unwrap(); + + let new_inner = Inner::create( + self.flags, + Arc::clone(&inner.egl.instance), + display, + inner.force_gles_minor_version, + )?; + + let old_inner = std::mem::replace(inner.deref_mut(), new_inner); + inner.wl_display = Some(display_handle.display.as_ptr()); + + drop(old_inner); + } + } + #[cfg(Emscripten)] + (Rwh::Web(_), _) => {} + other => { + return Err(crate::InstanceError::new(format!( + "unsupported window: {other:?}" + ))); + } + }; + + inner.egl.unmake_current(); + + Ok(Surface { + egl: inner.egl.clone(), + wsi: self.wsi.clone(), + config: inner.config, + presentable: inner.supports_native_window, + raw_window_handle: window_handle, + swapchain: RwLock::new(None), + srgb_kind: inner.srgb_kind, + }) + } + unsafe fn destroy_surface(&self, _surface: Surface) {} + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let inner = self.inner.lock(); + inner.egl.make_current(); + + let mut gl = unsafe { + glow::Context::from_loader_function(|name| { + inner + .egl + .instance + .get_proc_address(name) + .map_or(ptr::null(), 
|p| p as *const _) + }) + }; + + if self.flags.contains(wgt::InstanceFlags::DEBUG) && gl.supports_debug() { + log::debug!("Max label length: {}", unsafe { + gl.get_parameter_i32(glow::MAX_LABEL_LENGTH) + }); + } + + if self.flags.contains(wgt::InstanceFlags::VALIDATION) && gl.supports_debug() { + log::debug!("Enabling GLES debug output"); + unsafe { gl.enable(glow::DEBUG_OUTPUT) }; + unsafe { gl.debug_message_callback(super::gl_debug_message_callback) }; + } + + inner.egl.unmake_current(); + + unsafe { + super::Adapter::expose(AdapterContext { + glow: Mutex::new(gl), + egl: Some(inner.egl.clone()), + }) + } + .into_iter() + .collect() + } +} + +impl super::Adapter { + /// Creates a new external adapter using the specified loader function. + /// + /// # Safety + /// + /// - The underlying OpenGL ES context must be current. + /// - The underlying OpenGL ES context must be current when interfacing with any objects returned by + /// wgpu-hal from this adapter. + pub unsafe fn new_external( + fun: impl FnMut(&str) -> *const ffi::c_void, + ) -> Option<crate::ExposedAdapter<super::Api>> { + let context = unsafe { glow::Context::from_loader_function(fun) }; + unsafe { + Self::expose(AdapterContext { + glow: Mutex::new(context), + egl: None, + }) + } + } + + pub fn adapter_context(&self) -> &AdapterContext { + &self.shared.context + } +} + +impl super::Device { + /// Returns the underlying EGL context. 
+ pub fn context(&self) -> &AdapterContext { + &self.shared.context + } +} + +#[derive(Debug)] +pub struct Swapchain { + surface: khronos_egl::Surface, + wl_window: Option<*mut raw::c_void>, + framebuffer: glow::Framebuffer, + renderbuffer: glow::Renderbuffer, + /// Extent because the window lies + extent: wgt::Extent3d, + format: wgt::TextureFormat, + format_desc: super::TextureFormatDesc, + #[allow(unused)] + sample_type: wgt::TextureSampleType, +} + +#[derive(Debug)] +pub struct Surface { + egl: EglContext, + wsi: WindowSystemInterface, + config: khronos_egl::Config, + pub(super) presentable: bool, + raw_window_handle: raw_window_handle::RawWindowHandle, + swapchain: RwLock<Option<Swapchain>>, + srgb_kind: SrgbFrameBufferKind, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +impl Surface { + pub(super) unsafe fn present( + &self, + _suf_texture: super::Texture, + context: &AdapterContext, + ) -> Result<(), crate::SurfaceError> { + let gl = unsafe { context.get_without_egl_lock() }; + let swapchain = self.swapchain.read(); + let sc = swapchain.as_ref().unwrap(); + + self.egl + .instance + .make_current( + self.egl.display, + Some(sc.surface), + Some(sc.surface), + Some(self.egl.raw), + ) + .map_err(|e| { + log::error!("make_current(surface) failed: {}", e); + crate::SurfaceError::Lost + })?; + + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.color_mask(true, true, true, true) }; + + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(sc.framebuffer)) }; + // Note the Y-flipping here. GL's presentation is not flipped, + // but main rendering is. Therefore, we Y-flip the output positions + // in the shader, and also this blit. 
+ unsafe { + gl.blit_framebuffer( + 0, + sc.extent.height as i32, + sc.extent.width as i32, + 0, + 0, + 0, + sc.extent.width as i32, + sc.extent.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + + self.egl + .instance + .swap_buffers(self.egl.display, sc.surface) + .map_err(|e| { + log::error!("swap_buffers failed: {}", e); + crate::SurfaceError::Lost + // TODO: should we unset the current context here? + })?; + self.egl + .instance + .make_current(self.egl.display, None, None, None) + .map_err(|e| { + log::error!("make_current(null) failed: {}", e); + crate::SurfaceError::Lost + })?; + + Ok(()) + } + + unsafe fn unconfigure_impl( + &self, + device: &super::Device, + ) -> Option<(khronos_egl::Surface, Option<*mut raw::c_void>)> { + let gl = &device.shared.context.lock(); + match self.swapchain.write().take() { + Some(sc) => { + unsafe { gl.delete_renderbuffer(sc.renderbuffer) }; + unsafe { gl.delete_framebuffer(sc.framebuffer) }; + Some((sc.surface, sc.wl_window)) + } + None => None, + } + } + + pub fn supports_srgb(&self) -> bool { + match self.srgb_kind { + SrgbFrameBufferKind::None => false, + _ => true, + } + } +} + +impl crate::Surface<super::Api> for Surface { + unsafe fn configure( + &self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + use raw_window_handle::RawWindowHandle as Rwh; + + let (surface, wl_window) = match unsafe { self.unconfigure_impl(device) } { + Some(pair) => pair, + None => { + let mut wl_window = None; + let (mut temp_xlib_handle, mut temp_xcb_handle); + #[allow(trivial_casts)] + let native_window_ptr = match (self.wsi.kind, self.raw_window_handle) { + (WindowKind::Unknown | WindowKind::X11, Rwh::Xlib(handle)) => { + temp_xlib_handle = handle.window; + &mut temp_xlib_handle as *mut _ as *mut std::ffi::c_void + } + (WindowKind::AngleX11, Rwh::Xlib(handle)) => { + handle.window as *mut 
std::ffi::c_void + } + (WindowKind::Unknown | WindowKind::X11, Rwh::Xcb(handle)) => { + temp_xcb_handle = handle.window; + &mut temp_xcb_handle as *mut _ as *mut std::ffi::c_void + } + (WindowKind::AngleX11, Rwh::Xcb(handle)) => { + handle.window.get() as *mut std::ffi::c_void + } + (WindowKind::Unknown, Rwh::AndroidNdk(handle)) => { + handle.a_native_window.as_ptr() + } + (WindowKind::Wayland, Rwh::Wayland(handle)) => { + let library = &self.wsi.display_owner.as_ref().unwrap().library; + let wl_egl_window_create: libloading::Symbol<WlEglWindowCreateFun> = + unsafe { library.get(b"wl_egl_window_create") }.unwrap(); + let window = + unsafe { wl_egl_window_create(handle.surface.as_ptr(), 640, 480) } + as *mut _; + wl_window = Some(window); + window + } + #[cfg(Emscripten)] + (WindowKind::Unknown, Rwh::Web(handle)) => handle.id as *mut std::ffi::c_void, + (WindowKind::Unknown, Rwh::Win32(handle)) => { + handle.hwnd.get() as *mut std::ffi::c_void + } + (WindowKind::Unknown, Rwh::AppKit(handle)) => { + #[cfg(not(target_os = "macos"))] + let window_ptr = handle.ns_view.as_ptr(); + #[cfg(target_os = "macos")] + let window_ptr = { + use objc::{msg_send, runtime::Object, sel, sel_impl}; + // ns_view always have a layer and don't need to verify that it exists. + let layer: *mut Object = + msg_send![handle.ns_view.as_ptr() as *mut Object, layer]; + layer as *mut ffi::c_void + }; + window_ptr + } + _ => { + log::warn!( + "Initialized platform {:?} doesn't work with window {:?}", + self.wsi.kind, + self.raw_window_handle + ); + return Err(crate::SurfaceError::Other("incompatible window kind")); + } + }; + + let mut attributes = vec![ + khronos_egl::RENDER_BUFFER, + // We don't want any of the buffering done by the driver, because we + // manage a swapchain on our side. + // Some drivers just fail on surface creation seeing `EGL_SINGLE_BUFFER`. 
+ if cfg!(any(target_os = "android", target_os = "macos")) + || cfg!(windows) + || self.wsi.kind == WindowKind::AngleX11 + { + khronos_egl::BACK_BUFFER + } else { + khronos_egl::SINGLE_BUFFER + }, + ]; + if config.format.is_srgb() { + match self.srgb_kind { + SrgbFrameBufferKind::None => {} + SrgbFrameBufferKind::Core => { + attributes.push(khronos_egl::GL_COLORSPACE); + attributes.push(khronos_egl::GL_COLORSPACE_SRGB); + } + SrgbFrameBufferKind::Khr => { + attributes.push(EGL_GL_COLORSPACE_KHR as i32); + attributes.push(EGL_GL_COLORSPACE_SRGB_KHR as i32); + } + } + } + attributes.push(khronos_egl::ATTRIB_NONE as i32); + + #[cfg(not(Emscripten))] + let egl1_5 = self.egl.instance.upcast::<khronos_egl::EGL1_5>(); + + #[cfg(Emscripten)] + let egl1_5: Option<&Arc<EglInstance>> = Some(&self.egl.instance); + + // Careful, we can still be in 1.4 version even if `upcast` succeeds + let raw_result = match egl1_5 { + Some(egl) if self.wsi.kind != WindowKind::Unknown => { + let attributes_usize = attributes + .into_iter() + .map(|v| v as usize) + .collect::<Vec<_>>(); + unsafe { + egl.create_platform_window_surface( + self.egl.display, + self.config, + native_window_ptr, + &attributes_usize, + ) + } + } + _ => unsafe { + self.egl.instance.create_window_surface( + self.egl.display, + self.config, + native_window_ptr, + Some(&attributes), + ) + }, + }; + + match raw_result { + Ok(raw) => (raw, wl_window), + Err(e) => { + log::warn!("Error in create_window_surface: {:?}", e); + return Err(crate::SurfaceError::Lost); + } + } + } + }; + + if let Some(window) = wl_window { + let library = &self.wsi.display_owner.as_ref().unwrap().library; + let wl_egl_window_resize: libloading::Symbol<WlEglWindowResizeFun> = + unsafe { library.get(b"wl_egl_window_resize") }.unwrap(); + unsafe { + wl_egl_window_resize( + window, + config.extent.width as i32, + config.extent.height as i32, + 0, + 0, + ) + }; + } + + let format_desc = device.shared.describe_texture_format(config.format); + let gl = 
&device.shared.context.lock(); + let renderbuffer = unsafe { gl.create_renderbuffer() }.map_err(|error| { + log::error!("Internal swapchain renderbuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, Some(renderbuffer)) }; + unsafe { + gl.renderbuffer_storage( + glow::RENDERBUFFER, + format_desc.internal, + config.extent.width as _, + config.extent.height as _, + ) + }; + let framebuffer = unsafe { gl.create_framebuffer() }.map_err(|error| { + log::error!("Internal swapchain framebuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(framebuffer)) }; + unsafe { + gl.framebuffer_renderbuffer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + glow::RENDERBUFFER, + Some(renderbuffer), + ) + }; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + + let mut swapchain = self.swapchain.write(); + *swapchain = Some(Swapchain { + surface, + wl_window, + renderbuffer, + framebuffer, + extent: config.extent, + format: config.format, + format_desc, + sample_type: wgt::TextureSampleType::Float { filterable: false }, + }); + + Ok(()) + } + + unsafe fn unconfigure(&self, device: &super::Device) { + if let Some((surface, wl_window)) = unsafe { self.unconfigure_impl(device) } { + self.egl + .instance + .destroy_surface(self.egl.display, surface) + .unwrap(); + if let Some(window) = wl_window { + let library = &self + .wsi + .display_owner + .as_ref() + .expect("unsupported window") + .library; + let wl_egl_window_destroy: libloading::Symbol<WlEglWindowDestroyFun> = + unsafe { library.get(b"wl_egl_window_destroy") }.unwrap(); + unsafe { wl_egl_window_destroy(window) }; + } + } + } + + unsafe fn acquire_texture( + &self, + _timeout_ms: Option<Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let 
swapchain = self.swapchain.read(); + let sc = swapchain.as_ref().unwrap(); + let texture = super::Texture { + inner: super::TextureInner::Renderbuffer { + raw: sc.renderbuffer, + }, + drop_guard: None, + array_layer_count: 1, + mip_level_count: 1, + format: sc.format, + format_desc: sc.format_desc.clone(), + copy_size: crate::CopyExtent { + width: sc.extent.width, + height: sc.extent.height, + depth: 1, + }, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&self, _texture: super::Texture) {} +} diff --git a/third_party/rust/wgpu-hal/src/gles/emscripten.rs b/third_party/rust/wgpu-hal/src/gles/emscripten.rs new file mode 100644 index 0000000000..7372dbd369 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/emscripten.rs @@ -0,0 +1,26 @@ +extern "C" { + /// returns 1 if success. 0 if failure. extension name must be null terminated + fn emscripten_webgl_enable_extension( + context: std::ffi::c_int, + extension: *const std::ffi::c_char, + ) -> std::ffi::c_int; + fn emscripten_webgl_get_current_context() -> std::ffi::c_int; +} +/// Webgl requires extensions to be enabled before using them. +/// This function can be used to enable webgl extension on emscripten target. +/// +/// returns true on success +/// +/// # Safety: +/// +/// - opengl context MUST BE current +/// - extension_name_null_terminated argument must be a valid string with null terminator. +/// - extension must be present. check `glow_context.supported_extensions()` +pub unsafe fn enable_extension(extension_name_null_terminated: &str) -> bool { + unsafe { + emscripten_webgl_enable_extension( + emscripten_webgl_get_current_context(), + extension_name_null_terminated.as_ptr() as _, + ) == 1 + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/mod.rs b/third_party/rust/wgpu-hal/src/gles/mod.rs new file mode 100644 index 0000000000..646419c7fe --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/mod.rs @@ -0,0 +1,990 @@ +/*! 
+# OpenGL ES3 API (aka GLES3). + +Designed to work on Linux and Android, with context provided by EGL. + +## Texture views + +GLES3 doesn't really have separate texture view objects. We have to remember the +original texture and the sub-range into it. The problem, however, is that there is +no way to expose a subset of array layers or mip levels of a sampled texture. + +## Binding model + +Binding model is very different from WebGPU, especially with regard to samplers. +GLES3 has sampler objects, but they aren't separately bindable to the shaders. +Each sampled texture is exposed to the shader as a combined texture-sampler binding. + +When building the pipeline layout, we linearize binding entries based on the groups +(uniform/storage buffers, uniform/storage textures), and record the mapping into +`BindGroupLayoutInfo`. +When a pipeline gets created, we track all the texture-sampler associations +from the static use in the shader. +We only support at most one sampler used with each texture so far. The linear index +of this sampler is stored per texture slot in the `SamplerBindMap` array. + +The texture-sampler pairs get potentially invalidated in 2 places: + - when a new pipeline is set, we update the linear indices of associated samplers + - when a new bind group is set, we update both the textures and the samplers + +We expect that the changes to sampler states between any 2 pipelines of the same layout +will be minimal, if any. + +## Vertex data + +Generally, vertex buffers are marked as dirty and lazily bound on draw. + +GLES3 doesn't support `first_instance` semantics. However, it's easy to support, +since we are forced to do late binding anyway. We just adjust the offsets +into the vertex data. + +### Old path + +In GLES-3.0 and WebGL2, vertex buffer layout is provided +together with the actual buffer binding. +We invalidate the attributes on the vertex buffer change, and re-bind them. 
+ +### New path + +In GLES-3.1 and higher, the vertex buffer layout can be declared separately +from the vertex data itself. This mostly matches WebGPU, however there is a catch: +`stride` needs to be specified with the data, not as a part of the layout. + +To address this, we invalidate the vertex buffers based on: + - whether or not `first_instance` is used + - stride has changed + +## Handling of `base_vertex`, `first_instance`, and `first_vertex` + +Between indirect, the lack of `first_instance` semantics, and the availability of `gl_BaseInstance` +in shaders, getting buffers and builtins to work correctly is a bit tricky. + +We never emulate `base_vertex` and gl_VertexID behaves as `@builtin(vertex_index)` does, so we +never need to do anything about that. + +We always advertise support for `VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW`. + +### GL 4.2+ with ARB_shader_draw_parameters + +- `@builtin(instance_index)` translates to `gl_InstanceID + gl_BaseInstance` +- We bind instance buffers without any offset emulation. +- We advertise support for the `INDIRECT_FIRST_INSTANCE` feature. + +While we can theoretically have a card with 4.2+ support but without ARB_shader_draw_parameters, +we don't bother with that combination. + +### GLES & GL 4.1 + +- `@builtin(instance_index)` translates to `gl_InstanceID + naga_vs_first_instance` +- We bind instance buffers with offset emulation. +- We _do not_ advertise support for `INDIRECT_FIRST_INSTANCE` and cpu-side pretend the `first_instance` is 0 on indirect calls. 
+ +*/ + +///cbindgen:ignore +#[cfg(not(any(windows, webgl)))] +mod egl; +#[cfg(Emscripten)] +mod emscripten; +#[cfg(webgl)] +mod web; +#[cfg(windows)] +mod wgl; + +mod adapter; +mod command; +mod conv; +mod device; +mod queue; + +use crate::{CopyExtent, TextureDescriptor}; + +#[cfg(not(any(windows, webgl)))] +pub use self::egl::{AdapterContext, AdapterContextLock}; +#[cfg(not(any(windows, webgl)))] +use self::egl::{Instance, Surface}; + +#[cfg(webgl)] +pub use self::web::AdapterContext; +#[cfg(webgl)] +use self::web::{Instance, Surface}; + +#[cfg(windows)] +use self::wgl::AdapterContext; +#[cfg(windows)] +use self::wgl::{Instance, Surface}; + +use arrayvec::ArrayVec; + +use glow::HasContext; + +use naga::FastHashMap; +use parking_lot::Mutex; +use std::sync::atomic::{AtomicU32, AtomicU8}; +use std::{fmt, ops::Range, sync::Arc}; + +#[derive(Clone, Debug)] +pub struct Api; + +//Note: we can support more samplers if not every one of them is used at a time, +// but it probably doesn't worth it. +const MAX_TEXTURE_SLOTS: usize = 16; +const MAX_SAMPLERS: usize = 16; +const MAX_VERTEX_ATTRIBUTES: usize = 16; +const ZERO_BUFFER_SIZE: usize = 256 << 10; +const MAX_PUSH_CONSTANTS: usize = 64; +// We have to account for each push constant may need to be set for every shader. 
+const MAX_PUSH_CONSTANT_COMMANDS: usize = MAX_PUSH_CONSTANTS * crate::MAX_CONCURRENT_SHADER_STAGES; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = Texture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + type AccelerationStructure = (); + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +bitflags::bitflags! { + /// Flags that affect internal code paths but do not + /// change the exposed feature set. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct PrivateCapabilities: u32 { + /// Indicates support for `glBufferStorage` allocation. + const BUFFER_ALLOCATION = 1 << 0; + /// Support explicit layouts in shader. + const SHADER_BINDING_LAYOUT = 1 << 1; + /// Support extended shadow sampling instructions. + const SHADER_TEXTURE_SHADOW_LOD = 1 << 2; + /// Support memory barriers. + const MEMORY_BARRIERS = 1 << 3; + /// Vertex buffer layouts separate from the data. + const VERTEX_BUFFER_LAYOUT = 1 << 4; + /// Indicates that buffers used as `GL_ELEMENT_ARRAY_BUFFER` may be created / initialized / used + /// as other targets, if not present they must not be mixed with other targets. + const INDEX_BUFFER_ROLE_CHANGE = 1 << 5; + /// Supports `glGetBufferSubData` + const GET_BUFFER_SUB_DATA = 1 << 7; + /// Supports `f16` color buffers + const COLOR_BUFFER_HALF_FLOAT = 1 << 8; + /// Supports `f11/f10` and `f32` color buffers + const COLOR_BUFFER_FLOAT = 1 << 9; + /// Supports query buffer objects. 
+ const QUERY_BUFFERS = 1 << 11; + /// Supports 64 bit queries via `glGetQueryObjectui64v` + const QUERY_64BIT = 1 << 12; + /// Supports `glTexStorage2D`, etc. + const TEXTURE_STORAGE = 1 << 13; + /// Supports `push_debug_group`, `pop_debug_group` and `debug_message_insert`. + const DEBUG_FNS = 1 << 14; + /// Supports framebuffer invalidation. + const INVALIDATE_FRAMEBUFFER = 1 << 15; + /// Indicates support for `glDrawElementsInstancedBaseVertexBaseInstance` and `ARB_shader_draw_parameters` + /// + /// When this is true, instance offset emulation via vertex buffer rebinding and a shader uniform will be disabled. + const FULLY_FEATURED_INSTANCING = 1 << 16; + } +} + +bitflags::bitflags! { + /// Flags that indicate necessary workarounds for specific devices or driver bugs + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct Workarounds: u32 { + // Needs workaround for Intel Mesa bug: + // https://gitlab.freedesktop.org/mesa/mesa/-/issues/2565. + // + // This comment + // (https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4972/diffs?diff_id=75888#22f5d1004713c9bbf857988c7efb81631ab88f99_323_327) + // seems to indicate all skylake models are effected. 
+ const MESA_I915_SRGB_SHADER_CLEAR = 1 << 0; + /// Buffer map must emulated because it is not supported natively + const EMULATE_BUFFER_MAP = 1 << 1; + } +} + +type BindTarget = u32; + +#[derive(Debug, Clone, Copy)] +enum VertexAttribKind { + Float, // glVertexAttribPointer + Integer, // glVertexAttribIPointer + //Double, // glVertexAttribLPointer +} + +impl Default for VertexAttribKind { + fn default() -> Self { + Self::Float + } +} + +#[derive(Clone, Debug)] +pub struct TextureFormatDesc { + pub internal: u32, + pub external: u32, + pub data_type: u32, +} + +struct AdapterShared { + context: AdapterContext, + private_caps: PrivateCapabilities, + features: wgt::Features, + workarounds: Workarounds, + shading_language_version: naga::back::glsl::Version, + next_shader_id: AtomicU32, + program_cache: Mutex<ProgramCache>, + es: bool, +} + +pub struct Adapter { + shared: Arc<AdapterShared>, +} + +pub struct Device { + shared: Arc<AdapterShared>, + main_vao: glow::VertexArray, + #[cfg(all(native, feature = "renderdoc"))] + render_doc: crate::auxil::renderdoc::RenderDoc, +} + +pub struct Queue { + shared: Arc<AdapterShared>, + features: wgt::Features, + draw_fbo: glow::Framebuffer, + copy_fbo: glow::Framebuffer, + /// Shader program used to clear the screen for [`Workarounds::MESA_I915_SRGB_SHADER_CLEAR`] + /// devices. + shader_clear_program: glow::Program, + /// The uniform location of the color uniform in the shader clear program + shader_clear_program_color_uniform_location: glow::UniformLocation, + /// Keep a reasonably large buffer filled with zeroes, so that we can implement `ClearBuffer` of + /// zeroes by copying from it. 
+ zero_buffer: glow::Buffer, + temp_query_results: Mutex<Vec<u64>>, + draw_buffer_count: AtomicU8, + current_index_buffer: Mutex<Option<glow::Buffer>>, +} + +#[derive(Clone, Debug)] +pub struct Buffer { + raw: Option<glow::Buffer>, + target: BindTarget, + size: wgt::BufferAddress, + map_flags: u32, + data: Option<Arc<std::sync::Mutex<Vec<u8>>>>, +} + +#[cfg(send_sync)] +unsafe impl Sync for Buffer {} +#[cfg(send_sync)] +unsafe impl Send for Buffer {} + +#[derive(Clone, Debug)] +pub enum TextureInner { + Renderbuffer { + raw: glow::Renderbuffer, + }, + DefaultRenderbuffer, + Texture { + raw: glow::Texture, + target: BindTarget, + }, + #[cfg(webgl)] + ExternalFramebuffer { + inner: web_sys::WebGlFramebuffer, + }, +} + +#[cfg(send_sync)] +unsafe impl Sync for TextureInner {} +#[cfg(send_sync)] +unsafe impl Send for TextureInner {} + +impl TextureInner { + fn as_native(&self) -> (glow::Texture, BindTarget) { + match *self { + Self::Renderbuffer { .. } | Self::DefaultRenderbuffer => { + panic!("Unexpected renderbuffer"); + } + Self::Texture { raw, target } => (raw, target), + #[cfg(webgl)] + Self::ExternalFramebuffer { .. } => panic!("Unexpected external framebuffer"), + } + } +} + +#[derive(Debug)] +pub struct Texture { + pub inner: TextureInner, + pub drop_guard: Option<crate::DropGuard>, + pub mip_level_count: u32, + pub array_layer_count: u32, + pub format: wgt::TextureFormat, + #[allow(unused)] + pub format_desc: TextureFormatDesc, + pub copy_size: CopyExtent, +} + +impl Texture { + pub fn default_framebuffer(format: wgt::TextureFormat) -> Self { + Self { + inner: TextureInner::DefaultRenderbuffer, + drop_guard: None, + mip_level_count: 1, + array_layer_count: 1, + format, + format_desc: TextureFormatDesc { + internal: 0, + external: 0, + data_type: 0, + }, + copy_size: CopyExtent { + width: 0, + height: 0, + depth: 0, + }, + } + } + + /// Returns the `target`, whether the image is 3d and whether the image is a cubemap. 
+ fn get_info_from_desc(desc: &TextureDescriptor) -> u32 { + match desc.dimension { + wgt::TextureDimension::D1 => glow::TEXTURE_2D, + wgt::TextureDimension::D2 => { + // HACK: detect a cube map; forces cube compatible textures to be cube textures + match (desc.is_cube_compatible(), desc.size.depth_or_array_layers) { + (false, 1) => glow::TEXTURE_2D, + (false, _) => glow::TEXTURE_2D_ARRAY, + (true, 6) => glow::TEXTURE_CUBE_MAP, + (true, _) => glow::TEXTURE_CUBE_MAP_ARRAY, + } + } + wgt::TextureDimension::D3 => glow::TEXTURE_3D, + } + } +} + +#[derive(Clone, Debug)] +pub struct TextureView { + inner: TextureInner, + aspects: crate::FormatAspects, + mip_levels: Range<u32>, + array_layers: Range<u32>, + format: wgt::TextureFormat, +} + +#[derive(Debug)] +pub struct Sampler { + raw: glow::Sampler, +} + +#[derive(Debug)] +pub struct BindGroupLayout { + entries: Arc<[wgt::BindGroupLayoutEntry]>, +} + +#[derive(Debug)] +struct BindGroupLayoutInfo { + entries: Arc<[wgt::BindGroupLayoutEntry]>, + /// Mapping of resources, indexed by `binding`, into the whole layout space. + /// For texture resources, the value is the texture slot index. + /// For sampler resources, the value is the index of the sampler in the whole layout. + /// For buffers, the value is the uniform or storage slot index. 
+ /// For unused bindings, the value is `!0` + binding_to_slot: Box<[u8]>, +} + +#[derive(Debug)] +pub struct PipelineLayout { + group_infos: Box<[BindGroupLayoutInfo]>, + naga_options: naga::back::glsl::Options, +} + +impl PipelineLayout { + fn get_slot(&self, br: &naga::ResourceBinding) -> u8 { + let group_info = &self.group_infos[br.group as usize]; + group_info.binding_to_slot[br.binding as usize] + } +} + +#[derive(Debug)] +enum BindingRegister { + UniformBuffers, + StorageBuffers, + Textures, + Images, +} + +#[derive(Debug)] +enum RawBinding { + Buffer { + raw: glow::Buffer, + offset: i32, + size: i32, + }, + Texture { + raw: glow::Texture, + target: BindTarget, + aspects: crate::FormatAspects, + mip_levels: Range<u32>, + //TODO: array layers + }, + Image(ImageBinding), + Sampler(glow::Sampler), +} + +#[derive(Debug)] +pub struct BindGroup { + contents: Box<[RawBinding]>, +} + +type ShaderId = u32; + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + label: Option<String>, + id: ShaderId, +} + +#[derive(Clone, Debug, Default)] +struct VertexFormatDesc { + element_count: i32, + element_format: u32, + attrib_kind: VertexAttribKind, +} + +#[derive(Clone, Debug, Default)] +struct AttributeDesc { + location: u32, + offset: u32, + buffer_index: u32, + format_desc: VertexFormatDesc, +} + +#[derive(Clone, Debug)] +struct BufferBinding { + raw: glow::Buffer, + offset: wgt::BufferAddress, +} + +#[derive(Clone, Debug)] +struct ImageBinding { + raw: glow::Texture, + mip_level: u32, + array_layer: Option<u32>, + access: u32, + format: u32, +} + +#[derive(Clone, Debug, Default, PartialEq)] +struct VertexBufferDesc { + step: wgt::VertexStepMode, + stride: u32, +} + +#[derive(Clone, Debug)] +struct PushConstantDesc { + location: glow::UniformLocation, + ty: naga::TypeInner, + offset: u32, + size_bytes: u32, +} + +#[cfg(send_sync)] +unsafe impl Sync for PushConstantDesc {} +#[cfg(send_sync)] +unsafe impl Send for PushConstantDesc {} + +/// For each 
texture in the pipeline layout, store the index of the only +/// sampler (in this layout) that the texture is used with. +type SamplerBindMap = [Option<u8>; MAX_TEXTURE_SLOTS]; + +#[derive(Debug)] +struct PipelineInner { + program: glow::Program, + sampler_map: SamplerBindMap, + first_instance_location: Option<glow::UniformLocation>, + push_constant_descs: ArrayVec<PushConstantDesc, MAX_PUSH_CONSTANT_COMMANDS>, +} + +#[derive(Clone, Debug)] +struct DepthState { + function: u32, + mask: bool, +} + +#[derive(Clone, Debug, PartialEq)] +struct BlendComponent { + src: u32, + dst: u32, + equation: u32, +} + +#[derive(Clone, Debug, PartialEq)] +struct BlendDesc { + alpha: BlendComponent, + color: BlendComponent, +} + +#[derive(Clone, Debug, Default, PartialEq)] +struct ColorTargetDesc { + mask: wgt::ColorWrites, + blend: Option<BlendDesc>, +} + +#[derive(PartialEq, Eq, Hash)] +struct ProgramStage { + naga_stage: naga::ShaderStage, + shader_id: ShaderId, + entry_point: String, +} + +#[derive(PartialEq, Eq, Hash)] +struct ProgramCacheKey { + stages: ArrayVec<ProgramStage, 3>, + group_to_binding_to_slot: Box<[Box<[u8]>]>, +} + +type ProgramCache = FastHashMap<ProgramCacheKey, Result<Arc<PipelineInner>, crate::PipelineError>>; + +#[derive(Debug)] +pub struct RenderPipeline { + inner: Arc<PipelineInner>, + primitive: wgt::PrimitiveState, + vertex_buffers: Box<[VertexBufferDesc]>, + vertex_attributes: Box<[AttributeDesc]>, + color_targets: Box<[ColorTargetDesc]>, + depth: Option<DepthState>, + depth_bias: wgt::DepthBiasState, + stencil: Option<StencilState>, + alpha_to_coverage_enabled: bool, +} + +#[cfg(send_sync)] +unsafe impl Sync for RenderPipeline {} +#[cfg(send_sync)] +unsafe impl Send for RenderPipeline {} + +#[derive(Debug)] +pub struct ComputePipeline { + inner: Arc<PipelineInner>, +} + +#[cfg(send_sync)] +unsafe impl Sync for ComputePipeline {} +#[cfg(send_sync)] +unsafe impl Send for ComputePipeline {} + +#[derive(Debug)] +pub struct QuerySet { + queries: 
Box<[glow::Query]>, + target: BindTarget, +} + +#[derive(Debug)] +pub struct Fence { + last_completed: crate::FenceValue, + pending: Vec<(crate::FenceValue, glow::Fence)>, +} + +#[cfg(any( + not(target_arch = "wasm32"), + all( + feature = "fragile-send-sync-non-atomic-wasm", + not(target_feature = "atomics") + ) +))] +unsafe impl Send for Fence {} +#[cfg(any( + not(target_arch = "wasm32"), + all( + feature = "fragile-send-sync-non-atomic-wasm", + not(target_feature = "atomics") + ) +))] +unsafe impl Sync for Fence {} + +impl Fence { + fn get_latest(&self, gl: &glow::Context) -> crate::FenceValue { + let mut max_value = self.last_completed; + for &(value, sync) in self.pending.iter() { + let status = unsafe { gl.get_sync_status(sync) }; + if status == glow::SIGNALED { + max_value = value; + } + } + max_value + } + + fn maintain(&mut self, gl: &glow::Context) { + let latest = self.get_latest(gl); + for &(value, sync) in self.pending.iter() { + if value <= latest { + unsafe { + gl.delete_sync(sync); + } + } + } + self.pending.retain(|&(value, _)| value > latest); + self.last_completed = latest; + } +} + +#[derive(Clone, Debug, PartialEq)] +struct StencilOps { + pass: u32, + fail: u32, + depth_fail: u32, +} + +impl Default for StencilOps { + fn default() -> Self { + Self { + pass: glow::KEEP, + fail: glow::KEEP, + depth_fail: glow::KEEP, + } + } +} + +#[derive(Clone, Debug, PartialEq)] +struct StencilSide { + function: u32, + mask_read: u32, + mask_write: u32, + reference: u32, + ops: StencilOps, +} + +impl Default for StencilSide { + fn default() -> Self { + Self { + function: glow::ALWAYS, + mask_read: 0xFF, + mask_write: 0xFF, + reference: 0, + ops: StencilOps::default(), + } + } +} + +#[derive(Debug, Clone, Default)] +struct StencilState { + front: StencilSide, + back: StencilSide, +} + +#[derive(Clone, Debug, Default, PartialEq)] +struct PrimitiveState { + front_face: u32, + cull_face: u32, + unclipped_depth: bool, + polygon_mode: u32, +} + +type 
InvalidatedAttachments = ArrayVec<u32, { crate::MAX_COLOR_ATTACHMENTS + 2 }>; + +#[derive(Debug)] +enum Command { + Draw { + topology: u32, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + first_instance_location: Option<glow::UniformLocation>, + }, + DrawIndexed { + topology: u32, + index_type: u32, + index_count: u32, + index_offset: wgt::BufferAddress, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + first_instance_location: Option<glow::UniformLocation>, + }, + DrawIndirect { + topology: u32, + indirect_buf: glow::Buffer, + indirect_offset: wgt::BufferAddress, + first_instance_location: Option<glow::UniformLocation>, + }, + DrawIndexedIndirect { + topology: u32, + index_type: u32, + indirect_buf: glow::Buffer, + indirect_offset: wgt::BufferAddress, + first_instance_location: Option<glow::UniformLocation>, + }, + Dispatch([u32; 3]), + DispatchIndirect { + indirect_buf: glow::Buffer, + indirect_offset: wgt::BufferAddress, + }, + ClearBuffer { + dst: Buffer, + dst_target: BindTarget, + range: crate::MemoryRange, + }, + CopyBufferToBuffer { + src: Buffer, + src_target: BindTarget, + dst: Buffer, + dst_target: BindTarget, + copy: crate::BufferCopy, + }, + #[cfg(webgl)] + CopyExternalImageToTexture { + src: wgt::ImageCopyExternalImage, + dst: glow::Texture, + dst_target: BindTarget, + dst_format: wgt::TextureFormat, + dst_premultiplication: bool, + copy: crate::TextureCopy, + }, + CopyTextureToTexture { + src: glow::Texture, + src_target: BindTarget, + dst: glow::Texture, + dst_target: BindTarget, + copy: crate::TextureCopy, + }, + CopyBufferToTexture { + src: Buffer, + #[allow(unused)] + src_target: BindTarget, + dst: glow::Texture, + dst_target: BindTarget, + dst_format: wgt::TextureFormat, + copy: crate::BufferTextureCopy, + }, + CopyTextureToBuffer { + src: glow::Texture, + src_target: BindTarget, + src_format: wgt::TextureFormat, + dst: Buffer, + #[allow(unused)] + dst_target: BindTarget, + copy: 
crate::BufferTextureCopy, + }, + SetIndexBuffer(glow::Buffer), + BeginQuery(glow::Query, BindTarget), + EndQuery(BindTarget), + TimestampQuery(glow::Query), + CopyQueryResults { + query_range: Range<u32>, + dst: Buffer, + dst_target: BindTarget, + dst_offset: wgt::BufferAddress, + }, + ResetFramebuffer { + is_default: bool, + }, + BindAttachment { + attachment: u32, + view: TextureView, + }, + ResolveAttachment { + attachment: u32, + dst: TextureView, + size: wgt::Extent3d, + }, + InvalidateAttachments(InvalidatedAttachments), + SetDrawColorBuffers(u8), + ClearColorF { + draw_buffer: u32, + color: [f32; 4], + is_srgb: bool, + }, + ClearColorU(u32, [u32; 4]), + ClearColorI(u32, [i32; 4]), + ClearDepth(f32), + ClearStencil(u32), + // Clearing both the depth and stencil buffer individually appears to + // result in the stencil buffer failing to clear, atleast in WebGL. + // It is also more efficient to emit a single command instead of two for + // this. + ClearDepthAndStencil(f32, u32), + BufferBarrier(glow::Buffer, crate::BufferUses), + TextureBarrier(crate::TextureUses), + SetViewport { + rect: crate::Rect<i32>, + depth: Range<f32>, + }, + SetScissor(crate::Rect<i32>), + SetStencilFunc { + face: u32, + function: u32, + reference: u32, + read_mask: u32, + }, + SetStencilOps { + face: u32, + write_mask: u32, + ops: StencilOps, + }, + SetDepth(DepthState), + SetDepthBias(wgt::DepthBiasState), + ConfigureDepthStencil(crate::FormatAspects), + SetAlphaToCoverage(bool), + SetVertexAttribute { + buffer: Option<glow::Buffer>, + buffer_desc: VertexBufferDesc, + attribute_desc: AttributeDesc, + }, + UnsetVertexAttribute(u32), + SetVertexBuffer { + index: u32, + buffer: BufferBinding, + buffer_desc: VertexBufferDesc, + }, + SetProgram(glow::Program), + SetPrimitive(PrimitiveState), + SetBlendConstant([f32; 4]), + SetColorTarget { + draw_buffer_index: Option<u32>, + desc: ColorTargetDesc, + }, + BindBuffer { + target: BindTarget, + slot: u32, + buffer: glow::Buffer, + offset: 
i32, + size: i32, + }, + BindSampler(u32, Option<glow::Sampler>), + BindTexture { + slot: u32, + texture: glow::Texture, + target: BindTarget, + aspects: crate::FormatAspects, + mip_levels: Range<u32>, + }, + BindImage { + slot: u32, + binding: ImageBinding, + }, + InsertDebugMarker(Range<u32>), + PushDebugGroup(Range<u32>), + PopDebugGroup, + SetPushConstants { + uniform: PushConstantDesc, + /// Offset from the start of the `data_bytes` + offset: u32, + }, +} + +#[derive(Default)] +pub struct CommandBuffer { + label: Option<String>, + commands: Vec<Command>, + data_bytes: Vec<u8>, + queries: Vec<glow::Query>, +} + +impl fmt::Debug for CommandBuffer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut builder = f.debug_struct("CommandBuffer"); + if let Some(ref label) = self.label { + builder.field("label", label); + } + builder.finish() + } +} + +#[cfg(send_sync)] +unsafe impl Sync for CommandBuffer {} +#[cfg(send_sync)] +unsafe impl Send for CommandBuffer {} + +//TODO: we would have something like `Arc<typed_arena::Arena>` +// here and in the command buffers. So that everything grows +// inside the encoder and stays there until `reset_all`. 
+ +pub struct CommandEncoder { + cmd_buffer: CommandBuffer, + state: command::State, + private_caps: PrivateCapabilities, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("cmd_buffer", &self.cmd_buffer) + .finish() + } +} + +#[cfg(send_sync)] +unsafe impl Sync for CommandEncoder {} +#[cfg(send_sync)] +unsafe impl Send for CommandEncoder {} + +#[cfg(not(webgl))] +fn gl_debug_message_callback(source: u32, gltype: u32, id: u32, severity: u32, message: &str) { + let source_str = match source { + glow::DEBUG_SOURCE_API => "API", + glow::DEBUG_SOURCE_WINDOW_SYSTEM => "Window System", + glow::DEBUG_SOURCE_SHADER_COMPILER => "ShaderCompiler", + glow::DEBUG_SOURCE_THIRD_PARTY => "Third Party", + glow::DEBUG_SOURCE_APPLICATION => "Application", + glow::DEBUG_SOURCE_OTHER => "Other", + _ => unreachable!(), + }; + + let log_severity = match severity { + glow::DEBUG_SEVERITY_HIGH => log::Level::Error, + glow::DEBUG_SEVERITY_MEDIUM => log::Level::Warn, + glow::DEBUG_SEVERITY_LOW => log::Level::Info, + glow::DEBUG_SEVERITY_NOTIFICATION => log::Level::Trace, + _ => unreachable!(), + }; + + let type_str = match gltype { + glow::DEBUG_TYPE_DEPRECATED_BEHAVIOR => "Deprecated Behavior", + glow::DEBUG_TYPE_ERROR => "Error", + glow::DEBUG_TYPE_MARKER => "Marker", + glow::DEBUG_TYPE_OTHER => "Other", + glow::DEBUG_TYPE_PERFORMANCE => "Performance", + glow::DEBUG_TYPE_POP_GROUP => "Pop Group", + glow::DEBUG_TYPE_PORTABILITY => "Portability", + glow::DEBUG_TYPE_PUSH_GROUP => "Push Group", + glow::DEBUG_TYPE_UNDEFINED_BEHAVIOR => "Undefined Behavior", + _ => unreachable!(), + }; + + let _ = std::panic::catch_unwind(|| { + log::log!( + log_severity, + "GLES: [{}/{}] ID {} : {}", + source_str, + type_str, + id, + message + ); + }); + + if cfg!(debug_assertions) && log_severity == log::Level::Error { + // Set canary and continue + crate::VALIDATION_CANARY.add(message.to_string()); + } +} diff 
/// Returns the UTF-8 string stored in `data` at the byte offsets given by `range`.
///
/// # Panics
///
/// Panics if `range` does not lie within `data` or if the selected bytes are not valid UTF-8.
fn extract_marker<'a>(data: &'a [u8], range: &std::ops::Range<u32>) -> &'a str {
    let (start, end) = (range.start as usize, range.end as usize);
    let bytes = &data[start..end];
    std::str::from_utf8(bytes).unwrap()
}
draw_buffer]) }; + unsafe { gl.draw_arrays(glow::TRIANGLES, 0, 3) }; + + let draw_buffer_count = self.draw_buffer_count.load(Ordering::Relaxed); + if draw_buffer_count != 0 { + // Reset the draw buffers to what they were before the clear + let indices = (0..draw_buffer_count as u32) + .map(|i| glow::COLOR_ATTACHMENT0 + i) + .collect::<ArrayVec<_, { crate::MAX_COLOR_ATTACHMENTS }>>(); + unsafe { gl.draw_buffers(&indices) }; + } + } + + unsafe fn reset_state(&self, gl: &glow::Context) { + unsafe { gl.use_program(None) }; + unsafe { gl.bind_framebuffer(glow::FRAMEBUFFER, None) }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.disable(glow::BLEND) }; + unsafe { gl.disable(glow::CULL_FACE) }; + unsafe { gl.disable(glow::POLYGON_OFFSET_FILL) }; + unsafe { gl.disable(glow::SAMPLE_ALPHA_TO_COVERAGE) }; + if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + unsafe { gl.disable(glow::DEPTH_CLAMP) }; + } + + unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, None) }; + let mut current_index_buffer = self.current_index_buffer.lock(); + *current_index_buffer = None; + } + + unsafe fn set_attachment( + &self, + gl: &glow::Context, + fbo_target: u32, + attachment: u32, + view: &super::TextureView, + ) { + match view.inner { + super::TextureInner::Renderbuffer { raw } => { + unsafe { + gl.framebuffer_renderbuffer( + fbo_target, + attachment, + glow::RENDERBUFFER, + Some(raw), + ) + }; + } + super::TextureInner::DefaultRenderbuffer => panic!("Unexpected default RBO"), + super::TextureInner::Texture { raw, target } => { + let num_layers = view.array_layers.end - view.array_layers.start; + if num_layers > 1 { + #[cfg(webgl)] + unsafe { + gl.framebuffer_texture_multiview_ovr( + fbo_target, + attachment, + Some(raw), + view.mip_levels.start as i32, + view.array_layers.start as i32, + num_layers as i32, + ) + }; + } else if is_layered_target(target) { + unsafe { + 
gl.framebuffer_texture_layer( + fbo_target, + attachment, + Some(raw), + view.mip_levels.start as i32, + view.array_layers.start as i32, + ) + }; + } else { + unsafe { + assert_eq!(view.mip_levels.len(), 1); + gl.framebuffer_texture_2d( + fbo_target, + attachment, + get_2d_target(target, view.array_layers.start), + Some(raw), + view.mip_levels.start as i32, + ) + }; + } + } + #[cfg(webgl)] + super::TextureInner::ExternalFramebuffer { ref inner } => unsafe { + gl.bind_external_framebuffer(glow::FRAMEBUFFER, inner); + }, + } + } + + unsafe fn process( + &self, + gl: &glow::Context, + command: &C, + #[cfg_attr(target_arch = "wasm32", allow(unused))] data_bytes: &[u8], + queries: &[glow::Query], + ) { + match *command { + C::Draw { + topology, + first_vertex, + vertex_count, + instance_count, + first_instance, + ref first_instance_location, + } => { + let supports_full_instancing = self + .shared + .private_caps + .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING); + + if supports_full_instancing { + unsafe { + gl.draw_arrays_instanced_base_instance( + topology, + first_vertex as i32, + vertex_count as i32, + instance_count as i32, + first_instance, + ) + } + } else { + unsafe { + gl.uniform_1_u32(first_instance_location.as_ref(), first_instance); + } + + // Don't use `gl.draw_arrays` for `instance_count == 1`. + // Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `draw_arrays`. 
+ // See https://github.com/gfx-rs/wgpu/issues/3578 + unsafe { + gl.draw_arrays_instanced( + topology, + first_vertex as i32, + vertex_count as i32, + instance_count as i32, + ) + } + }; + } + C::DrawIndexed { + topology, + index_type, + index_count, + index_offset, + base_vertex, + first_instance, + instance_count, + ref first_instance_location, + } => { + match base_vertex { + 0 => { + unsafe { + gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) + }; + + unsafe { + // Don't use `gl.draw_elements`/`gl.draw_elements_base_vertex` for `instance_count == 1`. + // Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `gl.draw_elements`/`gl.draw_elements_base_vertex`. + // See https://github.com/gfx-rs/wgpu/issues/3578 + gl.draw_elements_instanced( + topology, + index_count as i32, + index_type, + index_offset as i32, + instance_count as i32, + ) + } + } + _ => { + let supports_full_instancing = self + .shared + .private_caps + .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING); + + if supports_full_instancing { + unsafe { + gl.draw_elements_instanced_base_vertex_base_instance( + topology, + index_count as i32, + index_type, + index_offset as i32, + instance_count as i32, + base_vertex, + first_instance, + ) + } + } else { + unsafe { + gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) + }; + + // If we've gotten here, wgpu-core has already validated that this function exists via the DownlevelFlags::BASE_VERTEX feature. 
+ unsafe { + gl.draw_elements_instanced_base_vertex( + topology, + index_count as _, + index_type, + index_offset as i32, + instance_count as i32, + base_vertex, + ) + } + } + } + } + } + C::DrawIndirect { + topology, + indirect_buf, + indirect_offset, + ref first_instance_location, + } => { + unsafe { gl.uniform_1_u32(first_instance_location.as_ref(), 0) }; + + unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(indirect_buf)) }; + unsafe { gl.draw_arrays_indirect_offset(topology, indirect_offset as i32) }; + } + C::DrawIndexedIndirect { + topology, + index_type, + indirect_buf, + indirect_offset, + ref first_instance_location, + } => { + unsafe { gl.uniform_1_u32(first_instance_location.as_ref(), 0) }; + + unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(indirect_buf)) }; + unsafe { + gl.draw_elements_indirect_offset(topology, index_type, indirect_offset as i32) + }; + } + C::Dispatch(group_counts) => { + unsafe { gl.dispatch_compute(group_counts[0], group_counts[1], group_counts[2]) }; + } + C::DispatchIndirect { + indirect_buf, + indirect_offset, + } => { + unsafe { gl.bind_buffer(glow::DISPATCH_INDIRECT_BUFFER, Some(indirect_buf)) }; + unsafe { gl.dispatch_compute_indirect(indirect_offset as i32) }; + } + C::ClearBuffer { + ref dst, + dst_target, + ref range, + } => match dst.raw { + Some(buffer) => { + // When `INDEX_BUFFER_ROLE_CHANGE` isn't available, we can't copy into the + // index buffer from the zero buffer. This would fail in Chrome with the + // following message: + // + // > Cannot copy into an element buffer destination from a non-element buffer + // > source + // + // Instead, we'll upload zeroes into the buffer. 
+ let can_use_zero_buffer = self + .shared + .private_caps + .contains(super::PrivateCapabilities::INDEX_BUFFER_ROLE_CHANGE) + || dst_target != glow::ELEMENT_ARRAY_BUFFER; + + if can_use_zero_buffer { + unsafe { gl.bind_buffer(glow::COPY_READ_BUFFER, Some(self.zero_buffer)) }; + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + let mut dst_offset = range.start; + while dst_offset < range.end { + let size = (range.end - dst_offset).min(super::ZERO_BUFFER_SIZE as u64); + unsafe { + gl.copy_buffer_sub_data( + glow::COPY_READ_BUFFER, + dst_target, + 0, + dst_offset as i32, + size as i32, + ) + }; + dst_offset += size; + } + } else { + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + let zeroes = vec![0u8; (range.end - range.start) as usize]; + unsafe { + gl.buffer_sub_data_u8_slice(dst_target, range.start as i32, &zeroes) + }; + } + } + None => { + dst.data.as_ref().unwrap().lock().unwrap().as_mut_slice() + [range.start as usize..range.end as usize] + .fill(0); + } + }, + C::CopyBufferToBuffer { + ref src, + src_target, + ref dst, + dst_target, + copy, + } => { + let copy_src_target = glow::COPY_READ_BUFFER; + let is_index_buffer_only_element_dst = !self + .shared + .private_caps + .contains(super::PrivateCapabilities::INDEX_BUFFER_ROLE_CHANGE) + && dst_target == glow::ELEMENT_ARRAY_BUFFER + || src_target == glow::ELEMENT_ARRAY_BUFFER; + + // WebGL not allowed to copy data from other targets to element buffer and can't copy element data to other buffers + let copy_dst_target = if is_index_buffer_only_element_dst { + glow::ELEMENT_ARRAY_BUFFER + } else { + glow::COPY_WRITE_BUFFER + }; + let size = copy.size.get() as usize; + match (src.raw, dst.raw) { + (Some(ref src), Some(ref dst)) => { + unsafe { gl.bind_buffer(copy_src_target, Some(*src)) }; + unsafe { gl.bind_buffer(copy_dst_target, Some(*dst)) }; + unsafe { + gl.copy_buffer_sub_data( + copy_src_target, + copy_dst_target, + copy.src_offset as _, + copy.dst_offset as _, + copy.size.get() as _, + ) + }; + 
} + (Some(src), None) => { + let mut data = dst.data.as_ref().unwrap().lock().unwrap(); + let dst_data = &mut data.as_mut_slice() + [copy.dst_offset as usize..copy.dst_offset as usize + size]; + + unsafe { gl.bind_buffer(copy_src_target, Some(src)) }; + unsafe { + self.shared.get_buffer_sub_data( + gl, + copy_src_target, + copy.src_offset as i32, + dst_data, + ) + }; + } + (None, Some(dst)) => { + let data = src.data.as_ref().unwrap().lock().unwrap(); + let src_data = &data.as_slice() + [copy.src_offset as usize..copy.src_offset as usize + size]; + unsafe { gl.bind_buffer(copy_dst_target, Some(dst)) }; + unsafe { + gl.buffer_sub_data_u8_slice( + copy_dst_target, + copy.dst_offset as i32, + src_data, + ) + }; + } + (None, None) => { + todo!() + } + } + unsafe { gl.bind_buffer(copy_src_target, None) }; + if is_index_buffer_only_element_dst { + unsafe { + gl.bind_buffer( + glow::ELEMENT_ARRAY_BUFFER, + *self.current_index_buffer.lock(), + ) + }; + } else { + unsafe { gl.bind_buffer(copy_dst_target, None) }; + } + } + #[cfg(webgl)] + C::CopyExternalImageToTexture { + ref src, + dst, + dst_target, + dst_format, + dst_premultiplication, + ref copy, + } => { + const UNPACK_FLIP_Y_WEBGL: u32 = + web_sys::WebGl2RenderingContext::UNPACK_FLIP_Y_WEBGL; + const UNPACK_PREMULTIPLY_ALPHA_WEBGL: u32 = + web_sys::WebGl2RenderingContext::UNPACK_PREMULTIPLY_ALPHA_WEBGL; + + unsafe { + if src.flip_y { + gl.pixel_store_bool(UNPACK_FLIP_Y_WEBGL, true); + } + if dst_premultiplication { + gl.pixel_store_bool(UNPACK_PREMULTIPLY_ALPHA_WEBGL, true); + } + } + + unsafe { gl.bind_texture(dst_target, Some(dst)) }; + let format_desc = self.shared.describe_texture_format(dst_format); + if is_layered_target(dst_target) { + let z_offset = get_z_offset(dst_target, ©.dst_base); + + match src.source { + wgt::ExternalImageSource::ImageBitmap(ref b) => unsafe { + gl.tex_sub_image_3d_with_image_bitmap( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + 
copy.dst_base.origin.y as i32, + z_offset as i32, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + b, + ); + }, + wgt::ExternalImageSource::HTMLVideoElement(ref v) => unsafe { + gl.tex_sub_image_3d_with_html_video_element( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset as i32, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + v, + ); + }, + wgt::ExternalImageSource::HTMLCanvasElement(ref c) => unsafe { + gl.tex_sub_image_3d_with_html_canvas_element( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset as i32, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + c, + ); + }, + wgt::ExternalImageSource::OffscreenCanvas(_) => unreachable!(), + } + } else { + let dst_target = get_2d_target(dst_target, copy.dst_base.array_layer); + + match src.source { + wgt::ExternalImageSource::ImageBitmap(ref b) => unsafe { + gl.tex_sub_image_2d_with_image_bitmap_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + b, + ); + }, + wgt::ExternalImageSource::HTMLVideoElement(ref v) => unsafe { + gl.tex_sub_image_2d_with_html_video_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + v, + ) + }, + wgt::ExternalImageSource::HTMLCanvasElement(ref c) => unsafe { + gl.tex_sub_image_2d_with_html_canvas_and_width_and_height( + dst_target, + 
copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + c, + ) + }, + wgt::ExternalImageSource::OffscreenCanvas(_) => unreachable!(), + } + } + + unsafe { + if src.flip_y { + gl.pixel_store_bool(UNPACK_FLIP_Y_WEBGL, false); + } + if dst_premultiplication { + gl.pixel_store_bool(UNPACK_PREMULTIPLY_ALPHA_WEBGL, false); + } + } + } + C::CopyTextureToTexture { + src, + src_target, + dst, + dst_target, + ref copy, + } => { + //TODO: handle 3D copies + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)) }; + if is_layered_target(src_target) { + //TODO: handle GLES without framebuffer_texture_3d + unsafe { + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.src_base.mip_level as i32, + copy.src_base.array_layer as i32, + ) + }; + } else { + unsafe { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.src_base.mip_level as i32, + ) + }; + } + + unsafe { gl.bind_texture(dst_target, Some(dst)) }; + if is_layered_target(dst_target) { + unsafe { + gl.copy_tex_sub_image_3d( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + get_z_offset(dst_target, ©.dst_base) as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ) + }; + } else { + unsafe { + gl.copy_tex_sub_image_2d( + get_2d_target(dst_target, copy.dst_base.array_layer), + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ) + }; + } + } + C::CopyBufferToTexture { + ref src, + src_target: _, + dst, + dst_target, + dst_format, + ref copy, + } 
=> { + let (block_width, block_height) = dst_format.block_dimensions(); + let block_size = dst_format.block_copy_size(None).unwrap(); + let format_desc = self.shared.describe_texture_format(dst_format); + let row_texels = copy + .buffer_layout + .bytes_per_row + .map_or(0, |bpr| block_width * bpr / block_size); + let column_texels = copy + .buffer_layout + .rows_per_image + .map_or(0, |rpi| block_height * rpi); + + unsafe { gl.bind_texture(dst_target, Some(dst)) }; + unsafe { gl.pixel_store_i32(glow::UNPACK_ROW_LENGTH, row_texels as i32) }; + unsafe { gl.pixel_store_i32(glow::UNPACK_IMAGE_HEIGHT, column_texels as i32) }; + let mut unbind_unpack_buffer = false; + if !dst_format.is_compressed() { + let buffer_data; + let unpack_data = match src.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(buffer)) }; + unbind_unpack_buffer = true; + glow::PixelUnpackData::BufferOffset(copy.buffer_layout.offset as u32) + } + None => { + buffer_data = src.data.as_ref().unwrap().lock().unwrap(); + let src_data = + &buffer_data.as_slice()[copy.buffer_layout.offset as usize..]; + glow::PixelUnpackData::Slice(src_data) + } + }; + if is_layered_target(dst_target) { + unsafe { + gl.tex_sub_image_3d( + dst_target, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + get_z_offset(dst_target, ©.texture_base) as i32, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + unpack_data, + ) + }; + } else { + unsafe { + gl.tex_sub_image_2d( + get_2d_target(dst_target, copy.texture_base.array_layer), + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + unpack_data, + ) + }; + } + } else { + let bytes_per_row = copy + .buffer_layout + .bytes_per_row + 
.unwrap_or(copy.size.width * block_size); + let minimum_rows_per_image = + (copy.size.height + block_height - 1) / block_height; + let rows_per_image = copy + .buffer_layout + .rows_per_image + .unwrap_or(minimum_rows_per_image); + + let bytes_per_image = bytes_per_row * rows_per_image; + let minimum_bytes_per_image = bytes_per_row * minimum_rows_per_image; + let bytes_in_upload = + (bytes_per_image * (copy.size.depth - 1)) + minimum_bytes_per_image; + let offset = copy.buffer_layout.offset as u32; + + let buffer_data; + let unpack_data = match src.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(buffer)) }; + unbind_unpack_buffer = true; + glow::CompressedPixelUnpackData::BufferRange( + offset..offset + bytes_in_upload, + ) + } + None => { + buffer_data = src.data.as_ref().unwrap().lock().unwrap(); + let src_data = &buffer_data.as_slice() + [(offset as usize)..(offset + bytes_in_upload) as usize]; + glow::CompressedPixelUnpackData::Slice(src_data) + } + }; + + if is_layered_target(dst_target) { + unsafe { + gl.compressed_tex_sub_image_3d( + dst_target, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + get_z_offset(dst_target, ©.texture_base) as i32, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.internal, + unpack_data, + ) + }; + } else { + unsafe { + gl.compressed_tex_sub_image_2d( + get_2d_target(dst_target, copy.texture_base.array_layer), + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.internal, + unpack_data, + ) + }; + } + } + if unbind_unpack_buffer { + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, None) }; + } + } + C::CopyTextureToBuffer { + src, + src_target, + src_format, + ref dst, + dst_target: _, + ref copy, + } => { + let block_size = 
src_format.block_copy_size(None).unwrap(); + if src_format.is_compressed() { + log::error!("Not implemented yet: compressed texture copy to buffer"); + return; + } + if src_target == glow::TEXTURE_CUBE_MAP + || src_target == glow::TEXTURE_CUBE_MAP_ARRAY + { + log::error!("Not implemented yet: cubemap texture copy to buffer"); + return; + } + let format_desc = self.shared.describe_texture_format(src_format); + let row_texels = copy + .buffer_layout + .bytes_per_row + .map_or(copy.size.width, |bpr| bpr / block_size); + let column_texels = copy + .buffer_layout + .rows_per_image + .unwrap_or(copy.size.height); + + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)) }; + + let read_pixels = |offset| { + let mut buffer_data; + let unpack_data = match dst.raw { + Some(buffer) => { + unsafe { gl.pixel_store_i32(glow::PACK_ROW_LENGTH, row_texels as i32) }; + unsafe { gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(buffer)) }; + glow::PixelPackData::BufferOffset(offset as u32) + } + None => { + buffer_data = dst.data.as_ref().unwrap().lock().unwrap(); + let dst_data = &mut buffer_data.as_mut_slice()[offset as usize..]; + glow::PixelPackData::Slice(dst_data) + } + }; + unsafe { + gl.read_pixels( + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + unpack_data, + ) + }; + }; + + match src_target { + glow::TEXTURE_2D => { + unsafe { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.texture_base.mip_level as i32, + ) + }; + read_pixels(copy.buffer_layout.offset); + } + glow::TEXTURE_2D_ARRAY => { + unsafe { + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.texture_base.mip_level as i32, + copy.texture_base.array_layer as i32, + ) + }; + read_pixels(copy.buffer_layout.offset); + } + glow::TEXTURE_3D => { + for z in 
copy.texture_base.origin.z..copy.size.depth { + unsafe { + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.texture_base.mip_level as i32, + z as i32, + ) + }; + let offset = copy.buffer_layout.offset + + (z * block_size * row_texels * column_texels) as u64; + read_pixels(offset); + } + } + glow::TEXTURE_CUBE_MAP | glow::TEXTURE_CUBE_MAP_ARRAY => unimplemented!(), + _ => unreachable!(), + } + } + C::SetIndexBuffer(buffer) => { + unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, Some(buffer)) }; + let mut current_index_buffer = self.current_index_buffer.lock(); + *current_index_buffer = Some(buffer); + } + C::BeginQuery(query, target) => { + unsafe { gl.begin_query(target, query) }; + } + C::EndQuery(target) => { + unsafe { gl.end_query(target) }; + } + C::TimestampQuery(query) => { + unsafe { gl.query_counter(query, glow::TIMESTAMP) }; + } + C::CopyQueryResults { + ref query_range, + ref dst, + dst_target, + dst_offset, + } => { + if self + .shared + .private_caps + .contains(PrivateCapabilities::QUERY_BUFFERS) + && dst.raw.is_some() + { + unsafe { + // We're assuming that the only relevant queries are 8 byte timestamps or + // occlusion tests. 
+ let query_size = 8; + + let query_range_size = query_size * query_range.len(); + + let buffer = gl.create_buffer().ok(); + gl.bind_buffer(glow::QUERY_BUFFER, buffer); + gl.buffer_data_size( + glow::QUERY_BUFFER, + query_range_size as _, + glow::STREAM_COPY, + ); + + for (i, &query) in queries + [query_range.start as usize..query_range.end as usize] + .iter() + .enumerate() + { + gl.get_query_parameter_u64_with_offset( + query, + glow::QUERY_RESULT, + query_size * i, + ) + } + gl.bind_buffer(dst_target, dst.raw); + gl.copy_buffer_sub_data( + glow::QUERY_BUFFER, + dst_target, + 0, + dst_offset as _, + query_range_size as _, + ); + if let Some(buffer) = buffer { + gl.delete_buffer(buffer) + } + } + } else { + let mut temp_query_results = self.temp_query_results.lock(); + temp_query_results.clear(); + for &query in + queries[query_range.start as usize..query_range.end as usize].iter() + { + let mut result: u64 = 0; + unsafe { + if self + .shared + .private_caps + .contains(PrivateCapabilities::QUERY_64BIT) + { + let result: *mut u64 = &mut result; + gl.get_query_parameter_u64_with_offset( + query, + glow::QUERY_RESULT, + result as usize, + ) + } else { + result = + gl.get_query_parameter_u32(query, glow::QUERY_RESULT) as u64; + } + }; + temp_query_results.push(result); + } + let query_data = unsafe { + slice::from_raw_parts( + temp_query_results.as_ptr() as *const u8, + temp_query_results.len() * mem::size_of::<u64>(), + ) + }; + match dst.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + unsafe { + gl.buffer_sub_data_u8_slice( + dst_target, + dst_offset as i32, + query_data, + ) + }; + } + None => { + let data = &mut dst.data.as_ref().unwrap().lock().unwrap(); + let len = query_data.len().min(data.len()); + data[..len].copy_from_slice(&query_data[..len]); + } + } + } + } + C::ResetFramebuffer { is_default } => { + if is_default { + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + } else { + unsafe { 
gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.draw_fbo)) }; + unsafe { + gl.framebuffer_texture_2d( + glow::DRAW_FRAMEBUFFER, + glow::DEPTH_STENCIL_ATTACHMENT, + glow::TEXTURE_2D, + None, + 0, + ) + }; + for i in 0..crate::MAX_COLOR_ATTACHMENTS { + let target = glow::COLOR_ATTACHMENT0 + i as u32; + unsafe { + gl.framebuffer_texture_2d( + glow::DRAW_FRAMEBUFFER, + target, + glow::TEXTURE_2D, + None, + 0, + ) + }; + } + } + unsafe { gl.color_mask(true, true, true, true) }; + unsafe { gl.depth_mask(true) }; + unsafe { gl.stencil_mask(!0) }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + } + C::BindAttachment { + attachment, + ref view, + } => { + unsafe { self.set_attachment(gl, glow::DRAW_FRAMEBUFFER, attachment, view) }; + } + C::ResolveAttachment { + attachment, + ref dst, + ref size, + } => { + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.draw_fbo)) }; + unsafe { gl.read_buffer(attachment) }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.copy_fbo)) }; + unsafe { + self.set_attachment(gl, glow::DRAW_FRAMEBUFFER, glow::COLOR_ATTACHMENT0, dst) + }; + unsafe { + gl.blit_framebuffer( + 0, + 0, + size.width as i32, + size.height as i32, + 0, + 0, + size.width as i32, + size.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.draw_fbo)) }; + } + C::InvalidateAttachments(ref list) => { + if self + .shared + .private_caps + .contains(PrivateCapabilities::INVALIDATE_FRAMEBUFFER) + { + unsafe { gl.invalidate_framebuffer(glow::DRAW_FRAMEBUFFER, list) }; + } + } + C::SetDrawColorBuffers(count) => { + self.draw_buffer_count.store(count, Ordering::Relaxed); + let indices = (0..count as u32) + .map(|i| glow::COLOR_ATTACHMENT0 + i) + .collect::<ArrayVec<_, { crate::MAX_COLOR_ATTACHMENTS }>>(); + unsafe { 
gl.draw_buffers(&indices) }; + } + C::ClearColorF { + draw_buffer, + ref color, + is_srgb, + } => { + if self + .shared + .workarounds + .contains(super::Workarounds::MESA_I915_SRGB_SHADER_CLEAR) + && is_srgb + { + unsafe { self.perform_shader_clear(gl, draw_buffer, *color) }; + } else { + // Prefer `clear` as `clear_buffer` functions have issues on Sandy Bridge + // on Windows. + unsafe { + gl.draw_buffers(&[glow::COLOR_ATTACHMENT0 + draw_buffer]); + gl.clear_color(color[0], color[1], color[2], color[3]); + gl.clear(glow::COLOR_BUFFER_BIT); + } + } + } + C::ClearColorU(draw_buffer, ref color) => { + unsafe { gl.clear_buffer_u32_slice(glow::COLOR, draw_buffer, color) }; + } + C::ClearColorI(draw_buffer, ref color) => { + unsafe { gl.clear_buffer_i32_slice(glow::COLOR, draw_buffer, color) }; + } + C::ClearDepth(depth) => { + // Prefer `clear` as `clear_buffer` functions have issues on Sandy Bridge + // on Windows. + unsafe { + gl.clear_depth_f32(depth); + gl.clear(glow::DEPTH_BUFFER_BIT); + } + } + C::ClearStencil(value) => { + // Prefer `clear` as `clear_buffer` functions have issues on Sandy Bridge + // on Windows. + unsafe { + gl.clear_stencil(value as i32); + gl.clear(glow::STENCIL_BUFFER_BIT); + } + } + C::ClearDepthAndStencil(depth, stencil_value) => { + // Prefer `clear` as `clear_buffer` functions have issues on Sandy Bridge + // on Windows. 
+ unsafe { + gl.clear_depth_f32(depth); + gl.clear_stencil(stencil_value as i32); + gl.clear(glow::DEPTH_BUFFER_BIT | glow::STENCIL_BUFFER_BIT); + } + } + C::BufferBarrier(raw, usage) => { + let mut flags = 0; + if usage.contains(crate::BufferUses::VERTEX) { + flags |= glow::VERTEX_ATTRIB_ARRAY_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::ARRAY_BUFFER, Some(raw)) }; + unsafe { gl.vertex_attrib_pointer_f32(0, 1, glow::BYTE, true, 0, 0) }; + } + if usage.contains(crate::BufferUses::INDEX) { + flags |= glow::ELEMENT_ARRAY_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, Some(raw)) }; + } + if usage.contains(crate::BufferUses::UNIFORM) { + flags |= glow::UNIFORM_BARRIER_BIT; + } + if usage.contains(crate::BufferUses::INDIRECT) { + flags |= glow::COMMAND_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(raw)) }; + } + if usage.contains(crate::BufferUses::COPY_SRC) { + flags |= glow::PIXEL_BUFFER_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(raw)) }; + } + if usage.contains(crate::BufferUses::COPY_DST) { + flags |= glow::PIXEL_BUFFER_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(raw)) }; + } + if usage.intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE) { + flags |= glow::BUFFER_UPDATE_BARRIER_BIT; + } + if usage.intersects( + crate::BufferUses::STORAGE_READ | crate::BufferUses::STORAGE_READ_WRITE, + ) { + flags |= glow::SHADER_STORAGE_BARRIER_BIT; + } + unsafe { gl.memory_barrier(flags) }; + } + C::TextureBarrier(usage) => { + let mut flags = 0; + if usage.contains(crate::TextureUses::RESOURCE) { + flags |= glow::TEXTURE_FETCH_BARRIER_BIT; + } + if usage.intersects( + crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE, + ) { + flags |= glow::SHADER_IMAGE_ACCESS_BARRIER_BIT; + } + if usage.contains(crate::TextureUses::COPY_DST) { + flags |= glow::TEXTURE_UPDATE_BARRIER_BIT; + } + if usage.intersects( + crate::TextureUses::COLOR_TARGET + | 
crate::TextureUses::DEPTH_STENCIL_READ + | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= glow::FRAMEBUFFER_BARRIER_BIT; + } + unsafe { gl.memory_barrier(flags) }; + } + C::SetViewport { + ref rect, + ref depth, + } => { + unsafe { gl.viewport(rect.x, rect.y, rect.w, rect.h) }; + unsafe { gl.depth_range_f32(depth.start, depth.end) }; + } + C::SetScissor(ref rect) => { + unsafe { gl.scissor(rect.x, rect.y, rect.w, rect.h) }; + unsafe { gl.enable(glow::SCISSOR_TEST) }; + } + C::SetStencilFunc { + face, + function, + reference, + read_mask, + } => { + unsafe { gl.stencil_func_separate(face, function, reference as i32, read_mask) }; + } + C::SetStencilOps { + face, + write_mask, + ref ops, + } => { + unsafe { gl.stencil_mask_separate(face, write_mask) }; + unsafe { gl.stencil_op_separate(face, ops.fail, ops.depth_fail, ops.pass) }; + } + C::SetVertexAttribute { + buffer, + ref buffer_desc, + attribute_desc: ref vat, + } => { + unsafe { gl.bind_buffer(glow::ARRAY_BUFFER, buffer) }; + unsafe { gl.enable_vertex_attrib_array(vat.location) }; + + if buffer.is_none() { + match vat.format_desc.attrib_kind { + super::VertexAttribKind::Float => unsafe { + gl.vertex_attrib_format_f32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + true, // always normalized + vat.offset, + ) + }, + super::VertexAttribKind::Integer => unsafe { + gl.vertex_attrib_format_i32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + vat.offset, + ) + }, + } + + //Note: there is apparently a bug on AMD 3500U: + // this call is ignored if the current array is disabled. 
+ unsafe { gl.vertex_attrib_binding(vat.location, vat.buffer_index) }; + } else { + match vat.format_desc.attrib_kind { + super::VertexAttribKind::Float => unsafe { + gl.vertex_attrib_pointer_f32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + true, // always normalized + buffer_desc.stride as i32, + vat.offset as i32, + ) + }, + super::VertexAttribKind::Integer => unsafe { + gl.vertex_attrib_pointer_i32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + buffer_desc.stride as i32, + vat.offset as i32, + ) + }, + } + unsafe { gl.vertex_attrib_divisor(vat.location, buffer_desc.step as u32) }; + } + } + C::UnsetVertexAttribute(location) => { + unsafe { gl.disable_vertex_attrib_array(location) }; + } + C::SetVertexBuffer { + index, + ref buffer, + ref buffer_desc, + } => { + unsafe { gl.vertex_binding_divisor(index, buffer_desc.step as u32) }; + unsafe { + gl.bind_vertex_buffer( + index, + Some(buffer.raw), + buffer.offset as i32, + buffer_desc.stride as i32, + ) + }; + } + C::SetDepth(ref depth) => { + unsafe { gl.depth_func(depth.function) }; + unsafe { gl.depth_mask(depth.mask) }; + } + C::SetDepthBias(bias) => { + if bias.is_enabled() { + unsafe { gl.enable(glow::POLYGON_OFFSET_FILL) }; + unsafe { gl.polygon_offset(bias.slope_scale, bias.constant as f32) }; + } else { + unsafe { gl.disable(glow::POLYGON_OFFSET_FILL) }; + } + } + C::ConfigureDepthStencil(aspects) => { + if aspects.contains(crate::FormatAspects::DEPTH) { + unsafe { gl.enable(glow::DEPTH_TEST) }; + } else { + unsafe { gl.disable(glow::DEPTH_TEST) }; + } + if aspects.contains(crate::FormatAspects::STENCIL) { + unsafe { gl.enable(glow::STENCIL_TEST) }; + } else { + unsafe { gl.disable(glow::STENCIL_TEST) }; + } + } + C::SetAlphaToCoverage(enabled) => { + if enabled { + unsafe { gl.enable(glow::SAMPLE_ALPHA_TO_COVERAGE) }; + } else { + unsafe { gl.disable(glow::SAMPLE_ALPHA_TO_COVERAGE) }; + } + } + C::SetProgram(program) => { + 
unsafe { gl.use_program(Some(program)) }; + } + C::SetPrimitive(ref state) => { + unsafe { gl.front_face(state.front_face) }; + if state.cull_face != 0 { + unsafe { gl.enable(glow::CULL_FACE) }; + unsafe { gl.cull_face(state.cull_face) }; + } else { + unsafe { gl.disable(glow::CULL_FACE) }; + } + if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + //Note: this is a bit tricky, since we are controlling the clip, not the clamp. + if state.unclipped_depth { + unsafe { gl.enable(glow::DEPTH_CLAMP) }; + } else { + unsafe { gl.disable(glow::DEPTH_CLAMP) }; + } + } + // POLYGON_MODE_LINE also implies POLYGON_MODE_POINT + if self.features.contains(wgt::Features::POLYGON_MODE_LINE) { + unsafe { gl.polygon_mode(glow::FRONT_AND_BACK, state.polygon_mode) }; + } + } + C::SetBlendConstant(c) => { + unsafe { gl.blend_color(c[0], c[1], c[2], c[3]) }; + } + C::SetColorTarget { + draw_buffer_index, + desc: super::ColorTargetDesc { mask, ref blend }, + } => { + use wgt::ColorWrites as Cw; + if let Some(index) = draw_buffer_index { + unsafe { + gl.color_mask_draw_buffer( + index, + mask.contains(Cw::RED), + mask.contains(Cw::GREEN), + mask.contains(Cw::BLUE), + mask.contains(Cw::ALPHA), + ) + }; + if let Some(ref blend) = *blend { + unsafe { gl.enable_draw_buffer(glow::BLEND, index) }; + if blend.color != blend.alpha { + unsafe { + gl.blend_equation_separate_draw_buffer( + index, + blend.color.equation, + blend.alpha.equation, + ) + }; + unsafe { + gl.blend_func_separate_draw_buffer( + index, + blend.color.src, + blend.color.dst, + blend.alpha.src, + blend.alpha.dst, + ) + }; + } else { + unsafe { gl.blend_equation_draw_buffer(index, blend.color.equation) }; + unsafe { + gl.blend_func_draw_buffer(index, blend.color.src, blend.color.dst) + }; + } + } else { + unsafe { gl.disable_draw_buffer(glow::BLEND, index) }; + } + } else { + unsafe { + gl.color_mask( + mask.contains(Cw::RED), + mask.contains(Cw::GREEN), + mask.contains(Cw::BLUE), + mask.contains(Cw::ALPHA), + ) + }; + 
if let Some(ref blend) = *blend { + unsafe { gl.enable(glow::BLEND) }; + if blend.color != blend.alpha { + unsafe { + gl.blend_equation_separate( + blend.color.equation, + blend.alpha.equation, + ) + }; + unsafe { + gl.blend_func_separate( + blend.color.src, + blend.color.dst, + blend.alpha.src, + blend.alpha.dst, + ) + }; + } else { + unsafe { gl.blend_equation(blend.color.equation) }; + unsafe { gl.blend_func(blend.color.src, blend.color.dst) }; + } + } else { + unsafe { gl.disable(glow::BLEND) }; + } + } + } + C::BindBuffer { + target, + slot, + buffer, + offset, + size, + } => { + unsafe { gl.bind_buffer_range(target, slot, Some(buffer), offset, size) }; + } + C::BindSampler(texture_index, sampler) => { + unsafe { gl.bind_sampler(texture_index, sampler) }; + } + C::BindTexture { + slot, + texture, + target, + aspects, + ref mip_levels, + } => { + unsafe { gl.active_texture(glow::TEXTURE0 + slot) }; + unsafe { gl.bind_texture(target, Some(texture)) }; + + unsafe { + gl.tex_parameter_i32(target, glow::TEXTURE_BASE_LEVEL, mip_levels.start as i32) + }; + unsafe { + gl.tex_parameter_i32( + target, + glow::TEXTURE_MAX_LEVEL, + (mip_levels.end - 1) as i32, + ) + }; + + let version = gl.version(); + let is_min_es_3_1 = version.is_embedded && (version.major, version.minor) >= (3, 1); + let is_min_4_3 = !version.is_embedded && (version.major, version.minor) >= (4, 3); + if is_min_es_3_1 || is_min_4_3 { + let mode = match aspects { + crate::FormatAspects::DEPTH => Some(glow::DEPTH_COMPONENT), + crate::FormatAspects::STENCIL => Some(glow::STENCIL_INDEX), + _ => None, + }; + if let Some(mode) = mode { + unsafe { + gl.tex_parameter_i32( + target, + glow::DEPTH_STENCIL_TEXTURE_MODE, + mode as _, + ) + }; + } + } + } + C::BindImage { slot, ref binding } => { + unsafe { + gl.bind_image_texture( + slot, + binding.raw, + binding.mip_level as i32, + binding.array_layer.is_none(), + binding.array_layer.unwrap_or_default() as i32, + binding.access, + binding.format, + ) + }; + } + 
C::InsertDebugMarker(ref range) => { + let marker = extract_marker(data_bytes, range); + unsafe { + if self + .shared + .private_caps + .contains(PrivateCapabilities::DEBUG_FNS) + { + gl.debug_message_insert( + glow::DEBUG_SOURCE_APPLICATION, + glow::DEBUG_TYPE_MARKER, + DEBUG_ID, + glow::DEBUG_SEVERITY_NOTIFICATION, + marker, + ) + } + }; + } + C::PushDebugGroup(ref range) => { + let marker = extract_marker(data_bytes, range); + unsafe { + if self + .shared + .private_caps + .contains(PrivateCapabilities::DEBUG_FNS) + { + gl.push_debug_group(glow::DEBUG_SOURCE_APPLICATION, DEBUG_ID, marker) + } + }; + } + C::PopDebugGroup => { + unsafe { + if self + .shared + .private_caps + .contains(PrivateCapabilities::DEBUG_FNS) + { + gl.pop_debug_group() + } + }; + } + C::SetPushConstants { + ref uniform, + offset, + } => { + // T must be POD + // + // This function is absolutely sketchy and we really should be using bytemuck. + unsafe fn get_data<T, const COUNT: usize>(data: &[u8], offset: u32) -> &[T; COUNT] { + let data_required = mem::size_of::<T>() * COUNT; + + let raw = &data[(offset as usize)..][..data_required]; + + debug_assert_eq!(data_required, raw.len()); + + let slice: &[T] = + unsafe { slice::from_raw_parts(raw.as_ptr() as *const _, COUNT) }; + + slice.try_into().unwrap() + } + + let location = Some(&uniform.location); + + match uniform.ty { + // + // --- Float 1-4 Component --- + // + naga::TypeInner::Scalar(naga::Scalar::F32) => { + let data = unsafe { get_data::<f32, 1>(data_bytes, offset)[0] }; + unsafe { gl.uniform_1_f32(location, data) }; + } + naga::TypeInner::Vector { + size: naga::VectorSize::Bi, + scalar: naga::Scalar::F32, + } => { + let data = unsafe { get_data::<f32, 2>(data_bytes, offset) }; + unsafe { gl.uniform_2_f32_slice(location, data) }; + } + naga::TypeInner::Vector { + size: naga::VectorSize::Tri, + scalar: naga::Scalar::F32, + } => { + let data = unsafe { get_data::<f32, 3>(data_bytes, offset) }; + unsafe { gl.uniform_3_f32_slice(location, 
data) }; + } + naga::TypeInner::Vector { + size: naga::VectorSize::Quad, + scalar: naga::Scalar::F32, + } => { + let data = unsafe { get_data::<f32, 4>(data_bytes, offset) }; + unsafe { gl.uniform_4_f32_slice(location, data) }; + } + + // + // --- Int 1-4 Component --- + // + naga::TypeInner::Scalar(naga::Scalar::I32) => { + let data = unsafe { get_data::<i32, 1>(data_bytes, offset)[0] }; + unsafe { gl.uniform_1_i32(location, data) }; + } + naga::TypeInner::Vector { + size: naga::VectorSize::Bi, + scalar: naga::Scalar::I32, + } => { + let data = unsafe { get_data::<i32, 2>(data_bytes, offset) }; + unsafe { gl.uniform_2_i32_slice(location, data) }; + } + naga::TypeInner::Vector { + size: naga::VectorSize::Tri, + scalar: naga::Scalar::I32, + } => { + let data = unsafe { get_data::<i32, 3>(data_bytes, offset) }; + unsafe { gl.uniform_3_i32_slice(location, data) }; + } + naga::TypeInner::Vector { + size: naga::VectorSize::Quad, + scalar: naga::Scalar::I32, + } => { + let data = unsafe { get_data::<i32, 4>(data_bytes, offset) }; + unsafe { gl.uniform_4_i32_slice(location, data) }; + } + + // + // --- Uint 1-4 Component --- + // + naga::TypeInner::Scalar(naga::Scalar::U32) => { + let data = unsafe { get_data::<u32, 1>(data_bytes, offset)[0] }; + unsafe { gl.uniform_1_u32(location, data) }; + } + naga::TypeInner::Vector { + size: naga::VectorSize::Bi, + scalar: naga::Scalar::U32, + } => { + let data = unsafe { get_data::<u32, 2>(data_bytes, offset) }; + unsafe { gl.uniform_2_u32_slice(location, data) }; + } + naga::TypeInner::Vector { + size: naga::VectorSize::Tri, + scalar: naga::Scalar::U32, + } => { + let data = unsafe { get_data::<u32, 3>(data_bytes, offset) }; + unsafe { gl.uniform_3_u32_slice(location, data) }; + } + naga::TypeInner::Vector { + size: naga::VectorSize::Quad, + scalar: naga::Scalar::U32, + } => { + let data = unsafe { get_data::<u32, 4>(data_bytes, offset) }; + unsafe { gl.uniform_4_u32_slice(location, data) }; + } + + // + // --- Matrix 2xR --- + // 
+ naga::TypeInner::Matrix { + columns: naga::VectorSize::Bi, + rows: naga::VectorSize::Bi, + scalar: naga::Scalar::F32, + } => { + let data = unsafe { get_data::<f32, 4>(data_bytes, offset) }; + unsafe { gl.uniform_matrix_2_f32_slice(location, false, data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Bi, + rows: naga::VectorSize::Tri, + scalar: naga::Scalar::F32, + } => { + // repack 2 vec3s into 6 values. + let unpacked_data = unsafe { get_data::<f32, 8>(data_bytes, offset) }; + #[rustfmt::skip] + let packed_data = [ + unpacked_data[0], unpacked_data[1], unpacked_data[2], + unpacked_data[4], unpacked_data[5], unpacked_data[6], + ]; + unsafe { gl.uniform_matrix_2x3_f32_slice(location, false, &packed_data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Bi, + rows: naga::VectorSize::Quad, + scalar: naga::Scalar::F32, + } => { + let data = unsafe { get_data::<f32, 8>(data_bytes, offset) }; + unsafe { gl.uniform_matrix_2x4_f32_slice(location, false, data) }; + } + + // + // --- Matrix 3xR --- + // + naga::TypeInner::Matrix { + columns: naga::VectorSize::Tri, + rows: naga::VectorSize::Bi, + scalar: naga::Scalar::F32, + } => { + let data = unsafe { get_data::<f32, 6>(data_bytes, offset) }; + unsafe { gl.uniform_matrix_3x2_f32_slice(location, false, data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Tri, + rows: naga::VectorSize::Tri, + scalar: naga::Scalar::F32, + } => { + // repack 3 vec3s into 9 values. 
+ let unpacked_data = unsafe { get_data::<f32, 12>(data_bytes, offset) }; + #[rustfmt::skip] + let packed_data = [ + unpacked_data[0], unpacked_data[1], unpacked_data[2], + unpacked_data[4], unpacked_data[5], unpacked_data[6], + unpacked_data[8], unpacked_data[9], unpacked_data[10], + ]; + unsafe { gl.uniform_matrix_3_f32_slice(location, false, &packed_data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Tri, + rows: naga::VectorSize::Quad, + scalar: naga::Scalar::F32, + } => { + let data = unsafe { get_data::<f32, 12>(data_bytes, offset) }; + unsafe { gl.uniform_matrix_3x4_f32_slice(location, false, data) }; + } + + // + // --- Matrix 4xR --- + // + naga::TypeInner::Matrix { + columns: naga::VectorSize::Quad, + rows: naga::VectorSize::Bi, + scalar: naga::Scalar::F32, + } => { + let data = unsafe { get_data::<f32, 8>(data_bytes, offset) }; + unsafe { gl.uniform_matrix_4x2_f32_slice(location, false, data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Quad, + rows: naga::VectorSize::Tri, + scalar: naga::Scalar::F32, + } => { + // repack 4 vec3s into 12 values. 
+ let unpacked_data = unsafe { get_data::<f32, 16>(data_bytes, offset) }; + #[rustfmt::skip] + let packed_data = [ + unpacked_data[0], unpacked_data[1], unpacked_data[2], + unpacked_data[4], unpacked_data[5], unpacked_data[6], + unpacked_data[8], unpacked_data[9], unpacked_data[10], + unpacked_data[12], unpacked_data[13], unpacked_data[14], + ]; + unsafe { gl.uniform_matrix_4x3_f32_slice(location, false, &packed_data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Quad, + rows: naga::VectorSize::Quad, + scalar: naga::Scalar::F32, + } => { + let data = unsafe { get_data::<f32, 16>(data_bytes, offset) }; + unsafe { gl.uniform_matrix_4_f32_slice(location, false, data) }; + } + _ => panic!("Unsupported uniform datatype: {:?}!", uniform.ty), + } + } + } + } +} + +impl crate::Queue<super::Api> for super::Queue { + unsafe fn submit( + &self, + command_buffers: &[&super::CommandBuffer], + _surface_textures: &[&super::Texture], + signal_fence: Option<(&mut super::Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + let shared = Arc::clone(&self.shared); + let gl = &shared.context.lock(); + for cmd_buf in command_buffers.iter() { + // The command encoder assumes a default state when encoding the command buffer. + // Always reset the state between command_buffers to reflect this assumption. Do + // this at the beginning of the loop in case something outside of wgpu modified + // this state prior to commit. 
+ unsafe { self.reset_state(gl) }; + if let Some(ref label) = cmd_buf.label { + if self + .shared + .private_caps + .contains(PrivateCapabilities::DEBUG_FNS) + { + unsafe { gl.push_debug_group(glow::DEBUG_SOURCE_APPLICATION, DEBUG_ID, label) }; + } + } + + for command in cmd_buf.commands.iter() { + unsafe { self.process(gl, command, &cmd_buf.data_bytes, &cmd_buf.queries) }; + } + + if cmd_buf.label.is_some() + && self + .shared + .private_caps + .contains(PrivateCapabilities::DEBUG_FNS) + { + unsafe { gl.pop_debug_group() }; + } + } + + if let Some((fence, value)) = signal_fence { + fence.maintain(gl); + let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) } + .map_err(|_| crate::DeviceError::OutOfMemory)?; + fence.pending.push((value, sync)); + } + + Ok(()) + } + + unsafe fn present( + &self, + surface: &super::Surface, + texture: super::Texture, + ) -> Result<(), crate::SurfaceError> { + unsafe { surface.present(texture, &self.shared.context) } + } + + unsafe fn get_timestamp_period(&self) -> f32 { + 1.0 + } +} + +#[cfg(send_sync)] +unsafe impl Sync for super::Queue {} +#[cfg(send_sync)] +unsafe impl Send for super::Queue {} diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/clear.frag b/third_party/rust/wgpu-hal/src/gles/shaders/clear.frag new file mode 100644 index 0000000000..1d0e414b28 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/clear.frag @@ -0,0 +1,7 @@ +uniform vec4 color; +//Hack: Some WebGL implementations don't find "color" otherwise. 
+uniform vec4 color_workaround; +out vec4 frag; +void main() { + frag = color + color_workaround; +} diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/clear.vert b/third_party/rust/wgpu-hal/src/gles/shaders/clear.vert new file mode 100644 index 0000000000..341b4e5f06 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/clear.vert @@ -0,0 +1,9 @@ +// A triangle that fills the whole screen +vec2[3] TRIANGLE_POS = vec2[]( + vec2( 0.0, -3.0), + vec2(-3.0, 1.0), + vec2( 3.0, 1.0) +); +void main() { + gl_Position = vec4(TRIANGLE_POS[gl_VertexID], 0.0, 1.0); +}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.frag b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.frag new file mode 100644 index 0000000000..853f82a6ae --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.frag @@ -0,0 +1,16 @@ +#version 300 es +precision mediump float; +in vec2 uv; +uniform sampler2D present_texture; +out vec4 frag; +vec4 linear_to_srgb(vec4 linear) { + vec3 color_linear = linear.rgb; + vec3 selector = ceil(color_linear - 0.0031308); // 0 if under value, 1 if over + vec3 under = 12.92 * color_linear; + vec3 over = 1.055 * pow(color_linear, vec3(0.41666)) - 0.055; + vec3 result = mix(under, over, selector); + return vec4(result, linear.a); +} +void main() { + frag = linear_to_srgb(texture(present_texture, uv)); +}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.vert b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.vert new file mode 100644 index 0000000000..922f2a1848 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.vert @@ -0,0 +1,18 @@ +#version 300 es +precision mediump float; +// A triangle that fills the whole screen +const vec2[3] TRIANGLE_POS = vec2[]( + vec2( 0.0, -3.0), + vec2(-3.0, 1.0), + vec2( 3.0, 1.0) +); +const vec2[3] TRIANGLE_UV = vec2[]( + vec2( 0.5, 1.), + vec2( -1.0, -1.0), + vec2( 2.0, -1.0) +); +out vec2 uv; +void main() { + uv = TRIANGLE_UV[gl_VertexID]; + gl_Position = vec4(TRIANGLE_POS[gl_VertexID], 0.0, 1.0); +}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/gles/web.rs b/third_party/rust/wgpu-hal/src/gles/web.rs new file mode 100644 index 0000000000..797d6f91d7 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/web.rs @@ -0,0 +1,452 @@ +use glow::HasContext; +use parking_lot::{Mutex, RwLock}; +use wasm_bindgen::{JsCast, JsValue}; + +use super::TextureFormatDesc; + +/// A wrapper around a [`glow::Context`] to provide a fake `lock()` api that makes it compatible +/// with the `AdapterContext` API from the EGL implementation. +pub struct AdapterContext { + pub glow_context: glow::Context, +} + +impl AdapterContext { + pub fn is_owned(&self) -> bool { + false + } + + /// Obtain a lock to the EGL context and get handle to the [`glow::Context`] that can be used to + /// do rendering. + #[track_caller] + pub fn lock(&self) -> &glow::Context { + &self.glow_context + } +} + +#[derive(Debug)] +pub struct Instance { + /// Set when a canvas is provided, and used to implement [`Instance::enumerate_adapters()`]. + webgl2_context: Mutex<Option<web_sys::WebGl2RenderingContext>>, +} + +impl Instance { + pub fn create_surface_from_canvas( + &self, + canvas: web_sys::HtmlCanvasElement, + ) -> Result<Surface, crate::InstanceError> { + let result = + canvas.get_context_with_context_options("webgl2", &Self::create_context_options()); + self.create_surface_from_context(Canvas::Canvas(canvas), result) + } + + pub fn create_surface_from_offscreen_canvas( + &self, + canvas: web_sys::OffscreenCanvas, + ) -> Result<Surface, crate::InstanceError> { + let result = + canvas.get_context_with_context_options("webgl2", &Self::create_context_options()); + self.create_surface_from_context(Canvas::Offscreen(canvas), result) + } + + /// Common portion of public `create_surface_from_*` functions. + /// + /// Note: Analogous code also exists in the WebGPU backend at + /// `wgpu::backend::web::Context`. 
+ fn create_surface_from_context( + &self, + canvas: Canvas, + context_result: Result<Option<js_sys::Object>, JsValue>, + ) -> Result<Surface, crate::InstanceError> { + let context_object: js_sys::Object = match context_result { + Ok(Some(context)) => context, + Ok(None) => { + // <https://html.spec.whatwg.org/multipage/canvas.html#dom-canvas-getcontext-dev> + // A getContext() call “returns null if contextId is not supported, or if the + // canvas has already been initialized with another context type”. Additionally, + // “not supported” could include “insufficient GPU resources” or “the GPU process + // previously crashed”. So, we must return it as an `Err` since it could occur + // for circumstances outside the application author's control. + return Err(crate::InstanceError::new(String::from( + "canvas.getContext() returned null; webgl2 not available or canvas already in use" + ))); + } + Err(js_error) => { + // <https://html.spec.whatwg.org/multipage/canvas.html#dom-canvas-getcontext> + // A thrown exception indicates misuse of the canvas state. + return Err(crate::InstanceError::new(format!( + "canvas.getContext() threw exception {js_error:?}", + ))); + } + }; + + // Not returning this error because it is a type error that shouldn't happen unless + // the browser, JS builtin objects, or wasm bindings are misbehaving somehow. + let webgl2_context: web_sys::WebGl2RenderingContext = context_object + .dyn_into() + .expect("canvas context is not a WebGl2RenderingContext"); + + // It is not inconsistent to overwrite an existing context, because the only thing that + // `self.webgl2_context` is used for is producing the response to `enumerate_adapters()`. 
+ *self.webgl2_context.lock() = Some(webgl2_context.clone()); + + Ok(Surface { + canvas, + webgl2_context, + srgb_present_program: Mutex::new(None), + swapchain: RwLock::new(None), + texture: Mutex::new(None), + presentable: true, + }) + } + + fn create_context_options() -> js_sys::Object { + let context_options = js_sys::Object::new(); + js_sys::Reflect::set( + &context_options, + &"antialias".into(), + &wasm_bindgen::JsValue::FALSE, + ) + .expect("Cannot create context options"); + context_options + } +} + +#[cfg(send_sync)] +unsafe impl Sync for Instance {} +#[cfg(send_sync)] +unsafe impl Send for Instance {} + +impl crate::Instance<super::Api> for Instance { + unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + profiling::scope!("Init OpenGL (WebGL) Backend"); + Ok(Instance { + webgl2_context: Mutex::new(None), + }) + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let context_guard = self.webgl2_context.lock(); + let gl = match *context_guard { + Some(ref webgl2_context) => glow::Context::from_webgl2_context(webgl2_context.clone()), + None => return Vec::new(), + }; + + unsafe { super::Adapter::expose(AdapterContext { glow_context: gl }) } + .into_iter() + .collect() + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + let canvas: web_sys::HtmlCanvasElement = match window_handle { + raw_window_handle::RawWindowHandle::Web(handle) => web_sys::window() + .and_then(|win| win.document()) + .expect("Cannot get document") + .query_selector(&format!("canvas[data-raw-handle=\"{}\"]", handle.id)) + .expect("Cannot query for canvas") + .expect("Canvas is not found") + .dyn_into() + .expect("Failed to downcast to canvas type"), + raw_window_handle::RawWindowHandle::WebCanvas(handle) => { + let value: &JsValue = unsafe { handle.obj.cast().as_ref() 
}; + value.clone().unchecked_into() + } + raw_window_handle::RawWindowHandle::WebOffscreenCanvas(handle) => { + let value: &JsValue = unsafe { handle.obj.cast().as_ref() }; + let canvas: web_sys::OffscreenCanvas = value.clone().unchecked_into(); + + return self.create_surface_from_offscreen_canvas(canvas); + } + _ => { + return Err(crate::InstanceError::new(format!( + "window handle {window_handle:?} is not a web handle" + ))) + } + }; + + self.create_surface_from_canvas(canvas) + } + + unsafe fn destroy_surface(&self, surface: Surface) { + let mut context_option_ref = self.webgl2_context.lock(); + + if let Some(context) = context_option_ref.as_ref() { + if context == &surface.webgl2_context { + *context_option_ref = None; + } + } + } +} + +#[derive(Debug)] +pub struct Surface { + canvas: Canvas, + webgl2_context: web_sys::WebGl2RenderingContext, + pub(super) swapchain: RwLock<Option<Swapchain>>, + texture: Mutex<Option<glow::Texture>>, + pub(super) presentable: bool, + srgb_present_program: Mutex<Option<glow::Program>>, +} + +impl Clone for Surface { + fn clone(&self) -> Self { + Self { + canvas: self.canvas.clone(), + webgl2_context: self.webgl2_context.clone(), + swapchain: RwLock::new(self.swapchain.read().clone()), + texture: Mutex::new(*self.texture.lock()), + presentable: self.presentable, + srgb_present_program: Mutex::new(*self.srgb_present_program.lock()), + } + } +} + +#[cfg(send_sync)] +unsafe impl Sync for Surface {} +#[cfg(send_sync)] +unsafe impl Send for Surface {} + +#[derive(Clone, Debug)] +enum Canvas { + Canvas(web_sys::HtmlCanvasElement), + Offscreen(web_sys::OffscreenCanvas), +} + +#[derive(Clone, Debug)] +pub struct Swapchain { + pub(crate) extent: wgt::Extent3d, + // pub(crate) channel: f::ChannelType, + pub(super) format: wgt::TextureFormat, + pub(super) framebuffer: glow::Framebuffer, + pub(super) format_desc: TextureFormatDesc, +} + +impl Surface { + pub(super) unsafe fn present( + &self, + _suf_texture: super::Texture, + context: 
&AdapterContext, + ) -> Result<(), crate::SurfaceError> { + let gl = &context.glow_context; + let swapchain = self.swapchain.read(); + let swapchain = swapchain.as_ref().ok_or(crate::SurfaceError::Other( + "need to configure surface before presenting", + ))?; + + if swapchain.format.is_srgb() { + // Important to set the viewport since we don't know in what state the user left it. + unsafe { + gl.viewport( + 0, + 0, + swapchain.extent.width as _, + swapchain.extent.height as _, + ) + }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + unsafe { gl.bind_sampler(0, None) }; + unsafe { gl.active_texture(glow::TEXTURE0) }; + unsafe { gl.bind_texture(glow::TEXTURE_2D, *self.texture.lock()) }; + unsafe { gl.use_program(*self.srgb_present_program.lock()) }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.disable(glow::BLEND) }; + unsafe { gl.disable(glow::CULL_FACE) }; + unsafe { gl.draw_buffers(&[glow::BACK]) }; + unsafe { gl.draw_arrays(glow::TRIANGLES, 0, 3) }; + } else { + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(swapchain.framebuffer)) }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + // Note the Y-flipping here. GL's presentation is not flipped, + // but main rendering is. Therefore, we Y-flip the output positions + // in the shader, and also this blit. 
+ unsafe { + gl.blit_framebuffer( + 0, + swapchain.extent.height as i32, + swapchain.extent.width as i32, + 0, + 0, + 0, + swapchain.extent.width as i32, + swapchain.extent.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + } + + Ok(()) + } + + unsafe fn create_srgb_present_program(gl: &glow::Context) -> glow::Program { + let program = unsafe { gl.create_program() }.expect("Could not create shader program"); + let vertex = + unsafe { gl.create_shader(glow::VERTEX_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(vertex, include_str!("./shaders/srgb_present.vert")) }; + unsafe { gl.compile_shader(vertex) }; + let fragment = + unsafe { gl.create_shader(glow::FRAGMENT_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(fragment, include_str!("./shaders/srgb_present.frag")) }; + unsafe { gl.compile_shader(fragment) }; + unsafe { gl.attach_shader(program, vertex) }; + unsafe { gl.attach_shader(program, fragment) }; + unsafe { gl.link_program(program) }; + unsafe { gl.delete_shader(vertex) }; + unsafe { gl.delete_shader(fragment) }; + unsafe { gl.bind_texture(glow::TEXTURE_2D, None) }; + + program + } + + pub fn supports_srgb(&self) -> bool { + // present.frag takes care of handling srgb conversion + true + } +} + +impl crate::Surface<super::Api> for Surface { + unsafe fn configure( + &self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + match self.canvas { + Canvas::Canvas(ref canvas) => { + canvas.set_width(config.extent.width); + canvas.set_height(config.extent.height); + } + Canvas::Offscreen(ref canvas) => { + canvas.set_width(config.extent.width); + canvas.set_height(config.extent.height); + } + } + + let gl = &device.shared.context.lock(); + + { + let mut swapchain = self.swapchain.write(); + if let Some(swapchain) = swapchain.take() { + // delete all frame buffers already allocated + unsafe { gl.delete_framebuffer(swapchain.framebuffer) }; + 
} + } + { + let mut srgb_present_program = self.srgb_present_program.lock(); + if srgb_present_program.is_none() && config.format.is_srgb() { + *srgb_present_program = Some(unsafe { Self::create_srgb_present_program(gl) }); + } + } + { + let mut texture = self.texture.lock(); + if let Some(texture) = texture.take() { + unsafe { gl.delete_texture(texture) }; + } + + *texture = Some(unsafe { gl.create_texture() }.map_err(|error| { + log::error!("Internal swapchain texture creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?); + + let desc = device.shared.describe_texture_format(config.format); + unsafe { gl.bind_texture(glow::TEXTURE_2D, *texture) }; + unsafe { + gl.tex_parameter_i32( + glow::TEXTURE_2D, + glow::TEXTURE_MIN_FILTER, + glow::NEAREST as _, + ) + }; + unsafe { + gl.tex_parameter_i32( + glow::TEXTURE_2D, + glow::TEXTURE_MAG_FILTER, + glow::NEAREST as _, + ) + }; + unsafe { + gl.tex_storage_2d( + glow::TEXTURE_2D, + 1, + desc.internal, + config.extent.width as i32, + config.extent.height as i32, + ) + }; + + let framebuffer = unsafe { gl.create_framebuffer() }.map_err(|error| { + log::error!("Internal swapchain framebuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(framebuffer)) }; + unsafe { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + glow::TEXTURE_2D, + *texture, + 0, + ) + }; + unsafe { gl.bind_texture(glow::TEXTURE_2D, None) }; + + let mut swapchain = self.swapchain.write(); + *swapchain = Some(Swapchain { + extent: config.extent, + // channel: config.format.base_format().1, + format: config.format, + format_desc: desc, + framebuffer, + }); + } + + Ok(()) + } + + unsafe fn unconfigure(&self, device: &super::Device) { + let gl = device.shared.context.lock(); + { + let mut swapchain = self.swapchain.write(); + if let Some(swapchain) = swapchain.take() { + unsafe { gl.delete_framebuffer(swapchain.framebuffer) }; + } 
+ } + if let Some(renderbuffer) = self.texture.lock().take() { + unsafe { gl.delete_texture(renderbuffer) }; + } + } + + unsafe fn acquire_texture( + &self, + _timeout_ms: Option<std::time::Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let swapchain = self.swapchain.read(); + let sc = swapchain.as_ref().unwrap(); + let texture = super::Texture { + inner: super::TextureInner::Texture { + raw: self.texture.lock().unwrap(), + target: glow::TEXTURE_2D, + }, + drop_guard: None, + array_layer_count: 1, + mip_level_count: 1, + format: sc.format, + format_desc: sc.format_desc.clone(), + copy_size: crate::CopyExtent { + width: sc.extent.width, + height: sc.extent.height, + depth: 1, + }, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + + unsafe fn discard_texture(&self, _texture: super::Texture) {} +} diff --git a/third_party/rust/wgpu-hal/src/gles/wgl.rs b/third_party/rust/wgpu-hal/src/gles/wgl.rs new file mode 100644 index 0000000000..6243430dc2 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/wgl.rs @@ -0,0 +1,816 @@ +use glow::HasContext; +use glutin_wgl_sys::wgl_extra::{ + Wgl, CONTEXT_CORE_PROFILE_BIT_ARB, CONTEXT_DEBUG_BIT_ARB, CONTEXT_FLAGS_ARB, + CONTEXT_PROFILE_MASK_ARB, +}; +use once_cell::sync::Lazy; +use parking_lot::{Mutex, MutexGuard, RwLock}; +use raw_window_handle::{RawDisplayHandle, RawWindowHandle}; +use std::{ + collections::HashSet, + ffi::{c_void, CStr, CString}, + io::Error, + mem, + os::raw::c_int, + ptr, + sync::{ + mpsc::{sync_channel, SyncSender}, + Arc, + }, + thread, + time::Duration, +}; +use wgt::InstanceFlags; +use winapi::{ + shared::{ + minwindef::{FALSE, HMODULE, LPARAM, LRESULT, UINT, WPARAM}, + windef::{HDC, HGLRC, HWND}, + }, + um::{ + libloaderapi::{GetModuleHandleA, GetProcAddress, LoadLibraryA}, + wingdi::{ + wglCreateContext, wglDeleteContext, wglGetCurrentContext, wglGetProcAddress, + wglMakeCurrent, ChoosePixelFormat, 
DescribePixelFormat, GetPixelFormat, SetPixelFormat, + SwapBuffers, PFD_DOUBLEBUFFER, PFD_DRAW_TO_WINDOW, PFD_SUPPORT_OPENGL, PFD_TYPE_RGBA, + PIXELFORMATDESCRIPTOR, + }, + winuser::{ + CreateWindowExA, DefWindowProcA, DestroyWindow, GetDC, RegisterClassExA, ReleaseDC, + CS_OWNDC, WNDCLASSEXA, + }, + }, +}; + +/// The amount of time to wait while trying to obtain a lock to the adapter context +const CONTEXT_LOCK_TIMEOUT_SECS: u64 = 1; + +/// A wrapper around a `[`glow::Context`]` and the required WGL context that uses locking to +/// guarantee exclusive access when shared with multiple threads. +pub struct AdapterContext { + inner: Arc<Mutex<Inner>>, +} + +unsafe impl Sync for AdapterContext {} +unsafe impl Send for AdapterContext {} + +impl AdapterContext { + pub fn is_owned(&self) -> bool { + true + } + + pub fn raw_context(&self) -> *mut c_void { + self.inner.lock().context.context as *mut _ + } + + /// Obtain a lock to the WGL context and get handle to the [`glow::Context`] that can be used to + /// do rendering. + #[track_caller] + pub fn lock(&self) -> AdapterContextLock<'_> { + let inner = self + .inner + // Don't lock forever. If it takes longer than 1 second to get the lock we've got a + // deadlock and should panic to show where we got stuck + .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) + .expect("Could not lock adapter context. This is most-likely a deadlock."); + + inner.context.make_current(inner.device.dc).unwrap(); + + AdapterContextLock { inner } + } + + /// Obtain a lock to the WGL context and get handle to the [`glow::Context`] that can be used to + /// do rendering. + /// + /// Unlike [`lock`](Self::lock), this accepts a device to pass to `make_current` and exposes the error + /// when `make_current` fails. 
+ #[track_caller] + fn lock_with_dc(&self, device: HDC) -> Result<AdapterContextLock<'_>, Error> { + let inner = self + .inner + .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) + .expect("Could not lock adapter context. This is most-likely a deadlock."); + + inner + .context + .make_current(device) + .map(|()| AdapterContextLock { inner }) + } +} + +/// A guard containing a lock to an [`AdapterContext`] +pub struct AdapterContextLock<'a> { + inner: MutexGuard<'a, Inner>, +} + +impl<'a> std::ops::Deref for AdapterContextLock<'a> { + type Target = glow::Context; + + fn deref(&self) -> &Self::Target { + &self.inner.gl + } +} + +impl<'a> Drop for AdapterContextLock<'a> { + fn drop(&mut self) { + self.inner.context.unmake_current().unwrap(); + } +} + +struct WglContext { + context: HGLRC, +} + +impl WglContext { + fn make_current(&self, device: HDC) -> Result<(), Error> { + if unsafe { wglMakeCurrent(device, self.context) } == FALSE { + Err(Error::last_os_error()) + } else { + Ok(()) + } + } + + fn unmake_current(&self) -> Result<(), Error> { + if unsafe { wglGetCurrentContext().is_null() } { + return Ok(()); + } + if unsafe { wglMakeCurrent(ptr::null_mut(), ptr::null_mut()) } == FALSE { + Err(Error::last_os_error()) + } else { + Ok(()) + } + } +} + +impl Drop for WglContext { + fn drop(&mut self) { + unsafe { + if wglDeleteContext(self.context) == FALSE { + log::error!("failed to delete WGL context {}", Error::last_os_error()); + } + }; + } +} + +unsafe impl Send for WglContext {} +unsafe impl Sync for WglContext {} + +struct Inner { + gl: glow::Context, + device: InstanceDevice, + context: WglContext, +} + +pub struct Instance { + srgb_capable: bool, + inner: Arc<Mutex<Inner>>, +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +fn load_gl_func(name: &str, module: Option<HMODULE>) -> *const c_void { + let addr = CString::new(name.as_bytes()).unwrap(); + let mut ptr = unsafe { wglGetProcAddress(addr.as_ptr()) }; + if ptr.is_null() 
{ + if let Some(module) = module { + ptr = unsafe { GetProcAddress(module, addr.as_ptr()) }; + } + } + ptr.cast() +} + +fn get_extensions(extra: &Wgl, dc: HDC) -> HashSet<String> { + if extra.GetExtensionsStringARB.is_loaded() { + unsafe { CStr::from_ptr(extra.GetExtensionsStringARB(dc as *const _)) } + .to_str() + .unwrap_or("") + } else { + "" + } + .split(' ') + .map(|s| s.to_owned()) + .collect() +} + +unsafe fn setup_pixel_format(dc: HDC) -> Result<(), crate::InstanceError> { + let mut format: PIXELFORMATDESCRIPTOR = unsafe { mem::zeroed() }; + format.nVersion = 1; + format.nSize = mem::size_of_val(&format) as u16; + format.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; + format.iPixelType = PFD_TYPE_RGBA; + format.cColorBits = 8; + + let index = unsafe { ChoosePixelFormat(dc, &format) }; + if index == 0 { + return Err(crate::InstanceError::with_source( + String::from("unable to choose pixel format"), + Error::last_os_error(), + )); + } + + let current = unsafe { GetPixelFormat(dc) }; + + if index != current && unsafe { SetPixelFormat(dc, index, &format) } == FALSE { + return Err(crate::InstanceError::with_source( + String::from("unable to set pixel format"), + Error::last_os_error(), + )); + } + + let index = unsafe { GetPixelFormat(dc) }; + if index == 0 { + return Err(crate::InstanceError::with_source( + String::from("unable to get pixel format index"), + Error::last_os_error(), + )); + } + if unsafe { DescribePixelFormat(dc, index, mem::size_of_val(&format) as UINT, &mut format) } + == 0 + { + return Err(crate::InstanceError::with_source( + String::from("unable to read pixel format"), + Error::last_os_error(), + )); + } + + if format.dwFlags & PFD_SUPPORT_OPENGL == 0 || format.iPixelType != PFD_TYPE_RGBA { + return Err(crate::InstanceError::new(String::from( + "unsuitable pixel format", + ))); + } + Ok(()) +} + +fn create_global_window_class() -> Result<CString, crate::InstanceError> { + let instance = unsafe { 
GetModuleHandleA(ptr::null()) }; + if instance.is_null() { + return Err(crate::InstanceError::with_source( + String::from("unable to get executable instance"), + Error::last_os_error(), + )); + } + + // Use the address of `UNIQUE` as part of the window class name to ensure different + // `wgpu` versions use different names. + static UNIQUE: Mutex<u8> = Mutex::new(0); + let class_addr: *const _ = &UNIQUE; + let name = format!("wgpu Device Class {:x}\0", class_addr as usize); + let name = CString::from_vec_with_nul(name.into_bytes()).unwrap(); + + // Use a wrapper function for compatibility with `windows-rs`. + unsafe extern "system" fn wnd_proc( + window: HWND, + msg: UINT, + wparam: WPARAM, + lparam: LPARAM, + ) -> LRESULT { + unsafe { DefWindowProcA(window, msg, wparam, lparam) } + } + + let window_class = WNDCLASSEXA { + cbSize: mem::size_of::<WNDCLASSEXA>() as u32, + style: CS_OWNDC, + lpfnWndProc: Some(wnd_proc), + cbClsExtra: 0, + cbWndExtra: 0, + hInstance: instance, + hIcon: ptr::null_mut(), + hCursor: ptr::null_mut(), + hbrBackground: ptr::null_mut(), + lpszMenuName: ptr::null_mut(), + lpszClassName: name.as_ptr(), + hIconSm: ptr::null_mut(), + }; + + let atom = unsafe { RegisterClassExA(&window_class) }; + + if atom == 0 { + return Err(crate::InstanceError::with_source( + String::from("unable to register window class"), + Error::last_os_error(), + )); + } + + // We intentionally leak the window class as we only need one per process. + + Ok(name) +} + +fn get_global_window_class() -> Result<CString, crate::InstanceError> { + static GLOBAL: Lazy<Result<CString, crate::InstanceError>> = + Lazy::new(create_global_window_class); + GLOBAL.clone() +} + +struct InstanceDevice { + dc: HDC, + + /// This is used to keep the thread owning `dc` alive until this struct is dropped. 
+ _tx: SyncSender<()>, +} + +fn create_instance_device() -> Result<InstanceDevice, crate::InstanceError> { + #[derive(Clone, Copy)] + struct SendDc(HDC); + unsafe impl Sync for SendDc {} + unsafe impl Send for SendDc {} + + struct Window { + window: HWND, + } + impl Drop for Window { + fn drop(&mut self) { + unsafe { + if DestroyWindow(self.window) == FALSE { + log::error!("failed to destroy window {}", Error::last_os_error()); + } + }; + } + } + struct DeviceContextHandle { + dc: HDC, + window: HWND, + } + impl Drop for DeviceContextHandle { + fn drop(&mut self) { + unsafe { + ReleaseDC(self.window, self.dc); + }; + } + } + + let window_class = get_global_window_class()?; + + let (drop_tx, drop_rx) = sync_channel(0); + let (setup_tx, setup_rx) = sync_channel(0); + + // We spawn a thread which owns the hidden window for this instance. + thread::Builder::new() + .stack_size(256 * 1024) + .name("wgpu-hal WGL Instance Thread".to_owned()) + .spawn(move || { + let setup = (|| { + let instance = unsafe { GetModuleHandleA(ptr::null()) }; + if instance.is_null() { + return Err(crate::InstanceError::with_source( + String::from("unable to get executable instance"), + Error::last_os_error(), + )); + } + + // Create a hidden window since we don't pass `WS_VISIBLE`. + let window = unsafe { + CreateWindowExA( + 0, + window_class.as_ptr(), + window_class.as_ptr(), + 0, + 0, + 0, + 1, + 1, + ptr::null_mut(), + ptr::null_mut(), + instance, + ptr::null_mut(), + ) + }; + if window.is_null() { + return Err(crate::InstanceError::with_source( + String::from("unable to create hidden instance window"), + Error::last_os_error(), + )); + } + let window = Window { window }; + + let dc = unsafe { GetDC(window.window) }; + if dc.is_null() { + return Err(crate::InstanceError::with_source( + String::from("unable to create memory device"), + Error::last_os_error(), + )); + } + let dc = DeviceContextHandle { + dc, + window: window.window, + }; + unsafe { setup_pixel_format(dc.dc)? 
}; + + Ok((window, dc)) + })(); + + match setup { + Ok((_window, dc)) => { + setup_tx.send(Ok(SendDc(dc.dc))).unwrap(); + // Wait for the shutdown event to free the window and device context handle. + drop_rx.recv().ok(); + } + Err(err) => { + setup_tx.send(Err(err)).unwrap(); + } + } + }) + .map_err(|e| { + crate::InstanceError::with_source(String::from("unable to create instance thread"), e) + })?; + + let dc = setup_rx.recv().unwrap()?.0; + + Ok(InstanceDevice { dc, _tx: drop_tx }) +} + +impl crate::Instance<super::Api> for Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + profiling::scope!("Init OpenGL (WGL) Backend"); + let opengl_module = unsafe { LoadLibraryA("opengl32.dll\0".as_ptr() as *const _) }; + if opengl_module.is_null() { + return Err(crate::InstanceError::with_source( + String::from("unable to load the OpenGL library"), + Error::last_os_error(), + )); + } + + let device = create_instance_device()?; + let dc = device.dc; + + let context = unsafe { wglCreateContext(dc) }; + if context.is_null() { + return Err(crate::InstanceError::with_source( + String::from("unable to create initial OpenGL context"), + Error::last_os_error(), + )); + } + let context = WglContext { context }; + context.make_current(dc).map_err(|e| { + crate::InstanceError::with_source( + String::from("unable to set initial OpenGL context as current"), + e, + ) + })?; + + let extra = Wgl::load_with(|name| load_gl_func(name, None)); + let extensions = get_extensions(&extra, dc); + + let can_use_profile = extensions.contains("WGL_ARB_create_context_profile") + && extra.CreateContextAttribsARB.is_loaded(); + + let context = if can_use_profile { + let attributes = [ + CONTEXT_PROFILE_MASK_ARB as c_int, + CONTEXT_CORE_PROFILE_BIT_ARB as c_int, + CONTEXT_FLAGS_ARB as c_int, + if desc.flags.contains(InstanceFlags::DEBUG) { + CONTEXT_DEBUG_BIT_ARB as c_int + } else { + 0 + }, + 0, // End of list + ]; + let context = unsafe { + 
extra.CreateContextAttribsARB(dc as *const _, ptr::null(), attributes.as_ptr()) + }; + if context.is_null() { + return Err(crate::InstanceError::with_source( + String::from("unable to create OpenGL context"), + Error::last_os_error(), + )); + } + WglContext { + context: context as *mut _, + } + } else { + context + }; + + context.make_current(dc).map_err(|e| { + crate::InstanceError::with_source( + String::from("unable to set OpenGL context as current"), + e, + ) + })?; + + let mut gl = unsafe { + glow::Context::from_loader_function(|name| load_gl_func(name, Some(opengl_module))) + }; + + let extra = Wgl::load_with(|name| load_gl_func(name, None)); + let extensions = get_extensions(&extra, dc); + + let srgb_capable = extensions.contains("WGL_EXT_framebuffer_sRGB") + || extensions.contains("WGL_ARB_framebuffer_sRGB") + || gl + .supported_extensions() + .contains("GL_ARB_framebuffer_sRGB"); + + if srgb_capable { + unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) }; + } + + if desc.flags.contains(InstanceFlags::VALIDATION) && gl.supports_debug() { + log::debug!("Enabling GL debug output"); + unsafe { gl.enable(glow::DEBUG_OUTPUT) }; + unsafe { gl.debug_message_callback(super::gl_debug_message_callback) }; + } + + context.unmake_current().map_err(|e| { + crate::InstanceError::with_source( + String::from("unable to unset the current WGL context"), + e, + ) + })?; + + Ok(Instance { + inner: Arc::new(Mutex::new(Inner { + device, + gl, + context, + })), + srgb_capable, + }) + } + + #[cfg_attr(target_os = "macos", allow(unused, unused_mut, unreachable_code))] + unsafe fn create_surface( + &self, + _display_handle: RawDisplayHandle, + window_handle: RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + let window = if let RawWindowHandle::Win32(handle) = window_handle { + handle + } else { + return Err(crate::InstanceError::new(format!( + "unsupported window: {window_handle:?}" + ))); + }; + Ok(Surface { + window: window.hwnd.get() as *mut _, + presentable: true, + 
swapchain: RwLock::new(None), + srgb_capable: self.srgb_capable, + }) + } + unsafe fn destroy_surface(&self, _surface: Surface) {} + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + unsafe { + super::Adapter::expose(AdapterContext { + inner: self.inner.clone(), + }) + } + .into_iter() + .collect() + } +} + +struct DeviceContextHandle { + device: HDC, + window: HWND, +} + +impl Drop for DeviceContextHandle { + fn drop(&mut self) { + unsafe { + ReleaseDC(self.window, self.device); + }; + } +} + +pub struct Swapchain { + framebuffer: glow::Framebuffer, + renderbuffer: glow::Renderbuffer, + + /// Extent because the window lies + extent: wgt::Extent3d, + + format: wgt::TextureFormat, + format_desc: super::TextureFormatDesc, + #[allow(unused)] + sample_type: wgt::TextureSampleType, +} + +pub struct Surface { + window: HWND, + pub(super) presentable: bool, + swapchain: RwLock<Option<Swapchain>>, + srgb_capable: bool, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +impl Surface { + pub(super) unsafe fn present( + &self, + _suf_texture: super::Texture, + context: &AdapterContext, + ) -> Result<(), crate::SurfaceError> { + let swapchain = self.swapchain.read(); + let sc = swapchain.as_ref().unwrap(); + let dc = unsafe { GetDC(self.window) }; + if dc.is_null() { + log::error!( + "unable to get the device context from window: {}", + Error::last_os_error() + ); + return Err(crate::SurfaceError::Other( + "unable to get the device context from window", + )); + } + let dc = DeviceContextHandle { + device: dc, + window: self.window, + }; + + let gl = context.lock_with_dc(dc.device).map_err(|e| { + log::error!("unable to make the OpenGL context current for surface: {e}",); + crate::SurfaceError::Other("unable to make the OpenGL context current for surface") + })?; + + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(sc.framebuffer)) }; + + if 
self.srgb_capable { + // Disable sRGB conversions for `glBlitFramebuffer` as behavior does diverge between + // drivers and formats otherwise and we want to ensure no sRGB conversions happen. + unsafe { gl.disable(glow::FRAMEBUFFER_SRGB) }; + } + + // Note the Y-flipping here. GL's presentation is not flipped, + // but main rendering is. Therefore, we Y-flip the output positions + // in the shader, and also this blit. + unsafe { + gl.blit_framebuffer( + 0, + sc.extent.height as i32, + sc.extent.width as i32, + 0, + 0, + 0, + sc.extent.width as i32, + sc.extent.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + + if self.srgb_capable { + unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) }; + } + + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + + if unsafe { SwapBuffers(dc.device) } == FALSE { + log::error!("unable to swap buffers: {}", Error::last_os_error()); + return Err(crate::SurfaceError::Other("unable to swap buffers")); + } + + Ok(()) + } + + pub fn supports_srgb(&self) -> bool { + self.srgb_capable + } +} + +impl crate::Surface<super::Api> for Surface { + unsafe fn configure( + &self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + // Remove the old configuration. 
+ unsafe { self.unconfigure(device) }; + + let dc = unsafe { GetDC(self.window) }; + if dc.is_null() { + log::error!( + "unable to get the device context from window: {}", + Error::last_os_error() + ); + return Err(crate::SurfaceError::Other( + "unable to get the device context from window", + )); + } + let dc = DeviceContextHandle { + device: dc, + window: self.window, + }; + + if let Err(e) = unsafe { setup_pixel_format(dc.device) } { + log::error!("unable to setup surface pixel format: {e}",); + return Err(crate::SurfaceError::Other( + "unable to setup surface pixel format", + )); + } + + let format_desc = device.shared.describe_texture_format(config.format); + let gl = &device.shared.context.lock_with_dc(dc.device).map_err(|e| { + log::error!("unable to make the OpenGL context current for surface: {e}",); + crate::SurfaceError::Other("unable to make the OpenGL context current for surface") + })?; + + let renderbuffer = unsafe { gl.create_renderbuffer() }.map_err(|error| { + log::error!("Internal swapchain renderbuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, Some(renderbuffer)) }; + unsafe { + gl.renderbuffer_storage( + glow::RENDERBUFFER, + format_desc.internal, + config.extent.width as _, + config.extent.height as _, + ) + }; + + let framebuffer = unsafe { gl.create_framebuffer() }.map_err(|error| { + log::error!("Internal swapchain framebuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(framebuffer)) }; + unsafe { + gl.framebuffer_renderbuffer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + glow::RENDERBUFFER, + Some(renderbuffer), + ) + }; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + + // Setup presentation mode + let extra = Wgl::load_with(|name| load_gl_func(name, None)); + let extensions = 
get_extensions(&extra, dc.device); + if !(extensions.contains("WGL_EXT_swap_control") && extra.SwapIntervalEXT.is_loaded()) { + log::error!("WGL_EXT_swap_control is unsupported"); + return Err(crate::SurfaceError::Other( + "WGL_EXT_swap_control is unsupported", + )); + } + + let vsync = match config.present_mode { + wgt::PresentMode::Immediate => false, + wgt::PresentMode::Fifo => true, + _ => { + log::error!("unsupported present mode: {:?}", config.present_mode); + return Err(crate::SurfaceError::Other("unsupported present mode")); + } + }; + + if unsafe { extra.SwapIntervalEXT(if vsync { 1 } else { 0 }) } == FALSE { + log::error!("unable to set swap interval: {}", Error::last_os_error()); + return Err(crate::SurfaceError::Other("unable to set swap interval")); + } + + self.swapchain.write().replace(Swapchain { + renderbuffer, + framebuffer, + extent: config.extent, + format: config.format, + format_desc, + sample_type: wgt::TextureSampleType::Float { filterable: false }, + }); + + Ok(()) + } + + unsafe fn unconfigure(&self, device: &super::Device) { + let gl = &device.shared.context.lock(); + if let Some(sc) = self.swapchain.write().take() { + unsafe { + gl.delete_renderbuffer(sc.renderbuffer); + gl.delete_framebuffer(sc.framebuffer) + }; + } + } + + unsafe fn acquire_texture( + &self, + _timeout_ms: Option<Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let swapchain = self.swapchain.read(); + let sc = swapchain.as_ref().unwrap(); + let texture = super::Texture { + inner: super::TextureInner::Renderbuffer { + raw: sc.renderbuffer, + }, + drop_guard: None, + array_layer_count: 1, + mip_level_count: 1, + format: sc.format, + format_desc: sc.format_desc.clone(), + copy_size: crate::CopyExtent { + width: sc.extent.width, + height: sc.extent.height, + depth: 1, + }, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&self, _texture: super::Texture) 
{} +} diff --git a/third_party/rust/wgpu-hal/src/lib.rs b/third_party/rust/wgpu-hal/src/lib.rs new file mode 100644 index 0000000000..5d8c6ddda8 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/lib.rs @@ -0,0 +1,1610 @@ +/*! This library describes the internal unsafe graphics abstraction API. + * It follows WebGPU for the most part, re-using wgpu-types, + * with the following deviations: + * - Fully unsafe: zero overhead, zero validation. + * - Compile-time backend selection via traits. + * - Objects are passed by references and returned by value. No IDs. + * - Mapping is persistent, with explicit synchronization. + * - Resource transitions are explicit. + * - All layouts are explicit. Binding model has compatibility. + * + * General design direction is to follow the majority by the following weights: + * - wgpu-core: 1.5 + * - primary backends (Vulkan/Metal/DX12): 1.0 each + * - secondary backend (GLES): 0.5 + */ + +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![allow( + // for `if_then_panic` until it reaches stable + unknown_lints, + // We use loops for getting early-out of scope without closures. + clippy::never_loop, + // We don't use syntax sugar where it's not necessary. + clippy::match_like_matches_macro, + // Redundant matching is more explicit. + clippy::redundant_pattern_matching, + // Explicit lifetimes are often easier to reason about. + clippy::needless_lifetimes, + // No need for defaults in the internal types. + clippy::new_without_default, + // Matches are good and extendable, no need to make an exception here. + clippy::single_match, + // Push commands are more regular than macros. + clippy::vec_init_then_push, + // "if panic" is a good uniform construct. + clippy::if_then_panic, + // We unsafe impl `Send` for a reason. + clippy::non_send_fields_in_send_ty, + // TODO! 
+ clippy::missing_safety_doc, + // Clashes with clippy::pattern_type_mismatch + clippy::needless_borrowed_reference, +)] +#![warn( + trivial_casts, + trivial_numeric_casts, + unsafe_op_in_unsafe_fn, + unused_extern_crates, + unused_qualifications, + // We don't match on a reference, unless required. + clippy::pattern_type_mismatch, +)] + +/// DirectX12 API internals. +#[cfg(dx12)] +pub mod dx12; +/// A dummy API implementation. +pub mod empty; +/// GLES API internals. +#[cfg(gles)] +pub mod gles; +/// Metal API internals. +#[cfg(metal)] +pub mod metal; +/// Vulkan API internals. +#[cfg(vulkan)] +pub mod vulkan; + +pub mod auxil; +pub mod api { + #[cfg(dx12)] + pub use super::dx12::Api as Dx12; + pub use super::empty::Api as Empty; + #[cfg(gles)] + pub use super::gles::Api as Gles; + #[cfg(metal)] + pub use super::metal::Api as Metal; + #[cfg(vulkan)] + pub use super::vulkan::Api as Vulkan; +} + +use std::{ + borrow::{Borrow, Cow}, + fmt, + num::NonZeroU32, + ops::{Range, RangeInclusive}, + ptr::NonNull, + sync::Arc, +}; + +use bitflags::bitflags; +use parking_lot::Mutex; +use thiserror::Error; +use wgt::WasmNotSendSync; + +// - Vertex + Fragment +// - Compute +pub const MAX_CONCURRENT_SHADER_STAGES: usize = 2; +pub const MAX_ANISOTROPY: u8 = 16; +pub const MAX_BIND_GROUPS: usize = 8; +pub const MAX_VERTEX_BUFFERS: usize = 16; +pub const MAX_COLOR_ATTACHMENTS: usize = 8; +pub const MAX_MIP_LEVELS: u32 = 16; +/// Size of a single occlusion/timestamp query, when copied into a buffer, in bytes. +pub const QUERY_SIZE: wgt::BufferAddress = 8; + +pub type Label<'a> = Option<&'a str>; +pub type MemoryRange = Range<wgt::BufferAddress>; +pub type FenceValue = u64; + +/// Drop guard to signal wgpu-hal is no longer using an externally created object. 
+pub type DropGuard = Box<dyn std::any::Any + Send + Sync>; + +#[derive(Clone, Debug, PartialEq, Eq, Error)] +pub enum DeviceError { + #[error("Out of memory")] + OutOfMemory, + #[error("Device is lost")] + Lost, + #[error("Creation of a resource failed for a reason other than running out of memory.")] + ResourceCreationFailed, +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum ShaderError { + #[error("Compilation failed: {0:?}")] + Compilation(String), + #[error(transparent)] + Device(#[from] DeviceError), +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum PipelineError { + #[error("Linkage failed for stage {0:?}: {1}")] + Linkage(wgt::ShaderStages, String), + #[error("Entry point for stage {0:?} is invalid")] + EntryPoint(naga::ShaderStage), + #[error(transparent)] + Device(#[from] DeviceError), +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum SurfaceError { + #[error("Surface is lost")] + Lost, + #[error("Surface is outdated, needs to be re-created")] + Outdated, + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Other reason: {0}")] + Other(&'static str), +} + +/// Error occurring while trying to create an instance, or create a surface from an instance; +/// typically relating to the state of the underlying graphics API or hardware. +#[derive(Clone, Debug, Error)] +#[error("{message}")] +pub struct InstanceError { + /// These errors are very platform specific, so do not attempt to encode them as an enum. + /// + /// This message should describe the problem in sufficient detail to be useful for a + /// user-to-developer “why won't this work on my machine” bug report, and otherwise follow + /// <https://rust-lang.github.io/api-guidelines/interoperability.html#error-types-are-meaningful-and-well-behaved-c-good-err>. + message: String, + + /// Underlying error value, if any is available. 
+ #[source] + source: Option<Arc<dyn std::error::Error + Send + Sync + 'static>>, +} + +impl InstanceError { + #[allow(dead_code)] // may be unused on some platforms + pub(crate) fn new(message: String) -> Self { + Self { + message, + source: None, + } + } + #[allow(dead_code)] // may be unused on some platforms + pub(crate) fn with_source( + message: String, + source: impl std::error::Error + Send + Sync + 'static, + ) -> Self { + Self { + message, + source: Some(Arc::new(source)), + } + } +} + +pub trait Api: Clone + fmt::Debug + Sized { + type Instance: Instance<Self>; + type Surface: Surface<Self>; + type Adapter: Adapter<Self>; + type Device: Device<Self>; + + type Queue: Queue<Self>; + type CommandEncoder: CommandEncoder<Self>; + type CommandBuffer: WasmNotSendSync + fmt::Debug; + + type Buffer: fmt::Debug + WasmNotSendSync + 'static; + type Texture: fmt::Debug + WasmNotSendSync + 'static; + type SurfaceTexture: fmt::Debug + WasmNotSendSync + Borrow<Self::Texture>; + type TextureView: fmt::Debug + WasmNotSendSync; + type Sampler: fmt::Debug + WasmNotSendSync; + type QuerySet: fmt::Debug + WasmNotSendSync; + type Fence: fmt::Debug + WasmNotSendSync; + + type BindGroupLayout: fmt::Debug + WasmNotSendSync; + type BindGroup: fmt::Debug + WasmNotSendSync; + type PipelineLayout: fmt::Debug + WasmNotSendSync; + type ShaderModule: fmt::Debug + WasmNotSendSync; + type RenderPipeline: fmt::Debug + WasmNotSendSync; + type ComputePipeline: fmt::Debug + WasmNotSendSync; + + type AccelerationStructure: fmt::Debug + WasmNotSendSync + 'static; +} + +pub trait Instance<A: Api>: Sized + WasmNotSendSync { + unsafe fn init(desc: &InstanceDescriptor) -> Result<Self, InstanceError>; + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<A::Surface, InstanceError>; + unsafe fn destroy_surface(&self, surface: A::Surface); + unsafe fn enumerate_adapters(&self) -> 
Vec<ExposedAdapter<A>>; +} + +pub trait Surface<A: Api>: WasmNotSendSync { + /// Configures the surface to use the given device. + /// + /// # Safety + /// + /// - All gpu work that uses the surface must have been completed. + /// - All [`AcquiredSurfaceTexture`]s must have been destroyed. + /// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed. + /// - All surfaces created using other devices must have been unconfigured before this call. + unsafe fn configure( + &self, + device: &A::Device, + config: &SurfaceConfiguration, + ) -> Result<(), SurfaceError>; + + /// Unconfigures the surface on the given device. + /// + /// # Safety + /// + /// - All gpu work that uses the surface must have been completed. + /// - All [`AcquiredSurfaceTexture`]s must have been destroyed. + /// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed. + /// - The surface must have been configured on the given device. + unsafe fn unconfigure(&self, device: &A::Device); + + /// Returns the next texture to be presented by the swapchain for drawing + /// + /// A `timeout` of `None` means to wait indefinitely, with no timeout. + /// + /// # Portability + /// + /// Some backends can't support a timeout when acquiring a texture and + /// the timeout will be ignored. + /// + /// Returns `None` on timing out. + unsafe fn acquire_texture( + &self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<AcquiredSurfaceTexture<A>>, SurfaceError>; + unsafe fn discard_texture(&self, texture: A::SurfaceTexture); +} + +pub trait Adapter<A: Api>: WasmNotSendSync { + unsafe fn open( + &self, + features: wgt::Features, + limits: &wgt::Limits, + ) -> Result<OpenDevice<A>, DeviceError>; + + /// Return the set of supported capabilities for a texture format. 
+ unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> TextureFormatCapabilities; + + /// Returns the capabilities of working with a specified surface. + /// + /// `None` means presentation is not supported for it. + unsafe fn surface_capabilities(&self, surface: &A::Surface) -> Option<SurfaceCapabilities>; + + /// Creates a [`PresentationTimestamp`] using the adapter's WSI. + /// + /// [`PresentationTimestamp`]: wgt::PresentationTimestamp + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp; +} + +pub trait Device<A: Api>: WasmNotSendSync { + /// Exit connection to this logical device. + unsafe fn exit(self, queue: A::Queue); + /// Creates a new buffer. + /// + /// The initial usage is `BufferUses::empty()`. + unsafe fn create_buffer(&self, desc: &BufferDescriptor) -> Result<A::Buffer, DeviceError>; + unsafe fn destroy_buffer(&self, buffer: A::Buffer); + //TODO: clarify if zero-sized mapping is allowed + unsafe fn map_buffer( + &self, + buffer: &A::Buffer, + range: MemoryRange, + ) -> Result<BufferMapping, DeviceError>; + unsafe fn unmap_buffer(&self, buffer: &A::Buffer) -> Result<(), DeviceError>; + unsafe fn flush_mapped_ranges<I>(&self, buffer: &A::Buffer, ranges: I) + where + I: Iterator<Item = MemoryRange>; + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &A::Buffer, ranges: I) + where + I: Iterator<Item = MemoryRange>; + + /// Creates a new texture. + /// + /// The initial usage for all subresources is `TextureUses::UNINITIALIZED`. 
+ unsafe fn create_texture(&self, desc: &TextureDescriptor) -> Result<A::Texture, DeviceError>; + unsafe fn destroy_texture(&self, texture: A::Texture); + unsafe fn create_texture_view( + &self, + texture: &A::Texture, + desc: &TextureViewDescriptor, + ) -> Result<A::TextureView, DeviceError>; + unsafe fn destroy_texture_view(&self, view: A::TextureView); + unsafe fn create_sampler(&self, desc: &SamplerDescriptor) -> Result<A::Sampler, DeviceError>; + unsafe fn destroy_sampler(&self, sampler: A::Sampler); + + unsafe fn create_command_encoder( + &self, + desc: &CommandEncoderDescriptor<A>, + ) -> Result<A::CommandEncoder, DeviceError>; + unsafe fn destroy_command_encoder(&self, pool: A::CommandEncoder); + + /// Creates a bind group layout. + unsafe fn create_bind_group_layout( + &self, + desc: &BindGroupLayoutDescriptor, + ) -> Result<A::BindGroupLayout, DeviceError>; + unsafe fn destroy_bind_group_layout(&self, bg_layout: A::BindGroupLayout); + unsafe fn create_pipeline_layout( + &self, + desc: &PipelineLayoutDescriptor<A>, + ) -> Result<A::PipelineLayout, DeviceError>; + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: A::PipelineLayout); + unsafe fn create_bind_group( + &self, + desc: &BindGroupDescriptor<A>, + ) -> Result<A::BindGroup, DeviceError>; + unsafe fn destroy_bind_group(&self, group: A::BindGroup); + + unsafe fn create_shader_module( + &self, + desc: &ShaderModuleDescriptor, + shader: ShaderInput, + ) -> Result<A::ShaderModule, ShaderError>; + unsafe fn destroy_shader_module(&self, module: A::ShaderModule); + unsafe fn create_render_pipeline( + &self, + desc: &RenderPipelineDescriptor<A>, + ) -> Result<A::RenderPipeline, PipelineError>; + unsafe fn destroy_render_pipeline(&self, pipeline: A::RenderPipeline); + unsafe fn create_compute_pipeline( + &self, + desc: &ComputePipelineDescriptor<A>, + ) -> Result<A::ComputePipeline, PipelineError>; + unsafe fn destroy_compute_pipeline(&self, pipeline: A::ComputePipeline); + + unsafe fn 
create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<Label>, + ) -> Result<A::QuerySet, DeviceError>; + unsafe fn destroy_query_set(&self, set: A::QuerySet); + unsafe fn create_fence(&self) -> Result<A::Fence, DeviceError>; + unsafe fn destroy_fence(&self, fence: A::Fence); + unsafe fn get_fence_value(&self, fence: &A::Fence) -> Result<FenceValue, DeviceError>; + /// Calling wait with a lower value than the current fence value will immediately return. + unsafe fn wait( + &self, + fence: &A::Fence, + value: FenceValue, + timeout_ms: u32, + ) -> Result<bool, DeviceError>; + + unsafe fn start_capture(&self) -> bool; + unsafe fn stop_capture(&self); + + unsafe fn create_acceleration_structure( + &self, + desc: &AccelerationStructureDescriptor, + ) -> Result<A::AccelerationStructure, DeviceError>; + unsafe fn get_acceleration_structure_build_sizes( + &self, + desc: &GetAccelerationStructureBuildSizesDescriptor<A>, + ) -> AccelerationStructureBuildSizes; + unsafe fn get_acceleration_structure_device_address( + &self, + acceleration_structure: &A::AccelerationStructure, + ) -> wgt::BufferAddress; + unsafe fn destroy_acceleration_structure( + &self, + acceleration_structure: A::AccelerationStructure, + ); +} + +pub trait Queue<A: Api>: WasmNotSendSync { + /// Submits the command buffers for execution on GPU. + /// + /// Valid usage: + /// - all of the command buffers were created from command pools + /// that are associated with this queue. + /// - all of the command buffers had `CommandBuffer::finish()` called. + /// - all surface textures that the command buffers write to must be + /// passed to the surface_textures argument. 
+ unsafe fn submit( + &self, + command_buffers: &[&A::CommandBuffer], + surface_textures: &[&A::SurfaceTexture], + signal_fence: Option<(&mut A::Fence, FenceValue)>, + ) -> Result<(), DeviceError>; + unsafe fn present( + &self, + surface: &A::Surface, + texture: A::SurfaceTexture, + ) -> Result<(), SurfaceError>; + unsafe fn get_timestamp_period(&self) -> f32; +} + +/// Encoder for commands in command buffers. +/// Serves as a parent for all the encoded command buffers. +/// Works in bursts of action: one or more command buffers are recorded, +/// then submitted to a queue, and then it needs to be `reset_all()`. +pub trait CommandEncoder<A: Api>: WasmNotSendSync + fmt::Debug { + /// Begin encoding a new command buffer. + unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>; + /// Discard currently recorded list, if any. + unsafe fn discard_encoding(&mut self); + unsafe fn end_encoding(&mut self) -> Result<A::CommandBuffer, DeviceError>; + /// Reclaims all resources that are allocated for this encoder. + /// Must get all of the produced command buffers back, + /// and they must not be used by GPU at this moment. + unsafe fn reset_all<I>(&mut self, command_buffers: I) + where + I: Iterator<Item = A::CommandBuffer>; + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = BufferBarrier<'a, A>>; + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = TextureBarrier<'a, A>>; + + // copy operations + + unsafe fn clear_buffer(&mut self, buffer: &A::Buffer, range: MemoryRange); + + unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &A::Buffer, dst: &A::Buffer, regions: T) + where + T: Iterator<Item = BufferCopy>; + + /// Copy from an external image to an internal texture. + /// Works with a single array layer. + /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. 
+ /// Note: the copy extent is in physical size (rounded to the block size) + #[cfg(webgl)] + unsafe fn copy_external_image_to_texture<T>( + &mut self, + src: &wgt::ImageCopyExternalImage, + dst: &A::Texture, + dst_premultiplication: bool, + regions: T, + ) where + T: Iterator<Item = TextureCopy>; + + /// Copy from one texture to another. + /// Works with a single array layer. + /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: the copy extent is in physical size (rounded to the block size) + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &A::Texture, + src_usage: TextureUses, + dst: &A::Texture, + regions: T, + ) where + T: Iterator<Item = TextureCopy>; + + /// Copy from buffer to texture. + /// Works with a single array layer. + /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: the copy extent is in physical size (rounded to the block size) + unsafe fn copy_buffer_to_texture<T>(&mut self, src: &A::Buffer, dst: &A::Texture, regions: T) + where + T: Iterator<Item = BufferTextureCopy>; + + /// Copy from texture to buffer. + /// Works with a single array layer. + /// Note: the copy extent is in physical size (rounded to the block size) + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &A::Texture, + src_usage: TextureUses, + dst: &A::Buffer, + regions: T, + ) where + T: Iterator<Item = BufferTextureCopy>; + + // pass common + + /// Sets the bind group at `index` to `group`, assuming the layout + /// of all the preceding groups to be taken from `layout`. + unsafe fn set_bind_group( + &mut self, + layout: &A::PipelineLayout, + index: u32, + group: &A::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ); + + /// Sets a range in push constant data. + /// + /// IMPORTANT: while the data is passed as words, the offset is in bytes! + /// + /// # Safety + /// + /// - `offset_bytes` must be a multiple of 4. 
+ /// - The range of push constants written must be valid for the pipeline layout at draw time. + unsafe fn set_push_constants( + &mut self, + layout: &A::PipelineLayout, + stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ); + + unsafe fn insert_debug_marker(&mut self, label: &str); + unsafe fn begin_debug_marker(&mut self, group_label: &str); + unsafe fn end_debug_marker(&mut self); + + // queries + + /// # Safety: + /// + /// - If `set` is an occlusion query set, it must be the same one as used in the [`RenderPassDescriptor::occlusion_query_set`] parameter. + unsafe fn begin_query(&mut self, set: &A::QuerySet, index: u32); + /// # Safety: + /// + /// - If `set` is an occlusion query set, it must be the same one as used in the [`RenderPassDescriptor::occlusion_query_set`] parameter. + unsafe fn end_query(&mut self, set: &A::QuerySet, index: u32); + unsafe fn write_timestamp(&mut self, set: &A::QuerySet, index: u32); + unsafe fn reset_queries(&mut self, set: &A::QuerySet, range: Range<u32>); + unsafe fn copy_query_results( + &mut self, + set: &A::QuerySet, + range: Range<u32>, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ); + + // render passes + + // Begins a render pass, clears all active bindings. 
+ unsafe fn begin_render_pass(&mut self, desc: &RenderPassDescriptor<A>); + unsafe fn end_render_pass(&mut self); + + unsafe fn set_render_pipeline(&mut self, pipeline: &A::RenderPipeline); + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: BufferBinding<'a, A>, + format: wgt::IndexFormat, + ); + unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: BufferBinding<'a, A>); + unsafe fn set_viewport(&mut self, rect: &Rect<f32>, depth_range: Range<f32>); + unsafe fn set_scissor_rect(&mut self, rect: &Rect<u32>); + unsafe fn set_stencil_reference(&mut self, value: u32); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]); + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indirect( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indirect_count( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + count_buffer: &A::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + count_buffer: &A::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + + // compute passes + + // Begins a compute pass, clears all active bindings. 
+ unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<A>); + unsafe fn end_compute_pass(&mut self); + + unsafe fn set_compute_pipeline(&mut self, pipeline: &A::ComputePipeline); + + unsafe fn dispatch(&mut self, count: [u32; 3]); + unsafe fn dispatch_indirect(&mut self, buffer: &A::Buffer, offset: wgt::BufferAddress); + + /// To get the required sizes for the buffer allocations use `get_acceleration_structure_build_sizes` per descriptor + /// All buffers must be synchronized externally + /// All buffer regions that are written to may only be passed once per function call, + /// with the exception of updates in the same descriptor. + /// Consequences of this limitation: + /// - scratch buffers need to be unique + /// - a tlas can't be built in the same call with a blas it contains + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + descriptor_count: u32, + descriptors: T, + ) where + A: 'a, + T: IntoIterator<Item = BuildAccelerationStructureDescriptor<'a, A>>; + + unsafe fn place_acceleration_structure_barrier( + &mut self, + barrier: AccelerationStructureBarrier, + ); +} + +bitflags!( + /// Pipeline layout creation flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct PipelineLayoutFlags: u32 { + /// Include support for `first_vertex` / `first_instance` drawing. + const FIRST_VERTEX_INSTANCE = 1 << 0; + /// Include support for num work groups builtin. + const NUM_WORK_GROUPS = 1 << 1; + } +); + +bitflags!( + /// Bind group layout creation flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct BindGroupLayoutFlags: u32 { + /// Allows for bind group binding arrays to be shorter than the array in the BGL. + const PARTIALLY_BOUND = 1 << 0; + } +); + +bitflags!( + /// Texture format capability flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct TextureFormatCapabilities: u32 { + /// Format can be sampled. + const SAMPLED = 1 << 0; + /// Format can be sampled with a linear sampler. 
+ const SAMPLED_LINEAR = 1 << 1; + /// Format can be sampled with a min/max reduction sampler. + const SAMPLED_MINMAX = 1 << 2; + + /// Format can be used as storage with write-only access. + const STORAGE = 1 << 3; + /// Format can be used as storage with read and read/write access. + const STORAGE_READ_WRITE = 1 << 4; + /// Format can be used as storage with atomics. + const STORAGE_ATOMIC = 1 << 5; + + /// Format can be used as color and input attachment. + const COLOR_ATTACHMENT = 1 << 6; + /// Format can be used as color (with blending) and input attachment. + const COLOR_ATTACHMENT_BLEND = 1 << 7; + /// Format can be used as depth-stencil and input attachment. + const DEPTH_STENCIL_ATTACHMENT = 1 << 8; + + /// Format can be multisampled by x2. + const MULTISAMPLE_X2 = 1 << 9; + /// Format can be multisampled by x4. + const MULTISAMPLE_X4 = 1 << 10; + /// Format can be multisampled by x8. + const MULTISAMPLE_X8 = 1 << 11; + /// Format can be multisampled by x16. + const MULTISAMPLE_X16 = 1 << 12; + + /// Format can be used for render pass resolve targets. + const MULTISAMPLE_RESOLVE = 1 << 13; + + /// Format can be copied from. + const COPY_SRC = 1 << 14; + /// Format can be copied to. + const COPY_DST = 1 << 15; + } +); + +bitflags!( + /// Texture format capability flags. 
+ #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct FormatAspects: u8 { + const COLOR = 1 << 0; + const DEPTH = 1 << 1; + const STENCIL = 1 << 2; + const PLANE_0 = 1 << 3; + const PLANE_1 = 1 << 4; + const PLANE_2 = 1 << 5; + + const DEPTH_STENCIL = Self::DEPTH.bits() | Self::STENCIL.bits(); + } +); + +impl FormatAspects { + pub fn new(format: wgt::TextureFormat, aspect: wgt::TextureAspect) -> Self { + let aspect_mask = match aspect { + wgt::TextureAspect::All => Self::all(), + wgt::TextureAspect::DepthOnly => Self::DEPTH, + wgt::TextureAspect::StencilOnly => Self::STENCIL, + wgt::TextureAspect::Plane0 => Self::PLANE_0, + wgt::TextureAspect::Plane1 => Self::PLANE_1, + wgt::TextureAspect::Plane2 => Self::PLANE_2, + }; + Self::from(format) & aspect_mask + } + + /// Returns `true` if only one flag is set + pub fn is_one(&self) -> bool { + self.bits().count_ones() == 1 + } + + pub fn map(&self) -> wgt::TextureAspect { + match *self { + Self::COLOR => wgt::TextureAspect::All, + Self::DEPTH => wgt::TextureAspect::DepthOnly, + Self::STENCIL => wgt::TextureAspect::StencilOnly, + Self::PLANE_0 => wgt::TextureAspect::Plane0, + Self::PLANE_1 => wgt::TextureAspect::Plane1, + Self::PLANE_2 => wgt::TextureAspect::Plane2, + _ => unreachable!(), + } + } +} + +impl From<wgt::TextureFormat> for FormatAspects { + fn from(format: wgt::TextureFormat) -> Self { + match format { + wgt::TextureFormat::Stencil8 => Self::STENCIL, + wgt::TextureFormat::Depth16Unorm + | wgt::TextureFormat::Depth32Float + | wgt::TextureFormat::Depth24Plus => Self::DEPTH, + wgt::TextureFormat::Depth32FloatStencil8 | wgt::TextureFormat::Depth24PlusStencil8 => { + Self::DEPTH_STENCIL + } + wgt::TextureFormat::NV12 => Self::PLANE_0 | Self::PLANE_1, + _ => Self::COLOR, + } + } +} + +bitflags!( + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct MemoryFlags: u32 { + const TRANSIENT = 1 << 0; + const PREFER_COHERENT = 1 << 1; + } +); + +//TODO: it's not intuitive for the backends to 
consider `LOAD` being optional. + +bitflags!( + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct AttachmentOps: u8 { + const LOAD = 1 << 0; + const STORE = 1 << 1; + } +); + +bitflags::bitflags! { + /// Similar to `wgt::BufferUsages` but for internal use. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct BufferUses: u16 { + /// The argument to a read-only mapping. + const MAP_READ = 1 << 0; + /// The argument to a write-only mapping. + const MAP_WRITE = 1 << 1; + /// The source of a hardware copy. + const COPY_SRC = 1 << 2; + /// The destination of a hardware copy. + const COPY_DST = 1 << 3; + /// The index buffer used for drawing. + const INDEX = 1 << 4; + /// A vertex buffer used for drawing. + const VERTEX = 1 << 5; + /// A uniform buffer bound in a bind group. + const UNIFORM = 1 << 6; + /// A read-only storage buffer used in a bind group. + const STORAGE_READ = 1 << 7; + /// A read-write or write-only buffer used in a bind group. + const STORAGE_READ_WRITE = 1 << 8; + /// The indirect or count buffer in an indirect draw or dispatch. + const INDIRECT = 1 << 9; + /// A buffer used to store query results. + const QUERY_RESOLVE = 1 << 10; + const ACCELERATION_STRUCTURE_SCRATCH = 1 << 11; + const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12; + const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 13; + /// The combination of states that a buffer may be in _at the same time_. + const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() | + Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() | + Self::STORAGE_READ.bits() | Self::INDIRECT.bits() | Self::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits() | Self::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits(); + /// The combination of states that a buffer must exclusively be in. 
+ const EXCLUSIVE = Self::MAP_WRITE.bits() | Self::COPY_DST.bits() | Self::STORAGE_READ_WRITE.bits() | Self::ACCELERATION_STRUCTURE_SCRATCH.bits(); + /// The combination of all usages that are guaranteed to be ordered by the hardware. + /// If a usage is ordered, then if the buffer state doesn't change between draw calls, there + /// are no barriers needed for synchronization. + const ORDERED = Self::INCLUSIVE.bits() | Self::MAP_WRITE.bits(); + } +} + +bitflags::bitflags! { + /// Similar to `wgt::TextureUsages` but for internal use. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct TextureUses: u16 { + /// The texture is in an unknown state. + const UNINITIALIZED = 1 << 0; + /// Ready to present image to the surface. + const PRESENT = 1 << 1; + /// The source of a hardware copy. + const COPY_SRC = 1 << 2; + /// The destination of a hardware copy. + const COPY_DST = 1 << 3; + /// Read-only sampled or fetched resource. + const RESOURCE = 1 << 4; + /// The color target of a render pass. + const COLOR_TARGET = 1 << 5; + /// Read-only depth stencil usage. + const DEPTH_STENCIL_READ = 1 << 6; + /// Read-write depth stencil usage. + const DEPTH_STENCIL_WRITE = 1 << 7; + /// Read-only storage texture usage. Corresponds to a UAV in d3d, so is exclusive, despite being read only. + const STORAGE_READ = 1 << 8; + /// Read-write or write-only storage texture usage. + const STORAGE_READ_WRITE = 1 << 9; + /// The combination of states that a texture may be in _at the same time_. + const INCLUSIVE = Self::COPY_SRC.bits() | Self::RESOURCE.bits() | Self::DEPTH_STENCIL_READ.bits(); + /// The combination of states that a texture must exclusively be in. + const EXCLUSIVE = Self::COPY_DST.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ.bits() | Self::STORAGE_READ_WRITE.bits() | Self::PRESENT.bits(); + /// The combination of all usages that are guaranteed to be ordered by the hardware. 
+ /// If a usage is ordered, then if the texture state doesn't change between draw calls, there + /// are no barriers needed for synchronization. + const ORDERED = Self::INCLUSIVE.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ.bits(); + + /// Flag used by the wgpu-core texture tracker to say a texture is in different states for every sub-resource + const COMPLEX = 1 << 10; + /// Flag used by the wgpu-core texture tracker to say that the tracker does not know the state of the sub-resource. + /// This is different from UNINITIALIZED as that says the tracker does know, but the texture has not been initialized. + const UNKNOWN = 1 << 11; + } +} + +#[derive(Clone, Debug)] +pub struct InstanceDescriptor<'a> { + pub name: &'a str, + pub flags: wgt::InstanceFlags, + pub dx12_shader_compiler: wgt::Dx12Compiler, + pub gles_minor_version: wgt::Gles3MinorVersion, +} + +#[derive(Clone, Debug)] +pub struct Alignments { + /// The alignment of the start of the buffer used as a GPU copy source. + pub buffer_copy_offset: wgt::BufferSize, + /// The alignment of the row pitch of the texture data stored in a buffer that is + /// used in a GPU copy operation. + pub buffer_copy_pitch: wgt::BufferSize, +} + +#[derive(Clone, Debug)] +pub struct Capabilities { + pub limits: wgt::Limits, + pub alignments: Alignments, + pub downlevel: wgt::DownlevelCapabilities, +} + +#[derive(Debug)] +pub struct ExposedAdapter<A: Api> { + pub adapter: A::Adapter, + pub info: wgt::AdapterInfo, + pub features: wgt::Features, + pub capabilities: Capabilities, +} + +/// Describes information about what a `Surface`'s presentation capabilities are. +/// Fetch this with [Adapter::surface_capabilities]. +#[derive(Debug, Clone)] +pub struct SurfaceCapabilities { + /// List of supported texture formats. + /// + /// Must be at least one. + pub formats: Vec<wgt::TextureFormat>, + + /// Range for the number of queued frames. 
+ /// + /// This adjusts either the swapchain frame count to value + 1 - or sets SetMaximumFrameLatency to the value given, + /// or uses a wait-for-present in the acquire method to limit rendering such that it acts like it's a value + 1 swapchain frame set. + /// + /// - `maximum_frame_latency.start` must be at least 1. + /// - `maximum_frame_latency.end` must be larger or equal to `maximum_frame_latency.start`. + pub maximum_frame_latency: RangeInclusive<u32>, + + /// Current extent of the surface, if known. + pub current_extent: Option<wgt::Extent3d>, + + /// Supported texture usage flags. + /// + /// Must have at least `TextureUses::COLOR_TARGET` + pub usage: TextureUses, + + /// List of supported V-sync modes. + /// + /// Must be at least one. + pub present_modes: Vec<wgt::PresentMode>, + + /// List of supported alpha composition modes. + /// + /// Must be at least one. + pub composite_alpha_modes: Vec<wgt::CompositeAlphaMode>, +} + +#[derive(Debug)] +pub struct AcquiredSurfaceTexture<A: Api> { + pub texture: A::SurfaceTexture, + /// The presentation configuration no longer matches + /// the surface properties exactly, but can still be used to present + /// to the surface successfully. 
+ pub suboptimal: bool, +} + +#[derive(Debug)] +pub struct OpenDevice<A: Api> { + pub device: A::Device, + pub queue: A::Queue, +} + +#[derive(Clone, Debug)] +pub struct BufferMapping { + pub ptr: NonNull<u8>, + pub is_coherent: bool, +} + +#[derive(Clone, Debug)] +pub struct BufferDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::BufferAddress, + pub usage: BufferUses, + pub memory_flags: MemoryFlags, +} + +#[derive(Clone, Debug)] +pub struct TextureDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::Extent3d, + pub mip_level_count: u32, + pub sample_count: u32, + pub dimension: wgt::TextureDimension, + pub format: wgt::TextureFormat, + pub usage: TextureUses, + pub memory_flags: MemoryFlags, + /// Allows views of this texture to have a different format + /// than the texture does. + pub view_formats: Vec<wgt::TextureFormat>, +} + +impl TextureDescriptor<'_> { + pub fn copy_extent(&self) -> CopyExtent { + CopyExtent::map_extent_to_copy_size(&self.size, self.dimension) + } + + pub fn is_cube_compatible(&self) -> bool { + self.dimension == wgt::TextureDimension::D2 + && self.size.depth_or_array_layers % 6 == 0 + && self.sample_count == 1 + && self.size.width == self.size.height + } + + pub fn array_layer_count(&self) -> u32 { + match self.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D3 => 1, + wgt::TextureDimension::D2 => self.size.depth_or_array_layers, + } + } +} + +/// TextureView descriptor. +/// +/// Valid usage: +///. - `format` has to be the same as `TextureDescriptor::format` +///. - `dimension` has to be compatible with `TextureDescriptor::dimension` +///. - `usage` has to be a subset of `TextureDescriptor::usage` +///. 
- `range` has to be a subset of parent texture +#[derive(Clone, Debug)] +pub struct TextureViewDescriptor<'a> { + pub label: Label<'a>, + pub format: wgt::TextureFormat, + pub dimension: wgt::TextureViewDimension, + pub usage: TextureUses, + pub range: wgt::ImageSubresourceRange, +} + +#[derive(Clone, Debug)] +pub struct SamplerDescriptor<'a> { + pub label: Label<'a>, + pub address_modes: [wgt::AddressMode; 3], + pub mag_filter: wgt::FilterMode, + pub min_filter: wgt::FilterMode, + pub mipmap_filter: wgt::FilterMode, + pub lod_clamp: Range<f32>, + pub compare: Option<wgt::CompareFunction>, + // Must be in the range [1, 16]. + // + // Anisotropic filtering must be supported if this is not 1. + pub anisotropy_clamp: u16, + pub border_color: Option<wgt::SamplerBorderColor>, +} + +/// BindGroupLayout descriptor. +/// +/// Valid usage: +/// - `entries` are sorted by ascending `wgt::BindGroupLayoutEntry::binding` +#[derive(Clone, Debug)] +pub struct BindGroupLayoutDescriptor<'a> { + pub label: Label<'a>, + pub flags: BindGroupLayoutFlags, + pub entries: &'a [wgt::BindGroupLayoutEntry], +} + +#[derive(Clone, Debug)] +pub struct PipelineLayoutDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub flags: PipelineLayoutFlags, + pub bind_group_layouts: &'a [&'a A::BindGroupLayout], + pub push_constant_ranges: &'a [wgt::PushConstantRange], +} + +#[derive(Debug)] +pub struct BufferBinding<'a, A: Api> { + /// The buffer being bound. + pub buffer: &'a A::Buffer, + + /// The offset at which the bound region starts. + /// + /// This must be less than the size of the buffer. Some back ends + /// cannot tolerate zero-length regions; for example, see + /// [VUID-VkDescriptorBufferInfo-offset-00340][340] and + /// [VUID-VkDescriptorBufferInfo-range-00341][341], or the + /// documentation for GLES's [glBindBufferRange][bbr]. 
+ /// + /// [340]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkDescriptorBufferInfo-offset-00340 + /// [341]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkDescriptorBufferInfo-range-00341 + /// [bbr]: https://registry.khronos.org/OpenGL-Refpages/es3.0/html/glBindBufferRange.xhtml + pub offset: wgt::BufferAddress, + + /// The size of the region bound, in bytes. + /// + /// If `None`, the region extends from `offset` to the end of the + /// buffer. Given the restrictions on `offset`, this means that + /// the size is always greater than zero. + pub size: Option<wgt::BufferSize>, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for BufferBinding<'_, A> { + fn clone(&self) -> Self { + Self { + buffer: self.buffer, + offset: self.offset, + size: self.size, + } + } +} + +#[derive(Debug)] +pub struct TextureBinding<'a, A: Api> { + pub view: &'a A::TextureView, + pub usage: TextureUses, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for TextureBinding<'_, A> { + fn clone(&self) -> Self { + Self { + view: self.view, + usage: self.usage, + } + } +} + +#[derive(Clone, Debug)] +pub struct BindGroupEntry { + pub binding: u32, + pub resource_index: u32, + pub count: u32, +} + +/// BindGroup descriptor. +/// +/// Valid usage: +///. - `entries` has to be sorted by ascending `BindGroupEntry::binding` +///. - `entries` has to have the same set of `BindGroupEntry::binding` as `layout` +///. - each entry has to be compatible with the `layout` +///. - each entry's `BindGroupEntry::resource_index` is within range +/// of the corresponding resource array, selected by the relevant +/// `BindGroupLayoutEntry`. 
+#[derive(Clone, Debug)] +pub struct BindGroupDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub layout: &'a A::BindGroupLayout, + pub buffers: &'a [BufferBinding<'a, A>], + pub samplers: &'a [&'a A::Sampler], + pub textures: &'a [TextureBinding<'a, A>], + pub entries: &'a [BindGroupEntry], + pub acceleration_structures: &'a [&'a A::AccelerationStructure], +} + +#[derive(Clone, Debug)] +pub struct CommandEncoderDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub queue: &'a A::Queue, +} + +/// Naga shader module. +pub struct NagaShader { + /// Shader module IR. + pub module: Cow<'static, naga::Module>, + /// Analysis information of the module. + pub info: naga::valid::ModuleInfo, + /// Source codes for debug + pub debug_source: Option<DebugSource>, +} + +// Custom implementation avoids the need to generate Debug impl code +// for the whole Naga module and info. +impl fmt::Debug for NagaShader { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "Naga shader") + } +} + +/// Shader input. +#[allow(clippy::large_enum_variant)] +pub enum ShaderInput<'a> { + Naga(NagaShader), + SpirV(&'a [u32]), +} + +pub struct ShaderModuleDescriptor<'a> { + pub label: Label<'a>, + pub runtime_checks: bool, +} + +#[derive(Debug, Clone)] +pub struct DebugSource { + pub file_name: Cow<'static, str>, + pub source_code: Cow<'static, str>, +} + +/// Describes a programmable pipeline stage. +#[derive(Debug)] +pub struct ProgrammableStage<'a, A: Api> { + /// The compiled shader module for this stage. + pub module: &'a A::ShaderModule, + /// The name of the entry point in the compiled shader. There must be a function with this name + /// in the shader. + pub entry_point: &'a str, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for ProgrammableStage<'_, A> { + fn clone(&self) -> Self { + Self { + module: self.module, + entry_point: self.entry_point, + } + } +} + +/// Describes a compute pipeline. 
+#[derive(Clone, Debug)] +pub struct ComputePipelineDescriptor<'a, A: Api> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: &'a A::PipelineLayout, + /// The compiled compute stage and its entry point. + pub stage: ProgrammableStage<'a, A>, +} + +/// Describes how the vertex buffer is interpreted. +#[derive(Clone, Debug)] +pub struct VertexBufferLayout<'a> { + /// The stride, in bytes, between elements of this buffer. + pub array_stride: wgt::BufferAddress, + /// How often this vertex buffer is "stepped" forward. + pub step_mode: wgt::VertexStepMode, + /// The list of attributes which comprise a single vertex. + pub attributes: &'a [wgt::VertexAttribute], +} + +/// Describes a render (graphics) pipeline. +#[derive(Clone, Debug)] +pub struct RenderPipelineDescriptor<'a, A: Api> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: &'a A::PipelineLayout, + /// The format of any vertex buffers used with this pipeline. + pub vertex_buffers: &'a [VertexBufferLayout<'a>], + /// The vertex stage for this pipeline. + pub vertex_stage: ProgrammableStage<'a, A>, + /// The properties of the pipeline at the primitive assembly and rasterization level. + pub primitive: wgt::PrimitiveState, + /// The effect of draw calls on the depth and stencil aspects of the output target, if any. + pub depth_stencil: Option<wgt::DepthStencilState>, + /// The multi-sampling properties of the pipeline. + pub multisample: wgt::MultisampleState, + /// The fragment stage for this pipeline. + pub fragment_stage: Option<ProgrammableStage<'a, A>>, + /// The effect of draw calls on the color aspect of the output target. + pub color_targets: &'a [Option<wgt::ColorTargetState>], + /// If the pipeline will be used with a multiview render pass, this indicates how many array + /// layers the attachments will have. 
+ pub multiview: Option<NonZeroU32>, +} + +#[derive(Debug, Clone)] +pub struct SurfaceConfiguration { + /// Maximum number of queued frames. Must be in + /// `SurfaceCapabilities::maximum_frame_latency` range. + pub maximum_frame_latency: u32, + /// Vertical synchronization mode. + pub present_mode: wgt::PresentMode, + /// Alpha composition mode. + pub composite_alpha_mode: wgt::CompositeAlphaMode, + /// Format of the surface textures. + pub format: wgt::TextureFormat, + /// Requested texture extent. Must be in + /// `SurfaceCapabilities::extents` range. + pub extent: wgt::Extent3d, + /// Allowed usage of surface textures, + pub usage: TextureUses, + /// Allows views of swapchain texture to have a different format + /// than the texture does. + pub view_formats: Vec<wgt::TextureFormat>, +} + +#[derive(Debug, Clone)] +pub struct Rect<T> { + pub x: T, + pub y: T, + pub w: T, + pub h: T, +} + +#[derive(Debug, Clone)] +pub struct BufferBarrier<'a, A: Api> { + pub buffer: &'a A::Buffer, + pub usage: Range<BufferUses>, +} + +#[derive(Debug, Clone)] +pub struct TextureBarrier<'a, A: Api> { + pub texture: &'a A::Texture, + pub range: wgt::ImageSubresourceRange, + pub usage: Range<TextureUses>, +} + +#[derive(Clone, Copy, Debug)] +pub struct BufferCopy { + pub src_offset: wgt::BufferAddress, + pub dst_offset: wgt::BufferAddress, + pub size: wgt::BufferSize, +} + +#[derive(Clone, Debug)] +pub struct TextureCopyBase { + pub mip_level: u32, + pub array_layer: u32, + /// Origin within a texture. + /// Note: for 1D and 2D textures, Z must be 0. 
+ pub origin: wgt::Origin3d, + pub aspect: FormatAspects, +} + +#[derive(Clone, Copy, Debug)] +pub struct CopyExtent { + pub width: u32, + pub height: u32, + pub depth: u32, +} + +#[derive(Clone, Debug)] +pub struct TextureCopy { + pub src_base: TextureCopyBase, + pub dst_base: TextureCopyBase, + pub size: CopyExtent, +} + +#[derive(Clone, Debug)] +pub struct BufferTextureCopy { + pub buffer_layout: wgt::ImageDataLayout, + pub texture_base: TextureCopyBase, + pub size: CopyExtent, +} + +#[derive(Debug)] +pub struct Attachment<'a, A: Api> { + pub view: &'a A::TextureView, + /// Contains either a single mutating usage as a target, + /// or a valid combination of read-only usages. + pub usage: TextureUses, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for Attachment<'_, A> { + fn clone(&self) -> Self { + Self { + view: self.view, + usage: self.usage, + } + } +} + +#[derive(Debug)] +pub struct ColorAttachment<'a, A: Api> { + pub target: Attachment<'a, A>, + pub resolve_target: Option<Attachment<'a, A>>, + pub ops: AttachmentOps, + pub clear_value: wgt::Color, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for ColorAttachment<'_, A> { + fn clone(&self) -> Self { + Self { + target: self.target.clone(), + resolve_target: self.resolve_target.clone(), + ops: self.ops, + clear_value: self.clear_value, + } + } +} + +#[derive(Clone, Debug)] +pub struct DepthStencilAttachment<'a, A: Api> { + pub target: Attachment<'a, A>, + pub depth_ops: AttachmentOps, + pub stencil_ops: AttachmentOps, + pub clear_value: (f32, u32), +} + +#[derive(Debug)] +pub struct RenderPassTimestampWrites<'a, A: Api> { + pub query_set: &'a A::QuerySet, + pub beginning_of_pass_write_index: Option<u32>, + pub end_of_pass_write_index: Option<u32>, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for RenderPassTimestampWrites<'_, A> { + fn clone(&self) -> Self { + Self { + query_set: self.query_set, + 
beginning_of_pass_write_index: self.beginning_of_pass_write_index, + end_of_pass_write_index: self.end_of_pass_write_index, + } + } +} + +#[derive(Clone, Debug)] +pub struct RenderPassDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub extent: wgt::Extent3d, + pub sample_count: u32, + pub color_attachments: &'a [Option<ColorAttachment<'a, A>>], + pub depth_stencil_attachment: Option<DepthStencilAttachment<'a, A>>, + pub multiview: Option<NonZeroU32>, + pub timestamp_writes: Option<RenderPassTimestampWrites<'a, A>>, + pub occlusion_query_set: Option<&'a A::QuerySet>, +} + +#[derive(Debug)] +pub struct ComputePassTimestampWrites<'a, A: Api> { + pub query_set: &'a A::QuerySet, + pub beginning_of_pass_write_index: Option<u32>, + pub end_of_pass_write_index: Option<u32>, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for ComputePassTimestampWrites<'_, A> { + fn clone(&self) -> Self { + Self { + query_set: self.query_set, + beginning_of_pass_write_index: self.beginning_of_pass_write_index, + end_of_pass_write_index: self.end_of_pass_write_index, + } + } +} + +#[derive(Clone, Debug)] +pub struct ComputePassDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub timestamp_writes: Option<ComputePassTimestampWrites<'a, A>>, +} + +/// Stores the text of any validation errors that have occurred since +/// the last call to `get_and_reset`. +/// +/// Each value is a validation error and a message associated with it, +/// or `None` if the error has no message from the api. +/// +/// This is used for internal wgpu testing only and _must not_ be used +/// as a way to check for errors. +/// +/// This works as a static because `cargo nextest` runs all of our +/// tests in separate processes, so each test gets its own canary. +/// +/// This prevents the issue of one validation error terminating the +/// entire process. 
+pub static VALIDATION_CANARY: ValidationCanary = ValidationCanary { + inner: Mutex::new(Vec::new()), +}; + +/// Flag for internal testing. +pub struct ValidationCanary { + inner: Mutex<Vec<String>>, +} + +impl ValidationCanary { + #[allow(dead_code)] // in some configurations this function is dead + fn add(&self, msg: String) { + self.inner.lock().push(msg); + } + + /// Returns any API validation errors that have occurred in this process + /// since the last call to this function. + pub fn get_and_reset(&self) -> Vec<String> { + self.inner.lock().drain(..).collect() + } +} + +#[test] +fn test_default_limits() { + let limits = wgt::Limits::default(); + assert!(limits.max_bind_groups <= MAX_BIND_GROUPS as u32); +} + +#[derive(Clone, Debug)] +pub struct AccelerationStructureDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::BufferAddress, + pub format: AccelerationStructureFormat, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum AccelerationStructureFormat { + TopLevel, + BottomLevel, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum AccelerationStructureBuildMode { + Build, + Update, +} + +/// Information of the required size for a corresponding entries struct (+ flags) +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +pub struct AccelerationStructureBuildSizes { + pub acceleration_structure_size: wgt::BufferAddress, + pub update_scratch_size: wgt::BufferAddress, + pub build_scratch_size: wgt::BufferAddress, +} + +/// Updates use source_acceleration_structure if present, else the update will be performed in place. +/// For updates, only the data is allowed to change (not the meta data or sizes). 
+#[derive(Clone, Debug)] +pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { + pub entries: &'a AccelerationStructureEntries<'a, A>, + pub mode: AccelerationStructureBuildMode, + pub flags: AccelerationStructureBuildFlags, + pub source_acceleration_structure: Option<&'a A::AccelerationStructure>, + pub destination_acceleration_structure: &'a A::AccelerationStructure, + pub scratch_buffer: &'a A::Buffer, + pub scratch_buffer_offset: wgt::BufferAddress, +} + +/// - All buffers, buffer addresses and offsets will be ignored. +/// - The build mode will be ignored. +/// - Reducing the amount of Instances, Triangle groups or AABB groups (or the number of Triangles/AABBs in corresponding groups), +/// may result in reduced size requirements. +/// - Any other change may result in a bigger or smaller size requirement. +#[derive(Clone, Debug)] +pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> { + pub entries: &'a AccelerationStructureEntries<'a, A>, + pub flags: AccelerationStructureBuildFlags, +} + +/// Entries for a single descriptor +/// * `Instances` - Multiple instances for a top level acceleration structure +/// * `Triangles` - Multiple triangle meshes for a bottom level acceleration structure +/// * `AABBs` - List of list of axis aligned bounding boxes for a bottom level acceleration structure +#[derive(Debug)] +pub enum AccelerationStructureEntries<'a, A: Api> { + Instances(AccelerationStructureInstances<'a, A>), + Triangles(Vec<AccelerationStructureTriangles<'a, A>>), + AABBs(Vec<AccelerationStructureAABBs<'a, A>>), +} + +/// * `first_vertex` - offset in the vertex buffer (as number of vertices) +/// * `indices` - optional index buffer with attributes +/// * `transform` - optional transform +#[derive(Clone, Debug)] +pub struct AccelerationStructureTriangles<'a, A: Api> { + pub vertex_buffer: Option<&'a A::Buffer>, + pub vertex_format: wgt::VertexFormat, + pub first_vertex: u32, + pub vertex_count: u32, + pub vertex_stride: 
wgt::BufferAddress, + pub indices: Option<AccelerationStructureTriangleIndices<'a, A>>, + pub transform: Option<AccelerationStructureTriangleTransform<'a, A>>, + pub flags: AccelerationStructureGeometryFlags, +} + +/// * `offset` - offset in bytes +#[derive(Clone, Debug)] +pub struct AccelerationStructureAABBs<'a, A: Api> { + pub buffer: Option<&'a A::Buffer>, + pub offset: u32, + pub count: u32, + pub stride: wgt::BufferAddress, + pub flags: AccelerationStructureGeometryFlags, +} + +/// * `offset` - offset in bytes +#[derive(Clone, Debug)] +pub struct AccelerationStructureInstances<'a, A: Api> { + pub buffer: Option<&'a A::Buffer>, + pub offset: u32, + pub count: u32, +} + +/// * `offset` - offset in bytes +#[derive(Clone, Debug)] +pub struct AccelerationStructureTriangleIndices<'a, A: Api> { + pub format: wgt::IndexFormat, + pub buffer: Option<&'a A::Buffer>, + pub offset: u32, + pub count: u32, +} + +/// * `offset` - offset in bytes +#[derive(Clone, Debug)] +pub struct AccelerationStructureTriangleTransform<'a, A: Api> { + pub buffer: &'a A::Buffer, + pub offset: u32, +} + +pub use wgt::AccelerationStructureFlags as AccelerationStructureBuildFlags; +pub use wgt::AccelerationStructureGeometryFlags; + +bitflags::bitflags! 
{ + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] + pub struct AccelerationStructureUses: u8 { + // For blas used as input for tlas + const BUILD_INPUT = 1 << 0; + // Target for acceleration structure build + const BUILD_OUTPUT = 1 << 1; + // Tlas used in a shader + const SHADER_INPUT = 1 << 2; + } +} + +#[derive(Debug, Clone)] +pub struct AccelerationStructureBarrier { + pub usage: Range<AccelerationStructureUses>, +} diff --git a/third_party/rust/wgpu-hal/src/metal/adapter.rs b/third_party/rust/wgpu-hal/src/metal/adapter.rs new file mode 100644 index 0000000000..a946ce5819 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/adapter.rs @@ -0,0 +1,1137 @@ +use metal::{MTLFeatureSet, MTLGPUFamily, MTLLanguageVersion, MTLReadWriteTextureTier}; +use objc::{class, msg_send, sel, sel_impl}; +use parking_lot::Mutex; +use wgt::{AstcBlock, AstcChannel}; + +use std::{sync::Arc, thread}; + +use super::TimestampQuerySupport; + +const MAX_COMMAND_BUFFERS: u64 = 2048; + +unsafe impl Send for super::Adapter {} +unsafe impl Sync for super::Adapter {} + +impl super::Adapter { + pub(super) fn new(shared: Arc<super::AdapterShared>) -> Self { + Self { shared } + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let queue = self + .shared + .device + .lock() + .new_command_queue_with_max_command_buffer_count(MAX_COMMAND_BUFFERS); + + // Acquiring the meaning of timestamp ticks is hard with Metal! + // The only thing there is is a method correlating cpu & gpu timestamps (`device.sample_timestamps`). 
+ // Users are supposed to call this method twice and calculate the difference, + // see "Converting GPU Timestamps into CPU Time": + // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time + // Not only does this mean we get an approximate value, this is as also *very slow*! + // Chromium opted to solve this using a linear regression that they stop at some point + // https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/dawn/src/dawn/native/metal/DeviceMTL.mm;drc=76be2f9f117654f3fe4faa477b0445114fccedda;bpv=0;bpt=1;l=46 + // Generally, the assumption is that timestamp values aren't changing over time, after all all other APIs provide stable values. + // + // We should do as Chromium does for the general case, but this requires quite some state tracking + // and doesn't even provide perfectly accurate values, especially at the start of the application when + // we didn't have the chance to sample a lot of values just yet. + // + // So instead, we're doing the dangerous but easy thing and use our "knowledge" of timestamps + // conversions on different devices, after all Metal isn't supported on that many ;) + // Based on: + // * https://github.com/gfx-rs/wgpu/pull/2528 + // * https://github.com/gpuweb/gpuweb/issues/1325#issuecomment-761041326 + let timestamp_period = if self.shared.device.lock().name().starts_with("Intel") { + 83.333 + } else { + // Known for Apple Silicon (at least M1 & M2, iPad Pro 2018) and AMD GPUs. 
+ 1.0 + }; + + Ok(crate::OpenDevice { + device: super::Device { + shared: Arc::clone(&self.shared), + features, + }, + queue: super::Queue { + raw: Arc::new(Mutex::new(queue)), + timestamp_period, + }, + }) + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + use wgt::TextureFormat as Tf; + + let pc = &self.shared.private_caps; + // Affected formats documented at: + // https://developer.apple.com/documentation/metal/mtlreadwritetexturetier/mtlreadwritetexturetier1?language=objc + // https://developer.apple.com/documentation/metal/mtlreadwritetexturetier/mtlreadwritetexturetier2?language=objc + let (read_write_tier1_if, read_write_tier2_if) = match pc.read_write_texture_tier { + metal::MTLReadWriteTextureTier::TierNone => (Tfc::empty(), Tfc::empty()), + metal::MTLReadWriteTextureTier::Tier1 => (Tfc::STORAGE_READ_WRITE, Tfc::empty()), + metal::MTLReadWriteTextureTier::Tier2 => { + (Tfc::STORAGE_READ_WRITE, Tfc::STORAGE_READ_WRITE) + } + }; + let msaa_count = pc.sample_count_mask; + + let msaa_resolve_desktop_if = if pc.msaa_desktop { + Tfc::MULTISAMPLE_RESOLVE + } else { + Tfc::empty() + }; + let msaa_resolve_apple3x_if = if pc.msaa_desktop | pc.msaa_apple3 { + Tfc::MULTISAMPLE_RESOLVE + } else { + Tfc::empty() + }; + let is_not_apple1x = super::PrivateCapabilities::supports_any( + self.shared.device.lock().as_ref(), + &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + ], + ); + + // Metal defined pixel format capabilities + let all_caps = Tfc::SAMPLED_LINEAR + | Tfc::STORAGE + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | Tfc::MULTISAMPLE_RESOLVE; + + let extra = match format { + Tf::R8Unorm | Tf::R16Float | Tf::Rgba8Unorm | Tf::Rgba16Float => { + read_write_tier2_if | all_caps + } + Tf::R8Snorm | Tf::Rg8Snorm | Tf::Rgba8Snorm => { + let mut flags = 
all_caps; + flags.set(Tfc::MULTISAMPLE_RESOLVE, is_not_apple1x); + flags + } + Tf::R8Uint + | Tf::R8Sint + | Tf::R16Uint + | Tf::R16Sint + | Tf::Rgba8Uint + | Tf::Rgba8Sint + | Tf::Rgba16Uint + | Tf::Rgba16Sint => { + read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::R16Unorm + | Tf::R16Snorm + | Tf::Rg16Unorm + | Tf::Rg16Snorm + | Tf::Rgba16Unorm + | Tf::Rgba16Snorm => { + Tfc::SAMPLED_LINEAR + | Tfc::STORAGE + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | msaa_resolve_desktop_if + } + Tf::Rg8Unorm | Tf::Rg16Float | Tf::Bgra8Unorm => all_caps, + Tf::Rg8Uint | Tf::Rg8Sint => Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count, + Tf::R32Uint | Tf::R32Sint => { + read_write_tier1_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::R32Float => { + let flags = if pc.format_r32float_all { + all_caps + } else { + Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | Tfc::COLOR_ATTACHMENT_BLEND | msaa_count + }; + read_write_tier1_if | flags + } + Tf::Rg16Uint | Tf::Rg16Sint => Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count, + Tf::Rgba8UnormSrgb | Tf::Bgra8UnormSrgb => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rgba8_srgb_all); + flags + } + Tf::Rgb10a2Uint => { + let mut flags = Tfc::COLOR_ATTACHMENT | msaa_count; + flags.set(Tfc::STORAGE, pc.format_rgb10a2_uint_write); + flags + } + Tf::Rgb10a2Unorm => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rgb10a2_unorm_all); + flags + } + Tf::Rg11b10Float => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rg11b10_all); + flags + } + Tf::Rg32Uint | Tf::Rg32Sint => Tfc::COLOR_ATTACHMENT | Tfc::STORAGE | msaa_count, + Tf::Rg32Float => { + if pc.format_rg32float_all { + all_caps + } else { + Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | Tfc::COLOR_ATTACHMENT_BLEND | msaa_count + } + } + Tf::Rgba32Uint | Tf::Rgba32Sint => { + read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::Rgba32Float => 
{ + let mut flags = read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT; + if pc.format_rgba32float_all { + flags |= all_caps + } else if pc.msaa_apple7 { + flags |= msaa_count + }; + flags + } + Tf::Stencil8 => { + all_caps | Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if + } + Tf::Depth16Unorm => { + let mut flags = + Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if; + if pc.format_depth16unorm { + flags |= Tfc::SAMPLED_LINEAR + } + flags + } + Tf::Depth32Float | Tf::Depth32FloatStencil8 => { + let mut flags = + Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if; + if pc.format_depth32float_filter { + flags |= Tfc::SAMPLED_LINEAR + } + flags + } + Tf::Depth24Plus | Tf::Depth24PlusStencil8 => { + let mut flags = Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count; + if pc.format_depth24_stencil8 { + flags |= Tfc::SAMPLED_LINEAR | Tfc::MULTISAMPLE_RESOLVE + } else { + flags |= msaa_resolve_apple3x_if; + if pc.format_depth32float_filter { + flags |= Tfc::SAMPLED_LINEAR + } + } + flags + } + Tf::NV12 => return Tfc::empty(), + Tf::Rgb9e5Ufloat => { + if pc.msaa_apple3 { + all_caps + } else if pc.msaa_desktop { + Tfc::SAMPLED_LINEAR + } else { + Tfc::SAMPLED_LINEAR + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | Tfc::MULTISAMPLE_RESOLVE + } + } + Tf::Bc1RgbaUnorm + | Tf::Bc1RgbaUnormSrgb + | Tf::Bc2RgbaUnorm + | Tf::Bc2RgbaUnormSrgb + | Tf::Bc3RgbaUnorm + | Tf::Bc3RgbaUnormSrgb + | Tf::Bc4RUnorm + | Tf::Bc4RSnorm + | Tf::Bc5RgUnorm + | Tf::Bc5RgSnorm + | Tf::Bc6hRgbUfloat + | Tf::Bc6hRgbFloat + | Tf::Bc7RgbaUnorm + | Tf::Bc7RgbaUnormSrgb => { + if pc.format_bc { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | Tf::EacRg11Unorm + | Tf::EacRg11Snorm => { + if pc.format_eac_etc { + 
Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + Tf::Astc { + block: _, + channel: _, + } => { + if pc.format_astc || pc.format_astc_hdr { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + }; + + Tfc::COPY_SRC | Tfc::COPY_DST | Tfc::SAMPLED | extra + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + let current_extent = if surface.main_thread_id == thread::current().id() { + Some(surface.dimensions()) + } else { + log::warn!("Unable to get the current view dimensions on a non-main thread"); + None + }; + + let mut formats = vec![ + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Rgba16Float, + ]; + if self.shared.private_caps.format_rgb10a2_unorm_all { + formats.push(wgt::TextureFormat::Rgb10a2Unorm); + } + + let pc = &self.shared.private_caps; + Some(crate::SurfaceCapabilities { + formats, + // We use this here to govern the maximum number of drawables + 1. + // See https://developer.apple.com/documentation/quartzcore/cametallayer/2938720-maximumdrawablecount + maximum_frame_latency: if pc.can_set_maximum_drawables_count { + 1..=2 + } else { + // 3 is the default value for maximum drawables in `CAMetalLayer` documentation + // iOS 10.3 was tested to use 3 on iphone5s + 2..=2 + }, + present_modes: if pc.can_set_display_sync { + vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate] + } else { + vec![wgt::PresentMode::Fifo] + }, + composite_alpha_modes: vec![ + wgt::CompositeAlphaMode::Opaque, + wgt::CompositeAlphaMode::PostMultiplied, + ], + + current_extent, + usage: crate::TextureUses::COLOR_TARGET + | crate::TextureUses::COPY_SRC + | crate::TextureUses::COPY_DST, + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + let timestamp = self.shared.presentation_timer.get_timestamp_ns(); + + wgt::PresentationTimestamp(timestamp) + } +} + +const RESOURCE_HEAP_SUPPORT: &[MTLFeatureSet] = &[ + 
MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v3, +]; + +const ARGUMENT_BUFFER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v3, +]; + +const MUTABLE_COMPARISON_SAMPLER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const SAMPLER_CLAMP_TO_BORDER_SUPPORT: &[MTLFeatureSet] = &[MTLFeatureSet::macOS_GPUFamily1_v2]; + +const ASTC_PIXEL_FORMAT_FEATURES: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, +]; + +const ANY8_UNORM_SRGB_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, +]; + +const ANY8_SNORM_RESOLVE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGBA8_SRGB: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, +]; + +const RGB10A2UNORM_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGB10A2UINT_WRITE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RG11B10FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGB9E5FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, +]; + +const BGR10A2_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +/// "Indirect draw & dispatch arguments" in the Metal feature set tables +const INDIRECT_DRAW_DISPATCH_SUPPORT: &[MTLFeatureSet] = &[ + 
MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +/// "Base vertex/instance drawing" in the Metal feature set tables +/// +/// in our terms, `base_vertex` and `first_instance` must be 0 +const BASE_VERTEX_FIRST_INSTANCE_SUPPORT: &[MTLFeatureSet] = INDIRECT_DRAW_DISPATCH_SUPPORT; + +const TEXTURE_CUBE_ARRAY_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const DUAL_SOURCE_BLEND_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v2, +]; + +const LAYERED_RENDERING_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const FUNCTION_SPECIALIZATION_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v2, +]; + +const DEPTH_CLIP_MODE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const OS_NOT_SUPPORT: (usize, usize) = (10000, 0); + +impl super::PrivateCapabilities { + fn supports_any(raw: &metal::DeviceRef, features_sets: &[MTLFeatureSet]) -> bool { + features_sets + .iter() + .cloned() + .any(|x| raw.supports_feature_set(x)) + } + + pub fn new(device: &metal::Device) -> Self { + #[repr(C)] + #[derive(Clone, Copy, Debug)] + #[allow(clippy::upper_case_acronyms)] + struct NSOperatingSystemVersion { + major: usize, + minor: usize, + patch: usize, + } + + impl NSOperatingSystemVersion { + fn at_least( + &self, + mac_version: (usize, usize), + ios_version: (usize, usize), + is_mac: bool, + ) -> bool { + if is_mac { + self.major > mac_version.0 + || (self.major == mac_version.0 && self.minor >= mac_version.1) + } else { + self.major > ios_version.0 + || 
(self.major == ios_version.0 && self.minor >= ios_version.1) + } + } + } + + let version: NSOperatingSystemVersion = unsafe { + let process_info: *mut objc::runtime::Object = + msg_send![class!(NSProcessInfo), processInfo]; + msg_send![process_info, operatingSystemVersion] + }; + + let os_is_mac = device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1); + // Metal was first introduced in OS X 10.11 and iOS 8. The current version number of visionOS is 1.0.0. Additionally, + // on the Simulator, Apple only provides the Apple2 GPU capability, and the Apple2+ GPU capability covers the capabilities of Apple2. + // Therefore, the following conditions can be used to determine if it is visionOS. + // https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator + let os_is_xr = version.major < 8 && device.supports_family(MTLGPUFamily::Apple2); + let family_check = os_is_xr || version.at_least((10, 15), (13, 0), os_is_mac); + + let mut sample_count_mask = crate::TextureFormatCapabilities::MULTISAMPLE_X4; // 1 and 4 samples are supported on all devices + if device.supports_texture_sample_count(2) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X2; + } + if device.supports_texture_sample_count(8) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X8; + } + if device.supports_texture_sample_count(16) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X16; + } + + let rw_texture_tier = if version.at_least((10, 13), (11, 0), os_is_mac) { + device.read_write_texture_support() + } else if version.at_least((10, 12), OS_NOT_SUPPORT, os_is_mac) { + if Self::supports_any(device, &[MTLFeatureSet::macOS_ReadWriteTextureTier2]) { + MTLReadWriteTextureTier::Tier2 + } else { + MTLReadWriteTextureTier::Tier1 + } + } else { + MTLReadWriteTextureTier::TierNone + }; + + let mut timestamp_query_support = TimestampQuerySupport::empty(); + if version.at_least((11, 0), (14, 0), os_is_mac) + && 
device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary) + { + // If we don't support at stage boundary, don't support anything else. + timestamp_query_support.insert(TimestampQuerySupport::STAGE_BOUNDARIES); + + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary) { + timestamp_query_support.insert(TimestampQuerySupport::ON_RENDER_ENCODER); + } + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary) + { + timestamp_query_support.insert(TimestampQuerySupport::ON_COMPUTE_ENCODER); + } + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtBlitBoundary) { + timestamp_query_support.insert(TimestampQuerySupport::ON_BLIT_ENCODER); + } + // `TimestampQuerySupport::INSIDE_WGPU_PASSES` emerges from the other flags. + } + + Self { + family_check, + msl_version: if os_is_xr || version.at_least((12, 0), (15, 0), os_is_mac) { + MTLLanguageVersion::V2_4 + } else if version.at_least((11, 0), (14, 0), os_is_mac) { + MTLLanguageVersion::V2_3 + } else if version.at_least((10, 15), (13, 0), os_is_mac) { + MTLLanguageVersion::V2_2 + } else if version.at_least((10, 14), (12, 0), os_is_mac) { + MTLLanguageVersion::V2_1 + } else if version.at_least((10, 13), (11, 0), os_is_mac) { + MTLLanguageVersion::V2_0 + } else if version.at_least((10, 12), (10, 0), os_is_mac) { + MTLLanguageVersion::V1_2 + } else if version.at_least((10, 11), (9, 0), os_is_mac) { + MTLLanguageVersion::V1_1 + } else { + MTLLanguageVersion::V1_0 + }, + // macOS 10.11 doesn't support read-write resources + fragment_rw_storage: version.at_least((10, 12), (8, 0), os_is_mac), + read_write_texture_tier: rw_texture_tier, + msaa_desktop: os_is_mac, + msaa_apple3: if family_check { + device.supports_family(MTLGPUFamily::Apple3) + } else { + device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily3_v4) + }, + msaa_apple7: family_check && device.supports_family(MTLGPUFamily::Apple7), + resource_heaps: 
Self::supports_any(device, RESOURCE_HEAP_SUPPORT), + argument_buffers: Self::supports_any(device, ARGUMENT_BUFFER_SUPPORT), + shared_textures: !os_is_mac, + mutable_comparison_samplers: Self::supports_any( + device, + MUTABLE_COMPARISON_SAMPLER_SUPPORT, + ), + sampler_clamp_to_border: Self::supports_any(device, SAMPLER_CLAMP_TO_BORDER_SUPPORT), + indirect_draw_dispatch: Self::supports_any(device, INDIRECT_DRAW_DISPATCH_SUPPORT), + base_vertex_first_instance_drawing: Self::supports_any( + device, + BASE_VERTEX_FIRST_INSTANCE_SUPPORT, + ), + dual_source_blending: Self::supports_any(device, DUAL_SOURCE_BLEND_SUPPORT), + low_power: !os_is_mac || device.is_low_power(), + headless: os_is_mac && device.is_headless(), + layered_rendering: Self::supports_any(device, LAYERED_RENDERING_SUPPORT), + function_specialization: Self::supports_any(device, FUNCTION_SPECIALIZATION_SUPPORT), + depth_clip_mode: Self::supports_any(device, DEPTH_CLIP_MODE), + texture_cube_array: Self::supports_any(device, TEXTURE_CUBE_ARRAY_SUPPORT), + supports_float_filtering: os_is_mac + || (version.at_least((11, 0), (14, 0), os_is_mac) + && device.supports_32bit_float_filtering()), + format_depth24_stencil8: os_is_mac && device.d24_s8_supported(), + format_depth32_stencil8_filter: os_is_mac, + format_depth32_stencil8_none: !os_is_mac, + format_min_srgb_channels: if os_is_mac { 4 } else { 1 }, + format_b5: !os_is_mac, + format_bc: os_is_mac, + format_eac_etc: !os_is_mac + // M1 in macOS supports EAC/ETC2 + || (family_check && device.supports_family(MTLGPUFamily::Apple7)), + // A8(Apple2) and later always support ASTC pixel formats + format_astc: (family_check && device.supports_family(MTLGPUFamily::Apple2)) + || Self::supports_any(device, ASTC_PIXEL_FORMAT_FEATURES), + // A13(Apple6) M1(Apple7) and later always support HDR ASTC pixel formats + format_astc_hdr: family_check && device.supports_family(MTLGPUFamily::Apple6), + format_any8_unorm_srgb_all: Self::supports_any(device, ANY8_UNORM_SRGB_ALL), + 
format_any8_unorm_srgb_no_write: !Self::supports_any(device, ANY8_UNORM_SRGB_ALL) + && !os_is_mac, + format_any8_snorm_all: Self::supports_any(device, ANY8_SNORM_RESOLVE), + format_r16_norm_all: os_is_mac, + // No devices support r32's all capabilities + format_r32_all: false, + // All devices support r32's write capability + format_r32_no_write: false, + // iOS support r32float's write capability, macOS support r32float's all capabilities + format_r32float_no_write_no_filter: false, + // Only iOS doesn't support r32float's filter capability + format_r32float_no_filter: !os_is_mac, + format_r32float_all: os_is_mac, + format_rgba8_srgb_all: Self::supports_any(device, RGBA8_SRGB), + format_rgba8_srgb_no_write: !Self::supports_any(device, RGBA8_SRGB), + format_rgb10a2_unorm_all: Self::supports_any(device, RGB10A2UNORM_ALL), + format_rgb10a2_unorm_no_write: !Self::supports_any(device, RGB10A2UNORM_ALL), + format_rgb10a2_uint_write: Self::supports_any(device, RGB10A2UINT_WRITE), + format_rg11b10_all: Self::supports_any(device, RG11B10FLOAT_ALL), + format_rg11b10_no_write: !Self::supports_any(device, RG11B10FLOAT_ALL), + format_rgb9e5_all: Self::supports_any(device, RGB9E5FLOAT_ALL), + format_rgb9e5_no_write: !Self::supports_any(device, RGB9E5FLOAT_ALL) && !os_is_mac, + format_rgb9e5_filter_only: os_is_mac, + format_rg32_color: true, + format_rg32_color_write: true, + // Only macOS support rg32float's all capabilities + format_rg32float_all: os_is_mac, + // All devices support rg32float's color + blend capabilities + format_rg32float_color_blend: true, + // Only iOS doesn't support rg32float's filter + format_rg32float_no_filter: !os_is_mac, + format_rgba32int_color: true, + // All devices support rgba32uint and rgba32sint's color + write capabilities + format_rgba32int_color_write: true, + format_rgba32float_color: true, + // All devices support rgba32float's color + write capabilities + format_rgba32float_color_write: true, + // Only macOS support rgba32float's all 
capabilities + format_rgba32float_all: os_is_mac, + format_depth16unorm: Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v3, + MTLFeatureSet::macOS_GPUFamily1_v2, + ], + ), + format_depth32float_filter: os_is_mac, + format_depth32float_none: !os_is_mac, + format_bgr10a2_all: Self::supports_any(device, BGR10A2_ALL), + format_bgr10a2_no_write: !Self::supports_any(device, BGR10A2_ALL), + max_buffers_per_stage: 31, + max_vertex_buffers: 31.min(crate::MAX_VERTEX_BUFFERS as u32), + max_textures_per_stage: if os_is_mac + || (family_check && device.supports_family(MTLGPUFamily::Apple6)) + { + 128 + } else if family_check && device.supports_family(MTLGPUFamily::Apple4) { + 96 + } else { + 31 + }, + max_samplers_per_stage: 16, + buffer_alignment: if os_is_mac || os_is_xr { 256 } else { 64 }, + max_buffer_size: if version.at_least((10, 14), (12, 0), os_is_mac) { + // maxBufferLength available on macOS 10.14+ and iOS 12.0+ + let buffer_size: metal::NSInteger = + unsafe { msg_send![device.as_ref(), maxBufferLength] }; + buffer_size as _ + } else if os_is_mac { + 1 << 30 // 1GB on macOS 10.11 and up + } else { + 1 << 28 // 256MB on iOS 8.0+ + }, + max_texture_size: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 16384 + } else { + 8192 + }, + max_texture_3d_size: 2048, + max_texture_layers: 2048, + max_fragment_input_components: if os_is_mac + || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1) + { + 124 + } else { + 60 + }, + max_color_render_targets: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 8 + } else { + 4 + }, + max_varying_components: if device + .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1) + { + 124 + } else { + 60 + }, + max_threads_per_group: if Self::supports_any( + device, + &[ + 
MTLFeatureSet::iOS_GPUFamily4_v2, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 1024 + } else { + 512 + }, + max_total_threadgroup_memory: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::macOS_GPUFamily1_v2, + ], + ) { + 32 << 10 + } else { + 16 << 10 + }, + sample_count_mask, + supports_debug_markers: Self::supports_any( + device, + &[ + MTLFeatureSet::macOS_GPUFamily1_v2, + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + ], + ), + supports_binary_archives: family_check + && (device.supports_family(MTLGPUFamily::Apple3) + || device.supports_family(MTLGPUFamily::Mac1)), + supports_capture_manager: version.at_least((10, 13), (11, 0), os_is_mac), + can_set_maximum_drawables_count: version.at_least((10, 14), (11, 2), os_is_mac), + can_set_display_sync: version.at_least((10, 13), OS_NOT_SUPPORT, os_is_mac), + can_set_next_drawable_timeout: version.at_least((10, 13), (11, 0), os_is_mac), + supports_arrays_of_textures: Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v2, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v3, + ], + ), + supports_arrays_of_textures_write: family_check + && (device.supports_family(MTLGPUFamily::Apple6) + || device.supports_family(MTLGPUFamily::Mac1) + || device.supports_family(MTLGPUFamily::MacCatalyst1)), + supports_mutability: version.at_least((10, 13), (11, 0), os_is_mac), + //Depth clipping is supported on all macOS GPU families and iOS family 4 and later + supports_depth_clip_control: os_is_mac + || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1), + supports_preserve_invariance: version.at_least((11, 0), (13, 0), os_is_mac), + // Metal 2.2 on mac, 2.3 on iOS. 
+ supports_shader_primitive_index: version.at_least((10, 15), (14, 0), os_is_mac), + has_unified_memory: if version.at_least((10, 15), (13, 0), os_is_mac) { + Some(device.has_unified_memory()) + } else { + None + }, + timestamp_query_support, + } + } + + pub fn device_type(&self) -> wgt::DeviceType { + if self.has_unified_memory.unwrap_or(self.low_power) { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + } + } + + pub fn features(&self) -> wgt::Features { + use wgt::Features as F; + + let mut features = F::empty() + | F::MAPPABLE_PRIMARY_BUFFERS + | F::VERTEX_WRITABLE_STORAGE + | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | F::PUSH_CONSTANTS + | F::POLYGON_MODE_LINE + | F::CLEAR_TEXTURE + | F::TEXTURE_FORMAT_16BIT_NORM + | F::SHADER_F16 + | F::DEPTH32FLOAT_STENCIL8 + | F::BGRA8UNORM_STORAGE; + + features.set(F::FLOAT32_FILTERABLE, self.supports_float_filtering); + features.set( + F::INDIRECT_FIRST_INSTANCE | F::MULTI_DRAW_INDIRECT, + self.indirect_draw_dispatch, + ); + features.set( + F::TIMESTAMP_QUERY, + self.timestamp_query_support + .contains(TimestampQuerySupport::STAGE_BOUNDARIES), + ); + features.set( + F::TIMESTAMP_QUERY_INSIDE_PASSES, + self.timestamp_query_support + .contains(TimestampQuerySupport::INSIDE_WGPU_PASSES), + ); + features.set( + F::DUAL_SOURCE_BLENDING, + self.msl_version >= MTLLanguageVersion::V1_2 && self.dual_source_blending, + ); + features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); + features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); + features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc); + features.set(F::TEXTURE_COMPRESSION_ETC2, self.format_eac_etc); + + features.set(F::DEPTH_CLIP_CONTROL, self.supports_depth_clip_control); + features.set( + F::SHADER_PRIMITIVE_INDEX, + self.supports_shader_primitive_index, + ); + + features.set( + F::TEXTURE_BINDING_ARRAY + | F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | 
F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + self.msl_version >= MTLLanguageVersion::V2_0 && self.supports_arrays_of_textures, + ); + //// XXX: this is technically not true, as read-only storage images can be used in arrays + //// on precisely the same conditions that sampled textures can. But texel fetch from a + //// sampled texture is a thing; should we bother introducing another feature flag? + if self.msl_version >= MTLLanguageVersion::V2_2 + && self.supports_arrays_of_textures + && self.supports_arrays_of_textures_write + { + features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY); + } + + features.set( + F::ADDRESS_MODE_CLAMP_TO_BORDER, + self.sampler_clamp_to_border, + ); + features.set(F::ADDRESS_MODE_CLAMP_TO_ZERO, true); + + features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); + features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); + + features + } + + pub fn capabilities(&self) -> crate::Capabilities { + let mut downlevel = wgt::DownlevelCapabilities::default(); + downlevel.flags.set( + wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE, + self.fragment_rw_storage, + ); + downlevel.flags.set( + wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES, + self.texture_cube_array, + ); + // TODO: separate the mutable comparisons from immutable ones + downlevel.flags.set( + wgt::DownlevelFlags::COMPARISON_SAMPLERS, + self.mutable_comparison_samplers, + ); + downlevel.flags.set( + wgt::DownlevelFlags::INDIRECT_EXECUTION, + self.indirect_draw_dispatch, + ); + // TODO: add another flag for `first_instance` + downlevel.flags.set( + wgt::DownlevelFlags::BASE_VERTEX, + self.base_vertex_first_instance_drawing, + ); + downlevel + .flags + .set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, true); + + let base = wgt::Limits::default(); + crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: self.max_texture_size as u32, + max_texture_dimension_2d: self.max_texture_size as u32, + max_texture_dimension_3d: self.max_texture_3d_size as u32, + 
max_texture_array_layers: self.max_texture_layers as u32, + max_bind_groups: 8, + max_bindings_per_bind_group: 65535, + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: self.max_textures_per_stage, + max_samplers_per_shader_stage: self.max_samplers_per_stage, + max_storage_buffers_per_shader_stage: self.max_buffers_per_stage, + max_storage_textures_per_shader_stage: self.max_textures_per_stage, + max_uniform_buffers_per_shader_stage: self.max_buffers_per_stage, + max_uniform_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, + max_storage_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, + max_vertex_buffers: self.max_vertex_buffers, + max_vertex_attributes: 31, + max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, + max_push_constant_size: 0x1000, + min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, + min_storage_buffer_offset_alignment: self.buffer_alignment as u32, + max_inter_stage_shader_components: self.max_varying_components, + max_compute_workgroup_storage_size: self.max_total_threadgroup_memory, + max_compute_invocations_per_workgroup: self.max_threads_per_group, + max_compute_workgroup_size_x: self.max_threads_per_group, + max_compute_workgroup_size_y: self.max_threads_per_group, + max_compute_workgroup_size_z: self.max_threads_per_group, + max_compute_workgroups_per_dimension: 0xFFFF, + max_buffer_size: self.max_buffer_size, + max_non_sampler_bindings: std::u32::MAX, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(), + }, + downlevel, + } + } + + pub fn map_format(&self, format: wgt::TextureFormat) -> metal::MTLPixelFormat { + use metal::MTLPixelFormat::*; + use 
wgt::TextureFormat as Tf; + match format { + Tf::R8Unorm => R8Unorm, + Tf::R8Snorm => R8Snorm, + Tf::R8Uint => R8Uint, + Tf::R8Sint => R8Sint, + Tf::R16Uint => R16Uint, + Tf::R16Sint => R16Sint, + Tf::R16Unorm => R16Unorm, + Tf::R16Snorm => R16Snorm, + Tf::R16Float => R16Float, + Tf::Rg8Unorm => RG8Unorm, + Tf::Rg8Snorm => RG8Snorm, + Tf::Rg8Uint => RG8Uint, + Tf::Rg8Sint => RG8Sint, + Tf::Rg16Unorm => RG16Unorm, + Tf::Rg16Snorm => RG16Snorm, + Tf::R32Uint => R32Uint, + Tf::R32Sint => R32Sint, + Tf::R32Float => R32Float, + Tf::Rg16Uint => RG16Uint, + Tf::Rg16Sint => RG16Sint, + Tf::Rg16Float => RG16Float, + Tf::Rgba8Unorm => RGBA8Unorm, + Tf::Rgba8UnormSrgb => RGBA8Unorm_sRGB, + Tf::Bgra8UnormSrgb => BGRA8Unorm_sRGB, + Tf::Rgba8Snorm => RGBA8Snorm, + Tf::Bgra8Unorm => BGRA8Unorm, + Tf::Rgba8Uint => RGBA8Uint, + Tf::Rgba8Sint => RGBA8Sint, + Tf::Rgb10a2Uint => RGB10A2Uint, + Tf::Rgb10a2Unorm => RGB10A2Unorm, + Tf::Rg11b10Float => RG11B10Float, + Tf::Rg32Uint => RG32Uint, + Tf::Rg32Sint => RG32Sint, + Tf::Rg32Float => RG32Float, + Tf::Rgba16Uint => RGBA16Uint, + Tf::Rgba16Sint => RGBA16Sint, + Tf::Rgba16Unorm => RGBA16Unorm, + Tf::Rgba16Snorm => RGBA16Snorm, + Tf::Rgba16Float => RGBA16Float, + Tf::Rgba32Uint => RGBA32Uint, + Tf::Rgba32Sint => RGBA32Sint, + Tf::Rgba32Float => RGBA32Float, + Tf::Stencil8 => Stencil8, + Tf::Depth16Unorm => Depth16Unorm, + Tf::Depth32Float => Depth32Float, + Tf::Depth32FloatStencil8 => Depth32Float_Stencil8, + Tf::Depth24Plus => { + if self.format_depth24_stencil8 { + Depth24Unorm_Stencil8 + } else { + Depth32Float + } + } + Tf::Depth24PlusStencil8 => { + if self.format_depth24_stencil8 { + Depth24Unorm_Stencil8 + } else { + Depth32Float_Stencil8 + } + } + Tf::NV12 => unreachable!(), + Tf::Rgb9e5Ufloat => RGB9E5Float, + Tf::Bc1RgbaUnorm => BC1_RGBA, + Tf::Bc1RgbaUnormSrgb => BC1_RGBA_sRGB, + Tf::Bc2RgbaUnorm => BC2_RGBA, + Tf::Bc2RgbaUnormSrgb => BC2_RGBA_sRGB, + Tf::Bc3RgbaUnorm => BC3_RGBA, + Tf::Bc3RgbaUnormSrgb => BC3_RGBA_sRGB, + 
Tf::Bc4RUnorm => BC4_RUnorm, + Tf::Bc4RSnorm => BC4_RSnorm, + Tf::Bc5RgUnorm => BC5_RGUnorm, + Tf::Bc5RgSnorm => BC5_RGSnorm, + Tf::Bc6hRgbFloat => BC6H_RGBFloat, + Tf::Bc6hRgbUfloat => BC6H_RGBUfloat, + Tf::Bc7RgbaUnorm => BC7_RGBAUnorm, + Tf::Bc7RgbaUnormSrgb => BC7_RGBAUnorm_sRGB, + Tf::Etc2Rgb8Unorm => ETC2_RGB8, + Tf::Etc2Rgb8UnormSrgb => ETC2_RGB8_sRGB, + Tf::Etc2Rgb8A1Unorm => ETC2_RGB8A1, + Tf::Etc2Rgb8A1UnormSrgb => ETC2_RGB8A1_sRGB, + Tf::Etc2Rgba8Unorm => EAC_RGBA8, + Tf::Etc2Rgba8UnormSrgb => EAC_RGBA8_sRGB, + Tf::EacR11Unorm => EAC_R11Unorm, + Tf::EacR11Snorm => EAC_R11Snorm, + Tf::EacRg11Unorm => EAC_RG11Unorm, + Tf::EacRg11Snorm => EAC_RG11Snorm, + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm => match block { + AstcBlock::B4x4 => ASTC_4x4_LDR, + AstcBlock::B5x4 => ASTC_5x4_LDR, + AstcBlock::B5x5 => ASTC_5x5_LDR, + AstcBlock::B6x5 => ASTC_6x5_LDR, + AstcBlock::B6x6 => ASTC_6x6_LDR, + AstcBlock::B8x5 => ASTC_8x5_LDR, + AstcBlock::B8x6 => ASTC_8x6_LDR, + AstcBlock::B8x8 => ASTC_8x8_LDR, + AstcBlock::B10x5 => ASTC_10x5_LDR, + AstcBlock::B10x6 => ASTC_10x6_LDR, + AstcBlock::B10x8 => ASTC_10x8_LDR, + AstcBlock::B10x10 => ASTC_10x10_LDR, + AstcBlock::B12x10 => ASTC_12x10_LDR, + AstcBlock::B12x12 => ASTC_12x12_LDR, + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => ASTC_4x4_sRGB, + AstcBlock::B5x4 => ASTC_5x4_sRGB, + AstcBlock::B5x5 => ASTC_5x5_sRGB, + AstcBlock::B6x5 => ASTC_6x5_sRGB, + AstcBlock::B6x6 => ASTC_6x6_sRGB, + AstcBlock::B8x5 => ASTC_8x5_sRGB, + AstcBlock::B8x6 => ASTC_8x6_sRGB, + AstcBlock::B8x8 => ASTC_8x8_sRGB, + AstcBlock::B10x5 => ASTC_10x5_sRGB, + AstcBlock::B10x6 => ASTC_10x6_sRGB, + AstcBlock::B10x8 => ASTC_10x8_sRGB, + AstcBlock::B10x10 => ASTC_10x10_sRGB, + AstcBlock::B12x10 => ASTC_12x10_sRGB, + AstcBlock::B12x12 => ASTC_12x12_sRGB, + }, + AstcChannel::Hdr => match block { + AstcBlock::B4x4 => ASTC_4x4_HDR, + AstcBlock::B5x4 => ASTC_5x4_HDR, + AstcBlock::B5x5 => ASTC_5x5_HDR, + AstcBlock::B6x5 
=> ASTC_6x5_HDR, + AstcBlock::B6x6 => ASTC_6x6_HDR, + AstcBlock::B8x5 => ASTC_8x5_HDR, + AstcBlock::B8x6 => ASTC_8x6_HDR, + AstcBlock::B8x8 => ASTC_8x8_HDR, + AstcBlock::B10x5 => ASTC_10x5_HDR, + AstcBlock::B10x6 => ASTC_10x6_HDR, + AstcBlock::B10x8 => ASTC_10x8_HDR, + AstcBlock::B10x10 => ASTC_10x10_HDR, + AstcBlock::B12x10 => ASTC_12x10_HDR, + AstcBlock::B12x12 => ASTC_12x12_HDR, + }, + }, + } + } + + pub fn map_view_format( + &self, + format: wgt::TextureFormat, + aspects: crate::FormatAspects, + ) -> metal::MTLPixelFormat { + use crate::FormatAspects as Fa; + use metal::MTLPixelFormat::*; + use wgt::TextureFormat as Tf; + match (format, aspects) { + // map combined depth-stencil format to their stencil-only format + // see https://developer.apple.com/library/archive/documentation/Miscellaneous/Conceptual/MetalProgrammingGuide/WhatsNewiniOS10tvOS10andOSX1012/WhatsNewiniOS10tvOS10andOSX1012.html#//apple_ref/doc/uid/TP40014221-CH14-DontLinkElementID_77 + (Tf::Depth24PlusStencil8, Fa::STENCIL) => { + if self.format_depth24_stencil8 { + X24_Stencil8 + } else { + X32_Stencil8 + } + } + (Tf::Depth32FloatStencil8, Fa::STENCIL) => X32_Stencil8, + + _ => self.map_format(format), + } + } +} + +impl super::PrivateDisabilities { + pub fn new(device: &metal::Device) -> Self { + let is_intel = device.name().starts_with("Intel"); + Self { + broken_viewport_near_depth: is_intel + && !device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v4), + broken_layered_clear_image: is_intel, + } + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/command.rs b/third_party/rust/wgpu-hal/src/metal/command.rs new file mode 100644 index 0000000000..6f1a0d9c2f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/command.rs @@ -0,0 +1,1254 @@ +use super::{conv, AsNative, TimestampQuerySupport}; +use crate::CommandEncoder as _; +use std::{borrow::Cow, mem, ops::Range}; + +// has to match `Temp::binding_sizes` +const WORD_SIZE: usize = 4; + +impl Default for super::CommandState { + fn 
default() -> Self { + Self { + blit: None, + render: None, + compute: None, + raw_primitive_type: metal::MTLPrimitiveType::Point, + index: None, + raw_wg_size: metal::MTLSize::new(0, 0, 0), + stage_infos: Default::default(), + storage_buffer_length_map: Default::default(), + work_group_memory_sizes: Vec::new(), + push_constants: Vec::new(), + pending_timer_queries: Vec::new(), + } + } +} + +impl super::CommandEncoder { + fn enter_blit(&mut self) -> &metal::BlitCommandEncoderRef { + if self.state.blit.is_none() { + debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); + let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); + + // Take care of pending timer queries. + // If we can't use `sample_counters_in_buffer` we have to create a dummy blit encoder! + // + // There is a known bug in Metal where blit encoders won't write timestamps if they don't have a blit operation. + // See https://github.com/gpuweb/gpuweb/issues/2046#issuecomment-1205793680 & https://source.chromium.org/chromium/chromium/src/+/006c4eb70c96229834bbaf271290f40418144cd3:third_party/dawn/src/dawn/native/metal/BackendMTL.mm;l=350 + // + // To make things worse: + // * what counts as a blit operation is a bit unclear, experimenting seemed to indicate that resolve_counters doesn't count. + // * in some cases (when?) using `set_start_of_encoder_sample_index` doesn't work, so we have to use `set_end_of_encoder_sample_index` instead + // + // All this means that pretty much the only *reliable* thing as of writing is to: + // * create a dummy blit encoder using set_end_of_encoder_sample_index + // * do a dummy write that is known to be not optimized out. + // * close the encoder since we used set_end_of_encoder_sample_index and don't want to get any extra stuff in there. + // * create another encoder for whatever we actually had in mind. 
+ let supports_sample_counters_in_buffer = self + .shared + .private_caps + .timestamp_query_support + .contains(TimestampQuerySupport::ON_BLIT_ENCODER); + + if !self.state.pending_timer_queries.is_empty() && !supports_sample_counters_in_buffer { + objc::rc::autoreleasepool(|| { + let descriptor = metal::BlitPassDescriptor::new(); + let mut last_query = None; + for (i, (set, index)) in self.state.pending_timer_queries.drain(..).enumerate() + { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(i as _) + .unwrap(); + sba_descriptor + .set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + + // Here be dragons: + // As mentioned above, for some reasons using the start of the encoder won't yield any results sometimes! + sba_descriptor + .set_start_of_encoder_sample_index(metal::COUNTER_DONT_SAMPLE); + sba_descriptor.set_end_of_encoder_sample_index(index as _); + + last_query = Some((set, index)); + } + let encoder = cmd_buf.blit_command_encoder_with_descriptor(descriptor); + + // As explained above, we need to do some write: + // Conveniently, we have a buffer with every query set, that we can use for this for a dummy write, + // since we know that it is going to be overwritten again on timer resolve and HAL doesn't define its state before that. + let raw_range = metal::NSRange { + location: last_query.as_ref().unwrap().1 as u64 * crate::QUERY_SIZE, + length: 1, + }; + encoder.fill_buffer( + &last_query.as_ref().unwrap().0.raw_buffer, + raw_range, + 255, // Don't write 0, so it's easier to identify if something went wrong. + ); + + encoder.end_encoding(); + }); + } + + objc::rc::autoreleasepool(|| { + self.state.blit = Some(cmd_buf.new_blit_command_encoder().to_owned()); + }); + + let encoder = self.state.blit.as_ref().unwrap(); + + // UNTESTED: + // If the above described issue with empty blit encoder applies to `sample_counters_in_buffer` as well, we should use the same workaround instead! 
+ for (set, index) in self.state.pending_timer_queries.drain(..) { + debug_assert!(supports_sample_counters_in_buffer); + encoder.sample_counters_in_buffer( + set.counter_sample_buffer.as_ref().unwrap(), + index as _, + true, + ) + } + } + self.state.blit.as_ref().unwrap() + } + + pub(super) fn leave_blit(&mut self) { + if let Some(encoder) = self.state.blit.take() { + encoder.end_encoding(); + } + } + + fn active_encoder(&mut self) -> Option<&metal::CommandEncoderRef> { + if let Some(ref encoder) = self.state.render { + Some(encoder) + } else if let Some(ref encoder) = self.state.compute { + Some(encoder) + } else if let Some(ref encoder) = self.state.blit { + Some(encoder) + } else { + None + } + } + + fn begin_pass(&mut self) { + self.state.reset(); + self.leave_blit(); + } +} + +impl super::CommandState { + fn reset(&mut self) { + self.storage_buffer_length_map.clear(); + self.stage_infos.vs.clear(); + self.stage_infos.fs.clear(); + self.stage_infos.cs.clear(); + self.work_group_memory_sizes.clear(); + self.push_constants.clear(); + } + + fn make_sizes_buffer_update<'a>( + &self, + stage: naga::ShaderStage, + result_sizes: &'a mut Vec<u32>, + ) -> Option<(u32, &'a [u32])> { + let stage_info = &self.stage_infos[stage]; + let slot = stage_info.sizes_slot?; + + result_sizes.clear(); + result_sizes.extend(stage_info.sized_bindings.iter().map(|br| { + self.storage_buffer_length_map + .get(br) + .map(|size| u32::try_from(size.get()).unwrap_or(u32::MAX)) + .unwrap_or_default() + })); + + if !result_sizes.is_empty() { + Some((slot as _, result_sizes)) + } else { + None + } + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let queue = &self.raw_queue.lock(); + let retain_references = self.shared.settings.retain_command_buffer_references; + let raw = objc::rc::autoreleasepool(move || { + let cmd_buf_ref = if retain_references { + 
/// Finishes recording and hands back the recorded command buffer.
///
/// Any open blit encoder is ended. In debug builds this asserts that no
/// render or compute encoder is still open and that no timer queries remain
/// pending.
///
/// # Panics
///
/// Panics if there is no raw command buffer to take (`raw_cmd_buf` is `None`).
unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> {
    // Handle pending timer query if any.
    // `enter_blit` only consumes the pending queries when it has to create a new
    // blit encoder, so an already open one is closed first to force that path.
    if !self.state.pending_timer_queries.is_empty() {
        self.leave_blit();
        self.enter_blit();
    }

    // Close whatever blit encoder is open (including the one created just above).
    self.leave_blit();
    debug_assert!(self.state.render.is_none());
    debug_assert!(self.state.compute.is_none());
    debug_assert!(self.state.pending_timer_queries.is_empty());

    Ok(super::CommandBuffer {
        raw: self.raw_cmd_buf.take().unwrap(),
    })
}
&dst.raw, + copy.dst_offset, + copy.size.get(), + ); + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let dst_texture = if src.format != dst.format { + let raw_format = self.shared.private_caps.map_format(src.format); + Cow::Owned(objc::rc::autoreleasepool(|| { + dst.raw.new_texture_view(raw_format) + })) + } else { + Cow::Borrowed(&dst.raw) + }; + let encoder = self.enter_blit(); + for copy in regions { + let src_origin = conv::map_origin(©.src_base.origin); + let dst_origin = conv::map_origin(©.dst_base.origin); + // no clamping is done: Metal expects physical sizes here + let extent = conv::map_copy_extent(©.size); + encoder.copy_from_texture( + &src.raw, + copy.src_base.array_layer as u64, + copy.src_base.mip_level as u64, + src_origin, + extent, + &dst_texture, + copy.dst_base.array_layer as u64, + copy.dst_base.mip_level as u64, + dst_origin, + ); + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + let dst_origin = conv::map_origin(©.texture_base.origin); + // Metal expects buffer-texture copies in virtual sizes + let extent = copy + .texture_base + .max_copy_size(&dst.copy_size) + .min(©.size); + let bytes_per_row = copy.buffer_layout.bytes_per_row.unwrap_or(0) as u64; + let image_byte_stride = if extent.depth > 1 { + copy.buffer_layout + .rows_per_image + .map_or(0, |v| v as u64 * bytes_per_row) + } else { + // Don't pass a stride when updating a single layer, otherwise metal validation + // fails when updating a subset of the image due to the stride being larger than + // the amount of data to copy. 
+ 0 + }; + encoder.copy_from_buffer_to_texture( + &src.raw, + copy.buffer_layout.offset, + bytes_per_row, + image_byte_stride, + conv::map_copy_extent(&extent), + &dst.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + dst_origin, + conv::get_blit_option(dst.format, copy.texture_base.aspect), + ); + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + let src_origin = conv::map_origin(©.texture_base.origin); + // Metal expects texture-buffer copies in virtual sizes + let extent = copy + .texture_base + .max_copy_size(&src.copy_size) + .min(©.size); + let bytes_per_row = copy.buffer_layout.bytes_per_row.unwrap_or(0) as u64; + let bytes_per_image = copy + .buffer_layout + .rows_per_image + .map_or(0, |v| v as u64 * bytes_per_row); + encoder.copy_from_texture_to_buffer( + &src.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + src_origin, + conv::map_copy_extent(&extent), + &dst.raw, + copy.buffer_layout.offset, + bytes_per_row, + bytes_per_image, + conv::get_blit_option(src.format, copy.texture_base.aspect), + ); + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + match set.ty { + wgt::QueryType::Occlusion => { + self.state + .render + .as_ref() + .unwrap() + .set_visibility_result_mode( + metal::MTLVisibilityResultMode::Boolean, + index as u64 * crate::QUERY_SIZE, + ); + } + _ => {} + } + } + unsafe fn end_query(&mut self, set: &super::QuerySet, _index: u32) { + match set.ty { + wgt::QueryType::Occlusion => { + self.state + .render + .as_ref() + .unwrap() + .set_visibility_result_mode(metal::MTLVisibilityResultMode::Disabled, 0); + } + _ => {} + } + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + let support = 
self.shared.private_caps.timestamp_query_support; + debug_assert!( + support.contains(TimestampQuerySupport::STAGE_BOUNDARIES), + "Timestamp queries are not supported" + ); + let sample_buffer = set.counter_sample_buffer.as_ref().unwrap(); + let with_barrier = true; + + // Try to use an existing encoder for timestamp query if possible. + // This works only if it's supported for the active encoder. + if let (true, Some(encoder)) = ( + support.contains(TimestampQuerySupport::ON_BLIT_ENCODER), + self.state.blit.as_ref(), + ) { + encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + } else if let (true, Some(encoder)) = ( + support.contains(TimestampQuerySupport::ON_RENDER_ENCODER), + self.state.render.as_ref(), + ) { + encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + } else if let (true, Some(encoder)) = ( + support.contains(TimestampQuerySupport::ON_COMPUTE_ENCODER), + self.state.compute.as_ref(), + ) { + encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + } else { + // If we're here it means we either have no encoder open, or it's not supported to sample within them. + // If this happens with render/compute open, this is an invalid usage! + debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); + + // But otherwise it means we'll put defer this to the next created encoder. + self.state.pending_timer_queries.push((set.clone(), index)); + + // Ensure we didn't already have a blit open. 
+ self.leave_blit(); + }; + } + + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range<u32>) { + let encoder = self.enter_blit(); + let raw_range = metal::NSRange { + location: range.start as u64 * crate::QUERY_SIZE, + length: (range.end - range.start) as u64 * crate::QUERY_SIZE, + }; + encoder.fill_buffer(&set.raw_buffer, raw_range, 0); + } + + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _: wgt::BufferSize, // Metal doesn't support queries that are bigger than a single element are not supported + ) { + let encoder = self.enter_blit(); + match set.ty { + wgt::QueryType::Occlusion => { + let size = (range.end - range.start) as u64 * crate::QUERY_SIZE; + encoder.copy_from_buffer( + &set.raw_buffer, + range.start as u64 * crate::QUERY_SIZE, + &buffer.raw, + offset, + size, + ); + } + wgt::QueryType::Timestamp => { + encoder.resolve_counters( + set.counter_sample_buffer.as_ref().unwrap(), + metal::NSRange::new(range.start as u64, range.end as u64), + &buffer.raw, + offset, + ); + } + wgt::QueryType::PipelineStatistics(_) => todo!(), + } + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + self.begin_pass(); + self.state.index = None; + + assert!(self.state.blit.is_none()); + assert!(self.state.compute.is_none()); + assert!(self.state.render.is_none()); + + objc::rc::autoreleasepool(|| { + let descriptor = metal::RenderPassDescriptor::new(); + + for (i, at) in desc.color_attachments.iter().enumerate() { + if let Some(at) = at.as_ref() { + let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + if let Some(ref resolve) = at.resolve_target { + //Note: the selection of levels and slices is already handled by `TextureView` + at_descriptor.set_resolve_texture(Some(&resolve.view.raw)); + } + let load_action = if 
at.ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_color(conv::map_clear_color(&at.clear_value)); + metal::MTLLoadAction::Clear + }; + let store_action = conv::map_store_action( + at.ops.contains(crate::AttachmentOps::STORE), + at.resolve_target.is_some(), + ); + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + } + + if let Some(ref at) = desc.depth_stencil_attachment { + if at.target.view.aspects.contains(crate::FormatAspects::DEPTH) { + let at_descriptor = descriptor.depth_attachment().unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + + let load_action = if at.depth_ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_depth(at.clear_value.0 as f64); + metal::MTLLoadAction::Clear + }; + let store_action = if at.depth_ops.contains(crate::AttachmentOps::STORE) { + metal::MTLStoreAction::Store + } else { + metal::MTLStoreAction::DontCare + }; + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + if at + .target + .view + .aspects + .contains(crate::FormatAspects::STENCIL) + { + let at_descriptor = descriptor.stencil_attachment().unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + + let load_action = if at.stencil_ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_stencil(at.clear_value.1); + metal::MTLLoadAction::Clear + }; + let store_action = if at.stencil_ops.contains(crate::AttachmentOps::STORE) { + metal::MTLStoreAction::Store + } else { + metal::MTLStoreAction::DontCare + }; + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + } + + let mut sba_index = 0; + let mut next_sba_descriptor = || { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(sba_index) + .unwrap(); + + 
sba_descriptor.set_end_of_vertex_sample_index(metal::COUNTER_DONT_SAMPLE); + sba_descriptor.set_start_of_fragment_sample_index(metal::COUNTER_DONT_SAMPLE); + + sba_index += 1; + sba_descriptor + }; + + for (set, index) in self.state.pending_timer_queries.drain(..) { + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + sba_descriptor.set_start_of_vertex_sample_index(index as _); + sba_descriptor.set_end_of_fragment_sample_index(metal::COUNTER_DONT_SAMPLE); + } + + if let Some(ref timestamp_writes) = desc.timestamp_writes { + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer( + timestamp_writes + .query_set + .counter_sample_buffer + .as_ref() + .unwrap(), + ); + + sba_descriptor.set_start_of_vertex_sample_index( + timestamp_writes + .beginning_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + sba_descriptor.set_end_of_fragment_sample_index( + timestamp_writes + .end_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + } + + if let Some(occlusion_query_set) = desc.occlusion_query_set { + descriptor + .set_visibility_result_buffer(Some(occlusion_query_set.raw_buffer.as_ref())) + } + + let raw = self.raw_cmd_buf.as_ref().unwrap(); + let encoder = raw.new_render_command_encoder(descriptor); + if let Some(label) = desc.label { + encoder.set_label(label); + } + self.state.render = Some(encoder.to_owned()); + }); + } + + unsafe fn end_render_pass(&mut self) { + self.state.render.take().unwrap().end_encoding(); + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + group_index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let bg_info = &layout.bind_group_infos[group_index as usize]; + + if let Some(ref encoder) = self.state.render { + let mut changes_sizes_buffer = false; + for index in 0..group.counters.vs.buffers { + let buf = &group.buffers[index as 
usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_vertex_buffer( + (bg_info.base_resource_indices.vs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Vertex, + &mut self.temp.binding_sizes, + ) { + encoder.set_vertex_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + changes_sizes_buffer = false; + for index in 0..group.counters.fs.buffers { + let buf = &group.buffers[(group.counters.vs.buffers + index) as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_fragment_buffer( + (bg_info.base_resource_indices.fs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Fragment, + &mut self.temp.binding_sizes, + ) { + encoder.set_fragment_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + for index in 0..group.counters.vs.samplers { + let res = group.samplers[index as usize]; + encoder.set_vertex_sampler_state( + (bg_info.base_resource_indices.vs.samplers + index) as u64, + Some(res.as_native()), + ); + } + for index 
in 0..group.counters.fs.samplers { + let res = group.samplers[(group.counters.vs.samplers + index) as usize]; + encoder.set_fragment_sampler_state( + (bg_info.base_resource_indices.fs.samplers + index) as u64, + Some(res.as_native()), + ); + } + + for index in 0..group.counters.vs.textures { + let res = group.textures[index as usize]; + encoder.set_vertex_texture( + (bg_info.base_resource_indices.vs.textures + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.fs.textures { + let res = group.textures[(group.counters.vs.textures + index) as usize]; + encoder.set_fragment_texture( + (bg_info.base_resource_indices.fs.textures + index) as u64, + Some(res.as_native()), + ); + } + } + + if let Some(ref encoder) = self.state.compute { + let index_base = super::ResourceData { + buffers: group.counters.vs.buffers + group.counters.fs.buffers, + samplers: group.counters.vs.samplers + group.counters.fs.samplers, + textures: group.counters.vs.textures + group.counters.fs.textures, + }; + + let mut changes_sizes_buffer = false; + for index in 0..group.counters.cs.buffers { + let buf = &group.buffers[(index_base.buffers + index) as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_buffer( + (bg_info.base_resource_indices.cs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Compute, + &mut self.temp.binding_sizes, + ) { + encoder.set_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + for index in 
0..group.counters.cs.samplers { + let res = group.samplers[(index_base.samplers + index) as usize]; + encoder.set_sampler_state( + (bg_info.base_resource_indices.cs.samplers + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.cs.textures { + let res = group.textures[(index_base.textures + index) as usize]; + encoder.set_texture( + (bg_info.base_resource_indices.cs.textures + index) as u64, + Some(res.as_native()), + ); + } + } + } + + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ) { + let state_pc = &mut self.state.push_constants; + if state_pc.len() < layout.total_push_constants as usize { + state_pc.resize(layout.total_push_constants as usize, 0); + } + debug_assert_eq!(offset_bytes as usize % WORD_SIZE, 0); + + let offset_words = offset_bytes as usize / WORD_SIZE; + state_pc[offset_words..offset_words + data.len()].copy_from_slice(data); + + if stages.contains(wgt::ShaderStages::COMPUTE) { + self.state.compute.as_ref().unwrap().set_bytes( + layout.push_constants_infos.cs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::VERTEX) { + self.state.render.as_ref().unwrap().set_vertex_bytes( + layout.push_constants_infos.vs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::FRAGMENT) { + self.state.render.as_ref().unwrap().set_fragment_bytes( + layout.push_constants_infos.fs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + if let Some(encoder) = self.active_encoder() { + encoder.insert_debug_signpost(label); + } + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + if let 
Some(encoder) = self.active_encoder() { + encoder.push_debug_group(group_label); + } else if let Some(ref buf) = self.raw_cmd_buf { + buf.push_debug_group(group_label); + } + } + unsafe fn end_debug_marker(&mut self) { + if let Some(encoder) = self.active_encoder() { + encoder.pop_debug_group(); + } else if let Some(ref buf) = self.raw_cmd_buf { + buf.pop_debug_group(); + } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + self.state.raw_primitive_type = pipeline.raw_primitive_type; + self.state.stage_infos.vs.assign_from(&pipeline.vs_info); + match pipeline.fs_info { + Some(ref info) => self.state.stage_infos.fs.assign_from(info), + None => self.state.stage_infos.fs.clear(), + } + + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_render_pipeline_state(&pipeline.raw); + encoder.set_front_facing_winding(pipeline.raw_front_winding); + encoder.set_cull_mode(pipeline.raw_cull_mode); + encoder.set_triangle_fill_mode(pipeline.raw_triangle_fill_mode); + if let Some(depth_clip) = pipeline.raw_depth_clip_mode { + encoder.set_depth_clip_mode(depth_clip); + } + if let Some((ref state, bias)) = pipeline.depth_stencil { + encoder.set_depth_stencil_state(state); + encoder.set_depth_bias(bias.constant as f32, bias.slope_scale, bias.clamp); + } + + { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes) + { + encoder.set_vertex_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + if pipeline.fs_lib.is_some() { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Fragment, &mut self.temp.binding_sizes) + { + encoder.set_fragment_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + let (stride, 
raw_type) = match format { + wgt::IndexFormat::Uint16 => (2, metal::MTLIndexType::UInt16), + wgt::IndexFormat::Uint32 => (4, metal::MTLIndexType::UInt32), + }; + self.state.index = Some(super::IndexState { + buffer_ptr: AsNative::from(binding.buffer.raw.as_ref()), + offset: binding.offset, + stride, + raw_type, + }); + } + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_vertex_buffer(buffer_index, Some(&binding.buffer.raw), binding.offset); + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let zfar = if self.shared.disabilities.broken_viewport_near_depth { + depth_range.end - depth_range.start + } else { + depth_range.end + }; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_viewport(metal::MTLViewport { + originX: rect.x as _, + originY: rect.y as _, + width: rect.w as _, + height: rect.h as _, + znear: depth_range.start as _, + zfar: zfar as _, + }); + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + //TODO: support empty scissors by modifying the viewport + let scissor = metal::MTLScissorRect { + x: rect.x as _, + y: rect.y as _, + width: rect.w as _, + height: rect.h as _, + }; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_scissor_rect(scissor); + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_stencil_front_back_reference_value(value, value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_blend_color(color[0], color[1], color[2], color[3]); + } + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ) 
{ + let encoder = self.state.render.as_ref().unwrap(); + if first_instance != 0 { + encoder.draw_primitives_instanced_base_instance( + self.state.raw_primitive_type, + first_vertex as _, + vertex_count as _, + instance_count as _, + first_instance as _, + ); + } else if instance_count != 1 { + encoder.draw_primitives_instanced( + self.state.raw_primitive_type, + first_vertex as _, + vertex_count as _, + instance_count as _, + ); + } else { + encoder.draw_primitives( + self.state.raw_primitive_type, + first_vertex as _, + vertex_count as _, + ); + } + } + + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + let index = self.state.index.as_ref().unwrap(); + let offset = index.offset + index.stride * first_index as wgt::BufferAddress; + if base_vertex != 0 || first_instance != 0 { + encoder.draw_indexed_primitives_instanced_base_instance( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + instance_count as _, + base_vertex as _, + first_instance as _, + ); + } else if instance_count != 1 { + encoder.draw_indexed_primitives_instanced( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + instance_count as _, + ); + } else { + encoder.draw_indexed_primitives( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + ); + } + } + + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + mut offset: wgt::BufferAddress, + draw_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + for _ in 0..draw_count { + encoder.draw_primitives_indirect(self.state.raw_primitive_type, &buffer.raw, offset); + offset += mem::size_of::<wgt::DrawIndirectArgs>() as wgt::BufferAddress; + } + } + + unsafe fn draw_indexed_indirect( + &mut 
self, + buffer: &super::Buffer, + mut offset: wgt::BufferAddress, + draw_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + let index = self.state.index.as_ref().unwrap(); + for _ in 0..draw_count { + encoder.draw_indexed_primitives_indirect( + self.state.raw_primitive_type, + index.raw_type, + index.buffer_ptr.as_native(), + index.offset, + &buffer.raw, + offset, + ); + offset += mem::size_of::<wgt::DrawIndexedIndirectArgs>() as wgt::BufferAddress; + } + } + + unsafe fn draw_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + //TODO + } + unsafe fn draw_indexed_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + //TODO + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<super::Api>) { + self.begin_pass(); + + debug_assert!(self.state.blit.is_none()); + debug_assert!(self.state.compute.is_none()); + debug_assert!(self.state.render.is_none()); + + let raw = self.raw_cmd_buf.as_ref().unwrap(); + + objc::rc::autoreleasepool(|| { + // TimeStamp Queries and ComputePassDescriptor were both introduced in Metal 2.3 (macOS 11, iOS 14) + // and we currently only need ComputePassDescriptor for timestamp queries + let encoder = if self.shared.private_caps.timestamp_query_support.is_empty() { + raw.new_compute_command_encoder() + } else { + let descriptor = metal::ComputePassDescriptor::new(); + + let mut sba_index = 0; + let mut next_sba_descriptor = || { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(sba_index) + .unwrap(); + sba_index += 1; + sba_descriptor + }; + + for (set, index) in self.state.pending_timer_queries.drain(..) 
{ + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + sba_descriptor.set_start_of_encoder_sample_index(index as _); + sba_descriptor.set_end_of_encoder_sample_index(metal::COUNTER_DONT_SAMPLE); + } + + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer( + timestamp_writes + .query_set + .counter_sample_buffer + .as_ref() + .unwrap(), + ); + + sba_descriptor.set_start_of_encoder_sample_index( + timestamp_writes + .beginning_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + sba_descriptor.set_end_of_encoder_sample_index( + timestamp_writes + .end_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + } + + raw.compute_command_encoder_with_descriptor(descriptor) + }; + + if let Some(label) = desc.label { + encoder.set_label(label); + } + + self.state.compute = Some(encoder.to_owned()); + }); + } + unsafe fn end_compute_pass(&mut self) { + self.state.compute.take().unwrap().end_encoding(); + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + self.state.raw_wg_size = pipeline.work_group_size; + self.state.stage_infos.cs.assign_from(&pipeline.cs_info); + + let encoder = self.state.compute.as_ref().unwrap(); + encoder.set_compute_pipeline_state(&pipeline.raw); + + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Compute, &mut self.temp.binding_sizes) + { + encoder.set_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + + // update the threadgroup memory sizes + while self.state.work_group_memory_sizes.len() < pipeline.work_group_memory_sizes.len() { + self.state.work_group_memory_sizes.push(0); + } + for (index, (cur_size, pipeline_size)) in self + .state + .work_group_memory_sizes + .iter_mut() + 
.zip(pipeline.work_group_memory_sizes.iter()) + .enumerate() + { + const ALIGN_MASK: u32 = 0xF; // must be a multiple of 16 bytes + let size = ((*pipeline_size - 1) | ALIGN_MASK) + 1; + if *cur_size != size { + *cur_size = size; + encoder.set_threadgroup_memory_length(index as _, size as _); + } + } + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + let encoder = self.state.compute.as_ref().unwrap(); + let raw_count = metal::MTLSize { + width: count[0] as u64, + height: count[1] as u64, + depth: count[2] as u64, + }; + encoder.dispatch_thread_groups(raw_count, self.state.raw_wg_size); + } + + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + let encoder = self.state.compute.as_ref().unwrap(); + encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size); + } + + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + _descriptor_count: u32, + _descriptors: T, + ) where + super::Api: 'a, + T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>, + { + unimplemented!() + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + _barriers: crate::AccelerationStructureBarrier, + ) { + unimplemented!() + } +} + +impl Drop for super::CommandEncoder { + fn drop(&mut self) { + // Metal raises an assert when a MTLCommandEncoder is deallocated without a call + // to endEncoding. This isn't documented in the general case at + // https://developer.apple.com/documentation/metal/mtlcommandencoder, but for the + // more-specific MTLComputeCommandEncoder it is stated as a requirement at + // https://developer.apple.com/documentation/metal/mtlcomputecommandencoder. It + // appears to be a requirement for all MTLCommandEncoder objects. Failing to call + // endEncoding causes a crash with the message 'Command encoder released without + // endEncoding'. 
To prevent this, we explicitiy call discard_encoding, which + // calls end_encoding on any still-held metal::CommandEncoders. + unsafe { + self.discard_encoding(); + } + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/conv.rs b/third_party/rust/wgpu-hal/src/metal/conv.rs new file mode 100644 index 0000000000..8f6439b50b --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/conv.rs @@ -0,0 +1,322 @@ +pub fn map_texture_usage( + format: wgt::TextureFormat, + usage: crate::TextureUses, +) -> metal::MTLTextureUsage { + use crate::TextureUses as Tu; + + let mut mtl_usage = metal::MTLTextureUsage::Unknown; + + mtl_usage.set( + metal::MTLTextureUsage::RenderTarget, + usage.intersects(Tu::COLOR_TARGET | Tu::DEPTH_STENCIL_READ | Tu::DEPTH_STENCIL_WRITE), + ); + mtl_usage.set( + metal::MTLTextureUsage::ShaderRead, + usage.intersects( + Tu::RESOURCE | Tu::DEPTH_STENCIL_READ | Tu::STORAGE_READ | Tu::STORAGE_READ_WRITE, + ), + ); + mtl_usage.set( + metal::MTLTextureUsage::ShaderWrite, + usage.intersects(Tu::STORAGE_READ_WRITE), + ); + // needed for combined depth/stencil formats since we might + // create a stencil-only view from them + mtl_usage.set( + metal::MTLTextureUsage::PixelFormatView, + format.is_combined_depth_stencil_format(), + ); + + mtl_usage +} + +pub fn map_texture_view_dimension(dim: wgt::TextureViewDimension) -> metal::MTLTextureType { + use metal::MTLTextureType::*; + use wgt::TextureViewDimension as Tvd; + match dim { + Tvd::D1 => D1, + Tvd::D2 => D2, + Tvd::D2Array => D2Array, + Tvd::D3 => D3, + Tvd::Cube => Cube, + Tvd::CubeArray => CubeArray, + } +} + +pub fn map_compare_function(fun: wgt::CompareFunction) -> metal::MTLCompareFunction { + use metal::MTLCompareFunction::*; + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => Never, + Cf::Less => Less, + Cf::LessEqual => LessEqual, + Cf::Equal => Equal, + Cf::GreaterEqual => GreaterEqual, + Cf::Greater => Greater, + Cf::NotEqual => NotEqual, + Cf::Always => Always, + } +} + +pub fn 
map_filter_mode(filter: wgt::FilterMode) -> metal::MTLSamplerMinMagFilter { + use metal::MTLSamplerMinMagFilter::*; + match filter { + wgt::FilterMode::Nearest => Nearest, + wgt::FilterMode::Linear => Linear, + } +} + +pub fn map_address_mode(address: wgt::AddressMode) -> metal::MTLSamplerAddressMode { + use metal::MTLSamplerAddressMode::*; + use wgt::AddressMode as Fm; + match address { + Fm::Repeat => Repeat, + Fm::MirrorRepeat => MirrorRepeat, + Fm::ClampToEdge => ClampToEdge, + Fm::ClampToBorder => ClampToBorderColor, + //Fm::MirrorClamp => MirrorClampToEdge, + } +} + +pub fn map_border_color(border_color: wgt::SamplerBorderColor) -> metal::MTLSamplerBorderColor { + use metal::MTLSamplerBorderColor::*; + match border_color { + wgt::SamplerBorderColor::TransparentBlack => TransparentBlack, + wgt::SamplerBorderColor::OpaqueBlack => OpaqueBlack, + wgt::SamplerBorderColor::OpaqueWhite => OpaqueWhite, + wgt::SamplerBorderColor::Zero => unreachable!(), + } +} + +pub fn map_primitive_topology( + topology: wgt::PrimitiveTopology, +) -> (metal::MTLPrimitiveTopologyClass, metal::MTLPrimitiveType) { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => ( + metal::MTLPrimitiveTopologyClass::Point, + metal::MTLPrimitiveType::Point, + ), + Pt::LineList => ( + metal::MTLPrimitiveTopologyClass::Line, + metal::MTLPrimitiveType::Line, + ), + Pt::LineStrip => ( + metal::MTLPrimitiveTopologyClass::Line, + metal::MTLPrimitiveType::LineStrip, + ), + Pt::TriangleList => ( + metal::MTLPrimitiveTopologyClass::Triangle, + metal::MTLPrimitiveType::Triangle, + ), + Pt::TriangleStrip => ( + metal::MTLPrimitiveTopologyClass::Triangle, + metal::MTLPrimitiveType::TriangleStrip, + ), + } +} + +pub fn map_color_write(mask: wgt::ColorWrites) -> metal::MTLColorWriteMask { + let mut raw_mask = metal::MTLColorWriteMask::empty(); + + if mask.contains(wgt::ColorWrites::RED) { + raw_mask |= metal::MTLColorWriteMask::Red; + } + if mask.contains(wgt::ColorWrites::GREEN) { + raw_mask 
|= metal::MTLColorWriteMask::Green; + } + if mask.contains(wgt::ColorWrites::BLUE) { + raw_mask |= metal::MTLColorWriteMask::Blue; + } + if mask.contains(wgt::ColorWrites::ALPHA) { + raw_mask |= metal::MTLColorWriteMask::Alpha; + } + + raw_mask +} + +pub fn map_blend_factor(factor: wgt::BlendFactor) -> metal::MTLBlendFactor { + use metal::MTLBlendFactor::*; + use wgt::BlendFactor as Bf; + + match factor { + Bf::Zero => Zero, + Bf::One => One, + Bf::Src => SourceColor, + Bf::OneMinusSrc => OneMinusSourceColor, + Bf::Dst => DestinationColor, + Bf::OneMinusDst => OneMinusDestinationColor, + Bf::SrcAlpha => SourceAlpha, + Bf::OneMinusSrcAlpha => OneMinusSourceAlpha, + Bf::DstAlpha => DestinationAlpha, + Bf::OneMinusDstAlpha => OneMinusDestinationAlpha, + Bf::Constant => BlendColor, + Bf::OneMinusConstant => OneMinusBlendColor, + Bf::SrcAlphaSaturated => SourceAlphaSaturated, + Bf::Src1 => Source1Color, + Bf::OneMinusSrc1 => OneMinusSource1Color, + Bf::Src1Alpha => Source1Alpha, + Bf::OneMinusSrc1Alpha => OneMinusSource1Alpha, + } +} + +pub fn map_blend_op(operation: wgt::BlendOperation) -> metal::MTLBlendOperation { + use metal::MTLBlendOperation::*; + use wgt::BlendOperation as Bo; + + match operation { + Bo::Add => Add, + Bo::Subtract => Subtract, + Bo::ReverseSubtract => ReverseSubtract, + Bo::Min => Min, + Bo::Max => Max, + } +} + +pub fn map_blend_component( + component: &wgt::BlendComponent, +) -> ( + metal::MTLBlendOperation, + metal::MTLBlendFactor, + metal::MTLBlendFactor, +) { + ( + map_blend_op(component.operation), + map_blend_factor(component.src_factor), + map_blend_factor(component.dst_factor), + ) +} + +pub fn map_vertex_format(format: wgt::VertexFormat) -> metal::MTLVertexFormat { + use metal::MTLVertexFormat::*; + use wgt::VertexFormat as Vf; + + match format { + Vf::Unorm8x2 => UChar2Normalized, + Vf::Snorm8x2 => Char2Normalized, + Vf::Uint8x2 => UChar2, + Vf::Sint8x2 => Char2, + Vf::Unorm8x4 => UChar4Normalized, + Vf::Snorm8x4 => Char4Normalized, + 
Vf::Uint8x4 => UChar4, + Vf::Sint8x4 => Char4, + Vf::Unorm16x2 => UShort2Normalized, + Vf::Snorm16x2 => Short2Normalized, + Vf::Uint16x2 => UShort2, + Vf::Sint16x2 => Short2, + Vf::Float16x2 => Half2, + Vf::Unorm16x4 => UShort4Normalized, + Vf::Snorm16x4 => Short4Normalized, + Vf::Uint16x4 => UShort4, + Vf::Sint16x4 => Short4, + Vf::Float16x4 => Half4, + Vf::Uint32 => UInt, + Vf::Sint32 => Int, + Vf::Float32 => Float, + Vf::Uint32x2 => UInt2, + Vf::Sint32x2 => Int2, + Vf::Float32x2 => Float2, + Vf::Uint32x3 => UInt3, + Vf::Sint32x3 => Int3, + Vf::Float32x3 => Float3, + Vf::Uint32x4 => UInt4, + Vf::Sint32x4 => Int4, + Vf::Float32x4 => Float4, + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + } +} + +pub fn map_step_mode(mode: wgt::VertexStepMode) -> metal::MTLVertexStepFunction { + match mode { + wgt::VertexStepMode::Vertex => metal::MTLVertexStepFunction::PerVertex, + wgt::VertexStepMode::Instance => metal::MTLVertexStepFunction::PerInstance, + } +} + +pub fn map_stencil_op(op: wgt::StencilOperation) -> metal::MTLStencilOperation { + use metal::MTLStencilOperation::*; + use wgt::StencilOperation as So; + + match op { + So::Keep => Keep, + So::Zero => Zero, + So::Replace => Replace, + So::IncrementClamp => IncrementClamp, + So::IncrementWrap => IncrementWrap, + So::DecrementClamp => DecrementClamp, + So::DecrementWrap => DecrementWrap, + So::Invert => Invert, + } +} + +pub fn map_winding(winding: wgt::FrontFace) -> metal::MTLWinding { + match winding { + wgt::FrontFace::Cw => metal::MTLWinding::Clockwise, + wgt::FrontFace::Ccw => metal::MTLWinding::CounterClockwise, + } +} + +pub fn map_cull_mode(face: Option<wgt::Face>) -> metal::MTLCullMode { + match face { + None => metal::MTLCullMode::None, + Some(wgt::Face::Front) => metal::MTLCullMode::Front, + Some(wgt::Face::Back) => metal::MTLCullMode::Back, + } +} + +pub fn map_range(range: &crate::MemoryRange) -> metal::NSRange { + metal::NSRange { + location: range.start, + length: 
range.end - range.start, + } +} + +pub fn map_copy_extent(extent: &crate::CopyExtent) -> metal::MTLSize { + metal::MTLSize { + width: extent.width as u64, + height: extent.height as u64, + depth: extent.depth as u64, + } +} + +pub fn map_origin(origin: &wgt::Origin3d) -> metal::MTLOrigin { + metal::MTLOrigin { + x: origin.x as u64, + y: origin.y as u64, + z: origin.z as u64, + } +} + +pub fn map_store_action(store: bool, resolve: bool) -> metal::MTLStoreAction { + use metal::MTLStoreAction::*; + match (store, resolve) { + (true, true) => StoreAndMultisampleResolve, + (false, true) => MultisampleResolve, + (true, false) => Store, + (false, false) => DontCare, + } +} + +pub fn map_clear_color(color: &wgt::Color) -> metal::MTLClearColor { + metal::MTLClearColor { + red: color.r, + green: color.g, + blue: color.b, + alpha: color.a, + } +} + +pub fn get_blit_option( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> metal::MTLBlitOption { + if format.is_combined_depth_stencil_format() { + match aspect { + crate::FormatAspects::DEPTH => metal::MTLBlitOption::DepthFromDepthStencil, + crate::FormatAspects::STENCIL => metal::MTLBlitOption::StencilFromDepthStencil, + _ => unreachable!(), + } + } else { + metal::MTLBlitOption::None + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/device.rs b/third_party/rust/wgpu-hal/src/metal/device.rs new file mode 100644 index 0000000000..d7fd06c8f3 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/device.rs @@ -0,0 +1,1251 @@ +use parking_lot::Mutex; +use std::{ + num::NonZeroU32, + ptr, + sync::{atomic, Arc}, + thread, time, +}; + +use super::conv; +use crate::auxil::map_naga_stage; + +type DeviceResult<T> = Result<T, crate::DeviceError>; + +struct CompiledShader { + library: metal::Library, + function: metal::Function, + wg_size: metal::MTLSize, + wg_memory_sizes: Vec<u32>, + + /// Bindings of WGSL `storage` globals that contain variable-sized arrays. 
+ /// + /// In order to implement bounds checks and the `arrayLength` function for + /// WGSL runtime-sized arrays, we pass the entry point a struct with a + /// member for each global variable that contains such an array. That member + /// is a `u32` holding the variable's total size in bytes---which is simply + /// the size of the `Buffer` supplying that variable's contents for the + /// draw call. + sized_bindings: Vec<naga::ResourceBinding>, + + immutable_buffer_mask: usize, +} + +fn create_stencil_desc( + face: &wgt::StencilFaceState, + read_mask: u32, + write_mask: u32, +) -> metal::StencilDescriptor { + let desc = metal::StencilDescriptor::new(); + desc.set_stencil_compare_function(conv::map_compare_function(face.compare)); + desc.set_read_mask(read_mask); + desc.set_write_mask(write_mask); + desc.set_stencil_failure_operation(conv::map_stencil_op(face.fail_op)); + desc.set_depth_failure_operation(conv::map_stencil_op(face.depth_fail_op)); + desc.set_depth_stencil_pass_operation(conv::map_stencil_op(face.pass_op)); + desc +} + +fn create_depth_stencil_desc(state: &wgt::DepthStencilState) -> metal::DepthStencilDescriptor { + let desc = metal::DepthStencilDescriptor::new(); + desc.set_depth_compare_function(conv::map_compare_function(state.depth_compare)); + desc.set_depth_write_enabled(state.depth_write_enabled); + let s = &state.stencil; + if s.is_enabled() { + let front_desc = create_stencil_desc(&s.front, s.read_mask, s.write_mask); + desc.set_front_face_stencil(Some(&front_desc)); + let back_desc = create_stencil_desc(&s.back, s.read_mask, s.write_mask); + desc.set_back_face_stencil(Some(&back_desc)); + } + desc +} + +impl super::Device { + fn load_shader( + &self, + stage: &crate::ProgrammableStage<super::Api>, + layout: &super::PipelineLayout, + primitive_class: metal::MTLPrimitiveTopologyClass, + naga_stage: naga::ShaderStage, + ) -> Result<CompiledShader, crate::PipelineError> { + let stage_bit = map_naga_stage(naga_stage); + + let module = 
&stage.module.naga.module; + let ep_resources = &layout.per_stage_map[naga_stage]; + + let bounds_check_policy = if stage.module.runtime_checks { + naga::proc::BoundsCheckPolicy::ReadZeroSkipWrite + } else { + naga::proc::BoundsCheckPolicy::Unchecked + }; + + let options = naga::back::msl::Options { + lang_version: match self.shared.private_caps.msl_version { + metal::MTLLanguageVersion::V1_0 => (1, 0), + metal::MTLLanguageVersion::V1_1 => (1, 1), + metal::MTLLanguageVersion::V1_2 => (1, 2), + metal::MTLLanguageVersion::V2_0 => (2, 0), + metal::MTLLanguageVersion::V2_1 => (2, 1), + metal::MTLLanguageVersion::V2_2 => (2, 2), + metal::MTLLanguageVersion::V2_3 => (2, 3), + metal::MTLLanguageVersion::V2_4 => (2, 4), + }, + inline_samplers: Default::default(), + spirv_cross_compatibility: false, + fake_missing_bindings: false, + per_entry_point_map: naga::back::msl::EntryPointResourceMap::from([( + stage.entry_point.to_string(), + ep_resources.clone(), + )]), + bounds_check_policies: naga::proc::BoundsCheckPolicies { + index: bounds_check_policy, + buffer: bounds_check_policy, + image_load: bounds_check_policy, + image_store: naga::proc::BoundsCheckPolicy::Unchecked, + // TODO: support bounds checks on binding arrays + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }, + zero_initialize_workgroup_memory: true, + }; + + let pipeline_options = naga::back::msl::PipelineOptions { + allow_and_force_point_size: match primitive_class { + metal::MTLPrimitiveTopologyClass::Point => true, + _ => false, + }, + }; + + let (source, info) = naga::back::msl::write_string( + module, + &stage.module.naga.info, + &options, + &pipeline_options, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?; + + log::debug!( + "Naga generated shader for entry point '{}' and stage {:?}\n{}", + stage.entry_point, + naga_stage, + &source + ); + + let options = metal::CompileOptions::new(); + options.set_language_version(self.shared.private_caps.msl_version); 
+ + if self.shared.private_caps.supports_preserve_invariance { + options.set_preserve_invariance(true); + } + + let library = self + .shared + .device + .lock() + .new_library_with_source(source.as_ref(), &options) + .map_err(|err| { + log::warn!("Naga generated shader:\n{}", source); + crate::PipelineError::Linkage(stage_bit, format!("Metal: {}", err)) + })?; + + let ep_index = module + .entry_points + .iter() + .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + let ep = &module.entry_points[ep_index]; + let ep_name = info.entry_point_names[ep_index] + .as_ref() + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{}", e)))?; + + let wg_size = metal::MTLSize { + width: ep.workgroup_size[0] as _, + height: ep.workgroup_size[1] as _, + depth: ep.workgroup_size[2] as _, + }; + + let function = library.get_function(ep_name, None).map_err(|e| { + log::error!("get_function: {:?}", e); + crate::PipelineError::EntryPoint(naga_stage) + })?; + + // collect sizes indices, immutable buffers, and work group memory sizes + let ep_info = &stage.module.naga.info.get_entry_point(ep_index); + let mut wg_memory_sizes = Vec::new(); + let mut sized_bindings = Vec::new(); + let mut immutable_buffer_mask = 0; + for (var_handle, var) in module.global_variables.iter() { + match var.space { + naga::AddressSpace::WorkGroup => { + if !ep_info[var_handle].is_empty() { + let size = module.types[var.ty].inner.size(module.to_ctx()); + wg_memory_sizes.push(size); + } + } + naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. 
} => { + let br = match var.binding { + Some(ref br) => br.clone(), + None => continue, + }; + let storage_access_store = match var.space { + naga::AddressSpace::Storage { access } => { + access.contains(naga::StorageAccess::STORE) + } + _ => false, + }; + + // check for an immutable buffer + if !ep_info[var_handle].is_empty() && !storage_access_store { + let slot = ep_resources.resources[&br].buffer.unwrap(); + immutable_buffer_mask |= 1 << slot; + } + + let mut dynamic_array_container_ty = var.ty; + if let naga::TypeInner::Struct { ref members, .. } = module.types[var.ty].inner + { + dynamic_array_container_ty = members.last().unwrap().ty; + } + if let naga::TypeInner::Array { + size: naga::ArraySize::Dynamic, + .. + } = module.types[dynamic_array_container_ty].inner + { + sized_bindings.push(br); + } + } + _ => {} + } + } + + Ok(CompiledShader { + library, + function, + wg_size, + wg_memory_sizes, + sized_bindings, + immutable_buffer_mask, + }) + } + + fn set_buffers_mutability( + buffers: &metal::PipelineBufferDescriptorArrayRef, + mut immutable_mask: usize, + ) { + while immutable_mask != 0 { + let slot = immutable_mask.trailing_zeros(); + immutable_mask ^= 1 << slot; + buffers + .object_at(slot as u64) + .unwrap() + .set_mutability(metal::MTLMutability::Immutable); + } + } + + pub unsafe fn texture_from_raw( + raw: metal::Texture, + format: wgt::TextureFormat, + raw_type: metal::MTLTextureType, + array_layers: u32, + mip_levels: u32, + copy_size: crate::CopyExtent, + ) -> super::Texture { + super::Texture { + raw, + format, + raw_type, + array_layers, + mip_levels, + copy_size, + } + } + + pub unsafe fn device_from_raw(raw: metal::Device, features: wgt::Features) -> super::Device { + super::Device { + shared: Arc::new(super::AdapterShared::new(raw)), + features, + } + } + + pub unsafe fn buffer_from_raw(raw: metal::Buffer, size: wgt::BufferAddress) -> super::Buffer { + super::Buffer { raw, size } + } + + pub fn raw_device(&self) -> &Mutex<metal::Device> { + 
&self.shared.device + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, _queue: super::Queue) {} + + unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<super::Buffer> { + let map_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let map_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let mut options = metal::MTLResourceOptions::empty(); + options |= if map_read || map_write { + // `crate::MemoryFlags::PREFER_COHERENT` is ignored here + metal::MTLResourceOptions::StorageModeShared + } else { + metal::MTLResourceOptions::StorageModePrivate + }; + options.set( + metal::MTLResourceOptions::CPUCacheModeWriteCombined, + map_write, + ); + + //TODO: HazardTrackingModeUntracked + + objc::rc::autoreleasepool(|| { + let raw = self.shared.device.lock().new_buffer(desc.size, options); + if let Some(label) = desc.label { + raw.set_label(label); + } + Ok(super::Buffer { + raw, + size: desc.size, + }) + }) + } + unsafe fn destroy_buffer(&self, _buffer: super::Buffer) {} + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> DeviceResult<crate::BufferMapping> { + let ptr = buffer.raw.contents() as *mut u8; + assert!(!ptr.is_null()); + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize) }).unwrap(), + is_coherent: true, + }) + } + + unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) -> DeviceResult<()> { + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> DeviceResult<super::Texture> { + use metal::foreign_types::ForeignType as _; + + let mtl_format = self.shared.private_caps.map_format(desc.format); + + objc::rc::autoreleasepool(|| { + let descriptor = metal::TextureDescriptor::new(); + + let mtl_type = 
match desc.dimension { + wgt::TextureDimension::D1 => metal::MTLTextureType::D1, + wgt::TextureDimension::D2 => { + if desc.sample_count > 1 { + descriptor.set_sample_count(desc.sample_count as u64); + metal::MTLTextureType::D2Multisample + } else if desc.size.depth_or_array_layers > 1 { + descriptor.set_array_length(desc.size.depth_or_array_layers as u64); + metal::MTLTextureType::D2Array + } else { + metal::MTLTextureType::D2 + } + } + wgt::TextureDimension::D3 => { + descriptor.set_depth(desc.size.depth_or_array_layers as u64); + metal::MTLTextureType::D3 + } + }; + + descriptor.set_texture_type(mtl_type); + descriptor.set_width(desc.size.width as u64); + descriptor.set_height(desc.size.height as u64); + descriptor.set_mipmap_level_count(desc.mip_level_count as u64); + descriptor.set_pixel_format(mtl_format); + descriptor.set_usage(conv::map_texture_usage(desc.format, desc.usage)); + descriptor.set_storage_mode(metal::MTLStorageMode::Private); + + let raw = self.shared.device.lock().new_texture(&descriptor); + if raw.as_ptr().is_null() { + return Err(crate::DeviceError::OutOfMemory); + } + if let Some(label) = desc.label { + raw.set_label(label); + } + + Ok(super::Texture { + raw, + format: desc.format, + raw_type: mtl_type, + mip_levels: desc.mip_level_count, + array_layers: desc.array_layer_count(), + copy_size: desc.copy_extent(), + }) + }) + } + + unsafe fn destroy_texture(&self, _texture: super::Texture) {} + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> DeviceResult<super::TextureView> { + let raw_type = if texture.raw_type == metal::MTLTextureType::D2Multisample { + texture.raw_type + } else { + conv::map_texture_view_dimension(desc.dimension) + }; + + let aspects = crate::FormatAspects::new(texture.format, desc.range.aspect); + + let raw_format = self + .shared + .private_caps + .map_view_format(desc.format, aspects); + + let format_equal = raw_format == 
self.shared.private_caps.map_format(texture.format); + let type_equal = raw_type == texture.raw_type; + let range_full_resource = + desc.range + .is_full_resource(desc.format, texture.mip_levels, texture.array_layers); + + let raw = if format_equal && type_equal && range_full_resource { + // Some images are marked as framebuffer-only, and we can't create aliases of them. + // Also helps working around Metal bugs with aliased array textures. + texture.raw.to_owned() + } else { + let mip_level_count = desc + .range + .mip_level_count + .unwrap_or(texture.mip_levels - desc.range.base_mip_level); + let array_layer_count = desc + .range + .array_layer_count + .unwrap_or(texture.array_layers - desc.range.base_array_layer); + + objc::rc::autoreleasepool(|| { + let raw = texture.raw.new_texture_view_from_slice( + raw_format, + raw_type, + metal::NSRange { + location: desc.range.base_mip_level as _, + length: mip_level_count as _, + }, + metal::NSRange { + location: desc.range.base_array_layer as _, + length: array_layer_count as _, + }, + ); + if let Some(label) = desc.label { + raw.set_label(label); + } + raw + }) + }; + + Ok(super::TextureView { raw, aspects }) + } + unsafe fn destroy_texture_view(&self, _view: super::TextureView) {} + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> DeviceResult<super::Sampler> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::SamplerDescriptor::new(); + + descriptor.set_min_filter(conv::map_filter_mode(desc.min_filter)); + descriptor.set_mag_filter(conv::map_filter_mode(desc.mag_filter)); + descriptor.set_mip_filter(match desc.mipmap_filter { + wgt::FilterMode::Nearest if desc.lod_clamp == (0.0..0.0) => { + metal::MTLSamplerMipFilter::NotMipmapped + } + wgt::FilterMode::Nearest => metal::MTLSamplerMipFilter::Nearest, + wgt::FilterMode::Linear => metal::MTLSamplerMipFilter::Linear, + }); + + let [s, t, r] = desc.address_modes; + descriptor.set_address_mode_s(conv::map_address_mode(s)); + 
descriptor.set_address_mode_t(conv::map_address_mode(t)); + descriptor.set_address_mode_r(conv::map_address_mode(r)); + + // Anisotropy is always supported on mac up to 16x + descriptor.set_max_anisotropy(desc.anisotropy_clamp as _); + + descriptor.set_lod_min_clamp(desc.lod_clamp.start); + descriptor.set_lod_max_clamp(desc.lod_clamp.end); + + if let Some(fun) = desc.compare { + descriptor.set_compare_function(conv::map_compare_function(fun)); + } + + if let Some(border_color) = desc.border_color { + if let wgt::SamplerBorderColor::Zero = border_color { + if s == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_s(metal::MTLSamplerAddressMode::ClampToZero); + } + + if t == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_t(metal::MTLSamplerAddressMode::ClampToZero); + } + + if r == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_r(metal::MTLSamplerAddressMode::ClampToZero); + } + } else { + descriptor.set_border_color(conv::map_border_color(border_color)); + } + } + + if let Some(label) = desc.label { + descriptor.set_label(label); + } + let raw = self.shared.device.lock().new_sampler(&descriptor); + + Ok(super::Sampler { raw }) + }) + } + unsafe fn destroy_sampler(&self, _sampler: super::Sampler) {} + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + Ok(super::CommandEncoder { + shared: Arc::clone(&self.shared), + raw_queue: Arc::clone(&desc.queue.raw), + raw_cmd_buf: None, + state: super::CommandState::default(), + temp: super::Temp::default(), + }) + } + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> DeviceResult<super::BindGroupLayout> { + Ok(super::BindGroupLayout { + entries: Arc::from(desc.entries), + }) + } + unsafe fn destroy_bind_group_layout(&self, _bg_layout: 
super::BindGroupLayout) {} + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> DeviceResult<super::PipelineLayout> { + #[derive(Debug)] + struct StageInfo { + stage: naga::ShaderStage, + counters: super::ResourceData<super::ResourceIndex>, + pc_buffer: Option<super::ResourceIndex>, + pc_limit: u32, + sizes_buffer: Option<super::ResourceIndex>, + sizes_count: u8, + resources: naga::back::msl::BindingMap, + } + + let mut stage_data = super::NAGA_STAGES.map(|stage| StageInfo { + stage, + counters: super::ResourceData::default(), + pc_buffer: None, + pc_limit: 0, + sizes_buffer: None, + sizes_count: 0, + resources: Default::default(), + }); + let mut bind_group_infos = arrayvec::ArrayVec::new(); + + // First, place the push constants + let mut total_push_constants = 0; + for info in stage_data.iter_mut() { + for pcr in desc.push_constant_ranges { + if pcr.stages.contains(map_naga_stage(info.stage)) { + debug_assert_eq!(pcr.range.end % 4, 0); + info.pc_limit = (pcr.range.end / 4).max(info.pc_limit); + } + } + + // round up the limits alignment to 4, so that it matches MTL compiler logic + const LIMIT_MASK: u32 = 3; + //TODO: figure out what and how exactly does the alignment. Clearly, it's not + // straightforward, given that value of 2 stays non-aligned. 
+ if info.pc_limit > LIMIT_MASK { + info.pc_limit = (info.pc_limit + LIMIT_MASK) & !LIMIT_MASK; + } + + // handle the push constant buffer assignment and shader overrides + if info.pc_limit != 0 { + info.pc_buffer = Some(info.counters.buffers); + info.counters.buffers += 1; + } + + total_push_constants = total_push_constants.max(info.pc_limit); + } + + // Second, place the described resources + for (group_index, &bgl) in desc.bind_group_layouts.iter().enumerate() { + // remember where the resources for this set start at each shader stage + let base_resource_indices = stage_data.map_ref(|info| info.counters.clone()); + + for entry in bgl.entries.iter() { + if let wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + .. + } = entry.ty + { + for info in stage_data.iter_mut() { + if entry.visibility.contains(map_naga_stage(info.stage)) { + info.sizes_count += 1; + } + } + } + + for info in stage_data.iter_mut() { + if !entry.visibility.contains(map_naga_stage(info.stage)) { + continue; + } + + let mut target = naga::back::msl::BindTarget::default(); + let count = entry.count.map_or(1, NonZeroU32::get); + target.binding_array_size = entry.count.map(NonZeroU32::get); + match entry.ty { + wgt::BindingType::Buffer { ty, .. } => { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += count; + if let wgt::BufferBindingType::Storage { read_only } = ty { + target.mutable = !read_only; + } + } + wgt::BindingType::Sampler { .. } => { + target.sampler = Some(naga::back::msl::BindSamplerTarget::Resource( + info.counters.samplers as _, + )); + info.counters.samplers += count; + } + wgt::BindingType::Texture { .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += count; + } + wgt::BindingType::StorageTexture { access, .. 
} => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += count; + target.mutable = match access { + wgt::StorageTextureAccess::ReadOnly => false, + wgt::StorageTextureAccess::WriteOnly => true, + wgt::StorageTextureAccess::ReadWrite => true, + }; + } + wgt::BindingType::AccelerationStructure => unimplemented!(), + } + + let br = naga::ResourceBinding { + group: group_index as u32, + binding: entry.binding, + }; + info.resources.insert(br, target); + } + } + + bind_group_infos.push(super::BindGroupLayoutInfo { + base_resource_indices, + }); + } + + // Finally, make sure we fit the limits + for info in stage_data.iter_mut() { + // handle the sizes buffer assignment and shader overrides + if info.sizes_count != 0 { + info.sizes_buffer = Some(info.counters.buffers); + info.counters.buffers += 1; + } + if info.counters.buffers > self.shared.private_caps.max_buffers_per_stage + || info.counters.textures > self.shared.private_caps.max_textures_per_stage + || info.counters.samplers > self.shared.private_caps.max_samplers_per_stage + { + log::error!("Resource limit exceeded: {:?}", info); + return Err(crate::DeviceError::OutOfMemory); + } + } + + let push_constants_infos = stage_data.map_ref(|info| { + info.pc_buffer.map(|buffer_index| super::PushConstantsInfo { + count: info.pc_limit, + buffer_index, + }) + }); + + let total_counters = stage_data.map_ref(|info| info.counters.clone()); + + let per_stage_map = stage_data.map(|info| naga::back::msl::EntryPointResources { + push_constant_buffer: info + .pc_buffer + .map(|buffer_index| buffer_index as naga::back::msl::Slot), + sizes_buffer: info + .sizes_buffer + .map(|buffer_index| buffer_index as naga::back::msl::Slot), + resources: info.resources, + }); + + Ok(super::PipelineLayout { + bind_group_infos, + push_constants_infos, + total_counters, + total_push_constants, + per_stage_map, + }) + } + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn 
create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> DeviceResult<super::BindGroup> { + let mut bg = super::BindGroup::default(); + for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { + let stage_bit = map_naga_stage(stage); + let mut dynamic_offsets_count = 0u32; + for (entry, layout) in desc.entries.iter().zip(desc.layout.entries.iter()) { + let size = layout.count.map_or(1, |c| c.get()); + if let wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } = layout.ty + { + dynamic_offsets_count += size; + } + if !layout.visibility.contains(stage_bit) { + continue; + } + match layout.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.buffers + .extend(desc.buffers[start..end].iter().map(|source| { + // Given the restrictions on `BufferBinding::offset`, + // this should never be `None`. + let remaining_size = + wgt::BufferSize::new(source.buffer.size - source.offset); + let binding_size = match ty { + wgt::BufferBindingType::Storage { .. } => { + source.size.or(remaining_size) + } + _ => None, + }; + super::BufferResource { + ptr: source.buffer.as_raw(), + offset: source.offset, + dynamic_index: if has_dynamic_offset { + Some(dynamic_offsets_count - 1) + } else { + None + }, + binding_size, + binding_location: layout.binding, + } + })); + counter.buffers += 1; + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.samplers + .extend(desc.samplers[start..end].iter().map(|samp| samp.as_raw())); + counter.samplers += size; + } + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. 
} => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.textures.extend( + desc.textures[start..end] + .iter() + .map(|tex| tex.view.as_raw()), + ); + counter.textures += size; + } + wgt::BindingType::AccelerationStructure => unimplemented!(), + } + } + } + + Ok(bg) + } + + unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { + naga, + runtime_checks: desc.runtime_checks, + }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::RenderPipelineDescriptor::new(); + + let raw_triangle_fill_mode = match desc.primitive.polygon_mode { + wgt::PolygonMode::Fill => metal::MTLTriangleFillMode::Fill, + wgt::PolygonMode::Line => metal::MTLTriangleFillMode::Lines, + wgt::PolygonMode::Point => panic!( + "{:?} is not enabled for this backend", + wgt::Features::POLYGON_MODE_POINT + ), + }; + + let (primitive_class, raw_primitive_type) = + conv::map_primitive_topology(desc.primitive.topology); + + // Vertex shader + let (vs_lib, vs_info) = { + let vs = self.load_shader( + &desc.vertex_stage, + desc.layout, + primitive_class, + naga::ShaderStage::Vertex, + )?; + + descriptor.set_vertex_function(Some(&vs.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.vertex_buffers().unwrap(), + vs.immutable_buffer_mask, + ); + } + + let info = super::PipelineStageInfo { + push_constants: 
desc.layout.push_constants_infos.vs, + sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer, + sized_bindings: vs.sized_bindings, + }; + + (vs.library, info) + }; + + // Fragment shader + let (fs_lib, fs_info) = match desc.fragment_stage { + Some(ref stage) => { + let fs = self.load_shader( + stage, + desc.layout, + primitive_class, + naga::ShaderStage::Fragment, + )?; + + descriptor.set_fragment_function(Some(&fs.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.fragment_buffers().unwrap(), + fs.immutable_buffer_mask, + ); + } + + let info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.fs, + sizes_slot: desc.layout.per_stage_map.fs.sizes_buffer, + sized_bindings: fs.sized_bindings, + }; + + (Some(fs.library), Some(info)) + } + None => { + // TODO: This is a workaround for what appears to be a Metal validation bug + // A pixel format is required even though no attachments are provided + if desc.color_targets.is_empty() && desc.depth_stencil.is_none() { + descriptor + .set_depth_attachment_pixel_format(metal::MTLPixelFormat::Depth32Float); + } + (None, None) + } + }; + + for (i, ct) in desc.color_targets.iter().enumerate() { + let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + let ct = if let Some(color_target) = ct.as_ref() { + color_target + } else { + at_descriptor.set_pixel_format(metal::MTLPixelFormat::Invalid); + continue; + }; + + let raw_format = self.shared.private_caps.map_format(ct.format); + at_descriptor.set_pixel_format(raw_format); + at_descriptor.set_write_mask(conv::map_color_write(ct.write_mask)); + + if let Some(ref blend) = ct.blend { + at_descriptor.set_blending_enabled(true); + let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); + let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); + + at_descriptor.set_rgb_blend_operation(color_op); + 
at_descriptor.set_source_rgb_blend_factor(color_src); + at_descriptor.set_destination_rgb_blend_factor(color_dst); + + at_descriptor.set_alpha_blend_operation(alpha_op); + at_descriptor.set_source_alpha_blend_factor(alpha_src); + at_descriptor.set_destination_alpha_blend_factor(alpha_dst); + } + } + + let depth_stencil = match desc.depth_stencil { + Some(ref ds) => { + let raw_format = self.shared.private_caps.map_format(ds.format); + let aspects = crate::FormatAspects::from(ds.format); + if aspects.contains(crate::FormatAspects::DEPTH) { + descriptor.set_depth_attachment_pixel_format(raw_format); + } + if aspects.contains(crate::FormatAspects::STENCIL) { + descriptor.set_stencil_attachment_pixel_format(raw_format); + } + + let ds_descriptor = create_depth_stencil_desc(ds); + let raw = self + .shared + .device + .lock() + .new_depth_stencil_state(&ds_descriptor); + Some((raw, ds.bias)) + } + None => None, + }; + + if desc.layout.total_counters.vs.buffers + (desc.vertex_buffers.len() as u32) + > self.shared.private_caps.max_vertex_buffers + { + let msg = format!( + "pipeline needs too many buffers in the vertex stage: {} vertex and {} layout", + desc.vertex_buffers.len(), + desc.layout.total_counters.vs.buffers + ); + return Err(crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX, + msg, + )); + } + + if !desc.vertex_buffers.is_empty() { + let vertex_descriptor = metal::VertexDescriptor::new(); + for (i, vb) in desc.vertex_buffers.iter().enumerate() { + let buffer_index = + self.shared.private_caps.max_vertex_buffers as u64 - 1 - i as u64; + let buffer_desc = vertex_descriptor.layouts().object_at(buffer_index).unwrap(); + + // Metal expects the stride to be the actual size of the attributes. + // The semantics of array_stride == 0 can be achieved by setting + // the step function to constant and rate to 0. 
+ if vb.array_stride == 0 { + let stride = vb + .attributes + .iter() + .map(|attribute| attribute.offset + attribute.format.size()) + .max() + .unwrap_or(0); + buffer_desc.set_stride(wgt::math::align_to(stride, 4)); + buffer_desc.set_step_function(metal::MTLVertexStepFunction::Constant); + buffer_desc.set_step_rate(0); + } else { + buffer_desc.set_stride(vb.array_stride); + buffer_desc.set_step_function(conv::map_step_mode(vb.step_mode)); + } + + for at in vb.attributes { + let attribute_desc = vertex_descriptor + .attributes() + .object_at(at.shader_location as u64) + .unwrap(); + attribute_desc.set_format(conv::map_vertex_format(at.format)); + attribute_desc.set_buffer_index(buffer_index); + attribute_desc.set_offset(at.offset); + } + } + descriptor.set_vertex_descriptor(Some(vertex_descriptor)); + } + + if desc.multisample.count != 1 { + //TODO: handle sample mask + descriptor.set_sample_count(desc.multisample.count as u64); + descriptor + .set_alpha_to_coverage_enabled(desc.multisample.alpha_to_coverage_enabled); + //descriptor.set_alpha_to_one_enabled(desc.multisample.alpha_to_one_enabled); + } + + if let Some(name) = desc.label { + descriptor.set_label(name); + } + + let raw = self + .shared + .device + .lock() + .new_render_pipeline_state(&descriptor) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX | wgt::ShaderStages::FRAGMENT, + format!("new_render_pipeline_state: {:?}", e), + ) + })?; + + Ok(super::RenderPipeline { + raw, + vs_lib, + fs_lib, + vs_info, + fs_info, + raw_primitive_type, + raw_triangle_fill_mode, + raw_front_winding: conv::map_winding(desc.primitive.front_face), + raw_cull_mode: conv::map_cull_mode(desc.primitive.cull_mode), + raw_depth_clip_mode: if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + Some(if desc.primitive.unclipped_depth { + metal::MTLDepthClipMode::Clamp + } else { + metal::MTLDepthClipMode::Clip + }) + } else { + None + }, + depth_stencil, + }) + }) + } + unsafe fn 
destroy_render_pipeline(&self, _pipeline: super::RenderPipeline) {} + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::ComputePipelineDescriptor::new(); + + let cs = self.load_shader( + &desc.stage, + desc.layout, + metal::MTLPrimitiveTopologyClass::Unspecified, + naga::ShaderStage::Compute, + )?; + descriptor.set_compute_function(Some(&cs.function)); + + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.buffers().unwrap(), + cs.immutable_buffer_mask, + ); + } + + let cs_info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.cs, + sizes_slot: desc.layout.per_stage_map.cs.sizes_buffer, + sized_bindings: cs.sized_bindings, + }; + + if let Some(name) = desc.label { + descriptor.set_label(name); + } + + let raw = self + .shared + .device + .lock() + .new_compute_pipeline_state(&descriptor) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::COMPUTE, + format!("new_compute_pipeline_state: {:?}", e), + ) + })?; + + Ok(super::ComputePipeline { + raw, + cs_info, + cs_lib: cs.library, + work_group_size: cs.wg_size, + work_group_memory_sizes: cs.wg_memory_sizes, + }) + }) + } + unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> DeviceResult<super::QuerySet> { + objc::rc::autoreleasepool(|| { + match desc.ty { + wgt::QueryType::Occlusion => { + let size = desc.count as u64 * crate::QUERY_SIZE; + let options = metal::MTLResourceOptions::empty(); + //TODO: HazardTrackingModeUntracked + let raw_buffer = self.shared.device.lock().new_buffer(size, options); + if let Some(label) = desc.label { + raw_buffer.set_label(label); + } + Ok(super::QuerySet { + raw_buffer, + counter_sample_buffer: None, + 
ty: desc.ty, + }) + } + wgt::QueryType::Timestamp => { + let size = desc.count as u64 * crate::QUERY_SIZE; + let device = self.shared.device.lock(); + let destination_buffer = + device.new_buffer(size, metal::MTLResourceOptions::empty()); + + let csb_desc = metal::CounterSampleBufferDescriptor::new(); + csb_desc.set_storage_mode(metal::MTLStorageMode::Shared); + csb_desc.set_sample_count(desc.count as _); + if let Some(label) = desc.label { + csb_desc.set_label(label); + } + + let counter_sets = device.counter_sets(); + let timestamp_counter = + match counter_sets.iter().find(|cs| cs.name() == "timestamp") { + Some(counter) => counter, + None => { + log::error!("Failed to obtain timestamp counter set."); + return Err(crate::DeviceError::ResourceCreationFailed); + } + }; + csb_desc.set_counter_set(timestamp_counter); + + let counter_sample_buffer = + match device.new_counter_sample_buffer_with_descriptor(&csb_desc) { + Ok(buffer) => buffer, + Err(err) => { + log::error!("Failed to create counter sample buffer: {:?}", err); + return Err(crate::DeviceError::ResourceCreationFailed); + } + }; + + Ok(super::QuerySet { + raw_buffer: destination_buffer, + counter_sample_buffer: Some(counter_sample_buffer), + ty: desc.ty, + }) + } + _ => { + todo!() + } + } + }) + } + unsafe fn destroy_query_set(&self, _set: super::QuerySet) {} + + unsafe fn create_fence(&self) -> DeviceResult<super::Fence> { + Ok(super::Fence { + completed_value: Arc::new(atomic::AtomicU64::new(0)), + pending_command_buffers: Vec::new(), + }) + } + unsafe fn destroy_fence(&self, _fence: super::Fence) {} + unsafe fn get_fence_value(&self, fence: &super::Fence) -> DeviceResult<crate::FenceValue> { + let mut max_value = fence.completed_value.load(atomic::Ordering::Acquire); + for &(value, ref cmd_buf) in fence.pending_command_buffers.iter() { + if cmd_buf.status() == metal::MTLCommandBufferStatus::Completed { + max_value = value; + } + } + Ok(max_value) + } + unsafe fn wait( + &self, + fence: &super::Fence, + 
wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> DeviceResult<bool> { + if wait_value <= fence.completed_value.load(atomic::Ordering::Acquire) { + return Ok(true); + } + + let cmd_buf = match fence + .pending_command_buffers + .iter() + .find(|&&(value, _)| value >= wait_value) + { + Some(&(_, ref cmd_buf)) => cmd_buf, + None => { + log::error!("No active command buffers for fence value {}", wait_value); + return Err(crate::DeviceError::Lost); + } + }; + + let start = time::Instant::now(); + loop { + if let metal::MTLCommandBufferStatus::Completed = cmd_buf.status() { + return Ok(true); + } + if start.elapsed().as_millis() >= timeout_ms as u128 { + return Ok(false); + } + thread::sleep(time::Duration::from_millis(1)); + } + } + + unsafe fn start_capture(&self) -> bool { + if !self.shared.private_caps.supports_capture_manager { + return false; + } + let device = self.shared.device.lock(); + let shared_capture_manager = metal::CaptureManager::shared(); + let default_capture_scope = shared_capture_manager.new_capture_scope_with_device(&device); + shared_capture_manager.set_default_capture_scope(&default_capture_scope); + shared_capture_manager.start_capture_with_scope(&default_capture_scope); + default_capture_scope.begin_scope(); + true + } + unsafe fn stop_capture(&self) { + let shared_capture_manager = metal::CaptureManager::shared(); + if let Some(default_capture_scope) = shared_capture_manager.default_capture_scope() { + default_capture_scope.end_scope(); + } + shared_capture_manager.stop_capture(); + } + + unsafe fn get_acceleration_structure_build_sizes( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<super::Api>, + ) -> crate::AccelerationStructureBuildSizes { + unimplemented!() + } + + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + unimplemented!() + } + + unsafe fn create_acceleration_structure( + &self, + _desc: 
&crate::AccelerationStructureDescriptor, + ) -> Result<super::AccelerationStructure, crate::DeviceError> { + unimplemented!() + } + + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + unimplemented!() + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/mod.rs b/third_party/rust/wgpu-hal/src/metal/mod.rs new file mode 100644 index 0000000000..298f60faac --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/mod.rs @@ -0,0 +1,852 @@ +/*! +# Metal API internals. + +## Pipeline Layout + +In Metal, push constants, vertex buffers, and resources in the bind groups +are all placed together in the native resource bindings, which work similarly to D3D11: +there are tables of textures, buffers, and samplers. + +We put push constants first (if any) in the table, followed by bind group 0 +resources, followed by other bind groups. The vertex buffers are bound at the very +end of the VS buffer table. + +!*/ + +// `MTLFeatureSet` is superseded by `MTLGpuFamily`. +// However, `MTLGpuFamily` is only supported starting MacOS 10.15, whereas our minimum target is MacOS 10.13, +// See https://github.com/gpuweb/gpuweb/issues/1069 for minimum spec. +// TODO: Eventually all deprecated features should be abstracted and use new api when available. 
+#[allow(deprecated)] +mod adapter; +mod command; +mod conv; +mod device; +mod surface; +mod time; + +use std::{ + fmt, iter, ops, + ptr::NonNull, + sync::{atomic, Arc}, + thread, +}; + +use arrayvec::ArrayVec; +use bitflags::bitflags; +use metal::foreign_types::ForeignTypeRef as _; +use parking_lot::{Mutex, RwLock}; + +#[derive(Clone, Debug)] +pub struct Api; + +type ResourceIndex = u32; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; +} + +pub struct Instance { + managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate, +} + +impl Instance { + pub fn create_surface_from_layer(&self, layer: &metal::MetalLayerRef) -> Surface { + unsafe { Surface::from_layer(layer) } + } +} + +impl crate::Instance<Api> for Instance { + unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + profiling::scope!("Init Metal Backend"); + // We do not enable metal validation based on the validation flags as it affects the entire + // process. Instead, we enable the validation inside the test harness itself in tests/src/native.rs. 
+ Ok(Instance { + managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate::new(), + }) + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + match window_handle { + #[cfg(target_os = "ios")] + raw_window_handle::RawWindowHandle::UiKit(handle) => { + let _ = &self.managed_metal_layer_delegate; + Ok(unsafe { Surface::from_view(handle.ui_view.as_ptr(), None) }) + } + #[cfg(target_os = "macos")] + raw_window_handle::RawWindowHandle::AppKit(handle) => Ok(unsafe { + Surface::from_view( + handle.ns_view.as_ptr(), + Some(&self.managed_metal_layer_delegate), + ) + }), + _ => Err(crate::InstanceError::new(format!( + "window handle {window_handle:?} is not a Metal-compatible handle" + ))), + } + } + + unsafe fn destroy_surface(&self, surface: Surface) { + unsafe { surface.dispose() }; + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<Api>> { + let devices = metal::Device::all(); + let mut adapters: Vec<crate::ExposedAdapter<Api>> = devices + .into_iter() + .map(|dev| { + let name = dev.name().into(); + let shared = AdapterShared::new(dev); + crate::ExposedAdapter { + info: wgt::AdapterInfo { + name, + vendor: 0, + device: 0, + device_type: shared.private_caps.device_type(), + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Metal, + }, + features: shared.private_caps.features(), + capabilities: shared.private_caps.capabilities(), + adapter: Adapter::new(Arc::new(shared)), + } + }) + .collect(); + adapters.sort_by_key(|ad| { + ( + ad.adapter.shared.private_caps.low_power, + ad.adapter.shared.private_caps.headless, + ) + }); + adapters + } +} + +bitflags!( + /// Similar to `MTLCounterSamplingPoint`, but a bit higher abstracted for our purposes. + #[derive(Debug, Copy, Clone)] + pub struct TimestampQuerySupport: u32 { + /// On creating Metal encoders. 
+ const STAGE_BOUNDARIES = 1 << 1; + /// Within existing draw encoders. + const ON_RENDER_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 2); + /// Within existing dispatch encoders. + const ON_COMPUTE_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 3); + /// Within existing blit encoders. + const ON_BLIT_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 4); + + /// Within any wgpu render/compute pass. + const INSIDE_WGPU_PASSES = Self::ON_RENDER_ENCODER.bits() | Self::ON_COMPUTE_ENCODER.bits(); + } +); + +#[allow(dead_code)] +#[derive(Clone, Debug)] +struct PrivateCapabilities { + family_check: bool, + msl_version: metal::MTLLanguageVersion, + fragment_rw_storage: bool, + read_write_texture_tier: metal::MTLReadWriteTextureTier, + msaa_desktop: bool, + msaa_apple3: bool, + msaa_apple7: bool, + resource_heaps: bool, + argument_buffers: bool, + shared_textures: bool, + mutable_comparison_samplers: bool, + sampler_clamp_to_border: bool, + indirect_draw_dispatch: bool, + base_vertex_first_instance_drawing: bool, + dual_source_blending: bool, + low_power: bool, + headless: bool, + layered_rendering: bool, + function_specialization: bool, + depth_clip_mode: bool, + texture_cube_array: bool, + supports_float_filtering: bool, + format_depth24_stencil8: bool, + format_depth32_stencil8_filter: bool, + format_depth32_stencil8_none: bool, + format_min_srgb_channels: u8, + format_b5: bool, + format_bc: bool, + format_eac_etc: bool, + format_astc: bool, + format_astc_hdr: bool, + format_any8_unorm_srgb_all: bool, + format_any8_unorm_srgb_no_write: bool, + format_any8_snorm_all: bool, + format_r16_norm_all: bool, + format_r32_all: bool, + format_r32_no_write: bool, + format_r32float_no_write_no_filter: bool, + format_r32float_no_filter: bool, + format_r32float_all: bool, + format_rgba8_srgb_all: bool, + format_rgba8_srgb_no_write: bool, + format_rgb10a2_unorm_all: bool, + format_rgb10a2_unorm_no_write: bool, + format_rgb10a2_uint_write: bool, + format_rg11b10_all: bool, + 
format_rg11b10_no_write: bool, + format_rgb9e5_all: bool, + format_rgb9e5_no_write: bool, + format_rgb9e5_filter_only: bool, + format_rg32_color: bool, + format_rg32_color_write: bool, + format_rg32float_all: bool, + format_rg32float_color_blend: bool, + format_rg32float_no_filter: bool, + format_rgba32int_color: bool, + format_rgba32int_color_write: bool, + format_rgba32float_color: bool, + format_rgba32float_color_write: bool, + format_rgba32float_all: bool, + format_depth16unorm: bool, + format_depth32float_filter: bool, + format_depth32float_none: bool, + format_bgr10a2_all: bool, + format_bgr10a2_no_write: bool, + max_buffers_per_stage: ResourceIndex, + max_vertex_buffers: ResourceIndex, + max_textures_per_stage: ResourceIndex, + max_samplers_per_stage: ResourceIndex, + buffer_alignment: u64, + max_buffer_size: u64, + max_texture_size: u64, + max_texture_3d_size: u64, + max_texture_layers: u64, + max_fragment_input_components: u64, + max_color_render_targets: u8, + max_varying_components: u32, + max_threads_per_group: u32, + max_total_threadgroup_memory: u32, + sample_count_mask: crate::TextureFormatCapabilities, + supports_debug_markers: bool, + supports_binary_archives: bool, + supports_capture_manager: bool, + can_set_maximum_drawables_count: bool, + can_set_display_sync: bool, + can_set_next_drawable_timeout: bool, + supports_arrays_of_textures: bool, + supports_arrays_of_textures_write: bool, + supports_mutability: bool, + supports_depth_clip_control: bool, + supports_preserve_invariance: bool, + supports_shader_primitive_index: bool, + has_unified_memory: Option<bool>, + timestamp_query_support: TimestampQuerySupport, +} + +#[derive(Clone, Debug)] +struct PrivateDisabilities { + /// Near depth is not respected properly on some Intel GPUs. + broken_viewport_near_depth: bool, + /// Multi-target clears don't appear to work properly on Intel GPUs. 
+ #[allow(dead_code)] + broken_layered_clear_image: bool, +} + +#[derive(Debug, Default)] +struct Settings { + retain_command_buffer_references: bool, +} + +struct AdapterShared { + device: Mutex<metal::Device>, + disabilities: PrivateDisabilities, + private_caps: PrivateCapabilities, + settings: Settings, + presentation_timer: time::PresentationTimer, +} + +unsafe impl Send for AdapterShared {} +unsafe impl Sync for AdapterShared {} + +impl AdapterShared { + fn new(device: metal::Device) -> Self { + let private_caps = PrivateCapabilities::new(&device); + log::debug!("{:#?}", private_caps); + + Self { + disabilities: PrivateDisabilities::new(&device), + private_caps, + device: Mutex::new(device), + settings: Settings::default(), + presentation_timer: time::PresentationTimer::new(), + } + } +} + +pub struct Adapter { + shared: Arc<AdapterShared>, +} + +pub struct Queue { + raw: Arc<Mutex<metal::CommandQueue>>, + timestamp_period: f32, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +impl Queue { + pub unsafe fn queue_from_raw(raw: metal::CommandQueue, timestamp_period: f32) -> Self { + Self { + raw: Arc::new(Mutex::new(raw)), + timestamp_period, + } + } +} + +pub struct Device { + shared: Arc<AdapterShared>, + features: wgt::Features, +} + +pub struct Surface { + view: Option<NonNull<objc::runtime::Object>>, + render_layer: Mutex<metal::MetalLayer>, + swapchain_format: RwLock<Option<wgt::TextureFormat>>, + extent: RwLock<wgt::Extent3d>, + main_thread_id: thread::ThreadId, + // Useful for UI-intensive applications that are sensitive to + // window resizing. 
+ pub present_with_transaction: bool, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug)] +pub struct SurfaceTexture { + texture: Texture, + drawable: metal::MetalDrawable, + present_with_transaction: bool, +} + +impl std::borrow::Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + &self.texture + } +} + +unsafe impl Send for SurfaceTexture {} +unsafe impl Sync for SurfaceTexture {} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &self, + command_buffers: &[&CommandBuffer], + _surface_textures: &[&SurfaceTexture], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + objc::rc::autoreleasepool(|| { + let extra_command_buffer = match signal_fence { + Some((fence, value)) => { + let completed_value = Arc::clone(&fence.completed_value); + let block = block::ConcreteBlock::new(move |_cmd_buf| { + completed_value.store(value, atomic::Ordering::Release); + }) + .copy(); + + let raw = match command_buffers.last() { + Some(&cmd_buf) => cmd_buf.raw.to_owned(), + None => { + let queue = self.raw.lock(); + queue + .new_command_buffer_with_unretained_references() + .to_owned() + } + }; + raw.set_label("(wgpu internal) Signal"); + raw.add_completed_handler(&block); + + fence.maintain(); + fence.pending_command_buffers.push((value, raw.to_owned())); + // only return an extra one if it's extra + match command_buffers.last() { + Some(_) => None, + None => Some(raw), + } + } + None => None, + }; + + for cmd_buffer in command_buffers { + cmd_buffer.raw.commit(); + } + + if let Some(raw) = extra_command_buffer { + raw.commit(); + } + }); + Ok(()) + } + unsafe fn present( + &self, + _surface: &Surface, + texture: SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + let queue = &self.raw.lock(); + objc::rc::autoreleasepool(|| { + let command_buffer = queue.new_command_buffer(); + command_buffer.set_label("(wgpu internal) Present"); + + // 
https://developer.apple.com/documentation/quartzcore/cametallayer/1478157-presentswithtransaction?language=objc + if !texture.present_with_transaction { + command_buffer.present_drawable(&texture.drawable); + } + + command_buffer.commit(); + + if texture.present_with_transaction { + command_buffer.wait_until_scheduled(); + texture.drawable.present(); + } + }); + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + self.timestamp_period + } +} + +#[derive(Debug)] +pub struct Buffer { + raw: metal::Buffer, + size: wgt::BufferAddress, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl Buffer { + fn as_raw(&self) -> BufferPtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct Texture { + raw: metal::Texture, + format: wgt::TextureFormat, + raw_type: metal::MTLTextureType, + array_layers: u32, + mip_levels: u32, + copy_size: crate::CopyExtent, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +#[derive(Debug)] +pub struct TextureView { + raw: metal::Texture, + aspects: crate::FormatAspects, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} + +impl TextureView { + fn as_raw(&self) -> TexturePtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct Sampler { + raw: metal::SamplerState, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +impl Sampler { + fn as_raw(&self) -> SamplerPtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct BindGroupLayout { + /// Sorted list of BGL entries. 
+ entries: Arc<[wgt::BindGroupLayoutEntry]>, +} + +#[derive(Clone, Debug, Default)] +struct ResourceData<T> { + buffers: T, + textures: T, + samplers: T, +} + +#[derive(Clone, Debug, Default)] +struct MultiStageData<T> { + vs: T, + fs: T, + cs: T, +} + +const NAGA_STAGES: MultiStageData<naga::ShaderStage> = MultiStageData { + vs: naga::ShaderStage::Vertex, + fs: naga::ShaderStage::Fragment, + cs: naga::ShaderStage::Compute, +}; + +impl<T> ops::Index<naga::ShaderStage> for MultiStageData<T> { + type Output = T; + fn index(&self, stage: naga::ShaderStage) -> &T { + match stage { + naga::ShaderStage::Vertex => &self.vs, + naga::ShaderStage::Fragment => &self.fs, + naga::ShaderStage::Compute => &self.cs, + } + } +} + +impl<T> MultiStageData<T> { + fn map_ref<Y>(&self, fun: impl Fn(&T) -> Y) -> MultiStageData<Y> { + MultiStageData { + vs: fun(&self.vs), + fs: fun(&self.fs), + cs: fun(&self.cs), + } + } + fn map<Y>(self, fun: impl Fn(T) -> Y) -> MultiStageData<Y> { + MultiStageData { + vs: fun(self.vs), + fs: fun(self.fs), + cs: fun(self.cs), + } + } + fn iter<'a>(&'a self) -> impl Iterator<Item = &'a T> { + iter::once(&self.vs) + .chain(iter::once(&self.fs)) + .chain(iter::once(&self.cs)) + } + fn iter_mut<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> { + iter::once(&mut self.vs) + .chain(iter::once(&mut self.fs)) + .chain(iter::once(&mut self.cs)) + } +} + +type MultiStageResourceCounters = MultiStageData<ResourceData<ResourceIndex>>; +type MultiStageResources = MultiStageData<naga::back::msl::EntryPointResources>; + +#[derive(Debug)] +struct BindGroupLayoutInfo { + base_resource_indices: MultiStageResourceCounters, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PushConstantsInfo { + count: u32, + buffer_index: ResourceIndex, +} + +#[derive(Debug)] +pub struct PipelineLayout { + bind_group_infos: ArrayVec<BindGroupLayoutInfo, { crate::MAX_BIND_GROUPS }>, + push_constants_infos: MultiStageData<Option<PushConstantsInfo>>, + total_counters: 
MultiStageResourceCounters, + total_push_constants: u32, + per_stage_map: MultiStageResources, +} + +trait AsNative { + type Native; + fn from(native: &Self::Native) -> Self; + fn as_native(&self) -> &Self::Native; +} + +type BufferPtr = NonNull<metal::MTLBuffer>; +type TexturePtr = NonNull<metal::MTLTexture>; +type SamplerPtr = NonNull<metal::MTLSamplerState>; + +impl AsNative for BufferPtr { + type Native = metal::BufferRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for TexturePtr { + type Native = metal::TextureRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for SamplerPtr { + type Native = metal::SamplerStateRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +#[derive(Debug)] +struct BufferResource { + ptr: BufferPtr, + offset: wgt::BufferAddress, + dynamic_index: Option<u32>, + + /// The buffer's size, if it is a [`Storage`] binding. Otherwise `None`. + /// + /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can + /// hold WGSL runtime-sized arrays. When one does, we must pass its size to + /// shader entry points to implement bounds checks and WGSL's `arrayLength` + /// function. See [`device::CompiledShader::sized_bindings`] for details. 
+ /// + /// [`Storage`]: wgt::BufferBindingType::Storage + binding_size: Option<wgt::BufferSize>, + + binding_location: u32, +} + +#[derive(Debug, Default)] +pub struct BindGroup { + counters: MultiStageResourceCounters, + buffers: Vec<BufferResource>, + samplers: Vec<SamplerPtr>, + textures: Vec<TexturePtr>, +} + +unsafe impl Send for BindGroup {} +unsafe impl Sync for BindGroup {} + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + runtime_checks: bool, +} + +#[derive(Debug, Default)] +struct PipelineStageInfo { + push_constants: Option<PushConstantsInfo>, + + /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + sizes_slot: Option<naga::back::msl::Slot>, + + /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + sized_bindings: Vec<naga::ResourceBinding>, +} + +impl PipelineStageInfo { + fn clear(&mut self) { + self.push_constants = None; + self.sizes_slot = None; + self.sized_bindings.clear(); + } + + fn assign_from(&mut self, other: &Self) { + self.push_constants = other.push_constants; + self.sizes_slot = other.sizes_slot; + self.sized_bindings.clear(); + self.sized_bindings.extend_from_slice(&other.sized_bindings); + } +} + +#[derive(Debug)] +pub struct RenderPipeline { + raw: metal::RenderPipelineState, + #[allow(dead_code)] + vs_lib: metal::Library, + #[allow(dead_code)] + fs_lib: Option<metal::Library>, + vs_info: PipelineStageInfo, + fs_info: Option<PipelineStageInfo>, + raw_primitive_type: metal::MTLPrimitiveType, + raw_triangle_fill_mode: metal::MTLTriangleFillMode, + raw_front_winding: metal::MTLWinding, + raw_cull_mode: metal::MTLCullMode, + raw_depth_clip_mode: Option<metal::MTLDepthClipMode>, + depth_stencil: Option<(metal::DepthStencilState, wgt::DepthBiasState)>, +} + +unsafe impl Send for RenderPipeline {} 
+unsafe impl Sync for RenderPipeline {} + +#[derive(Debug)] +pub struct ComputePipeline { + raw: metal::ComputePipelineState, + #[allow(dead_code)] + cs_lib: metal::Library, + cs_info: PipelineStageInfo, + work_group_size: metal::MTLSize, + work_group_memory_sizes: Vec<u32>, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +#[derive(Debug, Clone)] +pub struct QuerySet { + raw_buffer: metal::Buffer, + //Metal has a custom buffer for counters. + counter_sample_buffer: Option<metal::CounterSampleBuffer>, + ty: wgt::QueryType, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} + +#[derive(Debug)] +pub struct Fence { + completed_value: Arc<atomic::AtomicU64>, + /// The pending fence values have to be ascending. + pending_command_buffers: Vec<(crate::FenceValue, metal::CommandBuffer)>, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +impl Fence { + fn get_latest(&self) -> crate::FenceValue { + let mut max_value = self.completed_value.load(atomic::Ordering::Acquire); + for &(value, ref cmd_buf) in self.pending_command_buffers.iter() { + if cmd_buf.status() == metal::MTLCommandBufferStatus::Completed { + max_value = value; + } + } + max_value + } + + fn maintain(&mut self) { + let latest = self.get_latest(); + self.pending_command_buffers + .retain(|&(value, _)| value > latest); + } +} + +struct IndexState { + buffer_ptr: BufferPtr, + offset: wgt::BufferAddress, + stride: wgt::BufferAddress, + raw_type: metal::MTLIndexType, +} + +#[derive(Default)] +struct Temp { + binding_sizes: Vec<u32>, +} + +struct CommandState { + blit: Option<metal::BlitCommandEncoder>, + render: Option<metal::RenderCommandEncoder>, + compute: Option<metal::ComputeCommandEncoder>, + raw_primitive_type: metal::MTLPrimitiveType, + index: Option<IndexState>, + raw_wg_size: metal::MTLSize, + stage_infos: MultiStageData<PipelineStageInfo>, + + /// Sizes of currently bound [`wgt::BufferBindingType::Storage`] buffers. 
+ /// + /// Specifically: + /// + /// - The keys are ['ResourceBinding`] values (that is, the WGSL `@group` + /// and `@binding` attributes) for `var<storage>` global variables in the + /// current module that contain runtime-sized arrays. + /// + /// - The values are the actual sizes of the buffers currently bound to + /// provide those globals' contents, which are needed to implement bounds + /// checks and the WGSL `arrayLength` function. + /// + /// For each stage `S` in `stage_infos`, we consult this to find the sizes + /// of the buffers listed in [`stage_infos.S.sized_bindings`], which we must + /// pass to the entry point. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + /// + /// [`ResourceBinding`]: naga::ResourceBinding + storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>, + + work_group_memory_sizes: Vec<u32>, + push_constants: Vec<u32>, + + /// Timer query that should be executed when the next pass starts. + pending_timer_queries: Vec<(QuerySet, u32)>, +} + +pub struct CommandEncoder { + shared: Arc<AdapterShared>, + raw_queue: Arc<Mutex<metal::CommandQueue>>, + raw_cmd_buf: Option<metal::CommandBuffer>, + state: CommandState, + temp: Temp, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("raw_queue", &self.raw_queue) + .field("raw_cmd_buf", &self.raw_cmd_buf) + .finish() + } +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: metal::CommandBuffer, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct AccelerationStructure; diff --git a/third_party/rust/wgpu-hal/src/metal/surface.rs b/third_party/rust/wgpu-hal/src/metal/surface.rs new file mode 100644 index 0000000000..a97eff0aae --- /dev/null +++ 
b/third_party/rust/wgpu-hal/src/metal/surface.rs @@ -0,0 +1,280 @@ +#![allow(clippy::let_unit_value)] // `let () =` being used to constrain result type + +use std::{mem, os::raw::c_void, ptr::NonNull, sync::Once, thread}; + +use core_graphics_types::{ + base::CGFloat, + geometry::{CGRect, CGSize}, +}; +use objc::{ + class, + declare::ClassDecl, + msg_send, + rc::autoreleasepool, + runtime::{Class, Object, Sel, BOOL, NO, YES}, + sel, sel_impl, +}; +use parking_lot::{Mutex, RwLock}; + +#[cfg(target_os = "macos")] +#[cfg_attr(feature = "link", link(name = "QuartzCore", kind = "framework"))] +extern "C" { + #[allow(non_upper_case_globals)] + static kCAGravityTopLeft: *mut Object; +} + +extern "C" fn layer_should_inherit_contents_scale_from_window( + _: &Class, + _: Sel, + _layer: *mut Object, + _new_scale: CGFloat, + _from_window: *mut Object, +) -> BOOL { + YES +} + +static CAML_DELEGATE_REGISTER: Once = Once::new(); + +#[derive(Debug)] +pub struct HalManagedMetalLayerDelegate(&'static Class); + +impl HalManagedMetalLayerDelegate { + pub fn new() -> Self { + let class_name = format!("HalManagedMetalLayerDelegate@{:p}", &CAML_DELEGATE_REGISTER); + + CAML_DELEGATE_REGISTER.call_once(|| { + type Fun = extern "C" fn(&Class, Sel, *mut Object, CGFloat, *mut Object) -> BOOL; + let mut decl = ClassDecl::new(&class_name, class!(NSObject)).unwrap(); + #[allow(trivial_casts)] // false positive + unsafe { + decl.add_class_method( + sel!(layer:shouldInheritContentsScale:fromWindow:), + layer_should_inherit_contents_scale_from_window as Fun, + ); + } + decl.register(); + }); + Self(Class::get(&class_name).unwrap()) + } +} + +impl super::Surface { + fn new(view: Option<NonNull<Object>>, layer: metal::MetalLayer) -> Self { + Self { + view, + render_layer: Mutex::new(layer), + swapchain_format: RwLock::new(None), + extent: RwLock::new(wgt::Extent3d::default()), + main_thread_id: thread::current().id(), + present_with_transaction: false, + } + } + + pub unsafe fn dispose(self) { + if 
let Some(view) = self.view { + let () = msg_send![view.as_ptr(), release]; + } + } + + /// If not called on the main thread, this will panic. + #[allow(clippy::transmute_ptr_to_ref)] + pub unsafe fn from_view( + view: *mut c_void, + delegate: Option<&HalManagedMetalLayerDelegate>, + ) -> Self { + let view = view as *mut Object; + let render_layer = { + let layer = unsafe { Self::get_metal_layer(view, delegate) }; + unsafe { mem::transmute::<_, &metal::MetalLayerRef>(layer) } + } + .to_owned(); + let _: *mut c_void = msg_send![view, retain]; + Self::new(NonNull::new(view), render_layer) + } + + pub unsafe fn from_layer(layer: &metal::MetalLayerRef) -> Self { + let class = class!(CAMetalLayer); + let proper_kind: BOOL = msg_send![layer, isKindOfClass: class]; + assert_eq!(proper_kind, YES); + Self::new(None, layer.to_owned()) + } + + /// If not called on the main thread, this will panic. + pub(crate) unsafe fn get_metal_layer( + view: *mut Object, + delegate: Option<&HalManagedMetalLayerDelegate>, + ) -> *mut Object { + if view.is_null() { + panic!("window does not have a valid contentView"); + } + + let is_main_thread: BOOL = msg_send![class!(NSThread), isMainThread]; + if is_main_thread == NO { + panic!("get_metal_layer cannot be called in non-ui thread."); + } + + let main_layer: *mut Object = msg_send![view, layer]; + let class = class!(CAMetalLayer); + let is_valid_layer: BOOL = msg_send![main_layer, isKindOfClass: class]; + + if is_valid_layer == YES { + main_layer + } else { + // If the main layer is not a CAMetalLayer, we create a CAMetalLayer and use it. + let new_layer: *mut Object = msg_send![class, new]; + let frame: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![new_layer, setFrame: frame]; + #[cfg(target_os = "ios")] + { + // Unlike NSView, UIView does not allow to replace main layer. 
+ let () = msg_send![main_layer, addSublayer: new_layer]; + // On iOS, "from_view" may be called before the application initialization is complete, + // `msg_send![view, window]` and `msg_send![window, screen]` will get null. + let screen: *mut Object = msg_send![class!(UIScreen), mainScreen]; + let scale_factor: CGFloat = msg_send![screen, nativeScale]; + let () = msg_send![view, setContentScaleFactor: scale_factor]; + }; + #[cfg(target_os = "macos")] + { + let () = msg_send![view, setLayer: new_layer]; + let () = msg_send![view, setWantsLayer: YES]; + let () = msg_send![new_layer, setContentsGravity: unsafe { kCAGravityTopLeft }]; + let window: *mut Object = msg_send![view, window]; + if !window.is_null() { + let scale_factor: CGFloat = msg_send![window, backingScaleFactor]; + let () = msg_send![new_layer, setContentsScale: scale_factor]; + } + }; + if let Some(delegate) = delegate { + let () = msg_send![new_layer, setDelegate: delegate.0]; + } + new_layer + } + } + + pub(super) fn dimensions(&self) -> wgt::Extent3d { + let (size, scale): (CGSize, CGFloat) = unsafe { + let render_layer_borrow = self.render_layer.lock(); + let render_layer = render_layer_borrow.as_ref(); + let bounds: CGRect = msg_send![render_layer, bounds]; + let contents_scale: CGFloat = msg_send![render_layer, contentsScale]; + (bounds.size, contents_scale) + }; + + wgt::Extent3d { + width: (size.width * scale) as u32, + height: (size.height * scale) as u32, + depth_or_array_layers: 1, + } + } +} + +impl crate::Surface<super::Api> for super::Surface { + unsafe fn configure( + &self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + log::debug!("build swapchain {:?}", config); + + let caps = &device.shared.private_caps; + *self.swapchain_format.write() = Some(config.format); + *self.extent.write() = config.extent; + + let render_layer = self.render_layer.lock(); + let framebuffer_only = config.usage == crate::TextureUses::COLOR_TARGET; 
+ let display_sync = match config.present_mode { + wgt::PresentMode::Fifo => true, + wgt::PresentMode::Immediate => false, + m => unreachable!("Unsupported present mode: {m:?}"), + }; + let drawable_size = CGSize::new(config.extent.width as f64, config.extent.height as f64); + + match config.composite_alpha_mode { + wgt::CompositeAlphaMode::Opaque => render_layer.set_opaque(true), + wgt::CompositeAlphaMode::PostMultiplied => render_layer.set_opaque(false), + _ => (), + } + + let device_raw = device.shared.device.lock(); + // On iOS, unless the user supplies a view with a CAMetalLayer, we + // create one as a sublayer. However, when the view changes size, + // its sublayers are not automatically resized, and we must resize + // it here. The drawable size and the layer size don't correlate + #[cfg(target_os = "ios")] + { + if let Some(view) = self.view { + let main_layer: *mut Object = msg_send![view.as_ptr(), layer]; + let bounds: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![*render_layer, setFrame: bounds]; + } + } + render_layer.set_device(&device_raw); + render_layer.set_pixel_format(caps.map_format(config.format)); + render_layer.set_framebuffer_only(framebuffer_only); + render_layer.set_presents_with_transaction(self.present_with_transaction); + // opt-in to Metal EDR + // EDR potentially more power used in display and more bandwidth, memory footprint. 
+ let wants_edr = config.format == wgt::TextureFormat::Rgba16Float; + if wants_edr != render_layer.wants_extended_dynamic_range_content() { + render_layer.set_wants_extended_dynamic_range_content(wants_edr); + } + + // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) + render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1); + render_layer.set_drawable_size(drawable_size); + if caps.can_set_next_drawable_timeout { + let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; + } + if caps.can_set_display_sync { + let () = msg_send![*render_layer, setDisplaySyncEnabled: display_sync]; + } + + Ok(()) + } + + unsafe fn unconfigure(&self, _device: &super::Device) { + *self.swapchain_format.write() = None; + } + + unsafe fn acquire_texture( + &self, + _timeout_ms: Option<std::time::Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let render_layer = self.render_layer.lock(); + let (drawable, texture) = match autoreleasepool(|| { + render_layer + .next_drawable() + .map(|drawable| (drawable.to_owned(), drawable.texture().to_owned())) + }) { + Some(pair) => pair, + None => return Ok(None), + }; + + let swapchain_format = self.swapchain_format.read().unwrap(); + let extent = self.extent.read(); + let suf_texture = super::SurfaceTexture { + texture: super::Texture { + raw: texture, + format: swapchain_format, + raw_type: metal::MTLTextureType::D2, + array_layers: 1, + mip_levels: 1, + copy_size: crate::CopyExtent { + width: extent.width, + height: extent.height, + depth: 1, + }, + }, + drawable, + present_with_transaction: self.present_with_transaction, + }; + + Ok(Some(crate::AcquiredSurfaceTexture { + texture: suf_texture, + suboptimal: false, + })) + } + + unsafe fn discard_texture(&self, _texture: super::SurfaceTexture) {} +} diff --git a/third_party/rust/wgpu-hal/src/metal/time.rs b/third_party/rust/wgpu-hal/src/metal/time.rs new file mode 100644 
index 0000000000..5c6bec10cd --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/time.rs @@ -0,0 +1,38 @@ +//! Handling of global timestamps. + +#[repr(C)] +#[derive(Debug)] +struct MachTimebaseInfo { + numerator: u32, + denominator: u32, +} +extern "C" { + fn mach_timebase_info(out: *mut MachTimebaseInfo) -> u32; + fn mach_absolute_time() -> u64; +} + +/// A timer which uses mach_absolute_time to get its time. This is what the metal callbacks use. +#[derive(Debug)] +pub struct PresentationTimer { + scale: MachTimebaseInfo, +} +impl PresentationTimer { + /// Generates a new timer. + pub fn new() -> Self { + // Default to 1 / 1 in case the call to timebase_info fails. + let mut scale = MachTimebaseInfo { + numerator: 1, + denominator: 1, + }; + unsafe { mach_timebase_info(&mut scale) }; + + Self { scale } + } + + /// Gets the current time in nanoseconds. + pub fn get_timestamp_ns(&self) -> u128 { + let time = unsafe { mach_absolute_time() }; + + (time as u128 * self.scale.numerator as u128) / self.scale.denominator as u128 + } +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs new file mode 100644 index 0000000000..85e620d23c --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs @@ -0,0 +1,2006 @@ +use super::conv; + +use ash::{extensions::khr, vk}; +use parking_lot::Mutex; + +use std::{ + collections::BTreeMap, + ffi::CStr, + sync::{atomic::AtomicIsize, Arc}, +}; + +fn depth_stencil_required_flags() -> vk::FormatFeatureFlags { + vk::FormatFeatureFlags::SAMPLED_IMAGE | vk::FormatFeatureFlags::DEPTH_STENCIL_ATTACHMENT +} + +//TODO: const fn? +fn indexing_features() -> wgt::Features { + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::PARTIALLY_BOUND_BINDING_ARRAY +} + +/// Aggregate of the `vk::PhysicalDevice*Features` structs used by `gfx`. 
+#[derive(Debug, Default)] +pub struct PhysicalDeviceFeatures { + core: vk::PhysicalDeviceFeatures, + pub(super) descriptor_indexing: Option<vk::PhysicalDeviceDescriptorIndexingFeaturesEXT>, + imageless_framebuffer: Option<vk::PhysicalDeviceImagelessFramebufferFeaturesKHR>, + timeline_semaphore: Option<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>, + image_robustness: Option<vk::PhysicalDeviceImageRobustnessFeaturesEXT>, + robustness2: Option<vk::PhysicalDeviceRobustness2FeaturesEXT>, + multiview: Option<vk::PhysicalDeviceMultiviewFeaturesKHR>, + sampler_ycbcr_conversion: Option<vk::PhysicalDeviceSamplerYcbcrConversionFeatures>, + astc_hdr: Option<vk::PhysicalDeviceTextureCompressionASTCHDRFeaturesEXT>, + shader_float16: Option<( + vk::PhysicalDeviceShaderFloat16Int8Features, + vk::PhysicalDevice16BitStorageFeatures, + )>, + acceleration_structure: Option<vk::PhysicalDeviceAccelerationStructureFeaturesKHR>, + buffer_device_address: Option<vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR>, + ray_query: Option<vk::PhysicalDeviceRayQueryFeaturesKHR>, + zero_initialize_workgroup_memory: + Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>, +} + +// This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. +unsafe impl Send for PhysicalDeviceFeatures {} +unsafe impl Sync for PhysicalDeviceFeatures {} + +impl PhysicalDeviceFeatures { + /// Add the members of `self` into `info.enabled_features` and its `p_next` chain. 
+ pub fn add_to_device_create_builder<'a>( + &'a mut self, + mut info: vk::DeviceCreateInfoBuilder<'a>, + ) -> vk::DeviceCreateInfoBuilder<'a> { + info = info.enabled_features(&self.core); + if let Some(ref mut feature) = self.descriptor_indexing { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.imageless_framebuffer { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.timeline_semaphore { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.image_robustness { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.robustness2 { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.astc_hdr { + info = info.push_next(feature); + } + if let Some((ref mut f16_i8_feature, ref mut _16bit_feature)) = self.shader_float16 { + info = info.push_next(f16_i8_feature); + info = info.push_next(_16bit_feature); + } + if let Some(ref mut feature) = self.zero_initialize_workgroup_memory { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.acceleration_structure { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.buffer_device_address { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.ray_query { + info = info.push_next(feature); + } + info + } + + /// Create a `PhysicalDeviceFeatures` that will be used to create a logical device. + /// + /// `requested_features` should be the same as what was used to generate `enabled_extensions`. 
+ fn from_extensions_and_requested_features( + device_api_version: u32, + enabled_extensions: &[&'static CStr], + requested_features: wgt::Features, + downlevel_flags: wgt::DownlevelFlags, + private_caps: &super::PrivateCapabilities, + ) -> Self { + let needs_sampled_image_non_uniform = requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_storage_buffer_non_uniform = requested_features.contains( + wgt::Features::BUFFER_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_uniform_buffer_non_uniform = requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_storage_image_non_uniform = requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_partially_bound = + requested_features.intersects(wgt::Features::PARTIALLY_BOUND_BINDING_ARRAY); + + Self { + // vk::PhysicalDeviceFeatures is a struct composed of Bool32's while + // Features is a bitfield so we need to map everything manually + core: vk::PhysicalDeviceFeatures::builder() + .robust_buffer_access(private_caps.robust_buffer_access) + .independent_blend(downlevel_flags.contains(wgt::DownlevelFlags::INDEPENDENT_BLEND)) + .sample_rate_shading( + downlevel_flags.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING), + ) + .image_cube_array( + downlevel_flags.contains(wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES), + ) + .draw_indirect_first_instance( + requested_features.contains(wgt::Features::INDIRECT_FIRST_INSTANCE), + ) + //.dual_src_blend(requested_features.contains(wgt::Features::DUAL_SRC_BLENDING)) + .multi_draw_indirect( + 
requested_features.contains(wgt::Features::MULTI_DRAW_INDIRECT), + ) + .fill_mode_non_solid(requested_features.intersects( + wgt::Features::POLYGON_MODE_LINE | wgt::Features::POLYGON_MODE_POINT, + )) + //.depth_bounds(requested_features.contains(wgt::Features::DEPTH_BOUNDS)) + //.alpha_to_one(requested_features.contains(wgt::Features::ALPHA_TO_ONE)) + //.multi_viewport(requested_features.contains(wgt::Features::MULTI_VIEWPORTS)) + .sampler_anisotropy( + downlevel_flags.contains(wgt::DownlevelFlags::ANISOTROPIC_FILTERING), + ) + .texture_compression_etc2( + requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_ETC2), + ) + .texture_compression_astc_ldr( + requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_ASTC), + ) + .texture_compression_bc( + requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_BC), + ) + //.occlusion_query_precise(requested_features.contains(wgt::Features::PRECISE_OCCLUSION_QUERY)) + .pipeline_statistics_query( + requested_features.contains(wgt::Features::PIPELINE_STATISTICS_QUERY), + ) + .vertex_pipeline_stores_and_atomics( + requested_features.contains(wgt::Features::VERTEX_WRITABLE_STORAGE), + ) + .fragment_stores_and_atomics( + downlevel_flags.contains(wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE), + ) + //.shader_image_gather_extended( + //.shader_storage_image_extended_formats( + .shader_uniform_buffer_array_dynamic_indexing( + requested_features.contains(wgt::Features::BUFFER_BINDING_ARRAY), + ) + .shader_storage_buffer_array_dynamic_indexing(requested_features.contains( + wgt::Features::BUFFER_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY, + )) + .shader_sampled_image_array_dynamic_indexing( + requested_features.contains(wgt::Features::TEXTURE_BINDING_ARRAY), + ) + .shader_storage_buffer_array_dynamic_indexing(requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY, + )) + //.shader_storage_image_array_dynamic_indexing( + 
//.shader_clip_distance(requested_features.contains(wgt::Features::SHADER_CLIP_DISTANCE)) + //.shader_cull_distance(requested_features.contains(wgt::Features::SHADER_CULL_DISTANCE)) + .shader_float64(requested_features.contains(wgt::Features::SHADER_F64)) + //.shader_int64(requested_features.contains(wgt::Features::SHADER_INT64)) + .shader_int16(requested_features.contains(wgt::Features::SHADER_I16)) + //.shader_resource_residency(requested_features.contains(wgt::Features::SHADER_RESOURCE_RESIDENCY)) + .geometry_shader(requested_features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX)) + .depth_clamp(requested_features.contains(wgt::Features::DEPTH_CLIP_CONTROL)) + .dual_src_blend(requested_features.contains(wgt::Features::DUAL_SOURCE_BLENDING)) + .build(), + descriptor_indexing: if requested_features.intersects(indexing_features()) { + Some( + vk::PhysicalDeviceDescriptorIndexingFeaturesEXT::builder() + .shader_sampled_image_array_non_uniform_indexing( + needs_sampled_image_non_uniform, + ) + .shader_storage_image_array_non_uniform_indexing( + needs_storage_image_non_uniform, + ) + .shader_uniform_buffer_array_non_uniform_indexing( + needs_uniform_buffer_non_uniform, + ) + .shader_storage_buffer_array_non_uniform_indexing( + needs_storage_buffer_non_uniform, + ) + .descriptor_binding_partially_bound(needs_partially_bound) + .build(), + ) + } else { + None + }, + imageless_framebuffer: if device_api_version >= vk::API_VERSION_1_2 + || enabled_extensions.contains(&vk::KhrImagelessFramebufferFn::name()) + { + Some( + vk::PhysicalDeviceImagelessFramebufferFeaturesKHR::builder() + .imageless_framebuffer(private_caps.imageless_framebuffers) + .build(), + ) + } else { + None + }, + timeline_semaphore: if device_api_version >= vk::API_VERSION_1_2 + || enabled_extensions.contains(&vk::KhrTimelineSemaphoreFn::name()) + { + Some( + vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR::builder() + .timeline_semaphore(private_caps.timeline_semaphores) + .build(), + ) + } else { + 
None + }, + image_robustness: if device_api_version >= vk::API_VERSION_1_3 + || enabled_extensions.contains(&vk::ExtImageRobustnessFn::name()) + { + Some( + vk::PhysicalDeviceImageRobustnessFeaturesEXT::builder() + .robust_image_access(private_caps.robust_image_access) + .build(), + ) + } else { + None + }, + robustness2: if enabled_extensions.contains(&vk::ExtRobustness2Fn::name()) { + // Note: enabling `robust_buffer_access2` isn't requires, strictly speaking + // since we can enable `robust_buffer_access` all the time. But it improves + // program portability, so we opt into it if they are supported. + Some( + vk::PhysicalDeviceRobustness2FeaturesEXT::builder() + .robust_buffer_access2(private_caps.robust_buffer_access2) + .robust_image_access2(private_caps.robust_image_access2) + .build(), + ) + } else { + None + }, + multiview: if device_api_version >= vk::API_VERSION_1_1 + || enabled_extensions.contains(&vk::KhrMultiviewFn::name()) + { + Some( + vk::PhysicalDeviceMultiviewFeatures::builder() + .multiview(requested_features.contains(wgt::Features::MULTIVIEW)) + .build(), + ) + } else { + None + }, + sampler_ycbcr_conversion: if device_api_version >= vk::API_VERSION_1_1 + || enabled_extensions.contains(&vk::KhrSamplerYcbcrConversionFn::name()) + { + Some( + vk::PhysicalDeviceSamplerYcbcrConversionFeatures::builder() + // .sampler_ycbcr_conversion(requested_features.contains(wgt::Features::TEXTURE_FORMAT_NV12)) + .build(), + ) + } else { + None + }, + astc_hdr: if enabled_extensions.contains(&vk::ExtTextureCompressionAstcHdrFn::name()) { + Some( + vk::PhysicalDeviceTextureCompressionASTCHDRFeaturesEXT::builder() + .texture_compression_astc_hdr(true) + .build(), + ) + } else { + None + }, + shader_float16: if requested_features.contains(wgt::Features::SHADER_F16) { + Some(( + vk::PhysicalDeviceShaderFloat16Int8Features::builder() + .shader_float16(true) + .build(), + vk::PhysicalDevice16BitStorageFeatures::builder() + .storage_buffer16_bit_access(true) + 
.uniform_and_storage_buffer16_bit_access(true) + .build(), + )) + } else { + None + }, + acceleration_structure: if enabled_extensions + .contains(&vk::KhrAccelerationStructureFn::name()) + { + Some( + vk::PhysicalDeviceAccelerationStructureFeaturesKHR::builder() + .acceleration_structure(true) + .build(), + ) + } else { + None + }, + buffer_device_address: if enabled_extensions + .contains(&vk::KhrBufferDeviceAddressFn::name()) + { + Some( + vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR::builder() + .buffer_device_address(true) + .build(), + ) + } else { + None + }, + ray_query: if enabled_extensions.contains(&vk::KhrRayQueryFn::name()) { + Some( + vk::PhysicalDeviceRayQueryFeaturesKHR::builder() + .ray_query(true) + .build(), + ) + } else { + None + }, + zero_initialize_workgroup_memory: if device_api_version >= vk::API_VERSION_1_3 + || enabled_extensions.contains(&vk::KhrZeroInitializeWorkgroupMemoryFn::name()) + { + Some( + vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures::builder() + .shader_zero_initialize_workgroup_memory( + private_caps.zero_initialize_workgroup_memory, + ) + .build(), + ) + } else { + None + }, + } + } + + fn to_wgpu( + &self, + instance: &ash::Instance, + phd: vk::PhysicalDevice, + caps: &PhysicalDeviceCapabilities, + ) -> (wgt::Features, wgt::DownlevelFlags) { + use crate::auxil::db; + use wgt::{DownlevelFlags as Df, Features as F}; + let mut features = F::empty() + | F::SPIRV_SHADER_PASSTHROUGH + | F::MAPPABLE_PRIMARY_BUFFERS + | F::PUSH_CONSTANTS + | F::ADDRESS_MODE_CLAMP_TO_BORDER + | F::ADDRESS_MODE_CLAMP_TO_ZERO + | F::TIMESTAMP_QUERY + | F::TIMESTAMP_QUERY_INSIDE_PASSES + | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | F::CLEAR_TEXTURE; + + let mut dl_flags = Df::COMPUTE_SHADERS + | Df::BASE_VERTEX + | Df::READ_ONLY_DEPTH_STENCIL + | Df::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES + | Df::COMPARISON_SAMPLERS + | Df::VERTEX_STORAGE + | Df::FRAGMENT_STORAGE + | Df::DEPTH_TEXTURE_AND_BUFFER_COPIES + | 
Df::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED + | Df::UNRESTRICTED_INDEX_BUFFER + | Df::INDIRECT_EXECUTION + | Df::VIEW_FORMATS + | Df::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES + | Df::NONBLOCKING_QUERY_RESOLVE + | Df::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW; + + dl_flags.set( + Df::SURFACE_VIEW_FORMATS, + caps.supports_extension(vk::KhrSwapchainMutableFormatFn::name()), + ); + dl_flags.set(Df::CUBE_ARRAY_TEXTURES, self.core.image_cube_array != 0); + dl_flags.set(Df::ANISOTROPIC_FILTERING, self.core.sampler_anisotropy != 0); + dl_flags.set( + Df::FRAGMENT_WRITABLE_STORAGE, + self.core.fragment_stores_and_atomics != 0, + ); + dl_flags.set(Df::MULTISAMPLED_SHADING, self.core.sample_rate_shading != 0); + dl_flags.set(Df::INDEPENDENT_BLEND, self.core.independent_blend != 0); + dl_flags.set( + Df::FULL_DRAW_INDEX_UINT32, + self.core.full_draw_index_uint32 != 0, + ); + dl_flags.set(Df::DEPTH_BIAS_CLAMP, self.core.depth_bias_clamp != 0); + + features.set( + F::INDIRECT_FIRST_INSTANCE, + self.core.draw_indirect_first_instance != 0, + ); + //if self.core.dual_src_blend != 0 + features.set(F::MULTI_DRAW_INDIRECT, self.core.multi_draw_indirect != 0); + features.set(F::POLYGON_MODE_LINE, self.core.fill_mode_non_solid != 0); + features.set(F::POLYGON_MODE_POINT, self.core.fill_mode_non_solid != 0); + //if self.core.depth_bounds != 0 { + //if self.core.alpha_to_one != 0 { + //if self.core.multi_viewport != 0 { + features.set( + F::TEXTURE_COMPRESSION_ETC2, + self.core.texture_compression_etc2 != 0, + ); + features.set( + F::TEXTURE_COMPRESSION_ASTC, + self.core.texture_compression_astc_ldr != 0, + ); + features.set( + F::TEXTURE_COMPRESSION_BC, + self.core.texture_compression_bc != 0, + ); + features.set( + F::PIPELINE_STATISTICS_QUERY, + self.core.pipeline_statistics_query != 0, + ); + features.set( + F::VERTEX_WRITABLE_STORAGE, + self.core.vertex_pipeline_stores_and_atomics != 0, + ); + //if self.core.shader_image_gather_extended != 0 { + //if 
self.core.shader_storage_image_extended_formats != 0 { + features.set( + F::BUFFER_BINDING_ARRAY, + self.core.shader_uniform_buffer_array_dynamic_indexing != 0, + ); + features.set( + F::TEXTURE_BINDING_ARRAY, + self.core.shader_sampled_image_array_dynamic_indexing != 0, + ); + features.set(F::SHADER_PRIMITIVE_INDEX, self.core.geometry_shader != 0); + if Self::all_features_supported( + &features, + &[ + ( + F::BUFFER_BINDING_ARRAY, + self.core.shader_storage_buffer_array_dynamic_indexing, + ), + ( + F::TEXTURE_BINDING_ARRAY, + self.core.shader_storage_image_array_dynamic_indexing, + ), + ], + ) { + features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY); + } + //if self.core.shader_storage_image_array_dynamic_indexing != 0 { + //if self.core.shader_clip_distance != 0 { + //if self.core.shader_cull_distance != 0 { + features.set(F::SHADER_F64, self.core.shader_float64 != 0); + //if self.core.shader_int64 != 0 { + features.set(F::SHADER_I16, self.core.shader_int16 != 0); + + //if caps.supports_extension(vk::KhrSamplerMirrorClampToEdgeFn::name()) { + //if caps.supports_extension(vk::ExtSamplerFilterMinmaxFn::name()) { + features.set( + F::MULTI_DRAW_INDIRECT_COUNT, + caps.supports_extension(vk::KhrDrawIndirectCountFn::name()), + ); + features.set( + F::CONSERVATIVE_RASTERIZATION, + caps.supports_extension(vk::ExtConservativeRasterizationFn::name()), + ); + + let intel_windows = caps.properties.vendor_id == db::intel::VENDOR && cfg!(windows); + + if let Some(ref descriptor_indexing) = self.descriptor_indexing { + const STORAGE: F = F::STORAGE_RESOURCE_BINDING_ARRAY; + if Self::all_features_supported( + &features, + &[ + ( + F::TEXTURE_BINDING_ARRAY, + descriptor_indexing.shader_sampled_image_array_non_uniform_indexing, + ), + ( + F::BUFFER_BINDING_ARRAY | STORAGE, + descriptor_indexing.shader_storage_buffer_array_non_uniform_indexing, + ), + ], + ) { + features.insert(F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING); + } + if Self::all_features_supported( + 
&features, + &[ + ( + F::BUFFER_BINDING_ARRAY, + descriptor_indexing.shader_uniform_buffer_array_non_uniform_indexing, + ), + ( + F::TEXTURE_BINDING_ARRAY | STORAGE, + descriptor_indexing.shader_storage_image_array_non_uniform_indexing, + ), + ], + ) { + features.insert(F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING); + } + if descriptor_indexing.descriptor_binding_partially_bound != 0 && !intel_windows { + features |= F::PARTIALLY_BOUND_BINDING_ARRAY; + } + } + + features.set(F::DEPTH_CLIP_CONTROL, self.core.depth_clamp != 0); + features.set(F::DUAL_SOURCE_BLENDING, self.core.dual_src_blend != 0); + + if let Some(ref multiview) = self.multiview { + features.set(F::MULTIVIEW, multiview.multiview != 0); + } + + features.set( + F::TEXTURE_FORMAT_16BIT_NORM, + is_format_16bit_norm_supported(instance, phd), + ); + + if let Some(ref astc_hdr) = self.astc_hdr { + features.set( + F::TEXTURE_COMPRESSION_ASTC_HDR, + astc_hdr.texture_compression_astc_hdr != 0, + ); + } + + if let Some((ref f16_i8, ref bit16)) = self.shader_float16 { + features.set( + F::SHADER_F16, + f16_i8.shader_float16 != 0 + && bit16.storage_buffer16_bit_access != 0 + && bit16.uniform_and_storage_buffer16_bit_access != 0, + ); + } + + let supports_depth_format = |format| { + supports_format( + instance, + phd, + format, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ) + }; + + let texture_s8 = supports_depth_format(vk::Format::S8_UINT); + let texture_d32 = supports_depth_format(vk::Format::D32_SFLOAT); + let texture_d24_s8 = supports_depth_format(vk::Format::D24_UNORM_S8_UINT); + let texture_d32_s8 = supports_depth_format(vk::Format::D32_SFLOAT_S8_UINT); + + let stencil8 = texture_s8 || texture_d24_s8; + let depth24_plus_stencil8 = texture_d24_s8 || texture_d32_s8; + + dl_flags.set( + Df::WEBGPU_TEXTURE_FORMAT_SUPPORT, + stencil8 && depth24_plus_stencil8 && texture_d32, + ); + + features.set(F::DEPTH32FLOAT_STENCIL8, texture_d32_s8); + + features.set( + 
F::RAY_TRACING_ACCELERATION_STRUCTURE, + caps.supports_extension(vk::KhrDeferredHostOperationsFn::name()) + && caps.supports_extension(vk::KhrAccelerationStructureFn::name()) + && caps.supports_extension(vk::KhrBufferDeviceAddressFn::name()), + ); + + features.set( + F::RAY_QUERY, + caps.supports_extension(vk::KhrRayQueryFn::name()), + ); + + let rg11b10ufloat_renderable = supports_format( + instance, + phd, + vk::Format::B10G11R11_UFLOAT_PACK32, + vk::ImageTiling::OPTIMAL, + vk::FormatFeatureFlags::COLOR_ATTACHMENT + | vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND, + ); + features.set(F::RG11B10UFLOAT_RENDERABLE, rg11b10ufloat_renderable); + features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); + + features.set( + F::BGRA8UNORM_STORAGE, + supports_bgra8unorm_storage(instance, phd, caps.device_api_version), + ); + + features.set( + F::FLOAT32_FILTERABLE, + is_float32_filterable_supported(instance, phd), + ); + + if let Some(ref _sampler_ycbcr_conversion) = self.sampler_ycbcr_conversion { + features.set( + F::TEXTURE_FORMAT_NV12, + supports_format( + instance, + phd, + vk::Format::G8_B8R8_2PLANE_420_UNORM, + vk::ImageTiling::OPTIMAL, + vk::FormatFeatureFlags::SAMPLED_IMAGE + | vk::FormatFeatureFlags::TRANSFER_SRC + | vk::FormatFeatureFlags::TRANSFER_DST, + ) && !caps + .driver + .map(|driver| driver.driver_id == vk::DriverId::MOLTENVK) + .unwrap_or_default(), + ); + } + + (features, dl_flags) + } + + fn all_features_supported( + features: &wgt::Features, + implications: &[(wgt::Features, vk::Bool32)], + ) -> bool { + implications + .iter() + .all(|&(flag, support)| !features.contains(flag) || support != 0) + } +} + +/// Information gathered about a physical device capabilities. 
+#[derive(Default, Debug)] +pub struct PhysicalDeviceCapabilities { + supported_extensions: Vec<vk::ExtensionProperties>, + properties: vk::PhysicalDeviceProperties, + maintenance_3: Option<vk::PhysicalDeviceMaintenance3Properties>, + descriptor_indexing: Option<vk::PhysicalDeviceDescriptorIndexingPropertiesEXT>, + acceleration_structure: Option<vk::PhysicalDeviceAccelerationStructurePropertiesKHR>, + driver: Option<vk::PhysicalDeviceDriverPropertiesKHR>, + /// The device API version. + /// + /// Which is the version of Vulkan supported for device-level functionality. + /// + /// It is associated with a `VkPhysicalDevice` and its children. + device_api_version: u32, +} + +// This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. +unsafe impl Send for PhysicalDeviceCapabilities {} +unsafe impl Sync for PhysicalDeviceCapabilities {} + +impl PhysicalDeviceCapabilities { + pub fn properties(&self) -> vk::PhysicalDeviceProperties { + self.properties + } + + pub fn supports_extension(&self, extension: &CStr) -> bool { + use crate::auxil::cstr_from_bytes_until_nul; + self.supported_extensions + .iter() + .any(|ep| cstr_from_bytes_until_nul(&ep.extension_name) == Some(extension)) + } + + /// Map `requested_features` to the list of Vulkan extension strings required to create the logical device. + fn get_required_extensions(&self, requested_features: wgt::Features) -> Vec<&'static CStr> { + let mut extensions = Vec::new(); + + // Note that quite a few extensions depend on the `VK_KHR_get_physical_device_properties2` instance extension. + // We enable `VK_KHR_get_physical_device_properties2` unconditionally (if available). 
+ + // Require `VK_KHR_swapchain` + extensions.push(vk::KhrSwapchainFn::name()); + + if self.device_api_version < vk::API_VERSION_1_1 { + // Require either `VK_KHR_maintenance1` or `VK_AMD_negative_viewport_height` + if self.supports_extension(vk::KhrMaintenance1Fn::name()) { + extensions.push(vk::KhrMaintenance1Fn::name()); + } else { + // `VK_AMD_negative_viewport_height` is obsoleted by `VK_KHR_maintenance1` and must not be enabled alongside it + extensions.push(vk::AmdNegativeViewportHeightFn::name()); + } + + // Optional `VK_KHR_maintenance2` + if self.supports_extension(vk::KhrMaintenance2Fn::name()) { + extensions.push(vk::KhrMaintenance2Fn::name()); + } + + // Optional `VK_KHR_maintenance3` + if self.supports_extension(vk::KhrMaintenance3Fn::name()) { + extensions.push(vk::KhrMaintenance3Fn::name()); + } + + // Require `VK_KHR_storage_buffer_storage_class` + extensions.push(vk::KhrStorageBufferStorageClassFn::name()); + + // Require `VK_KHR_multiview` if the associated feature was requested + if requested_features.contains(wgt::Features::MULTIVIEW) { + extensions.push(vk::KhrMultiviewFn::name()); + } + + // Require `VK_KHR_sampler_ycbcr_conversion` if the associated feature was requested + if requested_features.contains(wgt::Features::TEXTURE_FORMAT_NV12) { + extensions.push(vk::KhrSamplerYcbcrConversionFn::name()); + } + } + + if self.device_api_version < vk::API_VERSION_1_2 { + // Optional `VK_KHR_image_format_list` + if self.supports_extension(vk::KhrImageFormatListFn::name()) { + extensions.push(vk::KhrImageFormatListFn::name()); + } + + // Optional `VK_KHR_imageless_framebuffer` + if self.supports_extension(vk::KhrImagelessFramebufferFn::name()) { + extensions.push(vk::KhrImagelessFramebufferFn::name()); + // Require `VK_KHR_maintenance2` due to it being a dependency + if self.device_api_version < vk::API_VERSION_1_1 { + extensions.push(vk::KhrMaintenance2Fn::name()); + } + } + + // Optional `VK_KHR_driver_properties` + if 
self.supports_extension(vk::KhrDriverPropertiesFn::name()) { + extensions.push(vk::KhrDriverPropertiesFn::name()); + } + + // Optional `VK_KHR_timeline_semaphore` + if self.supports_extension(vk::KhrTimelineSemaphoreFn::name()) { + extensions.push(vk::KhrTimelineSemaphoreFn::name()); + } + + // Require `VK_EXT_descriptor_indexing` if one of the associated features was requested + if requested_features.intersects(indexing_features()) { + extensions.push(vk::ExtDescriptorIndexingFn::name()); + } + + // Require `VK_KHR_shader_float16_int8` and `VK_KHR_16bit_storage` if the associated feature was requested + if requested_features.contains(wgt::Features::SHADER_F16) { + extensions.push(vk::KhrShaderFloat16Int8Fn::name()); + // `VK_KHR_16bit_storage` requires `VK_KHR_storage_buffer_storage_class`, however we require that one already + if self.device_api_version < vk::API_VERSION_1_1 { + extensions.push(vk::Khr16bitStorageFn::name()); + } + } + + //extensions.push(vk::KhrSamplerMirrorClampToEdgeFn::name()); + //extensions.push(vk::ExtSamplerFilterMinmaxFn::name()); + } + + if self.device_api_version < vk::API_VERSION_1_3 { + // Optional `VK_EXT_image_robustness` + if self.supports_extension(vk::ExtImageRobustnessFn::name()) { + extensions.push(vk::ExtImageRobustnessFn::name()); + } + } + + // Optional `VK_KHR_swapchain_mutable_format` + if self.supports_extension(vk::KhrSwapchainMutableFormatFn::name()) { + extensions.push(vk::KhrSwapchainMutableFormatFn::name()); + } + + // Optional `VK_EXT_robustness2` + if self.supports_extension(vk::ExtRobustness2Fn::name()) { + extensions.push(vk::ExtRobustness2Fn::name()); + } + + // Require `VK_KHR_draw_indirect_count` if the associated feature was requested + // Even though Vulkan 1.2 has promoted the extension to core, we must require the extension to avoid + // large amounts of spaghetti involved with using PhysicalDeviceVulkan12Features. 
+ if requested_features.contains(wgt::Features::MULTI_DRAW_INDIRECT_COUNT) { + extensions.push(vk::KhrDrawIndirectCountFn::name()); + } + + // Require `VK_KHR_deferred_host_operations`, `VK_KHR_acceleration_structure` and `VK_KHR_buffer_device_address` if the feature `RAY_TRACING` was requested + if requested_features.contains(wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE) { + extensions.push(vk::KhrDeferredHostOperationsFn::name()); + extensions.push(vk::KhrAccelerationStructureFn::name()); + extensions.push(vk::KhrBufferDeviceAddressFn::name()); + } + + // Require `VK_KHR_ray_query` if the associated feature was requested + if requested_features.contains(wgt::Features::RAY_QUERY) { + extensions.push(vk::KhrRayQueryFn::name()); + } + + // Require `VK_EXT_conservative_rasterization` if the associated feature was requested + if requested_features.contains(wgt::Features::CONSERVATIVE_RASTERIZATION) { + extensions.push(vk::ExtConservativeRasterizationFn::name()); + } + + // Require `VK_KHR_portability_subset` on macOS/iOS + #[cfg(any(target_os = "macos", target_os = "ios"))] + extensions.push(vk::KhrPortabilitySubsetFn::name()); + + // Require `VK_EXT_texture_compression_astc_hdr` if the associated feature was requested + if requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR) { + extensions.push(vk::ExtTextureCompressionAstcHdrFn::name()); + } + + extensions + } + + fn to_wgpu_limits(&self) -> wgt::Limits { + let limits = &self.properties.limits; + + let max_compute_workgroup_sizes = limits.max_compute_work_group_size; + let max_compute_workgroups_per_dimension = limits.max_compute_work_group_count[0] + .min(limits.max_compute_work_group_count[1]) + .min(limits.max_compute_work_group_count[2]); + + // Prevent very large buffers on mesa and most android devices. 
+ let is_nvidia = self.properties.vendor_id == crate::auxil::db::nvidia::VENDOR; + let max_buffer_size = + if (cfg!(target_os = "linux") || cfg!(target_os = "android")) && !is_nvidia { + i32::MAX as u64 + } else { + u64::MAX + }; + + wgt::Limits { + max_texture_dimension_1d: limits.max_image_dimension1_d, + max_texture_dimension_2d: limits.max_image_dimension2_d, + max_texture_dimension_3d: limits.max_image_dimension3_d, + max_texture_array_layers: limits.max_image_array_layers, + max_bind_groups: limits + .max_bound_descriptor_sets + .min(crate::MAX_BIND_GROUPS as u32), + max_bindings_per_bind_group: wgt::Limits::default().max_bindings_per_bind_group, + max_dynamic_uniform_buffers_per_pipeline_layout: limits + .max_descriptor_set_uniform_buffers_dynamic, + max_dynamic_storage_buffers_per_pipeline_layout: limits + .max_descriptor_set_storage_buffers_dynamic, + max_sampled_textures_per_shader_stage: limits.max_per_stage_descriptor_sampled_images, + max_samplers_per_shader_stage: limits.max_per_stage_descriptor_samplers, + max_storage_buffers_per_shader_stage: limits.max_per_stage_descriptor_storage_buffers, + max_storage_textures_per_shader_stage: limits.max_per_stage_descriptor_storage_images, + max_uniform_buffers_per_shader_stage: limits.max_per_stage_descriptor_uniform_buffers, + max_uniform_buffer_binding_size: limits + .max_uniform_buffer_range + .min(crate::auxil::MAX_I32_BINDING_SIZE), + max_storage_buffer_binding_size: limits + .max_storage_buffer_range + .min(crate::auxil::MAX_I32_BINDING_SIZE), + max_vertex_buffers: limits + .max_vertex_input_bindings + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: limits.max_vertex_input_attributes, + max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride, + max_push_constant_size: limits.max_push_constants_size, + min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32, + min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as 
u32, + max_inter_stage_shader_components: limits + .max_vertex_output_components + .min(limits.max_fragment_input_components), + max_compute_workgroup_storage_size: limits.max_compute_shared_memory_size, + max_compute_invocations_per_workgroup: limits.max_compute_work_group_invocations, + max_compute_workgroup_size_x: max_compute_workgroup_sizes[0], + max_compute_workgroup_size_y: max_compute_workgroup_sizes[1], + max_compute_workgroup_size_z: max_compute_workgroup_sizes[2], + max_compute_workgroups_per_dimension, + max_buffer_size, + max_non_sampler_bindings: std::u32::MAX, + } + } + + fn to_hal_alignments(&self) -> crate::Alignments { + let limits = &self.properties.limits; + crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(limits.optimal_buffer_copy_offset_alignment) + .unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(limits.optimal_buffer_copy_row_pitch_alignment) + .unwrap(), + } + } +} + +impl super::InstanceShared { + #[allow(trivial_casts)] // false positives + fn inspect( + &self, + phd: vk::PhysicalDevice, + ) -> (PhysicalDeviceCapabilities, PhysicalDeviceFeatures) { + let capabilities = { + let mut capabilities = PhysicalDeviceCapabilities::default(); + capabilities.supported_extensions = + unsafe { self.raw.enumerate_device_extension_properties(phd).unwrap() }; + capabilities.properties = unsafe { self.raw.get_physical_device_properties(phd) }; + capabilities.device_api_version = capabilities.properties.api_version; + + if let Some(ref get_device_properties) = self.get_physical_device_properties { + // Get these now to avoid borrowing conflicts later + let supports_maintenance3 = capabilities.device_api_version >= vk::API_VERSION_1_1 + || capabilities.supports_extension(vk::KhrMaintenance3Fn::name()); + let supports_descriptor_indexing = capabilities.device_api_version + >= vk::API_VERSION_1_2 + || capabilities.supports_extension(vk::ExtDescriptorIndexingFn::name()); + let supports_driver_properties = capabilities.device_api_version + 
>= vk::API_VERSION_1_2 + || capabilities.supports_extension(vk::KhrDriverPropertiesFn::name()); + + let supports_acceleration_structure = + capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()); + + let mut builder = vk::PhysicalDeviceProperties2KHR::builder(); + if supports_maintenance3 { + capabilities.maintenance_3 = + Some(vk::PhysicalDeviceMaintenance3Properties::default()); + builder = builder.push_next(capabilities.maintenance_3.as_mut().unwrap()); + } + + if supports_descriptor_indexing { + let next = capabilities + .descriptor_indexing + .insert(vk::PhysicalDeviceDescriptorIndexingPropertiesEXT::default()); + builder = builder.push_next(next); + } + + if supports_acceleration_structure { + let next = capabilities + .acceleration_structure + .insert(vk::PhysicalDeviceAccelerationStructurePropertiesKHR::default()); + builder = builder.push_next(next); + } + + if supports_driver_properties { + let next = capabilities + .driver + .insert(vk::PhysicalDeviceDriverPropertiesKHR::default()); + builder = builder.push_next(next); + } + + let mut properties2 = builder.build(); + unsafe { + get_device_properties.get_physical_device_properties2(phd, &mut properties2); + } + + if is_intel_igpu_outdated_for_robustness2( + capabilities.properties, + capabilities.driver, + ) { + use crate::auxil::cstr_from_bytes_until_nul; + capabilities.supported_extensions.retain(|&x| { + cstr_from_bytes_until_nul(&x.extension_name) + != Some(vk::ExtRobustness2Fn::name()) + }); + } + }; + capabilities + }; + + let mut features = PhysicalDeviceFeatures::default(); + features.core = if let Some(ref get_device_properties) = self.get_physical_device_properties + { + let core = vk::PhysicalDeviceFeatures::default(); + let mut builder = vk::PhysicalDeviceFeatures2KHR::builder().features(core); + + // `VK_KHR_multiview` is promoted to 1.1 + if capabilities.device_api_version >= vk::API_VERSION_1_1 + || capabilities.supports_extension(vk::KhrMultiviewFn::name()) + { + let next 
= features + .multiview + .insert(vk::PhysicalDeviceMultiviewFeatures::default()); + builder = builder.push_next(next); + } + + // `VK_KHR_sampler_ycbcr_conversion` is promoted to 1.1 + if capabilities.device_api_version >= vk::API_VERSION_1_1 + || capabilities.supports_extension(vk::KhrSamplerYcbcrConversionFn::name()) + { + let next = features + .sampler_ycbcr_conversion + .insert(vk::PhysicalDeviceSamplerYcbcrConversionFeatures::default()); + builder = builder.push_next(next); + } + + if capabilities.supports_extension(vk::ExtDescriptorIndexingFn::name()) { + let next = features + .descriptor_indexing + .insert(vk::PhysicalDeviceDescriptorIndexingFeaturesEXT::default()); + builder = builder.push_next(next); + } + + // `VK_KHR_imageless_framebuffer` is promoted to 1.2, but has no changes, so we can keep using the extension unconditionally. + if capabilities.supports_extension(vk::KhrImagelessFramebufferFn::name()) { + let next = features + .imageless_framebuffer + .insert(vk::PhysicalDeviceImagelessFramebufferFeaturesKHR::default()); + builder = builder.push_next(next); + } + + // `VK_KHR_timeline_semaphore` is promoted to 1.2, but has no changes, so we can keep using the extension unconditionally. 
+ if capabilities.supports_extension(vk::KhrTimelineSemaphoreFn::name()) { + let next = features + .timeline_semaphore + .insert(vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR::default()); + builder = builder.push_next(next); + } + + if capabilities.supports_extension(vk::ExtImageRobustnessFn::name()) { + let next = features + .image_robustness + .insert(vk::PhysicalDeviceImageRobustnessFeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::ExtRobustness2Fn::name()) { + let next = features + .robustness2 + .insert(vk::PhysicalDeviceRobustness2FeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::ExtTextureCompressionAstcHdrFn::name()) { + let next = features + .astc_hdr + .insert(vk::PhysicalDeviceTextureCompressionASTCHDRFeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::KhrShaderFloat16Int8Fn::name()) + && capabilities.supports_extension(vk::Khr16bitStorageFn::name()) + { + let next = features.shader_float16.insert(( + vk::PhysicalDeviceShaderFloat16Int8FeaturesKHR::default(), + vk::PhysicalDevice16BitStorageFeaturesKHR::default(), + )); + builder = builder.push_next(&mut next.0); + builder = builder.push_next(&mut next.1); + } + if capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()) { + let next = features + .acceleration_structure + .insert(vk::PhysicalDeviceAccelerationStructureFeaturesKHR::default()); + builder = builder.push_next(next); + } + + // `VK_KHR_zero_initialize_workgroup_memory` is promoted to 1.3 + if capabilities.device_api_version >= vk::API_VERSION_1_3 + || capabilities.supports_extension(vk::KhrZeroInitializeWorkgroupMemoryFn::name()) + { + let next = features + .zero_initialize_workgroup_memory + .insert(vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures::default()); + builder = builder.push_next(next); + } + + let mut features2 = builder.build(); + unsafe { + 
get_device_properties.get_physical_device_features2(phd, &mut features2); + } + features2.features + } else { + unsafe { self.raw.get_physical_device_features(phd) } + }; + + (capabilities, features) + } +} + +impl super::Instance { + pub fn expose_adapter( + &self, + phd: vk::PhysicalDevice, + ) -> Option<crate::ExposedAdapter<super::Api>> { + use crate::auxil::cstr_from_bytes_until_nul; + use crate::auxil::db; + + let (phd_capabilities, phd_features) = self.shared.inspect(phd); + + let info = wgt::AdapterInfo { + name: { + cstr_from_bytes_until_nul(&phd_capabilities.properties.device_name) + .and_then(|info| info.to_str().ok()) + .unwrap_or("?") + .to_owned() + }, + vendor: phd_capabilities.properties.vendor_id, + device: phd_capabilities.properties.device_id, + device_type: match phd_capabilities.properties.device_type { + ash::vk::PhysicalDeviceType::OTHER => wgt::DeviceType::Other, + ash::vk::PhysicalDeviceType::INTEGRATED_GPU => wgt::DeviceType::IntegratedGpu, + ash::vk::PhysicalDeviceType::DISCRETE_GPU => wgt::DeviceType::DiscreteGpu, + ash::vk::PhysicalDeviceType::VIRTUAL_GPU => wgt::DeviceType::VirtualGpu, + ash::vk::PhysicalDeviceType::CPU => wgt::DeviceType::Cpu, + _ => wgt::DeviceType::Other, + }, + driver: { + phd_capabilities + .driver + .as_ref() + .and_then(|driver| cstr_from_bytes_until_nul(&driver.driver_name)) + .and_then(|name| name.to_str().ok()) + .unwrap_or("?") + .to_owned() + }, + driver_info: { + phd_capabilities + .driver + .as_ref() + .and_then(|driver| cstr_from_bytes_until_nul(&driver.driver_info)) + .and_then(|name| name.to_str().ok()) + .unwrap_or("?") + .to_owned() + }, + backend: wgt::Backend::Vulkan, + }; + + let (available_features, downlevel_flags) = + phd_features.to_wgpu(&self.shared.raw, phd, &phd_capabilities); + let mut workarounds = super::Workarounds::empty(); + { + // TODO: only enable for particular devices + workarounds |= super::Workarounds::SEPARATE_ENTRY_POINTS; + workarounds.set( + 
super::Workarounds::EMPTY_RESOLVE_ATTACHMENT_LISTS, + phd_capabilities.properties.vendor_id == db::qualcomm::VENDOR, + ); + workarounds.set( + super::Workarounds::FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16, + phd_capabilities.properties.vendor_id == db::nvidia::VENDOR, + ); + }; + + if let Some(driver) = phd_capabilities.driver { + if driver.conformance_version.major == 0 { + if driver.driver_id == ash::vk::DriverId::MOLTENVK { + log::debug!("Adapter is not Vulkan compliant, but is MoltenVK, continuing"); + } else if self + .shared + .flags + .contains(wgt::InstanceFlags::ALLOW_UNDERLYING_NONCOMPLIANT_ADAPTER) + { + log::warn!("Adapter is not Vulkan compliant: {}", info.name); + } else { + log::warn!( + "Adapter is not Vulkan compliant, hiding adapter: {}", + info.name + ); + return None; + } + } + } + if phd_capabilities.device_api_version == vk::API_VERSION_1_0 + && !phd_capabilities.supports_extension(vk::KhrStorageBufferStorageClassFn::name()) + { + log::warn!( + "SPIR-V storage buffer class is not supported, hiding adapter: {}", + info.name + ); + return None; + } + if !phd_capabilities.supports_extension(vk::AmdNegativeViewportHeightFn::name()) + && !phd_capabilities.supports_extension(vk::KhrMaintenance1Fn::name()) + && phd_capabilities.device_api_version < vk::API_VERSION_1_1 + { + log::warn!( + "viewport Y-flip is not supported, hiding adapter: {}", + info.name + ); + return None; + } + + let queue_families = unsafe { + self.shared + .raw + .get_physical_device_queue_family_properties(phd) + }; + let queue_flags = queue_families.first()?.queue_flags; + if !queue_flags.contains(vk::QueueFlags::GRAPHICS) { + log::warn!("The first queue only exposes {:?}", queue_flags); + return None; + } + + let private_caps = super::PrivateCapabilities { + flip_y_requires_shift: phd_capabilities.device_api_version >= vk::API_VERSION_1_1 + || phd_capabilities.supports_extension(vk::KhrMaintenance1Fn::name()), + imageless_framebuffers: match 
phd_features.imageless_framebuffer { + Some(features) => features.imageless_framebuffer == vk::TRUE, + None => phd_features + .imageless_framebuffer + .map_or(false, |ext| ext.imageless_framebuffer != 0), + }, + image_view_usage: phd_capabilities.device_api_version >= vk::API_VERSION_1_1 + || phd_capabilities.supports_extension(vk::KhrMaintenance2Fn::name()), + timeline_semaphores: match phd_features.timeline_semaphore { + Some(features) => features.timeline_semaphore == vk::TRUE, + None => phd_features + .timeline_semaphore + .map_or(false, |ext| ext.timeline_semaphore != 0), + }, + texture_d24: supports_format( + &self.shared.raw, + phd, + vk::Format::X8_D24_UNORM_PACK32, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ), + texture_d24_s8: supports_format( + &self.shared.raw, + phd, + vk::Format::D24_UNORM_S8_UINT, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ), + texture_s8: supports_format( + &self.shared.raw, + phd, + vk::Format::S8_UINT, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ), + non_coherent_map_mask: phd_capabilities.properties.limits.non_coherent_atom_size - 1, + can_present: true, + //TODO: make configurable + robust_buffer_access: phd_features.core.robust_buffer_access != 0, + robust_image_access: match phd_features.robustness2 { + Some(ref f) => f.robust_image_access2 != 0, + None => phd_features + .image_robustness + .map_or(false, |ext| ext.robust_image_access != 0), + }, + robust_buffer_access2: phd_features + .robustness2 + .as_ref() + .map(|r| r.robust_buffer_access2 == 1) + .unwrap_or_default(), + robust_image_access2: phd_features + .robustness2 + .as_ref() + .map(|r| r.robust_image_access2 == 1) + .unwrap_or_default(), + zero_initialize_workgroup_memory: phd_features + .zero_initialize_workgroup_memory + .map_or(false, |ext| { + ext.shader_zero_initialize_workgroup_memory == vk::TRUE + }), + image_format_list: phd_capabilities.device_api_version >= vk::API_VERSION_1_2 + || 
phd_capabilities.supports_extension(vk::KhrImageFormatListFn::name()), + }; + let capabilities = crate::Capabilities { + limits: phd_capabilities.to_wgpu_limits(), + alignments: phd_capabilities.to_hal_alignments(), + downlevel: wgt::DownlevelCapabilities { + flags: downlevel_flags, + limits: wgt::DownlevelLimits {}, + shader_model: wgt::ShaderModel::Sm5, //TODO? + }, + }; + + let adapter = super::Adapter { + raw: phd, + instance: Arc::clone(&self.shared), + //queue_families, + known_memory_flags: vk::MemoryPropertyFlags::DEVICE_LOCAL + | vk::MemoryPropertyFlags::HOST_VISIBLE + | vk::MemoryPropertyFlags::HOST_COHERENT + | vk::MemoryPropertyFlags::HOST_CACHED + | vk::MemoryPropertyFlags::LAZILY_ALLOCATED, + phd_capabilities, + //phd_features, + downlevel_flags, + private_caps, + workarounds, + }; + + Some(crate::ExposedAdapter { + adapter, + info, + features: available_features, + capabilities, + }) + } +} + +impl super::Adapter { + pub fn raw_physical_device(&self) -> ash::vk::PhysicalDevice { + self.raw + } + + pub fn physical_device_capabilities(&self) -> &PhysicalDeviceCapabilities { + &self.phd_capabilities + } + + pub fn shared_instance(&self) -> &super::InstanceShared { + &self.instance + } + + pub fn required_device_extensions(&self, features: wgt::Features) -> Vec<&'static CStr> { + let (supported_extensions, unsupported_extensions) = self + .phd_capabilities + .get_required_extensions(features) + .iter() + .partition::<Vec<&CStr>, _>(|&&extension| { + self.phd_capabilities.supports_extension(extension) + }); + + if !unsupported_extensions.is_empty() { + log::warn!("Missing extensions: {:?}", unsupported_extensions); + } + + log::debug!("Supported extensions: {:?}", supported_extensions); + supported_extensions + } + + /// `features` must be the same features used to create `enabled_extensions`. 
+ pub fn physical_device_features( + &self, + enabled_extensions: &[&'static CStr], + features: wgt::Features, + ) -> PhysicalDeviceFeatures { + PhysicalDeviceFeatures::from_extensions_and_requested_features( + self.phd_capabilities.device_api_version, + enabled_extensions, + features, + self.downlevel_flags, + &self.private_caps, + ) + } + + /// # Safety + /// + /// - `raw_device` must be created from this adapter. + /// - `raw_device` must be created using `family_index`, `enabled_extensions` and `physical_device_features()` + /// - `enabled_extensions` must be a superset of `required_device_extensions()`. + #[allow(clippy::too_many_arguments)] + pub unsafe fn device_from_raw( + &self, + raw_device: ash::Device, + handle_is_owned: bool, + enabled_extensions: &[&'static CStr], + features: wgt::Features, + family_index: u32, + queue_index: u32, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let mem_properties = { + profiling::scope!("vkGetPhysicalDeviceMemoryProperties"); + unsafe { + self.instance + .raw + .get_physical_device_memory_properties(self.raw) + } + }; + let memory_types = + &mem_properties.memory_types[..mem_properties.memory_type_count as usize]; + let valid_ash_memory_types = memory_types.iter().enumerate().fold(0, |u, (i, mem)| { + if self.known_memory_flags.contains(mem.property_flags) { + u | (1 << i) + } else { + u + } + }); + + let swapchain_fn = khr::Swapchain::new(&self.instance.raw, &raw_device); + + let indirect_count_fn = if enabled_extensions.contains(&khr::DrawIndirectCount::name()) { + Some(khr::DrawIndirectCount::new(&self.instance.raw, &raw_device)) + } else { + None + }; + let timeline_semaphore_fn = if enabled_extensions.contains(&khr::TimelineSemaphore::name()) + { + Some(super::ExtensionFn::Extension(khr::TimelineSemaphore::new( + &self.instance.raw, + &raw_device, + ))) + } else if self.phd_capabilities.device_api_version >= vk::API_VERSION_1_2 { + Some(super::ExtensionFn::Promoted) + } else { + None + }; + 
let ray_tracing_fns = if enabled_extensions.contains(&khr::AccelerationStructure::name()) + && enabled_extensions.contains(&khr::BufferDeviceAddress::name()) + { + Some(super::RayTracingDeviceExtensionFunctions { + acceleration_structure: khr::AccelerationStructure::new( + &self.instance.raw, + &raw_device, + ), + buffer_device_address: khr::BufferDeviceAddress::new( + &self.instance.raw, + &raw_device, + ), + }) + } else { + None + }; + + let naga_options = { + use naga::back::spv; + + // The following capabilities are always available + // see https://registry.khronos.org/vulkan/specs/1.3-extensions/html/chap52.html#spirvenv-capabilities + let mut capabilities = vec![ + spv::Capability::Shader, + spv::Capability::Matrix, + spv::Capability::Sampled1D, + spv::Capability::Image1D, + spv::Capability::ImageQuery, + spv::Capability::DerivativeControl, + spv::Capability::StorageImageExtendedFormats, + ]; + + if self + .downlevel_flags + .contains(wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES) + { + capabilities.push(spv::Capability::SampledCubeArray); + } + + if self + .downlevel_flags + .contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING) + { + capabilities.push(spv::Capability::SampleRateShading); + } + + if features.contains(wgt::Features::MULTIVIEW) { + capabilities.push(spv::Capability::MultiView); + } + + if features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX) { + capabilities.push(spv::Capability::Geometry); + } + + if features.intersects( + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ) { + capabilities.push(spv::Capability::ShaderNonUniform); + } + if features.contains(wgt::Features::BGRA8UNORM_STORAGE) { + capabilities.push(spv::Capability::StorageImageWriteWithoutFormat); + } + + if features.contains(wgt::Features::RAY_QUERY) { + capabilities.push(spv::Capability::RayQueryKHR); + } + + let mut flags = spv::WriterFlags::empty(); + flags.set( + 
spv::WriterFlags::DEBUG, + self.instance.flags.contains(wgt::InstanceFlags::DEBUG), + ); + flags.set( + spv::WriterFlags::LABEL_VARYINGS, + self.phd_capabilities.properties.vendor_id != crate::auxil::db::qualcomm::VENDOR, + ); + flags.set( + spv::WriterFlags::FORCE_POINT_SIZE, + //Note: we could technically disable this when we are compiling separate entry points, + // and we know exactly that the primitive topology is not `PointList`. + // But this requires cloning the `spv::Options` struct, which has heap allocations. + true, // could check `super::Workarounds::SEPARATE_ENTRY_POINTS` + ); + spv::Options { + lang_version: (1, 0), + flags, + capabilities: Some(capabilities.iter().cloned().collect()), + bounds_check_policies: naga::proc::BoundsCheckPolicies { + index: naga::proc::BoundsCheckPolicy::Restrict, + buffer: if self.private_caps.robust_buffer_access { + naga::proc::BoundsCheckPolicy::Unchecked + } else { + naga::proc::BoundsCheckPolicy::Restrict + }, + image_load: if self.private_caps.robust_image_access { + naga::proc::BoundsCheckPolicy::Unchecked + } else { + naga::proc::BoundsCheckPolicy::Restrict + }, + image_store: naga::proc::BoundsCheckPolicy::Unchecked, + // TODO: support bounds checks on binding arrays + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }, + zero_initialize_workgroup_memory: if self + .private_caps + .zero_initialize_workgroup_memory + { + spv::ZeroInitializeWorkgroupMemoryMode::Native + } else { + spv::ZeroInitializeWorkgroupMemoryMode::Polyfill + }, + // We need to build this separately for each invocation, so just default it out here + binding_map: BTreeMap::default(), + debug_info: None, + } + }; + + let raw_queue = { + profiling::scope!("vkGetDeviceQueue"); + unsafe { raw_device.get_device_queue(family_index, queue_index) } + }; + + let shared = Arc::new(super::DeviceShared { + raw: raw_device, + family_index, + queue_index, + raw_queue, + handle_is_owned, + instance: Arc::clone(&self.instance), + physical_device: 
self.raw, + enabled_extensions: enabled_extensions.into(), + extension_fns: super::DeviceExtensionFunctions { + draw_indirect_count: indirect_count_fn, + timeline_semaphore: timeline_semaphore_fn, + ray_tracing: ray_tracing_fns, + }, + vendor_id: self.phd_capabilities.properties.vendor_id, + timestamp_period: self.phd_capabilities.properties.limits.timestamp_period, + private_caps: self.private_caps.clone(), + workarounds: self.workarounds, + render_passes: Mutex::new(Default::default()), + framebuffers: Mutex::new(Default::default()), + }); + let mut relay_semaphores = [vk::Semaphore::null(); 2]; + for sem in relay_semaphores.iter_mut() { + unsafe { + *sem = shared + .raw + .create_semaphore(&vk::SemaphoreCreateInfo::builder(), None)? + }; + } + let queue = super::Queue { + raw: raw_queue, + swapchain_fn, + device: Arc::clone(&shared), + family_index, + relay_semaphores, + relay_index: AtomicIsize::new(-1), + }; + + let mem_allocator = { + let limits = self.phd_capabilities.properties.limits; + let config = gpu_alloc::Config::i_am_prototyping(); //TODO + let max_memory_allocation_size = + if let Some(maintenance_3) = self.phd_capabilities.maintenance_3 { + maintenance_3.max_memory_allocation_size + } else { + u64::max_value() + }; + let properties = gpu_alloc::DeviceProperties { + max_memory_allocation_count: limits.max_memory_allocation_count, + max_memory_allocation_size, + non_coherent_atom_size: limits.non_coherent_atom_size, + memory_types: memory_types + .iter() + .map(|memory_type| gpu_alloc::MemoryType { + props: gpu_alloc::MemoryPropertyFlags::from_bits_truncate( + memory_type.property_flags.as_raw() as u8, + ), + heap: memory_type.heap_index, + }) + .collect(), + memory_heaps: mem_properties.memory_heaps + [..mem_properties.memory_heap_count as usize] + .iter() + .map(|&memory_heap| gpu_alloc::MemoryHeap { + size: memory_heap.size, + }) + .collect(), + buffer_device_address: enabled_extensions + .contains(&khr::BufferDeviceAddress::name()), + }; + 
gpu_alloc::GpuAllocator::new(config, properties) + }; + let desc_allocator = gpu_descriptor::DescriptorAllocator::new( + if let Some(di) = self.phd_capabilities.descriptor_indexing { + di.max_update_after_bind_descriptors_in_all_pools + } else { + 0 + }, + ); + + let device = super::Device { + shared, + mem_allocator: Mutex::new(mem_allocator), + desc_allocator: Mutex::new(desc_allocator), + valid_ash_memory_types, + naga_options, + #[cfg(feature = "renderdoc")] + render_doc: Default::default(), + }; + + Ok(crate::OpenDevice { device, queue }) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let enabled_extensions = self.required_device_extensions(features); + let mut enabled_phd_features = self.physical_device_features(&enabled_extensions, features); + + let family_index = 0; //TODO + let family_info = vk::DeviceQueueCreateInfo::builder() + .queue_family_index(family_index) + .queue_priorities(&[1.0]) + .build(); + let family_infos = [family_info]; + + let str_pointers = enabled_extensions + .iter() + .map(|&s| { + // Safe because `enabled_extensions` entries have static lifetime. + s.as_ptr() + }) + .collect::<Vec<_>>(); + + let pre_info = vk::DeviceCreateInfo::builder() + .queue_create_infos(&family_infos) + .enabled_extension_names(&str_pointers); + let info = enabled_phd_features + .add_to_device_create_builder(pre_info) + .build(); + let raw_device = { + profiling::scope!("vkCreateDevice"); + unsafe { self.instance.raw.create_device(self.raw, &info, None)? 
} + }; + + unsafe { + self.device_from_raw( + raw_device, + true, + &enabled_extensions, + features, + family_info.queue_family_index, + 0, + ) + } + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + + let vk_format = self.private_caps.map_texture_format(format); + let properties = unsafe { + self.instance + .raw + .get_physical_device_format_properties(self.raw, vk_format) + }; + let features = properties.optimal_tiling_features; + + let mut flags = Tfc::empty(); + flags.set( + Tfc::SAMPLED, + features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE), + ); + flags.set( + Tfc::SAMPLED_LINEAR, + features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE_FILTER_LINEAR), + ); + // flags.set( + // Tfc::SAMPLED_MINMAX, + // features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE_FILTER_MINMAX), + // ); + flags.set( + Tfc::STORAGE | Tfc::STORAGE_READ_WRITE, + features.contains(vk::FormatFeatureFlags::STORAGE_IMAGE), + ); + flags.set( + Tfc::STORAGE_ATOMIC, + features.contains(vk::FormatFeatureFlags::STORAGE_IMAGE_ATOMIC), + ); + flags.set( + Tfc::COLOR_ATTACHMENT, + features.contains(vk::FormatFeatureFlags::COLOR_ATTACHMENT), + ); + flags.set( + Tfc::COLOR_ATTACHMENT_BLEND, + features.contains(vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND), + ); + flags.set( + Tfc::DEPTH_STENCIL_ATTACHMENT, + features.contains(vk::FormatFeatureFlags::DEPTH_STENCIL_ATTACHMENT), + ); + flags.set( + Tfc::COPY_SRC, + features.intersects(vk::FormatFeatureFlags::TRANSFER_SRC), + ); + flags.set( + Tfc::COPY_DST, + features.intersects(vk::FormatFeatureFlags::TRANSFER_DST), + ); + // Vulkan is very permissive about MSAA + flags.set(Tfc::MULTISAMPLE_RESOLVE, !format.is_compressed()); + + // get the supported sample counts + let format_aspect = crate::FormatAspects::from(format); + let limits = self.phd_capabilities.properties.limits; + + let sample_flags = if 
format_aspect.contains(crate::FormatAspects::DEPTH) { + limits + .framebuffer_depth_sample_counts + .min(limits.sampled_image_depth_sample_counts) + } else if format_aspect.contains(crate::FormatAspects::STENCIL) { + limits + .framebuffer_stencil_sample_counts + .min(limits.sampled_image_stencil_sample_counts) + } else { + let first_aspect = format_aspect + .iter() + .next() + .expect("All texture should at least one aspect") + .map(); + + // We should never get depth or stencil out of this, due to the above. + assert_ne!(first_aspect, wgt::TextureAspect::DepthOnly); + assert_ne!(first_aspect, wgt::TextureAspect::StencilOnly); + + match format.sample_type(Some(first_aspect), None).unwrap() { + wgt::TextureSampleType::Float { .. } => limits + .framebuffer_color_sample_counts + .min(limits.sampled_image_color_sample_counts), + wgt::TextureSampleType::Sint | wgt::TextureSampleType::Uint => { + limits.sampled_image_integer_sample_counts + } + _ => unreachable!(), + } + }; + + flags.set( + Tfc::MULTISAMPLE_X2, + sample_flags.contains(vk::SampleCountFlags::TYPE_2), + ); + flags.set( + Tfc::MULTISAMPLE_X4, + sample_flags.contains(vk::SampleCountFlags::TYPE_4), + ); + flags.set( + Tfc::MULTISAMPLE_X8, + sample_flags.contains(vk::SampleCountFlags::TYPE_8), + ); + flags.set( + Tfc::MULTISAMPLE_X16, + sample_flags.contains(vk::SampleCountFlags::TYPE_16), + ); + + flags + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + if !self.private_caps.can_present { + return None; + } + let queue_family_index = 0; //TODO + { + profiling::scope!("vkGetPhysicalDeviceSurfaceSupportKHR"); + match unsafe { + surface.functor.get_physical_device_surface_support( + self.raw, + queue_family_index, + surface.raw, + ) + } { + Ok(true) => (), + Ok(false) => return None, + Err(e) => { + log::error!("get_physical_device_surface_support: {}", e); + return None; + } + } + } + + let caps = { + 
profiling::scope!("vkGetPhysicalDeviceSurfaceCapabilitiesKHR"); + match unsafe { + surface + .functor + .get_physical_device_surface_capabilities(self.raw, surface.raw) + } { + Ok(caps) => caps, + Err(e) => { + log::error!("get_physical_device_surface_capabilities: {}", e); + return None; + } + } + }; + + // If image count is 0, the support number of images is unlimited. + let max_image_count = if caps.max_image_count == 0 { + !0 + } else { + caps.max_image_count + }; + + // `0xFFFFFFFF` indicates that the extent depends on the created swapchain. + let current_extent = if caps.current_extent.width != !0 && caps.current_extent.height != !0 + { + Some(wgt::Extent3d { + width: caps.current_extent.width, + height: caps.current_extent.height, + depth_or_array_layers: 1, + }) + } else { + None + }; + + let raw_present_modes = { + profiling::scope!("vkGetPhysicalDeviceSurfacePresentModesKHR"); + match unsafe { + surface + .functor + .get_physical_device_surface_present_modes(self.raw, surface.raw) + } { + Ok(present_modes) => present_modes, + Err(e) => { + log::error!("get_physical_device_surface_present_modes: {}", e); + Vec::new() + } + } + }; + + let raw_surface_formats = { + profiling::scope!("vkGetPhysicalDeviceSurfaceFormatsKHR"); + match unsafe { + surface + .functor + .get_physical_device_surface_formats(self.raw, surface.raw) + } { + Ok(formats) => formats, + Err(e) => { + log::error!("get_physical_device_surface_formats: {}", e); + Vec::new() + } + } + }; + + let formats = raw_surface_formats + .into_iter() + .filter_map(conv::map_vk_surface_formats) + .collect(); + Some(crate::SurfaceCapabilities { + formats, + // TODO: Right now we're always trunkating the swap chain + // (presumably - we're actually setting the min image count which isn't necessarily the swap chain size) + // Instead, we should use extensions when available to wait in present. 
+ // See https://github.com/gfx-rs/wgpu/issues/2869 + maximum_frame_latency: (caps.min_image_count - 1)..=(max_image_count - 1), // Note this can't underflow since both `min_image_count` is at least one and we already patched `max_image_count`. + current_extent, + usage: conv::map_vk_image_usage(caps.supported_usage_flags), + present_modes: raw_present_modes + .into_iter() + .flat_map(conv::map_vk_present_mode) + .collect(), + composite_alpha_modes: conv::map_vk_composite_alpha(caps.supported_composite_alpha), + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + // VK_GOOGLE_display_timing is the only way to get presentation + // timestamps on vulkan right now and it is only ever available + // on android and linux. This includes mac, but there's no alternative + // on mac, so this is fine. + #[cfg(unix)] + { + let mut timespec = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + unsafe { + libc::clock_gettime(libc::CLOCK_MONOTONIC, &mut timespec); + } + + wgt::PresentationTimestamp( + timespec.tv_sec as u128 * 1_000_000_000 + timespec.tv_nsec as u128, + ) + } + #[cfg(not(unix))] + { + wgt::PresentationTimestamp::INVALID_TIMESTAMP + } + } +} + +fn is_format_16bit_norm_supported(instance: &ash::Instance, phd: vk::PhysicalDevice) -> bool { + let tiling = vk::ImageTiling::OPTIMAL; + let features = vk::FormatFeatureFlags::SAMPLED_IMAGE + | vk::FormatFeatureFlags::STORAGE_IMAGE + | vk::FormatFeatureFlags::TRANSFER_SRC + | vk::FormatFeatureFlags::TRANSFER_DST; + let r16unorm = supports_format(instance, phd, vk::Format::R16_UNORM, tiling, features); + let r16snorm = supports_format(instance, phd, vk::Format::R16_SNORM, tiling, features); + let rg16unorm = supports_format(instance, phd, vk::Format::R16G16_UNORM, tiling, features); + let rg16snorm = supports_format(instance, phd, vk::Format::R16G16_SNORM, tiling, features); + let rgba16unorm = supports_format( + instance, + phd, + vk::Format::R16G16B16A16_UNORM, + tiling, + features, + 
); + let rgba16snorm = supports_format( + instance, + phd, + vk::Format::R16G16B16A16_SNORM, + tiling, + features, + ); + + r16unorm && r16snorm && rg16unorm && rg16snorm && rgba16unorm && rgba16snorm +} + +fn is_float32_filterable_supported(instance: &ash::Instance, phd: vk::PhysicalDevice) -> bool { + let tiling = vk::ImageTiling::OPTIMAL; + let features = vk::FormatFeatureFlags::SAMPLED_IMAGE_FILTER_LINEAR; + let r_float = supports_format(instance, phd, vk::Format::R32_SFLOAT, tiling, features); + let rg_float = supports_format(instance, phd, vk::Format::R32G32_SFLOAT, tiling, features); + let rgba_float = supports_format( + instance, + phd, + vk::Format::R32G32B32A32_SFLOAT, + tiling, + features, + ); + r_float && rg_float && rgba_float +} + +fn supports_format( + instance: &ash::Instance, + phd: vk::PhysicalDevice, + format: vk::Format, + tiling: vk::ImageTiling, + features: vk::FormatFeatureFlags, +) -> bool { + let properties = unsafe { instance.get_physical_device_format_properties(phd, format) }; + match tiling { + vk::ImageTiling::LINEAR => properties.linear_tiling_features.contains(features), + vk::ImageTiling::OPTIMAL => properties.optimal_tiling_features.contains(features), + _ => false, + } +} + +fn supports_bgra8unorm_storage( + instance: &ash::Instance, + phd: vk::PhysicalDevice, + device_api_version: u32, +) -> bool { + // See https://github.com/KhronosGroup/Vulkan-Docs/issues/2027#issuecomment-1380608011 + + // This check gates the function call and structures used below. + // TODO: check for (`VK_KHR_get_physical_device_properties2` or VK1.1) and (`VK_KHR_format_feature_flags2` or VK1.3). + // Right now we only check for VK1.3. 
+ if device_api_version < vk::API_VERSION_1_3 { + return false; + } + + unsafe { + let mut properties3 = vk::FormatProperties3::default(); + let mut properties2 = vk::FormatProperties2::builder().push_next(&mut properties3); + + instance.get_physical_device_format_properties2( + phd, + vk::Format::B8G8R8A8_UNORM, + &mut properties2, + ); + + let features2 = properties2.format_properties.optimal_tiling_features; + let features3 = properties3.optimal_tiling_features; + + features2.contains(vk::FormatFeatureFlags::STORAGE_IMAGE) + && features3.contains(vk::FormatFeatureFlags2::STORAGE_WRITE_WITHOUT_FORMAT) + } +} + +// For https://github.com/gfx-rs/wgpu/issues/4599 +// Intel iGPUs with outdated drivers can break rendering if `VK_EXT_robustness2` is used. +// Driver version 31.0.101.2115 works, but there's probably an earlier functional version. +fn is_intel_igpu_outdated_for_robustness2( + props: vk::PhysicalDeviceProperties, + driver: Option<vk::PhysicalDeviceDriverPropertiesKHR>, +) -> bool { + const DRIVER_VERSION_WORKING: u32 = (101 << 14) | 2115; // X.X.101.2115 + + let is_outdated = props.vendor_id == crate::auxil::db::intel::VENDOR + && props.device_type == vk::PhysicalDeviceType::INTEGRATED_GPU + && props.driver_version < DRIVER_VERSION_WORKING + && driver + .map(|driver| driver.driver_id == vk::DriverId::INTEL_PROPRIETARY_WINDOWS) + .unwrap_or_default(); + + if is_outdated { + log::warn!( + "Disabling robustBufferAccess2 and robustImageAccess2: IntegratedGpu Intel Driver is outdated. 
Found with version 0x{:X}, less than the known good version 0x{:X} (31.0.101.2115)", + props.driver_version, + DRIVER_VERSION_WORKING + ); + } + is_outdated +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/command.rs b/third_party/rust/wgpu-hal/src/vulkan/command.rs new file mode 100644 index 0000000000..42ea907738 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/command.rs @@ -0,0 +1,1142 @@ +use super::conv; + +use arrayvec::ArrayVec; +use ash::{extensions::ext, vk}; + +use std::{mem, ops::Range, slice}; + +const ALLOCATION_GRANULARITY: u32 = 16; +const DST_IMAGE_LAYOUT: vk::ImageLayout = vk::ImageLayout::TRANSFER_DST_OPTIMAL; + +impl super::Texture { + fn map_buffer_copies<T>(&self, regions: T) -> impl Iterator<Item = vk::BufferImageCopy> + where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let (block_width, block_height) = self.format.block_dimensions(); + let format = self.format; + let copy_size = self.copy_size; + regions.map(move |r| { + let extent = r.texture_base.max_copy_size(©_size).min(&r.size); + let (image_subresource, image_offset) = conv::map_subresource_layers(&r.texture_base); + vk::BufferImageCopy { + buffer_offset: r.buffer_layout.offset, + buffer_row_length: r.buffer_layout.bytes_per_row.map_or(0, |bpr| { + let block_size = format + .block_copy_size(Some(r.texture_base.aspect.map())) + .unwrap(); + block_width * (bpr / block_size) + }), + buffer_image_height: r + .buffer_layout + .rows_per_image + .map_or(0, |rpi| rpi * block_height), + image_subresource, + image_offset, + image_extent: conv::map_copy_extent(&extent), + } + }) + } +} + +impl super::DeviceShared { + fn debug_messenger(&self) -> Option<&ext::DebugUtils> { + Some(&self.instance.debug_utils.as_ref()?.extension) + } +} + +impl super::CommandEncoder { + fn write_pass_end_timestamp_if_requested(&mut self) { + if let Some((query_set, index)) = self.end_of_pass_timer_query.take() { + unsafe { + self.device.raw.cmd_write_timestamp( + self.active, + 
vk::PipelineStageFlags::BOTTOM_OF_PIPE, + query_set, + index, + ); + } + } + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + if self.free.is_empty() { + let vk_info = vk::CommandBufferAllocateInfo::builder() + .command_pool(self.raw) + .command_buffer_count(ALLOCATION_GRANULARITY) + .build(); + let cmd_buf_vec = unsafe { self.device.raw.allocate_command_buffers(&vk_info)? }; + self.free.extend(cmd_buf_vec); + } + let raw = self.free.pop().unwrap(); + + // Set the name unconditionally, since there might be a + // previous name assigned to this. + unsafe { + self.device.set_object_name( + vk::ObjectType::COMMAND_BUFFER, + raw, + label.unwrap_or_default(), + ) + }; + + // Reset this in case the last renderpass was never ended. + self.rpass_debug_marker_active = false; + + let vk_info = vk::CommandBufferBeginInfo::builder() + .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT) + .build(); + unsafe { self.device.raw.begin_command_buffer(raw, &vk_info) }?; + self.active = raw; + + Ok(()) + } + + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + let raw = self.active; + self.active = vk::CommandBuffer::null(); + unsafe { self.device.raw.end_command_buffer(raw) }?; + Ok(super::CommandBuffer { raw }) + } + + unsafe fn discard_encoding(&mut self) { + self.discarded.push(self.active); + self.active = vk::CommandBuffer::null(); + } + + unsafe fn reset_all<I>(&mut self, cmd_bufs: I) + where + I: Iterator<Item = super::CommandBuffer>, + { + self.temp.clear(); + self.free + .extend(cmd_bufs.into_iter().map(|cmd_buf| cmd_buf.raw)); + self.free.append(&mut self.discarded); + let _ = unsafe { + self.device + .raw + .reset_command_pool(self.raw, vk::CommandPoolResetFlags::default()) + }; + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, 
+ { + //Note: this is done so that we never end up with empty stage flags + let mut src_stages = vk::PipelineStageFlags::TOP_OF_PIPE; + let mut dst_stages = vk::PipelineStageFlags::BOTTOM_OF_PIPE; + let vk_barriers = &mut self.temp.buffer_barriers; + vk_barriers.clear(); + + for bar in barriers { + let (src_stage, src_access) = conv::map_buffer_usage_to_barrier(bar.usage.start); + src_stages |= src_stage; + let (dst_stage, dst_access) = conv::map_buffer_usage_to_barrier(bar.usage.end); + dst_stages |= dst_stage; + + vk_barriers.push( + vk::BufferMemoryBarrier::builder() + .buffer(bar.buffer.raw) + .size(vk::WHOLE_SIZE) + .src_access_mask(src_access) + .dst_access_mask(dst_access) + .build(), + ) + } + + if !vk_barriers.is_empty() { + unsafe { + self.device.raw.cmd_pipeline_barrier( + self.active, + src_stages, + dst_stages, + vk::DependencyFlags::empty(), + &[], + vk_barriers, + &[], + ) + }; + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + let mut src_stages = vk::PipelineStageFlags::empty(); + let mut dst_stages = vk::PipelineStageFlags::empty(); + let vk_barriers = &mut self.temp.image_barriers; + vk_barriers.clear(); + + for bar in barriers { + let range = conv::map_subresource_range_combined_aspect( + &bar.range, + bar.texture.format, + &self.device.private_caps, + ); + let (src_stage, src_access) = conv::map_texture_usage_to_barrier(bar.usage.start); + let src_layout = conv::derive_image_layout(bar.usage.start, bar.texture.format); + src_stages |= src_stage; + let (dst_stage, dst_access) = conv::map_texture_usage_to_barrier(bar.usage.end); + let dst_layout = conv::derive_image_layout(bar.usage.end, bar.texture.format); + dst_stages |= dst_stage; + + vk_barriers.push( + vk::ImageMemoryBarrier::builder() + .image(bar.texture.raw) + .subresource_range(range) + .src_access_mask(src_access) + .dst_access_mask(dst_access) + .old_layout(src_layout) + 
.new_layout(dst_layout) + .build(), + ); + } + + if !vk_barriers.is_empty() { + unsafe { + self.device.raw.cmd_pipeline_barrier( + self.active, + src_stages, + dst_stages, + vk::DependencyFlags::empty(), + &[], + &[], + vk_barriers, + ) + }; + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + let range_size = range.end - range.start; + if self.device.workarounds.contains( + super::Workarounds::FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16, + ) && range_size >= 4096 + && range.start % 16 != 0 + { + let rounded_start = wgt::math::align_to(range.start, 16); + let prefix_size = rounded_start - range.start; + + unsafe { + self.device.raw.cmd_fill_buffer( + self.active, + buffer.raw, + range.start, + prefix_size, + 0, + ) + }; + + // This will never be zero, as rounding can only add up to 12 bytes, and the total size is 4096. + let suffix_size = range.end - rounded_start; + + unsafe { + self.device.raw.cmd_fill_buffer( + self.active, + buffer.raw, + rounded_start, + suffix_size, + 0, + ) + }; + } else { + unsafe { + self.device + .raw + .cmd_fill_buffer(self.active, buffer.raw, range.start, range_size, 0) + }; + } + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let vk_regions_iter = regions.map(|r| vk::BufferCopy { + src_offset: r.src_offset, + dst_offset: r.dst_offset, + size: r.size.get(), + }); + + unsafe { + self.device.raw.cmd_copy_buffer( + self.active, + src.raw, + dst.raw, + &smallvec::SmallVec::<[vk::BufferCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let src_layout = conv::derive_image_layout(src_usage, src.format); + + let vk_regions_iter = regions.map(|r| { + let 
(src_subresource, src_offset) = conv::map_subresource_layers(&r.src_base); + let (dst_subresource, dst_offset) = conv::map_subresource_layers(&r.dst_base); + let extent = r + .size + .min(&r.src_base.max_copy_size(&src.copy_size)) + .min(&r.dst_base.max_copy_size(&dst.copy_size)); + vk::ImageCopy { + src_subresource, + src_offset, + dst_subresource, + dst_offset, + extent: conv::map_copy_extent(&extent), + } + }); + + unsafe { + self.device.raw.cmd_copy_image( + self.active, + src.raw, + src_layout, + dst.raw, + DST_IMAGE_LAYOUT, + &smallvec::SmallVec::<[vk::ImageCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let vk_regions_iter = dst.map_buffer_copies(regions); + + unsafe { + self.device.raw.cmd_copy_buffer_to_image( + self.active, + src.raw, + dst.raw, + DST_IMAGE_LAYOUT, + &smallvec::SmallVec::<[vk::BufferImageCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let src_layout = conv::derive_image_layout(src_usage, src.format); + let vk_regions_iter = src.map_buffer_copies(regions); + + unsafe { + self.device.raw.cmd_copy_image_to_buffer( + self.active, + src.raw, + src_layout, + dst.raw, + &smallvec::SmallVec::<[vk::BufferImageCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.device.raw.cmd_begin_query( + self.active, + set.raw, + index, + vk::QueryControlFlags::empty(), + ) + }; + } + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { self.device.raw.cmd_end_query(self.active, set.raw, index) }; + } + unsafe fn write_timestamp(&mut self, set: 
&super::QuerySet, index: u32) { + unsafe { + self.device.raw.cmd_write_timestamp( + self.active, + vk::PipelineStageFlags::BOTTOM_OF_PIPE, + set.raw, + index, + ) + }; + } + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range<u32>) { + unsafe { + self.device.raw.cmd_reset_query_pool( + self.active, + set.raw, + range.start, + range.end - range.start, + ) + }; + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + unsafe { + self.device.raw.cmd_copy_query_pool_results( + self.active, + set.raw, + range.start, + range.end - range.start, + buffer.raw, + offset, + stride.get(), + vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT, + ) + }; + } + + unsafe fn build_acceleration_structures<'a, T>(&mut self, descriptor_count: u32, descriptors: T) + where + super::Api: 'a, + T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>, + { + const CAPACITY_OUTER: usize = 8; + const CAPACITY_INNER: usize = 1; + let descriptor_count = descriptor_count as usize; + + let ray_tracing_functions = self + .device + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + + let get_device_address = |buffer: Option<&super::Buffer>| unsafe { + match buffer { + Some(buffer) => ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + ), + None => panic!("Buffers are required to build acceleration structures"), + } + }; + + // storage to all the data required for cmd_build_acceleration_structures + let mut ranges_storage = smallvec::SmallVec::< + [smallvec::SmallVec<[vk::AccelerationStructureBuildRangeInfoKHR; CAPACITY_INNER]>; + CAPACITY_OUTER], + >::with_capacity(descriptor_count); + let mut geometries_storage = smallvec::SmallVec::< + 
[smallvec::SmallVec<[vk::AccelerationStructureGeometryKHR; CAPACITY_INNER]>; + CAPACITY_OUTER], + >::with_capacity(descriptor_count); + + // pointers to all the data required for cmd_build_acceleration_structures + let mut geometry_infos = smallvec::SmallVec::< + [vk::AccelerationStructureBuildGeometryInfoKHR; CAPACITY_OUTER], + >::with_capacity(descriptor_count); + let mut ranges_ptrs = smallvec::SmallVec::< + [&[vk::AccelerationStructureBuildRangeInfoKHR]; CAPACITY_OUTER], + >::with_capacity(descriptor_count); + + for desc in descriptors { + let (geometries, ranges) = match *desc.entries { + crate::AccelerationStructureEntries::Instances(ref instances) => { + let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder( + ) + .data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(instances.buffer), + }); + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: *instance_data, + }); + + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(instances.count) + .primitive_offset(instances.offset); + + (smallvec::smallvec![*geometry], smallvec::smallvec![*range]) + } + crate::AccelerationStructureEntries::Triangles(ref in_geometries) => { + let mut ranges = smallvec::SmallVec::< + [vk::AccelerationStructureBuildRangeInfoKHR; CAPACITY_INNER], + >::with_capacity(in_geometries.len()); + let mut geometries = smallvec::SmallVec::< + [vk::AccelerationStructureGeometryKHR; CAPACITY_INNER], + >::with_capacity(in_geometries.len()); + for triangles in in_geometries { + let mut triangle_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(triangles.vertex_buffer), + }) + .vertex_format(conv::map_vertex_format(triangles.vertex_format)) + .max_vertex(triangles.vertex_count) + 
.vertex_stride(triangles.vertex_stride); + + let mut range = vk::AccelerationStructureBuildRangeInfoKHR::builder(); + + if let Some(ref indices) = triangles.indices { + triangle_data = triangle_data + .index_data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(indices.buffer), + }) + .index_type(conv::map_index_format(indices.format)); + + range = range + .primitive_count(indices.count / 3) + .primitive_offset(indices.offset) + .first_vertex(triangles.first_vertex); + } else { + range = range + .primitive_count(triangles.vertex_count) + .first_vertex(triangles.first_vertex); + } + + if let Some(ref transform) = triangles.transform { + let transform_device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(transform.buffer.raw), + ) + }; + triangle_data = + triangle_data.transform_data(vk::DeviceOrHostAddressConstKHR { + device_address: transform_device_address, + }); + + range = range.transform_offset(transform.offset); + } + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: *triangle_data, + }) + .flags(conv::map_acceleration_structure_geometry_flags( + triangles.flags, + )); + + geometries.push(*geometry); + ranges.push(*range); + } + (geometries, ranges) + } + crate::AccelerationStructureEntries::AABBs(ref in_geometries) => { + let mut ranges = smallvec::SmallVec::< + [vk::AccelerationStructureBuildRangeInfoKHR; CAPACITY_INNER], + >::with_capacity(in_geometries.len()); + let mut geometries = smallvec::SmallVec::< + [vk::AccelerationStructureGeometryKHR; CAPACITY_INNER], + >::with_capacity(in_geometries.len()); + for aabb in in_geometries { + let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder() + .data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(aabb.buffer), + }) + 
.stride(aabb.stride); + + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(aabb.count) + .primitive_offset(aabb.offset); + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::AABBS) + .geometry(vk::AccelerationStructureGeometryDataKHR { + aabbs: *aabbs_data, + }) + .flags(conv::map_acceleration_structure_geometry_flags(aabb.flags)); + + geometries.push(*geometry); + ranges.push(*range); + } + (geometries, ranges) + } + }; + + ranges_storage.push(ranges); + geometries_storage.push(geometries); + + let scratch_device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw), + ) + }; + let ty = match *desc.entries { + crate::AccelerationStructureEntries::Instances(_) => { + vk::AccelerationStructureTypeKHR::TOP_LEVEL + } + _ => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL, + }; + let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + .ty(ty) + .mode(conv::map_acceleration_structure_build_mode(desc.mode)) + .flags(conv::map_acceleration_structure_flags(desc.flags)) + .dst_acceleration_structure(desc.destination_acceleration_structure.raw) + .scratch_data(vk::DeviceOrHostAddressKHR { + device_address: scratch_device_address + desc.scratch_buffer_offset, + }); + + if desc.mode == crate::AccelerationStructureBuildMode::Update { + geometry_info.src_acceleration_structure = desc + .source_acceleration_structure + .unwrap_or(desc.destination_acceleration_structure) + .raw; + } + + geometry_infos.push(*geometry_info); + } + + for (i, geometry_info) in geometry_infos.iter_mut().enumerate() { + geometry_info.geometry_count = geometries_storage[i].len() as u32; + geometry_info.p_geometries = geometries_storage[i].as_ptr(); + ranges_ptrs.push(&ranges_storage[i]); + } + + unsafe { + ray_tracing_functions + .acceleration_structure + 
.cmd_build_acceleration_structures(self.active, &geometry_infos, &ranges_ptrs); + } + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + barrier: crate::AccelerationStructureBarrier, + ) { + let (src_stage, src_access) = + conv::map_acceleration_structure_usage_to_barrier(barrier.usage.start); + let (dst_stage, dst_access) = + conv::map_acceleration_structure_usage_to_barrier(barrier.usage.end); + + unsafe { + self.device.raw.cmd_pipeline_barrier( + self.active, + src_stage | vk::PipelineStageFlags::TOP_OF_PIPE, + dst_stage | vk::PipelineStageFlags::BOTTOM_OF_PIPE, + vk::DependencyFlags::empty(), + &[vk::MemoryBarrier::builder() + .src_access_mask(src_access) + .dst_access_mask(dst_access) + .build()], + &[], + &[], + ) + }; + } + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + let mut vk_clear_values = + ArrayVec::<vk::ClearValue, { super::MAX_TOTAL_ATTACHMENTS }>::new(); + let mut vk_image_views = ArrayVec::<vk::ImageView, { super::MAX_TOTAL_ATTACHMENTS }>::new(); + let mut rp_key = super::RenderPassKey::default(); + let mut fb_key = super::FramebufferKey { + attachments: ArrayVec::default(), + extent: desc.extent, + sample_count: desc.sample_count, + }; + let caps = &self.device.private_caps; + + for cat in desc.color_attachments { + if let Some(cat) = cat.as_ref() { + vk_clear_values.push(vk::ClearValue { + color: unsafe { cat.make_vk_clear_color() }, + }); + vk_image_views.push(cat.target.view.raw); + let color = super::ColorAttachmentKey { + base: cat.target.make_attachment_key(cat.ops, caps), + resolve: cat.resolve_target.as_ref().map(|target| { + target.make_attachment_key(crate::AttachmentOps::STORE, caps) + }), + }; + + rp_key.colors.push(Some(color)); + fb_key.attachments.push(cat.target.view.attachment.clone()); + if let Some(ref at) = cat.resolve_target { + vk_clear_values.push(unsafe { mem::zeroed() }); + vk_image_views.push(at.view.raw); + 
fb_key.attachments.push(at.view.attachment.clone()); + } + + // Assert this attachment is valid for the detected multiview, as a sanity check + // The driver crash for this is really bad on AMD, so the check is worth it + if let Some(multiview) = desc.multiview { + assert_eq!(cat.target.view.layers, multiview); + if let Some(ref resolve_target) = cat.resolve_target { + assert_eq!(resolve_target.view.layers, multiview); + } + } + } else { + rp_key.colors.push(None); + } + } + if let Some(ref ds) = desc.depth_stencil_attachment { + vk_clear_values.push(vk::ClearValue { + depth_stencil: vk::ClearDepthStencilValue { + depth: ds.clear_value.0, + stencil: ds.clear_value.1, + }, + }); + vk_image_views.push(ds.target.view.raw); + rp_key.depth_stencil = Some(super::DepthStencilAttachmentKey { + base: ds.target.make_attachment_key(ds.depth_ops, caps), + stencil_ops: ds.stencil_ops, + }); + fb_key.attachments.push(ds.target.view.attachment.clone()); + + // Assert this attachment is valid for the detected multiview, as a sanity check + // The driver crash for this is really bad on AMD, so the check is worth it + if let Some(multiview) = desc.multiview { + assert_eq!(ds.target.view.layers, multiview); + } + } + rp_key.sample_count = fb_key.sample_count; + rp_key.multiview = desc.multiview; + + let render_area = vk::Rect2D { + offset: vk::Offset2D { x: 0, y: 0 }, + extent: vk::Extent2D { + width: desc.extent.width, + height: desc.extent.height, + }, + }; + let vk_viewports = [vk::Viewport { + x: 0.0, + y: if self.device.private_caps.flip_y_requires_shift { + desc.extent.height as f32 + } else { + 0.0 + }, + width: desc.extent.width as f32, + height: -(desc.extent.height as f32), + min_depth: 0.0, + max_depth: 1.0, + }]; + + let raw_pass = self.device.make_render_pass(rp_key).unwrap(); + let raw_framebuffer = self + .device + .make_framebuffer(fb_key, raw_pass, desc.label) + .unwrap(); + + let mut vk_info = vk::RenderPassBeginInfo::builder() + .render_pass(raw_pass) + 
.render_area(render_area) + .clear_values(&vk_clear_values) + .framebuffer(raw_framebuffer); + let mut vk_attachment_info = if caps.imageless_framebuffers { + Some( + vk::RenderPassAttachmentBeginInfo::builder() + .attachments(&vk_image_views) + .build(), + ) + } else { + None + }; + if let Some(attachment_info) = vk_attachment_info.as_mut() { + vk_info = vk_info.push_next(attachment_info); + } + + if let Some(label) = desc.label { + unsafe { self.begin_debug_marker(label) }; + self.rpass_debug_marker_active = true; + } + + // Start timestamp if any (before all other commands but after debug marker) + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { + if let Some(index) = timestamp_writes.beginning_of_pass_write_index { + unsafe { + self.write_timestamp(timestamp_writes.query_set, index); + } + } + self.end_of_pass_timer_query = timestamp_writes + .end_of_pass_write_index + .map(|index| (timestamp_writes.query_set.raw, index)); + } + + unsafe { + self.device + .raw + .cmd_set_viewport(self.active, 0, &vk_viewports); + self.device + .raw + .cmd_set_scissor(self.active, 0, &[render_area]); + self.device.raw.cmd_begin_render_pass( + self.active, + &vk_info, + vk::SubpassContents::INLINE, + ); + }; + + self.bind_point = vk::PipelineBindPoint::GRAPHICS; + } + unsafe fn end_render_pass(&mut self) { + unsafe { + self.device.raw.cmd_end_render_pass(self.active); + } + + // After all other commands but before debug marker, so this is still seen as part of this pass. 
+ self.write_pass_end_timestamp_if_requested(); + + if self.rpass_debug_marker_active { + unsafe { + self.end_debug_marker(); + } + self.rpass_debug_marker_active = false; + } + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let sets = [*group.set.raw()]; + unsafe { + self.device.raw.cmd_bind_descriptor_sets( + self.active, + self.bind_point, + layout.raw, + index, + &sets, + dynamic_offsets, + ) + }; + } + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset_bytes: u32, + data: &[u32], + ) { + unsafe { + self.device.raw.cmd_push_constants( + self.active, + layout.raw, + conv::map_shader_stage(stages), + offset_bytes, + slice::from_raw_parts(data.as_ptr() as _, data.len() * 4), + ) + }; + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + if let Some(ext) = self.device.debug_messenger() { + let cstr = self.temp.make_c_str(label); + let vk_label = vk::DebugUtilsLabelEXT::builder().label_name(cstr).build(); + unsafe { ext.cmd_insert_debug_utils_label(self.active, &vk_label) }; + } + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + if let Some(ext) = self.device.debug_messenger() { + let cstr = self.temp.make_c_str(group_label); + let vk_label = vk::DebugUtilsLabelEXT::builder().label_name(cstr).build(); + unsafe { ext.cmd_begin_debug_utils_label(self.active, &vk_label) }; + } + } + unsafe fn end_debug_marker(&mut self) { + if let Some(ext) = self.device.debug_messenger() { + unsafe { ext.cmd_end_debug_utils_label(self.active) }; + } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + unsafe { + self.device.raw.cmd_bind_pipeline( + self.active, + vk::PipelineBindPoint::GRAPHICS, + pipeline.raw, + ) + }; + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: 
wgt::IndexFormat, + ) { + unsafe { + self.device.raw.cmd_bind_index_buffer( + self.active, + binding.buffer.raw, + binding.offset, + conv::map_index_format(format), + ) + }; + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let vk_buffers = [binding.buffer.raw]; + let vk_offsets = [binding.offset]; + unsafe { + self.device + .raw + .cmd_bind_vertex_buffers(self.active, index, &vk_buffers, &vk_offsets) + }; + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let vk_viewports = [vk::Viewport { + x: rect.x, + y: if self.device.private_caps.flip_y_requires_shift { + rect.y + rect.h + } else { + rect.y + }, + width: rect.w, + height: -rect.h, // flip Y + min_depth: depth_range.start, + max_depth: depth_range.end, + }]; + unsafe { + self.device + .raw + .cmd_set_viewport(self.active, 0, &vk_viewports) + }; + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + let vk_scissors = [vk::Rect2D { + offset: vk::Offset2D { + x: rect.x as i32, + y: rect.y as i32, + }, + extent: vk::Extent2D { + width: rect.w, + height: rect.h, + }, + }]; + unsafe { + self.device + .raw + .cmd_set_scissor(self.active, 0, &vk_scissors) + }; + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + unsafe { + self.device.raw.cmd_set_stencil_reference( + self.active, + vk::StencilFaceFlags::FRONT_AND_BACK, + value, + ) + }; + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + unsafe { self.device.raw.cmd_set_blend_constants(self.active, color) }; + } + + unsafe fn draw( + &mut self, + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw( + self.active, + vertex_count, + instance_count, + first_vertex, + first_instance, + ) + }; + } + unsafe fn draw_indexed( + &mut self, + first_index: u32, + index_count: u32, + base_vertex: i32, + first_instance: u32, + instance_count: 
u32, + ) { + unsafe { + self.device.raw.cmd_draw_indexed( + self.active, + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + ) + }; + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw_indirect( + self.active, + buffer.raw, + offset, + draw_count, + mem::size_of::<wgt::DrawIndirectArgs>() as u32, + ) + }; + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw_indexed_indirect( + self.active, + buffer.raw, + offset, + draw_count, + mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32, + ) + }; + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let stride = mem::size_of::<wgt::DrawIndirectArgs>() as u32; + match self.device.extension_fns.draw_indirect_count { + Some(ref t) => { + unsafe { + t.cmd_draw_indirect_count( + self.active, + buffer.raw, + offset, + count_buffer.raw, + count_offset, + max_count, + stride, + ) + }; + } + None => panic!("Feature `DRAW_INDIRECT_COUNT` not enabled"), + } + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let stride = mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32; + match self.device.extension_fns.draw_indirect_count { + Some(ref t) => { + unsafe { + t.cmd_draw_indexed_indirect_count( + self.active, + buffer.raw, + offset, + count_buffer.raw, + count_offset, + max_count, + stride, + ) + }; + } + None => panic!("Feature `DRAW_INDIRECT_COUNT` not enabled"), + } + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor<'_, 
super::Api>) { + self.bind_point = vk::PipelineBindPoint::COMPUTE; + if let Some(label) = desc.label { + unsafe { self.begin_debug_marker(label) }; + self.rpass_debug_marker_active = true; + } + + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { + if let Some(index) = timestamp_writes.beginning_of_pass_write_index { + unsafe { + self.write_timestamp(timestamp_writes.query_set, index); + } + } + self.end_of_pass_timer_query = timestamp_writes + .end_of_pass_write_index + .map(|index| (timestamp_writes.query_set.raw, index)); + } + } + unsafe fn end_compute_pass(&mut self) { + self.write_pass_end_timestamp_if_requested(); + + if self.rpass_debug_marker_active { + unsafe { self.end_debug_marker() }; + self.rpass_debug_marker_active = false + } + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + unsafe { + self.device.raw.cmd_bind_pipeline( + self.active, + vk::PipelineBindPoint::COMPUTE, + pipeline.raw, + ) + }; + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + unsafe { + self.device + .raw + .cmd_dispatch(self.active, count[0], count[1], count[2]) + }; + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + unsafe { + self.device + .raw + .cmd_dispatch_indirect(self.active, buffer.raw, offset) + } + } +} + +#[test] +fn check_dst_image_layout() { + assert_eq!( + conv::derive_image_layout(crate::TextureUses::COPY_DST, wgt::TextureFormat::Rgba8Unorm), + DST_IMAGE_LAYOUT + ); +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/conv.rs b/third_party/rust/wgpu-hal/src/vulkan/conv.rs new file mode 100644 index 0000000000..8202c93aa3 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/conv.rs @@ -0,0 +1,965 @@ +use ash::vk; + +impl super::PrivateCapabilities { + pub fn map_texture_format(&self, format: wgt::TextureFormat) -> vk::Format { + use ash::vk::Format as F; + use wgt::TextureFormat as Tf; + use wgt::{AstcBlock, AstcChannel}; + match format { + Tf::R8Unorm 
=> F::R8_UNORM, + Tf::R8Snorm => F::R8_SNORM, + Tf::R8Uint => F::R8_UINT, + Tf::R8Sint => F::R8_SINT, + Tf::R16Uint => F::R16_UINT, + Tf::R16Sint => F::R16_SINT, + Tf::R16Unorm => F::R16_UNORM, + Tf::R16Snorm => F::R16_SNORM, + Tf::R16Float => F::R16_SFLOAT, + Tf::Rg8Unorm => F::R8G8_UNORM, + Tf::Rg8Snorm => F::R8G8_SNORM, + Tf::Rg8Uint => F::R8G8_UINT, + Tf::Rg8Sint => F::R8G8_SINT, + Tf::Rg16Unorm => F::R16G16_UNORM, + Tf::Rg16Snorm => F::R16G16_SNORM, + Tf::R32Uint => F::R32_UINT, + Tf::R32Sint => F::R32_SINT, + Tf::R32Float => F::R32_SFLOAT, + Tf::Rg16Uint => F::R16G16_UINT, + Tf::Rg16Sint => F::R16G16_SINT, + Tf::Rg16Float => F::R16G16_SFLOAT, + Tf::Rgba8Unorm => F::R8G8B8A8_UNORM, + Tf::Rgba8UnormSrgb => F::R8G8B8A8_SRGB, + Tf::Bgra8UnormSrgb => F::B8G8R8A8_SRGB, + Tf::Rgba8Snorm => F::R8G8B8A8_SNORM, + Tf::Bgra8Unorm => F::B8G8R8A8_UNORM, + Tf::Rgba8Uint => F::R8G8B8A8_UINT, + Tf::Rgba8Sint => F::R8G8B8A8_SINT, + Tf::Rgb10a2Uint => F::A2B10G10R10_UINT_PACK32, + Tf::Rgb10a2Unorm => F::A2B10G10R10_UNORM_PACK32, + Tf::Rg11b10Float => F::B10G11R11_UFLOAT_PACK32, + Tf::Rg32Uint => F::R32G32_UINT, + Tf::Rg32Sint => F::R32G32_SINT, + Tf::Rg32Float => F::R32G32_SFLOAT, + Tf::Rgba16Uint => F::R16G16B16A16_UINT, + Tf::Rgba16Sint => F::R16G16B16A16_SINT, + Tf::Rgba16Unorm => F::R16G16B16A16_UNORM, + Tf::Rgba16Snorm => F::R16G16B16A16_SNORM, + Tf::Rgba16Float => F::R16G16B16A16_SFLOAT, + Tf::Rgba32Uint => F::R32G32B32A32_UINT, + Tf::Rgba32Sint => F::R32G32B32A32_SINT, + Tf::Rgba32Float => F::R32G32B32A32_SFLOAT, + Tf::Depth32Float => F::D32_SFLOAT, + Tf::Depth32FloatStencil8 => F::D32_SFLOAT_S8_UINT, + Tf::Depth24Plus => { + if self.texture_d24 { + F::X8_D24_UNORM_PACK32 + } else { + F::D32_SFLOAT + } + } + Tf::Depth24PlusStencil8 => { + if self.texture_d24_s8 { + F::D24_UNORM_S8_UINT + } else { + F::D32_SFLOAT_S8_UINT + } + } + Tf::Stencil8 => { + if self.texture_s8 { + F::S8_UINT + } else if self.texture_d24_s8 { + F::D24_UNORM_S8_UINT + } else { + 
F::D32_SFLOAT_S8_UINT + } + } + Tf::Depth16Unorm => F::D16_UNORM, + Tf::NV12 => F::G8_B8R8_2PLANE_420_UNORM, + Tf::Rgb9e5Ufloat => F::E5B9G9R9_UFLOAT_PACK32, + Tf::Bc1RgbaUnorm => F::BC1_RGBA_UNORM_BLOCK, + Tf::Bc1RgbaUnormSrgb => F::BC1_RGBA_SRGB_BLOCK, + Tf::Bc2RgbaUnorm => F::BC2_UNORM_BLOCK, + Tf::Bc2RgbaUnormSrgb => F::BC2_SRGB_BLOCK, + Tf::Bc3RgbaUnorm => F::BC3_UNORM_BLOCK, + Tf::Bc3RgbaUnormSrgb => F::BC3_SRGB_BLOCK, + Tf::Bc4RUnorm => F::BC4_UNORM_BLOCK, + Tf::Bc4RSnorm => F::BC4_SNORM_BLOCK, + Tf::Bc5RgUnorm => F::BC5_UNORM_BLOCK, + Tf::Bc5RgSnorm => F::BC5_SNORM_BLOCK, + Tf::Bc6hRgbUfloat => F::BC6H_UFLOAT_BLOCK, + Tf::Bc6hRgbFloat => F::BC6H_SFLOAT_BLOCK, + Tf::Bc7RgbaUnorm => F::BC7_UNORM_BLOCK, + Tf::Bc7RgbaUnormSrgb => F::BC7_SRGB_BLOCK, + Tf::Etc2Rgb8Unorm => F::ETC2_R8G8B8_UNORM_BLOCK, + Tf::Etc2Rgb8UnormSrgb => F::ETC2_R8G8B8_SRGB_BLOCK, + Tf::Etc2Rgb8A1Unorm => F::ETC2_R8G8B8A1_UNORM_BLOCK, + Tf::Etc2Rgb8A1UnormSrgb => F::ETC2_R8G8B8A1_SRGB_BLOCK, + Tf::Etc2Rgba8Unorm => F::ETC2_R8G8B8A8_UNORM_BLOCK, + Tf::Etc2Rgba8UnormSrgb => F::ETC2_R8G8B8A8_SRGB_BLOCK, + Tf::EacR11Unorm => F::EAC_R11_UNORM_BLOCK, + Tf::EacR11Snorm => F::EAC_R11_SNORM_BLOCK, + Tf::EacRg11Unorm => F::EAC_R11G11_UNORM_BLOCK, + Tf::EacRg11Snorm => F::EAC_R11G11_SNORM_BLOCK, + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm => match block { + AstcBlock::B4x4 => F::ASTC_4X4_UNORM_BLOCK, + AstcBlock::B5x4 => F::ASTC_5X4_UNORM_BLOCK, + AstcBlock::B5x5 => F::ASTC_5X5_UNORM_BLOCK, + AstcBlock::B6x5 => F::ASTC_6X5_UNORM_BLOCK, + AstcBlock::B6x6 => F::ASTC_6X6_UNORM_BLOCK, + AstcBlock::B8x5 => F::ASTC_8X5_UNORM_BLOCK, + AstcBlock::B8x6 => F::ASTC_8X6_UNORM_BLOCK, + AstcBlock::B8x8 => F::ASTC_8X8_UNORM_BLOCK, + AstcBlock::B10x5 => F::ASTC_10X5_UNORM_BLOCK, + AstcBlock::B10x6 => F::ASTC_10X6_UNORM_BLOCK, + AstcBlock::B10x8 => F::ASTC_10X8_UNORM_BLOCK, + AstcBlock::B10x10 => F::ASTC_10X10_UNORM_BLOCK, + AstcBlock::B12x10 => F::ASTC_12X10_UNORM_BLOCK, + AstcBlock::B12x12 
=> F::ASTC_12X12_UNORM_BLOCK, + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => F::ASTC_4X4_SRGB_BLOCK, + AstcBlock::B5x4 => F::ASTC_5X4_SRGB_BLOCK, + AstcBlock::B5x5 => F::ASTC_5X5_SRGB_BLOCK, + AstcBlock::B6x5 => F::ASTC_6X5_SRGB_BLOCK, + AstcBlock::B6x6 => F::ASTC_6X6_SRGB_BLOCK, + AstcBlock::B8x5 => F::ASTC_8X5_SRGB_BLOCK, + AstcBlock::B8x6 => F::ASTC_8X6_SRGB_BLOCK, + AstcBlock::B8x8 => F::ASTC_8X8_SRGB_BLOCK, + AstcBlock::B10x5 => F::ASTC_10X5_SRGB_BLOCK, + AstcBlock::B10x6 => F::ASTC_10X6_SRGB_BLOCK, + AstcBlock::B10x8 => F::ASTC_10X8_SRGB_BLOCK, + AstcBlock::B10x10 => F::ASTC_10X10_SRGB_BLOCK, + AstcBlock::B12x10 => F::ASTC_12X10_SRGB_BLOCK, + AstcBlock::B12x12 => F::ASTC_12X12_SRGB_BLOCK, + }, + AstcChannel::Hdr => match block { + AstcBlock::B4x4 => F::ASTC_4X4_SFLOAT_BLOCK_EXT, + AstcBlock::B5x4 => F::ASTC_5X4_SFLOAT_BLOCK_EXT, + AstcBlock::B5x5 => F::ASTC_5X5_SFLOAT_BLOCK_EXT, + AstcBlock::B6x5 => F::ASTC_6X5_SFLOAT_BLOCK_EXT, + AstcBlock::B6x6 => F::ASTC_6X6_SFLOAT_BLOCK_EXT, + AstcBlock::B8x5 => F::ASTC_8X5_SFLOAT_BLOCK_EXT, + AstcBlock::B8x6 => F::ASTC_8X6_SFLOAT_BLOCK_EXT, + AstcBlock::B8x8 => F::ASTC_8X8_SFLOAT_BLOCK_EXT, + AstcBlock::B10x5 => F::ASTC_10X5_SFLOAT_BLOCK_EXT, + AstcBlock::B10x6 => F::ASTC_10X6_SFLOAT_BLOCK_EXT, + AstcBlock::B10x8 => F::ASTC_10X8_SFLOAT_BLOCK_EXT, + AstcBlock::B10x10 => F::ASTC_10X10_SFLOAT_BLOCK_EXT, + AstcBlock::B12x10 => F::ASTC_12X10_SFLOAT_BLOCK_EXT, + AstcBlock::B12x12 => F::ASTC_12X12_SFLOAT_BLOCK_EXT, + }, + }, + } + } +} + +pub fn map_vk_surface_formats(sf: vk::SurfaceFormatKHR) -> Option<wgt::TextureFormat> { + use ash::vk::Format as F; + use wgt::TextureFormat as Tf; + // List we care about pulled from https://vulkan.gpuinfo.org/listsurfaceformats.php + Some(match sf.color_space { + vk::ColorSpaceKHR::SRGB_NONLINEAR => match sf.format { + F::B8G8R8A8_UNORM => Tf::Bgra8Unorm, + F::B8G8R8A8_SRGB => Tf::Bgra8UnormSrgb, + F::R8G8B8A8_SNORM => Tf::Rgba8Snorm, + F::R8G8B8A8_UNORM => 
Tf::Rgba8Unorm, + F::R8G8B8A8_SRGB => Tf::Rgba8UnormSrgb, + _ => return None, + }, + vk::ColorSpaceKHR::EXTENDED_SRGB_LINEAR_EXT => match sf.format { + F::R16G16B16A16_SFLOAT => Tf::Rgba16Float, + F::R16G16B16A16_SNORM => Tf::Rgba16Snorm, + F::R16G16B16A16_UNORM => Tf::Rgba16Unorm, + F::A2B10G10R10_UNORM_PACK32 => Tf::Rgb10a2Unorm, + _ => return None, + }, + _ => return None, + }) +} + +impl crate::Attachment<'_, super::Api> { + pub(super) fn make_attachment_key( + &self, + ops: crate::AttachmentOps, + caps: &super::PrivateCapabilities, + ) -> super::AttachmentKey { + super::AttachmentKey { + format: caps.map_texture_format(self.view.attachment.view_format), + layout: derive_image_layout(self.usage, self.view.attachment.view_format), + ops, + } + } +} + +impl crate::ColorAttachment<'_, super::Api> { + pub(super) unsafe fn make_vk_clear_color(&self) -> vk::ClearColorValue { + let cv = &self.clear_value; + match self + .target + .view + .attachment + .view_format + .sample_type(None, None) + .unwrap() + { + wgt::TextureSampleType::Float { .. 
} => vk::ClearColorValue { + float32: [cv.r as f32, cv.g as f32, cv.b as f32, cv.a as f32], + }, + wgt::TextureSampleType::Sint => vk::ClearColorValue { + int32: [cv.r as i32, cv.g as i32, cv.b as i32, cv.a as i32], + }, + wgt::TextureSampleType::Uint => vk::ClearColorValue { + uint32: [cv.r as u32, cv.g as u32, cv.b as u32, cv.a as u32], + }, + wgt::TextureSampleType::Depth => unreachable!(), + } + } +} + +pub fn derive_image_layout( + usage: crate::TextureUses, + format: wgt::TextureFormat, +) -> vk::ImageLayout { + // Note: depth textures are always sampled with RODS layout + let is_color = !format.is_depth_stencil_format(); + match usage { + crate::TextureUses::UNINITIALIZED => vk::ImageLayout::UNDEFINED, + crate::TextureUses::COPY_SRC => vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + crate::TextureUses::COPY_DST => vk::ImageLayout::TRANSFER_DST_OPTIMAL, + crate::TextureUses::RESOURCE if is_color => vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL, + crate::TextureUses::COLOR_TARGET => vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL, + crate::TextureUses::DEPTH_STENCIL_WRITE => { + vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL + } + _ => { + if usage == crate::TextureUses::PRESENT { + vk::ImageLayout::PRESENT_SRC_KHR + } else if is_color { + vk::ImageLayout::GENERAL + } else { + vk::ImageLayout::DEPTH_STENCIL_READ_ONLY_OPTIMAL + } + } + } +} + +pub fn map_texture_usage(usage: crate::TextureUses) -> vk::ImageUsageFlags { + let mut flags = vk::ImageUsageFlags::empty(); + if usage.contains(crate::TextureUses::COPY_SRC) { + flags |= vk::ImageUsageFlags::TRANSFER_SRC; + } + if usage.contains(crate::TextureUses::COPY_DST) { + flags |= vk::ImageUsageFlags::TRANSFER_DST; + } + if usage.contains(crate::TextureUses::RESOURCE) { + flags |= vk::ImageUsageFlags::SAMPLED; + } + if usage.contains(crate::TextureUses::COLOR_TARGET) { + flags |= vk::ImageUsageFlags::COLOR_ATTACHMENT; + } + if usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, 
+ ) { + flags |= vk::ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT; + } + if usage.intersects(crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE) { + flags |= vk::ImageUsageFlags::STORAGE; + } + flags +} + +pub fn map_texture_usage_to_barrier( + usage: crate::TextureUses, +) -> (vk::PipelineStageFlags, vk::AccessFlags) { + let mut stages = vk::PipelineStageFlags::empty(); + let mut access = vk::AccessFlags::empty(); + let shader_stages = vk::PipelineStageFlags::VERTEX_SHADER + | vk::PipelineStageFlags::FRAGMENT_SHADER + | vk::PipelineStageFlags::COMPUTE_SHADER; + + if usage.contains(crate::TextureUses::COPY_SRC) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_READ; + } + if usage.contains(crate::TextureUses::COPY_DST) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_WRITE; + } + if usage.contains(crate::TextureUses::RESOURCE) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ; + } + if usage.contains(crate::TextureUses::COLOR_TARGET) { + stages |= vk::PipelineStageFlags::COLOR_ATTACHMENT_OUTPUT; + access |= vk::AccessFlags::COLOR_ATTACHMENT_READ | vk::AccessFlags::COLOR_ATTACHMENT_WRITE; + } + if usage.intersects(crate::TextureUses::DEPTH_STENCIL_READ) { + stages |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS + | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS; + access |= vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ; + } + if usage.intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) { + stages |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS + | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS; + access |= vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ + | vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE; + } + if usage.contains(crate::TextureUses::STORAGE_READ) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ; + } + if usage.contains(crate::TextureUses::STORAGE_READ_WRITE) { + stages |= shader_stages; + access |= 
vk::AccessFlags::SHADER_READ | vk::AccessFlags::SHADER_WRITE; + } + + if usage == crate::TextureUses::UNINITIALIZED || usage == crate::TextureUses::PRESENT { + ( + vk::PipelineStageFlags::TOP_OF_PIPE, + vk::AccessFlags::empty(), + ) + } else { + (stages, access) + } +} + +pub fn map_vk_image_usage(usage: vk::ImageUsageFlags) -> crate::TextureUses { + let mut bits = crate::TextureUses::empty(); + if usage.contains(vk::ImageUsageFlags::TRANSFER_SRC) { + bits |= crate::TextureUses::COPY_SRC; + } + if usage.contains(vk::ImageUsageFlags::TRANSFER_DST) { + bits |= crate::TextureUses::COPY_DST; + } + if usage.contains(vk::ImageUsageFlags::SAMPLED) { + bits |= crate::TextureUses::RESOURCE; + } + if usage.contains(vk::ImageUsageFlags::COLOR_ATTACHMENT) { + bits |= crate::TextureUses::COLOR_TARGET; + } + if usage.contains(vk::ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT) { + bits |= crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE; + } + if usage.contains(vk::ImageUsageFlags::STORAGE) { + bits |= crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE; + } + bits +} + +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> vk::ImageType { + match dim { + wgt::TextureDimension::D1 => vk::ImageType::TYPE_1D, + wgt::TextureDimension::D2 => vk::ImageType::TYPE_2D, + wgt::TextureDimension::D3 => vk::ImageType::TYPE_3D, + } +} + +pub fn map_index_format(index_format: wgt::IndexFormat) -> vk::IndexType { + match index_format { + wgt::IndexFormat::Uint16 => vk::IndexType::UINT16, + wgt::IndexFormat::Uint32 => vk::IndexType::UINT32, + } +} + +pub fn map_vertex_format(vertex_format: wgt::VertexFormat) -> vk::Format { + use wgt::VertexFormat as Vf; + match vertex_format { + Vf::Uint8x2 => vk::Format::R8G8_UINT, + Vf::Uint8x4 => vk::Format::R8G8B8A8_UINT, + Vf::Sint8x2 => vk::Format::R8G8_SINT, + Vf::Sint8x4 => vk::Format::R8G8B8A8_SINT, + Vf::Unorm8x2 => vk::Format::R8G8_UNORM, + Vf::Unorm8x4 => vk::Format::R8G8B8A8_UNORM, + 
Vf::Snorm8x2 => vk::Format::R8G8_SNORM, + Vf::Snorm8x4 => vk::Format::R8G8B8A8_SNORM, + Vf::Uint16x2 => vk::Format::R16G16_UINT, + Vf::Uint16x4 => vk::Format::R16G16B16A16_UINT, + Vf::Sint16x2 => vk::Format::R16G16_SINT, + Vf::Sint16x4 => vk::Format::R16G16B16A16_SINT, + Vf::Unorm16x2 => vk::Format::R16G16_UNORM, + Vf::Unorm16x4 => vk::Format::R16G16B16A16_UNORM, + Vf::Snorm16x2 => vk::Format::R16G16_SNORM, + Vf::Snorm16x4 => vk::Format::R16G16B16A16_SNORM, + Vf::Float16x2 => vk::Format::R16G16_SFLOAT, + Vf::Float16x4 => vk::Format::R16G16B16A16_SFLOAT, + Vf::Float32 => vk::Format::R32_SFLOAT, + Vf::Float32x2 => vk::Format::R32G32_SFLOAT, + Vf::Float32x3 => vk::Format::R32G32B32_SFLOAT, + Vf::Float32x4 => vk::Format::R32G32B32A32_SFLOAT, + Vf::Uint32 => vk::Format::R32_UINT, + Vf::Uint32x2 => vk::Format::R32G32_UINT, + Vf::Uint32x3 => vk::Format::R32G32B32_UINT, + Vf::Uint32x4 => vk::Format::R32G32B32A32_UINT, + Vf::Sint32 => vk::Format::R32_SINT, + Vf::Sint32x2 => vk::Format::R32G32_SINT, + Vf::Sint32x3 => vk::Format::R32G32B32_SINT, + Vf::Sint32x4 => vk::Format::R32G32B32A32_SINT, + Vf::Float64 => vk::Format::R64_SFLOAT, + Vf::Float64x2 => vk::Format::R64G64_SFLOAT, + Vf::Float64x3 => vk::Format::R64G64B64_SFLOAT, + Vf::Float64x4 => vk::Format::R64G64B64A64_SFLOAT, + } +} + +pub fn map_aspects(aspects: crate::FormatAspects) -> vk::ImageAspectFlags { + let mut flags = vk::ImageAspectFlags::empty(); + if aspects.contains(crate::FormatAspects::COLOR) { + flags |= vk::ImageAspectFlags::COLOR; + } + if aspects.contains(crate::FormatAspects::DEPTH) { + flags |= vk::ImageAspectFlags::DEPTH; + } + if aspects.contains(crate::FormatAspects::STENCIL) { + flags |= vk::ImageAspectFlags::STENCIL; + } + if aspects.contains(crate::FormatAspects::PLANE_0) { + flags |= vk::ImageAspectFlags::PLANE_0; + } + if aspects.contains(crate::FormatAspects::PLANE_1) { + flags |= vk::ImageAspectFlags::PLANE_1; + } + if aspects.contains(crate::FormatAspects::PLANE_2) { + flags |= 
vk::ImageAspectFlags::PLANE_2; + } + flags +} + +pub fn map_attachment_ops( + op: crate::AttachmentOps, +) -> (vk::AttachmentLoadOp, vk::AttachmentStoreOp) { + let load_op = if op.contains(crate::AttachmentOps::LOAD) { + vk::AttachmentLoadOp::LOAD + } else { + vk::AttachmentLoadOp::CLEAR + }; + let store_op = if op.contains(crate::AttachmentOps::STORE) { + vk::AttachmentStoreOp::STORE + } else { + vk::AttachmentStoreOp::DONT_CARE + }; + (load_op, store_op) +} + +pub fn map_present_mode(mode: wgt::PresentMode) -> vk::PresentModeKHR { + match mode { + wgt::PresentMode::Immediate => vk::PresentModeKHR::IMMEDIATE, + wgt::PresentMode::Mailbox => vk::PresentModeKHR::MAILBOX, + wgt::PresentMode::Fifo => vk::PresentModeKHR::FIFO, + wgt::PresentMode::FifoRelaxed => vk::PresentModeKHR::FIFO_RELAXED, + wgt::PresentMode::AutoNoVsync | wgt::PresentMode::AutoVsync => { + unreachable!("Cannot create swapchain with Auto PresentationMode") + } + } +} + +pub fn map_vk_present_mode(mode: vk::PresentModeKHR) -> Option<wgt::PresentMode> { + if mode == vk::PresentModeKHR::IMMEDIATE { + Some(wgt::PresentMode::Immediate) + } else if mode == vk::PresentModeKHR::MAILBOX { + Some(wgt::PresentMode::Mailbox) + } else if mode == vk::PresentModeKHR::FIFO { + Some(wgt::PresentMode::Fifo) + } else if mode == vk::PresentModeKHR::FIFO_RELAXED { + Some(wgt::PresentMode::FifoRelaxed) + } else { + log::warn!("Unrecognized present mode {:?}", mode); + None + } +} + +pub fn map_composite_alpha_mode(mode: wgt::CompositeAlphaMode) -> vk::CompositeAlphaFlagsKHR { + match mode { + wgt::CompositeAlphaMode::Opaque => vk::CompositeAlphaFlagsKHR::OPAQUE, + wgt::CompositeAlphaMode::PreMultiplied => vk::CompositeAlphaFlagsKHR::PRE_MULTIPLIED, + wgt::CompositeAlphaMode::PostMultiplied => vk::CompositeAlphaFlagsKHR::POST_MULTIPLIED, + wgt::CompositeAlphaMode::Inherit => vk::CompositeAlphaFlagsKHR::INHERIT, + wgt::CompositeAlphaMode::Auto => unreachable!(), + } +} + +pub fn map_vk_composite_alpha(flags: 
vk::CompositeAlphaFlagsKHR) -> Vec<wgt::CompositeAlphaMode> { + let mut modes = Vec::new(); + if flags.contains(vk::CompositeAlphaFlagsKHR::OPAQUE) { + modes.push(wgt::CompositeAlphaMode::Opaque); + } + if flags.contains(vk::CompositeAlphaFlagsKHR::PRE_MULTIPLIED) { + modes.push(wgt::CompositeAlphaMode::PreMultiplied); + } + if flags.contains(vk::CompositeAlphaFlagsKHR::POST_MULTIPLIED) { + modes.push(wgt::CompositeAlphaMode::PostMultiplied); + } + if flags.contains(vk::CompositeAlphaFlagsKHR::INHERIT) { + modes.push(wgt::CompositeAlphaMode::Inherit); + } + modes +} + +pub fn map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags { + let mut flags = vk::BufferUsageFlags::empty(); + if usage.contains(crate::BufferUses::COPY_SRC) { + flags |= vk::BufferUsageFlags::TRANSFER_SRC; + } + if usage.contains(crate::BufferUses::COPY_DST) { + flags |= vk::BufferUsageFlags::TRANSFER_DST; + } + if usage.contains(crate::BufferUses::UNIFORM) { + flags |= vk::BufferUsageFlags::UNIFORM_BUFFER; + } + if usage.intersects(crate::BufferUses::STORAGE_READ | crate::BufferUses::STORAGE_READ_WRITE) { + flags |= vk::BufferUsageFlags::STORAGE_BUFFER; + } + if usage.contains(crate::BufferUses::INDEX) { + flags |= vk::BufferUsageFlags::INDEX_BUFFER; + } + if usage.contains(crate::BufferUses::VERTEX) { + flags |= vk::BufferUsageFlags::VERTEX_BUFFER; + } + if usage.contains(crate::BufferUses::INDIRECT) { + flags |= vk::BufferUsageFlags::INDIRECT_BUFFER; + } + if usage.contains(crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH) { + flags |= vk::BufferUsageFlags::STORAGE_BUFFER | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; + } + if usage.intersects( + crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + ) { + flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR + | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; + } + flags +} + +pub fn map_buffer_usage_to_barrier( + usage: 
crate::BufferUses, +) -> (vk::PipelineStageFlags, vk::AccessFlags) { + let mut stages = vk::PipelineStageFlags::empty(); + let mut access = vk::AccessFlags::empty(); + let shader_stages = vk::PipelineStageFlags::VERTEX_SHADER + | vk::PipelineStageFlags::FRAGMENT_SHADER + | vk::PipelineStageFlags::COMPUTE_SHADER; + + if usage.contains(crate::BufferUses::MAP_READ) { + stages |= vk::PipelineStageFlags::HOST; + access |= vk::AccessFlags::HOST_READ; + } + if usage.contains(crate::BufferUses::MAP_WRITE) { + stages |= vk::PipelineStageFlags::HOST; + access |= vk::AccessFlags::HOST_WRITE; + } + if usage.contains(crate::BufferUses::COPY_SRC) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_READ; + } + if usage.contains(crate::BufferUses::COPY_DST) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_WRITE; + } + if usage.contains(crate::BufferUses::UNIFORM) { + stages |= shader_stages; + access |= vk::AccessFlags::UNIFORM_READ; + } + if usage.intersects(crate::BufferUses::STORAGE_READ) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ; + } + if usage.intersects(crate::BufferUses::STORAGE_READ_WRITE) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ | vk::AccessFlags::SHADER_WRITE; + } + if usage.contains(crate::BufferUses::INDEX) { + stages |= vk::PipelineStageFlags::VERTEX_INPUT; + access |= vk::AccessFlags::INDEX_READ; + } + if usage.contains(crate::BufferUses::VERTEX) { + stages |= vk::PipelineStageFlags::VERTEX_INPUT; + access |= vk::AccessFlags::VERTEX_ATTRIBUTE_READ; + } + if usage.contains(crate::BufferUses::INDIRECT) { + stages |= vk::PipelineStageFlags::DRAW_INDIRECT; + access |= vk::AccessFlags::INDIRECT_COMMAND_READ; + } + if usage.intersects( + crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, + ) { + stages |= 
vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR + | vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; + } + + (stages, access) +} + +pub fn map_view_dimension(dim: wgt::TextureViewDimension) -> vk::ImageViewType { + match dim { + wgt::TextureViewDimension::D1 => vk::ImageViewType::TYPE_1D, + wgt::TextureViewDimension::D2 => vk::ImageViewType::TYPE_2D, + wgt::TextureViewDimension::D2Array => vk::ImageViewType::TYPE_2D_ARRAY, + wgt::TextureViewDimension::Cube => vk::ImageViewType::CUBE, + wgt::TextureViewDimension::CubeArray => vk::ImageViewType::CUBE_ARRAY, + wgt::TextureViewDimension::D3 => vk::ImageViewType::TYPE_3D, + } +} + +pub fn map_copy_extent(extent: &crate::CopyExtent) -> vk::Extent3D { + vk::Extent3D { + width: extent.width, + height: extent.height, + depth: extent.depth, + } +} + +pub fn map_subresource_range( + range: &wgt::ImageSubresourceRange, + format: wgt::TextureFormat, +) -> vk::ImageSubresourceRange { + vk::ImageSubresourceRange { + aspect_mask: map_aspects(crate::FormatAspects::new(format, range.aspect)), + base_mip_level: range.base_mip_level, + level_count: range.mip_level_count.unwrap_or(vk::REMAINING_MIP_LEVELS), + base_array_layer: range.base_array_layer, + layer_count: range + .array_layer_count + .unwrap_or(vk::REMAINING_ARRAY_LAYERS), + } +} + +// Special subresource range mapping for dealing with barriers +// so that we account for the "hidden" depth aspect in emulated Stencil8. 
+pub(super) fn map_subresource_range_combined_aspect( + range: &wgt::ImageSubresourceRange, + format: wgt::TextureFormat, + private_caps: &super::PrivateCapabilities, +) -> vk::ImageSubresourceRange { + let mut range = map_subresource_range(range, format); + if !private_caps.texture_s8 && format == wgt::TextureFormat::Stencil8 { + range.aspect_mask |= vk::ImageAspectFlags::DEPTH; + } + range +} + +pub fn map_subresource_layers( + base: &crate::TextureCopyBase, +) -> (vk::ImageSubresourceLayers, vk::Offset3D) { + let offset = vk::Offset3D { + x: base.origin.x as i32, + y: base.origin.y as i32, + z: base.origin.z as i32, + }; + let subresource = vk::ImageSubresourceLayers { + aspect_mask: map_aspects(base.aspect), + mip_level: base.mip_level, + base_array_layer: base.array_layer, + layer_count: 1, + }; + (subresource, offset) +} + +pub fn map_filter_mode(mode: wgt::FilterMode) -> vk::Filter { + match mode { + wgt::FilterMode::Nearest => vk::Filter::NEAREST, + wgt::FilterMode::Linear => vk::Filter::LINEAR, + } +} + +pub fn map_mip_filter_mode(mode: wgt::FilterMode) -> vk::SamplerMipmapMode { + match mode { + wgt::FilterMode::Nearest => vk::SamplerMipmapMode::NEAREST, + wgt::FilterMode::Linear => vk::SamplerMipmapMode::LINEAR, + } +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> vk::SamplerAddressMode { + match mode { + wgt::AddressMode::ClampToEdge => vk::SamplerAddressMode::CLAMP_TO_EDGE, + wgt::AddressMode::Repeat => vk::SamplerAddressMode::REPEAT, + wgt::AddressMode::MirrorRepeat => vk::SamplerAddressMode::MIRRORED_REPEAT, + wgt::AddressMode::ClampToBorder => vk::SamplerAddressMode::CLAMP_TO_BORDER, + // wgt::AddressMode::MirrorClamp => vk::SamplerAddressMode::MIRROR_CLAMP_TO_EDGE, + } +} + +pub fn map_border_color(border_color: wgt::SamplerBorderColor) -> vk::BorderColor { + match border_color { + wgt::SamplerBorderColor::TransparentBlack | wgt::SamplerBorderColor::Zero => { + vk::BorderColor::FLOAT_TRANSPARENT_BLACK + } + 
wgt::SamplerBorderColor::OpaqueBlack => vk::BorderColor::FLOAT_OPAQUE_BLACK, + wgt::SamplerBorderColor::OpaqueWhite => vk::BorderColor::FLOAT_OPAQUE_WHITE, + } +} + +pub fn map_comparison(fun: wgt::CompareFunction) -> vk::CompareOp { + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => vk::CompareOp::NEVER, + Cf::Less => vk::CompareOp::LESS, + Cf::LessEqual => vk::CompareOp::LESS_OR_EQUAL, + Cf::Equal => vk::CompareOp::EQUAL, + Cf::GreaterEqual => vk::CompareOp::GREATER_OR_EQUAL, + Cf::Greater => vk::CompareOp::GREATER, + Cf::NotEqual => vk::CompareOp::NOT_EQUAL, + Cf::Always => vk::CompareOp::ALWAYS, + } +} + +pub fn map_shader_stage(stage: wgt::ShaderStages) -> vk::ShaderStageFlags { + let mut flags = vk::ShaderStageFlags::empty(); + if stage.contains(wgt::ShaderStages::VERTEX) { + flags |= vk::ShaderStageFlags::VERTEX; + } + if stage.contains(wgt::ShaderStages::FRAGMENT) { + flags |= vk::ShaderStageFlags::FRAGMENT; + } + if stage.contains(wgt::ShaderStages::COMPUTE) { + flags |= vk::ShaderStageFlags::COMPUTE; + } + flags +} + +pub fn map_binding_type(ty: wgt::BindingType) -> vk::DescriptorType { + match ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => match ty { + wgt::BufferBindingType::Storage { .. } => match has_dynamic_offset { + true => vk::DescriptorType::STORAGE_BUFFER_DYNAMIC, + false => vk::DescriptorType::STORAGE_BUFFER, + }, + wgt::BufferBindingType::Uniform => match has_dynamic_offset { + true => vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC, + false => vk::DescriptorType::UNIFORM_BUFFER, + }, + }, + wgt::BindingType::Sampler { .. } => vk::DescriptorType::SAMPLER, + wgt::BindingType::Texture { .. } => vk::DescriptorType::SAMPLED_IMAGE, + wgt::BindingType::StorageTexture { .. 
} => vk::DescriptorType::STORAGE_IMAGE, + wgt::BindingType::AccelerationStructure => vk::DescriptorType::ACCELERATION_STRUCTURE_KHR, + } +} + +pub fn map_topology(topology: wgt::PrimitiveTopology) -> vk::PrimitiveTopology { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => vk::PrimitiveTopology::POINT_LIST, + Pt::LineList => vk::PrimitiveTopology::LINE_LIST, + Pt::LineStrip => vk::PrimitiveTopology::LINE_STRIP, + Pt::TriangleList => vk::PrimitiveTopology::TRIANGLE_LIST, + Pt::TriangleStrip => vk::PrimitiveTopology::TRIANGLE_STRIP, + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> vk::PolygonMode { + match mode { + wgt::PolygonMode::Fill => vk::PolygonMode::FILL, + wgt::PolygonMode::Line => vk::PolygonMode::LINE, + wgt::PolygonMode::Point => vk::PolygonMode::POINT, + } +} + +pub fn map_front_face(front_face: wgt::FrontFace) -> vk::FrontFace { + match front_face { + wgt::FrontFace::Cw => vk::FrontFace::CLOCKWISE, + wgt::FrontFace::Ccw => vk::FrontFace::COUNTER_CLOCKWISE, + } +} + +pub fn map_cull_face(face: wgt::Face) -> vk::CullModeFlags { + match face { + wgt::Face::Front => vk::CullModeFlags::FRONT, + wgt::Face::Back => vk::CullModeFlags::BACK, + } +} + +pub fn map_stencil_op(op: wgt::StencilOperation) -> vk::StencilOp { + use wgt::StencilOperation as So; + match op { + So::Keep => vk::StencilOp::KEEP, + So::Zero => vk::StencilOp::ZERO, + So::Replace => vk::StencilOp::REPLACE, + So::Invert => vk::StencilOp::INVERT, + So::IncrementClamp => vk::StencilOp::INCREMENT_AND_CLAMP, + So::IncrementWrap => vk::StencilOp::INCREMENT_AND_WRAP, + So::DecrementClamp => vk::StencilOp::DECREMENT_AND_CLAMP, + So::DecrementWrap => vk::StencilOp::DECREMENT_AND_WRAP, + } +} + +pub fn map_stencil_face( + face: &wgt::StencilFaceState, + compare_mask: u32, + write_mask: u32, +) -> vk::StencilOpState { + vk::StencilOpState { + fail_op: map_stencil_op(face.fail_op), + pass_op: map_stencil_op(face.pass_op), + depth_fail_op: 
map_stencil_op(face.depth_fail_op), + compare_op: map_comparison(face.compare), + compare_mask, + write_mask, + reference: 0, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor) -> vk::BlendFactor { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => vk::BlendFactor::ZERO, + Bf::One => vk::BlendFactor::ONE, + Bf::Src => vk::BlendFactor::SRC_COLOR, + Bf::OneMinusSrc => vk::BlendFactor::ONE_MINUS_SRC_COLOR, + Bf::SrcAlpha => vk::BlendFactor::SRC_ALPHA, + Bf::OneMinusSrcAlpha => vk::BlendFactor::ONE_MINUS_SRC_ALPHA, + Bf::Dst => vk::BlendFactor::DST_COLOR, + Bf::OneMinusDst => vk::BlendFactor::ONE_MINUS_DST_COLOR, + Bf::DstAlpha => vk::BlendFactor::DST_ALPHA, + Bf::OneMinusDstAlpha => vk::BlendFactor::ONE_MINUS_DST_ALPHA, + Bf::SrcAlphaSaturated => vk::BlendFactor::SRC_ALPHA_SATURATE, + Bf::Constant => vk::BlendFactor::CONSTANT_COLOR, + Bf::OneMinusConstant => vk::BlendFactor::ONE_MINUS_CONSTANT_COLOR, + Bf::Src1 => vk::BlendFactor::SRC1_COLOR, + Bf::OneMinusSrc1 => vk::BlendFactor::ONE_MINUS_SRC1_COLOR, + Bf::Src1Alpha => vk::BlendFactor::SRC1_ALPHA, + Bf::OneMinusSrc1Alpha => vk::BlendFactor::ONE_MINUS_SRC1_ALPHA, + } +} + +fn map_blend_op(operation: wgt::BlendOperation) -> vk::BlendOp { + use wgt::BlendOperation as Bo; + match operation { + Bo::Add => vk::BlendOp::ADD, + Bo::Subtract => vk::BlendOp::SUBTRACT, + Bo::ReverseSubtract => vk::BlendOp::REVERSE_SUBTRACT, + Bo::Min => vk::BlendOp::MIN, + Bo::Max => vk::BlendOp::MAX, + } +} + +pub fn map_blend_component( + component: &wgt::BlendComponent, +) -> (vk::BlendOp, vk::BlendFactor, vk::BlendFactor) { + let op = map_blend_op(component.operation); + let src = map_blend_factor(component.src_factor); + let dst = map_blend_factor(component.dst_factor); + (op, src, dst) +} + +pub fn map_pipeline_statistics( + types: wgt::PipelineStatisticsTypes, +) -> vk::QueryPipelineStatisticFlags { + use wgt::PipelineStatisticsTypes as Pst; + let mut flags = vk::QueryPipelineStatisticFlags::empty(); + if 
types.contains(Pst::VERTEX_SHADER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::VERTEX_SHADER_INVOCATIONS; + } + if types.contains(Pst::CLIPPER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::CLIPPING_INVOCATIONS; + } + if types.contains(Pst::CLIPPER_PRIMITIVES_OUT) { + flags |= vk::QueryPipelineStatisticFlags::CLIPPING_PRIMITIVES; + } + if types.contains(Pst::FRAGMENT_SHADER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::FRAGMENT_SHADER_INVOCATIONS; + } + if types.contains(Pst::COMPUTE_SHADER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::COMPUTE_SHADER_INVOCATIONS; + } + flags +} + +pub fn map_acceleration_structure_format( + format: crate::AccelerationStructureFormat, +) -> vk::AccelerationStructureTypeKHR { + match format { + crate::AccelerationStructureFormat::TopLevel => vk::AccelerationStructureTypeKHR::TOP_LEVEL, + crate::AccelerationStructureFormat::BottomLevel => { + vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL + } + } +} + +pub fn map_acceleration_structure_build_mode( + format: crate::AccelerationStructureBuildMode, +) -> vk::BuildAccelerationStructureModeKHR { + match format { + crate::AccelerationStructureBuildMode::Build => { + vk::BuildAccelerationStructureModeKHR::BUILD + } + crate::AccelerationStructureBuildMode::Update => { + vk::BuildAccelerationStructureModeKHR::UPDATE + } + } +} + +pub fn map_acceleration_structure_flags( + flags: crate::AccelerationStructureBuildFlags, +) -> vk::BuildAccelerationStructureFlagsKHR { + let mut vk_flags = vk::BuildAccelerationStructureFlagsKHR::empty(); + + if flags.contains(crate::AccelerationStructureBuildFlags::PREFER_FAST_TRACE) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::PREFER_FAST_BUILD) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_BUILD; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::ALLOW_UPDATE) { + vk_flags |= 
vk::BuildAccelerationStructureFlagsKHR::ALLOW_UPDATE; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::LOW_MEMORY) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::LOW_MEMORY; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::ALLOW_COMPACTION) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::ALLOW_COMPACTION + } + + vk_flags +} + +pub fn map_acceleration_structure_geometry_flags( + flags: crate::AccelerationStructureGeometryFlags, +) -> vk::GeometryFlagsKHR { + let mut vk_flags = vk::GeometryFlagsKHR::empty(); + + if flags.contains(crate::AccelerationStructureGeometryFlags::OPAQUE) { + vk_flags |= vk::GeometryFlagsKHR::OPAQUE; + } + + if flags.contains(crate::AccelerationStructureGeometryFlags::NO_DUPLICATE_ANY_HIT_INVOCATION) { + vk_flags |= vk::GeometryFlagsKHR::NO_DUPLICATE_ANY_HIT_INVOCATION; + } + + vk_flags +} + +pub fn map_acceleration_structure_usage_to_barrier( + usage: crate::AccelerationStructureUses, +) -> (vk::PipelineStageFlags, vk::AccessFlags) { + let mut stages = vk::PipelineStageFlags::empty(); + let mut access = vk::AccessFlags::empty(); + + if usage.contains(crate::AccelerationStructureUses::BUILD_INPUT) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; + } + if usage.contains(crate::AccelerationStructureUses::BUILD_OUTPUT) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; + } + if usage.contains(crate::AccelerationStructureUses::SHADER_INPUT) { + stages |= vk::PipelineStageFlags::VERTEX_SHADER + | vk::PipelineStageFlags::FRAGMENT_SHADER + | vk::PipelineStageFlags::COMPUTE_SHADER; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; + } + + (stages, access) +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/device.rs b/third_party/rust/wgpu-hal/src/vulkan/device.rs new file mode 100644 index 0000000000..c00c3d1d43 --- 
/dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/device.rs @@ -0,0 +1,2352 @@ +use super::conv; + +use arrayvec::ArrayVec; +use ash::{extensions::khr, vk}; +use parking_lot::Mutex; + +use std::{ + borrow::Cow, + collections::{hash_map::Entry, BTreeMap}, + ffi::{CStr, CString}, + num::NonZeroU32, + ptr, + sync::Arc, +}; + +impl super::DeviceShared { + pub(super) unsafe fn set_object_name( + &self, + object_type: vk::ObjectType, + object: impl vk::Handle, + name: &str, + ) { + let extension = match self.instance.debug_utils { + Some(ref debug_utils) => &debug_utils.extension, + None => return, + }; + + // Keep variables outside the if-else block to ensure they do not + // go out of scope while we hold a pointer to them + let mut buffer: [u8; 64] = [0u8; 64]; + let buffer_vec: Vec<u8>; + + // Append a null terminator to the string + let name_bytes = if name.len() < buffer.len() { + // Common case, string is very small. Allocate a copy on the stack. + buffer[..name.len()].copy_from_slice(name.as_bytes()); + // Add null terminator + buffer[name.len()] = 0; + &buffer[..name.len() + 1] + } else { + // Less common case, the string is large. + // This requires a heap allocation. 
+ buffer_vec = name + .as_bytes() + .iter() + .cloned() + .chain(std::iter::once(0)) + .collect(); + &buffer_vec + }; + + let name = unsafe { CStr::from_bytes_with_nul_unchecked(name_bytes) }; + + let _result = unsafe { + extension.set_debug_utils_object_name( + self.raw.handle(), + &vk::DebugUtilsObjectNameInfoEXT::builder() + .object_type(object_type) + .object_handle(object.as_raw()) + .object_name(name), + ) + }; + } + + pub fn make_render_pass( + &self, + key: super::RenderPassKey, + ) -> Result<vk::RenderPass, crate::DeviceError> { + Ok(match self.render_passes.lock().entry(key) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let mut vk_attachments = Vec::new(); + let mut color_refs = Vec::with_capacity(e.key().colors.len()); + let mut resolve_refs = Vec::with_capacity(color_refs.capacity()); + let mut ds_ref = None; + let samples = vk::SampleCountFlags::from_raw(e.key().sample_count); + let unused = vk::AttachmentReference { + attachment: vk::ATTACHMENT_UNUSED, + layout: vk::ImageLayout::UNDEFINED, + }; + for cat in e.key().colors.iter() { + let (color_ref, resolve_ref) = if let Some(cat) = cat.as_ref() { + let color_ref = vk::AttachmentReference { + attachment: vk_attachments.len() as u32, + layout: cat.base.layout, + }; + vk_attachments.push({ + let (load_op, store_op) = conv::map_attachment_ops(cat.base.ops); + vk::AttachmentDescription::builder() + .format(cat.base.format) + .samples(samples) + .load_op(load_op) + .store_op(store_op) + .initial_layout(cat.base.layout) + .final_layout(cat.base.layout) + .build() + }); + let resolve_ref = if let Some(ref rat) = cat.resolve { + let (load_op, store_op) = conv::map_attachment_ops(rat.ops); + let vk_attachment = vk::AttachmentDescription::builder() + .format(rat.format) + .samples(vk::SampleCountFlags::TYPE_1) + .load_op(load_op) + .store_op(store_op) + .initial_layout(rat.layout) + .final_layout(rat.layout) + .build(); + vk_attachments.push(vk_attachment); + + vk::AttachmentReference { + 
attachment: vk_attachments.len() as u32 - 1, + layout: rat.layout, + } + } else { + unused + }; + + (color_ref, resolve_ref) + } else { + (unused, unused) + }; + + color_refs.push(color_ref); + resolve_refs.push(resolve_ref); + } + + if let Some(ref ds) = e.key().depth_stencil { + ds_ref = Some(vk::AttachmentReference { + attachment: vk_attachments.len() as u32, + layout: ds.base.layout, + }); + let (load_op, store_op) = conv::map_attachment_ops(ds.base.ops); + let (stencil_load_op, stencil_store_op) = + conv::map_attachment_ops(ds.stencil_ops); + let vk_attachment = vk::AttachmentDescription::builder() + .format(ds.base.format) + .samples(samples) + .load_op(load_op) + .store_op(store_op) + .stencil_load_op(stencil_load_op) + .stencil_store_op(stencil_store_op) + .initial_layout(ds.base.layout) + .final_layout(ds.base.layout) + .build(); + vk_attachments.push(vk_attachment); + } + + let vk_subpasses = [{ + let mut vk_subpass = vk::SubpassDescription::builder() + .pipeline_bind_point(vk::PipelineBindPoint::GRAPHICS) + .color_attachments(&color_refs) + .resolve_attachments(&resolve_refs); + + if self + .workarounds + .contains(super::Workarounds::EMPTY_RESOLVE_ATTACHMENT_LISTS) + && resolve_refs.is_empty() + { + vk_subpass.p_resolve_attachments = ptr::null(); + } + + if let Some(ref reference) = ds_ref { + vk_subpass = vk_subpass.depth_stencil_attachment(reference) + } + vk_subpass.build() + }]; + + let mut vk_info = vk::RenderPassCreateInfo::builder() + .attachments(&vk_attachments) + .subpasses(&vk_subpasses); + + let mut multiview_info; + let mask; + if let Some(multiview) = e.key().multiview { + // Sanity checks, better to panic here than cause a driver crash + assert!(multiview.get() <= 8); + assert!(multiview.get() > 1); + + // Right now we enable all bits on the view masks and correlation masks. + // This means we're rendering to all views in the subpass, and that all views + // can be rendered concurrently. 
+ mask = [(1 << multiview.get()) - 1]; + + // On Vulkan 1.1 or later, this is an alias for core functionality + multiview_info = vk::RenderPassMultiviewCreateInfoKHR::builder() + .view_masks(&mask) + .correlation_masks(&mask) + .build(); + vk_info = vk_info.push_next(&mut multiview_info); + } + + let raw = unsafe { self.raw.create_render_pass(&vk_info, None)? }; + + *e.insert(raw) + } + }) + } + + pub fn make_framebuffer( + &self, + key: super::FramebufferKey, + raw_pass: vk::RenderPass, + pass_label: crate::Label, + ) -> Result<vk::Framebuffer, crate::DeviceError> { + Ok(match self.framebuffers.lock().entry(key) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let vk_views = e + .key() + .attachments + .iter() + .map(|at| at.raw) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + let vk_view_formats = e + .key() + .attachments + .iter() + .map(|at| self.private_caps.map_texture_format(at.view_format)) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + let vk_view_formats_list = e + .key() + .attachments + .iter() + .map(|at| at.raw_view_formats.clone()) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + + let vk_image_infos = e + .key() + .attachments + .iter() + .enumerate() + .map(|(i, at)| { + let mut info = vk::FramebufferAttachmentImageInfo::builder() + .usage(conv::map_texture_usage(at.view_usage)) + .flags(at.raw_image_flags) + .width(e.key().extent.width) + .height(e.key().extent.height) + .layer_count(e.key().extent.depth_or_array_layers); + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03214 + if vk_view_formats_list[i].is_empty() { + info = info.view_formats(&vk_view_formats[i..i + 1]); + } else { + info = info.view_formats(&vk_view_formats_list[i]); + }; + info.build() + }) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + + let mut vk_attachment_info = 
vk::FramebufferAttachmentsCreateInfo::builder() + .attachment_image_infos(&vk_image_infos) + .build(); + let mut vk_info = vk::FramebufferCreateInfo::builder() + .render_pass(raw_pass) + .width(e.key().extent.width) + .height(e.key().extent.height) + .layers(e.key().extent.depth_or_array_layers); + + if self.private_caps.imageless_framebuffers { + //TODO: https://github.com/MaikKlein/ash/issues/450 + vk_info = vk_info + .flags(vk::FramebufferCreateFlags::IMAGELESS_KHR) + .push_next(&mut vk_attachment_info); + vk_info.attachment_count = e.key().attachments.len() as u32; + } else { + vk_info = vk_info.attachments(&vk_views); + } + + *e.insert(unsafe { + let raw = self.raw.create_framebuffer(&vk_info, None).unwrap(); + if let Some(label) = pass_label { + self.set_object_name(vk::ObjectType::FRAMEBUFFER, raw, label); + } + raw + }) + } + }) + } + + fn make_memory_ranges<'a, I: 'a + Iterator<Item = crate::MemoryRange>>( + &self, + buffer: &'a super::Buffer, + ranges: I, + ) -> Option<impl 'a + Iterator<Item = vk::MappedMemoryRange>> { + let block = buffer.block.as_ref()?.lock(); + let mask = self.private_caps.non_coherent_map_mask; + Some(ranges.map(move |range| { + vk::MappedMemoryRange::builder() + .memory(*block.memory()) + .offset((block.offset() + range.start) & !mask) + .size((range.end - range.start + mask) & !mask) + .build() + })) + } + + unsafe fn free_resources(&self) { + for &raw in self.render_passes.lock().values() { + unsafe { self.raw.destroy_render_pass(raw, None) }; + } + for &raw in self.framebuffers.lock().values() { + unsafe { self.raw.destroy_framebuffer(raw, None) }; + } + if self.handle_is_owned { + unsafe { self.raw.destroy_device(None) }; + } + } +} + +impl gpu_alloc::MemoryDevice<vk::DeviceMemory> for super::DeviceShared { + unsafe fn allocate_memory( + &self, + size: u64, + memory_type: u32, + flags: gpu_alloc::AllocationFlags, + ) -> Result<vk::DeviceMemory, gpu_alloc::OutOfMemory> { + let mut info = vk::MemoryAllocateInfo::builder() + 
.allocation_size(size) + .memory_type_index(memory_type); + + let mut info_flags; + + if flags.contains(gpu_alloc::AllocationFlags::DEVICE_ADDRESS) { + info_flags = vk::MemoryAllocateFlagsInfo::builder() + .flags(vk::MemoryAllocateFlags::DEVICE_ADDRESS); + info = info.push_next(&mut info_flags); + } + + match unsafe { self.raw.allocate_memory(&info, None) } { + Ok(memory) => Ok(memory), + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_alloc::OutOfMemory::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { + Err(gpu_alloc::OutOfMemory::OutOfHostMemory) + } + Err(vk::Result::ERROR_TOO_MANY_OBJECTS) => panic!("Too many objects"), + Err(err) => panic!("Unexpected Vulkan error: `{err}`"), + } + } + + unsafe fn deallocate_memory(&self, memory: vk::DeviceMemory) { + unsafe { self.raw.free_memory(memory, None) }; + } + + unsafe fn map_memory( + &self, + memory: &mut vk::DeviceMemory, + offset: u64, + size: u64, + ) -> Result<ptr::NonNull<u8>, gpu_alloc::DeviceMapError> { + match unsafe { + self.raw + .map_memory(*memory, offset, size, vk::MemoryMapFlags::empty()) + } { + Ok(ptr) => Ok(ptr::NonNull::new(ptr as *mut u8) + .expect("Pointer to memory mapping must not be null")), + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_alloc::DeviceMapError::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { + Err(gpu_alloc::DeviceMapError::OutOfHostMemory) + } + Err(vk::Result::ERROR_MEMORY_MAP_FAILED) => Err(gpu_alloc::DeviceMapError::MapFailed), + Err(err) => panic!("Unexpected Vulkan error: `{err}`"), + } + } + + unsafe fn unmap_memory(&self, memory: &mut vk::DeviceMemory) { + unsafe { self.raw.unmap_memory(*memory) }; + } + + unsafe fn invalidate_memory_ranges( + &self, + _ranges: &[gpu_alloc::MappedMemoryRange<'_, vk::DeviceMemory>], + ) -> Result<(), gpu_alloc::OutOfMemory> { + // should never be called + unimplemented!() + } + + unsafe fn flush_memory_ranges( + &self, + _ranges: &[gpu_alloc::MappedMemoryRange<'_, 
vk::DeviceMemory>], + ) -> Result<(), gpu_alloc::OutOfMemory> { + // should never be called + unimplemented!() + } +} + +impl + gpu_descriptor::DescriptorDevice<vk::DescriptorSetLayout, vk::DescriptorPool, vk::DescriptorSet> + for super::DeviceShared +{ + unsafe fn create_descriptor_pool( + &self, + descriptor_count: &gpu_descriptor::DescriptorTotalCount, + max_sets: u32, + flags: gpu_descriptor::DescriptorPoolCreateFlags, + ) -> Result<vk::DescriptorPool, gpu_descriptor::CreatePoolError> { + //Note: ignoring other types, since they can't appear here + let unfiltered_counts = [ + (vk::DescriptorType::SAMPLER, descriptor_count.sampler), + ( + vk::DescriptorType::SAMPLED_IMAGE, + descriptor_count.sampled_image, + ), + ( + vk::DescriptorType::STORAGE_IMAGE, + descriptor_count.storage_image, + ), + ( + vk::DescriptorType::UNIFORM_BUFFER, + descriptor_count.uniform_buffer, + ), + ( + vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC, + descriptor_count.uniform_buffer_dynamic, + ), + ( + vk::DescriptorType::STORAGE_BUFFER, + descriptor_count.storage_buffer, + ), + ( + vk::DescriptorType::STORAGE_BUFFER_DYNAMIC, + descriptor_count.storage_buffer_dynamic, + ), + ]; + + let filtered_counts = unfiltered_counts + .iter() + .cloned() + .filter(|&(_, count)| count != 0) + .map(|(ty, count)| vk::DescriptorPoolSize { + ty, + descriptor_count: count, + }) + .collect::<ArrayVec<_, 8>>(); + + let mut vk_flags = + if flags.contains(gpu_descriptor::DescriptorPoolCreateFlags::UPDATE_AFTER_BIND) { + vk::DescriptorPoolCreateFlags::UPDATE_AFTER_BIND + } else { + vk::DescriptorPoolCreateFlags::empty() + }; + if flags.contains(gpu_descriptor::DescriptorPoolCreateFlags::FREE_DESCRIPTOR_SET) { + vk_flags |= vk::DescriptorPoolCreateFlags::FREE_DESCRIPTOR_SET; + } + let vk_info = vk::DescriptorPoolCreateInfo::builder() + .max_sets(max_sets) + .flags(vk_flags) + .pool_sizes(&filtered_counts) + .build(); + + match unsafe { self.raw.create_descriptor_pool(&vk_info, None) } { + Ok(pool) => Ok(pool), + 
Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { + Err(gpu_descriptor::CreatePoolError::OutOfHostMemory) + } + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_descriptor::CreatePoolError::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_FRAGMENTATION) => { + Err(gpu_descriptor::CreatePoolError::Fragmentation) + } + Err(other) => { + log::error!("create_descriptor_pool: {:?}", other); + Err(gpu_descriptor::CreatePoolError::OutOfHostMemory) + } + } + } + + unsafe fn destroy_descriptor_pool(&self, pool: vk::DescriptorPool) { + unsafe { self.raw.destroy_descriptor_pool(pool, None) } + } + + unsafe fn alloc_descriptor_sets<'a>( + &self, + pool: &mut vk::DescriptorPool, + layouts: impl ExactSizeIterator<Item = &'a vk::DescriptorSetLayout>, + sets: &mut impl Extend<vk::DescriptorSet>, + ) -> Result<(), gpu_descriptor::DeviceAllocationError> { + let result = unsafe { + self.raw.allocate_descriptor_sets( + &vk::DescriptorSetAllocateInfo::builder() + .descriptor_pool(*pool) + .set_layouts( + &smallvec::SmallVec::<[vk::DescriptorSetLayout; 32]>::from_iter( + layouts.cloned(), + ), + ) + .build(), + ) + }; + + match result { + Ok(vk_sets) => { + sets.extend(vk_sets); + Ok(()) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) + | Err(vk::Result::ERROR_OUT_OF_POOL_MEMORY) => { + Err(gpu_descriptor::DeviceAllocationError::OutOfHostMemory) + } + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_descriptor::DeviceAllocationError::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_FRAGMENTED_POOL) => { + Err(gpu_descriptor::DeviceAllocationError::FragmentedPool) + } + Err(other) => { + log::error!("allocate_descriptor_sets: {:?}", other); + Err(gpu_descriptor::DeviceAllocationError::OutOfHostMemory) + } + } + } + + unsafe fn dealloc_descriptor_sets<'a>( + &self, + pool: &mut vk::DescriptorPool, + sets: impl Iterator<Item = vk::DescriptorSet>, + ) { + let result = unsafe { + self.raw.free_descriptor_sets( + *pool, + &smallvec::SmallVec::<[vk::DescriptorSet; 
/// One shader stage prepared for pipeline creation, as produced by `compile_stage`.
struct CompiledStage {
    /// Vulkan stage description (stage flag, module and entry-point name).
    create_info: vk::PipelineShaderStageCreateInfo,
    /// Owned entry-point name that `create_info` was built from.
    // NOTE(review): presumably kept alive because `create_info` stores a raw
    // pointer to this string after `.build()` — confirm before removing.
    _entry_point: CString,
    /// `Some` when the module was generated from an intermediate naga shader
    /// for this stage; `None` when the caller supplied a raw module.
    // NOTE(review): presumably the consumer destroys this temporary module
    // once the pipeline is created — verify at the use sites.
    temp_raw_module: Option<vk::ShaderModule>,
}
width: config.extent.width, + height: config.extent.height, + }) + .image_array_layers(config.extent.depth_or_array_layers) + .image_usage(conv::map_texture_usage(config.usage)) + .image_sharing_mode(vk::SharingMode::EXCLUSIVE) + .pre_transform(vk::SurfaceTransformFlagsKHR::IDENTITY) + .composite_alpha(conv::map_composite_alpha_mode(config.composite_alpha_mode)) + .present_mode(conv::map_present_mode(config.present_mode)) + .clipped(true) + .old_swapchain(old_swapchain); + + let mut format_list_info = vk::ImageFormatListCreateInfo::builder(); + if !raw_view_formats.is_empty() { + format_list_info = format_list_info.view_formats(&raw_view_formats); + info = info.push_next(&mut format_list_info); + } + + let result = { + profiling::scope!("vkCreateSwapchainKHR"); + unsafe { functor.create_swapchain(&info, None) } + }; + + // doing this before bailing out with error + if old_swapchain != vk::SwapchainKHR::null() { + unsafe { functor.destroy_swapchain(old_swapchain, None) } + } + + let raw = match result { + Ok(swapchain) => swapchain, + Err(error) => { + return Err(match error { + vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost, + vk::Result::ERROR_NATIVE_WINDOW_IN_USE_KHR => { + crate::SurfaceError::Other("Native window is in use") + } + other => crate::DeviceError::from(other).into(), + }) + } + }; + + let images = + unsafe { functor.get_swapchain_images(raw) }.map_err(crate::DeviceError::from)?; + + // NOTE: It's important that we define at least images.len() + 1 wait + // semaphores, since we prospectively need to provide the call to + // acquire the next image with an unsignaled semaphore. 
+ let surface_semaphores = (0..images.len() + 1) + .map(|_| unsafe { + self.shared + .raw + .create_semaphore(&vk::SemaphoreCreateInfo::builder(), None) + }) + .collect::<Result<Vec<_>, _>>() + .map_err(crate::DeviceError::from)?; + + Ok(super::Swapchain { + raw, + raw_flags, + functor, + device: Arc::clone(&self.shared), + images, + config: config.clone(), + view_formats: wgt_view_formats, + surface_semaphores, + next_surface_index: 0, + }) + } + + /// # Safety + /// + /// - `vk_image` must be created respecting `desc` + /// - If `drop_guard` is `Some`, the application must manually destroy the image handle. This + /// can be done inside the `Drop` impl of `drop_guard`. + /// - If the `ImageCreateFlags` does not contain `MUTABLE_FORMAT`, the `view_formats` of `desc` must be empty. + pub unsafe fn texture_from_raw( + vk_image: vk::Image, + desc: &crate::TextureDescriptor, + drop_guard: Option<crate::DropGuard>, + ) -> super::Texture { + let mut raw_flags = vk::ImageCreateFlags::empty(); + let mut view_formats = vec![]; + for tf in desc.view_formats.iter() { + if *tf == desc.format { + continue; + } + view_formats.push(*tf); + } + if !view_formats.is_empty() { + raw_flags |= + vk::ImageCreateFlags::MUTABLE_FORMAT | vk::ImageCreateFlags::EXTENDED_USAGE; + view_formats.push(desc.format) + } + if desc.format.is_multi_planar_format() { + raw_flags |= vk::ImageCreateFlags::MUTABLE_FORMAT; + } + + super::Texture { + raw: vk_image, + drop_guard, + block: None, + usage: desc.usage, + format: desc.format, + raw_flags: vk::ImageCreateFlags::empty(), + copy_size: desc.copy_extent(), + view_formats, + } + } + + /// # Safety + /// + /// - `vk_buffer`'s memory must be managed by the caller + /// - Externally imported buffers can't be mapped by `wgpu` + pub unsafe fn buffer_from_raw(vk_buffer: vk::Buffer) -> super::Buffer { + super::Buffer { + raw: vk_buffer, + block: None, + } + } + + fn create_shader_module_impl( + &self, + spv: &[u32], + ) -> Result<vk::ShaderModule, 
crate::DeviceError> { + let vk_info = vk::ShaderModuleCreateInfo::builder() + .flags(vk::ShaderModuleCreateFlags::empty()) + .code(spv); + + let raw = unsafe { + profiling::scope!("vkCreateShaderModule"); + self.shared.raw.create_shader_module(&vk_info, None)? + }; + Ok(raw) + } + + fn compile_stage( + &self, + stage: &crate::ProgrammableStage<super::Api>, + naga_stage: naga::ShaderStage, + binding_map: &naga::back::spv::BindingMap, + ) -> Result<CompiledStage, crate::PipelineError> { + let stage_flags = crate::auxil::map_naga_stage(naga_stage); + let vk_module = match *stage.module { + super::ShaderModule::Raw(raw) => raw, + super::ShaderModule::Intermediate { + ref naga_shader, + runtime_checks, + } => { + let pipeline_options = naga::back::spv::PipelineOptions { + entry_point: stage.entry_point.to_string(), + shader_stage: naga_stage, + }; + let needs_temp_options = !runtime_checks + || !binding_map.is_empty() + || naga_shader.debug_source.is_some(); + let mut temp_options; + let options = if needs_temp_options { + temp_options = self.naga_options.clone(); + if !runtime_checks { + temp_options.bounds_check_policies = naga::proc::BoundsCheckPolicies { + index: naga::proc::BoundsCheckPolicy::Unchecked, + buffer: naga::proc::BoundsCheckPolicy::Unchecked, + image_load: naga::proc::BoundsCheckPolicy::Unchecked, + image_store: naga::proc::BoundsCheckPolicy::Unchecked, + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }; + } + if !binding_map.is_empty() { + temp_options.binding_map = binding_map.clone(); + } + + if let Some(ref debug) = naga_shader.debug_source { + temp_options.debug_info = Some(naga::back::spv::DebugInfo { + source_code: &debug.source_code, + file_name: debug.file_name.as_ref().as_ref(), + }) + } + + &temp_options + } else { + &self.naga_options + }; + let spv = { + profiling::scope!("naga::spv::write_vec"); + naga::back::spv::write_vec( + &naga_shader.module, + &naga_shader.info, + options, + Some(&pipeline_options), + ) + } + .map_err(|e| 
crate::PipelineError::Linkage(stage_flags, format!("{e}")))?; + self.create_shader_module_impl(&spv)? + } + }; + + let entry_point = CString::new(stage.entry_point).unwrap(); + let create_info = vk::PipelineShaderStageCreateInfo::builder() + .stage(conv::map_shader_stage(stage_flags)) + .module(vk_module) + .name(&entry_point) + .build(); + + Ok(CompiledStage { + create_info, + _entry_point: entry_point, + temp_raw_module: match *stage.module { + super::ShaderModule::Raw(_) => None, + super::ShaderModule::Intermediate { .. } => Some(vk_module), + }, + }) + } + + /// Returns the queue family index of the device's internal queue. + /// + /// This is useful for constructing memory barriers needed for queue family ownership transfer when + /// external memory is involved (from/to `VK_QUEUE_FAMILY_EXTERNAL_KHR` and `VK_QUEUE_FAMILY_FOREIGN_EXT` + /// for example). + pub fn queue_family_index(&self) -> u32 { + self.shared.family_index + } + + pub fn queue_index(&self) -> u32 { + self.shared.queue_index + } + + pub fn raw_device(&self) -> &ash::Device { + &self.shared.raw + } + + pub fn raw_physical_device(&self) -> ash::vk::PhysicalDevice { + self.shared.physical_device + } + + pub fn raw_queue(&self) -> ash::vk::Queue { + self.shared.raw_queue + } + + pub fn enabled_device_extensions(&self) -> &[&'static CStr] { + &self.shared.enabled_extensions + } + + pub fn shared_instance(&self) -> &super::InstanceShared { + &self.shared.instance + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, queue: super::Queue) { + unsafe { self.mem_allocator.into_inner().cleanup(&*self.shared) }; + unsafe { self.desc_allocator.into_inner().cleanup(&*self.shared) }; + for &sem in queue.relay_semaphores.iter() { + unsafe { self.shared.raw.destroy_semaphore(sem, None) }; + } + unsafe { self.shared.free_resources() }; + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + let vk_info = 
vk::BufferCreateInfo::builder() + .size(desc.size) + .usage(conv::map_buffer_usage(desc.usage)) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + let raw = unsafe { self.shared.raw.create_buffer(&vk_info, None)? }; + let req = unsafe { self.shared.raw.get_buffer_memory_requirements(raw) }; + + let mut alloc_usage = if desc + .usage + .intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE) + { + let mut flags = gpu_alloc::UsageFlags::HOST_ACCESS; + //TODO: find a way to use `crate::MemoryFlags::PREFER_COHERENT` + flags.set( + gpu_alloc::UsageFlags::DOWNLOAD, + desc.usage.contains(crate::BufferUses::MAP_READ), + ); + flags.set( + gpu_alloc::UsageFlags::UPLOAD, + desc.usage.contains(crate::BufferUses::MAP_WRITE), + ); + flags + } else { + gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS + }; + alloc_usage.set( + gpu_alloc::UsageFlags::TRANSIENT, + desc.memory_flags.contains(crate::MemoryFlags::TRANSIENT), + ); + + let alignment_mask = if desc.usage.intersects( + crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + ) { + 16 + } else { + req.alignment + } - 1; + + let block = unsafe { + self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: alignment_mask, + usage: alloc_usage, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )? + }; + + unsafe { + self.shared + .raw + .bind_buffer_memory(raw, *block.memory(), block.offset())? 
+ }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::BUFFER, raw, label) + }; + } + + Ok(super::Buffer { + raw, + block: Some(Mutex::new(block)), + }) + } + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { + unsafe { self.shared.raw.destroy_buffer(buffer.raw, None) }; + if let Some(block) = buffer.block { + unsafe { + self.mem_allocator + .lock() + .dealloc(&*self.shared, block.into_inner()) + }; + } + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + if let Some(ref block) = buffer.block { + let size = range.end - range.start; + let mut block = block.lock(); + let ptr = unsafe { block.map(&*self.shared, range.start, size as usize)? }; + let is_coherent = block + .props() + .contains(gpu_alloc::MemoryPropertyFlags::HOST_COHERENT); + Ok(crate::BufferMapping { ptr, is_coherent }) + } else { + Err(crate::DeviceError::OutOfMemory) + } + } + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + if let Some(ref block) = buffer.block { + unsafe { block.lock().unmap(&*self.shared) }; + Ok(()) + } else { + Err(crate::DeviceError::OutOfMemory) + } + } + + unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + if let Some(vk_ranges) = self.shared.make_memory_ranges(buffer, ranges) { + unsafe { + self.shared + .raw + .flush_mapped_memory_ranges( + &smallvec::SmallVec::<[vk::MappedMemoryRange; 32]>::from_iter(vk_ranges), + ) + } + .unwrap(); + } + } + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + if let Some(vk_ranges) = self.shared.make_memory_ranges(buffer, ranges) { + unsafe { + self.shared + .raw + .invalidate_mapped_memory_ranges(&smallvec::SmallVec::< + [vk::MappedMemoryRange; 32], + >::from_iter(vk_ranges)) + } + 
.unwrap(); + } + } + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + let copy_size = desc.copy_extent(); + + let mut raw_flags = vk::ImageCreateFlags::empty(); + if desc.is_cube_compatible() { + raw_flags |= vk::ImageCreateFlags::CUBE_COMPATIBLE; + } + + let original_format = self.shared.private_caps.map_texture_format(desc.format); + let mut vk_view_formats = vec![]; + let mut wgt_view_formats = vec![]; + if !desc.view_formats.is_empty() { + raw_flags |= vk::ImageCreateFlags::MUTABLE_FORMAT; + wgt_view_formats = desc.view_formats.clone(); + wgt_view_formats.push(desc.format); + + if self.shared.private_caps.image_format_list { + vk_view_formats = desc + .view_formats + .iter() + .map(|f| self.shared.private_caps.map_texture_format(*f)) + .collect(); + vk_view_formats.push(original_format) + } + } + if desc.format.is_multi_planar_format() { + raw_flags |= vk::ImageCreateFlags::MUTABLE_FORMAT; + } + + let mut vk_info = vk::ImageCreateInfo::builder() + .flags(raw_flags) + .image_type(conv::map_texture_dimension(desc.dimension)) + .format(original_format) + .extent(conv::map_copy_extent(©_size)) + .mip_levels(desc.mip_level_count) + .array_layers(desc.array_layer_count()) + .samples(vk::SampleCountFlags::from_raw(desc.sample_count)) + .tiling(vk::ImageTiling::OPTIMAL) + .usage(conv::map_texture_usage(desc.usage)) + .sharing_mode(vk::SharingMode::EXCLUSIVE) + .initial_layout(vk::ImageLayout::UNDEFINED); + + let mut format_list_info = vk::ImageFormatListCreateInfo::builder(); + if !vk_view_formats.is_empty() { + format_list_info = format_list_info.view_formats(&vk_view_formats); + vk_info = vk_info.push_next(&mut format_list_info); + } + + let raw = unsafe { self.shared.raw.create_image(&vk_info, None)? 
}; + let req = unsafe { self.shared.raw.get_image_memory_requirements(raw) }; + + let block = unsafe { + self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: req.alignment - 1, + usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )? + }; + + unsafe { + self.shared + .raw + .bind_image_memory(raw, *block.memory(), block.offset())? + }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::IMAGE, raw, label) + }; + } + + Ok(super::Texture { + raw, + drop_guard: None, + block: Some(block), + usage: desc.usage, + format: desc.format, + raw_flags, + copy_size, + view_formats: wgt_view_formats, + }) + } + unsafe fn destroy_texture(&self, texture: super::Texture) { + if texture.drop_guard.is_none() { + unsafe { self.shared.raw.destroy_image(texture.raw, None) }; + } + if let Some(block) = texture.block { + unsafe { self.mem_allocator.lock().dealloc(&*self.shared, block) }; + } + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + let subresource_range = conv::map_subresource_range(&desc.range, texture.format); + let mut vk_info = vk::ImageViewCreateInfo::builder() + .flags(vk::ImageViewCreateFlags::empty()) + .image(texture.raw) + .view_type(conv::map_view_dimension(desc.dimension)) + .format(self.shared.private_caps.map_texture_format(desc.format)) + .subresource_range(subresource_range); + let layers = + NonZeroU32::new(subresource_range.layer_count).expect("Unexpected zero layer count"); + + let mut image_view_info; + let view_usage = if self.shared.private_caps.image_view_usage && !desc.usage.is_empty() { + image_view_info = vk::ImageViewUsageCreateInfo::builder() + .usage(conv::map_texture_usage(desc.usage)) + .build(); + vk_info = vk_info.push_next(&mut image_view_info); + 
desc.usage + } else { + texture.usage + }; + + let raw = unsafe { self.shared.raw.create_image_view(&vk_info, None) }?; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::IMAGE_VIEW, raw, label) + }; + } + + let attachment = super::FramebufferAttachment { + raw: if self.shared.private_caps.imageless_framebuffers { + vk::ImageView::null() + } else { + raw + }, + raw_image_flags: texture.raw_flags, + view_usage, + view_format: desc.format, + raw_view_formats: texture + .view_formats + .iter() + .map(|tf| self.shared.private_caps.map_texture_format(*tf)) + .collect(), + }; + + Ok(super::TextureView { + raw, + layers, + attachment, + }) + } + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + if !self.shared.private_caps.imageless_framebuffers { + let mut fbuf_lock = self.shared.framebuffers.lock(); + for (key, &raw_fbuf) in fbuf_lock.iter() { + if key.attachments.iter().any(|at| at.raw == view.raw) { + unsafe { self.shared.raw.destroy_framebuffer(raw_fbuf, None) }; + } + } + fbuf_lock.retain(|key, _| !key.attachments.iter().any(|at| at.raw == view.raw)); + } + unsafe { self.shared.raw.destroy_image_view(view.raw, None) }; + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + let mut vk_info = vk::SamplerCreateInfo::builder() + .flags(vk::SamplerCreateFlags::empty()) + .mag_filter(conv::map_filter_mode(desc.mag_filter)) + .min_filter(conv::map_filter_mode(desc.min_filter)) + .mipmap_mode(conv::map_mip_filter_mode(desc.mipmap_filter)) + .address_mode_u(conv::map_address_mode(desc.address_modes[0])) + .address_mode_v(conv::map_address_mode(desc.address_modes[1])) + .address_mode_w(conv::map_address_mode(desc.address_modes[2])) + .min_lod(desc.lod_clamp.start) + .max_lod(desc.lod_clamp.end); + + if let Some(fun) = desc.compare { + vk_info = vk_info + .compare_enable(true) + .compare_op(conv::map_comparison(fun)); + } + + if 
desc.anisotropy_clamp != 1 { + // We only enable anisotropy if it is supported, and wgpu-hal interface guarantees + // the clamp is in the range [1, 16] which is always supported if anisotropy is. + vk_info = vk_info + .anisotropy_enable(true) + .max_anisotropy(desc.anisotropy_clamp as f32); + } + + if let Some(color) = desc.border_color { + vk_info = vk_info.border_color(conv::map_border_color(color)); + } + + let raw = unsafe { self.shared.raw.create_sampler(&vk_info, None)? }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::SAMPLER, raw, label) + }; + } + + Ok(super::Sampler { raw }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + unsafe { self.shared.raw.destroy_sampler(sampler.raw, None) }; + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + let vk_info = vk::CommandPoolCreateInfo::builder() + .queue_family_index(desc.queue.family_index) + .flags(vk::CommandPoolCreateFlags::TRANSIENT) + .build(); + let raw = unsafe { self.shared.raw.create_command_pool(&vk_info, None)? }; + + Ok(super::CommandEncoder { + raw, + device: Arc::clone(&self.shared), + active: vk::CommandBuffer::null(), + bind_point: vk::PipelineBindPoint::default(), + temp: super::Temp::default(), + free: Vec::new(), + discarded: Vec::new(), + rpass_debug_marker_active: false, + end_of_pass_timer_query: None, + }) + } + unsafe fn destroy_command_encoder(&self, cmd_encoder: super::CommandEncoder) { + unsafe { + // `vkDestroyCommandPool` also frees any command buffers allocated + // from that pool, so there's no need to explicitly call + // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded` + // fields. 
+ self.shared.raw.destroy_command_pool(cmd_encoder.raw, None); + } + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + let mut desc_count = gpu_descriptor::DescriptorTotalCount::default(); + let mut types = Vec::new(); + for entry in desc.entries { + let count = entry.count.map_or(1, |c| c.get()); + if entry.binding as usize >= types.len() { + types.resize( + entry.binding as usize + 1, + (vk::DescriptorType::INPUT_ATTACHMENT, 0), + ); + } + types[entry.binding as usize] = ( + conv::map_binding_type(entry.ty), + entry.count.map_or(1, |c| c.get()), + ); + + match entry.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => match ty { + wgt::BufferBindingType::Uniform => { + if has_dynamic_offset { + desc_count.uniform_buffer_dynamic += count; + } else { + desc_count.uniform_buffer += count; + } + } + wgt::BufferBindingType::Storage { .. } => { + if has_dynamic_offset { + desc_count.storage_buffer_dynamic += count; + } else { + desc_count.storage_buffer += count; + } + } + }, + wgt::BindingType::Sampler { .. } => { + desc_count.sampler += count; + } + wgt::BindingType::Texture { .. } => { + desc_count.sampled_image += count; + } + wgt::BindingType::StorageTexture { .. 
} => { + desc_count.storage_image += count; + } + wgt::BindingType::AccelerationStructure => { + desc_count.acceleration_structure += count; + } + } + } + + //Note: not bothering with on stack array here as it's low frequency + let vk_bindings = desc + .entries + .iter() + .map(|entry| vk::DescriptorSetLayoutBinding { + binding: entry.binding, + descriptor_type: types[entry.binding as usize].0, + descriptor_count: types[entry.binding as usize].1, + stage_flags: conv::map_shader_stage(entry.visibility), + p_immutable_samplers: ptr::null(), + }) + .collect::<Vec<_>>(); + + let vk_info = vk::DescriptorSetLayoutCreateInfo::builder().bindings(&vk_bindings); + + let binding_arrays = desc + .entries + .iter() + .enumerate() + .filter_map(|(idx, entry)| entry.count.map(|count| (idx as u32, count))) + .collect(); + + let mut binding_flag_info; + let binding_flag_vec; + + let partially_bound = desc + .flags + .contains(crate::BindGroupLayoutFlags::PARTIALLY_BOUND); + + let vk_info = if partially_bound { + binding_flag_vec = desc + .entries + .iter() + .map(|entry| { + let mut flags = vk::DescriptorBindingFlags::empty(); + + if partially_bound && entry.count.is_some() { + flags |= vk::DescriptorBindingFlags::PARTIALLY_BOUND; + } + + flags + }) + .collect::<Vec<_>>(); + + binding_flag_info = vk::DescriptorSetLayoutBindingFlagsCreateInfo::builder() + .binding_flags(&binding_flag_vec); + + vk_info.push_next(&mut binding_flag_info) + } else { + vk_info + }; + + let raw = unsafe { + self.shared + .raw + .create_descriptor_set_layout(&vk_info, None)? 
+ }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::DESCRIPTOR_SET_LAYOUT, raw, label) + }; + } + + Ok(super::BindGroupLayout { + raw, + desc_count, + types: types.into_boxed_slice(), + binding_arrays, + }) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + unsafe { + self.shared + .raw + .destroy_descriptor_set_layout(bg_layout.raw, None) + }; + } + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + //Note: not bothering with on stack array here as it's low frequency + let vk_set_layouts = desc + .bind_group_layouts + .iter() + .map(|bgl| bgl.raw) + .collect::<Vec<_>>(); + let vk_push_constant_ranges = desc + .push_constant_ranges + .iter() + .map(|pcr| vk::PushConstantRange { + stage_flags: conv::map_shader_stage(pcr.stages), + offset: pcr.range.start, + size: pcr.range.end - pcr.range.start, + }) + .collect::<Vec<_>>(); + + let vk_info = vk::PipelineLayoutCreateInfo::builder() + .flags(vk::PipelineLayoutCreateFlags::empty()) + .set_layouts(&vk_set_layouts) + .push_constant_ranges(&vk_push_constant_ranges); + + let raw = { + profiling::scope!("vkCreatePipelineLayout"); + unsafe { self.shared.raw.create_pipeline_layout(&vk_info, None)? 
} + }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::PIPELINE_LAYOUT, raw, label) + }; + } + + let mut binding_arrays = BTreeMap::new(); + for (group, &layout) in desc.bind_group_layouts.iter().enumerate() { + for &(binding, binding_array_size) in &layout.binding_arrays { + binding_arrays.insert( + naga::ResourceBinding { + group: group as u32, + binding, + }, + naga::back::spv::BindingInfo { + binding_array_size: Some(binding_array_size.get()), + }, + ); + } + } + + Ok(super::PipelineLayout { + raw, + binding_arrays, + }) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + unsafe { + self.shared + .raw + .destroy_pipeline_layout(pipeline_layout.raw, None) + }; + } + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + let mut vk_sets = unsafe { + self.desc_allocator.lock().allocate( + &*self.shared, + &desc.layout.raw, + gpu_descriptor::DescriptorSetLayoutCreateFlags::empty(), + &desc.layout.desc_count, + 1, + )? 
+ }; + + let set = vk_sets.pop().unwrap(); + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::DESCRIPTOR_SET, *set.raw(), label) + }; + } + + let mut writes = Vec::with_capacity(desc.entries.len()); + let mut buffer_infos = Vec::with_capacity(desc.buffers.len()); + let mut sampler_infos = Vec::with_capacity(desc.samplers.len()); + let mut image_infos = Vec::with_capacity(desc.textures.len()); + let mut acceleration_structure_infos = + Vec::with_capacity(desc.acceleration_structures.len()); + let mut raw_acceleration_structures = + Vec::with_capacity(desc.acceleration_structures.len()); + for entry in desc.entries { + let (ty, size) = desc.layout.types[entry.binding as usize]; + if size == 0 { + continue; // empty slot + } + let mut write = vk::WriteDescriptorSet::builder() + .dst_set(*set.raw()) + .dst_binding(entry.binding) + .descriptor_type(ty); + + let mut extra_descriptor_count = 0; + + write = match ty { + vk::DescriptorType::SAMPLER => { + let index = sampler_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + sampler_infos.extend(desc.samplers[start as usize..end as usize].iter().map( + |binding| { + vk::DescriptorImageInfo::builder() + .sampler(binding.raw) + .build() + }, + )); + write.image_info(&sampler_infos[index..]) + } + vk::DescriptorType::SAMPLED_IMAGE | vk::DescriptorType::STORAGE_IMAGE => { + let index = image_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + image_infos.extend(desc.textures[start as usize..end as usize].iter().map( + |binding| { + let layout = conv::derive_image_layout( + binding.usage, + binding.view.attachment.view_format, + ); + vk::DescriptorImageInfo::builder() + .image_view(binding.view.raw) + .image_layout(layout) + .build() + }, + )); + write.image_info(&image_infos[index..]) + } + vk::DescriptorType::UNIFORM_BUFFER + | vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC + | vk::DescriptorType::STORAGE_BUFFER + | 
vk::DescriptorType::STORAGE_BUFFER_DYNAMIC => { + let index = buffer_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + buffer_infos.extend(desc.buffers[start as usize..end as usize].iter().map( + |binding| { + vk::DescriptorBufferInfo::builder() + .buffer(binding.buffer.raw) + .offset(binding.offset) + .range(binding.size.map_or(vk::WHOLE_SIZE, wgt::BufferSize::get)) + .build() + }, + )); + write.buffer_info(&buffer_infos[index..]) + } + vk::DescriptorType::ACCELERATION_STRUCTURE_KHR => { + let index = acceleration_structure_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + + let raw_start = raw_acceleration_structures.len(); + + raw_acceleration_structures.extend( + desc.acceleration_structures[start as usize..end as usize] + .iter() + .map(|acceleration_structure| acceleration_structure.raw), + ); + + let acceleration_structure_info = + vk::WriteDescriptorSetAccelerationStructureKHR::builder() + .acceleration_structures(&raw_acceleration_structures[raw_start..]); + + // todo: Dereference the struct to get around lifetime issues. Safe as long as we never resize + // `raw_acceleration_structures`. 
+ let acceleration_structure_info: vk::WriteDescriptorSetAccelerationStructureKHR = *acceleration_structure_info; + + assert!( + index < desc.acceleration_structures.len(), + "Encountered more acceleration structures then expected" + ); + acceleration_structure_infos.push(acceleration_structure_info); + + extra_descriptor_count += 1; + + write.push_next(&mut acceleration_structure_infos[index]) + } + _ => unreachable!(), + }; + + let mut write = write.build(); + write.descriptor_count += extra_descriptor_count; + + writes.push(write); + } + + unsafe { self.shared.raw.update_descriptor_sets(&writes, &[]) }; + Ok(super::BindGroup { set }) + } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + unsafe { + self.desc_allocator + .lock() + .free(&*self.shared, Some(group.set)) + }; + } + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + let spv = match shader { + crate::ShaderInput::Naga(naga_shader) => { + if self + .shared + .workarounds + .contains(super::Workarounds::SEPARATE_ENTRY_POINTS) + { + return Ok(super::ShaderModule::Intermediate { + naga_shader, + runtime_checks: desc.runtime_checks, + }); + } + let mut naga_options = self.naga_options.clone(); + naga_options.debug_info = + naga_shader + .debug_source + .as_ref() + .map(|d| naga::back::spv::DebugInfo { + source_code: d.source_code.as_ref(), + file_name: d.file_name.as_ref().as_ref(), + }); + if !desc.runtime_checks { + naga_options.bounds_check_policies = naga::proc::BoundsCheckPolicies { + index: naga::proc::BoundsCheckPolicy::Unchecked, + buffer: naga::proc::BoundsCheckPolicy::Unchecked, + image_load: naga::proc::BoundsCheckPolicy::Unchecked, + image_store: naga::proc::BoundsCheckPolicy::Unchecked, + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }; + } + Cow::Owned( + naga::back::spv::write_vec( + &naga_shader.module, + &naga_shader.info, + &naga_options, + 
None, + ) + .map_err(|e| crate::ShaderError::Compilation(format!("{e}")))?, + ) + } + crate::ShaderInput::SpirV(spv) => Cow::Borrowed(spv), + }; + + let raw = self.create_shader_module_impl(&spv)?; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::SHADER_MODULE, raw, label) + }; + } + + Ok(super::ShaderModule::Raw(raw)) + } + unsafe fn destroy_shader_module(&self, module: super::ShaderModule) { + match module { + super::ShaderModule::Raw(raw) => { + unsafe { self.shared.raw.destroy_shader_module(raw, None) }; + } + super::ShaderModule::Intermediate { .. } => {} + } + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + let dynamic_states = [ + vk::DynamicState::VIEWPORT, + vk::DynamicState::SCISSOR, + vk::DynamicState::BLEND_CONSTANTS, + vk::DynamicState::STENCIL_REFERENCE, + ]; + let mut compatible_rp_key = super::RenderPassKey { + sample_count: desc.multisample.count, + multiview: desc.multiview, + ..Default::default() + }; + let mut stages = ArrayVec::<_, { crate::MAX_CONCURRENT_SHADER_STAGES }>::new(); + let mut vertex_buffers = Vec::with_capacity(desc.vertex_buffers.len()); + let mut vertex_attributes = Vec::new(); + + for (i, vb) in desc.vertex_buffers.iter().enumerate() { + vertex_buffers.push(vk::VertexInputBindingDescription { + binding: i as u32, + stride: vb.array_stride as u32, + input_rate: match vb.step_mode { + wgt::VertexStepMode::Vertex => vk::VertexInputRate::VERTEX, + wgt::VertexStepMode::Instance => vk::VertexInputRate::INSTANCE, + }, + }); + for at in vb.attributes { + vertex_attributes.push(vk::VertexInputAttributeDescription { + location: at.shader_location, + binding: i as u32, + format: conv::map_vertex_format(at.format), + offset: at.offset as u32, + }); + } + } + + let vk_vertex_input = vk::PipelineVertexInputStateCreateInfo::builder() + 
.vertex_binding_descriptions(&vertex_buffers) + .vertex_attribute_descriptions(&vertex_attributes) + .build(); + + let vk_input_assembly = vk::PipelineInputAssemblyStateCreateInfo::builder() + .topology(conv::map_topology(desc.primitive.topology)) + .primitive_restart_enable(desc.primitive.strip_index_format.is_some()) + .build(); + + let compiled_vs = self.compile_stage( + &desc.vertex_stage, + naga::ShaderStage::Vertex, + &desc.layout.binding_arrays, + )?; + stages.push(compiled_vs.create_info); + let compiled_fs = match desc.fragment_stage { + Some(ref stage) => { + let compiled = self.compile_stage( + stage, + naga::ShaderStage::Fragment, + &desc.layout.binding_arrays, + )?; + stages.push(compiled.create_info); + Some(compiled) + } + None => None, + }; + + let mut vk_rasterization = vk::PipelineRasterizationStateCreateInfo::builder() + .polygon_mode(conv::map_polygon_mode(desc.primitive.polygon_mode)) + .front_face(conv::map_front_face(desc.primitive.front_face)) + .line_width(1.0) + .depth_clamp_enable(desc.primitive.unclipped_depth); + if let Some(face) = desc.primitive.cull_mode { + vk_rasterization = vk_rasterization.cull_mode(conv::map_cull_face(face)) + } + let mut vk_rasterization_conservative_state = + vk::PipelineRasterizationConservativeStateCreateInfoEXT::builder() + .conservative_rasterization_mode(vk::ConservativeRasterizationModeEXT::OVERESTIMATE) + .build(); + if desc.primitive.conservative { + vk_rasterization = vk_rasterization.push_next(&mut vk_rasterization_conservative_state); + } + + let mut vk_depth_stencil = vk::PipelineDepthStencilStateCreateInfo::builder(); + if let Some(ref ds) = desc.depth_stencil { + let vk_format = self.shared.private_caps.map_texture_format(ds.format); + let vk_layout = if ds.is_read_only(desc.primitive.cull_mode) { + vk::ImageLayout::DEPTH_STENCIL_READ_ONLY_OPTIMAL + } else { + vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL + }; + compatible_rp_key.depth_stencil = Some(super::DepthStencilAttachmentKey { + base: 
super::AttachmentKey::compatible(vk_format, vk_layout), + stencil_ops: crate::AttachmentOps::all(), + }); + + if ds.is_depth_enabled() { + vk_depth_stencil = vk_depth_stencil + .depth_test_enable(true) + .depth_write_enable(ds.depth_write_enabled) + .depth_compare_op(conv::map_comparison(ds.depth_compare)); + } + if ds.stencil.is_enabled() { + let s = &ds.stencil; + let front = conv::map_stencil_face(&s.front, s.read_mask, s.write_mask); + let back = conv::map_stencil_face(&s.back, s.read_mask, s.write_mask); + vk_depth_stencil = vk_depth_stencil + .stencil_test_enable(true) + .front(front) + .back(back); + } + + if ds.bias.is_enabled() { + vk_rasterization = vk_rasterization + .depth_bias_enable(true) + .depth_bias_constant_factor(ds.bias.constant as f32) + .depth_bias_clamp(ds.bias.clamp) + .depth_bias_slope_factor(ds.bias.slope_scale); + } + } + + let vk_viewport = vk::PipelineViewportStateCreateInfo::builder() + .flags(vk::PipelineViewportStateCreateFlags::empty()) + .scissor_count(1) + .viewport_count(1) + .build(); + + let vk_sample_mask = [ + desc.multisample.mask as u32, + (desc.multisample.mask >> 32) as u32, + ]; + let vk_multisample = vk::PipelineMultisampleStateCreateInfo::builder() + .rasterization_samples(vk::SampleCountFlags::from_raw(desc.multisample.count)) + .alpha_to_coverage_enable(desc.multisample.alpha_to_coverage_enabled) + .sample_mask(&vk_sample_mask) + .build(); + + let mut vk_attachments = Vec::with_capacity(desc.color_targets.len()); + for cat in desc.color_targets { + let (key, attarchment) = if let Some(cat) = cat.as_ref() { + let mut vk_attachment = vk::PipelineColorBlendAttachmentState::builder() + .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits())); + if let Some(ref blend) = cat.blend { + let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); + let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); + vk_attachment = vk_attachment + .blend_enable(true) + 
.color_blend_op(color_op) + .src_color_blend_factor(color_src) + .dst_color_blend_factor(color_dst) + .alpha_blend_op(alpha_op) + .src_alpha_blend_factor(alpha_src) + .dst_alpha_blend_factor(alpha_dst); + } + + let vk_format = self.shared.private_caps.map_texture_format(cat.format); + ( + Some(super::ColorAttachmentKey { + base: super::AttachmentKey::compatible( + vk_format, + vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL, + ), + resolve: None, + }), + vk_attachment.build(), + ) + } else { + (None, vk::PipelineColorBlendAttachmentState::default()) + }; + + compatible_rp_key.colors.push(key); + vk_attachments.push(attarchment); + } + + let vk_color_blend = vk::PipelineColorBlendStateCreateInfo::builder() + .attachments(&vk_attachments) + .build(); + + let vk_dynamic_state = vk::PipelineDynamicStateCreateInfo::builder() + .dynamic_states(&dynamic_states) + .build(); + + let raw_pass = self + .shared + .make_render_pass(compatible_rp_key) + .map_err(crate::DeviceError::from)?; + + let vk_infos = [{ + vk::GraphicsPipelineCreateInfo::builder() + .layout(desc.layout.raw) + .stages(&stages) + .vertex_input_state(&vk_vertex_input) + .input_assembly_state(&vk_input_assembly) + .rasterization_state(&vk_rasterization) + .viewport_state(&vk_viewport) + .multisample_state(&vk_multisample) + .depth_stencil_state(&vk_depth_stencil) + .color_blend_state(&vk_color_blend) + .dynamic_state(&vk_dynamic_state) + .render_pass(raw_pass) + .build() + }]; + + let mut raw_vec = { + profiling::scope!("vkCreateGraphicsPipelines"); + unsafe { + self.shared + .raw + .create_graphics_pipelines(vk::PipelineCache::null(), &vk_infos, None) + .map_err(|(_, e)| crate::DeviceError::from(e)) + }? 
+ }; + + let raw = raw_vec.pop().unwrap(); + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::PIPELINE, raw, label) + }; + } + + if let Some(raw_module) = compiled_vs.temp_raw_module { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + if let Some(CompiledStage { + temp_raw_module: Some(raw_module), + .. + }) = compiled_fs + { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + + Ok(super::RenderPipeline { raw }) + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + let compiled = self.compile_stage( + &desc.stage, + naga::ShaderStage::Compute, + &desc.layout.binding_arrays, + )?; + + let vk_infos = [{ + vk::ComputePipelineCreateInfo::builder() + .layout(desc.layout.raw) + .stage(compiled.create_info) + .build() + }]; + + let mut raw_vec = { + profiling::scope!("vkCreateComputePipelines"); + unsafe { + self.shared + .raw + .create_compute_pipelines(vk::PipelineCache::null(), &vk_infos, None) + .map_err(|(_, e)| crate::DeviceError::from(e)) + }? 
+ }; + + let raw = raw_vec.pop().unwrap(); + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::PIPELINE, raw, label) + }; + } + + if let Some(raw_module) = compiled.temp_raw_module { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + + Ok(super::ComputePipeline { raw }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + } + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + let (vk_type, pipeline_statistics) = match desc.ty { + wgt::QueryType::Occlusion => ( + vk::QueryType::OCCLUSION, + vk::QueryPipelineStatisticFlags::empty(), + ), + wgt::QueryType::PipelineStatistics(statistics) => ( + vk::QueryType::PIPELINE_STATISTICS, + conv::map_pipeline_statistics(statistics), + ), + wgt::QueryType::Timestamp => ( + vk::QueryType::TIMESTAMP, + vk::QueryPipelineStatisticFlags::empty(), + ), + }; + + let vk_info = vk::QueryPoolCreateInfo::builder() + .query_type(vk_type) + .query_count(desc.count) + .pipeline_statistics(pipeline_statistics) + .build(); + + let raw = unsafe { self.shared.raw.create_query_pool(&vk_info, None) }?; + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::QUERY_POOL, raw, label) + }; + } + + Ok(super::QuerySet { raw }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + unsafe { self.shared.raw.destroy_query_pool(set.raw, None) }; + } + + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + Ok(if self.shared.private_caps.timeline_semaphores { + let mut sem_type_info = + vk::SemaphoreTypeCreateInfo::builder().semaphore_type(vk::SemaphoreType::TIMELINE); + let vk_info = vk::SemaphoreCreateInfo::builder().push_next(&mut sem_type_info); + let raw = unsafe { self.shared.raw.create_semaphore(&vk_info, None) }?; + 
super::Fence::TimelineSemaphore(raw) + } else { + super::Fence::FencePool { + last_completed: 0, + active: Vec::new(), + free: Vec::new(), + } + }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + match fence { + super::Fence::TimelineSemaphore(raw) => { + unsafe { self.shared.raw.destroy_semaphore(raw, None) }; + } + super::Fence::FencePool { + active, + free, + last_completed: _, + } => { + for (_, raw) in active { + unsafe { self.shared.raw.destroy_fence(raw, None) }; + } + for raw in free { + unsafe { self.shared.raw.destroy_fence(raw, None) }; + } + } + } + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + fence.get_latest( + &self.shared.raw, + self.shared.extension_fns.timeline_semaphore.as_ref(), + ) + } + unsafe fn wait( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + let timeout_ns = timeout_ms as u64 * super::MILLIS_TO_NANOS; + match *fence { + super::Fence::TimelineSemaphore(raw) => { + let semaphores = [raw]; + let values = [wait_value]; + let vk_info = vk::SemaphoreWaitInfo::builder() + .semaphores(&semaphores) + .values(&values); + let result = match self.shared.extension_fns.timeline_semaphore { + Some(super::ExtensionFn::Extension(ref ext)) => unsafe { + ext.wait_semaphores(&vk_info, timeout_ns) + }, + Some(super::ExtensionFn::Promoted) => unsafe { + self.shared.raw.wait_semaphores(&vk_info, timeout_ns) + }, + None => unreachable!(), + }; + match result { + Ok(()) => Ok(true), + Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + super::Fence::FencePool { + last_completed, + ref active, + free: _, + } => { + if wait_value <= last_completed { + Ok(true) + } else { + match active.iter().find(|&&(value, _)| value >= wait_value) { + Some(&(_, raw)) => { + match unsafe { + self.shared.raw.wait_for_fences(&[raw], true, timeout_ns) + } { + Ok(()) => Ok(true), + 
Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + None => { + log::error!("No signals reached value {}", wait_value); + Err(crate::DeviceError::Lost) + } + } + } + } + } + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(feature = "renderdoc")] + { + // Renderdoc requires us to give us the pointer that vkInstance _points to_. + let raw_vk_instance = + ash::vk::Handle::as_raw(self.shared.instance.raw.handle()) as *mut *mut _; + let raw_vk_instance_dispatch_table = unsafe { *raw_vk_instance }; + unsafe { + self.render_doc + .start_frame_capture(raw_vk_instance_dispatch_table, ptr::null_mut()) + } + } + #[cfg(not(feature = "renderdoc"))] + false + } + unsafe fn stop_capture(&self) { + #[cfg(feature = "renderdoc")] + { + // Renderdoc requires us to give us the pointer that vkInstance _points to_. + let raw_vk_instance = + ash::vk::Handle::as_raw(self.shared.instance.raw.handle()) as *mut *mut _; + let raw_vk_instance_dispatch_table = unsafe { *raw_vk_instance }; + + unsafe { + self.render_doc + .end_frame_capture(raw_vk_instance_dispatch_table, ptr::null_mut()) + } + } + } + + unsafe fn get_acceleration_structure_build_sizes<'a>( + &self, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + ) -> crate::AccelerationStructureBuildSizes { + const CAPACITY: usize = 8; + + let ray_tracing_functions = self + .shared + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + + let (geometries, primitive_counts) = match *desc.entries { + crate::AccelerationStructureEntries::Instances(ref instances) => { + let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::default(); + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: instance_data, + }); + + ( + smallvec::smallvec![*geometry], + smallvec::smallvec![instances.count], + ) + } + 
crate::AccelerationStructureEntries::Triangles(ref in_geometries) => { + let mut primitive_counts = + smallvec::SmallVec::<[u32; CAPACITY]>::with_capacity(in_geometries.len()); + let mut geometries = smallvec::SmallVec::< + [vk::AccelerationStructureGeometryKHR; CAPACITY], + >::with_capacity(in_geometries.len()); + + for triangles in in_geometries { + let mut triangle_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_format(conv::map_vertex_format(triangles.vertex_format)) + .max_vertex(triangles.vertex_count) + .vertex_stride(triangles.vertex_stride); + + let pritive_count = if let Some(ref indices) = triangles.indices { + triangle_data = + triangle_data.index_type(conv::map_index_format(indices.format)); + indices.count / 3 + } else { + triangles.vertex_count + }; + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: *triangle_data, + }) + .flags(conv::map_acceleration_structure_geometry_flags( + triangles.flags, + )); + + geometries.push(*geometry); + primitive_counts.push(pritive_count); + } + (geometries, primitive_counts) + } + crate::AccelerationStructureEntries::AABBs(ref in_geometries) => { + let mut primitive_counts = + smallvec::SmallVec::<[u32; CAPACITY]>::with_capacity(in_geometries.len()); + let mut geometries = smallvec::SmallVec::< + [vk::AccelerationStructureGeometryKHR; CAPACITY], + >::with_capacity(in_geometries.len()); + for aabb in in_geometries { + let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder() + .stride(aabb.stride); + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::AABBS) + .geometry(vk::AccelerationStructureGeometryDataKHR { aabbs: *aabbs_data }) + .flags(conv::map_acceleration_structure_geometry_flags(aabb.flags)); + + geometries.push(*geometry); + primitive_counts.push(aabb.count); + } + (geometries, 
primitive_counts) + } + }; + + let ty = match *desc.entries { + crate::AccelerationStructureEntries::Instances(_) => { + vk::AccelerationStructureTypeKHR::TOP_LEVEL + } + _ => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL, + }; + + let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + .ty(ty) + .flags(conv::map_acceleration_structure_flags(desc.flags)) + .geometries(&geometries); + + let raw = unsafe { + ray_tracing_functions + .acceleration_structure + .get_acceleration_structure_build_sizes( + vk::AccelerationStructureBuildTypeKHR::DEVICE, + &geometry_info, + &primitive_counts, + ) + }; + + crate::AccelerationStructureBuildSizes { + acceleration_structure_size: raw.acceleration_structure_size, + update_scratch_size: raw.update_scratch_size, + build_scratch_size: raw.build_scratch_size, + } + } + + unsafe fn get_acceleration_structure_device_address( + &self, + acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + let ray_tracing_functions = self + .shared + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + + unsafe { + ray_tracing_functions + .acceleration_structure + .get_acceleration_structure_device_address( + &vk::AccelerationStructureDeviceAddressInfoKHR::builder() + .acceleration_structure(acceleration_structure.raw), + ) + } + } + + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> Result<super::AccelerationStructure, crate::DeviceError> { + let ray_tracing_functions = self + .shared + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + + let vk_buffer_info = vk::BufferCreateInfo::builder() + .size(desc.size) + .usage(vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + unsafe { + let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; + let req = 
self.shared.raw.get_buffer_memory_requirements(raw_buffer); + + let block = self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: req.alignment - 1, + usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )?; + + self.shared + .raw + .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())?; + + if let Some(label) = desc.label { + self.shared + .set_object_name(vk::ObjectType::BUFFER, raw_buffer, label); + } + + let vk_info = vk::AccelerationStructureCreateInfoKHR::builder() + .buffer(raw_buffer) + .offset(0) + .size(desc.size) + .ty(conv::map_acceleration_structure_format(desc.format)); + + let raw_acceleration_structure = ray_tracing_functions + .acceleration_structure + .create_acceleration_structure(&vk_info, None)?; + + if let Some(label) = desc.label { + self.shared.set_object_name( + vk::ObjectType::ACCELERATION_STRUCTURE_KHR, + raw_acceleration_structure, + label, + ); + } + + Ok(super::AccelerationStructure { + raw: raw_acceleration_structure, + buffer: raw_buffer, + block: Mutex::new(block), + }) + } + } + + unsafe fn destroy_acceleration_structure( + &self, + acceleration_structure: super::AccelerationStructure, + ) { + let ray_tracing_functions = self + .shared + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + + unsafe { + ray_tracing_functions + .acceleration_structure + .destroy_acceleration_structure(acceleration_structure.raw, None); + self.shared + .raw + .destroy_buffer(acceleration_structure.buffer, None); + self.mem_allocator + .lock() + .dealloc(&*self.shared, acceleration_structure.block.into_inner()); + } + } +} + +impl From<gpu_alloc::AllocationError> for crate::DeviceError { + fn from(error: gpu_alloc::AllocationError) -> Self { + use gpu_alloc::AllocationError as Ae; + match error { + Ae::OutOfDeviceMemory | Ae::OutOfHostMemory => Self::OutOfMemory, + _ => { + 
log::error!("memory allocation: {:?}", error); + Self::Lost + } + } + } +} +impl From<gpu_alloc::MapError> for crate::DeviceError { + fn from(error: gpu_alloc::MapError) -> Self { + use gpu_alloc::MapError as Me; + match error { + Me::OutOfDeviceMemory | Me::OutOfHostMemory => Self::OutOfMemory, + _ => { + log::error!("memory mapping: {:?}", error); + Self::Lost + } + } + } +} +impl From<gpu_descriptor::AllocationError> for crate::DeviceError { + fn from(error: gpu_descriptor::AllocationError) -> Self { + log::error!("descriptor allocation: {:?}", error); + Self::OutOfMemory + } +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/instance.rs b/third_party/rust/wgpu-hal/src/vulkan/instance.rs new file mode 100644 index 0000000000..c4ef573461 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/instance.rs @@ -0,0 +1,1011 @@ +use std::{ + ffi::{c_void, CStr, CString}, + slice, + str::FromStr, + sync::Arc, + thread, +}; + +use ash::{ + extensions::{ext, khr}, + vk, +}; +use parking_lot::RwLock; + +unsafe extern "system" fn debug_utils_messenger_callback( + message_severity: vk::DebugUtilsMessageSeverityFlagsEXT, + message_type: vk::DebugUtilsMessageTypeFlagsEXT, + callback_data_ptr: *const vk::DebugUtilsMessengerCallbackDataEXT, + user_data: *mut c_void, +) -> vk::Bool32 { + use std::borrow::Cow; + + if thread::panicking() { + return vk::FALSE; + } + + let cd = unsafe { &*callback_data_ptr }; + let user_data = unsafe { &*(user_data as *mut super::DebugUtilsMessengerUserData) }; + + const VUID_VKCMDENDDEBUGUTILSLABELEXT_COMMANDBUFFER_01912: i32 = 0x56146426; + if cd.message_id_number == VUID_VKCMDENDDEBUGUTILSLABELEXT_COMMANDBUFFER_01912 { + // https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/5671 + // Versions 1.3.240 through 1.3.250 return a spurious error here if + // the debug range start and end appear in different command buffers. 
+ let khronos_validation_layer = + std::ffi::CStr::from_bytes_with_nul(b"Khronos Validation Layer\0").unwrap(); + if user_data.validation_layer_description.as_ref() == khronos_validation_layer + && user_data.validation_layer_spec_version >= vk::make_api_version(0, 1, 3, 240) + && user_data.validation_layer_spec_version <= vk::make_api_version(0, 1, 3, 250) + { + return vk::FALSE; + } + } + + // Silence Vulkan Validation error "VUID-VkSwapchainCreateInfoKHR-pNext-07781" + // This happens when a surface is configured with a size outside the allowed extent. + // It's s false positive due to the inherent racy-ness of surface resizing. + const VUID_VKSWAPCHAINCREATEINFOKHR_PNEXT_07781: i32 = 0x4c8929c1; + if cd.message_id_number == VUID_VKSWAPCHAINCREATEINFOKHR_PNEXT_07781 { + return vk::FALSE; + } + + // Silence Vulkan Validation error "VUID-VkRenderPassBeginInfo-framebuffer-04627" + // if the OBS layer is enabled. This is a bug in the OBS layer. As the OBS layer + // does not have a version number they increment, there is no way to qualify the + // suppression of the error to a specific version of the OBS layer. 
+ // + // See https://github.com/obsproject/obs-studio/issues/9353 + const VUID_VKRENDERPASSBEGININFO_FRAMEBUFFER_04627: i32 = 0x45125641; + if cd.message_id_number == VUID_VKRENDERPASSBEGININFO_FRAMEBUFFER_04627 + && user_data.has_obs_layer + { + return vk::FALSE; + } + + let level = match message_severity { + vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE => log::Level::Debug, + vk::DebugUtilsMessageSeverityFlagsEXT::INFO => log::Level::Info, + vk::DebugUtilsMessageSeverityFlagsEXT::WARNING => log::Level::Warn, + vk::DebugUtilsMessageSeverityFlagsEXT::ERROR => log::Level::Error, + _ => log::Level::Warn, + }; + + let message_id_name = if cd.p_message_id_name.is_null() { + Cow::from("") + } else { + unsafe { CStr::from_ptr(cd.p_message_id_name) }.to_string_lossy() + }; + let message = if cd.p_message.is_null() { + Cow::from("") + } else { + unsafe { CStr::from_ptr(cd.p_message) }.to_string_lossy() + }; + + let _ = std::panic::catch_unwind(|| { + log::log!( + level, + "{:?} [{} (0x{:x})]\n\t{}", + message_type, + message_id_name, + cd.message_id_number, + message, + ); + }); + + if cd.queue_label_count != 0 { + let labels = + unsafe { slice::from_raw_parts(cd.p_queue_labels, cd.queue_label_count as usize) }; + let names = labels + .iter() + .flat_map(|dul_obj| { + unsafe { dul_obj.p_label_name.as_ref() } + .map(|lbl| unsafe { CStr::from_ptr(lbl) }.to_string_lossy()) + }) + .collect::<Vec<_>>(); + + let _ = std::panic::catch_unwind(|| { + log::log!(level, "\tqueues: {}", names.join(", ")); + }); + } + + if cd.cmd_buf_label_count != 0 { + let labels = + unsafe { slice::from_raw_parts(cd.p_cmd_buf_labels, cd.cmd_buf_label_count as usize) }; + let names = labels + .iter() + .flat_map(|dul_obj| { + unsafe { dul_obj.p_label_name.as_ref() } + .map(|lbl| unsafe { CStr::from_ptr(lbl) }.to_string_lossy()) + }) + .collect::<Vec<_>>(); + + let _ = std::panic::catch_unwind(|| { + log::log!(level, "\tcommand buffers: {}", names.join(", ")); + }); + } + + if cd.object_count != 0 { 
+ let labels = unsafe { slice::from_raw_parts(cd.p_objects, cd.object_count as usize) }; + //TODO: use color fields of `vk::DebugUtilsLabelExt`? + let names = labels + .iter() + .map(|obj_info| { + let name = unsafe { obj_info.p_object_name.as_ref() } + .map(|name| unsafe { CStr::from_ptr(name) }.to_string_lossy()) + .unwrap_or(Cow::Borrowed("?")); + + format!( + "(type: {:?}, hndl: 0x{:x}, name: {})", + obj_info.object_type, obj_info.object_handle, name + ) + }) + .collect::<Vec<_>>(); + let _ = std::panic::catch_unwind(|| { + log::log!(level, "\tobjects: {}", names.join(", ")); + }); + } + + if cfg!(debug_assertions) && level == log::Level::Error { + // Set canary and continue + crate::VALIDATION_CANARY.add(message.to_string()); + } + + vk::FALSE +} + +impl super::DebugUtilsCreateInfo { + fn to_vk_create_info(&self) -> vk::DebugUtilsMessengerCreateInfoEXTBuilder<'_> { + let user_data_ptr: *const super::DebugUtilsMessengerUserData = &*self.callback_data; + vk::DebugUtilsMessengerCreateInfoEXT::builder() + .message_severity(self.severity) + .message_type(self.message_type) + .user_data(user_data_ptr as *mut _) + .pfn_user_callback(Some(debug_utils_messenger_callback)) + } +} + +impl super::Swapchain { + /// # Safety + /// + /// - The device must have been made idle before calling this function. + unsafe fn release_resources(mut self, device: &ash::Device) -> Self { + profiling::scope!("Swapchain::release_resources"); + { + profiling::scope!("vkDeviceWaitIdle"); + // We need to also wait until all presentation work is done. Because there is no way to portably wait until + // the presentation work is done, we are forced to wait until the device is idle. + let _ = unsafe { device.device_wait_idle() }; + }; + + for semaphore in self.surface_semaphores.drain(..) 
{ + unsafe { + device.destroy_semaphore(semaphore, None); + } + } + + self + } +} + +impl super::InstanceShared { + pub fn entry(&self) -> &ash::Entry { + &self.entry + } + + pub fn raw_instance(&self) -> &ash::Instance { + &self.raw + } + + pub fn instance_api_version(&self) -> u32 { + self.instance_api_version + } + + pub fn extensions(&self) -> &[&'static CStr] { + &self.extensions[..] + } +} + +impl super::Instance { + pub fn shared_instance(&self) -> &super::InstanceShared { + &self.shared + } + + /// Return the instance extension names wgpu would like to enable. + /// + /// Return a vector of the names of instance extensions actually available + /// on `entry` that wgpu would like to enable. + /// + /// The `instance_api_version` argument should be the instance's Vulkan API + /// version, as obtained from `vkEnumerateInstanceVersion`. This is the same + /// space of values as the `VK_API_VERSION` constants. + /// + /// Note that wgpu can function without many of these extensions (for + /// example, `VK_KHR_wayland_surface` is certainly not going to be available + /// everywhere), but if one of these extensions is available at all, wgpu + /// assumes that it has been enabled. 
+ pub fn desired_extensions( + entry: &ash::Entry, + _instance_api_version: u32, + flags: wgt::InstanceFlags, + ) -> Result<Vec<&'static CStr>, crate::InstanceError> { + let instance_extensions = { + profiling::scope!("vkEnumerateInstanceExtensionProperties"); + entry.enumerate_instance_extension_properties(None) + }; + let instance_extensions = instance_extensions.map_err(|e| { + crate::InstanceError::with_source( + String::from("enumerate_instance_extension_properties() failed"), + e, + ) + })?; + + // Check our extensions against the available extensions + let mut extensions: Vec<&'static CStr> = Vec::new(); + + // VK_KHR_surface + extensions.push(khr::Surface::name()); + + // Platform-specific WSI extensions + if cfg!(all( + unix, + not(target_os = "android"), + not(target_os = "macos") + )) { + // VK_KHR_xlib_surface + extensions.push(khr::XlibSurface::name()); + // VK_KHR_xcb_surface + extensions.push(khr::XcbSurface::name()); + // VK_KHR_wayland_surface + extensions.push(khr::WaylandSurface::name()); + } + if cfg!(target_os = "android") { + // VK_KHR_android_surface + extensions.push(khr::AndroidSurface::name()); + } + if cfg!(target_os = "windows") { + // VK_KHR_win32_surface + extensions.push(khr::Win32Surface::name()); + } + if cfg!(target_os = "macos") { + // VK_EXT_metal_surface + extensions.push(ext::MetalSurface::name()); + extensions.push(ash::vk::KhrPortabilityEnumerationFn::name()); + } + + if flags.contains(wgt::InstanceFlags::DEBUG) { + // VK_EXT_debug_utils + extensions.push(ext::DebugUtils::name()); + } + + // VK_EXT_swapchain_colorspace + // Provides wide color gamut + extensions.push(vk::ExtSwapchainColorspaceFn::name()); + + // VK_KHR_get_physical_device_properties2 + // Even though the extension was promoted to Vulkan 1.1, we still require the extension + // so that we don't have to conditionally use the functions provided by the 1.1 instance + extensions.push(vk::KhrGetPhysicalDeviceProperties2Fn::name()); + + // Only keep available 
extensions. + extensions.retain(|&ext| { + if instance_extensions.iter().any(|inst_ext| { + crate::auxil::cstr_from_bytes_until_nul(&inst_ext.extension_name) == Some(ext) + }) { + true + } else { + log::warn!("Unable to find extension: {}", ext.to_string_lossy()); + false + } + }); + Ok(extensions) + } + + /// Wraps an already-created Vulkan instance. + /// + /// # Safety + /// + /// - `raw_instance` must be created from `entry` + /// - `raw_instance` must be created respecting `instance_api_version`, `extensions` and `flags` + /// - `extensions` must be a superset of `desired_extensions()` and must be created from the + /// same entry, `instance_api_version` and `flags`. + /// - `android_sdk_version` is ignored and can be `0` for all platforms besides Android + /// + /// If `debug_utils_create_info` is `Some`, then the validation layer is + /// available, so create a [`vk::DebugUtilsMessengerEXT`]. + /// + /// # Errors + /// + /// Returns an error if the debug utils messenger cannot be created. + #[allow(clippy::too_many_arguments)] + pub unsafe fn from_raw( + entry: ash::Entry, + raw_instance: ash::Instance, + instance_api_version: u32, + android_sdk_version: u32, + debug_utils_create_info: Option<super::DebugUtilsCreateInfo>, + extensions: Vec<&'static CStr>, + flags: wgt::InstanceFlags, + has_nv_optimus: bool, + drop_guard: Option<crate::DropGuard>, + ) -> Result<Self, crate::InstanceError> { + log::debug!("Instance version: 0x{:x}", instance_api_version); + + let debug_utils = if let Some(debug_utils_create_info) = debug_utils_create_info { + if extensions.contains(&ext::DebugUtils::name()) { + log::info!("Enabling debug utils"); + + let extension = ext::DebugUtils::new(&entry, &raw_instance); + let vk_info = debug_utils_create_info.to_vk_create_info(); + // Report the failure to the caller instead of panicking. + let messenger = + unsafe { extension.create_debug_utils_messenger(&vk_info, None) } + .map_err(|e| { + crate::InstanceError::with_source( + String::from("create_debug_utils_messenger() failed"), + e, + ) + })?; + + Some(super::DebugUtils { + extension, + messenger, + callback_data: debug_utils_create_info.callback_data, + }) + } else { + log::info!("Debug utils not enabled: extension not listed"); + None + } + } else { + log::info!( + "Debug utils not enabled: \ +
debug_utils_user_data not passed to Instance::from_raw" + ); + None + }; + + let get_physical_device_properties = + if extensions.contains(&khr::GetPhysicalDeviceProperties2::name()) { + log::debug!("Enabling device properties2"); + Some(khr::GetPhysicalDeviceProperties2::new( + &entry, + &raw_instance, + )) + } else { + None + }; + + Ok(Self { + shared: Arc::new(super::InstanceShared { + raw: raw_instance, + extensions, + drop_guard, + flags, + debug_utils, + get_physical_device_properties, + entry, + has_nv_optimus, + instance_api_version, + android_sdk_version, + }), + }) + } + + #[allow(dead_code)] + fn create_surface_from_xlib( + &self, + dpy: *mut vk::Display, + window: vk::Window, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&khr::XlibSurface::name()) { + return Err(crate::InstanceError::new(String::from( + "Vulkan driver does not support VK_KHR_xlib_surface", + ))); + } + + let surface = { + let xlib_loader = khr::XlibSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::XlibSurfaceCreateInfoKHR::builder() + .flags(vk::XlibSurfaceCreateFlagsKHR::empty()) + .window(window) + .dpy(dpy); + + unsafe { xlib_loader.create_xlib_surface(&info, None) } + .expect("XlibSurface::create_xlib_surface() failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_from_xcb( + &self, + connection: *mut vk::xcb_connection_t, + window: vk::xcb_window_t, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&khr::XcbSurface::name()) { + return Err(crate::InstanceError::new(String::from( + "Vulkan driver does not support VK_KHR_xcb_surface", + ))); + } + + let surface = { + let xcb_loader = khr::XcbSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::XcbSurfaceCreateInfoKHR::builder() + .flags(vk::XcbSurfaceCreateFlagsKHR::empty()) + .window(window) + .connection(connection); + + unsafe { 
xcb_loader.create_xcb_surface(&info, None) } + .expect("XcbSurface::create_xcb_surface() failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_from_wayland( + &self, + display: *mut c_void, + surface: *mut c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self + .shared + .extensions + .contains(&khr::WaylandSurface::name()) + { + return Err(crate::InstanceError::new(String::from( + "Vulkan driver does not support VK_KHR_wayland_surface", + ))); + } + + let surface = { + let w_loader = khr::WaylandSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::WaylandSurfaceCreateInfoKHR::builder() + .flags(vk::WaylandSurfaceCreateFlagsKHR::empty()) + .display(display) + .surface(surface); + + unsafe { w_loader.create_wayland_surface(&info, None) }.expect("WaylandSurface failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_android( + &self, + window: *const c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self + .shared + .extensions + .contains(&khr::AndroidSurface::name()) + { + return Err(crate::InstanceError::new(String::from( + "Vulkan driver does not support VK_KHR_android_surface", + ))); + } + + let surface = { + let a_loader = khr::AndroidSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::AndroidSurfaceCreateInfoKHR::builder() + .flags(vk::AndroidSurfaceCreateFlagsKHR::empty()) + .window(window as *mut _); + + unsafe { a_loader.create_android_surface(&info, None) }.expect("AndroidSurface failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_from_hwnd( + &self, + hinstance: *mut c_void, + hwnd: *mut c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&khr::Win32Surface::name()) { + return Err(crate::InstanceError::new(String::from( + "Vulkan driver does not 
support VK_KHR_win32_surface", + ))); + } + + let surface = { + let info = vk::Win32SurfaceCreateInfoKHR::builder() + .flags(vk::Win32SurfaceCreateFlagsKHR::empty()) + .hinstance(hinstance) + .hwnd(hwnd); + let win32_loader = khr::Win32Surface::new(&self.shared.entry, &self.shared.raw); + unsafe { + win32_loader + .create_win32_surface(&info, None) + .expect("Unable to create Win32 surface") + } + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[cfg(metal)] + fn create_surface_from_view( + &self, + view: *mut c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&ext::MetalSurface::name()) { + return Err(crate::InstanceError::new(String::from( + "Vulkan driver does not support VK_EXT_metal_surface", + ))); + } + + let layer = unsafe { + crate::metal::Surface::get_metal_layer(view as *mut objc::runtime::Object, None) + }; + + let surface = { + let metal_loader = ext::MetalSurface::new(&self.shared.entry, &self.shared.raw); + let vk_info = vk::MetalSurfaceCreateInfoEXT::builder() + .flags(vk::MetalSurfaceCreateFlagsEXT::empty()) + .layer(layer as *mut _) + .build(); + + unsafe { metal_loader.create_metal_surface(&vk_info, None).unwrap() } + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + fn create_surface_from_vk_surface_khr(&self, surface: vk::SurfaceKHR) -> super::Surface { + let functor = khr::Surface::new(&self.shared.entry, &self.shared.raw); + super::Surface { + raw: surface, + functor, + instance: Arc::clone(&self.shared), + swapchain: RwLock::new(None), + } + } +} + +impl Drop for super::InstanceShared { + fn drop(&mut self) { + unsafe { + // Keep du alive since destroy_instance may also log + let _du = self.debug_utils.take().map(|du| { + du.extension + .destroy_debug_utils_messenger(du.messenger, None); + du + }); + if let Some(_drop_guard) = self.drop_guard.take() { + self.raw.destroy_instance(None); + } + } + } +} + +impl crate::Instance<super::Api> for super::Instance 
{ + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + profiling::scope!("Init Vulkan Backend"); + use crate::auxil::cstr_from_bytes_until_nul; + + let entry = unsafe { + profiling::scope!("Load vk library"); + ash::Entry::load() + } + .map_err(|err| { + crate::InstanceError::with_source(String::from("missing Vulkan entry points"), err) + })?; + let version = { + profiling::scope!("vkEnumerateInstanceVersion"); + entry.try_enumerate_instance_version() + }; + let instance_api_version = match version { + // Vulkan 1.1+ + Ok(Some(version)) => version, + Ok(None) => vk::API_VERSION_1_0, + Err(err) => { + return Err(crate::InstanceError::with_source( + String::from("try_enumerate_instance_version() failed"), + err, + )); + } + }; + + let app_name = CString::new(desc.name).unwrap(); + let app_info = vk::ApplicationInfo::builder() + .application_name(app_name.as_c_str()) + .application_version(1) + .engine_name(CStr::from_bytes_with_nul(b"wgpu-hal\0").unwrap()) + .engine_version(2) + .api_version( + // Vulkan 1.0 doesn't like anything but 1.0 passed in here... + if instance_api_version < vk::API_VERSION_1_1 { + vk::API_VERSION_1_0 + } else { + // This is the max Vulkan API version supported by `wgpu-hal`. + // + // If we want to increment this, there are some things that must be done first: + // - Audit the behavioral differences between the previous and new API versions. + // - Audit all extensions used by this backend: + // - If any were promoted in the new API version and the behavior has changed, we must handle the new behavior in addition to the old behavior. + // - If any were obsoleted in the new API version, we must implement a fallback for the new API version + // - If any are non-KHR-vendored, we must ensure the new behavior is still correct (since backwards-compatibility is not guaranteed). 
+ vk::API_VERSION_1_3 + }, + ); + + let extensions = Self::desired_extensions(&entry, instance_api_version, desc.flags)?; + + let instance_layers = { + profiling::scope!("vkEnumerateInstanceLayerProperties"); + entry.enumerate_instance_layer_properties() + }; + let instance_layers = instance_layers.map_err(|e| { + log::debug!("enumerate_instance_layer_properties: {:?}", e); + crate::InstanceError::with_source( + String::from("enumerate_instance_layer_properties() failed"), + e, + ) + })?; + + fn find_layer<'layers>( + instance_layers: &'layers [vk::LayerProperties], + name: &CStr, + ) -> Option<&'layers vk::LayerProperties> { + instance_layers + .iter() + .find(|inst_layer| cstr_from_bytes_until_nul(&inst_layer.layer_name) == Some(name)) + } + + let nv_optimus_layer = CStr::from_bytes_with_nul(b"VK_LAYER_NV_optimus\0").unwrap(); + let has_nv_optimus = find_layer(&instance_layers, nv_optimus_layer).is_some(); + + let obs_layer = CStr::from_bytes_with_nul(b"VK_LAYER_OBS_HOOK\0").unwrap(); + let has_obs_layer = find_layer(&instance_layers, obs_layer).is_some(); + + let mut layers: Vec<&'static CStr> = Vec::new(); + + // Request validation layer if asked. + let mut debug_utils = None; + if desc.flags.intersects(wgt::InstanceFlags::VALIDATION) { + let validation_layer_name = + CStr::from_bytes_with_nul(b"VK_LAYER_KHRONOS_validation\0").unwrap(); + if let Some(layer_properties) = find_layer(&instance_layers, validation_layer_name) { + layers.push(validation_layer_name); + + if extensions.contains(&ext::DebugUtils::name()) { + // Put the callback data on the heap, to ensure it will never be + // moved. 
+ let callback_data = Box::new(super::DebugUtilsMessengerUserData { + validation_layer_description: cstr_from_bytes_until_nul( + &layer_properties.description, + ) + .unwrap() + .to_owned(), + validation_layer_spec_version: layer_properties.spec_version, + has_obs_layer, + }); + + // having ERROR unconditionally because Vk doesn't like empty flags + let mut severity = vk::DebugUtilsMessageSeverityFlagsEXT::ERROR; + if log::max_level() >= log::LevelFilter::Debug { + severity |= vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE; + } + if log::max_level() >= log::LevelFilter::Info { + severity |= vk::DebugUtilsMessageSeverityFlagsEXT::INFO; + } + if log::max_level() >= log::LevelFilter::Warn { + severity |= vk::DebugUtilsMessageSeverityFlagsEXT::WARNING; + } + + let message_type = vk::DebugUtilsMessageTypeFlagsEXT::GENERAL + | vk::DebugUtilsMessageTypeFlagsEXT::VALIDATION + | vk::DebugUtilsMessageTypeFlagsEXT::PERFORMANCE; + + let create_info = super::DebugUtilsCreateInfo { + severity, + message_type, + callback_data, + }; + + let vk_create_info = create_info.to_vk_create_info().build(); + + debug_utils = Some((create_info, vk_create_info)); + } + } else { + log::warn!( + "InstanceFlags::VALIDATION requested, but unable to find layer: {}", + validation_layer_name.to_string_lossy() + ); + } + } + + #[cfg(target_os = "android")] + let android_sdk_version = { + let properties = android_system_properties::AndroidSystemProperties::new(); + // See: https://developer.android.com/reference/android/os/Build.VERSION_CODES + if let Some(val) = properties.get("ro.build.version.sdk") { + match val.parse::<u32>() { + Ok(sdk_ver) => sdk_ver, + Err(err) => { + log::error!( + "Couldn't parse Android's ro.build.version.sdk system property ({val}): {err}" + ); + 0 + } + } + } else { + log::error!("Couldn't read Android's ro.build.version.sdk system property"); + 0 + } + }; + #[cfg(not(target_os = "android"))] + let android_sdk_version = 0; + + let mut flags = 
vk::InstanceCreateFlags::empty(); + + // Avoid VUID-VkInstanceCreateInfo-flags-06559: Only ask the instance to + // enumerate incomplete Vulkan implementations (which we need on Mac) if + // we managed to find the extension that provides the flag. + if extensions.contains(&ash::vk::KhrPortabilityEnumerationFn::name()) { + flags |= vk::InstanceCreateFlags::ENUMERATE_PORTABILITY_KHR; + } + let vk_instance = { + let str_pointers = layers + .iter() + .chain(extensions.iter()) + .map(|&s: &&'static _| { + // Safe because `layers` and `extensions` entries have static lifetime. + s.as_ptr() + }) + .collect::<Vec<_>>(); + + let mut create_info = vk::InstanceCreateInfo::builder() + .flags(flags) + .application_info(&app_info) + .enabled_layer_names(&str_pointers[..layers.len()]) + .enabled_extension_names(&str_pointers[layers.len()..]); + + if let Some(&mut (_, ref mut vk_create_info)) = debug_utils.as_mut() { + create_info = create_info.push_next(vk_create_info); + } + + unsafe { + profiling::scope!("vkCreateInstance"); + entry.create_instance(&create_info, None) + } + .map_err(|e| { + crate::InstanceError::with_source( + String::from("Entry::create_instance() failed"), + e, + ) + })? 
+ }; + + unsafe { + Self::from_raw( + entry, + vk_instance, + instance_api_version, + android_sdk_version, + debug_utils.map(|(i, _)| i), + extensions, + desc.flags, + has_nv_optimus, + Some(Box::new(())), // `Some` signals that wgpu-hal is in charge of destroying vk_instance + ) + } + } + + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<super::Surface, crate::InstanceError> { + use raw_window_handle::{RawDisplayHandle as Rdh, RawWindowHandle as Rwh}; + + match (window_handle, display_handle) { + (Rwh::Wayland(handle), Rdh::Wayland(display)) => { + self.create_surface_from_wayland(display.display.as_ptr(), handle.surface.as_ptr()) + } + (Rwh::Xlib(handle), Rdh::Xlib(display)) => { + let display = display.display.expect("Display pointer is not set."); + self.create_surface_from_xlib(display.as_ptr() as *mut *const c_void, handle.window) + } + (Rwh::Xcb(handle), Rdh::Xcb(display)) => { + let connection = display.connection.expect("Pointer to X-Server is not set."); + self.create_surface_from_xcb(connection.as_ptr(), handle.window.get()) + } + (Rwh::AndroidNdk(handle), _) => { + self.create_surface_android(handle.a_native_window.as_ptr()) + } + #[cfg(windows)] + (Rwh::Win32(handle), _) => { + use winapi::um::libloaderapi::GetModuleHandleW; + + let hinstance = unsafe { GetModuleHandleW(std::ptr::null()) }; + self.create_surface_from_hwnd(hinstance as *mut _, handle.hwnd.get() as *mut _) + } + #[cfg(all(target_os = "macos", feature = "metal"))] + (Rwh::AppKit(handle), _) + if self.shared.extensions.contains(&ext::MetalSurface::name()) => + { + self.create_surface_from_view(handle.ns_view.as_ptr()) + } + #[cfg(all(target_os = "ios", feature = "metal"))] + (Rwh::UiKit(handle), _) + if self.shared.extensions.contains(&ext::MetalSurface::name()) => + { + self.create_surface_from_view(handle.ui_view.as_ptr()) + } + (_, _) => Err(crate::InstanceError::new(format!( + 
"window handle {window_handle:?} is not a Vulkan-compatible handle" + ))), + } + } + + unsafe fn destroy_surface(&self, surface: super::Surface) { + unsafe { surface.functor.destroy_surface(surface.raw, None) }; + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + use crate::auxil::db; + + let raw_devices = match unsafe { self.shared.raw.enumerate_physical_devices() } { + Ok(devices) => devices, + Err(err) => { + log::error!("enumerate_adapters: {}", err); + Vec::new() + } + }; + + let mut exposed_adapters = raw_devices + .into_iter() + .flat_map(|device| self.expose_adapter(device)) + .collect::<Vec<_>>(); + + // Detect if it's an Intel + NVidia configuration with Optimus + let has_nvidia_dgpu = exposed_adapters.iter().any(|exposed| { + exposed.info.device_type == wgt::DeviceType::DiscreteGpu + && exposed.info.vendor == db::nvidia::VENDOR + }); + if cfg!(target_os = "linux") && has_nvidia_dgpu && self.shared.has_nv_optimus { + for exposed in exposed_adapters.iter_mut() { + if exposed.info.device_type == wgt::DeviceType::IntegratedGpu + && exposed.info.vendor == db::intel::VENDOR + { + // Check if mesa driver and version less than 21.2 + if let Some(version) = exposed.info.driver_info.split_once("Mesa ").map(|s| { + let mut components = s.1.split('.'); + let major = components.next().and_then(|s| u8::from_str(s).ok()); + let minor = components.next().and_then(|s| u8::from_str(s).ok()); + if let (Some(major), Some(minor)) = (major, minor) { + (major, minor) + } else { + (0, 0) + } + }) { + if version < (21, 2) { + // See https://gitlab.freedesktop.org/mesa/mesa/-/issues/4688 + log::warn!( + "Disabling presentation on '{}' (id {:?}) due to NV Optimus and Intel Mesa < v21.2", + exposed.info.name, + exposed.adapter.raw + ); + exposed.adapter.private_caps.can_present = false; + } + } + } + } + } + + exposed_adapters + } +} + +impl crate::Surface<super::Api> for super::Surface { + unsafe fn configure( + &self, + device: 
&super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + // Safety: `configure`'s contract guarantees there are no resources derived from the swapchain in use. + let mut swap_chain = self.swapchain.write(); + let old = swap_chain + .take() + .map(|sc| unsafe { sc.release_resources(&device.shared.raw) }); + + let swapchain = unsafe { device.create_swapchain(self, config, old)? }; + *swap_chain = Some(swapchain); + + Ok(()) + } + + unsafe fn unconfigure(&self, device: &super::Device) { + if let Some(sc) = self.swapchain.write().take() { + // Safety: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use. + let swapchain = unsafe { sc.release_resources(&device.shared.raw) }; + unsafe { swapchain.functor.destroy_swapchain(swapchain.raw, None) }; + } + } + + unsafe fn acquire_texture( + &self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let mut swapchain = self.swapchain.write(); + let sc = swapchain.as_mut().unwrap(); + + let mut timeout_ns = match timeout { + // Saturate instead of truncating: a duration longer than `u64::MAX` + // nanoseconds must not wrap around to a short timeout. + Some(duration) => duration.as_nanos().min(u128::from(u64::MAX)) as u64, + None => u64::MAX, + }; + + // AcquireNextImageKHR on Android (prior to Android 11) doesn't support timeouts + // and will also log verbose warnings if trying to use a timeout.
+ // + // Android 10 implementation for reference: + // https://android.googlesource.com/platform/frameworks/native/+/refs/tags/android-mainline-10.0.0_r13/vulkan/libvulkan/swapchain.cpp#1426 + // Android 11 implementation for reference: + // https://android.googlesource.com/platform/frameworks/native/+/refs/tags/android-mainline-11.0.0_r45/vulkan/libvulkan/swapchain.cpp#1438 + // + // Android 11 corresponds to an SDK_INT/ro.build.version.sdk of 30 + if cfg!(target_os = "android") && self.instance.android_sdk_version < 30 { + timeout_ns = u64::MAX; + } + + let wait_semaphore = sc.surface_semaphores[sc.next_surface_index]; + + // will block if no image is available + let (index, suboptimal) = match unsafe { + sc.functor + .acquire_next_image(sc.raw, timeout_ns, wait_semaphore, vk::Fence::null()) + } { + // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. + // See the comment in `Queue::present`. + #[cfg(target_os = "android")] + Ok((index, _)) => (index, false), + #[cfg(not(target_os = "android"))] + Ok(pair) => pair, + Err(error) => { + return match error { + vk::Result::TIMEOUT => Ok(None), + vk::Result::NOT_READY | vk::Result::ERROR_OUT_OF_DATE_KHR => { + Err(crate::SurfaceError::Outdated) + } + vk::Result::ERROR_SURFACE_LOST_KHR => Err(crate::SurfaceError::Lost), + other => Err(crate::DeviceError::from(other).into()), + } + } + }; + + sc.next_surface_index += 1; + sc.next_surface_index %= sc.surface_semaphores.len(); + + // special case for Intel Vulkan returning bizarre values (ugh) + if sc.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 { + return Err(crate::SurfaceError::Outdated); + } + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03209 + let raw_flags = if sc + .raw_flags + .contains(vk::SwapchainCreateFlagsKHR::MUTABLE_FORMAT) + { + vk::ImageCreateFlags::MUTABLE_FORMAT | vk::ImageCreateFlags::EXTENDED_USAGE + } else { + 
vk::ImageCreateFlags::empty() + }; + + let texture = super::SurfaceTexture { + index, + texture: super::Texture { + raw: sc.images[index as usize], + drop_guard: None, + block: None, + usage: sc.config.usage, + format: sc.config.format, + raw_flags, + copy_size: crate::CopyExtent { + width: sc.config.extent.width, + height: sc.config.extent.height, + depth: 1, + }, + view_formats: sc.view_formats.clone(), + }, + wait_semaphore, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal, + })) + } + + unsafe fn discard_texture(&self, _texture: super::SurfaceTexture) {} +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/mod.rs b/third_party/rust/wgpu-hal/src/vulkan/mod.rs new file mode 100644 index 0000000000..787ebd7267 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/mod.rs @@ -0,0 +1,738 @@ +/*! +# Vulkan API internals. + +## Stack memory + +Ash expects slices, which we don't generally have available. +We cope with this requirement by the combination of the following ways: + - temporarily allocating `Vec` on heap, where overhead is permitted + - growing temporary local storage + - using `inplace_it` on iterators + +## Framebuffers and Render passes + +Render passes are cached on the device and kept forever. + +Framebuffers are also cached on the device, but they are removed when +any of the image views (they have) gets removed. +If Vulkan supports image-less framebuffers, +then the actual views are excluded from the framebuffer key. + +## Fences + +If timeline semaphores are available, they are used 1:1 with wgpu-hal fences. +Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
+ +!*/ + +mod adapter; +mod command; +mod conv; +mod device; +mod instance; + +use std::{ + borrow::Borrow, + ffi::CStr, + fmt, + num::NonZeroU32, + sync::{ + atomic::{AtomicIsize, Ordering}, + Arc, + }, +}; + +use arrayvec::ArrayVec; +use ash::{ + extensions::{ext, khr}, + vk, +}; +use parking_lot::{Mutex, RwLock}; + +const MILLIS_TO_NANOS: u64 = 1_000_000; +const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1; + +#[derive(Clone, Debug)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + type AccelerationStructure = AccelerationStructure; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +struct DebugUtils { + extension: ext::DebugUtils, + messenger: vk::DebugUtilsMessengerEXT, + + /// Owning pointer to the debug messenger callback user data. + /// + /// `InstanceShared::drop` destroys the debug messenger before + /// dropping this, so the callback should never receive a dangling + /// user data pointer. + #[allow(dead_code)] + callback_data: Box<DebugUtilsMessengerUserData>, +} + +pub struct DebugUtilsCreateInfo { + severity: vk::DebugUtilsMessageSeverityFlagsEXT, + message_type: vk::DebugUtilsMessageTypeFlagsEXT, + callback_data: Box<DebugUtilsMessengerUserData>, +} + +/// User data needed by `instance::debug_utils_messenger_callback`. +/// +/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData` +/// pointer refers to one of these values. 
+#[derive(Debug)] +pub struct DebugUtilsMessengerUserData { + /// Validation layer description, from `vk::LayerProperties`. + validation_layer_description: std::ffi::CString, + + /// Validation layer specification version, from `vk::LayerProperties`. + validation_layer_spec_version: u32, + + /// If the OBS layer is present. OBS never increments the version of their layer, + /// so there's no reason to have the version. + has_obs_layer: bool, +} + +pub struct InstanceShared { + raw: ash::Instance, + extensions: Vec<&'static CStr>, + drop_guard: Option<crate::DropGuard>, + flags: wgt::InstanceFlags, + debug_utils: Option<DebugUtils>, + get_physical_device_properties: Option<khr::GetPhysicalDeviceProperties2>, + entry: ash::Entry, + has_nv_optimus: bool, + android_sdk_version: u32, + /// The instance API version. + /// + /// Which is the version of Vulkan supported for instance-level functionality. + /// + /// It is associated with a `VkInstance` and its children, + /// except for a `VkPhysicalDevice` and its children. + instance_api_version: u32, +} + +pub struct Instance { + shared: Arc<InstanceShared>, +} + +struct Swapchain { + raw: vk::SwapchainKHR, + raw_flags: vk::SwapchainCreateFlagsKHR, + functor: khr::Swapchain, + device: Arc<DeviceShared>, + images: Vec<vk::Image>, + config: crate::SurfaceConfiguration, + view_formats: Vec<wgt::TextureFormat>, + /// One wait semaphore per swapchain image. This will be associated with the + /// surface texture, and later collected during submission. + surface_semaphores: Vec<vk::Semaphore>, + /// Current semaphore index to use when acquiring a surface. 
+ next_surface_index: usize, +} + +pub struct Surface { + raw: vk::SurfaceKHR, + functor: khr::Surface, + instance: Arc<InstanceShared>, + swapchain: RwLock<Option<Swapchain>>, +} + +#[derive(Debug)] +pub struct SurfaceTexture { + index: u32, + texture: Texture, + wait_semaphore: vk::Semaphore, +} + +impl Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + &self.texture + } +} + +pub struct Adapter { + raw: vk::PhysicalDevice, + instance: Arc<InstanceShared>, + //queue_families: Vec<vk::QueueFamilyProperties>, + known_memory_flags: vk::MemoryPropertyFlags, + phd_capabilities: adapter::PhysicalDeviceCapabilities, + //phd_features: adapter::PhysicalDeviceFeatures, + downlevel_flags: wgt::DownlevelFlags, + private_caps: PrivateCapabilities, + workarounds: Workarounds, +} + +// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`. +enum ExtensionFn<T> { + /// The loaded function pointer struct for an extension. + Extension(T), + /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used. + Promoted, +} + +struct DeviceExtensionFunctions { + draw_indirect_count: Option<khr::DrawIndirectCount>, + timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>, + ray_tracing: Option<RayTracingDeviceExtensionFunctions>, +} + +struct RayTracingDeviceExtensionFunctions { + acceleration_structure: khr::AccelerationStructure, + buffer_device_address: khr::BufferDeviceAddress, +} + +/// Set of internal capabilities, which don't show up in the exposed +/// device geometry, but affect the code paths taken internally. +#[derive(Clone, Debug)] +struct PrivateCapabilities { + /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height` or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport height does not require a Y shift. 
+ /// + /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`). + flip_y_requires_shift: bool, + imageless_framebuffers: bool, + image_view_usage: bool, + timeline_semaphores: bool, + texture_d24: bool, + texture_d24_s8: bool, + texture_s8: bool, + /// Ability to present contents to any screen. Only needed to work around broken platform configurations. + can_present: bool, + non_coherent_map_mask: wgt::BufferAddress, + robust_buffer_access: bool, + robust_image_access: bool, + robust_buffer_access2: bool, + robust_image_access2: bool, + zero_initialize_workgroup_memory: bool, + image_format_list: bool, +} + +bitflags::bitflags!( + /// Workaround flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct Workarounds: u32 { + /// Only generate SPIR-V for one entry point at a time. + const SEPARATE_ENTRY_POINTS = 0x1; + /// Qualcomm OOMs when there are zero color attachments but a non-null pointer + /// to a subpass resolve attachment array. This nulls out that pointer in that case. + const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2; + /// If the following code returns false, then nvidia will end up filling the wrong range. + /// + /// ```skip + /// fn nvidia_succeeds() -> bool { + /// # let (copy_length, start_offset) = (0, 0); + /// if copy_length >= 4096 { + /// if start_offset % 16 != 0 { + /// if copy_length == 4096 { + /// return true; + /// } + /// if copy_length % 16 == 0 { + /// return false; + /// } + /// } + /// } + /// true + /// } + /// ``` + /// + /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes + /// if they cover a range of 4096 bytes or more. 
+ const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4; + } +); + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct AttachmentKey { + format: vk::Format, + layout: vk::ImageLayout, + ops: crate::AttachmentOps, +} + +impl AttachmentKey { + /// Returns an attachment key for a compatible attachment. + fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self { + Self { + format, + layout, + ops: crate::AttachmentOps::all(), + } + } +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct ColorAttachmentKey { + base: AttachmentKey, + resolve: Option<AttachmentKey>, +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct DepthStencilAttachmentKey { + base: AttachmentKey, + stencil_ops: crate::AttachmentOps, +} + +#[derive(Clone, Eq, Default, Hash, PartialEq)] +struct RenderPassKey { + colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>, + depth_stencil: Option<DepthStencilAttachmentKey>, + sample_count: u32, + multiview: Option<NonZeroU32>, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct FramebufferAttachment { + /// Can be NULL if the framebuffer is image-less + raw: vk::ImageView, + raw_image_flags: vk::ImageCreateFlags, + view_usage: crate::TextureUses, + view_format: wgt::TextureFormat, + raw_view_formats: Vec<vk::Format>, +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct FramebufferKey { + attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>, + extent: wgt::Extent3d, + sample_count: u32, +} + +struct DeviceShared { + raw: ash::Device, + family_index: u32, + queue_index: u32, + raw_queue: ash::vk::Queue, + handle_is_owned: bool, + instance: Arc<InstanceShared>, + physical_device: ash::vk::PhysicalDevice, + enabled_extensions: Vec<&'static CStr>, + extension_fns: DeviceExtensionFunctions, + vendor_id: u32, + timestamp_period: f32, + private_caps: PrivateCapabilities, + workarounds: Workarounds, + render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>, + framebuffers: 
Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>, +} + +pub struct Device { + shared: Arc<DeviceShared>, + mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>, + desc_allocator: + Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>, + valid_ash_memory_types: u32, + naga_options: naga::back::spv::Options<'static>, + #[cfg(feature = "renderdoc")] + render_doc: crate::auxil::renderdoc::RenderDoc, +} + +pub struct Queue { + raw: vk::Queue, + swapchain_fn: khr::Swapchain, + device: Arc<DeviceShared>, + family_index: u32, + /// We use a redundant chain of semaphores to pass on the signal + /// from submissions to the last present, since it's required by the + /// specification. + /// It would be correct to use a single semaphore there, but + /// [Intel hangs in `anv_queue_finish`](https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508). + relay_semaphores: [vk::Semaphore; 2], + relay_index: AtomicIsize, +} + +#[derive(Debug)] +pub struct Buffer { + raw: vk::Buffer, + block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>, +} + +#[derive(Debug)] +pub struct AccelerationStructure { + raw: vk::AccelerationStructureKHR, + buffer: vk::Buffer, + block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>, +} + +#[derive(Debug)] +pub struct Texture { + raw: vk::Image, + drop_guard: Option<crate::DropGuard>, + block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>, + usage: crate::TextureUses, + format: wgt::TextureFormat, + raw_flags: vk::ImageCreateFlags, + copy_size: crate::CopyExtent, + view_formats: Vec<wgt::TextureFormat>, +} + +impl Texture { + /// # Safety + /// + /// - The image handle must not be manually destroyed + pub unsafe fn raw_handle(&self) -> vk::Image { + self.raw + } +} + +#[derive(Debug)] +pub struct TextureView { + raw: vk::ImageView, + layers: NonZeroU32, + attachment: FramebufferAttachment, +} + +#[derive(Debug)] +pub struct Sampler { + raw: vk::Sampler, +} + +#[derive(Debug)] +pub struct 
BindGroupLayout { + raw: vk::DescriptorSetLayout, + desc_count: gpu_descriptor::DescriptorTotalCount, + types: Box<[(vk::DescriptorType, u32)]>, + /// Map of binding index to size, + binding_arrays: Vec<(u32, NonZeroU32)>, +} + +#[derive(Debug)] +pub struct PipelineLayout { + raw: vk::PipelineLayout, + binding_arrays: naga::back::spv::BindingMap, +} + +#[derive(Debug)] +pub struct BindGroup { + set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>, +} + +#[derive(Default)] +struct Temp { + marker: Vec<u8>, + buffer_barriers: Vec<vk::BufferMemoryBarrier>, + image_barriers: Vec<vk::ImageMemoryBarrier>, +} + +unsafe impl Send for Temp {} +unsafe impl Sync for Temp {} + +impl Temp { + fn clear(&mut self) { + self.marker.clear(); + self.buffer_barriers.clear(); + self.image_barriers.clear(); + //see also - https://github.com/NotIntMan/inplace_it/issues/8 + } + + fn make_c_str(&mut self, name: &str) -> &CStr { + self.marker.clear(); + self.marker.extend_from_slice(name.as_bytes()); + self.marker.push(0); + unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) } + } +} + +pub struct CommandEncoder { + raw: vk::CommandPool, + device: Arc<DeviceShared>, + active: vk::CommandBuffer, + bind_point: vk::PipelineBindPoint, + temp: Temp, + free: Vec<vk::CommandBuffer>, + discarded: Vec<vk::CommandBuffer>, + /// If this is true, the active renderpass enabled a debug span, + /// and needs to be disabled on renderpass close. + rpass_debug_marker_active: bool, + + /// If set, the end of the next render/compute pass will write a timestamp at + /// the given pool & location. 
+ end_of_pass_timer_query: Option<(vk::QueryPool, u32)>, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("raw", &self.raw) + .finish() + } +} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: vk::CommandBuffer, +} + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum ShaderModule { + Raw(vk::ShaderModule), + Intermediate { + naga_shader: crate::NagaShader, + runtime_checks: bool, + }, +} + +#[derive(Debug)] +pub struct RenderPipeline { + raw: vk::Pipeline, +} + +#[derive(Debug)] +pub struct ComputePipeline { + raw: vk::Pipeline, +} + +#[derive(Debug)] +pub struct QuerySet { + raw: vk::QueryPool, +} + +#[derive(Debug)] +pub enum Fence { + TimelineSemaphore(vk::Semaphore), + FencePool { + last_completed: crate::FenceValue, + /// The pending fence values have to be ascending. + active: Vec<(crate::FenceValue, vk::Fence)>, + free: Vec<vk::Fence>, + }, +} + +impl Fence { + fn check_active( + device: &ash::Device, + mut max_value: crate::FenceValue, + active: &[(crate::FenceValue, vk::Fence)], + ) -> Result<crate::FenceValue, crate::DeviceError> { + for &(value, raw) in active.iter() { + unsafe { + if value > max_value && device.get_fence_status(raw)? 
{ + max_value = value; + } + } + } + Ok(max_value) + } + + fn get_latest( + &self, + device: &ash::Device, + extension: Option<&ExtensionFn<khr::TimelineSemaphore>>, + ) -> Result<crate::FenceValue, crate::DeviceError> { + match *self { + Self::TimelineSemaphore(raw) => unsafe { + Ok(match *extension.unwrap() { + ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?, + ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?, + }) + }, + Self::FencePool { + last_completed, + ref active, + free: _, + } => Self::check_active(device, last_completed, active), + } + } + + fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> { + match *self { + Self::TimelineSemaphore(_) => {} + Self::FencePool { + ref mut last_completed, + ref mut active, + ref mut free, + } => { + let latest = Self::check_active(device, *last_completed, active)?; + let base_free = free.len(); + for &(value, raw) in active.iter() { + if value <= latest { + free.push(raw); + } + } + if free.len() != base_free { + active.retain(|&(value, _)| value > latest); + unsafe { + device.reset_fences(&free[base_free..])?; + } + } + *last_completed = latest; + } + } + Ok(()) + } +} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &self, + command_buffers: &[&CommandBuffer], + surface_textures: &[&SurfaceTexture], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + let mut fence_raw = vk::Fence::null(); + + let mut wait_stage_masks = Vec::new(); + let mut wait_semaphores = Vec::new(); + let mut signal_semaphores = ArrayVec::<_, 2>::new(); + let mut signal_values = ArrayVec::<_, 2>::new(); + + for &surface_texture in surface_textures { + wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE); + wait_semaphores.push(surface_texture.wait_semaphore); + } + + let old_index = self.relay_index.load(Ordering::Relaxed); + + let sem_index = if old_index >= 0 { + 
wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE); + wait_semaphores.push(self.relay_semaphores[old_index as usize]); + (old_index as usize + 1) % self.relay_semaphores.len() + } else { + 0 + }; + + signal_semaphores.push(self.relay_semaphores[sem_index]); + + self.relay_index + .store(sem_index as isize, Ordering::Relaxed); + + if let Some((fence, value)) = signal_fence { + fence.maintain(&self.device.raw)?; + match *fence { + Fence::TimelineSemaphore(raw) => { + signal_semaphores.push(raw); + signal_values.push(!0); + signal_values.push(value); + } + Fence::FencePool { + ref mut active, + ref mut free, + .. + } => { + fence_raw = match free.pop() { + Some(raw) => raw, + None => unsafe { + self.device + .raw + .create_fence(&vk::FenceCreateInfo::builder(), None)? + }, + }; + active.push((value, fence_raw)); + } + } + } + + let vk_cmd_buffers = command_buffers + .iter() + .map(|cmd| cmd.raw) + .collect::<Vec<_>>(); + + let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + + vk_info = vk_info + .wait_semaphores(&wait_semaphores) + .wait_dst_stage_mask(&wait_stage_masks) + .signal_semaphores(&signal_semaphores); + + let mut vk_timeline_info; + + if !signal_values.is_empty() { + vk_timeline_info = + vk::TimelineSemaphoreSubmitInfo::builder().signal_semaphore_values(&signal_values); + vk_info = vk_info.push_next(&mut vk_timeline_info); + } + + profiling::scope!("vkQueueSubmit"); + unsafe { + self.device + .raw + .queue_submit(self.raw, &[vk_info.build()], fence_raw)? 
+ }; + Ok(()) + } + + unsafe fn present( + &self, + surface: &Surface, + texture: SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + let mut swapchain = surface.swapchain.write(); + let ssc = swapchain.as_mut().unwrap(); + + let swapchains = [ssc.raw]; + let image_indices = [texture.index]; + let mut vk_info = vk::PresentInfoKHR::builder() + .swapchains(&swapchains) + .image_indices(&image_indices); + + let old_index = self.relay_index.swap(-1, Ordering::Relaxed); + if old_index >= 0 { + vk_info = vk_info.wait_semaphores( + &self.relay_semaphores[old_index as usize..old_index as usize + 1], + ); + } + + let suboptimal = { + profiling::scope!("vkQueuePresentKHR"); + unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| { + match error { + vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated, + vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost, + _ => crate::DeviceError::from(error).into(), + } + })? + }; + if suboptimal { + // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. + // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation + // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation). + // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`. 
+ #[cfg(not(target_os = "android"))] + log::warn!("Suboptimal present of frame {}", texture.index); + } + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + self.device.timestamp_period + } +} + +impl From<vk::Result> for crate::DeviceError { + fn from(result: vk::Result) -> Self { + match result { + vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => { + Self::OutOfMemory + } + vk::Result::ERROR_DEVICE_LOST => Self::Lost, + _ => { + log::warn!("Unrecognized device error {:?}", result); + Self::Lost + } + } + } +} |