author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000
commit    | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree      | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/wgpu-hal
parent    | Initial commit. (diff)
download  | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz, firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr. (upstream/115.7.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/wgpu-hal')
62 files changed, 31026 insertions, 0 deletions
diff --git a/third_party/rust/wgpu-hal/.cargo-checksum.json b/third_party/rust/wgpu-hal/.cargo-checksum.json new file mode 100644 index 0000000000..74450cad95 --- /dev/null +++ b/third_party/rust/wgpu-hal/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"fbdbc7be59db718e8925f31802a7729abd732bbe01fbcb36409ac2e82f356976","LICENSE.APACHE":"a6cba85bc92e0cff7a450b1d873c0eaa2e9fc96bf472df0247a26bec77bf3ff9","LICENSE.MIT":"c7fea58d1cfe49634cd92e54fc10a9d871f4b275321a4cd8c09e449122caaeb4","README.md":"78377f5876fafd77963eff7e3c2ba3a7e3ad5cf9201b09ed5612e49c2288eb18","examples/halmark/main.rs":"2f80ca7029cd86c96cf1251d486c508f8c14e9de76f541e2325e5504ad11c656","examples/halmark/shader.wgsl":"26c256ec36d6f0e9a1647431ca772766bee4382d64eaa718ba7b488dcfb6bcca","examples/raw-gles.em.html":"70fbe68394a1a4522192de1dcfaf7d399f60d7bdf5de70b708f9bb0417427546","examples/raw-gles.rs":"3e2701628e6b525a19313ddeb15a50af87311afcfa1869cddb9790546ae4c0b9","src/auxil/dxgi/conv.rs":"c1e8d584e408712097d075455375e04d1895a0ff9a494305c16d658277ce7b90","src/auxil/dxgi/exception.rs":"f885049c33535134f58a0911b18b87c31d522ec27b4fefd4bdd047ad2c3422d0","src/auxil/dxgi/factory.rs":"e3d3984ede07235096e4d963ba0425ce950070d7e7226679aa1d4ac65d4ff336","src/auxil/dxgi/mod.rs":"a202564d9ac97530b16a234b87d180cd345aae705e082a9b1177dcde813645f9","src/auxil/dxgi/result.rs":"20c8eb03d738062dff198feca6327addb9882ed0462be842c789eadf7dca0573","src/auxil/dxgi/time.rs":"b6f966b250e9424d5d7e4065f2108cba87197c1e30baae6d87083055d1bc5a4b","src/auxil/mod.rs":"c38f0d3b10804d1c1d7e3b8e4a975fcb87271f8b1904f2f4a3153cceddafb56b","src/auxil/renderdoc.rs":"c2f849f70f576b0c9b0d32dd155b6a6353f74dff59cbeeaa994a12789d047c0f","src/dx11/adapter.rs":"621c7d06ebac419f17f88c64b1a7cd2930499f2826f62a209cd93c42c47c316c","src/dx11/command.rs":"cdad8dcdb800acba56c931f1726ddada652af18db0f066465af643f82a034492","src/dx11/device.rs":"96ccd8d6645839f3daf832ddf569676643ac92d1b332ab9a0c8563b3b5026295","src/dx11/instance.rs":"8a552abf39a455208c1e406d4161e67c60c83c07bd3821256e05268d796287c8","src/dx11/library.rs":"c53e7ccd92590836e2e689a6e6a792c94d392fd41fc31771a27389115a21fe36","src/dx11/mod.rs":"9684e99b5302d4219014bacd8271cc9e068d19a2f42fb341a55d3b777a84d57c","src/dx12/adapter.rs":"df5a07cee905bc15fe588085bb2df753b1100f9476fa5ba1ea9da43190f59e1c","src/dx12/command.rs":"798916d9c0c517a8634697a3f59926184d39354aa850147328bae2e2ffdfb510","src/dx12/conv.rs":"4746ab2e46d8fbf4eac66c4dde11ca932f7ca9ba8177b5316eb12710a89700c2","src/dx12/descriptor.rs":"8765855f9bfd54d6884f1a0da0a3c859deff821559d04aec8bd307fe0756f715","src/dx12/device.rs":"8cd2f86d61052d234c9d833a6ef814b4c09310edacebc7dfdbcae189026b31b2","src/dx12/instance.rs":"bc84c923fad598a3a3a2f5ba291232d24ecab2aff8f43cafbcd0fe74922fb874","src/dx12/mod.rs":"a4a48b924b1d97a8f362dcc9cfdd589a4145d56cda040802d25d36d28e99762a","src/dx12/shader_compilation.rs":"18541616bf390d66695a1d66d6bf6df697f81cb98384ace56924fda567508bb8","src/dx12/suballocation.rs":"1893a77dac08d0cadd16677b4c80b27220ad6057d81069c2524de6cee84520ac","src/dx12/types.rs":"29b11268d11c1b7437de3dac403af574ec8aa84fd9954a95a16d80f5808be44d","src/dx12/view.rs":"c09241520377e6a47ad8822c69075e6e16e6c558e18bb64398c8d7424fc57dcf","src/empty.rs":"78c5cc9a37e7401a2618dcabbe38f012de45e5a0b4793cfc70ef7a6874087b15","src/gles/adapter.rs":"1f74dcf1a8823c429d9b0d53037d281de3fc1e70b6d27e403426b7ad7ddbf728","src/gles/command.rs":"ca3e45324cc971801334d16a7613c11b375c3ea4622b95d65c855767c62dab90","src/gles/conv.rs":"84164c6b93afdbd07b037cdbee4c94dd40df0a21d42703481d0c293e92a13efd","
src/gles/device.rs":"c9e6b108115dc73841b79b0d65ebfbf9f9629e19eb89a67aa86b8d471a31fc84","src/gles/egl.rs":"ef6c2fe12e655551adfd30eb0ac3ff69da203af87142219926ceca064fe51ac2","src/gles/emscripten.rs":"19bb73a9d140645f3f32cd48b002151711a9b8456e213eab5f3a2be79239e147","src/gles/mod.rs":"f641feae35a52ca66aa9364fe57d7232d2b540be5e72bbb3bf5b268156bfc4cf","src/gles/queue.rs":"d8bdfbe04987ce99cb1b1cb7393a368d812dd78d1753334417237c97229a1b4b","src/gles/shaders/clear.frag":"aac702eed9ece5482db5ba6783a678b119a5e7802b1ecf93f4975dee8acab0b3","src/gles/shaders/clear.vert":"8f636168e1da2cac48091c466a543c3b09fb4a0dd8c60c1c9bf34cc890766740","src/gles/shaders/srgb_present.frag":"dd9a43c339a2fa4ccf7f6a1854c6f400cabf271a7d5e9230768e9f39d47f3ff5","src/gles/shaders/srgb_present.vert":"6e85d489403d80b81cc94790730bb53b309dfc5eeede8f1ea3412a660f31d357","src/gles/web.rs":"3de8dab74d06e0633618ac486c95b6c1d446da107f514be30742899c7686c9ea","src/lib.rs":"b84c4ea9926522587dd0de6b30aac9fd6693840755fa959a184de825fb59d257","src/metal/adapter.rs":"d99636bf9ca166ea7d224161870b9732363583e958fa8c747043e3894968560b","src/metal/command.rs":"3c9492241d88d8fe70966bbda0bf1b17e8c8429ae8cc0dfa374650ad4bf3df76","src/metal/conv.rs":"e4aeafcddc75b2f9b54245faedaf0566f1e63681807ae786ceb46ac35f0e13bf","src/metal/device.rs":"88f6c1a2a9e7b9a53919391303952d9814d0906bc753258d45b23993791f8ab2","src/metal/mod.rs":"b3bbda971a93244f817fc65a9bfa9f584f3f4acc1f5d75aac061ee93cbbd3259","src/metal/surface.rs":"1659973bcbf7fae9761848db0fec5f393841a4407188df0f82260ebcd180f047","src/metal/time.rs":"c32d69f30e846dfcc0e39e01097fb80df63b2bebb6586143bb62494999850246","src/vulkan/adapter.rs":"cbdf570f4379d7d4fdac58811575c54ee05cc062ecc3f53b7f3ce7c16f6ff6c0","src/vulkan/command.rs":"ed82a645664a8e691a0bac18a039f2ba33ebb31a064ba1b8a255586581ab5558","src/vulkan/conv.rs":"313bd4b00b4c5d04823be1035964d55b9d8491ece1b9ce1fd15c65e46aa56bd3","src/vulkan/device.rs":"c65d03b542afee665f0f957d32f78b4c7acfeb95718bbcb06768b4f78a4b1c5f","src/vulkan/instance.rs":"02c51b4366dd7bc9876b1c132007a2099a06fa9af0cef6a97d77b12abd6c6f49","src/vulkan/mod.rs":"5161384ad5a19bb0451d6ac51b400dddbd295c38cecb4120611bb7cc78c2d30e"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/Cargo.toml b/third_party/rust/wgpu-hal/Cargo.toml new file mode 100644 index 0000000000..61471209c1 --- /dev/null +++ b/third_party/rust/wgpu-hal/Cargo.toml @@ -0,0 +1,253 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.60" +name = "wgpu-hal" +version = "0.16.0" +authors = ["wgpu developers"] +description = "WebGPU hardware abstraction layer" +homepage = "https://wgpu.rs/" +readme = "README.md" +keywords = ["graphics"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/gfx-rs/wgpu" + +[package.metadata.docs.rs] +features = [ + "vulkan", + "gles", + "renderdoc", +] +rustdoc-args = [ + "--cfg", + "docsrs", +] +targets = [ + "x86_64-unknown-linux-gnu", + "x86_64-apple-darwin", + "x86_64-pc-windows-msvc", + "wasm32-unknown-unknown", +] + +[lib] + +[[example]] +name = "halmark" + +[[example]] +name = "raw-gles" +required-features = ["gles"] + +[dependencies] +arrayvec = "0.7" +bitflags = "2" +log = "0.4" +parking_lot = ">=0.11,<0.13" +raw-window-handle = "0.5" +rustc-hash = "1.1" +thiserror = "1" + +[dependencies.glow] +version = "0.12.1" +optional = true + +[dependencies.naga] +version = "0.12.0" +git = "https://github.com/gfx-rs/naga" +rev = "b99d58ea435090e561377949f428bce2c18451bb" +features = ["clone"] + +[dependencies.profiling] +version = "1" +default-features = false + +[dependencies.wgt] +version = "0.16" +path = "../wgpu-types" +package = "wgpu-types" + +[dev-dependencies] +env_logger = "0.10" +winit = "0.27.1" + +[dev-dependencies.naga] +version = "0.12.0" +git = "https://github.com/gfx-rs/naga" +rev = "b99d58ea435090e561377949f428bce2c18451bb" +features = ["wgsl-in"] + +[features] +default = [] +dx11 = [ + "naga/hlsl-out", + "d3d12", + "libloading", + "winapi/d3d11", + "winapi/std", + "winapi/d3d11_1", + "winapi/d3d11_2", + "winapi/d3d11sdklayers", + "winapi/dxgi1_6", +] +dx12 = [ + "naga/hlsl-out", + "d3d12", + "bit-set", + "range-alloc", + "winapi/std", + "winapi/winbase", + "winapi/d3d12", + "winapi/d3d12shader", + "winapi/d3d12sdklayers", + "winapi/dxgi1_6", +] +dxc_shader_compiler = ["hassle-rs"] +gles = [ + "naga/glsl-out", + "glow", + "khronos-egl", + "libloading", +] +metal = [ + "naga/msl-out", + "block", + "foreign-types", +] +renderdoc = [ + "libloading", + "renderdoc-sys", +] +vulkan = [ + "naga/spv-out", + "ash", + "gpu-alloc", + "gpu-descriptor", + "libloading", + "smallvec", +] +windows_rs = ["gpu-allocator"] + +[target."cfg(all(target_arch = \"wasm32\", not(target_os = \"emscripten\")))".dependencies] +js-sys = "0.3.61" +wasm-bindgen = "0.2.84" + +[target."cfg(all(target_arch = \"wasm32\", not(target_os = \"emscripten\")))".dependencies.web-sys] +version = "0.3.61" +features = [ + "Window", + "HtmlCanvasElement", + "WebGl2RenderingContext", + "OffscreenCanvas", +] + +[target."cfg(any(target_os=\"macos\", target_os=\"ios\"))".dependencies] +core-graphics-types = "0.1" +metal = "0.24.0" +objc = "0.2.5" + +[target."cfg(any(target_os=\"macos\", target_os=\"ios\"))".dependencies.block] +version = "0.1" +optional = true + 
+[target."cfg(any(target_os=\"macos\", target_os=\"ios\"))".dependencies.foreign-types] +version = "0.3" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.ash] +version = "0.37.2" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.gpu-alloc] +version = "0.5" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.gpu-descriptor] +version = "0.2" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.khronos-egl] +version = "4.1" +features = ["dynamic"] +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.libloading] +version = ">=0.7,<0.9" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.renderdoc-sys] +version = "1.0.0" +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dependencies.smallvec] +version = "1" +features = ["union"] +optional = true + +[target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies] +glutin = "0.29.1" + +[target."cfg(target_os = \"android\")".dependencies] +android_system_properties = "0.1.1" + +[target."cfg(target_os = \"emscripten\")".dependencies.khronos-egl] +version = "4.1" +features = [ + "static", + "no-pkg-config", +] + +[target."cfg(target_os = \"emscripten\")".dependencies.libloading] +version = ">=0.7,<0.9" +optional = true + +[target."cfg(unix)".dependencies] +libc = "0.2" + +[target."cfg(windows)".dependencies.bit-set] +version = "0.5" +optional = true + +[target."cfg(windows)".dependencies.d3d12] +version = "0.6.0" +git = "https://github.com/gfx-rs/d3d12-rs" +rev = "b940b1d71" +features = ["libloading"] +optional = true + +[target."cfg(windows)".dependencies.gpu-allocator] +version = "0.22" +features = [ + "d3d12", + "windows", + "public-winapi", +] +optional = true +default_features = false + +[target."cfg(windows)".dependencies.hassle-rs] +version = "0.10" +optional = true + +[target."cfg(windows)".dependencies.range-alloc] +version = "0.1" +optional = true + +[target."cfg(windows)".dependencies.winapi] +version = "0.3" +features = [ + "profileapi", + "libloaderapi", + "windef", + "winuser", + "dcomp", +] diff --git a/third_party/rust/wgpu-hal/LICENSE.APACHE b/third_party/rust/wgpu-hal/LICENSE.APACHE new file mode 100644 index 0000000000..d9a10c0d8e --- /dev/null +++ b/third_party/rust/wgpu-hal/LICENSE.APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/third_party/rust/wgpu-hal/LICENSE.MIT b/third_party/rust/wgpu-hal/LICENSE.MIT new file mode 100644 index 0000000000..4699691b8e --- /dev/null +++ b/third_party/rust/wgpu-hal/LICENSE.MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 The gfx-rs developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/third_party/rust/wgpu-hal/README.md b/third_party/rust/wgpu-hal/README.md new file mode 100644 index 0000000000..0ab7a0283a --- /dev/null +++ b/third_party/rust/wgpu-hal/README.md @@ -0,0 +1,23 @@ +*wgpu-hal* is an explicit low-level GPU abstraction powering *wgpu-core*. +It's a spiritual successor to [gfx-hal](https://github.com/gfx-rs/gfx), +but with reduced scope, and oriented towards WebGPU implementation goals. + +It has no overhead for validation or tracking, and the API translation overhead is kept to the bare minimum by the design of WebGPU. +This API can be used for resource-demanding applications and engines. + +# Usage notes + +All of the API is `unsafe`. Documenting the exact safety requirements for the +state and function arguments is desired, but will likely be incomplete while the library is in early development. + +The returned errors are only for cases that the user can't anticipate, +such as running out-of-memory, or losing the device. +For the counter-example, there is no error for mapping a buffer that's not mappable. +As the buffer creator, the user should already know if they can map it. + +The API accept iterators in order to avoid forcing the user to store data in particular containers. The implementation doesn't guarantee that any of the iterators are drained, unless stated otherwise by the function documentation. +For this reason, we recommend that iterators don't do any mutating work. + +# Debugging + +Most of the information in https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications still applies to this API, with an exception of API tracing/replay functionality, which is only available in *wgpu-core*. diff --git a/third_party/rust/wgpu-hal/examples/halmark/main.rs b/third_party/rust/wgpu-hal/examples/halmark/main.rs new file mode 100644 index 0000000000..588de61063 --- /dev/null +++ b/third_party/rust/wgpu-hal/examples/halmark/main.rs @@ -0,0 +1,837 @@ +//! This example shows basic usage of wgpu-hal by rendering +//! a ton of moving sprites, each with a separate texture and draw call. +extern crate wgpu_hal as hal; + +use hal::{ + Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, Surface as _, +}; +use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle}; + +use std::{ + borrow::{Borrow, Cow}, + iter, mem, ptr, + time::Instant, +}; + +const MAX_BUNNIES: usize = 1 << 20; +const BUNNY_SIZE: f32 = 0.15 * 256.0; +const GRAVITY: f32 = -9.8 * 100.0; +const MAX_VELOCITY: f32 = 750.0; +const COMMAND_BUFFER_PER_CONTEXT: usize = 100; +const DESIRED_FRAMES: u32 = 3; + +#[repr(C)] +#[derive(Clone, Copy)] +struct Globals { + mvp: [[f32; 4]; 4], + size: [f32; 2], + pad: [f32; 2], +} + +#[repr(C, align(256))] +#[derive(Clone, Copy)] +struct Locals { + position: [f32; 2], + velocity: [f32; 2], + color: u32, + _pad: u32, +} + +struct ExecutionContext<A: hal::Api> { + encoder: A::CommandEncoder, + fence: A::Fence, + fence_value: hal::FenceValue, + used_views: Vec<A::TextureView>, + used_cmd_bufs: Vec<A::CommandBuffer>, + frames_recorded: usize, +} + +impl<A: hal::Api> ExecutionContext<A> { + unsafe fn wait_and_clear(&mut self, device: &A::Device) { + device.wait(&self.fence, self.fence_value, !0).unwrap(); + self.encoder.reset_all(self.used_cmd_bufs.drain(..)); + for view in self.used_views.drain(..) 
{ + device.destroy_texture_view(view); + } + self.frames_recorded = 0; + } +} + +#[allow(dead_code)] +struct Example<A: hal::Api> { + instance: A::Instance, + adapter: A::Adapter, + surface: A::Surface, + surface_format: wgt::TextureFormat, + device: A::Device, + queue: A::Queue, + global_group: A::BindGroup, + local_group: A::BindGroup, + global_group_layout: A::BindGroupLayout, + local_group_layout: A::BindGroupLayout, + pipeline_layout: A::PipelineLayout, + shader: A::ShaderModule, + pipeline: A::RenderPipeline, + bunnies: Vec<Locals>, + local_buffer: A::Buffer, + local_alignment: u32, + global_buffer: A::Buffer, + sampler: A::Sampler, + texture: A::Texture, + texture_view: A::TextureView, + contexts: Vec<ExecutionContext<A>>, + context_index: usize, + extent: [u32; 2], + start: Instant, +} + +impl<A: hal::Api> Example<A> { + fn init(window: &winit::window::Window) -> Result<Self, hal::InstanceError> { + let instance_desc = hal::InstanceDescriptor { + name: "example", + flags: if cfg!(debug_assertions) { + hal::InstanceFlags::all() + } else { + hal::InstanceFlags::empty() + }, + // Can't rely on having DXC available, so use FXC instead + dx12_shader_compiler: wgt::Dx12Compiler::Fxc, + }; + let instance = unsafe { A::Instance::init(&instance_desc)? }; + let mut surface = unsafe { + instance + .create_surface(window.raw_display_handle(), window.raw_window_handle()) + .unwrap() + }; + + let (adapter, capabilities) = unsafe { + let mut adapters = instance.enumerate_adapters(); + if adapters.is_empty() { + return Err(hal::InstanceError); + } + let exposed = adapters.swap_remove(0); + (exposed.adapter, exposed.capabilities) + }; + let surface_caps = + unsafe { adapter.surface_capabilities(&surface) }.ok_or(hal::InstanceError)?; + log::info!("Surface caps: {:#?}", surface_caps); + + let hal::OpenDevice { device, mut queue } = unsafe { + adapter + .open(wgt::Features::empty(), &wgt::Limits::default()) + .unwrap() + }; + + let window_size: (u32, u32) = window.inner_size().into(); + let surface_config = hal::SurfaceConfiguration { + swap_chain_size: DESIRED_FRAMES.clamp( + *surface_caps.swap_chain_sizes.start(), + *surface_caps.swap_chain_sizes.end(), + ), + present_mode: wgt::PresentMode::Fifo, + composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, + format: wgt::TextureFormat::Bgra8UnormSrgb, + extent: wgt::Extent3d { + width: window_size.0, + height: window_size.1, + depth_or_array_layers: 1, + }, + usage: hal::TextureUses::COLOR_TARGET, + view_formats: vec![], + }; + unsafe { + surface.configure(&device, &surface_config).unwrap(); + }; + + let naga_shader = { + let shader_file = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("examples") + .join("halmark") + .join("shader.wgsl"); + let source = std::fs::read_to_string(shader_file).unwrap(); + let module = naga::front::wgsl::Frontend::new().parse(&source).unwrap(); + let info = naga::valid::Validator::new( + naga::valid::ValidationFlags::all(), + naga::valid::Capabilities::empty(), + ) + .validate(&module) + .unwrap(); + hal::NagaShader { + module: Cow::Owned(module), + info, + } + }; + let shader_desc = hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }; + let shader = unsafe { + device + .create_shader_module(&shader_desc, hal::ShaderInput::Naga(naga_shader)) + .unwrap() + }; + + let global_bgl_desc = hal::BindGroupLayoutDescriptor { + label: None, + flags: hal::BindGroupLayoutFlags::empty(), + entries: &[ + wgt::BindGroupLayoutEntry { + binding: 0, + visibility: wgt::ShaderStages::VERTEX, + ty: 
wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: wgt::BufferSize::new(mem::size_of::<Globals>() as _), + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 1, + visibility: wgt::ShaderStages::FRAGMENT, + ty: wgt::BindingType::Texture { + sample_type: wgt::TextureSampleType::Float { filterable: true }, + view_dimension: wgt::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 2, + visibility: wgt::ShaderStages::FRAGMENT, + ty: wgt::BindingType::Sampler(wgt::SamplerBindingType::Filtering), + count: None, + }, + ], + }; + + let global_group_layout = + unsafe { device.create_bind_group_layout(&global_bgl_desc).unwrap() }; + + let local_bgl_desc = hal::BindGroupLayoutDescriptor { + label: None, + flags: hal::BindGroupLayoutFlags::empty(), + entries: &[wgt::BindGroupLayoutEntry { + binding: 0, + visibility: wgt::ShaderStages::VERTEX, + ty: wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: true, + min_binding_size: wgt::BufferSize::new(mem::size_of::<Locals>() as _), + }, + count: None, + }], + }; + let local_group_layout = + unsafe { device.create_bind_group_layout(&local_bgl_desc).unwrap() }; + + let pipeline_layout_desc = hal::PipelineLayoutDescriptor { + label: None, + flags: hal::PipelineLayoutFlags::empty(), + bind_group_layouts: &[&global_group_layout, &local_group_layout], + push_constant_ranges: &[], + }; + let pipeline_layout = unsafe { + device + .create_pipeline_layout(&pipeline_layout_desc) + .unwrap() + }; + + let pipeline_desc = hal::RenderPipelineDescriptor { + label: None, + layout: &pipeline_layout, + vertex_stage: hal::ProgrammableStage { + module: &shader, + entry_point: "vs_main", + }, + vertex_buffers: &[], + fragment_stage: Some(hal::ProgrammableStage { + module: &shader, + entry_point: "fs_main", + }), + primitive: wgt::PrimitiveState { + topology: wgt::PrimitiveTopology::TriangleStrip, + ..wgt::PrimitiveState::default() + }, + depth_stencil: None, + multisample: wgt::MultisampleState::default(), + color_targets: &[Some(wgt::ColorTargetState { + format: surface_config.format, + blend: Some(wgt::BlendState::ALPHA_BLENDING), + write_mask: wgt::ColorWrites::default(), + })], + multiview: None, + }; + let pipeline = unsafe { device.create_render_pipeline(&pipeline_desc).unwrap() }; + + let texture_data = vec![0xFFu8; 4]; + + let staging_buffer_desc = hal::BufferDescriptor { + label: Some("stage"), + size: texture_data.len() as wgt::BufferAddress, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }; + let staging_buffer = unsafe { device.create_buffer(&staging_buffer_desc).unwrap() }; + unsafe { + let mapping = device + .map_buffer(&staging_buffer, 0..staging_buffer_desc.size) + .unwrap(); + ptr::copy_nonoverlapping( + texture_data.as_ptr(), + mapping.ptr.as_ptr(), + texture_data.len(), + ); + device.unmap_buffer(&staging_buffer).unwrap(); + assert!(mapping.is_coherent); + } + + let texture_desc = hal::TextureDescriptor { + label: None, + size: wgt::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgt::TextureDimension::D2, + format: wgt::TextureFormat::Rgba8UnormSrgb, + usage: hal::TextureUses::COPY_DST | hal::TextureUses::RESOURCE, + memory_flags: hal::MemoryFlags::empty(), + view_formats: vec![], + }; + let texture = unsafe { 
device.create_texture(&texture_desc).unwrap() }; + + let cmd_encoder_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &queue, + }; + let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() }; + unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; + { + let buffer_barrier = hal::BufferBarrier { + buffer: &staging_buffer, + usage: hal::BufferUses::empty()..hal::BufferUses::COPY_SRC, + }; + let texture_barrier1 = hal::TextureBarrier { + texture: &texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COPY_DST, + }; + let texture_barrier2 = hal::TextureBarrier { + texture: &texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_DST..hal::TextureUses::RESOURCE, + }; + let copy = hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: 0, + bytes_per_row: Some(4), + rows_per_image: None, + }, + texture_base: hal::TextureCopyBase { + origin: wgt::Origin3d::ZERO, + mip_level: 0, + array_layer: 0, + aspect: hal::FormatAspects::COLOR, + }, + size: hal::CopyExtent { + width: 1, + height: 1, + depth: 1, + }, + }; + unsafe { + cmd_encoder.transition_buffers(iter::once(buffer_barrier)); + cmd_encoder.transition_textures(iter::once(texture_barrier1)); + cmd_encoder.copy_buffer_to_texture(&staging_buffer, &texture, iter::once(copy)); + cmd_encoder.transition_textures(iter::once(texture_barrier2)); + } + } + + let sampler_desc = hal::SamplerDescriptor { + label: None, + address_modes: [wgt::AddressMode::ClampToEdge; 3], + mag_filter: wgt::FilterMode::Linear, + min_filter: wgt::FilterMode::Nearest, + mipmap_filter: wgt::FilterMode::Nearest, + lod_clamp: 0.0..32.0, + compare: None, + anisotropy_clamp: 1, + border_color: None, + }; + let sampler = unsafe { device.create_sampler(&sampler_desc).unwrap() }; + + let globals = Globals { + // cgmath::ortho() projection + mvp: [ + [2.0 / window_size.0 as f32, 0.0, 0.0, 0.0], + [0.0, 2.0 / window_size.1 as f32, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [-1.0, -1.0, 0.0, 1.0], + ], + size: [BUNNY_SIZE; 2], + pad: [0.0; 2], + }; + + let global_buffer_desc = hal::BufferDescriptor { + label: Some("global"), + size: mem::size_of::<Globals>() as wgt::BufferAddress, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + memory_flags: hal::MemoryFlags::PREFER_COHERENT, + }; + let global_buffer = unsafe { + let buffer = device.create_buffer(&global_buffer_desc).unwrap(); + let mapping = device + .map_buffer(&buffer, 0..global_buffer_desc.size) + .unwrap(); + ptr::copy_nonoverlapping( + &globals as *const Globals as *const u8, + mapping.ptr.as_ptr(), + mem::size_of::<Globals>(), + ); + device.unmap_buffer(&buffer).unwrap(); + assert!(mapping.is_coherent); + buffer + }; + + let local_alignment = wgt::math::align_to( + mem::size_of::<Locals>() as u32, + capabilities.limits.min_uniform_buffer_offset_alignment, + ); + let local_buffer_desc = hal::BufferDescriptor { + label: Some("local"), + size: (MAX_BUNNIES as wgt::BufferAddress) * (local_alignment as wgt::BufferAddress), + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + memory_flags: hal::MemoryFlags::PREFER_COHERENT, + }; + let local_buffer = unsafe { device.create_buffer(&local_buffer_desc).unwrap() }; + + let view_desc = hal::TextureViewDescriptor { + label: None, + format: texture_desc.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::RESOURCE, + range: wgt::ImageSubresourceRange::default(), + }; + let 
texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; + + let global_group = { + let global_buffer_binding = hal::BufferBinding { + buffer: &global_buffer, + offset: 0, + size: None, + }; + let texture_binding = hal::TextureBinding { + view: &texture_view, + usage: hal::TextureUses::RESOURCE, + }; + let global_group_desc = hal::BindGroupDescriptor { + label: Some("global"), + layout: &global_group_layout, + buffers: &[global_buffer_binding], + samplers: &[&sampler], + textures: &[texture_binding], + entries: &[ + hal::BindGroupEntry { + binding: 0, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 1, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 2, + resource_index: 0, + count: 1, + }, + ], + }; + unsafe { device.create_bind_group(&global_group_desc).unwrap() } + }; + + let local_group = { + let local_buffer_binding = hal::BufferBinding { + buffer: &local_buffer, + offset: 0, + size: wgt::BufferSize::new(mem::size_of::<Locals>() as _), + }; + let local_group_desc = hal::BindGroupDescriptor { + label: Some("local"), + layout: &local_group_layout, + buffers: &[local_buffer_binding], + samplers: &[], + textures: &[], + entries: &[hal::BindGroupEntry { + binding: 0, + resource_index: 0, + count: 1, + }], + }; + unsafe { device.create_bind_group(&local_group_desc).unwrap() } + }; + + let init_fence_value = 1; + let fence = unsafe { + let mut fence = device.create_fence().unwrap(); + let init_cmd = cmd_encoder.end_encoding().unwrap(); + queue + .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .unwrap(); + device.wait(&fence, init_fence_value, !0).unwrap(); + device.destroy_buffer(staging_buffer); + cmd_encoder.reset_all(iter::once(init_cmd)); + fence + }; + + Ok(Example { + instance, + surface, + surface_format: surface_config.format, + adapter, + device, + queue, + pipeline_layout, + shader, + pipeline, + global_group, + local_group, + global_group_layout, + local_group_layout, + bunnies: Vec::new(), + local_buffer, + local_alignment, + global_buffer, + sampler, + texture, + texture_view, + contexts: vec![ExecutionContext { + encoder: cmd_encoder, + fence, + fence_value: init_fence_value + 1, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + }], + context_index: 0, + extent: [window_size.0, window_size.1], + start: Instant::now(), + }) + } + + fn is_empty(&self) -> bool { + self.bunnies.is_empty() + } + + fn exit(mut self) { + unsafe { + { + let ctx = &mut self.contexts[self.context_index]; + self.queue + .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .unwrap(); + } + + for mut ctx in self.contexts { + ctx.wait_and_clear(&self.device); + self.device.destroy_command_encoder(ctx.encoder); + self.device.destroy_fence(ctx.fence); + } + + self.device.destroy_bind_group(self.local_group); + self.device.destroy_bind_group(self.global_group); + self.device.destroy_buffer(self.local_buffer); + self.device.destroy_buffer(self.global_buffer); + self.device.destroy_texture_view(self.texture_view); + self.device.destroy_texture(self.texture); + self.device.destroy_sampler(self.sampler); + self.device.destroy_shader_module(self.shader); + self.device.destroy_render_pipeline(self.pipeline); + self.device + .destroy_bind_group_layout(self.local_group_layout); + self.device + .destroy_bind_group_layout(self.global_group_layout); + self.device.destroy_pipeline_layout(self.pipeline_layout); + + self.surface.unconfigure(&self.device); + self.device.exit(self.queue); + 
self.instance.destroy_surface(self.surface); + drop(self.adapter); + } + } + + fn update(&mut self, event: winit::event::WindowEvent) { + if let winit::event::WindowEvent::KeyboardInput { + input: + winit::event::KeyboardInput { + virtual_keycode: Some(winit::event::VirtualKeyCode::Space), + state: winit::event::ElementState::Pressed, + .. + }, + .. + } = event + { + let spawn_count = 64 + self.bunnies.len() / 2; + let elapsed = self.start.elapsed(); + let color = elapsed.as_nanos() as u32; + println!( + "Spawning {} bunnies, total at {}", + spawn_count, + self.bunnies.len() + spawn_count + ); + for i in 0..spawn_count { + let random = ((elapsed.as_nanos() * (i + 1) as u128) & 0xFF) as f32 / 255.0; + let speed = random * MAX_VELOCITY - (MAX_VELOCITY * 0.5); + self.bunnies.push(Locals { + position: [0.0, 0.5 * (self.extent[1] as f32)], + velocity: [speed, 0.0], + color, + _pad: 0, + }); + } + } + } + + fn render(&mut self) { + let delta = 0.01; + for bunny in self.bunnies.iter_mut() { + bunny.position[0] += bunny.velocity[0] * delta; + bunny.position[1] += bunny.velocity[1] * delta; + bunny.velocity[1] += GRAVITY * delta; + if (bunny.velocity[0] > 0.0 + && bunny.position[0] + 0.5 * BUNNY_SIZE > self.extent[0] as f32) + || (bunny.velocity[0] < 0.0 && bunny.position[0] - 0.5 * BUNNY_SIZE < 0.0) + { + bunny.velocity[0] *= -1.0; + } + if bunny.velocity[1] < 0.0 && bunny.position[1] < 0.5 * BUNNY_SIZE { + bunny.velocity[1] *= -1.0; + } + } + + if !self.bunnies.is_empty() { + let size = self.bunnies.len() * self.local_alignment as usize; + unsafe { + let mapping = self + .device + .map_buffer(&self.local_buffer, 0..size as wgt::BufferAddress) + .unwrap(); + ptr::copy_nonoverlapping( + self.bunnies.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + size, + ); + assert!(mapping.is_coherent); + self.device.unmap_buffer(&self.local_buffer).unwrap(); + } + } + + let ctx = &mut self.contexts[self.context_index]; + + let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture }; + + let target_barrier0 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COLOR_TARGET, + }; + unsafe { + ctx.encoder.begin_encoding(Some("frame")).unwrap(); + ctx.encoder.transition_textures(iter::once(target_barrier0)); + } + + let surface_view_desc = hal::TextureViewDescriptor { + label: None, + format: self.surface_format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COLOR_TARGET, + range: wgt::ImageSubresourceRange::default(), + }; + let surface_tex_view = unsafe { + self.device + .create_texture_view(surface_tex.borrow(), &surface_view_desc) + .unwrap() + }; + let pass_desc = hal::RenderPassDescriptor { + label: None, + extent: wgt::Extent3d { + width: self.extent[0], + height: self.extent[1], + depth_or_array_layers: 1, + }, + sample_count: 1, + color_attachments: &[Some(hal::ColorAttachment { + target: hal::Attachment { + view: &surface_tex_view, + usage: hal::TextureUses::COLOR_TARGET, + }, + resolve_target: None, + ops: hal::AttachmentOps::STORE, + clear_value: wgt::Color { + r: 0.1, + g: 0.2, + b: 0.3, + a: 1.0, + }, + })], + depth_stencil_attachment: None, + multiview: None, + }; + unsafe { + ctx.encoder.begin_render_pass(&pass_desc); + ctx.encoder.set_render_pipeline(&self.pipeline); + ctx.encoder + .set_bind_group(&self.pipeline_layout, 0, &self.global_group, &[]); + } + + for i in 0..self.bunnies.len() { + let offset = (i as wgt::DynamicOffset) * 
(self.local_alignment as wgt::DynamicOffset); + unsafe { + ctx.encoder + .set_bind_group(&self.pipeline_layout, 1, &self.local_group, &[offset]); + ctx.encoder.draw(0, 4, 0, 1); + } + } + + ctx.frames_recorded += 1; + let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT; + + let target_barrier1 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COLOR_TARGET..hal::TextureUses::PRESENT, + }; + unsafe { + ctx.encoder.end_render_pass(); + ctx.encoder.transition_textures(iter::once(target_barrier1)); + } + + unsafe { + let cmd_buf = ctx.encoder.end_encoding().unwrap(); + let fence_param = if do_fence { + Some((&mut ctx.fence, ctx.fence_value)) + } else { + None + }; + self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + self.queue.present(&mut self.surface, surface_tex).unwrap(); + ctx.used_cmd_bufs.push(cmd_buf); + ctx.used_views.push(surface_tex_view); + }; + + if do_fence { + log::info!("Context switch from {}", self.context_index); + let old_fence_value = ctx.fence_value; + if self.contexts.len() == 1 { + let hal_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &self.queue, + }; + self.contexts.push(unsafe { + ExecutionContext { + encoder: self.device.create_command_encoder(&hal_desc).unwrap(), + fence: self.device.create_fence().unwrap(), + fence_value: 0, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + } + }); + } + self.context_index = (self.context_index + 1) % self.contexts.len(); + let next = &mut self.contexts[self.context_index]; + unsafe { + next.wait_and_clear(&self.device); + } + next.fence_value = old_fence_value + 1; + } + } +} + +#[cfg(all(feature = "metal"))] +type Api = hal::api::Metal; +#[cfg(all(feature = "vulkan", not(feature = "metal")))] +type Api = hal::api::Vulkan; +#[cfg(all(feature = "gles", not(feature = "metal"), not(feature = "vulkan")))] +type Api = hal::api::Gles; +#[cfg(all( + feature = "dx12", + not(feature = "metal"), + not(feature = "vulkan"), + not(feature = "gles") +))] +type Api = hal::api::Dx12; +#[cfg(not(any( + feature = "metal", + feature = "vulkan", + feature = "gles", + feature = "dx12" +)))] +type Api = hal::api::Empty; + +fn main() { + env_logger::init(); + + let event_loop = winit::event_loop::EventLoop::new(); + let window = winit::window::WindowBuilder::new() + .with_title("hal-bunnymark") + .build(&event_loop) + .unwrap(); + + let example_result = Example::<Api>::init(&window); + let mut example = Some(example_result.expect("Selected backend is not supported")); + + let mut last_frame_inst = Instant::now(); + let (mut frame_count, mut accum_time) = (0, 0.0); + + event_loop.run(move |event, _, control_flow| { + let _ = &window; // force ownership by the closure + *control_flow = winit::event_loop::ControlFlow::Poll; + match event { + winit::event::Event::RedrawEventsCleared => { + window.request_redraw(); + } + winit::event::Event::WindowEvent { event, .. } => match event { + winit::event::WindowEvent::KeyboardInput { + input: + winit::event::KeyboardInput { + virtual_keycode: Some(winit::event::VirtualKeyCode::Escape), + state: winit::event::ElementState::Pressed, + .. + }, + .. 
+ } + | winit::event::WindowEvent::CloseRequested => { + *control_flow = winit::event_loop::ControlFlow::Exit; + } + _ => { + example.as_mut().unwrap().update(event); + } + }, + winit::event::Event::RedrawRequested(_) => { + let ex = example.as_mut().unwrap(); + { + accum_time += last_frame_inst.elapsed().as_secs_f32(); + last_frame_inst = Instant::now(); + frame_count += 1; + if frame_count == 100 && !ex.is_empty() { + println!( + "Avg frame time {}ms", + accum_time * 1000.0 / frame_count as f32 + ); + accum_time = 0.0; + frame_count = 0; + } + } + ex.render(); + } + winit::event::Event::LoopDestroyed => { + example.take().unwrap().exit(); + } + _ => {} + } + }); +} diff --git a/third_party/rust/wgpu-hal/examples/halmark/shader.wgsl b/third_party/rust/wgpu-hal/examples/halmark/shader.wgsl new file mode 100644 index 0000000000..ffa7264591 --- /dev/null +++ b/third_party/rust/wgpu-hal/examples/halmark/shader.wgsl @@ -0,0 +1,50 @@ +struct Globals { + mvp: mat4x4<f32>, + size: vec2<f32>, + _pad0: u32, + _pad1: u32, +}; + +struct Locals { + position: vec2<f32>, + velocity: vec2<f32>, + color: u32, + _pad0: u32, + _pad1: u32, + _pad2: u32, +}; + +@group(0) +@binding(0) +var<uniform> globals: Globals; + +@group(1) +@binding(0) +var<uniform> locals: Locals; + +struct VertexOutput { + @builtin(position) position: vec4<f32>, + @location(0) tex_coords: vec2<f32>, + @location(1) color: vec4<f32>, +}; + +@vertex +fn vs_main(@builtin(vertex_index) vi: u32) -> VertexOutput { + let tc = vec2<f32>(f32(vi & 1u), 0.5 * f32(vi & 2u)); + let offset = vec2<f32>(tc.x * globals.size.x, tc.y * globals.size.y); + let pos = globals.mvp * vec4<f32>(locals.position + offset, 0.0, 1.0); + let color = vec4<f32>((vec4<u32>(locals.color) >> vec4<u32>(0u, 8u, 16u, 24u)) & vec4<u32>(255u)) / 255.0; + return VertexOutput(pos, tc, color); +} + +@group(0) +@binding(1) +var tex: texture_2d<f32>; +@group(0) +@binding(2) +var sam: sampler; + +@fragment +fn fs_main(vertex: VertexOutput) -> @location(0) vec4<f32> { + return vertex.color * textureSampleLevel(tex, sam, vertex.tex_coords, 0.0); +} diff --git a/third_party/rust/wgpu-hal/examples/raw-gles.em.html b/third_party/rust/wgpu-hal/examples/raw-gles.em.html new file mode 100644 index 0000000000..f3587e8575 --- /dev/null +++ b/third_party/rust/wgpu-hal/examples/raw-gles.em.html @@ -0,0 +1,16 @@ +<html> + <head> + <meta charset="UTF-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + </head> + <body> + <canvas id="canvas" width="640" height="400"></canvas> + <script> + var Module = { + canvas: document.getElementById("canvas"), + preRun: [function() {ENV.RUST_LOG = "debug"}] + }; + </script> + <script src="raw-gles.js"></script> + </body> +</html>
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/examples/raw-gles.rs b/third_party/rust/wgpu-hal/examples/raw-gles.rs new file mode 100644 index 0000000000..d9dfc492fa --- /dev/null +++ b/third_party/rust/wgpu-hal/examples/raw-gles.rs @@ -0,0 +1,185 @@ +//! This example shows interop with raw GLES contexts - +//! the ability to hook up wgpu-hal to an existing context and draw into it. +//! +//! Emscripten build: +//! 1. install emsdk +//! 2. build this example with cargo: +//! EMCC_CFLAGS="-g -s ERROR_ON_UNDEFINED_SYMBOLS=0 --no-entry -s FULL_ES3=1" cargo build --example raw-gles --target wasm32-unknown-emscripten +//! 3. copy raw-gles.em.html into target directory and open it in browser: +//! cp wgpu-hal/examples/raw-gles.em.html target/wasm32-unknown-emscripten/debug/examples + +extern crate wgpu_hal as hal; + +#[cfg(not(target_arch = "wasm32"))] +fn main() { + env_logger::init(); + println!("Initializing external GL context"); + + let event_loop = glutin::event_loop::EventLoop::new(); + let window_builder = glutin::window::WindowBuilder::new(); + let gl_context = unsafe { + glutin::ContextBuilder::new() + .with_gl(glutin::GlRequest::Specific(glutin::Api::OpenGlEs, (3, 0))) + .build_windowed(window_builder, &event_loop) + .unwrap() + .make_current() + .unwrap() + }; + let inner_size = gl_context.window().inner_size(); + + println!("Hooking up to wgpu-hal"); + let exposed = unsafe { + <hal::api::Gles as hal::Api>::Adapter::new_external(|name| { + gl_context.get_proc_address(name) + }) + } + .expect("GL adapter can't be initialized"); + + fill_screen(&exposed, inner_size.width, inner_size.height); + + println!("Showing the window"); + gl_context.swap_buffers().unwrap(); + + event_loop.run(move |event, _, control_flow| { + use glutin::{ + event::{Event, KeyboardInput, VirtualKeyCode, WindowEvent}, + event_loop::ControlFlow, + }; + *control_flow = ControlFlow::Wait; + + match event { + Event::LoopDestroyed => (), + Event::WindowEvent { event, .. } => match event { + WindowEvent::CloseRequested + | WindowEvent::KeyboardInput { + input: + KeyboardInput { + virtual_keycode: Some(VirtualKeyCode::Escape), + .. + }, + .. 
+ } => *control_flow = ControlFlow::Exit, + _ => (), + }, + _ => (), + } + }); +} + +#[cfg(target_os = "emscripten")] +fn main() { + env_logger::init(); + + println!("Initializing external GL context"); + let egl = khronos_egl::Instance::new(khronos_egl::Static); + let display = egl.get_display(khronos_egl::DEFAULT_DISPLAY).unwrap(); + egl.initialize(display) + .expect("unable to initialize display"); + + let attributes = [ + khronos_egl::RED_SIZE, + 8, + khronos_egl::GREEN_SIZE, + 8, + khronos_egl::BLUE_SIZE, + 8, + khronos_egl::NONE, + ]; + + let config = egl + .choose_first_config(display, &attributes) + .unwrap() + .expect("unable to choose config"); + let surface = unsafe { + let window = std::ptr::null_mut::<std::ffi::c_void>(); + egl.create_window_surface(display, config, window, None) + } + .expect("unable to create surface"); + + let context_attributes = [khronos_egl::CONTEXT_CLIENT_VERSION, 3, khronos_egl::NONE]; + + let gl_context = egl + .create_context(display, config, None, &context_attributes) + .expect("unable to create context"); + egl.make_current(display, Some(surface), Some(surface), Some(gl_context)) + .expect("can't make context current"); + + println!("Hooking up to wgpu-hal"); + let exposed = unsafe { + <hal::api::Gles as hal::Api>::Adapter::new_external(|name| { + egl.get_proc_address(name) + .map_or(std::ptr::null(), |p| p as *const _) + }) + } + .expect("GL adapter can't be initialized"); + + fill_screen(&exposed, 640, 400); +} + +#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] +fn main() {} + +fn fill_screen(exposed: &hal::ExposedAdapter<hal::api::Gles>, width: u32, height: u32) { + use hal::{Adapter as _, CommandEncoder as _, Device as _, Queue as _}; + + let mut od = unsafe { + exposed + .adapter + .open(wgt::Features::empty(), &wgt::Limits::downlevel_defaults()) + } + .unwrap(); + + let format = wgt::TextureFormat::Rgba8UnormSrgb; + let texture = <hal::api::Gles as hal::Api>::Texture::default_framebuffer(format); + let view = unsafe { + od.device + .create_texture_view( + &texture, + &hal::TextureViewDescriptor { + label: None, + format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COLOR_TARGET, + range: wgt::ImageSubresourceRange::default(), + }, + ) + .unwrap() + }; + + println!("Filling the screen"); + let mut encoder = unsafe { + od.device + .create_command_encoder(&hal::CommandEncoderDescriptor { + label: None, + queue: &od.queue, + }) + .unwrap() + }; + let rp_desc = hal::RenderPassDescriptor { + label: None, + extent: wgt::Extent3d { + width, + height, + depth_or_array_layers: 1, + }, + sample_count: 1, + color_attachments: &[Some(hal::ColorAttachment { + target: hal::Attachment { + view: &view, + usage: hal::TextureUses::COLOR_TARGET, + }, + resolve_target: None, + ops: hal::AttachmentOps::STORE, + clear_value: wgt::Color::BLUE, + })], + depth_stencil_attachment: None, + multiview: None, + }; + unsafe { + encoder.begin_encoding(None).unwrap(); + encoder.begin_render_pass(&rp_desc); + encoder.end_render_pass(); + let cmd_buf = encoder.end_encoding().unwrap(); + od.queue.submit(&[&cmd_buf], None).unwrap(); + } +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs new file mode 100644 index 0000000000..0456cd719b --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs @@ -0,0 +1,258 @@ +use winapi::shared::dxgiformat; + +pub fn map_texture_format_failable(format: wgt::TextureFormat) -> Option<dxgiformat::DXGI_FORMAT> { + use 
wgt::TextureFormat as Tf; + use winapi::shared::dxgiformat::*; + + Some(match format { + Tf::R8Unorm => DXGI_FORMAT_R8_UNORM, + Tf::R8Snorm => DXGI_FORMAT_R8_SNORM, + Tf::R8Uint => DXGI_FORMAT_R8_UINT, + Tf::R8Sint => DXGI_FORMAT_R8_SINT, + Tf::R16Uint => DXGI_FORMAT_R16_UINT, + Tf::R16Sint => DXGI_FORMAT_R16_SINT, + Tf::R16Unorm => DXGI_FORMAT_R16_UNORM, + Tf::R16Snorm => DXGI_FORMAT_R16_SNORM, + Tf::R16Float => DXGI_FORMAT_R16_FLOAT, + Tf::Rg8Unorm => DXGI_FORMAT_R8G8_UNORM, + Tf::Rg8Snorm => DXGI_FORMAT_R8G8_SNORM, + Tf::Rg8Uint => DXGI_FORMAT_R8G8_UINT, + Tf::Rg8Sint => DXGI_FORMAT_R8G8_SINT, + Tf::Rg16Unorm => DXGI_FORMAT_R16G16_UNORM, + Tf::Rg16Snorm => DXGI_FORMAT_R16G16_SNORM, + Tf::R32Uint => DXGI_FORMAT_R32_UINT, + Tf::R32Sint => DXGI_FORMAT_R32_SINT, + Tf::R32Float => DXGI_FORMAT_R32_FLOAT, + Tf::Rg16Uint => DXGI_FORMAT_R16G16_UINT, + Tf::Rg16Sint => DXGI_FORMAT_R16G16_SINT, + Tf::Rg16Float => DXGI_FORMAT_R16G16_FLOAT, + Tf::Rgba8Unorm => DXGI_FORMAT_R8G8B8A8_UNORM, + Tf::Rgba8UnormSrgb => DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + Tf::Bgra8UnormSrgb => DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, + Tf::Rgba8Snorm => DXGI_FORMAT_R8G8B8A8_SNORM, + Tf::Bgra8Unorm => DXGI_FORMAT_B8G8R8A8_UNORM, + Tf::Rgba8Uint => DXGI_FORMAT_R8G8B8A8_UINT, + Tf::Rgba8Sint => DXGI_FORMAT_R8G8B8A8_SINT, + Tf::Rgb9e5Ufloat => DXGI_FORMAT_R9G9B9E5_SHAREDEXP, + Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, + Tf::Rg11b10Float => DXGI_FORMAT_R11G11B10_FLOAT, + Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT, + Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT, + Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT, + Tf::Rgba16Uint => DXGI_FORMAT_R16G16B16A16_UINT, + Tf::Rgba16Sint => DXGI_FORMAT_R16G16B16A16_SINT, + Tf::Rgba16Unorm => DXGI_FORMAT_R16G16B16A16_UNORM, + Tf::Rgba16Snorm => DXGI_FORMAT_R16G16B16A16_SNORM, + Tf::Rgba16Float => DXGI_FORMAT_R16G16B16A16_FLOAT, + Tf::Rgba32Uint => DXGI_FORMAT_R32G32B32A32_UINT, + Tf::Rgba32Sint => DXGI_FORMAT_R32G32B32A32_SINT, + Tf::Rgba32Float => DXGI_FORMAT_R32G32B32A32_FLOAT, + Tf::Stencil8 => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth16Unorm => DXGI_FORMAT_D16_UNORM, + Tf::Depth24Plus => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth24PlusStencil8 => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth32Float => DXGI_FORMAT_D32_FLOAT, + Tf::Depth32FloatStencil8 => DXGI_FORMAT_D32_FLOAT_S8X24_UINT, + Tf::Bc1RgbaUnorm => DXGI_FORMAT_BC1_UNORM, + Tf::Bc1RgbaUnormSrgb => DXGI_FORMAT_BC1_UNORM_SRGB, + Tf::Bc2RgbaUnorm => DXGI_FORMAT_BC2_UNORM, + Tf::Bc2RgbaUnormSrgb => DXGI_FORMAT_BC2_UNORM_SRGB, + Tf::Bc3RgbaUnorm => DXGI_FORMAT_BC3_UNORM, + Tf::Bc3RgbaUnormSrgb => DXGI_FORMAT_BC3_UNORM_SRGB, + Tf::Bc4RUnorm => DXGI_FORMAT_BC4_UNORM, + Tf::Bc4RSnorm => DXGI_FORMAT_BC4_SNORM, + Tf::Bc5RgUnorm => DXGI_FORMAT_BC5_UNORM, + Tf::Bc5RgSnorm => DXGI_FORMAT_BC5_SNORM, + Tf::Bc6hRgbUfloat => DXGI_FORMAT_BC6H_UF16, + Tf::Bc6hRgbFloat => DXGI_FORMAT_BC6H_SF16, + Tf::Bc7RgbaUnorm => DXGI_FORMAT_BC7_UNORM, + Tf::Bc7RgbaUnormSrgb => DXGI_FORMAT_BC7_UNORM_SRGB, + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | Tf::EacRg11Unorm + | Tf::EacRg11Snorm + | Tf::Astc { + block: _, + channel: _, + } => return None, + }) +} + +pub fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match map_texture_format_failable(format) { + Some(f) => f, + None => unreachable!(), + } +} + +// Note: DXGI doesn't allow sRGB format on the swapchain, +// but creating RTV of swapchain buffers 
with sRGB works. +pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::TextureFormat::Bgra8UnormSrgb => dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, + wgt::TextureFormat::Rgba8UnormSrgb => dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + _ => map_texture_format(format), + } +} + +// SRV and UAV can't use the depth or typeless formats +// see https://microsoft.github.io/DirectX-Specs/d3d/PlanarDepthStencilDDISpec.html#view-creation +pub fn map_texture_format_for_srv_uav( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> Option<dxgiformat::DXGI_FORMAT> { + Some(match (format, aspect) { + (wgt::TextureFormat::Depth16Unorm, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R16_UNORM + } + (wgt::TextureFormat::Depth32Float, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R32_FLOAT + } + (wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS + } + ( + wgt::TextureFormat::Depth24Plus | wgt::TextureFormat::Depth24PlusStencil8, + crate::FormatAspects::DEPTH, + ) => dxgiformat::DXGI_FORMAT_R24_UNORM_X8_TYPELESS, + + (wgt::TextureFormat::Depth32FloatStencil8, crate::FormatAspects::STENCIL) => { + dxgiformat::DXGI_FORMAT_X32_TYPELESS_G8X24_UINT + } + ( + wgt::TextureFormat::Stencil8 | wgt::TextureFormat::Depth24PlusStencil8, + crate::FormatAspects::STENCIL, + ) => dxgiformat::DXGI_FORMAT_X24_TYPELESS_G8_UINT, + + (format, crate::FormatAspects::COLOR) => map_texture_format(format), + + _ => return None, + }) +} + +// see https://microsoft.github.io/DirectX-Specs/d3d/PlanarDepthStencilDDISpec.html#planar-layout-for-staging-from-buffer +pub fn map_texture_format_for_copy( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> Option<dxgiformat::DXGI_FORMAT> { + Some(match (format, aspect) { + (wgt::TextureFormat::Depth16Unorm, crate::FormatAspects::DEPTH) => { + dxgiformat::DXGI_FORMAT_R16_UNORM + } + ( + wgt::TextureFormat::Depth32Float | wgt::TextureFormat::Depth32FloatStencil8, + crate::FormatAspects::DEPTH, + ) => dxgiformat::DXGI_FORMAT_R32_FLOAT, + + ( + wgt::TextureFormat::Stencil8 + | wgt::TextureFormat::Depth24PlusStencil8 + | wgt::TextureFormat::Depth32FloatStencil8, + crate::FormatAspects::STENCIL, + ) => dxgiformat::DXGI_FORMAT_R8_UINT, + + (format, crate::FormatAspects::COLOR) => map_texture_format(format), + + _ => return None, + }) +} + +pub fn map_texture_format_for_resource( + format: wgt::TextureFormat, + usage: crate::TextureUses, + has_view_formats: bool, + casting_fully_typed_format_supported: bool, +) -> dxgiformat::DXGI_FORMAT { + use wgt::TextureFormat as Tf; + use winapi::shared::dxgiformat::*; + + if casting_fully_typed_format_supported { + map_texture_format(format) + + // We might view this resource as srgb or non-srgb + } else if has_view_formats { + match format { + Tf::Rgba8Unorm | Tf::Rgba8UnormSrgb => DXGI_FORMAT_R8G8B8A8_TYPELESS, + Tf::Bgra8Unorm | Tf::Bgra8UnormSrgb => DXGI_FORMAT_B8G8R8A8_TYPELESS, + Tf::Bc1RgbaUnorm | Tf::Bc1RgbaUnormSrgb => DXGI_FORMAT_BC1_TYPELESS, + Tf::Bc2RgbaUnorm | Tf::Bc2RgbaUnormSrgb => DXGI_FORMAT_BC2_TYPELESS, + Tf::Bc3RgbaUnorm | Tf::Bc3RgbaUnormSrgb => DXGI_FORMAT_BC3_TYPELESS, + Tf::Bc7RgbaUnorm | Tf::Bc7RgbaUnormSrgb => DXGI_FORMAT_BC7_TYPELESS, + format => map_texture_format(format), + } + + // We might view this resource as SRV/UAV but also as DSV + } else if format.is_depth_stencil_format() + && usage.intersects( + crate::TextureUses::RESOURCE + | 
crate::TextureUses::STORAGE_READ + | crate::TextureUses::STORAGE_READ_WRITE, + ) + { + match format { + Tf::Depth16Unorm => DXGI_FORMAT_R16_TYPELESS, + Tf::Depth32Float => DXGI_FORMAT_R32_TYPELESS, + Tf::Depth32FloatStencil8 => DXGI_FORMAT_R32G8X24_TYPELESS, + Tf::Stencil8 | Tf::Depth24Plus | Tf::Depth24PlusStencil8 => DXGI_FORMAT_R24G8_TYPELESS, + _ => unreachable!(), + } + } else { + map_texture_format(format) + } +} + +pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::IndexFormat::Uint16 => dxgiformat::DXGI_FORMAT_R16_UINT, + wgt::IndexFormat::Uint32 => dxgiformat::DXGI_FORMAT_R32_UINT, + } +} + +pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT { + use wgt::VertexFormat as Vf; + use winapi::shared::dxgiformat::*; + + match format { + Vf::Unorm8x2 => DXGI_FORMAT_R8G8_UNORM, + Vf::Snorm8x2 => DXGI_FORMAT_R8G8_SNORM, + Vf::Uint8x2 => DXGI_FORMAT_R8G8_UINT, + Vf::Sint8x2 => DXGI_FORMAT_R8G8_SINT, + Vf::Unorm8x4 => DXGI_FORMAT_R8G8B8A8_UNORM, + Vf::Snorm8x4 => DXGI_FORMAT_R8G8B8A8_SNORM, + Vf::Uint8x4 => DXGI_FORMAT_R8G8B8A8_UINT, + Vf::Sint8x4 => DXGI_FORMAT_R8G8B8A8_SINT, + Vf::Unorm16x2 => DXGI_FORMAT_R16G16_UNORM, + Vf::Snorm16x2 => DXGI_FORMAT_R16G16_SNORM, + Vf::Uint16x2 => DXGI_FORMAT_R16G16_UINT, + Vf::Sint16x2 => DXGI_FORMAT_R16G16_SINT, + Vf::Float16x2 => DXGI_FORMAT_R16G16_FLOAT, + Vf::Unorm16x4 => DXGI_FORMAT_R16G16B16A16_UNORM, + Vf::Snorm16x4 => DXGI_FORMAT_R16G16B16A16_SNORM, + Vf::Uint16x4 => DXGI_FORMAT_R16G16B16A16_UINT, + Vf::Sint16x4 => DXGI_FORMAT_R16G16B16A16_SINT, + Vf::Float16x4 => DXGI_FORMAT_R16G16B16A16_FLOAT, + Vf::Uint32 => DXGI_FORMAT_R32_UINT, + Vf::Sint32 => DXGI_FORMAT_R32_SINT, + Vf::Float32 => DXGI_FORMAT_R32_FLOAT, + Vf::Uint32x2 => DXGI_FORMAT_R32G32_UINT, + Vf::Sint32x2 => DXGI_FORMAT_R32G32_SINT, + Vf::Float32x2 => DXGI_FORMAT_R32G32_FLOAT, + Vf::Uint32x3 => DXGI_FORMAT_R32G32B32_UINT, + Vf::Sint32x3 => DXGI_FORMAT_R32G32B32_SINT, + Vf::Float32x3 => DXGI_FORMAT_R32G32B32_FLOAT, + Vf::Uint32x4 => DXGI_FORMAT_R32G32B32A32_UINT, + Vf::Sint32x4 => DXGI_FORMAT_R32G32B32A32_SINT, + Vf::Float32x4 => DXGI_FORMAT_R32G32B32A32_FLOAT, + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + } +} + +pub fn map_acomposite_alpha_mode(_mode: wgt::CompositeAlphaMode) -> d3d12::AlphaMode { + d3d12::AlphaMode::Ignore +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/exception.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/exception.rs new file mode 100644 index 0000000000..fceac7db5f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/exception.rs @@ -0,0 +1,100 @@ +use std::{borrow::Cow, slice}; + +use parking_lot::{lock_api::RawMutex, Mutex}; +use winapi::{ + um::{errhandlingapi, winnt}, + vc::excpt, +}; + +// This is a mutex as opposed to an atomic as we need to completely +// lock everyone out until we have registered or unregistered the +// exception handler, otherwise really nasty races could happen. +// +// By routing all the registration through these functions we can guarentee +// there is either 1 or 0 exception handlers registered, not multiple. 
+static EXCEPTION_HANLDER_COUNT: Mutex<usize> = Mutex::const_new(parking_lot::RawMutex::INIT, 0); + +pub fn register_exception_handler() { + let mut count_guard = EXCEPTION_HANLDER_COUNT.lock(); + if *count_guard == 0 { + unsafe { + errhandlingapi::AddVectoredExceptionHandler(0, Some(output_debug_string_handler)) + }; + } + *count_guard += 1; +} + +pub fn unregister_exception_handler() { + let mut count_guard = EXCEPTION_HANLDER_COUNT.lock(); + if *count_guard == 1 { + unsafe { + errhandlingapi::RemoveVectoredExceptionHandler(output_debug_string_handler as *mut _) + }; + } + *count_guard -= 1; +} + +const MESSAGE_PREFIXES: &[(&str, log::Level)] = &[ + ("CORRUPTION", log::Level::Error), + ("ERROR", log::Level::Error), + ("WARNING", log::Level::Warn), + ("INFO", log::Level::Info), + ("MESSAGE", log::Level::Debug), +]; + +unsafe extern "system" fn output_debug_string_handler( + exception_info: *mut winnt::EXCEPTION_POINTERS, +) -> i32 { + // See https://stackoverflow.com/a/41480827 + let record = unsafe { &*(*exception_info).ExceptionRecord }; + if record.NumberParameters != 2 { + return excpt::EXCEPTION_CONTINUE_SEARCH; + } + let message = match record.ExceptionCode { + winnt::DBG_PRINTEXCEPTION_C => String::from_utf8_lossy(unsafe { + slice::from_raw_parts( + record.ExceptionInformation[1] as *const u8, + record.ExceptionInformation[0], + ) + }), + winnt::DBG_PRINTEXCEPTION_WIDE_C => Cow::Owned(String::from_utf16_lossy(unsafe { + slice::from_raw_parts( + record.ExceptionInformation[1] as *const u16, + record.ExceptionInformation[0], + ) + })), + _ => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + let message = match message.strip_prefix("D3D12 ") { + Some(msg) => msg + .trim_end_matches("\n\0") + .trim_end_matches("[ STATE_CREATION WARNING #0: UNKNOWN]"), + None => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + let (message, level) = match MESSAGE_PREFIXES + .iter() + .find(|&&(prefix, _)| message.starts_with(prefix)) + { + Some(&(prefix, level)) => (&message[prefix.len() + 2..], level), + None => (message, log::Level::Debug), + }; + + if level == log::Level::Warn && message.contains("#82") { + // This is are useless spammy warnings (#820, #821): + // "The application did not pass any clear value to resource creation" + return excpt::EXCEPTION_CONTINUE_SEARCH; + } + + let _ = std::panic::catch_unwind(|| { + log::log!(level, "{}", message); + }); + + if cfg!(debug_assertions) && level == log::Level::Error { + // Set canary and continue + crate::VALIDATION_CANARY.set(); + } + + excpt::EXCEPTION_CONTINUE_EXECUTION +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/factory.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/factory.rs new file mode 100644 index 0000000000..ec3c74c194 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/factory.rs @@ -0,0 +1,210 @@ +use winapi::{ + shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, winerror}, + Interface, +}; + +use super::result::HResult as _; + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum DxgiFactoryType { + Factory1, + Factory2, + Factory4, + Factory6, +} + +pub fn enumerate_adapters(factory: d3d12::DxgiFactory) -> Vec<d3d12::DxgiAdapter> { + let mut adapters = Vec::with_capacity(8); + + for cur_index in 0.. 
{ + if let Some(factory6) = factory.as_factory6() { + profiling::scope!("IDXGIFactory6::EnumAdapterByGpuPreference"); + // We're already at dxgi1.6, we can grab IDXGIAdapater4 directly + let mut adapter4 = d3d12::WeakPtr::<dxgi1_6::IDXGIAdapter4>::null(); + let hr = unsafe { + factory6.EnumAdapterByGpuPreference( + cur_index, + dxgi1_6::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + &dxgi1_6::IDXGIAdapter4::uuidof(), + adapter4.mut_void(), + ) + }; + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.into_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + adapters.push(d3d12::DxgiAdapter::Adapter4(adapter4)); + continue; + } + + profiling::scope!("IDXGIFactory1::EnumAdapters1"); + let mut adapter1 = d3d12::WeakPtr::<dxgi::IDXGIAdapter1>::null(); + let hr = unsafe { factory.EnumAdapters1(cur_index, adapter1.mut_self()) }; + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.into_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + // Do the most aggressive casts first, skipping Adpater4 as we definitely don't have dxgi1_6. + + // Adapter1 -> Adapter3 + unsafe { + match adapter1.cast::<dxgi1_4::IDXGIAdapter3>().into_result() { + Ok(adapter3) => { + adapter1.destroy(); + adapters.push(d3d12::DxgiAdapter::Adapter3(adapter3)); + continue; + } + Err(err) => { + log::info!("Failed casting Adapter1 to Adapter3: {}", err); + } + } + } + + // Adapter1 -> Adapter2 + unsafe { + match adapter1.cast::<dxgi1_2::IDXGIAdapter2>().into_result() { + Ok(adapter2) => { + adapter1.destroy(); + adapters.push(d3d12::DxgiAdapter::Adapter2(adapter2)); + continue; + } + Err(err) => { + log::info!("Failed casting Adapter1 to Adapter2: {}", err); + } + } + } + + adapters.push(d3d12::DxgiAdapter::Adapter1(adapter1)); + } + + adapters +} + +/// Tries to create a IDXGIFactory6, then a IDXGIFactory4, then a IDXGIFactory2, then a IDXGIFactory1, +/// returning the one that succeeds, or if the required_factory_type fails to be +/// created. +pub fn create_factory( + required_factory_type: DxgiFactoryType, + instance_flags: crate::InstanceFlags, +) -> Result<(d3d12::DxgiLib, d3d12::DxgiFactory), crate::InstanceError> { + let lib_dxgi = d3d12::DxgiLib::new().map_err(|_| crate::InstanceError)?; + + let mut factory_flags = d3d12::FactoryCreationFlags::empty(); + + if instance_flags.contains(crate::InstanceFlags::VALIDATION) { + // The `DXGI_CREATE_FACTORY_DEBUG` flag is only allowed to be passed to + // `CreateDXGIFactory2` if the debug interface is actually available. So + // we check for whether it exists first. + match lib_dxgi.get_debug_interface1() { + Ok(pair) => match pair.into_result() { + Ok(debug_controller) => { + unsafe { debug_controller.destroy() }; + factory_flags |= d3d12::FactoryCreationFlags::DEBUG; + } + Err(err) => { + log::warn!("Unable to enable DXGI debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for DXGI not found: {:?}", err); + } + } + + // Intercept `OutputDebugString` calls + super::exception::register_exception_handler(); + } + + // Try to create IDXGIFactory4 + let factory4 = match lib_dxgi.create_factory2(factory_flags) { + Ok(pair) => match pair.into_result() { + Ok(factory) => Some(factory), + // We hard error here as we _should have_ been able to make a factory4 but couldn't. + Err(err) => { + log::error!("Failed to create IDXGIFactory4: {}", err); + return Err(crate::InstanceError); + } + }, + // If we require factory4, hard error. 
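// [Editorial aside, not part of the upstream diff.] `required_factory_type`
// acts as a floor here: failing to reach it is a hard error, while anything
// newer is still preferred and returned when the cast chain succeeds. A hedged
// sketch of a caller, kept as a comment; the chosen factory level and the
// instance flags are illustrative assumptions only:
//
//     fn open_dxgi(flags: crate::InstanceFlags) -> Result<(), crate::InstanceError> {
//         // Ask for at least IDXGIFactory2; keep `_lib_dxgi` alive for as
//         // long as the returned factory is in use.
//         let (_lib_dxgi, factory) = create_factory(DxgiFactoryType::Factory2, flags)?;
//         let adapters = enumerate_adapters(factory);
//         log::info!("enumerated {} adapters", adapters.len());
//         Ok(())
//     }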
+ Err(err) if required_factory_type == DxgiFactoryType::Factory4 => { + log::error!("IDXGIFactory1 creation function not found: {:?}", err); + return Err(crate::InstanceError); + } + // If we don't print it to info as all win7 will hit this case. + Err(err) => { + log::info!("IDXGIFactory1 creation function not found: {:?}", err); + None + } + }; + + if let Some(factory4) = factory4 { + // Try to cast the IDXGIFactory4 into IDXGIFactory6 + let factory6 = unsafe { factory4.cast::<dxgi1_6::IDXGIFactory6>().into_result() }; + match factory6 { + Ok(factory6) => { + unsafe { + factory4.destroy(); + } + return Ok((lib_dxgi, d3d12::DxgiFactory::Factory6(factory6))); + } + // If we require factory6, hard error. + Err(err) if required_factory_type == DxgiFactoryType::Factory6 => { + log::warn!("Failed to cast IDXGIFactory4 to IDXGIFactory6: {:?}", err); + return Err(crate::InstanceError); + } + // If we don't print it to info. + Err(err) => { + log::info!("Failed to cast IDXGIFactory4 to IDXGIFactory6: {:?}", err); + return Ok((lib_dxgi, d3d12::DxgiFactory::Factory4(factory4))); + } + } + } + + // Try to create IDXGIFactory1 + let factory1 = match lib_dxgi.create_factory1() { + Ok(pair) => match pair.into_result() { + Ok(factory) => factory, + Err(err) => { + log::error!("Failed to create IDXGIFactory1: {}", err); + return Err(crate::InstanceError); + } + }, + // We always require at least factory1, so hard error + Err(err) => { + log::error!("IDXGIFactory1 creation function not found: {:?}", err); + return Err(crate::InstanceError); + } + }; + + // Try to cast the IDXGIFactory1 into IDXGIFactory2 + let factory2 = unsafe { factory1.cast::<dxgi1_2::IDXGIFactory2>().into_result() }; + match factory2 { + Ok(factory2) => { + unsafe { + factory1.destroy(); + } + return Ok((lib_dxgi, d3d12::DxgiFactory::Factory2(factory2))); + } + // If we require factory2, hard error. + Err(err) if required_factory_type == DxgiFactoryType::Factory2 => { + log::warn!("Failed to cast IDXGIFactory1 to IDXGIFactory2: {:?}", err); + return Err(crate::InstanceError); + } + // If we don't print it to info. + Err(err) => { + log::info!("Failed to cast IDXGIFactory1 to IDXGIFactory2: {:?}", err); + } + } + + // We tried to create 4 and 2, but only succeeded with 1. 
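// [Editorial aside, not part of the upstream diff.] Whichever branch above was
// taken, callers receive the same `(DxgiLib, DxgiFactory)` pair, and the enum
// records which interface level was actually obtained. A small sketch of
// inspecting that level, assuming the four variants used in this file are the
// only ones:
//
//     fn factory_level(factory: &d3d12::DxgiFactory) -> &'static str {
//         match factory {
//             d3d12::DxgiFactory::Factory1(_) => "IDXGIFactory1",
//             d3d12::DxgiFactory::Factory2(_) => "IDXGIFactory2",
//             d3d12::DxgiFactory::Factory4(_) => "IDXGIFactory4",
//             d3d12::DxgiFactory::Factory6(_) => "IDXGIFactory6",
//         }
//     }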
+ Ok((lib_dxgi, d3d12::DxgiFactory::Factory1(factory1))) +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/mod.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/mod.rs new file mode 100644 index 0000000000..559969633c --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/mod.rs @@ -0,0 +1,5 @@ +pub mod conv; +pub mod exception; +pub mod factory; +pub mod result; +pub mod time; diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/result.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/result.rs new file mode 100644 index 0000000000..db013d2dec --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/result.rs @@ -0,0 +1,42 @@ +use std::borrow::Cow; + +use winapi::shared::winerror; + +pub(crate) trait HResult<O> { + fn into_result(self) -> Result<O, Cow<'static, str>>; + fn into_device_result(self, description: &str) -> Result<O, crate::DeviceError>; +} +impl HResult<()> for i32 { + fn into_result(self) -> Result<(), Cow<'static, str>> { + if self >= 0 { + return Ok(()); + } + let description = match self { + winerror::E_UNEXPECTED => "unexpected", + winerror::E_NOTIMPL => "not implemented", + winerror::E_OUTOFMEMORY => "out of memory", + winerror::E_INVALIDARG => "invalid argument", + _ => return Err(Cow::Owned(format!("0x{:X}", self as u32))), + }; + Err(Cow::Borrowed(description)) + } + fn into_device_result(self, description: &str) -> Result<(), crate::DeviceError> { + self.into_result().map_err(|err| { + log::error!("{} failed: {}", description, err); + if self == winerror::E_OUTOFMEMORY { + crate::DeviceError::OutOfMemory + } else { + crate::DeviceError::Lost + } + }) + } +} + +impl<T> HResult<T> for (T, i32) { + fn into_result(self) -> Result<T, Cow<'static, str>> { + self.1.into_result().map(|()| self.0) + } + fn into_device_result(self, description: &str) -> Result<T, crate::DeviceError> { + self.1.into_device_result(description).map(|()| self.0) + } +} diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/time.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/time.rs new file mode 100644 index 0000000000..fd99c097d7 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/time.rs @@ -0,0 +1,94 @@ +#![allow(dead_code)] // IPresentationManager is unused currently + +use std::mem; + +use winapi::um::{ + profileapi::{QueryPerformanceCounter, QueryPerformanceFrequency}, + winnt::LARGE_INTEGER, +}; + +pub enum PresentationTimer { + /// DXGI uses QueryPerformanceCounter + Dxgi { + /// How many ticks of QPC per second + frequency: u64, + }, + /// IPresentationManager uses QueryInterruptTimePrecise + #[allow(non_snake_case)] + IPresentationManager { + fnQueryInterruptTimePrecise: unsafe extern "system" fn(*mut winapi::ctypes::c_ulonglong), + }, +} + +impl std::fmt::Debug for PresentationTimer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match *self { + Self::Dxgi { frequency } => f + .debug_struct("DXGI") + .field("frequency", &frequency) + .finish(), + Self::IPresentationManager { + fnQueryInterruptTimePrecise, + } => f + .debug_struct("IPresentationManager") + .field( + "QueryInterruptTimePrecise", + &(fnQueryInterruptTimePrecise as usize), + ) + .finish(), + } + } +} + +impl PresentationTimer { + /// Create a presentation timer using QueryPerformanceFrequency (what DXGI uses for presentation times) + pub fn new_dxgi() -> Self { + let mut frequency: LARGE_INTEGER = unsafe { mem::zeroed() }; + let success = unsafe { QueryPerformanceFrequency(&mut frequency) }; + assert_ne!(success, 0); + + Self::Dxgi { + frequency: unsafe { *frequency.QuadPart() } 
as u64, + } + } + + /// Create a presentation timer using QueryInterruptTimePrecise (what IPresentationManager uses for presentation times) + /// + /// Panics if QueryInterruptTimePrecise isn't found (below Win10) + pub fn new_ipresentation_manager() -> Self { + // We need to load this explicitly, as QueryInterruptTimePrecise is only available on Windows 10+ + // + // Docs say it's in kernel32.dll, but it's actually in kernelbase.dll. + let kernelbase = + libloading::os::windows::Library::open_already_loaded("kernelbase.dll").unwrap(); + // No concerns about lifetimes here as kernelbase is always there. + let ptr = unsafe { kernelbase.get(b"QueryInterruptTimePrecise").unwrap() }; + Self::IPresentationManager { + fnQueryInterruptTimePrecise: *ptr, + } + } + + /// Gets the current time in nanoseconds. + pub fn get_timestamp_ns(&self) -> u128 { + // Always do u128 math _after_ hitting the timing function. + match *self { + PresentationTimer::Dxgi { frequency } => { + let mut counter: LARGE_INTEGER = unsafe { mem::zeroed() }; + let success = unsafe { QueryPerformanceCounter(&mut counter) }; + assert_ne!(success, 0); + + // counter * (1_000_000_000 / freq) but re-ordered to make more precise + (unsafe { *counter.QuadPart() } as u128 * 1_000_000_000) / frequency as u128 + } + PresentationTimer::IPresentationManager { + fnQueryInterruptTimePrecise, + } => { + let mut counter = 0; + unsafe { fnQueryInterruptTimePrecise(&mut counter) }; + + // QueryInterruptTimePrecise uses units of 100ns for its tick. + counter as u128 * 100 + } + } + } +} diff --git a/third_party/rust/wgpu-hal/src/auxil/mod.rs b/third_party/rust/wgpu-hal/src/auxil/mod.rs new file mode 100644 index 0000000000..f0aa6a4a89 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/mod.rs @@ -0,0 +1,138 @@ +#[cfg(all(any(feature = "dx11", feature = "dx12"), windows))] +pub(super) mod dxgi; + +#[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] +pub(super) mod renderdoc; + +pub mod db { + pub mod amd { + pub const VENDOR: u32 = 0x1002; + } + pub mod apple { + pub const VENDOR: u32 = 0x106B; + } + pub mod arm { + pub const VENDOR: u32 = 0x13B5; + } + pub mod broadcom { + pub const VENDOR: u32 = 0x14E4; + } + pub mod imgtec { + pub const VENDOR: u32 = 0x1010; + } + pub mod intel { + pub const VENDOR: u32 = 0x8086; + pub const DEVICE_KABY_LAKE_MASK: u32 = 0x5900; + pub const DEVICE_SKY_LAKE_MASK: u32 = 0x1900; + } + pub mod mesa { + // Mesa does not actually have a PCI vendor id. + // + // To match Vulkan, we use the VkVendorId for Mesa in the gles backend so that lavapipe (Vulkan) and + // llvmpipe (OpenGL) have the same vendor id. + pub const VENDOR: u32 = 0x10005; + } + pub mod nvidia { + pub const VENDOR: u32 = 0x10DE; + } + pub mod qualcomm { + pub const VENDOR: u32 = 0x5143; + } +} + +/// Maximum binding size for the shaders that only support `i32` indexing. +/// Interestingly, the index itself can't reach that high, because the minimum +/// element size is 4 bytes, but the compiler toolchain still computes the +/// offset at some intermediate point, internally, as i32. 
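// [Editorial aside, not part of the upstream diff.] Concretely: with the
// minimum 4-byte element, a binding of `1 << 31` bytes puts its last element
// at byte offset (1 << 31) - 4 = 2_147_483_644, which still fits in an i32
// (i32::MAX == 2_147_483_647); anything larger could overflow that
// intermediate i32 offset computation. The same arithmetic as a compile-time
// check:
//
//     const _: () = assert!((1u64 << 31) - 4 <= i32::MAX as u64);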
+pub const MAX_I32_BINDING_SIZE: u32 = 1 << 31; + +pub fn map_naga_stage(stage: naga::ShaderStage) -> wgt::ShaderStages { + match stage { + naga::ShaderStage::Vertex => wgt::ShaderStages::VERTEX, + naga::ShaderStage::Fragment => wgt::ShaderStages::FRAGMENT, + naga::ShaderStage::Compute => wgt::ShaderStages::COMPUTE, + } +} + +impl crate::CopyExtent { + pub fn map_extent_to_copy_size(extent: &wgt::Extent3d, dim: wgt::TextureDimension) -> Self { + Self { + width: extent.width, + height: extent.height, + depth: match dim { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => 1, + wgt::TextureDimension::D3 => extent.depth_or_array_layers, + }, + } + } + + pub fn min(&self, other: &Self) -> Self { + Self { + width: self.width.min(other.width), + height: self.height.min(other.height), + depth: self.depth.min(other.depth), + } + } + + // Get the copy size at a specific mipmap level. This doesn't make most sense, + // since the copy extents are provided *for* a mipmap level to start with. + // But backends use `CopyExtent` more sparingly, and this piece is shared. + pub fn at_mip_level(&self, level: u32) -> Self { + Self { + width: (self.width >> level).max(1), + height: (self.height >> level).max(1), + depth: (self.depth >> level).max(1), + } + } +} + +impl crate::TextureCopyBase { + pub fn max_copy_size(&self, full_size: &crate::CopyExtent) -> crate::CopyExtent { + let mip = full_size.at_mip_level(self.mip_level); + crate::CopyExtent { + width: mip.width - self.origin.x, + height: mip.height - self.origin.y, + depth: mip.depth - self.origin.z, + } + } +} + +impl crate::BufferTextureCopy { + pub fn clamp_size_to_virtual(&mut self, full_size: &crate::CopyExtent) { + let max_size = self.texture_base.max_copy_size(full_size); + self.size = self.size.min(&max_size); + } +} + +impl crate::TextureCopy { + pub fn clamp_size_to_virtual( + &mut self, + full_src_size: &crate::CopyExtent, + full_dst_size: &crate::CopyExtent, + ) { + let max_src_size = self.src_base.max_copy_size(full_src_size); + let max_dst_size = self.dst_base.max_copy_size(full_dst_size); + self.size = self.size.min(&max_src_size).min(&max_dst_size); + } +} + +/// Construct a `CStr` from a byte slice, up to the first zero byte. +/// +/// Return a `CStr` extending from the start of `bytes` up to and +/// including the first zero byte. If there is no zero byte in +/// `bytes`, return `None`. +/// +/// This can be removed when `CStr::from_bytes_until_nul` is stabilized. +/// ([#95027](https://github.com/rust-lang/rust/issues/95027)) +#[allow(dead_code)] +pub(crate) fn cstr_from_bytes_until_nul(bytes: &[std::os::raw::c_char]) -> Option<&std::ffi::CStr> { + if bytes.contains(&0) { + // Safety for `CStr::from_ptr`: + // - We've ensured that the slice does contain a null terminator. + // - The range is valid to read, because the slice covers it. + // - The memory won't be changed, because the slice borrows it. + unsafe { Some(std::ffi::CStr::from_ptr(bytes.as_ptr())) } + } else { + None + } +} diff --git a/third_party/rust/wgpu-hal/src/auxil/renderdoc.rs b/third_party/rust/wgpu-hal/src/auxil/renderdoc.rs new file mode 100644 index 0000000000..15b2c1039a --- /dev/null +++ b/third_party/rust/wgpu-hal/src/auxil/renderdoc.rs @@ -0,0 +1,138 @@ +//! 
RenderDoc integration - <https://renderdoc.org/> + +use std::{ffi, os, ptr}; + +/// The dynamically loaded RenderDoc API function table +#[repr(C)] +#[derive(Debug)] +pub struct RenderDocApi { + api: renderdoc_sys::RENDERDOC_API_1_4_1, + lib: libloading::Library, +} + +unsafe impl Send for RenderDocApi {} +unsafe impl Sync for RenderDocApi {} + +/// RenderDoc API type +#[derive(Debug)] +pub enum RenderDoc { + /// RenderDoc functionality is available + Available { + /// RenderDoc API with function pointers + api: RenderDocApi, + }, + /// RenderDoc functionality is _not_ available + NotAvailable { + /// A description why renderdoc functionality is not available + reason: String, + }, +} + +// TODO: replace with libloading API once supported +#[cfg(unix)] +const RTLD_NOLOAD: i32 = 0x4; + +impl RenderDoc { + pub unsafe fn new() -> Self { + type GetApiFn = unsafe extern "C" fn(version: u32, out: *mut *mut ffi::c_void) -> i32; + + #[cfg(windows)] + let renderdoc_filename = "renderdoc.dll"; + #[cfg(all(unix, not(target_os = "android")))] + let renderdoc_filename = "librenderdoc.so"; + #[cfg(target_os = "android")] + let renderdoc_filename = "libVkLayer_GLES_RenderDoc.so"; + + #[cfg(unix)] + let renderdoc_result: Result<libloading::Library, libloading::Error> = unsafe { + libloading::os::unix::Library::open( + Some(renderdoc_filename), + libloading::os::unix::RTLD_NOW | RTLD_NOLOAD, + ) + } + .map(|lib| lib.into()); + + #[cfg(windows)] + let renderdoc_result: Result<libloading::Library, libloading::Error> = + libloading::os::windows::Library::open_already_loaded(renderdoc_filename) + .map(|lib| lib.into()); + + let renderdoc_lib = match renderdoc_result { + Ok(lib) => lib, + Err(e) => { + return RenderDoc::NotAvailable { + reason: format!( + "Unable to load renderdoc library '{renderdoc_filename}': {e:?}" + ), + } + } + }; + + let get_api: libloading::Symbol<GetApiFn> = + match unsafe { renderdoc_lib.get(b"RENDERDOC_GetAPI\0") } { + Ok(api) => api, + Err(e) => { + return RenderDoc::NotAvailable { + reason: format!( + "Unable to get RENDERDOC_GetAPI from renderdoc library '{renderdoc_filename}': {e:?}" + ), + } + } + }; + let mut obj = ptr::null_mut(); + match unsafe { get_api(10401, &mut obj) } { + 1 => RenderDoc::Available { + api: RenderDocApi { + api: unsafe { *(obj as *mut renderdoc_sys::RENDERDOC_API_1_4_1) }, + lib: renderdoc_lib, + }, + }, + return_value => RenderDoc::NotAvailable { + reason: format!( + "Unable to get API from renderdoc library '{renderdoc_filename}': {return_value}" + ), + }, + } + } +} + +impl Default for RenderDoc { + fn default() -> Self { + if !cfg!(debug_assertions) { + return RenderDoc::NotAvailable { + reason: "RenderDoc support is only enabled with 'debug_assertions'".into(), + }; + } + unsafe { Self::new() } + } +} +/// A implementation specific handle +pub type Handle = *mut os::raw::c_void; + +impl RenderDoc { + /// Start a RenderDoc frame capture + pub unsafe fn start_frame_capture(&self, device_handle: Handle, window_handle: Handle) -> bool { + match *self { + Self::Available { api: ref entry } => { + unsafe { entry.api.StartFrameCapture.unwrap()(device_handle, window_handle) }; + true + } + Self::NotAvailable { ref reason } => { + log::warn!("Could not start RenderDoc frame capture: {}", reason); + false + } + } + } + + /// End a RenderDoc frame capture + pub unsafe fn end_frame_capture(&self, device_handle: Handle, window_handle: Handle) { + match *self { + Self::Available { api: ref entry } => { + unsafe { entry.api.EndFrameCapture.unwrap()(device_handle, 
window_handle) }; + } + Self::NotAvailable { ref reason } => { + log::warn!("Could not end RenderDoc frame capture: {}", reason) + } + }; + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/adapter.rs b/third_party/rust/wgpu-hal/src/dx11/adapter.rs new file mode 100644 index 0000000000..a28106a9bb --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/adapter.rs @@ -0,0 +1,291 @@ +use std::num::NonZeroU64; + +use winapi::um::{d3d11, d3dcommon}; + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + todo!() + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + todo!() + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + todo!() + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + todo!() + } +} + +impl super::Adapter { + pub(super) fn expose( + instance: &super::library::D3D11Lib, + adapter: d3d12::DxgiAdapter, + ) -> Option<crate::ExposedAdapter<super::Api>> { + use d3dcommon::{ + D3D_FEATURE_LEVEL_10_0 as FL10_0, D3D_FEATURE_LEVEL_10_1 as FL10_1, + D3D_FEATURE_LEVEL_11_0 as FL11_0, D3D_FEATURE_LEVEL_11_1 as FL11_1, + D3D_FEATURE_LEVEL_9_1 as FL9_1, D3D_FEATURE_LEVEL_9_2 as FL9_2, + D3D_FEATURE_LEVEL_9_3 as FL9_3, + }; + + let (device, feature_level) = instance.create_device(adapter)?; + + // + // Query Features from d3d11 + // + + let d3d9_features = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D9_OPTIONS1>( + d3d11::D3D11_FEATURE_D3D9_OPTIONS1, + ) + }; + + let d3d10_features = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS>( + d3d11::D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, + ) + }; + + let d3d11_features = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D11_OPTIONS>( + d3d11::D3D11_FEATURE_D3D11_OPTIONS, + ) + }; + + let d3d11_features1 = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D11_OPTIONS1>( + d3d11::D3D11_FEATURE_D3D11_OPTIONS1, + ) + }; + + let d3d11_features2 = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D11_OPTIONS2>( + d3d11::D3D11_FEATURE_D3D11_OPTIONS2, + ) + }; + + let d3d11_features3 = unsafe { + device.check_feature_support::<d3d11::D3D11_FEATURE_DATA_D3D11_OPTIONS3>( + d3d11::D3D11_FEATURE_D3D11_OPTIONS3, + ) + }; + + // + // Fill out features and downlevel features + // + // TODO(cwfitzgerald): Needed downlevel features: 3D dispatch + + let mut features = wgt::Features::DEPTH_CLIP_CONTROL + | wgt::Features::PUSH_CONSTANTS + | wgt::Features::POLYGON_MODE_LINE + | wgt::Features::CLEAR_TEXTURE + | wgt::Features::TEXTURE_FORMAT_16BIT_NORM + | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO; + let mut downlevel = wgt::DownlevelFlags::BASE_VERTEX + | wgt::DownlevelFlags::READ_ONLY_DEPTH_STENCIL + | wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER + | wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES; + + // Features from queries + downlevel.set( + wgt::DownlevelFlags::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES, + d3d9_features.FullNonPow2TextureSupported == 1, + ); + downlevel.set( + wgt::DownlevelFlags::COMPUTE_SHADERS, + d3d10_features.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x == 1, + ); + + // Features from feature level + if feature_level >= FL9_2 { + downlevel |= 
wgt::DownlevelFlags::INDEPENDENT_BLEND; + // formally FL9_1 supports aniso 2, but we don't support that level of distinction + downlevel |= wgt::DownlevelFlags::ANISOTROPIC_FILTERING; + // this is actually the first FL that supports u32 at all + downlevel |= wgt::DownlevelFlags::FULL_DRAW_INDEX_UINT32; + } + + if feature_level >= FL9_3 { + downlevel |= wgt::DownlevelFlags::COMPARISON_SAMPLERS; + } + + if feature_level >= FL10_0 { + downlevel |= wgt::DownlevelFlags::FRAGMENT_STORAGE; + downlevel |= wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE; + downlevel |= wgt::DownlevelFlags::DEPTH_BIAS_CLAMP; + downlevel |= wgt::DownlevelFlags::VERTEX_STORAGE; + features |= wgt::Features::DEPTH_CLIP_CONTROL; + features |= wgt::Features::TIMESTAMP_QUERY; + features |= wgt::Features::PIPELINE_STATISTICS_QUERY; + features |= wgt::Features::SHADER_PRIMITIVE_INDEX; + } + + if feature_level >= FL10_1 { + downlevel |= wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES; + downlevel |= wgt::DownlevelFlags::MULTISAMPLED_SHADING; + } + + if feature_level >= FL11_0 { + downlevel |= wgt::DownlevelFlags::INDIRECT_EXECUTION; + downlevel |= wgt::DownlevelFlags::WEBGPU_TEXTURE_FORMAT_SUPPORT; + features |= wgt::Features::TEXTURE_COMPRESSION_BC; + } + + if feature_level >= FL11_1 { + features |= wgt::Features::VERTEX_WRITABLE_STORAGE; + } + + // + // Fill out limits and alignments + // + + let max_texture_dimension_2d = match feature_level { + FL9_1 | FL9_2 => 2048, + FL9_3 => 4096, + FL10_0 | FL10_1 => 8192, + _ => d3d11::D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION, + }; + + let max_texture_dimension_3d = match feature_level { + FL9_1..=FL9_3 => 256, + _ => d3d11::D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + }; + let max_vertex_buffers = match feature_level { + FL9_1..=FL9_3 => 16, + _ => 32, + }; + let max_compute_workgroup_storage_size = match feature_level { + FL9_1..=FL9_3 => 0, + FL10_0 | FL10_1 => 4096 * 4, // This doesn't have an equiv SM4 constant :\ + _ => d3d11::D3D11_CS_TGSM_REGISTER_COUNT * 4, + }; + let max_workgroup_size_xy = match feature_level { + FL9_1..=FL9_3 => 0, + FL10_0 | FL10_1 => d3d11::D3D11_CS_4_X_THREAD_GROUP_MAX_X, + _ => d3d11::D3D11_CS_THREAD_GROUP_MAX_X, + }; + let max_workgroup_size_z = match feature_level { + FL9_1..=FL9_3 => 0, + FL10_0 | FL10_1 => 1, + _ => d3d11::D3D11_CS_THREAD_GROUP_MAX_Z, + }; + // let max_workgroup_count_z = match feature_level { + // FL9_1..=FL9_3 => 0, + // FL10_0 | FL10_1 => 1, + // _ => d3d11::D3D11_CS_THREAD_GROUP_MAX_Z, + // }; + + let max_sampled_textures = d3d11::D3D11_COMMONSHADER_INPUT_RESOURCE_REGISTER_COUNT; + let max_samplers = d3d11::D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; + let max_constant_buffers = d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1; + let max_uavs = if device.as_device1().is_some() { + d3d11::D3D11_1_UAV_SLOT_COUNT + } else { + d3d11::D3D11_PS_CS_UAV_REGISTER_COUNT + }; + let max_output_registers = d3d11::D3D11_VS_OUTPUT_REGISTER_COMPONENTS; + let max_compute_invocations_per_workgroup = + d3d11::D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; + let max_compute_workgroups_per_dimension = + d3d11::D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION; + + let limits = wgt::Limits { + max_texture_dimension_1d: max_texture_dimension_2d, + max_texture_dimension_2d, + max_texture_dimension_3d, + max_texture_array_layers: max_texture_dimension_3d, + max_bind_groups: u32::MAX, + max_bindings_per_bind_group: 65535, + max_dynamic_uniform_buffers_per_pipeline_layout: max_constant_buffers, + max_dynamic_storage_buffers_per_pipeline_layout: 0, + 
max_sampled_textures_per_shader_stage: max_sampled_textures, + max_samplers_per_shader_stage: max_samplers, + max_storage_buffers_per_shader_stage: max_uavs, + max_storage_textures_per_shader_stage: max_uavs, + max_uniform_buffers_per_shader_stage: max_constant_buffers, + max_uniform_buffer_binding_size: 1 << 16, + max_storage_buffer_binding_size: u32::MAX, + max_vertex_buffers, + max_vertex_attributes: max_vertex_buffers, + max_vertex_buffer_array_stride: u32::MAX, + max_push_constant_size: 1 << 16, + min_uniform_buffer_offset_alignment: 256, + min_storage_buffer_offset_alignment: 1, + max_inter_stage_shader_components: max_output_registers, + max_compute_workgroup_storage_size, + max_compute_invocations_per_workgroup, + max_compute_workgroup_size_x: max_workgroup_size_xy, + max_compute_workgroup_size_y: max_workgroup_size_xy, + max_compute_workgroup_size_z: max_workgroup_size_z, + max_compute_workgroups_per_dimension, + // D3D11_BUFFER_DESC represents the buffer size as a 32 bit int. + max_buffer_size: u32::MAX as u64, + }; + + // + // Other capabilities + // + + let shader_model = match feature_level { + FL9_1..=FL9_3 => wgt::ShaderModel::Sm2, + FL10_0 | FL10_1 => wgt::ShaderModel::Sm4, + _ => wgt::ShaderModel::Sm5, + }; + + let device_info = wgt::AdapterInfo { + name: String::new(), + vendor: 0, + device: 0, + device_type: match d3d11_features2.UnifiedMemoryArchitecture { + 0 => wgt::DeviceType::DiscreteGpu, + 1 => wgt::DeviceType::IntegratedGpu, + _ => unreachable!(), + }, + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Dx11, + }; + + // + // Build up the structs + // + + let api_adapter = super::Adapter { device }; + + let alignments = crate::Alignments { + buffer_copy_offset: NonZeroU64::new(1).unwrap(), // todo + buffer_copy_pitch: NonZeroU64::new(1).unwrap(), // todo + }; + + let capabilities = crate::Capabilities { + limits, + alignments, + downlevel: wgt::DownlevelCapabilities { + flags: downlevel, + limits: wgt::DownlevelLimits {}, + shader_model, + }, + }; + + Some(crate::ExposedAdapter { + adapter: api_adapter, + info: device_info, + features, + capabilities, + }) + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/command.rs b/third_party/rust/wgpu-hal/src/dx11/command.rs new file mode 100644 index 0000000000..1c73f3c325 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/command.rs @@ -0,0 +1,268 @@ +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + todo!() + } + + unsafe fn discard_encoding(&mut self) { + todo!() + } + + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + todo!() + } + + unsafe fn reset_all<I>(&mut self, command_buffers: I) + where + I: Iterator<Item = super::CommandBuffer>, + { + todo!() + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + todo!() + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + todo!() + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + todo!() + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + todo!() + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + src_usage: 
crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + todo!() + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + todo!() + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + todo!() + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + todo!() + } + + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + todo!() + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + todo!() + } + + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + todo!() + } + + unsafe fn end_debug_marker(&mut self) { + todo!() + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + todo!() + } + + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + todo!() + } + + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + todo!() + } + + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: std::ops::Range<u32>) { + todo!() + } + + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: std::ops::Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + todo!() + } + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + todo!() + } + + unsafe fn end_render_pass(&mut self) { + todo!() + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + todo!() + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + todo!() + } + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + todo!() + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: std::ops::Range<f32>) { + todo!() + } + + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + todo!() + } + + unsafe fn set_stencil_reference(&mut self, value: u32) { + todo!() + } + + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + todo!() + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + todo!() + } + + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + todo!() + } + + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + todo!() + } + + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + todo!() + } + + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + todo!() + } + + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + 
max_count: u32, + ) { + todo!() + } + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + todo!() + } + + unsafe fn end_compute_pass(&mut self) { + todo!() + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + todo!() + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + todo!() + } + + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + todo!() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/device.rs b/third_party/rust/wgpu-hal/src/dx11/device.rs new file mode 100644 index 0000000000..3b087c4311 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/device.rs @@ -0,0 +1,242 @@ +use std::{ffi::c_void, mem}; + +use winapi::um::d3d11; + +use crate::auxil::dxgi::result::HResult; + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, queue: super::Queue) { + todo!() + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { + todo!() + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + todo!() + } + + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + todo!() + } + + unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + todo!() + } + + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + todo!() + } + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_texture(&self, texture: super::Texture) { + todo!() + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + todo!() + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + todo!() + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_command_encoder(&self, pool: super::CommandEncoder) { + todo!() + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + todo!() + } + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + todo!() + } + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + todo!() + } + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: 
crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + todo!() + } + + unsafe fn destroy_shader_module(&self, module: super::ShaderModule) { + todo!() + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + todo!() + } + + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + todo!() + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + todo!() + } + + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + todo!() + } + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + todo!() + } + + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + todo!() + } + + unsafe fn destroy_fence(&self, fence: super::Fence) { + todo!() + } + + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + todo!() + } + + unsafe fn wait( + &self, + fence: &super::Fence, + value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + todo!() + } + + unsafe fn start_capture(&self) -> bool { + todo!() + } + + unsafe fn stop_capture(&self) { + todo!() + } +} + +impl crate::Queue<super::Api> for super::Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&super::CommandBuffer], + signal_fence: Option<(&mut super::Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + todo!() + } + + unsafe fn present( + &mut self, + surface: &mut super::Surface, + texture: super::SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + todo!() + } + + unsafe fn get_timestamp_period(&self) -> f32 { + todo!() + } +} + +impl super::D3D11Device { + #[allow(trivial_casts)] // come on + pub unsafe fn check_feature_support<T>(&self, feature: d3d11::D3D11_FEATURE) -> T { + unsafe { + let mut value = mem::zeroed::<T>(); + let ret = self.CheckFeatureSupport( + feature, + &mut value as *mut T as *mut c_void, + mem::size_of::<T>() as u32, + ); + assert_eq!(ret.into_result(), Ok(())); + + value + } + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/instance.rs b/third_party/rust/wgpu-hal/src/dx11/instance.rs new file mode 100644 index 0000000000..104ba9e045 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/instance.rs @@ -0,0 +1,48 @@ +use crate::auxil; + +impl crate::Instance<super::Api> for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + let enable_dx11 = match std::env::var("WGPU_UNSTABLE_DX11_BACKEND") { + Ok(string) => string == "1" || string == "true", + Err(_) => false, + }; + + if !enable_dx11 { + return Err(crate::InstanceError); + } + + let lib_d3d11 = super::library::D3D11Lib::new().ok_or(crate::InstanceError)?; + + let (lib_dxgi, factory) = auxil::dxgi::factory::create_factory( + auxil::dxgi::factory::DxgiFactoryType::Factory1, + desc.flags, + )?; + + Ok(super::Instance { + lib_d3d11, + lib_dxgi, + factory, + }) + } + + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<super::Surface, crate::InstanceError> { + todo!() + } + + unsafe fn destroy_surface(&self, surface: 
super::Surface) { + todo!() + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let adapters = auxil::dxgi::factory::enumerate_adapters(self.factory); + + adapters + .into_iter() + .filter_map(|adapter| super::Adapter::expose(&self.lib_d3d11, adapter)) + .collect() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/library.rs b/third_party/rust/wgpu-hal/src/dx11/library.rs new file mode 100644 index 0000000000..ea597abd56 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/library.rs @@ -0,0 +1,144 @@ +use std::ptr; + +use winapi::{ + shared::{ + dxgi, + minwindef::{HMODULE, UINT}, + winerror, + }, + um::{d3d11, d3d11_1, d3d11_2, d3dcommon}, +}; + +use crate::auxil::dxgi::result::HResult; + +type D3D11CreateDeviceFun = unsafe extern "system" fn( + *mut dxgi::IDXGIAdapter, + d3dcommon::D3D_DRIVER_TYPE, + HMODULE, + UINT, + *const d3dcommon::D3D_FEATURE_LEVEL, + UINT, + UINT, + *mut *mut d3d11::ID3D11Device, + *mut d3dcommon::D3D_FEATURE_LEVEL, + *mut *mut d3d11::ID3D11DeviceContext, +) -> d3d12::HRESULT; + +pub(super) struct D3D11Lib { + // We use the os specific symbol to drop the lifetime parameter. + // + // SAFETY: we must ensure this outlives the Library. + d3d11_create_device: libloading::os::windows::Symbol<D3D11CreateDeviceFun>, + + lib: libloading::Library, +} +impl D3D11Lib { + pub fn new() -> Option<Self> { + unsafe { + let lib = libloading::Library::new("d3d11.dll").ok()?; + + let d3d11_create_device = lib + .get::<D3D11CreateDeviceFun>(b"D3D11CreateDevice") + .ok()? + .into_raw(); + + Some(Self { + lib, + d3d11_create_device, + }) + } + } + + pub fn create_device( + &self, + adapter: d3d12::DxgiAdapter, + ) -> Option<(super::D3D11Device, d3dcommon::D3D_FEATURE_LEVEL)> { + let feature_levels = [ + d3dcommon::D3D_FEATURE_LEVEL_11_1, + d3dcommon::D3D_FEATURE_LEVEL_11_0, + d3dcommon::D3D_FEATURE_LEVEL_10_1, + d3dcommon::D3D_FEATURE_LEVEL_10_0, + d3dcommon::D3D_FEATURE_LEVEL_9_3, + d3dcommon::D3D_FEATURE_LEVEL_9_2, + d3dcommon::D3D_FEATURE_LEVEL_9_1, + ]; + + let mut device = d3d12::WeakPtr::<d3d11::ID3D11Device>::null(); + let mut feature_level: d3dcommon::D3D_FEATURE_LEVEL = 0; + + // We need to try this twice. If the first time fails due to E_INVALIDARG + // we are running on a machine without a D3D11.1 runtime, and need to + // retry without the feature level 11_1 feature level. + // + // Why they thought this was a good API, who knows. + + let mut hr = unsafe { + (self.d3d11_create_device)( + adapter.as_mut_ptr() as *mut _, + d3dcommon::D3D_DRIVER_TYPE_UNKNOWN, + ptr::null_mut(), // software implementation DLL??? + 0, // flags + feature_levels.as_ptr(), + feature_levels.len() as u32, + d3d11::D3D11_SDK_VERSION, + device.mut_self(), + &mut feature_level, + ptr::null_mut(), // device context + ) + }; + + // Try again without FL11_1 + if hr == winerror::E_INVALIDARG { + hr = unsafe { + (self.d3d11_create_device)( + adapter.as_mut_ptr() as *mut _, + d3dcommon::D3D_DRIVER_TYPE_UNKNOWN, + ptr::null_mut(), // software implementation DLL??? 
+ 0, // flags + feature_levels[1..].as_ptr(), + feature_levels[1..].len() as u32, + d3d11::D3D11_SDK_VERSION, + device.mut_self(), + &mut feature_level, + ptr::null_mut(), // device context + ) + }; + } + + // Any errors here are real and we should complain about + if let Err(err) = hr.into_result() { + log::error!("Failed to make a D3D11 device: {}", err); + return None; + } + + // We always try to upcast in highest -> lowest order + + // Device -> Device2 + unsafe { + match device.cast::<d3d11_2::ID3D11Device2>().into_result() { + Ok(device2) => { + device.destroy(); + return Some((super::D3D11Device::Device2(device2), feature_level)); + } + Err(hr) => { + log::info!("Failed to cast device to ID3D11Device2: {}", hr) + } + } + } + + // Device -> Device1 + unsafe { + match device.cast::<d3d11_1::ID3D11Device1>().into_result() { + Ok(device1) => { + device.destroy(); + return Some((super::D3D11Device::Device1(device1), feature_level)); + } + Err(hr) => { + log::info!("Failed to cast device to ID3D11Device1: {}", hr) + } + } + } + + Some((super::D3D11Device::Device(device), feature_level)) + } +} diff --git a/third_party/rust/wgpu-hal/src/dx11/mod.rs b/third_party/rust/wgpu-hal/src/dx11/mod.rs new file mode 100644 index 0000000000..91827874b1 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx11/mod.rs @@ -0,0 +1,137 @@ +#![allow(dead_code)] +#![allow(unused_variables)] + +use winapi::um::{d3d11, d3d11_1, d3d11_2}; + +mod adapter; +mod command; +mod device; +mod instance; +mod library; + +#[derive(Clone)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +pub struct Instance { + lib_d3d11: library::D3D11Lib, + lib_dxgi: d3d12::DxgiLib, + factory: d3d12::DxgiFactory, +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +pub struct Surface {} + +pub struct Adapter { + device: D3D11Device, +} + +unsafe impl Send for Adapter {} +unsafe impl Sync for Adapter {} + +d3d12::weak_com_inheritance_chain! 
{ + #[derive(Debug, Copy, Clone, PartialEq)] + enum D3D11Device { + Device(d3d11::ID3D11Device), from_device, as_device, device; + Device1(d3d11_1::ID3D11Device1), from_device1, as_device1, unwrap_device1; + Device2(d3d11_2::ID3D11Device2), from_device2, as_device2, unwrap_device2; + } +} + +pub struct Device {} + +unsafe impl Send for Device {} +unsafe impl Sync for Device {} + +pub struct Queue {} + +#[derive(Debug)] +pub struct CommandEncoder {} + +#[derive(Debug)] +pub struct CommandBuffer {} + +#[derive(Debug)] +pub struct Buffer {} +#[derive(Debug)] +pub struct Texture {} +#[derive(Debug)] +pub struct SurfaceTexture {} + +impl std::borrow::Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + todo!() + } +} + +#[derive(Debug)] +pub struct TextureView {} +#[derive(Debug)] +pub struct Sampler {} +#[derive(Debug)] +pub struct QuerySet {} +#[derive(Debug)] +pub struct Fence {} +#[derive(Debug)] + +pub struct BindGroupLayout {} +#[derive(Debug)] +pub struct BindGroup {} +#[derive(Debug)] +pub struct PipelineLayout {} +#[derive(Debug)] +pub struct ShaderModule {} +pub struct RenderPipeline {} +pub struct ComputePipeline {} + +impl crate::Surface<Api> for Surface { + unsafe fn configure( + &mut self, + device: &Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + todo!() + } + + unsafe fn unconfigure(&mut self, device: &Device) { + todo!() + } + + unsafe fn acquire_texture( + &mut self, + _timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> { + todo!() + } + + unsafe fn discard_texture(&mut self, texture: SurfaceTexture) { + todo!() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/adapter.rs b/third_party/rust/wgpu-hal/src/dx12/adapter.rs new file mode 100644 index 0000000000..13530afb3e --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/adapter.rs @@ -0,0 +1,587 @@ +use crate::{ + auxil::{self, dxgi::result::HResult as _}, + dx12::SurfaceTarget, +}; +use std::{mem, ptr, sync::Arc, thread}; +use winapi::{ + shared::{dxgi, dxgi1_2, minwindef::DWORD, windef, winerror}, + um::{d3d12 as d3d12_ty, d3d12sdklayers, winuser}, +}; + +impl Drop for super::Adapter { + fn drop(&mut self) { + // Debug tracking alive objects + if !thread::panicking() + && self + .private_caps + .instance_flags + .contains(crate::InstanceFlags::VALIDATION) + { + unsafe { + self.report_live_objects(); + } + } + unsafe { + self.raw.destroy(); + } + } +} + +impl super::Adapter { + pub unsafe fn report_live_objects(&self) { + if let Ok(debug_device) = unsafe { + self.raw + .cast::<d3d12sdklayers::ID3D12DebugDevice>() + .into_result() + } { + unsafe { + debug_device.ReportLiveDeviceObjects( + d3d12sdklayers::D3D12_RLDO_SUMMARY | d3d12sdklayers::D3D12_RLDO_IGNORE_INTERNAL, + ) + }; + unsafe { debug_device.destroy() }; + } + } + + pub fn raw_adapter(&self) -> &d3d12::DxgiAdapter { + &self.raw + } + + #[allow(trivial_casts)] + pub(super) fn expose( + adapter: d3d12::DxgiAdapter, + library: &Arc<d3d12::D3D12Lib>, + instance_flags: crate::InstanceFlags, + dx12_shader_compiler: &wgt::Dx12Compiler, + ) -> Option<crate::ExposedAdapter<super::Api>> { + // Create the device so that we can get the capabilities. 
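// [Editorial aside, not part of the upstream diff.] The device is created
// first because the capability probes that follow all go through
// ID3D12Device::CheckFeatureSupport with the same shape; a sketch of that
// recurring pattern, with `FEATURE_ID` and `FeatureData` as placeholder names:
//
//     let mut data: FeatureData = unsafe { mem::zeroed() };
//     let hr = unsafe {
//         device.CheckFeatureSupport(
//             FEATURE_ID,                        // a d3d12_ty::D3D12_FEATURE_* constant
//             &mut data as *mut _ as *mut _,     // out-struct matching that constant
//             mem::size_of::<FeatureData>() as _,
//         )
//     };
//     // hr == 0 (S_OK) means `data` was filled in and may be trusted.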
+ let device = { + profiling::scope!("ID3D12Device::create_device"); + match library.create_device(*adapter, d3d12::FeatureLevel::L11_0) { + Ok(pair) => match pair.into_result() { + Ok(device) => device, + Err(err) => { + log::warn!("Device creation failed: {}", err); + return None; + } + }, + Err(err) => { + log::warn!("Device creation function is not found: {:?}", err); + return None; + } + } + }; + + profiling::scope!("feature queries"); + + // We have found a possible adapter. + // Acquire the device information. + let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() }; + unsafe { + adapter.unwrap_adapter2().GetDesc2(&mut desc); + } + + let device_name = { + use std::{ffi::OsString, os::windows::ffi::OsStringExt}; + let len = desc.Description.iter().take_while(|&&c| c != 0).count(); + let name = OsString::from_wide(&desc.Description[..len]); + name.to_string_lossy().into_owned() + }; + + let mut features_architecture: d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE = + unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_ARCHITECTURE, + &mut features_architecture as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_ARCHITECTURE>() as _, + ) + }); + + let mut shader_model_support: d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL = + d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL { + HighestShaderModel: d3d12_ty::D3D_SHADER_MODEL_6_0, + }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_SHADER_MODEL, + &mut shader_model_support as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL>() as _, + ) + }); + + let mut workarounds = super::Workarounds::default(); + + let info = wgt::AdapterInfo { + backend: wgt::Backend::Dx12, + name: device_name, + vendor: desc.VendorId, + device: desc.DeviceId, + device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + workarounds.avoid_cpu_descriptor_overwrites = true; + wgt::DeviceType::Cpu + } else if features_architecture.UMA != 0 { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + }, + driver: String::new(), + driver_info: String::new(), + }; + + let mut options: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS = unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS, + &mut options as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS>() as _, + ) + }); + + let _depth_bounds_test_supported = { + let mut features2: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS2, + &mut features2 as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS2>() as _, + ) + }; + hr == 0 && features2.DepthBoundsTestSupported != 0 + }; + + let casting_fully_typed_format_supported = { + let mut features3: crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + 21, // D3D12_FEATURE_D3D12_OPTIONS3 + &mut features3 as *mut _ as *mut _, + mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS3>() as _, + ) + }; + hr == 0 && features3.CastingFullyTypedFormatSupported != 0 + }; + + let private_caps = super::PrivateCapabilities { + instance_flags, + heterogeneous_resource_heaps: options.ResourceHeapTier + != d3d12_ty::D3D12_RESOURCE_HEAP_TIER_1, + memory_architecture: if features_architecture.UMA != 0 { + 
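+ // D3D12_FEATURE_DATA_ARCHITECTURE.UMA reports a unified memory architecture
+ // (CPU and GPU share physical memory, as on most integrated adapters), and
+ // CacheCoherentUMA additionally means GPU accesses are coherent with the
+ // CPU caches; that second bit is what `cache_coherent` records here.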
super::MemoryArchitecture::Unified {
+ cache_coherent: features_architecture.CacheCoherentUMA != 0,
+ }
+ } else {
+ super::MemoryArchitecture::NonUnified
+ },
+ heap_create_not_zeroed: false, //TODO: winapi support for Options7
+ casting_fully_typed_format_supported,
+ };
+
+ // Theoretically limited by VRAM, but in practice 2^20 is the limit
+ let tier3_practical_descriptor_limit = 1 << 20;
+
+ let (full_heap_count, _uav_count) = match options.ResourceBindingTier {
+ d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => (
+ d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1,
+ 8, // conservative, is 64 on feature level 11.1
+ ),
+ d3d12_ty::D3D12_RESOURCE_BINDING_TIER_2 => (
+ d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2,
+ 64,
+ ),
+ d3d12_ty::D3D12_RESOURCE_BINDING_TIER_3 => (
+ tier3_practical_descriptor_limit,
+ tier3_practical_descriptor_limit,
+ ),
+ other => {
+ log::warn!("Unknown resource binding tier {}", other);
+ (
+ d3d12_ty::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1,
+ 8,
+ )
+ }
+ };
+
+ let mut features = wgt::Features::empty()
+ | wgt::Features::DEPTH_CLIP_CONTROL
+ | wgt::Features::DEPTH32FLOAT_STENCIL8
+ | wgt::Features::INDIRECT_FIRST_INSTANCE
+ | wgt::Features::MAPPABLE_PRIMARY_BUFFERS
+ | wgt::Features::MULTI_DRAW_INDIRECT
+ | wgt::Features::MULTI_DRAW_INDIRECT_COUNT
+ | wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER
+ | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO
+ | wgt::Features::POLYGON_MODE_LINE
+ | wgt::Features::POLYGON_MODE_POINT
+ | wgt::Features::VERTEX_WRITABLE_STORAGE
+ | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES
+ | wgt::Features::TIMESTAMP_QUERY
+ | wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES
+ | wgt::Features::TEXTURE_COMPRESSION_BC
+ | wgt::Features::CLEAR_TEXTURE
+ | wgt::Features::TEXTURE_FORMAT_16BIT_NORM
+ | wgt::Features::PUSH_CONSTANTS
+ | wgt::Features::SHADER_PRIMITIVE_INDEX
+ | wgt::Features::RG11B10UFLOAT_RENDERABLE;
+ //TODO: in order to expose this, we need to run a compute shader
+ // that extracts the necessary statistics out of the D3D12 result.
+ // Alternatively, we could allocate a buffer for the query set,
+ // write the results there, and issue a bunch of copy commands.
+ //| wgt::Features::PIPELINE_STATISTICS_QUERY + + features.set( + wgt::Features::CONSERVATIVE_RASTERIZATION, + options.ConservativeRasterizationTier + != d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED, + ); + + features.set( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + shader_model_support.HighestShaderModel >= d3d12_ty::D3D_SHADER_MODEL_5_1, + ); + + // TODO: Determine if IPresentationManager is supported + let presentation_timer = auxil::dxgi::time::PresentationTimer::new_dxgi(); + + let base = wgt::Limits::default(); + + Some(crate::ExposedAdapter { + adapter: super::Adapter { + raw: adapter, + device, + library: Arc::clone(library), + private_caps, + presentation_timer, + workarounds, + dx12_shader_compiler: dx12_shader_compiler.clone(), + }, + info, + features, + capabilities: crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: d3d12_ty::D3D12_REQ_TEXTURE1D_U_DIMENSION, + max_texture_dimension_2d: d3d12_ty::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION + .min(d3d12_ty::D3D12_REQ_TEXTURECUBE_DIMENSION), + max_texture_dimension_3d: d3d12_ty::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + max_texture_array_layers: d3d12_ty::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION, + max_bind_groups: crate::MAX_BIND_GROUPS as u32, + max_bindings_per_bind_group: 65535, + // dynamic offsets take a root constant, so we expose the minimum here + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 128, + _ => full_heap_count, + }, + max_samplers_per_shader_stage: match options.ResourceBindingTier { + d3d12_ty::D3D12_RESOURCE_BINDING_TIER_1 => 16, + _ => d3d12_ty::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, + }, + // these both account towards `uav_count`, but we can't express the limit as as sum + max_storage_buffers_per_shader_stage: base.max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage: base + .max_storage_textures_per_shader_stage, + max_uniform_buffers_per_shader_stage: full_heap_count, + max_uniform_buffer_binding_size: + d3d12_ty::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16, + max_storage_buffer_binding_size: crate::auxil::MAX_I32_BINDING_SIZE, + max_vertex_buffers: d3d12_ty::D3D12_VS_INPUT_REGISTER_COUNT + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: d3d12_ty::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, + max_vertex_buffer_array_stride: d3d12_ty::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, + // The push constants are part of the root signature which + // has a limit of 64 DWORDS (256 bytes), but other resources + // also share the root signature: + // + // - push constants consume a `DWORD` for each `4 bytes` of data + // - If a bind group has buffers it will consume a `DWORD` + // for the descriptor table + // - If a bind group has samplers it will consume a `DWORD` + // for the descriptor table + // - Each dynamic buffer will consume `2 DWORDs` for the + // root descriptor + // - The special constants buffer count as constants + // + // Since we can't know beforehand all root signatures that + // will be created, the max size to be used for push + // constants needs to be set to a reasonable number 
instead. + // + // Source: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits#memory-limits-and-costs + max_push_constant_size: 128, + min_uniform_buffer_offset_alignment: + d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, + min_storage_buffer_offset_alignment: 4, + max_inter_stage_shader_components: base.max_inter_stage_shader_components, + max_compute_workgroup_storage_size: base.max_compute_workgroup_storage_size, //TODO? + max_compute_invocations_per_workgroup: + d3d12_ty::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP, + max_compute_workgroup_size_x: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_X, + max_compute_workgroup_size_y: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Y, + max_compute_workgroup_size_z: d3d12_ty::D3D12_CS_THREAD_GROUP_MAX_Z, + max_compute_workgroups_per_dimension: + d3d12_ty::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + max_buffer_size: u64::MAX, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new( + d3d12_ty::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64, + ) + .unwrap(), + buffer_copy_pitch: wgt::BufferSize::new( + d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64, + ) + .unwrap(), + }, + downlevel: wgt::DownlevelCapabilities::default(), + }, + }) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + _features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let queue = { + profiling::scope!("ID3D12Device::CreateCommandQueue"); + self.device + .create_command_queue( + d3d12::CmdListType::Direct, + d3d12::Priority::Normal, + d3d12::CommandQueueFlags::empty(), + 0, + ) + .into_device_result("Queue creation")? + }; + + let device = super::Device::new( + self.device, + queue, + self.private_caps, + &self.library, + self.dx12_shader_compiler.clone(), + )?; + Ok(crate::OpenDevice { + device, + queue: super::Queue { + raw: queue, + temp_lists: Vec::new(), + }, + }) + } + + #[allow(trivial_casts)] + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + + let raw_format = match auxil::dxgi::conv::map_texture_format_failable(format) { + Some(f) => f, + None => return Tfc::empty(), + }; + let srv_uav_format = if format.is_combined_depth_stencil_format() { + auxil::dxgi::conv::map_texture_format_for_srv_uav( + format, + // use the depth aspect here as opposed to stencil since it has more capabilities + crate::FormatAspects::DEPTH, + ) + } else { + auxil::dxgi::conv::map_texture_format_for_srv_uav( + format, + crate::FormatAspects::from(format), + ) + } + .unwrap(); + + let mut data = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: raw_format, + Support1: unsafe { mem::zeroed() }, + Support2: unsafe { mem::zeroed() }, + }; + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + &mut data as *mut _ as *mut _, + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>() as _, + ) + }); + + // Because we use a different format for SRV and UAV views of depth textures, we need to check + // the features that use SRV/UAVs using the no-depth format. 
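+ // For example, a texture created with a depth format such as
+ // DXGI_FORMAT_D32_FLOAT cannot be bound as an SRV/UAV under that format;
+ // it is typically viewed through a color-typed alias (R32_FLOAT for the
+ // depth data, or R24_UNORM_X8_TYPELESS for the depth aspect of a 24-bit
+ // depth-stencil format). `map_texture_format_for_srv_uav` above performs
+ // that remapping, so the SRV/UAV-related capability bits are queried with
+ // the aliased format rather than the raw one.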
+ let mut data_srv_uav = d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: srv_uav_format, + Support1: d3d12_ty::D3D12_FORMAT_SUPPORT1_NONE, + Support2: d3d12_ty::D3D12_FORMAT_SUPPORT2_NONE, + }; + if raw_format != srv_uav_format { + // Only-recheck if we're using a different format + assert_eq!(winerror::S_OK, unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_FORMAT_SUPPORT, + ptr::addr_of_mut!(data_srv_uav).cast(), + DWORD::try_from(mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_FORMAT_SUPPORT>()) + .unwrap(), + ) + }); + } else { + // Same format, just copy over. + data_srv_uav = data; + } + + let mut caps = Tfc::COPY_SRC | Tfc::COPY_DST; + let is_texture = data.Support1 + & (d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE1D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE2D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURE3D + | d3d12_ty::D3D12_FORMAT_SUPPORT1_TEXTURECUBE) + != 0; + // SRVs use srv_uav_format + caps.set( + Tfc::SAMPLED, + is_texture && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_LOAD != 0, + ); + caps.set( + Tfc::SAMPLED_LINEAR, + data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT_BLEND, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_BLENDABLE != 0, + ); + caps.set( + Tfc::DEPTH_STENCIL_ATTACHMENT, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL != 0, + ); + // UAVs use srv_uav_format + caps.set( + Tfc::STORAGE, + data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW + != 0, + ); + caps.set( + Tfc::STORAGE_READ_WRITE, + data_srv_uav.Support2 & d3d12_ty::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD != 0, + ); + + // We load via UAV/SRV so use srv_uav_format + let no_msaa_load = caps.contains(Tfc::SAMPLED) + && data_srv_uav.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD == 0; + + let no_msaa_target = data.Support1 + & (d3d12_ty::D3D12_FORMAT_SUPPORT1_RENDER_TARGET + | d3d12_ty::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) + != 0 + && data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET == 0; + + caps.set( + Tfc::MULTISAMPLE_RESOLVE, + data.Support1 & d3d12_ty::D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE != 0, + ); + + let mut ms_levels = d3d12_ty::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS { + Format: raw_format, + SampleCount: 0, + Flags: d3d12_ty::D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, + NumQualityLevels: 0, + }; + + let mut set_sample_count = |sc: u32, tfc: Tfc| { + ms_levels.SampleCount = sc; + + if unsafe { + self.device.CheckFeatureSupport( + d3d12_ty::D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + <*mut _>::cast(&mut ms_levels), + mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS>() as _, + ) + } == winerror::S_OK + && ms_levels.NumQualityLevels != 0 + { + caps.set(tfc, !no_msaa_load && !no_msaa_target); + } + }; + + set_sample_count(2, Tfc::MULTISAMPLE_X2); + set_sample_count(4, Tfc::MULTISAMPLE_X4); + set_sample_count(8, Tfc::MULTISAMPLE_X8); + set_sample_count(16, Tfc::MULTISAMPLE_X16); + + caps + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + let current_extent = { + match surface.target { + SurfaceTarget::WndHandle(wnd_handle) => { + let mut rect: windef::RECT = unsafe { mem::zeroed() }; + if unsafe { winuser::GetClientRect(wnd_handle, &mut rect) } != 0 { + Some(wgt::Extent3d { + width: (rect.right - 
rect.left) as u32, + height: (rect.bottom - rect.top) as u32, + depth_or_array_layers: 1, + }) + } else { + log::warn!("Unable to get the window client rect"); + None + } + } + SurfaceTarget::Visual(_) | SurfaceTarget::SurfaceHandle(_) => None, + } + }; + + let mut present_modes = vec![wgt::PresentMode::Mailbox, wgt::PresentMode::Fifo]; + if surface.supports_allow_tearing { + present_modes.push(wgt::PresentMode::Immediate); + } + + Some(crate::SurfaceCapabilities { + formats: vec![ + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Rgba8UnormSrgb, + wgt::TextureFormat::Rgba8Unorm, + wgt::TextureFormat::Rgb10a2Unorm, + wgt::TextureFormat::Rgba16Float, + ], + // we currently use a flip effect which supports 2..=16 buffers + swap_chain_sizes: 2..=16, + current_extent, + // TODO: figure out the exact bounds + extents: wgt::Extent3d { + width: 16, + height: 16, + depth_or_array_layers: 1, + }..=wgt::Extent3d { + width: 4096, + height: 4096, + depth_or_array_layers: 1, + }, + usage: crate::TextureUses::COLOR_TARGET + | crate::TextureUses::COPY_SRC + | crate::TextureUses::COPY_DST, + present_modes, + composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp(self.presentation_timer.get_timestamp_ns()) + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/command.rs b/third_party/rust/wgpu-hal/src/dx12/command.rs new file mode 100644 index 0000000000..4786a61bf9 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/command.rs @@ -0,0 +1,1143 @@ +use crate::auxil::{self, dxgi::result::HResult as _}; + +use super::conv; +use std::{mem, ops::Range, ptr}; +use winapi::um::d3d12 as d3d12_ty; + +fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12_ty::D3D12_BOX { + d3d12_ty::D3D12_BOX { + left: origin.x, + top: origin.y, + right: origin.x + size.width, + bottom: origin.y + size.height, + front: origin.z, + back: origin.z + size.depth, + } +} + +impl crate::BufferTextureCopy { + fn to_subresource_footprint( + &self, + format: wgt::TextureFormat, + ) -> d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + let (block_width, block_height) = format.block_dimensions(); + d3d12_ty::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: self.buffer_layout.offset, + Footprint: d3d12_ty::D3D12_SUBRESOURCE_FOOTPRINT { + Format: auxil::dxgi::conv::map_texture_format_for_copy( + format, + self.texture_base.aspect, + ) + .unwrap(), + Width: self.size.width, + Height: self + .buffer_layout + .rows_per_image + .map_or(self.size.height, |count| count * block_height), + Depth: self.size.depth, + RowPitch: { + let actual = self.buffer_layout.bytes_per_row.unwrap_or_else(|| { + // this may happen for single-line updates + let block_size = format + .block_size(Some(self.texture_base.aspect.map())) + .unwrap(); + (self.size.width / block_width) * block_size + }); + wgt::math::align_to(actual, d3d12_ty::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + }, + }, + } + } +} + +impl super::Temp { + fn prepare_marker(&mut self, marker: &str) -> (&[u16], u32) { + self.marker.clear(); + self.marker.extend(marker.encode_utf16()); + self.marker.push(0); + (&self.marker, self.marker.len() as u32 * 2) + } +} + +impl super::CommandEncoder { + unsafe fn begin_pass(&mut self, kind: super::PassKind, label: crate::Label) { + let list = self.list.unwrap(); + self.pass.kind = kind; + if let Some(label) = label { + let (wide_label, size) = self.temp.prepare_marker(label); + unsafe { 
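+ // The leading 0 is the event metadata tag; PIX-style tooling conventionally
+ // treats 0 as "NUL-terminated UTF-16 string" (an assumption here, the code
+ // does not name the constant), and the last argument is the payload size in
+ // bytes, which is why `prepare_marker` returns `marker.len() * 2`.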
list.BeginEvent(0, wide_label.as_ptr() as *const _, size) }; + self.pass.has_label = true; + } + self.pass.dirty_root_elements = 0; + self.pass.dirty_vertex_buffers = 0; + list.set_descriptor_heaps(&[self.shared.heap_views.raw, self.shared.heap_samplers.raw]); + } + + unsafe fn end_pass(&mut self) { + let list = self.list.unwrap(); + list.set_descriptor_heaps(&[]); + if self.pass.has_label { + unsafe { list.EndEvent() }; + } + self.pass.clear(); + } + + unsafe fn prepare_draw(&mut self, base_vertex: i32, base_instance: u32) { + while self.pass.dirty_vertex_buffers != 0 { + let list = self.list.unwrap(); + let index = self.pass.dirty_vertex_buffers.trailing_zeros(); + self.pass.dirty_vertex_buffers ^= 1 << index; + unsafe { + list.IASetVertexBuffers( + index, + 1, + self.pass.vertex_buffers.as_ptr().offset(index as isize), + ); + } + } + if let Some(root_index) = self.pass.layout.special_constants_root_index { + let needs_update = match self.pass.root_elements[root_index as usize] { + super::RootElement::SpecialConstantBuffer { + base_vertex: other_vertex, + base_instance: other_instance, + other: _, + } => base_vertex != other_vertex || base_instance != other_instance, + _ => true, + }; + if needs_update { + self.pass.dirty_root_elements |= 1 << root_index; + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + base_vertex, + base_instance, + other: 0, + }; + } + } + self.update_root_elements(); + } + + fn prepare_dispatch(&mut self, count: [u32; 3]) { + if let Some(root_index) = self.pass.layout.special_constants_root_index { + let needs_update = match self.pass.root_elements[root_index as usize] { + super::RootElement::SpecialConstantBuffer { + base_vertex, + base_instance, + other, + } => [base_vertex as u32, base_instance, other] != count, + _ => true, + }; + if needs_update { + self.pass.dirty_root_elements |= 1 << root_index; + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + base_vertex: count[0] as i32, + base_instance: count[1], + other: count[2], + }; + } + } + self.update_root_elements(); + } + + //Note: we have to call this lazily before draw calls. Otherwise, D3D complains + // about the root parameters being incompatible with root signature. 
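+ //
+ // A rough sketch of the dirty-bit walk performed here (one bit per root
+ // parameter; illustrative only, it mirrors the loop in the function body):
+ //
+ //     while dirty != 0 {
+ //         let index = dirty.trailing_zeros(); // lowest dirty root index
+ //         dirty ^= 1 << index;                // mark it clean
+ //         // ... re-issue the Set*Root* call recorded for `index` ...
+ //     }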
+ fn update_root_elements(&mut self) { + use super::{BufferViewKind as Bvk, PassKind as Pk}; + + while self.pass.dirty_root_elements != 0 { + let list = self.list.unwrap(); + let index = self.pass.dirty_root_elements.trailing_zeros(); + self.pass.dirty_root_elements ^= 1 << index; + + match self.pass.root_elements[index as usize] { + super::RootElement::Empty => log::error!("Root index {} is not bound", index), + super::RootElement::Constant => { + let info = self.pass.layout.root_constant_info.as_ref().unwrap(); + + for offset in info.range.clone() { + let val = self.pass.constant_data[offset as usize]; + match self.pass.kind { + Pk::Render => list.set_graphics_root_constant(index, val, offset), + Pk::Compute => list.set_compute_root_constant(index, val, offset), + Pk::Transfer => (), + } + } + } + super::RootElement::SpecialConstantBuffer { + base_vertex, + base_instance, + other, + } => match self.pass.kind { + Pk::Render => { + list.set_graphics_root_constant(index, base_vertex as u32, 0); + list.set_graphics_root_constant(index, base_instance, 1); + } + Pk::Compute => { + list.set_compute_root_constant(index, base_vertex as u32, 0); + list.set_compute_root_constant(index, base_instance, 1); + list.set_compute_root_constant(index, other, 2); + } + Pk::Transfer => (), + }, + super::RootElement::Table(descriptor) => match self.pass.kind { + Pk::Render => list.set_graphics_root_descriptor_table(index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(index, descriptor), + Pk::Transfer => (), + }, + super::RootElement::DynamicOffsetBuffer { kind, address } => { + match (self.pass.kind, kind) { + (Pk::Render, Bvk::Constant) => { + list.set_graphics_root_constant_buffer_view(index, address) + } + (Pk::Compute, Bvk::Constant) => { + list.set_compute_root_constant_buffer_view(index, address) + } + (Pk::Render, Bvk::ShaderResource) => { + list.set_graphics_root_shader_resource_view(index, address) + } + (Pk::Compute, Bvk::ShaderResource) => { + list.set_compute_root_shader_resource_view(index, address) + } + (Pk::Render, Bvk::UnorderedAccess) => { + list.set_graphics_root_unordered_access_view(index, address) + } + (Pk::Compute, Bvk::UnorderedAccess) => { + list.set_compute_root_unordered_access_view(index, address) + } + (Pk::Transfer, _) => (), + } + } + } + } + } + + fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) { + log::trace!("Reset signature {:?}", layout.signature); + if let Some(root_index) = layout.special_constants_root_index { + self.pass.root_elements[root_index as usize] = + super::RootElement::SpecialConstantBuffer { + base_vertex: 0, + base_instance: 0, + other: 0, + }; + } + self.pass.layout = layout.clone(); + self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1; + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let list = loop { + if let Some(list) = self.free_lists.pop() { + let reset_result = list + .reset(self.allocator, d3d12::PipelineState::null()) + .into_result(); + if reset_result.is_ok() { + break Some(list); + } else { + unsafe { + list.destroy(); + } + } + } else { + break None; + } + }; + + let list = if let Some(list) = list { + list + } else { + self.device + .create_graphics_command_list( + d3d12::CmdListType::Direct, + self.allocator, + d3d12::PipelineState::null(), + 0, + ) + .into_device_result("Create command list")? 
+ }; + + if let Some(label) = label { + let cwstr = conv::map_label(label); + unsafe { list.SetName(cwstr.as_ptr()) }; + } + + self.list = Some(list); + self.temp.clear(); + self.pass.clear(); + Ok(()) + } + unsafe fn discard_encoding(&mut self) { + if let Some(list) = self.list.take() { + if list.close().into_result().is_ok() { + self.free_lists.push(list); + } else { + unsafe { + list.destroy(); + } + } + } + } + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + let raw = self.list.take().unwrap(); + let closed = raw.close().into_result().is_ok(); + Ok(super::CommandBuffer { raw, closed }) + } + unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) { + for cmd_buf in command_buffers { + if cmd_buf.closed { + self.free_lists.push(cmd_buf.raw); + } else { + unsafe { + cmd_buf.raw.destroy(); + } + } + } + self.allocator.reset(); + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + self.temp.barriers.clear(); + + log::trace!("List {:p} buffer transitions", self.list.unwrap().as_ptr()); + for barrier in barriers { + log::trace!( + "\t{:p}: usage {:?}..{:?}", + barrier.buffer.resource.as_ptr(), + barrier.usage.start, + barrier.usage.end + ); + let s0 = conv::map_buffer_usage_to_state(barrier.usage.start); + let s1 = conv::map_buffer_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + } + }; + self.temp.barriers.push(raw); + } else if barrier.usage.start == crate::BufferUses::STORAGE_READ_WRITE { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + } + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + unsafe { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + }; + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + self.temp.barriers.clear(); + + log::trace!("List {:p} texture transitions", self.list.unwrap().as_ptr()); + for barrier in barriers { + log::trace!( + "\t{:p}: usage {:?}..{:?}, range {:?}", + barrier.texture.resource.as_ptr(), + barrier.usage.start, + barrier.usage.end, + barrier.range + ); + let s0 = conv::map_texture_usage_to_state(barrier.usage.start); + let s1 = conv::map_texture_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + Subresource: d3d12_ty::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + 
StateAfter: s1, + } + }; + + let tex_mip_level_count = barrier.texture.mip_level_count; + let tex_array_layer_count = barrier.texture.array_layer_count(); + + if barrier.range.is_full_resource( + barrier.texture.format, + tex_mip_level_count, + tex_array_layer_count, + ) { + // Only one barrier if it affects the whole image. + self.temp.barriers.push(raw); + } else { + // Selected texture aspect is relevant if the texture format has both depth _and_ stencil aspects. + let planes = if barrier.texture.format.is_combined_depth_stencil_format() { + match barrier.range.aspect { + wgt::TextureAspect::All => 0..2, + wgt::TextureAspect::DepthOnly => 0..1, + wgt::TextureAspect::StencilOnly => 1..2, + } + } else { + match barrier.texture.format { + wgt::TextureFormat::Stencil8 => 1..2, + wgt::TextureFormat::Depth24Plus => 0..2, // TODO: investigate why tests fail if we set this to 0..1 + _ => 0..1, + } + }; + + for mip_level in barrier.range.mip_range(tex_mip_level_count) { + for array_layer in barrier.range.layer_range(tex_array_layer_count) { + for plane in planes.clone() { + unsafe { + raw.u.Transition_mut().Subresource = barrier + .texture + .calc_subresource(mip_level, array_layer, plane); + }; + self.temp.barriers.push(raw); + } + } + } + } + } else if barrier.usage.start == crate::TextureUses::STORAGE_READ_WRITE { + let mut raw = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw.u.UAV_mut() = d3d12_ty::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + } + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + unsafe { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()) + }; + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + let list = self.list.unwrap(); + let mut offset = range.start; + while offset < range.end { + let size = super::ZERO_BUFFER_SIZE.min(range.end - offset); + unsafe { + list.CopyBufferRegion( + buffer.resource.as_mut_ptr(), + offset, + self.shared.zero_buffer.as_mut_ptr(), + 0, + size, + ) + }; + offset += size; + } + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let list = self.list.unwrap(); + for r in regions { + unsafe { + list.CopyBufferRegion( + dst.resource.as_mut_ptr(), + r.dst_offset, + src.resource.as_mut_ptr(), + r.src_offset, + r.size.get(), + ) + }; + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let list = self.list.unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + + for r in regions { + let src_box = make_box(&r.src_base.origin, &r.size); + unsafe { + *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.src_base) + }; + unsafe { + *dst_location.u.SubresourceIndex_mut() = 
dst.calc_subresource_for_copy(&r.dst_base) + }; + + unsafe { + list.CopyTextureRegion( + &dst_location, + r.dst_base.origin.x, + r.dst_base.origin.y, + r.dst_base.origin.z, + &src_location, + &src_box, + ) + }; + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let list = self.list.unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + for r in regions { + let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); + unsafe { + *src_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(dst.format) + }; + unsafe { + *dst_location.u.SubresourceIndex_mut() = + dst.calc_subresource_for_copy(&r.texture_base) + }; + unsafe { + list.CopyTextureRegion( + &dst_location, + r.texture_base.origin.x, + r.texture_base.origin.y, + r.texture_base.origin.z, + &src_location, + &src_box, + ) + }; + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let list = self.list.unwrap(); + let mut src_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: unsafe { mem::zeroed() }, + }; + let mut dst_location = d3d12_ty::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12_ty::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: unsafe { mem::zeroed() }, + }; + for r in regions { + let src_box = make_box(&r.texture_base.origin, &r.size); + unsafe { + *src_location.u.SubresourceIndex_mut() = + src.calc_subresource_for_copy(&r.texture_base) + }; + unsafe { + *dst_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(src.format) + }; + unsafe { list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box) }; + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list + .unwrap() + .BeginQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + }; + } + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list + .unwrap() + .EndQuery(set.raw.as_mut_ptr(), set.raw_ty, index) + }; + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.list.unwrap().EndQuery( + set.raw.as_mut_ptr(), + d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + index, + ) + }; + } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range<u32>) { + // nothing to do here + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _stride: wgt::BufferSize, + ) { + unsafe { + self.list.unwrap().ResolveQueryData( + set.raw.as_mut_ptr(), + set.raw_ty, + range.start, + range.end - range.start, + buffer.resource.as_mut_ptr(), + offset, + ) + }; + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + unsafe { self.begin_pass(super::PassKind::Render, desc.label) }; + let mut color_views = 
[d3d12::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_ATTACHMENTS]; + for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { + if let Some(cat) = cat.as_ref() { + *rtv = cat.target.view.handle_rtv.unwrap().raw; + } else { + *rtv = self.null_rtv_handle.raw; + } + } + + let ds_view = match desc.depth_stencil_attachment { + None => ptr::null(), + Some(ref ds) => { + if ds.target.usage == crate::TextureUses::DEPTH_STENCIL_WRITE { + &ds.target.view.handle_dsv_rw.as_ref().unwrap().raw + } else { + &ds.target.view.handle_dsv_ro.as_ref().unwrap().raw + } + } + }; + + let list = self.list.unwrap(); + unsafe { + list.OMSetRenderTargets( + desc.color_attachments.len() as u32, + color_views.as_ptr(), + 0, + ds_view, + ) + }; + + self.pass.resolves.clear(); + for (rtv, cat) in color_views.iter().zip(desc.color_attachments.iter()) { + if let Some(cat) = cat.as_ref() { + if !cat.ops.contains(crate::AttachmentOps::LOAD) { + let value = [ + cat.clear_value.r as f32, + cat.clear_value.g as f32, + cat.clear_value.b as f32, + cat.clear_value.a as f32, + ]; + list.clear_render_target_view(*rtv, value, &[]); + } + if let Some(ref target) = cat.resolve_target { + self.pass.resolves.push(super::PassResolve { + src: cat.target.view.target_base, + dst: target.view.target_base, + format: target.view.raw_format, + }); + } + } + } + + if let Some(ref ds) = desc.depth_stencil_attachment { + let mut flags = d3d12::ClearFlags::empty(); + let aspects = ds.target.view.aspects; + if !ds.depth_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::DEPTH) + { + flags |= d3d12::ClearFlags::DEPTH; + } + if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD) + && aspects.contains(crate::FormatAspects::STENCIL) + { + flags |= d3d12::ClearFlags::STENCIL; + } + + if !ds_view.is_null() && !flags.is_empty() { + list.clear_depth_stencil_view( + unsafe { *ds_view }, + flags, + ds.clear_value.0, + ds.clear_value.1 as u8, + &[], + ); + } + } + + let raw_vp = d3d12_ty::D3D12_VIEWPORT { + TopLeftX: 0.0, + TopLeftY: 0.0, + Width: desc.extent.width as f32, + Height: desc.extent.height as f32, + MinDepth: 0.0, + MaxDepth: 1.0, + }; + let raw_rect = d3d12_ty::D3D12_RECT { + left: 0, + top: 0, + right: desc.extent.width as i32, + bottom: desc.extent.height as i32, + }; + unsafe { list.RSSetViewports(1, &raw_vp) }; + unsafe { list.RSSetScissorRects(1, &raw_rect) }; + } + + unsafe fn end_render_pass(&mut self) { + if !self.pass.resolves.is_empty() { + let list = self.list.unwrap(); + self.temp.barriers.clear(); + + // All the targets are expected to be in `COLOR_TARGET` state, + // but D3D12 has special source/destination states for the resolves. + for resolve in self.pass.resolves.iter() { + let mut barrier = d3d12_ty::D3D12_RESOURCE_BARRIER { + Type: d3d12_ty::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12_ty::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: unsafe { mem::zeroed() }, + }; + //Note: this assumes `D3D12_RESOURCE_STATE_RENDER_TARGET`. + // If it's not the case, we can include the `TextureUses` in `PassResove`. 
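+ //
+ // The sequence below: transition each resolve source to RESOLVE_SOURCE and
+ // each destination to RESOLVE_DEST, issue
+ // ID3D12GraphicsCommandList::ResolveSubresource for every pair, then swap
+ // StateBefore/StateAfter on the recorded barriers and replay them so the
+ // textures end the pass back in the RENDER_TARGET state.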
+ unsafe { + *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.src.0.as_mut_ptr(), + Subresource: resolve.src.1, + StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_SOURCE, + } + }; + self.temp.barriers.push(barrier); + unsafe { + *barrier.u.Transition_mut() = d3d12_ty::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.dst.0.as_mut_ptr(), + Subresource: resolve.dst.1, + StateBefore: d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12_ty::D3D12_RESOURCE_STATE_RESOLVE_DEST, + } + }; + self.temp.barriers.push(barrier); + } + + if !self.temp.barriers.is_empty() { + profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); + unsafe { + list.ResourceBarrier( + self.temp.barriers.len() as u32, + self.temp.barriers.as_ptr(), + ) + }; + } + + for resolve in self.pass.resolves.iter() { + profiling::scope!("ID3D12GraphicsCommandList::ResolveSubresource"); + unsafe { + list.ResolveSubresource( + resolve.dst.0.as_mut_ptr(), + resolve.dst.1, + resolve.src.0.as_mut_ptr(), + resolve.src.1, + resolve.format, + ) + }; + } + + // Flip all the barriers to reverse, back into `COLOR_TARGET`. + for barrier in self.temp.barriers.iter_mut() { + let transition = unsafe { barrier.u.Transition_mut() }; + mem::swap(&mut transition.StateBefore, &mut transition.StateAfter); + } + if !self.temp.barriers.is_empty() { + profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier"); + unsafe { + list.ResourceBarrier( + self.temp.barriers.len() as u32, + self.temp.barriers.as_ptr(), + ) + }; + } + } + + unsafe { self.end_pass() }; + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + log::trace!("Set group[{}]", index); + let info = &layout.bind_group_infos[index as usize]; + let mut root_index = info.base_root_index as usize; + + // Bind CBV/SRC/UAV descriptor tables + if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { + log::trace!("\tBind element[{}] = view", root_index); + self.pass.root_elements[root_index] = + super::RootElement::Table(group.handle_views.unwrap().gpu); + root_index += 1; + } + + // Bind Sampler descriptor tables. 
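+ // Samplers live in their own D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER heap (see
+ // the two heaps bound in `begin_pass`), so a bind group that contains
+ // samplers gets a second descriptor table, and therefore a second root
+ // parameter, separate from the CBV/SRV/UAV table above.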
+ if info.tables.contains(super::TableTypes::SAMPLERS) { + log::trace!("\tBind element[{}] = sampler", root_index); + self.pass.root_elements[root_index] = + super::RootElement::Table(group.handle_samplers.unwrap().gpu); + root_index += 1; + } + + // Bind root descriptors + for ((&kind, &gpu_base), &offset) in info + .dynamic_buffers + .iter() + .zip(group.dynamic_buffers.iter()) + .zip(dynamic_offsets) + { + log::trace!("\tBind element[{}] = dynamic", root_index); + self.pass.root_elements[root_index] = super::RootElement::DynamicOffsetBuffer { + kind, + address: gpu_base + offset as d3d12::GpuAddress, + }; + root_index += 1; + } + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= (1 << root_index) - (1 << info.base_root_index); + } else { + // D3D12 requires full reset on signature change + self.reset_signature(&layout.shared); + }; + } + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + _stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + let info = layout.shared.root_constant_info.as_ref().unwrap(); + + self.pass.root_elements[info.root_index as usize] = super::RootElement::Constant; + + self.pass.constant_data[(offset as usize)..(offset as usize + data.len())] + .copy_from_slice(data); + + if self.pass.layout.signature == layout.shared.signature { + self.pass.dirty_root_elements |= 1 << info.root_index; + } else { + // D3D12 requires full reset on signature change + self.reset_signature(&layout.shared); + }; + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + let (wide_label, size) = self.temp.prepare_marker(label); + unsafe { + self.list + .unwrap() + .SetMarker(0, wide_label.as_ptr() as *const _, size) + }; + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + let (wide_label, size) = self.temp.prepare_marker(group_label); + unsafe { + self.list + .unwrap() + .BeginEvent(0, wide_label.as_ptr() as *const _, size) + }; + } + unsafe fn end_debug_marker(&mut self) { + unsafe { self.list.unwrap().EndEvent() } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + let list = self.list.unwrap(); + + if self.pass.layout.signature != pipeline.layout.signature { + // D3D12 requires full reset on signature change + list.set_graphics_root_signature(pipeline.layout.signature); + self.reset_signature(&pipeline.layout); + }; + + list.set_pipeline_state(pipeline.raw); + unsafe { list.IASetPrimitiveTopology(pipeline.topology) }; + + for (index, (vb, &stride)) in self + .pass + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_strides.iter()) + .enumerate() + { + if let Some(stride) = stride { + if vb.StrideInBytes != stride.get() { + vb.StrideInBytes = stride.get(); + self.pass.dirty_vertex_buffers |= 1 << index; + } + } + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + self.list.unwrap().set_index_buffer( + binding.resolve_address(), + binding.resolve_size() as u32, + auxil::dxgi::conv::map_index_format(format), + ); + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let vb = &mut self.pass.vertex_buffers[index as usize]; + vb.BufferLocation = binding.resolve_address(); + vb.SizeInBytes = binding.resolve_size() as u32; + self.pass.dirty_vertex_buffers |= 1 << index; + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let raw_vp = 
d3d12_ty::D3D12_VIEWPORT { + TopLeftX: rect.x, + TopLeftY: rect.y, + Width: rect.w, + Height: rect.h, + MinDepth: depth_range.start, + MaxDepth: depth_range.end, + }; + unsafe { self.list.unwrap().RSSetViewports(1, &raw_vp) }; + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + let raw_rect = d3d12_ty::D3D12_RECT { + left: rect.x as i32, + top: rect.y as i32, + right: (rect.x + rect.w) as i32, + bottom: (rect.y + rect.h) as i32, + }; + unsafe { self.list.unwrap().RSSetScissorRects(1, &raw_rect) }; + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + self.list.unwrap().set_stencil_reference(value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.list.unwrap().set_blend_factor(*color); + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { self.prepare_draw(start_vertex as i32, start_instance) }; + self.list + .unwrap() + .draw(vertex_count, instance_count, start_vertex, start_instance); + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { self.prepare_draw(base_vertex, start_instance) }; + self.list.unwrap().draw_indexed( + index_count, + instance_count, + start_index, + base_vertex, + start_instance, + ); + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ) + }; + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + unsafe { self.prepare_draw(0, 0) }; + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ) + }; + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + unsafe { self.begin_pass(super::PassKind::Compute, desc.label) }; + } + unsafe fn end_compute_pass(&mut self) { + unsafe { self.end_pass() }; + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + let list = self.list.unwrap(); + + if self.pass.layout.signature != pipeline.layout.signature { + // D3D12 requires full reset on signature change + 
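+ // Binding a different root signature invalidates every root argument set
+ // under the previous one, so `reset_signature` marks all root elements
+ // dirty and `update_root_elements` re-binds them lazily before the next
+ // dispatch.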
list.set_compute_root_signature(pipeline.layout.signature); + self.reset_signature(&pipeline.layout); + }; + + list.set_pipeline_state(pipeline.raw); + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + self.prepare_dispatch(count); + self.list.unwrap().dispatch(count); + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + self.prepare_dispatch([0; 3]); + //TODO: update special constants indirectly + unsafe { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.dispatch.as_mut_ptr(), + 1, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ) + }; + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/conv.rs b/third_party/rust/wgpu-hal/src/dx12/conv.rs new file mode 100644 index 0000000000..7b39e98ad2 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/conv.rs @@ -0,0 +1,350 @@ +use std::iter; +use winapi::{ + shared::minwindef::BOOL, + um::{d3d12 as d3d12_ty, d3dcommon}, +}; + +pub fn map_buffer_usage_to_resource_flags( + usage: crate::BufferUses, +) -> d3d12_ty::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + if usage.contains(crate::BufferUses::STORAGE_READ_WRITE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + flags +} + +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> d3d12_ty::D3D12_RESOURCE_DIMENSION { + match dim { + wgt::TextureDimension::D1 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE1D, + wgt::TextureDimension::D2 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE2D, + wgt::TextureDimension::D3 => d3d12_ty::D3D12_RESOURCE_DIMENSION_TEXTURE3D, + } +} + +pub fn map_texture_usage_to_resource_flags( + usage: crate::TextureUses, +) -> d3d12_ty::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + + if usage.contains(crate::TextureUses::COLOR_TARGET) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + if usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + if !usage.contains(crate::TextureUses::RESOURCE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + } + if usage.contains(crate::TextureUses::STORAGE_READ_WRITE) { + flags |= d3d12_ty::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + + flags +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE { + use wgt::AddressMode as Am; + match mode { + Am::Repeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_WRAP, + Am::MirrorRepeat => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR, + Am::ClampToEdge => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + Am::ClampToBorder => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_BORDER, + //Am::MirrorClamp => d3d12_ty::D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, + } +} + +pub fn map_filter_mode(mode: wgt::FilterMode) -> d3d12_ty::D3D12_FILTER_TYPE { + match mode { + wgt::FilterMode::Nearest => d3d12_ty::D3D12_FILTER_TYPE_POINT, + wgt::FilterMode::Linear => d3d12_ty::D3D12_FILTER_TYPE_LINEAR, + } +} + +pub fn map_comparison(func: wgt::CompareFunction) -> d3d12_ty::D3D12_COMPARISON_FUNC { + use wgt::CompareFunction as Cf; + match func { + Cf::Never => d3d12_ty::D3D12_COMPARISON_FUNC_NEVER, + Cf::Less => d3d12_ty::D3D12_COMPARISON_FUNC_LESS, + Cf::LessEqual => d3d12_ty::D3D12_COMPARISON_FUNC_LESS_EQUAL, + Cf::Equal => d3d12_ty::D3D12_COMPARISON_FUNC_EQUAL, + Cf::GreaterEqual => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER_EQUAL, + Cf::Greater => d3d12_ty::D3D12_COMPARISON_FUNC_GREATER, + Cf::NotEqual => 
d3d12_ty::D3D12_COMPARISON_FUNC_NOT_EQUAL, + Cf::Always => d3d12_ty::D3D12_COMPARISON_FUNC_ALWAYS, + } +} + +pub fn map_border_color(border_color: Option<wgt::SamplerBorderColor>) -> [f32; 4] { + use wgt::SamplerBorderColor as Sbc; + match border_color { + Some(Sbc::TransparentBlack) | Some(Sbc::Zero) | None => [0.0; 4], + Some(Sbc::OpaqueBlack) => [0.0, 0.0, 0.0, 1.0], + Some(Sbc::OpaqueWhite) => [1.0; 4], + } +} + +pub fn map_visibility(visibility: wgt::ShaderStages) -> d3d12::ShaderVisibility { + match visibility { + wgt::ShaderStages::VERTEX => d3d12::ShaderVisibility::VS, + wgt::ShaderStages::FRAGMENT => d3d12::ShaderVisibility::PS, + _ => d3d12::ShaderVisibility::All, + } +} + +pub fn map_binding_type(ty: &wgt::BindingType) -> d3d12::DescriptorRangeType { + use wgt::BindingType as Bt; + match *ty { + Bt::Sampler { .. } => d3d12::DescriptorRangeType::Sampler, + Bt::Buffer { + ty: wgt::BufferBindingType::Uniform, + .. + } => d3d12::DescriptorRangeType::CBV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: true }, + .. + } + | Bt::Texture { .. } => d3d12::DescriptorRangeType::SRV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: false }, + .. + } + | Bt::StorageTexture { .. } => d3d12::DescriptorRangeType::UAV, + } +} + +pub fn map_label(name: &str) -> Vec<u16> { + name.encode_utf16().chain(iter::once(0)).collect() +} + +pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> d3d12_ty::D3D12_RESOURCE_STATES { + use crate::BufferUses as Bu; + let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + + if usage.intersects(Bu::COPY_SRC) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Bu::COPY_DST) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Bu::INDEX) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + if usage.intersects(Bu::VERTEX | Bu::UNIFORM) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + if usage.intersects(Bu::STORAGE_READ_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } else if usage.intersects(Bu::STORAGE_READ) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Bu::INDIRECT) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + } + state +} + +pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12_ty::D3D12_RESOURCE_STATES { + use crate::TextureUses as Tu; + let mut state = d3d12_ty::D3D12_RESOURCE_STATE_COMMON; + //Note: `RESOLVE_SOURCE` and `RESOLVE_DEST` are not used here + //Note: `PRESENT` is the same as `COMMON` + if usage == crate::TextureUses::UNINITIALIZED { + return state; + } + + if usage.intersects(Tu::COPY_SRC) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Tu::COPY_DST) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Tu::RESOURCE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12_ty::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Tu::COLOR_TARGET) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_RENDER_TARGET; + } + if usage.intersects(Tu::DEPTH_STENCIL_READ) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_READ; + } + if usage.intersects(Tu::DEPTH_STENCIL_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + if usage.intersects(Tu::STORAGE_READ | Tu::STORAGE_READ_WRITE) { + state |= d3d12_ty::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + 
} + state +} + +pub fn map_topology( + topology: wgt::PrimitiveTopology, +) -> ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE, + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, +) { + match topology { + wgt::PrimitiveTopology::PointList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + ), + wgt::PrimitiveTopology::LineList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINELIST, + ), + wgt::PrimitiveTopology::LineStrip => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + ), + wgt::PrimitiveTopology::TriangleList => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + ), + wgt::PrimitiveTopology::TriangleStrip => ( + d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + ), + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE { + match mode { + wgt::PolygonMode::Point => { + log::error!("Point rasterization is not supported"); + d3d12_ty::D3D12_FILL_MODE_WIREFRAME + } + wgt::PolygonMode::Line => d3d12_ty::D3D12_FILL_MODE_WIREFRAME, + wgt::PolygonMode::Fill => d3d12_ty::D3D12_FILL_MODE_SOLID, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => d3d12_ty::D3D12_BLEND_ZERO, + Bf::One => d3d12_ty::D3D12_BLEND_ONE, + Bf::Src if is_alpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, + Bf::Src => d3d12_ty::D3D12_BLEND_SRC_COLOR, + Bf::OneMinusSrc if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, + Bf::OneMinusSrc => d3d12_ty::D3D12_BLEND_INV_SRC_COLOR, + Bf::Dst if is_alpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, + Bf::Dst => d3d12_ty::D3D12_BLEND_DEST_COLOR, + Bf::OneMinusDst if is_alpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, + Bf::OneMinusDst => d3d12_ty::D3D12_BLEND_INV_DEST_COLOR, + Bf::SrcAlpha => d3d12_ty::D3D12_BLEND_SRC_ALPHA, + Bf::OneMinusSrcAlpha => d3d12_ty::D3D12_BLEND_INV_SRC_ALPHA, + Bf::DstAlpha => d3d12_ty::D3D12_BLEND_DEST_ALPHA, + Bf::OneMinusDstAlpha => d3d12_ty::D3D12_BLEND_INV_DEST_ALPHA, + Bf::Constant => d3d12_ty::D3D12_BLEND_BLEND_FACTOR, + Bf::OneMinusConstant => d3d12_ty::D3D12_BLEND_INV_BLEND_FACTOR, + Bf::SrcAlphaSaturated => d3d12_ty::D3D12_BLEND_SRC_ALPHA_SAT, + //Bf::Src1Color if is_alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + //Bf::Src1Color => d3d12_ty::D3D12_BLEND_SRC1_COLOR, + //Bf::OneMinusSrc1Color if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + //Bf::OneMinusSrc1Color => d3d12_ty::D3D12_BLEND_INV_SRC1_COLOR, + //Bf::Src1Alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + //Bf::OneMinusSrc1Alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_blend_component( + component: &wgt::BlendComponent, + is_alpha: bool, +) -> ( + d3d12_ty::D3D12_BLEND_OP, + d3d12_ty::D3D12_BLEND, + d3d12_ty::D3D12_BLEND, +) { + let raw_op = match component.operation { + wgt::BlendOperation::Add => d3d12_ty::D3D12_BLEND_OP_ADD, + wgt::BlendOperation::Subtract => d3d12_ty::D3D12_BLEND_OP_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => d3d12_ty::D3D12_BLEND_OP_REV_SUBTRACT, + wgt::BlendOperation::Min => d3d12_ty::D3D12_BLEND_OP_MIN, + wgt::BlendOperation::Max => d3d12_ty::D3D12_BLEND_OP_MAX, + }; + let raw_src = map_blend_factor(component.src_factor, is_alpha); + let raw_dst = map_blend_factor(component.dst_factor, is_alpha); + (raw_op, raw_src, raw_dst) +} + +pub fn map_render_targets( + color_targets: 
&[Option<wgt::ColorTargetState>], +) -> [d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC; + d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] { + let dummy_target = d3d12_ty::D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: 0, + LogicOpEnable: 0, + SrcBlend: d3d12_ty::D3D12_BLEND_ZERO, + DestBlend: d3d12_ty::D3D12_BLEND_ZERO, + BlendOp: d3d12_ty::D3D12_BLEND_OP_ADD, + SrcBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, + DestBlendAlpha: d3d12_ty::D3D12_BLEND_ZERO, + BlendOpAlpha: d3d12_ty::D3D12_BLEND_OP_ADD, + LogicOp: d3d12_ty::D3D12_LOGIC_OP_CLEAR, + RenderTargetWriteMask: 0, + }; + let mut raw_targets = [dummy_target; d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + + for (raw, ct) in raw_targets.iter_mut().zip(color_targets.iter()) { + if let Some(ct) = ct.as_ref() { + raw.RenderTargetWriteMask = ct.write_mask.bits() as u8; + if let Some(ref blend) = ct.blend { + let (color_op, color_src, color_dst) = map_blend_component(&blend.color, false); + let (alpha_op, alpha_src, alpha_dst) = map_blend_component(&blend.alpha, true); + raw.BlendEnable = 1; + raw.BlendOp = color_op; + raw.SrcBlend = color_src; + raw.DestBlend = color_dst; + raw.BlendOpAlpha = alpha_op; + raw.SrcBlendAlpha = alpha_src; + raw.DestBlendAlpha = alpha_dst; + } + } + } + + raw_targets +} + +fn map_stencil_op(op: wgt::StencilOperation) -> d3d12_ty::D3D12_STENCIL_OP { + use wgt::StencilOperation as So; + match op { + So::Keep => d3d12_ty::D3D12_STENCIL_OP_KEEP, + So::Zero => d3d12_ty::D3D12_STENCIL_OP_ZERO, + So::Replace => d3d12_ty::D3D12_STENCIL_OP_REPLACE, + So::IncrementClamp => d3d12_ty::D3D12_STENCIL_OP_INCR_SAT, + So::IncrementWrap => d3d12_ty::D3D12_STENCIL_OP_INCR, + So::DecrementClamp => d3d12_ty::D3D12_STENCIL_OP_DECR_SAT, + So::DecrementWrap => d3d12_ty::D3D12_STENCIL_OP_DECR, + So::Invert => d3d12_ty::D3D12_STENCIL_OP_INVERT, + } +} + +fn map_stencil_face(face: &wgt::StencilFaceState) -> d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { + d3d12_ty::D3D12_DEPTH_STENCILOP_DESC { + StencilFailOp: map_stencil_op(face.fail_op), + StencilDepthFailOp: map_stencil_op(face.depth_fail_op), + StencilPassOp: map_stencil_op(face.pass_op), + StencilFunc: map_comparison(face.compare), + } +} + +pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> d3d12_ty::D3D12_DEPTH_STENCIL_DESC { + d3d12_ty::D3D12_DEPTH_STENCIL_DESC { + DepthEnable: BOOL::from(ds.is_depth_enabled()), + DepthWriteMask: if ds.depth_write_enabled { + d3d12_ty::D3D12_DEPTH_WRITE_MASK_ALL + } else { + d3d12_ty::D3D12_DEPTH_WRITE_MASK_ZERO + }, + DepthFunc: map_comparison(ds.depth_compare), + StencilEnable: BOOL::from(ds.stencil.is_enabled()), + StencilReadMask: ds.stencil.read_mask as u8, + StencilWriteMask: ds.stencil.write_mask as u8, + FrontFace: map_stencil_face(&ds.stencil.front), + BackFace: map_stencil_face(&ds.stencil.back), + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/descriptor.rs b/third_party/rust/wgpu-hal/src/dx12/descriptor.rs new file mode 100644 index 0000000000..430e3a2f4b --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/descriptor.rs @@ -0,0 +1,311 @@ +use crate::auxil::dxgi::result::HResult as _; +use bit_set::BitSet; +use parking_lot::Mutex; +use range_alloc::RangeAllocator; +use std::fmt; + +const HEAP_SIZE_FIXED: usize = 64; + +#[derive(Copy, Clone)] +pub(super) struct DualHandle { + cpu: d3d12::CpuDescriptor, + pub gpu: d3d12::GpuDescriptor, + /// How large the block allocated to this handle is. 
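+ /// Measured in descriptors; `free_slice` returns `count` consecutive slots to the range allocator.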
+ count: u64, +} + +impl fmt::Debug for DualHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DualHandle") + .field("cpu", &self.cpu.ptr) + .field("gpu", &self.gpu.ptr) + .field("count", &self.count) + .finish() + } +} + +type DescriptorIndex = u64; + +pub(super) struct GeneralHeap { + pub raw: d3d12::DescriptorHeap, + ty: d3d12::DescriptorHeapType, + handle_size: u64, + total_handles: u64, + start: DualHandle, + ranges: Mutex<RangeAllocator<DescriptorIndex>>, +} + +impl GeneralHeap { + pub(super) fn new( + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + total_handles: u64, + ) -> Result<Self, crate::DeviceError> { + let raw = { + profiling::scope!("ID3D12Device::CreateDescriptorHeap"); + device + .create_descriptor_heap( + total_handles as u32, + ty, + d3d12::DescriptorHeapFlags::SHADER_VISIBLE, + 0, + ) + .into_device_result("Descriptor heap creation")? + }; + + Ok(Self { + raw, + ty, + handle_size: device.get_descriptor_increment_size(ty) as u64, + total_handles, + start: DualHandle { + cpu: raw.start_cpu_descriptor(), + gpu: raw.start_gpu_descriptor(), + count: 0, + }, + ranges: Mutex::new(RangeAllocator::new(0..total_handles)), + }) + } + + pub(super) fn at(&self, index: DescriptorIndex, count: u64) -> DualHandle { + assert!(index < self.total_handles); + DualHandle { + cpu: self.cpu_descriptor_at(index), + gpu: self.gpu_descriptor_at(index), + count, + } + } + + fn cpu_descriptor_at(&self, index: u64) -> d3d12::CpuDescriptor { + d3d12::CpuDescriptor { + ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, + } + } + + fn gpu_descriptor_at(&self, index: u64) -> d3d12::GpuDescriptor { + d3d12::GpuDescriptor { + ptr: self.start.gpu.ptr + self.handle_size * index, + } + } + + pub(super) fn allocate_slice(&self, count: u64) -> Result<DescriptorIndex, crate::DeviceError> { + let range = self.ranges.lock().allocate_range(count).map_err(|err| { + log::error!("Unable to allocate descriptors: {:?}", err); + crate::DeviceError::OutOfMemory + })?; + Ok(range.start) + } + + /// Free handles previously given out by this `DescriptorHeapSlice`. + /// Do not use this with handles not given out by this `DescriptorHeapSlice`. + pub(crate) fn free_slice(&self, handle: DualHandle) { + let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size; + self.ranges.lock().free_range(start..start + handle.count); + } +} + +/// Fixed-size free-list allocator for CPU descriptors. +struct FixedSizeHeap { + raw: d3d12::DescriptorHeap, + /// Bit flag representation of available handles in the heap. + /// + /// 0 - Occupied + /// 1 - free + availability: u64, + handle_size: usize, + start: d3d12::CpuDescriptor, +} + +impl FixedSizeHeap { + fn new(device: d3d12::Device, ty: d3d12::DescriptorHeapType) -> Self { + let (heap, _hr) = device.create_descriptor_heap( + HEAP_SIZE_FIXED as _, + ty, + d3d12::DescriptorHeapFlags::empty(), + 0, + ); + + Self { + handle_size: device.get_descriptor_increment_size(ty) as _, + availability: !0, // all free! + start: heap.start_cpu_descriptor(), + raw: heap, + } + } + + fn alloc_handle(&mut self) -> d3d12::CpuDescriptor { + // Find first free slot. + let slot = self.availability.trailing_zeros() as usize; + assert!(slot < HEAP_SIZE_FIXED); + // Set the slot as occupied. 
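+ // For example, with `availability == 0b1110` slot 0 is already taken, `trailing_zeros()`
+ // picks slot 1, and the XOR below flips the mask to `0b1100`.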
+ self.availability ^= 1 << slot; + + d3d12::CpuDescriptor { + ptr: self.start.ptr + self.handle_size * slot, + } + } + + fn free_handle(&mut self, handle: d3d12::CpuDescriptor) { + let slot = (handle.ptr - self.start.ptr) / self.handle_size; + assert!(slot < HEAP_SIZE_FIXED); + assert_eq!(self.availability & (1 << slot), 0); + self.availability ^= 1 << slot; + } + + fn is_full(&self) -> bool { + self.availability == 0 + } + + unsafe fn destroy(&self) { + unsafe { self.raw.destroy() }; + } +} + +#[derive(Clone, Copy)] +pub(super) struct Handle { + pub raw: d3d12::CpuDescriptor, + heap_index: usize, +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Handle") + .field("ptr", &self.raw.ptr) + .field("heap_index", &self.heap_index) + .finish() + } +} + +pub(super) struct CpuPool { + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + heaps: Vec<FixedSizeHeap>, + avaliable_heap_indices: BitSet, +} + +impl CpuPool { + pub(super) fn new(device: d3d12::Device, ty: d3d12::DescriptorHeapType) -> Self { + Self { + device, + ty, + heaps: Vec::new(), + avaliable_heap_indices: BitSet::new(), + } + } + + pub(super) fn alloc_handle(&mut self) -> Handle { + let heap_index = self + .avaliable_heap_indices + .iter() + .next() + .unwrap_or_else(|| { + // Allocate a new heap + let id = self.heaps.len(); + self.heaps.push(FixedSizeHeap::new(self.device, self.ty)); + self.avaliable_heap_indices.insert(id); + id + }); + + let heap = &mut self.heaps[heap_index]; + let handle = Handle { + raw: heap.alloc_handle(), + heap_index, + }; + if heap.is_full() { + self.avaliable_heap_indices.remove(heap_index); + } + + handle + } + + pub(super) fn free_handle(&mut self, handle: Handle) { + self.heaps[handle.heap_index].free_handle(handle.raw); + self.avaliable_heap_indices.insert(handle.heap_index); + } + + pub(super) unsafe fn destroy(&self) { + for heap in &self.heaps { + unsafe { heap.destroy() }; + } + } +} + +pub(super) struct CpuHeapInner { + pub raw: d3d12::DescriptorHeap, + pub stage: Vec<d3d12::CpuDescriptor>, +} + +pub(super) struct CpuHeap { + pub inner: Mutex<CpuHeapInner>, + start: d3d12::CpuDescriptor, + handle_size: u32, + total: u32, +} + +unsafe impl Send for CpuHeap {} +unsafe impl Sync for CpuHeap {} + +impl CpuHeap { + pub(super) fn new( + device: d3d12::Device, + ty: d3d12::DescriptorHeapType, + total: u32, + ) -> Result<Self, crate::DeviceError> { + let handle_size = device.get_descriptor_increment_size(ty); + let raw = device + .create_descriptor_heap(total, ty, d3d12::DescriptorHeapFlags::empty(), 0) + .into_device_result("CPU descriptor heap creation")?; + + Ok(Self { + inner: Mutex::new(CpuHeapInner { + raw, + stage: Vec::new(), + }), + start: raw.start_cpu_descriptor(), + handle_size, + total, + }) + } + + pub(super) fn at(&self, index: u32) -> d3d12::CpuDescriptor { + d3d12::CpuDescriptor { + ptr: self.start.ptr + (self.handle_size * index) as usize, + } + } + + pub(super) unsafe fn destroy(self) { + unsafe { self.inner.into_inner().raw.destroy() }; + } +} + +impl fmt::Debug for CpuHeap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CpuHeap") + .field("start", &self.start.ptr) + .field("handle_size", &self.handle_size) + .field("total", &self.total) + .finish() + } +} + +pub(super) unsafe fn upload( + device: d3d12::Device, + src: &CpuHeapInner, + dst: &GeneralHeap, + dummy_copy_counts: &[u32], +) -> Result<DualHandle, crate::DeviceError> { + let count = src.stage.len() as u32; + let index = 
dst.allocate_slice(count as u64)?; + unsafe { + device.CopyDescriptors( + 1, + &dst.cpu_descriptor_at(index), + &count, + count, + src.stage.as_ptr(), + dummy_copy_counts.as_ptr(), + dst.ty as u32, + ) + }; + Ok(dst.at(index, count as u64)) +} diff --git a/third_party/rust/wgpu-hal/src/dx12/device.rs b/third_party/rust/wgpu-hal/src/dx12/device.rs new file mode 100644 index 0000000000..7e14818572 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/device.rs @@ -0,0 +1,1590 @@ +use crate::auxil::{self, dxgi::result::HResult as _}; + +use super::{conv, descriptor, view}; +use parking_lot::Mutex; +use std::{ffi, mem, num::NonZeroU32, ptr, sync::Arc}; +use winapi::{ + shared::{dxgiformat, dxgitype, minwindef::BOOL, winerror}, + um::{d3d12 as d3d12_ty, synchapi, winbase}, + Interface, +}; + +// this has to match Naga's HLSL backend, and also needs to be null-terminated +const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; + +impl super::Device { + pub(super) fn new( + raw: d3d12::Device, + present_queue: d3d12::CommandQueue, + private_caps: super::PrivateCapabilities, + library: &Arc<d3d12::D3D12Lib>, + dx12_shader_compiler: wgt::Dx12Compiler, + ) -> Result<Self, crate::DeviceError> { + let mem_allocator = super::suballocation::create_allocator_wrapper(&raw)?; + + let dxc_container = match dx12_shader_compiler { + wgt::Dx12Compiler::Dxc { + dxil_path, + dxc_path, + } => super::shader_compilation::get_dxc_container(dxc_path, dxil_path)?, + wgt::Dx12Compiler::Fxc => None, + }; + + let mut idle_fence = d3d12::Fence::null(); + let hr = unsafe { + profiling::scope!("ID3D12Device::CreateFence"); + raw.CreateFence( + 0, + d3d12_ty::D3D12_FENCE_FLAG_NONE, + &d3d12_ty::ID3D12Fence::uuidof(), + idle_fence.mut_void(), + ) + }; + hr.into_device_result("Idle fence creation")?; + + let mut zero_buffer = d3d12::Resource::null(); + unsafe { + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: super::ZERO_BUFFER_SIZE, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: d3d12_ty::D3D12_RESOURCE_FLAG_NONE, + }; + + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match private_caps.memory_architecture { + super::MemoryArchitecture::Unified { .. } => d3d12_ty::D3D12_MEMORY_POOL_L0, + super::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + profiling::scope!("Zero Buffer Allocation"); + raw.CreateCommittedResource( + &heap_properties, + d3d12_ty::D3D12_HEAP_FLAG_NONE, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + zero_buffer.mut_void(), + ) + .into_device_result("Zero buffer creation")?; + + // Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` + // this resource is zeroed by default. 
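+ // Relying on that default is what makes this buffer usable as an all-zeros copy source
+ // (it appears to back buffer clears elsewhere in this backend).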
+ }; + + // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 + let capacity_views = 1_000_000; + let capacity_samplers = 2_048; + + let shared = super::DeviceShared { + zero_buffer, + cmd_signatures: super::CommandSignatures { + draw: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::draw()], + mem::size_of::<wgt::DrawIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (draw) signature creation")?, + draw_indexed: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::draw_indexed()], + mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (draw_indexed) signature creation")?, + dispatch: raw + .create_command_signature( + d3d12::RootSignature::null(), + &[d3d12::IndirectArgument::dispatch()], + mem::size_of::<wgt::DispatchIndirectArgs>() as u32, + 0, + ) + .into_device_result("Command (dispatch) signature creation")?, + }, + heap_views: descriptor::GeneralHeap::new( + raw, + d3d12::DescriptorHeapType::CbvSrvUav, + capacity_views, + )?, + heap_samplers: descriptor::GeneralHeap::new( + raw, + d3d12::DescriptorHeapType::Sampler, + capacity_samplers, + )?, + }; + + let mut rtv_pool = descriptor::CpuPool::new(raw, d3d12::DescriptorHeapType::Rtv); + let null_rtv_handle = rtv_pool.alloc_handle(); + // A null pResource is used to initialize a null descriptor, + // which guarantees D3D11-like null binding behavior (reading 0s, writes are discarded) + raw.create_render_target_view( + d3d12::WeakPtr::null(), + &d3d12::RenderTargetViewDesc::texture_2d( + winapi::shared::dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + 0, + 0, + ), + null_rtv_handle.raw, + ); + + Ok(super::Device { + raw, + present_queue, + idler: super::Idler { + fence: idle_fence, + event: d3d12::Event::create(false, false), + }, + private_caps, + shared: Arc::new(shared), + rtv_pool: Mutex::new(rtv_pool), + dsv_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::Dsv, + )), + srv_uav_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::CbvSrvUav, + )), + sampler_pool: Mutex::new(descriptor::CpuPool::new( + raw, + d3d12::DescriptorHeapType::Sampler, + )), + library: Arc::clone(library), + #[cfg(feature = "renderdoc")] + render_doc: Default::default(), + null_rtv_handle, + mem_allocator, + dxc_container, + }) + } + + pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> { + let cur_value = self.idler.fence.get_value(); + if cur_value == !0 { + return Err(crate::DeviceError::Lost); + } + + let value = cur_value + 1; + log::info!("Waiting for idle with value {}", value); + self.present_queue.signal(self.idler.fence, value); + let hr = self + .idler + .fence + .set_event_on_completion(self.idler.event, value); + hr.into_device_result("Set event")?; + unsafe { synchapi::WaitForSingleObject(self.idler.event.0, winbase::INFINITE) }; + Ok(()) + } + + fn load_shader( + &self, + stage: &crate::ProgrammableStage<super::Api>, + layout: &super::PipelineLayout, + naga_stage: naga::ShaderStage, + ) -> Result<super::CompiledShader, crate::PipelineError> { + use naga::back::hlsl; + + let stage_bit = crate::auxil::map_naga_stage(naga_stage); + let module = &stage.module.naga.module; + //TODO: reuse the writer + let mut source = String::new(); + let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options); + let reflection_info = { + profiling::scope!("naga::back::hlsl::write"); + writer + .write(module, &stage.module.naga.info) + 
.map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))? + }; + + let full_stage = format!( + "{}_{}\0", + naga_stage.to_hlsl_str(), + layout.naga_options.shader_model.to_str() + ); + + let ep_index = module + .entry_points + .iter() + .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + + let raw_ep = reflection_info.entry_point_names[ep_index] + .as_ref() + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{e}")))?; + + let source_name = stage + .module + .raw_name + .as_ref() + .and_then(|cstr| cstr.to_str().ok()) + .unwrap_or_default(); + + // Compile with DXC if available, otherwise fall back to FXC + let (result, log_level) = if let Some(ref dxc_container) = self.dxc_container { + super::shader_compilation::compile_dxc( + self, + &source, + source_name, + raw_ep, + stage_bit, + full_stage, + dxc_container, + ) + } else { + super::shader_compilation::compile_fxc( + self, + &source, + source_name, + &ffi::CString::new(raw_ep.as_str()).unwrap(), + stage_bit, + full_stage, + ) + }; + + log::log!( + log_level, + "Naga generated shader for {:?} at {:?}:\n{}", + raw_ep, + naga_stage, + source + ); + result + } + + pub fn raw_device(&self) -> &d3d12::Device { + &self.raw + } + + pub fn raw_queue(&self) -> &d3d12::CommandQueue { + &self.present_queue + } + + pub unsafe fn texture_from_raw( + resource: d3d12::Resource, + format: wgt::TextureFormat, + dimension: wgt::TextureDimension, + size: wgt::Extent3d, + mip_level_count: u32, + sample_count: u32, + ) -> super::Texture { + super::Texture { + resource, + format, + dimension, + size, + mip_level_count, + sample_count, + allocation: None, + } + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(mut self, queue: super::Queue) { + self.rtv_pool.lock().free_handle(self.null_rtv_handle); + unsafe { self.rtv_pool.into_inner().destroy() }; + unsafe { self.dsv_pool.into_inner().destroy() }; + unsafe { self.srv_uav_pool.into_inner().destroy() }; + unsafe { self.sampler_pool.into_inner().destroy() }; + unsafe { self.shared.destroy() }; + unsafe { self.idler.destroy() }; + self.mem_allocator = None; + unsafe { queue.raw.destroy() }; + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + let mut resource = d3d12::Resource::null(); + let mut size = desc.size; + if desc.usage.contains(crate::BufferUses::UNIFORM) { + let align_mask = d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1; + size = ((size - 1) | align_mask) + 1; + } + + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: d3d12_ty::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: conv::map_buffer_usage_to_resource_flags(desc.usage), + }; + + let (hr, allocation) = + super::suballocation::create_buffer_resource(self, desc, raw_desc, &mut resource)?; + + hr.into_device_result("Buffer creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { resource.SetName(cwstr.as_ptr()) }; + } + + Ok(super::Buffer { + resource, + size, + allocation, + }) + } + + unsafe fn destroy_buffer(&self, mut buffer: super::Buffer) { + unsafe { buffer.resource.destroy() }; + // Only happens when it's using the 
windows_rs feature and there's an allocation + if let Some(alloc) = buffer.allocation.take() { + super::suballocation::free_buffer_allocation( + alloc, + // SAFETY: for allocations to exist, the allocator must exist + unsafe { self.mem_allocator.as_ref().unwrap_unchecked() }, + ); + } + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + let mut ptr = ptr::null_mut(); + // TODO: 0 for subresource should be fine here until map and unmap buffer is subresource aware? + let hr = unsafe { (*buffer.resource).Map(0, ptr::null(), &mut ptr) }; + hr.into_device_result("Map buffer")?; + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize).cast::<u8>() }) + .unwrap(), + //TODO: double-check this. Documentation is a bit misleading - + // it implies that Map/Unmap is needed to invalidate/flush memory. + is_coherent: true, + }) + } + + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + unsafe { (*buffer.resource).Unmap(0, ptr::null()) }; + Ok(()) + } + + unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + use super::suballocation::create_texture_resource; + + let mut resource = d3d12::Resource::null(); + + let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { + Dimension: conv::map_texture_dimension(desc.dimension), + Alignment: 0, + Width: desc.size.width as u64, + Height: desc.size.height, + DepthOrArraySize: desc.size.depth_or_array_layers as u16, + MipLevels: desc.mip_level_count as u16, + Format: auxil::dxgi::conv::map_texture_format_for_resource( + desc.format, + desc.usage, + !desc.view_formats.is_empty(), + self.private_caps.casting_fully_typed_format_supported, + ), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.sample_count, + Quality: 0, + }, + Layout: d3d12_ty::D3D12_TEXTURE_LAYOUT_UNKNOWN, + Flags: conv::map_texture_usage_to_resource_flags(desc.usage), + }; + + let (hr, allocation) = create_texture_resource(self, desc, raw_desc, &mut resource)?; + + hr.into_device_result("Texture creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { resource.SetName(cwstr.as_ptr()) }; + } + + Ok(super::Texture { + resource, + format: desc.format, + dimension: desc.dimension, + size: desc.size, + mip_level_count: desc.mip_level_count, + sample_count: desc.sample_count, + allocation, + }) + } + + unsafe fn destroy_texture(&self, mut texture: super::Texture) { + unsafe { texture.resource.destroy() }; + if let Some(alloc) = texture.allocation.take() { + super::suballocation::free_texture_allocation( + alloc, + // SAFETY: for allocations to exist, the allocator must exist + unsafe { self.mem_allocator.as_ref().unwrap_unchecked() }, + ); + } + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + let view_desc = desc.to_internal(texture); + + Ok(super::TextureView { + raw_format: view_desc.rtv_dsv_format, + aspects: view_desc.aspects, + target_base: ( + texture.resource, + texture.calc_subresource(desc.range.base_mip_level, desc.range.base_array_layer, 0), + ), + handle_srv: if desc.usage.intersects(crate::TextureUses::RESOURCE) { + let 
raw_desc = unsafe { view_desc.to_srv() }; + raw_desc.map(|raw_desc| { + let handle = self.srv_uav_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateShaderResourceView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + handle + }) + } else { + None + }, + handle_uav: if desc.usage.intersects( + crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE, + ) { + let raw_desc = unsafe { view_desc.to_uav() }; + raw_desc.map(|raw_desc| { + let handle = self.srv_uav_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateUnorderedAccessView( + texture.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle.raw, + ) + }; + handle + }) + } else { + None + }, + handle_rtv: if desc.usage.intersects(crate::TextureUses::COLOR_TARGET) { + let raw_desc = unsafe { view_desc.to_rtv() }; + let handle = self.rtv_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateRenderTargetView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + handle_dsv_ro: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_READ) + { + let raw_desc = unsafe { view_desc.to_dsv(true) }; + let handle = self.dsv_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateDepthStencilView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + handle_dsv_rw: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) + { + let raw_desc = unsafe { view_desc.to_dsv(false) }; + let handle = self.dsv_pool.lock().alloc_handle(); + unsafe { + self.raw.CreateDepthStencilView( + texture.resource.as_mut_ptr(), + &raw_desc, + handle.raw, + ) + }; + Some(handle) + } else { + None + }, + }) + } + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + if view.handle_srv.is_some() || view.handle_uav.is_some() { + let mut pool = self.srv_uav_pool.lock(); + if let Some(handle) = view.handle_srv { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_uav { + pool.free_handle(handle); + } + } + if let Some(handle) = view.handle_rtv { + self.rtv_pool.lock().free_handle(handle); + } + if view.handle_dsv_ro.is_some() || view.handle_dsv_rw.is_some() { + let mut pool = self.dsv_pool.lock(); + if let Some(handle) = view.handle_dsv_ro { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_dsv_rw { + pool.free_handle(handle); + } + } + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + let handle = self.sampler_pool.lock().alloc_handle(); + + let reduction = match desc.compare { + Some(_) => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, + None => d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_STANDARD, + }; + let mut filter = conv::map_filter_mode(desc.min_filter) << d3d12_ty::D3D12_MIN_FILTER_SHIFT + | conv::map_filter_mode(desc.mag_filter) << d3d12_ty::D3D12_MAG_FILTER_SHIFT + | conv::map_filter_mode(desc.mipmap_filter) << d3d12_ty::D3D12_MIP_FILTER_SHIFT + | reduction << d3d12_ty::D3D12_FILTER_REDUCTION_TYPE_SHIFT; + + if desc.anisotropy_clamp != 1 { + filter |= d3d12_ty::D3D12_FILTER_ANISOTROPIC; + }; + + let border_color = conv::map_border_color(desc.border_color); + + self.raw.create_sampler( + handle.raw, + filter, + [ + conv::map_address_mode(desc.address_modes[0]), + conv::map_address_mode(desc.address_modes[1]), + conv::map_address_mode(desc.address_modes[2]), + ], + 0.0, + desc.anisotropy_clamp as u32, + 
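+ // The argument order mirrors `D3D12_SAMPLER_DESC`: the `0.0` above is `MipLODBias` and the
+ // clamp value is `MaxAnisotropy`; the comparison function, border color, and LOD range follow.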
conv::map_comparison(desc.compare.unwrap_or(wgt::CompareFunction::Always)), + border_color, + desc.lod_clamp.clone(), + ); + + Ok(super::Sampler { handle }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + self.sampler_pool.lock().free_handle(sampler.handle); + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + let allocator = self + .raw + .create_command_allocator(d3d12::CmdListType::Direct) + .into_device_result("Command allocator creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { allocator.SetName(cwstr.as_ptr()) }; + } + + Ok(super::CommandEncoder { + allocator, + device: self.raw, + shared: Arc::clone(&self.shared), + null_rtv_handle: self.null_rtv_handle, + list: None, + free_lists: Vec::new(), + pass: super::PassState::new(), + temp: super::Temp::default(), + }) + } + unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) { + if let Some(list) = encoder.list { + list.close(); + unsafe { list.destroy() }; + } + for list in encoder.free_lists { + unsafe { list.destroy() }; + } + unsafe { encoder.allocator.destroy() }; + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); + for entry in desc.entries.iter() { + let count = entry.count.map_or(1, NonZeroU32::get); + match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => {} + wgt::BindingType::Buffer { .. } => num_buffer_views += count, + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + num_texture_views += count + } + wgt::BindingType::Sampler { .. } => num_samplers += count, + } + } + + let num_views = num_buffer_views + num_texture_views; + Ok(super::BindGroupLayout { + entries: desc.entries.to_vec(), + cpu_heap_views: if num_views != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + d3d12::DescriptorHeapType::CbvSrvUav, + num_views, + )?; + Some(heap) + } else { + None + }, + cpu_heap_samplers: if num_samplers != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + d3d12::DescriptorHeapType::Sampler, + num_samplers, + )?; + Some(heap) + } else { + None + }, + copy_counts: vec![1; num_views.max(num_samplers) as usize], + }) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + if let Some(cpu_heap) = bg_layout.cpu_heap_views { + unsafe { cpu_heap.destroy() }; + } + if let Some(cpu_heap) = bg_layout.cpu_heap_samplers { + unsafe { cpu_heap.destroy() }; + } + } + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + use naga::back::hlsl; + // Pipeline layouts are implemented as RootSignature for D3D12. + // + // Push Constants are implemented as root constants. + // + // Each descriptor set layout will be one table entry of the root signature. + // We have the additional restriction that SRV/CBV/UAV and samplers need to be + // separated, so each set layout will actually occupy up to 2 entries! + // SRV/CBV/UAV tables are added to the signature first, then Sampler tables, + // and finally dynamic uniform descriptors. + // + // Buffers with dynamic offsets are implemented as root descriptors. 
+ // This is easier than trying to patch up the offset on the shader side. + // + // Root signature layout: + // Root Constants: Parameter=0, Space=0 + // ... + // (bind group [0]) - Space=0 + // View descriptor table, if any + // Sampler descriptor table, if any + // Root descriptors (for dynamic offset buffers) + // (bind group [1]) - Space=0 + // ... + // (bind group [2]) - Space=0 + // Special constant buffer: Space=0 + + //TODO: put lower bind group indices futher down the root signature. See: + // https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model + // Currently impossible because wgpu-core only re-binds the descriptor sets based + // on Vulkan-like layout compatibility rules. + + fn native_binding(bt: &hlsl::BindTarget) -> d3d12::Binding { + d3d12::Binding { + space: bt.space as u32, + register: bt.register, + } + } + + log::debug!( + "Creating Root Signature '{}'", + desc.label.unwrap_or_default() + ); + + let mut binding_map = hlsl::BindingMap::default(); + let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = ( + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + hlsl::BindTarget::default(), + ); + let mut parameters = Vec::new(); + let mut push_constants_target = None; + let mut root_constant_info = None; + + let mut pc_start = u32::MAX; + let mut pc_end = u32::MIN; + + for pc in desc.push_constant_ranges.iter() { + pc_start = pc_start.min(pc.range.start); + pc_end = pc_end.max(pc.range.end); + } + + if pc_start != u32::MAX && pc_end != u32::MIN { + let parameter_index = parameters.len(); + let size = (pc_end - pc_start) / 4; + log::debug!( + "\tParam[{}] = push constant (count = {})", + parameter_index, + size, + ); + parameters.push(d3d12::RootParameter::constants( + d3d12::ShaderVisibility::All, + native_binding(&bind_cbv), + size, + )); + let binding = bind_cbv.clone(); + bind_cbv.register += 1; + root_constant_info = Some(super::RootConstantInfo { + root_index: parameter_index as u32, + range: (pc_start / 4)..(pc_end / 4), + }); + push_constants_target = Some(binding); + + bind_cbv.space += 1; + } + + // Collect the whole number of bindings we will create upfront. + // It allows us to preallocate enough storage to avoid reallocation, + // which could cause invalid pointers. + let total_non_dynamic_entries = desc + .bind_group_layouts + .iter() + .flat_map(|bgl| { + bgl.entries.iter().map(|entry| match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => 0, + _ => 1, + }) + }) + .sum(); + let mut ranges = Vec::with_capacity(total_non_dynamic_entries); + + let mut bind_group_infos = + arrayvec::ArrayVec::<super::BindGroupInfo, { crate::MAX_BIND_GROUPS }>::default(); + for (index, bgl) in desc.bind_group_layouts.iter().enumerate() { + let mut info = super::BindGroupInfo { + tables: super::TableTypes::empty(), + base_root_index: parameters.len() as u32, + dynamic_buffers: Vec::new(), + }; + + let mut visibility_view_static = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic = wgt::ShaderStages::empty(); + let mut visibility_sampler = wgt::ShaderStages::empty(); + for entry in bgl.entries.iter() { + match entry.ty { + wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility, + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. 
+ } => visibility_view_dynamic |= entry.visibility, + _ => visibility_view_static |= entry.visibility, + } + } + + // SRV/CBV/UAV descriptor tables + let mut range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => continue, + ref other => conv::map_binding_type(other), + }; + let bt = match range_ty { + d3d12::DescriptorRangeType::CBV => &mut bind_cbv, + d3d12::DescriptorRangeType::SRV => &mut bind_srv, + d3d12::DescriptorRangeType::UAV => &mut bind_uav, + d3d12::DescriptorRangeType::Sampler => continue, + }; + + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bt.clone() + }, + ); + ranges.push(d3d12::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native_binding(bt), + d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1); + } + if ranges.len() > range_base { + log::debug!( + "\tParam[{}] = views (vis = {:?}, count = {})", + parameters.len(), + visibility_view_static, + ranges.len() - range_base, + ); + parameters.push(d3d12::RootParameter::descriptor_table( + conv::map_visibility(visibility_view_static), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SRV_CBV_UAV; + } + + // Sampler descriptor tables + range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Sampler { .. } => d3d12::DescriptorRangeType::Sampler, + _ => continue, + }; + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bind_sampler.clone() + }, + ); + ranges.push(d3d12::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native_binding(&bind_sampler), + d3d12_ty::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1); + } + if ranges.len() > range_base { + log::debug!( + "\tParam[{}] = samplers (vis = {:?}, count = {})", + parameters.len(), + visibility_sampler, + ranges.len() - range_base, + ); + parameters.push(d3d12::RootParameter::descriptor_table( + conv::map_visibility(visibility_sampler), + &ranges[range_base..], + )); + info.tables |= super::TableTypes::SAMPLERS; + } + + // Root (dynamic) descriptor tables + let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic); + for entry in bgl.entries.iter() { + let buffer_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + ty, + .. 
+ } => ty, + _ => continue, + }; + + let (kind, parameter_ty, bt) = match buffer_ty { + wgt::BufferBindingType::Uniform => ( + super::BufferViewKind::Constant, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_CBV, + &mut bind_cbv, + ), + wgt::BufferBindingType::Storage { read_only: true } => ( + super::BufferViewKind::ShaderResource, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_SRV, + &mut bind_srv, + ), + wgt::BufferBindingType::Storage { read_only: false } => ( + super::BufferViewKind::UnorderedAccess, + d3d12_ty::D3D12_ROOT_PARAMETER_TYPE_UAV, + &mut bind_uav, + ), + }; + + binding_map.insert( + naga::ResourceBinding { + group: index as u32, + binding: entry.binding, + }, + hlsl::BindTarget { + binding_array_size: entry.count.map(NonZeroU32::get), + ..bt.clone() + }, + ); + info.dynamic_buffers.push(kind); + + log::debug!( + "\tParam[{}] = dynamic {:?} (vis = {:?})", + parameters.len(), + buffer_ty, + dynamic_buffers_visibility, + ); + parameters.push(d3d12::RootParameter::descriptor( + parameter_ty, + dynamic_buffers_visibility, + native_binding(bt), + )); + + bt.register += entry.count.map_or(1, NonZeroU32::get); + } + + bind_group_infos.push(info); + } + + // Ensure that we didn't reallocate! + debug_assert_eq!(ranges.len(), total_non_dynamic_entries); + + let (special_constants_root_index, special_constants_binding) = if desc.flags.intersects( + crate::PipelineLayoutFlags::BASE_VERTEX_INSTANCE + | crate::PipelineLayoutFlags::NUM_WORK_GROUPS, + ) { + let parameter_index = parameters.len(); + log::debug!("\tParam[{}] = special", parameter_index); + parameters.push(d3d12::RootParameter::constants( + d3d12::ShaderVisibility::All, // really needed for VS and CS only + native_binding(&bind_cbv), + 3, // 0 = base vertex, 1 = base instance, 2 = other + )); + let binding = bind_cbv.clone(); + bind_cbv.register += 1; + (Some(parameter_index as u32), Some(binding)) + } else { + (None, None) + }; + + log::trace!("{:#?}", parameters); + log::trace!("Bindings {:#?}", binding_map); + + let (blob, error) = self + .library + .serialize_root_signature( + d3d12::RootSignatureVersion::V1_0, + ¶meters, + &[], + d3d12::RootSignatureFlags::ALLOW_IA_INPUT_LAYOUT, + ) + .map_err(|e| { + log::error!("Unable to find serialization function: {:?}", e); + crate::DeviceError::Lost + })? 
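+ // Two failure layers here: the `map_err` above covers the serialization entry point not
+ // being found in the D3D12 library, while the HRESULT converted below is the actual
+ // serialization outcome.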
+ .into_device_result("Root signature serialization")?; + + if !error.is_null() { + log::error!( + "Root signature serialization error: {:?}", + unsafe { error.as_c_str() }.to_str().unwrap() + ); + unsafe { error.destroy() }; + return Err(crate::DeviceError::Lost); + } + + let raw = self + .raw + .create_root_signature(blob, 0) + .into_device_result("Root signature creation")?; + unsafe { blob.destroy() }; + + log::debug!("\traw = {:?}", raw); + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::PipelineLayout { + shared: super::PipelineLayoutShared { + signature: raw, + total_root_elements: parameters.len() as super::RootIndex, + special_constants_root_index, + root_constant_info, + }, + bind_group_infos, + naga_options: hlsl::Options { + shader_model: match self.dxc_container { + // DXC + Some(_) => hlsl::ShaderModel::V6_0, + // FXC doesn't support SM 6.0 + None => hlsl::ShaderModel::V5_1, + }, + binding_map, + fake_missing_bindings: false, + special_constants_binding, + push_constants_target, + zero_initialize_workgroup_memory: true, + }, + }) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + unsafe { pipeline_layout.shared.signature.destroy() }; + } + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + let mut cpu_views = desc + .layout + .cpu_heap_views + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_views { + inner.stage.clear(); + } + let mut cpu_samplers = desc + .layout + .cpu_heap_samplers + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_samplers { + inner.stage.clear(); + } + let mut dynamic_buffers = Vec::new(); + + for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) { + match layout.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.buffers[start..end] { + dynamic_buffers.push(data.resolve_address()); + } + } + wgt::BindingType::Buffer { ty, .. 
} => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.buffers[start..end] { + let gpu_address = data.resolve_address(); + let size = data.resolve_size() as u32; + let inner = cpu_views.as_mut().unwrap(); + let cpu_index = inner.stage.len() as u32; + let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); + match ty { + wgt::BufferBindingType::Uniform => { + let size_mask = + d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let raw_desc = d3d12_ty::D3D12_CONSTANT_BUFFER_VIEW_DESC { + BufferLocation: gpu_address, + SizeInBytes: ((size - 1) | size_mask) + 1, + }; + unsafe { self.raw.CreateConstantBufferView(&raw_desc, handle) }; + } + wgt::BufferBindingType::Storage { read_only: true } => { + let mut raw_desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + Shader4ComponentMapping: + view::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: d3d12_ty::D3D12_SRV_DIMENSION_BUFFER, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_SRV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + Flags: d3d12_ty::D3D12_BUFFER_SRV_FLAG_RAW, + } + }; + unsafe { + self.raw.CreateShaderResourceView( + data.buffer.resource.as_mut_ptr(), + &raw_desc, + handle, + ) + }; + } + wgt::BufferBindingType::Storage { read_only: false } => { + let mut raw_desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d12_ty::D3D12_UAV_DIMENSION_BUFFER, + u: unsafe { mem::zeroed() }, + }; + unsafe { + *raw_desc.u.Buffer_mut() = d3d12_ty::D3D12_BUFFER_UAV { + FirstElement: data.offset / 4, + NumElements: size / 4, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: d3d12_ty::D3D12_BUFFER_UAV_FLAG_RAW, + } + }; + unsafe { + self.raw.CreateUnorderedAccessView( + data.buffer.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle, + ) + }; + } + } + inner.stage.push(handle); + } + } + wgt::BindingType::Texture { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.textures[start..end] { + let handle = data.view.handle_srv.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + } + wgt::BindingType::StorageTexture { .. } => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.textures[start..end] { + let handle = data.view.handle_uav.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + } + wgt::BindingType::Sampler { .. 
} => { + let start = entry.resource_index as usize; + let end = start + entry.count as usize; + for data in &desc.samplers[start..end] { + cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); + } + } + } + } + + let handle_views = match cpu_views { + Some(inner) => { + let dual = unsafe { + descriptor::upload( + self.raw, + &inner, + &self.shared.heap_views, + &desc.layout.copy_counts, + ) + }?; + Some(dual) + } + None => None, + }; + let handle_samplers = match cpu_samplers { + Some(inner) => { + let dual = unsafe { + descriptor::upload( + self.raw, + &inner, + &self.shared.heap_samplers, + &desc.layout.copy_counts, + ) + }?; + Some(dual) + } + None => None, + }; + + Ok(super::BindGroup { + handle_views, + handle_samplers, + dynamic_buffers, + }) + } + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + if let Some(dual) = group.handle_views { + self.shared.heap_views.free_slice(dual); + } + if let Some(dual) = group.handle_samplers { + self.shared.heap_samplers.free_slice(dual); + } + } + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + let raw_name = desc.label.and_then(|label| ffi::CString::new(label).ok()); + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga, raw_name }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) { + // just drop + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + let (topology_class, topology) = conv::map_topology(desc.primitive.topology); + let mut shader_stages = wgt::ShaderStages::VERTEX; + + let blob_vs = + self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_fs = match desc.fragment_stage { + Some(ref stage) => { + shader_stages |= wgt::ShaderStages::FRAGMENT; + Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)?) 
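+ // The fragment stage is optional (e.g. for depth-only pipelines); when it is absent,
+ // `PS` stays a null shader in the pipeline state description below.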
+ } + None => None, + }; + + let mut vertex_strides = [None; crate::MAX_VERTEX_BUFFERS]; + let mut input_element_descs = Vec::new(); + for (i, (stride, vbuf)) in vertex_strides + .iter_mut() + .zip(desc.vertex_buffers) + .enumerate() + { + *stride = NonZeroU32::new(vbuf.array_stride as u32); + let (slot_class, step_rate) = match vbuf.step_mode { + wgt::VertexStepMode::Vertex => { + (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) + } + wgt::VertexStepMode::Instance => { + (d3d12_ty::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) + } + }; + for attribute in vbuf.attributes { + input_element_descs.push(d3d12_ty::D3D12_INPUT_ELEMENT_DESC { + SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _, + SemanticIndex: attribute.shader_location, + Format: auxil::dxgi::conv::map_vertex_format(attribute.format), + InputSlot: i as u32, + AlignedByteOffset: attribute.offset as u32, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate, + }); + } + } + + let mut rtv_formats = [dxgiformat::DXGI_FORMAT_UNKNOWN; + d3d12_ty::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + for (rtv_format, ct) in rtv_formats.iter_mut().zip(desc.color_targets) { + if let Some(ct) = ct.as_ref() { + *rtv_format = auxil::dxgi::conv::map_texture_format(ct.format); + } + } + + let bias = desc + .depth_stencil + .as_ref() + .map(|ds| ds.bias) + .unwrap_or_default(); + + let raw_rasterizer = d3d12_ty::D3D12_RASTERIZER_DESC { + FillMode: conv::map_polygon_mode(desc.primitive.polygon_mode), + CullMode: match desc.primitive.cull_mode { + None => d3d12_ty::D3D12_CULL_MODE_NONE, + Some(wgt::Face::Front) => d3d12_ty::D3D12_CULL_MODE_FRONT, + Some(wgt::Face::Back) => d3d12_ty::D3D12_CULL_MODE_BACK, + }, + FrontCounterClockwise: match desc.primitive.front_face { + wgt::FrontFace::Cw => 0, + wgt::FrontFace::Ccw => 1, + }, + DepthBias: bias.constant, + DepthBiasClamp: bias.clamp, + SlopeScaledDepthBias: bias.slope_scale, + DepthClipEnable: BOOL::from(!desc.primitive.unclipped_depth), + MultisampleEnable: BOOL::from(desc.multisample.count > 1), + ForcedSampleCount: 0, + AntialiasedLineEnable: 0, + ConservativeRaster: if desc.primitive.conservative { + d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + } else { + d3d12_ty::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + }, + }; + + let raw_desc = d3d12_ty::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: desc.layout.shared.signature.as_mut_ptr(), + VS: *blob_vs.create_native_shader(), + PS: match blob_fs { + Some(ref shader) => *shader.create_native_shader(), + None => *d3d12::Shader::null(), + }, + GS: *d3d12::Shader::null(), + DS: *d3d12::Shader::null(), + HS: *d3d12::Shader::null(), + StreamOutput: d3d12_ty::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }, + BlendState: d3d12_ty::D3D12_BLEND_DESC { + AlphaToCoverageEnable: BOOL::from(desc.multisample.alpha_to_coverage_enabled), + IndependentBlendEnable: 1, + RenderTarget: conv::map_render_targets(desc.color_targets), + }, + SampleMask: desc.multisample.mask as u32, + RasterizerState: raw_rasterizer, + DepthStencilState: match desc.depth_stencil { + Some(ref ds) => conv::map_depth_stencil(ds), + None => unsafe { mem::zeroed() }, + }, + InputLayout: d3d12_ty::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: if input_element_descs.is_empty() { + ptr::null() + } else { + input_element_descs.as_ptr() + }, + NumElements: input_element_descs.len() as u32, + }, + IBStripCutValue: match desc.primitive.strip_index_format 
{ + Some(wgt::IndexFormat::Uint16) => { + d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF + } + Some(wgt::IndexFormat::Uint32) => { + d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + } + None => d3d12_ty::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + }, + PrimitiveTopologyType: topology_class, + NumRenderTargets: desc.color_targets.len() as u32, + RTVFormats: rtv_formats, + DSVFormat: desc + .depth_stencil + .as_ref() + .map_or(dxgiformat::DXGI_FORMAT_UNKNOWN, |ds| { + auxil::dxgi::conv::map_texture_format(ds.format) + }), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.multisample.count, + Quality: 0, + }, + NodeMask: 0, + CachedPSO: d3d12_ty::D3D12_CACHED_PIPELINE_STATE { + pCachedBlob: ptr::null(), + CachedBlobSizeInBytes: 0, + }, + Flags: d3d12_ty::D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + let mut raw = d3d12::PipelineState::null(); + let hr = { + profiling::scope!("ID3D12Device::CreateGraphicsPipelineState"); + unsafe { + self.raw.CreateGraphicsPipelineState( + &raw_desc, + &d3d12_ty::ID3D12PipelineState::uuidof(), + raw.mut_void(), + ) + } + }; + + unsafe { blob_vs.destroy() }; + if let Some(blob_fs) = blob_fs { + unsafe { blob_fs.destroy() }; + }; + + hr.into_result() + .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.into_owned()))?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::RenderPipeline { + raw, + layout: desc.layout.shared.clone(), + topology, + vertex_strides, + }) + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + unsafe { pipeline.raw.destroy() }; + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + + let pair = { + profiling::scope!("ID3D12Device::CreateComputePipelineState"); + self.raw.create_compute_pipeline_state( + desc.layout.shared.signature, + blob_cs.create_native_shader(), + 0, + d3d12::CachedPSO::null(), + d3d12::PipelineStateFlags::empty(), + ) + }; + + unsafe { blob_cs.destroy() }; + + let raw = pair.into_result().map_err(|err| { + crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) + })?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::ComputePipeline { + raw, + layout: desc.layout.shared.clone(), + }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + unsafe { pipeline.raw.destroy() }; + } + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + let (heap_ty, raw_ty) = match desc.ty { + wgt::QueryType::Occlusion => ( + d3d12::QueryHeapType::Occlusion, + d3d12_ty::D3D12_QUERY_TYPE_BINARY_OCCLUSION, + ), + wgt::QueryType::PipelineStatistics(_) => ( + d3d12::QueryHeapType::PipelineStatistics, + d3d12_ty::D3D12_QUERY_TYPE_PIPELINE_STATISTICS, + ), + wgt::QueryType::Timestamp => ( + d3d12::QueryHeapType::Timestamp, + d3d12_ty::D3D12_QUERY_TYPE_TIMESTAMP, + ), + }; + + let raw = self + .raw + .create_query_heap(heap_ty, desc.count, 0) + .into_device_result("Query heap creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + unsafe { raw.SetName(cwstr.as_ptr()) }; + } + + Ok(super::QuerySet { raw, raw_ty }) + } + unsafe fn 
destroy_query_set(&self, set: super::QuerySet) { + unsafe { set.raw.destroy() }; + } + + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + let mut raw = d3d12::Fence::null(); + let hr = unsafe { + self.raw.CreateFence( + 0, + d3d12_ty::D3D12_FENCE_FLAG_NONE, + &d3d12_ty::ID3D12Fence::uuidof(), + raw.mut_void(), + ) + }; + hr.into_device_result("Fence creation")?; + Ok(super::Fence { raw }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + unsafe { fence.raw.destroy() }; + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + Ok(unsafe { fence.raw.GetCompletedValue() }) + } + unsafe fn wait( + &self, + fence: &super::Fence, + value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + if unsafe { fence.raw.GetCompletedValue() } >= value { + return Ok(true); + } + let hr = fence.raw.set_event_on_completion(self.idler.event, value); + hr.into_device_result("Set event")?; + + match unsafe { synchapi::WaitForSingleObject(self.idler.event.0, timeout_ms) } { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::DeviceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::DeviceError::Lost) + } + } + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(feature = "renderdoc")] + { + unsafe { + self.render_doc + .start_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + } + } + #[cfg(not(feature = "renderdoc"))] + false + } + + unsafe fn stop_capture(&self) { + #[cfg(feature = "renderdoc")] + unsafe { + self.render_doc + .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) + } + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/instance.rs b/third_party/rust/wgpu-hal/src/dx12/instance.rs new file mode 100644 index 0000000000..be7a3f7306 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/instance.rs @@ -0,0 +1,117 @@ +use winapi::shared::{dxgi1_5, minwindef}; + +use super::SurfaceTarget; +use crate::auxil::{self, dxgi::result::HResult as _}; +use std::{mem, sync::Arc}; + +impl Drop for super::Instance { + fn drop(&mut self) { + unsafe { self.factory.destroy() }; + crate::auxil::dxgi::exception::unregister_exception_handler(); + } +} + +impl crate::Instance<super::Api> for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + let lib_main = d3d12::D3D12Lib::new().map_err(|_| crate::InstanceError)?; + + if desc.flags.contains(crate::InstanceFlags::VALIDATION) { + // Enable debug layer + match lib_main.get_debug_interface() { + Ok(pair) => match pair.into_result() { + Ok(debug_controller) => { + debug_controller.enable_layer(); + unsafe { debug_controller.Release() }; + } + Err(err) => { + log::warn!("Unable to enable D3D12 debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for D3D12 not found: {:?}", err); + } + } + } + + // Create DXGIFactory4 + let (lib_dxgi, factory) = auxil::dxgi::factory::create_factory( + auxil::dxgi::factory::DxgiFactoryType::Factory4, + desc.flags, + )?; + + // Create IDXGIFactoryMedia + let factory_media = match lib_dxgi.create_factory_media() { + Ok(pair) => match pair.into_result() { + Ok(factory_media) => Some(factory_media), + Err(err) => { + log::error!("Failed to create IDXGIFactoryMedia: {}", err); + None + } + }, + Err(err) => { + log::info!("IDXGIFactory1 creation function 
not found: {:?}", err); + None + } + }; + + let mut supports_allow_tearing = false; + #[allow(trivial_casts)] + if let Some(factory5) = factory.as_factory5() { + let mut allow_tearing: minwindef::BOOL = minwindef::FALSE; + let hr = unsafe { + factory5.CheckFeatureSupport( + dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, + &mut allow_tearing as *mut _ as *mut _, + mem::size_of::<minwindef::BOOL>() as _, + ) + }; + + match hr.into_result() { + Err(err) => log::warn!("Unable to check for tearing support: {}", err), + Ok(()) => supports_allow_tearing = true, + } + } + + Ok(Self { + // The call to create_factory will only succeed if we get a factory4, so this is safe. + factory, + factory_media, + library: Arc::new(lib_main), + _lib_dxgi: lib_dxgi, + supports_allow_tearing, + flags: desc.flags, + dx12_shader_compiler: desc.dx12_shader_compiler.clone(), + }) + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<super::Surface, crate::InstanceError> { + match window_handle { + raw_window_handle::RawWindowHandle::Win32(handle) => Ok(super::Surface { + factory: self.factory, + factory_media: self.factory_media, + target: SurfaceTarget::WndHandle(handle.hwnd as *mut _), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: None, + }), + _ => Err(crate::InstanceError), + } + } + unsafe fn destroy_surface(&self, _surface: super::Surface) { + // just drop + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let adapters = auxil::dxgi::factory::enumerate_adapters(self.factory); + + adapters + .into_iter() + .filter_map(|raw| { + super::Adapter::expose(raw, &self.library, self.flags, &self.dx12_shader_compiler) + }) + .collect() + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/mod.rs b/third_party/rust/wgpu-hal/src/dx12/mod.rs new file mode 100644 index 0000000000..6cdf3ffe64 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/mod.rs @@ -0,0 +1,902 @@ +/*! +# DirectX12 API internals. + +Generally the mapping is straightforwad. + +## Resource transitions + +D3D12 API matches WebGPU internal states very well. The only +caveat here is issuing a special UAV barrier whenever both source +and destination states match, and they are for storage sync. + +## Memory + +For now, all resources are created with "committed" memory. + +## Resource binding + +See ['Device::create_pipeline_layout`] documentation for the structure +of the root signature corresponding to WebGPU pipeline layout. + +Binding groups is mostly straightforward, with one big caveat: +all bindings have to be reset whenever the pipeline layout changes. +This is the rule of D3D12, and we can do nothing to help it. + +We detect this change at both [`crate::CommandEncoder::set_bind_group`] +and [`crate::CommandEncoder::set_render_pipeline`] with +[`crate::CommandEncoder::set_compute_pipeline`]. + +For this reason, in order avoid repeating the binding code, +we are binding everything in `CommandEncoder::update_root_elements`. +When the pipeline layout is changed, we reset all bindings. +Otherwise, we pass a range corresponding only to the current bind group. 
+ +!*/ + +mod adapter; +mod command; +mod conv; +mod descriptor; +mod device; +mod instance; +mod shader_compilation; +mod suballocation; +mod types; +mod view; + +use crate::auxil::{self, dxgi::result::HResult as _}; + +use arrayvec::ArrayVec; +use parking_lot::Mutex; +use std::{ffi, fmt, mem, num::NonZeroU32, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_4, dxgitype, windef, winerror}, + um::{d3d12 as d3d12_ty, dcomp, synchapi, winbase, winnt}, + Interface as _, +}; + +#[derive(Clone)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = Texture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +// Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. +const MAX_ROOT_ELEMENTS: usize = 64; +const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10; + +pub struct Instance { + factory: d3d12::DxgiFactory, + factory_media: Option<d3d12::FactoryMedia>, + library: Arc<d3d12::D3D12Lib>, + supports_allow_tearing: bool, + _lib_dxgi: d3d12::DxgiLib, + flags: crate::InstanceFlags, + dx12_shader_compiler: wgt::Dx12Compiler, +} + +impl Instance { + pub unsafe fn create_surface_from_visual( + &self, + visual: *mut dcomp::IDCompositionVisual, + ) -> Surface { + Surface { + factory: self.factory, + factory_media: self.factory_media, + target: SurfaceTarget::Visual(unsafe { d3d12::WeakPtr::from_raw(visual) }), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: None, + } + } + + pub unsafe fn create_surface_from_surface_handle( + &self, + surface_handle: winnt::HANDLE, + ) -> Surface { + Surface { + factory: self.factory, + factory_media: self.factory_media, + target: SurfaceTarget::SurfaceHandle(surface_handle), + supports_allow_tearing: self.supports_allow_tearing, + swap_chain: None, + } + } +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +struct SwapChain { + raw: d3d12::WeakPtr<dxgi1_4::IDXGISwapChain3>, + // need to associate raw image pointers with the swapchain so they can be properly released + // when the swapchain is destroyed + resources: Vec<d3d12::Resource>, + waitable: winnt::HANDLE, + acquired_count: usize, + present_mode: wgt::PresentMode, + format: wgt::TextureFormat, + size: wgt::Extent3d, +} + +enum SurfaceTarget { + WndHandle(windef::HWND), + Visual(d3d12::WeakPtr<dcomp::IDCompositionVisual>), + SurfaceHandle(winnt::HANDLE), +} + +pub struct Surface { + factory: d3d12::DxgiFactory, + factory_media: Option<d3d12::FactoryMedia>, + target: SurfaceTarget, + supports_allow_tearing: bool, + swap_chain: Option<SwapChain>, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug, Clone, Copy)] +enum MemoryArchitecture { + Unified { + #[allow(unused)] + cache_coherent: bool, + }, + NonUnified, +} + +#[derive(Debug, Clone, Copy)] +struct PrivateCapabilities { + instance_flags: crate::InstanceFlags, + #[allow(unused)] + heterogeneous_resource_heaps: bool, + memory_architecture: MemoryArchitecture, + #[allow(unused)] // TODO: 
Exists until windows-rs is standard, then it can probably be removed? + heap_create_not_zeroed: bool, + casting_fully_typed_format_supported: bool, +} + +#[derive(Default)] +struct Workarounds { + // On WARP, temporary CPU descriptors are still used by the runtime + // after we call `CopyDescriptors`. + avoid_cpu_descriptor_overwrites: bool, +} + +pub struct Adapter { + raw: d3d12::DxgiAdapter, + device: d3d12::Device, + library: Arc<d3d12::D3D12Lib>, + private_caps: PrivateCapabilities, + presentation_timer: auxil::dxgi::time::PresentationTimer, + //Note: this isn't used right now, but we'll need it later. + #[allow(unused)] + workarounds: Workarounds, + dx12_shader_compiler: wgt::Dx12Compiler, +} + +unsafe impl Send for Adapter {} +unsafe impl Sync for Adapter {} + +/// Helper structure for waiting for GPU. +struct Idler { + fence: d3d12::Fence, + event: d3d12::Event, +} + +impl Idler { + unsafe fn destroy(self) { + unsafe { self.fence.destroy() }; + } +} + +struct CommandSignatures { + draw: d3d12::CommandSignature, + draw_indexed: d3d12::CommandSignature, + dispatch: d3d12::CommandSignature, +} + +impl CommandSignatures { + unsafe fn destroy(&self) { + unsafe { + self.draw.destroy(); + self.draw_indexed.destroy(); + self.dispatch.destroy(); + } + } +} + +struct DeviceShared { + zero_buffer: d3d12::Resource, + cmd_signatures: CommandSignatures, + heap_views: descriptor::GeneralHeap, + heap_samplers: descriptor::GeneralHeap, +} + +impl DeviceShared { + unsafe fn destroy(&self) { + unsafe { + self.zero_buffer.destroy(); + self.cmd_signatures.destroy(); + self.heap_views.raw.destroy(); + self.heap_samplers.raw.destroy(); + } + } +} + +pub struct Device { + raw: d3d12::Device, + present_queue: d3d12::CommandQueue, + idler: Idler, + private_caps: PrivateCapabilities, + shared: Arc<DeviceShared>, + // CPU only pools + rtv_pool: Mutex<descriptor::CpuPool>, + dsv_pool: Mutex<descriptor::CpuPool>, + srv_uav_pool: Mutex<descriptor::CpuPool>, + sampler_pool: Mutex<descriptor::CpuPool>, + // library + library: Arc<d3d12::D3D12Lib>, + #[cfg(feature = "renderdoc")] + render_doc: crate::auxil::renderdoc::RenderDoc, + null_rtv_handle: descriptor::Handle, + mem_allocator: Option<Mutex<suballocation::GpuAllocatorWrapper>>, + dxc_container: Option<shader_compilation::DxcContainer>, +} + +unsafe impl Send for Device {} +unsafe impl Sync for Device {} + +pub struct Queue { + raw: d3d12::CommandQueue, + temp_lists: Vec<d3d12::CommandList>, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +#[derive(Default)] +struct Temp { + marker: Vec<u16>, + barriers: Vec<d3d12_ty::D3D12_RESOURCE_BARRIER>, +} + +impl Temp { + fn clear(&mut self) { + self.marker.clear(); + self.barriers.clear(); + } +} + +struct PassResolve { + src: (d3d12::Resource, u32), + dst: (d3d12::Resource, u32), + format: d3d12::Format, +} + +#[derive(Clone, Copy)] +enum RootElement { + Empty, + Constant, + SpecialConstantBuffer { + base_vertex: i32, + base_instance: u32, + other: u32, + }, + /// Descriptor table. + Table(d3d12::GpuDescriptor), + /// Descriptor for a buffer that has dynamic offset. 
+ DynamicOffsetBuffer { + kind: BufferViewKind, + address: d3d12::GpuAddress, + }, +} + +#[derive(Clone, Copy)] +enum PassKind { + Render, + Compute, + Transfer, +} + +struct PassState { + has_label: bool, + resolves: ArrayVec<PassResolve, { crate::MAX_COLOR_ATTACHMENTS }>, + layout: PipelineLayoutShared, + root_elements: [RootElement; MAX_ROOT_ELEMENTS], + constant_data: [u32; MAX_ROOT_ELEMENTS], + dirty_root_elements: u64, + vertex_buffers: [d3d12_ty::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: usize, + kind: PassKind, +} + +#[test] +fn test_dirty_mask() { + assert_eq!(MAX_ROOT_ELEMENTS, std::mem::size_of::<u64>() * 8); +} + +impl PassState { + fn new() -> Self { + PassState { + has_label: false, + resolves: ArrayVec::new(), + layout: PipelineLayoutShared { + signature: d3d12::RootSignature::null(), + total_root_elements: 0, + special_constants_root_index: None, + root_constant_info: None, + }, + root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], + constant_data: [0; MAX_ROOT_ELEMENTS], + dirty_root_elements: 0, + vertex_buffers: [unsafe { mem::zeroed() }; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: 0, + kind: PassKind::Transfer, + } + } + + fn clear(&mut self) { + // careful about heap allocations! + *self = Self::new(); + } +} + +pub struct CommandEncoder { + allocator: d3d12::CommandAllocator, + device: d3d12::Device, + shared: Arc<DeviceShared>, + null_rtv_handle: descriptor::Handle, + list: Option<d3d12::GraphicsCommandList>, + free_lists: Vec<d3d12::GraphicsCommandList>, + pass: PassState, + temp: Temp, +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("allocator", &self.allocator) + .field("device", &self.allocator) + .finish() + } +} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: d3d12::GraphicsCommandList, + closed: bool, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct Buffer { + resource: d3d12::Resource, + size: wgt::BufferAddress, + allocation: Option<suballocation::AllocationWrapper>, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl crate::BufferBinding<'_, Api> { + fn resolve_size(&self) -> wgt::BufferAddress { + match self.size { + Some(size) => size.get(), + None => self.buffer.size - self.offset, + } + } + + fn resolve_address(&self) -> wgt::BufferAddress { + self.buffer.resource.gpu_virtual_address() + self.offset + } +} + +#[derive(Debug)] +pub struct Texture { + resource: d3d12::Resource, + format: wgt::TextureFormat, + dimension: wgt::TextureDimension, + size: wgt::Extent3d, + mip_level_count: u32, + sample_count: u32, + allocation: Option<suballocation::AllocationWrapper>, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +impl Texture { + fn array_layer_count(&self) -> u32 { + match self.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D3 => 1, + wgt::TextureDimension::D2 => self.size.depth_or_array_layers, + } + } + + /// see https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice + fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 { + mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count + } + + fn calc_subresource_for_copy(&self, base: &crate::TextureCopyBase) -> u32 { + let plane = match base.aspect { + 
crate::FormatAspects::COLOR | crate::FormatAspects::DEPTH => 0, + crate::FormatAspects::STENCIL => 1, + _ => unreachable!(), + }; + self.calc_subresource(base.mip_level, base.array_layer, plane) + } +} + +#[derive(Debug)] +pub struct TextureView { + raw_format: d3d12::Format, + aspects: crate::FormatAspects, + target_base: (d3d12::Resource, u32), + handle_srv: Option<descriptor::Handle>, + handle_uav: Option<descriptor::Handle>, + handle_rtv: Option<descriptor::Handle>, + handle_dsv_ro: Option<descriptor::Handle>, + handle_dsv_rw: Option<descriptor::Handle>, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} + +#[derive(Debug)] +pub struct Sampler { + handle: descriptor::Handle, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +#[derive(Debug)] +pub struct QuerySet { + raw: d3d12::QueryHeap, + raw_ty: d3d12_ty::D3D12_QUERY_TYPE, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} + +#[derive(Debug)] +pub struct Fence { + raw: d3d12::Fence, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +pub struct BindGroupLayout { + /// Sorted list of entries. + entries: Vec<wgt::BindGroupLayoutEntry>, + cpu_heap_views: Option<descriptor::CpuHeap>, + cpu_heap_samplers: Option<descriptor::CpuHeap>, + copy_counts: Vec<u32>, // all 1's +} + +#[derive(Clone, Copy)] +enum BufferViewKind { + Constant, + ShaderResource, + UnorderedAccess, +} + +#[derive(Debug)] +pub struct BindGroup { + handle_views: Option<descriptor::DualHandle>, + handle_samplers: Option<descriptor::DualHandle>, + dynamic_buffers: Vec<d3d12::GpuAddress>, +} + +bitflags::bitflags! { + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct TableTypes: u8 { + const SRV_CBV_UAV = 1 << 0; + const SAMPLERS = 1 << 1; + } +} + +// Element (also known as parameter) index into the root signature. +type RootIndex = u32; + +struct BindGroupInfo { + base_root_index: RootIndex, + tables: TableTypes, + dynamic_buffers: Vec<BufferViewKind>, +} + +#[derive(Clone)] +struct RootConstantInfo { + root_index: RootIndex, + range: std::ops::Range<u32>, +} + +#[derive(Clone)] +struct PipelineLayoutShared { + signature: d3d12::RootSignature, + total_root_elements: RootIndex, + special_constants_root_index: Option<RootIndex>, + root_constant_info: Option<RootConstantInfo>, +} + +unsafe impl Send for PipelineLayoutShared {} +unsafe impl Sync for PipelineLayoutShared {} + +pub struct PipelineLayout { + shared: PipelineLayoutShared, + // Storing for each associated bind group, which tables we created + // in the root signature. This is required for binding descriptor sets. 
+ bind_group_infos: ArrayVec<BindGroupInfo, { crate::MAX_BIND_GROUPS }>, + naga_options: naga::back::hlsl::Options, +} + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + raw_name: Option<ffi::CString>, +} + +pub(super) enum CompiledShader { + #[allow(unused)] + Dxc(Vec<u8>), + Fxc(d3d12::Blob), +} + +impl CompiledShader { + fn create_native_shader(&self) -> d3d12::Shader { + match *self { + CompiledShader::Dxc(ref shader) => d3d12::Shader::from_raw(shader), + CompiledShader::Fxc(shader) => d3d12::Shader::from_blob(shader), + } + } + + unsafe fn destroy(self) { + match self { + CompiledShader::Dxc(_) => {} + CompiledShader::Fxc(shader) => unsafe { + shader.destroy(); + }, + } + } +} + +pub struct RenderPipeline { + raw: d3d12::PipelineState, + layout: PipelineLayoutShared, + topology: d3d12_ty::D3D12_PRIMITIVE_TOPOLOGY, + vertex_strides: [Option<NonZeroU32>; crate::MAX_VERTEX_BUFFERS], +} + +unsafe impl Send for RenderPipeline {} +unsafe impl Sync for RenderPipeline {} + +pub struct ComputePipeline { + raw: d3d12::PipelineState, + layout: PipelineLayoutShared, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +impl SwapChain { + unsafe fn release_resources(self) -> d3d12::WeakPtr<dxgi1_4::IDXGISwapChain3> { + for resource in self.resources { + unsafe { resource.destroy() }; + } + self.raw + } + + unsafe fn wait( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<bool, crate::SurfaceError> { + let timeout_ms = match timeout { + Some(duration) => duration.as_millis() as u32, + None => winbase::INFINITE, + }; + match unsafe { synchapi::WaitForSingleObject(self.waitable, timeout_ms) } { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::SurfaceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::SurfaceError::Lost) + } + } + } +} + +impl crate::Surface<Api> for Surface { + unsafe fn configure( + &mut self, + device: &Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + // We always set ALLOW_TEARING on the swapchain no matter + // what kind of swapchain we want because ResizeBuffers + // cannot change if ALLOW_TEARING is applied to the swapchain. 
+ if self.supports_allow_tearing { + flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + } + + let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format); + + let swap_chain = match self.swap_chain.take() { + //Note: this path doesn't properly re-initialize all of the things + Some(sc) => { + // can't have image resources in flight used by GPU + let _ = unsafe { device.wait_idle() }; + + let raw = unsafe { sc.release_resources() }; + let result = unsafe { + raw.ResizeBuffers( + config.swap_chain_size, + config.extent.width, + config.extent.height, + non_srgb_format, + flags, + ) + }; + if let Err(err) = result.into_result() { + log::error!("ResizeBuffers failed: {}", err); + return Err(crate::SurfaceError::Other("window is in use")); + } + raw + } + None => { + let desc = d3d12::SwapchainDesc { + alpha_mode: auxil::dxgi::conv::map_acomposite_alpha_mode( + config.composite_alpha_mode, + ), + width: config.extent.width, + height: config.extent.height, + format: non_srgb_format, + stereo: false, + sample: d3d12::SampleDesc { + count: 1, + quality: 0, + }, + buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, + buffer_count: config.swap_chain_size, + scaling: d3d12::Scaling::Stretch, + swap_effect: d3d12::SwapEffect::FlipDiscard, + flags, + }; + let swap_chain1 = match self.target { + SurfaceTarget::Visual(_) => { + profiling::scope!("IDXGIFactory4::CreateSwapChainForComposition"); + self.factory + .unwrap_factory2() + .create_swapchain_for_composition( + device.present_queue.as_mut_ptr() as *mut _, + &desc, + ) + .into_result() + } + SurfaceTarget::SurfaceHandle(handle) => { + profiling::scope!( + "IDXGIFactoryMedia::CreateSwapChainForCompositionSurfaceHandle" + ); + self.factory_media + .ok_or(crate::SurfaceError::Other("IDXGIFactoryMedia not found"))? + .create_swapchain_for_composition_surface_handle( + device.present_queue.as_mut_ptr() as *mut _, + handle, + &desc, + ) + .into_result() + } + SurfaceTarget::WndHandle(hwnd) => { + profiling::scope!("IDXGIFactory4::CreateSwapChainForHwnd"); + self.factory + .as_factory2() + .unwrap() + .create_swapchain_for_hwnd( + device.present_queue.as_mut_ptr() as *mut _, + hwnd, + &desc, + ) + .into_result() + } + }; + + let swap_chain1 = match swap_chain1 { + Ok(s) => s, + Err(err) => { + log::error!("SwapChain creation error: {}", err); + return Err(crate::SurfaceError::Other("swap chain creation")); + } + }; + + match self.target { + SurfaceTarget::WndHandle(_) | SurfaceTarget::SurfaceHandle(_) => {} + SurfaceTarget::Visual(visual) => { + if let Err(err) = + unsafe { visual.SetContent(swap_chain1.as_unknown()) }.into_result() + { + log::error!("Unable to SetContent: {}", err); + return Err(crate::SurfaceError::Other( + "IDCompositionVisual::SetContent", + )); + } + } + } + + match unsafe { swap_chain1.cast::<dxgi1_4::IDXGISwapChain3>() }.into_result() { + Ok(swap_chain3) => { + unsafe { swap_chain1.destroy() }; + swap_chain3 + } + Err(err) => { + log::error!("Unable to cast swap chain: {}", err); + return Err(crate::SurfaceError::Other("swap chain cast to 3")); + } + } + } + }; + + match self.target { + SurfaceTarget::WndHandle(wnd_handle) => { + // Disable automatic Alt+Enter handling by DXGI. 
+ const DXGI_MWA_NO_WINDOW_CHANGES: u32 = 1; + const DXGI_MWA_NO_ALT_ENTER: u32 = 2; + unsafe { + self.factory.MakeWindowAssociation( + wnd_handle, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER, + ) + }; + } + SurfaceTarget::Visual(_) | SurfaceTarget::SurfaceHandle(_) => {} + } + + unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) }; + let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() }; + + let mut resources = vec![d3d12::Resource::null(); config.swap_chain_size as usize]; + for (i, res) in resources.iter_mut().enumerate() { + unsafe { + swap_chain.GetBuffer(i as _, &d3d12_ty::ID3D12Resource::uuidof(), res.mut_void()) + }; + } + + self.swap_chain = Some(SwapChain { + raw: swap_chain, + resources, + waitable, + acquired_count: 0, + present_mode: config.present_mode, + format: config.format, + size: config.extent, + }); + + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &Device) { + if let Some(mut sc) = self.swap_chain.take() { + unsafe { + let _ = sc.wait(None); + //TODO: this shouldn't be needed, + // but it complains that the queue is still used otherwise + let _ = device.wait_idle(); + let raw = sc.release_resources(); + raw.destroy(); + } + } + } + + unsafe fn acquire_texture( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> { + let sc = self.swap_chain.as_mut().unwrap(); + + unsafe { sc.wait(timeout) }?; + + let base_index = unsafe { sc.raw.GetCurrentBackBufferIndex() } as usize; + let index = (base_index + sc.acquired_count) % sc.resources.len(); + sc.acquired_count += 1; + + let texture = Texture { + resource: sc.resources[index], + format: sc.format, + dimension: wgt::TextureDimension::D2, + size: sc.size, + mip_level_count: 1, + sample_count: 1, + allocation: None, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&mut self, _texture: Texture) { + let sc = self.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; + } +} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&CommandBuffer], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + self.temp_lists.clear(); + for cmd_buf in command_buffers { + self.temp_lists.push(cmd_buf.raw.as_list()); + } + + { + profiling::scope!("ID3D12CommandQueue::ExecuteCommandLists"); + self.raw.execute_command_lists(&self.temp_lists); + } + + if let Some((fence, value)) = signal_fence { + self.raw + .signal(fence.raw, value) + .into_device_result("Signal fence")?; + } + Ok(()) + } + unsafe fn present( + &mut self, + surface: &mut Surface, + _texture: Texture, + ) -> Result<(), crate::SurfaceError> { + let sc = surface.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; + + let (interval, flags) = match sc.present_mode { + // We only allow immediate if ALLOW_TEARING is valid. 
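+ // (A sync interval of 0 presents without waiting for vertical blank, while an interval of 1 + // waits for one vblank, which is what gives Fifo its vsync behavior below.)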
+ wgt::PresentMode::Immediate => (0, dxgi::DXGI_PRESENT_ALLOW_TEARING), + wgt::PresentMode::Mailbox => (0, 0), + wgt::PresentMode::Fifo => (1, 0), + m => unreachable!("Cannot make surface with present mode {m:?}"), + }; + + profiling::scope!("IDXGISwapchain3::Present"); + unsafe { sc.raw.Present(interval, flags) }; + + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + let mut frequency = 0u64; + unsafe { self.raw.GetTimestampFrequency(&mut frequency) }; + (1_000_000_000.0 / frequency as f64) as f32 + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs b/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs new file mode 100644 index 0000000000..9f9be7c409 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/shader_compilation.rs @@ -0,0 +1,294 @@ +use std::ptr; + +pub(super) use dxc::{compile_dxc, get_dxc_container, DxcContainer}; +use winapi::um::d3dcompiler; + +use crate::auxil::dxgi::result::HResult; + +// This exists so that users who don't want to use dxc can disable the dxc_shader_compiler feature +// and not have to compile hassle_rs. +// Currently this will use Dxc if it is chosen as the dx12 compiler at `Instance` creation time, and will +// fall back to FXC if the Dxc libraries (dxil.dll and dxcompiler.dll) are not found, or if Fxc is chosen at +// `Instance` creation time. + +pub(super) fn compile_fxc( + device: &super::Device, + source: &String, + source_name: &str, + raw_ep: &std::ffi::CString, + stage_bit: wgt::ShaderStages, + full_stage: String, +) -> ( + Result<super::CompiledShader, crate::PipelineError>, + log::Level, +) { + profiling::scope!("compile_fxc"); + let mut shader_data = d3d12::Blob::null(); + let mut compile_flags = d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS; + if device + .private_caps + .instance_flags + .contains(crate::InstanceFlags::DEBUG) + { + compile_flags |= d3dcompiler::D3DCOMPILE_DEBUG | d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION; + } + let mut error = d3d12::Blob::null(); + let hr = unsafe { + profiling::scope!("d3dcompiler::D3DCompile"); + d3dcompiler::D3DCompile( + source.as_ptr().cast(), + source.len(), + source_name.as_ptr().cast(), + ptr::null(), + ptr::null_mut(), + raw_ep.as_ptr(), + full_stage.as_ptr().cast(), + compile_flags, + 0, + shader_data.mut_void().cast(), + error.mut_void().cast(), + ) + }; + + match hr.into_result() { + Ok(()) => ( + Ok(super::CompiledShader::Fxc(shader_data)), + log::Level::Info, + ), + Err(e) => { + let mut full_msg = format!("FXC D3DCompile error ({e})"); + if !error.is_null() { + use std::fmt::Write as _; + let message = unsafe { + std::slice::from_raw_parts( + error.GetBufferPointer() as *const u8, + error.GetBufferSize(), + ) + }; + let _ = write!(full_msg, ": {}", String::from_utf8_lossy(message)); + unsafe { + error.destroy(); + } + } + ( + Err(crate::PipelineError::Linkage(stage_bit, full_msg)), + log::Level::Warn, + ) + } + } +} + +// The Dxc implementation is behind a feature flag so that users who don't want to use dxc can disable the feature. +#[cfg(feature = "dxc_shader_compiler")] +mod dxc { + use std::path::PathBuf; + + // Destructor order should be fine since _dxil and _dxc don't rely on each other. + pub(crate) struct DxcContainer { + compiler: hassle_rs::DxcCompiler, + library: hassle_rs::DxcLibrary, + validator: hassle_rs::DxcValidator, + // Has to be held onto for the lifetime of the device, otherwise shaders will fail to compile. + _dxc: hassle_rs::Dxc, + // Also has to be held onto for the lifetime of the device, otherwise shaders will fail to validate. 
+ _dxil: hassle_rs::Dxil, + } + + pub(crate) fn get_dxc_container( + dxc_path: Option<PathBuf>, + dxil_path: Option<PathBuf>, + ) -> Result<Option<DxcContainer>, crate::DeviceError> { + // Make sure that dxil.dll exists. + let dxil = match hassle_rs::Dxil::new(dxil_path) { + Ok(dxil) => dxil, + Err(e) => { + log::warn!("Failed to load dxil.dll. Defaulting to Fxc instead: {}", e); + return Ok(None); + } + }; + + // Needed for explicit validation. + let validator = dxil.create_validator()?; + + let dxc = match hassle_rs::Dxc::new(dxc_path) { + Ok(dxc) => dxc, + Err(e) => { + log::warn!( + "Failed to load dxcompiler.dll. Defaulting to Fxc instead: {}", + e + ); + return Ok(None); + } + }; + let compiler = dxc.create_compiler()?; + let library = dxc.create_library()?; + + Ok(Some(DxcContainer { + _dxc: dxc, + compiler, + library, + _dxil: dxil, + validator, + })) + } + + pub(crate) fn compile_dxc( + device: &crate::dx12::Device, + source: &str, + source_name: &str, + raw_ep: &str, + stage_bit: wgt::ShaderStages, + full_stage: String, + dxc_container: &DxcContainer, + ) -> ( + Result<crate::dx12::CompiledShader, crate::PipelineError>, + log::Level, + ) { + profiling::scope!("compile_dxc"); + let mut compile_flags = arrayvec::ArrayVec::<&str, 4>::new_const(); + compile_flags.push("-Ges"); // d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS + compile_flags.push("-Vd"); // Disable implicit validation to work around bugs when dxil.dll isn't in the local directory. + if device + .private_caps + .instance_flags + .contains(crate::InstanceFlags::DEBUG) + { + compile_flags.push("-Zi"); // d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION + compile_flags.push("-Od"); // d3dcompiler::D3DCOMPILE_DEBUG + } + + let blob = match dxc_container + .library + .create_blob_with_encoding_from_str(source) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("DXC blob error: {e}"))) + { + Ok(blob) => blob, + Err(e) => return (Err(e), log::Level::Error), + }; + + let compiled = dxc_container.compiler.compile( + &blob, + source_name, + raw_ep, + &full_stage, + &compile_flags, + None, + &[], + ); + + let (result, log_level) = match compiled { + Ok(dxc_result) => match dxc_result.get_result() { + Ok(dxc_blob) => { + // Validate the shader. + match dxc_container.validator.validate(dxc_blob) { + Ok(validated_blob) => ( + Ok(crate::dx12::CompiledShader::Dxc(validated_blob.to_vec())), + log::Level::Info, + ), + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!( + "DXC validation error: {:?}\n{:?}", + get_error_string_from_dxc_result(&dxc_container.library, &e.0) + .unwrap_or_default(), + e.1 + ), + )), + log::Level::Error, + ), + } + } + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!("DXC compile error: {e}"), + )), + log::Level::Error, + ), + }, + Err(e) => ( + Err(crate::PipelineError::Linkage( + stage_bit, + format!( + "DXC compile error: {:?}", + get_error_string_from_dxc_result(&dxc_container.library, &e.0) + .unwrap_or_default() + ), + )), + log::Level::Error, + ), + }; + + (result, log_level) + } + + impl From<hassle_rs::HassleError> for crate::DeviceError { + fn from(value: hassle_rs::HassleError) -> Self { + match value { + hassle_rs::HassleError::Win32Error(e) => { + // TODO: This returns an HRESULT, should we try and use the associated Windows error message? + log::error!("Win32 error: {e:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::LoadLibraryError { filename, inner } => { + log::error!("Failed to load dxc library {filename:?}. 
Inner error: {inner:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::LibLoadingError(e) => { + log::error!("Failed to load dxc library. {e:?}"); + crate::DeviceError::Lost + } + hassle_rs::HassleError::WindowsOnly(e) => { + log::error!("Signing with dxil.dll is only supported on Windows. {e:?}"); + crate::DeviceError::Lost + } + // `ValidationError` and `CompileError` should never happen in a context involving `DeviceError` + hassle_rs::HassleError::ValidationError(_e) => unimplemented!(), + hassle_rs::HassleError::CompileError(_e) => unimplemented!(), + } + } + } + + fn get_error_string_from_dxc_result( + library: &hassle_rs::DxcLibrary, + error: &hassle_rs::DxcOperationResult, + ) -> Result<String, hassle_rs::HassleError> { + error + .get_error_buffer() + .and_then(|error| library.get_blob_as_string(&hassle_rs::DxcBlob::from(error))) + } +} + +// These are stubs for when the `dxc_shader_compiler` feature is disabled. +#[cfg(not(feature = "dxc_shader_compiler"))] +mod dxc { + use std::path::PathBuf; + + pub(crate) struct DxcContainer {} + + pub(crate) fn get_dxc_container( + _dxc_path: Option<PathBuf>, + _dxil_path: Option<PathBuf>, + ) -> Result<Option<DxcContainer>, crate::DeviceError> { + // Falls back to Fxc and logs an error. + log::error!("DXC shader compiler was requested on Instance creation, but the DXC feature is disabled. Enable the `dxc_shader_compiler` feature on wgpu_hal to use DXC."); + Ok(None) + } + + // It shouldn't be possible that this gets called with the `dxc_shader_compiler` feature disabled. + pub(crate) fn compile_dxc( + _device: &crate::dx12::Device, + _source: &str, + _source_name: &str, + _raw_ep: &str, + _stage_bit: wgt::ShaderStages, + _full_stage: String, + _dxc_container: &DxcContainer, + ) -> ( + Result<crate::dx12::CompiledShader, crate::PipelineError>, + log::Level, + ) { + unimplemented!("Something went really wrong, please report this. Attempted to compile shader with DXC, but the DXC feature is disabled. Enable the `dxc_shader_compiler` feature on wgpu_hal to use DXC."); + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/suballocation.rs b/third_party/rust/wgpu-hal/src/dx12/suballocation.rs new file mode 100644 index 0000000000..01ca997f21 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/suballocation.rs @@ -0,0 +1,331 @@ +pub(crate) use allocation::{ + create_allocator_wrapper, create_buffer_resource, create_texture_resource, + free_buffer_allocation, free_texture_allocation, AllocationWrapper, GpuAllocatorWrapper, +}; + +// This exists to work around https://github.com/gfx-rs/wgpu/issues/3207 +// Currently this will work the older, slower way if the windows_rs feature is disabled, +// and will use the fast path of suballocating buffers and textures using gpu_allocator if +// the windows_rs feature is enabled. + +// This is the fast path using gpu_allocator to suballocate buffers and textures. 
+#[cfg(feature = "windows_rs")] +mod allocation { + use d3d12::WeakPtr; + use parking_lot::Mutex; + use std::ptr; + use wgt::assertions::StrictAssertUnwrapExt; + use winapi::{ + um::{ + d3d12::{self as d3d12_ty, ID3D12Resource}, + winnt::HRESULT, + }, + Interface, + }; + + use gpu_allocator::{ + d3d12::{AllocationCreateDesc, ToWinapi, ToWindows}, + MemoryLocation, + }; + + #[derive(Debug)] + pub(crate) struct GpuAllocatorWrapper { + pub(crate) allocator: gpu_allocator::d3d12::Allocator, + } + + #[derive(Debug)] + pub(crate) struct AllocationWrapper { + pub(crate) allocation: gpu_allocator::d3d12::Allocation, + } + + pub(crate) fn create_allocator_wrapper( + raw: &d3d12::Device, + ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> { + let device = raw.as_ptr(); + + match gpu_allocator::d3d12::Allocator::new(&gpu_allocator::d3d12::AllocatorCreateDesc { + device: device.as_windows().clone(), + debug_settings: Default::default(), + }) { + Ok(allocator) => Ok(Some(Mutex::new(GpuAllocatorWrapper { allocator }))), + Err(e) => { + log::error!("Failed to create d3d12 allocator, error: {}", e); + Err(e)? + } + } + } + + pub(crate) fn create_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + let location = match (is_cpu_read, is_cpu_write) { + (true, true) => MemoryLocation::CpuToGpu, + (true, false) => MemoryLocation::GpuToCpu, + (false, true) => MemoryLocation::CpuToGpu, + (false, false) => MemoryLocation::GpuOnly, + }; + + let name = desc.label.unwrap_or("Unlabeled buffer"); + + // SAFETY: allocator exists when the windows_rs feature is enabled + let mut allocator = unsafe { + device + .mem_allocator + .as_ref() + .strict_unwrap_unchecked() + .lock() + }; + + // let mut allocator = unsafe { device.mem_allocator.as_ref().unwrap_unchecked().lock() }; + let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( + allocator.allocator.device().as_winapi(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + let hr = unsafe { + device.raw.CreatePlacedResource( + allocation.heap().as_winapi() as *mut _, + allocation.offset(), + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, Some(AllocationWrapper { allocation }))) + } + + pub(crate) fn create_texture_resource( + device: &crate::dx12::Device, + desc: &crate::TextureDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let location = MemoryLocation::GpuOnly; + + let name = desc.label.unwrap_or("Unlabeled texture"); + + // SAFETY: allocator exists when the windows_rs feature is enabled + let mut allocator = unsafe { + device + .mem_allocator + .as_ref() + .strict_unwrap_unchecked() + .lock() + }; + let allocation_desc = AllocationCreateDesc::from_winapi_d3d12_resource_desc( + allocator.allocator.device().as_winapi(), + &raw_desc, + name, + location, + ); + let allocation = allocator.allocator.allocate(&allocation_desc)?; + + let hr = unsafe { + device.raw.CreatePlacedResource( + allocation.heap().as_winapi() as *mut _, 
+ allocation.offset(), + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), // clear value + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, Some(AllocationWrapper { allocation }))) + } + + pub(crate) fn free_buffer_allocation( + allocation: AllocationWrapper, + allocator: &Mutex<GpuAllocatorWrapper>, + ) { + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 buffer, {e}"), + }; + } + + pub(crate) fn free_texture_allocation( + allocation: AllocationWrapper, + allocator: &Mutex<GpuAllocatorWrapper>, + ) { + match allocator.lock().allocator.free(allocation.allocation) { + Ok(_) => (), + // TODO: Don't panic here + Err(e) => panic!("Failed to destroy dx12 texture, {e}"), + }; + } + + #[cfg(feature = "windows_rs")] + impl From<gpu_allocator::AllocationError> for crate::DeviceError { + fn from(result: gpu_allocator::AllocationError) -> Self { + match result { + gpu_allocator::AllocationError::OutOfMemory => Self::OutOfMemory, + gpu_allocator::AllocationError::FailedToMap(e) => { + log::error!("DX12 gpu-allocator: Failed to map: {}", e); + Self::Lost + } + gpu_allocator::AllocationError::NoCompatibleMemoryTypeFound => { + log::error!("DX12 gpu-allocator: No Compatible Memory Type Found"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocationCreateDesc => { + log::error!("DX12 gpu-allocator: Invalid Allocation Creation Description"); + Self::Lost + } + gpu_allocator::AllocationError::InvalidAllocatorCreateDesc(e) => { + log::error!( + "DX12 gpu-allocator: Invalid Allocator Creation Description: {}", + e + ); + Self::Lost + } + gpu_allocator::AllocationError::Internal(e) => { + log::error!("DX12 gpu-allocator: Internal Error: {}", e); + Self::Lost + } + } + } + } +} + +// This is the older, slower path where it doesn't suballocate buffers. 
+// Tracking issue for when it can be removed: https://github.com/gfx-rs/wgpu/issues/3207 +#[cfg(not(feature = "windows_rs"))] +mod allocation { + use d3d12::WeakPtr; + use parking_lot::Mutex; + use std::ptr; + use winapi::{ + um::{ + d3d12::{self as d3d12_ty, ID3D12Resource}, + winnt::HRESULT, + }, + Interface, + }; + + const D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: u32 = d3d12_ty::D3D12_HEAP_FLAG_NONE; // TODO: find the exact value + + // Allocator isn't needed when not suballocating with gpu_allocator + #[derive(Debug)] + pub(crate) struct GpuAllocatorWrapper {} + + // Allocations aren't needed when not suballocating with gpu_allocator + #[derive(Debug)] + pub(crate) struct AllocationWrapper {} + + pub(crate) fn create_allocator_wrapper( + _raw: &d3d12::Device, + ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> { + Ok(None) + } + + pub(crate) fn create_buffer_resource( + device: &crate::dx12::Device, + desc: &crate::BufferDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: if is_cpu_read { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK + } else if is_cpu_write { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + } else { + d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE + }, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified if !is_cpu_read && !is_cpu_write => { + d3d12_ty::D3D12_MEMORY_POOL_L1 + } + _ => d3d12_ty::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12_ty::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, None)) + } + + pub(crate) fn create_texture_resource( + device: &crate::dx12::Device, + _desc: &crate::TextureDescriptor, + raw_desc: d3d12_ty::D3D12_RESOURCE_DESC, + resource: &mut WeakPtr<ID3D12Resource>, + ) -> Result<(HRESULT, Option<AllocationWrapper>), crate::DeviceError> { + let heap_properties = d3d12_ty::D3D12_HEAP_PROPERTIES { + Type: d3d12_ty::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12_ty::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match device.private_caps.memory_architecture { + crate::dx12::MemoryArchitecture::NonUnified => d3d12_ty::D3D12_MEMORY_POOL_L1, + crate::dx12::MemoryArchitecture::Unified { .. 
} => d3d12_ty::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = unsafe { + device.raw.CreateCommittedResource( + &heap_properties, + if device.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12_ty::D3D12_HEAP_FLAG_NONE + }, + &raw_desc, + d3d12_ty::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), // clear value + &d3d12_ty::ID3D12Resource::uuidof(), + resource.mut_void(), + ) + }; + + Ok((hr, None)) + } + + pub(crate) fn free_buffer_allocation( + _allocation: AllocationWrapper, + _allocator: &Mutex<GpuAllocatorWrapper>, + ) { + // No-op when not using gpu-allocator + } + + pub(crate) fn free_texture_allocation( + _allocation: AllocationWrapper, + _allocator: &Mutex<GpuAllocatorWrapper>, + ) { + // No-op when not using gpu-allocator + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/types.rs b/third_party/rust/wgpu-hal/src/dx12/types.rs new file mode 100644 index 0000000000..dec4e71337 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/types.rs @@ -0,0 +1,34 @@ +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +winapi::ENUM! { + enum D3D12_VIEW_INSTANCING_TIER { + D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED = 0, + D3D12_VIEW_INSTANCING_TIER_1 = 1, + D3D12_VIEW_INSTANCING_TIER_2 = 2, + D3D12_VIEW_INSTANCING_TIER_3 = 3, + } +} + +winapi::ENUM! { + enum D3D12_COMMAND_LIST_SUPPORT_FLAGS { + D3D12_COMMAND_LIST_SUPPORT_FLAG_NONE = 0, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_DIRECT, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_BUNDLE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_COMPUTE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_COPY, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_DECODE, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_PROCESS, + // D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_ENCODE, + } +} + +winapi::STRUCT! 
{ + struct D3D12_FEATURE_DATA_D3D12_OPTIONS3 { + CopyQueueTimestampQueriesSupported: winapi::shared::minwindef::BOOL, + CastingFullyTypedFormatSupported: winapi::shared::minwindef::BOOL, + WriteBufferImmediateSupportFlags: D3D12_COMMAND_LIST_SUPPORT_FLAGS, + ViewInstancingTier: D3D12_VIEW_INSTANCING_TIER, + BarycentricsSupported: winapi::shared::minwindef::BOOL, + } +} diff --git a/third_party/rust/wgpu-hal/src/dx12/view.rs b/third_party/rust/wgpu-hal/src/dx12/view.rs new file mode 100644 index 0000000000..e7a051b535 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/dx12/view.rs @@ -0,0 +1,380 @@ +use crate::auxil; +use std::mem; +use winapi::um::d3d12 as d3d12_ty; + +pub(crate) const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; + +pub(super) struct ViewDescriptor { + dimension: wgt::TextureViewDimension, + pub aspects: crate::FormatAspects, + pub rtv_dsv_format: d3d12::Format, + srv_uav_format: Option<d3d12::Format>, + multisampled: bool, + array_layer_base: u32, + array_layer_count: u32, + mip_level_base: u32, + mip_level_count: u32, +} + +impl crate::TextureViewDescriptor<'_> { + pub(super) fn to_internal(&self, texture: &super::Texture) -> ViewDescriptor { + let aspects = crate::FormatAspects::new(self.format, self.range.aspect); + + ViewDescriptor { + dimension: self.dimension, + aspects, + rtv_dsv_format: auxil::dxgi::conv::map_texture_format(self.format), + srv_uav_format: auxil::dxgi::conv::map_texture_format_for_srv_uav(self.format, aspects), + multisampled: texture.sample_count > 1, + mip_level_base: self.range.base_mip_level, + mip_level_count: self.range.mip_level_count.unwrap_or(!0), + array_layer_base: self.range.base_array_layer, + array_layer_count: self.range.array_layer_count.unwrap_or(!0), + } + } +} + +impl ViewDescriptor { + pub(crate) unsafe fn to_srv(&self) -> Option<d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC> { + let mut desc = d3d12_ty::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: self.srv_uav_format?, + ViewDimension: 0, + Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + ResourceMinLODClamp: 0.0, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_SRV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + 
unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_SRV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::Cube if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBE; + unsafe { + *desc.u.TextureCube_mut() = d3d12_ty::D3D12_TEXCUBE_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + ResourceMinLODClamp: 0.0, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + desc.ViewDimension = d3d12_ty::D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + unsafe { + *desc.u.TextureCubeArray_mut() = d3d12_ty::D3D12_TEXCUBE_ARRAY_SRV { + MostDetailedMip: self.mip_level_base, + MipLevels: self.mip_level_count, + First2DArrayFace: self.array_layer_base, + NumCubes: if self.array_layer_count == !0 { + !0 + } else { + self.array_layer_count / 6 + }, + ResourceMinLODClamp: 0.0, + } + } + } + } + + Some(desc) + } + + pub(crate) unsafe fn to_uav(&self) -> Option<d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC> { + let mut desc = d3d12_ty::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: self.srv_uav_format?, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_UAV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_UAV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_UAV { + MipSlice: self.mip_level_base, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_UAV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_UAV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_UAV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube UAV") + } + } + + Some(desc) + } + + pub(crate) 
unsafe fn to_rtv(&self) -> d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { + let mut desc = d3d12_ty::D3D12_RENDER_TARGET_VIEW_DESC { + Format: self.rtv_dsv_format, + ViewDimension: 0, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_RTV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_RTV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_RTV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_RTV { + MipSlice: self.mip_level_base, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_RTV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_RTV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + PlaneSlice: 0, + } + } + } + wgt::TextureViewDimension::D3 => { + desc.ViewDimension = d3d12_ty::D3D12_RTV_DIMENSION_TEXTURE3D; + unsafe { + *desc.u.Texture3D_mut() = d3d12_ty::D3D12_TEX3D_RTV { + MipSlice: self.mip_level_base, + FirstWSlice: self.array_layer_base, + WSize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube RTV") + } + } + + desc + } + + pub(crate) unsafe fn to_dsv(&self, read_only: bool) -> d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { + let mut desc = d3d12_ty::D3D12_DEPTH_STENCIL_VIEW_DESC { + Format: self.rtv_dsv_format, + ViewDimension: 0, + Flags: { + let mut flags = d3d12_ty::D3D12_DSV_FLAG_NONE; + if read_only { + if self.aspects.contains(crate::FormatAspects::DEPTH) { + flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_DEPTH; + } + if self.aspects.contains(crate::FormatAspects::STENCIL) { + flags |= d3d12_ty::D3D12_DSV_FLAG_READ_ONLY_STENCIL; + } + } + flags + }, + u: unsafe { mem::zeroed() }, + }; + + match self.dimension { + wgt::TextureViewDimension::D1 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1D; + unsafe { + *desc.u.Texture1D_mut() = d3d12_ty::D3D12_TEX1D_DSV { + MipSlice: self.mip_level_base, + } + } + } + /* + wgt::TextureViewDimension::D1Array => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + *desc.u.Texture1DArray_mut() = d3d12_ty::D3D12_TEX1D_ARRAY_DSV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if self.multisampled && 
self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMS; + unsafe { + *desc.u.Texture2DMS_mut() = d3d12_ty::D3D12_TEX2DMS_DSV { + UnusedField_NothingToDefine: 0, + } + } + } + wgt::TextureViewDimension::D2 if self.array_layer_base == 0 => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2D; + unsafe { + *desc.u.Texture2D_mut() = d3d12_ty::D3D12_TEX2D_DSV { + MipSlice: self.mip_level_base, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array + if self.multisampled => + { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + unsafe { + *desc.u.Texture2DMSArray_mut() = d3d12_ty::D3D12_TEX2DMS_ARRAY_DSV { + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { + desc.ViewDimension = d3d12_ty::D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + unsafe { + *desc.u.Texture2DArray_mut() = d3d12_ty::D3D12_TEX2D_ARRAY_DSV { + MipSlice: self.mip_level_base, + FirstArraySlice: self.array_layer_base, + ArraySize: self.array_layer_count, + } + } + } + wgt::TextureViewDimension::D3 + | wgt::TextureViewDimension::Cube + | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube or 3D RTV") + } + } + + desc + } +} diff --git a/third_party/rust/wgpu-hal/src/empty.rs b/third_party/rust/wgpu-hal/src/empty.rs new file mode 100644 index 0000000000..1497acad91 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/empty.rs @@ -0,0 +1,413 @@ +#![allow(unused_variables)] + +use std::ops::Range; + +#[derive(Clone)] +pub struct Api; +pub struct Context; +#[derive(Debug)] +pub struct Encoder; +#[derive(Debug)] +pub struct Resource; + +type DeviceResult<T> = Result<T, crate::DeviceError>; + +impl crate::Api for Api { + type Instance = Context; + type Surface = Context; + type Adapter = Context; + type Device = Context; + + type Queue = Context; + type CommandEncoder = Encoder; + type CommandBuffer = Resource; + + type Buffer = Resource; + type Texture = Resource; + type SurfaceTexture = Resource; + type TextureView = Resource; + type Sampler = Resource; + type QuerySet = Resource; + type Fence = Resource; + + type BindGroupLayout = Resource; + type BindGroup = Resource; + type PipelineLayout = Resource; + type ShaderModule = Resource; + type RenderPipeline = Resource; + type ComputePipeline = Resource; +} + +impl crate::Instance<Api> for Context { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + Ok(Context) + } + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + _window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Context, crate::InstanceError> { + Ok(Context) + } + unsafe fn destroy_surface(&self, surface: Context) {} + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<Api>> { + Vec::new() + } +} + +impl crate::Surface<Api> for Context { + unsafe fn configure( + &mut self, + device: &Context, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &Context) {} + + unsafe fn acquire_texture( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> { + Ok(None) + } + unsafe fn discard_texture(&mut self, texture: Resource) {} +} + +impl crate::Adapter<Api> for Context { + unsafe fn open( + &self, + features: wgt::Features, + _limits: 
&wgt::Limits, + ) -> DeviceResult<crate::OpenDevice<Api>> { + Err(crate::DeviceError::Lost) + } + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + crate::TextureFormatCapabilities::empty() + } + + unsafe fn surface_capabilities(&self, surface: &Context) -> Option<crate::SurfaceCapabilities> { + None + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp::INVALID_TIMESTAMP + } +} + +impl crate::Queue<Api> for Context { + unsafe fn submit( + &mut self, + command_buffers: &[&Resource], + signal_fence: Option<(&mut Resource, crate::FenceValue)>, + ) -> DeviceResult<()> { + Ok(()) + } + unsafe fn present( + &mut self, + surface: &mut Context, + texture: Resource, + ) -> Result<(), crate::SurfaceError> { + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + 1.0 + } +} + +impl crate::Device<Api> for Context { + unsafe fn exit(self, queue: Context) {} + unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_buffer(&self, buffer: Resource) {} + unsafe fn map_buffer( + &self, + buffer: &Resource, + range: crate::MemoryRange, + ) -> DeviceResult<crate::BufferMapping> { + Err(crate::DeviceError::Lost) + } + unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &Resource, ranges: I) {} + + unsafe fn create_texture(&self, desc: &crate::TextureDescriptor) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_texture(&self, texture: Resource) {} + unsafe fn create_texture_view( + &self, + texture: &Resource, + desc: &crate::TextureViewDescriptor, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_texture_view(&self, view: Resource) {} + unsafe fn create_sampler(&self, desc: &crate::SamplerDescriptor) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_sampler(&self, sampler: Resource) {} + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<Api>, + ) -> DeviceResult<Encoder> { + Ok(Encoder) + } + unsafe fn destroy_command_encoder(&self, encoder: Encoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<Api>, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<Api>, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_bind_group(&self, group: Resource) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<Resource, crate::ShaderError> { + Ok(Resource) + } + unsafe fn destroy_shader_module(&self, module: Resource) {} + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<Api>, + ) -> Result<Resource, crate::PipelineError> { + Ok(Resource) + } + unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {} + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<Api>, + ) -> Result<Resource, 
crate::PipelineError> { + Ok(Resource) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: Resource) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_query_set(&self, set: Resource) {} + unsafe fn create_fence(&self) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn destroy_fence(&self, fence: Resource) {} + unsafe fn get_fence_value(&self, fence: &Resource) -> DeviceResult<crate::FenceValue> { + Ok(0) + } + unsafe fn wait( + &self, + fence: &Resource, + value: crate::FenceValue, + timeout_ms: u32, + ) -> DeviceResult<bool> { + Ok(true) + } + + unsafe fn start_capture(&self) -> bool { + false + } + unsafe fn stop_capture(&self) {} +} + +impl crate::CommandEncoder<Api> for Encoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> DeviceResult<()> { + Ok(()) + } + unsafe fn discard_encoding(&mut self) {} + unsafe fn end_encoding(&mut self) -> DeviceResult<Resource> { + Ok(Resource) + } + unsafe fn reset_all<I>(&mut self, command_buffers: I) {} + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, Api>>, + { + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, Api>>, + { + } + + unsafe fn clear_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange) {} + + unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + unsafe fn copy_external_image_to_texture<T>( + &mut self, + src: &wgt::ImageCopyExternalImage, + dst: &Resource, + dst_premultiplication: bool, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + dst: &Resource, + regions: T, + ) { + } + + unsafe fn copy_buffer_to_texture<T>(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + dst: &Resource, + regions: T, + ) { + } + + unsafe fn begin_query(&mut self, set: &Resource, index: u32) {} + unsafe fn end_query(&mut self, set: &Resource, index: u32) {} + unsafe fn write_timestamp(&mut self, set: &Resource, index: u32) {} + unsafe fn reset_queries(&mut self, set: &Resource, range: Range<u32>) {} + unsafe fn copy_query_results( + &mut self, + set: &Resource, + range: Range<u32>, + buffer: &Resource, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<Api>) {} + unsafe fn end_render_pass(&mut self) {} + + unsafe fn set_bind_group( + &mut self, + layout: &Resource, + index: u32, + group: &Resource, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + } + unsafe fn set_push_constants( + &mut self, + layout: &Resource, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + } + + unsafe fn insert_debug_marker(&mut self, label: &str) {} + unsafe fn begin_debug_marker(&mut self, group_label: &str) {} + unsafe fn end_debug_marker(&mut self) {} + + unsafe fn set_render_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, Api>, + format: wgt::IndexFormat, + ) { + } + unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: 
crate::BufferBinding<'a, Api>) { + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) {} + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) {} + unsafe fn set_stencil_reference(&mut self, value: u32) {} + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) {} + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} + unsafe fn end_compute_pass(&mut self) {} + + unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn dispatch(&mut self, count: [u32; 3]) {} + unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} +} diff --git a/third_party/rust/wgpu-hal/src/gles/adapter.rs b/third_party/rust/wgpu-hal/src/gles/adapter.rs new file mode 100644 index 0000000000..b14857ae22 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/adapter.rs @@ -0,0 +1,972 @@ +use glow::HasContext; +use std::sync::Arc; +use wgt::AstcChannel; + +use crate::auxil::db; + +// https://webgl2fundamentals.org/webgl/lessons/webgl-data-textures.html + +const GL_UNMASKED_VENDOR_WEBGL: u32 = 0x9245; +const GL_UNMASKED_RENDERER_WEBGL: u32 = 0x9246; + +impl super::Adapter { + /// According to the OpenGL specification, the version information is + /// expected to follow the following syntax: + /// + /// ~~~bnf + /// <major> ::= <number> + /// <minor> ::= <number> + /// <revision> ::= <number> + /// <vendor-info> ::= <string> + /// <release> ::= <major> "." <minor> ["." <release>] + /// <version> ::= <release> [" " <vendor-info>] + /// ~~~ + /// + /// Note that this function is intentionally lenient in regards to parsing, + /// and will try to recover at least the first two version numbers without + /// resulting in an `Err`. 
+ /// # Notes + /// `WebGL 2` version returned as `OpenGL ES 3.0` + fn parse_version(mut src: &str) -> Result<(u8, u8), crate::InstanceError> { + let webgl_sig = "WebGL "; + // According to the WebGL specification + // VERSION WebGL<space>1.0<space><vendor-specific information> + // SHADING_LANGUAGE_VERSION WebGL<space>GLSL<space>ES<space>1.0<space><vendor-specific information> + let is_webgl = src.starts_with(webgl_sig); + if is_webgl { + let pos = src.rfind(webgl_sig).unwrap_or(0); + src = &src[pos + webgl_sig.len()..]; + } else { + let es_sig = " ES "; + match src.rfind(es_sig) { + Some(pos) => { + src = &src[pos + es_sig.len()..]; + } + None => { + log::warn!("ES not found in '{}'", src); + return Err(crate::InstanceError); + } + } + }; + + let glsl_es_sig = "GLSL ES "; + let is_glsl = match src.find(glsl_es_sig) { + Some(pos) => { + src = &src[pos + glsl_es_sig.len()..]; + true + } + None => false, + }; + + let (version, _vendor_info) = match src.find(' ') { + Some(i) => (&src[..i], src[i + 1..].to_string()), + None => (src, String::new()), + }; + + // TODO: make this even more lenient so that we can also accept + // `<major> "." <minor> [<???>]` + let mut it = version.split('.'); + let major = it.next().and_then(|s| s.parse().ok()); + let minor = it.next().and_then(|s| { + let trimmed = if s.starts_with('0') { + "0" + } else { + s.trim_end_matches('0') + }; + trimmed.parse().ok() + }); + + match (major, minor) { + (Some(major), Some(minor)) => Ok(( + // Return WebGL 2.0 version as OpenGL ES 3.0 + if is_webgl && !is_glsl { + major + 1 + } else { + major + }, + minor, + )), + _ => { + log::warn!("Unable to extract the version from '{}'", version); + Err(crate::InstanceError) + } + } + } + + fn make_info(vendor_orig: String, renderer_orig: String) -> wgt::AdapterInfo { + let vendor = vendor_orig.to_lowercase(); + let renderer = renderer_orig.to_lowercase(); + + // opengl has no way to discern device_type, so we can try to infer it from the renderer string + let strings_that_imply_integrated = [ + " xpress", // space here is on purpose so we don't match express + "amd renoir", + "radeon hd 4200", + "radeon hd 4250", + "radeon hd 4290", + "radeon hd 4270", + "radeon hd 4225", + "radeon hd 3100", + "radeon hd 3200", + "radeon hd 3000", + "radeon hd 3300", + "radeon(tm) r4 graphics", + "radeon(tm) r5 graphics", + "radeon(tm) r6 graphics", + "radeon(tm) r7 graphics", + "radeon r7 graphics", + "nforce", // all nvidia nforce are integrated + "tegra", // all nvidia tegra are integrated + "shield", // all nvidia shield are integrated + "igp", + "mali", + "intel", + "v3d", + "apple m", // all apple m are integrated + ]; + let strings_that_imply_cpu = ["mesa offscreen", "swiftshader", "llvmpipe"]; + + //TODO: handle Intel Iris XE as discreet + let inferred_device_type = if vendor.contains("qualcomm") + || vendor.contains("intel") + || strings_that_imply_integrated + .iter() + .any(|&s| renderer.contains(s)) + { + wgt::DeviceType::IntegratedGpu + } else if strings_that_imply_cpu.iter().any(|&s| renderer.contains(s)) { + wgt::DeviceType::Cpu + } else { + // At this point the Device type is Unknown. + // It's most likely DiscreteGpu, but we do not know for sure. + // Use "Other" to avoid possibly making incorrect assumptions. + // Note that if this same device is available under some other API (ex: Vulkan), + // It will mostly likely get a different device type (probably DiscreteGpu). 
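// Illustrative aside (not part of the upstream diff): OpenGL exposes no device
// type, so `make_info` above infers one from lowercased vendor/renderer
// substrings: known iGPU/SoC names imply IntegratedGpu, software rasterizers
// imply Cpu, and anything else is reported as Other rather than guessing
// DiscreteGpu. A dependency-free sketch of that classification; the hint lists
// are abbreviated and the sample strings below are only plausible examples:
#[derive(Debug, PartialEq)]
enum DeviceType { IntegratedGpu, Cpu, Other }

fn infer_device_type(vendor: &str, renderer: &str) -> DeviceType {
    let vendor = vendor.to_lowercase();
    let renderer = renderer.to_lowercase();
    let integrated_hints = ["intel", "mali", "tegra", "apple m"];
    let cpu_hints = ["llvmpipe", "swiftshader", "mesa offscreen"];

    if vendor.contains("qualcomm")
        || vendor.contains("intel")
        || integrated_hints.iter().any(|&s| renderer.contains(s))
    {
        DeviceType::IntegratedGpu
    } else if cpu_hints.iter().any(|&s| renderer.contains(s)) {
        DeviceType::Cpu
    } else {
        // Probably a discrete GPU, but not provably so; avoid a wrong guess.
        DeviceType::Other
    }
}

fn main() {
    assert_eq!(infer_device_type("Mesa", "llvmpipe (LLVM 15.0.7, 256 bits)"), DeviceType::Cpu);
    assert_eq!(infer_device_type("ARM", "Mali-G78"), DeviceType::IntegratedGpu);
}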
+ wgt::DeviceType::Other + }; + + // source: Sascha Willems at Vulkan + let vendor_id = if vendor.contains("amd") { + db::amd::VENDOR + } else if vendor.contains("imgtec") { + db::imgtec::VENDOR + } else if vendor.contains("nvidia") { + db::nvidia::VENDOR + } else if vendor.contains("arm") { + db::arm::VENDOR + } else if vendor.contains("qualcomm") { + db::qualcomm::VENDOR + } else if vendor.contains("intel") { + db::intel::VENDOR + } else if vendor.contains("broadcom") { + db::broadcom::VENDOR + } else if vendor.contains("mesa") { + db::mesa::VENDOR + } else if vendor.contains("apple") { + db::apple::VENDOR + } else { + 0 + }; + + wgt::AdapterInfo { + name: renderer_orig, + vendor: vendor_id, + device: 0, + device_type: inferred_device_type, + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Gl, + } + } + + pub(super) unsafe fn expose( + context: super::AdapterContext, + ) -> Option<crate::ExposedAdapter<super::Api>> { + let gl = context.lock(); + let extensions = gl.supported_extensions(); + + let (vendor_const, renderer_const) = if extensions.contains("WEBGL_debug_renderer_info") { + // emscripten doesn't enable "WEBGL_debug_renderer_info" extension by default. so, we do it manually. + // See https://github.com/gfx-rs/wgpu/issues/3245 for context + #[cfg(target_os = "emscripten")] + if unsafe { super::emscripten::enable_extension("WEBGL_debug_renderer_info\0") } { + (GL_UNMASKED_VENDOR_WEBGL, GL_UNMASKED_RENDERER_WEBGL) + } else { + (glow::VENDOR, glow::RENDERER) + } + // glow already enables WEBGL_debug_renderer_info on wasm32-unknown-unknown target by default. + #[cfg(not(target_os = "emscripten"))] + (GL_UNMASKED_VENDOR_WEBGL, GL_UNMASKED_RENDERER_WEBGL) + } else { + (glow::VENDOR, glow::RENDERER) + }; + + let (vendor, renderer) = { + let vendor = unsafe { gl.get_parameter_string(vendor_const) }; + let renderer = unsafe { gl.get_parameter_string(renderer_const) }; + + (vendor, renderer) + }; + let version = unsafe { gl.get_parameter_string(glow::VERSION) }; + log::info!("Vendor: {}", vendor); + log::info!("Renderer: {}", renderer); + log::info!("Version: {}", version); + + log::debug!("Extensions: {:#?}", extensions); + + let ver = Self::parse_version(&version).ok()?; + if ver < (3, 0) { + log::warn!( + "Returned GLES context is {}.{}, when 3.0+ was requested", + ver.0, + ver.1 + ); + return None; + } + + let supports_storage = ver >= (3, 1); + let supports_work_group_params = ver >= (3, 1); + + let shading_language_version = { + let sl_version = unsafe { gl.get_parameter_string(glow::SHADING_LANGUAGE_VERSION) }; + log::info!("SL version: {}", &sl_version); + let (sl_major, sl_minor) = Self::parse_version(&sl_version).ok()?; + let value = sl_major as u16 * 100 + sl_minor as u16 * 10; + naga::back::glsl::Version::Embedded { + version: value, + is_webgl: cfg!(target_arch = "wasm32"), + } + }; + + // ANGLE provides renderer strings like: "ANGLE (Apple, Apple M1 Pro, OpenGL 4.1)" + let is_angle = renderer.contains("ANGLE"); + + let vertex_shader_storage_blocks = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_SHADER_STORAGE_BLOCKS) } as u32) + } else { + 0 + }; + let fragment_shader_storage_blocks = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_FRAGMENT_SHADER_STORAGE_BLOCKS) } as u32) + } else { + 0 + }; + let vertex_shader_storage_textures = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_IMAGE_UNIFORMS) } as u32) + } else { + 0 + }; + let fragment_shader_storage_textures = if 
supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_FRAGMENT_IMAGE_UNIFORMS) } as u32) + } else { + 0 + }; + let max_storage_block_size = if supports_storage { + (unsafe { gl.get_parameter_i32(glow::MAX_SHADER_STORAGE_BLOCK_SIZE) } as u32) + } else { + 0 + }; + let max_element_index = unsafe { gl.get_parameter_i32(glow::MAX_ELEMENT_INDEX) } as u32; + + // WORKAROUND: In order to work around an issue with GL on RPI4 and similar, we ignore a + // zero vertex ssbo count if there are vertex sstos. (more info: + // https://github.com/gfx-rs/wgpu/pull/1607#issuecomment-874938961) The hardware does not + // want us to write to these SSBOs, but GLES cannot express that. We detect this case and + // disable writing to SSBOs. + let vertex_ssbo_false_zero = + vertex_shader_storage_blocks == 0 && vertex_shader_storage_textures != 0; + if vertex_ssbo_false_zero { + // We only care about fragment here as the 0 is a lie. + log::warn!("Max vertex shader SSBO == 0 and SSTO != 0. Interpreting as false zero."); + } + + let max_storage_buffers_per_shader_stage = if vertex_shader_storage_blocks == 0 { + fragment_shader_storage_blocks + } else { + vertex_shader_storage_blocks.min(fragment_shader_storage_blocks) + }; + let max_storage_textures_per_shader_stage = if vertex_shader_storage_textures == 0 { + fragment_shader_storage_textures + } else { + vertex_shader_storage_textures.min(fragment_shader_storage_textures) + }; + + let mut downlevel_flags = wgt::DownlevelFlags::empty() + | wgt::DownlevelFlags::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES + | wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES + | wgt::DownlevelFlags::COMPARISON_SAMPLERS; + downlevel_flags.set(wgt::DownlevelFlags::COMPUTE_SHADERS, ver >= (3, 1)); + downlevel_flags.set( + wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE, + max_storage_block_size != 0, + ); + downlevel_flags.set(wgt::DownlevelFlags::INDIRECT_EXECUTION, ver >= (3, 1)); + //TODO: we can actually support positive `base_vertex` in the same way + // as we emulate the `start_instance`. But we can't deal with negatives... 
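// Illustrative aside (not part of the upstream diff): the RPi4-style workaround
// above treats a reported vertex-stage SSBO count of zero as suspect whenever
// vertex-stage storage *textures* are non-zero, and the per-stage limit is the
// minimum of the two shader stages except when the vertex count is that "false
// zero". A dependency-free sketch of exactly that reduction:
fn per_stage_storage_limit(vertex_blocks: u32, fragment_blocks: u32) -> u32 {
    if vertex_blocks == 0 {
        // Either the vertex stage truly has no SSBOs, or the driver reported a
        // false zero; in both cases only the fragment count is meaningful here.
        fragment_blocks
    } else {
        vertex_blocks.min(fragment_blocks)
    }
}

fn main() {
    assert_eq!(per_stage_storage_limit(0, 4), 4); // false-zero (or vertex-less) case
    assert_eq!(per_stage_storage_limit(8, 4), 4); // ordinary minimum across stages
}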
+ downlevel_flags.set(wgt::DownlevelFlags::BASE_VERTEX, ver >= (3, 2)); + downlevel_flags.set( + wgt::DownlevelFlags::INDEPENDENT_BLEND, + ver >= (3, 2) || extensions.contains("GL_EXT_draw_buffers_indexed"), + ); + downlevel_flags.set( + wgt::DownlevelFlags::VERTEX_STORAGE, + max_storage_block_size != 0 + && max_storage_buffers_per_shader_stage != 0 + && (vertex_shader_storage_blocks != 0 || vertex_ssbo_false_zero), + ); + downlevel_flags.set(wgt::DownlevelFlags::FRAGMENT_STORAGE, supports_storage); + if extensions.contains("EXT_texture_filter_anisotropic") { + let max_aniso = + unsafe { gl.get_parameter_i32(glow::MAX_TEXTURE_MAX_ANISOTROPY_EXT) } as u32; + downlevel_flags.set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, max_aniso >= 16); + } + downlevel_flags.set( + wgt::DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED, + !(cfg!(target_arch = "wasm32") || is_angle), + ); + // see https://registry.khronos.org/webgl/specs/latest/2.0/#BUFFER_OBJECT_BINDING + downlevel_flags.set( + wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER, + !cfg!(target_arch = "wasm32"), + ); + downlevel_flags.set( + wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES, + !cfg!(target_arch = "wasm32"), + ); + downlevel_flags.set( + wgt::DownlevelFlags::FULL_DRAW_INDEX_UINT32, + max_element_index == u32::MAX, + ); + downlevel_flags.set( + wgt::DownlevelFlags::MULTISAMPLED_SHADING, + ver >= (3, 2) || extensions.contains("OES_sample_variables"), + ); + + let mut features = wgt::Features::empty() + | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | wgt::Features::CLEAR_TEXTURE + | wgt::Features::PUSH_CONSTANTS; + features.set( + wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER | wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO, + extensions.contains("GL_EXT_texture_border_clamp"), + ); + features.set( + wgt::Features::DEPTH_CLIP_CONTROL, + extensions.contains("GL_EXT_depth_clamp"), + ); + features.set( + wgt::Features::VERTEX_WRITABLE_STORAGE, + downlevel_flags.contains(wgt::DownlevelFlags::VERTEX_STORAGE) + && vertex_shader_storage_textures != 0, + ); + features.set( + wgt::Features::MULTIVIEW, + extensions.contains("OVR_multiview2"), + ); + features.set( + wgt::Features::SHADER_PRIMITIVE_INDEX, + ver >= (3, 2) || extensions.contains("OES_geometry_shader"), + ); + features.set(wgt::Features::SHADER_EARLY_DEPTH_TEST, ver >= (3, 1)); + let gles_bcn_exts = [ + "GL_EXT_texture_compression_s3tc_srgb", + "GL_EXT_texture_compression_rgtc", + "GL_EXT_texture_compression_bptc", + ]; + let webgl_bcn_exts = [ + "WEBGL_compressed_texture_s3tc", + "WEBGL_compressed_texture_s3tc_srgb", + "EXT_texture_compression_rgtc", + "EXT_texture_compression_bptc", + ]; + let bcn_exts = if cfg!(target_arch = "wasm32") { + &webgl_bcn_exts[..] + } else { + &gles_bcn_exts[..] 
+ }; + features.set( + wgt::Features::TEXTURE_COMPRESSION_BC, + bcn_exts.iter().all(|&ext| extensions.contains(ext)), + ); + features.set( + wgt::Features::TEXTURE_COMPRESSION_ETC2, + // This is a part of GLES-3 but not WebGL2 core + !cfg!(target_arch = "wasm32") || extensions.contains("WEBGL_compressed_texture_etc"), + ); + // `OES_texture_compression_astc` provides 2D + 3D, LDR + HDR support + if extensions.contains("WEBGL_compressed_texture_astc") + || extensions.contains("GL_OES_texture_compression_astc") + { + features.insert(wgt::Features::TEXTURE_COMPRESSION_ASTC); + features.insert(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR); + } else { + features.set( + wgt::Features::TEXTURE_COMPRESSION_ASTC, + extensions.contains("GL_KHR_texture_compression_astc_ldr"), + ); + features.set( + wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR, + extensions.contains("GL_KHR_texture_compression_astc_hdr"), + ); + } + + let mut private_caps = super::PrivateCapabilities::empty(); + private_caps.set( + super::PrivateCapabilities::BUFFER_ALLOCATION, + extensions.contains("GL_EXT_buffer_storage"), + ); + private_caps.set( + super::PrivateCapabilities::SHADER_BINDING_LAYOUT, + ver >= (3, 1), + ); + private_caps.set( + super::PrivateCapabilities::SHADER_TEXTURE_SHADOW_LOD, + extensions.contains("GL_EXT_texture_shadow_lod"), + ); + private_caps.set(super::PrivateCapabilities::MEMORY_BARRIERS, ver >= (3, 1)); + private_caps.set( + super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT, + ver >= (3, 1), + ); + private_caps.set( + super::PrivateCapabilities::INDEX_BUFFER_ROLE_CHANGE, + !cfg!(target_arch = "wasm32"), + ); + private_caps.set( + super::PrivateCapabilities::CAN_DISABLE_DRAW_BUFFER, + !cfg!(target_arch = "wasm32"), + ); + private_caps.set( + super::PrivateCapabilities::GET_BUFFER_SUB_DATA, + cfg!(target_arch = "wasm32"), + ); + let color_buffer_float = extensions.contains("GL_EXT_color_buffer_float") + || extensions.contains("EXT_color_buffer_float"); + let color_buffer_half_float = extensions.contains("GL_EXT_color_buffer_half_float"); + private_caps.set( + super::PrivateCapabilities::COLOR_BUFFER_HALF_FLOAT, + color_buffer_half_float || color_buffer_float, + ); + private_caps.set( + super::PrivateCapabilities::COLOR_BUFFER_FLOAT, + color_buffer_float, + ); + private_caps.set( + super::PrivateCapabilities::TEXTURE_FLOAT_LINEAR, + extensions.contains("OES_texture_float_linear"), + ); + + let max_texture_size = unsafe { gl.get_parameter_i32(glow::MAX_TEXTURE_SIZE) } as u32; + let max_texture_3d_size = unsafe { gl.get_parameter_i32(glow::MAX_3D_TEXTURE_SIZE) } as u32; + + let min_uniform_buffer_offset_alignment = + (unsafe { gl.get_parameter_i32(glow::UNIFORM_BUFFER_OFFSET_ALIGNMENT) } as u32); + let min_storage_buffer_offset_alignment = if ver >= (3, 1) { + (unsafe { gl.get_parameter_i32(glow::SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT) } as u32) + } else { + 256 + }; + let max_uniform_buffers_per_shader_stage = + unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_UNIFORM_BLOCKS) } + .min(unsafe { gl.get_parameter_i32(glow::MAX_FRAGMENT_UNIFORM_BLOCKS) }) + as u32; + + let max_compute_workgroups_per_dimension = if supports_work_group_params { + unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 0) } + .min(unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 1) }) + .min(unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 2) }) + as u32 + } else { + 0 + }; + + let limits = wgt::Limits { + max_texture_dimension_1d: max_texture_size, + 
max_texture_dimension_2d: max_texture_size, + max_texture_dimension_3d: max_texture_3d_size, + max_texture_array_layers: unsafe { + gl.get_parameter_i32(glow::MAX_ARRAY_TEXTURE_LAYERS) + } as u32, + max_bind_groups: crate::MAX_BIND_GROUPS as u32, + max_bindings_per_bind_group: 65535, + max_dynamic_uniform_buffers_per_pipeline_layout: max_uniform_buffers_per_shader_stage, + max_dynamic_storage_buffers_per_pipeline_layout: max_storage_buffers_per_shader_stage, + max_sampled_textures_per_shader_stage: super::MAX_TEXTURE_SLOTS as u32, + max_samplers_per_shader_stage: super::MAX_SAMPLERS as u32, + max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage, + max_uniform_buffers_per_shader_stage, + max_uniform_buffer_binding_size: unsafe { + gl.get_parameter_i32(glow::MAX_UNIFORM_BLOCK_SIZE) + } as u32, + max_storage_buffer_binding_size: if ver >= (3, 1) { + unsafe { gl.get_parameter_i32(glow::MAX_SHADER_STORAGE_BLOCK_SIZE) } + } else { + 0 + } as u32, + max_vertex_buffers: if private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIB_BINDINGS) } as u32) + } else { + 16 // should this be different? + }, + max_vertex_attributes: (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIBS) } + as u32) + .min(super::MAX_VERTEX_ATTRIBUTES as u32), + max_vertex_buffer_array_stride: if private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIB_STRIDE) } as u32) + } else { + !0 + }, + max_push_constant_size: super::MAX_PUSH_CONSTANTS as u32 * 4, + min_uniform_buffer_offset_alignment, + min_storage_buffer_offset_alignment, + max_inter_stage_shader_components: unsafe { + gl.get_parameter_i32(glow::MAX_VARYING_COMPONENTS) + } as u32, + max_compute_workgroup_storage_size: if supports_work_group_params { + (unsafe { gl.get_parameter_i32(glow::MAX_COMPUTE_SHARED_MEMORY_SIZE) } as u32) + } else { + 0 + }, + max_compute_invocations_per_workgroup: if supports_work_group_params { + (unsafe { gl.get_parameter_i32(glow::MAX_COMPUTE_WORK_GROUP_INVOCATIONS) } as u32) + } else { + 0 + }, + max_compute_workgroup_size_x: if supports_work_group_params { + (unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 0) } + as u32) + } else { + 0 + }, + max_compute_workgroup_size_y: if supports_work_group_params { + (unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 1) } + as u32) + } else { + 0 + }, + max_compute_workgroup_size_z: if supports_work_group_params { + (unsafe { gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 2) } + as u32) + } else { + 0 + }, + max_compute_workgroups_per_dimension, + max_buffer_size: i32::MAX as u64, + }; + + let mut workarounds = super::Workarounds::empty(); + + workarounds.set( + super::Workarounds::EMULATE_BUFFER_MAP, + cfg!(target_arch = "wasm32"), + ); + + let r = renderer.to_lowercase(); + // Check for Mesa sRGB clear bug. See + // [`super::PrivateCapabilities::MESA_I915_SRGB_SHADER_CLEAR`]. + if context.is_owned() + && r.contains("mesa") + && r.contains("intel") + && r.split(&[' ', '(', ')'][..]) + .any(|substr| substr.len() == 3 && substr.chars().nth(2) == Some('l')) + { + log::warn!( + "Detected skylake derivative running on mesa i915. Clears to srgb textures will \ + use manual shader clears." 
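// Illustrative aside (not part of the upstream diff): the Mesa i915 sRGB-clear
// workaround above is keyed off the renderer string: it must mention both
// "mesa" and "intel" and contain a three-letter token ending in 'l' (Skylake-
// derivative codenames such as "skl" or "kbl"). A standalone sketch of that
// token check; the sample renderer strings are illustrative, not captured from
// real drivers:
fn looks_like_mesa_i915_skylake(renderer: &str) -> bool {
    let r = renderer.to_lowercase();
    r.contains("mesa")
        && r.contains("intel")
        && r.split(&[' ', '(', ')'][..])
            .any(|tok| tok.len() == 3 && tok.chars().nth(2) == Some('l'))
}

fn main() {
    assert!(looks_like_mesa_i915_skylake("Mesa Intel(R) UHD Graphics 620 (KBL GT2)"));
    assert!(!looks_like_mesa_i915_skylake("NVIDIA GeForce RTX 3060/PCIe/SSE2"));
}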
+ ); + workarounds.set(super::Workarounds::MESA_I915_SRGB_SHADER_CLEAR, true); + } + + let downlevel_defaults = wgt::DownlevelLimits {}; + + // Drop the GL guard so we can move the context into AdapterShared + // ( on WASM the gl handle is just a ref so we tell clippy to allow + // dropping the ref ) + #[allow(clippy::drop_ref)] + drop(gl); + + Some(crate::ExposedAdapter { + adapter: super::Adapter { + shared: Arc::new(super::AdapterShared { + context, + private_caps, + workarounds, + features, + shading_language_version, + max_texture_size, + next_shader_id: Default::default(), + program_cache: Default::default(), + }), + }, + info: Self::make_info(vendor, renderer), + features, + capabilities: crate::Capabilities { + limits, + downlevel: wgt::DownlevelCapabilities { + flags: downlevel_flags, + limits: downlevel_defaults, + shader_model: wgt::ShaderModel::Sm5, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(4).unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(), + }, + }, + }) + } + + unsafe fn create_shader_clear_program( + gl: &glow::Context, + ) -> (glow::Program, glow::UniformLocation) { + let program = unsafe { gl.create_program() }.expect("Could not create shader program"); + let vertex = + unsafe { gl.create_shader(glow::VERTEX_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(vertex, include_str!("./shaders/clear.vert")) }; + unsafe { gl.compile_shader(vertex) }; + let fragment = + unsafe { gl.create_shader(glow::FRAGMENT_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(fragment, include_str!("./shaders/clear.frag")) }; + unsafe { gl.compile_shader(fragment) }; + unsafe { gl.attach_shader(program, vertex) }; + unsafe { gl.attach_shader(program, fragment) }; + unsafe { gl.link_program(program) }; + let color_uniform_location = unsafe { gl.get_uniform_location(program, "color") } + .expect("Could not find color uniform in shader clear shader"); + unsafe { gl.delete_shader(vertex) }; + unsafe { gl.delete_shader(fragment) }; + + (program, color_uniform_location) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let gl = &self.shared.context.lock(); + unsafe { gl.pixel_store_i32(glow::UNPACK_ALIGNMENT, 1) }; + unsafe { gl.pixel_store_i32(glow::PACK_ALIGNMENT, 1) }; + let main_vao = + unsafe { gl.create_vertex_array() }.map_err(|_| crate::DeviceError::OutOfMemory)?; + unsafe { gl.bind_vertex_array(Some(main_vao)) }; + + let zero_buffer = + unsafe { gl.create_buffer() }.map_err(|_| crate::DeviceError::OutOfMemory)?; + unsafe { gl.bind_buffer(glow::COPY_READ_BUFFER, Some(zero_buffer)) }; + let zeroes = vec![0u8; super::ZERO_BUFFER_SIZE]; + unsafe { gl.buffer_data_u8_slice(glow::COPY_READ_BUFFER, &zeroes, glow::STATIC_DRAW) }; + + // Compile the shader program we use for doing manual clears to work around Mesa fastclear + // bug. 
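// Illustrative aside (not part of the upstream diff): `create_shader_clear_program`
// above compiles and links the manual-clear shaders without checking the
// compile/link status. A hedged sketch of the same sequence with status checks
// folded in, generic over the `glow::HasContext` trait the adapter code already
// uses; the shader sources are whatever the caller supplies:
use glow::HasContext;

/// Compile one shader stage, returning the info log on failure.
unsafe fn compile_stage<C: HasContext>(gl: &C, kind: u32, src: &str) -> Result<C::Shader, String> {
    let shader = unsafe { gl.create_shader(kind) }?;
    unsafe { gl.shader_source(shader, src) };
    unsafe { gl.compile_shader(shader) };
    if unsafe { gl.get_shader_compile_status(shader) } {
        Ok(shader)
    } else {
        Err(unsafe { gl.get_shader_info_log(shader) })
    }
}

/// Link a vertex + fragment program, returning the info log on failure.
unsafe fn compile_and_link<C: HasContext>(
    gl: &C,
    vs_src: &str,
    fs_src: &str,
) -> Result<C::Program, String> {
    let vs = unsafe { compile_stage(gl, glow::VERTEX_SHADER, vs_src) }?;
    let fs = unsafe { compile_stage(gl, glow::FRAGMENT_SHADER, fs_src) }?;
    let program = unsafe { gl.create_program() }?;
    unsafe { gl.attach_shader(program, vs) };
    unsafe { gl.attach_shader(program, fs) };
    unsafe { gl.link_program(program) };
    // Shader objects can be deleted once they are attached and linked.
    unsafe { gl.delete_shader(vs) };
    unsafe { gl.delete_shader(fs) };
    if unsafe { gl.get_program_link_status(program) } {
        Ok(program)
    } else {
        Err(unsafe { gl.get_program_info_log(program) })
    }
}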
+ let (shader_clear_program, shader_clear_program_color_uniform_location) = + unsafe { Self::create_shader_clear_program(gl) }; + + Ok(crate::OpenDevice { + device: super::Device { + shared: Arc::clone(&self.shared), + main_vao, + #[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] + render_doc: Default::default(), + }, + queue: super::Queue { + shared: Arc::clone(&self.shared), + features, + draw_fbo: unsafe { gl.create_framebuffer() } + .map_err(|_| crate::DeviceError::OutOfMemory)?, + copy_fbo: unsafe { gl.create_framebuffer() } + .map_err(|_| crate::DeviceError::OutOfMemory)?, + shader_clear_program, + shader_clear_program_color_uniform_location, + zero_buffer, + temp_query_results: Vec::new(), + draw_buffer_count: 1, + current_index_buffer: None, + }, + }) + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + use wgt::TextureFormat as Tf; + + let sample_count = { + let max_samples = unsafe { + self.shared + .context + .lock() + .get_parameter_i32(glow::MAX_SAMPLES) + }; + if max_samples >= 16 { + Tfc::MULTISAMPLE_X2 + | Tfc::MULTISAMPLE_X4 + | Tfc::MULTISAMPLE_X8 + | Tfc::MULTISAMPLE_X16 + } else if max_samples >= 8 { + Tfc::MULTISAMPLE_X2 | Tfc::MULTISAMPLE_X4 | Tfc::MULTISAMPLE_X8 + } else if max_samples >= 4 { + Tfc::MULTISAMPLE_X2 | Tfc::MULTISAMPLE_X4 + } else { + Tfc::MULTISAMPLE_X2 + } + }; + + // Base types are pulled from the table in the OpenGLES 3.0 spec in section 3.8. + // + // The storage types are based on table 8.26, in section + // "TEXTURE IMAGE LOADS AND STORES" of OpenGLES-3.2 spec. + let empty = Tfc::empty(); + let base = Tfc::COPY_SRC | Tfc::COPY_DST; + let unfilterable = base | Tfc::SAMPLED; + let depth = base | Tfc::SAMPLED | sample_count | Tfc::DEPTH_STENCIL_ATTACHMENT; + let filterable = unfilterable | Tfc::SAMPLED_LINEAR; + let renderable = + unfilterable | Tfc::COLOR_ATTACHMENT | sample_count | Tfc::MULTISAMPLE_RESOLVE; + let filterable_renderable = filterable | renderable | Tfc::COLOR_ATTACHMENT_BLEND; + let storage = base | Tfc::STORAGE | Tfc::STORAGE_READ_WRITE; + + let feature_fn = |f, caps| { + if self.shared.features.contains(f) { + caps + } else { + empty + } + }; + + let bcn_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_BC, filterable); + let etc2_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_ETC2, filterable); + let astc_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_ASTC, filterable); + let astc_hdr_features = feature_fn(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR, filterable); + + let private_caps_fn = |f, caps| { + if self.shared.private_caps.contains(f) { + caps + } else { + empty + } + }; + + let half_float_renderable = private_caps_fn( + super::PrivateCapabilities::COLOR_BUFFER_HALF_FLOAT, + Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | sample_count + | Tfc::MULTISAMPLE_RESOLVE, + ); + + let float_renderable = private_caps_fn( + super::PrivateCapabilities::COLOR_BUFFER_FLOAT, + Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | sample_count + | Tfc::MULTISAMPLE_RESOLVE, + ); + + let texture_float_linear = + private_caps_fn(super::PrivateCapabilities::TEXTURE_FLOAT_LINEAR, filterable); + + match format { + Tf::R8Unorm => filterable_renderable, + Tf::R8Snorm => filterable, + Tf::R8Uint => renderable, + Tf::R8Sint => renderable, + Tf::R16Uint => renderable, + Tf::R16Sint => renderable, + Tf::R16Unorm => empty, + Tf::R16Snorm => empty, + Tf::R16Float => filterable 
| half_float_renderable, + Tf::Rg8Unorm => filterable_renderable, + Tf::Rg8Snorm => filterable, + Tf::Rg8Uint => renderable, + Tf::Rg8Sint => renderable, + Tf::R32Uint => renderable | storage, + Tf::R32Sint => renderable | storage, + Tf::R32Float => unfilterable | storage | float_renderable | texture_float_linear, + Tf::Rg16Uint => renderable, + Tf::Rg16Sint => renderable, + Tf::Rg16Unorm => empty, + Tf::Rg16Snorm => empty, + Tf::Rg16Float => filterable | half_float_renderable, + Tf::Rgba8Unorm | Tf::Rgba8UnormSrgb => filterable_renderable | storage, + Tf::Bgra8Unorm | Tf::Bgra8UnormSrgb => filterable_renderable, + Tf::Rgba8Snorm => filterable, + Tf::Rgba8Uint => renderable | storage, + Tf::Rgba8Sint => renderable | storage, + Tf::Rgb10a2Unorm => filterable_renderable, + Tf::Rg11b10Float => filterable | float_renderable, + Tf::Rg32Uint => renderable, + Tf::Rg32Sint => renderable, + Tf::Rg32Float => unfilterable | float_renderable | texture_float_linear, + Tf::Rgba16Uint => renderable | storage, + Tf::Rgba16Sint => renderable | storage, + Tf::Rgba16Unorm => empty, + Tf::Rgba16Snorm => empty, + Tf::Rgba16Float => filterable | storage | half_float_renderable, + Tf::Rgba32Uint => renderable | storage, + Tf::Rgba32Sint => renderable | storage, + Tf::Rgba32Float => unfilterable | storage | float_renderable | texture_float_linear, + Tf::Stencil8 + | Tf::Depth16Unorm + | Tf::Depth32Float + | Tf::Depth32FloatStencil8 + | Tf::Depth24Plus + | Tf::Depth24PlusStencil8 => depth, + Tf::Rgb9e5Ufloat => filterable, + Tf::Bc1RgbaUnorm + | Tf::Bc1RgbaUnormSrgb + | Tf::Bc2RgbaUnorm + | Tf::Bc2RgbaUnormSrgb + | Tf::Bc3RgbaUnorm + | Tf::Bc3RgbaUnormSrgb + | Tf::Bc4RUnorm + | Tf::Bc4RSnorm + | Tf::Bc5RgUnorm + | Tf::Bc5RgSnorm + | Tf::Bc6hRgbFloat + | Tf::Bc6hRgbUfloat + | Tf::Bc7RgbaUnorm + | Tf::Bc7RgbaUnormSrgb => bcn_features, + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | Tf::EacRg11Unorm + | Tf::EacRg11Snorm => etc2_features, + Tf::Astc { + block: _, + channel: AstcChannel::Unorm | AstcChannel::UnormSrgb, + } => astc_features, + Tf::Astc { + block: _, + channel: AstcChannel::Hdr, + } => astc_hdr_features, + } + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + if surface.presentable { + let mut formats = vec![ + wgt::TextureFormat::Rgba8Unorm, + #[cfg(not(target_arch = "wasm32"))] + wgt::TextureFormat::Bgra8Unorm, + ]; + if surface.supports_srgb() { + formats.extend([ + wgt::TextureFormat::Rgba8UnormSrgb, + #[cfg(not(target_arch = "wasm32"))] + wgt::TextureFormat::Bgra8UnormSrgb, + ]) + } + if self + .shared + .private_caps + .contains(super::PrivateCapabilities::COLOR_BUFFER_HALF_FLOAT) + { + formats.push(wgt::TextureFormat::Rgba16Float) + } + + Some(crate::SurfaceCapabilities { + formats, + present_modes: vec![wgt::PresentMode::Fifo], //TODO + composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO + swap_chain_sizes: 2..=2, + current_extent: None, + extents: wgt::Extent3d { + width: 4, + height: 4, + depth_or_array_layers: 1, + }..=wgt::Extent3d { + width: self.shared.max_texture_size, + height: self.shared.max_texture_size, + depth_or_array_layers: 1, + }, + usage: crate::TextureUses::COLOR_TARGET, + }) + } else { + None + } + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + wgt::PresentationTimestamp::INVALID_TIMESTAMP + } +} + +impl 
super::AdapterShared { + pub(super) unsafe fn get_buffer_sub_data( + &self, + gl: &glow::Context, + target: u32, + offset: i32, + dst_data: &mut [u8], + ) { + if self + .private_caps + .contains(super::PrivateCapabilities::GET_BUFFER_SUB_DATA) + { + unsafe { gl.get_buffer_sub_data(target, offset, dst_data) }; + } else { + log::error!("Fake map"); + let length = dst_data.len(); + let buffer_mapping = + unsafe { gl.map_buffer_range(target, offset, length as _, glow::MAP_READ_BIT) }; + + unsafe { std::ptr::copy_nonoverlapping(buffer_mapping, dst_data.as_mut_ptr(), length) }; + + unsafe { gl.unmap_buffer(target) }; + } + } +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for super::Adapter {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for super::Adapter {} + +#[cfg(test)] +mod tests { + use super::super::Adapter; + + #[test] + fn test_version_parse() { + let error = Err(crate::InstanceError); + assert_eq!(Adapter::parse_version("1"), error); + assert_eq!(Adapter::parse_version("1."), error); + assert_eq!(Adapter::parse_version("1 h3l1o. W0rld"), error); + assert_eq!(Adapter::parse_version("1. h3l1o. W0rld"), error); + assert_eq!(Adapter::parse_version("1.2.3"), error); + assert_eq!(Adapter::parse_version("OpenGL ES 3.1"), Ok((3, 1))); + assert_eq!( + Adapter::parse_version("OpenGL ES 2.0 Google Nexus"), + Ok((2, 0)) + ); + assert_eq!(Adapter::parse_version("GLSL ES 1.1"), Ok((1, 1))); + assert_eq!(Adapter::parse_version("OpenGL ES GLSL ES 3.20"), Ok((3, 2))); + assert_eq!( + // WebGL 2.0 should parse as OpenGL ES 3.0 + Adapter::parse_version("WebGL 2.0 (OpenGL ES 3.0 Chromium)"), + Ok((3, 0)) + ); + assert_eq!( + Adapter::parse_version("WebGL GLSL ES 3.00 (OpenGL ES GLSL ES 3.0 Chromium)"), + Ok((3, 0)) + ); + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/command.rs b/third_party/rust/wgpu-hal/src/gles/command.rs new file mode 100644 index 0000000000..5e3a1c52c8 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/command.rs @@ -0,0 +1,1063 @@ +use super::{conv, Command as C}; +use arrayvec::ArrayVec; +use std::{mem, ops::Range}; + +#[derive(Clone, Copy, Debug, Default)] +struct TextureSlotDesc { + tex_target: super::BindTarget, + sampler_index: Option<u8>, +} + +#[derive(Default)] +pub(super) struct State { + topology: u32, + primitive: super::PrimitiveState, + index_format: wgt::IndexFormat, + index_offset: wgt::BufferAddress, + vertex_buffers: + [(super::VertexBufferDesc, Option<super::BufferBinding>); crate::MAX_VERTEX_BUFFERS], + vertex_attributes: ArrayVec<super::AttributeDesc, { super::MAX_VERTEX_ATTRIBUTES }>, + color_targets: ArrayVec<super::ColorTargetDesc, { crate::MAX_COLOR_ATTACHMENTS }>, + stencil: super::StencilState, + depth_bias: wgt::DepthBiasState, + alpha_to_coverage_enabled: bool, + samplers: [Option<glow::Sampler>; super::MAX_SAMPLERS], + texture_slots: [TextureSlotDesc; super::MAX_TEXTURE_SLOTS], + render_size: wgt::Extent3d, + resolve_attachments: ArrayVec<(u32, super::TextureView), { crate::MAX_COLOR_ATTACHMENTS }>, + invalidate_attachments: ArrayVec<u32, { crate::MAX_COLOR_ATTACHMENTS + 2 }>, + has_pass_label: bool, + instance_vbuf_mask: usize, + dirty_vbuf_mask: usize, + active_first_instance: u32, + push_offset_to_uniform: ArrayVec<super::UniformDesc, { super::MAX_PUSH_CONSTANTS }>, +} + +impl super::CommandBuffer { + fn clear(&mut self) { + self.label = None; + self.commands.clear(); + self.data_bytes.clear(); + self.queries.clear(); + } + + fn add_marker(&mut self, marker: &str) -> Range<u32> { + let start 
= self.data_bytes.len() as u32; + self.data_bytes.extend(marker.as_bytes()); + start..self.data_bytes.len() as u32 + } + + fn add_push_constant_data(&mut self, data: &[u32]) -> Range<u32> { + let data_raw = unsafe { + std::slice::from_raw_parts( + data.as_ptr() as *const _, + data.len() * mem::size_of::<u32>(), + ) + }; + let start = self.data_bytes.len(); + assert!(start < u32::MAX as usize); + self.data_bytes.extend_from_slice(data_raw); + let end = self.data_bytes.len(); + assert!(end < u32::MAX as usize); + (start as u32)..(end as u32) + } +} + +impl super::CommandEncoder { + fn rebind_stencil_func(&mut self) { + fn make(s: &super::StencilSide, face: u32) -> C { + C::SetStencilFunc { + face, + function: s.function, + reference: s.reference, + read_mask: s.mask_read, + } + } + + let s = &self.state.stencil; + if s.front.function == s.back.function + && s.front.mask_read == s.back.mask_read + && s.front.reference == s.back.reference + { + self.cmd_buffer + .commands + .push(make(&s.front, glow::FRONT_AND_BACK)); + } else { + self.cmd_buffer.commands.push(make(&s.front, glow::FRONT)); + self.cmd_buffer.commands.push(make(&s.back, glow::BACK)); + } + } + + fn rebind_vertex_data(&mut self, first_instance: u32) { + if self + .private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + for (index, pair) in self.state.vertex_buffers.iter().enumerate() { + if self.state.dirty_vbuf_mask & (1 << index) == 0 { + continue; + } + let (buffer_desc, vb) = match *pair { + // Not all dirty bindings are necessarily filled. Some may be unused. + (_, None) => continue, + (ref vb_desc, Some(ref vb)) => (vb_desc.clone(), vb), + }; + let instance_offset = match buffer_desc.step { + wgt::VertexStepMode::Vertex => 0, + wgt::VertexStepMode::Instance => first_instance * buffer_desc.stride, + }; + + self.cmd_buffer.commands.push(C::SetVertexBuffer { + index: index as u32, + buffer: super::BufferBinding { + raw: vb.raw, + offset: vb.offset + instance_offset as wgt::BufferAddress, + }, + buffer_desc, + }); + self.state.dirty_vbuf_mask ^= 1 << index; + } + } else { + let mut vbuf_mask = 0; + for attribute in self.state.vertex_attributes.iter() { + if self.state.dirty_vbuf_mask & (1 << attribute.buffer_index) == 0 { + continue; + } + let (buffer_desc, vb) = + match self.state.vertex_buffers[attribute.buffer_index as usize] { + // Not all dirty bindings are necessarily filled. Some may be unused. 
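// Illustrative aside (not part of the upstream diff): recorded GLES commands
// above never own label strings or push-constant bytes directly; `add_marker`
// and `add_push_constant_data` append into the shared `data_bytes` buffer and
// the command only stores a `Range<u32>` into it. A minimal standalone sketch
// of that byte-arena pattern with hypothetical names:
use std::ops::Range;

#[derive(Default)]
struct Recorder {
    data_bytes: Vec<u8>,
    commands: Vec<Cmd>,
}

enum Cmd {
    PushDebugGroup(Range<u32>),
}

impl Recorder {
    fn add_bytes(&mut self, bytes: &[u8]) -> Range<u32> {
        let start = self.data_bytes.len() as u32;
        self.data_bytes.extend_from_slice(bytes);
        start..self.data_bytes.len() as u32
    }

    fn push_debug_group(&mut self, label: &str) {
        let range = self.add_bytes(label.as_bytes());
        self.commands.push(Cmd::PushDebugGroup(range));
    }

    /// During replay a stored range is resolved back into a byte slice.
    fn resolve(&self, range: &Range<u32>) -> &[u8] {
        &self.data_bytes[range.start as usize..range.end as usize]
    }
}

fn main() {
    let mut rec = Recorder::default();
    rec.push_debug_group("frame 0");
    let Cmd::PushDebugGroup(range) = &rec.commands[0];
    assert_eq!(rec.resolve(range), b"frame 0");
}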
+ (_, None) => continue, + (ref vb_desc, Some(ref vb)) => (vb_desc.clone(), vb), + }; + + let mut attribute_desc = attribute.clone(); + attribute_desc.offset += vb.offset as u32; + if buffer_desc.step == wgt::VertexStepMode::Instance { + attribute_desc.offset += buffer_desc.stride * first_instance; + } + + self.cmd_buffer.commands.push(C::SetVertexAttribute { + buffer: Some(vb.raw), + buffer_desc, + attribute_desc, + }); + vbuf_mask |= 1 << attribute.buffer_index; + } + self.state.dirty_vbuf_mask ^= vbuf_mask; + } + } + + fn rebind_sampler_states(&mut self, dirty_textures: u32, dirty_samplers: u32) { + for (texture_index, slot) in self.state.texture_slots.iter().enumerate() { + if dirty_textures & (1 << texture_index) != 0 + || slot + .sampler_index + .map_or(false, |si| dirty_samplers & (1 << si) != 0) + { + let sampler = slot + .sampler_index + .and_then(|si| self.state.samplers[si as usize]); + self.cmd_buffer + .commands + .push(C::BindSampler(texture_index as u32, sampler)); + } + } + } + + fn prepare_draw(&mut self, first_instance: u32) { + if first_instance != self.state.active_first_instance { + // rebind all per-instance buffers on first-instance change + self.state.dirty_vbuf_mask |= self.state.instance_vbuf_mask; + self.state.active_first_instance = first_instance; + } + if self.state.dirty_vbuf_mask != 0 { + self.rebind_vertex_data(first_instance); + } + } + + fn set_pipeline_inner(&mut self, inner: &super::PipelineInner) { + self.cmd_buffer.commands.push(C::SetProgram(inner.program)); + + self.state.push_offset_to_uniform.clear(); + self.state + .push_offset_to_uniform + .extend(inner.uniforms.iter().cloned()); + + // rebind textures, if needed + let mut dirty_textures = 0u32; + for (texture_index, (slot, &sampler_index)) in self + .state + .texture_slots + .iter_mut() + .zip(inner.sampler_map.iter()) + .enumerate() + { + if slot.sampler_index != sampler_index { + slot.sampler_index = sampler_index; + dirty_textures |= 1 << texture_index; + } + } + if dirty_textures != 0 { + self.rebind_sampler_states(dirty_textures, 0); + } + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + self.state = State::default(); + self.cmd_buffer.label = label.map(str::to_string); + Ok(()) + } + unsafe fn discard_encoding(&mut self) { + self.cmd_buffer.clear(); + } + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + Ok(mem::take(&mut self.cmd_buffer)) + } + unsafe fn reset_all<I>(&mut self, _command_buffers: I) { + //TODO: could re-use the allocations in all these command buffers + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + if !self + .private_caps + .contains(super::PrivateCapabilities::MEMORY_BARRIERS) + { + return; + } + for bar in barriers { + // GLES only synchronizes storage -> anything explicitly + if !bar + .usage + .start + .contains(crate::BufferUses::STORAGE_READ_WRITE) + { + continue; + } + self.cmd_buffer + .commands + .push(C::BufferBarrier(bar.buffer.raw.unwrap(), bar.usage.end)); + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + if !self + .private_caps + .contains(super::PrivateCapabilities::MEMORY_BARRIERS) + { + return; + } + + let mut combined_usage = crate::TextureUses::empty(); + for bar in barriers { + // GLES only 
synchronizes storage -> anything explicitly + if !bar + .usage + .start + .contains(crate::TextureUses::STORAGE_READ_WRITE) + { + continue; + } + // unlike buffers, there is no need for a concrete texture + // object to be bound anywhere for a barrier + combined_usage |= bar.usage.end; + } + + if !combined_usage.is_empty() { + self.cmd_buffer + .commands + .push(C::TextureBarrier(combined_usage)); + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + self.cmd_buffer.commands.push(C::ClearBuffer { + dst: buffer.clone(), + dst_target: buffer.target, + range, + }); + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let (src_target, dst_target) = if src.target == dst.target { + (glow::COPY_READ_BUFFER, glow::COPY_WRITE_BUFFER) + } else { + (src.target, dst.target) + }; + for copy in regions { + self.cmd_buffer.commands.push(C::CopyBufferToBuffer { + src: src.clone(), + src_target, + dst: dst.clone(), + dst_target, + copy, + }) + } + } + + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + unsafe fn copy_external_image_to_texture<T>( + &mut self, + src: &wgt::ImageCopyExternalImage, + dst: &super::Texture, + dst_premultiplication: bool, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let (dst_raw, dst_target) = dst.inner.as_native(); + for copy in regions { + self.cmd_buffer + .commands + .push(C::CopyExternalImageToTexture { + src: src.clone(), + dst: dst_raw, + dst_target, + dst_format: dst.format, + dst_premultiplication, + copy, + }) + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let (src_raw, src_target) = src.inner.as_native(); + let (dst_raw, dst_target) = dst.inner.as_native(); + for mut copy in regions { + copy.clamp_size_to_virtual(&src.copy_size, &dst.copy_size); + self.cmd_buffer.commands.push(C::CopyTextureToTexture { + src: src_raw, + src_target, + dst: dst_raw, + dst_target, + copy, + dst_is_cubemap: dst.is_cubemap, + }) + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let (dst_raw, dst_target) = dst.inner.as_native(); + + for mut copy in regions { + copy.clamp_size_to_virtual(&dst.copy_size); + self.cmd_buffer.commands.push(C::CopyBufferToTexture { + src: src.clone(), + src_target: src.target, + dst: dst_raw, + dst_target, + dst_format: dst.format, + copy, + }) + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let (src_raw, src_target) = src.inner.as_native(); + for mut copy in regions { + copy.clamp_size_to_virtual(&src.copy_size); + self.cmd_buffer.commands.push(C::CopyTextureToBuffer { + src: src_raw, + src_target, + src_format: src.format, + dst: dst.clone(), + dst_target: dst.target, + copy, + }) + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + let query = set.queries[index as usize]; + self.cmd_buffer + .commands + .push(C::BeginQuery(query, set.target)); + } + unsafe fn end_query(&mut self, set: &super::QuerySet, _index: u32) { + 
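// Illustrative aside (not part of the upstream diff): `transition_buffers` /
// `transition_textures` above only emit a GL memory barrier when the previous
// usage included storage read-write, and all texture transitions are coalesced
// into a single barrier over the union of destination usages. A dependency-free
// sketch of that filter-and-coalesce step, with plain u32 bit flags standing in
// for `crate::TextureUses`:
const STORAGE_READ_WRITE: u32 = 1 << 0;
const COLOR_TARGET: u32 = 1 << 1;
const RESOURCE: u32 = 1 << 2;

struct Transition {
    start: u32,
    end: u32,
}

/// Returns the combined destination usage that needs a barrier, if any.
fn coalesce_texture_barriers(transitions: &[Transition]) -> Option<u32> {
    let mut combined = 0u32;
    for t in transitions {
        // Only prior storage writes need explicit synchronization on GLES.
        if t.start & STORAGE_READ_WRITE == 0 {
            continue;
        }
        combined |= t.end;
    }
    if combined != 0 { Some(combined) } else { None }
}

fn main() {
    let transitions = [
        Transition { start: STORAGE_READ_WRITE, end: RESOURCE },
        Transition { start: COLOR_TARGET, end: RESOURCE }, // filtered out
        Transition { start: STORAGE_READ_WRITE, end: COLOR_TARGET },
    ];
    assert_eq!(coalesce_texture_barriers(&transitions), Some(RESOURCE | COLOR_TARGET));
}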
self.cmd_buffer.commands.push(C::EndQuery(set.target)); + } + unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) { + unimplemented!() + } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range<u32>) { + //TODO: what do we do here? + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _stride: wgt::BufferSize, + ) { + let start = self.cmd_buffer.queries.len(); + self.cmd_buffer + .queries + .extend_from_slice(&set.queries[range.start as usize..range.end as usize]); + let query_range = start as u32..self.cmd_buffer.queries.len() as u32; + self.cmd_buffer.commands.push(C::CopyQueryResults { + query_range, + dst: buffer.clone(), + dst_target: buffer.target, + dst_offset: offset, + }); + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + self.state.render_size = desc.extent; + self.state.resolve_attachments.clear(); + self.state.invalidate_attachments.clear(); + if let Some(label) = desc.label { + let range = self.cmd_buffer.add_marker(label); + self.cmd_buffer.commands.push(C::PushDebugGroup(range)); + self.state.has_pass_label = true; + } + + let rendering_to_external_framebuffer = desc + .color_attachments + .iter() + .filter_map(|at| at.as_ref()) + .any(|at| match at.target.view.inner { + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + super::TextureInner::ExternalFramebuffer { .. } => true, + _ => false, + }); + + if rendering_to_external_framebuffer && desc.color_attachments.len() != 1 { + panic!("Multiple render attachments with external framebuffers are not supported."); + } + + match desc + .color_attachments + .first() + .filter(|at| at.is_some()) + .and_then(|at| at.as_ref().map(|at| &at.target.view.inner)) + { + // default framebuffer (provided externally) + Some(&super::TextureInner::DefaultRenderbuffer) => { + self.cmd_buffer + .commands + .push(C::ResetFramebuffer { is_default: true }); + } + _ => { + // set the framebuffer + self.cmd_buffer + .commands + .push(C::ResetFramebuffer { is_default: false }); + + for (i, cat) in desc.color_attachments.iter().enumerate() { + if let Some(cat) = cat.as_ref() { + let attachment = glow::COLOR_ATTACHMENT0 + i as u32; + self.cmd_buffer.commands.push(C::BindAttachment { + attachment, + view: cat.target.view.clone(), + }); + if let Some(ref rat) = cat.resolve_target { + self.state + .resolve_attachments + .push((attachment, rat.view.clone())); + } + if !cat.ops.contains(crate::AttachmentOps::STORE) { + self.state.invalidate_attachments.push(attachment); + } + } + } + if let Some(ref dsat) = desc.depth_stencil_attachment { + let aspects = dsat.target.view.aspects; + let attachment = match aspects { + crate::FormatAspects::DEPTH => glow::DEPTH_ATTACHMENT, + crate::FormatAspects::STENCIL => glow::STENCIL_ATTACHMENT, + _ => glow::DEPTH_STENCIL_ATTACHMENT, + }; + self.cmd_buffer.commands.push(C::BindAttachment { + attachment, + view: dsat.target.view.clone(), + }); + if aspects.contains(crate::FormatAspects::DEPTH) + && !dsat.depth_ops.contains(crate::AttachmentOps::STORE) + { + self.state + .invalidate_attachments + .push(glow::DEPTH_ATTACHMENT); + } + if aspects.contains(crate::FormatAspects::STENCIL) + && !dsat.stencil_ops.contains(crate::AttachmentOps::STORE) + { + self.state + .invalidate_attachments + .push(glow::STENCIL_ATTACHMENT); + } + } + + if !rendering_to_external_framebuffer { + // set the draw buffers and states + 
self.cmd_buffer + .commands + .push(C::SetDrawColorBuffers(desc.color_attachments.len() as u8)); + } + } + } + + let rect = crate::Rect { + x: 0, + y: 0, + w: desc.extent.width as i32, + h: desc.extent.height as i32, + }; + self.cmd_buffer.commands.push(C::SetScissor(rect.clone())); + self.cmd_buffer.commands.push(C::SetViewport { + rect, + depth: 0.0..1.0, + }); + + // issue the clears + for (i, cat) in desc + .color_attachments + .iter() + .filter_map(|at| at.as_ref()) + .enumerate() + { + if !cat.ops.contains(crate::AttachmentOps::LOAD) { + let c = &cat.clear_value; + self.cmd_buffer.commands.push( + match cat.target.view.format.sample_type(None).unwrap() { + wgt::TextureSampleType::Float { .. } => C::ClearColorF { + draw_buffer: i as u32, + color: [c.r as f32, c.g as f32, c.b as f32, c.a as f32], + is_srgb: cat.target.view.format.is_srgb(), + }, + wgt::TextureSampleType::Uint => C::ClearColorU( + i as u32, + [c.r as u32, c.g as u32, c.b as u32, c.a as u32], + ), + wgt::TextureSampleType::Sint => C::ClearColorI( + i as u32, + [c.r as i32, c.g as i32, c.b as i32, c.a as i32], + ), + wgt::TextureSampleType::Depth => unreachable!(), + }, + ); + } + } + if let Some(ref dsat) = desc.depth_stencil_attachment { + let clear_depth = !dsat.depth_ops.contains(crate::AttachmentOps::LOAD); + let clear_stencil = !dsat.stencil_ops.contains(crate::AttachmentOps::LOAD); + + if clear_depth && clear_stencil { + self.cmd_buffer.commands.push(C::ClearDepthAndStencil( + dsat.clear_value.0, + dsat.clear_value.1, + )); + } else if clear_depth { + self.cmd_buffer + .commands + .push(C::ClearDepth(dsat.clear_value.0)); + } else if clear_stencil { + self.cmd_buffer + .commands + .push(C::ClearStencil(dsat.clear_value.1)); + } + } + } + unsafe fn end_render_pass(&mut self) { + for (attachment, dst) in self.state.resolve_attachments.drain(..) 
{ + self.cmd_buffer.commands.push(C::ResolveAttachment { + attachment, + dst, + size: self.state.render_size, + }); + } + if !self.state.invalidate_attachments.is_empty() { + self.cmd_buffer.commands.push(C::InvalidateAttachments( + self.state.invalidate_attachments.clone(), + )); + self.state.invalidate_attachments.clear(); + } + if self.state.has_pass_label { + self.cmd_buffer.commands.push(C::PopDebugGroup); + self.state.has_pass_label = false; + } + self.state.instance_vbuf_mask = 0; + self.state.dirty_vbuf_mask = 0; + self.state.active_first_instance = 0; + self.state.color_targets.clear(); + for vat in &self.state.vertex_attributes { + self.cmd_buffer + .commands + .push(C::UnsetVertexAttribute(vat.location)); + } + self.state.vertex_attributes.clear(); + self.state.primitive = super::PrimitiveState::default(); + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let mut do_index = 0; + let mut dirty_textures = 0u32; + let mut dirty_samplers = 0u32; + let group_info = &layout.group_infos[index as usize]; + + for (binding_layout, raw_binding) in group_info.entries.iter().zip(group.contents.iter()) { + let slot = group_info.binding_to_slot[binding_layout.binding as usize] as u32; + match *raw_binding { + super::RawBinding::Buffer { + raw, + offset: base_offset, + size, + } => { + let mut offset = base_offset; + let target = match binding_layout.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + min_binding_size: _, + } => { + if has_dynamic_offset { + offset += dynamic_offsets[do_index] as i32; + do_index += 1; + } + match ty { + wgt::BufferBindingType::Uniform => glow::UNIFORM_BUFFER, + wgt::BufferBindingType::Storage { .. 
} => { + glow::SHADER_STORAGE_BUFFER + } + } + } + _ => unreachable!(), + }; + self.cmd_buffer.commands.push(C::BindBuffer { + target, + slot, + buffer: raw, + offset, + size, + }); + } + super::RawBinding::Sampler(sampler) => { + dirty_samplers |= 1 << slot; + self.state.samplers[slot as usize] = Some(sampler); + } + super::RawBinding::Texture { + raw, + target, + aspects, + } => { + dirty_textures |= 1 << slot; + self.state.texture_slots[slot as usize].tex_target = target; + self.cmd_buffer.commands.push(C::BindTexture { + slot, + texture: raw, + target, + aspects, + }); + } + super::RawBinding::Image(ref binding) => { + self.cmd_buffer.commands.push(C::BindImage { + slot, + binding: binding.clone(), + }); + } + } + } + + self.rebind_sampler_states(dirty_textures, dirty_samplers); + } + + unsafe fn set_push_constants( + &mut self, + _layout: &super::PipelineLayout, + _stages: wgt::ShaderStages, + start_offset: u32, + data: &[u32], + ) { + let range = self.cmd_buffer.add_push_constant_data(data); + + let end = start_offset + data.len() as u32 * 4; + let mut offset = start_offset; + while offset < end { + let uniform = self.state.push_offset_to_uniform[offset as usize / 4].clone(); + let size = uniform.size; + if uniform.location.is_none() { + panic!("No uniform for push constant"); + } + self.cmd_buffer.commands.push(C::SetPushConstants { + uniform, + offset: range.start + offset, + }); + offset += size; + } + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + let range = self.cmd_buffer.add_marker(label); + self.cmd_buffer.commands.push(C::InsertDebugMarker(range)); + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + let range = self.cmd_buffer.add_marker(group_label); + self.cmd_buffer.commands.push(C::PushDebugGroup(range)); + } + unsafe fn end_debug_marker(&mut self) { + self.cmd_buffer.commands.push(C::PopDebugGroup); + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + self.state.topology = conv::map_primitive_topology(pipeline.primitive.topology); + + if self + .private_caps + .contains(super::PrivateCapabilities::VERTEX_BUFFER_LAYOUT) + { + for vat in pipeline.vertex_attributes.iter() { + let vb = &pipeline.vertex_buffers[vat.buffer_index as usize]; + // set the layout + self.cmd_buffer.commands.push(C::SetVertexAttribute { + buffer: None, + buffer_desc: vb.clone(), + attribute_desc: vat.clone(), + }); + } + } else { + for vat in &self.state.vertex_attributes { + self.cmd_buffer + .commands + .push(C::UnsetVertexAttribute(vat.location)); + } + self.state.vertex_attributes.clear(); + + self.state.dirty_vbuf_mask = 0; + // copy vertex attributes + for vat in pipeline.vertex_attributes.iter() { + //Note: we can invalidate more carefully here. 
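+ // mark every buffer referenced by the pipeline's attributes as dirty so it is re-bound before the next draw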
+ self.state.dirty_vbuf_mask |= 1 << vat.buffer_index; + self.state.vertex_attributes.push(vat.clone()); + } + } + + self.state.instance_vbuf_mask = 0; + // copy vertex state + for (index, (&mut (ref mut state_desc, _), pipe_desc)) in self + .state + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_buffers.iter()) + .enumerate() + { + if pipe_desc.step == wgt::VertexStepMode::Instance { + self.state.instance_vbuf_mask |= 1 << index; + } + if state_desc != pipe_desc { + self.state.dirty_vbuf_mask |= 1 << index; + *state_desc = pipe_desc.clone(); + } + } + + self.set_pipeline_inner(&pipeline.inner); + + // set primitive state + let prim_state = conv::map_primitive_state(&pipeline.primitive); + if prim_state != self.state.primitive { + self.cmd_buffer + .commands + .push(C::SetPrimitive(prim_state.clone())); + self.state.primitive = prim_state; + } + + // set depth/stencil states + let mut aspects = crate::FormatAspects::empty(); + if pipeline.depth_bias != self.state.depth_bias { + self.state.depth_bias = pipeline.depth_bias; + self.cmd_buffer + .commands + .push(C::SetDepthBias(pipeline.depth_bias)); + } + if let Some(ref depth) = pipeline.depth { + aspects |= crate::FormatAspects::DEPTH; + self.cmd_buffer.commands.push(C::SetDepth(depth.clone())); + } + if let Some(ref stencil) = pipeline.stencil { + aspects |= crate::FormatAspects::STENCIL; + self.state.stencil = stencil.clone(); + self.rebind_stencil_func(); + if stencil.front.ops == stencil.back.ops + && stencil.front.mask_write == stencil.back.mask_write + { + self.cmd_buffer.commands.push(C::SetStencilOps { + face: glow::FRONT_AND_BACK, + write_mask: stencil.front.mask_write, + ops: stencil.front.ops.clone(), + }); + } else { + self.cmd_buffer.commands.push(C::SetStencilOps { + face: glow::FRONT, + write_mask: stencil.front.mask_write, + ops: stencil.front.ops.clone(), + }); + self.cmd_buffer.commands.push(C::SetStencilOps { + face: glow::BACK, + write_mask: stencil.back.mask_write, + ops: stencil.back.ops.clone(), + }); + } + } + self.cmd_buffer + .commands + .push(C::ConfigureDepthStencil(aspects)); + + // set multisampling state + if pipeline.alpha_to_coverage_enabled != self.state.alpha_to_coverage_enabled { + self.state.alpha_to_coverage_enabled = pipeline.alpha_to_coverage_enabled; + self.cmd_buffer + .commands + .push(C::SetAlphaToCoverage(pipeline.alpha_to_coverage_enabled)); + } + + // set blend states + if self.state.color_targets[..] != pipeline.color_targets[..] 
{ + if pipeline + .color_targets + .iter() + .skip(1) + .any(|ct| *ct != pipeline.color_targets[0]) + { + for (index, ct) in pipeline.color_targets.iter().enumerate() { + self.cmd_buffer.commands.push(C::SetColorTarget { + draw_buffer_index: Some(index as u32), + desc: ct.clone(), + }); + } + } else { + self.cmd_buffer.commands.push(C::SetColorTarget { + draw_buffer_index: None, + desc: pipeline.color_targets.first().cloned().unwrap_or_default(), + }); + } + } + self.state.color_targets.clear(); + for ct in pipeline.color_targets.iter() { + self.state.color_targets.push(ct.clone()); + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + self.state.index_offset = binding.offset; + self.state.index_format = format; + self.cmd_buffer + .commands + .push(C::SetIndexBuffer(binding.buffer.raw.unwrap())); + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + self.state.dirty_vbuf_mask |= 1 << index; + let (_, ref mut vb) = self.state.vertex_buffers[index as usize]; + *vb = Some(super::BufferBinding { + raw: binding.buffer.raw.unwrap(), + offset: binding.offset, + }); + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth: Range<f32>) { + self.cmd_buffer.commands.push(C::SetViewport { + rect: crate::Rect { + x: rect.x as i32, + y: rect.y as i32, + w: rect.w as i32, + h: rect.h as i32, + }, + depth, + }); + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + self.cmd_buffer.commands.push(C::SetScissor(crate::Rect { + x: rect.x as i32, + y: rect.y as i32, + w: rect.w as i32, + h: rect.h as i32, + })); + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + self.state.stencil.front.reference = value; + self.state.stencil.back.reference = value; + self.rebind_stencil_func(); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.cmd_buffer.commands.push(C::SetBlendConstant(*color)); + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + self.prepare_draw(start_instance); + self.cmd_buffer.commands.push(C::Draw { + topology: self.state.topology, + start_vertex, + vertex_count, + instance_count, + }); + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + self.prepare_draw(start_instance); + let (index_size, index_type) = match self.state.index_format { + wgt::IndexFormat::Uint16 => (2, glow::UNSIGNED_SHORT), + wgt::IndexFormat::Uint32 => (4, glow::UNSIGNED_INT), + }; + let index_offset = self.state.index_offset + index_size * start_index as wgt::BufferAddress; + self.cmd_buffer.commands.push(C::DrawIndexed { + topology: self.state.topology, + index_type, + index_offset, + index_count, + base_vertex, + instance_count, + }); + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + self.prepare_draw(0); + for draw in 0..draw_count as wgt::BufferAddress { + let indirect_offset = + offset + draw * mem::size_of::<wgt::DrawIndirectArgs>() as wgt::BufferAddress; + self.cmd_buffer.commands.push(C::DrawIndirect { + topology: self.state.topology, + indirect_buf: buffer.raw.unwrap(), + indirect_offset, + }); + } + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + 
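+ // multi-draw indirect is not used here: each of the `draw_count` draws is recorded as its own indirect command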
self.prepare_draw(0); + let index_type = match self.state.index_format { + wgt::IndexFormat::Uint16 => glow::UNSIGNED_SHORT, + wgt::IndexFormat::Uint32 => glow::UNSIGNED_INT, + }; + for draw in 0..draw_count as wgt::BufferAddress { + let indirect_offset = offset + + draw * mem::size_of::<wgt::DrawIndexedIndirectArgs>() as wgt::BufferAddress; + self.cmd_buffer.commands.push(C::DrawIndexedIndirect { + topology: self.state.topology, + index_type, + indirect_buf: buffer.raw.unwrap(), + indirect_offset, + }); + } + } + unsafe fn draw_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + unreachable!() + } + unsafe fn draw_indexed_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + unreachable!() + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + if let Some(label) = desc.label { + let range = self.cmd_buffer.add_marker(label); + self.cmd_buffer.commands.push(C::PushDebugGroup(range)); + self.state.has_pass_label = true; + } + } + unsafe fn end_compute_pass(&mut self) { + if self.state.has_pass_label { + self.cmd_buffer.commands.push(C::PopDebugGroup); + self.state.has_pass_label = false; + } + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + self.set_pipeline_inner(&pipeline.inner); + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + self.cmd_buffer.commands.push(C::Dispatch(count)); + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + self.cmd_buffer.commands.push(C::DispatchIndirect { + indirect_buf: buffer.raw.unwrap(), + indirect_offset: offset, + }); + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/conv.rs b/third_party/rust/wgpu-hal/src/gles/conv.rs new file mode 100644 index 0000000000..86ff3b60b0 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/conv.rs @@ -0,0 +1,501 @@ +impl super::AdapterShared { + pub(super) fn describe_texture_format( + &self, + texture_format: wgt::TextureFormat, + ) -> super::TextureFormatDesc { + use wgt::TextureFormat as Tf; + use wgt::{AstcBlock, AstcChannel}; + + let (internal, external, data_type) = match texture_format { + Tf::R8Unorm => (glow::R8, glow::RED, glow::UNSIGNED_BYTE), + Tf::R8Snorm => (glow::R8_SNORM, glow::RED, glow::BYTE), + Tf::R8Uint => (glow::R8UI, glow::RED_INTEGER, glow::UNSIGNED_BYTE), + Tf::R8Sint => (glow::R8I, glow::RED_INTEGER, glow::BYTE), + Tf::R16Uint => (glow::R16UI, glow::RED_INTEGER, glow::UNSIGNED_SHORT), + Tf::R16Sint => (glow::R16I, glow::RED_INTEGER, glow::SHORT), + Tf::R16Unorm => (glow::R16, glow::RED, glow::UNSIGNED_SHORT), + Tf::R16Snorm => (glow::R16_SNORM, glow::RED, glow::SHORT), + Tf::R16Float => (glow::R16F, glow::RED, glow::HALF_FLOAT), + Tf::Rg8Unorm => (glow::RG8, glow::RG, glow::UNSIGNED_BYTE), + Tf::Rg8Snorm => (glow::RG8_SNORM, glow::RG, glow::BYTE), + Tf::Rg8Uint => (glow::RG8UI, glow::RG_INTEGER, glow::UNSIGNED_BYTE), + Tf::Rg8Sint => (glow::RG8I, glow::RG_INTEGER, glow::BYTE), + Tf::R32Uint => (glow::R32UI, glow::RED_INTEGER, glow::UNSIGNED_INT), + Tf::R32Sint => (glow::R32I, glow::RED_INTEGER, glow::INT), + Tf::R32Float => (glow::R32F, glow::RED, glow::FLOAT), + Tf::Rg16Uint => (glow::RG16UI, glow::RG_INTEGER, glow::UNSIGNED_SHORT), + Tf::Rg16Sint => (glow::RG16I, glow::RG_INTEGER, 
glow::SHORT), + Tf::Rg16Unorm => (glow::RG16, glow::RG, glow::UNSIGNED_SHORT), + Tf::Rg16Snorm => (glow::RG16_SNORM, glow::RG, glow::SHORT), + Tf::Rg16Float => (glow::RG16F, glow::RG, glow::HALF_FLOAT), + Tf::Rgba8Unorm => (glow::RGBA8, glow::RGBA, glow::UNSIGNED_BYTE), + Tf::Rgba8UnormSrgb => (glow::SRGB8_ALPHA8, glow::RGBA, glow::UNSIGNED_BYTE), + Tf::Bgra8UnormSrgb => (glow::SRGB8_ALPHA8, glow::BGRA, glow::UNSIGNED_BYTE), //TODO? + Tf::Rgba8Snorm => (glow::RGBA8_SNORM, glow::RGBA, glow::BYTE), + Tf::Bgra8Unorm => (glow::RGBA8, glow::BGRA, glow::UNSIGNED_BYTE), //TODO? + Tf::Rgba8Uint => (glow::RGBA8UI, glow::RGBA_INTEGER, glow::UNSIGNED_BYTE), + Tf::Rgba8Sint => (glow::RGBA8I, glow::RGBA_INTEGER, glow::BYTE), + Tf::Rgb10a2Unorm => ( + glow::RGB10_A2, + glow::RGBA, + glow::UNSIGNED_INT_2_10_10_10_REV, + ), + Tf::Rg11b10Float => ( + glow::R11F_G11F_B10F, + glow::RGB, + glow::UNSIGNED_INT_10F_11F_11F_REV, + ), + Tf::Rg32Uint => (glow::RG32UI, glow::RG_INTEGER, glow::UNSIGNED_INT), + Tf::Rg32Sint => (glow::RG32I, glow::RG_INTEGER, glow::INT), + Tf::Rg32Float => (glow::RG32F, glow::RG, glow::FLOAT), + Tf::Rgba16Uint => (glow::RGBA16UI, glow::RGBA_INTEGER, glow::UNSIGNED_SHORT), + Tf::Rgba16Sint => (glow::RGBA16I, glow::RGBA_INTEGER, glow::SHORT), + Tf::Rgba16Unorm => (glow::RGBA16, glow::RGBA, glow::UNSIGNED_SHORT), + Tf::Rgba16Snorm => (glow::RGBA16_SNORM, glow::RGBA, glow::SHORT), + Tf::Rgba16Float => (glow::RGBA16F, glow::RGBA, glow::HALF_FLOAT), + Tf::Rgba32Uint => (glow::RGBA32UI, glow::RGBA_INTEGER, glow::UNSIGNED_INT), + Tf::Rgba32Sint => (glow::RGBA32I, glow::RGBA_INTEGER, glow::INT), + Tf::Rgba32Float => (glow::RGBA32F, glow::RGBA, glow::FLOAT), + Tf::Stencil8 => ( + glow::STENCIL_INDEX8, + glow::STENCIL_INDEX, + glow::UNSIGNED_BYTE, + ), + Tf::Depth16Unorm => ( + glow::DEPTH_COMPONENT16, + glow::DEPTH_COMPONENT, + glow::UNSIGNED_SHORT, + ), + Tf::Depth32Float => (glow::DEPTH_COMPONENT32F, glow::DEPTH_COMPONENT, glow::FLOAT), + Tf::Depth32FloatStencil8 => ( + glow::DEPTH32F_STENCIL8, + glow::DEPTH_STENCIL, + glow::FLOAT_32_UNSIGNED_INT_24_8_REV, + ), + Tf::Depth24Plus => ( + glow::DEPTH_COMPONENT24, + glow::DEPTH_COMPONENT, + glow::UNSIGNED_INT, + ), + Tf::Depth24PlusStencil8 => ( + glow::DEPTH24_STENCIL8, + glow::DEPTH_STENCIL, + glow::UNSIGNED_INT_24_8, + ), + Tf::Rgb9e5Ufloat => (glow::RGB9_E5, glow::RGB, glow::UNSIGNED_INT_5_9_9_9_REV), + Tf::Bc1RgbaUnorm => (glow::COMPRESSED_RGBA_S3TC_DXT1_EXT, glow::RGBA, 0), + Tf::Bc1RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, glow::RGBA, 0), + Tf::Bc2RgbaUnorm => (glow::COMPRESSED_RGBA_S3TC_DXT3_EXT, glow::RGBA, 0), + Tf::Bc2RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, glow::RGBA, 0), + Tf::Bc3RgbaUnorm => (glow::COMPRESSED_RGBA_S3TC_DXT5_EXT, glow::RGBA, 0), + Tf::Bc3RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, glow::RGBA, 0), + Tf::Bc4RUnorm => (glow::COMPRESSED_RED_RGTC1, glow::RED, 0), + Tf::Bc4RSnorm => (glow::COMPRESSED_SIGNED_RED_RGTC1, glow::RED, 0), + Tf::Bc5RgUnorm => (glow::COMPRESSED_RG_RGTC2, glow::RG, 0), + Tf::Bc5RgSnorm => (glow::COMPRESSED_SIGNED_RG_RGTC2, glow::RG, 0), + Tf::Bc6hRgbUfloat => (glow::COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, glow::RGB, 0), + Tf::Bc6hRgbFloat => (glow::COMPRESSED_RGB_BPTC_SIGNED_FLOAT, glow::RGB, 0), + Tf::Bc7RgbaUnorm => (glow::COMPRESSED_RGBA_BPTC_UNORM, glow::RGBA, 0), + Tf::Bc7RgbaUnormSrgb => (glow::COMPRESSED_SRGB_ALPHA_BPTC_UNORM, glow::RGBA, 0), + Tf::Etc2Rgb8Unorm => (glow::COMPRESSED_RGB8_ETC2, glow::RGB, 0), + Tf::Etc2Rgb8UnormSrgb => 
(glow::COMPRESSED_SRGB8_ETC2, glow::RGB, 0), + Tf::Etc2Rgb8A1Unorm => ( + glow::COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, + glow::RGBA, + 0, + ), + Tf::Etc2Rgb8A1UnormSrgb => ( + glow::COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2, + glow::RGBA, + 0, + ), + Tf::Etc2Rgba8Unorm => ( + //TODO: this is a lie, it's not sRGB + glow::COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, + glow::RGBA, + 0, + ), + Tf::Etc2Rgba8UnormSrgb => (glow::COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, glow::RGBA, 0), + Tf::EacR11Unorm => (glow::COMPRESSED_R11_EAC, glow::RED, 0), + Tf::EacR11Snorm => (glow::COMPRESSED_SIGNED_R11_EAC, glow::RED, 0), + Tf::EacRg11Unorm => (glow::COMPRESSED_RG11_EAC, glow::RG, 0), + Tf::EacRg11Snorm => (glow::COMPRESSED_SIGNED_RG11_EAC, glow::RG, 0), + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm | AstcChannel::Hdr => match block { + AstcBlock::B4x4 => (glow::COMPRESSED_RGBA_ASTC_4x4_KHR, glow::RGBA, 0), + AstcBlock::B5x4 => (glow::COMPRESSED_RGBA_ASTC_5x4_KHR, glow::RGBA, 0), + AstcBlock::B5x5 => (glow::COMPRESSED_RGBA_ASTC_5x5_KHR, glow::RGBA, 0), + AstcBlock::B6x5 => (glow::COMPRESSED_RGBA_ASTC_6x5_KHR, glow::RGBA, 0), + AstcBlock::B6x6 => (glow::COMPRESSED_RGBA_ASTC_6x6_KHR, glow::RGBA, 0), + AstcBlock::B8x5 => (glow::COMPRESSED_RGBA_ASTC_8x5_KHR, glow::RGBA, 0), + AstcBlock::B8x6 => (glow::COMPRESSED_RGBA_ASTC_8x6_KHR, glow::RGBA, 0), + AstcBlock::B8x8 => (glow::COMPRESSED_RGBA_ASTC_8x8_KHR, glow::RGBA, 0), + AstcBlock::B10x5 => (glow::COMPRESSED_RGBA_ASTC_10x5_KHR, glow::RGBA, 0), + AstcBlock::B10x6 => (glow::COMPRESSED_RGBA_ASTC_10x6_KHR, glow::RGBA, 0), + AstcBlock::B10x8 => (glow::COMPRESSED_RGBA_ASTC_10x8_KHR, glow::RGBA, 0), + AstcBlock::B10x10 => (glow::COMPRESSED_RGBA_ASTC_10x10_KHR, glow::RGBA, 0), + AstcBlock::B12x10 => (glow::COMPRESSED_RGBA_ASTC_12x10_KHR, glow::RGBA, 0), + AstcBlock::B12x12 => (glow::COMPRESSED_RGBA_ASTC_12x12_KHR, glow::RGBA, 0), + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, glow::RGBA, 0), + AstcBlock::B5x4 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, glow::RGBA, 0), + AstcBlock::B5x5 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, glow::RGBA, 0), + AstcBlock::B6x5 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, glow::RGBA, 0), + AstcBlock::B6x6 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, glow::RGBA, 0), + AstcBlock::B8x5 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, glow::RGBA, 0), + AstcBlock::B8x6 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, glow::RGBA, 0), + AstcBlock::B8x8 => (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, glow::RGBA, 0), + AstcBlock::B10x5 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, glow::RGBA, 0) + } + AstcBlock::B10x6 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, glow::RGBA, 0) + } + AstcBlock::B10x8 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, glow::RGBA, 0) + } + AstcBlock::B10x10 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, glow::RGBA, 0) + } + AstcBlock::B12x10 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, glow::RGBA, 0) + } + AstcBlock::B12x12 => { + (glow::COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, glow::RGBA, 0) + } + }, + }, + }; + + super::TextureFormatDesc { + internal, + external, + data_type, + } + } +} + +pub(super) fn describe_vertex_format(vertex_format: wgt::VertexFormat) -> super::VertexFormatDesc { + use super::VertexAttribKind as Vak; + use wgt::VertexFormat as Vf; + + let (element_count, element_format, attrib_kind) = match vertex_format { + Vf::Unorm8x2 => (2, glow::UNSIGNED_BYTE, Vak::Float), + 
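+ // tuples are (component count, GL component type, attribute kind); e.g. Vf::Float32x3 below maps to (3, glow::FLOAT, Vak::Float)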
Vf::Snorm8x2 => (2, glow::BYTE, Vak::Float), + Vf::Uint8x2 => (2, glow::UNSIGNED_BYTE, Vak::Integer), + Vf::Sint8x2 => (2, glow::BYTE, Vak::Integer), + Vf::Unorm8x4 => (4, glow::UNSIGNED_BYTE, Vak::Float), + Vf::Snorm8x4 => (4, glow::BYTE, Vak::Float), + Vf::Uint8x4 => (4, glow::UNSIGNED_BYTE, Vak::Integer), + Vf::Sint8x4 => (4, glow::BYTE, Vak::Integer), + Vf::Unorm16x2 => (2, glow::UNSIGNED_SHORT, Vak::Float), + Vf::Snorm16x2 => (2, glow::SHORT, Vak::Float), + Vf::Uint16x2 => (2, glow::UNSIGNED_SHORT, Vak::Integer), + Vf::Sint16x2 => (2, glow::SHORT, Vak::Integer), + Vf::Float16x2 => (2, glow::HALF_FLOAT, Vak::Float), + Vf::Unorm16x4 => (4, glow::UNSIGNED_SHORT, Vak::Float), + Vf::Snorm16x4 => (4, glow::SHORT, Vak::Float), + Vf::Uint16x4 => (4, glow::UNSIGNED_SHORT, Vak::Integer), + Vf::Sint16x4 => (4, glow::SHORT, Vak::Integer), + Vf::Float16x4 => (4, glow::HALF_FLOAT, Vak::Float), + Vf::Uint32 => (1, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32 => (1, glow::INT, Vak::Integer), + Vf::Float32 => (1, glow::FLOAT, Vak::Float), + Vf::Uint32x2 => (2, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32x2 => (2, glow::INT, Vak::Integer), + Vf::Float32x2 => (2, glow::FLOAT, Vak::Float), + Vf::Uint32x3 => (3, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32x3 => (3, glow::INT, Vak::Integer), + Vf::Float32x3 => (3, glow::FLOAT, Vak::Float), + Vf::Uint32x4 => (4, glow::UNSIGNED_INT, Vak::Integer), + Vf::Sint32x4 => (4, glow::INT, Vak::Integer), + Vf::Float32x4 => (4, glow::FLOAT, Vak::Float), + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + }; + + super::VertexFormatDesc { + element_count, + element_format, + attrib_kind, + } +} + +pub fn map_filter_modes( + min: wgt::FilterMode, + mag: wgt::FilterMode, + mip: wgt::FilterMode, +) -> (u32, u32) { + use wgt::FilterMode as Fm; + + let mag_filter = match mag { + Fm::Nearest => glow::NEAREST, + Fm::Linear => glow::LINEAR, + }; + + let min_filter = match (min, mip) { + (Fm::Nearest, Fm::Nearest) => glow::NEAREST_MIPMAP_NEAREST, + (Fm::Nearest, Fm::Linear) => glow::NEAREST_MIPMAP_LINEAR, + (Fm::Linear, Fm::Nearest) => glow::LINEAR_MIPMAP_NEAREST, + (Fm::Linear, Fm::Linear) => glow::LINEAR_MIPMAP_LINEAR, + }; + + (min_filter, mag_filter) +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> u32 { + match mode { + wgt::AddressMode::Repeat => glow::REPEAT, + wgt::AddressMode::MirrorRepeat => glow::MIRRORED_REPEAT, + wgt::AddressMode::ClampToEdge => glow::CLAMP_TO_EDGE, + wgt::AddressMode::ClampToBorder => glow::CLAMP_TO_BORDER, + //wgt::AddressMode::MirrorClamp => glow::MIRROR_CLAMP_TO_EDGE, + } +} + +pub fn map_compare_func(fun: wgt::CompareFunction) -> u32 { + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => glow::NEVER, + Cf::Less => glow::LESS, + Cf::LessEqual => glow::LEQUAL, + Cf::Equal => glow::EQUAL, + Cf::GreaterEqual => glow::GEQUAL, + Cf::Greater => glow::GREATER, + Cf::NotEqual => glow::NOTEQUAL, + Cf::Always => glow::ALWAYS, + } +} + +pub fn map_primitive_topology(topology: wgt::PrimitiveTopology) -> u32 { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => glow::POINTS, + Pt::LineList => glow::LINES, + Pt::LineStrip => glow::LINE_STRIP, + Pt::TriangleList => glow::TRIANGLES, + Pt::TriangleStrip => glow::TRIANGLE_STRIP, + } +} + +pub(super) fn map_primitive_state(state: &wgt::PrimitiveState) -> super::PrimitiveState { + //Note: state.polygon_mode is not supported, see `Features::POLYGON_MODE_LINE` and + //`Features::POLYGON_MODE_POINT` + super::PrimitiveState { + //Note: we 
are flipping the front face, so that + // the Y-flip in the generated GLSL keeps the same visibility. + // See `naga::back::glsl::WriterFlags::ADJUST_COORDINATE_SPACE`. + front_face: match state.front_face { + wgt::FrontFace::Cw => glow::CCW, + wgt::FrontFace::Ccw => glow::CW, + }, + cull_face: match state.cull_mode { + Some(wgt::Face::Front) => glow::FRONT, + Some(wgt::Face::Back) => glow::BACK, + None => 0, + }, + unclipped_depth: state.unclipped_depth, + } +} + +pub fn _map_view_dimension(dim: wgt::TextureViewDimension) -> u32 { + use wgt::TextureViewDimension as Tvd; + match dim { + Tvd::D1 | Tvd::D2 => glow::TEXTURE_2D, + Tvd::D2Array => glow::TEXTURE_2D_ARRAY, + Tvd::Cube => glow::TEXTURE_CUBE_MAP, + Tvd::CubeArray => glow::TEXTURE_CUBE_MAP_ARRAY, + Tvd::D3 => glow::TEXTURE_3D, + } +} + +fn map_stencil_op(operation: wgt::StencilOperation) -> u32 { + use wgt::StencilOperation as So; + match operation { + So::Keep => glow::KEEP, + So::Zero => glow::ZERO, + So::Replace => glow::REPLACE, + So::Invert => glow::INVERT, + So::IncrementClamp => glow::INCR, + So::DecrementClamp => glow::DECR, + So::IncrementWrap => glow::INCR_WRAP, + So::DecrementWrap => glow::DECR_WRAP, + } +} + +fn map_stencil_ops(face: &wgt::StencilFaceState) -> super::StencilOps { + super::StencilOps { + pass: map_stencil_op(face.pass_op), + fail: map_stencil_op(face.fail_op), + depth_fail: map_stencil_op(face.depth_fail_op), + } +} + +pub(super) fn map_stencil(state: &wgt::StencilState) -> super::StencilState { + super::StencilState { + front: super::StencilSide { + function: map_compare_func(state.front.compare), + mask_read: state.read_mask, + mask_write: state.write_mask, + reference: 0, + ops: map_stencil_ops(&state.front), + }, + back: super::StencilSide { + function: map_compare_func(state.back.compare), + mask_read: state.read_mask, + mask_write: state.write_mask, + reference: 0, + ops: map_stencil_ops(&state.back), + }, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor) -> u32 { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => glow::ZERO, + Bf::One => glow::ONE, + Bf::Src => glow::SRC_COLOR, + Bf::OneMinusSrc => glow::ONE_MINUS_SRC_COLOR, + Bf::Dst => glow::DST_COLOR, + Bf::OneMinusDst => glow::ONE_MINUS_DST_COLOR, + Bf::SrcAlpha => glow::SRC_ALPHA, + Bf::OneMinusSrcAlpha => glow::ONE_MINUS_SRC_ALPHA, + Bf::DstAlpha => glow::DST_ALPHA, + Bf::OneMinusDstAlpha => glow::ONE_MINUS_DST_ALPHA, + Bf::Constant => glow::CONSTANT_COLOR, + Bf::OneMinusConstant => glow::ONE_MINUS_CONSTANT_COLOR, + Bf::SrcAlphaSaturated => glow::SRC_ALPHA_SATURATE, + } +} + +fn map_blend_component(component: &wgt::BlendComponent) -> super::BlendComponent { + super::BlendComponent { + src: map_blend_factor(component.src_factor), + dst: map_blend_factor(component.dst_factor), + equation: match component.operation { + wgt::BlendOperation::Add => glow::FUNC_ADD, + wgt::BlendOperation::Subtract => glow::FUNC_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => glow::FUNC_REVERSE_SUBTRACT, + wgt::BlendOperation::Min => glow::MIN, + wgt::BlendOperation::Max => glow::MAX, + }, + } +} + +pub(super) fn map_blend(blend: &wgt::BlendState) -> super::BlendDesc { + super::BlendDesc { + color: map_blend_component(&blend.color), + alpha: map_blend_component(&blend.alpha), + } +} + +pub(super) fn map_storage_access(access: wgt::StorageTextureAccess) -> u32 { + match access { + wgt::StorageTextureAccess::ReadOnly => glow::READ_ONLY, + wgt::StorageTextureAccess::WriteOnly => glow::WRITE_ONLY, + wgt::StorageTextureAccess::ReadWrite => 
glow::READ_WRITE, + } +} + +pub(super) fn is_sampler(glsl_uniform_type: u32) -> bool { + match glsl_uniform_type { + glow::INT_SAMPLER_1D + | glow::INT_SAMPLER_1D_ARRAY + | glow::INT_SAMPLER_2D + | glow::INT_SAMPLER_2D_ARRAY + | glow::INT_SAMPLER_2D_MULTISAMPLE + | glow::INT_SAMPLER_2D_MULTISAMPLE_ARRAY + | glow::INT_SAMPLER_2D_RECT + | glow::INT_SAMPLER_3D + | glow::INT_SAMPLER_CUBE + | glow::INT_SAMPLER_CUBE_MAP_ARRAY + | glow::UNSIGNED_INT_SAMPLER_1D + | glow::UNSIGNED_INT_SAMPLER_1D_ARRAY + | glow::UNSIGNED_INT_SAMPLER_2D + | glow::UNSIGNED_INT_SAMPLER_2D_ARRAY + | glow::UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE + | glow::UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY + | glow::UNSIGNED_INT_SAMPLER_2D_RECT + | glow::UNSIGNED_INT_SAMPLER_3D + | glow::UNSIGNED_INT_SAMPLER_CUBE + | glow::UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY + | glow::SAMPLER_1D + | glow::SAMPLER_1D_SHADOW + | glow::SAMPLER_1D_ARRAY + | glow::SAMPLER_1D_ARRAY_SHADOW + | glow::SAMPLER_2D + | glow::SAMPLER_2D_SHADOW + | glow::SAMPLER_2D_ARRAY + | glow::SAMPLER_2D_ARRAY_SHADOW + | glow::SAMPLER_2D_MULTISAMPLE + | glow::SAMPLER_2D_MULTISAMPLE_ARRAY + | glow::SAMPLER_2D_RECT + | glow::SAMPLER_2D_RECT_SHADOW + | glow::SAMPLER_3D + | glow::SAMPLER_CUBE + | glow::SAMPLER_CUBE_MAP_ARRAY + | glow::SAMPLER_CUBE_MAP_ARRAY_SHADOW + | glow::SAMPLER_CUBE_SHADOW => true, + _ => false, + } +} + +pub(super) fn is_image(glsl_uniform_type: u32) -> bool { + match glsl_uniform_type { + glow::INT_IMAGE_1D + | glow::INT_IMAGE_1D_ARRAY + | glow::INT_IMAGE_2D + | glow::INT_IMAGE_2D_ARRAY + | glow::INT_IMAGE_2D_MULTISAMPLE + | glow::INT_IMAGE_2D_MULTISAMPLE_ARRAY + | glow::INT_IMAGE_2D_RECT + | glow::INT_IMAGE_3D + | glow::INT_IMAGE_CUBE + | glow::INT_IMAGE_CUBE_MAP_ARRAY + | glow::UNSIGNED_INT_IMAGE_1D + | glow::UNSIGNED_INT_IMAGE_1D_ARRAY + | glow::UNSIGNED_INT_IMAGE_2D + | glow::UNSIGNED_INT_IMAGE_2D_ARRAY + | glow::UNSIGNED_INT_IMAGE_2D_MULTISAMPLE + | glow::UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY + | glow::UNSIGNED_INT_IMAGE_2D_RECT + | glow::UNSIGNED_INT_IMAGE_3D + | glow::UNSIGNED_INT_IMAGE_CUBE + | glow::UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY + | glow::IMAGE_1D + | glow::IMAGE_1D_ARRAY + | glow::IMAGE_2D + | glow::IMAGE_2D_ARRAY + | glow::IMAGE_2D_MULTISAMPLE + | glow::IMAGE_2D_MULTISAMPLE_ARRAY + | glow::IMAGE_2D_RECT + | glow::IMAGE_3D + | glow::IMAGE_CUBE + | glow::IMAGE_CUBE_MAP_ARRAY => true, + _ => false, + } +} + +pub(super) fn is_atomic_counter(glsl_uniform_type: u32) -> bool { + glsl_uniform_type == glow::UNSIGNED_INT_ATOMIC_COUNTER +} + +pub(super) fn is_opaque_type(glsl_uniform_type: u32) -> bool { + is_sampler(glsl_uniform_type) + || is_image(glsl_uniform_type) + || is_atomic_counter(glsl_uniform_type) +} + +pub(super) fn uniform_byte_size(glsl_uniform_type: u32) -> u32 { + match glsl_uniform_type { + glow::FLOAT | glow::INT => 4, + glow::FLOAT_VEC2 | glow::INT_VEC2 => 8, + glow::FLOAT_VEC3 | glow::INT_VEC3 => 12, + glow::FLOAT_VEC4 | glow::INT_VEC4 => 16, + glow::FLOAT_MAT2 => 16, + glow::FLOAT_MAT3 => 36, + glow::FLOAT_MAT4 => 64, + _ => panic!("Unsupported uniform datatype! 
{glsl_uniform_type:#X}"), + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/device.rs b/third_party/rust/wgpu-hal/src/gles/device.rs new file mode 100644 index 0000000000..0a1cfaf241 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/device.rs @@ -0,0 +1,1328 @@ +use super::conv; +use crate::auxil::map_naga_stage; +use glow::HasContext; +use std::{ + convert::TryInto, + ptr, + sync::{Arc, Mutex}, +}; + +use arrayvec::ArrayVec; +#[cfg(not(target_arch = "wasm32"))] +use std::mem; +use std::sync::atomic::Ordering; + +type ShaderStage<'a> = ( + naga::ShaderStage, + &'a crate::ProgrammableStage<'a, super::Api>, +); +type NameBindingMap = rustc_hash::FxHashMap<String, (super::BindingRegister, u8)>; + +struct CompilationContext<'a> { + layout: &'a super::PipelineLayout, + sampler_map: &'a mut super::SamplerBindMap, + name_binding_map: &'a mut NameBindingMap, + multiview: Option<std::num::NonZeroU32>, +} + +impl CompilationContext<'_> { + fn consume_reflection( + self, + module: &naga::Module, + ep_info: &naga::valid::FunctionInfo, + reflection_info: naga::back::glsl::ReflectionInfo, + ) { + for (handle, var) in module.global_variables.iter() { + if ep_info[handle].is_empty() { + continue; + } + let register = match var.space { + naga::AddressSpace::Uniform => super::BindingRegister::UniformBuffers, + naga::AddressSpace::Storage { .. } => super::BindingRegister::StorageBuffers, + _ => continue, + }; + + let br = var.binding.as_ref().unwrap(); + let slot = self.layout.get_slot(br); + + let name = match reflection_info.uniforms.get(&handle) { + Some(name) => name.clone(), + None => continue, + }; + log::debug!( + "Rebind buffer: {:?} -> {}, register={:?}, slot={}", + var.name.as_ref(), + &name, + register, + slot + ); + self.name_binding_map.insert(name, (register, slot)); + } + + for (name, mapping) in reflection_info.texture_mapping { + let var = &module.global_variables[mapping.texture]; + let register = match module.types[var.ty].inner { + naga::TypeInner::Image { + class: naga::ImageClass::Storage { .. }, + .. + } => super::BindingRegister::Images, + _ => super::BindingRegister::Textures, + }; + + let tex_br = var.binding.as_ref().unwrap(); + let texture_linear_index = self.layout.get_slot(tex_br); + + self.name_binding_map + .insert(name, (register, texture_linear_index)); + if let Some(sampler_handle) = mapping.sampler { + let sam_br = module.global_variables[sampler_handle] + .binding + .as_ref() + .unwrap(); + let sampler_linear_index = self.layout.get_slot(sam_br); + self.sampler_map[texture_linear_index as usize] = Some(sampler_linear_index); + } + } + } +} + +impl super::Device { + /// # Safety + /// + /// - `name` must be created respecting `desc` + /// - `name` must be a texture + /// - If `drop_guard` is [`None`], wgpu-hal will take ownership of the texture. If `drop_guard` is + /// [`Some`], the texture must be valid until the drop implementation + /// of the drop guard is called. 
+ #[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] + pub unsafe fn texture_from_raw( + &self, + name: std::num::NonZeroU32, + desc: &crate::TextureDescriptor, + drop_guard: Option<crate::DropGuard>, + ) -> super::Texture { + let (target, _, is_cubemap) = super::Texture::get_info_from_desc(desc); + + super::Texture { + inner: super::TextureInner::Texture { + raw: glow::NativeTexture(name), + target, + }, + drop_guard, + mip_level_count: desc.mip_level_count, + array_layer_count: desc.array_layer_count(), + format: desc.format, + format_desc: self.shared.describe_texture_format(desc.format), + copy_size: desc.copy_extent(), + is_cubemap, + } + } + + /// # Safety + /// + /// - `name` must be created respecting `desc` + /// - `name` must be a renderbuffer + /// - If `drop_guard` is [`None`], wgpu-hal will take ownership of the renderbuffer. If `drop_guard` is + /// [`Some`], the renderbuffer must be valid until the drop implementation + /// of the drop guard is called. + #[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] + pub unsafe fn texture_from_raw_renderbuffer( + &self, + name: std::num::NonZeroU32, + desc: &crate::TextureDescriptor, + drop_guard: Option<crate::DropGuard>, + ) -> super::Texture { + super::Texture { + inner: super::TextureInner::Renderbuffer { + raw: glow::NativeRenderbuffer(name), + }, + drop_guard, + mip_level_count: desc.mip_level_count, + array_layer_count: desc.array_layer_count(), + format: desc.format, + format_desc: self.shared.describe_texture_format(desc.format), + copy_size: desc.copy_extent(), + is_cubemap: false, + } + } + + unsafe fn compile_shader( + gl: &glow::Context, + shader: &str, + naga_stage: naga::ShaderStage, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + ) -> Result<glow::Shader, crate::PipelineError> { + let target = match naga_stage { + naga::ShaderStage::Vertex => glow::VERTEX_SHADER, + naga::ShaderStage::Fragment => glow::FRAGMENT_SHADER, + naga::ShaderStage::Compute => glow::COMPUTE_SHADER, + }; + + let raw = unsafe { gl.create_shader(target) }.unwrap(); + #[cfg(not(target_arch = "wasm32"))] + if gl.supports_debug() { + //TODO: remove all transmutes from `object_label` + // https://github.com/grovesNL/glow/issues/186 + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::SHADER, name, label) }; + } + + unsafe { gl.shader_source(raw, shader) }; + unsafe { gl.compile_shader(raw) }; + + log::info!("\tCompiled shader {:?}", raw); + + let compiled_ok = unsafe { gl.get_shader_compile_status(raw) }; + let msg = unsafe { gl.get_shader_info_log(raw) }; + if compiled_ok { + if !msg.is_empty() { + log::warn!("\tCompile: {}", msg); + } + Ok(raw) + } else { + Err(crate::PipelineError::Linkage( + map_naga_stage(naga_stage), + msg, + )) + } + } + + fn create_shader( + gl: &glow::Context, + naga_stage: naga::ShaderStage, + stage: &crate::ProgrammableStage<super::Api>, + context: CompilationContext, + ) -> Result<glow::Shader, crate::PipelineError> { + use naga::back::glsl; + let pipeline_options = glsl::PipelineOptions { + shader_stage: naga_stage, + entry_point: stage.entry_point.to_string(), + multiview: context.multiview, + }; + + let shader = &stage.module.naga; + let entry_point_index = shader + .module + .entry_points + .iter() + .position(|ep| ep.name.as_str() == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + + use naga::proc::BoundsCheckPolicy; + // The image bounds checks require the TEXTURE_LEVELS feature available in GL core 1.3+. 
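+ // desktop GL 1.3+ gets ReadZeroSkipWrite image bounds checks; embedded (GLES / WebGL) contexts fall back to Unchecked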
+ let version = gl.version(); + let image_check = if !version.is_embedded && (version.major, version.minor) >= (1, 3) { + BoundsCheckPolicy::ReadZeroSkipWrite + } else { + BoundsCheckPolicy::Unchecked + }; + + // Other bounds check are either provided by glsl or not implemented yet. + let policies = naga::proc::BoundsCheckPolicies { + index: BoundsCheckPolicy::Unchecked, + buffer: BoundsCheckPolicy::Unchecked, + image: image_check, + binding_array: BoundsCheckPolicy::Unchecked, + }; + + let mut output = String::new(); + let mut writer = glsl::Writer::new( + &mut output, + &shader.module, + &shader.info, + &context.layout.naga_options, + &pipeline_options, + policies, + ) + .map_err(|e| { + let msg = format!("{e}"); + crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg) + })?; + + let reflection_info = writer.write().map_err(|e| { + let msg = format!("{e}"); + crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg) + })?; + + log::debug!("Naga generated shader:\n{}", output); + + context.consume_reflection( + &shader.module, + shader.info.get_entry_point(entry_point_index), + reflection_info, + ); + + unsafe { Self::compile_shader(gl, &output, naga_stage, stage.module.label.as_deref()) } + } + + unsafe fn create_pipeline<'a>( + &self, + gl: &glow::Context, + shaders: ArrayVec<ShaderStage<'a>, 3>, + layout: &super::PipelineLayout, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + multiview: Option<std::num::NonZeroU32>, + ) -> Result<Arc<super::PipelineInner>, crate::PipelineError> { + let mut program_stages = ArrayVec::new(); + let mut group_to_binding_to_slot = Vec::with_capacity(layout.group_infos.len()); + for group in &*layout.group_infos { + group_to_binding_to_slot.push(group.binding_to_slot.clone()); + } + for &(naga_stage, stage) in &shaders { + program_stages.push(super::ProgramStage { + naga_stage: naga_stage.to_owned(), + shader_id: stage.module.id, + entry_point: stage.entry_point.to_owned(), + }); + } + let glsl_version = match self.shared.shading_language_version { + naga::back::glsl::Version::Embedded { version, .. 
} => version, + naga::back::glsl::Version::Desktop(_) => unreachable!(), + }; + let mut guard = self + .shared + .program_cache + .try_lock() + .expect("Couldn't acquire program_cache lock"); + // This guard ensures that we can't accidentally destroy a program whilst we're about to reuse it + // The only place that destroys a pipeline is also locking on `program_cache` + let program = guard + .entry(super::ProgramCacheKey { + stages: program_stages, + group_to_binding_to_slot: group_to_binding_to_slot.into_boxed_slice(), + }) + .or_insert_with(|| unsafe { + Self::create_program( + gl, + shaders, + layout, + label, + multiview, + glsl_version, + self.shared.private_caps, + ) + }) + .to_owned()?; + drop(guard); + + Ok(program) + } + + unsafe fn create_program<'a>( + gl: &glow::Context, + shaders: ArrayVec<ShaderStage<'a>, 3>, + layout: &super::PipelineLayout, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + multiview: Option<std::num::NonZeroU32>, + glsl_version: u16, + private_caps: super::PrivateCapabilities, + ) -> Result<Arc<super::PipelineInner>, crate::PipelineError> { + let program = unsafe { gl.create_program() }.unwrap(); + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(program) }; + unsafe { gl.object_label(glow::PROGRAM, name, Some(label)) }; + } + } + + let mut name_binding_map = NameBindingMap::default(); + let mut sampler_map = [None; super::MAX_TEXTURE_SLOTS]; + let mut has_stages = wgt::ShaderStages::empty(); + let mut shaders_to_delete = arrayvec::ArrayVec::<_, 3>::new(); + + for (naga_stage, stage) in shaders { + has_stages |= map_naga_stage(naga_stage); + let context = CompilationContext { + layout, + sampler_map: &mut sampler_map, + name_binding_map: &mut name_binding_map, + multiview, + }; + + let shader = Self::create_shader(gl, naga_stage, stage, context)?; + shaders_to_delete.push(shader); + } + + // Create empty fragment shader if only vertex shader is present + if has_stages == wgt::ShaderStages::VERTEX { + let shader_src = format!("#version {glsl_version} es \n void main(void) {{}}",); + log::info!("Only vertex shader is present. Creating an empty fragment shader",); + let shader = unsafe { + Self::compile_shader( + gl, + &shader_src, + naga::ShaderStage::Fragment, + Some("(wgpu internal) dummy fragment shader"), + ) + }?; + shaders_to_delete.push(shader); + } + + for &shader in shaders_to_delete.iter() { + unsafe { gl.attach_shader(program, shader) }; + } + unsafe { gl.link_program(program) }; + + for shader in shaders_to_delete { + unsafe { gl.delete_shader(shader) }; + } + + log::info!("\tLinked program {:?}", program); + + let linked_ok = unsafe { gl.get_program_link_status(program) }; + let msg = unsafe { gl.get_program_info_log(program) }; + if !linked_ok { + return Err(crate::PipelineError::Linkage(has_stages, msg)); + } + if !msg.is_empty() { + log::warn!("\tLink: {}", msg); + } + + if !private_caps.contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT) { + // This remapping is only needed if we aren't able to put the binding layout + // in the shader. We can't remap storage buffers this way. 
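+ // uniform blocks and sampler/image uniforms are re-bound by name below; storage blocks cannot be remapped and are reported as an error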
+ unsafe { gl.use_program(Some(program)) }; + for (ref name, (register, slot)) in name_binding_map { + log::trace!("Get binding {:?} from program {:?}", name, program); + match register { + super::BindingRegister::UniformBuffers => { + let index = unsafe { gl.get_uniform_block_index(program, name) }.unwrap(); + unsafe { gl.uniform_block_binding(program, index, slot as _) }; + } + super::BindingRegister::StorageBuffers => { + let index = + unsafe { gl.get_shader_storage_block_index(program, name) }.unwrap(); + log::error!( + "Unable to re-map shader storage block {} to {}", + name, + index + ); + return Err(crate::DeviceError::Lost.into()); + } + super::BindingRegister::Textures | super::BindingRegister::Images => { + let location = unsafe { gl.get_uniform_location(program, name) }; + unsafe { gl.uniform_1_i32(location.as_ref(), slot as _) }; + } + } + } + } + + let mut uniforms: [super::UniformDesc; super::MAX_PUSH_CONSTANTS] = + [None; super::MAX_PUSH_CONSTANTS].map(|_: Option<()>| Default::default()); + let count = unsafe { gl.get_active_uniforms(program) }; + let mut offset = 0; + + for uniform in 0..count { + let glow::ActiveUniform { utype, name, .. } = + unsafe { gl.get_active_uniform(program, uniform) }.unwrap(); + + if conv::is_opaque_type(utype) { + continue; + } + + if let Some(location) = unsafe { gl.get_uniform_location(program, &name) } { + if uniforms[offset / 4].location.is_some() { + panic!("Offset already occupied") + } + + // `size` will always be 1 so we need to guess the real size from the type + let uniform_size = conv::uniform_byte_size(utype); + + uniforms[offset / 4] = super::UniformDesc { + location: Some(location), + size: uniform_size, + utype, + }; + + offset += uniform_size as usize; + } + } + + Ok(Arc::new(super::PipelineInner { + program, + sampler_map, + uniforms, + })) + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, queue: super::Queue) { + let gl = &self.shared.context.lock(); + unsafe { gl.delete_vertex_array(self.main_vao) }; + unsafe { gl.delete_framebuffer(queue.draw_fbo) }; + unsafe { gl.delete_framebuffer(queue.copy_fbo) }; + unsafe { gl.delete_buffer(queue.zero_buffer) }; + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + let target = if desc.usage.contains(crate::BufferUses::INDEX) { + glow::ELEMENT_ARRAY_BUFFER + } else { + glow::ARRAY_BUFFER + }; + + let emulate_map = self + .shared + .workarounds + .contains(super::Workarounds::EMULATE_BUFFER_MAP) + || !self + .shared + .private_caps + .contains(super::PrivateCapabilities::BUFFER_ALLOCATION); + + if emulate_map && desc.usage.intersects(crate::BufferUses::MAP_WRITE) { + return Ok(super::Buffer { + raw: None, + target, + size: desc.size, + map_flags: 0, + data: Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))), + }); + } + + let gl = &self.shared.context.lock(); + + let target = if desc.usage.contains(crate::BufferUses::INDEX) { + glow::ELEMENT_ARRAY_BUFFER + } else { + glow::ARRAY_BUFFER + }; + + let is_host_visible = desc + .usage + .intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE); + let is_coherent = desc + .memory_flags + .contains(crate::MemoryFlags::PREFER_COHERENT); + + let mut map_flags = 0; + if desc.usage.contains(crate::BufferUses::MAP_READ) { + map_flags |= glow::MAP_READ_BIT; + } + if desc.usage.contains(crate::BufferUses::MAP_WRITE) { + map_flags |= glow::MAP_WRITE_BIT; + } + + let raw = Some(unsafe { gl.create_buffer() }.map_err(|_| 
crate::DeviceError::OutOfMemory)?); + unsafe { gl.bind_buffer(target, raw) }; + let raw_size = desc + .size + .try_into() + .map_err(|_| crate::DeviceError::OutOfMemory)?; + + if self + .shared + .private_caps + .contains(super::PrivateCapabilities::BUFFER_ALLOCATION) + { + if is_host_visible { + map_flags |= glow::MAP_PERSISTENT_BIT; + if is_coherent { + map_flags |= glow::MAP_COHERENT_BIT; + } + } + unsafe { gl.buffer_storage(target, raw_size, None, map_flags) }; + } else { + assert!(!is_coherent); + let usage = if is_host_visible { + if desc.usage.contains(crate::BufferUses::MAP_READ) { + glow::STREAM_READ + } else { + glow::DYNAMIC_DRAW + } + } else { + // Even if the usage doesn't contain SRC_READ, we update it internally at least once + // Some vendors take usage very literally and STATIC_DRAW will freeze us with an empty buffer + // https://github.com/gfx-rs/wgpu/issues/3371 + glow::DYNAMIC_DRAW + }; + unsafe { gl.buffer_data_size(target, raw_size, usage) }; + } + + unsafe { gl.bind_buffer(target, None) }; + + if !is_coherent && desc.usage.contains(crate::BufferUses::MAP_WRITE) { + map_flags |= glow::MAP_FLUSH_EXPLICIT_BIT; + } + //TODO: do we need `glow::MAP_UNSYNCHRONIZED_BIT`? + + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = desc.label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::BUFFER, name, Some(label)) }; + } + } + + let data = if emulate_map && desc.usage.contains(crate::BufferUses::MAP_READ) { + Some(Arc::new(Mutex::new(vec![0; desc.size as usize]))) + } else { + None + }; + + Ok(super::Buffer { + raw, + target, + size: desc.size, + map_flags, + data, + }) + } + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { + if let Some(raw) = buffer.raw { + let gl = &self.shared.context.lock(); + unsafe { gl.delete_buffer(raw) }; + } + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + let is_coherent = buffer.map_flags & glow::MAP_COHERENT_BIT != 0; + let ptr = match buffer.raw { + None => { + let mut vec = buffer.data.as_ref().unwrap().lock().unwrap(); + let slice = &mut vec.as_mut_slice()[range.start as usize..range.end as usize]; + slice.as_mut_ptr() + } + Some(raw) => { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + let ptr = if let Some(ref map_read_allocation) = buffer.data { + let mut guard = map_read_allocation.lock().unwrap(); + let slice = guard.as_mut_slice(); + unsafe { self.shared.get_buffer_sub_data(gl, buffer.target, 0, slice) }; + slice.as_mut_ptr() + } else { + unsafe { + gl.map_buffer_range( + buffer.target, + range.start as i32, + (range.end - range.start) as i32, + buffer.map_flags, + ) + } + }; + unsafe { gl.bind_buffer(buffer.target, None) }; + ptr + } + }; + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(ptr).ok_or(crate::DeviceError::Lost)?, + is_coherent, + }) + } + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + if let Some(raw) = buffer.raw { + if buffer.data.is_none() { + let gl = &self.shared.context.lock(); + unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + unsafe { gl.unmap_buffer(buffer.target) }; + unsafe { gl.bind_buffer(buffer.target, None) }; + } + } + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + if let Some(raw) = buffer.raw { + let gl = &self.shared.context.lock(); + 
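+ // bind the buffer and flush each range so non-coherent mapped writes become visible to the driver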
unsafe { gl.bind_buffer(buffer.target, Some(raw)) }; + for range in ranges { + unsafe { + gl.flush_mapped_buffer_range( + buffer.target, + range.start as i32, + (range.end - range.start) as i32, + ) + }; + } + } + } + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) { + //TODO: do we need to do anything? + } + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + let gl = &self.shared.context.lock(); + + let render_usage = crate::TextureUses::COLOR_TARGET + | crate::TextureUses::DEPTH_STENCIL_WRITE + | crate::TextureUses::DEPTH_STENCIL_READ; + let format_desc = self.shared.describe_texture_format(desc.format); + + let (inner, is_cubemap) = if render_usage.contains(desc.usage) + && desc.dimension == wgt::TextureDimension::D2 + && desc.size.depth_or_array_layers == 1 + { + let raw = unsafe { gl.create_renderbuffer().unwrap() }; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, Some(raw)) }; + if desc.sample_count > 1 { + unsafe { + gl.renderbuffer_storage_multisample( + glow::RENDERBUFFER, + desc.sample_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + ) + }; + } else { + unsafe { + gl.renderbuffer_storage( + glow::RENDERBUFFER, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + ) + }; + } + + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = desc.label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::RENDERBUFFER, name, Some(label)) }; + } + } + + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) }; + (super::TextureInner::Renderbuffer { raw }, false) + } else { + let raw = unsafe { gl.create_texture().unwrap() }; + let (target, is_3d, is_cubemap) = super::Texture::get_info_from_desc(desc); + + unsafe { gl.bind_texture(target, Some(raw)) }; + //Note: this has to be done before defining the storage! 
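+ // non-filterable float, uint and sint formats get NEAREST min/mag filters instead of the GL defaults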
+ match desc.format.sample_type(None) { + Some( + wgt::TextureSampleType::Float { filterable: false } + | wgt::TextureSampleType::Uint + | wgt::TextureSampleType::Sint, + ) => { + // reset default filtering mode + unsafe { + gl.tex_parameter_i32(target, glow::TEXTURE_MIN_FILTER, glow::NEAREST as i32) + }; + unsafe { + gl.tex_parameter_i32(target, glow::TEXTURE_MAG_FILTER, glow::NEAREST as i32) + }; + } + _ => {} + } + + if is_3d { + unsafe { + gl.tex_storage_3d( + target, + desc.mip_level_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + desc.size.depth_or_array_layers as i32, + ) + }; + } else if desc.sample_count > 1 { + unsafe { + gl.tex_storage_2d_multisample( + target, + desc.sample_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + true, + ) + }; + } else { + unsafe { + gl.tex_storage_2d( + target, + desc.mip_level_count as i32, + format_desc.internal, + desc.size.width as i32, + desc.size.height as i32, + ) + }; + } + + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = desc.label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::TEXTURE, name, Some(label)) }; + } + } + + unsafe { gl.bind_texture(target, None) }; + (super::TextureInner::Texture { raw, target }, is_cubemap) + }; + + Ok(super::Texture { + inner, + drop_guard: None, + mip_level_count: desc.mip_level_count, + array_layer_count: desc.array_layer_count(), + format: desc.format, + format_desc, + copy_size: desc.copy_extent(), + is_cubemap, + }) + } + unsafe fn destroy_texture(&self, texture: super::Texture) { + if texture.drop_guard.is_none() { + let gl = &self.shared.context.lock(); + match texture.inner { + super::TextureInner::Renderbuffer { raw, .. } => { + unsafe { gl.delete_renderbuffer(raw) }; + } + super::TextureInner::DefaultRenderbuffer => {} + super::TextureInner::Texture { raw, .. } => { + unsafe { gl.delete_texture(raw) }; + } + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + super::TextureInner::ExternalFramebuffer { .. } => {} + } + } + + // For clarity, we explicitly drop the drop guard. Although this has no real semantic effect as the + // end of the scope will drop the drop guard since this function takes ownership of the texture. + drop(texture.drop_guard); + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + Ok(super::TextureView { + //TODO: use `conv::map_view_dimension(desc.dimension)`? 
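+ // the view just clones the texture handle and narrows the mip level / array layer ranges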
+ inner: texture.inner.clone(), + aspects: crate::FormatAspects::new(texture.format, desc.range.aspect), + mip_levels: desc.range.mip_range(texture.mip_level_count), + array_layers: desc.range.layer_range(texture.array_layer_count), + format: texture.format, + }) + } + unsafe fn destroy_texture_view(&self, _view: super::TextureView) {} + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + let gl = &self.shared.context.lock(); + + let raw = unsafe { gl.create_sampler().unwrap() }; + + let (min, mag) = + conv::map_filter_modes(desc.min_filter, desc.mag_filter, desc.mipmap_filter); + + unsafe { gl.sampler_parameter_i32(raw, glow::TEXTURE_MIN_FILTER, min as i32) }; + unsafe { gl.sampler_parameter_i32(raw, glow::TEXTURE_MAG_FILTER, mag as i32) }; + + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_WRAP_S, + conv::map_address_mode(desc.address_modes[0]) as i32, + ) + }; + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_WRAP_T, + conv::map_address_mode(desc.address_modes[1]) as i32, + ) + }; + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_WRAP_R, + conv::map_address_mode(desc.address_modes[2]) as i32, + ) + }; + + if let Some(border_color) = desc.border_color { + let border = match border_color { + wgt::SamplerBorderColor::TransparentBlack | wgt::SamplerBorderColor::Zero => { + [0.0; 4] + } + wgt::SamplerBorderColor::OpaqueBlack => [0.0, 0.0, 0.0, 1.0], + wgt::SamplerBorderColor::OpaqueWhite => [1.0; 4], + }; + unsafe { gl.sampler_parameter_f32_slice(raw, glow::TEXTURE_BORDER_COLOR, &border) }; + } + + unsafe { gl.sampler_parameter_f32(raw, glow::TEXTURE_MIN_LOD, desc.lod_clamp.start) }; + unsafe { gl.sampler_parameter_f32(raw, glow::TEXTURE_MAX_LOD, desc.lod_clamp.end) }; + + // If clamp is not 1, we know anisotropy is supported up to 16x + if desc.anisotropy_clamp != 1 { + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_MAX_ANISOTROPY, + desc.anisotropy_clamp as i32, + ) + }; + } + + //set_param_float(glow::TEXTURE_LOD_BIAS, info.lod_bias.0); + + if let Some(compare) = desc.compare { + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_COMPARE_MODE, + glow::COMPARE_REF_TO_TEXTURE as i32, + ) + }; + unsafe { + gl.sampler_parameter_i32( + raw, + glow::TEXTURE_COMPARE_FUNC, + conv::map_compare_func(compare) as i32, + ) + }; + } + + #[cfg(not(target_arch = "wasm32"))] + if let Some(label) = desc.label { + if gl.supports_debug() { + let name = unsafe { mem::transmute(raw) }; + unsafe { gl.object_label(glow::SAMPLER, name, Some(label)) }; + } + } + + Ok(super::Sampler { raw }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + let gl = &self.shared.context.lock(); + unsafe { gl.delete_sampler(sampler.raw) }; + } + + unsafe fn create_command_encoder( + &self, + _desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + Ok(super::CommandEncoder { + cmd_buffer: super::CommandBuffer::default(), + state: Default::default(), + private_caps: self.shared.private_caps, + }) + } + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + Ok(super::BindGroupLayout { + entries: Arc::from(desc.entries), + }) + } + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) {} + + unsafe fn create_pipeline_layout( + &self, + 
desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + use naga::back::glsl; + + let mut group_infos = Vec::with_capacity(desc.bind_group_layouts.len()); + let mut num_samplers = 0u8; + let mut num_textures = 0u8; + let mut num_images = 0u8; + let mut num_uniform_buffers = 0u8; + let mut num_storage_buffers = 0u8; + + let mut writer_flags = glsl::WriterFlags::ADJUST_COORDINATE_SPACE; + writer_flags.set( + glsl::WriterFlags::TEXTURE_SHADOW_LOD, + self.shared + .private_caps + .contains(super::PrivateCapabilities::SHADER_TEXTURE_SHADOW_LOD), + ); + // We always force point size to be written and it will be ignored by the driver if it's not a point list primitive. + // https://github.com/gfx-rs/wgpu/pull/3440/files#r1095726950 + writer_flags.set(glsl::WriterFlags::FORCE_POINT_SIZE, true); + let mut binding_map = glsl::BindingMap::default(); + + for (group_index, bg_layout) in desc.bind_group_layouts.iter().enumerate() { + // create a vector with the size enough to hold all the bindings, filled with `!0` + let mut binding_to_slot = vec![ + !0; + bg_layout + .entries + .last() + .map_or(0, |b| b.binding as usize + 1) + ] + .into_boxed_slice(); + + for entry in bg_layout.entries.iter() { + let counter = match entry.ty { + wgt::BindingType::Sampler { .. } => &mut num_samplers, + wgt::BindingType::Texture { .. } => &mut num_textures, + wgt::BindingType::StorageTexture { .. } => &mut num_images, + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + .. + } => &mut num_uniform_buffers, + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + .. + } => &mut num_storage_buffers, + }; + + binding_to_slot[entry.binding as usize] = *counter; + let br = naga::ResourceBinding { + group: group_index as u32, + binding: entry.binding, + }; + binding_map.insert(br, *counter); + *counter += entry.count.map_or(1, |c| c.get() as u8); + } + + group_infos.push(super::BindGroupLayoutInfo { + entries: Arc::clone(&bg_layout.entries), + binding_to_slot, + }); + } + + Ok(super::PipelineLayout { + group_infos: group_infos.into_boxed_slice(), + naga_options: glsl::Options { + version: self.shared.shading_language_version, + writer_flags, + binding_map, + zero_initialize_workgroup_memory: true, + }, + }) + } + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + let mut contents = Vec::new(); + + for (entry, layout) in desc.entries.iter().zip(desc.layout.entries.iter()) { + let binding = match layout.ty { + wgt::BindingType::Buffer { .. } => { + let bb = &desc.buffers[entry.resource_index as usize]; + super::RawBinding::Buffer { + raw: bb.buffer.raw.unwrap(), + offset: bb.offset as i32, + size: match bb.size { + Some(s) => s.get() as i32, + None => (bb.buffer.size - bb.offset) as i32, + }, + } + } + wgt::BindingType::Sampler { .. } => { + let sampler = desc.samplers[entry.resource_index as usize]; + super::RawBinding::Sampler(sampler.raw) + } + wgt::BindingType::Texture { .. 
} => { + let view = desc.textures[entry.resource_index as usize].view; + if view.mip_levels.start != 0 || view.array_layers.start != 0 { + log::error!("Unable to create a sampled texture binding for non-zero mipmap level or array layer.\n{}", + "This is an implementation problem of wgpu-hal/gles backend.") + } + let (raw, target) = view.inner.as_native(); + super::RawBinding::Texture { + raw, + target, + aspects: view.aspects, + } + } + wgt::BindingType::StorageTexture { + access, + format, + view_dimension, + } => { + let view = desc.textures[entry.resource_index as usize].view; + let format_desc = self.shared.describe_texture_format(format); + let (raw, _target) = view.inner.as_native(); + super::RawBinding::Image(super::ImageBinding { + raw, + mip_level: view.mip_levels.start, + array_layer: match view_dimension { + wgt::TextureViewDimension::D2Array + | wgt::TextureViewDimension::CubeArray => None, + _ => Some(view.array_layers.start), + }, + access: conv::map_storage_access(access), + format: format_desc.internal, + }) + } + }; + contents.push(binding); + } + + Ok(super::BindGroup { + contents: contents.into_boxed_slice(), + }) + } + unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + Ok(super::ShaderModule { + naga: match shader { + crate::ShaderInput::SpirV(_) => { + panic!("`Features::SPIRV_SHADER_PASSTHROUGH` is not enabled") + } + crate::ShaderInput::Naga(naga) => naga, + }, + label: desc.label.map(|str| str.to_string()), + id: self.shared.next_shader_id.fetch_add(1, Ordering::Relaxed), + }) + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + let gl = &self.shared.context.lock(); + let mut shaders = ArrayVec::new(); + shaders.push((naga::ShaderStage::Vertex, &desc.vertex_stage)); + if let Some(ref fs) = desc.fragment_stage { + shaders.push((naga::ShaderStage::Fragment, fs)); + } + let inner = + unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, desc.multiview) }?; + + let (vertex_buffers, vertex_attributes) = { + let mut buffers = Vec::new(); + let mut attributes = Vec::new(); + for (index, vb_layout) in desc.vertex_buffers.iter().enumerate() { + buffers.push(super::VertexBufferDesc { + step: vb_layout.step_mode, + stride: vb_layout.array_stride as u32, + }); + for vat in vb_layout.attributes.iter() { + let format_desc = conv::describe_vertex_format(vat.format); + attributes.push(super::AttributeDesc { + location: vat.shader_location, + offset: vat.offset as u32, + buffer_index: index as u32, + format_desc, + }); + } + } + (buffers.into_boxed_slice(), attributes.into_boxed_slice()) + }; + + let color_targets = { + let mut targets = Vec::new(); + for ct in desc.color_targets.iter().filter_map(|at| at.as_ref()) { + targets.push(super::ColorTargetDesc { + mask: ct.write_mask, + blend: ct.blend.as_ref().map(conv::map_blend), + }); + } + //Note: if any of the states are different, and `INDEPENDENT_BLEND` flag + // is not exposed, then this pipeline will not bind correctly. 
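+                //(Editorial clarification, assuming standard GLES behavior: without GLES 3.2 or
+                // `EXT/OES_draw_buffers_indexed`, blend state is global rather than per-attachment,
+                // so differing per-target states cannot all be honored.)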
+ targets.into_boxed_slice() + }; + + Ok(super::RenderPipeline { + inner, + primitive: desc.primitive, + vertex_buffers, + vertex_attributes, + color_targets, + depth: desc.depth_stencil.as_ref().map(|ds| super::DepthState { + function: conv::map_compare_func(ds.depth_compare), + mask: ds.depth_write_enabled, + }), + depth_bias: desc + .depth_stencil + .as_ref() + .map(|ds| ds.bias) + .unwrap_or_default(), + stencil: desc + .depth_stencil + .as_ref() + .map(|ds| conv::map_stencil(&ds.stencil)), + alpha_to_coverage_enabled: desc.multisample.alpha_to_coverage_enabled, + }) + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + let mut program_cache = self.shared.program_cache.lock(); + // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache` + // This is safe to assume as long as: + // - `RenderPipeline` can't be cloned + // - The only place that we can get a new reference is during `program_cache.lock()` + if Arc::strong_count(&pipeline.inner) == 2 { + program_cache.retain(|_, v| match *v { + Ok(ref p) => p.program != pipeline.inner.program, + Err(_) => false, + }); + let gl = &self.shared.context.lock(); + unsafe { gl.delete_program(pipeline.inner.program) }; + } + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + let gl = &self.shared.context.lock(); + let mut shaders = ArrayVec::new(); + shaders.push((naga::ShaderStage::Compute, &desc.stage)); + let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, None) }?; + + Ok(super::ComputePipeline { inner }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + let mut program_cache = self.shared.program_cache.lock(); + // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache`` + // This is safe to assume as long as: + // - `ComputePipeline` can't be cloned + // - The only place that we can get a new reference is during `program_cache.lock()` + if Arc::strong_count(&pipeline.inner) == 2 { + program_cache.retain(|_, v| match *v { + Ok(ref p) => p.program != pipeline.inner.program, + Err(_) => false, + }); + let gl = &self.shared.context.lock(); + unsafe { gl.delete_program(pipeline.inner.program) }; + } + } + + #[cfg_attr(target_arch = "wasm32", allow(unused))] + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + let gl = &self.shared.context.lock(); + let mut temp_string = String::new(); + + let mut queries = Vec::with_capacity(desc.count as usize); + for i in 0..desc.count { + let query = + unsafe { gl.create_query() }.map_err(|_| crate::DeviceError::OutOfMemory)?; + #[cfg(not(target_arch = "wasm32"))] + if gl.supports_debug() { + use std::fmt::Write; + + if let Some(label) = desc.label { + temp_string.clear(); + let _ = write!(temp_string, "{label}[{i}]"); + let name = unsafe { mem::transmute(query) }; + unsafe { gl.object_label(glow::QUERY, name, Some(&temp_string)) }; + } + } + queries.push(query); + } + + Ok(super::QuerySet { + queries: queries.into_boxed_slice(), + target: match desc.ty { + wgt::QueryType::Occlusion => glow::ANY_SAMPLES_PASSED, + _ => unimplemented!(), + }, + }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + let gl = &self.shared.context.lock(); + for &query in set.queries.iter() { + unsafe { gl.delete_query(query) }; + } + } + unsafe 
fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + Ok(super::Fence { + last_completed: 0, + pending: Vec::new(), + }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + let gl = &self.shared.context.lock(); + for (_, sync) in fence.pending { + unsafe { gl.delete_sync(sync) }; + } + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + #[cfg_attr(target_arch = "wasm32", allow(clippy::needless_borrow))] + Ok(fence.get_latest(&self.shared.context.lock())) + } + unsafe fn wait( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + if fence.last_completed < wait_value { + let gl = &self.shared.context.lock(); + let timeout_ns = if cfg!(target_arch = "wasm32") { + 0 + } else { + (timeout_ms as u64 * 1_000_000).min(!0u32 as u64) + }; + let &(_, sync) = fence + .pending + .iter() + .find(|&&(value, _)| value >= wait_value) + .unwrap(); + match unsafe { + gl.client_wait_sync(sync, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns as i32) + } { + // for some reason firefox returns WAIT_FAILED, to investigate + #[cfg(target_arch = "wasm32")] + glow::WAIT_FAILED => { + log::warn!("wait failed!"); + Ok(false) + } + glow::TIMEOUT_EXPIRED => Ok(false), + glow::CONDITION_SATISFIED | glow::ALREADY_SIGNALED => Ok(true), + _ => Err(crate::DeviceError::Lost), + } + } else { + Ok(true) + } + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] + return unsafe { + self.render_doc + .start_frame_capture(self.shared.context.raw_context(), ptr::null_mut()) + }; + #[allow(unreachable_code)] + false + } + unsafe fn stop_capture(&self) { + #[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] + unsafe { + self.render_doc + .end_frame_capture(ptr::null_mut(), ptr::null_mut()) + } + } +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for super::Device {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for super::Device {} diff --git a/third_party/rust/wgpu-hal/src/gles/egl.rs b/third_party/rust/wgpu-hal/src/gles/egl.rs new file mode 100644 index 0000000000..13e217f9d9 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/egl.rs @@ -0,0 +1,1310 @@ +use glow::HasContext; +use parking_lot::{Mutex, MutexGuard}; + +use std::{ffi, os::raw, ptr, sync::Arc, time::Duration}; + +/// The amount of time to wait while trying to obtain a lock to the adapter context +const CONTEXT_LOCK_TIMEOUT_SECS: u64 = 1; + +const EGL_CONTEXT_FLAGS_KHR: i32 = 0x30FC; +const EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR: i32 = 0x0001; +const EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT: i32 = 0x30BF; +const EGL_PLATFORM_WAYLAND_KHR: u32 = 0x31D8; +const EGL_PLATFORM_X11_KHR: u32 = 0x31D5; +const EGL_PLATFORM_ANGLE_ANGLE: u32 = 0x3202; +const EGL_PLATFORM_ANGLE_NATIVE_PLATFORM_TYPE_ANGLE: u32 = 0x348F; +const EGL_PLATFORM_ANGLE_DEBUG_LAYERS_ENABLED: u32 = 0x3451; +const EGL_PLATFORM_SURFACELESS_MESA: u32 = 0x31DD; +const EGL_GL_COLORSPACE_KHR: u32 = 0x309D; +const EGL_GL_COLORSPACE_SRGB_KHR: u32 = 0x3089; + +type XOpenDisplayFun = + unsafe extern "system" fn(display_name: *const raw::c_char) -> *mut raw::c_void; + +type WlDisplayConnectFun = + unsafe extern "system" fn(display_name: *const raw::c_char) -> *mut raw::c_void; + +type WlDisplayDisconnectFun = unsafe extern "system" fn(display: *const raw::c_void); + +#[cfg(not(target_os = "emscripten"))] +type EglInstance = 
khronos_egl::DynamicInstance<khronos_egl::EGL1_4>; + +#[cfg(target_os = "emscripten")] +type EglInstance = khronos_egl::Instance<khronos_egl::Static>; + +type WlEglWindowCreateFun = unsafe extern "system" fn( + surface: *const raw::c_void, + width: raw::c_int, + height: raw::c_int, +) -> *mut raw::c_void; + +type WlEglWindowResizeFun = unsafe extern "system" fn( + window: *const raw::c_void, + width: raw::c_int, + height: raw::c_int, + dx: raw::c_int, + dy: raw::c_int, +); + +type WlEglWindowDestroyFun = unsafe extern "system" fn(window: *const raw::c_void); + +#[cfg(target_os = "android")] +extern "C" { + pub fn ANativeWindow_setBuffersGeometry( + window: *mut raw::c_void, + width: i32, + height: i32, + format: i32, + ) -> i32; +} + +type EglLabel = *const raw::c_void; + +#[allow(clippy::upper_case_acronyms)] +type EGLDEBUGPROCKHR = Option< + unsafe extern "system" fn( + error: khronos_egl::Enum, + command: *const raw::c_char, + message_type: u32, + thread_label: EglLabel, + object_label: EglLabel, + message: *const raw::c_char, + ), +>; + +const EGL_DEBUG_MSG_CRITICAL_KHR: u32 = 0x33B9; +const EGL_DEBUG_MSG_ERROR_KHR: u32 = 0x33BA; +const EGL_DEBUG_MSG_WARN_KHR: u32 = 0x33BB; +const EGL_DEBUG_MSG_INFO_KHR: u32 = 0x33BC; + +type EglDebugMessageControlFun = unsafe extern "system" fn( + proc: EGLDEBUGPROCKHR, + attrib_list: *const khronos_egl::Attrib, +) -> raw::c_int; + +unsafe extern "system" fn egl_debug_proc( + error: khronos_egl::Enum, + command_raw: *const raw::c_char, + message_type: u32, + _thread_label: EglLabel, + _object_label: EglLabel, + message_raw: *const raw::c_char, +) { + let log_severity = match message_type { + EGL_DEBUG_MSG_CRITICAL_KHR | EGL_DEBUG_MSG_ERROR_KHR => log::Level::Error, + EGL_DEBUG_MSG_WARN_KHR => log::Level::Warn, + EGL_DEBUG_MSG_INFO_KHR => log::Level::Info, + _ => log::Level::Debug, + }; + let command = unsafe { ffi::CStr::from_ptr(command_raw) }.to_string_lossy(); + let message = if message_raw.is_null() { + "".into() + } else { + unsafe { ffi::CStr::from_ptr(message_raw) }.to_string_lossy() + }; + + log::log!( + log_severity, + "EGL '{}' code 0x{:x}: {}", + command, + error, + message, + ); +} + +fn open_x_display() -> Option<(ptr::NonNull<raw::c_void>, libloading::Library)> { + log::info!("Loading X11 library to get the current display"); + unsafe { + let library = libloading::Library::new("libX11.so").ok()?; + let func: libloading::Symbol<XOpenDisplayFun> = library.get(b"XOpenDisplay").unwrap(); + let result = func(ptr::null()); + ptr::NonNull::new(result).map(|ptr| (ptr, library)) + } +} + +unsafe fn find_library(paths: &[&str]) -> Option<libloading::Library> { + for path in paths { + match unsafe { libloading::Library::new(path) } { + Ok(lib) => return Some(lib), + _ => continue, + }; + } + None +} + +fn test_wayland_display() -> Option<libloading::Library> { + /* We try to connect and disconnect here to simply ensure there + * is an active wayland display available. 
+ */ + log::info!("Loading Wayland library to get the current display"); + let library = unsafe { + let client_library = find_library(&["libwayland-client.so.0", "libwayland-client.so"])?; + let wl_display_connect: libloading::Symbol<WlDisplayConnectFun> = + client_library.get(b"wl_display_connect").unwrap(); + let wl_display_disconnect: libloading::Symbol<WlDisplayDisconnectFun> = + client_library.get(b"wl_display_disconnect").unwrap(); + let display = ptr::NonNull::new(wl_display_connect(ptr::null()))?; + wl_display_disconnect(display.as_ptr()); + find_library(&["libwayland-egl.so.1", "libwayland-egl.so"])? + }; + Some(library) +} + +#[derive(Clone, Copy, Debug)] +enum SrgbFrameBufferKind { + /// No support for SRGB surface + None, + /// Using EGL 1.5's support for colorspaces + Core, + /// Using EGL_KHR_gl_colorspace + Khr, +} + +/// Choose GLES framebuffer configuration. +fn choose_config( + egl: &EglInstance, + display: khronos_egl::Display, + srgb_kind: SrgbFrameBufferKind, +) -> Result<(khronos_egl::Config, bool), crate::InstanceError> { + //TODO: EGL_SLOW_CONFIG + let tiers = [ + ( + "off-screen", + &[ + khronos_egl::SURFACE_TYPE, + khronos_egl::PBUFFER_BIT, + khronos_egl::RENDERABLE_TYPE, + khronos_egl::OPENGL_ES2_BIT, + ][..], + ), + ( + "presentation", + &[khronos_egl::SURFACE_TYPE, khronos_egl::WINDOW_BIT][..], + ), + #[cfg(not(target_os = "android"))] + ( + "native-render", + &[khronos_egl::NATIVE_RENDERABLE, khronos_egl::TRUE as _][..], + ), + ]; + + let mut attributes = Vec::with_capacity(9); + for tier_max in (0..tiers.len()).rev() { + let name = tiers[tier_max].0; + log::info!("\tTrying {}", name); + + attributes.clear(); + for &(_, tier_attr) in tiers[..=tier_max].iter() { + attributes.extend_from_slice(tier_attr); + } + // make sure the Alpha is enough to support sRGB + match srgb_kind { + SrgbFrameBufferKind::None => {} + _ => { + attributes.push(khronos_egl::ALPHA_SIZE); + attributes.push(8); + } + } + attributes.push(khronos_egl::NONE); + + match egl.choose_first_config(display, &attributes) { + Ok(Some(config)) => { + if tier_max == 1 { + //Note: this has been confirmed to malfunction on Intel+NV laptops, + // but also on Angle. + log::warn!("EGL says it can present to the window but not natively",); + } + // Android emulator can't natively present either. 
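+                // That is: on Android and Windows a "presentation"-tier config (index 1) is
+                // accepted, elsewhere a "native-render"-tier config (index 2) is required.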
+ let tier_threshold = if cfg!(target_os = "android") || cfg!(windows) { + 1 + } else { + 2 + }; + return Ok((config, tier_max >= tier_threshold)); + } + Ok(None) => { + log::warn!("No config found!"); + } + Err(e) => { + log::error!("error in choose_first_config: {:?}", e); + } + } + } + + Err(crate::InstanceError) +} + +fn gl_debug_message_callback(source: u32, gltype: u32, id: u32, severity: u32, message: &str) { + let source_str = match source { + glow::DEBUG_SOURCE_API => "API", + glow::DEBUG_SOURCE_WINDOW_SYSTEM => "Window System", + glow::DEBUG_SOURCE_SHADER_COMPILER => "ShaderCompiler", + glow::DEBUG_SOURCE_THIRD_PARTY => "Third Party", + glow::DEBUG_SOURCE_APPLICATION => "Application", + glow::DEBUG_SOURCE_OTHER => "Other", + _ => unreachable!(), + }; + + let log_severity = match severity { + glow::DEBUG_SEVERITY_HIGH => log::Level::Error, + glow::DEBUG_SEVERITY_MEDIUM => log::Level::Warn, + glow::DEBUG_SEVERITY_LOW => log::Level::Info, + glow::DEBUG_SEVERITY_NOTIFICATION => log::Level::Trace, + _ => unreachable!(), + }; + + let type_str = match gltype { + glow::DEBUG_TYPE_DEPRECATED_BEHAVIOR => "Deprecated Behavior", + glow::DEBUG_TYPE_ERROR => "Error", + glow::DEBUG_TYPE_MARKER => "Marker", + glow::DEBUG_TYPE_OTHER => "Other", + glow::DEBUG_TYPE_PERFORMANCE => "Performance", + glow::DEBUG_TYPE_POP_GROUP => "Pop Group", + glow::DEBUG_TYPE_PORTABILITY => "Portability", + glow::DEBUG_TYPE_PUSH_GROUP => "Push Group", + glow::DEBUG_TYPE_UNDEFINED_BEHAVIOR => "Undefined Behavior", + _ => unreachable!(), + }; + + let _ = std::panic::catch_unwind(|| { + log::log!( + log_severity, + "GLES: [{}/{}] ID {} : {}", + source_str, + type_str, + id, + message + ); + }); + + if cfg!(debug_assertions) && log_severity == log::Level::Error { + // Set canary and continue + crate::VALIDATION_CANARY.set(); + } +} + +#[derive(Clone, Debug)] +struct EglContext { + instance: Arc<EglInstance>, + version: (i32, i32), + display: khronos_egl::Display, + raw: khronos_egl::Context, + pbuffer: Option<khronos_egl::Surface>, +} + +impl EglContext { + fn make_current(&self) { + self.instance + .make_current(self.display, self.pbuffer, self.pbuffer, Some(self.raw)) + .unwrap(); + } + fn unmake_current(&self) { + self.instance + .make_current(self.display, None, None, None) + .unwrap(); + } +} + +/// A wrapper around a [`glow::Context`] and the required EGL context that uses locking to guarantee +/// exclusive access when shared with multiple threads. +pub struct AdapterContext { + glow: Mutex<glow::Context>, + egl: Option<EglContext>, +} + +unsafe impl Sync for AdapterContext {} +unsafe impl Send for AdapterContext {} + +impl AdapterContext { + pub fn is_owned(&self) -> bool { + self.egl.is_some() + } + + /// Returns the EGL instance. + /// + /// This provides access to EGL functions and the ability to load GL and EGL extension functions. + pub fn egl_instance(&self) -> Option<&EglInstance> { + self.egl.as_ref().map(|egl| &*egl.instance) + } + + /// Returns the EGLDisplay corresponding to the adapter context. + /// + /// Returns [`None`] if the adapter was externally created. + pub fn raw_display(&self) -> Option<&khronos_egl::Display> { + self.egl.as_ref().map(|egl| &egl.display) + } + + /// Returns the EGL version the adapter context was created with. + /// + /// Returns [`None`] if the adapter was externally created. 
+ pub fn egl_version(&self) -> Option<(i32, i32)> { + self.egl.as_ref().map(|egl| egl.version) + } + + pub fn raw_context(&self) -> *mut raw::c_void { + match self.egl { + Some(ref egl) => egl.raw.as_ptr(), + None => ptr::null_mut(), + } + } +} + +struct EglContextLock<'a> { + instance: &'a Arc<EglInstance>, + display: khronos_egl::Display, +} + +/// A guard containing a lock to an [`AdapterContext`] +pub struct AdapterContextLock<'a> { + glow: MutexGuard<'a, glow::Context>, + egl: Option<EglContextLock<'a>>, +} + +impl<'a> std::ops::Deref for AdapterContextLock<'a> { + type Target = glow::Context; + + fn deref(&self) -> &Self::Target { + &self.glow + } +} + +impl<'a> Drop for AdapterContextLock<'a> { + fn drop(&mut self) { + if let Some(egl) = self.egl.take() { + egl.instance + .make_current(egl.display, None, None, None) + .unwrap(); + } + } +} + +impl AdapterContext { + /// Get's the [`glow::Context`] without waiting for a lock + /// + /// # Safety + /// + /// This should only be called when you have manually made sure that the current thread has made + /// the EGL context current and that no other thread also has the EGL context current. + /// Additionally, you must manually make the EGL context **not** current after you are done with + /// it, so that future calls to `lock()` will not fail. + /// + /// > **Note:** Calling this function **will** still lock the [`glow::Context`] which adds an + /// > extra safe-guard against accidental concurrent access to the context. + pub unsafe fn get_without_egl_lock(&self) -> MutexGuard<glow::Context> { + self.glow + .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) + .expect("Could not lock adapter context. This is most-likely a deadlock.") + } + + /// Obtain a lock to the EGL context and get handle to the [`glow::Context`] that can be used to + /// do rendering. + #[track_caller] + pub fn lock<'a>(&'a self) -> AdapterContextLock<'a> { + let glow = self + .glow + // Don't lock forever. If it takes longer than 1 second to get the lock we've got a + // deadlock and should panic to show where we got stuck + .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) + .expect("Could not lock adapter context. This is most-likely a deadlock."); + + let egl = self.egl.as_ref().map(|egl| { + egl.make_current(); + EglContextLock { + instance: &egl.instance, + display: egl.display, + } + }); + + AdapterContextLock { glow, egl } + } +} + +#[derive(Debug)] +struct Inner { + /// Note: the context contains a dummy pbuffer (1x1). + /// Required for `eglMakeCurrent` on platforms that doesn't supports `EGL_KHR_surfaceless_context`. 
+ egl: EglContext, + #[allow(unused)] + version: (i32, i32), + supports_native_window: bool, + config: khronos_egl::Config, + #[cfg_attr(target_os = "emscripten", allow(dead_code))] + wl_display: Option<*mut raw::c_void>, + /// Method by which the framebuffer should support srgb + srgb_kind: SrgbFrameBufferKind, +} + +impl Inner { + fn create( + flags: crate::InstanceFlags, + egl: Arc<EglInstance>, + display: khronos_egl::Display, + ) -> Result<Self, crate::InstanceError> { + let version = egl.initialize(display).map_err(|_| crate::InstanceError)?; + let vendor = egl + .query_string(Some(display), khronos_egl::VENDOR) + .unwrap(); + let display_extensions = egl + .query_string(Some(display), khronos_egl::EXTENSIONS) + .unwrap() + .to_string_lossy(); + log::info!("Display vendor {:?}, version {:?}", vendor, version,); + log::debug!( + "Display extensions: {:#?}", + display_extensions.split_whitespace().collect::<Vec<_>>() + ); + + let srgb_kind = if version >= (1, 5) { + log::info!("\tEGL surface: +srgb"); + SrgbFrameBufferKind::Core + } else if display_extensions.contains("EGL_KHR_gl_colorspace") { + log::info!("\tEGL surface: +srgb khr"); + SrgbFrameBufferKind::Khr + } else { + log::warn!("\tEGL surface: -srgb"); + SrgbFrameBufferKind::None + }; + + if log::max_level() >= log::LevelFilter::Trace { + log::trace!("Configurations:"); + let config_count = egl.get_config_count(display).unwrap(); + let mut configurations = Vec::with_capacity(config_count); + egl.get_configs(display, &mut configurations).unwrap(); + for &config in configurations.iter() { + log::trace!("\tCONFORMANT=0x{:X}, RENDERABLE=0x{:X}, NATIVE_RENDERABLE=0x{:X}, SURFACE_TYPE=0x{:X}, ALPHA_SIZE={}", + egl.get_config_attrib(display, config, khronos_egl::CONFORMANT).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::RENDERABLE_TYPE).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::NATIVE_RENDERABLE).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::SURFACE_TYPE).unwrap(), + egl.get_config_attrib(display, config, khronos_egl::ALPHA_SIZE).unwrap(), + ); + } + } + + let (config, supports_native_window) = choose_config(&egl, display, srgb_kind)?; + egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap(); + + let needs_robustness = true; + let mut khr_context_flags = 0; + let supports_khr_context = display_extensions.contains("EGL_KHR_create_context"); + + //TODO: make it so `Device` == EGL Context + let mut context_attributes = vec![ + khronos_egl::CONTEXT_CLIENT_VERSION, + 3, // Request GLES 3.0 or higher + ]; + if flags.contains(crate::InstanceFlags::DEBUG) { + if version >= (1, 5) { + log::info!("\tEGL context: +debug"); + context_attributes.push(khronos_egl::CONTEXT_OPENGL_DEBUG); + context_attributes.push(khronos_egl::TRUE as _); + } else if supports_khr_context { + log::info!("\tEGL context: +debug KHR"); + khr_context_flags |= EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR; + } else { + log::info!("\tEGL context: -debug"); + } + } + if needs_robustness { + //Note: the core version can fail if robustness is not supported + // (regardless of whether the extension is supported!). + // In fact, Angle does precisely that awful behavior, so we don't try it there. 
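+            // That is: prefer the core EGL 1.5 robust-access attribute on non-ANGLE drivers,
+            // fall back to `EGL_EXT_create_context_robustness`, and only warn when neither is
+            // available.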
+ if version >= (1, 5) && !display_extensions.contains("EGL_ANGLE_") { + log::info!("\tEGL context: +robust access"); + context_attributes.push(khronos_egl::CONTEXT_OPENGL_ROBUST_ACCESS); + context_attributes.push(khronos_egl::TRUE as _); + } else if display_extensions.contains("EGL_EXT_create_context_robustness") { + log::info!("\tEGL context: +robust access EXT"); + context_attributes.push(EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT); + context_attributes.push(khronos_egl::TRUE as _); + } else { + //Note: we aren't trying `EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR` + // because it's for desktop GL only, not GLES. + log::warn!("\tEGL context: -robust access"); + } + + //TODO do we need `khronos_egl::CONTEXT_OPENGL_NOTIFICATION_STRATEGY_EXT`? + } + if khr_context_flags != 0 { + context_attributes.push(EGL_CONTEXT_FLAGS_KHR); + context_attributes.push(khr_context_flags); + } + context_attributes.push(khronos_egl::NONE); + let context = match egl.create_context(display, config, None, &context_attributes) { + Ok(context) => context, + Err(e) => { + log::warn!("unable to create GLES 3.x context: {:?}", e); + return Err(crate::InstanceError); + } + }; + + // Testing if context can be binded without surface + // and creating dummy pbuffer surface if not. + let pbuffer = if version >= (1, 5) + || display_extensions.contains("EGL_KHR_surfaceless_context") + || cfg!(target_os = "emscripten") + { + log::info!("\tEGL context: +surfaceless"); + None + } else { + let attributes = [ + khronos_egl::WIDTH, + 1, + khronos_egl::HEIGHT, + 1, + khronos_egl::NONE, + ]; + egl.create_pbuffer_surface(display, config, &attributes) + .map(Some) + .map_err(|e| { + log::warn!("Error in create_pbuffer_surface: {:?}", e); + crate::InstanceError + })? + }; + + Ok(Self { + egl: EglContext { + instance: egl, + display, + raw: context, + pbuffer, + version, + }, + version, + supports_native_window, + config, + wl_display: None, + srgb_kind, + }) + } +} + +impl Drop for Inner { + fn drop(&mut self) { + if let Err(e) = self + .egl + .instance + .destroy_context(self.egl.display, self.egl.raw) + { + log::warn!("Error in destroy_context: {:?}", e); + } + if let Err(e) = self.egl.instance.terminate(self.egl.display) { + log::warn!("Error in terminate: {:?}", e); + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +enum WindowKind { + Wayland, + X11, + AngleX11, + Unknown, +} + +#[derive(Clone, Debug)] +struct WindowSystemInterface { + library: Option<Arc<libloading::Library>>, + kind: WindowKind, +} + +pub struct Instance { + wsi: WindowSystemInterface, + flags: crate::InstanceFlags, + inner: Mutex<Inner>, +} + +impl Instance { + pub fn raw_display(&self) -> khronos_egl::Display { + self.inner + .try_lock() + .expect("Could not lock instance. This is most-likely a deadlock.") + .egl + .display + } + + /// Returns the version of the EGL display. + pub fn egl_version(&self) -> (i32, i32) { + self.inner + .try_lock() + .expect("Could not lock instance. 
This is most-likely a deadlock.") + .version + } +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +impl crate::Instance<super::Api> for Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + #[cfg(target_os = "emscripten")] + let egl_result: Result<EglInstance, khronos_egl::Error> = + Ok(khronos_egl::Instance::new(khronos_egl::Static)); + + #[cfg(not(target_os = "emscripten"))] + let egl_result = if cfg!(windows) { + unsafe { + khronos_egl::DynamicInstance::<khronos_egl::EGL1_4>::load_required_from_filename( + "libEGL.dll", + ) + } + } else if cfg!(any(target_os = "macos", target_os = "ios")) { + unsafe { + khronos_egl::DynamicInstance::<khronos_egl::EGL1_4>::load_required_from_filename( + "libEGL.dylib", + ) + } + } else { + unsafe { khronos_egl::DynamicInstance::<khronos_egl::EGL1_4>::load_required() } + }; + let egl = match egl_result { + Ok(egl) => Arc::new(egl), + Err(e) => { + log::info!("Unable to open libEGL: {:?}", e); + return Err(crate::InstanceError); + } + }; + + let client_extensions = egl.query_string(None, khronos_egl::EXTENSIONS); + + let client_ext_str = match client_extensions { + Ok(ext) => ext.to_string_lossy().into_owned(), + Err(_) => String::new(), + }; + log::debug!( + "Client extensions: {:#?}", + client_ext_str.split_whitespace().collect::<Vec<_>>() + ); + + let wayland_library = if client_ext_str.contains("EGL_EXT_platform_wayland") { + test_wayland_display() + } else { + None + }; + let x11_display_library = if client_ext_str.contains("EGL_EXT_platform_x11") { + open_x_display() + } else { + None + }; + let angle_x11_display_library = if client_ext_str.contains("EGL_ANGLE_platform_angle") { + open_x_display() + } else { + None + }; + + #[cfg(not(target_os = "emscripten"))] + let egl1_5 = egl.upcast::<khronos_egl::EGL1_5>(); + + #[cfg(target_os = "emscripten")] + let egl1_5: Option<&Arc<EglInstance>> = Some(&egl); + + let (display, wsi_library, wsi_kind) = if let (Some(library), Some(egl)) = + (wayland_library, egl1_5) + { + log::info!("Using Wayland platform"); + let display_attributes = [khronos_egl::ATTRIB_NONE]; + let display = egl + .get_platform_display( + EGL_PLATFORM_WAYLAND_KHR, + khronos_egl::DEFAULT_DISPLAY, + &display_attributes, + ) + .unwrap(); + (display, Some(Arc::new(library)), WindowKind::Wayland) + } else if let (Some((display, library)), Some(egl)) = (x11_display_library, egl1_5) { + log::info!("Using X11 platform"); + let display_attributes = [khronos_egl::ATTRIB_NONE]; + let display = egl + .get_platform_display(EGL_PLATFORM_X11_KHR, display.as_ptr(), &display_attributes) + .unwrap(); + (display, Some(Arc::new(library)), WindowKind::X11) + } else if let (Some((display, library)), Some(egl)) = (angle_x11_display_library, egl1_5) { + log::info!("Using Angle platform with X11"); + let display_attributes = [ + EGL_PLATFORM_ANGLE_NATIVE_PLATFORM_TYPE_ANGLE as khronos_egl::Attrib, + EGL_PLATFORM_X11_KHR as khronos_egl::Attrib, + EGL_PLATFORM_ANGLE_DEBUG_LAYERS_ENABLED as khronos_egl::Attrib, + usize::from(desc.flags.contains(crate::InstanceFlags::VALIDATION)), + khronos_egl::ATTRIB_NONE, + ]; + let display = egl + .get_platform_display( + EGL_PLATFORM_ANGLE_ANGLE, + display.as_ptr(), + &display_attributes, + ) + .unwrap(); + (display, Some(Arc::new(library)), WindowKind::AngleX11) + } else if client_ext_str.contains("EGL_MESA_platform_surfaceless") { + log::info!("No windowing system present. 
Using surfaceless platform"); + let egl = egl1_5.expect("Failed to get EGL 1.5 for surfaceless"); + let display = egl + .get_platform_display( + EGL_PLATFORM_SURFACELESS_MESA, + std::ptr::null_mut(), + &[khronos_egl::ATTRIB_NONE], + ) + .unwrap(); + (display, None, WindowKind::Unknown) + } else { + log::info!("EGL_MESA_platform_surfaceless not available. Using default platform"); + let display = egl.get_display(khronos_egl::DEFAULT_DISPLAY).unwrap(); + (display, None, WindowKind::Unknown) + }; + + if desc.flags.contains(crate::InstanceFlags::VALIDATION) + && client_ext_str.contains("EGL_KHR_debug") + { + log::info!("Enabling EGL debug output"); + let function: EglDebugMessageControlFun = { + let addr = egl.get_proc_address("eglDebugMessageControlKHR").unwrap(); + unsafe { std::mem::transmute(addr) } + }; + let attributes = [ + EGL_DEBUG_MSG_CRITICAL_KHR as khronos_egl::Attrib, + 1, + EGL_DEBUG_MSG_ERROR_KHR as khronos_egl::Attrib, + 1, + EGL_DEBUG_MSG_WARN_KHR as khronos_egl::Attrib, + 1, + EGL_DEBUG_MSG_INFO_KHR as khronos_egl::Attrib, + 1, + khronos_egl::ATTRIB_NONE, + ]; + unsafe { (function)(Some(egl_debug_proc), attributes.as_ptr()) }; + } + + let inner = Inner::create(desc.flags, egl, display)?; + + Ok(Instance { + wsi: WindowSystemInterface { + library: wsi_library, + kind: wsi_kind, + }, + flags: desc.flags, + inner: Mutex::new(inner), + }) + } + + #[cfg_attr(target_os = "macos", allow(unused, unused_mut, unreachable_code))] + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + use raw_window_handle::RawWindowHandle as Rwh; + + #[cfg_attr( + any(target_os = "android", target_os = "emscripten"), + allow(unused_mut) + )] + let mut inner = self.inner.lock(); + + match (window_handle, display_handle) { + (Rwh::Xlib(_), _) => {} + (Rwh::Xcb(_), _) => {} + (Rwh::Win32(_), _) => {} + (Rwh::AppKit(_), _) => {} + #[cfg(target_os = "android")] + (Rwh::AndroidNdk(handle), _) => { + let format = inner + .egl + .instance + .get_config_attrib( + inner.egl.display, + inner.config, + khronos_egl::NATIVE_VISUAL_ID, + ) + .unwrap(); + + let ret = unsafe { + ANativeWindow_setBuffersGeometry(handle.a_native_window, 0, 0, format) + }; + + if ret != 0 { + log::error!("Error returned from ANativeWindow_setBuffersGeometry"); + return Err(crate::InstanceError); + } + } + #[cfg(not(target_os = "emscripten"))] + (Rwh::Wayland(_), raw_window_handle::RawDisplayHandle::Wayland(display_handle)) => { + if inner + .wl_display + .map(|ptr| ptr != display_handle.display) + .unwrap_or(true) + { + /* Wayland displays are not sharable between surfaces so if the + * surface we receive from this handle is from a different + * display, we must re-initialize the context. 
+ * + * See gfx-rs/gfx#3545 + */ + log::warn!("Re-initializing Gles context due to Wayland window"); + + use std::ops::DerefMut; + let display_attributes = [khronos_egl::ATTRIB_NONE]; + + let display = inner + .egl + .instance + .upcast::<khronos_egl::EGL1_5>() + .unwrap() + .get_platform_display( + EGL_PLATFORM_WAYLAND_KHR, + display_handle.display, + &display_attributes, + ) + .unwrap(); + + let new_inner = + Inner::create(self.flags, Arc::clone(&inner.egl.instance), display) + .map_err(|_| crate::InstanceError)?; + + let old_inner = std::mem::replace(inner.deref_mut(), new_inner); + inner.wl_display = Some(display_handle.display); + + drop(old_inner); + } + } + #[cfg(target_os = "emscripten")] + (Rwh::Web(_), _) => {} + other => { + log::error!("Unsupported window: {:?}", other); + return Err(crate::InstanceError); + } + }; + + inner.egl.unmake_current(); + + Ok(Surface { + egl: inner.egl.clone(), + wsi: self.wsi.clone(), + config: inner.config, + presentable: inner.supports_native_window, + raw_window_handle: window_handle, + swapchain: None, + srgb_kind: inner.srgb_kind, + }) + } + unsafe fn destroy_surface(&self, _surface: Surface) {} + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let inner = self.inner.lock(); + inner.egl.make_current(); + + let gl = unsafe { + glow::Context::from_loader_function(|name| { + inner + .egl + .instance + .get_proc_address(name) + .map_or(ptr::null(), |p| p as *const _) + }) + }; + + if self.flags.contains(crate::InstanceFlags::DEBUG) && gl.supports_debug() { + log::info!("Max label length: {}", unsafe { + gl.get_parameter_i32(glow::MAX_LABEL_LENGTH) + }); + } + + if self.flags.contains(crate::InstanceFlags::VALIDATION) && gl.supports_debug() { + log::info!("Enabling GLES debug output"); + unsafe { gl.enable(glow::DEBUG_OUTPUT) }; + unsafe { gl.debug_message_callback(gl_debug_message_callback) }; + } + + inner.egl.unmake_current(); + + unsafe { + super::Adapter::expose(AdapterContext { + glow: Mutex::new(gl), + egl: Some(inner.egl.clone()), + }) + } + .into_iter() + .collect() + } +} + +impl super::Adapter { + /// Creates a new external adapter using the specified loader function. + /// + /// # Safety + /// + /// - The underlying OpenGL ES context must be current. + /// - The underlying OpenGL ES context must be current when interfacing with any objects returned by + /// wgpu-hal from this adapter. + pub unsafe fn new_external( + fun: impl FnMut(&str) -> *const ffi::c_void, + ) -> Option<crate::ExposedAdapter<super::Api>> { + let context = unsafe { glow::Context::from_loader_function(fun) }; + unsafe { + Self::expose(AdapterContext { + glow: Mutex::new(context), + egl: None, + }) + } + } + + pub fn adapter_context(&self) -> &AdapterContext { + &self.shared.context + } +} + +impl super::Device { + /// Returns the underlying EGL context. 
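+    ///
+    /// A minimal usage sketch (editor's illustration, not upstream documentation):
+    ///
+    /// ```ignore
+    /// let guard = device.context().lock();
+    /// // `guard` derefs to `glow::Context`; the owned EGL context is made current for the
+    /// // guard's lifetime and released again when it is dropped.
+    /// ```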
+ pub fn context(&self) -> &AdapterContext { + &self.shared.context + } +} + +#[derive(Debug)] +pub struct Swapchain { + surface: khronos_egl::Surface, + wl_window: Option<*mut raw::c_void>, + framebuffer: glow::Framebuffer, + renderbuffer: glow::Renderbuffer, + /// Extent because the window lies + extent: wgt::Extent3d, + format: wgt::TextureFormat, + format_desc: super::TextureFormatDesc, + #[allow(unused)] + sample_type: wgt::TextureSampleType, +} + +#[derive(Debug)] +pub struct Surface { + egl: EglContext, + wsi: WindowSystemInterface, + config: khronos_egl::Config, + pub(super) presentable: bool, + raw_window_handle: raw_window_handle::RawWindowHandle, + swapchain: Option<Swapchain>, + srgb_kind: SrgbFrameBufferKind, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +impl Surface { + pub(super) unsafe fn present( + &mut self, + _suf_texture: super::Texture, + gl: &glow::Context, + ) -> Result<(), crate::SurfaceError> { + let sc = self.swapchain.as_ref().unwrap(); + + self.egl + .instance + .make_current( + self.egl.display, + Some(sc.surface), + Some(sc.surface), + Some(self.egl.raw), + ) + .map_err(|e| { + log::error!("make_current(surface) failed: {}", e); + crate::SurfaceError::Lost + })?; + + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.color_mask(true, true, true, true) }; + + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(sc.framebuffer)) }; + // Note the Y-flipping here. GL's presentation is not flipped, + // but main rendering is. Therefore, we Y-flip the output positions + // in the shader, and also this blit. + unsafe { + gl.blit_framebuffer( + 0, + sc.extent.height as i32, + sc.extent.width as i32, + 0, + 0, + 0, + sc.extent.width as i32, + sc.extent.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + + self.egl + .instance + .swap_buffers(self.egl.display, sc.surface) + .map_err(|e| { + log::error!("swap_buffers failed: {}", e); + crate::SurfaceError::Lost + })?; + self.egl + .instance + .make_current(self.egl.display, None, None, None) + .map_err(|e| { + log::error!("make_current(null) failed: {}", e); + crate::SurfaceError::Lost + })?; + + Ok(()) + } + + unsafe fn unconfigure_impl( + &mut self, + device: &super::Device, + ) -> Option<(khronos_egl::Surface, Option<*mut raw::c_void>)> { + let gl = &device.shared.context.lock(); + match self.swapchain.take() { + Some(sc) => { + unsafe { gl.delete_renderbuffer(sc.renderbuffer) }; + unsafe { gl.delete_framebuffer(sc.framebuffer) }; + Some((sc.surface, sc.wl_window)) + } + None => None, + } + } + + pub fn supports_srgb(&self) -> bool { + match self.srgb_kind { + SrgbFrameBufferKind::None => false, + _ => true, + } + } +} + +impl crate::Surface<super::Api> for Surface { + unsafe fn configure( + &mut self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + use raw_window_handle::RawWindowHandle as Rwh; + + let (surface, wl_window) = match unsafe { self.unconfigure_impl(device) } { + Some(pair) => pair, + None => { + let mut wl_window = None; + let (mut temp_xlib_handle, mut temp_xcb_handle); + #[allow(trivial_casts)] + let native_window_ptr = match (self.wsi.kind, self.raw_window_handle) { + (WindowKind::Unknown | WindowKind::X11, Rwh::Xlib(handle)) => { + temp_xlib_handle = handle.window; + &mut temp_xlib_handle as *mut _ as *mut std::ffi::c_void + } + (WindowKind::AngleX11, 
Rwh::Xlib(handle)) => { + handle.window as *mut std::ffi::c_void + } + (WindowKind::Unknown | WindowKind::X11, Rwh::Xcb(handle)) => { + temp_xcb_handle = handle.window; + &mut temp_xcb_handle as *mut _ as *mut std::ffi::c_void + } + (WindowKind::AngleX11, Rwh::Xcb(handle)) => { + handle.window as *mut std::ffi::c_void + } + (WindowKind::Unknown, Rwh::AndroidNdk(handle)) => handle.a_native_window, + (WindowKind::Wayland, Rwh::Wayland(handle)) => { + let library = self.wsi.library.as_ref().unwrap(); + let wl_egl_window_create: libloading::Symbol<WlEglWindowCreateFun> = + unsafe { library.get(b"wl_egl_window_create") }.unwrap(); + let window = unsafe { wl_egl_window_create(handle.surface, 640, 480) } + as *mut _ as *mut std::ffi::c_void; + wl_window = Some(window); + window + } + #[cfg(target_os = "emscripten")] + (WindowKind::Unknown, Rwh::Web(handle)) => handle.id as *mut std::ffi::c_void, + (WindowKind::Unknown, Rwh::Win32(handle)) => handle.hwnd, + (WindowKind::Unknown, Rwh::AppKit(handle)) => { + #[cfg(not(target_os = "macos"))] + let window_ptr = handle.ns_view; + #[cfg(target_os = "macos")] + let window_ptr = { + use objc::{msg_send, runtime::Object, sel, sel_impl}; + // ns_view always have a layer and don't need to verify that it exists. + let layer: *mut Object = + msg_send![handle.ns_view as *mut Object, layer]; + layer as *mut ffi::c_void + }; + window_ptr + } + _ => { + log::warn!( + "Initialized platform {:?} doesn't work with window {:?}", + self.wsi.kind, + self.raw_window_handle + ); + return Err(crate::SurfaceError::Other("incompatible window kind")); + } + }; + + let mut attributes = vec![ + khronos_egl::RENDER_BUFFER, + // We don't want any of the buffering done by the driver, because we + // manage a swapchain on our side. + // Some drivers just fail on surface creation seeing `EGL_SINGLE_BUFFER`. 
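+                    // Descriptive note: the expression below selects `BACK_BUFFER` on Android,
+                    // macOS and Windows, and when going through ANGLE over X11; `SINGLE_BUFFER`
+                    // is used everywhere else.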
+ if cfg!(any(target_os = "android", target_os = "macos")) + || cfg!(windows) + || self.wsi.kind == WindowKind::AngleX11 + { + khronos_egl::BACK_BUFFER + } else { + khronos_egl::SINGLE_BUFFER + }, + ]; + match self.srgb_kind { + SrgbFrameBufferKind::None => {} + SrgbFrameBufferKind::Core => { + attributes.push(khronos_egl::GL_COLORSPACE); + attributes.push(khronos_egl::GL_COLORSPACE_SRGB); + } + SrgbFrameBufferKind::Khr => { + attributes.push(EGL_GL_COLORSPACE_KHR as i32); + attributes.push(EGL_GL_COLORSPACE_SRGB_KHR as i32); + } + } + attributes.push(khronos_egl::ATTRIB_NONE as i32); + + #[cfg(not(target_os = "emscripten"))] + let egl1_5 = self.egl.instance.upcast::<khronos_egl::EGL1_5>(); + + #[cfg(target_os = "emscripten")] + let egl1_5: Option<&Arc<EglInstance>> = Some(&self.egl.instance); + + // Careful, we can still be in 1.4 version even if `upcast` succeeds + let raw_result = match egl1_5 { + Some(egl) if self.wsi.kind != WindowKind::Unknown => { + let attributes_usize = attributes + .into_iter() + .map(|v| v as usize) + .collect::<Vec<_>>(); + egl.create_platform_window_surface( + self.egl.display, + self.config, + native_window_ptr, + &attributes_usize, + ) + } + _ => unsafe { + self.egl.instance.create_window_surface( + self.egl.display, + self.config, + native_window_ptr, + Some(&attributes), + ) + }, + }; + + match raw_result { + Ok(raw) => (raw, wl_window), + Err(e) => { + log::warn!("Error in create_window_surface: {:?}", e); + return Err(crate::SurfaceError::Lost); + } + } + } + }; + + if let Some(window) = wl_window { + let library = self.wsi.library.as_ref().unwrap(); + let wl_egl_window_resize: libloading::Symbol<WlEglWindowResizeFun> = + unsafe { library.get(b"wl_egl_window_resize") }.unwrap(); + unsafe { + wl_egl_window_resize( + window, + config.extent.width as i32, + config.extent.height as i32, + 0, + 0, + ) + }; + } + + let format_desc = device.shared.describe_texture_format(config.format); + let gl = &device.shared.context.lock(); + let renderbuffer = unsafe { gl.create_renderbuffer() }.map_err(|error| { + log::error!("Internal swapchain renderbuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, Some(renderbuffer)) }; + unsafe { + gl.renderbuffer_storage( + glow::RENDERBUFFER, + format_desc.internal, + config.extent.width as _, + config.extent.height as _, + ) + }; + let framebuffer = unsafe { gl.create_framebuffer() }.map_err(|error| { + log::error!("Internal swapchain framebuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(framebuffer)) }; + unsafe { + gl.framebuffer_renderbuffer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + glow::RENDERBUFFER, + Some(renderbuffer), + ) + }; + unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + + self.swapchain = Some(Swapchain { + surface, + wl_window, + renderbuffer, + framebuffer, + extent: config.extent, + format: config.format, + format_desc, + sample_type: wgt::TextureSampleType::Float { filterable: false }, + }); + + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &super::Device) { + if let Some((surface, wl_window)) = unsafe { self.unconfigure_impl(device) } { + self.egl + .instance + .destroy_surface(self.egl.display, surface) + .unwrap(); + if let Some(window) = wl_window { + let library = self.wsi.library.as_ref().expect("unsupported window"); + let wl_egl_window_destroy: 
libloading::Symbol<WlEglWindowDestroyFun> = + unsafe { library.get(b"wl_egl_window_destroy") }.unwrap(); + unsafe { wl_egl_window_destroy(window) }; + } + } + } + + unsafe fn acquire_texture( + &mut self, + _timeout_ms: Option<Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let sc = self.swapchain.as_ref().unwrap(); + let texture = super::Texture { + inner: super::TextureInner::Renderbuffer { + raw: sc.renderbuffer, + }, + drop_guard: None, + array_layer_count: 1, + mip_level_count: 1, + format: sc.format, + format_desc: sc.format_desc.clone(), + copy_size: crate::CopyExtent { + width: sc.extent.width, + height: sc.extent.height, + depth: 1, + }, + is_cubemap: false, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&mut self, _texture: super::Texture) {} +} diff --git a/third_party/rust/wgpu-hal/src/gles/emscripten.rs b/third_party/rust/wgpu-hal/src/gles/emscripten.rs new file mode 100644 index 0000000000..7372dbd369 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/emscripten.rs @@ -0,0 +1,26 @@ +extern "C" { + /// returns 1 if success. 0 if failure. extension name must be null terminated + fn emscripten_webgl_enable_extension( + context: std::ffi::c_int, + extension: *const std::ffi::c_char, + ) -> std::ffi::c_int; + fn emscripten_webgl_get_current_context() -> std::ffi::c_int; +} +/// Webgl requires extensions to be enabled before using them. +/// This function can be used to enable webgl extension on emscripten target. +/// +/// returns true on success +/// +/// # Safety: +/// +/// - opengl context MUST BE current +/// - extension_name_null_terminated argument must be a valid string with null terminator. +/// - extension must be present. check `glow_context.supported_extensions()` +pub unsafe fn enable_extension(extension_name_null_terminated: &str) -> bool { + unsafe { + emscripten_webgl_enable_extension( + emscripten_webgl_get_current_context(), + extension_name_null_terminated.as_ptr() as _, + ) == 1 + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/mod.rs b/third_party/rust/wgpu-hal/src/gles/mod.rs new file mode 100644 index 0000000000..f11286833d --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/mod.rs @@ -0,0 +1,861 @@ +/*! +# OpenGL ES3 API (aka GLES3). + +Designed to work on Linux and Android, with context provided by EGL. + +## Texture views + +GLES3 doesn't really have separate texture view objects. We have to remember the +original texture and the sub-range into it. Problem is, however, that there is +no way to expose a subset of array layers or mip levels of a sampled texture. + +## Binding model + +Binding model is very different from WebGPU, especially with regards to samplers. +GLES3 has sampler objects, but they aren't separately bindable to the shaders. +Each sampled texture is exposed to the shader as a combined texture-sampler binding. + +When building the pipeline layout, we linearize binding entries based on the groups +(uniform/storage buffers, uniform/storage textures), and record the mapping into +`BindGroupLayoutInfo`. +When a pipeline gets created, and we track all the texture-sampler associations +from the static use in the shader. +We only support at most one sampler used with each texture so far. The linear index +of this sampler is stored per texture slot in `SamplerBindMap` array. 
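+
+For example (an illustrative layout, not taken from upstream docs or tests), entries are
+counted per register class across groups in declaration order:
+
+```text
+group 0: binding 0 = uniform buffer, binding 1 = sampled texture
+group 1: binding 0 = sampled texture, binding 1 = storage buffer
+
+linearized slots: uniform buffers [0], textures [0, 1], storage buffers [0]
+```
+
+`binding_to_slot` in each `BindGroupLayoutInfo` records these per-binding indices, and the
+same values are fed to the `naga` GLSL backend via its `binding_map`.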
+ +The texture-sampler pairs get potentially invalidated in 2 places: + - when a new pipeline is set, we update the linear indices of associated samplers + - when a new bind group is set, we update both the textures and the samplers + +We expect that the changes to sampler states between any 2 pipelines of the same layout +will be minimal, if any. + +## Vertex data + +Generally, vertex buffers are marked as dirty and lazily bound on draw. + +GLES3 doesn't support "base instance" semantics. However, it's easy to support, +since we are forced to do late binding anyway. We just adjust the offsets +into the vertex data. + +### Old path + +In GLES-3.0 and WebGL2, vertex buffer layout is provided +together with the actual buffer binding. +We invalidate the attributes on the vertex buffer change, and re-bind them. + +### New path + +In GLES-3.1 and higher, the vertex buffer layout can be declared separately +from the vertex data itself. This mostly matches WebGPU, however there is a catch: +`stride` needs to be specified with the data, not as a part of the layout. + +To address this, we invalidate the vertex buffers based on: + - whether or not `start_instance` is used + - stride has changed + +*/ + +///cbindgen:ignore +#[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] +mod egl; +#[cfg(target_os = "emscripten")] +mod emscripten; +#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] +mod web; + +mod adapter; +mod command; +mod conv; +mod device; +mod queue; + +use crate::{CopyExtent, TextureDescriptor}; + +#[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] +pub use self::egl::{AdapterContext, AdapterContextLock}; +#[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] +use self::egl::{Instance, Surface}; + +#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] +pub use self::web::AdapterContext; +#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] +use self::web::{Instance, Surface}; + +use arrayvec::ArrayVec; + +use glow::HasContext; + +use naga::FastHashMap; +use parking_lot::Mutex; +use std::sync::atomic::AtomicU32; +use std::{fmt, ops::Range, sync::Arc}; + +#[derive(Clone)] +pub struct Api; + +//Note: we can support more samplers if not every one of them is used at a time, +// but it probably doesn't worth it. +const MAX_TEXTURE_SLOTS: usize = 16; +const MAX_SAMPLERS: usize = 16; +const MAX_VERTEX_ATTRIBUTES: usize = 16; +const ZERO_BUFFER_SIZE: usize = 256 << 10; +const MAX_PUSH_CONSTANTS: usize = 64; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = Texture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +bitflags::bitflags! { + /// Flags that affect internal code paths but do not + /// change the exposed feature set. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct PrivateCapabilities: u32 { + /// Indicates support for `glBufferStorage` allocation. + const BUFFER_ALLOCATION = 1 << 0; + /// Support explicit layouts in shader. 
+ const SHADER_BINDING_LAYOUT = 1 << 1; + /// Support extended shadow sampling instructions. + const SHADER_TEXTURE_SHADOW_LOD = 1 << 2; + /// Support memory barriers. + const MEMORY_BARRIERS = 1 << 3; + /// Vertex buffer layouts separate from the data. + const VERTEX_BUFFER_LAYOUT = 1 << 4; + /// Indicates that buffers used as `GL_ELEMENT_ARRAY_BUFFER` may be created / initialized / used + /// as other targets, if not present they must not be mixed with other targets. + const INDEX_BUFFER_ROLE_CHANGE = 1 << 5; + /// Indicates that the device supports disabling draw buffers + const CAN_DISABLE_DRAW_BUFFER = 1 << 6; + /// Supports `glGetBufferSubData` + const GET_BUFFER_SUB_DATA = 1 << 7; + /// Supports `f16` color buffers + const COLOR_BUFFER_HALF_FLOAT = 1 << 8; + /// Supports `f11/f10` and `f32` color buffers + const COLOR_BUFFER_FLOAT = 1 << 9; + /// Supports linear flitering `f32` textures. + const TEXTURE_FLOAT_LINEAR = 1 << 10; + } +} + +bitflags::bitflags! { + /// Flags that indicate necessary workarounds for specific devices or driver bugs + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + struct Workarounds: u32 { + // Needs workaround for Intel Mesa bug: + // https://gitlab.freedesktop.org/mesa/mesa/-/issues/2565. + // + // This comment + // (https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4972/diffs?diff_id=75888#22f5d1004713c9bbf857988c7efb81631ab88f99_323_327) + // seems to indicate all skylake models are effected. + const MESA_I915_SRGB_SHADER_CLEAR = 1 << 0; + /// Buffer map must emulated becuase it is not supported natively + const EMULATE_BUFFER_MAP = 1 << 1; + } +} + +type BindTarget = u32; + +#[derive(Debug, Clone, Copy)] +enum VertexAttribKind { + Float, // glVertexAttribPointer + Integer, // glVertexAttribIPointer + //Double, // glVertexAttribLPointer +} + +impl Default for VertexAttribKind { + fn default() -> Self { + Self::Float + } +} + +#[derive(Clone, Debug)] +pub struct TextureFormatDesc { + pub internal: u32, + pub external: u32, + pub data_type: u32, +} + +struct AdapterShared { + context: AdapterContext, + private_caps: PrivateCapabilities, + features: wgt::Features, + workarounds: Workarounds, + shading_language_version: naga::back::glsl::Version, + max_texture_size: u32, + next_shader_id: AtomicU32, + program_cache: Mutex<ProgramCache>, +} + +pub struct Adapter { + shared: Arc<AdapterShared>, +} + +pub struct Device { + shared: Arc<AdapterShared>, + main_vao: glow::VertexArray, + #[cfg(all(not(target_arch = "wasm32"), feature = "renderdoc"))] + render_doc: crate::auxil::renderdoc::RenderDoc, +} + +pub struct Queue { + shared: Arc<AdapterShared>, + features: wgt::Features, + draw_fbo: glow::Framebuffer, + copy_fbo: glow::Framebuffer, + /// Shader program used to clear the screen for [`Workarounds::MESA_I915_SRGB_SHADER_CLEAR`] + /// devices. + shader_clear_program: glow::Program, + /// The uniform location of the color uniform in the shader clear program + shader_clear_program_color_uniform_location: glow::UniformLocation, + /// Keep a reasonably large buffer filled with zeroes, so that we can implement `ClearBuffer` of + /// zeroes by copying from it. 
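+    /// (Presumably sized by `ZERO_BUFFER_SIZE` above, i.e. 256 KiB.)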
+ zero_buffer: glow::Buffer, + temp_query_results: Vec<u64>, + draw_buffer_count: u8, + current_index_buffer: Option<glow::Buffer>, +} + +#[derive(Clone, Debug)] +pub struct Buffer { + raw: Option<glow::Buffer>, + target: BindTarget, + size: wgt::BufferAddress, + map_flags: u32, + data: Option<Arc<std::sync::Mutex<Vec<u8>>>>, +} + +// Safe: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for Buffer {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for Buffer {} + +#[derive(Clone, Debug)] +pub enum TextureInner { + Renderbuffer { + raw: glow::Renderbuffer, + }, + DefaultRenderbuffer, + Texture { + raw: glow::Texture, + target: BindTarget, + }, + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + ExternalFramebuffer { + inner: web_sys::WebGlFramebuffer, + }, +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Send for TextureInner {} +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for TextureInner {} + +impl TextureInner { + fn as_native(&self) -> (glow::Texture, BindTarget) { + match *self { + Self::Renderbuffer { .. } | Self::DefaultRenderbuffer => { + panic!("Unexpected renderbuffer"); + } + Self::Texture { raw, target } => (raw, target), + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + Self::ExternalFramebuffer { .. } => panic!("Unexpected external framebuffer"), + } + } +} + +#[derive(Debug)] +pub struct Texture { + pub inner: TextureInner, + pub drop_guard: Option<crate::DropGuard>, + pub mip_level_count: u32, + pub array_layer_count: u32, + pub format: wgt::TextureFormat, + #[allow(unused)] + pub format_desc: TextureFormatDesc, + pub copy_size: CopyExtent, + pub is_cubemap: bool, +} + +impl Texture { + pub fn default_framebuffer(format: wgt::TextureFormat) -> Self { + Self { + inner: TextureInner::DefaultRenderbuffer, + drop_guard: None, + mip_level_count: 1, + array_layer_count: 1, + format, + format_desc: TextureFormatDesc { + internal: 0, + external: 0, + data_type: 0, + }, + copy_size: CopyExtent { + width: 0, + height: 0, + depth: 0, + }, + is_cubemap: false, + } + } + + /// Returns the `target`, whether the image is 3d and whether the image is a cubemap. + fn get_info_from_desc(desc: &TextureDescriptor) -> (u32, bool, bool) { + match desc.dimension { + wgt::TextureDimension::D1 => (glow::TEXTURE_2D, false, false), + wgt::TextureDimension::D2 => { + // HACK: detect a cube map; forces cube compatible textures to be cube textures + match (desc.is_cube_compatible(), desc.size.depth_or_array_layers) { + (false, 1) => (glow::TEXTURE_2D, false, false), + (false, _) => (glow::TEXTURE_2D_ARRAY, true, false), + (true, 6) => (glow::TEXTURE_CUBE_MAP, false, true), + (true, _) => (glow::TEXTURE_CUBE_MAP_ARRAY, true, true), + } + } + wgt::TextureDimension::D3 => (glow::TEXTURE_3D, true, false), + } + } +} + +#[derive(Clone, Debug)] +pub struct TextureView { + inner: TextureInner, + aspects: crate::FormatAspects, + mip_levels: Range<u32>, + array_layers: Range<u32>, + format: wgt::TextureFormat, +} + +#[derive(Debug)] +pub struct Sampler { + raw: glow::Sampler, +} + +pub struct BindGroupLayout { + entries: Arc<[wgt::BindGroupLayoutEntry]>, +} + +struct BindGroupLayoutInfo { + entries: Arc<[wgt::BindGroupLayoutEntry]>, + /// Mapping of resources, indexed by `binding`, into the whole layout space. + /// For texture resources, the value is the texture slot index. + /// For sampler resources, the value is the index of the sampler in the whole layout. 
+ /// For buffers, the value is the uniform or storage slot index. + /// For unused bindings, the value is `!0` + binding_to_slot: Box<[u8]>, +} + +pub struct PipelineLayout { + group_infos: Box<[BindGroupLayoutInfo]>, + naga_options: naga::back::glsl::Options, +} + +impl PipelineLayout { + fn get_slot(&self, br: &naga::ResourceBinding) -> u8 { + let group_info = &self.group_infos[br.group as usize]; + group_info.binding_to_slot[br.binding as usize] + } +} + +#[derive(Debug)] +enum BindingRegister { + UniformBuffers, + StorageBuffers, + Textures, + Images, +} + +#[derive(Debug)] +enum RawBinding { + Buffer { + raw: glow::Buffer, + offset: i32, + size: i32, + }, + Texture { + raw: glow::Texture, + target: BindTarget, + aspects: crate::FormatAspects, + //TODO: mip levels, array layers + }, + Image(ImageBinding), + Sampler(glow::Sampler), +} + +#[derive(Debug)] +pub struct BindGroup { + contents: Box<[RawBinding]>, +} + +type ShaderId = u32; + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + label: Option<String>, + id: ShaderId, +} + +#[derive(Clone, Debug, Default)] +struct VertexFormatDesc { + element_count: i32, + element_format: u32, + attrib_kind: VertexAttribKind, +} + +#[derive(Clone, Debug, Default)] +struct AttributeDesc { + location: u32, + offset: u32, + buffer_index: u32, + format_desc: VertexFormatDesc, +} + +#[derive(Clone, Debug)] +struct BufferBinding { + raw: glow::Buffer, + offset: wgt::BufferAddress, +} + +#[derive(Clone, Debug)] +struct ImageBinding { + raw: glow::Texture, + mip_level: u32, + array_layer: Option<u32>, + access: u32, + format: u32, +} + +#[derive(Clone, Debug, Default, PartialEq)] +struct VertexBufferDesc { + step: wgt::VertexStepMode, + stride: u32, +} + +#[derive(Clone, Debug, Default)] +struct UniformDesc { + location: Option<glow::UniformLocation>, + size: u32, + utype: u32, +} + +// Safe: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for UniformDesc {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for UniformDesc {} + +/// For each texture in the pipeline layout, store the index of the only +/// sampler (in this layout) that the texture is used with. 
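+/// Entries are `None` for texture slots that are unused or have no associated sampler.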
+type SamplerBindMap = [Option<u8>; MAX_TEXTURE_SLOTS]; + +struct PipelineInner { + program: glow::Program, + sampler_map: SamplerBindMap, + uniforms: [UniformDesc; MAX_PUSH_CONSTANTS], +} + +#[derive(Clone, Debug)] +struct DepthState { + function: u32, + mask: bool, +} + +#[derive(Clone, Debug, PartialEq)] +struct BlendComponent { + src: u32, + dst: u32, + equation: u32, +} + +#[derive(Clone, Debug, PartialEq)] +struct BlendDesc { + alpha: BlendComponent, + color: BlendComponent, +} + +#[derive(Clone, Debug, Default, PartialEq)] +struct ColorTargetDesc { + mask: wgt::ColorWrites, + blend: Option<BlendDesc>, +} + +#[derive(PartialEq, Eq, Hash)] +struct ProgramStage { + naga_stage: naga::ShaderStage, + shader_id: ShaderId, + entry_point: String, +} + +#[derive(PartialEq, Eq, Hash)] +struct ProgramCacheKey { + stages: ArrayVec<ProgramStage, 3>, + group_to_binding_to_slot: Box<[Box<[u8]>]>, +} + +type ProgramCache = FastHashMap<ProgramCacheKey, Result<Arc<PipelineInner>, crate::PipelineError>>; + +pub struct RenderPipeline { + inner: Arc<PipelineInner>, + primitive: wgt::PrimitiveState, + vertex_buffers: Box<[VertexBufferDesc]>, + vertex_attributes: Box<[AttributeDesc]>, + color_targets: Box<[ColorTargetDesc]>, + depth: Option<DepthState>, + depth_bias: wgt::DepthBiasState, + stencil: Option<StencilState>, + alpha_to_coverage_enabled: bool, +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Send for RenderPipeline {} +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for RenderPipeline {} + +pub struct ComputePipeline { + inner: Arc<PipelineInner>, +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Send for ComputePipeline {} +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for ComputePipeline {} + +#[derive(Debug)] +pub struct QuerySet { + queries: Box<[glow::Query]>, + target: BindTarget, +} + +#[derive(Debug)] +pub struct Fence { + last_completed: crate::FenceValue, + pending: Vec<(crate::FenceValue, glow::Fence)>, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +impl Fence { + fn get_latest(&self, gl: &glow::Context) -> crate::FenceValue { + let mut max_value = self.last_completed; + for &(value, sync) in self.pending.iter() { + let status = unsafe { gl.get_sync_status(sync) }; + if status == glow::SIGNALED { + max_value = value; + } + } + max_value + } + + fn maintain(&mut self, gl: &glow::Context) { + let latest = self.get_latest(gl); + for &(value, sync) in self.pending.iter() { + if value <= latest { + unsafe { + gl.delete_sync(sync); + } + } + } + self.pending.retain(|&(value, _)| value > latest); + self.last_completed = latest; + } +} + +#[derive(Clone, Debug, PartialEq)] +struct StencilOps { + pass: u32, + fail: u32, + depth_fail: u32, +} + +impl Default for StencilOps { + fn default() -> Self { + Self { + pass: glow::KEEP, + fail: glow::KEEP, + depth_fail: glow::KEEP, + } + } +} + +#[derive(Clone, Debug, PartialEq)] +struct StencilSide { + function: u32, + mask_read: u32, + mask_write: u32, + reference: u32, + ops: StencilOps, +} + +impl Default for StencilSide { + fn default() -> Self { + Self { + function: glow::ALWAYS, + mask_read: 0xFF, + mask_write: 0xFF, + reference: 0, + ops: StencilOps::default(), + } + } +} + +#[derive(Clone, Default)] +struct StencilState { + front: StencilSide, + back: StencilSide, +} + +#[derive(Clone, Debug, Default, PartialEq)] +struct PrimitiveState { + front_face: u32, + cull_face: u32, + unclipped_depth: bool, +} + +type InvalidatedAttachments = 
ArrayVec<u32, { crate::MAX_COLOR_ATTACHMENTS + 2 }>; + +#[derive(Debug)] +enum Command { + Draw { + topology: u32, + start_vertex: u32, + vertex_count: u32, + instance_count: u32, + }, + DrawIndexed { + topology: u32, + index_type: u32, + index_count: u32, + index_offset: wgt::BufferAddress, + base_vertex: i32, + instance_count: u32, + }, + DrawIndirect { + topology: u32, + indirect_buf: glow::Buffer, + indirect_offset: wgt::BufferAddress, + }, + DrawIndexedIndirect { + topology: u32, + index_type: u32, + indirect_buf: glow::Buffer, + indirect_offset: wgt::BufferAddress, + }, + Dispatch([u32; 3]), + DispatchIndirect { + indirect_buf: glow::Buffer, + indirect_offset: wgt::BufferAddress, + }, + ClearBuffer { + dst: Buffer, + dst_target: BindTarget, + range: crate::MemoryRange, + }, + CopyBufferToBuffer { + src: Buffer, + src_target: BindTarget, + dst: Buffer, + dst_target: BindTarget, + copy: crate::BufferCopy, + }, + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + CopyExternalImageToTexture { + src: wgt::ImageCopyExternalImage, + dst: glow::Texture, + dst_target: BindTarget, + dst_format: wgt::TextureFormat, + dst_premultiplication: bool, + copy: crate::TextureCopy, + }, + CopyTextureToTexture { + src: glow::Texture, + src_target: BindTarget, + dst: glow::Texture, + dst_target: BindTarget, + copy: crate::TextureCopy, + dst_is_cubemap: bool, + }, + CopyBufferToTexture { + src: Buffer, + #[allow(unused)] + src_target: BindTarget, + dst: glow::Texture, + dst_target: BindTarget, + dst_format: wgt::TextureFormat, + copy: crate::BufferTextureCopy, + }, + CopyTextureToBuffer { + src: glow::Texture, + src_target: BindTarget, + src_format: wgt::TextureFormat, + dst: Buffer, + #[allow(unused)] + dst_target: BindTarget, + copy: crate::BufferTextureCopy, + }, + SetIndexBuffer(glow::Buffer), + BeginQuery(glow::Query, BindTarget), + EndQuery(BindTarget), + CopyQueryResults { + query_range: Range<u32>, + dst: Buffer, + dst_target: BindTarget, + dst_offset: wgt::BufferAddress, + }, + ResetFramebuffer { + is_default: bool, + }, + BindAttachment { + attachment: u32, + view: TextureView, + }, + ResolveAttachment { + attachment: u32, + dst: TextureView, + size: wgt::Extent3d, + }, + InvalidateAttachments(InvalidatedAttachments), + SetDrawColorBuffers(u8), + ClearColorF { + draw_buffer: u32, + color: [f32; 4], + is_srgb: bool, + }, + ClearColorU(u32, [u32; 4]), + ClearColorI(u32, [i32; 4]), + ClearDepth(f32), + ClearStencil(u32), + // Clearing both the depth and stencil buffer individually appears to + // result in the stencil buffer failing to clear, atleast in WebGL. + // It is also more efficient to emit a single command instead of two for + // this. 
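+    // The combined clear is issued with a single `clear_buffer_depth_stencil`
+    // (`glClearBufferfi`) call; see the `ClearDepthAndStencil` handling in `queue.rs`.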
+ ClearDepthAndStencil(f32, u32), + BufferBarrier(glow::Buffer, crate::BufferUses), + TextureBarrier(crate::TextureUses), + SetViewport { + rect: crate::Rect<i32>, + depth: Range<f32>, + }, + SetScissor(crate::Rect<i32>), + SetStencilFunc { + face: u32, + function: u32, + reference: u32, + read_mask: u32, + }, + SetStencilOps { + face: u32, + write_mask: u32, + ops: StencilOps, + }, + SetDepth(DepthState), + SetDepthBias(wgt::DepthBiasState), + ConfigureDepthStencil(crate::FormatAspects), + SetAlphaToCoverage(bool), + SetVertexAttribute { + buffer: Option<glow::Buffer>, + buffer_desc: VertexBufferDesc, + attribute_desc: AttributeDesc, + }, + UnsetVertexAttribute(u32), + SetVertexBuffer { + index: u32, + buffer: BufferBinding, + buffer_desc: VertexBufferDesc, + }, + SetProgram(glow::Program), + SetPrimitive(PrimitiveState), + SetBlendConstant([f32; 4]), + SetColorTarget { + draw_buffer_index: Option<u32>, + desc: ColorTargetDesc, + }, + BindBuffer { + target: BindTarget, + slot: u32, + buffer: glow::Buffer, + offset: i32, + size: i32, + }, + BindSampler(u32, Option<glow::Sampler>), + BindTexture { + slot: u32, + texture: glow::Texture, + target: BindTarget, + aspects: crate::FormatAspects, + }, + BindImage { + slot: u32, + binding: ImageBinding, + }, + InsertDebugMarker(Range<u32>), + PushDebugGroup(Range<u32>), + PopDebugGroup, + SetPushConstants { + uniform: UniformDesc, + /// Offset from the start of the `data_bytes` + offset: u32, + }, +} + +#[derive(Default)] +pub struct CommandBuffer { + label: Option<String>, + commands: Vec<Command>, + data_bytes: Vec<u8>, + queries: Vec<glow::Query>, +} + +impl fmt::Debug for CommandBuffer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut builder = f.debug_struct("CommandBuffer"); + if let Some(ref label) = self.label { + builder.field("label", label); + } + builder.finish() + } +} + +//TODO: we would have something like `Arc<typed_arena::Arena>` +// here and in the command buffers. So that everything grows +// inside the encoder and stays there until `reset_all`. 
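+
+// A minimal illustrative sketch (hypothetical helper, not from the upstream code):
+// how a command with a variable-length payload is recorded. The payload bytes go
+// into `data_bytes`, and the `Command` stores only the byte range, so the enum stays
+// small and the queue can recover the payload later with a plain slice lookup
+// (see `extract_marker` in `queue.rs`).
+#[allow(dead_code)]
+fn record_debug_marker_example(cmd_buf: &mut CommandBuffer, label: &str) {
+    let start = cmd_buf.data_bytes.len() as u32;
+    // Store the marker bytes out-of-line in the command buffer's byte arena.
+    cmd_buf.data_bytes.extend_from_slice(label.as_bytes());
+    let end = cmd_buf.data_bytes.len() as u32;
+    // The command itself only records where the bytes live.
+    cmd_buf.commands.push(Command::InsertDebugMarker(start..end));
+}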
+ +pub struct CommandEncoder { + cmd_buffer: CommandBuffer, + state: command::State, + private_caps: PrivateCapabilities, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("cmd_buffer", &self.cmd_buffer) + .finish() + } +} diff --git a/third_party/rust/wgpu-hal/src/gles/queue.rs b/third_party/rust/wgpu-hal/src/gles/queue.rs new file mode 100644 index 0000000000..87a82524fc --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/queue.rs @@ -0,0 +1,1532 @@ +use super::Command as C; +use arrayvec::ArrayVec; +use glow::HasContext; +use std::{mem, slice, sync::Arc}; + +#[cfg(not(target_arch = "wasm32"))] +const DEBUG_ID: u32 = 0; + +const CUBEMAP_FACES: [u32; 6] = [ + glow::TEXTURE_CUBE_MAP_POSITIVE_X, + glow::TEXTURE_CUBE_MAP_NEGATIVE_X, + glow::TEXTURE_CUBE_MAP_POSITIVE_Y, + glow::TEXTURE_CUBE_MAP_NEGATIVE_Y, + glow::TEXTURE_CUBE_MAP_POSITIVE_Z, + glow::TEXTURE_CUBE_MAP_NEGATIVE_Z, +]; + +#[cfg(not(target_arch = "wasm32"))] +fn extract_marker<'a>(data: &'a [u8], range: &std::ops::Range<u32>) -> &'a str { + std::str::from_utf8(&data[range.start as usize..range.end as usize]).unwrap() +} + +fn is_layered_target(target: super::BindTarget) -> bool { + match target { + glow::TEXTURE_2D_ARRAY | glow::TEXTURE_3D | glow::TEXTURE_CUBE_MAP_ARRAY => true, + _ => false, + } +} + +impl super::Queue { + /// Performs a manual shader clear, used as a workaround for a clearing bug on mesa + unsafe fn perform_shader_clear(&self, gl: &glow::Context, draw_buffer: u32, color: [f32; 4]) { + unsafe { gl.use_program(Some(self.shader_clear_program)) }; + unsafe { + gl.uniform_4_f32( + Some(&self.shader_clear_program_color_uniform_location), + color[0], + color[1], + color[2], + color[3], + ) + }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.disable(glow::BLEND) }; + unsafe { gl.disable(glow::CULL_FACE) }; + unsafe { gl.draw_buffers(&[glow::COLOR_ATTACHMENT0 + draw_buffer]) }; + unsafe { gl.draw_arrays(glow::TRIANGLES, 0, 3) }; + + if self.draw_buffer_count != 0 { + // Reset the draw buffers to what they were before the clear + let indices = (0..self.draw_buffer_count as u32) + .map(|i| glow::COLOR_ATTACHMENT0 + i) + .collect::<ArrayVec<_, { crate::MAX_COLOR_ATTACHMENTS }>>(); + unsafe { gl.draw_buffers(&indices) }; + } + #[cfg(not(target_arch = "wasm32"))] + for draw_buffer in 0..self.draw_buffer_count as u32 { + unsafe { gl.disable_draw_buffer(glow::BLEND, draw_buffer) }; + } + } + + unsafe fn reset_state(&mut self, gl: &glow::Context) { + unsafe { gl.use_program(None) }; + unsafe { gl.bind_framebuffer(glow::FRAMEBUFFER, None) }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.disable(glow::BLEND) }; + unsafe { gl.disable(glow::CULL_FACE) }; + unsafe { gl.disable(glow::POLYGON_OFFSET_FILL) }; + unsafe { gl.disable(glow::SAMPLE_ALPHA_TO_COVERAGE) }; + if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + unsafe { gl.disable(glow::DEPTH_CLAMP) }; + } + + unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, None) }; + self.current_index_buffer = None; + } + + unsafe fn set_attachment( + &self, + gl: &glow::Context, + fbo_target: u32, + attachment: u32, + view: &super::TextureView, + ) { + match view.inner { + super::TextureInner::Renderbuffer { raw } => { + unsafe { + gl.framebuffer_renderbuffer( + fbo_target, + 
attachment, + glow::RENDERBUFFER, + Some(raw), + ) + }; + } + super::TextureInner::DefaultRenderbuffer => panic!("Unexpected default RBO"), + super::TextureInner::Texture { raw, target } => { + let num_layers = view.array_layers.end - view.array_layers.start; + if num_layers > 1 { + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + unsafe { + gl.framebuffer_texture_multiview_ovr( + fbo_target, + attachment, + Some(raw), + view.mip_levels.start as i32, + view.array_layers.start as i32, + num_layers as i32, + ) + }; + } else if is_layered_target(target) { + unsafe { + gl.framebuffer_texture_layer( + fbo_target, + attachment, + Some(raw), + view.mip_levels.start as i32, + view.array_layers.start as i32, + ) + }; + } else if target == glow::TEXTURE_CUBE_MAP { + unsafe { + gl.framebuffer_texture_2d( + fbo_target, + attachment, + CUBEMAP_FACES[view.array_layers.start as usize], + Some(raw), + view.mip_levels.start as i32, + ) + }; + } else { + unsafe { + gl.framebuffer_texture_2d( + fbo_target, + attachment, + target, + Some(raw), + view.mip_levels.start as i32, + ) + }; + } + } + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + super::TextureInner::ExternalFramebuffer { ref inner } => unsafe { + gl.bind_external_framebuffer(glow::FRAMEBUFFER, inner); + }, + } + } + + unsafe fn process( + &mut self, + gl: &glow::Context, + command: &C, + #[cfg_attr(target_arch = "wasm32", allow(unused))] data_bytes: &[u8], + queries: &[glow::Query], + ) { + match *command { + C::Draw { + topology, + start_vertex, + vertex_count, + instance_count, + } => { + // Don't use `gl.draw_arrays` for `instance_count == 1`. + // Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `draw_arrays`. + // See https://github.com/gfx-rs/wgpu/issues/3578 + unsafe { + gl.draw_arrays_instanced( + topology, + start_vertex as i32, + vertex_count as i32, + instance_count as i32, + ) + }; + } + C::DrawIndexed { + topology, + index_type, + index_count, + index_offset, + base_vertex, + instance_count, + } => { + match base_vertex { + // Don't use `gl.draw_elements`/`gl.draw_elements_base_vertex` for `instance_count == 1`. + // Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `gl.draw_elements`/`gl.draw_elements_base_vertex`. 
+ // See https://github.com/gfx-rs/wgpu/issues/3578 + 0 => unsafe { + gl.draw_elements_instanced( + topology, + index_count as i32, + index_type, + index_offset as i32, + instance_count as i32, + ) + }, + _ => unsafe { + gl.draw_elements_instanced_base_vertex( + topology, + index_count as _, + index_type, + index_offset as i32, + instance_count as i32, + base_vertex, + ) + }, + } + } + C::DrawIndirect { + topology, + indirect_buf, + indirect_offset, + } => { + unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(indirect_buf)) }; + unsafe { gl.draw_arrays_indirect_offset(topology, indirect_offset as i32) }; + } + C::DrawIndexedIndirect { + topology, + index_type, + indirect_buf, + indirect_offset, + } => { + unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(indirect_buf)) }; + unsafe { + gl.draw_elements_indirect_offset(topology, index_type, indirect_offset as i32) + }; + } + C::Dispatch(group_counts) => { + unsafe { gl.dispatch_compute(group_counts[0], group_counts[1], group_counts[2]) }; + } + C::DispatchIndirect { + indirect_buf, + indirect_offset, + } => { + unsafe { gl.bind_buffer(glow::DISPATCH_INDIRECT_BUFFER, Some(indirect_buf)) }; + unsafe { gl.dispatch_compute_indirect(indirect_offset as i32) }; + } + C::ClearBuffer { + ref dst, + dst_target, + ref range, + } => match dst.raw { + Some(buffer) => { + // When `INDEX_BUFFER_ROLE_CHANGE` isn't available, we can't copy into the + // index buffer from the zero buffer. This would fail in Chrome with the + // following message: + // + // > Cannot copy into an element buffer destination from a non-element buffer + // > source + // + // Instead, we'll upload zeroes into the buffer. + let can_use_zero_buffer = self + .shared + .private_caps + .contains(super::PrivateCapabilities::INDEX_BUFFER_ROLE_CHANGE) + || dst_target != glow::ELEMENT_ARRAY_BUFFER; + + if can_use_zero_buffer { + unsafe { gl.bind_buffer(glow::COPY_READ_BUFFER, Some(self.zero_buffer)) }; + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + let mut dst_offset = range.start; + while dst_offset < range.end { + let size = (range.end - dst_offset).min(super::ZERO_BUFFER_SIZE as u64); + unsafe { + gl.copy_buffer_sub_data( + glow::COPY_READ_BUFFER, + dst_target, + 0, + dst_offset as i32, + size as i32, + ) + }; + dst_offset += size; + } + } else { + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + let zeroes = vec![0u8; (range.end - range.start) as usize]; + unsafe { + gl.buffer_sub_data_u8_slice(dst_target, range.start as i32, &zeroes) + }; + } + } + None => { + dst.data.as_ref().unwrap().lock().unwrap().as_mut_slice() + [range.start as usize..range.end as usize] + .fill(0); + } + }, + C::CopyBufferToBuffer { + ref src, + src_target, + ref dst, + dst_target, + copy, + } => { + let copy_src_target = glow::COPY_READ_BUFFER; + let is_index_buffer_only_element_dst = !self + .shared + .private_caps + .contains(super::PrivateCapabilities::INDEX_BUFFER_ROLE_CHANGE) + && dst_target == glow::ELEMENT_ARRAY_BUFFER + || src_target == glow::ELEMENT_ARRAY_BUFFER; + + // WebGL not allowed to copy data from other targets to element buffer and can't copy element data to other buffers + let copy_dst_target = if is_index_buffer_only_element_dst { + glow::ELEMENT_ARRAY_BUFFER + } else { + glow::COPY_WRITE_BUFFER + }; + let size = copy.size.get() as usize; + match (src.raw, dst.raw) { + (Some(ref src), Some(ref dst)) => { + unsafe { gl.bind_buffer(copy_src_target, Some(*src)) }; + unsafe { gl.bind_buffer(copy_dst_target, Some(*dst)) }; + unsafe { + gl.copy_buffer_sub_data( + 
copy_src_target, + copy_dst_target, + copy.src_offset as _, + copy.dst_offset as _, + copy.size.get() as _, + ) + }; + } + (Some(src), None) => { + let mut data = dst.data.as_ref().unwrap().lock().unwrap(); + let dst_data = &mut data.as_mut_slice() + [copy.dst_offset as usize..copy.dst_offset as usize + size]; + + unsafe { gl.bind_buffer(copy_src_target, Some(src)) }; + unsafe { + self.shared.get_buffer_sub_data( + gl, + copy_src_target, + copy.src_offset as i32, + dst_data, + ) + }; + } + (None, Some(dst)) => { + let data = src.data.as_ref().unwrap().lock().unwrap(); + let src_data = &data.as_slice() + [copy.src_offset as usize..copy.src_offset as usize + size]; + unsafe { gl.bind_buffer(copy_dst_target, Some(dst)) }; + unsafe { + gl.buffer_sub_data_u8_slice( + copy_dst_target, + copy.dst_offset as i32, + src_data, + ) + }; + } + (None, None) => { + todo!() + } + } + unsafe { gl.bind_buffer(copy_src_target, None) }; + if is_index_buffer_only_element_dst { + unsafe { + gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, self.current_index_buffer) + }; + } else { + unsafe { gl.bind_buffer(copy_dst_target, None) }; + } + } + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + C::CopyExternalImageToTexture { + ref src, + dst, + dst_target, + dst_format, + dst_premultiplication, + ref copy, + } => { + const UNPACK_FLIP_Y_WEBGL: u32 = + web_sys::WebGl2RenderingContext::UNPACK_FLIP_Y_WEBGL; + const UNPACK_PREMULTIPLY_ALPHA_WEBGL: u32 = + web_sys::WebGl2RenderingContext::UNPACK_PREMULTIPLY_ALPHA_WEBGL; + + unsafe { + if src.flip_y { + gl.pixel_store_bool(UNPACK_FLIP_Y_WEBGL, true); + } + if dst_premultiplication { + gl.pixel_store_bool(UNPACK_PREMULTIPLY_ALPHA_WEBGL, true); + } + } + + unsafe { gl.bind_texture(dst_target, Some(dst)) }; + let format_desc = self.shared.describe_texture_format(dst_format); + if is_layered_target(dst_target) { + let z_offset = + if let glow::TEXTURE_2D_ARRAY | glow::TEXTURE_CUBE_MAP_ARRAY = dst_target { + copy.dst_base.array_layer as i32 + } else { + copy.dst_base.origin.z as i32 + }; + + match src.source { + wgt::ExternalImageSource::ImageBitmap(ref b) => unsafe { + gl.tex_sub_image_3d_with_image_bitmap( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + b, + ); + }, + wgt::ExternalImageSource::HTMLVideoElement(ref v) => unsafe { + gl.tex_sub_image_3d_with_html_video_element( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + v, + ); + }, + wgt::ExternalImageSource::HTMLCanvasElement(ref c) => unsafe { + gl.tex_sub_image_3d_with_html_canvas_element( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + z_offset, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + c, + ); + }, + wgt::ExternalImageSource::OffscreenCanvas(_) => unreachable!(), + } + } else { + let dst_target = if let glow::TEXTURE_CUBE_MAP = dst_target { + CUBEMAP_FACES[copy.dst_base.array_layer as usize] + } else { + dst_target + }; + + match src.source { + wgt::ExternalImageSource::ImageBitmap(ref b) => unsafe { + 
gl.tex_sub_image_2d_with_image_bitmap_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + b, + ); + }, + wgt::ExternalImageSource::HTMLVideoElement(ref v) => unsafe { + gl.tex_sub_image_2d_with_html_video_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + v, + ) + }, + wgt::ExternalImageSource::HTMLCanvasElement(ref c) => unsafe { + gl.tex_sub_image_2d_with_html_canvas_and_width_and_height( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + c, + ) + }, + wgt::ExternalImageSource::OffscreenCanvas(_) => unreachable!(), + } + } + + unsafe { + if src.flip_y { + gl.pixel_store_bool(UNPACK_FLIP_Y_WEBGL, false); + } + if dst_premultiplication { + gl.pixel_store_bool(UNPACK_PREMULTIPLY_ALPHA_WEBGL, false); + } + } + } + C::CopyTextureToTexture { + src, + src_target, + dst, + dst_target, + dst_is_cubemap, + ref copy, + } => { + //TODO: handle 3D copies + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)) }; + if is_layered_target(src_target) { + //TODO: handle GLES without framebuffer_texture_3d + unsafe { + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.src_base.mip_level as i32, + copy.src_base.array_layer as i32, + ) + }; + } else { + unsafe { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.src_base.mip_level as i32, + ) + }; + } + + unsafe { gl.bind_texture(dst_target, Some(dst)) }; + if dst_is_cubemap { + unsafe { + gl.copy_tex_sub_image_2d( + CUBEMAP_FACES[copy.dst_base.array_layer as usize], + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ) + }; + } else if is_layered_target(dst_target) { + unsafe { + gl.copy_tex_sub_image_3d( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + if let glow::TEXTURE_2D_ARRAY | glow::TEXTURE_CUBE_MAP_ARRAY = + dst_target + { + copy.dst_base.array_layer as i32 + } else { + copy.dst_base.origin.z as i32 + }, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ) + }; + } else { + unsafe { + gl.copy_tex_sub_image_2d( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ) + }; + } + } + C::CopyBufferToTexture { + ref src, + src_target: _, + dst, + dst_target, + dst_format, + ref copy, + } => { + let (block_width, block_height) = dst_format.block_dimensions(); + let block_size = dst_format.block_size(None).unwrap(); + let format_desc = self.shared.describe_texture_format(dst_format); + let row_texels = copy + .buffer_layout + .bytes_per_row + .map_or(0, |bpr| block_width * bpr / block_size); + let 
column_texels = copy + .buffer_layout + .rows_per_image + .map_or(0, |rpi| block_height * rpi); + + unsafe { gl.bind_texture(dst_target, Some(dst)) }; + unsafe { gl.pixel_store_i32(glow::UNPACK_ROW_LENGTH, row_texels as i32) }; + unsafe { gl.pixel_store_i32(glow::UNPACK_IMAGE_HEIGHT, column_texels as i32) }; + let mut unbind_unpack_buffer = false; + if !dst_format.is_compressed() { + let buffer_data; + let unpack_data = match src.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(buffer)) }; + unbind_unpack_buffer = true; + glow::PixelUnpackData::BufferOffset(copy.buffer_layout.offset as u32) + } + None => { + buffer_data = src.data.as_ref().unwrap().lock().unwrap(); + let src_data = + &buffer_data.as_slice()[copy.buffer_layout.offset as usize..]; + glow::PixelUnpackData::Slice(src_data) + } + }; + if is_layered_target(dst_target) { + unsafe { + gl.tex_sub_image_3d( + dst_target, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + if let glow::TEXTURE_2D_ARRAY | glow::TEXTURE_CUBE_MAP_ARRAY = + dst_target + { + copy.texture_base.array_layer as i32 + } else { + copy.texture_base.origin.z as i32 + }, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.external, + format_desc.data_type, + unpack_data, + ) + }; + } else { + unsafe { + gl.tex_sub_image_2d( + if let glow::TEXTURE_CUBE_MAP = dst_target { + CUBEMAP_FACES[copy.texture_base.array_layer as usize] + } else { + dst_target + }, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + unpack_data, + ) + }; + } + } else { + let bytes_per_row = copy + .buffer_layout + .bytes_per_row + .unwrap_or(copy.size.width * block_size); + let minimum_rows_per_image = + (copy.size.height + block_height - 1) / block_height; + let rows_per_image = copy + .buffer_layout + .rows_per_image + .unwrap_or(minimum_rows_per_image); + + let bytes_per_image = bytes_per_row * rows_per_image; + let minimum_bytes_per_image = bytes_per_row * minimum_rows_per_image; + let bytes_in_upload = + (bytes_per_image * (copy.size.depth - 1)) + minimum_bytes_per_image; + let offset = copy.buffer_layout.offset as u32; + + let buffer_data; + let unpack_data = match src.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(buffer)) }; + unbind_unpack_buffer = true; + glow::CompressedPixelUnpackData::BufferRange( + offset..offset + bytes_in_upload, + ) + } + None => { + buffer_data = src.data.as_ref().unwrap().lock().unwrap(); + let src_data = &buffer_data.as_slice() + [(offset as usize)..(offset + bytes_in_upload) as usize]; + glow::CompressedPixelUnpackData::Slice(src_data) + } + }; + + if is_layered_target(dst_target) { + unsafe { + gl.compressed_tex_sub_image_3d( + dst_target, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + if let glow::TEXTURE_2D_ARRAY | glow::TEXTURE_CUBE_MAP_ARRAY = + dst_target + { + copy.texture_base.array_layer as i32 + } else { + copy.texture_base.origin.z as i32 + }, + copy.size.width as i32, + copy.size.height as i32, + copy.size.depth as i32, + format_desc.internal, + unpack_data, + ) + }; + } else { + unsafe { + gl.compressed_tex_sub_image_2d( + if let glow::TEXTURE_CUBE_MAP = dst_target { + CUBEMAP_FACES[copy.texture_base.array_layer as usize] + } else { + 
dst_target + }, + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.internal, + unpack_data, + ) + }; + } + } + if unbind_unpack_buffer { + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, None) }; + } + } + C::CopyTextureToBuffer { + src, + src_target, + src_format, + ref dst, + dst_target: _, + ref copy, + } => { + let block_size = src_format.block_size(None).unwrap(); + if src_format.is_compressed() { + log::error!("Not implemented yet: compressed texture copy to buffer"); + return; + } + if src_target == glow::TEXTURE_CUBE_MAP + || src_target == glow::TEXTURE_CUBE_MAP_ARRAY + { + log::error!("Not implemented yet: cubemap texture copy to buffer"); + return; + } + let format_desc = self.shared.describe_texture_format(src_format); + let row_texels = copy + .buffer_layout + .bytes_per_row + .map_or(copy.size.width, |bpr| bpr / block_size); + let column_texels = copy + .buffer_layout + .rows_per_image + .unwrap_or(copy.size.height); + + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)) }; + + let read_pixels = |offset| { + let mut buffer_data; + let unpack_data = match dst.raw { + Some(buffer) => { + unsafe { gl.pixel_store_i32(glow::PACK_ROW_LENGTH, row_texels as i32) }; + unsafe { gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(buffer)) }; + glow::PixelPackData::BufferOffset(offset as u32) + } + None => { + buffer_data = dst.data.as_ref().unwrap().lock().unwrap(); + let dst_data = &mut buffer_data.as_mut_slice()[offset as usize..]; + glow::PixelPackData::Slice(dst_data) + } + }; + unsafe { + gl.read_pixels( + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + unpack_data, + ) + }; + }; + + match src_target { + glow::TEXTURE_2D => { + unsafe { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.texture_base.mip_level as i32, + ) + }; + read_pixels(copy.buffer_layout.offset); + } + glow::TEXTURE_2D_ARRAY => { + unsafe { + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.texture_base.mip_level as i32, + copy.texture_base.array_layer as i32, + ) + }; + read_pixels(copy.buffer_layout.offset); + } + glow::TEXTURE_3D => { + for z in copy.texture_base.origin.z..copy.size.depth { + unsafe { + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.texture_base.mip_level as i32, + z as i32, + ) + }; + let offset = copy.buffer_layout.offset + + (z * block_size * row_texels * column_texels) as u64; + read_pixels(offset); + } + } + glow::TEXTURE_CUBE_MAP | glow::TEXTURE_CUBE_MAP_ARRAY => unimplemented!(), + _ => unreachable!(), + } + } + C::SetIndexBuffer(buffer) => { + unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, Some(buffer)) }; + self.current_index_buffer = Some(buffer); + } + C::BeginQuery(query, target) => { + unsafe { gl.begin_query(target, query) }; + } + C::EndQuery(target) => { + unsafe { gl.end_query(target) }; + } + C::CopyQueryResults { + ref query_range, + ref dst, + dst_target, + dst_offset, + } => { + self.temp_query_results.clear(); + for &query in queries[query_range.start as usize..query_range.end as usize].iter() { + let result = unsafe { gl.get_query_parameter_u32(query, glow::QUERY_RESULT) }; + self.temp_query_results.push(result as u64); + } 
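+                // The `u64` results gathered above are reinterpreted as raw bytes below,
+                // so they can be written either into a GL buffer with `buffer_sub_data`
+                // or copied into the CPU-side emulated buffer.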
+ let query_data = unsafe { + slice::from_raw_parts( + self.temp_query_results.as_ptr() as *const u8, + self.temp_query_results.len() * mem::size_of::<u64>(), + ) + }; + match dst.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + unsafe { + gl.buffer_sub_data_u8_slice(dst_target, dst_offset as i32, query_data) + }; + } + None => { + let data = &mut dst.data.as_ref().unwrap().lock().unwrap(); + let len = query_data.len().min(data.len()); + data[..len].copy_from_slice(&query_data[..len]); + } + } + } + C::ResetFramebuffer { is_default } => { + if is_default { + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + } else { + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.draw_fbo)) }; + unsafe { + gl.framebuffer_texture_2d( + glow::DRAW_FRAMEBUFFER, + glow::DEPTH_STENCIL_ATTACHMENT, + glow::TEXTURE_2D, + None, + 0, + ) + }; + for i in 0..crate::MAX_COLOR_ATTACHMENTS { + let target = glow::COLOR_ATTACHMENT0 + i as u32; + unsafe { + gl.framebuffer_texture_2d( + glow::DRAW_FRAMEBUFFER, + target, + glow::TEXTURE_2D, + None, + 0, + ) + }; + } + } + unsafe { gl.color_mask(true, true, true, true) }; + unsafe { gl.depth_mask(true) }; + unsafe { gl.stencil_mask(!0) }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + } + C::BindAttachment { + attachment, + ref view, + } => { + unsafe { self.set_attachment(gl, glow::DRAW_FRAMEBUFFER, attachment, view) }; + } + C::ResolveAttachment { + attachment, + ref dst, + ref size, + } => { + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.draw_fbo)) }; + unsafe { gl.read_buffer(attachment) }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.copy_fbo)) }; + unsafe { + self.set_attachment(gl, glow::DRAW_FRAMEBUFFER, glow::COLOR_ATTACHMENT0, dst) + }; + unsafe { + gl.blit_framebuffer( + 0, + 0, + size.width as i32, + size.height as i32, + 0, + 0, + size.width as i32, + size.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, Some(self.draw_fbo)) }; + } + C::InvalidateAttachments(ref list) => { + unsafe { gl.invalidate_framebuffer(glow::DRAW_FRAMEBUFFER, list) }; + } + C::SetDrawColorBuffers(count) => { + self.draw_buffer_count = count; + let indices = (0..count as u32) + .map(|i| glow::COLOR_ATTACHMENT0 + i) + .collect::<ArrayVec<_, { crate::MAX_COLOR_ATTACHMENTS }>>(); + unsafe { gl.draw_buffers(&indices) }; + + if self + .shared + .private_caps + .contains(super::PrivateCapabilities::CAN_DISABLE_DRAW_BUFFER) + { + for draw_buffer in 0..count as u32 { + unsafe { gl.disable_draw_buffer(glow::BLEND, draw_buffer) }; + } + } + } + C::ClearColorF { + draw_buffer, + ref color, + is_srgb, + } => { + if self + .shared + .workarounds + .contains(super::Workarounds::MESA_I915_SRGB_SHADER_CLEAR) + && is_srgb + { + unsafe { self.perform_shader_clear(gl, draw_buffer, *color) }; + } else { + unsafe { gl.clear_buffer_f32_slice(glow::COLOR, draw_buffer, color) }; + } + } + C::ClearColorU(draw_buffer, ref color) => { + unsafe { gl.clear_buffer_u32_slice(glow::COLOR, draw_buffer, color) }; + } + C::ClearColorI(draw_buffer, ref color) => { + unsafe { gl.clear_buffer_i32_slice(glow::COLOR, draw_buffer, color) }; + } + C::ClearDepth(depth) => { + unsafe { gl.clear_buffer_f32_slice(glow::DEPTH, 0, &[depth]) }; + } + C::ClearStencil(value) => { + unsafe { 
gl.clear_buffer_i32_slice(glow::STENCIL, 0, &[value as i32]) }; + } + C::ClearDepthAndStencil(depth, stencil_value) => { + unsafe { + gl.clear_buffer_depth_stencil( + glow::DEPTH_STENCIL, + 0, + depth, + stencil_value as i32, + ) + }; + } + C::BufferBarrier(raw, usage) => { + let mut flags = 0; + if usage.contains(crate::BufferUses::VERTEX) { + flags |= glow::VERTEX_ATTRIB_ARRAY_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::ARRAY_BUFFER, Some(raw)) }; + unsafe { gl.vertex_attrib_pointer_f32(0, 1, glow::BYTE, true, 0, 0) }; + } + if usage.contains(crate::BufferUses::INDEX) { + flags |= glow::ELEMENT_ARRAY_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, Some(raw)) }; + } + if usage.contains(crate::BufferUses::UNIFORM) { + flags |= glow::UNIFORM_BARRIER_BIT; + } + if usage.contains(crate::BufferUses::INDIRECT) { + flags |= glow::COMMAND_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(raw)) }; + } + if usage.contains(crate::BufferUses::COPY_SRC) { + flags |= glow::PIXEL_BUFFER_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::PIXEL_UNPACK_BUFFER, Some(raw)) }; + } + if usage.contains(crate::BufferUses::COPY_DST) { + flags |= glow::PIXEL_BUFFER_BARRIER_BIT; + unsafe { gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(raw)) }; + } + if usage.intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE) { + flags |= glow::BUFFER_UPDATE_BARRIER_BIT; + } + if usage.intersects( + crate::BufferUses::STORAGE_READ | crate::BufferUses::STORAGE_READ_WRITE, + ) { + flags |= glow::SHADER_STORAGE_BARRIER_BIT; + } + unsafe { gl.memory_barrier(flags) }; + } + C::TextureBarrier(usage) => { + let mut flags = 0; + if usage.contains(crate::TextureUses::RESOURCE) { + flags |= glow::TEXTURE_FETCH_BARRIER_BIT; + } + if usage.intersects( + crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE, + ) { + flags |= glow::SHADER_IMAGE_ACCESS_BARRIER_BIT; + } + if usage.contains(crate::TextureUses::COPY_DST) { + flags |= glow::TEXTURE_UPDATE_BARRIER_BIT; + } + if usage.intersects( + crate::TextureUses::COLOR_TARGET + | crate::TextureUses::DEPTH_STENCIL_READ + | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= glow::FRAMEBUFFER_BARRIER_BIT; + } + unsafe { gl.memory_barrier(flags) }; + } + C::SetViewport { + ref rect, + ref depth, + } => { + unsafe { gl.viewport(rect.x, rect.y, rect.w, rect.h) }; + unsafe { gl.depth_range_f32(depth.start, depth.end) }; + } + C::SetScissor(ref rect) => { + unsafe { gl.scissor(rect.x, rect.y, rect.w, rect.h) }; + unsafe { gl.enable(glow::SCISSOR_TEST) }; + } + C::SetStencilFunc { + face, + function, + reference, + read_mask, + } => { + unsafe { gl.stencil_func_separate(face, function, reference as i32, read_mask) }; + } + C::SetStencilOps { + face, + write_mask, + ref ops, + } => { + unsafe { gl.stencil_mask_separate(face, write_mask) }; + unsafe { gl.stencil_op_separate(face, ops.fail, ops.depth_fail, ops.pass) }; + } + C::SetVertexAttribute { + buffer, + ref buffer_desc, + attribute_desc: ref vat, + } => { + unsafe { gl.bind_buffer(glow::ARRAY_BUFFER, buffer) }; + unsafe { gl.enable_vertex_attrib_array(vat.location) }; + + if buffer.is_none() { + match vat.format_desc.attrib_kind { + super::VertexAttribKind::Float => unsafe { + gl.vertex_attrib_format_f32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + true, // always normalized + vat.offset, + ) + }, + super::VertexAttribKind::Integer => unsafe { + gl.vertex_attrib_format_i32( + vat.location, + vat.format_desc.element_count, + 
vat.format_desc.element_format, + vat.offset, + ) + }, + } + + //Note: there is apparently a bug on AMD 3500U: + // this call is ignored if the current array is disabled. + unsafe { gl.vertex_attrib_binding(vat.location, vat.buffer_index) }; + } else { + match vat.format_desc.attrib_kind { + super::VertexAttribKind::Float => unsafe { + gl.vertex_attrib_pointer_f32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + true, // always normalized + buffer_desc.stride as i32, + vat.offset as i32, + ) + }, + super::VertexAttribKind::Integer => unsafe { + gl.vertex_attrib_pointer_i32( + vat.location, + vat.format_desc.element_count, + vat.format_desc.element_format, + buffer_desc.stride as i32, + vat.offset as i32, + ) + }, + } + unsafe { gl.vertex_attrib_divisor(vat.location, buffer_desc.step as u32) }; + } + } + C::UnsetVertexAttribute(location) => { + unsafe { gl.disable_vertex_attrib_array(location) }; + } + C::SetVertexBuffer { + index, + ref buffer, + ref buffer_desc, + } => { + unsafe { gl.vertex_binding_divisor(index, buffer_desc.step as u32) }; + unsafe { + gl.bind_vertex_buffer( + index, + Some(buffer.raw), + buffer.offset as i32, + buffer_desc.stride as i32, + ) + }; + } + C::SetDepth(ref depth) => { + unsafe { gl.depth_func(depth.function) }; + unsafe { gl.depth_mask(depth.mask) }; + } + C::SetDepthBias(bias) => { + if bias.is_enabled() { + unsafe { gl.enable(glow::POLYGON_OFFSET_FILL) }; + unsafe { gl.polygon_offset(bias.constant as f32, bias.slope_scale) }; + } else { + unsafe { gl.disable(glow::POLYGON_OFFSET_FILL) }; + } + } + C::ConfigureDepthStencil(aspects) => { + if aspects.contains(crate::FormatAspects::DEPTH) { + unsafe { gl.enable(glow::DEPTH_TEST) }; + } else { + unsafe { gl.disable(glow::DEPTH_TEST) }; + } + if aspects.contains(crate::FormatAspects::STENCIL) { + unsafe { gl.enable(glow::STENCIL_TEST) }; + } else { + unsafe { gl.disable(glow::STENCIL_TEST) }; + } + } + C::SetAlphaToCoverage(enabled) => { + if enabled { + unsafe { gl.enable(glow::SAMPLE_ALPHA_TO_COVERAGE) }; + } else { + unsafe { gl.disable(glow::SAMPLE_ALPHA_TO_COVERAGE) }; + } + } + C::SetProgram(program) => { + unsafe { gl.use_program(Some(program)) }; + } + C::SetPrimitive(ref state) => { + unsafe { gl.front_face(state.front_face) }; + if state.cull_face != 0 { + unsafe { gl.enable(glow::CULL_FACE) }; + unsafe { gl.cull_face(state.cull_face) }; + } else { + unsafe { gl.disable(glow::CULL_FACE) }; + } + if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + //Note: this is a bit tricky, since we are controlling the clip, not the clamp. 
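+                    // `DEPTH_CLAMP` disables clipping against the near/far planes (and
+                    // clamps fragment depth instead), which is how `unclipped_depth`
+                    // is expressed in GL.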
+ if state.unclipped_depth { + unsafe { gl.enable(glow::DEPTH_CLAMP) }; + } else { + unsafe { gl.disable(glow::DEPTH_CLAMP) }; + } + } + } + C::SetBlendConstant(c) => { + unsafe { gl.blend_color(c[0], c[1], c[2], c[3]) }; + } + C::SetColorTarget { + draw_buffer_index, + desc: super::ColorTargetDesc { mask, ref blend }, + } => { + use wgt::ColorWrites as Cw; + if let Some(index) = draw_buffer_index { + unsafe { + gl.color_mask_draw_buffer( + index, + mask.contains(Cw::RED), + mask.contains(Cw::GREEN), + mask.contains(Cw::BLUE), + mask.contains(Cw::ALPHA), + ) + }; + if let Some(ref blend) = *blend { + unsafe { gl.enable_draw_buffer(index, glow::BLEND) }; + if blend.color != blend.alpha { + unsafe { + gl.blend_equation_separate_draw_buffer( + index, + blend.color.equation, + blend.alpha.equation, + ) + }; + unsafe { + gl.blend_func_separate_draw_buffer( + index, + blend.color.src, + blend.color.dst, + blend.alpha.src, + blend.alpha.dst, + ) + }; + } else { + unsafe { gl.blend_equation_draw_buffer(index, blend.color.equation) }; + unsafe { + gl.blend_func_draw_buffer(index, blend.color.src, blend.color.dst) + }; + } + } else if self + .shared + .private_caps + .contains(super::PrivateCapabilities::CAN_DISABLE_DRAW_BUFFER) + { + unsafe { gl.disable_draw_buffer(index, glow::BLEND) }; + } + } else { + unsafe { + gl.color_mask( + mask.contains(Cw::RED), + mask.contains(Cw::GREEN), + mask.contains(Cw::BLUE), + mask.contains(Cw::ALPHA), + ) + }; + if let Some(ref blend) = *blend { + unsafe { gl.enable(glow::BLEND) }; + if blend.color != blend.alpha { + unsafe { + gl.blend_equation_separate( + blend.color.equation, + blend.alpha.equation, + ) + }; + unsafe { + gl.blend_func_separate( + blend.color.src, + blend.color.dst, + blend.alpha.src, + blend.alpha.dst, + ) + }; + } else { + unsafe { gl.blend_equation(blend.color.equation) }; + unsafe { gl.blend_func(blend.color.src, blend.color.dst) }; + } + } else { + unsafe { gl.disable(glow::BLEND) }; + } + } + } + C::BindBuffer { + target, + slot, + buffer, + offset, + size, + } => { + unsafe { gl.bind_buffer_range(target, slot, Some(buffer), offset, size) }; + } + C::BindSampler(texture_index, sampler) => { + unsafe { gl.bind_sampler(texture_index, sampler) }; + } + C::BindTexture { + slot, + texture, + target, + aspects, + } => { + unsafe { gl.active_texture(glow::TEXTURE0 + slot) }; + unsafe { gl.bind_texture(target, Some(texture)) }; + + let version = gl.version(); + let is_min_es_3_1 = version.is_embedded && (version.major, version.minor) >= (3, 1); + let is_min_4_3 = !version.is_embedded && (version.major, version.minor) >= (4, 3); + if is_min_es_3_1 || is_min_4_3 { + let mode = match aspects { + crate::FormatAspects::DEPTH => Some(glow::DEPTH_COMPONENT), + crate::FormatAspects::STENCIL => Some(glow::STENCIL_INDEX), + _ => None, + }; + if let Some(mode) = mode { + unsafe { + gl.tex_parameter_i32( + target, + glow::DEPTH_STENCIL_TEXTURE_MODE, + mode as _, + ) + }; + } + } + } + C::BindImage { slot, ref binding } => { + unsafe { + gl.bind_image_texture( + slot, + binding.raw, + binding.mip_level as i32, + binding.array_layer.is_none(), + binding.array_layer.unwrap_or_default() as i32, + binding.access, + binding.format, + ) + }; + } + #[cfg(not(target_arch = "wasm32"))] + C::InsertDebugMarker(ref range) => { + let marker = extract_marker(data_bytes, range); + unsafe { + gl.debug_message_insert( + glow::DEBUG_SOURCE_APPLICATION, + glow::DEBUG_TYPE_MARKER, + DEBUG_ID, + glow::DEBUG_SEVERITY_NOTIFICATION, + marker, + ) + }; + } + #[cfg(target_arch = 
"wasm32")] + C::InsertDebugMarker(_) => (), + #[cfg_attr(target_arch = "wasm32", allow(unused))] + C::PushDebugGroup(ref range) => { + #[cfg(not(target_arch = "wasm32"))] + let marker = extract_marker(data_bytes, range); + #[cfg(not(target_arch = "wasm32"))] + unsafe { + gl.push_debug_group(glow::DEBUG_SOURCE_APPLICATION, DEBUG_ID, marker) + }; + } + C::PopDebugGroup => { + #[cfg(not(target_arch = "wasm32"))] + unsafe { + gl.pop_debug_group() + }; + } + C::SetPushConstants { + ref uniform, + offset, + } => { + fn get_data<T>(data: &[u8], offset: u32) -> &[T] { + let raw = &data[(offset as usize)..]; + unsafe { + slice::from_raw_parts( + raw.as_ptr() as *const _, + raw.len() / mem::size_of::<T>(), + ) + } + } + + let location = uniform.location.as_ref(); + + match uniform.utype { + glow::FLOAT => { + let data = get_data::<f32>(data_bytes, offset)[0]; + unsafe { gl.uniform_1_f32(location, data) }; + } + glow::FLOAT_VEC2 => { + let data = get_data::<[f32; 2]>(data_bytes, offset)[0]; + unsafe { gl.uniform_2_f32_slice(location, &data) }; + } + glow::FLOAT_VEC3 => { + let data = get_data::<[f32; 3]>(data_bytes, offset)[0]; + unsafe { gl.uniform_3_f32_slice(location, &data) }; + } + glow::FLOAT_VEC4 => { + let data = get_data::<[f32; 4]>(data_bytes, offset)[0]; + unsafe { gl.uniform_4_f32_slice(location, &data) }; + } + glow::INT => { + let data = get_data::<i32>(data_bytes, offset)[0]; + unsafe { gl.uniform_1_i32(location, data) }; + } + glow::INT_VEC2 => { + let data = get_data::<[i32; 2]>(data_bytes, offset)[0]; + unsafe { gl.uniform_2_i32_slice(location, &data) }; + } + glow::INT_VEC3 => { + let data = get_data::<[i32; 3]>(data_bytes, offset)[0]; + unsafe { gl.uniform_3_i32_slice(location, &data) }; + } + glow::INT_VEC4 => { + let data = get_data::<[i32; 4]>(data_bytes, offset)[0]; + unsafe { gl.uniform_4_i32_slice(location, &data) }; + } + glow::FLOAT_MAT2 => { + let data = get_data::<[f32; 4]>(data_bytes, offset)[0]; + unsafe { gl.uniform_matrix_2_f32_slice(location, false, &data) }; + } + glow::FLOAT_MAT3 => { + let data = get_data::<[f32; 9]>(data_bytes, offset)[0]; + unsafe { gl.uniform_matrix_3_f32_slice(location, false, &data) }; + } + glow::FLOAT_MAT4 => { + let data = get_data::<[f32; 16]>(data_bytes, offset)[0]; + unsafe { gl.uniform_matrix_4_f32_slice(location, false, &data) }; + } + _ => panic!("Unsupported uniform datatype!"), + } + } + } + } +} + +impl crate::Queue<super::Api> for super::Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&super::CommandBuffer], + signal_fence: Option<(&mut super::Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + let shared = Arc::clone(&self.shared); + let gl = &shared.context.lock(); + for cmd_buf in command_buffers.iter() { + // The command encoder assumes a default state when encoding the command buffer. + // Always reset the state between command_buffers to reflect this assumption. Do + // this at the beginning of the loop in case something outside of wgpu modified + // this state prior to commit. 
+ unsafe { self.reset_state(gl) }; + #[cfg(not(target_arch = "wasm32"))] + if let Some(ref label) = cmd_buf.label { + unsafe { gl.push_debug_group(glow::DEBUG_SOURCE_APPLICATION, DEBUG_ID, label) }; + } + + for command in cmd_buf.commands.iter() { + unsafe { self.process(gl, command, &cmd_buf.data_bytes, &cmd_buf.queries) }; + } + + #[cfg(not(target_arch = "wasm32"))] + if cmd_buf.label.is_some() { + unsafe { gl.pop_debug_group() }; + } + } + + if let Some((fence, value)) = signal_fence { + fence.maintain(gl); + let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) } + .map_err(|_| crate::DeviceError::OutOfMemory)?; + fence.pending.push((value, sync)); + } + + Ok(()) + } + + unsafe fn present( + &mut self, + surface: &mut super::Surface, + texture: super::Texture, + ) -> Result<(), crate::SurfaceError> { + #[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] + let gl = unsafe { &self.shared.context.get_without_egl_lock() }; + + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + let gl = &self.shared.context.glow_context; + + unsafe { surface.present(texture, gl) } + } + + unsafe fn get_timestamp_period(&self) -> f32 { + 1.0 + } +} + +// SAFE: WASM doesn't have threads +#[cfg(target_arch = "wasm32")] +unsafe impl Sync for super::Queue {} +#[cfg(target_arch = "wasm32")] +unsafe impl Send for super::Queue {} diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/clear.frag b/third_party/rust/wgpu-hal/src/gles/shaders/clear.frag new file mode 100644 index 0000000000..7766c12d9f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/clear.frag @@ -0,0 +1,9 @@ +#version 300 es +precision lowp float; +uniform vec4 color; +//Hack: Some WebGL implementations don't find "color" otherwise. +uniform vec4 color_workaround; +out vec4 frag; +void main() { + frag = color + color_workaround; +} diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/clear.vert b/third_party/rust/wgpu-hal/src/gles/shaders/clear.vert new file mode 100644 index 0000000000..ac655e7f31 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/clear.vert @@ -0,0 +1,11 @@ +#version 300 es +precision lowp float; +// A triangle that fills the whole screen +const vec2[3] TRIANGLE_POS = vec2[]( + vec2( 0.0, -3.0), + vec2(-3.0, 1.0), + vec2( 3.0, 1.0) +); +void main() { + gl_Position = vec4(TRIANGLE_POS[gl_VertexID], 0.0, 1.0); +}
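+// Note: a single oversized triangle is used above instead of a two-triangle quad:
+// it needs no vertex buffer (positions are indexed by gl_VertexID) and avoids
+// redundant fragment work along the diagonal of a quad.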
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.frag b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.frag new file mode 100644 index 0000000000..853f82a6ae --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.frag @@ -0,0 +1,16 @@ +#version 300 es +precision mediump float; +in vec2 uv; +uniform sampler2D present_texture; +out vec4 frag; +vec4 linear_to_srgb(vec4 linear) { + vec3 color_linear = linear.rgb; + vec3 selector = ceil(color_linear - 0.0031308); // 0 if under value, 1 if over + vec3 under = 12.92 * color_linear; + vec3 over = 1.055 * pow(color_linear, vec3(0.41666)) - 0.055; + vec3 result = mix(under, over, selector); + return vec4(result, linear.a); +} +void main() { + frag = linear_to_srgb(texture(present_texture, uv)); +}
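+// Note: the piecewise function above is the standard linear-to-sRGB transfer
+// function: values <= 0.0031308 use the linear 12.92 * c segment, larger values
+// use 1.055 * pow(c, 1.0/2.4) - 0.055 (0.41666 approximates 1/2.4).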
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.vert b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.vert new file mode 100644 index 0000000000..922f2a1848 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/shaders/srgb_present.vert @@ -0,0 +1,18 @@ +#version 300 es +precision mediump float; +// A triangle that fills the whole screen +const vec2[3] TRIANGLE_POS = vec2[]( + vec2( 0.0, -3.0), + vec2(-3.0, 1.0), + vec2( 3.0, 1.0) +); +const vec2[3] TRIANGLE_UV = vec2[]( + vec2( 0.5, 1.), + vec2( -1.0, -1.0), + vec2( 2.0, -1.0) +); +out vec2 uv; +void main() { + uv = TRIANGLE_UV[gl_VertexID]; + gl_Position = vec4(TRIANGLE_POS[gl_VertexID], 0.0, 1.0); +}
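Both clear.vert and srgb_present.vert draw a single oversized triangle instead of a two-triangle quad: the vertices (0, -3), (-3, 1), (3, 1) enclose the whole normalized-device-coordinate square [-1, 1] x [-1, 1], so one three-vertex draw covers every pixel. A small Rust sketch of that containment check, included only to illustrate why the trick works (helper names are assumptions, not part of wgpu-hal):

    // Checks that the oversized triangle from clear.vert / srgb_present.vert
    // contains all four corners of the visible NDC square.
    fn edge(a: (f32, f32), b: (f32, f32), p: (f32, f32)) -> f32 {
        // Twice the signed area of (a, b, p); the sign tells which side of a->b p is on.
        (b.0 - a.0) * (p.1 - a.1) - (b.1 - a.1) * (p.0 - a.0)
    }

    fn main() {
        let tri = [(0.0, -3.0), (-3.0, 1.0), (3.0, 1.0)];
        for corner in [(-1.0, -1.0), (1.0, -1.0), (-1.0, 1.0), (1.0, 1.0)] {
            let e0 = edge(tri[0], tri[1], corner);
            let e1 = edge(tri[1], tri[2], corner);
            let e2 = edge(tri[2], tri[0], corner);
            // Inside (or on an edge) when all three signs agree.
            assert!(
                (e0 >= 0.0 && e1 >= 0.0 && e2 >= 0.0)
                    || (e0 <= 0.0 && e1 <= 0.0 && e2 <= 0.0),
                "corner {corner:?} lies outside the triangle"
            );
        }
    }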
\ No newline at end of file diff --git a/third_party/rust/wgpu-hal/src/gles/web.rs b/third_party/rust/wgpu-hal/src/gles/web.rs new file mode 100644 index 0000000000..565ffcab18 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/gles/web.rs @@ -0,0 +1,404 @@ +use glow::HasContext; +use parking_lot::Mutex; +use wasm_bindgen::JsCast; + +use super::TextureFormatDesc; + +/// A wrapper around a [`glow::Context`] to provide a fake `lock()` api that makes it compatible +/// with the `AdapterContext` API from the EGL implementation. +pub struct AdapterContext { + pub glow_context: glow::Context, +} + +impl AdapterContext { + pub fn is_owned(&self) -> bool { + false + } + + /// Obtain a lock to the EGL context and get handle to the [`glow::Context`] that can be used to + /// do rendering. + #[track_caller] + pub fn lock(&self) -> &glow::Context { + &self.glow_context + } +} + +#[derive(Debug)] +pub struct Instance { + webgl2_context: Mutex<Option<web_sys::WebGl2RenderingContext>>, +} + +impl Instance { + pub fn create_surface_from_canvas( + &self, + canvas: web_sys::HtmlCanvasElement, + ) -> Result<Surface, crate::InstanceError> { + let result = + canvas.get_context_with_context_options("webgl2", &Self::create_context_options()); + self.create_surface_from_context(Canvas::Canvas(canvas), result) + } + + pub fn create_surface_from_offscreen_canvas( + &self, + canvas: web_sys::OffscreenCanvas, + ) -> Result<Surface, crate::InstanceError> { + let result = + canvas.get_context_with_context_options("webgl2", &Self::create_context_options()); + self.create_surface_from_context(Canvas::Offscreen(canvas), result) + } + + /// Common portion of public `create_surface_from_*` functions. + /// + /// Note: Analogous code also exists in the WebGPU backend at + /// `wgpu::backend::web::Context`. + fn create_surface_from_context( + &self, + canvas: Canvas, + context_result: Result<Option<js_sys::Object>, wasm_bindgen::JsValue>, + ) -> Result<Surface, crate::InstanceError> { + let context_object: js_sys::Object = match context_result { + Ok(Some(context)) => context, + Ok(None) => { + // <https://html.spec.whatwg.org/multipage/canvas.html#dom-canvas-getcontext-dev> + // A getContext() call “returns null if contextId is not supported, or if the + // canvas has already been initialized with another context type”. Additionally, + // “not supported” could include “insufficient GPU resources” or “the GPU process + // previously crashed”. So, we must return it as an `Err` since it could occur + // for circumstances outside the application author's control. + return Err(crate::InstanceError); + } + Err(js_error) => { + // <https://html.spec.whatwg.org/multipage/canvas.html#dom-canvas-getcontext> + // A thrown exception indicates misuse of the canvas state. Ideally we wouldn't + // panic in this case, but for now, `InstanceError` conveys no detail, so it + // is more informative to panic with a specific message. + panic!("canvas.getContext() threw {js_error:?}") + } + }; + + // Not returning this error because it is a type error that shouldn't happen unless + // the browser, JS builtin objects, or wasm bindings are misbehaving somehow. 
+ let webgl2_context: web_sys::WebGl2RenderingContext = context_object + .dyn_into() + .expect("canvas context is not a WebGl2RenderingContext"); + + *self.webgl2_context.lock() = Some(webgl2_context.clone()); + + Ok(Surface { + canvas, + webgl2_context, + srgb_present_program: None, + swapchain: None, + texture: None, + presentable: true, + }) + } + + fn create_context_options() -> js_sys::Object { + let context_options = js_sys::Object::new(); + js_sys::Reflect::set( + &context_options, + &"antialias".into(), + &wasm_bindgen::JsValue::FALSE, + ) + .expect("Cannot create context options"); + context_options + } +} + +// SAFE: WASM doesn't have threads +unsafe impl Sync for Instance {} +unsafe impl Send for Instance {} + +impl crate::Instance<super::Api> for Instance { + unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + Ok(Instance { + webgl2_context: Mutex::new(None), + }) + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + let context_guard = self.webgl2_context.lock(); + let gl = match *context_guard { + Some(ref webgl2_context) => glow::Context::from_webgl2_context(webgl2_context.clone()), + None => return Vec::new(), + }; + + unsafe { super::Adapter::expose(AdapterContext { glow_context: gl }) } + .into_iter() + .collect() + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + if let raw_window_handle::RawWindowHandle::Web(handle) = window_handle { + let canvas: web_sys::HtmlCanvasElement = web_sys::window() + .and_then(|win| win.document()) + .expect("Cannot get document") + .query_selector(&format!("canvas[data-raw-handle=\"{}\"]", handle.id)) + .expect("Cannot query for canvas") + .expect("Canvas is not found") + .dyn_into() + .expect("Failed to downcast to canvas type"); + + self.create_surface_from_canvas(canvas) + } else { + Err(crate::InstanceError) + } + } + + unsafe fn destroy_surface(&self, surface: Surface) { + let mut context_option_ref = self.webgl2_context.lock(); + + if let Some(context) = context_option_ref.as_ref() { + if context == &surface.webgl2_context { + *context_option_ref = None; + } + } + } +} + +#[derive(Clone, Debug)] +pub struct Surface { + canvas: Canvas, + webgl2_context: web_sys::WebGl2RenderingContext, + pub(super) swapchain: Option<Swapchain>, + texture: Option<glow::Texture>, + pub(super) presentable: bool, + srgb_present_program: Option<glow::Program>, +} + +#[derive(Clone, Debug)] +enum Canvas { + Canvas(web_sys::HtmlCanvasElement), + Offscreen(web_sys::OffscreenCanvas), +} + +// SAFE: Because web doesn't have threads ( yet ) +unsafe impl Sync for Surface {} +unsafe impl Send for Surface {} + +#[derive(Clone, Debug)] +pub struct Swapchain { + pub(crate) extent: wgt::Extent3d, + // pub(crate) channel: f::ChannelType, + pub(super) format: wgt::TextureFormat, + pub(super) framebuffer: glow::Framebuffer, + pub(super) format_desc: TextureFormatDesc, +} + +impl Surface { + pub(super) unsafe fn present( + &mut self, + _suf_texture: super::Texture, + gl: &glow::Context, + ) -> Result<(), crate::SurfaceError> { + let swapchain = self.swapchain.as_ref().ok_or(crate::SurfaceError::Other( + "need to configure surface before presenting", + ))?; + + if swapchain.format.is_srgb() { + // Important to set the viewport since we don't know in what state the user left it. 
+ unsafe { + gl.viewport( + 0, + 0, + swapchain.extent.width as _, + swapchain.extent.height as _, + ) + }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + unsafe { gl.bind_sampler(0, None) }; + unsafe { gl.active_texture(glow::TEXTURE0) }; + unsafe { gl.bind_texture(glow::TEXTURE_2D, self.texture) }; + unsafe { gl.use_program(self.srgb_present_program) }; + unsafe { gl.disable(glow::DEPTH_TEST) }; + unsafe { gl.disable(glow::STENCIL_TEST) }; + unsafe { gl.disable(glow::SCISSOR_TEST) }; + unsafe { gl.disable(glow::BLEND) }; + unsafe { gl.disable(glow::CULL_FACE) }; + unsafe { gl.draw_buffers(&[glow::BACK]) }; + unsafe { gl.draw_arrays(glow::TRIANGLES, 0, 3) }; + } else { + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(swapchain.framebuffer)) }; + unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; + // Note the Y-flipping here. GL's presentation is not flipped, + // but main rendering is. Therefore, we Y-flip the output positions + // in the shader, and also this blit. + unsafe { + gl.blit_framebuffer( + 0, + swapchain.extent.height as i32, + swapchain.extent.width as i32, + 0, + 0, + 0, + swapchain.extent.width as i32, + swapchain.extent.height as i32, + glow::COLOR_BUFFER_BIT, + glow::NEAREST, + ) + }; + } + + Ok(()) + } + + unsafe fn create_srgb_present_program(gl: &glow::Context) -> glow::Program { + let program = unsafe { gl.create_program() }.expect("Could not create shader program"); + let vertex = + unsafe { gl.create_shader(glow::VERTEX_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(vertex, include_str!("./shaders/srgb_present.vert")) }; + unsafe { gl.compile_shader(vertex) }; + let fragment = + unsafe { gl.create_shader(glow::FRAGMENT_SHADER) }.expect("Could not create shader"); + unsafe { gl.shader_source(fragment, include_str!("./shaders/srgb_present.frag")) }; + unsafe { gl.compile_shader(fragment) }; + unsafe { gl.attach_shader(program, vertex) }; + unsafe { gl.attach_shader(program, fragment) }; + unsafe { gl.link_program(program) }; + unsafe { gl.delete_shader(vertex) }; + unsafe { gl.delete_shader(fragment) }; + unsafe { gl.bind_texture(glow::TEXTURE_2D, None) }; + + program + } + + pub fn supports_srgb(&self) -> bool { + // present.frag takes care of handling srgb conversion + true + } +} + +impl crate::Surface<super::Api> for Surface { + unsafe fn configure( + &mut self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + match self.canvas { + Canvas::Canvas(ref canvas) => { + canvas.set_width(config.extent.width); + canvas.set_height(config.extent.height); + } + Canvas::Offscreen(ref canvas) => { + canvas.set_width(config.extent.width); + canvas.set_height(config.extent.height); + } + } + + let gl = &device.shared.context.lock(); + + if let Some(swapchain) = self.swapchain.take() { + // delete all frame buffers already allocated + unsafe { gl.delete_framebuffer(swapchain.framebuffer) }; + } + + if self.srgb_present_program.is_none() && config.format.is_srgb() { + self.srgb_present_program = Some(unsafe { Self::create_srgb_present_program(gl) }); + } + + if let Some(texture) = self.texture.take() { + unsafe { gl.delete_texture(texture) }; + } + + self.texture = Some(unsafe { gl.create_texture() }.map_err(|error| { + log::error!("Internal swapchain texture creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?); + + let desc = device.shared.describe_texture_format(config.format); + unsafe { gl.bind_texture(glow::TEXTURE_2D, self.texture) 
}; + unsafe { + gl.tex_parameter_i32( + glow::TEXTURE_2D, + glow::TEXTURE_MIN_FILTER, + glow::NEAREST as _, + ) + }; + unsafe { + gl.tex_parameter_i32( + glow::TEXTURE_2D, + glow::TEXTURE_MAG_FILTER, + glow::NEAREST as _, + ) + }; + unsafe { + gl.tex_storage_2d( + glow::TEXTURE_2D, + 1, + desc.internal, + config.extent.width as i32, + config.extent.height as i32, + ) + }; + + let framebuffer = unsafe { gl.create_framebuffer() }.map_err(|error| { + log::error!("Internal swapchain framebuffer creation failed: {error}"); + crate::DeviceError::OutOfMemory + })?; + unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(framebuffer)) }; + unsafe { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + glow::TEXTURE_2D, + self.texture, + 0, + ) + }; + unsafe { gl.bind_texture(glow::TEXTURE_2D, None) }; + + self.swapchain = Some(Swapchain { + extent: config.extent, + // channel: config.format.base_format().1, + format: config.format, + format_desc: desc, + framebuffer, + }); + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &super::Device) { + let gl = device.shared.context.lock(); + if let Some(swapchain) = self.swapchain.take() { + unsafe { gl.delete_framebuffer(swapchain.framebuffer) }; + } + if let Some(renderbuffer) = self.texture.take() { + unsafe { gl.delete_texture(renderbuffer) }; + } + } + + unsafe fn acquire_texture( + &mut self, + _timeout_ms: Option<std::time::Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let sc = self.swapchain.as_ref().unwrap(); + let texture = super::Texture { + inner: super::TextureInner::Texture { + raw: self.texture.unwrap(), + target: glow::TEXTURE_2D, + }, + drop_guard: None, + array_layer_count: 1, + mip_level_count: 1, + format: sc.format, + format_desc: sc.format_desc.clone(), + copy_size: crate::CopyExtent { + width: sc.extent.width, + height: sc.extent.height, + depth: 1, + }, + is_cubemap: false, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + + unsafe fn discard_texture(&mut self, _texture: super::Texture) {} +} diff --git a/third_party/rust/wgpu-hal/src/lib.rs b/third_party/rust/wgpu-hal/src/lib.rs new file mode 100644 index 0000000000..1758149380 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/lib.rs @@ -0,0 +1,1316 @@ +/*! This library describes the internal unsafe graphics abstraction API. + * It follows WebGPU for the most part, re-using wgpu-types, + * with the following deviations: + * - Fully unsafe: zero overhead, zero validation. + * - Compile-time backend selection via traits. + * - Objects are passed by references and returned by value. No IDs. + * - Mapping is persistent, with explicit synchronization. + * - Resource transitions are explicit. + * - All layouts are explicit. Binding model has compatibility. + * + * General design direction is to follow the majority by the following weights: + * - wgpu-core: 1.5 + * - primary backends (Vulkan/Metal/DX12): 1.0 each + * - secondary backends (DX11/GLES): 0.5 each + */ + +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![allow( + // for `if_then_panic` until it reaches stable + unknown_lints, + // We use loops for getting early-out of scope without closures. + clippy::never_loop, + // We don't use syntax sugar where it's not necessary. + clippy::match_like_matches_macro, + // Redundant matching is more explicit. + clippy::redundant_pattern_matching, + // Explicit lifetimes are often easier to reason about. 
+ clippy::needless_lifetimes, + // No need for defaults in the internal types. + clippy::new_without_default, + // Matches are good and extendable, no need to make an exception here. + clippy::single_match, + // Push commands are more regular than macros. + clippy::vec_init_then_push, + // "if panic" is a good uniform construct. + clippy::if_then_panic, + // We unsafe impl `Send` for a reason. + clippy::non_send_fields_in_send_ty, + // TODO! + clippy::missing_safety_doc, + // Clashes with clippy::pattern_type_mismatch + clippy::needless_borrowed_reference, +)] +#![warn( + trivial_casts, + trivial_numeric_casts, + unsafe_op_in_unsafe_fn, + unused_extern_crates, + unused_qualifications, + // We don't match on a reference, unless required. + clippy::pattern_type_mismatch, +)] + +/// DirectX11 API internals. +#[cfg(all(feature = "dx11", windows))] +pub mod dx11; +/// DirectX12 API internals. +#[cfg(all(feature = "dx12", windows))] +pub mod dx12; +/// A dummy API implementation. +pub mod empty; +/// GLES API internals. +#[cfg(all(feature = "gles"))] +pub mod gles; +/// Metal API internals. +#[cfg(all(feature = "metal", any(target_os = "macos", target_os = "ios")))] +pub mod metal; +/// Vulkan API internals. +#[cfg(all(feature = "vulkan", not(target_arch = "wasm32")))] +pub mod vulkan; + +pub mod auxil; +pub mod api { + #[cfg(all(feature = "dx11", windows))] + pub use super::dx11::Api as Dx11; + #[cfg(all(feature = "dx12", windows))] + pub use super::dx12::Api as Dx12; + pub use super::empty::Api as Empty; + #[cfg(feature = "gles")] + pub use super::gles::Api as Gles; + #[cfg(all(feature = "metal", any(target_os = "macos", target_os = "ios")))] + pub use super::metal::Api as Metal; + #[cfg(all(feature = "vulkan", not(target_arch = "wasm32")))] + pub use super::vulkan::Api as Vulkan; +} + +use std::{ + borrow::{Borrow, Cow}, + fmt, + num::NonZeroU32, + ops::{Range, RangeInclusive}, + ptr::NonNull, + sync::atomic::AtomicBool, +}; + +use bitflags::bitflags; +use thiserror::Error; + +pub const MAX_ANISOTROPY: u8 = 16; +pub const MAX_BIND_GROUPS: usize = 8; +pub const MAX_VERTEX_BUFFERS: usize = 16; +pub const MAX_COLOR_ATTACHMENTS: usize = 8; +pub const MAX_MIP_LEVELS: u32 = 16; +/// Size of a single occlusion/timestamp query, when copied into a buffer, in bytes. +pub const QUERY_SIZE: wgt::BufferAddress = 8; + +pub type Label<'a> = Option<&'a str>; +pub type MemoryRange = Range<wgt::BufferAddress>; +pub type FenceValue = u64; + +/// Drop guard to signal wgpu-hal is no longer using an externally created object. 
+pub type DropGuard = Box<dyn std::any::Any + Send + Sync>; + +#[derive(Clone, Debug, PartialEq, Eq, Error)] +pub enum DeviceError { + #[error("Out of memory")] + OutOfMemory, + #[error("Device is lost")] + Lost, +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum ShaderError { + #[error("Compilation failed: {0:?}")] + Compilation(String), + #[error(transparent)] + Device(#[from] DeviceError), +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum PipelineError { + #[error("Linkage failed for stage {0:?}: {1}")] + Linkage(wgt::ShaderStages, String), + #[error("Entry point for stage {0:?} is invalid")] + EntryPoint(naga::ShaderStage), + #[error(transparent)] + Device(#[from] DeviceError), +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum SurfaceError { + #[error("Surface is lost")] + Lost, + #[error("Surface is outdated, needs to be re-created")] + Outdated, + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Other reason: {0}")] + Other(&'static str), +} + +#[derive(Clone, Debug, Eq, PartialEq, Error)] +#[error("Not supported")] +pub struct InstanceError; + +pub trait Api: Clone + Sized { + type Instance: Instance<Self>; + type Surface: Surface<Self>; + type Adapter: Adapter<Self>; + type Device: Device<Self>; + + type Queue: Queue<Self>; + type CommandEncoder: CommandEncoder<Self>; + type CommandBuffer: Send + Sync + fmt::Debug; + + type Buffer: fmt::Debug + Send + Sync + 'static; + type Texture: fmt::Debug + Send + Sync + 'static; + type SurfaceTexture: fmt::Debug + Send + Sync + Borrow<Self::Texture>; + type TextureView: fmt::Debug + Send + Sync; + type Sampler: fmt::Debug + Send + Sync; + type QuerySet: fmt::Debug + Send + Sync; + type Fence: fmt::Debug + Send + Sync; + + type BindGroupLayout: Send + Sync; + type BindGroup: fmt::Debug + Send + Sync; + type PipelineLayout: Send + Sync; + type ShaderModule: fmt::Debug + Send + Sync; + type RenderPipeline: Send + Sync; + type ComputePipeline: Send + Sync; +} + +pub trait Instance<A: Api>: Sized + Send + Sync { + unsafe fn init(desc: &InstanceDescriptor) -> Result<Self, InstanceError>; + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<A::Surface, InstanceError>; + unsafe fn destroy_surface(&self, surface: A::Surface); + unsafe fn enumerate_adapters(&self) -> Vec<ExposedAdapter<A>>; +} + +pub trait Surface<A: Api>: Send + Sync { + unsafe fn configure( + &mut self, + device: &A::Device, + config: &SurfaceConfiguration, + ) -> Result<(), SurfaceError>; + + unsafe fn unconfigure(&mut self, device: &A::Device); + + /// Returns the next texture to be presented by the swapchain for drawing + /// + /// A `timeout` of `None` means to wait indefinitely, with no timeout. + /// + /// # Portability + /// + /// Some backends can't support a timeout when acquiring a texture and + /// the timeout will be ignored. + /// + /// Returns `None` on timing out. + unsafe fn acquire_texture( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<AcquiredSurfaceTexture<A>>, SurfaceError>; + unsafe fn discard_texture(&mut self, texture: A::SurfaceTexture); +} + +pub trait Adapter<A: Api>: Send + Sync { + unsafe fn open( + &self, + features: wgt::Features, + limits: &wgt::Limits, + ) -> Result<OpenDevice<A>, DeviceError>; + + /// Return the set of supported capabilities for a texture format. 
+ unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> TextureFormatCapabilities; + + /// Returns the capabilities of working with a specified surface. + /// + /// `None` means presentation is not supported for it. + unsafe fn surface_capabilities(&self, surface: &A::Surface) -> Option<SurfaceCapabilities>; + + /// Creates a [`PresentationTimestamp`] using the adapter's WSI. + /// + /// [`PresentationTimestamp`]: wgt::PresentationTimestamp + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp; +} + +pub trait Device<A: Api>: Send + Sync { + /// Exit connection to this logical device. + unsafe fn exit(self, queue: A::Queue); + /// Creates a new buffer. + /// + /// The initial usage is `BufferUses::empty()`. + unsafe fn create_buffer(&self, desc: &BufferDescriptor) -> Result<A::Buffer, DeviceError>; + unsafe fn destroy_buffer(&self, buffer: A::Buffer); + //TODO: clarify if zero-sized mapping is allowed + unsafe fn map_buffer( + &self, + buffer: &A::Buffer, + range: MemoryRange, + ) -> Result<BufferMapping, DeviceError>; + unsafe fn unmap_buffer(&self, buffer: &A::Buffer) -> Result<(), DeviceError>; + unsafe fn flush_mapped_ranges<I>(&self, buffer: &A::Buffer, ranges: I) + where + I: Iterator<Item = MemoryRange>; + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &A::Buffer, ranges: I) + where + I: Iterator<Item = MemoryRange>; + + /// Creates a new texture. + /// + /// The initial usage for all subresources is `TextureUses::UNINITIALIZED`. + unsafe fn create_texture(&self, desc: &TextureDescriptor) -> Result<A::Texture, DeviceError>; + unsafe fn destroy_texture(&self, texture: A::Texture); + unsafe fn create_texture_view( + &self, + texture: &A::Texture, + desc: &TextureViewDescriptor, + ) -> Result<A::TextureView, DeviceError>; + unsafe fn destroy_texture_view(&self, view: A::TextureView); + unsafe fn create_sampler(&self, desc: &SamplerDescriptor) -> Result<A::Sampler, DeviceError>; + unsafe fn destroy_sampler(&self, sampler: A::Sampler); + + unsafe fn create_command_encoder( + &self, + desc: &CommandEncoderDescriptor<A>, + ) -> Result<A::CommandEncoder, DeviceError>; + unsafe fn destroy_command_encoder(&self, pool: A::CommandEncoder); + + /// Creates a bind group layout. 
+ unsafe fn create_bind_group_layout( + &self, + desc: &BindGroupLayoutDescriptor, + ) -> Result<A::BindGroupLayout, DeviceError>; + unsafe fn destroy_bind_group_layout(&self, bg_layout: A::BindGroupLayout); + unsafe fn create_pipeline_layout( + &self, + desc: &PipelineLayoutDescriptor<A>, + ) -> Result<A::PipelineLayout, DeviceError>; + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: A::PipelineLayout); + unsafe fn create_bind_group( + &self, + desc: &BindGroupDescriptor<A>, + ) -> Result<A::BindGroup, DeviceError>; + unsafe fn destroy_bind_group(&self, group: A::BindGroup); + + unsafe fn create_shader_module( + &self, + desc: &ShaderModuleDescriptor, + shader: ShaderInput, + ) -> Result<A::ShaderModule, ShaderError>; + unsafe fn destroy_shader_module(&self, module: A::ShaderModule); + unsafe fn create_render_pipeline( + &self, + desc: &RenderPipelineDescriptor<A>, + ) -> Result<A::RenderPipeline, PipelineError>; + unsafe fn destroy_render_pipeline(&self, pipeline: A::RenderPipeline); + unsafe fn create_compute_pipeline( + &self, + desc: &ComputePipelineDescriptor<A>, + ) -> Result<A::ComputePipeline, PipelineError>; + unsafe fn destroy_compute_pipeline(&self, pipeline: A::ComputePipeline); + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<Label>, + ) -> Result<A::QuerySet, DeviceError>; + unsafe fn destroy_query_set(&self, set: A::QuerySet); + unsafe fn create_fence(&self) -> Result<A::Fence, DeviceError>; + unsafe fn destroy_fence(&self, fence: A::Fence); + unsafe fn get_fence_value(&self, fence: &A::Fence) -> Result<FenceValue, DeviceError>; + /// Calling wait with a lower value than the current fence value will immediately return. + unsafe fn wait( + &self, + fence: &A::Fence, + value: FenceValue, + timeout_ms: u32, + ) -> Result<bool, DeviceError>; + + unsafe fn start_capture(&self) -> bool; + unsafe fn stop_capture(&self); +} + +pub trait Queue<A: Api>: Send + Sync { + /// Submits the command buffers for execution on GPU. + /// + /// Valid usage: + /// - all of the command buffers were created from command pools + /// that are associated with this queue. + /// - all of the command buffers had `CommadBuffer::finish()` called. + unsafe fn submit( + &mut self, + command_buffers: &[&A::CommandBuffer], + signal_fence: Option<(&mut A::Fence, FenceValue)>, + ) -> Result<(), DeviceError>; + unsafe fn present( + &mut self, + surface: &mut A::Surface, + texture: A::SurfaceTexture, + ) -> Result<(), SurfaceError>; + unsafe fn get_timestamp_period(&self) -> f32; +} + +/// Encoder for commands in command buffers. +/// Serves as a parent for all the encoded command buffers. +/// Works in bursts of action: one or more command buffers are recorded, +/// then submitted to a queue, and then it needs to be `reset_all()`. +pub trait CommandEncoder<A: Api>: Send + Sync + fmt::Debug { + /// Begin encoding a new command buffer. + unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>; + /// Discard currently recorded list, if any. + unsafe fn discard_encoding(&mut self); + unsafe fn end_encoding(&mut self) -> Result<A::CommandBuffer, DeviceError>; + /// Reclaims all resources that are allocated for this encoder. + /// Must get all of the produced command buffers back, + /// and they must not be used by GPU at this moment. 
+ unsafe fn reset_all<I>(&mut self, command_buffers: I) + where + I: Iterator<Item = A::CommandBuffer>; + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = BufferBarrier<'a, A>>; + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = TextureBarrier<'a, A>>; + + // copy operations + + unsafe fn clear_buffer(&mut self, buffer: &A::Buffer, range: MemoryRange); + + unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &A::Buffer, dst: &A::Buffer, regions: T) + where + T: Iterator<Item = BufferCopy>; + + /// Copy from an external image to an internal texture. + /// Works with a single array layer. + /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: the copy extent is in physical size (rounded to the block size) + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + unsafe fn copy_external_image_to_texture<T>( + &mut self, + src: &wgt::ImageCopyExternalImage, + dst: &A::Texture, + dst_premultiplication: bool, + regions: T, + ) where + T: Iterator<Item = TextureCopy>; + + /// Copy from one texture to another. + /// Works with a single array layer. + /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: the copy extent is in physical size (rounded to the block size) + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &A::Texture, + src_usage: TextureUses, + dst: &A::Texture, + regions: T, + ) where + T: Iterator<Item = TextureCopy>; + + /// Copy from buffer to texture. + /// Works with a single array layer. + /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. + /// Note: the copy extent is in physical size (rounded to the block size) + unsafe fn copy_buffer_to_texture<T>(&mut self, src: &A::Buffer, dst: &A::Texture, regions: T) + where + T: Iterator<Item = BufferTextureCopy>; + + /// Copy from texture to buffer. + /// Works with a single array layer. + /// Note: the copy extent is in physical size (rounded to the block size) + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &A::Texture, + src_usage: TextureUses, + dst: &A::Buffer, + regions: T, + ) where + T: Iterator<Item = BufferTextureCopy>; + + // pass common + + /// Sets the bind group at `index` to `group`, assuming the layout + /// of all the preceeding groups to be taken from `layout`. + unsafe fn set_bind_group( + &mut self, + layout: &A::PipelineLayout, + index: u32, + group: &A::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ); + + unsafe fn set_push_constants( + &mut self, + layout: &A::PipelineLayout, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ); + + unsafe fn insert_debug_marker(&mut self, label: &str); + unsafe fn begin_debug_marker(&mut self, group_label: &str); + unsafe fn end_debug_marker(&mut self); + + // queries + + unsafe fn begin_query(&mut self, set: &A::QuerySet, index: u32); + unsafe fn end_query(&mut self, set: &A::QuerySet, index: u32); + unsafe fn write_timestamp(&mut self, set: &A::QuerySet, index: u32); + unsafe fn reset_queries(&mut self, set: &A::QuerySet, range: Range<u32>); + unsafe fn copy_query_results( + &mut self, + set: &A::QuerySet, + range: Range<u32>, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ); + + // render passes + + // Begins a render pass, clears all active bindings. 
+ unsafe fn begin_render_pass(&mut self, desc: &RenderPassDescriptor<A>); + unsafe fn end_render_pass(&mut self); + + unsafe fn set_render_pipeline(&mut self, pipeline: &A::RenderPipeline); + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: BufferBinding<'a, A>, + format: wgt::IndexFormat, + ); + unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: BufferBinding<'a, A>); + unsafe fn set_viewport(&mut self, rect: &Rect<f32>, depth_range: Range<f32>); + unsafe fn set_scissor_rect(&mut self, rect: &Rect<u32>); + unsafe fn set_stencil_reference(&mut self, value: u32); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]); + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ); + unsafe fn draw_indirect( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ); + unsafe fn draw_indirect_count( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + count_buffer: &A::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &A::Buffer, + offset: wgt::BufferAddress, + count_buffer: &A::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ); + + // compute passes + + // Begins a compute pass, clears all active bindings. + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor); + unsafe fn end_compute_pass(&mut self); + + unsafe fn set_compute_pipeline(&mut self, pipeline: &A::ComputePipeline); + + unsafe fn dispatch(&mut self, count: [u32; 3]); + unsafe fn dispatch_indirect(&mut self, buffer: &A::Buffer, offset: wgt::BufferAddress); +} + +bitflags!( + /// Instance initialization flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct InstanceFlags: u32 { + /// Generate debug information in shaders and objects. + const DEBUG = 1 << 0; + /// Enable validation, if possible. + const VALIDATION = 1 << 1; + } +); + +bitflags!( + /// Pipeline layout creation flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct PipelineLayoutFlags: u32 { + /// Include support for base vertex/instance drawing. + const BASE_VERTEX_INSTANCE = 1 << 0; + /// Include support for num work groups builtin. + const NUM_WORK_GROUPS = 1 << 1; + } +); + +bitflags!( + /// Pipeline layout creation flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct BindGroupLayoutFlags: u32 { + /// Allows for bind group binding arrays to be shorter than the array in the BGL. + const PARTIALLY_BOUND = 1 << 0; + } +); + +bitflags!( + /// Texture format capability flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct TextureFormatCapabilities: u32 { + /// Format can be sampled. + const SAMPLED = 1 << 0; + /// Format can be sampled with a linear sampler. + const SAMPLED_LINEAR = 1 << 1; + /// Format can be sampled with a min/max reduction sampler. + const SAMPLED_MINMAX = 1 << 2; + + /// Format can be used as storage with write-only access. + const STORAGE = 1 << 3; + /// Format can be used as storage with read and read/write access. + const STORAGE_READ_WRITE = 1 << 4; + /// Format can be used as storage with atomics. 
+ const STORAGE_ATOMIC = 1 << 5; + + /// Format can be used as color and input attachment. + const COLOR_ATTACHMENT = 1 << 6; + /// Format can be used as color (with blending) and input attachment. + const COLOR_ATTACHMENT_BLEND = 1 << 7; + /// Format can be used as depth-stencil and input attachment. + const DEPTH_STENCIL_ATTACHMENT = 1 << 8; + + /// Format can be multisampled by x2. + const MULTISAMPLE_X2 = 1 << 9; + /// Format can be multisampled by x4. + const MULTISAMPLE_X4 = 1 << 10; + /// Format can be multisampled by x8. + const MULTISAMPLE_X8 = 1 << 11; + /// Format can be multisampled by x16. + const MULTISAMPLE_X16 = 1 << 12; + + /// Format can be used for render pass resolve targets. + const MULTISAMPLE_RESOLVE = 1 << 13; + + /// Format can be copied from. + const COPY_SRC = 1 << 14; + /// Format can be copied to. + const COPY_DST = 1 << 15; + } +); + +bitflags!( + /// Texture format capability flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct FormatAspects: u8 { + const COLOR = 1 << 0; + const DEPTH = 1 << 1; + const STENCIL = 1 << 2; + } +); + +impl FormatAspects { + pub fn new(format: wgt::TextureFormat, aspect: wgt::TextureAspect) -> Self { + let aspect_mask = match aspect { + wgt::TextureAspect::All => Self::all(), + wgt::TextureAspect::DepthOnly => Self::DEPTH, + wgt::TextureAspect::StencilOnly => Self::STENCIL, + }; + Self::from(format) & aspect_mask + } + + /// Returns `true` if only one flag is set + pub fn is_one(&self) -> bool { + self.bits().count_ones() == 1 + } + + pub fn map(&self) -> wgt::TextureAspect { + match *self { + Self::COLOR => wgt::TextureAspect::All, + Self::DEPTH => wgt::TextureAspect::DepthOnly, + Self::STENCIL => wgt::TextureAspect::StencilOnly, + _ => unreachable!(), + } + } +} + +impl From<wgt::TextureFormat> for FormatAspects { + fn from(format: wgt::TextureFormat) -> Self { + match format { + wgt::TextureFormat::Stencil8 => Self::STENCIL, + wgt::TextureFormat::Depth16Unorm + | wgt::TextureFormat::Depth32Float + | wgt::TextureFormat::Depth24Plus => Self::DEPTH, + wgt::TextureFormat::Depth32FloatStencil8 | wgt::TextureFormat::Depth24PlusStencil8 => { + Self::DEPTH | Self::STENCIL + } + _ => Self::COLOR, + } + } +} + +bitflags!( + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct MemoryFlags: u32 { + const TRANSIENT = 1 << 0; + const PREFER_COHERENT = 1 << 1; + } +); + +//TODO: it's not intuitive for the backends to consider `LOAD` being optional. + +bitflags!( + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct AttachmentOps: u8 { + const LOAD = 1 << 0; + const STORE = 1 << 1; + } +); + +bitflags::bitflags! { + /// Similar to `wgt::BufferUsages` but for internal use. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct BufferUses: u16 { + /// The argument to a read-only mapping. + const MAP_READ = 1 << 0; + /// The argument to a write-only mapping. + const MAP_WRITE = 1 << 1; + /// The source of a hardware copy. + const COPY_SRC = 1 << 2; + /// The destination of a hardware copy. + const COPY_DST = 1 << 3; + /// The index buffer used for drawing. + const INDEX = 1 << 4; + /// A vertex buffer used for drawing. + const VERTEX = 1 << 5; + /// A uniform buffer bound in a bind group. + const UNIFORM = 1 << 6; + /// A read-only storage buffer used in a bind group. + const STORAGE_READ = 1 << 7; + /// A read-write or write-only buffer used in a bind group. + const STORAGE_READ_WRITE = 1 << 8; + /// The indirect or count buffer in a indirect draw or dispatch. 
+ const INDIRECT = 1 << 9; + /// The combination of states that a buffer may be in _at the same time_. + const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() | + Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() | + Self::STORAGE_READ.bits() | Self::INDIRECT.bits(); + /// The combination of states that a buffer must exclusively be in. + const EXCLUSIVE = Self::MAP_WRITE.bits() | Self::COPY_DST.bits() | Self::STORAGE_READ_WRITE.bits(); + /// The combination of all usages that the are guaranteed to be be ordered by the hardware. + /// If a usage is ordered, then if the buffer state doesn't change between draw calls, there + /// are no barriers needed for synchronization. + const ORDERED = Self::INCLUSIVE.bits() | Self::MAP_WRITE.bits(); + } +} + +bitflags::bitflags! { + /// Similar to `wgt::TextureUsages` but for internal use. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct TextureUses: u16 { + /// The texture is in unknown state. + const UNINITIALIZED = 1 << 0; + /// Ready to present image to the surface. + const PRESENT = 1 << 1; + /// The source of a hardware copy. + const COPY_SRC = 1 << 2; + /// The destination of a hardware copy. + const COPY_DST = 1 << 3; + /// Read-only sampled or fetched resource. + const RESOURCE = 1 << 4; + /// The color target of a renderpass. + const COLOR_TARGET = 1 << 5; + /// Read-only depth stencil usage. + const DEPTH_STENCIL_READ = 1 << 6; + /// Read-write depth stencil usage + const DEPTH_STENCIL_WRITE = 1 << 7; + /// Read-only storage buffer usage. Corresponds to a UAV in d3d, so is exclusive, despite being read only. + const STORAGE_READ = 1 << 8; + /// Read-write or write-only storage buffer usage. + const STORAGE_READ_WRITE = 1 << 9; + /// The combination of states that a texture may be in _at the same time_. + const INCLUSIVE = Self::COPY_SRC.bits() | Self::RESOURCE.bits() | Self::DEPTH_STENCIL_READ.bits(); + /// The combination of states that a texture must exclusively be in. + const EXCLUSIVE = Self::COPY_DST.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ.bits() | Self::STORAGE_READ_WRITE.bits() | Self::PRESENT.bits(); + /// The combination of all usages that the are guaranteed to be be ordered by the hardware. + /// If a usage is ordered, then if the texture state doesn't change between draw calls, there + /// are no barriers needed for synchronization. + const ORDERED = Self::INCLUSIVE.bits() | Self::COLOR_TARGET.bits() | Self::DEPTH_STENCIL_WRITE.bits() | Self::STORAGE_READ.bits(); + + /// Flag used by the wgpu-core texture tracker to say a texture is in different states for every sub-resource + const COMPLEX = 1 << 10; + /// Flag used by the wgpu-core texture tracker to say that the tracker does not know the state of the sub-resource. + /// This is different from UNINITIALIZED as that says the tracker does know, but the texture has not been initialized. + const UNKNOWN = 1 << 11; + } +} + +#[derive(Clone, Debug)] +pub struct InstanceDescriptor<'a> { + pub name: &'a str, + pub flags: InstanceFlags, + pub dx12_shader_compiler: wgt::Dx12Compiler, +} + +#[derive(Clone, Debug)] +pub struct Alignments { + /// The alignment of the start of the buffer used as a GPU copy source. + pub buffer_copy_offset: wgt::BufferSize, + /// The alignment of the row pitch of the texture data stored in a buffer that is + /// used in a GPU copy operation. 
+ pub buffer_copy_pitch: wgt::BufferSize, +} + +#[derive(Clone, Debug)] +pub struct Capabilities { + pub limits: wgt::Limits, + pub alignments: Alignments, + pub downlevel: wgt::DownlevelCapabilities, +} + +#[derive(Debug)] +pub struct ExposedAdapter<A: Api> { + pub adapter: A::Adapter, + pub info: wgt::AdapterInfo, + pub features: wgt::Features, + pub capabilities: Capabilities, +} + +/// Describes information about what a `Surface`'s presentation capabilities are. +/// Fetch this with [Adapter::surface_capabilities]. +#[derive(Debug, Clone)] +pub struct SurfaceCapabilities { + /// List of supported texture formats. + /// + /// Must be at least one. + pub formats: Vec<wgt::TextureFormat>, + + /// Range for the swap chain sizes. + /// + /// - `swap_chain_sizes.start` must be at least 1. + /// - `swap_chain_sizes.end` must be larger or equal to `swap_chain_sizes.start`. + pub swap_chain_sizes: RangeInclusive<u32>, + + /// Current extent of the surface, if known. + pub current_extent: Option<wgt::Extent3d>, + + /// Range of supported extents. + /// + /// `current_extent` must be inside this range. + pub extents: RangeInclusive<wgt::Extent3d>, + + /// Supported texture usage flags. + /// + /// Must have at least `TextureUses::COLOR_TARGET` + pub usage: TextureUses, + + /// List of supported V-sync modes. + /// + /// Must be at least one. + pub present_modes: Vec<wgt::PresentMode>, + + /// List of supported alpha composition modes. + /// + /// Must be at least one. + pub composite_alpha_modes: Vec<wgt::CompositeAlphaMode>, +} + +#[derive(Debug)] +pub struct AcquiredSurfaceTexture<A: Api> { + pub texture: A::SurfaceTexture, + /// The presentation configuration no longer matches + /// the surface properties exactly, but can still be used to present + /// to the surface successfully. + pub suboptimal: bool, +} + +#[derive(Debug)] +pub struct OpenDevice<A: Api> { + pub device: A::Device, + pub queue: A::Queue, +} + +#[derive(Clone, Debug)] +pub struct BufferMapping { + pub ptr: NonNull<u8>, + pub is_coherent: bool, +} + +#[derive(Clone, Debug)] +pub struct BufferDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::BufferAddress, + pub usage: BufferUses, + pub memory_flags: MemoryFlags, +} + +#[derive(Clone, Debug)] +pub struct TextureDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::Extent3d, + pub mip_level_count: u32, + pub sample_count: u32, + pub dimension: wgt::TextureDimension, + pub format: wgt::TextureFormat, + pub usage: TextureUses, + pub memory_flags: MemoryFlags, + /// Allows views of this texture to have a different format + /// than the texture does. + pub view_formats: Vec<wgt::TextureFormat>, +} + +impl TextureDescriptor<'_> { + pub fn copy_extent(&self) -> CopyExtent { + CopyExtent::map_extent_to_copy_size(&self.size, self.dimension) + } + + pub fn is_cube_compatible(&self) -> bool { + self.dimension == wgt::TextureDimension::D2 + && self.size.depth_or_array_layers % 6 == 0 + && self.sample_count == 1 + && self.size.width == self.size.height + } + + pub fn array_layer_count(&self) -> u32 { + match self.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D3 => 1, + wgt::TextureDimension::D2 => self.size.depth_or_array_layers, + } + } +} + +/// TextureView descriptor. +/// +/// Valid usage: +///. - `format` has to be the same as `TextureDescriptor::format` +///. - `dimension` has to be compatible with `TextureDescriptor::dimension` +///. - `usage` has to be a subset of `TextureDescriptor::usage` +///. 
- `range` has to be a subset of parent texture +#[derive(Clone, Debug)] +pub struct TextureViewDescriptor<'a> { + pub label: Label<'a>, + pub format: wgt::TextureFormat, + pub dimension: wgt::TextureViewDimension, + pub usage: TextureUses, + pub range: wgt::ImageSubresourceRange, +} + +#[derive(Clone, Debug)] +pub struct SamplerDescriptor<'a> { + pub label: Label<'a>, + pub address_modes: [wgt::AddressMode; 3], + pub mag_filter: wgt::FilterMode, + pub min_filter: wgt::FilterMode, + pub mipmap_filter: wgt::FilterMode, + pub lod_clamp: Range<f32>, + pub compare: Option<wgt::CompareFunction>, + // Must in the range [1, 16]. + // + // Anisotropic filtering must be supported if this is not 1. + pub anisotropy_clamp: u16, + pub border_color: Option<wgt::SamplerBorderColor>, +} + +/// BindGroupLayout descriptor. +/// +/// Valid usage: +/// - `entries` are sorted by ascending `wgt::BindGroupLayoutEntry::binding` +#[derive(Clone, Debug)] +pub struct BindGroupLayoutDescriptor<'a> { + pub label: Label<'a>, + pub flags: BindGroupLayoutFlags, + pub entries: &'a [wgt::BindGroupLayoutEntry], +} + +#[derive(Clone, Debug)] +pub struct PipelineLayoutDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub flags: PipelineLayoutFlags, + pub bind_group_layouts: &'a [&'a A::BindGroupLayout], + pub push_constant_ranges: &'a [wgt::PushConstantRange], +} + +#[derive(Debug)] +pub struct BufferBinding<'a, A: Api> { + /// The buffer being bound. + pub buffer: &'a A::Buffer, + + /// The offset at which the bound region starts. + /// + /// This must be less than the size of the buffer. Some back ends + /// cannot tolerate zero-length regions; for example, see + /// [VUID-VkDescriptorBufferInfo-offset-00340][340] and + /// [VUID-VkDescriptorBufferInfo-range-00341][341], or the + /// documentation for GLES's [glBindBufferRange][bbr]. + /// + /// [340]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkDescriptorBufferInfo-offset-00340 + /// [341]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkDescriptorBufferInfo-range-00341 + /// [bbr]: https://registry.khronos.org/OpenGL-Refpages/es3.0/html/glBindBufferRange.xhtml + pub offset: wgt::BufferAddress, + + /// The size of the region bound, in bytes. + /// + /// If `None`, the region extends from `offset` to the end of the + /// buffer. Given the restrictions on `offset`, this means that + /// the size is always greater than zero. + pub size: Option<wgt::BufferSize>, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for BufferBinding<'_, A> { + fn clone(&self) -> Self { + Self { + buffer: self.buffer, + offset: self.offset, + size: self.size, + } + } +} + +#[derive(Debug)] +pub struct TextureBinding<'a, A: Api> { + pub view: &'a A::TextureView, + pub usage: TextureUses, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for TextureBinding<'_, A> { + fn clone(&self) -> Self { + Self { + view: self.view, + usage: self.usage, + } + } +} + +#[derive(Clone, Debug)] +pub struct BindGroupEntry { + pub binding: u32, + pub resource_index: u32, + pub count: u32, +} + +/// BindGroup descriptor. +/// +/// Valid usage: +///. - `entries` has to be sorted by ascending `BindGroupEntry::binding` +///. - `entries` has to have the same set of `BindGroupEntry::binding` as `layout` +///. - each entry has to be compatible with the `layout` +///. 
- each entry's `BindGroupEntry::resource_index` is within range +/// of the corresponding resource array, selected by the relevant +/// `BindGroupLayoutEntry`. +#[derive(Clone, Debug)] +pub struct BindGroupDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub layout: &'a A::BindGroupLayout, + pub buffers: &'a [BufferBinding<'a, A>], + pub samplers: &'a [&'a A::Sampler], + pub textures: &'a [TextureBinding<'a, A>], + pub entries: &'a [BindGroupEntry], +} + +#[derive(Clone, Debug)] +pub struct CommandEncoderDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub queue: &'a A::Queue, +} + +/// Naga shader module. +pub struct NagaShader { + /// Shader module IR. + pub module: Cow<'static, naga::Module>, + /// Analysis information of the module. + pub info: naga::valid::ModuleInfo, +} + +// Custom implementation avoids the need to generate Debug impl code +// for the whole Naga module and info. +impl fmt::Debug for NagaShader { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "Naga shader") + } +} + +/// Shader input. +#[allow(clippy::large_enum_variant)] +pub enum ShaderInput<'a> { + Naga(NagaShader), + SpirV(&'a [u32]), +} + +pub struct ShaderModuleDescriptor<'a> { + pub label: Label<'a>, + pub runtime_checks: bool, +} + +/// Describes a programmable pipeline stage. +#[derive(Debug)] +pub struct ProgrammableStage<'a, A: Api> { + /// The compiled shader module for this stage. + pub module: &'a A::ShaderModule, + /// The name of the entry point in the compiled shader. There must be a function with this name + /// in the shader. + pub entry_point: &'a str, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for ProgrammableStage<'_, A> { + fn clone(&self) -> Self { + Self { + module: self.module, + entry_point: self.entry_point, + } + } +} + +/// Describes a compute pipeline. +#[derive(Clone, Debug)] +pub struct ComputePipelineDescriptor<'a, A: Api> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: &'a A::PipelineLayout, + /// The compiled compute stage and its entry point. + pub stage: ProgrammableStage<'a, A>, +} + +/// Describes how the vertex buffer is interpreted. +#[derive(Clone, Debug)] +pub struct VertexBufferLayout<'a> { + /// The stride, in bytes, between elements of this buffer. + pub array_stride: wgt::BufferAddress, + /// How often this vertex buffer is "stepped" forward. + pub step_mode: wgt::VertexStepMode, + /// The list of attributes which comprise a single vertex. + pub attributes: &'a [wgt::VertexAttribute], +} + +/// Describes a render (graphics) pipeline. +#[derive(Clone, Debug)] +pub struct RenderPipelineDescriptor<'a, A: Api> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: &'a A::PipelineLayout, + /// The format of any vertex buffers used with this pipeline. + pub vertex_buffers: &'a [VertexBufferLayout<'a>], + /// The vertex stage for this pipeline. + pub vertex_stage: ProgrammableStage<'a, A>, + /// The properties of the pipeline at the primitive assembly and rasterization level. + pub primitive: wgt::PrimitiveState, + /// The effect of draw calls on the depth and stencil aspects of the output target, if any. + pub depth_stencil: Option<wgt::DepthStencilState>, + /// The multi-sampling properties of the pipeline. + pub multisample: wgt::MultisampleState, + /// The fragment stage for this pipeline. 
+ pub fragment_stage: Option<ProgrammableStage<'a, A>>, + /// The effect of draw calls on the color aspect of the output target. + pub color_targets: &'a [Option<wgt::ColorTargetState>], + /// If the pipeline will be used with a multiview render pass, this indicates how many array + /// layers the attachments will have. + pub multiview: Option<NonZeroU32>, +} + +#[derive(Debug, Clone)] +pub struct SurfaceConfiguration { + /// Number of textures in the swap chain. Must be in + /// `SurfaceCapabilities::swap_chain_size` range. + pub swap_chain_size: u32, + /// Vertical synchronization mode. + pub present_mode: wgt::PresentMode, + /// Alpha composition mode. + pub composite_alpha_mode: wgt::CompositeAlphaMode, + /// Format of the surface textures. + pub format: wgt::TextureFormat, + /// Requested texture extent. Must be in + /// `SurfaceCapabilities::extents` range. + pub extent: wgt::Extent3d, + /// Allowed usage of surface textures, + pub usage: TextureUses, + /// Allows views of swapchain texture to have a different format + /// than the texture does. + pub view_formats: Vec<wgt::TextureFormat>, +} + +#[derive(Debug, Clone)] +pub struct Rect<T> { + pub x: T, + pub y: T, + pub w: T, + pub h: T, +} + +#[derive(Debug, Clone)] +pub struct BufferBarrier<'a, A: Api> { + pub buffer: &'a A::Buffer, + pub usage: Range<BufferUses>, +} + +#[derive(Debug, Clone)] +pub struct TextureBarrier<'a, A: Api> { + pub texture: &'a A::Texture, + pub range: wgt::ImageSubresourceRange, + pub usage: Range<TextureUses>, +} + +#[derive(Clone, Copy, Debug)] +pub struct BufferCopy { + pub src_offset: wgt::BufferAddress, + pub dst_offset: wgt::BufferAddress, + pub size: wgt::BufferSize, +} + +#[derive(Clone, Debug)] +pub struct TextureCopyBase { + pub mip_level: u32, + pub array_layer: u32, + /// Origin within a texture. + /// Note: for 1D and 2D textures, Z must be 0. + pub origin: wgt::Origin3d, + pub aspect: FormatAspects, +} + +#[derive(Clone, Copy, Debug)] +pub struct CopyExtent { + pub width: u32, + pub height: u32, + pub depth: u32, +} + +#[derive(Clone, Debug)] +pub struct TextureCopy { + pub src_base: TextureCopyBase, + pub dst_base: TextureCopyBase, + pub size: CopyExtent, +} + +#[derive(Clone, Debug)] +pub struct BufferTextureCopy { + pub buffer_layout: wgt::ImageDataLayout, + pub texture_base: TextureCopyBase, + pub size: CopyExtent, +} + +#[derive(Debug)] +pub struct Attachment<'a, A: Api> { + pub view: &'a A::TextureView, + /// Contains either a single mutating usage as a target, + /// or a valid combination of read-only usages. 
+ pub usage: TextureUses, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for Attachment<'_, A> { + fn clone(&self) -> Self { + Self { + view: self.view, + usage: self.usage, + } + } +} + +#[derive(Debug)] +pub struct ColorAttachment<'a, A: Api> { + pub target: Attachment<'a, A>, + pub resolve_target: Option<Attachment<'a, A>>, + pub ops: AttachmentOps, + pub clear_value: wgt::Color, +} + +// Rust gets confused about the impl requirements for `A` +impl<A: Api> Clone for ColorAttachment<'_, A> { + fn clone(&self) -> Self { + Self { + target: self.target.clone(), + resolve_target: self.resolve_target.clone(), + ops: self.ops, + clear_value: self.clear_value, + } + } +} + +#[derive(Clone, Debug)] +pub struct DepthStencilAttachment<'a, A: Api> { + pub target: Attachment<'a, A>, + pub depth_ops: AttachmentOps, + pub stencil_ops: AttachmentOps, + pub clear_value: (f32, u32), +} + +#[derive(Clone, Debug)] +pub struct RenderPassDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub extent: wgt::Extent3d, + pub sample_count: u32, + pub color_attachments: &'a [Option<ColorAttachment<'a, A>>], + pub depth_stencil_attachment: Option<DepthStencilAttachment<'a, A>>, + pub multiview: Option<NonZeroU32>, +} + +#[derive(Clone, Debug)] +pub struct ComputePassDescriptor<'a> { + pub label: Label<'a>, +} + +/// Stores if any API validation error has occurred in this process +/// since it was last reset. +/// +/// This is used for internal wgpu testing only and _must not_ be used +/// as a way to check for errors. +/// +/// This works as a static because `cargo nextest` runs all of our +/// tests in separate processes, so each test gets its own canary. +/// +/// This prevents the issue of one validation error terminating the +/// entire process. +pub static VALIDATION_CANARY: ValidationCanary = ValidationCanary { + inner: AtomicBool::new(false), +}; + +/// Flag for internal testing. +pub struct ValidationCanary { + inner: AtomicBool, +} + +impl ValidationCanary { + #[allow(dead_code)] // in some configurations this function is dead + fn set(&self) { + self.inner.store(true, std::sync::atomic::Ordering::SeqCst); + } + + /// Returns true if any API validation error has occurred in this process + /// since the last call to this function. 
+ pub fn get_and_reset(&self) -> bool { + self.inner.swap(false, std::sync::atomic::Ordering::SeqCst) + } +} + +#[test] +fn test_default_limits() { + let limits = wgt::Limits::default(); + assert!(limits.max_bind_groups <= MAX_BIND_GROUPS as u32); +} diff --git a/third_party/rust/wgpu-hal/src/metal/adapter.rs b/third_party/rust/wgpu-hal/src/metal/adapter.rs new file mode 100644 index 0000000000..e5c3de3417 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/adapter.rs @@ -0,0 +1,1045 @@ +use metal::{MTLFeatureSet, MTLGPUFamily, MTLLanguageVersion, MTLReadWriteTextureTier}; +use objc::{class, msg_send, sel, sel_impl}; +use parking_lot::Mutex; +use wgt::{AstcBlock, AstcChannel}; + +use std::{sync::Arc, thread}; + +const MAX_COMMAND_BUFFERS: u64 = 2048; + +unsafe impl Send for super::Adapter {} +unsafe impl Sync for super::Adapter {} + +impl super::Adapter { + pub(super) fn new(shared: Arc<super::AdapterShared>) -> Self { + Self { shared } + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let queue = self + .shared + .device + .lock() + .new_command_queue_with_max_command_buffer_count(MAX_COMMAND_BUFFERS); + Ok(crate::OpenDevice { + device: super::Device { + shared: Arc::clone(&self.shared), + features, + }, + queue: super::Queue { + raw: Arc::new(Mutex::new(queue)), + }, + }) + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + use wgt::TextureFormat as Tf; + + let pc = &self.shared.private_caps; + // Affected formats documented at: + // https://developer.apple.com/documentation/metal/mtlreadwritetexturetier/mtlreadwritetexturetier1?language=objc + // https://developer.apple.com/documentation/metal/mtlreadwritetexturetier/mtlreadwritetexturetier2?language=objc + let (read_write_tier1_if, read_write_tier2_if) = match pc.read_write_texture_tier { + metal::MTLReadWriteTextureTier::TierNone => (Tfc::empty(), Tfc::empty()), + metal::MTLReadWriteTextureTier::Tier1 => (Tfc::STORAGE_READ_WRITE, Tfc::empty()), + metal::MTLReadWriteTextureTier::Tier2 => { + (Tfc::STORAGE_READ_WRITE, Tfc::STORAGE_READ_WRITE) + } + }; + let msaa_count = pc.sample_count_mask; + + let msaa_resolve_desktop_if = if pc.msaa_desktop { + Tfc::MULTISAMPLE_RESOLVE + } else { + Tfc::empty() + }; + let msaa_resolve_apple3x_if = if pc.msaa_desktop | pc.msaa_apple3 { + Tfc::MULTISAMPLE_RESOLVE + } else { + Tfc::empty() + }; + let is_not_apple1x = super::PrivateCapabilities::supports_any( + self.shared.device.lock().as_ref(), + &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + ], + ); + + // Metal defined pixel format capabilities + let all_caps = Tfc::SAMPLED_LINEAR + | Tfc::STORAGE + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | Tfc::MULTISAMPLE_RESOLVE; + + let extra = match format { + Tf::R8Unorm | Tf::R16Float | Tf::Rgba8Unorm | Tf::Rgba16Float => { + read_write_tier2_if | all_caps + } + Tf::R8Snorm | Tf::Rg8Snorm | Tf::Rgba8Snorm => { + let mut flags = all_caps; + flags.set(Tfc::MULTISAMPLE_RESOLVE, is_not_apple1x); + flags + } + Tf::R8Uint + | Tf::R8Sint + | Tf::R16Uint + | Tf::R16Sint + | Tf::Rgba8Uint + | Tf::Rgba8Sint + | Tf::Rgba16Uint + | Tf::Rgba16Sint => { + read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + 
Tf::R16Unorm + | Tf::R16Snorm + | Tf::Rg16Unorm + | Tf::Rg16Snorm + | Tf::Rgba16Unorm + | Tf::Rgba16Snorm => { + Tfc::SAMPLED_LINEAR + | Tfc::STORAGE + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | msaa_resolve_desktop_if + } + Tf::Rg8Unorm | Tf::Rg16Float | Tf::Bgra8Unorm => all_caps, + Tf::Rg8Uint | Tf::Rg8Sint => Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count, + Tf::R32Uint | Tf::R32Sint => { + read_write_tier1_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::R32Float => { + let flags = if pc.format_r32float_all { + all_caps + } else { + Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | Tfc::COLOR_ATTACHMENT_BLEND | msaa_count + }; + read_write_tier1_if | flags + } + Tf::Rg16Uint | Tf::Rg16Sint => Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count, + Tf::Rgba8UnormSrgb | Tf::Bgra8UnormSrgb => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rgba8_srgb_all); + flags + } + Tf::Rgb10a2Unorm => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rgb10a2_unorm_all); + flags + } + Tf::Rg11b10Float => { + let mut flags = all_caps; + flags.set(Tfc::STORAGE, pc.format_rg11b10_all); + flags + } + Tf::Rg32Uint | Tf::Rg32Sint => Tfc::COLOR_ATTACHMENT | Tfc::STORAGE | msaa_count, + Tf::Rg32Float => { + if pc.format_rg32float_all { + all_caps + } else { + Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | Tfc::COLOR_ATTACHMENT_BLEND | msaa_count + } + } + Tf::Rgba32Uint | Tf::Rgba32Sint => { + read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT | msaa_count + } + Tf::Rgba32Float => { + let mut flags = read_write_tier2_if | Tfc::STORAGE | Tfc::COLOR_ATTACHMENT; + if pc.format_rgba32float_all { + flags |= all_caps + } else if pc.msaa_apple7 { + flags |= msaa_count + }; + flags + } + Tf::Stencil8 => { + all_caps | Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if + } + Tf::Depth16Unorm => { + let mut flags = + Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if; + if pc.format_depth16unorm { + flags |= Tfc::SAMPLED_LINEAR + } + flags + } + Tf::Depth32Float | Tf::Depth32FloatStencil8 => { + let mut flags = + Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count | msaa_resolve_apple3x_if; + if pc.format_depth32float_filter { + flags |= Tfc::SAMPLED_LINEAR + } + flags + } + Tf::Depth24Plus | Tf::Depth24PlusStencil8 => { + let mut flags = Tfc::DEPTH_STENCIL_ATTACHMENT | msaa_count; + if pc.format_depth24_stencil8 { + flags |= Tfc::SAMPLED_LINEAR | Tfc::MULTISAMPLE_RESOLVE + } else { + flags |= msaa_resolve_apple3x_if; + if pc.format_depth32float_filter { + flags |= Tfc::SAMPLED_LINEAR + } + } + flags + } + Tf::Rgb9e5Ufloat => { + if pc.msaa_apple3 { + all_caps + } else if pc.msaa_desktop { + Tfc::SAMPLED_LINEAR + } else { + Tfc::SAMPLED_LINEAR + | Tfc::COLOR_ATTACHMENT + | Tfc::COLOR_ATTACHMENT_BLEND + | msaa_count + | Tfc::MULTISAMPLE_RESOLVE + } + } + Tf::Bc1RgbaUnorm + | Tf::Bc1RgbaUnormSrgb + | Tf::Bc2RgbaUnorm + | Tf::Bc2RgbaUnormSrgb + | Tf::Bc3RgbaUnorm + | Tf::Bc3RgbaUnormSrgb + | Tf::Bc4RUnorm + | Tf::Bc4RSnorm + | Tf::Bc5RgUnorm + | Tf::Bc5RgSnorm + | Tf::Bc6hRgbUfloat + | Tf::Bc6hRgbFloat + | Tf::Bc7RgbaUnorm + | Tf::Bc7RgbaUnormSrgb => { + if pc.format_bc { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + Tf::Etc2Rgb8Unorm + | Tf::Etc2Rgb8UnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb + | Tf::EacR11Unorm + | Tf::EacR11Snorm + | Tf::EacRg11Unorm + | Tf::EacRg11Snorm => { + if pc.format_eac_etc { + Tfc::SAMPLED_LINEAR + } else { + 
Tfc::empty() + } + } + Tf::Astc { + block: _, + channel: _, + } => { + if pc.format_astc || pc.format_astc_hdr { + Tfc::SAMPLED_LINEAR + } else { + Tfc::empty() + } + } + }; + + Tfc::COPY_SRC | Tfc::COPY_DST | Tfc::SAMPLED | extra + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + let current_extent = if surface.main_thread_id == thread::current().id() { + Some(surface.dimensions()) + } else { + log::warn!("Unable to get the current view dimensions on a non-main thread"); + None + }; + + let mut formats = vec![ + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Rgba16Float, + ]; + if self.shared.private_caps.format_rgb10a2_unorm_all { + formats.push(wgt::TextureFormat::Rgb10a2Unorm); + } + + let pc = &self.shared.private_caps; + Some(crate::SurfaceCapabilities { + formats, + //Note: this is hardcoded in `CAMetalLayer` documentation + swap_chain_sizes: if pc.can_set_maximum_drawables_count { + 2..=3 + } else { + // 3 is the default in `CAMetalLayer` documentation + // iOS 10.3 was tested to use 3 on iphone5s + 3..=3 + }, + present_modes: if pc.can_set_display_sync { + vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate] + } else { + vec![wgt::PresentMode::Fifo] + }, + composite_alpha_modes: vec![ + wgt::CompositeAlphaMode::Opaque, + wgt::CompositeAlphaMode::PostMultiplied, + ], + + current_extent, + extents: wgt::Extent3d { + width: 4, + height: 4, + depth_or_array_layers: 1, + }..=wgt::Extent3d { + width: pc.max_texture_size as u32, + height: pc.max_texture_size as u32, + depth_or_array_layers: 1, + }, + usage: crate::TextureUses::COLOR_TARGET | crate::TextureUses::COPY_DST, //TODO: expose more + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + let timestamp = self.shared.presentation_timer.get_timestamp_ns(); + + wgt::PresentationTimestamp(timestamp) + } +} + +const RESOURCE_HEAP_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v3, +]; + +const ARGUMENT_BUFFER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v3, +]; + +const MUTABLE_COMPARISON_SAMPLER_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const SAMPLER_CLAMP_TO_BORDER_SUPPORT: &[MTLFeatureSet] = &[MTLFeatureSet::macOS_GPUFamily1_v2]; + +const ASTC_PIXEL_FORMAT_FEATURES: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, +]; + +const ANY8_UNORM_SRGB_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, +]; + +const ANY8_SNORM_RESOLVE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGBA8_SRGB: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily2_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, +]; + +const RGB10A2UNORM_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGB10A2UINT_COLOR_WRITE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RG11B10FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + 
MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const RGB9E5FLOAT_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, +]; + +const BGR10A2_ALL: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const BASE_INSTANCE_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const BASE_VERTEX_INSTANCE_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const TEXTURE_CUBE_ARRAY_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const DUAL_SOURCE_BLEND_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v4, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v2, +]; + +const LAYERED_RENDERING_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily5_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily2_v1, +]; + +const FUNCTION_SPECIALIZATION_SUPPORT: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + MTLFeatureSet::macOS_GPUFamily1_v2, +]; + +const DEPTH_CLIP_MODE: &[MTLFeatureSet] = &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::tvOS_GPUFamily1_v3, + MTLFeatureSet::macOS_GPUFamily1_v1, +]; + +const OS_NOT_SUPPORT: (usize, usize) = (10000, 0); + +impl super::PrivateCapabilities { + fn supports_any(raw: &metal::DeviceRef, features_sets: &[MTLFeatureSet]) -> bool { + features_sets + .iter() + .cloned() + .any(|x| raw.supports_feature_set(x)) + } + + pub fn new(device: &metal::Device) -> Self { + #[repr(C)] + #[derive(Clone, Copy, Debug)] + #[allow(clippy::upper_case_acronyms)] + struct NSOperatingSystemVersion { + major: usize, + minor: usize, + patch: usize, + } + + impl NSOperatingSystemVersion { + fn at_least( + &self, + mac_version: (usize, usize), + ios_version: (usize, usize), + is_mac: bool, + ) -> bool { + if is_mac { + self.major > mac_version.0 + || (self.major == mac_version.0 && self.minor >= mac_version.1) + } else { + self.major > ios_version.0 + || (self.major == ios_version.0 && self.minor >= ios_version.1) + } + } + } + + let version: NSOperatingSystemVersion = unsafe { + let process_info: *mut objc::runtime::Object = + msg_send![class!(NSProcessInfo), processInfo]; + msg_send![process_info, operatingSystemVersion] + }; + + let os_is_mac = device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1); + let family_check = version.at_least((10, 15), (13, 0), os_is_mac); + + let mut sample_count_mask = crate::TextureFormatCapabilities::MULTISAMPLE_X4; // 1 and 4 samples are supported on all devices + if device.supports_texture_sample_count(2) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X2; + } + if device.supports_texture_sample_count(8) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X8; + } + if device.supports_texture_sample_count(16) { + sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X16; + } + + let rw_texture_tier = if version.at_least((10, 13), (11, 0), os_is_mac) { + device.read_write_texture_support() + } else if version.at_least((10, 12), OS_NOT_SUPPORT, os_is_mac) { + if Self::supports_any(device, &[MTLFeatureSet::macOS_ReadWriteTextureTier2]) { + MTLReadWriteTextureTier::Tier2 + 
} else { + MTLReadWriteTextureTier::Tier1 + } + } else { + MTLReadWriteTextureTier::TierNone + }; + + Self { + family_check, + msl_version: if version.at_least((12, 0), (15, 0), os_is_mac) { + MTLLanguageVersion::V2_4 + } else if version.at_least((11, 0), (14, 0), os_is_mac) { + MTLLanguageVersion::V2_3 + } else if version.at_least((10, 15), (13, 0), os_is_mac) { + MTLLanguageVersion::V2_2 + } else if version.at_least((10, 14), (12, 0), os_is_mac) { + MTLLanguageVersion::V2_1 + } else if version.at_least((10, 13), (11, 0), os_is_mac) { + MTLLanguageVersion::V2_0 + } else if version.at_least((10, 12), (10, 0), os_is_mac) { + MTLLanguageVersion::V1_2 + } else if version.at_least((10, 11), (9, 0), os_is_mac) { + MTLLanguageVersion::V1_1 + } else { + MTLLanguageVersion::V1_0 + }, + // macOS 10.11 doesn't support read-write resources + fragment_rw_storage: version.at_least((10, 12), (8, 0), os_is_mac), + read_write_texture_tier: rw_texture_tier, + msaa_desktop: os_is_mac, + msaa_apple3: if family_check { + device.supports_family(MTLGPUFamily::Apple3) + } else { + device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily3_v4) + }, + msaa_apple7: family_check && device.supports_family(MTLGPUFamily::Apple7), + resource_heaps: Self::supports_any(device, RESOURCE_HEAP_SUPPORT), + argument_buffers: Self::supports_any(device, ARGUMENT_BUFFER_SUPPORT), + shared_textures: !os_is_mac, + mutable_comparison_samplers: Self::supports_any( + device, + MUTABLE_COMPARISON_SAMPLER_SUPPORT, + ), + sampler_clamp_to_border: Self::supports_any(device, SAMPLER_CLAMP_TO_BORDER_SUPPORT), + base_instance: Self::supports_any(device, BASE_INSTANCE_SUPPORT), + base_vertex_instance_drawing: Self::supports_any(device, BASE_VERTEX_INSTANCE_SUPPORT), + dual_source_blending: Self::supports_any(device, DUAL_SOURCE_BLEND_SUPPORT), + low_power: !os_is_mac || device.is_low_power(), + headless: os_is_mac && device.is_headless(), + layered_rendering: Self::supports_any(device, LAYERED_RENDERING_SUPPORT), + function_specialization: Self::supports_any(device, FUNCTION_SPECIALIZATION_SUPPORT), + depth_clip_mode: Self::supports_any(device, DEPTH_CLIP_MODE), + texture_cube_array: Self::supports_any(device, TEXTURE_CUBE_ARRAY_SUPPORT), + format_depth24_stencil8: os_is_mac && device.d24_s8_supported(), + format_depth32_stencil8_filter: os_is_mac, + format_depth32_stencil8_none: !os_is_mac, + format_min_srgb_channels: if os_is_mac { 4 } else { 1 }, + format_b5: !os_is_mac, + format_bc: os_is_mac, + format_eac_etc: !os_is_mac + // M1 in macOS supports EAC/ETC2 + || (family_check && device.supports_family(MTLGPUFamily::Apple7)), + // A8(Apple2) and later always support ASTC pixel formats + format_astc: (family_check && device.supports_family(MTLGPUFamily::Apple2)) + || Self::supports_any(device, ASTC_PIXEL_FORMAT_FEATURES), + // A13(Apple6) M1(Apple7) and later always support HDR ASTC pixel formats + format_astc_hdr: family_check && device.supports_family(MTLGPUFamily::Apple6), + format_any8_unorm_srgb_all: Self::supports_any(device, ANY8_UNORM_SRGB_ALL), + format_any8_unorm_srgb_no_write: !Self::supports_any(device, ANY8_UNORM_SRGB_ALL) + && !os_is_mac, + format_any8_snorm_all: Self::supports_any(device, ANY8_SNORM_RESOLVE), + format_r16_norm_all: os_is_mac, + // No devices support r32's all capabilities + format_r32_all: false, + // All devices support r32's write capability + format_r32_no_write: false, + // iOS support r32float's write capability, macOS support r32float's all capabilities + format_r32float_no_write_no_filter: false, + // 
Only iOS doesn't support r32float's filter capability + format_r32float_no_filter: !os_is_mac, + format_r32float_all: os_is_mac, + format_rgba8_srgb_all: Self::supports_any(device, RGBA8_SRGB), + format_rgba8_srgb_no_write: !Self::supports_any(device, RGBA8_SRGB), + format_rgb10a2_unorm_all: Self::supports_any(device, RGB10A2UNORM_ALL), + format_rgb10a2_unorm_no_write: !Self::supports_any(device, RGB10A2UNORM_ALL), + format_rgb10a2_uint_color: !Self::supports_any(device, RGB10A2UINT_COLOR_WRITE), + format_rgb10a2_uint_color_write: Self::supports_any(device, RGB10A2UINT_COLOR_WRITE), + format_rg11b10_all: Self::supports_any(device, RG11B10FLOAT_ALL), + format_rg11b10_no_write: !Self::supports_any(device, RG11B10FLOAT_ALL), + format_rgb9e5_all: Self::supports_any(device, RGB9E5FLOAT_ALL), + format_rgb9e5_no_write: !Self::supports_any(device, RGB9E5FLOAT_ALL) && !os_is_mac, + format_rgb9e5_filter_only: os_is_mac, + format_rg32_color: true, + format_rg32_color_write: true, + // Only macOS support rg32float's all capabilities + format_rg32float_all: os_is_mac, + // All devices support rg32float's color + blend capabilities + format_rg32float_color_blend: true, + // Only iOS doesn't support rg32float's filter + format_rg32float_no_filter: !os_is_mac, + format_rgba32int_color: true, + // All devices support rgba32uint and rgba32sint's color + write capabilities + format_rgba32int_color_write: true, + format_rgba32float_color: true, + // All devices support rgba32float's color + write capabilities + format_rgba32float_color_write: true, + // Only macOS support rgba32float's all capabilities + format_rgba32float_all: os_is_mac, + format_depth16unorm: Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v3, + MTLFeatureSet::macOS_GPUFamily1_v2, + ], + ), + format_depth32float_filter: os_is_mac, + format_depth32float_none: !os_is_mac, + format_bgr10a2_all: Self::supports_any(device, BGR10A2_ALL), + format_bgr10a2_no_write: !Self::supports_any(device, BGR10A2_ALL), + max_buffers_per_stage: 31, + max_vertex_buffers: 31, + max_textures_per_stage: if os_is_mac + || (family_check && device.supports_family(MTLGPUFamily::Apple6)) + { + 128 + } else if family_check && device.supports_family(MTLGPUFamily::Apple4) { + 96 + } else { + 31 + }, + max_samplers_per_stage: 16, + buffer_alignment: if os_is_mac { 256 } else { 64 }, + max_buffer_size: if version.at_least((10, 14), (12, 0), os_is_mac) { + // maxBufferLength available on macOS 10.14+ and iOS 12.0+ + let buffer_size: metal::NSInteger = + unsafe { msg_send![device.as_ref(), maxBufferLength] }; + buffer_size as _ + } else if os_is_mac { + 1 << 30 // 1GB on macOS 10.11 and up + } else { + 1 << 28 // 256MB on iOS 8.0+ + }, + max_texture_size: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v1, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 16384 + } else { + 8192 + }, + max_texture_3d_size: 2048, + max_texture_layers: 2048, + max_fragment_input_components: if os_is_mac + || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1) + { + 124 + } else { + 60 + }, + max_color_render_targets: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily2_v1, + MTLFeatureSet::tvOS_GPUFamily1_v1, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 8 + } else { + 4 + }, + max_varying_components: if device + .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1) + { + 124 + } else { + 60 + }, + max_threads_per_group: if Self::supports_any( + device, + &[ + 
MTLFeatureSet::iOS_GPUFamily4_v2, + MTLFeatureSet::macOS_GPUFamily1_v1, + ], + ) { + 1024 + } else { + 512 + }, + max_total_threadgroup_memory: if Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily4_v1, + MTLFeatureSet::macOS_GPUFamily1_v2, + ], + ) { + 32 << 10 + } else { + 16 << 10 + }, + sample_count_mask, + supports_debug_markers: Self::supports_any( + device, + &[ + MTLFeatureSet::macOS_GPUFamily1_v2, + MTLFeatureSet::iOS_GPUFamily1_v3, + MTLFeatureSet::tvOS_GPUFamily1_v2, + ], + ), + supports_binary_archives: family_check + && (device.supports_family(MTLGPUFamily::Apple3) + || device.supports_family(MTLGPUFamily::Mac1)), + supports_capture_manager: version.at_least((10, 13), (11, 0), os_is_mac), + can_set_maximum_drawables_count: version.at_least((10, 14), (11, 2), os_is_mac), + can_set_display_sync: version.at_least((10, 13), OS_NOT_SUPPORT, os_is_mac), + can_set_next_drawable_timeout: version.at_least((10, 13), (11, 0), os_is_mac), + supports_arrays_of_textures: Self::supports_any( + device, + &[ + MTLFeatureSet::iOS_GPUFamily3_v2, + MTLFeatureSet::tvOS_GPUFamily2_v1, + MTLFeatureSet::macOS_GPUFamily1_v3, + ], + ), + supports_arrays_of_textures_write: family_check + && (device.supports_family(MTLGPUFamily::Apple6) + || device.supports_family(MTLGPUFamily::Mac1) + || device.supports_family(MTLGPUFamily::MacCatalyst1)), + supports_mutability: version.at_least((10, 13), (11, 0), os_is_mac), + //Depth clipping is supported on all macOS GPU families and iOS family 4 and later + supports_depth_clip_control: os_is_mac + || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1), + supports_preserve_invariance: version.at_least((11, 0), (13, 0), os_is_mac), + // Metal 2.2 on mac, 2.3 on iOS. + supports_shader_primitive_index: version.at_least((10, 15), (14, 0), os_is_mac), + has_unified_memory: if version.at_least((10, 15), (13, 0), os_is_mac) { + Some(device.has_unified_memory()) + } else { + None + }, + } + } + + pub fn device_type(&self) -> wgt::DeviceType { + if self.has_unified_memory.unwrap_or(self.low_power) { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + } + } + + pub fn features(&self) -> wgt::Features { + use wgt::Features as F; + + let mut features = F::empty() + | F::INDIRECT_FIRST_INSTANCE + | F::MAPPABLE_PRIMARY_BUFFERS + | F::VERTEX_WRITABLE_STORAGE + | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | F::PUSH_CONSTANTS + | F::POLYGON_MODE_LINE + | F::CLEAR_TEXTURE + | F::TEXTURE_FORMAT_16BIT_NORM + | F::SHADER_F16 + | F::DEPTH32FLOAT_STENCIL8 + | F::MULTI_DRAW_INDIRECT; + + features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); + features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); + features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc); + features.set(F::TEXTURE_COMPRESSION_ETC2, self.format_eac_etc); + + features.set(F::DEPTH_CLIP_CONTROL, self.supports_depth_clip_control); + features.set( + F::SHADER_PRIMITIVE_INDEX, + self.supports_shader_primitive_index, + ); + + features.set( + F::TEXTURE_BINDING_ARRAY + | F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + self.msl_version >= MTLLanguageVersion::V2_0 && self.supports_arrays_of_textures, + ); + //// XXX: this is technically not true, as read-only storage images can be used in arrays + //// on precisely the same conditions that sampled textures can. But texel fetch from a + //// sampled texture is a thing; should we bother introducing another feature flag? 
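+ // Editor's note, illustrative only (not part of upstream wgpu-hal): the check below
+ // enables STORAGE_RESOURCE_BINDING_ARRAY only when all three conditions hold; for
+ // example, a recent Apple7-class GPU reporting MSL 2.4 satisfies them, while a device
+ // capped at MSL 2.1 fails the first condition and keeps the feature disabled.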
+ if self.msl_version >= MTLLanguageVersion::V2_2 + && self.supports_arrays_of_textures + && self.supports_arrays_of_textures_write + { + features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY); + } + + features.set( + F::ADDRESS_MODE_CLAMP_TO_BORDER, + self.sampler_clamp_to_border, + ); + features.set(F::ADDRESS_MODE_CLAMP_TO_ZERO, true); + + features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); + + features + } + + pub fn capabilities(&self) -> crate::Capabilities { + let mut downlevel = wgt::DownlevelCapabilities::default(); + downlevel.flags.set( + wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE, + self.fragment_rw_storage, + ); + downlevel.flags.set( + wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES, + self.texture_cube_array, + ); + //TODO: separate the mutable comparisons from immutable ones + downlevel.flags.set( + wgt::DownlevelFlags::COMPARISON_SAMPLERS, + self.mutable_comparison_samplers, + ); + downlevel + .flags + .set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, true); + + let base = wgt::Limits::default(); + crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: self.max_texture_size as u32, + max_texture_dimension_2d: self.max_texture_size as u32, + max_texture_dimension_3d: self.max_texture_3d_size as u32, + max_texture_array_layers: self.max_texture_layers as u32, + max_bind_groups: 8, + max_bindings_per_bind_group: 65535, + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: self.max_textures_per_stage, + max_samplers_per_shader_stage: self.max_samplers_per_stage, + max_storage_buffers_per_shader_stage: self.max_buffers_per_stage, + max_storage_textures_per_shader_stage: self.max_textures_per_stage, + max_uniform_buffers_per_shader_stage: self.max_buffers_per_stage, + max_uniform_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, + max_storage_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, + max_vertex_buffers: self.max_vertex_buffers, + max_vertex_attributes: 31, + max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, + max_push_constant_size: 0x1000, + min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, + min_storage_buffer_offset_alignment: self.buffer_alignment as u32, + max_inter_stage_shader_components: self.max_varying_components, + max_compute_workgroup_storage_size: self.max_total_threadgroup_memory, + max_compute_invocations_per_workgroup: self.max_threads_per_group, + max_compute_workgroup_size_x: self.max_threads_per_group, + max_compute_workgroup_size_y: self.max_threads_per_group, + max_compute_workgroup_size_z: self.max_threads_per_group, + max_compute_workgroups_per_dimension: 0xFFFF, + max_buffer_size: self.max_buffer_size, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(), + }, + downlevel, + } + } + + pub fn map_format(&self, format: wgt::TextureFormat) -> metal::MTLPixelFormat { + use metal::MTLPixelFormat::*; + use wgt::TextureFormat as Tf; + match format { + Tf::R8Unorm => R8Unorm, + Tf::R8Snorm => R8Snorm, + Tf::R8Uint => R8Uint, + Tf::R8Sint => R8Sint, + Tf::R16Uint => R16Uint, + Tf::R16Sint => R16Sint, + Tf::R16Unorm => R16Unorm, + Tf::R16Snorm => R16Snorm, + Tf::R16Float => R16Float, + Tf::Rg8Unorm => RG8Unorm, + Tf::Rg8Snorm => RG8Snorm, + 
Tf::Rg8Uint => RG8Uint, + Tf::Rg8Sint => RG8Sint, + Tf::Rg16Unorm => RG16Unorm, + Tf::Rg16Snorm => RG16Snorm, + Tf::R32Uint => R32Uint, + Tf::R32Sint => R32Sint, + Tf::R32Float => R32Float, + Tf::Rg16Uint => RG16Uint, + Tf::Rg16Sint => RG16Sint, + Tf::Rg16Float => RG16Float, + Tf::Rgba8Unorm => RGBA8Unorm, + Tf::Rgba8UnormSrgb => RGBA8Unorm_sRGB, + Tf::Bgra8UnormSrgb => BGRA8Unorm_sRGB, + Tf::Rgba8Snorm => RGBA8Snorm, + Tf::Bgra8Unorm => BGRA8Unorm, + Tf::Rgba8Uint => RGBA8Uint, + Tf::Rgba8Sint => RGBA8Sint, + Tf::Rgb10a2Unorm => RGB10A2Unorm, + Tf::Rg11b10Float => RG11B10Float, + Tf::Rg32Uint => RG32Uint, + Tf::Rg32Sint => RG32Sint, + Tf::Rg32Float => RG32Float, + Tf::Rgba16Uint => RGBA16Uint, + Tf::Rgba16Sint => RGBA16Sint, + Tf::Rgba16Unorm => RGBA16Unorm, + Tf::Rgba16Snorm => RGBA16Snorm, + Tf::Rgba16Float => RGBA16Float, + Tf::Rgba32Uint => RGBA32Uint, + Tf::Rgba32Sint => RGBA32Sint, + Tf::Rgba32Float => RGBA32Float, + Tf::Stencil8 => Stencil8, + Tf::Depth16Unorm => Depth16Unorm, + Tf::Depth32Float => Depth32Float, + Tf::Depth32FloatStencil8 => Depth32Float_Stencil8, + Tf::Depth24Plus => { + if self.format_depth24_stencil8 { + Depth24Unorm_Stencil8 + } else { + Depth32Float + } + } + Tf::Depth24PlusStencil8 => { + if self.format_depth24_stencil8 { + Depth24Unorm_Stencil8 + } else { + Depth32Float_Stencil8 + } + } + Tf::Rgb9e5Ufloat => RGB9E5Float, + Tf::Bc1RgbaUnorm => BC1_RGBA, + Tf::Bc1RgbaUnormSrgb => BC1_RGBA_sRGB, + Tf::Bc2RgbaUnorm => BC2_RGBA, + Tf::Bc2RgbaUnormSrgb => BC2_RGBA_sRGB, + Tf::Bc3RgbaUnorm => BC3_RGBA, + Tf::Bc3RgbaUnormSrgb => BC3_RGBA_sRGB, + Tf::Bc4RUnorm => BC4_RUnorm, + Tf::Bc4RSnorm => BC4_RSnorm, + Tf::Bc5RgUnorm => BC5_RGUnorm, + Tf::Bc5RgSnorm => BC5_RGSnorm, + Tf::Bc6hRgbFloat => BC6H_RGBFloat, + Tf::Bc6hRgbUfloat => BC6H_RGBUfloat, + Tf::Bc7RgbaUnorm => BC7_RGBAUnorm, + Tf::Bc7RgbaUnormSrgb => BC7_RGBAUnorm_sRGB, + Tf::Etc2Rgb8Unorm => ETC2_RGB8, + Tf::Etc2Rgb8UnormSrgb => ETC2_RGB8_sRGB, + Tf::Etc2Rgb8A1Unorm => ETC2_RGB8A1, + Tf::Etc2Rgb8A1UnormSrgb => ETC2_RGB8A1_sRGB, + Tf::Etc2Rgba8Unorm => EAC_RGBA8, + Tf::Etc2Rgba8UnormSrgb => EAC_RGBA8_sRGB, + Tf::EacR11Unorm => EAC_R11Unorm, + Tf::EacR11Snorm => EAC_R11Snorm, + Tf::EacRg11Unorm => EAC_RG11Unorm, + Tf::EacRg11Snorm => EAC_RG11Snorm, + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm => match block { + AstcBlock::B4x4 => ASTC_4x4_LDR, + AstcBlock::B5x4 => ASTC_5x4_LDR, + AstcBlock::B5x5 => ASTC_5x5_LDR, + AstcBlock::B6x5 => ASTC_6x5_LDR, + AstcBlock::B6x6 => ASTC_6x6_LDR, + AstcBlock::B8x5 => ASTC_8x5_LDR, + AstcBlock::B8x6 => ASTC_8x6_LDR, + AstcBlock::B8x8 => ASTC_8x8_LDR, + AstcBlock::B10x5 => ASTC_10x5_LDR, + AstcBlock::B10x6 => ASTC_10x6_LDR, + AstcBlock::B10x8 => ASTC_10x8_LDR, + AstcBlock::B10x10 => ASTC_10x10_LDR, + AstcBlock::B12x10 => ASTC_12x10_LDR, + AstcBlock::B12x12 => ASTC_12x12_LDR, + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => ASTC_4x4_sRGB, + AstcBlock::B5x4 => ASTC_5x4_sRGB, + AstcBlock::B5x5 => ASTC_5x5_sRGB, + AstcBlock::B6x5 => ASTC_6x5_sRGB, + AstcBlock::B6x6 => ASTC_6x6_sRGB, + AstcBlock::B8x5 => ASTC_8x5_sRGB, + AstcBlock::B8x6 => ASTC_8x6_sRGB, + AstcBlock::B8x8 => ASTC_8x8_sRGB, + AstcBlock::B10x5 => ASTC_10x5_sRGB, + AstcBlock::B10x6 => ASTC_10x6_sRGB, + AstcBlock::B10x8 => ASTC_10x8_sRGB, + AstcBlock::B10x10 => ASTC_10x10_sRGB, + AstcBlock::B12x10 => ASTC_12x10_sRGB, + AstcBlock::B12x12 => ASTC_12x12_sRGB, + }, + AstcChannel::Hdr => match block { + AstcBlock::B4x4 => ASTC_4x4_HDR, + AstcBlock::B5x4 => ASTC_5x4_HDR, + 
AstcBlock::B5x5 => ASTC_5x5_HDR, + AstcBlock::B6x5 => ASTC_6x5_HDR, + AstcBlock::B6x6 => ASTC_6x6_HDR, + AstcBlock::B8x5 => ASTC_8x5_HDR, + AstcBlock::B8x6 => ASTC_8x6_HDR, + AstcBlock::B8x8 => ASTC_8x8_HDR, + AstcBlock::B10x5 => ASTC_10x5_HDR, + AstcBlock::B10x6 => ASTC_10x6_HDR, + AstcBlock::B10x8 => ASTC_10x8_HDR, + AstcBlock::B10x10 => ASTC_10x10_HDR, + AstcBlock::B12x10 => ASTC_12x10_HDR, + AstcBlock::B12x12 => ASTC_12x12_HDR, + }, + }, + } + } + + pub fn map_view_format( + &self, + format: wgt::TextureFormat, + aspects: crate::FormatAspects, + ) -> metal::MTLPixelFormat { + use crate::FormatAspects as Fa; + use metal::MTLPixelFormat::*; + use wgt::TextureFormat as Tf; + match (format, aspects) { + // map combined depth-stencil format to their stencil-only format + // see https://developer.apple.com/library/archive/documentation/Miscellaneous/Conceptual/MetalProgrammingGuide/WhatsNewiniOS10tvOS10andOSX1012/WhatsNewiniOS10tvOS10andOSX1012.html#//apple_ref/doc/uid/TP40014221-CH14-DontLinkElementID_77 + (Tf::Depth24PlusStencil8, Fa::STENCIL) => { + if self.format_depth24_stencil8 { + X24_Stencil8 + } else { + X32_Stencil8 + } + } + (Tf::Depth32FloatStencil8, Fa::STENCIL) => X32_Stencil8, + + _ => self.map_format(format), + } + } +} + +impl super::PrivateDisabilities { + pub fn new(device: &metal::Device) -> Self { + let is_intel = device.name().starts_with("Intel"); + Self { + broken_viewport_near_depth: is_intel + && !device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v4), + broken_layered_clear_image: is_intel, + } + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/command.rs b/third_party/rust/wgpu-hal/src/metal/command.rs new file mode 100644 index 0000000000..866e163a64 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/command.rs @@ -0,0 +1,977 @@ +use super::{conv, AsNative}; +use std::{borrow::Cow, mem, ops::Range}; + +// has to match `Temp::binding_sizes` +const WORD_SIZE: usize = 4; + +impl Default for super::CommandState { + fn default() -> Self { + Self { + blit: None, + render: None, + compute: None, + raw_primitive_type: metal::MTLPrimitiveType::Point, + index: None, + raw_wg_size: metal::MTLSize::new(0, 0, 0), + stage_infos: Default::default(), + storage_buffer_length_map: Default::default(), + work_group_memory_sizes: Vec::new(), + push_constants: Vec::new(), + } + } +} + +impl super::CommandEncoder { + fn enter_blit(&mut self) -> &metal::BlitCommandEncoderRef { + if self.state.blit.is_none() { + debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); + objc::rc::autoreleasepool(|| { + let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); + self.state.blit = Some(cmd_buf.new_blit_command_encoder().to_owned()); + }); + } + self.state.blit.as_ref().unwrap() + } + + pub(super) fn leave_blit(&mut self) { + if let Some(encoder) = self.state.blit.take() { + encoder.end_encoding(); + } + } + + fn enter_any(&mut self) -> Option<&metal::CommandEncoderRef> { + if let Some(ref encoder) = self.state.render { + Some(encoder) + } else if let Some(ref encoder) = self.state.compute { + Some(encoder) + } else if let Some(ref encoder) = self.state.blit { + Some(encoder) + } else { + None + } + } + + fn begin_pass(&mut self) { + self.state.reset(); + self.leave_blit(); + } +} + +impl super::CommandState { + fn reset(&mut self) { + self.storage_buffer_length_map.clear(); + self.stage_infos.vs.clear(); + self.stage_infos.fs.clear(); + self.stage_infos.cs.clear(); + self.work_group_memory_sizes.clear(); + self.push_constants.clear(); + } + + fn 
make_sizes_buffer_update<'a>( + &self, + stage: naga::ShaderStage, + result_sizes: &'a mut Vec<u32>, + ) -> Option<(u32, &'a [u32])> { + let stage_info = &self.stage_infos[stage]; + let slot = stage_info.sizes_slot?; + + result_sizes.clear(); + result_sizes.extend(stage_info.sized_bindings.iter().map(|br| { + self.storage_buffer_length_map + .get(br) + .map(|size| u32::try_from(size.get()).unwrap_or(u32::MAX)) + .unwrap_or_default() + })); + + if !result_sizes.is_empty() { + Some((slot as _, result_sizes)) + } else { + None + } + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let queue = &self.raw_queue.lock(); + let retain_references = self.shared.settings.retain_command_buffer_references; + let raw = objc::rc::autoreleasepool(move || { + let cmd_buf_ref = if retain_references { + queue.new_command_buffer() + } else { + queue.new_command_buffer_with_unretained_references() + }; + if let Some(label) = label { + cmd_buf_ref.set_label(label); + } + cmd_buf_ref.to_owned() + }); + + self.raw_cmd_buf = Some(raw); + + Ok(()) + } + + unsafe fn discard_encoding(&mut self) { + self.leave_blit(); + // when discarding, we don't have a guarantee that + // everything is in a good state, so check carefully + if let Some(encoder) = self.state.render.take() { + encoder.end_encoding(); + } + if let Some(encoder) = self.state.compute.take() { + encoder.end_encoding(); + } + self.raw_cmd_buf = None; + } + + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + self.leave_blit(); + assert!(self.state.render.is_none()); + assert!(self.state.compute.is_none()); + Ok(super::CommandBuffer { + raw: self.raw_cmd_buf.take().unwrap(), + }) + } + + unsafe fn reset_all<I>(&mut self, _cmd_bufs: I) + where + I: Iterator<Item = super::CommandBuffer>, + { + //do nothing + } + + unsafe fn transition_buffers<'a, T>(&mut self, _barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + } + + unsafe fn transition_textures<'a, T>(&mut self, _barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + let encoder = self.enter_blit(); + encoder.fill_buffer(&buffer.raw, conv::map_range(&range), 0); + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + encoder.copy_from_buffer( + &src.raw, + copy.src_offset, + &dst.raw, + copy.dst_offset, + copy.size.get(), + ); + } + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let dst_texture = if src.format != dst.format { + let raw_format = self.shared.private_caps.map_format(src.format); + Cow::Owned(objc::rc::autoreleasepool(|| { + dst.raw.new_texture_view(raw_format) + })) + } else { + Cow::Borrowed(&dst.raw) + }; + let encoder = self.enter_blit(); + for copy in regions { + let src_origin = conv::map_origin(©.src_base.origin); + let dst_origin = conv::map_origin(©.dst_base.origin); + // no clamping is done: Metal expects physical sizes here + let extent = conv::map_copy_extent(©.size); + encoder.copy_from_texture( + &src.raw, + 
copy.src_base.array_layer as u64, + copy.src_base.mip_level as u64, + src_origin, + extent, + &dst_texture, + copy.dst_base.array_layer as u64, + copy.dst_base.mip_level as u64, + dst_origin, + ); + } + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + let dst_origin = conv::map_origin(©.texture_base.origin); + // Metal expects buffer-texture copies in virtual sizes + let extent = copy + .texture_base + .max_copy_size(&dst.copy_size) + .min(©.size); + let bytes_per_row = copy.buffer_layout.bytes_per_row.unwrap_or(0) as u64; + let image_byte_stride = if extent.depth > 1 { + copy.buffer_layout + .rows_per_image + .map_or(0, |v| v as u64 * bytes_per_row) + } else { + // Don't pass a stride when updating a single layer, otherwise metal validation + // fails when updating a subset of the image due to the stride being larger than + // the amount of data to copy. + 0 + }; + encoder.copy_from_buffer_to_texture( + &src.raw, + copy.buffer_layout.offset, + bytes_per_row, + image_byte_stride, + conv::map_copy_extent(&extent), + &dst.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + dst_origin, + conv::get_blit_option(dst.format, copy.texture_base.aspect), + ); + } + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + _src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let encoder = self.enter_blit(); + for copy in regions { + let src_origin = conv::map_origin(©.texture_base.origin); + // Metal expects texture-buffer copies in virtual sizes + let extent = copy + .texture_base + .max_copy_size(&src.copy_size) + .min(©.size); + let bytes_per_row = copy.buffer_layout.bytes_per_row.unwrap_or(0) as u64; + let bytes_per_image = copy + .buffer_layout + .rows_per_image + .map_or(0, |v| v as u64 * bytes_per_row); + encoder.copy_from_texture_to_buffer( + &src.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + src_origin, + conv::map_copy_extent(&extent), + &dst.raw, + copy.buffer_layout.offset, + bytes_per_row, + bytes_per_image, + conv::get_blit_option(src.format, copy.texture_base.aspect), + ); + } + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + match set.ty { + wgt::QueryType::Occlusion => { + self.state + .render + .as_ref() + .unwrap() + .set_visibility_result_mode( + metal::MTLVisibilityResultMode::Boolean, + index as u64 * crate::QUERY_SIZE, + ); + } + _ => {} + } + } + unsafe fn end_query(&mut self, set: &super::QuerySet, _index: u32) { + match set.ty { + wgt::QueryType::Occlusion => { + self.state + .render + .as_ref() + .unwrap() + .set_visibility_result_mode(metal::MTLVisibilityResultMode::Disabled, 0); + } + _ => {} + } + } + unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) {} + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range<u32>) { + let encoder = self.enter_blit(); + let raw_range = metal::NSRange { + location: range.start as u64 * crate::QUERY_SIZE, + length: (range.end - range.start) as u64 * crate::QUERY_SIZE, + }; + encoder.fill_buffer(&set.raw_buffer, raw_range, 0); + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + _: wgt::BufferSize, // Metal 
doesn't support queries that are bigger than a single element are not supported + ) { + let encoder = self.enter_blit(); + let size = (range.end - range.start) as u64 * crate::QUERY_SIZE; + encoder.copy_from_buffer( + &set.raw_buffer, + range.start as u64 * crate::QUERY_SIZE, + &buffer.raw, + offset, + size, + ); + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + self.begin_pass(); + self.state.index = None; + + objc::rc::autoreleasepool(|| { + let descriptor = metal::RenderPassDescriptor::new(); + //TODO: set visibility results buffer + + for (i, at) in desc.color_attachments.iter().enumerate() { + if let Some(at) = at.as_ref() { + let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + if let Some(ref resolve) = at.resolve_target { + //Note: the selection of levels and slices is already handled by `TextureView` + at_descriptor.set_resolve_texture(Some(&resolve.view.raw)); + } + let load_action = if at.ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_color(conv::map_clear_color(&at.clear_value)); + metal::MTLLoadAction::Clear + }; + let store_action = conv::map_store_action( + at.ops.contains(crate::AttachmentOps::STORE), + at.resolve_target.is_some(), + ); + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + } + + if let Some(ref at) = desc.depth_stencil_attachment { + if at.target.view.aspects.contains(crate::FormatAspects::DEPTH) { + let at_descriptor = descriptor.depth_attachment().unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + + let load_action = if at.depth_ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_depth(at.clear_value.0 as f64); + metal::MTLLoadAction::Clear + }; + let store_action = if at.depth_ops.contains(crate::AttachmentOps::STORE) { + metal::MTLStoreAction::Store + } else { + metal::MTLStoreAction::DontCare + }; + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + if at + .target + .view + .aspects + .contains(crate::FormatAspects::STENCIL) + { + let at_descriptor = descriptor.stencil_attachment().unwrap(); + at_descriptor.set_texture(Some(&at.target.view.raw)); + + let load_action = if at.stencil_ops.contains(crate::AttachmentOps::LOAD) { + metal::MTLLoadAction::Load + } else { + at_descriptor.set_clear_stencil(at.clear_value.1); + metal::MTLLoadAction::Clear + }; + let store_action = if at.stencil_ops.contains(crate::AttachmentOps::STORE) { + metal::MTLStoreAction::Store + } else { + metal::MTLStoreAction::DontCare + }; + at_descriptor.set_load_action(load_action); + at_descriptor.set_store_action(store_action); + } + } + + let raw = self.raw_cmd_buf.as_ref().unwrap(); + let encoder = raw.new_render_command_encoder(descriptor); + if let Some(label) = desc.label { + encoder.set_label(label); + } + self.state.render = Some(encoder.to_owned()); + }); + } + + unsafe fn end_render_pass(&mut self) { + self.state.render.take().unwrap().end_encoding(); + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + group_index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let bg_info = &layout.bind_group_infos[group_index as usize]; + + if let Some(ref encoder) = self.state.render { + let mut changes_sizes_buffer = false; + for index in 
0..group.counters.vs.buffers { + let buf = &group.buffers[index as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_vertex_buffer( + (bg_info.base_resource_indices.vs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Vertex, + &mut self.temp.binding_sizes, + ) { + encoder.set_vertex_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + changes_sizes_buffer = false; + for index in 0..group.counters.fs.buffers { + let buf = &group.buffers[(group.counters.vs.buffers + index) as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_fragment_buffer( + (bg_info.base_resource_indices.fs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Fragment, + &mut self.temp.binding_sizes, + ) { + encoder.set_fragment_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + for index in 0..group.counters.vs.samplers { + let res = group.samplers[index as usize]; + encoder.set_vertex_sampler_state( + (bg_info.base_resource_indices.vs.samplers + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.fs.samplers { + let res = group.samplers[(group.counters.vs.samplers + index) as usize]; + encoder.set_fragment_sampler_state( + (bg_info.base_resource_indices.fs.samplers + index) as u64, + Some(res.as_native()), + ); + } + + for index in 0..group.counters.vs.textures { + let res = group.textures[index as usize]; + encoder.set_vertex_texture( + (bg_info.base_resource_indices.vs.textures + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.fs.textures { + let res = group.textures[(group.counters.vs.textures + index) as usize]; + encoder.set_fragment_texture( + (bg_info.base_resource_indices.fs.textures + index) as u64, + Some(res.as_native()), + ); + } + } + + if let Some(ref encoder) = self.state.compute { + let index_base = super::ResourceData { + buffers: group.counters.vs.buffers + group.counters.fs.buffers, + samplers: group.counters.vs.samplers + group.counters.fs.samplers, + textures: group.counters.vs.textures + group.counters.fs.textures, + }; + + let mut changes_sizes_buffer = false; + for index in 0..group.counters.cs.buffers { + let buf = &group.buffers[(index_base.buffers + index) as usize]; + let mut offset = buf.offset; + if let Some(dyn_index) = buf.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_buffer( + (bg_info.base_resource_indices.cs.buffers + index) as u64, + Some(buf.ptr.as_native()), + offset, + ); + if let Some(size) = 
buf.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: buf.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + if changes_sizes_buffer { + if let Some((index, sizes)) = self.state.make_sizes_buffer_update( + naga::ShaderStage::Compute, + &mut self.temp.binding_sizes, + ) { + encoder.set_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + + for index in 0..group.counters.cs.samplers { + let res = group.samplers[(index_base.samplers + index) as usize]; + encoder.set_sampler_state( + (bg_info.base_resource_indices.cs.samplers + index) as u64, + Some(res.as_native()), + ); + } + for index in 0..group.counters.cs.textures { + let res = group.textures[(index_base.textures + index) as usize]; + encoder.set_texture( + (bg_info.base_resource_indices.cs.textures + index) as u64, + Some(res.as_native()), + ); + } + } + } + + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + let state_pc = &mut self.state.push_constants; + if state_pc.len() < layout.total_push_constants as usize { + state_pc.resize(layout.total_push_constants as usize, 0); + } + assert_eq!(offset as usize % WORD_SIZE, 0); + + let offset = offset as usize / WORD_SIZE; + state_pc[offset..offset + data.len()].copy_from_slice(data); + + if stages.contains(wgt::ShaderStages::COMPUTE) { + self.state.compute.as_ref().unwrap().set_bytes( + layout.push_constants_infos.cs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::VERTEX) { + self.state.render.as_ref().unwrap().set_vertex_bytes( + layout.push_constants_infos.vs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + if stages.contains(wgt::ShaderStages::FRAGMENT) { + self.state.render.as_ref().unwrap().set_fragment_bytes( + layout.push_constants_infos.fs.unwrap().buffer_index as _, + (layout.total_push_constants as usize * WORD_SIZE) as _, + state_pc.as_ptr() as _, + ) + } + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + if let Some(encoder) = self.enter_any() { + encoder.insert_debug_signpost(label); + } + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + if let Some(encoder) = self.enter_any() { + encoder.push_debug_group(group_label); + } else if let Some(ref buf) = self.raw_cmd_buf { + buf.push_debug_group(group_label); + } + } + unsafe fn end_debug_marker(&mut self) { + if let Some(encoder) = self.enter_any() { + encoder.pop_debug_group(); + } else if let Some(ref buf) = self.raw_cmd_buf { + buf.pop_debug_group(); + } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + self.state.raw_primitive_type = pipeline.raw_primitive_type; + self.state.stage_infos.vs.assign_from(&pipeline.vs_info); + match pipeline.fs_info { + Some(ref info) => self.state.stage_infos.fs.assign_from(info), + None => self.state.stage_infos.fs.clear(), + } + + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_render_pipeline_state(&pipeline.raw); + encoder.set_front_facing_winding(pipeline.raw_front_winding); + encoder.set_cull_mode(pipeline.raw_cull_mode); + encoder.set_triangle_fill_mode(pipeline.raw_triangle_fill_mode); + if let Some(depth_clip) = pipeline.raw_depth_clip_mode { + 
encoder.set_depth_clip_mode(depth_clip); + } + if let Some((ref state, bias)) = pipeline.depth_stencil { + encoder.set_depth_stencil_state(state); + encoder.set_depth_bias(bias.constant as f32, bias.slope_scale, bias.clamp); + } + + { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes) + { + encoder.set_vertex_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + if pipeline.fs_lib.is_some() { + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Fragment, &mut self.temp.binding_sizes) + { + encoder.set_fragment_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + } + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + let (stride, raw_type) = match format { + wgt::IndexFormat::Uint16 => (2, metal::MTLIndexType::UInt16), + wgt::IndexFormat::Uint32 => (4, metal::MTLIndexType::UInt32), + }; + self.state.index = Some(super::IndexState { + buffer_ptr: AsNative::from(binding.buffer.raw.as_ref()), + offset: binding.offset, + stride, + raw_type, + }); + } + + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_vertex_buffer(buffer_index, Some(&binding.buffer.raw), binding.offset); + } + + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let zfar = if self.shared.disabilities.broken_viewport_near_depth { + depth_range.end - depth_range.start + } else { + depth_range.end + }; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_viewport(metal::MTLViewport { + originX: rect.x as _, + originY: rect.y as _, + width: rect.w as _, + height: rect.h as _, + znear: depth_range.start as _, + zfar: zfar as _, + }); + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + //TODO: support empty scissors by modifying the viewport + let scissor = metal::MTLScissorRect { + x: rect.x as _, + y: rect.y as _, + width: rect.w as _, + height: rect.h as _, + }; + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_scissor_rect(scissor); + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_stencil_front_back_reference_value(value, value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + let encoder = self.state.render.as_ref().unwrap(); + encoder.set_blend_color(color[0], color[1], color[2], color[3]); + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + if start_instance != 0 { + encoder.draw_primitives_instanced_base_instance( + self.state.raw_primitive_type, + start_vertex as _, + vertex_count as _, + instance_count as _, + start_instance as _, + ); + } else if instance_count != 1 { + encoder.draw_primitives_instanced( + self.state.raw_primitive_type, + start_vertex as _, + vertex_count as _, + instance_count as _, + ); + } else { + encoder.draw_primitives( + self.state.raw_primitive_type, + start_vertex as _, + vertex_count as _, + ); + } + } + + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: 
u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + let index = self.state.index.as_ref().unwrap(); + let offset = index.offset + index.stride * start_index as wgt::BufferAddress; + if base_vertex != 0 || start_instance != 0 { + encoder.draw_indexed_primitives_instanced_base_instance( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + instance_count as _, + base_vertex as _, + start_instance as _, + ); + } else if instance_count != 1 { + encoder.draw_indexed_primitives_instanced( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + instance_count as _, + ); + } else { + encoder.draw_indexed_primitives( + self.state.raw_primitive_type, + index_count as _, + index.raw_type, + index.buffer_ptr.as_native(), + offset, + ); + } + } + + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + mut offset: wgt::BufferAddress, + draw_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + for _ in 0..draw_count { + encoder.draw_primitives_indirect(self.state.raw_primitive_type, &buffer.raw, offset); + offset += mem::size_of::<wgt::DrawIndirectArgs>() as wgt::BufferAddress; + } + } + + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + mut offset: wgt::BufferAddress, + draw_count: u32, + ) { + let encoder = self.state.render.as_ref().unwrap(); + let index = self.state.index.as_ref().unwrap(); + for _ in 0..draw_count { + encoder.draw_indexed_primitives_indirect( + self.state.raw_primitive_type, + index.raw_type, + index.buffer_ptr.as_native(), + index.offset, + &buffer.raw, + offset, + ); + offset += mem::size_of::<wgt::DrawIndexedIndirectArgs>() as wgt::BufferAddress; + } + } + + unsafe fn draw_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + //TODO + } + unsafe fn draw_indexed_indirect_count( + &mut self, + _buffer: &super::Buffer, + _offset: wgt::BufferAddress, + _count_buffer: &super::Buffer, + _count_offset: wgt::BufferAddress, + _max_count: u32, + ) { + //TODO + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + self.begin_pass(); + + let raw = self.raw_cmd_buf.as_ref().unwrap(); + objc::rc::autoreleasepool(|| { + let encoder = raw.new_compute_command_encoder(); + if let Some(label) = desc.label { + encoder.set_label(label); + } + self.state.compute = Some(encoder.to_owned()); + }); + } + unsafe fn end_compute_pass(&mut self) { + self.state.compute.take().unwrap().end_encoding(); + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + self.state.raw_wg_size = pipeline.work_group_size; + self.state.stage_infos.cs.assign_from(&pipeline.cs_info); + + let encoder = self.state.compute.as_ref().unwrap(); + encoder.set_compute_pipeline_state(&pipeline.raw); + + if let Some((index, sizes)) = self + .state + .make_sizes_buffer_update(naga::ShaderStage::Compute, &mut self.temp.binding_sizes) + { + encoder.set_bytes( + index as _, + (sizes.len() * WORD_SIZE) as u64, + sizes.as_ptr() as _, + ); + } + + // update the threadgroup memory sizes + while self.state.work_group_memory_sizes.len() < pipeline.work_group_memory_sizes.len() { + self.state.work_group_memory_sizes.push(0); + } + for (index, (cur_size, pipeline_size)) in self + .state + 
.work_group_memory_sizes + .iter_mut() + .zip(pipeline.work_group_memory_sizes.iter()) + .enumerate() + { + const ALIGN_MASK: u32 = 0xF; // must be a multiple of 16 bytes + let size = ((*pipeline_size - 1) | ALIGN_MASK) + 1; + if *cur_size != size { + *cur_size = size; + encoder.set_threadgroup_memory_length(index as _, size as _); + } + } + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + let encoder = self.state.compute.as_ref().unwrap(); + let raw_count = metal::MTLSize { + width: count[0] as u64, + height: count[1] as u64, + depth: count[2] as u64, + }; + encoder.dispatch_thread_groups(raw_count, self.state.raw_wg_size); + } + + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + let encoder = self.state.compute.as_ref().unwrap(); + encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size); + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/conv.rs b/third_party/rust/wgpu-hal/src/metal/conv.rs new file mode 100644 index 0000000000..a1ceb287ab --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/conv.rs @@ -0,0 +1,324 @@ +pub fn map_texture_usage( + format: wgt::TextureFormat, + usage: crate::TextureUses, +) -> metal::MTLTextureUsage { + use crate::TextureUses as Tu; + + let mut mtl_usage = metal::MTLTextureUsage::Unknown; + + mtl_usage.set( + metal::MTLTextureUsage::RenderTarget, + usage.intersects(Tu::COLOR_TARGET | Tu::DEPTH_STENCIL_READ | Tu::DEPTH_STENCIL_WRITE), + ); + mtl_usage.set( + metal::MTLTextureUsage::ShaderRead, + usage.intersects( + Tu::RESOURCE | Tu::DEPTH_STENCIL_READ | Tu::STORAGE_READ | Tu::STORAGE_READ_WRITE, + ), + ); + mtl_usage.set( + metal::MTLTextureUsage::ShaderWrite, + usage.intersects(Tu::STORAGE_READ_WRITE), + ); + // needed for combined depth/stencil formats since we might + // create a stencil-only view from them + mtl_usage.set( + metal::MTLTextureUsage::PixelFormatView, + format.is_combined_depth_stencil_format(), + ); + + mtl_usage +} + +pub fn map_texture_view_dimension(dim: wgt::TextureViewDimension) -> metal::MTLTextureType { + use metal::MTLTextureType::*; + use wgt::TextureViewDimension as Tvd; + match dim { + Tvd::D1 => D1, + Tvd::D2 => D2, + Tvd::D2Array => D2Array, + Tvd::D3 => D3, + Tvd::Cube => Cube, + Tvd::CubeArray => CubeArray, + } +} + +pub fn map_compare_function(fun: wgt::CompareFunction) -> metal::MTLCompareFunction { + use metal::MTLCompareFunction::*; + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => Never, + Cf::Less => Less, + Cf::LessEqual => LessEqual, + Cf::Equal => Equal, + Cf::GreaterEqual => GreaterEqual, + Cf::Greater => Greater, + Cf::NotEqual => NotEqual, + Cf::Always => Always, + } +} + +pub fn map_filter_mode(filter: wgt::FilterMode) -> metal::MTLSamplerMinMagFilter { + use metal::MTLSamplerMinMagFilter::*; + match filter { + wgt::FilterMode::Nearest => Nearest, + wgt::FilterMode::Linear => Linear, + } +} + +pub fn map_address_mode(address: wgt::AddressMode) -> metal::MTLSamplerAddressMode { + use metal::MTLSamplerAddressMode::*; + use wgt::AddressMode as Fm; + match address { + Fm::Repeat => Repeat, + Fm::MirrorRepeat => MirrorRepeat, + Fm::ClampToEdge => ClampToEdge, + Fm::ClampToBorder => ClampToBorderColor, + //Fm::MirrorClamp => MirrorClampToEdge, + } +} + +pub fn map_border_color(border_color: wgt::SamplerBorderColor) -> metal::MTLSamplerBorderColor { + use metal::MTLSamplerBorderColor::*; + match border_color { + wgt::SamplerBorderColor::TransparentBlack => TransparentBlack, + wgt::SamplerBorderColor::OpaqueBlack => 
OpaqueBlack, + wgt::SamplerBorderColor::OpaqueWhite => OpaqueWhite, + wgt::SamplerBorderColor::Zero => unreachable!(), + } +} + +pub fn map_primitive_topology( + topology: wgt::PrimitiveTopology, +) -> (metal::MTLPrimitiveTopologyClass, metal::MTLPrimitiveType) { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => ( + metal::MTLPrimitiveTopologyClass::Point, + metal::MTLPrimitiveType::Point, + ), + Pt::LineList => ( + metal::MTLPrimitiveTopologyClass::Line, + metal::MTLPrimitiveType::Line, + ), + Pt::LineStrip => ( + metal::MTLPrimitiveTopologyClass::Line, + metal::MTLPrimitiveType::LineStrip, + ), + Pt::TriangleList => ( + metal::MTLPrimitiveTopologyClass::Triangle, + metal::MTLPrimitiveType::Triangle, + ), + Pt::TriangleStrip => ( + metal::MTLPrimitiveTopologyClass::Triangle, + metal::MTLPrimitiveType::TriangleStrip, + ), + } +} + +pub fn map_color_write(mask: wgt::ColorWrites) -> metal::MTLColorWriteMask { + let mut raw_mask = metal::MTLColorWriteMask::empty(); + + if mask.contains(wgt::ColorWrites::RED) { + raw_mask |= metal::MTLColorWriteMask::Red; + } + if mask.contains(wgt::ColorWrites::GREEN) { + raw_mask |= metal::MTLColorWriteMask::Green; + } + if mask.contains(wgt::ColorWrites::BLUE) { + raw_mask |= metal::MTLColorWriteMask::Blue; + } + if mask.contains(wgt::ColorWrites::ALPHA) { + raw_mask |= metal::MTLColorWriteMask::Alpha; + } + + raw_mask +} + +pub fn map_blend_factor(factor: wgt::BlendFactor) -> metal::MTLBlendFactor { + use metal::MTLBlendFactor::*; + use wgt::BlendFactor as Bf; + + match factor { + Bf::Zero => Zero, + Bf::One => One, + Bf::Src => SourceColor, + Bf::OneMinusSrc => OneMinusSourceColor, + Bf::Dst => DestinationColor, + Bf::OneMinusDst => OneMinusDestinationColor, + Bf::SrcAlpha => SourceAlpha, + Bf::OneMinusSrcAlpha => OneMinusSourceAlpha, + Bf::DstAlpha => DestinationAlpha, + Bf::OneMinusDstAlpha => OneMinusDestinationAlpha, + Bf::Constant => BlendColor, + Bf::OneMinusConstant => OneMinusBlendColor, + //Bf::ConstantAlpha => BlendAlpha, + //Bf::OneMinusConstantAlpha => OneMinusBlendAlpha, + Bf::SrcAlphaSaturated => SourceAlphaSaturated, + //Bf::Src1 => Source1Color, + //Bf::OneMinusSrc1 => OneMinusSource1Color, + //Bf::Src1Alpha => Source1Alpha, + //Bf::OneMinusSrc1Alpha => OneMinusSource1Alpha, + } +} + +pub fn map_blend_op(operation: wgt::BlendOperation) -> metal::MTLBlendOperation { + use metal::MTLBlendOperation::*; + use wgt::BlendOperation as Bo; + + match operation { + Bo::Add => Add, + Bo::Subtract => Subtract, + Bo::ReverseSubtract => ReverseSubtract, + Bo::Min => Min, + Bo::Max => Max, + } +} + +pub fn map_blend_component( + component: &wgt::BlendComponent, +) -> ( + metal::MTLBlendOperation, + metal::MTLBlendFactor, + metal::MTLBlendFactor, +) { + ( + map_blend_op(component.operation), + map_blend_factor(component.src_factor), + map_blend_factor(component.dst_factor), + ) +} + +pub fn map_vertex_format(format: wgt::VertexFormat) -> metal::MTLVertexFormat { + use metal::MTLVertexFormat::*; + use wgt::VertexFormat as Vf; + + match format { + Vf::Unorm8x2 => UChar2Normalized, + Vf::Snorm8x2 => Char2Normalized, + Vf::Uint8x2 => UChar2, + Vf::Sint8x2 => Char2, + Vf::Unorm8x4 => UChar4Normalized, + Vf::Snorm8x4 => Char4Normalized, + Vf::Uint8x4 => UChar4, + Vf::Sint8x4 => Char4, + Vf::Unorm16x2 => UShort2Normalized, + Vf::Snorm16x2 => Short2Normalized, + Vf::Uint16x2 => UShort2, + Vf::Sint16x2 => Short2, + Vf::Float16x2 => Half2, + Vf::Unorm16x4 => UShort4Normalized, + Vf::Snorm16x4 => Short4Normalized, + Vf::Uint16x4 => UShort4, + 
Vf::Sint16x4 => Short4, + Vf::Float16x4 => Half4, + Vf::Uint32 => UInt, + Vf::Sint32 => Int, + Vf::Float32 => Float, + Vf::Uint32x2 => UInt2, + Vf::Sint32x2 => Int2, + Vf::Float32x2 => Float2, + Vf::Uint32x3 => UInt3, + Vf::Sint32x3 => Int3, + Vf::Float32x3 => Float3, + Vf::Uint32x4 => UInt4, + Vf::Sint32x4 => Int4, + Vf::Float32x4 => Float4, + Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + } +} + +pub fn map_step_mode(mode: wgt::VertexStepMode) -> metal::MTLVertexStepFunction { + match mode { + wgt::VertexStepMode::Vertex => metal::MTLVertexStepFunction::PerVertex, + wgt::VertexStepMode::Instance => metal::MTLVertexStepFunction::PerInstance, + } +} + +pub fn map_stencil_op(op: wgt::StencilOperation) -> metal::MTLStencilOperation { + use metal::MTLStencilOperation::*; + use wgt::StencilOperation as So; + + match op { + So::Keep => Keep, + So::Zero => Zero, + So::Replace => Replace, + So::IncrementClamp => IncrementClamp, + So::IncrementWrap => IncrementWrap, + So::DecrementClamp => DecrementClamp, + So::DecrementWrap => DecrementWrap, + So::Invert => Invert, + } +} + +pub fn map_winding(winding: wgt::FrontFace) -> metal::MTLWinding { + match winding { + wgt::FrontFace::Cw => metal::MTLWinding::Clockwise, + wgt::FrontFace::Ccw => metal::MTLWinding::CounterClockwise, + } +} + +pub fn map_cull_mode(face: Option<wgt::Face>) -> metal::MTLCullMode { + match face { + None => metal::MTLCullMode::None, + Some(wgt::Face::Front) => metal::MTLCullMode::Front, + Some(wgt::Face::Back) => metal::MTLCullMode::Back, + } +} + +pub fn map_range(range: &crate::MemoryRange) -> metal::NSRange { + metal::NSRange { + location: range.start, + length: range.end - range.start, + } +} + +pub fn map_copy_extent(extent: &crate::CopyExtent) -> metal::MTLSize { + metal::MTLSize { + width: extent.width as u64, + height: extent.height as u64, + depth: extent.depth as u64, + } +} + +pub fn map_origin(origin: &wgt::Origin3d) -> metal::MTLOrigin { + metal::MTLOrigin { + x: origin.x as u64, + y: origin.y as u64, + z: origin.z as u64, + } +} + +pub fn map_store_action(store: bool, resolve: bool) -> metal::MTLStoreAction { + use metal::MTLStoreAction::*; + match (store, resolve) { + (true, true) => StoreAndMultisampleResolve, + (false, true) => MultisampleResolve, + (true, false) => Store, + (false, false) => DontCare, + } +} + +pub fn map_clear_color(color: &wgt::Color) -> metal::MTLClearColor { + metal::MTLClearColor { + red: color.r, + green: color.g, + blue: color.b, + alpha: color.a, + } +} + +pub fn get_blit_option( + format: wgt::TextureFormat, + aspect: crate::FormatAspects, +) -> metal::MTLBlitOption { + if format.is_combined_depth_stencil_format() { + match aspect { + crate::FormatAspects::DEPTH => metal::MTLBlitOption::DepthFromDepthStencil, + crate::FormatAspects::STENCIL => metal::MTLBlitOption::StencilFromDepthStencil, + _ => unreachable!(), + } + } else { + metal::MTLBlitOption::None + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/device.rs b/third_party/rust/wgpu-hal/src/metal/device.rs new file mode 100644 index 0000000000..f8a1ad9a9f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/device.rs @@ -0,0 +1,1176 @@ +use parking_lot::Mutex; +use std::{ + num::NonZeroU32, + ptr, + sync::{atomic, Arc}, + thread, time, +}; + +use super::conv; +use crate::auxil::map_naga_stage; + +type DeviceResult<T> = Result<T, crate::DeviceError>; + +struct CompiledShader { + library: metal::Library, + function: metal::Function, + wg_size: metal::MTLSize, + wg_memory_sizes: Vec<u32>, 
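+    // Each entry above is the byte size of one `var<workgroup>` global used by the
+    // entry point; at command-encoding time these are rounded up to 16-byte multiples
+    // and passed to `set_threadgroup_memory_length` (see the dispatch path in command.rs).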
+ + /// Bindings of WGSL `storage` globals that contain variable-sized arrays. + /// + /// In order to implement bounds checks and the `arrayLength` function for + /// WGSL runtime-sized arrays, we pass the entry point a struct with a + /// member for each global variable that contains such an array. That member + /// is a `u32` holding the variable's total size in bytes---which is simply + /// the size of the `Buffer` supplying that variable's contents for the + /// draw call. + sized_bindings: Vec<naga::ResourceBinding>, + + immutable_buffer_mask: usize, +} + +fn create_stencil_desc( + face: &wgt::StencilFaceState, + read_mask: u32, + write_mask: u32, +) -> metal::StencilDescriptor { + let desc = metal::StencilDescriptor::new(); + desc.set_stencil_compare_function(conv::map_compare_function(face.compare)); + desc.set_read_mask(read_mask); + desc.set_write_mask(write_mask); + desc.set_stencil_failure_operation(conv::map_stencil_op(face.fail_op)); + desc.set_depth_failure_operation(conv::map_stencil_op(face.depth_fail_op)); + desc.set_depth_stencil_pass_operation(conv::map_stencil_op(face.pass_op)); + desc +} + +fn create_depth_stencil_desc(state: &wgt::DepthStencilState) -> metal::DepthStencilDescriptor { + let desc = metal::DepthStencilDescriptor::new(); + desc.set_depth_compare_function(conv::map_compare_function(state.depth_compare)); + desc.set_depth_write_enabled(state.depth_write_enabled); + let s = &state.stencil; + if s.is_enabled() { + let front_desc = create_stencil_desc(&s.front, s.read_mask, s.write_mask); + desc.set_front_face_stencil(Some(&front_desc)); + let back_desc = create_stencil_desc(&s.back, s.read_mask, s.write_mask); + desc.set_back_face_stencil(Some(&back_desc)); + } + desc +} + +impl super::Device { + fn load_shader( + &self, + stage: &crate::ProgrammableStage<super::Api>, + layout: &super::PipelineLayout, + primitive_class: metal::MTLPrimitiveTopologyClass, + naga_stage: naga::ShaderStage, + ) -> Result<CompiledShader, crate::PipelineError> { + let stage_bit = map_naga_stage(naga_stage); + + let module = &stage.module.naga.module; + let ep_resources = &layout.per_stage_map[naga_stage]; + + let bounds_check_policy = if stage.module.runtime_checks { + naga::proc::BoundsCheckPolicy::ReadZeroSkipWrite + } else { + naga::proc::BoundsCheckPolicy::Unchecked + }; + + let options = naga::back::msl::Options { + lang_version: match self.shared.private_caps.msl_version { + metal::MTLLanguageVersion::V1_0 => (1, 0), + metal::MTLLanguageVersion::V1_1 => (1, 1), + metal::MTLLanguageVersion::V1_2 => (1, 2), + metal::MTLLanguageVersion::V2_0 => (2, 0), + metal::MTLLanguageVersion::V2_1 => (2, 1), + metal::MTLLanguageVersion::V2_2 => (2, 2), + metal::MTLLanguageVersion::V2_3 => (2, 3), + metal::MTLLanguageVersion::V2_4 => (2, 4), + }, + inline_samplers: Default::default(), + spirv_cross_compatibility: false, + fake_missing_bindings: false, + per_entry_point_map: naga::back::msl::EntryPointResourceMap::from([( + stage.entry_point.to_string(), + ep_resources.clone(), + )]), + bounds_check_policies: naga::proc::BoundsCheckPolicies { + index: bounds_check_policy, + buffer: bounds_check_policy, + image: bounds_check_policy, + // TODO: support bounds checks on binding arrays + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }, + zero_initialize_workgroup_memory: true, + }; + + let pipeline_options = naga::back::msl::PipelineOptions { + allow_point_size: match primitive_class { + metal::MTLPrimitiveTopologyClass::Point => true, + _ => false, + }, + }; + + let (source, info) = 
naga::back::msl::write_string( + module, + &stage.module.naga.info, + &options, + &pipeline_options, + ) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?; + + log::debug!( + "Naga generated shader for entry point '{}' and stage {:?}\n{}", + stage.entry_point, + naga_stage, + &source + ); + + let options = metal::CompileOptions::new(); + options.set_language_version(self.shared.private_caps.msl_version); + + if self.shared.private_caps.supports_preserve_invariance { + options.set_preserve_invariance(true); + } + + let library = self + .shared + .device + .lock() + .new_library_with_source(source.as_ref(), &options) + .map_err(|err| { + log::warn!("Naga generated shader:\n{}", source); + crate::PipelineError::Linkage(stage_bit, format!("Metal: {}", err)) + })?; + + let ep_index = module + .entry_points + .iter() + .position(|ep| ep.stage == naga_stage && ep.name == stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint(naga_stage))?; + let ep = &module.entry_points[ep_index]; + let ep_name = info.entry_point_names[ep_index] + .as_ref() + .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("{}", e)))?; + + let wg_size = metal::MTLSize { + width: ep.workgroup_size[0] as _, + height: ep.workgroup_size[1] as _, + depth: ep.workgroup_size[2] as _, + }; + + let function = library.get_function(ep_name, None).map_err(|e| { + log::error!("get_function: {:?}", e); + crate::PipelineError::EntryPoint(naga_stage) + })?; + + // collect sizes indices, immutable buffers, and work group memory sizes + let ep_info = &stage.module.naga.info.get_entry_point(ep_index); + let mut wg_memory_sizes = Vec::new(); + let mut sized_bindings = Vec::new(); + let mut immutable_buffer_mask = 0; + for (var_handle, var) in module.global_variables.iter() { + match var.space { + naga::AddressSpace::WorkGroup => { + if !ep_info[var_handle].is_empty() { + let size = module.types[var.ty].inner.size(&module.constants); + wg_memory_sizes.push(size); + } + } + naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. } => { + let br = match var.binding { + Some(ref br) => br.clone(), + None => continue, + }; + let storage_access_store = match var.space { + naga::AddressSpace::Storage { access } => { + access.contains(naga::StorageAccess::STORE) + } + _ => false, + }; + + // check for an immutable buffer + if !ep_info[var_handle].is_empty() && !storage_access_store { + let slot = ep_resources.resources[&br].buffer.unwrap(); + immutable_buffer_mask |= 1 << slot; + } + + let mut dynamic_array_container_ty = var.ty; + if let naga::TypeInner::Struct { ref members, .. } = module.types[var.ty].inner + { + dynamic_array_container_ty = members.last().unwrap().ty; + } + if let naga::TypeInner::Array { + size: naga::ArraySize::Dynamic, + .. 
+ } = module.types[dynamic_array_container_ty].inner + { + sized_bindings.push(br); + } + } + _ => {} + } + } + + Ok(CompiledShader { + library, + function, + wg_size, + wg_memory_sizes, + sized_bindings, + immutable_buffer_mask, + }) + } + + fn set_buffers_mutability( + buffers: &metal::PipelineBufferDescriptorArrayRef, + mut immutable_mask: usize, + ) { + while immutable_mask != 0 { + let slot = immutable_mask.trailing_zeros(); + immutable_mask ^= 1 << slot; + buffers + .object_at(slot as u64) + .unwrap() + .set_mutability(metal::MTLMutability::Immutable); + } + } + + pub unsafe fn texture_from_raw( + raw: metal::Texture, + format: wgt::TextureFormat, + raw_type: metal::MTLTextureType, + array_layers: u32, + mip_levels: u32, + copy_size: crate::CopyExtent, + ) -> super::Texture { + super::Texture { + raw, + format, + raw_type, + array_layers, + mip_levels, + copy_size, + } + } + + pub unsafe fn device_from_raw(raw: metal::Device, features: wgt::Features) -> super::Device { + super::Device { + shared: Arc::new(super::AdapterShared::new(raw)), + features, + } + } + + pub fn raw_device(&self) -> &Mutex<metal::Device> { + &self.shared.device + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, _queue: super::Queue) {} + + unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<super::Buffer> { + let map_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let map_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let mut options = metal::MTLResourceOptions::empty(); + options |= if map_read || map_write { + // `crate::MemoryFlags::PREFER_COHERENT` is ignored here + metal::MTLResourceOptions::StorageModeShared + } else { + metal::MTLResourceOptions::StorageModePrivate + }; + options.set( + metal::MTLResourceOptions::CPUCacheModeWriteCombined, + map_write, + ); + + //TODO: HazardTrackingModeUntracked + + objc::rc::autoreleasepool(|| { + let raw = self.shared.device.lock().new_buffer(desc.size, options); + if let Some(label) = desc.label { + raw.set_label(label); + } + Ok(super::Buffer { + raw, + size: desc.size, + }) + }) + } + unsafe fn destroy_buffer(&self, _buffer: super::Buffer) {} + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> DeviceResult<crate::BufferMapping> { + let ptr = buffer.raw.contents() as *mut u8; + assert!(!ptr.is_null()); + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize) }).unwrap(), + is_coherent: true, + }) + } + + unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) -> DeviceResult<()> { + Ok(()) + } + unsafe fn flush_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges<I>(&self, _buffer: &super::Buffer, _ranges: I) {} + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> DeviceResult<super::Texture> { + use foreign_types::ForeignTypeRef; + + let mtl_format = self.shared.private_caps.map_format(desc.format); + + objc::rc::autoreleasepool(|| { + let descriptor = metal::TextureDescriptor::new(); + + let mtl_type = match desc.dimension { + wgt::TextureDimension::D1 => metal::MTLTextureType::D1, + wgt::TextureDimension::D2 => { + if desc.sample_count > 1 { + descriptor.set_sample_count(desc.sample_count as u64); + metal::MTLTextureType::D2Multisample + } else if desc.size.depth_or_array_layers > 1 { + descriptor.set_array_length(desc.size.depth_or_array_layers as u64); + metal::MTLTextureType::D2Array + } else { + 
metal::MTLTextureType::D2 + } + } + wgt::TextureDimension::D3 => { + descriptor.set_depth(desc.size.depth_or_array_layers as u64); + metal::MTLTextureType::D3 + } + }; + + descriptor.set_texture_type(mtl_type); + descriptor.set_width(desc.size.width as u64); + descriptor.set_height(desc.size.height as u64); + descriptor.set_mipmap_level_count(desc.mip_level_count as u64); + descriptor.set_pixel_format(mtl_format); + descriptor.set_usage(conv::map_texture_usage(desc.format, desc.usage)); + descriptor.set_storage_mode(metal::MTLStorageMode::Private); + + let raw = self.shared.device.lock().new_texture(&descriptor); + if raw.as_ptr().is_null() { + return Err(crate::DeviceError::OutOfMemory); + } + if let Some(label) = desc.label { + raw.set_label(label); + } + + Ok(super::Texture { + raw, + format: desc.format, + raw_type: mtl_type, + mip_levels: desc.mip_level_count, + array_layers: desc.array_layer_count(), + copy_size: desc.copy_extent(), + }) + }) + } + + unsafe fn destroy_texture(&self, _texture: super::Texture) {} + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> DeviceResult<super::TextureView> { + let raw_type = if texture.raw_type == metal::MTLTextureType::D2Multisample { + texture.raw_type + } else { + conv::map_texture_view_dimension(desc.dimension) + }; + + let aspects = crate::FormatAspects::new(desc.format, desc.range.aspect); + + let raw_format = self + .shared + .private_caps + .map_view_format(desc.format, aspects); + + let format_equal = raw_format == self.shared.private_caps.map_format(texture.format); + let type_equal = raw_type == texture.raw_type; + let range_full_resource = + desc.range + .is_full_resource(desc.format, texture.mip_levels, texture.array_layers); + + let raw = if format_equal && type_equal && range_full_resource { + // Some images are marked as framebuffer-only, and we can't create aliases of them. + // Also helps working around Metal bugs with aliased array textures. 
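+            // Note: in this fast path no new MTLTexture view object is created;
+            // `to_owned` below effectively just retains the original texture.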
+ texture.raw.to_owned() + } else { + let mip_level_count = desc + .range + .mip_level_count + .unwrap_or(texture.mip_levels - desc.range.base_mip_level); + let array_layer_count = desc + .range + .array_layer_count + .unwrap_or(texture.array_layers - desc.range.base_array_layer); + + objc::rc::autoreleasepool(|| { + let raw = texture.raw.new_texture_view_from_slice( + raw_format, + raw_type, + metal::NSRange { + location: desc.range.base_mip_level as _, + length: mip_level_count as _, + }, + metal::NSRange { + location: desc.range.base_array_layer as _, + length: array_layer_count as _, + }, + ); + if let Some(label) = desc.label { + raw.set_label(label); + } + raw + }) + }; + + Ok(super::TextureView { raw, aspects }) + } + unsafe fn destroy_texture_view(&self, _view: super::TextureView) {} + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> DeviceResult<super::Sampler> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::SamplerDescriptor::new(); + + descriptor.set_min_filter(conv::map_filter_mode(desc.min_filter)); + descriptor.set_mag_filter(conv::map_filter_mode(desc.mag_filter)); + descriptor.set_mip_filter(match desc.mipmap_filter { + wgt::FilterMode::Nearest if desc.lod_clamp == (0.0..0.0) => { + metal::MTLSamplerMipFilter::NotMipmapped + } + wgt::FilterMode::Nearest => metal::MTLSamplerMipFilter::Nearest, + wgt::FilterMode::Linear => metal::MTLSamplerMipFilter::Linear, + }); + + let [s, t, r] = desc.address_modes; + descriptor.set_address_mode_s(conv::map_address_mode(s)); + descriptor.set_address_mode_t(conv::map_address_mode(t)); + descriptor.set_address_mode_r(conv::map_address_mode(r)); + + // Anisotropy is always supported on mac up to 16x + descriptor.set_max_anisotropy(desc.anisotropy_clamp as _); + + descriptor.set_lod_min_clamp(desc.lod_clamp.start); + descriptor.set_lod_max_clamp(desc.lod_clamp.end); + + if let Some(fun) = desc.compare { + descriptor.set_compare_function(conv::map_compare_function(fun)); + } + + if let Some(border_color) = desc.border_color { + if let wgt::SamplerBorderColor::Zero = border_color { + if s == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_s(metal::MTLSamplerAddressMode::ClampToZero); + } + + if t == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_t(metal::MTLSamplerAddressMode::ClampToZero); + } + + if r == wgt::AddressMode::ClampToBorder { + descriptor.set_address_mode_r(metal::MTLSamplerAddressMode::ClampToZero); + } + } else { + descriptor.set_border_color(conv::map_border_color(border_color)); + } + } + + if let Some(label) = desc.label { + descriptor.set_label(label); + } + let raw = self.shared.device.lock().new_sampler(&descriptor); + + Ok(super::Sampler { raw }) + }) + } + unsafe fn destroy_sampler(&self, _sampler: super::Sampler) {} + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + Ok(super::CommandEncoder { + shared: Arc::clone(&self.shared), + raw_queue: Arc::clone(&desc.queue.raw), + raw_cmd_buf: None, + state: super::CommandState::default(), + temp: super::Temp::default(), + }) + } + unsafe fn destroy_command_encoder(&self, _encoder: super::CommandEncoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> DeviceResult<super::BindGroupLayout> { + Ok(super::BindGroupLayout { + entries: Arc::from(desc.entries), + }) + } + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) 
{} + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> DeviceResult<super::PipelineLayout> { + #[derive(Debug)] + struct StageInfo { + stage: naga::ShaderStage, + counters: super::ResourceData<super::ResourceIndex>, + pc_buffer: Option<super::ResourceIndex>, + pc_limit: u32, + sizes_buffer: Option<super::ResourceIndex>, + sizes_count: u8, + resources: naga::back::msl::BindingMap, + } + + let mut stage_data = super::NAGA_STAGES.map(|stage| StageInfo { + stage, + counters: super::ResourceData::default(), + pc_buffer: None, + pc_limit: 0, + sizes_buffer: None, + sizes_count: 0, + resources: Default::default(), + }); + let mut bind_group_infos = arrayvec::ArrayVec::new(); + + // First, place the push constants + let mut total_push_constants = 0; + for info in stage_data.iter_mut() { + for pcr in desc.push_constant_ranges { + if pcr.stages.contains(map_naga_stage(info.stage)) { + debug_assert_eq!(pcr.range.end % 4, 0); + info.pc_limit = (pcr.range.end / 4).max(info.pc_limit); + } + } + + // round up the limits alignment to 4, so that it matches MTL compiler logic + const LIMIT_MASK: u32 = 3; + //TODO: figure out what and how exactly does the alignment. Clearly, it's not + // straightforward, given that value of 2 stays non-aligned. + if info.pc_limit > LIMIT_MASK { + info.pc_limit = (info.pc_limit + LIMIT_MASK) & !LIMIT_MASK; + } + + // handle the push constant buffer assignment and shader overrides + if info.pc_limit != 0 { + info.pc_buffer = Some(info.counters.buffers); + info.counters.buffers += 1; + } + + total_push_constants = total_push_constants.max(info.pc_limit); + } + + // Second, place the described resources + for (group_index, &bgl) in desc.bind_group_layouts.iter().enumerate() { + // remember where the resources for this set start at each shader stage + let base_resource_indices = stage_data.map_ref(|info| info.counters.clone()); + + for entry in bgl.entries.iter() { + if let wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + .. + } = entry.ty + { + for info in stage_data.iter_mut() { + if entry.visibility.contains(map_naga_stage(info.stage)) { + info.sizes_count += 1; + } + } + } + + for info in stage_data.iter_mut() { + if !entry.visibility.contains(map_naga_stage(info.stage)) { + continue; + } + + let mut target = naga::back::msl::BindTarget::default(); + let count = entry.count.map_or(1, NonZeroU32::get); + target.binding_array_size = entry.count.map(NonZeroU32::get); + match entry.ty { + wgt::BindingType::Buffer { ty, .. } => { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += count; + if let wgt::BufferBindingType::Storage { read_only } = ty { + target.mutable = !read_only; + } + } + wgt::BindingType::Sampler { .. } => { + target.sampler = Some(naga::back::msl::BindSamplerTarget::Resource( + info.counters.samplers as _, + )); + info.counters.samplers += count; + } + wgt::BindingType::Texture { .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += count; + } + wgt::BindingType::StorageTexture { access, .. 
} => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += count; + target.mutable = match access { + wgt::StorageTextureAccess::ReadOnly => false, + wgt::StorageTextureAccess::WriteOnly => true, + wgt::StorageTextureAccess::ReadWrite => true, + }; + } + } + + let br = naga::ResourceBinding { + group: group_index as u32, + binding: entry.binding, + }; + info.resources.insert(br, target); + } + } + + bind_group_infos.push(super::BindGroupLayoutInfo { + base_resource_indices, + }); + } + + // Finally, make sure we fit the limits + for info in stage_data.iter_mut() { + // handle the sizes buffer assignment and shader overrides + if info.sizes_count != 0 { + info.sizes_buffer = Some(info.counters.buffers); + info.counters.buffers += 1; + } + if info.counters.buffers > self.shared.private_caps.max_buffers_per_stage + || info.counters.textures > self.shared.private_caps.max_textures_per_stage + || info.counters.samplers > self.shared.private_caps.max_samplers_per_stage + { + log::error!("Resource limit exceeded: {:?}", info); + return Err(crate::DeviceError::OutOfMemory); + } + } + + let push_constants_infos = stage_data.map_ref(|info| { + info.pc_buffer.map(|buffer_index| super::PushConstantsInfo { + count: info.pc_limit, + buffer_index, + }) + }); + + let total_counters = stage_data.map_ref(|info| info.counters.clone()); + + let per_stage_map = stage_data.map(|info| naga::back::msl::EntryPointResources { + push_constant_buffer: info + .pc_buffer + .map(|buffer_index| buffer_index as naga::back::msl::Slot), + sizes_buffer: info + .sizes_buffer + .map(|buffer_index| buffer_index as naga::back::msl::Slot), + resources: info.resources, + }); + + Ok(super::PipelineLayout { + bind_group_infos, + push_constants_infos, + total_counters, + total_push_constants, + per_stage_map, + }) + } + unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: super::PipelineLayout) {} + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> DeviceResult<super::BindGroup> { + let mut bg = super::BindGroup::default(); + for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { + let stage_bit = map_naga_stage(stage); + let mut dynamic_offsets_count = 0u32; + for (entry, layout) in desc.entries.iter().zip(desc.layout.entries.iter()) { + let size = layout.count.map_or(1, |c| c.get()); + if let wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } = layout.ty + { + dynamic_offsets_count += size; + } + if !layout.visibility.contains(stage_bit) { + continue; + } + match layout.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.buffers + .extend(desc.buffers[start..end].iter().map(|source| { + // Given the restrictions on `BufferBinding::offset`, + // this should never be `None`. + let remaining_size = + wgt::BufferSize::new(source.buffer.size - source.offset); + let binding_size = match ty { + wgt::BufferBindingType::Storage { .. } => { + source.size.or(remaining_size) + } + _ => None, + }; + super::BufferResource { + ptr: source.buffer.as_raw(), + offset: source.offset, + dynamic_index: if has_dynamic_offset { + Some(dynamic_offsets_count - 1) + } else { + None + }, + binding_size, + binding_location: layout.binding, + } + })); + counter.buffers += 1; + } + wgt::BindingType::Sampler { .. 
} => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.samplers + .extend(desc.samplers[start..end].iter().map(|samp| samp.as_raw())); + counter.samplers += size; + } + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + let start = entry.resource_index as usize; + let end = start + size as usize; + bg.textures.extend( + desc.textures[start..end] + .iter() + .map(|tex| tex.view.as_raw()), + ); + counter.textures += size; + } + } + } + } + + Ok(bg) + } + + unsafe fn destroy_bind_group(&self, _group: super::BindGroup) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { + naga, + runtime_checks: desc.runtime_checks, + }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } + } + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::RenderPipelineDescriptor::new(); + + let raw_triangle_fill_mode = match desc.primitive.polygon_mode { + wgt::PolygonMode::Fill => metal::MTLTriangleFillMode::Fill, + wgt::PolygonMode::Line => metal::MTLTriangleFillMode::Lines, + wgt::PolygonMode::Point => panic!( + "{:?} is not enabled for this backend", + wgt::Features::POLYGON_MODE_POINT + ), + }; + + let (primitive_class, raw_primitive_type) = + conv::map_primitive_topology(desc.primitive.topology); + + // Vertex shader + let (vs_lib, vs_info) = { + let vs = self.load_shader( + &desc.vertex_stage, + desc.layout, + primitive_class, + naga::ShaderStage::Vertex, + )?; + + descriptor.set_vertex_function(Some(&vs.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.vertex_buffers().unwrap(), + vs.immutable_buffer_mask, + ); + } + + let info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.vs, + sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer, + sized_bindings: vs.sized_bindings, + }; + + (vs.library, info) + }; + + // Fragment shader + let (fs_lib, fs_info) = match desc.fragment_stage { + Some(ref stage) => { + let fs = self.load_shader( + stage, + desc.layout, + primitive_class, + naga::ShaderStage::Fragment, + )?; + + descriptor.set_fragment_function(Some(&fs.function)); + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.fragment_buffers().unwrap(), + fs.immutable_buffer_mask, + ); + } + + let info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.fs, + sizes_slot: desc.layout.per_stage_map.fs.sizes_buffer, + sized_bindings: fs.sized_bindings, + }; + + (Some(fs.library), Some(info)) + } + None => { + // TODO: This is a workaround for what appears to be a Metal validation bug + // A pixel format is required even though no attachments are provided + if desc.color_targets.is_empty() && desc.depth_stencil.is_none() { + descriptor + .set_depth_attachment_pixel_format(metal::MTLPixelFormat::Depth32Float); + } + (None, None) + } + }; + + for (i, ct) in desc.color_targets.iter().enumerate() { + let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + let ct = if let 
Some(color_target) = ct.as_ref() { + color_target + } else { + at_descriptor.set_pixel_format(metal::MTLPixelFormat::Invalid); + continue; + }; + + let raw_format = self.shared.private_caps.map_format(ct.format); + at_descriptor.set_pixel_format(raw_format); + at_descriptor.set_write_mask(conv::map_color_write(ct.write_mask)); + + if let Some(ref blend) = ct.blend { + at_descriptor.set_blending_enabled(true); + let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); + let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); + + at_descriptor.set_rgb_blend_operation(color_op); + at_descriptor.set_source_rgb_blend_factor(color_src); + at_descriptor.set_destination_rgb_blend_factor(color_dst); + + at_descriptor.set_alpha_blend_operation(alpha_op); + at_descriptor.set_source_alpha_blend_factor(alpha_src); + at_descriptor.set_destination_alpha_blend_factor(alpha_dst); + } + } + + let depth_stencil = match desc.depth_stencil { + Some(ref ds) => { + let raw_format = self.shared.private_caps.map_format(ds.format); + let aspects = crate::FormatAspects::from(ds.format); + if aspects.contains(crate::FormatAspects::DEPTH) { + descriptor.set_depth_attachment_pixel_format(raw_format); + } + if aspects.contains(crate::FormatAspects::STENCIL) { + descriptor.set_stencil_attachment_pixel_format(raw_format); + } + + let ds_descriptor = create_depth_stencil_desc(ds); + let raw = self + .shared + .device + .lock() + .new_depth_stencil_state(&ds_descriptor); + Some((raw, ds.bias)) + } + None => None, + }; + + if desc.layout.total_counters.vs.buffers + (desc.vertex_buffers.len() as u32) + > self.shared.private_caps.max_vertex_buffers + { + let msg = format!( + "pipeline needs too many buffers in the vertex stage: {} vertex and {} layout", + desc.vertex_buffers.len(), + desc.layout.total_counters.vs.buffers + ); + return Err(crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX, + msg, + )); + } + + if !desc.vertex_buffers.is_empty() { + let vertex_descriptor = metal::VertexDescriptor::new(); + for (i, vb) in desc.vertex_buffers.iter().enumerate() { + let buffer_index = + self.shared.private_caps.max_vertex_buffers as u64 - 1 - i as u64; + let buffer_desc = vertex_descriptor.layouts().object_at(buffer_index).unwrap(); + + // Metal expects the stride to be the actual size of the attributes. + // The semantics of array_stride == 0 can be achieved by setting + // the step function to constant and rate to 0. 
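+                    // Illustrative example (values not from this file): a zero-stride
+                    // layout whose only attribute is a `Float32x4` at offset 0 gets a
+                    // Metal stride of 16, a `Constant` step function, and a step rate
+                    // of 0, so every vertex/instance fetches the same 16 bytes.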
+ if vb.array_stride == 0 { + let stride = vb + .attributes + .iter() + .map(|attribute| attribute.offset + attribute.format.size()) + .max() + .unwrap_or(0); + buffer_desc.set_stride(wgt::math::align_to(stride, 4)); + buffer_desc.set_step_function(metal::MTLVertexStepFunction::Constant); + buffer_desc.set_step_rate(0); + } else { + buffer_desc.set_stride(vb.array_stride); + buffer_desc.set_step_function(conv::map_step_mode(vb.step_mode)); + } + + for at in vb.attributes { + let attribute_desc = vertex_descriptor + .attributes() + .object_at(at.shader_location as u64) + .unwrap(); + attribute_desc.set_format(conv::map_vertex_format(at.format)); + attribute_desc.set_buffer_index(buffer_index); + attribute_desc.set_offset(at.offset); + } + } + descriptor.set_vertex_descriptor(Some(vertex_descriptor)); + } + + if desc.multisample.count != 1 { + //TODO: handle sample mask + descriptor.set_sample_count(desc.multisample.count as u64); + descriptor + .set_alpha_to_coverage_enabled(desc.multisample.alpha_to_coverage_enabled); + //descriptor.set_alpha_to_one_enabled(desc.multisample.alpha_to_one_enabled); + } + + if let Some(name) = desc.label { + descriptor.set_label(name); + } + + let raw = self + .shared + .device + .lock() + .new_render_pipeline_state(&descriptor) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::VERTEX | wgt::ShaderStages::FRAGMENT, + format!("new_render_pipeline_state: {:?}", e), + ) + })?; + + Ok(super::RenderPipeline { + raw, + vs_lib, + fs_lib, + vs_info, + fs_info, + raw_primitive_type, + raw_triangle_fill_mode, + raw_front_winding: conv::map_winding(desc.primitive.front_face), + raw_cull_mode: conv::map_cull_mode(desc.primitive.cull_mode), + raw_depth_clip_mode: if self.features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + Some(if desc.primitive.unclipped_depth { + metal::MTLDepthClipMode::Clamp + } else { + metal::MTLDepthClipMode::Clip + }) + } else { + None + }, + depth_stencil, + }) + }) + } + unsafe fn destroy_render_pipeline(&self, _pipeline: super::RenderPipeline) {} + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + objc::rc::autoreleasepool(|| { + let descriptor = metal::ComputePipelineDescriptor::new(); + + let cs = self.load_shader( + &desc.stage, + desc.layout, + metal::MTLPrimitiveTopologyClass::Unspecified, + naga::ShaderStage::Compute, + )?; + descriptor.set_compute_function(Some(&cs.function)); + + if self.shared.private_caps.supports_mutability { + Self::set_buffers_mutability( + descriptor.buffers().unwrap(), + cs.immutable_buffer_mask, + ); + } + + let cs_info = super::PipelineStageInfo { + push_constants: desc.layout.push_constants_infos.cs, + sizes_slot: desc.layout.per_stage_map.cs.sizes_buffer, + sized_bindings: cs.sized_bindings, + }; + + if let Some(name) = desc.label { + descriptor.set_label(name); + } + + let raw = self + .shared + .device + .lock() + .new_compute_pipeline_state(&descriptor) + .map_err(|e| { + crate::PipelineError::Linkage( + wgt::ShaderStages::COMPUTE, + format!("new_compute_pipeline_state: {:?}", e), + ) + })?; + + Ok(super::ComputePipeline { + raw, + cs_info, + cs_lib: cs.library, + work_group_size: cs.wg_size, + work_group_memory_sizes: cs.wg_memory_sizes, + }) + }) + } + unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> DeviceResult<super::QuerySet> { + 
objc::rc::autoreleasepool(|| { + match desc.ty { + wgt::QueryType::Occlusion => { + let size = desc.count as u64 * crate::QUERY_SIZE; + let options = metal::MTLResourceOptions::empty(); + //TODO: HazardTrackingModeUntracked + let raw_buffer = self.shared.device.lock().new_buffer(size, options); + if let Some(label) = desc.label { + raw_buffer.set_label(label); + } + Ok(super::QuerySet { + raw_buffer, + ty: desc.ty, + }) + } + wgt::QueryType::Timestamp | wgt::QueryType::PipelineStatistics(_) => { + Err(crate::DeviceError::OutOfMemory) + } + } + }) + } + unsafe fn destroy_query_set(&self, _set: super::QuerySet) {} + + unsafe fn create_fence(&self) -> DeviceResult<super::Fence> { + Ok(super::Fence { + completed_value: Arc::new(atomic::AtomicU64::new(0)), + pending_command_buffers: Vec::new(), + }) + } + unsafe fn destroy_fence(&self, _fence: super::Fence) {} + unsafe fn get_fence_value(&self, fence: &super::Fence) -> DeviceResult<crate::FenceValue> { + let mut max_value = fence.completed_value.load(atomic::Ordering::Acquire); + for &(value, ref cmd_buf) in fence.pending_command_buffers.iter() { + if cmd_buf.status() == metal::MTLCommandBufferStatus::Completed { + max_value = value; + } + } + Ok(max_value) + } + unsafe fn wait( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> DeviceResult<bool> { + if wait_value <= fence.completed_value.load(atomic::Ordering::Acquire) { + return Ok(true); + } + + let cmd_buf = match fence + .pending_command_buffers + .iter() + .find(|&&(value, _)| value >= wait_value) + { + Some(&(_, ref cmd_buf)) => cmd_buf, + None => { + log::error!("No active command buffers for fence value {}", wait_value); + return Err(crate::DeviceError::Lost); + } + }; + + let start = time::Instant::now(); + loop { + if let metal::MTLCommandBufferStatus::Completed = cmd_buf.status() { + return Ok(true); + } + if start.elapsed().as_millis() >= timeout_ms as u128 { + return Ok(false); + } + thread::sleep(time::Duration::from_millis(1)); + } + } + + unsafe fn start_capture(&self) -> bool { + if !self.shared.private_caps.supports_capture_manager { + return false; + } + let device = self.shared.device.lock(); + let shared_capture_manager = metal::CaptureManager::shared(); + let default_capture_scope = shared_capture_manager.new_capture_scope_with_device(&device); + shared_capture_manager.set_default_capture_scope(&default_capture_scope); + shared_capture_manager.start_capture_with_scope(&default_capture_scope); + default_capture_scope.begin_scope(); + true + } + unsafe fn stop_capture(&self) { + let shared_capture_manager = metal::CaptureManager::shared(); + if let Some(default_capture_scope) = shared_capture_manager.default_capture_scope() { + default_capture_scope.end_scope(); + } + shared_capture_manager.stop_capture(); + } +} diff --git a/third_party/rust/wgpu-hal/src/metal/mod.rs b/third_party/rust/wgpu-hal/src/metal/mod.rs new file mode 100644 index 0000000000..b77685bd94 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/mod.rs @@ -0,0 +1,805 @@ +/*! +# Metal API internals. + +## Pipeline Layout + +In Metal, push constants, vertex buffers, and resources in the bind groups +are all placed together in the native resource bindings, which work similarly to D3D11: +there are tables of textures, buffers, and samplers. + +We put push constants first (if any) in the table, followed by bind group 0 +resources, followed by other bind groups. The vertex buffers are bound at the very +end of the VS buffer table. 
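+For example (an illustrative layout, not taken from a real pipeline): a vertex stage
+with 8 bytes of push constants and two bind groups holding one uniform buffer each
+uses VS buffer slot 0 for the push constants, slot 1 for the group-0 buffer, and
+slot 2 for the group-1 buffer, while the pipeline's vertex buffer 0 is bound at
+slot `max_vertex_buffers - 1`.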
+ +!*/ + +mod adapter; +mod command; +mod conv; +mod device; +mod surface; +mod time; + +use std::{ + fmt, iter, ops, + ptr::NonNull, + sync::{atomic, Arc}, + thread, +}; + +use arrayvec::ArrayVec; +use foreign_types::ForeignTypeRef as _; +use parking_lot::Mutex; + +#[derive(Clone)] +pub struct Api; + +type ResourceIndex = u32; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +pub struct Instance { + managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate, +} + +impl Instance { + pub fn create_surface_from_layer(&self, layer: &metal::MetalLayerRef) -> Surface { + unsafe { Surface::from_layer(layer) } + } +} + +impl crate::Instance<Api> for Instance { + unsafe fn init(_desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + //TODO: enable `METAL_DEVICE_WRAPPER_TYPE` environment based on the flags? + Ok(Instance { + managed_metal_layer_delegate: surface::HalManagedMetalLayerDelegate::new(), + }) + } + + unsafe fn create_surface( + &self, + _display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<Surface, crate::InstanceError> { + match window_handle { + #[cfg(target_os = "ios")] + raw_window_handle::RawWindowHandle::UiKit(handle) => { + let _ = &self.managed_metal_layer_delegate; + Ok(unsafe { Surface::from_view(handle.ui_view, None) }) + } + #[cfg(target_os = "macos")] + raw_window_handle::RawWindowHandle::AppKit(handle) => Ok(unsafe { + Surface::from_view(handle.ns_view, Some(&self.managed_metal_layer_delegate)) + }), + _ => Err(crate::InstanceError), + } + } + + unsafe fn destroy_surface(&self, surface: Surface) { + unsafe { surface.dispose() }; + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<Api>> { + let devices = metal::Device::all(); + let mut adapters: Vec<crate::ExposedAdapter<Api>> = devices + .into_iter() + .map(|dev| { + let name = dev.name().into(); + let shared = AdapterShared::new(dev); + crate::ExposedAdapter { + info: wgt::AdapterInfo { + name, + vendor: 0, + device: 0, + device_type: shared.private_caps.device_type(), + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Metal, + }, + features: shared.private_caps.features(), + capabilities: shared.private_caps.capabilities(), + adapter: Adapter::new(Arc::new(shared)), + } + }) + .collect(); + adapters.sort_by_key(|ad| { + ( + ad.adapter.shared.private_caps.low_power, + ad.adapter.shared.private_caps.headless, + ) + }); + adapters + } +} + +#[allow(dead_code)] +#[derive(Clone, Debug)] +struct PrivateCapabilities { + family_check: bool, + msl_version: metal::MTLLanguageVersion, + fragment_rw_storage: bool, + read_write_texture_tier: metal::MTLReadWriteTextureTier, + msaa_desktop: bool, + msaa_apple3: bool, + msaa_apple7: bool, + resource_heaps: bool, + argument_buffers: bool, + shared_textures: bool, + mutable_comparison_samplers: bool, + sampler_clamp_to_border: 
bool, + base_instance: bool, + base_vertex_instance_drawing: bool, + dual_source_blending: bool, + low_power: bool, + headless: bool, + layered_rendering: bool, + function_specialization: bool, + depth_clip_mode: bool, + texture_cube_array: bool, + format_depth24_stencil8: bool, + format_depth32_stencil8_filter: bool, + format_depth32_stencil8_none: bool, + format_min_srgb_channels: u8, + format_b5: bool, + format_bc: bool, + format_eac_etc: bool, + format_astc: bool, + format_astc_hdr: bool, + format_any8_unorm_srgb_all: bool, + format_any8_unorm_srgb_no_write: bool, + format_any8_snorm_all: bool, + format_r16_norm_all: bool, + format_r32_all: bool, + format_r32_no_write: bool, + format_r32float_no_write_no_filter: bool, + format_r32float_no_filter: bool, + format_r32float_all: bool, + format_rgba8_srgb_all: bool, + format_rgba8_srgb_no_write: bool, + format_rgb10a2_unorm_all: bool, + format_rgb10a2_unorm_no_write: bool, + format_rgb10a2_uint_color: bool, + format_rgb10a2_uint_color_write: bool, + format_rg11b10_all: bool, + format_rg11b10_no_write: bool, + format_rgb9e5_all: bool, + format_rgb9e5_no_write: bool, + format_rgb9e5_filter_only: bool, + format_rg32_color: bool, + format_rg32_color_write: bool, + format_rg32float_all: bool, + format_rg32float_color_blend: bool, + format_rg32float_no_filter: bool, + format_rgba32int_color: bool, + format_rgba32int_color_write: bool, + format_rgba32float_color: bool, + format_rgba32float_color_write: bool, + format_rgba32float_all: bool, + format_depth16unorm: bool, + format_depth32float_filter: bool, + format_depth32float_none: bool, + format_bgr10a2_all: bool, + format_bgr10a2_no_write: bool, + max_buffers_per_stage: ResourceIndex, + max_vertex_buffers: ResourceIndex, + max_textures_per_stage: ResourceIndex, + max_samplers_per_stage: ResourceIndex, + buffer_alignment: u64, + max_buffer_size: u64, + max_texture_size: u64, + max_texture_3d_size: u64, + max_texture_layers: u64, + max_fragment_input_components: u64, + max_color_render_targets: u8, + max_varying_components: u32, + max_threads_per_group: u32, + max_total_threadgroup_memory: u32, + sample_count_mask: crate::TextureFormatCapabilities, + supports_debug_markers: bool, + supports_binary_archives: bool, + supports_capture_manager: bool, + can_set_maximum_drawables_count: bool, + can_set_display_sync: bool, + can_set_next_drawable_timeout: bool, + supports_arrays_of_textures: bool, + supports_arrays_of_textures_write: bool, + supports_mutability: bool, + supports_depth_clip_control: bool, + supports_preserve_invariance: bool, + supports_shader_primitive_index: bool, + has_unified_memory: Option<bool>, +} + +#[derive(Clone, Debug)] +struct PrivateDisabilities { + /// Near depth is not respected properly on some Intel GPUs. + broken_viewport_near_depth: bool, + /// Multi-target clears don't appear to work properly on Intel GPUs. 
+ #[allow(dead_code)] + broken_layered_clear_image: bool, +} + +#[derive(Debug, Default)] +struct Settings { + retain_command_buffer_references: bool, +} + +struct AdapterShared { + device: Mutex<metal::Device>, + disabilities: PrivateDisabilities, + private_caps: PrivateCapabilities, + settings: Settings, + presentation_timer: time::PresentationTimer, +} + +unsafe impl Send for AdapterShared {} +unsafe impl Sync for AdapterShared {} + +impl AdapterShared { + fn new(device: metal::Device) -> Self { + let private_caps = PrivateCapabilities::new(&device); + log::debug!("{:#?}", private_caps); + + Self { + disabilities: PrivateDisabilities::new(&device), + private_caps, + device: Mutex::new(device), + settings: Settings::default(), + presentation_timer: time::PresentationTimer::new(), + } + } +} + +pub struct Adapter { + shared: Arc<AdapterShared>, +} + +pub struct Queue { + raw: Arc<Mutex<metal::CommandQueue>>, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +impl Queue { + pub unsafe fn queue_from_raw(raw: metal::CommandQueue) -> Self { + Self { + raw: Arc::new(Mutex::new(raw)), + } + } +} +pub struct Device { + shared: Arc<AdapterShared>, + features: wgt::Features, +} + +pub struct Surface { + view: Option<NonNull<objc::runtime::Object>>, + render_layer: Mutex<metal::MetalLayer>, + swapchain_format: Option<wgt::TextureFormat>, + extent: wgt::Extent3d, + main_thread_id: thread::ThreadId, + // Useful for UI-intensive applications that are sensitive to + // window resizing. + pub present_with_transaction: bool, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug)] +pub struct SurfaceTexture { + texture: Texture, + drawable: metal::MetalDrawable, + present_with_transaction: bool, +} + +impl std::borrow::Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + &self.texture + } +} + +unsafe impl Send for SurfaceTexture {} +unsafe impl Sync for SurfaceTexture {} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&CommandBuffer], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + objc::rc::autoreleasepool(|| { + let extra_command_buffer = match signal_fence { + Some((fence, value)) => { + let completed_value = Arc::clone(&fence.completed_value); + let block = block::ConcreteBlock::new(move |_cmd_buf| { + completed_value.store(value, atomic::Ordering::Release); + }) + .copy(); + + let raw = match command_buffers.last() { + Some(&cmd_buf) => cmd_buf.raw.to_owned(), + None => { + let queue = self.raw.lock(); + queue + .new_command_buffer_with_unretained_references() + .to_owned() + } + }; + raw.set_label("(wgpu internal) Signal"); + raw.add_completed_handler(&block); + + fence.maintain(); + fence.pending_command_buffers.push((value, raw.to_owned())); + // only return an extra one if it's extra + match command_buffers.last() { + Some(_) => None, + None => Some(raw), + } + } + None => None, + }; + + for cmd_buffer in command_buffers { + cmd_buffer.raw.commit(); + } + + if let Some(raw) = extra_command_buffer { + raw.commit(); + } + }); + Ok(()) + } + unsafe fn present( + &mut self, + _surface: &mut Surface, + texture: SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + let queue = &self.raw.lock(); + objc::rc::autoreleasepool(|| { + let command_buffer = queue.new_command_buffer(); + command_buffer.set_label("(wgpu internal) Present"); + + // 
https://developer.apple.com/documentation/quartzcore/cametallayer/1478157-presentswithtransaction?language=objc + if !texture.present_with_transaction { + command_buffer.present_drawable(&texture.drawable); + } + + command_buffer.commit(); + + if texture.present_with_transaction { + command_buffer.wait_until_scheduled(); + texture.drawable.present(); + } + }); + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + // TODO: This is hard, see https://github.com/gpuweb/gpuweb/issues/1325 + 1.0 + } +} + +#[derive(Debug)] +pub struct Buffer { + raw: metal::Buffer, + size: wgt::BufferAddress, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + +impl Buffer { + fn as_raw(&self) -> BufferPtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct Texture { + raw: metal::Texture, + format: wgt::TextureFormat, + raw_type: metal::MTLTextureType, + array_layers: u32, + mip_levels: u32, + copy_size: crate::CopyExtent, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +#[derive(Debug)] +pub struct TextureView { + raw: metal::Texture, + aspects: crate::FormatAspects, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} + +impl TextureView { + fn as_raw(&self) -> TexturePtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct Sampler { + raw: metal::SamplerState, +} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +impl Sampler { + fn as_raw(&self) -> SamplerPtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} + +#[derive(Debug)] +pub struct BindGroupLayout { + /// Sorted list of BGL entries. + entries: Arc<[wgt::BindGroupLayoutEntry]>, +} + +#[derive(Clone, Debug, Default)] +struct ResourceData<T> { + buffers: T, + textures: T, + samplers: T, +} + +#[derive(Clone, Debug, Default)] +struct MultiStageData<T> { + vs: T, + fs: T, + cs: T, +} + +const NAGA_STAGES: MultiStageData<naga::ShaderStage> = MultiStageData { + vs: naga::ShaderStage::Vertex, + fs: naga::ShaderStage::Fragment, + cs: naga::ShaderStage::Compute, +}; + +impl<T> ops::Index<naga::ShaderStage> for MultiStageData<T> { + type Output = T; + fn index(&self, stage: naga::ShaderStage) -> &T { + match stage { + naga::ShaderStage::Vertex => &self.vs, + naga::ShaderStage::Fragment => &self.fs, + naga::ShaderStage::Compute => &self.cs, + } + } +} + +impl<T> MultiStageData<T> { + fn map_ref<Y>(&self, fun: impl Fn(&T) -> Y) -> MultiStageData<Y> { + MultiStageData { + vs: fun(&self.vs), + fs: fun(&self.fs), + cs: fun(&self.cs), + } + } + fn map<Y>(self, fun: impl Fn(T) -> Y) -> MultiStageData<Y> { + MultiStageData { + vs: fun(self.vs), + fs: fun(self.fs), + cs: fun(self.cs), + } + } + fn iter<'a>(&'a self) -> impl Iterator<Item = &'a T> { + iter::once(&self.vs) + .chain(iter::once(&self.fs)) + .chain(iter::once(&self.cs)) + } + fn iter_mut<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> { + iter::once(&mut self.vs) + .chain(iter::once(&mut self.fs)) + .chain(iter::once(&mut self.cs)) + } +} + +type MultiStageResourceCounters = MultiStageData<ResourceData<ResourceIndex>>; +type MultiStageResources = MultiStageData<naga::back::msl::EntryPointResources>; + +#[derive(Debug)] +struct BindGroupLayoutInfo { + base_resource_indices: MultiStageResourceCounters, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PushConstantsInfo { + count: u32, + buffer_index: ResourceIndex, +} + +#[derive(Debug)] +pub struct PipelineLayout { + bind_group_infos: 
ArrayVec<BindGroupLayoutInfo, { crate::MAX_BIND_GROUPS }>, + push_constants_infos: MultiStageData<Option<PushConstantsInfo>>, + total_counters: MultiStageResourceCounters, + total_push_constants: u32, + per_stage_map: MultiStageResources, +} + +trait AsNative { + type Native; + fn from(native: &Self::Native) -> Self; + fn as_native(&self) -> &Self::Native; +} + +type BufferPtr = NonNull<metal::MTLBuffer>; +type TexturePtr = NonNull<metal::MTLTexture>; +type SamplerPtr = NonNull<metal::MTLSamplerState>; + +impl AsNative for BufferPtr { + type Native = metal::BufferRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for TexturePtr { + type Native = metal::TextureRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +impl AsNative for SamplerPtr { + type Native = metal::SamplerStateRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + +#[derive(Debug)] +struct BufferResource { + ptr: BufferPtr, + offset: wgt::BufferAddress, + dynamic_index: Option<u32>, + + /// The buffer's size, if it is a [`Storage`] binding. Otherwise `None`. + /// + /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can + /// hold WGSL runtime-sized arrays. When one does, we must pass its size to + /// shader entry points to implement bounds checks and WGSL's `arrayLength` + /// function. See [`device::CompiledShader::sized_bindings`] for details. + /// + /// [`Storage`]: wgt::BufferBindingType::Storage + binding_size: Option<wgt::BufferSize>, + + binding_location: u32, +} + +#[derive(Debug, Default)] +pub struct BindGroup { + counters: MultiStageResourceCounters, + buffers: Vec<BufferResource>, + samplers: Vec<SamplerPtr>, + textures: Vec<TexturePtr>, +} + +unsafe impl Send for BindGroup {} +unsafe impl Sync for BindGroup {} + +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, + runtime_checks: bool, +} + +#[derive(Debug, Default)] +struct PipelineStageInfo { + push_constants: Option<PushConstantsInfo>, + + /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + sizes_slot: Option<naga::back::msl::Slot>, + + /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays. + /// + /// See [`device::CompiledShader::sized_bindings`] for more details. 
+ sized_bindings: Vec<naga::ResourceBinding>, +} + +impl PipelineStageInfo { + fn clear(&mut self) { + self.push_constants = None; + self.sizes_slot = None; + self.sized_bindings.clear(); + } + + fn assign_from(&mut self, other: &Self) { + self.push_constants = other.push_constants; + self.sizes_slot = other.sizes_slot; + self.sized_bindings.clear(); + self.sized_bindings.extend_from_slice(&other.sized_bindings); + } +} + +pub struct RenderPipeline { + raw: metal::RenderPipelineState, + #[allow(dead_code)] + vs_lib: metal::Library, + #[allow(dead_code)] + fs_lib: Option<metal::Library>, + vs_info: PipelineStageInfo, + fs_info: Option<PipelineStageInfo>, + raw_primitive_type: metal::MTLPrimitiveType, + raw_triangle_fill_mode: metal::MTLTriangleFillMode, + raw_front_winding: metal::MTLWinding, + raw_cull_mode: metal::MTLCullMode, + raw_depth_clip_mode: Option<metal::MTLDepthClipMode>, + depth_stencil: Option<(metal::DepthStencilState, wgt::DepthBiasState)>, +} + +unsafe impl Send for RenderPipeline {} +unsafe impl Sync for RenderPipeline {} + +pub struct ComputePipeline { + raw: metal::ComputePipelineState, + #[allow(dead_code)] + cs_lib: metal::Library, + cs_info: PipelineStageInfo, + work_group_size: metal::MTLSize, + work_group_memory_sizes: Vec<u32>, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + +#[derive(Debug)] +pub struct QuerySet { + raw_buffer: metal::Buffer, + ty: wgt::QueryType, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} + +#[derive(Debug)] +pub struct Fence { + completed_value: Arc<atomic::AtomicU64>, + /// The pending fence values have to be ascending. + pending_command_buffers: Vec<(crate::FenceValue, metal::CommandBuffer)>, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + +impl Fence { + fn get_latest(&self) -> crate::FenceValue { + let mut max_value = self.completed_value.load(atomic::Ordering::Acquire); + for &(value, ref cmd_buf) in self.pending_command_buffers.iter() { + if cmd_buf.status() == metal::MTLCommandBufferStatus::Completed { + max_value = value; + } + } + max_value + } + + fn maintain(&mut self) { + let latest = self.get_latest(); + self.pending_command_buffers + .retain(|&(value, _)| value > latest); + } +} + +struct IndexState { + buffer_ptr: BufferPtr, + offset: wgt::BufferAddress, + stride: wgt::BufferAddress, + raw_type: metal::MTLIndexType, +} + +#[derive(Default)] +struct Temp { + binding_sizes: Vec<u32>, +} + +struct CommandState { + blit: Option<metal::BlitCommandEncoder>, + render: Option<metal::RenderCommandEncoder>, + compute: Option<metal::ComputeCommandEncoder>, + raw_primitive_type: metal::MTLPrimitiveType, + index: Option<IndexState>, + raw_wg_size: metal::MTLSize, + stage_infos: MultiStageData<PipelineStageInfo>, + + /// Sizes of currently bound [`wgt::BufferBindingType::Storage`] buffers. + /// + /// Specifically: + /// + /// - The keys are ['ResourceBinding`] values (that is, the WGSL `@group` + /// and `@binding` attributes) for `var<storage>` global variables in the + /// current module that contain runtime-sized arrays. + /// + /// - The values are the actual sizes of the buffers currently bound to + /// provide those globals' contents, which are needed to implement bounds + /// checks and the WGSL `arrayLength` function. + /// + /// For each stage `S` in `stage_infos`, we consult this to find the sizes + /// of the buffers listed in [`stage_infos.S.sized_bindings`], which we must + /// pass to the entry point. 
+ /// + /// See [`device::CompiledShader::sized_bindings`] for more details. + /// + /// [`ResourceBinding`]: naga::ResourceBinding + storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>, + + work_group_memory_sizes: Vec<u32>, + push_constants: Vec<u32>, +} + +pub struct CommandEncoder { + shared: Arc<AdapterShared>, + raw_queue: Arc<Mutex<metal::CommandQueue>>, + raw_cmd_buf: Option<metal::CommandBuffer>, + state: CommandState, + temp: Temp, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("raw_queue", &self.raw_queue) + .field("raw_cmd_buf", &self.raw_cmd_buf) + .finish() + } +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: metal::CommandBuffer, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} diff --git a/third_party/rust/wgpu-hal/src/metal/surface.rs b/third_party/rust/wgpu-hal/src/metal/surface.rs new file mode 100644 index 0000000000..8101d52969 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/surface.rs @@ -0,0 +1,278 @@ +#![allow(clippy::let_unit_value)] // `let () =` being used to constrain result type + +use std::{mem, os::raw::c_void, ptr::NonNull, sync::Once, thread}; + +use core_graphics_types::{ + base::CGFloat, + geometry::{CGRect, CGSize}, +}; +use objc::{ + class, + declare::ClassDecl, + msg_send, + rc::autoreleasepool, + runtime::{Class, Object, Sel, BOOL, NO, YES}, + sel, sel_impl, +}; +use parking_lot::Mutex; + +#[cfg(target_os = "macos")] +#[link(name = "QuartzCore", kind = "framework")] +extern "C" { + #[allow(non_upper_case_globals)] + static kCAGravityTopLeft: *mut Object; +} + +extern "C" fn layer_should_inherit_contents_scale_from_window( + _: &Class, + _: Sel, + _layer: *mut Object, + _new_scale: CGFloat, + _from_window: *mut Object, +) -> BOOL { + YES +} + +static CAML_DELEGATE_REGISTER: Once = Once::new(); + +#[derive(Debug)] +pub struct HalManagedMetalLayerDelegate(&'static Class); + +impl HalManagedMetalLayerDelegate { + pub fn new() -> Self { + let class_name = format!("HalManagedMetalLayerDelegate@{:p}", &CAML_DELEGATE_REGISTER); + + CAML_DELEGATE_REGISTER.call_once(|| { + type Fun = extern "C" fn(&Class, Sel, *mut Object, CGFloat, *mut Object) -> BOOL; + let mut decl = ClassDecl::new(&class_name, class!(NSObject)).unwrap(); + #[allow(trivial_casts)] // false positive + unsafe { + decl.add_class_method( + sel!(layer:shouldInheritContentsScale:fromWindow:), + layer_should_inherit_contents_scale_from_window as Fun, + ); + } + decl.register(); + }); + Self(Class::get(&class_name).unwrap()) + } +} + +impl super::Surface { + fn new(view: Option<NonNull<Object>>, layer: metal::MetalLayer) -> Self { + Self { + view, + render_layer: Mutex::new(layer), + swapchain_format: None, + extent: wgt::Extent3d::default(), + main_thread_id: thread::current().id(), + present_with_transaction: false, + } + } + + pub unsafe fn dispose(self) { + if let Some(view) = self.view { + let () = msg_send![view.as_ptr(), release]; + } + } + + /// If not called on the main thread, this will panic. 
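+    /// (The main-thread check itself happens inside `get_metal_layer`, which this function calls.)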
+ #[allow(clippy::transmute_ptr_to_ref)] + pub unsafe fn from_view( + view: *mut c_void, + delegate: Option<&HalManagedMetalLayerDelegate>, + ) -> Self { + let view = view as *mut Object; + let render_layer = { + let layer = unsafe { Self::get_metal_layer(view, delegate) }; + unsafe { mem::transmute::<_, &metal::MetalLayerRef>(layer) } + } + .to_owned(); + let _: *mut c_void = msg_send![view, retain]; + Self::new(NonNull::new(view), render_layer) + } + + pub unsafe fn from_layer(layer: &metal::MetalLayerRef) -> Self { + let class = class!(CAMetalLayer); + let proper_kind: BOOL = msg_send![layer, isKindOfClass: class]; + assert_eq!(proper_kind, YES); + Self::new(None, layer.to_owned()) + } + + /// If not called on the main thread, this will panic. + pub(crate) unsafe fn get_metal_layer( + view: *mut Object, + delegate: Option<&HalManagedMetalLayerDelegate>, + ) -> *mut Object { + if view.is_null() { + panic!("window does not have a valid contentView"); + } + + let is_main_thread: BOOL = msg_send![class!(NSThread), isMainThread]; + if is_main_thread == NO { + panic!("get_metal_layer cannot be called in non-ui thread."); + } + + let main_layer: *mut Object = msg_send![view, layer]; + let class = class!(CAMetalLayer); + let is_valid_layer: BOOL = msg_send![main_layer, isKindOfClass: class]; + + if is_valid_layer == YES { + main_layer + } else { + // If the main layer is not a CAMetalLayer, we create a CAMetalLayer and use it. + let new_layer: *mut Object = msg_send![class, new]; + let frame: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![new_layer, setFrame: frame]; + #[cfg(target_os = "ios")] + { + // Unlike NSView, UIView does not allow to replace main layer. + let () = msg_send![main_layer, addSublayer: new_layer]; + // On iOS, "from_view" may be called before the application initialization is complete, + // `msg_send![view, window]` and `msg_send![window, screen]` will get null. 
+ let screen: *mut Object = msg_send![class!(UIScreen), mainScreen]; + let scale_factor: CGFloat = msg_send![screen, nativeScale]; + let () = msg_send![view, setContentScaleFactor: scale_factor]; + }; + #[cfg(target_os = "macos")] + { + let () = msg_send![view, setLayer: new_layer]; + let () = msg_send![view, setWantsLayer: YES]; + let () = msg_send![new_layer, setContentsGravity: unsafe { kCAGravityTopLeft }]; + let window: *mut Object = msg_send![view, window]; + if !window.is_null() { + let scale_factor: CGFloat = msg_send![window, backingScaleFactor]; + let () = msg_send![new_layer, setContentsScale: scale_factor]; + } + }; + if let Some(delegate) = delegate { + let () = msg_send![new_layer, setDelegate: delegate.0]; + } + new_layer + } + } + + pub(super) fn dimensions(&self) -> wgt::Extent3d { + let (size, scale): (CGSize, CGFloat) = unsafe { + let render_layer_borrow = self.render_layer.lock(); + let render_layer = render_layer_borrow.as_ref(); + let bounds: CGRect = msg_send![render_layer, bounds]; + let contents_scale: CGFloat = msg_send![render_layer, contentsScale]; + (bounds.size, contents_scale) + }; + + wgt::Extent3d { + width: (size.width * scale) as u32, + height: (size.height * scale) as u32, + depth_or_array_layers: 1, + } + } +} + +impl crate::Surface<super::Api> for super::Surface { + unsafe fn configure( + &mut self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + log::info!("build swapchain {:?}", config); + + let caps = &device.shared.private_caps; + self.swapchain_format = Some(config.format); + self.extent = config.extent; + + let render_layer = self.render_layer.lock(); + let framebuffer_only = config.usage == crate::TextureUses::COLOR_TARGET; + let display_sync = match config.present_mode { + wgt::PresentMode::Fifo => true, + wgt::PresentMode::Immediate => false, + m => unreachable!("Unsupported present mode: {m:?}"), + }; + let drawable_size = CGSize::new(config.extent.width as f64, config.extent.height as f64); + + match config.composite_alpha_mode { + wgt::CompositeAlphaMode::Opaque => render_layer.set_opaque(true), + wgt::CompositeAlphaMode::PostMultiplied => render_layer.set_opaque(false), + _ => (), + } + + let device_raw = device.shared.device.lock(); + // On iOS, unless the user supplies a view with a CAMetalLayer, we + // create one as a sublayer. However, when the view changes size, + // its sublayers are not automatically resized, and we must resize + // it here. The drawable size and the layer size don't correlate + #[cfg(target_os = "ios")] + { + if let Some(view) = self.view { + let main_layer: *mut Object = msg_send![view.as_ptr(), layer]; + let bounds: CGRect = msg_send![main_layer, bounds]; + let () = msg_send![*render_layer, setFrame: bounds]; + } + } + render_layer.set_device(&device_raw); + render_layer.set_pixel_format(caps.map_format(config.format)); + render_layer.set_framebuffer_only(framebuffer_only); + render_layer.set_presents_with_transaction(self.present_with_transaction); + // opt-in to Metal EDR + // EDR potentially more power used in display and more bandwidth, memory footprint. 
+ let wants_edr = config.format == wgt::TextureFormat::Rgba16Float; + if wants_edr != render_layer.wants_extended_dynamic_range_content() { + render_layer.set_wants_extended_dynamic_range_content(wants_edr); + } + + // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) + render_layer.set_maximum_drawable_count(config.swap_chain_size as _); + render_layer.set_drawable_size(drawable_size); + if caps.can_set_next_drawable_timeout { + let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; + } + if caps.can_set_display_sync { + let () = msg_send![*render_layer, setDisplaySyncEnabled: display_sync]; + } + + Ok(()) + } + + unsafe fn unconfigure(&mut self, _device: &super::Device) { + self.swapchain_format = None; + } + + unsafe fn acquire_texture( + &mut self, + _timeout_ms: Option<std::time::Duration>, //TODO + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let render_layer = self.render_layer.lock(); + let (drawable, texture) = match autoreleasepool(|| { + render_layer + .next_drawable() + .map(|drawable| (drawable.to_owned(), drawable.texture().to_owned())) + }) { + Some(pair) => pair, + None => return Ok(None), + }; + + let suf_texture = super::SurfaceTexture { + texture: super::Texture { + raw: texture, + format: self.swapchain_format.unwrap(), + raw_type: metal::MTLTextureType::D2, + array_layers: 1, + mip_levels: 1, + copy_size: crate::CopyExtent { + width: self.extent.width, + height: self.extent.height, + depth: 1, + }, + }, + drawable, + present_with_transaction: self.present_with_transaction, + }; + + Ok(Some(crate::AcquiredSurfaceTexture { + texture: suf_texture, + suboptimal: false, + })) + } + + unsafe fn discard_texture(&mut self, _texture: super::SurfaceTexture) {} +} diff --git a/third_party/rust/wgpu-hal/src/metal/time.rs b/third_party/rust/wgpu-hal/src/metal/time.rs new file mode 100644 index 0000000000..5c6bec10cd --- /dev/null +++ b/third_party/rust/wgpu-hal/src/metal/time.rs @@ -0,0 +1,38 @@ +//! Handling of global timestamps. + +#[repr(C)] +#[derive(Debug)] +struct MachTimebaseInfo { + numerator: u32, + denominator: u32, +} +extern "C" { + fn mach_timebase_info(out: *mut MachTimebaseInfo) -> u32; + fn mach_absolute_time() -> u64; +} + +/// A timer which uses mach_absolute_time to get its time. This is what the metal callbacks use. +#[derive(Debug)] +pub struct PresentationTimer { + scale: MachTimebaseInfo, +} +impl PresentationTimer { + /// Generates a new timer. + pub fn new() -> Self { + // Default to 1 / 1 in case the call to timebase_info fails. + let mut scale = MachTimebaseInfo { + numerator: 1, + denominator: 1, + }; + unsafe { mach_timebase_info(&mut scale) }; + + Self { scale } + } + + /// Gets the current time in nanoseconds. + pub fn get_timestamp_ns(&self) -> u128 { + let time = unsafe { mach_absolute_time() }; + + (time as u128 * self.scale.numerator as u128) / self.scale.denominator as u128 + } +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs new file mode 100644 index 0000000000..f8f26e422f --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs @@ -0,0 +1,1746 @@ +use super::conv; + +use ash::{extensions::khr, vk}; +use parking_lot::Mutex; + +use std::{collections::BTreeMap, ffi::CStr, sync::Arc}; + +fn depth_stencil_required_flags() -> vk::FormatFeatureFlags { + vk::FormatFeatureFlags::SAMPLED_IMAGE | vk::FormatFeatureFlags::DEPTH_STENCIL_ATTACHMENT +} + +//TODO: const fn? 
+fn indexing_features() -> wgt::Features { + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING +} + +/// Aggregate of the `vk::PhysicalDevice*Features` structs used by `gfx`. +#[derive(Debug, Default)] +pub struct PhysicalDeviceFeatures { + core: vk::PhysicalDeviceFeatures, + pub(super) descriptor_indexing: Option<vk::PhysicalDeviceDescriptorIndexingFeaturesEXT>, + imageless_framebuffer: Option<vk::PhysicalDeviceImagelessFramebufferFeaturesKHR>, + timeline_semaphore: Option<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>, + image_robustness: Option<vk::PhysicalDeviceImageRobustnessFeaturesEXT>, + robustness2: Option<vk::PhysicalDeviceRobustness2FeaturesEXT>, + depth_clip_enable: Option<vk::PhysicalDeviceDepthClipEnableFeaturesEXT>, + multiview: Option<vk::PhysicalDeviceMultiviewFeaturesKHR>, + astc_hdr: Option<vk::PhysicalDeviceTextureCompressionASTCHDRFeaturesEXT>, + shader_float16: Option<( + vk::PhysicalDeviceShaderFloat16Int8Features, + vk::PhysicalDevice16BitStorageFeatures, + )>, + zero_initialize_workgroup_memory: + Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>, +} + +// This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. +unsafe impl Send for PhysicalDeviceFeatures {} +unsafe impl Sync for PhysicalDeviceFeatures {} + +impl PhysicalDeviceFeatures { + /// Add the members of `self` into `info.enabled_features` and its `p_next` chain. + pub fn add_to_device_create_builder<'a>( + &'a mut self, + mut info: vk::DeviceCreateInfoBuilder<'a>, + ) -> vk::DeviceCreateInfoBuilder<'a> { + info = info.enabled_features(&self.core); + if let Some(ref mut feature) = self.descriptor_indexing { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.imageless_framebuffer { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.timeline_semaphore { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.image_robustness { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.robustness2 { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.depth_clip_enable { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.astc_hdr { + info = info.push_next(feature); + } + if let Some((ref mut f16_i8_feature, ref mut _16bit_feature)) = self.shader_float16 { + info = info.push_next(f16_i8_feature); + info = info.push_next(_16bit_feature); + } + if let Some(ref mut feature) = self.zero_initialize_workgroup_memory { + info = info.push_next(feature); + } + info + } + + /// Create a `PhysicalDeviceFeatures` that will be used to create a logical device. + /// + /// `requested_features` should be the same as what was used to generate `enabled_extensions`. 
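+    /// (otherwise the resulting struct could enable features whose backing extensions were never requested).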
+ fn from_extensions_and_requested_features( + effective_api_version: u32, + enabled_extensions: &[&'static CStr], + requested_features: wgt::Features, + downlevel_flags: wgt::DownlevelFlags, + private_caps: &super::PrivateCapabilities, + ) -> Self { + let needs_sampled_image_non_uniform = requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_storage_buffer_non_uniform = requested_features.contains( + wgt::Features::BUFFER_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY + | wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_uniform_buffer_non_uniform = requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_storage_image_non_uniform = requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ); + let needs_partially_bound = + requested_features.intersects(wgt::Features::PARTIALLY_BOUND_BINDING_ARRAY); + + Self { + // vk::PhysicalDeviceFeatures is a struct composed of Bool32's while + // Features is a bitfield so we need to map everything manually + core: vk::PhysicalDeviceFeatures::builder() + .robust_buffer_access(private_caps.robust_buffer_access) + .independent_blend(downlevel_flags.contains(wgt::DownlevelFlags::INDEPENDENT_BLEND)) + .sample_rate_shading( + downlevel_flags.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING), + ) + .image_cube_array( + downlevel_flags.contains(wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES), + ) + .draw_indirect_first_instance( + requested_features.contains(wgt::Features::INDIRECT_FIRST_INSTANCE), + ) + //.dual_src_blend(requested_features.contains(wgt::Features::DUAL_SRC_BLENDING)) + .multi_draw_indirect( + requested_features.contains(wgt::Features::MULTI_DRAW_INDIRECT), + ) + .fill_mode_non_solid(requested_features.intersects( + wgt::Features::POLYGON_MODE_LINE | wgt::Features::POLYGON_MODE_POINT, + )) + //.depth_bounds(requested_features.contains(wgt::Features::DEPTH_BOUNDS)) + //.alpha_to_one(requested_features.contains(wgt::Features::ALPHA_TO_ONE)) + //.multi_viewport(requested_features.contains(wgt::Features::MULTI_VIEWPORTS)) + .sampler_anisotropy( + downlevel_flags.contains(wgt::DownlevelFlags::ANISOTROPIC_FILTERING), + ) + .texture_compression_etc2( + requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_ETC2), + ) + .texture_compression_astc_ldr( + requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_ASTC), + ) + .texture_compression_bc( + requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_BC), + ) + //.occlusion_query_precise(requested_features.contains(wgt::Features::PRECISE_OCCLUSION_QUERY)) + .pipeline_statistics_query( + requested_features.contains(wgt::Features::PIPELINE_STATISTICS_QUERY), + ) + .vertex_pipeline_stores_and_atomics( + requested_features.contains(wgt::Features::VERTEX_WRITABLE_STORAGE), + ) + .fragment_stores_and_atomics( + downlevel_flags.contains(wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE), + ) + //.shader_image_gather_extended( + //.shader_storage_image_extended_formats( + .shader_uniform_buffer_array_dynamic_indexing( + requested_features.contains(wgt::Features::BUFFER_BINDING_ARRAY), + ) + .shader_storage_buffer_array_dynamic_indexing(requested_features.contains( + 
wgt::Features::BUFFER_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY, + )) + .shader_sampled_image_array_dynamic_indexing( + requested_features.contains(wgt::Features::TEXTURE_BINDING_ARRAY), + ) + .shader_storage_buffer_array_dynamic_indexing(requested_features.contains( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY, + )) + //.shader_storage_image_array_dynamic_indexing( + //.shader_clip_distance(requested_features.contains(wgt::Features::SHADER_CLIP_DISTANCE)) + //.shader_cull_distance(requested_features.contains(wgt::Features::SHADER_CULL_DISTANCE)) + .shader_float64(requested_features.contains(wgt::Features::SHADER_F64)) + //.shader_int64(requested_features.contains(wgt::Features::SHADER_INT64)) + .shader_int16(requested_features.contains(wgt::Features::SHADER_I16)) + //.shader_resource_residency(requested_features.contains(wgt::Features::SHADER_RESOURCE_RESIDENCY)) + .geometry_shader(requested_features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX)) + .build(), + descriptor_indexing: if requested_features.intersects(indexing_features()) { + Some( + vk::PhysicalDeviceDescriptorIndexingFeaturesEXT::builder() + .shader_sampled_image_array_non_uniform_indexing( + needs_sampled_image_non_uniform, + ) + .shader_storage_image_array_non_uniform_indexing( + needs_storage_image_non_uniform, + ) + .shader_uniform_buffer_array_non_uniform_indexing( + needs_uniform_buffer_non_uniform, + ) + .shader_storage_buffer_array_non_uniform_indexing( + needs_storage_buffer_non_uniform, + ) + .descriptor_binding_partially_bound(needs_partially_bound) + .build(), + ) + } else { + None + }, + imageless_framebuffer: if effective_api_version >= vk::API_VERSION_1_2 + || enabled_extensions.contains(&vk::KhrImagelessFramebufferFn::name()) + { + Some( + vk::PhysicalDeviceImagelessFramebufferFeaturesKHR::builder() + .imageless_framebuffer(private_caps.imageless_framebuffers) + .build(), + ) + } else { + None + }, + timeline_semaphore: if effective_api_version >= vk::API_VERSION_1_2 + || enabled_extensions.contains(&vk::KhrTimelineSemaphoreFn::name()) + { + Some( + vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR::builder() + .timeline_semaphore(private_caps.timeline_semaphores) + .build(), + ) + } else { + None + }, + image_robustness: if effective_api_version >= vk::API_VERSION_1_3 + || enabled_extensions.contains(&vk::ExtImageRobustnessFn::name()) + { + Some( + vk::PhysicalDeviceImageRobustnessFeaturesEXT::builder() + .robust_image_access(private_caps.robust_image_access) + .build(), + ) + } else { + None + }, + robustness2: if enabled_extensions.contains(&vk::ExtRobustness2Fn::name()) { + // Note: enabling `robust_buffer_access2` isn't requires, strictly speaking + // since we can enable `robust_buffer_access` all the time. But it improves + // program portability, so we opt into it anyway. 
+ Some( + vk::PhysicalDeviceRobustness2FeaturesEXT::builder() + .robust_buffer_access2(private_caps.robust_buffer_access) + .robust_image_access2(private_caps.robust_image_access) + .build(), + ) + } else { + None + }, + depth_clip_enable: if enabled_extensions.contains(&vk::ExtDepthClipEnableFn::name()) { + Some( + vk::PhysicalDeviceDepthClipEnableFeaturesEXT::builder() + .depth_clip_enable( + requested_features.contains(wgt::Features::DEPTH_CLIP_CONTROL), + ) + .build(), + ) + } else { + None + }, + multiview: if effective_api_version >= vk::API_VERSION_1_1 + || enabled_extensions.contains(&vk::KhrMultiviewFn::name()) + { + Some( + vk::PhysicalDeviceMultiviewFeatures::builder() + .multiview(requested_features.contains(wgt::Features::MULTIVIEW)) + .build(), + ) + } else { + None + }, + astc_hdr: if enabled_extensions.contains(&vk::ExtTextureCompressionAstcHdrFn::name()) { + Some( + vk::PhysicalDeviceTextureCompressionASTCHDRFeaturesEXT::builder() + .texture_compression_astc_hdr(true) + .build(), + ) + } else { + None + }, + shader_float16: if requested_features.contains(wgt::Features::SHADER_F16) { + Some(( + vk::PhysicalDeviceShaderFloat16Int8Features::builder() + .shader_float16(true) + .build(), + vk::PhysicalDevice16BitStorageFeatures::builder() + .storage_buffer16_bit_access(true) + .uniform_and_storage_buffer16_bit_access(true) + .build(), + )) + } else { + None + }, + zero_initialize_workgroup_memory: if effective_api_version >= vk::API_VERSION_1_3 + || enabled_extensions.contains(&vk::KhrZeroInitializeWorkgroupMemoryFn::name()) + { + Some( + vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures::builder() + .shader_zero_initialize_workgroup_memory( + private_caps.zero_initialize_workgroup_memory, + ) + .build(), + ) + } else { + None + }, + } + } + + fn to_wgpu( + &self, + instance: &ash::Instance, + phd: vk::PhysicalDevice, + caps: &PhysicalDeviceCapabilities, + ) -> (wgt::Features, wgt::DownlevelFlags) { + use crate::auxil::db; + use wgt::{DownlevelFlags as Df, Features as F}; + let mut features = F::empty() + | F::SPIRV_SHADER_PASSTHROUGH + | F::MAPPABLE_PRIMARY_BUFFERS + | F::PUSH_CONSTANTS + | F::ADDRESS_MODE_CLAMP_TO_BORDER + | F::ADDRESS_MODE_CLAMP_TO_ZERO + | F::TIMESTAMP_QUERY + | F::TIMESTAMP_QUERY_INSIDE_PASSES + | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES + | F::CLEAR_TEXTURE; + + let mut dl_flags = Df::COMPUTE_SHADERS + | Df::BASE_VERTEX + | Df::READ_ONLY_DEPTH_STENCIL + | Df::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES + | Df::COMPARISON_SAMPLERS + | Df::VERTEX_STORAGE + | Df::FRAGMENT_STORAGE + | Df::DEPTH_TEXTURE_AND_BUFFER_COPIES + | Df::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED + | Df::UNRESTRICTED_INDEX_BUFFER + | Df::INDIRECT_EXECUTION + | Df::VIEW_FORMATS + | Df::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES; + + dl_flags.set( + Df::SURFACE_VIEW_FORMATS, + caps.supports_extension(vk::KhrSwapchainMutableFormatFn::name()), + ); + dl_flags.set(Df::CUBE_ARRAY_TEXTURES, self.core.image_cube_array != 0); + dl_flags.set(Df::ANISOTROPIC_FILTERING, self.core.sampler_anisotropy != 0); + dl_flags.set( + Df::FRAGMENT_WRITABLE_STORAGE, + self.core.fragment_stores_and_atomics != 0, + ); + dl_flags.set(Df::MULTISAMPLED_SHADING, self.core.sample_rate_shading != 0); + dl_flags.set(Df::INDEPENDENT_BLEND, self.core.independent_blend != 0); + dl_flags.set( + Df::FULL_DRAW_INDEX_UINT32, + self.core.full_draw_index_uint32 != 0, + ); + dl_flags.set(Df::DEPTH_BIAS_CLAMP, self.core.depth_bias_clamp != 0); + + features.set( + F::INDIRECT_FIRST_INSTANCE, + self.core.draw_indirect_first_instance != 0, + ); 
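+        // Each capability in `vk::PhysicalDeviceFeatures` is a `vk::Bool32`, so the mapping
+        // below is a series of `features.set(FLAG, bool32 != 0)` calls; the commented-out
+        // lines mark Vulkan features not (yet) exposed through wgpu. For illustration only
+        // (hypothetical names, not part of this module):
+        //     features.set(F::SOME_FEATURE, self.core.some_feature != 0);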
+ //if self.core.dual_src_blend != 0 + features.set(F::MULTI_DRAW_INDIRECT, self.core.multi_draw_indirect != 0); + features.set(F::POLYGON_MODE_LINE, self.core.fill_mode_non_solid != 0); + features.set(F::POLYGON_MODE_POINT, self.core.fill_mode_non_solid != 0); + //if self.core.depth_bounds != 0 { + //if self.core.alpha_to_one != 0 { + //if self.core.multi_viewport != 0 { + features.set( + F::TEXTURE_COMPRESSION_ETC2, + self.core.texture_compression_etc2 != 0, + ); + features.set( + F::TEXTURE_COMPRESSION_ASTC, + self.core.texture_compression_astc_ldr != 0, + ); + features.set( + F::TEXTURE_COMPRESSION_BC, + self.core.texture_compression_bc != 0, + ); + features.set( + F::PIPELINE_STATISTICS_QUERY, + self.core.pipeline_statistics_query != 0, + ); + features.set( + F::VERTEX_WRITABLE_STORAGE, + self.core.vertex_pipeline_stores_and_atomics != 0, + ); + //if self.core.shader_image_gather_extended != 0 { + //if self.core.shader_storage_image_extended_formats != 0 { + features.set( + F::BUFFER_BINDING_ARRAY, + self.core.shader_uniform_buffer_array_dynamic_indexing != 0, + ); + features.set( + F::TEXTURE_BINDING_ARRAY, + self.core.shader_sampled_image_array_dynamic_indexing != 0, + ); + features.set(F::SHADER_PRIMITIVE_INDEX, self.core.geometry_shader != 0); + if Self::all_features_supported( + &features, + &[ + ( + F::BUFFER_BINDING_ARRAY, + self.core.shader_storage_buffer_array_dynamic_indexing, + ), + ( + F::TEXTURE_BINDING_ARRAY, + self.core.shader_storage_image_array_dynamic_indexing, + ), + ], + ) { + features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY); + } + //if self.core.shader_storage_image_array_dynamic_indexing != 0 { + //if self.core.shader_clip_distance != 0 { + //if self.core.shader_cull_distance != 0 { + features.set(F::SHADER_F64, self.core.shader_float64 != 0); + //if self.core.shader_int64 != 0 { + features.set(F::SHADER_I16, self.core.shader_int16 != 0); + + //if caps.supports_extension(vk::KhrSamplerMirrorClampToEdgeFn::name()) { + //if caps.supports_extension(vk::ExtSamplerFilterMinmaxFn::name()) { + features.set( + F::MULTI_DRAW_INDIRECT_COUNT, + caps.supports_extension(vk::KhrDrawIndirectCountFn::name()), + ); + features.set( + F::CONSERVATIVE_RASTERIZATION, + caps.supports_extension(vk::ExtConservativeRasterizationFn::name()), + ); + + let intel_windows = caps.properties.vendor_id == db::intel::VENDOR && cfg!(windows); + + if let Some(ref descriptor_indexing) = self.descriptor_indexing { + const STORAGE: F = F::STORAGE_RESOURCE_BINDING_ARRAY; + if Self::all_features_supported( + &features, + &[ + ( + F::TEXTURE_BINDING_ARRAY, + descriptor_indexing.shader_sampled_image_array_non_uniform_indexing, + ), + ( + F::BUFFER_BINDING_ARRAY | STORAGE, + descriptor_indexing.shader_storage_buffer_array_non_uniform_indexing, + ), + ], + ) { + features.insert(F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING); + } + if Self::all_features_supported( + &features, + &[ + ( + F::BUFFER_BINDING_ARRAY, + descriptor_indexing.shader_uniform_buffer_array_non_uniform_indexing, + ), + ( + F::TEXTURE_BINDING_ARRAY | STORAGE, + descriptor_indexing.shader_storage_image_array_non_uniform_indexing, + ), + ], + ) { + features.insert(F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING); + } + if descriptor_indexing.descriptor_binding_partially_bound != 0 && !intel_windows { + features |= F::PARTIALLY_BOUND_BINDING_ARRAY; + } + } + + if let Some(ref feature) = self.depth_clip_enable { + features.set(F::DEPTH_CLIP_CONTROL, feature.depth_clip_enable != 0); + } + + if let Some(ref 
multiview) = self.multiview { + features.set(F::MULTIVIEW, multiview.multiview != 0); + } + + features.set( + F::TEXTURE_FORMAT_16BIT_NORM, + is_format_16bit_norm_supported(instance, phd), + ); + + if let Some(ref astc_hdr) = self.astc_hdr { + features.set( + F::TEXTURE_COMPRESSION_ASTC_HDR, + astc_hdr.texture_compression_astc_hdr != 0, + ); + } + + if let Some((ref f16_i8, ref bit16)) = self.shader_float16 { + features.set( + F::SHADER_F16, + f16_i8.shader_float16 != 0 + && bit16.storage_buffer16_bit_access != 0 + && bit16.uniform_and_storage_buffer16_bit_access != 0, + ); + } + + let supports_depth_format = |format| { + supports_format( + instance, + phd, + format, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ) + }; + + let texture_s8 = supports_depth_format(vk::Format::S8_UINT); + let texture_d32 = supports_depth_format(vk::Format::D32_SFLOAT); + let texture_d24_s8 = supports_depth_format(vk::Format::D24_UNORM_S8_UINT); + let texture_d32_s8 = supports_depth_format(vk::Format::D32_SFLOAT_S8_UINT); + + let stencil8 = texture_s8 || texture_d24_s8; + let depth24_plus_stencil8 = texture_d24_s8 || texture_d32_s8; + + dl_flags.set( + Df::WEBGPU_TEXTURE_FORMAT_SUPPORT, + stencil8 && depth24_plus_stencil8 && texture_d32, + ); + + features.set(F::DEPTH32FLOAT_STENCIL8, texture_d32_s8); + + let rg11b10ufloat_renderable = supports_format( + instance, + phd, + vk::Format::B10G11R11_UFLOAT_PACK32, + vk::ImageTiling::OPTIMAL, + vk::FormatFeatureFlags::COLOR_ATTACHMENT + | vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND, + ); + features.set(F::RG11B10UFLOAT_RENDERABLE, rg11b10ufloat_renderable); + + (features, dl_flags) + } + + fn all_features_supported( + features: &wgt::Features, + implications: &[(wgt::Features, vk::Bool32)], + ) -> bool { + implications + .iter() + .all(|&(flag, support)| !features.contains(flag) || support != 0) + } +} + +/// Information gathered about a physical device capabilities. +#[derive(Default)] +pub struct PhysicalDeviceCapabilities { + supported_extensions: Vec<vk::ExtensionProperties>, + properties: vk::PhysicalDeviceProperties, + maintenance_3: Option<vk::PhysicalDeviceMaintenance3Properties>, + descriptor_indexing: Option<vk::PhysicalDeviceDescriptorIndexingPropertiesEXT>, + driver: Option<vk::PhysicalDeviceDriverPropertiesKHR>, + /// The effective driver api version supported by the physical device. + /// + /// The Vulkan specification states the following in the documentation for VkPhysicalDeviceProperties: + /// > The value of apiVersion may be different than the version returned by vkEnumerateInstanceVersion; + /// > either higher or lower. In such cases, the application must not use functionality that exceeds + /// > the version of Vulkan associated with a given object. + /// + /// For example, a Vulkan 1.1 instance cannot use functionality added in Vulkan 1.2 even if the physical + /// device supports Vulkan 1.2. + /// + /// This means that assuming that the apiVersion provided by VkPhysicalDeviceProperties is the actual + /// version we can use is incorrect. Instead the effective version is the lower of the instance version + /// and physical device version. + effective_api_version: u32, +} + +// This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. 
+unsafe impl Send for PhysicalDeviceCapabilities {} +unsafe impl Sync for PhysicalDeviceCapabilities {} + +impl PhysicalDeviceCapabilities { + pub fn properties(&self) -> vk::PhysicalDeviceProperties { + self.properties + } + + pub fn supports_extension(&self, extension: &CStr) -> bool { + use crate::auxil::cstr_from_bytes_until_nul; + self.supported_extensions + .iter() + .any(|ep| cstr_from_bytes_until_nul(&ep.extension_name) == Some(extension)) + } + + /// Map `requested_features` to the list of Vulkan extension strings required to create the logical device. + fn get_required_extensions(&self, requested_features: wgt::Features) -> Vec<&'static CStr> { + let mut extensions = Vec::new(); + + // Note that quite a few extensions depend on the `VK_KHR_get_physical_device_properties2` instance extension. + // We enable `VK_KHR_get_physical_device_properties2` unconditionally (if available). + + // Require `VK_KHR_swapchain` + extensions.push(vk::KhrSwapchainFn::name()); + + if self.effective_api_version < vk::API_VERSION_1_1 { + // Require either `VK_KHR_maintenance1` or `VK_AMD_negative_viewport_height` + if self.supports_extension(vk::KhrMaintenance1Fn::name()) { + extensions.push(vk::KhrMaintenance1Fn::name()); + } else { + // `VK_AMD_negative_viewport_height` is obsoleted by `VK_KHR_maintenance1` and must not be enabled alongside it + extensions.push(vk::AmdNegativeViewportHeightFn::name()); + } + + // Optional `VK_KHR_maintenance2` + if self.supports_extension(vk::KhrMaintenance2Fn::name()) { + extensions.push(vk::KhrMaintenance2Fn::name()); + } + + // Optional `VK_KHR_maintenance3` + if self.supports_extension(vk::KhrMaintenance3Fn::name()) { + extensions.push(vk::KhrMaintenance3Fn::name()); + } + + // Require `VK_KHR_storage_buffer_storage_class` + extensions.push(vk::KhrStorageBufferStorageClassFn::name()); + + // Require `VK_KHR_multiview` if the associated feature was requested + if requested_features.contains(wgt::Features::MULTIVIEW) { + extensions.push(vk::KhrMultiviewFn::name()); + } + } + + if self.effective_api_version < vk::API_VERSION_1_2 { + // Optional `VK_KHR_image_format_list` + if self.supports_extension(vk::KhrImageFormatListFn::name()) { + extensions.push(vk::KhrImageFormatListFn::name()); + } + + // Optional `VK_KHR_imageless_framebuffer` + if self.supports_extension(vk::KhrImagelessFramebufferFn::name()) { + extensions.push(vk::KhrImagelessFramebufferFn::name()); + // Require `VK_KHR_maintenance2` due to it being a dependency + if self.effective_api_version < vk::API_VERSION_1_1 { + extensions.push(vk::KhrMaintenance2Fn::name()); + } + } + + // Optional `VK_KHR_driver_properties` + if self.supports_extension(vk::KhrDriverPropertiesFn::name()) { + extensions.push(vk::KhrDriverPropertiesFn::name()); + } + + // Optional `VK_KHR_timeline_semaphore` + if self.supports_extension(vk::KhrTimelineSemaphoreFn::name()) { + extensions.push(vk::KhrTimelineSemaphoreFn::name()); + } + + // Require `VK_EXT_descriptor_indexing` if one of the associated features was requested + if requested_features.intersects(indexing_features()) { + extensions.push(vk::ExtDescriptorIndexingFn::name()); + } + + // Require `VK_KHR_shader_float16_int8` and `VK_KHR_16bit_storage` if the associated feature was requested + if requested_features.contains(wgt::Features::SHADER_F16) { + extensions.push(vk::KhrShaderFloat16Int8Fn::name()); + // `VK_KHR_16bit_storage` requires `VK_KHR_storage_buffer_storage_class`, however we require that one already + if self.effective_api_version < vk::API_VERSION_1_1 { + 
extensions.push(vk::Khr16bitStorageFn::name()); + } + } + + //extensions.push(vk::KhrSamplerMirrorClampToEdgeFn::name()); + //extensions.push(vk::ExtSamplerFilterMinmaxFn::name()); + } + + if self.effective_api_version < vk::API_VERSION_1_3 { + // Optional `VK_EXT_image_robustness` + if self.supports_extension(vk::ExtImageRobustnessFn::name()) { + extensions.push(vk::ExtImageRobustnessFn::name()); + } + } + + // Optional `VK_KHR_swapchain_mutable_format` + if self.supports_extension(vk::KhrSwapchainMutableFormatFn::name()) { + extensions.push(vk::KhrSwapchainMutableFormatFn::name()); + } + + // Optional `VK_EXT_robustness2` + if self.supports_extension(vk::ExtRobustness2Fn::name()) { + extensions.push(vk::ExtRobustness2Fn::name()); + } + + // Require `VK_KHR_draw_indirect_count` if the associated feature was requested + // Even though Vulkan 1.2 has promoted the extension to core, we must require the extension to avoid + // large amounts of spaghetti involved with using PhysicalDeviceVulkan12Features. + if requested_features.contains(wgt::Features::MULTI_DRAW_INDIRECT_COUNT) { + extensions.push(vk::KhrDrawIndirectCountFn::name()); + } + + // Require `VK_EXT_conservative_rasterization` if the associated feature was requested + if requested_features.contains(wgt::Features::CONSERVATIVE_RASTERIZATION) { + extensions.push(vk::ExtConservativeRasterizationFn::name()); + } + + // Require `VK_EXT_depth_clip_enable` if the associated feature was requested + if requested_features.contains(wgt::Features::DEPTH_CLIP_CONTROL) { + extensions.push(vk::ExtDepthClipEnableFn::name()); + } + + // Require `VK_KHR_portability_subset` on macOS/iOS + #[cfg(any(target_os = "macos", target_os = "ios"))] + extensions.push(vk::KhrPortabilitySubsetFn::name()); + + // Require `VK_EXT_texture_compression_astc_hdr` if the associated feature was requested + if requested_features.contains(wgt::Features::TEXTURE_COMPRESSION_ASTC_HDR) { + extensions.push(vk::ExtTextureCompressionAstcHdrFn::name()); + } + + extensions + } + + fn to_wgpu_limits(&self) -> wgt::Limits { + let limits = &self.properties.limits; + + let max_compute_workgroup_sizes = limits.max_compute_work_group_size; + let max_compute_workgroups_per_dimension = limits.max_compute_work_group_count[0] + .min(limits.max_compute_work_group_count[1]) + .min(limits.max_compute_work_group_count[2]); + + // Prevent very large buffers on mesa and most android devices. 
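+        // Concretely: on Linux and Android, `max_buffer_size` is clamped to `i32::MAX`
+        // unless the adapter is an NVIDIA one; all other platforms advertise the full
+        // `u64` range.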
+ let is_nvidia = self.properties.vendor_id == crate::auxil::db::nvidia::VENDOR; + let max_buffer_size = + if (cfg!(target_os = "linux") || cfg!(target_os = "android")) && !is_nvidia { + i32::MAX as u64 + } else { + u64::MAX + }; + + wgt::Limits { + max_texture_dimension_1d: limits.max_image_dimension1_d, + max_texture_dimension_2d: limits.max_image_dimension2_d, + max_texture_dimension_3d: limits.max_image_dimension3_d, + max_texture_array_layers: limits.max_image_array_layers, + max_bind_groups: limits + .max_bound_descriptor_sets + .min(crate::MAX_BIND_GROUPS as u32), + max_bindings_per_bind_group: 640, + max_dynamic_uniform_buffers_per_pipeline_layout: limits + .max_descriptor_set_uniform_buffers_dynamic, + max_dynamic_storage_buffers_per_pipeline_layout: limits + .max_descriptor_set_storage_buffers_dynamic, + max_sampled_textures_per_shader_stage: limits.max_per_stage_descriptor_sampled_images, + max_samplers_per_shader_stage: limits.max_per_stage_descriptor_samplers, + max_storage_buffers_per_shader_stage: limits.max_per_stage_descriptor_storage_buffers, + max_storage_textures_per_shader_stage: limits.max_per_stage_descriptor_storage_images, + max_uniform_buffers_per_shader_stage: limits.max_per_stage_descriptor_uniform_buffers, + max_uniform_buffer_binding_size: limits + .max_uniform_buffer_range + .min(crate::auxil::MAX_I32_BINDING_SIZE), + max_storage_buffer_binding_size: limits + .max_storage_buffer_range + .min(crate::auxil::MAX_I32_BINDING_SIZE), + max_vertex_buffers: limits + .max_vertex_input_bindings + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: limits.max_vertex_input_attributes, + max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride, + max_push_constant_size: limits.max_push_constants_size, + min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32, + min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32, + max_inter_stage_shader_components: limits + .max_vertex_output_components + .min(limits.max_fragment_input_components), + max_compute_workgroup_storage_size: limits.max_compute_shared_memory_size, + max_compute_invocations_per_workgroup: limits.max_compute_work_group_invocations, + max_compute_workgroup_size_x: max_compute_workgroup_sizes[0], + max_compute_workgroup_size_y: max_compute_workgroup_sizes[1], + max_compute_workgroup_size_z: max_compute_workgroup_sizes[2], + max_compute_workgroups_per_dimension, + max_buffer_size, + } + } + + fn to_hal_alignments(&self) -> crate::Alignments { + let limits = &self.properties.limits; + crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new(limits.optimal_buffer_copy_offset_alignment) + .unwrap(), + buffer_copy_pitch: wgt::BufferSize::new(limits.optimal_buffer_copy_row_pitch_alignment) + .unwrap(), + } + } +} + +impl super::InstanceShared { + #[allow(trivial_casts)] // false positives + fn inspect( + &self, + phd: vk::PhysicalDevice, + ) -> (PhysicalDeviceCapabilities, PhysicalDeviceFeatures) { + let capabilities = { + let mut capabilities = PhysicalDeviceCapabilities::default(); + capabilities.supported_extensions = + unsafe { self.raw.enumerate_device_extension_properties(phd).unwrap() }; + capabilities.properties = if let Some(ref get_device_properties) = + self.get_physical_device_properties + { + // Get these now to avoid borrowing conflicts later + let supports_descriptor_indexing = self.driver_api_version >= vk::API_VERSION_1_2 + || capabilities.supports_extension(vk::ExtDescriptorIndexingFn::name()); + let 
supports_driver_properties = self.driver_api_version >= vk::API_VERSION_1_2 + || capabilities.supports_extension(vk::KhrDriverPropertiesFn::name()); + + let mut builder = vk::PhysicalDeviceProperties2KHR::builder(); + if self.driver_api_version >= vk::API_VERSION_1_1 + || capabilities.supports_extension(vk::KhrMaintenance3Fn::name()) + { + capabilities.maintenance_3 = + Some(vk::PhysicalDeviceMaintenance3Properties::default()); + builder = builder.push_next(capabilities.maintenance_3.as_mut().unwrap()); + } + + if supports_descriptor_indexing { + let next = capabilities + .descriptor_indexing + .insert(vk::PhysicalDeviceDescriptorIndexingPropertiesEXT::default()); + builder = builder.push_next(next); + } + + if supports_driver_properties { + let next = capabilities + .driver + .insert(vk::PhysicalDeviceDriverPropertiesKHR::default()); + builder = builder.push_next(next); + } + + let mut properties2 = builder.build(); + unsafe { + get_device_properties.get_physical_device_properties2(phd, &mut properties2); + } + properties2.properties + } else { + unsafe { self.raw.get_physical_device_properties(phd) } + }; + + // Set the effective api version + capabilities.effective_api_version = self + .driver_api_version + .min(capabilities.properties.api_version); + capabilities + }; + + let mut features = PhysicalDeviceFeatures::default(); + features.core = if let Some(ref get_device_properties) = self.get_physical_device_properties + { + let core = vk::PhysicalDeviceFeatures::default(); + let mut builder = vk::PhysicalDeviceFeatures2KHR::builder().features(core); + + // `VK_KHR_multiview` is promoted to 1.1 + if capabilities.effective_api_version >= vk::API_VERSION_1_1 + || capabilities.supports_extension(vk::KhrMultiviewFn::name()) + { + let next = features + .multiview + .insert(vk::PhysicalDeviceMultiviewFeatures::default()); + builder = builder.push_next(next); + } + + if capabilities.supports_extension(vk::ExtDescriptorIndexingFn::name()) { + let next = features + .descriptor_indexing + .insert(vk::PhysicalDeviceDescriptorIndexingFeaturesEXT::default()); + builder = builder.push_next(next); + } + + // `VK_KHR_imageless_framebuffer` is promoted to 1.2, but has no changes, so we can keep using the extension unconditionally. + if capabilities.supports_extension(vk::KhrImagelessFramebufferFn::name()) { + let next = features + .imageless_framebuffer + .insert(vk::PhysicalDeviceImagelessFramebufferFeaturesKHR::default()); + builder = builder.push_next(next); + } + + // `VK_KHR_timeline_semaphore` is promoted to 1.2, but has no changes, so we can keep using the extension unconditionally. 
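+            // Accordingly, the query below keys off the extension alone rather than also
+            // checking the effective API version.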
+ if capabilities.supports_extension(vk::KhrTimelineSemaphoreFn::name()) { + let next = features + .timeline_semaphore + .insert(vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR::default()); + builder = builder.push_next(next); + } + + if capabilities.supports_extension(vk::ExtImageRobustnessFn::name()) { + let next = features + .image_robustness + .insert(vk::PhysicalDeviceImageRobustnessFeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::ExtRobustness2Fn::name()) { + let next = features + .robustness2 + .insert(vk::PhysicalDeviceRobustness2FeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::ExtDepthClipEnableFn::name()) { + let next = features + .depth_clip_enable + .insert(vk::PhysicalDeviceDepthClipEnableFeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::ExtTextureCompressionAstcHdrFn::name()) { + let next = features + .astc_hdr + .insert(vk::PhysicalDeviceTextureCompressionASTCHDRFeaturesEXT::default()); + builder = builder.push_next(next); + } + if capabilities.supports_extension(vk::KhrShaderFloat16Int8Fn::name()) + && capabilities.supports_extension(vk::Khr16bitStorageFn::name()) + { + let next = features.shader_float16.insert(( + vk::PhysicalDeviceShaderFloat16Int8FeaturesKHR::default(), + vk::PhysicalDevice16BitStorageFeaturesKHR::default(), + )); + builder = builder.push_next(&mut next.0); + builder = builder.push_next(&mut next.1); + } + + // `VK_KHR_zero_initialize_workgroup_memory` is promoted to 1.3 + if capabilities.effective_api_version >= vk::API_VERSION_1_3 + || capabilities.supports_extension(vk::KhrZeroInitializeWorkgroupMemoryFn::name()) + { + let next = features + .zero_initialize_workgroup_memory + .insert(vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures::default()); + builder = builder.push_next(next); + } + + let mut features2 = builder.build(); + unsafe { + get_device_properties.get_physical_device_features2(phd, &mut features2); + } + features2.features + } else { + unsafe { self.raw.get_physical_device_features(phd) } + }; + + (capabilities, features) + } +} + +impl super::Instance { + pub fn expose_adapter( + &self, + phd: vk::PhysicalDevice, + ) -> Option<crate::ExposedAdapter<super::Api>> { + use crate::auxil::cstr_from_bytes_until_nul; + use crate::auxil::db; + + let (phd_capabilities, phd_features) = self.shared.inspect(phd); + + let info = wgt::AdapterInfo { + name: { + cstr_from_bytes_until_nul(&phd_capabilities.properties.device_name) + .and_then(|info| info.to_str().ok()) + .unwrap_or("?") + .to_owned() + }, + vendor: phd_capabilities.properties.vendor_id, + device: phd_capabilities.properties.device_id, + device_type: match phd_capabilities.properties.device_type { + ash::vk::PhysicalDeviceType::OTHER => wgt::DeviceType::Other, + ash::vk::PhysicalDeviceType::INTEGRATED_GPU => wgt::DeviceType::IntegratedGpu, + ash::vk::PhysicalDeviceType::DISCRETE_GPU => wgt::DeviceType::DiscreteGpu, + ash::vk::PhysicalDeviceType::VIRTUAL_GPU => wgt::DeviceType::VirtualGpu, + ash::vk::PhysicalDeviceType::CPU => wgt::DeviceType::Cpu, + _ => wgt::DeviceType::Other, + }, + driver: { + phd_capabilities + .driver + .as_ref() + .and_then(|driver| cstr_from_bytes_until_nul(&driver.driver_name)) + .and_then(|name| name.to_str().ok()) + .unwrap_or("?") + .to_owned() + }, + driver_info: { + phd_capabilities + .driver + .as_ref() + .and_then(|driver| cstr_from_bytes_until_nul(&driver.driver_info)) + 
.and_then(|name| name.to_str().ok()) + .unwrap_or("?") + .to_owned() + }, + backend: wgt::Backend::Vulkan, + }; + + let (available_features, downlevel_flags) = + phd_features.to_wgpu(&self.shared.raw, phd, &phd_capabilities); + let mut workarounds = super::Workarounds::empty(); + { + // see https://github.com/gfx-rs/gfx/issues/1930 + let _is_windows_intel_dual_src_bug = cfg!(windows) + && phd_capabilities.properties.vendor_id == db::intel::VENDOR + && (phd_capabilities.properties.device_id & db::intel::DEVICE_KABY_LAKE_MASK + == db::intel::DEVICE_KABY_LAKE_MASK + || phd_capabilities.properties.device_id & db::intel::DEVICE_SKY_LAKE_MASK + == db::intel::DEVICE_SKY_LAKE_MASK); + // TODO: only enable for particular devices + workarounds |= super::Workarounds::SEPARATE_ENTRY_POINTS; + workarounds.set( + super::Workarounds::EMPTY_RESOLVE_ATTACHMENT_LISTS, + phd_capabilities.properties.vendor_id == db::qualcomm::VENDOR, + ); + }; + + if phd_capabilities.effective_api_version == vk::API_VERSION_1_0 + && !phd_capabilities.supports_extension(vk::KhrStorageBufferStorageClassFn::name()) + { + log::warn!( + "SPIR-V storage buffer class is not supported, hiding adapter: {}", + info.name + ); + return None; + } + if !phd_capabilities.supports_extension(vk::AmdNegativeViewportHeightFn::name()) + && !phd_capabilities.supports_extension(vk::KhrMaintenance1Fn::name()) + && phd_capabilities.effective_api_version < vk::API_VERSION_1_1 + { + log::warn!( + "viewport Y-flip is not supported, hiding adapter: {}", + info.name + ); + return None; + } + + let queue_families = unsafe { + self.shared + .raw + .get_physical_device_queue_family_properties(phd) + }; + let queue_flags = queue_families.first()?.queue_flags; + if !queue_flags.contains(vk::QueueFlags::GRAPHICS) { + log::warn!("The first queue only exposes {:?}", queue_flags); + return None; + } + + let private_caps = super::PrivateCapabilities { + flip_y_requires_shift: phd_capabilities.effective_api_version >= vk::API_VERSION_1_1 + || phd_capabilities.supports_extension(vk::KhrMaintenance1Fn::name()), + imageless_framebuffers: match phd_features.imageless_framebuffer { + Some(features) => features.imageless_framebuffer == vk::TRUE, + None => phd_features + .imageless_framebuffer + .map_or(false, |ext| ext.imageless_framebuffer != 0), + }, + image_view_usage: phd_capabilities.effective_api_version >= vk::API_VERSION_1_1 + || phd_capabilities.supports_extension(vk::KhrMaintenance2Fn::name()), + timeline_semaphores: match phd_features.timeline_semaphore { + Some(features) => features.timeline_semaphore == vk::TRUE, + None => phd_features + .timeline_semaphore + .map_or(false, |ext| ext.timeline_semaphore != 0), + }, + texture_d24: supports_format( + &self.shared.raw, + phd, + vk::Format::X8_D24_UNORM_PACK32, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ), + texture_d24_s8: supports_format( + &self.shared.raw, + phd, + vk::Format::D24_UNORM_S8_UINT, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ), + texture_s8: supports_format( + &self.shared.raw, + phd, + vk::Format::S8_UINT, + vk::ImageTiling::OPTIMAL, + depth_stencil_required_flags(), + ), + non_coherent_map_mask: phd_capabilities.properties.limits.non_coherent_atom_size - 1, + can_present: true, + //TODO: make configurable + robust_buffer_access: phd_features.core.robust_buffer_access != 0, + robust_image_access: match phd_features.robustness2 { + Some(ref f) => f.robust_image_access2 != 0, + None => phd_features + .image_robustness + .map_or(false, |ext| 
ext.robust_image_access != 0), + }, + zero_initialize_workgroup_memory: phd_features + .zero_initialize_workgroup_memory + .map_or(false, |ext| { + ext.shader_zero_initialize_workgroup_memory == vk::TRUE + }), + }; + let capabilities = crate::Capabilities { + limits: phd_capabilities.to_wgpu_limits(), + alignments: phd_capabilities.to_hal_alignments(), + downlevel: wgt::DownlevelCapabilities { + flags: downlevel_flags, + limits: wgt::DownlevelLimits {}, + shader_model: wgt::ShaderModel::Sm5, //TODO? + }, + }; + + let adapter = super::Adapter { + raw: phd, + instance: Arc::clone(&self.shared), + //queue_families, + known_memory_flags: vk::MemoryPropertyFlags::DEVICE_LOCAL + | vk::MemoryPropertyFlags::HOST_VISIBLE + | vk::MemoryPropertyFlags::HOST_COHERENT + | vk::MemoryPropertyFlags::HOST_CACHED + | vk::MemoryPropertyFlags::LAZILY_ALLOCATED, + phd_capabilities, + //phd_features, + downlevel_flags, + private_caps, + workarounds, + }; + + Some(crate::ExposedAdapter { + adapter, + info, + features: available_features, + capabilities, + }) + } +} + +impl super::Adapter { + pub fn raw_physical_device(&self) -> ash::vk::PhysicalDevice { + self.raw + } + + pub fn physical_device_capabilities(&self) -> &PhysicalDeviceCapabilities { + &self.phd_capabilities + } + + pub fn shared_instance(&self) -> &super::InstanceShared { + &self.instance + } + + pub fn required_device_extensions(&self, features: wgt::Features) -> Vec<&'static CStr> { + let (supported_extensions, unsupported_extensions) = self + .phd_capabilities + .get_required_extensions(features) + .iter() + .partition::<Vec<&CStr>, _>(|&&extension| { + self.phd_capabilities.supports_extension(extension) + }); + + if !unsupported_extensions.is_empty() { + log::warn!("Missing extensions: {:?}", unsupported_extensions); + } + + log::debug!("Supported extensions: {:?}", supported_extensions); + supported_extensions + } + + /// `features` must be the same features used to create `enabled_extensions`. + pub fn physical_device_features( + &self, + enabled_extensions: &[&'static CStr], + features: wgt::Features, + ) -> PhysicalDeviceFeatures { + PhysicalDeviceFeatures::from_extensions_and_requested_features( + self.phd_capabilities.effective_api_version, + enabled_extensions, + features, + self.downlevel_flags, + &self.private_caps, + ) + } + + /// # Safety + /// + /// - `raw_device` must be created from this adapter. + /// - `raw_device` must be created using `family_index`, `enabled_extensions` and `physical_device_features()` + /// - `enabled_extensions` must be a superset of `required_device_extensions()`. 
+ #[allow(clippy::too_many_arguments)] + pub unsafe fn device_from_raw( + &self, + raw_device: ash::Device, + handle_is_owned: bool, + enabled_extensions: &[&'static CStr], + features: wgt::Features, + family_index: u32, + queue_index: u32, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let mem_properties = { + profiling::scope!("vkGetPhysicalDeviceMemoryProperties"); + unsafe { + self.instance + .raw + .get_physical_device_memory_properties(self.raw) + } + }; + let memory_types = + &mem_properties.memory_types[..mem_properties.memory_type_count as usize]; + let valid_ash_memory_types = memory_types.iter().enumerate().fold(0, |u, (i, mem)| { + if self.known_memory_flags.contains(mem.property_flags) { + u | (1 << i) + } else { + u + } + }); + + let swapchain_fn = khr::Swapchain::new(&self.instance.raw, &raw_device); + + let indirect_count_fn = if enabled_extensions.contains(&khr::DrawIndirectCount::name()) { + Some(khr::DrawIndirectCount::new(&self.instance.raw, &raw_device)) + } else { + None + }; + let timeline_semaphore_fn = if enabled_extensions.contains(&khr::TimelineSemaphore::name()) + { + Some(super::ExtensionFn::Extension(khr::TimelineSemaphore::new( + &self.instance.raw, + &raw_device, + ))) + } else if self.phd_capabilities.effective_api_version >= vk::API_VERSION_1_2 { + Some(super::ExtensionFn::Promoted) + } else { + None + }; + + let naga_options = { + use naga::back::spv; + + let mut capabilities = vec![ + spv::Capability::Shader, + spv::Capability::Matrix, + spv::Capability::Sampled1D, + spv::Capability::Image1D, + spv::Capability::ImageQuery, + spv::Capability::DerivativeControl, + spv::Capability::SampledCubeArray, + spv::Capability::SampleRateShading, + //Note: this is requested always, no matter what the actual + // adapter supports. It's not the responsibility of SPV-out + // translation to handle the storage support for formats. + spv::Capability::StorageImageExtendedFormats, + //TODO: fill out the rest + ]; + + if features.contains(wgt::Features::MULTIVIEW) { + capabilities.push(spv::Capability::MultiView); + } + + if features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX) { + capabilities.push(spv::Capability::Geometry); + } + + if features.intersects( + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING + | wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ) { + capabilities.push(spv::Capability::ShaderNonUniform); + } + + let mut flags = spv::WriterFlags::empty(); + flags.set( + spv::WriterFlags::DEBUG, + self.instance.flags.contains(crate::InstanceFlags::DEBUG), + ); + flags.set( + spv::WriterFlags::LABEL_VARYINGS, + self.phd_capabilities.properties.vendor_id != crate::auxil::db::qualcomm::VENDOR, + ); + flags.set( + spv::WriterFlags::FORCE_POINT_SIZE, + //Note: we could technically disable this when we are compiling separate entry points, + // and we know exactly that the primitive topology is not `PointList`. + // But this requires cloning the `spv::Options` struct, which has heap allocations. 
+ true, // could check `super::Workarounds::SEPARATE_ENTRY_POINTS` + ); + spv::Options { + lang_version: (1, 0), + flags, + capabilities: Some(capabilities.iter().cloned().collect()), + bounds_check_policies: naga::proc::BoundsCheckPolicies { + index: naga::proc::BoundsCheckPolicy::Restrict, + buffer: if self.private_caps.robust_buffer_access { + naga::proc::BoundsCheckPolicy::Unchecked + } else { + naga::proc::BoundsCheckPolicy::Restrict + }, + image: if self.private_caps.robust_image_access { + naga::proc::BoundsCheckPolicy::Unchecked + } else { + naga::proc::BoundsCheckPolicy::Restrict + }, + // TODO: support bounds checks on binding arrays + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }, + zero_initialize_workgroup_memory: if self + .private_caps + .zero_initialize_workgroup_memory + { + spv::ZeroInitializeWorkgroupMemoryMode::Native + } else { + spv::ZeroInitializeWorkgroupMemoryMode::Polyfill + }, + // We need to build this separately for each invocation, so just default it out here + binding_map: BTreeMap::default(), + } + }; + + let raw_queue = { + profiling::scope!("vkGetDeviceQueue"); + unsafe { raw_device.get_device_queue(family_index, queue_index) } + }; + + let shared = Arc::new(super::DeviceShared { + raw: raw_device, + family_index, + queue_index, + raw_queue, + handle_is_owned, + instance: Arc::clone(&self.instance), + physical_device: self.raw, + enabled_extensions: enabled_extensions.into(), + extension_fns: super::DeviceExtensionFunctions { + draw_indirect_count: indirect_count_fn, + timeline_semaphore: timeline_semaphore_fn, + }, + vendor_id: self.phd_capabilities.properties.vendor_id, + timestamp_period: self.phd_capabilities.properties.limits.timestamp_period, + private_caps: self.private_caps.clone(), + workarounds: self.workarounds, + render_passes: Mutex::new(Default::default()), + framebuffers: Mutex::new(Default::default()), + }); + let mut relay_semaphores = [vk::Semaphore::null(); 2]; + for sem in relay_semaphores.iter_mut() { + unsafe { + *sem = shared + .raw + .create_semaphore(&vk::SemaphoreCreateInfo::builder(), None)? 
+ }; + } + let queue = super::Queue { + raw: raw_queue, + swapchain_fn, + device: Arc::clone(&shared), + family_index, + relay_semaphores, + relay_index: None, + }; + + let mem_allocator = { + let limits = self.phd_capabilities.properties.limits; + let config = gpu_alloc::Config::i_am_prototyping(); //TODO + let max_memory_allocation_size = + if let Some(maintenance_3) = self.phd_capabilities.maintenance_3 { + maintenance_3.max_memory_allocation_size + } else { + u64::max_value() + }; + let properties = gpu_alloc::DeviceProperties { + max_memory_allocation_count: limits.max_memory_allocation_count, + max_memory_allocation_size, + non_coherent_atom_size: limits.non_coherent_atom_size, + memory_types: memory_types + .iter() + .map(|memory_type| gpu_alloc::MemoryType { + props: gpu_alloc::MemoryPropertyFlags::from_bits_truncate( + memory_type.property_flags.as_raw() as u8, + ), + heap: memory_type.heap_index, + }) + .collect(), + memory_heaps: mem_properties.memory_heaps + [..mem_properties.memory_heap_count as usize] + .iter() + .map(|&memory_heap| gpu_alloc::MemoryHeap { + size: memory_heap.size, + }) + .collect(), + buffer_device_address: false, + }; + gpu_alloc::GpuAllocator::new(config, properties) + }; + let desc_allocator = gpu_descriptor::DescriptorAllocator::new( + if let Some(di) = self.phd_capabilities.descriptor_indexing { + di.max_update_after_bind_descriptors_in_all_pools + } else { + 0 + }, + ); + + let device = super::Device { + shared, + mem_allocator: Mutex::new(mem_allocator), + desc_allocator: Mutex::new(desc_allocator), + valid_ash_memory_types, + naga_options, + #[cfg(feature = "renderdoc")] + render_doc: Default::default(), + }; + + Ok(crate::OpenDevice { device, queue }) + } +} + +impl crate::Adapter<super::Api> for super::Adapter { + unsafe fn open( + &self, + features: wgt::Features, + _limits: &wgt::Limits, + ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> { + let enabled_extensions = self.required_device_extensions(features); + let mut enabled_phd_features = self.physical_device_features(&enabled_extensions, features); + + let family_index = 0; //TODO + let family_info = vk::DeviceQueueCreateInfo::builder() + .queue_family_index(family_index) + .queue_priorities(&[1.0]) + .build(); + let family_infos = [family_info]; + + let str_pointers = enabled_extensions + .iter() + .map(|&s| { + // Safe because `enabled_extensions` entries have static lifetime. + s.as_ptr() + }) + .collect::<Vec<_>>(); + + let pre_info = vk::DeviceCreateInfo::builder() + .queue_create_infos(&family_infos) + .enabled_extension_names(&str_pointers); + let info = enabled_phd_features + .add_to_device_create_builder(pre_info) + .build(); + let raw_device = { + profiling::scope!("vkCreateDevice"); + unsafe { self.instance.raw.create_device(self.raw, &info, None)? 
} + }; + + unsafe { + self.device_from_raw( + raw_device, + true, + &enabled_extensions, + features, + family_info.queue_family_index, + 0, + ) + } + } + + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + + let vk_format = self.private_caps.map_texture_format(format); + let properties = unsafe { + self.instance + .raw + .get_physical_device_format_properties(self.raw, vk_format) + }; + let features = properties.optimal_tiling_features; + + let mut flags = Tfc::empty(); + flags.set( + Tfc::SAMPLED, + features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE), + ); + flags.set( + Tfc::SAMPLED_LINEAR, + features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE_FILTER_LINEAR), + ); + // flags.set( + // Tfc::SAMPLED_MINMAX, + // features.contains(vk::FormatFeatureFlags::SAMPLED_IMAGE_FILTER_MINMAX), + // ); + flags.set( + Tfc::STORAGE | Tfc::STORAGE_READ_WRITE, + features.contains(vk::FormatFeatureFlags::STORAGE_IMAGE), + ); + flags.set( + Tfc::STORAGE_ATOMIC, + features.contains(vk::FormatFeatureFlags::STORAGE_IMAGE_ATOMIC), + ); + flags.set( + Tfc::COLOR_ATTACHMENT, + features.contains(vk::FormatFeatureFlags::COLOR_ATTACHMENT), + ); + flags.set( + Tfc::COLOR_ATTACHMENT_BLEND, + features.contains(vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND), + ); + flags.set( + Tfc::DEPTH_STENCIL_ATTACHMENT, + features.contains(vk::FormatFeatureFlags::DEPTH_STENCIL_ATTACHMENT), + ); + flags.set( + Tfc::COPY_SRC, + features.intersects(vk::FormatFeatureFlags::TRANSFER_SRC), + ); + flags.set( + Tfc::COPY_DST, + features.intersects(vk::FormatFeatureFlags::TRANSFER_DST), + ); + // Vulkan is very permissive about MSAA + flags.set(Tfc::MULTISAMPLE_RESOLVE, !format.is_compressed()); + + // get the supported sample counts + let format_aspect = crate::FormatAspects::from(format); + let limits = self.phd_capabilities.properties.limits; + + let sample_flags = if format_aspect.contains(crate::FormatAspects::DEPTH) { + limits + .framebuffer_depth_sample_counts + .min(limits.sampled_image_depth_sample_counts) + } else if format_aspect.contains(crate::FormatAspects::STENCIL) { + limits + .framebuffer_stencil_sample_counts + .min(limits.sampled_image_stencil_sample_counts) + } else { + match format.sample_type(None).unwrap() { + wgt::TextureSampleType::Float { filterable: _ } => limits + .framebuffer_color_sample_counts + .min(limits.sampled_image_color_sample_counts), + wgt::TextureSampleType::Sint | wgt::TextureSampleType::Uint => { + limits.sampled_image_integer_sample_counts + } + _ => unreachable!(), + } + }; + + flags.set( + Tfc::MULTISAMPLE_X2, + sample_flags.contains(vk::SampleCountFlags::TYPE_2), + ); + flags.set( + Tfc::MULTISAMPLE_X4, + sample_flags.contains(vk::SampleCountFlags::TYPE_4), + ); + flags.set( + Tfc::MULTISAMPLE_X8, + sample_flags.contains(vk::SampleCountFlags::TYPE_8), + ); + flags.set( + Tfc::MULTISAMPLE_X16, + sample_flags.contains(vk::SampleCountFlags::TYPE_16), + ); + + flags + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option<crate::SurfaceCapabilities> { + if !self.private_caps.can_present { + return None; + } + let queue_family_index = 0; //TODO + { + profiling::scope!("vkGetPhysicalDeviceSurfaceSupportKHR"); + match unsafe { + surface.functor.get_physical_device_surface_support( + self.raw, + queue_family_index, + surface.raw, + ) + } { + Ok(true) => (), + Ok(false) => return None, + Err(e) => { + 
log::error!("get_physical_device_surface_support: {}", e); + return None; + } + } + } + + let caps = { + profiling::scope!("vkGetPhysicalDeviceSurfaceCapabilitiesKHR"); + match unsafe { + surface + .functor + .get_physical_device_surface_capabilities(self.raw, surface.raw) + } { + Ok(caps) => caps, + Err(e) => { + log::error!("get_physical_device_surface_capabilities: {}", e); + return None; + } + } + }; + + // If image count is 0, the support number of images is unlimited. + let max_image_count = if caps.max_image_count == 0 { + !0 + } else { + caps.max_image_count + }; + + // `0xFFFFFFFF` indicates that the extent depends on the created swapchain. + let current_extent = if caps.current_extent.width != !0 && caps.current_extent.height != !0 + { + Some(wgt::Extent3d { + width: caps.current_extent.width, + height: caps.current_extent.height, + depth_or_array_layers: 1, + }) + } else { + None + }; + + let min_extent = wgt::Extent3d { + width: caps.min_image_extent.width, + height: caps.min_image_extent.height, + depth_or_array_layers: 1, + }; + + let max_extent = wgt::Extent3d { + width: caps.max_image_extent.width, + height: caps.max_image_extent.height, + depth_or_array_layers: caps.max_image_array_layers, + }; + + let raw_present_modes = { + profiling::scope!("vkGetPhysicalDeviceSurfacePresentModesKHR"); + match unsafe { + surface + .functor + .get_physical_device_surface_present_modes(self.raw, surface.raw) + } { + Ok(present_modes) => present_modes, + Err(e) => { + log::error!("get_physical_device_surface_present_modes: {}", e); + Vec::new() + } + } + }; + + let raw_surface_formats = { + profiling::scope!("vkGetPhysicalDeviceSurfaceFormatsKHR"); + match unsafe { + surface + .functor + .get_physical_device_surface_formats(self.raw, surface.raw) + } { + Ok(formats) => formats, + Err(e) => { + log::error!("get_physical_device_surface_formats: {}", e); + Vec::new() + } + } + }; + + let formats = raw_surface_formats + .into_iter() + .filter_map(conv::map_vk_surface_formats) + .collect(); + Some(crate::SurfaceCapabilities { + formats, + swap_chain_sizes: caps.min_image_count..=max_image_count, + current_extent, + extents: min_extent..=max_extent, + usage: conv::map_vk_image_usage(caps.supported_usage_flags), + present_modes: raw_present_modes + .into_iter() + .flat_map(conv::map_vk_present_mode) + .collect(), + composite_alpha_modes: conv::map_vk_composite_alpha(caps.supported_composite_alpha), + }) + } + + unsafe fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp { + // VK_GOOGLE_display_timing is the only way to get presentation + // timestamps on vulkan right now and it is only ever available + // on android and linux. This includes mac, but there's no alternative + // on mac, so this is fine. 
+ #[cfg(unix)] + { + let mut timespec = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + unsafe { + libc::clock_gettime(libc::CLOCK_MONOTONIC, &mut timespec); + } + + wgt::PresentationTimestamp( + timespec.tv_sec as u128 * 1_000_000_000 + timespec.tv_nsec as u128, + ) + } + #[cfg(not(unix))] + { + wgt::PresentationTimestamp::INVALID_TIMESTAMP + } + } +} + +fn is_format_16bit_norm_supported(instance: &ash::Instance, phd: vk::PhysicalDevice) -> bool { + let tiling = vk::ImageTiling::OPTIMAL; + let features = vk::FormatFeatureFlags::SAMPLED_IMAGE + | vk::FormatFeatureFlags::STORAGE_IMAGE + | vk::FormatFeatureFlags::TRANSFER_SRC + | vk::FormatFeatureFlags::TRANSFER_DST; + let r16unorm = supports_format(instance, phd, vk::Format::R16_UNORM, tiling, features); + let r16snorm = supports_format(instance, phd, vk::Format::R16_SNORM, tiling, features); + let rg16unorm = supports_format(instance, phd, vk::Format::R16G16_UNORM, tiling, features); + let rg16snorm = supports_format(instance, phd, vk::Format::R16G16_SNORM, tiling, features); + let rgba16unorm = supports_format( + instance, + phd, + vk::Format::R16G16B16A16_UNORM, + tiling, + features, + ); + let rgba16snorm = supports_format( + instance, + phd, + vk::Format::R16G16B16A16_SNORM, + tiling, + features, + ); + + r16unorm && r16snorm && rg16unorm && rg16snorm && rgba16unorm && rgba16snorm +} + +fn supports_format( + instance: &ash::Instance, + phd: vk::PhysicalDevice, + format: vk::Format, + tiling: vk::ImageTiling, + features: vk::FormatFeatureFlags, +) -> bool { + let properties = unsafe { instance.get_physical_device_format_properties(phd, format) }; + match tiling { + vk::ImageTiling::LINEAR => properties.linear_tiling_features.contains(features), + vk::ImageTiling::OPTIMAL => properties.optimal_tiling_features.contains(features), + _ => false, + } +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/command.rs b/third_party/rust/wgpu-hal/src/vulkan/command.rs new file mode 100644 index 0000000000..f6c871026c --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/command.rs @@ -0,0 +1,826 @@ +use super::conv; + +use arrayvec::ArrayVec; +use ash::{extensions::ext, vk}; + +use std::{mem, ops::Range, slice}; + +const ALLOCATION_GRANULARITY: u32 = 16; +const DST_IMAGE_LAYOUT: vk::ImageLayout = vk::ImageLayout::TRANSFER_DST_OPTIMAL; + +impl super::Texture { + fn map_buffer_copies<T>(&self, regions: T) -> impl Iterator<Item = vk::BufferImageCopy> + where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let (block_width, block_height) = self.format.block_dimensions(); + let format = self.format; + let copy_size = self.copy_size; + regions.map(move |r| { + let extent = r.texture_base.max_copy_size(©_size).min(&r.size); + let (image_subresource, image_offset) = conv::map_subresource_layers(&r.texture_base); + vk::BufferImageCopy { + buffer_offset: r.buffer_layout.offset, + buffer_row_length: r.buffer_layout.bytes_per_row.map_or(0, |bpr| { + let block_size = format + .block_size(Some(r.texture_base.aspect.map())) + .unwrap(); + block_width * (bpr / block_size) + }), + buffer_image_height: r + .buffer_layout + .rows_per_image + .map_or(0, |rpi| rpi * block_height), + image_subresource, + image_offset, + image_extent: conv::map_copy_extent(&extent), + } + }) + } +} + +impl super::DeviceShared { + fn debug_messenger(&self) -> Option<&ext::DebugUtils> { + Some(&self.instance.debug_utils.as_ref()?.extension) + } +} + +impl crate::CommandEncoder<super::Api> for super::CommandEncoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> 
Result<(), crate::DeviceError> { + if self.free.is_empty() { + let vk_info = vk::CommandBufferAllocateInfo::builder() + .command_pool(self.raw) + .command_buffer_count(ALLOCATION_GRANULARITY) + .build(); + let cmd_buf_vec = unsafe { self.device.raw.allocate_command_buffers(&vk_info)? }; + self.free.extend(cmd_buf_vec); + } + let raw = self.free.pop().unwrap(); + + // Set the name unconditionally, since there might be a + // previous name assigned to this. + unsafe { + self.device.set_object_name( + vk::ObjectType::COMMAND_BUFFER, + raw, + label.unwrap_or_default(), + ) + }; + + // Reset this in case the last renderpass was never ended. + self.rpass_debug_marker_active = false; + + let vk_info = vk::CommandBufferBeginInfo::builder() + .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT) + .build(); + unsafe { self.device.raw.begin_command_buffer(raw, &vk_info) }?; + self.active = raw; + + Ok(()) + } + + unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> { + let raw = self.active; + self.active = vk::CommandBuffer::null(); + unsafe { self.device.raw.end_command_buffer(raw) }?; + Ok(super::CommandBuffer { raw }) + } + + unsafe fn discard_encoding(&mut self) { + self.discarded.push(self.active); + self.active = vk::CommandBuffer::null(); + } + + unsafe fn reset_all<I>(&mut self, cmd_bufs: I) + where + I: Iterator<Item = super::CommandBuffer>, + { + self.temp.clear(); + self.free + .extend(cmd_bufs.into_iter().map(|cmd_buf| cmd_buf.raw)); + self.free.append(&mut self.discarded); + let _ = unsafe { + self.device + .raw + .reset_command_pool(self.raw, vk::CommandPoolResetFlags::default()) + }; + } + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::BufferBarrier<'a, super::Api>>, + { + //Note: this is done so that we never end up with empty stage flags + let mut src_stages = vk::PipelineStageFlags::TOP_OF_PIPE; + let mut dst_stages = vk::PipelineStageFlags::BOTTOM_OF_PIPE; + let vk_barriers = &mut self.temp.buffer_barriers; + vk_barriers.clear(); + + for bar in barriers { + let (src_stage, src_access) = conv::map_buffer_usage_to_barrier(bar.usage.start); + src_stages |= src_stage; + let (dst_stage, dst_access) = conv::map_buffer_usage_to_barrier(bar.usage.end); + dst_stages |= dst_stage; + + vk_barriers.push( + vk::BufferMemoryBarrier::builder() + .buffer(bar.buffer.raw) + .size(vk::WHOLE_SIZE) + .src_access_mask(src_access) + .dst_access_mask(dst_access) + .build(), + ) + } + + if !vk_barriers.is_empty() { + unsafe { + self.device.raw.cmd_pipeline_barrier( + self.active, + src_stages, + dst_stages, + vk::DependencyFlags::empty(), + &[], + vk_barriers, + &[], + ) + }; + } + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator<Item = crate::TextureBarrier<'a, super::Api>>, + { + let mut src_stages = vk::PipelineStageFlags::empty(); + let mut dst_stages = vk::PipelineStageFlags::empty(); + let vk_barriers = &mut self.temp.image_barriers; + vk_barriers.clear(); + + for bar in barriers { + let range = conv::map_subresource_range(&bar.range, bar.texture.format); + let (src_stage, src_access) = conv::map_texture_usage_to_barrier(bar.usage.start); + let src_layout = conv::derive_image_layout(bar.usage.start, bar.texture.format); + src_stages |= src_stage; + let (dst_stage, dst_access) = conv::map_texture_usage_to_barrier(bar.usage.end); + let dst_layout = conv::derive_image_layout(bar.usage.end, bar.texture.format); + dst_stages |= dst_stage; + + vk_barriers.push( + 
vk::ImageMemoryBarrier::builder() + .image(bar.texture.raw) + .subresource_range(range) + .src_access_mask(src_access) + .dst_access_mask(dst_access) + .old_layout(src_layout) + .new_layout(dst_layout) + .build(), + ); + } + + if !vk_barriers.is_empty() { + unsafe { + self.device.raw.cmd_pipeline_barrier( + self.active, + src_stages, + dst_stages, + vk::DependencyFlags::empty(), + &[], + &[], + vk_barriers, + ) + }; + } + } + + unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { + unsafe { + self.device.raw.cmd_fill_buffer( + self.active, + buffer.raw, + range.start, + range.end - range.start, + 0, + ) + }; + } + + unsafe fn copy_buffer_to_buffer<T>( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferCopy>, + { + let vk_regions_iter = regions.map(|r| vk::BufferCopy { + src_offset: r.src_offset, + dst_offset: r.dst_offset, + size: r.size.get(), + }); + + unsafe { + self.device.raw.cmd_copy_buffer( + self.active, + src.raw, + dst.raw, + &smallvec::SmallVec::<[vk::BufferCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn copy_texture_to_texture<T>( + &mut self, + src: &super::Texture, + src_usage: crate::TextureUses, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::TextureCopy>, + { + let src_layout = conv::derive_image_layout(src_usage, src.format); + + let vk_regions_iter = regions.map(|r| { + let (src_subresource, src_offset) = conv::map_subresource_layers(&r.src_base); + let (dst_subresource, dst_offset) = conv::map_subresource_layers(&r.dst_base); + let extent = r + .size + .min(&r.src_base.max_copy_size(&src.copy_size)) + .min(&r.dst_base.max_copy_size(&dst.copy_size)); + vk::ImageCopy { + src_subresource, + src_offset, + dst_subresource, + dst_offset, + extent: conv::map_copy_extent(&extent), + } + }); + + unsafe { + self.device.raw.cmd_copy_image( + self.active, + src.raw, + src_layout, + dst.raw, + DST_IMAGE_LAYOUT, + &smallvec::SmallVec::<[vk::ImageCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn copy_buffer_to_texture<T>( + &mut self, + src: &super::Buffer, + dst: &super::Texture, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let vk_regions_iter = dst.map_buffer_copies(regions); + + unsafe { + self.device.raw.cmd_copy_buffer_to_image( + self.active, + src.raw, + dst.raw, + DST_IMAGE_LAYOUT, + &smallvec::SmallVec::<[vk::BufferImageCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn copy_texture_to_buffer<T>( + &mut self, + src: &super::Texture, + src_usage: crate::TextureUses, + dst: &super::Buffer, + regions: T, + ) where + T: Iterator<Item = crate::BufferTextureCopy>, + { + let src_layout = conv::derive_image_layout(src_usage, src.format); + let vk_regions_iter = src.map_buffer_copies(regions); + + unsafe { + self.device.raw.cmd_copy_image_to_buffer( + self.active, + src.raw, + src_layout, + dst.raw, + &smallvec::SmallVec::<[vk::BufferImageCopy; 32]>::from_iter(vk_regions_iter), + ) + }; + } + + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.device.raw.cmd_begin_query( + self.active, + set.raw, + index, + vk::QueryControlFlags::empty(), + ) + }; + } + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + unsafe { self.device.raw.cmd_end_query(self.active, set.raw, index) }; + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + unsafe { + self.device.raw.cmd_write_timestamp( + self.active, + 
vk::PipelineStageFlags::BOTTOM_OF_PIPE, + set.raw, + index, + ) + }; + } + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range<u32>) { + unsafe { + self.device.raw.cmd_reset_query_pool( + self.active, + set.raw, + range.start, + range.end - range.start, + ) + }; + } + unsafe fn copy_query_results( + &mut self, + set: &super::QuerySet, + range: Range<u32>, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + unsafe { + self.device.raw.cmd_copy_query_pool_results( + self.active, + set.raw, + range.start, + range.end - range.start, + buffer.raw, + offset, + stride.get(), + vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT, + ) + }; + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor<super::Api>) { + let mut vk_clear_values = + ArrayVec::<vk::ClearValue, { super::MAX_TOTAL_ATTACHMENTS }>::new(); + let mut vk_image_views = ArrayVec::<vk::ImageView, { super::MAX_TOTAL_ATTACHMENTS }>::new(); + let mut rp_key = super::RenderPassKey::default(); + let mut fb_key = super::FramebufferKey { + attachments: ArrayVec::default(), + extent: desc.extent, + sample_count: desc.sample_count, + }; + let caps = &self.device.private_caps; + + for cat in desc.color_attachments { + if let Some(cat) = cat.as_ref() { + vk_clear_values.push(vk::ClearValue { + color: unsafe { cat.make_vk_clear_color() }, + }); + vk_image_views.push(cat.target.view.raw); + let color = super::ColorAttachmentKey { + base: cat.target.make_attachment_key(cat.ops, caps), + resolve: cat.resolve_target.as_ref().map(|target| { + target.make_attachment_key(crate::AttachmentOps::STORE, caps) + }), + }; + + rp_key.colors.push(Some(color)); + fb_key.attachments.push(cat.target.view.attachment.clone()); + if let Some(ref at) = cat.resolve_target { + vk_clear_values.push(unsafe { mem::zeroed() }); + vk_image_views.push(at.view.raw); + fb_key.attachments.push(at.view.attachment.clone()); + } + + // Assert this attachment is valid for the detected multiview, as a sanity check + // The driver crash for this is really bad on AMD, so the check is worth it + if let Some(multiview) = desc.multiview { + assert_eq!(cat.target.view.layers, multiview); + if let Some(ref resolve_target) = cat.resolve_target { + assert_eq!(resolve_target.view.layers, multiview); + } + } + } else { + rp_key.colors.push(None); + } + } + if let Some(ref ds) = desc.depth_stencil_attachment { + vk_clear_values.push(vk::ClearValue { + depth_stencil: vk::ClearDepthStencilValue { + depth: ds.clear_value.0, + stencil: ds.clear_value.1, + }, + }); + vk_image_views.push(ds.target.view.raw); + rp_key.depth_stencil = Some(super::DepthStencilAttachmentKey { + base: ds.target.make_attachment_key(ds.depth_ops, caps), + stencil_ops: ds.stencil_ops, + }); + fb_key.attachments.push(ds.target.view.attachment.clone()); + + // Assert this attachment is valid for the detected multiview, as a sanity check + // The driver crash for this is really bad on AMD, so the check is worth it + if let Some(multiview) = desc.multiview { + assert_eq!(ds.target.view.layers, multiview); + } + } + rp_key.sample_count = fb_key.sample_count; + rp_key.multiview = desc.multiview; + + let render_area = vk::Rect2D { + offset: vk::Offset2D { x: 0, y: 0 }, + extent: vk::Extent2D { + width: desc.extent.width, + height: desc.extent.height, + }, + }; + let vk_viewports = [vk::Viewport { + x: 0.0, + y: if self.device.private_caps.flip_y_requires_shift { + desc.extent.height as f32 + } else { + 0.0 + }, + width: 
desc.extent.width as f32, + height: -(desc.extent.height as f32), + min_depth: 0.0, + max_depth: 1.0, + }]; + + let raw_pass = self.device.make_render_pass(rp_key).unwrap(); + let raw_framebuffer = self + .device + .make_framebuffer(fb_key, raw_pass, desc.label) + .unwrap(); + + let mut vk_info = vk::RenderPassBeginInfo::builder() + .render_pass(raw_pass) + .render_area(render_area) + .clear_values(&vk_clear_values) + .framebuffer(raw_framebuffer); + let mut vk_attachment_info = if caps.imageless_framebuffers { + Some( + vk::RenderPassAttachmentBeginInfo::builder() + .attachments(&vk_image_views) + .build(), + ) + } else { + None + }; + if let Some(attachment_info) = vk_attachment_info.as_mut() { + vk_info = vk_info.push_next(attachment_info); + } + + if let Some(label) = desc.label { + unsafe { self.begin_debug_marker(label) }; + self.rpass_debug_marker_active = true; + } + + unsafe { + self.device + .raw + .cmd_set_viewport(self.active, 0, &vk_viewports); + self.device + .raw + .cmd_set_scissor(self.active, 0, &[render_area]); + self.device.raw.cmd_begin_render_pass( + self.active, + &vk_info, + vk::SubpassContents::INLINE, + ); + }; + + self.bind_point = vk::PipelineBindPoint::GRAPHICS; + } + unsafe fn end_render_pass(&mut self) { + unsafe { + self.device.raw.cmd_end_render_pass(self.active); + if self.rpass_debug_marker_active { + self.end_debug_marker(); + self.rpass_debug_marker_active = false; + } + } + } + + unsafe fn set_bind_group( + &mut self, + layout: &super::PipelineLayout, + index: u32, + group: &super::BindGroup, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + let sets = [*group.set.raw()]; + unsafe { + self.device.raw.cmd_bind_descriptor_sets( + self.active, + self.bind_point, + layout.raw, + index, + &sets, + dynamic_offsets, + ) + }; + } + unsafe fn set_push_constants( + &mut self, + layout: &super::PipelineLayout, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + unsafe { + self.device.raw.cmd_push_constants( + self.active, + layout.raw, + conv::map_shader_stage(stages), + offset, + slice::from_raw_parts(data.as_ptr() as _, data.len() * 4), + ) + }; + } + + unsafe fn insert_debug_marker(&mut self, label: &str) { + if let Some(ext) = self.device.debug_messenger() { + let cstr = self.temp.make_c_str(label); + let vk_label = vk::DebugUtilsLabelEXT::builder().label_name(cstr).build(); + unsafe { ext.cmd_insert_debug_utils_label(self.active, &vk_label) }; + } + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + if let Some(ext) = self.device.debug_messenger() { + let cstr = self.temp.make_c_str(group_label); + let vk_label = vk::DebugUtilsLabelEXT::builder().label_name(cstr).build(); + unsafe { ext.cmd_begin_debug_utils_label(self.active, &vk_label) }; + } + } + unsafe fn end_debug_marker(&mut self) { + if let Some(ext) = self.device.debug_messenger() { + unsafe { ext.cmd_end_debug_utils_label(self.active) }; + } + } + + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + unsafe { + self.device.raw.cmd_bind_pipeline( + self.active, + vk::PipelineBindPoint::GRAPHICS, + pipeline.raw, + ) + }; + } + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + unsafe { + self.device.raw.cmd_bind_index_buffer( + self.active, + binding.buffer.raw, + binding.offset, + conv::map_index_format(format), + ) + }; + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + let vk_buffers = 
[binding.buffer.raw]; + let vk_offsets = [binding.offset]; + unsafe { + self.device + .raw + .cmd_bind_vertex_buffers(self.active, index, &vk_buffers, &vk_offsets) + }; + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { + let vk_viewports = [vk::Viewport { + x: rect.x, + y: if self.device.private_caps.flip_y_requires_shift { + rect.y + rect.h + } else { + rect.y + }, + width: rect.w, + height: -rect.h, // flip Y + min_depth: depth_range.start, + max_depth: depth_range.end, + }]; + unsafe { + self.device + .raw + .cmd_set_viewport(self.active, 0, &vk_viewports) + }; + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect<u32>) { + let vk_scissors = [vk::Rect2D { + offset: vk::Offset2D { + x: rect.x as i32, + y: rect.y as i32, + }, + extent: vk::Extent2D { + width: rect.w, + height: rect.h, + }, + }]; + unsafe { + self.device + .raw + .cmd_set_scissor(self.active, 0, &vk_scissors) + }; + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + unsafe { + self.device.raw.cmd_set_stencil_reference( + self.active, + vk::StencilFaceFlags::FRONT_AND_BACK, + value, + ) + }; + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + unsafe { self.device.raw.cmd_set_blend_constants(self.active, color) }; + } + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw( + self.active, + vertex_count, + instance_count, + start_vertex, + start_instance, + ) + }; + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw_indexed( + self.active, + index_count, + instance_count, + start_index, + base_vertex, + start_instance, + ) + }; + } + unsafe fn draw_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw_indirect( + self.active, + buffer.raw, + offset, + draw_count, + mem::size_of::<wgt::DrawIndirectArgs>() as u32, + ) + }; + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + unsafe { + self.device.raw.cmd_draw_indexed_indirect( + self.active, + buffer.raw, + offset, + draw_count, + mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32, + ) + }; + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let stride = mem::size_of::<wgt::DrawIndirectArgs>() as u32; + match self.device.extension_fns.draw_indirect_count { + Some(ref t) => { + unsafe { + t.cmd_draw_indirect_count( + self.active, + buffer.raw, + offset, + count_buffer.raw, + count_offset, + max_count, + stride, + ) + }; + } + None => panic!("Feature `DRAW_INDIRECT_COUNT` not enabled"), + } + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &super::Buffer, + offset: wgt::BufferAddress, + count_buffer: &super::Buffer, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + let stride = mem::size_of::<wgt::DrawIndexedIndirectArgs>() as u32; + match self.device.extension_fns.draw_indirect_count { + Some(ref t) => { + unsafe { + t.cmd_draw_indexed_indirect_count( + self.active, + buffer.raw, + offset, + count_buffer.raw, + count_offset, + max_count, + stride, + ) + }; + } + None => panic!("Feature `DRAW_INDIRECT_COUNT` 
not enabled"), + } + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + self.bind_point = vk::PipelineBindPoint::COMPUTE; + if let Some(label) = desc.label { + unsafe { self.begin_debug_marker(label) }; + self.rpass_debug_marker_active = true; + } + } + unsafe fn end_compute_pass(&mut self) { + if self.rpass_debug_marker_active { + unsafe { self.end_debug_marker() }; + self.rpass_debug_marker_active = false + } + } + + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + unsafe { + self.device.raw.cmd_bind_pipeline( + self.active, + vk::PipelineBindPoint::COMPUTE, + pipeline.raw, + ) + }; + } + + unsafe fn dispatch(&mut self, count: [u32; 3]) { + unsafe { + self.device + .raw + .cmd_dispatch(self.active, count[0], count[1], count[2]) + }; + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + unsafe { + self.device + .raw + .cmd_dispatch_indirect(self.active, buffer.raw, offset) + } + } +} + +#[test] +fn check_dst_image_layout() { + assert_eq!( + conv::derive_image_layout(crate::TextureUses::COPY_DST, wgt::TextureFormat::Rgba8Unorm), + DST_IMAGE_LAYOUT + ); +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/conv.rs b/third_party/rust/wgpu-hal/src/vulkan/conv.rs new file mode 100644 index 0000000000..a26f3765b9 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/conv.rs @@ -0,0 +1,825 @@ +use ash::vk; + +impl super::PrivateCapabilities { + pub fn map_texture_format(&self, format: wgt::TextureFormat) -> vk::Format { + use ash::vk::Format as F; + use wgt::TextureFormat as Tf; + use wgt::{AstcBlock, AstcChannel}; + match format { + Tf::R8Unorm => F::R8_UNORM, + Tf::R8Snorm => F::R8_SNORM, + Tf::R8Uint => F::R8_UINT, + Tf::R8Sint => F::R8_SINT, + Tf::R16Uint => F::R16_UINT, + Tf::R16Sint => F::R16_SINT, + Tf::R16Unorm => F::R16_UNORM, + Tf::R16Snorm => F::R16_SNORM, + Tf::R16Float => F::R16_SFLOAT, + Tf::Rg8Unorm => F::R8G8_UNORM, + Tf::Rg8Snorm => F::R8G8_SNORM, + Tf::Rg8Uint => F::R8G8_UINT, + Tf::Rg8Sint => F::R8G8_SINT, + Tf::Rg16Unorm => F::R16G16_UNORM, + Tf::Rg16Snorm => F::R16G16_SNORM, + Tf::R32Uint => F::R32_UINT, + Tf::R32Sint => F::R32_SINT, + Tf::R32Float => F::R32_SFLOAT, + Tf::Rg16Uint => F::R16G16_UINT, + Tf::Rg16Sint => F::R16G16_SINT, + Tf::Rg16Float => F::R16G16_SFLOAT, + Tf::Rgba8Unorm => F::R8G8B8A8_UNORM, + Tf::Rgba8UnormSrgb => F::R8G8B8A8_SRGB, + Tf::Bgra8UnormSrgb => F::B8G8R8A8_SRGB, + Tf::Rgba8Snorm => F::R8G8B8A8_SNORM, + Tf::Bgra8Unorm => F::B8G8R8A8_UNORM, + Tf::Rgba8Uint => F::R8G8B8A8_UINT, + Tf::Rgba8Sint => F::R8G8B8A8_SINT, + Tf::Rgb10a2Unorm => F::A2B10G10R10_UNORM_PACK32, + Tf::Rg11b10Float => F::B10G11R11_UFLOAT_PACK32, + Tf::Rg32Uint => F::R32G32_UINT, + Tf::Rg32Sint => F::R32G32_SINT, + Tf::Rg32Float => F::R32G32_SFLOAT, + Tf::Rgba16Uint => F::R16G16B16A16_UINT, + Tf::Rgba16Sint => F::R16G16B16A16_SINT, + Tf::Rgba16Unorm => F::R16G16B16A16_UNORM, + Tf::Rgba16Snorm => F::R16G16B16A16_SNORM, + Tf::Rgba16Float => F::R16G16B16A16_SFLOAT, + Tf::Rgba32Uint => F::R32G32B32A32_UINT, + Tf::Rgba32Sint => F::R32G32B32A32_SINT, + Tf::Rgba32Float => F::R32G32B32A32_SFLOAT, + Tf::Depth32Float => F::D32_SFLOAT, + Tf::Depth32FloatStencil8 => F::D32_SFLOAT_S8_UINT, + Tf::Depth24Plus => { + if self.texture_d24 { + F::X8_D24_UNORM_PACK32 + } else { + F::D32_SFLOAT + } + } + Tf::Depth24PlusStencil8 => { + if self.texture_d24_s8 { + F::D24_UNORM_S8_UINT + } else { + F::D32_SFLOAT_S8_UINT + } + } + Tf::Stencil8 => { + if self.texture_s8 { + 
F::S8_UINT + } else if self.texture_d24_s8 { + F::D24_UNORM_S8_UINT + } else { + F::D32_SFLOAT_S8_UINT + } + } + Tf::Depth16Unorm => F::D16_UNORM, + Tf::Rgb9e5Ufloat => F::E5B9G9R9_UFLOAT_PACK32, + Tf::Bc1RgbaUnorm => F::BC1_RGBA_UNORM_BLOCK, + Tf::Bc1RgbaUnormSrgb => F::BC1_RGBA_SRGB_BLOCK, + Tf::Bc2RgbaUnorm => F::BC2_UNORM_BLOCK, + Tf::Bc2RgbaUnormSrgb => F::BC2_SRGB_BLOCK, + Tf::Bc3RgbaUnorm => F::BC3_UNORM_BLOCK, + Tf::Bc3RgbaUnormSrgb => F::BC3_SRGB_BLOCK, + Tf::Bc4RUnorm => F::BC4_UNORM_BLOCK, + Tf::Bc4RSnorm => F::BC4_SNORM_BLOCK, + Tf::Bc5RgUnorm => F::BC5_UNORM_BLOCK, + Tf::Bc5RgSnorm => F::BC5_SNORM_BLOCK, + Tf::Bc6hRgbUfloat => F::BC6H_UFLOAT_BLOCK, + Tf::Bc6hRgbFloat => F::BC6H_SFLOAT_BLOCK, + Tf::Bc7RgbaUnorm => F::BC7_UNORM_BLOCK, + Tf::Bc7RgbaUnormSrgb => F::BC7_SRGB_BLOCK, + Tf::Etc2Rgb8Unorm => F::ETC2_R8G8B8_UNORM_BLOCK, + Tf::Etc2Rgb8UnormSrgb => F::ETC2_R8G8B8_SRGB_BLOCK, + Tf::Etc2Rgb8A1Unorm => F::ETC2_R8G8B8A1_UNORM_BLOCK, + Tf::Etc2Rgb8A1UnormSrgb => F::ETC2_R8G8B8A1_SRGB_BLOCK, + Tf::Etc2Rgba8Unorm => F::ETC2_R8G8B8A8_UNORM_BLOCK, + Tf::Etc2Rgba8UnormSrgb => F::ETC2_R8G8B8A8_SRGB_BLOCK, + Tf::EacR11Unorm => F::EAC_R11_UNORM_BLOCK, + Tf::EacR11Snorm => F::EAC_R11_SNORM_BLOCK, + Tf::EacRg11Unorm => F::EAC_R11G11_UNORM_BLOCK, + Tf::EacRg11Snorm => F::EAC_R11G11_SNORM_BLOCK, + Tf::Astc { block, channel } => match channel { + AstcChannel::Unorm => match block { + AstcBlock::B4x4 => F::ASTC_4X4_UNORM_BLOCK, + AstcBlock::B5x4 => F::ASTC_5X4_UNORM_BLOCK, + AstcBlock::B5x5 => F::ASTC_5X5_UNORM_BLOCK, + AstcBlock::B6x5 => F::ASTC_6X5_UNORM_BLOCK, + AstcBlock::B6x6 => F::ASTC_6X6_UNORM_BLOCK, + AstcBlock::B8x5 => F::ASTC_8X5_UNORM_BLOCK, + AstcBlock::B8x6 => F::ASTC_8X6_UNORM_BLOCK, + AstcBlock::B8x8 => F::ASTC_8X8_UNORM_BLOCK, + AstcBlock::B10x5 => F::ASTC_10X5_UNORM_BLOCK, + AstcBlock::B10x6 => F::ASTC_10X6_UNORM_BLOCK, + AstcBlock::B10x8 => F::ASTC_10X8_UNORM_BLOCK, + AstcBlock::B10x10 => F::ASTC_10X10_UNORM_BLOCK, + AstcBlock::B12x10 => F::ASTC_12X10_UNORM_BLOCK, + AstcBlock::B12x12 => F::ASTC_12X12_UNORM_BLOCK, + }, + AstcChannel::UnormSrgb => match block { + AstcBlock::B4x4 => F::ASTC_4X4_SRGB_BLOCK, + AstcBlock::B5x4 => F::ASTC_5X4_SRGB_BLOCK, + AstcBlock::B5x5 => F::ASTC_5X5_SRGB_BLOCK, + AstcBlock::B6x5 => F::ASTC_6X5_SRGB_BLOCK, + AstcBlock::B6x6 => F::ASTC_6X6_SRGB_BLOCK, + AstcBlock::B8x5 => F::ASTC_8X5_SRGB_BLOCK, + AstcBlock::B8x6 => F::ASTC_8X6_SRGB_BLOCK, + AstcBlock::B8x8 => F::ASTC_8X8_SRGB_BLOCK, + AstcBlock::B10x5 => F::ASTC_10X5_SRGB_BLOCK, + AstcBlock::B10x6 => F::ASTC_10X6_SRGB_BLOCK, + AstcBlock::B10x8 => F::ASTC_10X8_SRGB_BLOCK, + AstcBlock::B10x10 => F::ASTC_10X10_SRGB_BLOCK, + AstcBlock::B12x10 => F::ASTC_12X10_SRGB_BLOCK, + AstcBlock::B12x12 => F::ASTC_12X12_SRGB_BLOCK, + }, + AstcChannel::Hdr => match block { + AstcBlock::B4x4 => F::ASTC_4X4_SFLOAT_BLOCK_EXT, + AstcBlock::B5x4 => F::ASTC_5X4_SFLOAT_BLOCK_EXT, + AstcBlock::B5x5 => F::ASTC_5X5_SFLOAT_BLOCK_EXT, + AstcBlock::B6x5 => F::ASTC_6X5_SFLOAT_BLOCK_EXT, + AstcBlock::B6x6 => F::ASTC_6X6_SFLOAT_BLOCK_EXT, + AstcBlock::B8x5 => F::ASTC_8X5_SFLOAT_BLOCK_EXT, + AstcBlock::B8x6 => F::ASTC_8X6_SFLOAT_BLOCK_EXT, + AstcBlock::B8x8 => F::ASTC_8X8_SFLOAT_BLOCK_EXT, + AstcBlock::B10x5 => F::ASTC_10X5_SFLOAT_BLOCK_EXT, + AstcBlock::B10x6 => F::ASTC_10X6_SFLOAT_BLOCK_EXT, + AstcBlock::B10x8 => F::ASTC_10X8_SFLOAT_BLOCK_EXT, + AstcBlock::B10x10 => F::ASTC_10X10_SFLOAT_BLOCK_EXT, + AstcBlock::B12x10 => F::ASTC_12X10_SFLOAT_BLOCK_EXT, + AstcBlock::B12x12 => F::ASTC_12X12_SFLOAT_BLOCK_EXT, + }, + }, + } + } 
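A hedged aside on the depth/stencil fallbacks above (not part of the upstream diff): Vulkan does not require implementations to support the 24-bit depth formats or bare `S8_UINT`; it only guarantees that at least one of `D24_UNORM_S8_UINT` and `D32_SFLOAT_S8_UINT` is usable as a depth/stencil attachment. That is why `Depth24Plus`, `Depth24PlusStencil8`, and `Stencil8` branch on the probed `texture_d24`, `texture_d24_s8`, and `texture_s8` flags. The snippet below shows one plausible way such a flag is probed, mirroring the `supports_format` helper in vulkan/adapter.rs; `supports_d24` is a hypothetical helper name, not part of the file.

// Hypothetical probe mirroring `supports_format` in vulkan/adapter.rs:
// ask the implementation whether X8_D24_UNORM_PACK32 can be used as an
// optimally tiled depth/stencil attachment before preferring the 24-bit path.
fn supports_d24(instance: &ash::Instance, phd: ash::vk::PhysicalDevice) -> bool {
    let props = unsafe {
        instance
            .get_physical_device_format_properties(phd, ash::vk::Format::X8_D24_UNORM_PACK32)
    };
    props
        .optimal_tiling_features
        .contains(ash::vk::FormatFeatureFlags::DEPTH_STENCIL_ATTACHMENT)
}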
+} + +pub fn map_vk_surface_formats(sf: vk::SurfaceFormatKHR) -> Option<wgt::TextureFormat> { + use ash::vk::Format as F; + use wgt::TextureFormat as Tf; + // List we care about pulled from https://vulkan.gpuinfo.org/listsurfaceformats.php + Some(match sf.color_space { + vk::ColorSpaceKHR::SRGB_NONLINEAR => match sf.format { + F::B8G8R8A8_UNORM => Tf::Bgra8Unorm, + F::B8G8R8A8_SRGB => Tf::Bgra8UnormSrgb, + F::R8G8B8A8_SNORM => Tf::Rgba8Snorm, + F::R8G8B8A8_UNORM => Tf::Rgba8Unorm, + F::R8G8B8A8_SRGB => Tf::Rgba8UnormSrgb, + _ => return None, + }, + vk::ColorSpaceKHR::EXTENDED_SRGB_LINEAR_EXT => match sf.format { + F::R16G16B16A16_SFLOAT => Tf::Rgba16Float, + F::R16G16B16A16_SNORM => Tf::Rgba16Snorm, + F::R16G16B16A16_UNORM => Tf::Rgba16Unorm, + F::A2B10G10R10_UNORM_PACK32 => Tf::Rgb10a2Unorm, + _ => return None, + }, + _ => return None, + }) +} + +impl crate::Attachment<'_, super::Api> { + pub(super) fn make_attachment_key( + &self, + ops: crate::AttachmentOps, + caps: &super::PrivateCapabilities, + ) -> super::AttachmentKey { + super::AttachmentKey { + format: caps.map_texture_format(self.view.attachment.view_format), + layout: derive_image_layout(self.usage, self.view.attachment.view_format), + ops, + } + } +} + +impl crate::ColorAttachment<'_, super::Api> { + pub(super) unsafe fn make_vk_clear_color(&self) -> vk::ClearColorValue { + let cv = &self.clear_value; + match self + .target + .view + .attachment + .view_format + .sample_type(None) + .unwrap() + { + wgt::TextureSampleType::Float { .. } => vk::ClearColorValue { + float32: [cv.r as f32, cv.g as f32, cv.b as f32, cv.a as f32], + }, + wgt::TextureSampleType::Sint => vk::ClearColorValue { + int32: [cv.r as i32, cv.g as i32, cv.b as i32, cv.a as i32], + }, + wgt::TextureSampleType::Uint => vk::ClearColorValue { + uint32: [cv.r as u32, cv.g as u32, cv.b as u32, cv.a as u32], + }, + wgt::TextureSampleType::Depth => unreachable!(), + } + } +} + +pub fn derive_image_layout( + usage: crate::TextureUses, + format: wgt::TextureFormat, +) -> vk::ImageLayout { + // Note: depth textures are always sampled with RODS layout + let is_color = crate::FormatAspects::from(format).contains(crate::FormatAspects::COLOR); + match usage { + crate::TextureUses::UNINITIALIZED => vk::ImageLayout::UNDEFINED, + crate::TextureUses::COPY_SRC => vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + crate::TextureUses::COPY_DST => vk::ImageLayout::TRANSFER_DST_OPTIMAL, + crate::TextureUses::RESOURCE if is_color => vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL, + crate::TextureUses::COLOR_TARGET => vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL, + crate::TextureUses::DEPTH_STENCIL_WRITE => { + vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL + } + _ => { + if usage == crate::TextureUses::PRESENT { + vk::ImageLayout::PRESENT_SRC_KHR + } else if is_color { + vk::ImageLayout::GENERAL + } else { + vk::ImageLayout::DEPTH_STENCIL_READ_ONLY_OPTIMAL + } + } + } +} + +pub fn map_texture_usage(usage: crate::TextureUses) -> vk::ImageUsageFlags { + let mut flags = vk::ImageUsageFlags::empty(); + if usage.contains(crate::TextureUses::COPY_SRC) { + flags |= vk::ImageUsageFlags::TRANSFER_SRC; + } + if usage.contains(crate::TextureUses::COPY_DST) { + flags |= vk::ImageUsageFlags::TRANSFER_DST; + } + if usage.contains(crate::TextureUses::RESOURCE) { + flags |= vk::ImageUsageFlags::SAMPLED; + } + if usage.contains(crate::TextureUses::COLOR_TARGET) { + flags |= vk::ImageUsageFlags::COLOR_ATTACHMENT; + } + if usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | 
crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= vk::ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT; + } + if usage.intersects(crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE) { + flags |= vk::ImageUsageFlags::STORAGE; + } + flags +} + +pub fn map_texture_usage_to_barrier( + usage: crate::TextureUses, +) -> (vk::PipelineStageFlags, vk::AccessFlags) { + let mut stages = vk::PipelineStageFlags::empty(); + let mut access = vk::AccessFlags::empty(); + let shader_stages = vk::PipelineStageFlags::VERTEX_SHADER + | vk::PipelineStageFlags::FRAGMENT_SHADER + | vk::PipelineStageFlags::COMPUTE_SHADER; + + if usage.contains(crate::TextureUses::COPY_SRC) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_READ; + } + if usage.contains(crate::TextureUses::COPY_DST) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_WRITE; + } + if usage.contains(crate::TextureUses::RESOURCE) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ; + } + if usage.contains(crate::TextureUses::COLOR_TARGET) { + stages |= vk::PipelineStageFlags::COLOR_ATTACHMENT_OUTPUT; + access |= vk::AccessFlags::COLOR_ATTACHMENT_READ | vk::AccessFlags::COLOR_ATTACHMENT_WRITE; + } + if usage.intersects(crate::TextureUses::DEPTH_STENCIL_READ) { + stages |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS + | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS; + access |= vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ; + } + if usage.intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) { + stages |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS + | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS; + access |= vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ + | vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE; + } + if usage.contains(crate::TextureUses::STORAGE_READ) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ; + } + if usage.contains(crate::TextureUses::STORAGE_READ_WRITE) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ | vk::AccessFlags::SHADER_WRITE; + } + + if usage == crate::TextureUses::UNINITIALIZED || usage == crate::TextureUses::PRESENT { + ( + vk::PipelineStageFlags::TOP_OF_PIPE, + vk::AccessFlags::empty(), + ) + } else { + (stages, access) + } +} + +pub fn map_vk_image_usage(usage: vk::ImageUsageFlags) -> crate::TextureUses { + let mut bits = crate::TextureUses::empty(); + if usage.contains(vk::ImageUsageFlags::TRANSFER_SRC) { + bits |= crate::TextureUses::COPY_SRC; + } + if usage.contains(vk::ImageUsageFlags::TRANSFER_DST) { + bits |= crate::TextureUses::COPY_DST; + } + if usage.contains(vk::ImageUsageFlags::SAMPLED) { + bits |= crate::TextureUses::RESOURCE; + } + if usage.contains(vk::ImageUsageFlags::COLOR_ATTACHMENT) { + bits |= crate::TextureUses::COLOR_TARGET; + } + if usage.contains(vk::ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT) { + bits |= crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE; + } + if usage.contains(vk::ImageUsageFlags::STORAGE) { + bits |= crate::TextureUses::STORAGE_READ | crate::TextureUses::STORAGE_READ_WRITE; + } + bits +} + +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> vk::ImageType { + match dim { + wgt::TextureDimension::D1 => vk::ImageType::TYPE_1D, + wgt::TextureDimension::D2 => vk::ImageType::TYPE_2D, + wgt::TextureDimension::D3 => vk::ImageType::TYPE_3D, + } +} + +pub fn map_index_format(index_format: wgt::IndexFormat) -> vk::IndexType { + match index_format { + wgt::IndexFormat::Uint16 => vk::IndexType::UINT16, + 
wgt::IndexFormat::Uint32 => vk::IndexType::UINT32, + } +} + +pub fn map_vertex_format(vertex_format: wgt::VertexFormat) -> vk::Format { + use wgt::VertexFormat as Vf; + match vertex_format { + Vf::Uint8x2 => vk::Format::R8G8_UINT, + Vf::Uint8x4 => vk::Format::R8G8B8A8_UINT, + Vf::Sint8x2 => vk::Format::R8G8_SINT, + Vf::Sint8x4 => vk::Format::R8G8B8A8_SINT, + Vf::Unorm8x2 => vk::Format::R8G8_UNORM, + Vf::Unorm8x4 => vk::Format::R8G8B8A8_UNORM, + Vf::Snorm8x2 => vk::Format::R8G8_SNORM, + Vf::Snorm8x4 => vk::Format::R8G8B8A8_SNORM, + Vf::Uint16x2 => vk::Format::R16G16_UINT, + Vf::Uint16x4 => vk::Format::R16G16B16A16_UINT, + Vf::Sint16x2 => vk::Format::R16G16_SINT, + Vf::Sint16x4 => vk::Format::R16G16B16A16_SINT, + Vf::Unorm16x2 => vk::Format::R16G16_UNORM, + Vf::Unorm16x4 => vk::Format::R16G16B16A16_UNORM, + Vf::Snorm16x2 => vk::Format::R16G16_SNORM, + Vf::Snorm16x4 => vk::Format::R16G16B16A16_SNORM, + Vf::Float16x2 => vk::Format::R16G16_SFLOAT, + Vf::Float16x4 => vk::Format::R16G16B16A16_SFLOAT, + Vf::Float32 => vk::Format::R32_SFLOAT, + Vf::Float32x2 => vk::Format::R32G32_SFLOAT, + Vf::Float32x3 => vk::Format::R32G32B32_SFLOAT, + Vf::Float32x4 => vk::Format::R32G32B32A32_SFLOAT, + Vf::Uint32 => vk::Format::R32_UINT, + Vf::Uint32x2 => vk::Format::R32G32_UINT, + Vf::Uint32x3 => vk::Format::R32G32B32_UINT, + Vf::Uint32x4 => vk::Format::R32G32B32A32_UINT, + Vf::Sint32 => vk::Format::R32_SINT, + Vf::Sint32x2 => vk::Format::R32G32_SINT, + Vf::Sint32x3 => vk::Format::R32G32B32_SINT, + Vf::Sint32x4 => vk::Format::R32G32B32A32_SINT, + Vf::Float64 => vk::Format::R64_SFLOAT, + Vf::Float64x2 => vk::Format::R64G64_SFLOAT, + Vf::Float64x3 => vk::Format::R64G64B64_SFLOAT, + Vf::Float64x4 => vk::Format::R64G64B64A64_SFLOAT, + } +} + +pub fn map_aspects(aspects: crate::FormatAspects) -> vk::ImageAspectFlags { + let mut flags = vk::ImageAspectFlags::empty(); + if aspects.contains(crate::FormatAspects::COLOR) { + flags |= vk::ImageAspectFlags::COLOR; + } + if aspects.contains(crate::FormatAspects::DEPTH) { + flags |= vk::ImageAspectFlags::DEPTH; + } + if aspects.contains(crate::FormatAspects::STENCIL) { + flags |= vk::ImageAspectFlags::STENCIL; + } + flags +} + +pub fn map_attachment_ops( + op: crate::AttachmentOps, +) -> (vk::AttachmentLoadOp, vk::AttachmentStoreOp) { + let load_op = if op.contains(crate::AttachmentOps::LOAD) { + vk::AttachmentLoadOp::LOAD + } else { + vk::AttachmentLoadOp::CLEAR + }; + let store_op = if op.contains(crate::AttachmentOps::STORE) { + vk::AttachmentStoreOp::STORE + } else { + vk::AttachmentStoreOp::DONT_CARE + }; + (load_op, store_op) +} + +pub fn map_present_mode(mode: wgt::PresentMode) -> vk::PresentModeKHR { + match mode { + wgt::PresentMode::Immediate => vk::PresentModeKHR::IMMEDIATE, + wgt::PresentMode::Mailbox => vk::PresentModeKHR::MAILBOX, + wgt::PresentMode::Fifo => vk::PresentModeKHR::FIFO, + wgt::PresentMode::FifoRelaxed => vk::PresentModeKHR::FIFO_RELAXED, + wgt::PresentMode::AutoNoVsync | wgt::PresentMode::AutoVsync => { + unreachable!("Cannot create swapchain with Auto PresentationMode") + } + } +} + +pub fn map_vk_present_mode(mode: vk::PresentModeKHR) -> Option<wgt::PresentMode> { + if mode == vk::PresentModeKHR::IMMEDIATE { + Some(wgt::PresentMode::Immediate) + } else if mode == vk::PresentModeKHR::MAILBOX { + Some(wgt::PresentMode::Mailbox) + } else if mode == vk::PresentModeKHR::FIFO { + Some(wgt::PresentMode::Fifo) + } else if mode == vk::PresentModeKHR::FIFO_RELAXED { + //Some(wgt::PresentMode::Relaxed) + None + } else { + log::warn!("Unrecognized present 
mode {:?}", mode); + None + } +} + +pub fn map_composite_alpha_mode(mode: wgt::CompositeAlphaMode) -> vk::CompositeAlphaFlagsKHR { + match mode { + wgt::CompositeAlphaMode::Opaque => vk::CompositeAlphaFlagsKHR::OPAQUE, + wgt::CompositeAlphaMode::PreMultiplied => vk::CompositeAlphaFlagsKHR::PRE_MULTIPLIED, + wgt::CompositeAlphaMode::PostMultiplied => vk::CompositeAlphaFlagsKHR::POST_MULTIPLIED, + wgt::CompositeAlphaMode::Inherit => vk::CompositeAlphaFlagsKHR::INHERIT, + wgt::CompositeAlphaMode::Auto => unreachable!(), + } +} + +pub fn map_vk_composite_alpha(flags: vk::CompositeAlphaFlagsKHR) -> Vec<wgt::CompositeAlphaMode> { + let mut modes = Vec::new(); + if flags.contains(vk::CompositeAlphaFlagsKHR::OPAQUE) { + modes.push(wgt::CompositeAlphaMode::Opaque); + } + if flags.contains(vk::CompositeAlphaFlagsKHR::PRE_MULTIPLIED) { + modes.push(wgt::CompositeAlphaMode::PreMultiplied); + } + if flags.contains(vk::CompositeAlphaFlagsKHR::POST_MULTIPLIED) { + modes.push(wgt::CompositeAlphaMode::PostMultiplied); + } + if flags.contains(vk::CompositeAlphaFlagsKHR::INHERIT) { + modes.push(wgt::CompositeAlphaMode::Inherit); + } + modes +} + +pub fn map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags { + let mut flags = vk::BufferUsageFlags::empty(); + if usage.contains(crate::BufferUses::COPY_SRC) { + flags |= vk::BufferUsageFlags::TRANSFER_SRC; + } + if usage.contains(crate::BufferUses::COPY_DST) { + flags |= vk::BufferUsageFlags::TRANSFER_DST; + } + if usage.contains(crate::BufferUses::UNIFORM) { + flags |= vk::BufferUsageFlags::UNIFORM_BUFFER; + } + if usage.intersects(crate::BufferUses::STORAGE_READ | crate::BufferUses::STORAGE_READ_WRITE) { + flags |= vk::BufferUsageFlags::STORAGE_BUFFER; + } + if usage.contains(crate::BufferUses::INDEX) { + flags |= vk::BufferUsageFlags::INDEX_BUFFER; + } + if usage.contains(crate::BufferUses::VERTEX) { + flags |= vk::BufferUsageFlags::VERTEX_BUFFER; + } + if usage.contains(crate::BufferUses::INDIRECT) { + flags |= vk::BufferUsageFlags::INDIRECT_BUFFER; + } + flags +} + +pub fn map_buffer_usage_to_barrier( + usage: crate::BufferUses, +) -> (vk::PipelineStageFlags, vk::AccessFlags) { + let mut stages = vk::PipelineStageFlags::empty(); + let mut access = vk::AccessFlags::empty(); + let shader_stages = vk::PipelineStageFlags::VERTEX_SHADER + | vk::PipelineStageFlags::FRAGMENT_SHADER + | vk::PipelineStageFlags::COMPUTE_SHADER; + + if usage.contains(crate::BufferUses::MAP_READ) { + stages |= vk::PipelineStageFlags::HOST; + access |= vk::AccessFlags::HOST_READ; + } + if usage.contains(crate::BufferUses::MAP_WRITE) { + stages |= vk::PipelineStageFlags::HOST; + access |= vk::AccessFlags::HOST_WRITE; + } + if usage.contains(crate::BufferUses::COPY_SRC) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_READ; + } + if usage.contains(crate::BufferUses::COPY_DST) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_WRITE; + } + if usage.contains(crate::BufferUses::UNIFORM) { + stages |= shader_stages; + access |= vk::AccessFlags::UNIFORM_READ; + } + if usage.intersects(crate::BufferUses::STORAGE_READ) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ; + } + if usage.intersects(crate::BufferUses::STORAGE_READ_WRITE) { + stages |= shader_stages; + access |= vk::AccessFlags::SHADER_READ | vk::AccessFlags::SHADER_WRITE; + } + if usage.contains(crate::BufferUses::INDEX) { + stages |= vk::PipelineStageFlags::VERTEX_INPUT; + access |= vk::AccessFlags::INDEX_READ; + } + if 
usage.contains(crate::BufferUses::VERTEX) { + stages |= vk::PipelineStageFlags::VERTEX_INPUT; + access |= vk::AccessFlags::VERTEX_ATTRIBUTE_READ; + } + if usage.contains(crate::BufferUses::INDIRECT) { + stages |= vk::PipelineStageFlags::DRAW_INDIRECT; + access |= vk::AccessFlags::INDIRECT_COMMAND_READ; + } + + (stages, access) +} + +pub fn map_view_dimension(dim: wgt::TextureViewDimension) -> vk::ImageViewType { + match dim { + wgt::TextureViewDimension::D1 => vk::ImageViewType::TYPE_1D, + wgt::TextureViewDimension::D2 => vk::ImageViewType::TYPE_2D, + wgt::TextureViewDimension::D2Array => vk::ImageViewType::TYPE_2D_ARRAY, + wgt::TextureViewDimension::Cube => vk::ImageViewType::CUBE, + wgt::TextureViewDimension::CubeArray => vk::ImageViewType::CUBE_ARRAY, + wgt::TextureViewDimension::D3 => vk::ImageViewType::TYPE_3D, + } +} + +pub fn map_copy_extent(extent: &crate::CopyExtent) -> vk::Extent3D { + vk::Extent3D { + width: extent.width, + height: extent.height, + depth: extent.depth, + } +} + +pub fn map_subresource_range( + range: &wgt::ImageSubresourceRange, + format: wgt::TextureFormat, +) -> vk::ImageSubresourceRange { + vk::ImageSubresourceRange { + aspect_mask: map_aspects(crate::FormatAspects::new(format, range.aspect)), + base_mip_level: range.base_mip_level, + level_count: range.mip_level_count.unwrap_or(vk::REMAINING_MIP_LEVELS), + base_array_layer: range.base_array_layer, + layer_count: range + .array_layer_count + .unwrap_or(vk::REMAINING_ARRAY_LAYERS), + } +} + +pub fn map_subresource_layers( + base: &crate::TextureCopyBase, +) -> (vk::ImageSubresourceLayers, vk::Offset3D) { + let offset = vk::Offset3D { + x: base.origin.x as i32, + y: base.origin.y as i32, + z: base.origin.z as i32, + }; + let subresource = vk::ImageSubresourceLayers { + aspect_mask: map_aspects(base.aspect), + mip_level: base.mip_level, + base_array_layer: base.array_layer, + layer_count: 1, + }; + (subresource, offset) +} + +pub fn map_filter_mode(mode: wgt::FilterMode) -> vk::Filter { + match mode { + wgt::FilterMode::Nearest => vk::Filter::NEAREST, + wgt::FilterMode::Linear => vk::Filter::LINEAR, + } +} + +pub fn map_mip_filter_mode(mode: wgt::FilterMode) -> vk::SamplerMipmapMode { + match mode { + wgt::FilterMode::Nearest => vk::SamplerMipmapMode::NEAREST, + wgt::FilterMode::Linear => vk::SamplerMipmapMode::LINEAR, + } +} + +pub fn map_address_mode(mode: wgt::AddressMode) -> vk::SamplerAddressMode { + match mode { + wgt::AddressMode::ClampToEdge => vk::SamplerAddressMode::CLAMP_TO_EDGE, + wgt::AddressMode::Repeat => vk::SamplerAddressMode::REPEAT, + wgt::AddressMode::MirrorRepeat => vk::SamplerAddressMode::MIRRORED_REPEAT, + wgt::AddressMode::ClampToBorder => vk::SamplerAddressMode::CLAMP_TO_BORDER, + // wgt::AddressMode::MirrorClamp => vk::SamplerAddressMode::MIRROR_CLAMP_TO_EDGE, + } +} + +pub fn map_border_color(border_color: wgt::SamplerBorderColor) -> vk::BorderColor { + match border_color { + wgt::SamplerBorderColor::TransparentBlack | wgt::SamplerBorderColor::Zero => { + vk::BorderColor::FLOAT_TRANSPARENT_BLACK + } + wgt::SamplerBorderColor::OpaqueBlack => vk::BorderColor::FLOAT_OPAQUE_BLACK, + wgt::SamplerBorderColor::OpaqueWhite => vk::BorderColor::FLOAT_OPAQUE_WHITE, + } +} + +pub fn map_comparison(fun: wgt::CompareFunction) -> vk::CompareOp { + use wgt::CompareFunction as Cf; + match fun { + Cf::Never => vk::CompareOp::NEVER, + Cf::Less => vk::CompareOp::LESS, + Cf::LessEqual => vk::CompareOp::LESS_OR_EQUAL, + Cf::Equal => vk::CompareOp::EQUAL, + Cf::GreaterEqual => 
vk::CompareOp::GREATER_OR_EQUAL, + Cf::Greater => vk::CompareOp::GREATER, + Cf::NotEqual => vk::CompareOp::NOT_EQUAL, + Cf::Always => vk::CompareOp::ALWAYS, + } +} + +pub fn map_shader_stage(stage: wgt::ShaderStages) -> vk::ShaderStageFlags { + let mut flags = vk::ShaderStageFlags::empty(); + if stage.contains(wgt::ShaderStages::VERTEX) { + flags |= vk::ShaderStageFlags::VERTEX; + } + if stage.contains(wgt::ShaderStages::FRAGMENT) { + flags |= vk::ShaderStageFlags::FRAGMENT; + } + if stage.contains(wgt::ShaderStages::COMPUTE) { + flags |= vk::ShaderStageFlags::COMPUTE; + } + flags +} + +pub fn map_binding_type(ty: wgt::BindingType) -> vk::DescriptorType { + match ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => match ty { + wgt::BufferBindingType::Storage { .. } => match has_dynamic_offset { + true => vk::DescriptorType::STORAGE_BUFFER_DYNAMIC, + false => vk::DescriptorType::STORAGE_BUFFER, + }, + wgt::BufferBindingType::Uniform => match has_dynamic_offset { + true => vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC, + false => vk::DescriptorType::UNIFORM_BUFFER, + }, + }, + wgt::BindingType::Sampler { .. } => vk::DescriptorType::SAMPLER, + wgt::BindingType::Texture { .. } => vk::DescriptorType::SAMPLED_IMAGE, + wgt::BindingType::StorageTexture { .. } => vk::DescriptorType::STORAGE_IMAGE, + } +} + +pub fn map_topology(topology: wgt::PrimitiveTopology) -> vk::PrimitiveTopology { + use wgt::PrimitiveTopology as Pt; + match topology { + Pt::PointList => vk::PrimitiveTopology::POINT_LIST, + Pt::LineList => vk::PrimitiveTopology::LINE_LIST, + Pt::LineStrip => vk::PrimitiveTopology::LINE_STRIP, + Pt::TriangleList => vk::PrimitiveTopology::TRIANGLE_LIST, + Pt::TriangleStrip => vk::PrimitiveTopology::TRIANGLE_STRIP, + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> vk::PolygonMode { + match mode { + wgt::PolygonMode::Fill => vk::PolygonMode::FILL, + wgt::PolygonMode::Line => vk::PolygonMode::LINE, + wgt::PolygonMode::Point => vk::PolygonMode::POINT, + } +} + +pub fn map_front_face(front_face: wgt::FrontFace) -> vk::FrontFace { + match front_face { + wgt::FrontFace::Cw => vk::FrontFace::CLOCKWISE, + wgt::FrontFace::Ccw => vk::FrontFace::COUNTER_CLOCKWISE, + } +} + +pub fn map_cull_face(face: wgt::Face) -> vk::CullModeFlags { + match face { + wgt::Face::Front => vk::CullModeFlags::FRONT, + wgt::Face::Back => vk::CullModeFlags::BACK, + } +} + +pub fn map_stencil_op(op: wgt::StencilOperation) -> vk::StencilOp { + use wgt::StencilOperation as So; + match op { + So::Keep => vk::StencilOp::KEEP, + So::Zero => vk::StencilOp::ZERO, + So::Replace => vk::StencilOp::REPLACE, + So::Invert => vk::StencilOp::INVERT, + So::IncrementClamp => vk::StencilOp::INCREMENT_AND_CLAMP, + So::IncrementWrap => vk::StencilOp::INCREMENT_AND_WRAP, + So::DecrementClamp => vk::StencilOp::DECREMENT_AND_CLAMP, + So::DecrementWrap => vk::StencilOp::DECREMENT_AND_WRAP, + } +} + +pub fn map_stencil_face( + face: &wgt::StencilFaceState, + compare_mask: u32, + write_mask: u32, +) -> vk::StencilOpState { + vk::StencilOpState { + fail_op: map_stencil_op(face.fail_op), + pass_op: map_stencil_op(face.pass_op), + depth_fail_op: map_stencil_op(face.depth_fail_op), + compare_op: map_comparison(face.compare), + compare_mask, + write_mask, + reference: 0, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor) -> vk::BlendFactor { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => vk::BlendFactor::ZERO, + Bf::One => vk::BlendFactor::ONE, + Bf::Src => vk::BlendFactor::SRC_COLOR, + Bf::OneMinusSrc => 
vk::BlendFactor::ONE_MINUS_SRC_COLOR, + Bf::SrcAlpha => vk::BlendFactor::SRC_ALPHA, + Bf::OneMinusSrcAlpha => vk::BlendFactor::ONE_MINUS_SRC_ALPHA, + Bf::Dst => vk::BlendFactor::DST_COLOR, + Bf::OneMinusDst => vk::BlendFactor::ONE_MINUS_DST_COLOR, + Bf::DstAlpha => vk::BlendFactor::DST_ALPHA, + Bf::OneMinusDstAlpha => vk::BlendFactor::ONE_MINUS_DST_ALPHA, + Bf::SrcAlphaSaturated => vk::BlendFactor::SRC_ALPHA_SATURATE, + Bf::Constant => vk::BlendFactor::CONSTANT_COLOR, + Bf::OneMinusConstant => vk::BlendFactor::ONE_MINUS_CONSTANT_COLOR, + } +} + +fn map_blend_op(operation: wgt::BlendOperation) -> vk::BlendOp { + use wgt::BlendOperation as Bo; + match operation { + Bo::Add => vk::BlendOp::ADD, + Bo::Subtract => vk::BlendOp::SUBTRACT, + Bo::ReverseSubtract => vk::BlendOp::REVERSE_SUBTRACT, + Bo::Min => vk::BlendOp::MIN, + Bo::Max => vk::BlendOp::MAX, + } +} + +pub fn map_blend_component( + component: &wgt::BlendComponent, +) -> (vk::BlendOp, vk::BlendFactor, vk::BlendFactor) { + let op = map_blend_op(component.operation); + let src = map_blend_factor(component.src_factor); + let dst = map_blend_factor(component.dst_factor); + (op, src, dst) +} + +pub fn map_pipeline_statistics( + types: wgt::PipelineStatisticsTypes, +) -> vk::QueryPipelineStatisticFlags { + use wgt::PipelineStatisticsTypes as Pst; + let mut flags = vk::QueryPipelineStatisticFlags::empty(); + if types.contains(Pst::VERTEX_SHADER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::VERTEX_SHADER_INVOCATIONS; + } + if types.contains(Pst::CLIPPER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::CLIPPING_INVOCATIONS; + } + if types.contains(Pst::CLIPPER_PRIMITIVES_OUT) { + flags |= vk::QueryPipelineStatisticFlags::CLIPPING_PRIMITIVES; + } + if types.contains(Pst::FRAGMENT_SHADER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::FRAGMENT_SHADER_INVOCATIONS; + } + if types.contains(Pst::COMPUTE_SHADER_INVOCATIONS) { + flags |= vk::QueryPipelineStatisticFlags::COMPUTE_SHADER_INVOCATIONS; + } + flags +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/device.rs b/third_party/rust/wgpu-hal/src/vulkan/device.rs new file mode 100644 index 0000000000..09b887772c --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/device.rs @@ -0,0 +1,2032 @@ +use super::conv; + +use arrayvec::ArrayVec; +use ash::{extensions::khr, vk}; +use parking_lot::Mutex; + +use std::{ + borrow::Cow, + collections::{hash_map::Entry, BTreeMap}, + ffi::{CStr, CString}, + num::NonZeroU32, + ptr, + sync::Arc, +}; + +impl super::DeviceShared { + pub(super) unsafe fn set_object_name( + &self, + object_type: vk::ObjectType, + object: impl vk::Handle, + name: &str, + ) { + let extension = match self.instance.debug_utils { + Some(ref debug_utils) => &debug_utils.extension, + None => return, + }; + + // Keep variables outside the if-else block to ensure they do not + // go out of scope while we hold a pointer to them + let mut buffer: [u8; 64] = [0u8; 64]; + let buffer_vec: Vec<u8>; + + // Append a null terminator to the string + let name_bytes = if name.len() < buffer.len() { + // Common case, string is very small. Allocate a copy on the stack. + buffer[..name.len()].copy_from_slice(name.as_bytes()); + // Add null terminator + buffer[name.len()] = 0; + &buffer[..name.len() + 1] + } else { + // Less common case, the string is large. + // This requires a heap allocation. 
+ buffer_vec = name + .as_bytes() + .iter() + .cloned() + .chain(std::iter::once(0)) + .collect(); + &buffer_vec + }; + + let name = unsafe { CStr::from_bytes_with_nul_unchecked(name_bytes) }; + + let _result = unsafe { + extension.set_debug_utils_object_name( + self.raw.handle(), + &vk::DebugUtilsObjectNameInfoEXT::builder() + .object_type(object_type) + .object_handle(object.as_raw()) + .object_name(name), + ) + }; + } + + pub fn make_render_pass( + &self, + key: super::RenderPassKey, + ) -> Result<vk::RenderPass, crate::DeviceError> { + Ok(match self.render_passes.lock().entry(key) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let mut vk_attachments = Vec::new(); + let mut color_refs = Vec::with_capacity(e.key().colors.len()); + let mut resolve_refs = Vec::with_capacity(color_refs.capacity()); + let mut ds_ref = None; + let samples = vk::SampleCountFlags::from_raw(e.key().sample_count); + let unused = vk::AttachmentReference { + attachment: vk::ATTACHMENT_UNUSED, + layout: vk::ImageLayout::UNDEFINED, + }; + for cat in e.key().colors.iter() { + let (color_ref, resolve_ref) = if let Some(cat) = cat.as_ref() { + let color_ref = vk::AttachmentReference { + attachment: vk_attachments.len() as u32, + layout: cat.base.layout, + }; + vk_attachments.push({ + let (load_op, store_op) = conv::map_attachment_ops(cat.base.ops); + vk::AttachmentDescription::builder() + .format(cat.base.format) + .samples(samples) + .load_op(load_op) + .store_op(store_op) + .initial_layout(cat.base.layout) + .final_layout(cat.base.layout) + .build() + }); + let resolve_ref = if let Some(ref rat) = cat.resolve { + let (load_op, store_op) = conv::map_attachment_ops(rat.ops); + let vk_attachment = vk::AttachmentDescription::builder() + .format(rat.format) + .samples(vk::SampleCountFlags::TYPE_1) + .load_op(load_op) + .store_op(store_op) + .initial_layout(rat.layout) + .final_layout(rat.layout) + .build(); + vk_attachments.push(vk_attachment); + + vk::AttachmentReference { + attachment: vk_attachments.len() as u32 - 1, + layout: rat.layout, + } + } else { + unused + }; + + (color_ref, resolve_ref) + } else { + (unused, unused) + }; + + color_refs.push(color_ref); + resolve_refs.push(resolve_ref); + } + + if let Some(ref ds) = e.key().depth_stencil { + ds_ref = Some(vk::AttachmentReference { + attachment: vk_attachments.len() as u32, + layout: ds.base.layout, + }); + let (load_op, store_op) = conv::map_attachment_ops(ds.base.ops); + let (stencil_load_op, stencil_store_op) = + conv::map_attachment_ops(ds.stencil_ops); + let vk_attachment = vk::AttachmentDescription::builder() + .format(ds.base.format) + .samples(samples) + .load_op(load_op) + .store_op(store_op) + .stencil_load_op(stencil_load_op) + .stencil_store_op(stencil_store_op) + .initial_layout(ds.base.layout) + .final_layout(ds.base.layout) + .build(); + vk_attachments.push(vk_attachment); + } + + let vk_subpasses = [{ + let mut vk_subpass = vk::SubpassDescription::builder() + .pipeline_bind_point(vk::PipelineBindPoint::GRAPHICS) + .color_attachments(&color_refs) + .resolve_attachments(&resolve_refs); + + if self + .workarounds + .contains(super::Workarounds::EMPTY_RESOLVE_ATTACHMENT_LISTS) + && resolve_refs.is_empty() + { + vk_subpass.p_resolve_attachments = ptr::null(); + } + + if let Some(ref reference) = ds_ref { + vk_subpass = vk_subpass.depth_stencil_attachment(reference) + } + vk_subpass.build() + }]; + + let mut vk_info = vk::RenderPassCreateInfo::builder() + .attachments(&vk_attachments) + .subpasses(&vk_subpasses); + + let mut 
multiview_info; + let mask; + if let Some(multiview) = e.key().multiview { + // Sanity checks, better to panic here than cause a driver crash + assert!(multiview.get() <= 8); + assert!(multiview.get() > 1); + + // Right now we enable all bits on the view masks and correlation masks. + // This means we're rendering to all views in the subpass, and that all views + // can be rendered concurrently. + mask = [(1 << multiview.get()) - 1]; + + // On Vulkan 1.1 or later, this is an alias for core functionality + multiview_info = vk::RenderPassMultiviewCreateInfoKHR::builder() + .view_masks(&mask) + .correlation_masks(&mask) + .build(); + vk_info = vk_info.push_next(&mut multiview_info); + } + + let raw = unsafe { self.raw.create_render_pass(&vk_info, None)? }; + + *e.insert(raw) + } + }) + } + + pub fn make_framebuffer( + &self, + key: super::FramebufferKey, + raw_pass: vk::RenderPass, + pass_label: crate::Label, + ) -> Result<vk::Framebuffer, crate::DeviceError> { + Ok(match self.framebuffers.lock().entry(key) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let vk_views = e + .key() + .attachments + .iter() + .map(|at| at.raw) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + let vk_view_formats = e + .key() + .attachments + .iter() + .map(|at| self.private_caps.map_texture_format(at.view_format)) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + let vk_view_formats_list = e + .key() + .attachments + .iter() + .map(|at| at.raw_view_formats.clone()) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + + let vk_image_infos = e + .key() + .attachments + .iter() + .enumerate() + .map(|(i, at)| { + let mut info = vk::FramebufferAttachmentImageInfo::builder() + .usage(conv::map_texture_usage(at.view_usage)) + .flags(at.raw_image_flags) + .width(e.key().extent.width) + .height(e.key().extent.height) + .layer_count(e.key().extent.depth_or_array_layers); + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03214 + if vk_view_formats_list[i].is_empty() { + info = info.view_formats(&vk_view_formats[i..i + 1]); + } else { + info = info.view_formats(&vk_view_formats_list[i]); + }; + info.build() + }) + .collect::<ArrayVec<_, { super::MAX_TOTAL_ATTACHMENTS }>>(); + + let mut vk_attachment_info = vk::FramebufferAttachmentsCreateInfo::builder() + .attachment_image_infos(&vk_image_infos) + .build(); + let mut vk_info = vk::FramebufferCreateInfo::builder() + .render_pass(raw_pass) + .width(e.key().extent.width) + .height(e.key().extent.height) + .layers(e.key().extent.depth_or_array_layers); + + if self.private_caps.imageless_framebuffers { + //TODO: https://github.com/MaikKlein/ash/issues/450 + vk_info = vk_info + .flags(vk::FramebufferCreateFlags::IMAGELESS_KHR) + .push_next(&mut vk_attachment_info); + vk_info.attachment_count = e.key().attachments.len() as u32; + } else { + vk_info = vk_info.attachments(&vk_views); + } + + *e.insert(unsafe { + let raw = self.raw.create_framebuffer(&vk_info, None).unwrap(); + if let Some(label) = pass_label { + self.set_object_name(vk::ObjectType::FRAMEBUFFER, raw, label); + } + raw + }) + } + }) + } + + fn make_memory_ranges<'a, I: 'a + Iterator<Item = crate::MemoryRange>>( + &self, + buffer: &'a super::Buffer, + ranges: I, + ) -> impl 'a + Iterator<Item = vk::MappedMemoryRange> { + let block = buffer.block.lock(); + let mask = self.private_caps.non_coherent_map_mask; + ranges.map(move |range| { + vk::MappedMemoryRange::builder() + 
.memory(*block.memory()) + .offset((block.offset() + range.start) & !mask) + .size((range.end - range.start + mask) & !mask) + .build() + }) + } + + unsafe fn free_resources(&self) { + for &raw in self.render_passes.lock().values() { + unsafe { self.raw.destroy_render_pass(raw, None) }; + } + for &raw in self.framebuffers.lock().values() { + unsafe { self.raw.destroy_framebuffer(raw, None) }; + } + if self.handle_is_owned { + unsafe { self.raw.destroy_device(None) }; + } + } +} + +impl gpu_alloc::MemoryDevice<vk::DeviceMemory> for super::DeviceShared { + unsafe fn allocate_memory( + &self, + size: u64, + memory_type: u32, + flags: gpu_alloc::AllocationFlags, + ) -> Result<vk::DeviceMemory, gpu_alloc::OutOfMemory> { + let mut info = vk::MemoryAllocateInfo::builder() + .allocation_size(size) + .memory_type_index(memory_type); + + let mut info_flags; + + if flags.contains(gpu_alloc::AllocationFlags::DEVICE_ADDRESS) { + info_flags = vk::MemoryAllocateFlagsInfo::builder() + .flags(vk::MemoryAllocateFlags::DEVICE_ADDRESS); + info = info.push_next(&mut info_flags); + } + + match unsafe { self.raw.allocate_memory(&info, None) } { + Ok(memory) => Ok(memory), + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_alloc::OutOfMemory::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { + Err(gpu_alloc::OutOfMemory::OutOfHostMemory) + } + Err(vk::Result::ERROR_TOO_MANY_OBJECTS) => panic!("Too many objects"), + Err(err) => panic!("Unexpected Vulkan error: `{err}`"), + } + } + + unsafe fn deallocate_memory(&self, memory: vk::DeviceMemory) { + unsafe { self.raw.free_memory(memory, None) }; + } + + unsafe fn map_memory( + &self, + memory: &mut vk::DeviceMemory, + offset: u64, + size: u64, + ) -> Result<ptr::NonNull<u8>, gpu_alloc::DeviceMapError> { + match unsafe { + self.raw + .map_memory(*memory, offset, size, vk::MemoryMapFlags::empty()) + } { + Ok(ptr) => Ok(ptr::NonNull::new(ptr as *mut u8) + .expect("Pointer to memory mapping must not be null")), + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_alloc::DeviceMapError::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { + Err(gpu_alloc::DeviceMapError::OutOfHostMemory) + } + Err(vk::Result::ERROR_MEMORY_MAP_FAILED) => Err(gpu_alloc::DeviceMapError::MapFailed), + Err(err) => panic!("Unexpected Vulkan error: `{err}`"), + } + } + + unsafe fn unmap_memory(&self, memory: &mut vk::DeviceMemory) { + unsafe { self.raw.unmap_memory(*memory) }; + } + + unsafe fn invalidate_memory_ranges( + &self, + _ranges: &[gpu_alloc::MappedMemoryRange<'_, vk::DeviceMemory>], + ) -> Result<(), gpu_alloc::OutOfMemory> { + // should never be called + unimplemented!() + } + + unsafe fn flush_memory_ranges( + &self, + _ranges: &[gpu_alloc::MappedMemoryRange<'_, vk::DeviceMemory>], + ) -> Result<(), gpu_alloc::OutOfMemory> { + // should never be called + unimplemented!() + } +} + +impl + gpu_descriptor::DescriptorDevice<vk::DescriptorSetLayout, vk::DescriptorPool, vk::DescriptorSet> + for super::DeviceShared +{ + unsafe fn create_descriptor_pool( + &self, + descriptor_count: &gpu_descriptor::DescriptorTotalCount, + max_sets: u32, + flags: gpu_descriptor::DescriptorPoolCreateFlags, + ) -> Result<vk::DescriptorPool, gpu_descriptor::CreatePoolError> { + //Note: ignoring other types, since they can't appear here + let unfiltered_counts = [ + (vk::DescriptorType::SAMPLER, descriptor_count.sampler), + ( + vk::DescriptorType::SAMPLED_IMAGE, + descriptor_count.sampled_image, + ), + ( + vk::DescriptorType::STORAGE_IMAGE, + 
descriptor_count.storage_image, + ), + ( + vk::DescriptorType::UNIFORM_BUFFER, + descriptor_count.uniform_buffer, + ), + ( + vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC, + descriptor_count.uniform_buffer_dynamic, + ), + ( + vk::DescriptorType::STORAGE_BUFFER, + descriptor_count.storage_buffer, + ), + ( + vk::DescriptorType::STORAGE_BUFFER_DYNAMIC, + descriptor_count.storage_buffer_dynamic, + ), + ]; + + let filtered_counts = unfiltered_counts + .iter() + .cloned() + .filter(|&(_, count)| count != 0) + .map(|(ty, count)| vk::DescriptorPoolSize { + ty, + descriptor_count: count, + }) + .collect::<ArrayVec<_, 8>>(); + + let mut vk_flags = + if flags.contains(gpu_descriptor::DescriptorPoolCreateFlags::UPDATE_AFTER_BIND) { + vk::DescriptorPoolCreateFlags::UPDATE_AFTER_BIND + } else { + vk::DescriptorPoolCreateFlags::empty() + }; + if flags.contains(gpu_descriptor::DescriptorPoolCreateFlags::FREE_DESCRIPTOR_SET) { + vk_flags |= vk::DescriptorPoolCreateFlags::FREE_DESCRIPTOR_SET; + } + let vk_info = vk::DescriptorPoolCreateInfo::builder() + .max_sets(max_sets) + .flags(vk_flags) + .pool_sizes(&filtered_counts) + .build(); + + match unsafe { self.raw.create_descriptor_pool(&vk_info, None) } { + Ok(pool) => Ok(pool), + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { + Err(gpu_descriptor::CreatePoolError::OutOfHostMemory) + } + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_descriptor::CreatePoolError::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_FRAGMENTATION) => { + Err(gpu_descriptor::CreatePoolError::Fragmentation) + } + Err(other) => { + log::error!("create_descriptor_pool: {:?}", other); + Err(gpu_descriptor::CreatePoolError::OutOfHostMemory) + } + } + } + + unsafe fn destroy_descriptor_pool(&self, pool: vk::DescriptorPool) { + unsafe { self.raw.destroy_descriptor_pool(pool, None) } + } + + unsafe fn alloc_descriptor_sets<'a>( + &self, + pool: &mut vk::DescriptorPool, + layouts: impl ExactSizeIterator<Item = &'a vk::DescriptorSetLayout>, + sets: &mut impl Extend<vk::DescriptorSet>, + ) -> Result<(), gpu_descriptor::DeviceAllocationError> { + let result = unsafe { + self.raw.allocate_descriptor_sets( + &vk::DescriptorSetAllocateInfo::builder() + .descriptor_pool(*pool) + .set_layouts( + &smallvec::SmallVec::<[vk::DescriptorSetLayout; 32]>::from_iter( + layouts.cloned(), + ), + ) + .build(), + ) + }; + + match result { + Ok(vk_sets) => { + sets.extend(vk_sets); + Ok(()) + } + Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) + | Err(vk::Result::ERROR_OUT_OF_POOL_MEMORY) => { + Err(gpu_descriptor::DeviceAllocationError::OutOfHostMemory) + } + Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { + Err(gpu_descriptor::DeviceAllocationError::OutOfDeviceMemory) + } + Err(vk::Result::ERROR_FRAGMENTED_POOL) => { + Err(gpu_descriptor::DeviceAllocationError::FragmentedPool) + } + Err(other) => { + log::error!("allocate_descriptor_sets: {:?}", other); + Err(gpu_descriptor::DeviceAllocationError::OutOfHostMemory) + } + } + } + + unsafe fn dealloc_descriptor_sets<'a>( + &self, + pool: &mut vk::DescriptorPool, + sets: impl Iterator<Item = vk::DescriptorSet>, + ) { + let result = unsafe { + self.raw.free_descriptor_sets( + *pool, + &smallvec::SmallVec::<[vk::DescriptorSet; 32]>::from_iter(sets), + ) + }; + match result { + Ok(()) => {} + Err(err) => log::error!("free_descriptor_sets: {:?}", err), + } + } +} + +struct CompiledStage { + create_info: vk::PipelineShaderStageCreateInfo, + _entry_point: CString, + temp_raw_module: Option<vk::ShaderModule>, +} + +impl super::Device { + pub(super) unsafe fn 
create_swapchain( + &self, + surface: &mut super::Surface, + config: &crate::SurfaceConfiguration, + provided_old_swapchain: Option<super::Swapchain>, + ) -> Result<super::Swapchain, crate::SurfaceError> { + profiling::scope!("Device::create_swapchain"); + let functor = khr::Swapchain::new(&surface.instance.raw, &self.shared.raw); + + let old_swapchain = match provided_old_swapchain { + Some(osc) => osc.raw, + None => vk::SwapchainKHR::null(), + }; + + let color_space = if config.format == wgt::TextureFormat::Rgba16Float { + // Enable wide color gamut mode + // Vulkan swapchain for Android only supports DISPLAY_P3_NONLINEAR_EXT and EXTENDED_SRGB_LINEAR_EXT + vk::ColorSpaceKHR::EXTENDED_SRGB_LINEAR_EXT + } else { + vk::ColorSpaceKHR::SRGB_NONLINEAR + }; + + let original_format = self.shared.private_caps.map_texture_format(config.format); + let mut raw_flags = vk::SwapchainCreateFlagsKHR::empty(); + let mut raw_view_formats: Vec<vk::Format> = vec![]; + let mut wgt_view_formats = vec![]; + if !config.view_formats.is_empty() { + raw_flags |= vk::SwapchainCreateFlagsKHR::MUTABLE_FORMAT; + raw_view_formats = config + .view_formats + .iter() + .map(|f| self.shared.private_caps.map_texture_format(*f)) + .collect(); + raw_view_formats.push(original_format); + + wgt_view_formats = config.view_formats.clone(); + wgt_view_formats.push(config.format); + } + + let mut info = vk::SwapchainCreateInfoKHR::builder() + .flags(raw_flags) + .surface(surface.raw) + .min_image_count(config.swap_chain_size) + .image_format(original_format) + .image_color_space(color_space) + .image_extent(vk::Extent2D { + width: config.extent.width, + height: config.extent.height, + }) + .image_array_layers(config.extent.depth_or_array_layers) + .image_usage(conv::map_texture_usage(config.usage)) + .image_sharing_mode(vk::SharingMode::EXCLUSIVE) + .pre_transform(vk::SurfaceTransformFlagsKHR::IDENTITY) + .composite_alpha(conv::map_composite_alpha_mode(config.composite_alpha_mode)) + .present_mode(conv::map_present_mode(config.present_mode)) + .clipped(true) + .old_swapchain(old_swapchain); + + let mut format_list_info = vk::ImageFormatListCreateInfo::builder(); + if !raw_view_formats.is_empty() { + format_list_info = format_list_info.view_formats(&raw_view_formats); + info = info.push_next(&mut format_list_info); + } + + let result = { + profiling::scope!("vkCreateSwapchainKHR"); + unsafe { functor.create_swapchain(&info, None) } + }; + + // doing this before bailing out with error + if old_swapchain != vk::SwapchainKHR::null() { + unsafe { functor.destroy_swapchain(old_swapchain, None) } + } + + let raw = match result { + Ok(swapchain) => swapchain, + Err(error) => { + return Err(match error { + vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost, + vk::Result::ERROR_NATIVE_WINDOW_IN_USE_KHR => { + crate::SurfaceError::Other("Native window is in use") + } + other => crate::DeviceError::from(other).into(), + }) + } + }; + + let images = + unsafe { functor.get_swapchain_images(raw) }.map_err(crate::DeviceError::from)?; + + let vk_info = vk::FenceCreateInfo::builder().build(); + let fence = unsafe { self.shared.raw.create_fence(&vk_info, None) } + .map_err(crate::DeviceError::from)?; + + Ok(super::Swapchain { + raw, + raw_flags, + functor, + device: Arc::clone(&self.shared), + fence, + images, + config: config.clone(), + view_formats: wgt_view_formats, + }) + } + + /// # Safety + /// + /// - `vk_image` must be created respecting `desc` + /// - If `drop_guard` is `Some`, the application must manually destroy the image 
handle. This + /// can be done inside the `Drop` impl of `drop_guard`. + /// - If the `ImageCreateFlags` does not contain `MUTABLE_FORMAT`, the `view_formats` of `desc` must be empty. + pub unsafe fn texture_from_raw( + vk_image: vk::Image, + desc: &crate::TextureDescriptor, + drop_guard: Option<crate::DropGuard>, + ) -> super::Texture { + let mut raw_flags = vk::ImageCreateFlags::empty(); + let mut view_formats = vec![]; + for tf in desc.view_formats.iter() { + if *tf == desc.format { + continue; + } + view_formats.push(*tf); + } + if !view_formats.is_empty() { + raw_flags |= + vk::ImageCreateFlags::MUTABLE_FORMAT | vk::ImageCreateFlags::EXTENDED_USAGE; + view_formats.push(desc.format) + } + + super::Texture { + raw: vk_image, + drop_guard, + block: None, + usage: desc.usage, + format: desc.format, + raw_flags: vk::ImageCreateFlags::empty(), + copy_size: desc.copy_extent(), + view_formats, + } + } + + fn create_shader_module_impl( + &self, + spv: &[u32], + ) -> Result<vk::ShaderModule, crate::DeviceError> { + let vk_info = vk::ShaderModuleCreateInfo::builder() + .flags(vk::ShaderModuleCreateFlags::empty()) + .code(spv); + + let raw = unsafe { + profiling::scope!("vkCreateShaderModule"); + self.shared.raw.create_shader_module(&vk_info, None)? + }; + Ok(raw) + } + + fn compile_stage( + &self, + stage: &crate::ProgrammableStage<super::Api>, + naga_stage: naga::ShaderStage, + binding_map: &naga::back::spv::BindingMap, + ) -> Result<CompiledStage, crate::PipelineError> { + let stage_flags = crate::auxil::map_naga_stage(naga_stage); + let vk_module = match *stage.module { + super::ShaderModule::Raw(raw) => raw, + super::ShaderModule::Intermediate { + ref naga_shader, + runtime_checks, + } => { + let pipeline_options = naga::back::spv::PipelineOptions { + entry_point: stage.entry_point.to_string(), + shader_stage: naga_stage, + }; + let needs_temp_options = !runtime_checks || !binding_map.is_empty(); + let mut temp_options; + let options = if needs_temp_options { + temp_options = self.naga_options.clone(); + if !runtime_checks { + temp_options.bounds_check_policies = naga::proc::BoundsCheckPolicies { + index: naga::proc::BoundsCheckPolicy::Unchecked, + buffer: naga::proc::BoundsCheckPolicy::Unchecked, + image: naga::proc::BoundsCheckPolicy::Unchecked, + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }; + } + if !binding_map.is_empty() { + temp_options.binding_map = binding_map.clone(); + } + &temp_options + } else { + &self.naga_options + }; + let spv = { + profiling::scope!("naga::spv::write_vec"); + naga::back::spv::write_vec( + &naga_shader.module, + &naga_shader.info, + options, + Some(&pipeline_options), + ) + } + .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?; + self.create_shader_module_impl(&spv)? + } + }; + + let entry_point = CString::new(stage.entry_point).unwrap(); + let create_info = vk::PipelineShaderStageCreateInfo::builder() + .stage(conv::map_shader_stage(stage_flags)) + .module(vk_module) + .name(&entry_point) + .build(); + + Ok(CompiledStage { + create_info, + _entry_point: entry_point, + temp_raw_module: match *stage.module { + super::ShaderModule::Raw(_) => None, + super::ShaderModule::Intermediate { .. } => Some(vk_module), + }, + }) + } + + /// Returns the queue family index of the device's internal queue. 
+ /// + /// This is useful for constructing memory barriers needed for queue family ownership transfer when + /// external memory is involved (from/to `VK_QUEUE_FAMILY_EXTERNAL_KHR` and `VK_QUEUE_FAMILY_FOREIGN_EXT` + /// for example). + pub fn queue_family_index(&self) -> u32 { + self.shared.family_index + } + + pub fn queue_index(&self) -> u32 { + self.shared.queue_index + } + + pub fn raw_device(&self) -> &ash::Device { + &self.shared.raw + } + + pub fn raw_physical_device(&self) -> ash::vk::PhysicalDevice { + self.shared.physical_device + } + + pub fn raw_queue(&self) -> ash::vk::Queue { + self.shared.raw_queue + } + + pub fn enabled_device_extensions(&self) -> &[&'static CStr] { + &self.shared.enabled_extensions + } + + pub fn shared_instance(&self) -> &super::InstanceShared { + &self.shared.instance + } +} + +impl crate::Device<super::Api> for super::Device { + unsafe fn exit(self, queue: super::Queue) { + unsafe { self.mem_allocator.into_inner().cleanup(&*self.shared) }; + unsafe { self.desc_allocator.into_inner().cleanup(&*self.shared) }; + for &sem in queue.relay_semaphores.iter() { + unsafe { self.shared.raw.destroy_semaphore(sem, None) }; + } + unsafe { self.shared.free_resources() }; + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result<super::Buffer, crate::DeviceError> { + let vk_info = vk::BufferCreateInfo::builder() + .size(desc.size) + .usage(conv::map_buffer_usage(desc.usage)) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + let raw = unsafe { self.shared.raw.create_buffer(&vk_info, None)? }; + let req = unsafe { self.shared.raw.get_buffer_memory_requirements(raw) }; + + let mut alloc_usage = if desc + .usage + .intersects(crate::BufferUses::MAP_READ | crate::BufferUses::MAP_WRITE) + { + let mut flags = gpu_alloc::UsageFlags::HOST_ACCESS; + //TODO: find a way to use `crate::MemoryFlags::PREFER_COHERENT` + flags.set( + gpu_alloc::UsageFlags::DOWNLOAD, + desc.usage.contains(crate::BufferUses::MAP_READ), + ); + flags.set( + gpu_alloc::UsageFlags::UPLOAD, + desc.usage.contains(crate::BufferUses::MAP_WRITE), + ); + flags + } else { + gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS + }; + alloc_usage.set( + gpu_alloc::UsageFlags::TRANSIENT, + desc.memory_flags.contains(crate::MemoryFlags::TRANSIENT), + ); + + let block = unsafe { + self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: req.alignment - 1, + usage: alloc_usage, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )? + }; + + unsafe { + self.shared + .raw + .bind_buffer_memory(raw, *block.memory(), block.offset())? + }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::BUFFER, raw, label) + }; + } + + Ok(super::Buffer { + raw, + block: Mutex::new(block), + }) + } + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { + unsafe { self.shared.raw.destroy_buffer(buffer.raw, None) }; + unsafe { + self.mem_allocator + .lock() + .dealloc(&*self.shared, buffer.block.into_inner()) + }; + } + + unsafe fn map_buffer( + &self, + buffer: &super::Buffer, + range: crate::MemoryRange, + ) -> Result<crate::BufferMapping, crate::DeviceError> { + let size = range.end - range.start; + let mut block = buffer.block.lock(); + let ptr = unsafe { block.map(&*self.shared, range.start, size as usize)? 
}; + let is_coherent = block + .props() + .contains(gpu_alloc::MemoryPropertyFlags::HOST_COHERENT); + Ok(crate::BufferMapping { ptr, is_coherent }) + } + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + unsafe { buffer.block.lock().unmap(&*self.shared) }; + Ok(()) + } + + unsafe fn flush_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + let vk_ranges = self.shared.make_memory_ranges(buffer, ranges); + + unsafe { + self.shared + .raw + .flush_mapped_memory_ranges( + &smallvec::SmallVec::<[vk::MappedMemoryRange; 32]>::from_iter(vk_ranges), + ) + } + .unwrap(); + } + unsafe fn invalidate_mapped_ranges<I>(&self, buffer: &super::Buffer, ranges: I) + where + I: Iterator<Item = crate::MemoryRange>, + { + let vk_ranges = self.shared.make_memory_ranges(buffer, ranges); + + unsafe { + self.shared + .raw + .invalidate_mapped_memory_ranges( + &smallvec::SmallVec::<[vk::MappedMemoryRange; 32]>::from_iter(vk_ranges), + ) + } + .unwrap(); + } + + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result<super::Texture, crate::DeviceError> { + let copy_size = desc.copy_extent(); + + let mut raw_flags = vk::ImageCreateFlags::empty(); + if desc.is_cube_compatible() { + raw_flags |= vk::ImageCreateFlags::CUBE_COMPATIBLE; + } + + let original_format = self.shared.private_caps.map_texture_format(desc.format); + let mut vk_view_formats = vec![]; + let mut wgt_view_formats = vec![]; + if !desc.view_formats.is_empty() { + raw_flags |= vk::ImageCreateFlags::MUTABLE_FORMAT; + wgt_view_formats = desc.view_formats.clone(); + wgt_view_formats.push(desc.format); + + if self.shared_instance().driver_api_version >= vk::API_VERSION_1_2 + || self + .enabled_device_extensions() + .contains(&vk::KhrImageFormatListFn::name()) + { + vk_view_formats = desc + .view_formats + .iter() + .map(|f| self.shared.private_caps.map_texture_format(*f)) + .collect(); + vk_view_formats.push(original_format) + } + } + + let mut vk_info = vk::ImageCreateInfo::builder() + .flags(raw_flags) + .image_type(conv::map_texture_dimension(desc.dimension)) + .format(original_format) + .extent(conv::map_copy_extent(&copy_size)) + .mip_levels(desc.mip_level_count) + .array_layers(desc.array_layer_count()) + .samples(vk::SampleCountFlags::from_raw(desc.sample_count)) + .tiling(vk::ImageTiling::OPTIMAL) + .usage(conv::map_texture_usage(desc.usage)) + .sharing_mode(vk::SharingMode::EXCLUSIVE) + .initial_layout(vk::ImageLayout::UNDEFINED); + + let mut format_list_info = vk::ImageFormatListCreateInfo::builder(); + if !vk_view_formats.is_empty() { + format_list_info = format_list_info.view_formats(&vk_view_formats); + vk_info = vk_info.push_next(&mut format_list_info); + } + + let raw = unsafe { self.shared.raw.create_image(&vk_info, None)? }; + let req = unsafe { self.shared.raw.get_image_memory_requirements(raw) }; + + let block = unsafe { + self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: req.alignment - 1, + usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )? + }; + + unsafe { + self.shared + .raw + .bind_image_memory(raw, *block.memory(), block.offset())?
+ }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::IMAGE, raw, label) + }; + } + + Ok(super::Texture { + raw, + drop_guard: None, + block: Some(block), + usage: desc.usage, + format: desc.format, + raw_flags, + copy_size, + view_formats: wgt_view_formats, + }) + } + unsafe fn destroy_texture(&self, texture: super::Texture) { + if texture.drop_guard.is_none() { + unsafe { self.shared.raw.destroy_image(texture.raw, None) }; + } + if let Some(block) = texture.block { + unsafe { self.mem_allocator.lock().dealloc(&*self.shared, block) }; + } + } + + unsafe fn create_texture_view( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> Result<super::TextureView, crate::DeviceError> { + let subresource_range = conv::map_subresource_range(&desc.range, desc.format); + let mut vk_info = vk::ImageViewCreateInfo::builder() + .flags(vk::ImageViewCreateFlags::empty()) + .image(texture.raw) + .view_type(conv::map_view_dimension(desc.dimension)) + .format(self.shared.private_caps.map_texture_format(desc.format)) + .subresource_range(subresource_range); + let layers = + NonZeroU32::new(subresource_range.layer_count).expect("Unexpected zero layer count"); + + let mut image_view_info; + let view_usage = if self.shared.private_caps.image_view_usage && !desc.usage.is_empty() { + image_view_info = vk::ImageViewUsageCreateInfo::builder() + .usage(conv::map_texture_usage(desc.usage)) + .build(); + vk_info = vk_info.push_next(&mut image_view_info); + desc.usage + } else { + texture.usage + }; + + let raw = unsafe { self.shared.raw.create_image_view(&vk_info, None) }?; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::IMAGE_VIEW, raw, label) + }; + } + + let attachment = super::FramebufferAttachment { + raw: if self.shared.private_caps.imageless_framebuffers { + vk::ImageView::null() + } else { + raw + }, + raw_image_flags: texture.raw_flags, + view_usage, + view_format: desc.format, + raw_view_formats: texture + .view_formats + .iter() + .map(|tf| self.shared.private_caps.map_texture_format(*tf)) + .collect(), + }; + + Ok(super::TextureView { + raw, + layers, + attachment, + }) + } + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + if !self.shared.private_caps.imageless_framebuffers { + let mut fbuf_lock = self.shared.framebuffers.lock(); + for (key, &raw_fbuf) in fbuf_lock.iter() { + if key.attachments.iter().any(|at| at.raw == view.raw) { + unsafe { self.shared.raw.destroy_framebuffer(raw_fbuf, None) }; + } + } + fbuf_lock.retain(|key, _| !key.attachments.iter().any(|at| at.raw == view.raw)); + } + unsafe { self.shared.raw.destroy_image_view(view.raw, None) }; + } + + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result<super::Sampler, crate::DeviceError> { + let mut vk_info = vk::SamplerCreateInfo::builder() + .flags(vk::SamplerCreateFlags::empty()) + .mag_filter(conv::map_filter_mode(desc.mag_filter)) + .min_filter(conv::map_filter_mode(desc.min_filter)) + .mipmap_mode(conv::map_mip_filter_mode(desc.mipmap_filter)) + .address_mode_u(conv::map_address_mode(desc.address_modes[0])) + .address_mode_v(conv::map_address_mode(desc.address_modes[1])) + .address_mode_w(conv::map_address_mode(desc.address_modes[2])) + .min_lod(desc.lod_clamp.start) + .max_lod(desc.lod_clamp.end); + + if let Some(fun) = desc.compare { + vk_info = vk_info + .compare_enable(true) + .compare_op(conv::map_comparison(fun)); + } + + if desc.anisotropy_clamp != 1 { + // 
We only enable anisotropy if it is supported, and wgpu-hal interface guarantees + // the clamp is in the range [1, 16] which is always supported if anisotropy is. + vk_info = vk_info + .anisotropy_enable(true) + .max_anisotropy(desc.anisotropy_clamp as f32); + } + + if let Some(color) = desc.border_color { + vk_info = vk_info.border_color(conv::map_border_color(color)); + } + + let raw = unsafe { self.shared.raw.create_sampler(&vk_info, None)? }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::SAMPLER, raw, label) + }; + } + + Ok(super::Sampler { raw }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + unsafe { self.shared.raw.destroy_sampler(sampler.raw, None) }; + } + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor<super::Api>, + ) -> Result<super::CommandEncoder, crate::DeviceError> { + let vk_info = vk::CommandPoolCreateInfo::builder() + .queue_family_index(desc.queue.family_index) + .flags(vk::CommandPoolCreateFlags::TRANSIENT) + .build(); + let raw = unsafe { self.shared.raw.create_command_pool(&vk_info, None)? }; + + Ok(super::CommandEncoder { + raw, + device: Arc::clone(&self.shared), + active: vk::CommandBuffer::null(), + bind_point: vk::PipelineBindPoint::default(), + temp: super::Temp::default(), + free: Vec::new(), + discarded: Vec::new(), + rpass_debug_marker_active: false, + }) + } + unsafe fn destroy_command_encoder(&self, cmd_encoder: super::CommandEncoder) { + unsafe { + if !cmd_encoder.free.is_empty() { + self.shared + .raw + .free_command_buffers(cmd_encoder.raw, &cmd_encoder.free) + } + if !cmd_encoder.discarded.is_empty() { + self.shared + .raw + .free_command_buffers(cmd_encoder.raw, &cmd_encoder.discarded) + } + self.shared.raw.destroy_command_pool(cmd_encoder.raw, None); + } + } + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> Result<super::BindGroupLayout, crate::DeviceError> { + let mut desc_count = gpu_descriptor::DescriptorTotalCount::default(); + let mut types = Vec::new(); + for entry in desc.entries { + let count = entry.count.map_or(1, |c| c.get()); + if entry.binding as usize >= types.len() { + types.resize( + entry.binding as usize + 1, + (vk::DescriptorType::INPUT_ATTACHMENT, 0), + ); + } + types[entry.binding as usize] = ( + conv::map_binding_type(entry.ty), + entry.count.map_or(1, |c| c.get()), + ); + + match entry.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => match ty { + wgt::BufferBindingType::Uniform => { + if has_dynamic_offset { + desc_count.uniform_buffer_dynamic += count; + } else { + desc_count.uniform_buffer += count; + } + } + wgt::BufferBindingType::Storage { .. } => { + if has_dynamic_offset { + desc_count.storage_buffer_dynamic += count; + } else { + desc_count.storage_buffer += count; + } + } + }, + wgt::BindingType::Sampler { .. } => { + desc_count.sampler += count; + } + wgt::BindingType::Texture { .. } => { + desc_count.sampled_image += count; + } + wgt::BindingType::StorageTexture { ..
} => { + desc_count.storage_image += count; + } + } + } + + //Note: not bothering with on stack array here as it's low frequency + let vk_bindings = desc + .entries + .iter() + .map(|entry| vk::DescriptorSetLayoutBinding { + binding: entry.binding, + descriptor_type: types[entry.binding as usize].0, + descriptor_count: types[entry.binding as usize].1, + stage_flags: conv::map_shader_stage(entry.visibility), + p_immutable_samplers: ptr::null(), + }) + .collect::<Vec<_>>(); + + let vk_info = vk::DescriptorSetLayoutCreateInfo::builder().bindings(&vk_bindings); + + let binding_arrays = desc + .entries + .iter() + .enumerate() + .filter_map(|(idx, entry)| entry.count.map(|count| (idx as u32, count))) + .collect(); + + let mut binding_flag_info; + let binding_flag_vec; + + let partially_bound = desc + .flags + .contains(crate::BindGroupLayoutFlags::PARTIALLY_BOUND); + + let vk_info = if partially_bound { + binding_flag_vec = desc + .entries + .iter() + .map(|entry| { + let mut flags = vk::DescriptorBindingFlags::empty(); + + if partially_bound && entry.count.is_some() { + flags |= vk::DescriptorBindingFlags::PARTIALLY_BOUND; + } + + flags + }) + .collect::<Vec<_>>(); + + binding_flag_info = vk::DescriptorSetLayoutBindingFlagsCreateInfo::builder() + .binding_flags(&binding_flag_vec); + + vk_info.push_next(&mut binding_flag_info) + } else { + vk_info + }; + + let raw = unsafe { + self.shared + .raw + .create_descriptor_set_layout(&vk_info, None)? + }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::DESCRIPTOR_SET_LAYOUT, raw, label) + }; + } + + Ok(super::BindGroupLayout { + raw, + desc_count, + types: types.into_boxed_slice(), + binding_arrays, + }) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + unsafe { + self.shared + .raw + .destroy_descriptor_set_layout(bg_layout.raw, None) + }; + } + + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor<super::Api>, + ) -> Result<super::PipelineLayout, crate::DeviceError> { + //Note: not bothering with on stack array here as it's low frequency + let vk_set_layouts = desc + .bind_group_layouts + .iter() + .map(|bgl| bgl.raw) + .collect::<Vec<_>>(); + let vk_push_constant_ranges = desc + .push_constant_ranges + .iter() + .map(|pcr| vk::PushConstantRange { + stage_flags: conv::map_shader_stage(pcr.stages), + offset: pcr.range.start, + size: pcr.range.end - pcr.range.start, + }) + .collect::<Vec<_>>(); + + let vk_info = vk::PipelineLayoutCreateInfo::builder() + .flags(vk::PipelineLayoutCreateFlags::empty()) + .set_layouts(&vk_set_layouts) + .push_constant_ranges(&vk_push_constant_ranges); + + let raw = { + profiling::scope!("vkCreatePipelineLayout"); + unsafe { self.shared.raw.create_pipeline_layout(&vk_info, None)? 
} + }; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::PIPELINE_LAYOUT, raw, label) + }; + } + + let mut binding_arrays = BTreeMap::new(); + for (group, &layout) in desc.bind_group_layouts.iter().enumerate() { + for &(binding, binding_array_size) in &layout.binding_arrays { + binding_arrays.insert( + naga::ResourceBinding { + group: group as u32, + binding, + }, + naga::back::spv::BindingInfo { + binding_array_size: Some(binding_array_size.get()), + }, + ); + } + } + + Ok(super::PipelineLayout { + raw, + binding_arrays, + }) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + unsafe { + self.shared + .raw + .destroy_pipeline_layout(pipeline_layout.raw, None) + }; + } + + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor<super::Api>, + ) -> Result<super::BindGroup, crate::DeviceError> { + let mut vk_sets = unsafe { + self.desc_allocator.lock().allocate( + &*self.shared, + &desc.layout.raw, + gpu_descriptor::DescriptorSetLayoutCreateFlags::empty(), + &desc.layout.desc_count, + 1, + )? + }; + + let set = vk_sets.pop().unwrap(); + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::DESCRIPTOR_SET, *set.raw(), label) + }; + } + + let mut writes = Vec::with_capacity(desc.entries.len()); + let mut buffer_infos = Vec::with_capacity(desc.buffers.len()); + let mut sampler_infos = Vec::with_capacity(desc.samplers.len()); + let mut image_infos = Vec::with_capacity(desc.textures.len()); + for entry in desc.entries { + let (ty, size) = desc.layout.types[entry.binding as usize]; + if size == 0 { + continue; // empty slot + } + let mut write = vk::WriteDescriptorSet::builder() + .dst_set(*set.raw()) + .dst_binding(entry.binding) + .descriptor_type(ty); + write = match ty { + vk::DescriptorType::SAMPLER => { + let index = sampler_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + sampler_infos.extend(desc.samplers[start as usize..end as usize].iter().map( + |binding| { + vk::DescriptorImageInfo::builder() + .sampler(binding.raw) + .build() + }, + )); + write.image_info(&sampler_infos[index..]) + } + vk::DescriptorType::SAMPLED_IMAGE | vk::DescriptorType::STORAGE_IMAGE => { + let index = image_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + image_infos.extend(desc.textures[start as usize..end as usize].iter().map( + |binding| { + let layout = conv::derive_image_layout( + binding.usage, + binding.view.attachment.view_format, + ); + vk::DescriptorImageInfo::builder() + .image_view(binding.view.raw) + .image_layout(layout) + .build() + }, + )); + write.image_info(&image_infos[index..]) + } + vk::DescriptorType::UNIFORM_BUFFER + | vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC + | vk::DescriptorType::STORAGE_BUFFER + | vk::DescriptorType::STORAGE_BUFFER_DYNAMIC => { + let index = buffer_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + buffer_infos.extend(desc.buffers[start as usize..end as usize].iter().map( + |binding| { + vk::DescriptorBufferInfo::builder() + .buffer(binding.buffer.raw) + .offset(binding.offset) + .range(binding.size.map_or(vk::WHOLE_SIZE, wgt::BufferSize::get)) + .build() + }, + )); + write.buffer_info(&buffer_infos[index..]) + } + _ => unreachable!(), + }; + writes.push(write.build()); + } + + unsafe { self.shared.raw.update_descriptor_sets(&writes, &[]) }; + Ok(super::BindGroup { set }) + } + unsafe fn destroy_bind_group(&self, group: 
super::BindGroup) { + unsafe { + self.desc_allocator + .lock() + .free(&*self.shared, Some(group.set)) + }; + } + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result<super::ShaderModule, crate::ShaderError> { + let spv = match shader { + crate::ShaderInput::Naga(naga_shader) => { + if self + .shared + .workarounds + .contains(super::Workarounds::SEPARATE_ENTRY_POINTS) + { + return Ok(super::ShaderModule::Intermediate { + naga_shader, + runtime_checks: desc.runtime_checks, + }); + } + let mut naga_options = self.naga_options.clone(); + if !desc.runtime_checks { + naga_options.bounds_check_policies = naga::proc::BoundsCheckPolicies { + index: naga::proc::BoundsCheckPolicy::Unchecked, + buffer: naga::proc::BoundsCheckPolicy::Unchecked, + image: naga::proc::BoundsCheckPolicy::Unchecked, + binding_array: naga::proc::BoundsCheckPolicy::Unchecked, + }; + } + Cow::Owned( + naga::back::spv::write_vec( + &naga_shader.module, + &naga_shader.info, + &naga_options, + None, + ) + .map_err(|e| crate::ShaderError::Compilation(format!("{e}")))?, + ) + } + crate::ShaderInput::SpirV(spv) => Cow::Borrowed(spv), + }; + + let raw = self.create_shader_module_impl(&spv)?; + + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::SHADER_MODULE, raw, label) + }; + } + + Ok(super::ShaderModule::Raw(raw)) + } + unsafe fn destroy_shader_module(&self, module: super::ShaderModule) { + match module { + super::ShaderModule::Raw(raw) => { + unsafe { self.shared.raw.destroy_shader_module(raw, None) }; + } + super::ShaderModule::Intermediate { .. } => {} + } + } + + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor<super::Api>, + ) -> Result<super::RenderPipeline, crate::PipelineError> { + let dynamic_states = [ + vk::DynamicState::VIEWPORT, + vk::DynamicState::SCISSOR, + vk::DynamicState::BLEND_CONSTANTS, + vk::DynamicState::STENCIL_REFERENCE, + ]; + let mut compatible_rp_key = super::RenderPassKey { + sample_count: desc.multisample.count, + multiview: desc.multiview, + ..Default::default() + }; + let mut stages = ArrayVec::<_, 2>::new(); + let mut vertex_buffers = Vec::with_capacity(desc.vertex_buffers.len()); + let mut vertex_attributes = Vec::new(); + + for (i, vb) in desc.vertex_buffers.iter().enumerate() { + vertex_buffers.push(vk::VertexInputBindingDescription { + binding: i as u32, + stride: vb.array_stride as u32, + input_rate: match vb.step_mode { + wgt::VertexStepMode::Vertex => vk::VertexInputRate::VERTEX, + wgt::VertexStepMode::Instance => vk::VertexInputRate::INSTANCE, + }, + }); + for at in vb.attributes { + vertex_attributes.push(vk::VertexInputAttributeDescription { + location: at.shader_location, + binding: i as u32, + format: conv::map_vertex_format(at.format), + offset: at.offset as u32, + }); + } + } + + let vk_vertex_input = vk::PipelineVertexInputStateCreateInfo::builder() + .vertex_binding_descriptions(&vertex_buffers) + .vertex_attribute_descriptions(&vertex_attributes) + .build(); + + let vk_input_assembly = vk::PipelineInputAssemblyStateCreateInfo::builder() + .topology(conv::map_topology(desc.primitive.topology)) + .primitive_restart_enable(desc.primitive.strip_index_format.is_some()) + .build(); + + let compiled_vs = self.compile_stage( + &desc.vertex_stage, + naga::ShaderStage::Vertex, + &desc.layout.binding_arrays, + )?; + stages.push(compiled_vs.create_info); + let compiled_fs = match desc.fragment_stage { + Some(ref stage) => { + let compiled = 
self.compile_stage( + stage, + naga::ShaderStage::Fragment, + &desc.layout.binding_arrays, + )?; + stages.push(compiled.create_info); + Some(compiled) + } + None => None, + }; + + let mut vk_rasterization = vk::PipelineRasterizationStateCreateInfo::builder() + .polygon_mode(conv::map_polygon_mode(desc.primitive.polygon_mode)) + .front_face(conv::map_front_face(desc.primitive.front_face)) + .line_width(1.0); + if let Some(face) = desc.primitive.cull_mode { + vk_rasterization = vk_rasterization.cull_mode(conv::map_cull_face(face)) + } + let mut vk_rasterization_conservative_state = + vk::PipelineRasterizationConservativeStateCreateInfoEXT::builder() + .conservative_rasterization_mode(vk::ConservativeRasterizationModeEXT::OVERESTIMATE) + .build(); + if desc.primitive.conservative { + vk_rasterization = vk_rasterization.push_next(&mut vk_rasterization_conservative_state); + } + let mut vk_depth_clip_state = + vk::PipelineRasterizationDepthClipStateCreateInfoEXT::builder() + .depth_clip_enable(false) + .build(); + if desc.primitive.unclipped_depth { + vk_rasterization = vk_rasterization.push_next(&mut vk_depth_clip_state); + } + + let mut vk_depth_stencil = vk::PipelineDepthStencilStateCreateInfo::builder(); + if let Some(ref ds) = desc.depth_stencil { + let vk_format = self.shared.private_caps.map_texture_format(ds.format); + let vk_layout = if ds.is_read_only(desc.primitive.cull_mode) { + vk::ImageLayout::DEPTH_STENCIL_READ_ONLY_OPTIMAL + } else { + vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL + }; + compatible_rp_key.depth_stencil = Some(super::DepthStencilAttachmentKey { + base: super::AttachmentKey::compatible(vk_format, vk_layout), + stencil_ops: crate::AttachmentOps::all(), + }); + + if ds.is_depth_enabled() { + vk_depth_stencil = vk_depth_stencil + .depth_test_enable(true) + .depth_write_enable(ds.depth_write_enabled) + .depth_compare_op(conv::map_comparison(ds.depth_compare)); + } + if ds.stencil.is_enabled() { + let s = &ds.stencil; + let front = conv::map_stencil_face(&s.front, s.read_mask, s.write_mask); + let back = conv::map_stencil_face(&s.back, s.read_mask, s.write_mask); + vk_depth_stencil = vk_depth_stencil + .stencil_test_enable(true) + .front(front) + .back(back); + } + + if ds.bias.is_enabled() { + vk_rasterization = vk_rasterization + .depth_bias_enable(true) + .depth_bias_constant_factor(ds.bias.constant as f32) + .depth_bias_clamp(ds.bias.clamp) + .depth_bias_slope_factor(ds.bias.slope_scale); + } + } + + let vk_viewport = vk::PipelineViewportStateCreateInfo::builder() + .flags(vk::PipelineViewportStateCreateFlags::empty()) + .scissor_count(1) + .viewport_count(1) + .build(); + + let vk_sample_mask = [ + desc.multisample.mask as u32, + (desc.multisample.mask >> 32) as u32, + ]; + let vk_multisample = vk::PipelineMultisampleStateCreateInfo::builder() + .rasterization_samples(vk::SampleCountFlags::from_raw(desc.multisample.count)) + .alpha_to_coverage_enable(desc.multisample.alpha_to_coverage_enabled) + .sample_mask(&vk_sample_mask) + .build(); + + let mut vk_attachments = Vec::with_capacity(desc.color_targets.len()); + for cat in desc.color_targets { + let (key, attarchment) = if let Some(cat) = cat.as_ref() { + let mut vk_attachment = vk::PipelineColorBlendAttachmentState::builder() + .color_write_mask(vk::ColorComponentFlags::from_raw(cat.write_mask.bits())); + if let Some(ref blend) = cat.blend { + let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); + let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); + 
vk_attachment = vk_attachment + .blend_enable(true) + .color_blend_op(color_op) + .src_color_blend_factor(color_src) + .dst_color_blend_factor(color_dst) + .alpha_blend_op(alpha_op) + .src_alpha_blend_factor(alpha_src) + .dst_alpha_blend_factor(alpha_dst); + } + + let vk_format = self.shared.private_caps.map_texture_format(cat.format); + ( + Some(super::ColorAttachmentKey { + base: super::AttachmentKey::compatible( + vk_format, + vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL, + ), + resolve: None, + }), + vk_attachment.build(), + ) + } else { + (None, vk::PipelineColorBlendAttachmentState::default()) + }; + + compatible_rp_key.colors.push(key); + vk_attachments.push(attarchment); + } + + let vk_color_blend = vk::PipelineColorBlendStateCreateInfo::builder() + .attachments(&vk_attachments) + .build(); + + let vk_dynamic_state = vk::PipelineDynamicStateCreateInfo::builder() + .dynamic_states(&dynamic_states) + .build(); + + let raw_pass = self + .shared + .make_render_pass(compatible_rp_key) + .map_err(crate::DeviceError::from)?; + + let vk_infos = [{ + vk::GraphicsPipelineCreateInfo::builder() + .layout(desc.layout.raw) + .stages(&stages) + .vertex_input_state(&vk_vertex_input) + .input_assembly_state(&vk_input_assembly) + .rasterization_state(&vk_rasterization) + .viewport_state(&vk_viewport) + .multisample_state(&vk_multisample) + .depth_stencil_state(&vk_depth_stencil) + .color_blend_state(&vk_color_blend) + .dynamic_state(&vk_dynamic_state) + .render_pass(raw_pass) + .build() + }]; + + let mut raw_vec = { + profiling::scope!("vkCreateGraphicsPipelines"); + unsafe { + self.shared + .raw + .create_graphics_pipelines(vk::PipelineCache::null(), &vk_infos, None) + .map_err(|(_, e)| crate::DeviceError::from(e)) + }? + }; + + let raw = raw_vec.pop().unwrap(); + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::PIPELINE, raw, label) + }; + } + + if let Some(raw_module) = compiled_vs.temp_raw_module { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + if let Some(CompiledStage { + temp_raw_module: Some(raw_module), + .. + }) = compiled_fs + { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + + Ok(super::RenderPipeline { raw }) + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + } + + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor<super::Api>, + ) -> Result<super::ComputePipeline, crate::PipelineError> { + let compiled = self.compile_stage( + &desc.stage, + naga::ShaderStage::Compute, + &desc.layout.binding_arrays, + )?; + + let vk_infos = [{ + vk::ComputePipelineCreateInfo::builder() + .layout(desc.layout.raw) + .stage(compiled.create_info) + .build() + }]; + + let mut raw_vec = { + profiling::scope!("vkCreateComputePipelines"); + unsafe { + self.shared + .raw + .create_compute_pipelines(vk::PipelineCache::null(), &vk_infos, None) + .map_err(|(_, e)| crate::DeviceError::from(e)) + }? 
+ }; + + let raw = raw_vec.pop().unwrap(); + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::PIPELINE, raw, label) + }; + } + + if let Some(raw_module) = compiled.temp_raw_module { + unsafe { self.shared.raw.destroy_shader_module(raw_module, None) }; + } + + Ok(super::ComputePipeline { raw }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + } + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor<crate::Label>, + ) -> Result<super::QuerySet, crate::DeviceError> { + let (vk_type, pipeline_statistics) = match desc.ty { + wgt::QueryType::Occlusion => ( + vk::QueryType::OCCLUSION, + vk::QueryPipelineStatisticFlags::empty(), + ), + wgt::QueryType::PipelineStatistics(statistics) => ( + vk::QueryType::PIPELINE_STATISTICS, + conv::map_pipeline_statistics(statistics), + ), + wgt::QueryType::Timestamp => ( + vk::QueryType::TIMESTAMP, + vk::QueryPipelineStatisticFlags::empty(), + ), + }; + + let vk_info = vk::QueryPoolCreateInfo::builder() + .query_type(vk_type) + .query_count(desc.count) + .pipeline_statistics(pipeline_statistics) + .build(); + + let raw = unsafe { self.shared.raw.create_query_pool(&vk_info, None) }?; + if let Some(label) = desc.label { + unsafe { + self.shared + .set_object_name(vk::ObjectType::QUERY_POOL, raw, label) + }; + } + + Ok(super::QuerySet { raw }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + unsafe { self.shared.raw.destroy_query_pool(set.raw, None) }; + } + + unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> { + Ok(if self.shared.private_caps.timeline_semaphores { + let mut sem_type_info = + vk::SemaphoreTypeCreateInfo::builder().semaphore_type(vk::SemaphoreType::TIMELINE); + let vk_info = vk::SemaphoreCreateInfo::builder().push_next(&mut sem_type_info); + let raw = unsafe { self.shared.raw.create_semaphore(&vk_info, None) }?; + super::Fence::TimelineSemaphore(raw) + } else { + super::Fence::FencePool { + last_completed: 0, + active: Vec::new(), + free: Vec::new(), + } + }) + } + unsafe fn destroy_fence(&self, fence: super::Fence) { + match fence { + super::Fence::TimelineSemaphore(raw) => { + unsafe { self.shared.raw.destroy_semaphore(raw, None) }; + } + super::Fence::FencePool { + active, + free, + last_completed: _, + } => { + for (_, raw) in active { + unsafe { self.shared.raw.destroy_fence(raw, None) }; + } + for raw in free { + unsafe { self.shared.raw.destroy_fence(raw, None) }; + } + } + } + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result<crate::FenceValue, crate::DeviceError> { + fence.get_latest( + &self.shared.raw, + self.shared.extension_fns.timeline_semaphore.as_ref(), + ) + } + unsafe fn wait( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ms: u32, + ) -> Result<bool, crate::DeviceError> { + let timeout_ns = timeout_ms as u64 * super::MILLIS_TO_NANOS; + match *fence { + super::Fence::TimelineSemaphore(raw) => { + let semaphores = [raw]; + let values = [wait_value]; + let vk_info = vk::SemaphoreWaitInfo::builder() + .semaphores(&semaphores) + .values(&values); + let result = match self.shared.extension_fns.timeline_semaphore { + Some(super::ExtensionFn::Extension(ref ext)) => unsafe { + ext.wait_semaphores(&vk_info, timeout_ns) + }, + Some(super::ExtensionFn::Promoted) => unsafe { + self.shared.raw.wait_semaphores(&vk_info, timeout_ns) + }, + None => unreachable!(), + }; + match result { 
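+                    // `wait_semaphores` returning TIMEOUT is reported as `Ok(false)`;
+                    // any other error is propagated as a device error.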
+ Ok(()) => Ok(true), + Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + super::Fence::FencePool { + last_completed, + ref active, + free: _, + } => { + if wait_value <= last_completed { + Ok(true) + } else { + match active.iter().find(|&&(value, _)| value >= wait_value) { + Some(&(_, raw)) => { + match unsafe { + self.shared.raw.wait_for_fences(&[raw], true, timeout_ns) + } { + Ok(()) => Ok(true), + Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + None => { + log::error!("No signals reached value {}", wait_value); + Err(crate::DeviceError::Lost) + } + } + } + } + } + } + + unsafe fn start_capture(&self) -> bool { + #[cfg(feature = "renderdoc")] + { + // Renderdoc requires us to give us the pointer that vkInstance _points to_. + let raw_vk_instance = + ash::vk::Handle::as_raw(self.shared.instance.raw.handle()) as *mut *mut _; + let raw_vk_instance_dispatch_table = unsafe { *raw_vk_instance }; + unsafe { + self.render_doc + .start_frame_capture(raw_vk_instance_dispatch_table, ptr::null_mut()) + } + } + #[cfg(not(feature = "renderdoc"))] + false + } + unsafe fn stop_capture(&self) { + #[cfg(feature = "renderdoc")] + { + // Renderdoc requires us to give us the pointer that vkInstance _points to_. + let raw_vk_instance = + ash::vk::Handle::as_raw(self.shared.instance.raw.handle()) as *mut *mut _; + let raw_vk_instance_dispatch_table = unsafe { *raw_vk_instance }; + + unsafe { + self.render_doc + .end_frame_capture(raw_vk_instance_dispatch_table, ptr::null_mut()) + } + } + } +} + +impl From<gpu_alloc::AllocationError> for crate::DeviceError { + fn from(error: gpu_alloc::AllocationError) -> Self { + use gpu_alloc::AllocationError as Ae; + match error { + Ae::OutOfDeviceMemory | Ae::OutOfHostMemory => Self::OutOfMemory, + _ => { + log::error!("memory allocation: {:?}", error); + Self::Lost + } + } + } +} +impl From<gpu_alloc::MapError> for crate::DeviceError { + fn from(error: gpu_alloc::MapError) -> Self { + use gpu_alloc::MapError as Me; + match error { + Me::OutOfDeviceMemory | Me::OutOfHostMemory => Self::OutOfMemory, + _ => { + log::error!("memory mapping: {:?}", error); + Self::Lost + } + } + } +} +impl From<gpu_descriptor::AllocationError> for crate::DeviceError { + fn from(error: gpu_descriptor::AllocationError) -> Self { + log::error!("descriptor allocation: {:?}", error); + Self::OutOfMemory + } +} diff --git a/third_party/rust/wgpu-hal/src/vulkan/instance.rs b/third_party/rust/wgpu-hal/src/vulkan/instance.rs new file mode 100644 index 0000000000..101f303c16 --- /dev/null +++ b/third_party/rust/wgpu-hal/src/vulkan/instance.rs @@ -0,0 +1,832 @@ +use std::{ + ffi::{c_void, CStr, CString}, + slice, + sync::Arc, + thread, +}; + +use ash::{ + extensions::{ext, khr}, + vk, +}; + +unsafe extern "system" fn debug_utils_messenger_callback( + message_severity: vk::DebugUtilsMessageSeverityFlagsEXT, + message_type: vk::DebugUtilsMessageTypeFlagsEXT, + callback_data_ptr: *const vk::DebugUtilsMessengerCallbackDataEXT, + _user_data: *mut c_void, +) -> vk::Bool32 { + const VUID_VKSWAPCHAINCREATEINFOKHR_IMAGEEXTENT_01274: i32 = 0x7cd0911d; + use std::borrow::Cow; + + if thread::panicking() { + return vk::FALSE; + } + + let level = match message_severity { + vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE => log::Level::Debug, + vk::DebugUtilsMessageSeverityFlagsEXT::INFO => log::Level::Info, + vk::DebugUtilsMessageSeverityFlagsEXT::WARNING => log::Level::Warn, + vk::DebugUtilsMessageSeverityFlagsEXT::ERROR => log::Level::Error, + _ => 
log::Level::Warn, + }; + + let cd = unsafe { &*callback_data_ptr }; + + let message_id_name = if cd.p_message_id_name.is_null() { + Cow::from("") + } else { + unsafe { CStr::from_ptr(cd.p_message_id_name) }.to_string_lossy() + }; + let message = if cd.p_message.is_null() { + Cow::from("") + } else { + unsafe { CStr::from_ptr(cd.p_message) }.to_string_lossy() + }; + + // Silence Vulkan Validation error "VUID-VkSwapchainCreateInfoKHR-imageExtent-01274" + // - it's a false positive due to the inherent racy-ness of surface resizing + if cd.message_id_number == VUID_VKSWAPCHAINCREATEINFOKHR_IMAGEEXTENT_01274 { + return vk::FALSE; + } + + let _ = std::panic::catch_unwind(|| { + log::log!( + level, + "{:?} [{} (0x{:x})]\n\t{}", + message_type, + message_id_name, + cd.message_id_number, + message, + ); + }); + + if cd.queue_label_count != 0 { + let labels = + unsafe { slice::from_raw_parts(cd.p_queue_labels, cd.queue_label_count as usize) }; + let names = labels + .iter() + .flat_map(|dul_obj| { + unsafe { dul_obj.p_label_name.as_ref() } + .map(|lbl| unsafe { CStr::from_ptr(lbl) }.to_string_lossy()) + }) + .collect::<Vec<_>>(); + + let _ = std::panic::catch_unwind(|| { + log::log!(level, "\tqueues: {}", names.join(", ")); + }); + } + + if cd.cmd_buf_label_count != 0 { + let labels = + unsafe { slice::from_raw_parts(cd.p_cmd_buf_labels, cd.cmd_buf_label_count as usize) }; + let names = labels + .iter() + .flat_map(|dul_obj| { + unsafe { dul_obj.p_label_name.as_ref() } + .map(|lbl| unsafe { CStr::from_ptr(lbl) }.to_string_lossy()) + }) + .collect::<Vec<_>>(); + + let _ = std::panic::catch_unwind(|| { + log::log!(level, "\tcommand buffers: {}", names.join(", ")); + }); + } + + if cd.object_count != 0 { + let labels = unsafe { slice::from_raw_parts(cd.p_objects, cd.object_count as usize) }; + //TODO: use color fields of `vk::DebugUtilsLabelExt`? + let names = labels + .iter() + .map(|obj_info| { + let name = unsafe { obj_info.p_object_name.as_ref() } + .map(|name| unsafe { CStr::from_ptr(name) }.to_string_lossy()) + .unwrap_or(Cow::Borrowed("?")); + + format!( + "(type: {:?}, hndl: 0x{:x}, name: {})", + obj_info.object_type, obj_info.object_handle, name + ) + }) + .collect::<Vec<_>>(); + let _ = std::panic::catch_unwind(|| { + log::log!(level, "\tobjects: {}", names.join(", ")); + }); + } + + if cfg!(debug_assertions) && level == log::Level::Error { + // Set canary and continue + crate::VALIDATION_CANARY.set(); + } + + vk::FALSE +} + +impl super::Swapchain { + unsafe fn release_resources(self, device: &ash::Device) -> Self { + profiling::scope!("Swapchain::release_resources"); + { + profiling::scope!("vkDeviceWaitIdle"); + let _ = unsafe { device.device_wait_idle() }; + }; + unsafe { device.destroy_fence(self.fence, None) }; + self + } +} + +impl super::InstanceShared { + pub fn entry(&self) -> &ash::Entry { + &self.entry + } + + pub fn raw_instance(&self) -> &ash::Instance { + &self.raw + } + + pub fn driver_api_version(&self) -> u32 { + self.driver_api_version + } + + pub fn extensions(&self) -> &[&'static CStr] { + &self.extensions[..] 
+ } +} + +impl super::Instance { + pub fn shared_instance(&self) -> &super::InstanceShared { + &self.shared + } + + pub fn required_extensions( + entry: &ash::Entry, + _driver_api_version: u32, + flags: crate::InstanceFlags, + ) -> Result<Vec<&'static CStr>, crate::InstanceError> { + let instance_extensions = entry + .enumerate_instance_extension_properties(None) + .map_err(|e| { + log::info!("enumerate_instance_extension_properties: {:?}", e); + crate::InstanceError + })?; + + // Check our extensions against the available extensions + let mut extensions: Vec<&'static CStr> = Vec::new(); + + // VK_KHR_surface + extensions.push(khr::Surface::name()); + + // Platform-specific WSI extensions + if cfg!(all( + unix, + not(target_os = "android"), + not(target_os = "macos") + )) { + // VK_KHR_xlib_surface + extensions.push(khr::XlibSurface::name()); + // VK_KHR_xcb_surface + extensions.push(khr::XcbSurface::name()); + // VK_KHR_wayland_surface + extensions.push(khr::WaylandSurface::name()); + } + if cfg!(target_os = "android") { + // VK_KHR_android_surface + extensions.push(khr::AndroidSurface::name()); + } + if cfg!(target_os = "windows") { + // VK_KHR_win32_surface + extensions.push(khr::Win32Surface::name()); + } + if cfg!(target_os = "macos") { + // VK_EXT_metal_surface + extensions.push(ext::MetalSurface::name()); + } + + if flags.contains(crate::InstanceFlags::DEBUG) { + // VK_EXT_debug_utils + extensions.push(ext::DebugUtils::name()); + } + + // VK_EXT_swapchain_colorspace + // Provid wide color gamut + extensions.push(vk::ExtSwapchainColorspaceFn::name()); + + // VK_KHR_get_physical_device_properties2 + // Even though the extension was promoted to Vulkan 1.1, we still require the extension + // so that we don't have to conditionally use the functions provided by the 1.1 instance + extensions.push(vk::KhrGetPhysicalDeviceProperties2Fn::name()); + + // Only keep available extensions. + extensions.retain(|&ext| { + if instance_extensions.iter().any(|inst_ext| { + crate::auxil::cstr_from_bytes_until_nul(&inst_ext.extension_name) == Some(ext) + }) { + true + } else { + log::info!("Unable to find extension: {}", ext.to_string_lossy()); + false + } + }); + Ok(extensions) + } + + /// # Safety + /// + /// - `raw_instance` must be created from `entry` + /// - `raw_instance` must be created respecting `driver_api_version`, `extensions` and `flags` + /// - `extensions` must be a superset of `required_extensions()` and must be created from the + /// same entry, driver_api_version and flags. 
+ /// - `android_sdk_version` is ignored and can be `0` for all platforms besides Android + #[allow(clippy::too_many_arguments)] + pub unsafe fn from_raw( + entry: ash::Entry, + raw_instance: ash::Instance, + driver_api_version: u32, + android_sdk_version: u32, + extensions: Vec<&'static CStr>, + flags: crate::InstanceFlags, + has_nv_optimus: bool, + drop_guard: Option<crate::DropGuard>, + ) -> Result<Self, crate::InstanceError> { + log::info!("Instance version: 0x{:x}", driver_api_version); + + let debug_utils = if extensions.contains(&ext::DebugUtils::name()) { + log::info!("Enabling debug utils"); + let extension = ext::DebugUtils::new(&entry, &raw_instance); + // having ERROR unconditionally because Vk doesn't like empty flags + let mut severity = vk::DebugUtilsMessageSeverityFlagsEXT::ERROR; + if log::max_level() >= log::LevelFilter::Debug { + severity |= vk::DebugUtilsMessageSeverityFlagsEXT::VERBOSE; + } + if log::max_level() >= log::LevelFilter::Info { + severity |= vk::DebugUtilsMessageSeverityFlagsEXT::INFO; + } + if log::max_level() >= log::LevelFilter::Warn { + severity |= vk::DebugUtilsMessageSeverityFlagsEXT::WARNING; + } + let vk_info = vk::DebugUtilsMessengerCreateInfoEXT::builder() + .flags(vk::DebugUtilsMessengerCreateFlagsEXT::empty()) + .message_severity(severity) + .message_type( + vk::DebugUtilsMessageTypeFlagsEXT::GENERAL + | vk::DebugUtilsMessageTypeFlagsEXT::VALIDATION + | vk::DebugUtilsMessageTypeFlagsEXT::PERFORMANCE, + ) + .pfn_user_callback(Some(debug_utils_messenger_callback)); + let messenger = + unsafe { extension.create_debug_utils_messenger(&vk_info, None) }.unwrap(); + Some(super::DebugUtils { + extension, + messenger, + }) + } else { + None + }; + + let get_physical_device_properties = + if extensions.contains(&khr::GetPhysicalDeviceProperties2::name()) { + log::info!("Enabling device properties2"); + Some(khr::GetPhysicalDeviceProperties2::new( + &entry, + &raw_instance, + )) + } else { + None + }; + + Ok(Self { + shared: Arc::new(super::InstanceShared { + raw: raw_instance, + extensions, + drop_guard, + flags, + debug_utils, + get_physical_device_properties, + entry, + has_nv_optimus, + driver_api_version, + android_sdk_version, + }), + }) + } + + #[allow(dead_code)] + fn create_surface_from_xlib( + &self, + dpy: *mut vk::Display, + window: vk::Window, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&khr::XlibSurface::name()) { + log::warn!("Vulkan driver does not support VK_KHR_xlib_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let xlib_loader = khr::XlibSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::XlibSurfaceCreateInfoKHR::builder() + .flags(vk::XlibSurfaceCreateFlagsKHR::empty()) + .window(window) + .dpy(dpy); + + unsafe { xlib_loader.create_xlib_surface(&info, None) } + .expect("XlibSurface::create_xlib_surface() failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_from_xcb( + &self, + connection: *mut vk::xcb_connection_t, + window: vk::xcb_window_t, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&khr::XcbSurface::name()) { + log::warn!("Vulkan driver does not support VK_KHR_xcb_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let xcb_loader = khr::XcbSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::XcbSurfaceCreateInfoKHR::builder() + .flags(vk::XcbSurfaceCreateFlagsKHR::empty()) + .window(window) + 
.connection(connection); + + unsafe { xcb_loader.create_xcb_surface(&info, None) } + .expect("XcbSurface::create_xcb_surface() failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_from_wayland( + &self, + display: *mut c_void, + surface: *mut c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self + .shared + .extensions + .contains(&khr::WaylandSurface::name()) + { + log::debug!("Vulkan driver does not support VK_KHR_wayland_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let w_loader = khr::WaylandSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::WaylandSurfaceCreateInfoKHR::builder() + .flags(vk::WaylandSurfaceCreateFlagsKHR::empty()) + .display(display) + .surface(surface); + + unsafe { w_loader.create_wayland_surface(&info, None) }.expect("WaylandSurface failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_android( + &self, + window: *const c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self + .shared + .extensions + .contains(&khr::AndroidSurface::name()) + { + log::warn!("Vulkan driver does not support VK_KHR_android_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let a_loader = khr::AndroidSurface::new(&self.shared.entry, &self.shared.raw); + let info = vk::AndroidSurfaceCreateInfoKHR::builder() + .flags(vk::AndroidSurfaceCreateFlagsKHR::empty()) + .window(window as *mut _); + + unsafe { a_loader.create_android_surface(&info, None) }.expect("AndroidSurface failed") + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[allow(dead_code)] + fn create_surface_from_hwnd( + &self, + hinstance: *mut c_void, + hwnd: *mut c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&khr::Win32Surface::name()) { + log::debug!("Vulkan driver does not support VK_KHR_win32_surface"); + return Err(crate::InstanceError); + } + + let surface = { + let info = vk::Win32SurfaceCreateInfoKHR::builder() + .flags(vk::Win32SurfaceCreateFlagsKHR::empty()) + .hinstance(hinstance) + .hwnd(hwnd); + let win32_loader = khr::Win32Surface::new(&self.shared.entry, &self.shared.raw); + unsafe { + win32_loader + .create_win32_surface(&info, None) + .expect("Unable to create Win32 surface") + } + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + #[cfg(any(target_os = "macos", target_os = "ios"))] + fn create_surface_from_view( + &self, + view: *mut c_void, + ) -> Result<super::Surface, crate::InstanceError> { + if !self.shared.extensions.contains(&ext::MetalSurface::name()) { + log::warn!("Vulkan driver does not support VK_EXT_metal_surface"); + return Err(crate::InstanceError); + } + + let layer = unsafe { + crate::metal::Surface::get_metal_layer(view as *mut objc::runtime::Object, None) + }; + + let surface = { + let metal_loader = ext::MetalSurface::new(&self.shared.entry, &self.shared.raw); + let vk_info = vk::MetalSurfaceCreateInfoEXT::builder() + .flags(vk::MetalSurfaceCreateFlagsEXT::empty()) + .layer(layer as *mut _) + .build(); + + unsafe { metal_loader.create_metal_surface(&vk_info, None).unwrap() } + }; + + Ok(self.create_surface_from_vk_surface_khr(surface)) + } + + fn create_surface_from_vk_surface_khr(&self, surface: vk::SurfaceKHR) -> super::Surface { + let functor = khr::Surface::new(&self.shared.entry, &self.shared.raw); + super::Surface { + raw: surface, + functor, + instance: 
Arc::clone(&self.shared), + swapchain: None, + } + } +} + +impl Drop for super::InstanceShared { + fn drop(&mut self) { + unsafe { + if let Some(du) = self.debug_utils.take() { + du.extension + .destroy_debug_utils_messenger(du.messenger, None); + } + if let Some(_drop_guard) = self.drop_guard.take() { + self.raw.destroy_instance(None); + } + } + } +} + +impl crate::Instance<super::Api> for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result<Self, crate::InstanceError> { + use crate::auxil::cstr_from_bytes_until_nul; + + let entry = match unsafe { ash::Entry::load() } { + Ok(entry) => entry, + Err(err) => { + log::info!("Missing Vulkan entry points: {:?}", err); + return Err(crate::InstanceError); + } + }; + let driver_api_version = match entry.try_enumerate_instance_version() { + // Vulkan 1.1+ + Ok(Some(version)) => version, + Ok(None) => vk::API_VERSION_1_0, + Err(err) => { + log::warn!("try_enumerate_instance_version: {:?}", err); + return Err(crate::InstanceError); + } + }; + + let app_name = CString::new(desc.name).unwrap(); + let app_info = vk::ApplicationInfo::builder() + .application_name(app_name.as_c_str()) + .application_version(1) + .engine_name(CStr::from_bytes_with_nul(b"wgpu-hal\0").unwrap()) + .engine_version(2) + .api_version( + // Vulkan 1.0 doesn't like anything but 1.0 passed in here... + if driver_api_version < vk::API_VERSION_1_1 { + vk::API_VERSION_1_0 + } else { + // This is the max Vulkan API version supported by `wgpu-hal`. + // + // If we want to increment this, there are some things that must be done first: + // - Audit the behavioral differences between the previous and new API versions. + // - Audit all extensions used by this backend: + // - If any were promoted in the new API version and the behavior has changed, we must handle the new behavior in addition to the old behavior. + // - If any were obsoleted in the new API version, we must implement a fallback for the new API version + // - If any are non-KHR-vendored, we must ensure the new behavior is still correct (since backwards-compatibility is not guaranteed). + vk::HEADER_VERSION_COMPLETE + }, + ); + + let extensions = Self::required_extensions(&entry, driver_api_version, desc.flags)?; + + let instance_layers = entry.enumerate_instance_layer_properties().map_err(|e| { + log::info!("enumerate_instance_layer_properties: {:?}", e); + crate::InstanceError + })?; + + let nv_optimus_layer = CStr::from_bytes_with_nul(b"VK_LAYER_NV_optimus\0").unwrap(); + let has_nv_optimus = instance_layers.iter().any(|inst_layer| { + cstr_from_bytes_until_nul(&inst_layer.layer_name) == Some(nv_optimus_layer) + }); + + // Check requested layers against the available layers + let layers = { + let mut layers: Vec<&'static CStr> = Vec::new(); + if desc.flags.contains(crate::InstanceFlags::VALIDATION) { + layers.push(CStr::from_bytes_with_nul(b"VK_LAYER_KHRONOS_validation\0").unwrap()); + } + + // Only keep available layers. 
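+            // Unknown layers are dropped with a warning rather than failing instance creation.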
+ layers.retain(|&layer| { + if instance_layers.iter().any(|inst_layer| { + cstr_from_bytes_until_nul(&inst_layer.layer_name) == Some(layer) + }) { + true + } else { + log::warn!("Unable to find layer: {}", layer.to_string_lossy()); + false + } + }); + layers + }; + + #[cfg(target_os = "android")] + let android_sdk_version = { + let properties = android_system_properties::AndroidSystemProperties::new(); + // See: https://developer.android.com/reference/android/os/Build.VERSION_CODES + if let Some(val) = properties.get("ro.build.version.sdk") { + match val.parse::<u32>() { + Ok(sdk_ver) => sdk_ver, + Err(err) => { + log::error!( + "Couldn't parse Android's ro.build.version.sdk system property ({val}): {err}" + ); + 0 + } + } + } else { + log::error!("Couldn't read Android's ro.build.version.sdk system property"); + 0 + } + }; + #[cfg(not(target_os = "android"))] + let android_sdk_version = 0; + + let vk_instance = { + let str_pointers = layers + .iter() + .chain(extensions.iter()) + .map(|&s| { + // Safe because `layers` and `extensions` entries have static lifetime. + s.as_ptr() + }) + .collect::<Vec<_>>(); + + let create_info = vk::InstanceCreateInfo::builder() + .flags(vk::InstanceCreateFlags::empty()) + .application_info(&app_info) + .enabled_layer_names(&str_pointers[..layers.len()]) + .enabled_extension_names(&str_pointers[layers.len()..]); + + unsafe { entry.create_instance(&create_info, None) }.map_err(|e| { + log::warn!("create_instance: {:?}", e); + crate::InstanceError + })? + }; + + unsafe { + Self::from_raw( + entry, + vk_instance, + driver_api_version, + android_sdk_version, + extensions, + desc.flags, + has_nv_optimus, + Some(Box::new(())), // `Some` signals that wgpu-hal is in charge of destroying vk_instance + ) + } + } + + unsafe fn create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Result<super::Surface, crate::InstanceError> { + use raw_window_handle::{RawDisplayHandle as Rdh, RawWindowHandle as Rwh}; + + match (window_handle, display_handle) { + (Rwh::Wayland(handle), Rdh::Wayland(display)) => { + self.create_surface_from_wayland(display.display, handle.surface) + } + (Rwh::Xlib(handle), Rdh::Xlib(display)) => { + self.create_surface_from_xlib(display.display as *mut _, handle.window) + } + (Rwh::Xcb(handle), Rdh::Xcb(display)) => { + self.create_surface_from_xcb(display.connection, handle.window) + } + (Rwh::AndroidNdk(handle), _) => self.create_surface_android(handle.a_native_window), + #[cfg(windows)] + (Rwh::Win32(handle), _) => { + use winapi::um::libloaderapi::GetModuleHandleW; + + let hinstance = unsafe { GetModuleHandleW(std::ptr::null()) }; + self.create_surface_from_hwnd(hinstance as *mut _, handle.hwnd) + } + #[cfg(target_os = "macos")] + (Rwh::AppKit(handle), _) + if self.shared.extensions.contains(&ext::MetalSurface::name()) => + { + self.create_surface_from_view(handle.ns_view) + } + #[cfg(target_os = "ios")] + (Rwh::UiKit(handle), _) + if self.shared.extensions.contains(&ext::MetalSurface::name()) => + { + self.create_surface_from_view(handle.ui_view) + } + (_, _) => Err(crate::InstanceError), + } + } + + unsafe fn destroy_surface(&self, surface: super::Surface) { + unsafe { surface.functor.destroy_surface(surface.raw, None) }; + } + + unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> { + use crate::auxil::db; + + let raw_devices = match unsafe { self.shared.raw.enumerate_physical_devices() } { + Ok(devices) => devices, + Err(err) => { + 
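+                // Enumeration failure is not fatal: log the error and expose no adapters.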
log::error!("enumerate_adapters: {}", err); + Vec::new() + } + }; + + let mut exposed_adapters = raw_devices + .into_iter() + .flat_map(|device| self.expose_adapter(device)) + .collect::<Vec<_>>(); + + // Detect if it's an Intel + NVidia configuration with Optimus + let has_nvidia_dgpu = exposed_adapters.iter().any(|exposed| { + exposed.info.device_type == wgt::DeviceType::DiscreteGpu + && exposed.info.vendor == db::nvidia::VENDOR + }); + if cfg!(target_os = "linux") && has_nvidia_dgpu && self.shared.has_nv_optimus { + for exposed in exposed_adapters.iter_mut() { + if exposed.info.device_type == wgt::DeviceType::IntegratedGpu + && exposed.info.vendor == db::intel::VENDOR + { + // See https://gitlab.freedesktop.org/mesa/mesa/-/issues/4688 + log::warn!( + "Disabling presentation on '{}' (id {:?}) because of NV Optimus (on Linux)", + exposed.info.name, + exposed.adapter.raw + ); + exposed.adapter.private_caps.can_present = false; + } + } + } + + exposed_adapters + } +} + +impl crate::Surface<super::Api> for super::Surface { + unsafe fn configure( + &mut self, + device: &super::Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + let old = self + .swapchain + .take() + .map(|sc| unsafe { sc.release_resources(&device.shared.raw) }); + + let swapchain = unsafe { device.create_swapchain(self, config, old)? }; + self.swapchain = Some(swapchain); + + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &super::Device) { + if let Some(sc) = self.swapchain.take() { + let swapchain = unsafe { sc.release_resources(&device.shared.raw) }; + unsafe { swapchain.functor.destroy_swapchain(swapchain.raw, None) }; + } + } + + unsafe fn acquire_texture( + &mut self, + timeout: Option<std::time::Duration>, + ) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> { + let sc = self.swapchain.as_mut().unwrap(); + + let mut timeout_ns = match timeout { + Some(duration) => duration.as_nanos() as u64, + None => u64::MAX, + }; + + // AcquireNextImageKHR on Android (prior to Android 11) doesn't support timeouts + // and will also log verbose warnings if tying to use a timeout. + // + // Android 10 implementation for reference: + // https://android.googlesource.com/platform/frameworks/native/+/refs/tags/android-mainline-10.0.0_r13/vulkan/libvulkan/swapchain.cpp#1426 + // Android 11 implementation for reference: + // https://android.googlesource.com/platform/frameworks/native/+/refs/tags/android-mainline-11.0.0_r45/vulkan/libvulkan/swapchain.cpp#1438 + // + // Android 11 corresponds to an SDK_INT/ro.build.version.sdk of 30 + if cfg!(target_os = "android") && self.instance.android_sdk_version < 30 { + timeout_ns = u64::MAX; + } + + // will block if no image is available + let (index, suboptimal) = match unsafe { + sc.functor + .acquire_next_image(sc.raw, timeout_ns, vk::Semaphore::null(), sc.fence) + } { + // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. + // See the comment in `Queue::present`. 
+            #[cfg(target_os = "android")]
+            Ok((index, _)) => (index, false),
+            #[cfg(not(target_os = "android"))]
+            Ok(pair) => pair,
+            Err(error) => {
+                return match error {
+                    vk::Result::TIMEOUT => Ok(None),
+                    vk::Result::NOT_READY | vk::Result::ERROR_OUT_OF_DATE_KHR => {
+                        Err(crate::SurfaceError::Outdated)
+                    }
+                    vk::Result::ERROR_SURFACE_LOST_KHR => Err(crate::SurfaceError::Lost),
+                    other => Err(crate::DeviceError::from(other).into()),
+                }
+            }
+        };
+
+        // special case for Intel Vulkan returning bizarre values (ugh)
+        if sc.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 {
+            return Err(crate::SurfaceError::Outdated);
+        }
+
+        let fences = &[sc.fence];
+
+        unsafe { sc.device.raw.wait_for_fences(fences, true, !0) }
+            .map_err(crate::DeviceError::from)?;
+        unsafe { sc.device.raw.reset_fences(fences) }.map_err(crate::DeviceError::from)?;
+
+        // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03209
+        let raw_flags = if sc
+            .raw_flags
+            .contains(vk::SwapchainCreateFlagsKHR::MUTABLE_FORMAT)
+        {
+            vk::ImageCreateFlags::MUTABLE_FORMAT | vk::ImageCreateFlags::EXTENDED_USAGE
+        } else {
+            vk::ImageCreateFlags::empty()
+        };
+
+        let texture = super::SurfaceTexture {
+            index,
+            texture: super::Texture {
+                raw: sc.images[index as usize],
+                drop_guard: None,
+                block: None,
+                usage: sc.config.usage,
+                format: sc.config.format,
+                raw_flags,
+                copy_size: crate::CopyExtent {
+                    width: sc.config.extent.width,
+                    height: sc.config.extent.height,
+                    depth: 1,
+                },
+                view_formats: sc.view_formats.clone(),
+            },
+        };
+        Ok(Some(crate::AcquiredSurfaceTexture {
+            texture,
+            suboptimal,
+        }))
+    }
+
+    unsafe fn discard_texture(&mut self, _texture: super::SurfaceTexture) {}
+}
diff --git a/third_party/rust/wgpu-hal/src/vulkan/mod.rs b/third_party/rust/wgpu-hal/src/vulkan/mod.rs
new file mode 100644
index 0000000000..27200dc4e0
--- /dev/null
+++ b/third_party/rust/wgpu-hal/src/vulkan/mod.rs
@@ -0,0 +1,626 @@
+/*!
+# Vulkan API internals.
+
+## Stack memory
+
+Ash expects slices, which we don't generally have available.
+We cope with this requirement in a combination of the following ways:
+  - temporarily allocating `Vec` on heap, where overhead is permitted
+  - growing temporary local storage
+  - using `inplace_it` on iterators
+
+## Framebuffers and Render passes
+
+Render passes are cached on the device and kept forever.
+
+Framebuffers are also cached on the device, but they are removed when
+any of the image views they contain gets removed.
+If Vulkan supports image-less framebuffers,
+then the actual views are excluded from the framebuffer key.
+
+## Fences
+
+If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
+Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
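+
+In the pool case, each submission that signals a fence pops a `VkFence` from the
+free list (or creates a new one), records it in `active` together with the fence
+value, and later polling advances `last_completed` and recycles signaled fences
+back into the free list; see the `Fence` type and `Queue::submit` below.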
+ +!*/ + +mod adapter; +mod command; +mod conv; +mod device; +mod instance; + +use std::{borrow::Borrow, ffi::CStr, fmt, num::NonZeroU32, sync::Arc}; + +use arrayvec::ArrayVec; +use ash::{ + extensions::{ext, khr}, + vk, +}; +use parking_lot::Mutex; + +const MILLIS_TO_NANOS: u64 = 1_000_000; +const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1; + +#[derive(Clone)] +pub struct Api; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = CommandEncoder; + type CommandBuffer = CommandBuffer; + + type Buffer = Buffer; + type Texture = Texture; + type SurfaceTexture = SurfaceTexture; + type TextureView = TextureView; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; + + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; +} + +struct DebugUtils { + extension: ext::DebugUtils, + messenger: vk::DebugUtilsMessengerEXT, +} + +pub struct InstanceShared { + raw: ash::Instance, + extensions: Vec<&'static CStr>, + drop_guard: Option<crate::DropGuard>, + flags: crate::InstanceFlags, + debug_utils: Option<DebugUtils>, + get_physical_device_properties: Option<khr::GetPhysicalDeviceProperties2>, + entry: ash::Entry, + has_nv_optimus: bool, + android_sdk_version: u32, + driver_api_version: u32, +} + +pub struct Instance { + shared: Arc<InstanceShared>, +} + +struct Swapchain { + raw: vk::SwapchainKHR, + raw_flags: vk::SwapchainCreateFlagsKHR, + functor: khr::Swapchain, + device: Arc<DeviceShared>, + fence: vk::Fence, + images: Vec<vk::Image>, + config: crate::SurfaceConfiguration, + view_formats: Vec<wgt::TextureFormat>, +} + +pub struct Surface { + raw: vk::SurfaceKHR, + functor: khr::Surface, + instance: Arc<InstanceShared>, + swapchain: Option<Swapchain>, +} + +#[derive(Debug)] +pub struct SurfaceTexture { + index: u32, + texture: Texture, +} + +impl Borrow<Texture> for SurfaceTexture { + fn borrow(&self) -> &Texture { + &self.texture + } +} + +pub struct Adapter { + raw: vk::PhysicalDevice, + instance: Arc<InstanceShared>, + //queue_families: Vec<vk::QueueFamilyProperties>, + known_memory_flags: vk::MemoryPropertyFlags, + phd_capabilities: adapter::PhysicalDeviceCapabilities, + //phd_features: adapter::PhysicalDeviceFeatures, + downlevel_flags: wgt::DownlevelFlags, + private_caps: PrivateCapabilities, + workarounds: Workarounds, +} + +// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`. +enum ExtensionFn<T> { + /// The loaded function pointer struct for an extension. + Extension(T), + /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used. + Promoted, +} + +struct DeviceExtensionFunctions { + draw_indirect_count: Option<khr::DrawIndirectCount>, + timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>, +} + +/// Set of internal capabilities, which don't show up in the exposed +/// device geometry, but affect the code paths taken internally. +#[derive(Clone, Debug)] +struct PrivateCapabilities { + /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height` or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport height does not require a Y shift. 
+ /// + /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`). + flip_y_requires_shift: bool, + imageless_framebuffers: bool, + image_view_usage: bool, + timeline_semaphores: bool, + texture_d24: bool, + texture_d24_s8: bool, + texture_s8: bool, + /// Ability to present contents to any screen. Only needed to work around broken platform configurations. + can_present: bool, + non_coherent_map_mask: wgt::BufferAddress, + robust_buffer_access: bool, + robust_image_access: bool, + zero_initialize_workgroup_memory: bool, +} + +bitflags::bitflags!( + /// Workaround flags. + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] + pub struct Workarounds: u32 { + /// Only generate SPIR-V for one entry point at a time. + const SEPARATE_ENTRY_POINTS = 0x1; + /// Qualcomm OOMs when there are zero color attachments but a non-null pointer + /// to a subpass resolve attachment array. This nulls out that pointer in that case. + const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2; + } +); + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct AttachmentKey { + format: vk::Format, + layout: vk::ImageLayout, + ops: crate::AttachmentOps, +} + +impl AttachmentKey { + /// Returns an attachment key for a compatible attachment. + fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self { + Self { + format, + layout, + ops: crate::AttachmentOps::all(), + } + } +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct ColorAttachmentKey { + base: AttachmentKey, + resolve: Option<AttachmentKey>, +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct DepthStencilAttachmentKey { + base: AttachmentKey, + stencil_ops: crate::AttachmentOps, +} + +#[derive(Clone, Eq, Default, Hash, PartialEq)] +struct RenderPassKey { + colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>, + depth_stencil: Option<DepthStencilAttachmentKey>, + sample_count: u32, + multiview: Option<NonZeroU32>, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct FramebufferAttachment { + /// Can be NULL if the framebuffer is image-less + raw: vk::ImageView, + raw_image_flags: vk::ImageCreateFlags, + view_usage: crate::TextureUses, + view_format: wgt::TextureFormat, + raw_view_formats: Vec<vk::Format>, +} + +#[derive(Clone, Eq, Hash, PartialEq)] +struct FramebufferKey { + attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>, + extent: wgt::Extent3d, + sample_count: u32, +} + +struct DeviceShared { + raw: ash::Device, + family_index: u32, + queue_index: u32, + raw_queue: ash::vk::Queue, + handle_is_owned: bool, + instance: Arc<InstanceShared>, + physical_device: ash::vk::PhysicalDevice, + enabled_extensions: Vec<&'static CStr>, + extension_fns: DeviceExtensionFunctions, + vendor_id: u32, + timestamp_period: f32, + private_caps: PrivateCapabilities, + workarounds: Workarounds, + render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>, + framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>, +} + +pub struct Device { + shared: Arc<DeviceShared>, + mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>, + desc_allocator: + Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>, + valid_ash_memory_types: u32, + naga_options: naga::back::spv::Options, + #[cfg(feature = "renderdoc")] + render_doc: crate::auxil::renderdoc::RenderDoc, +} + +pub struct Queue { + raw: vk::Queue, + swapchain_fn: khr::Swapchain, + device: Arc<DeviceShared>, + family_index: u32, + 
/// We use a redundant chain of semaphores to pass on the signal + /// from submissions to the last present, since it's required by the + /// specification. + /// It would be correct to use a single semaphore there, but + /// [Intel hangs in `anv_queue_finish`](https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508). + relay_semaphores: [vk::Semaphore; 2], + relay_index: Option<usize>, +} + +#[derive(Debug)] +pub struct Buffer { + raw: vk::Buffer, + block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>, +} + +#[derive(Debug)] +pub struct Texture { + raw: vk::Image, + drop_guard: Option<crate::DropGuard>, + block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>, + usage: crate::TextureUses, + format: wgt::TextureFormat, + raw_flags: vk::ImageCreateFlags, + copy_size: crate::CopyExtent, + view_formats: Vec<wgt::TextureFormat>, +} + +impl Texture { + /// # Safety + /// + /// - The image handle must not be manually destroyed + pub unsafe fn raw_handle(&self) -> vk::Image { + self.raw + } +} + +#[derive(Debug)] +pub struct TextureView { + raw: vk::ImageView, + layers: NonZeroU32, + attachment: FramebufferAttachment, +} + +#[derive(Debug)] +pub struct Sampler { + raw: vk::Sampler, +} + +#[derive(Debug)] +pub struct BindGroupLayout { + raw: vk::DescriptorSetLayout, + desc_count: gpu_descriptor::DescriptorTotalCount, + types: Box<[(vk::DescriptorType, u32)]>, + /// Map of binding index to size, + binding_arrays: Vec<(u32, NonZeroU32)>, +} + +#[derive(Debug)] +pub struct PipelineLayout { + raw: vk::PipelineLayout, + binding_arrays: naga::back::spv::BindingMap, +} + +#[derive(Debug)] +pub struct BindGroup { + set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>, +} + +#[derive(Default)] +struct Temp { + marker: Vec<u8>, + buffer_barriers: Vec<vk::BufferMemoryBarrier>, + image_barriers: Vec<vk::ImageMemoryBarrier>, +} + +unsafe impl Send for Temp {} +unsafe impl Sync for Temp {} + +impl Temp { + fn clear(&mut self) { + self.marker.clear(); + self.buffer_barriers.clear(); + self.image_barriers.clear(); + //see also - https://github.com/NotIntMan/inplace_it/issues/8 + } + + fn make_c_str(&mut self, name: &str) -> &CStr { + self.marker.clear(); + self.marker.extend_from_slice(name.as_bytes()); + self.marker.push(0); + unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) } + } +} + +pub struct CommandEncoder { + raw: vk::CommandPool, + device: Arc<DeviceShared>, + active: vk::CommandBuffer, + bind_point: vk::PipelineBindPoint, + temp: Temp, + free: Vec<vk::CommandBuffer>, + discarded: Vec<vk::CommandBuffer>, + /// If this is true, the active renderpass enabled a debug span, + /// and needs to be disabled on renderpass close. + rpass_debug_marker_active: bool, +} + +impl fmt::Debug for CommandEncoder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CommandEncoder") + .field("raw", &self.raw) + .finish() + } +} + +#[derive(Debug)] +pub struct CommandBuffer { + raw: vk::CommandBuffer, +} + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum ShaderModule { + Raw(vk::ShaderModule), + Intermediate { + naga_shader: crate::NagaShader, + runtime_checks: bool, + }, +} + +#[derive(Debug)] +pub struct RenderPipeline { + raw: vk::Pipeline, +} + +#[derive(Debug)] +pub struct ComputePipeline { + raw: vk::Pipeline, +} + +#[derive(Debug)] +pub struct QuerySet { + raw: vk::QueryPool, +} + +#[derive(Debug)] +pub enum Fence { + TimelineSemaphore(vk::Semaphore), + FencePool { + last_completed: crate::FenceValue, + /// The pending fence values have to be ascending. 
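+        /// Each entry pairs a fence value with the `vk::Fence` that will signal it.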
+ active: Vec<(crate::FenceValue, vk::Fence)>, + free: Vec<vk::Fence>, + }, +} + +impl Fence { + fn check_active( + device: &ash::Device, + mut max_value: crate::FenceValue, + active: &[(crate::FenceValue, vk::Fence)], + ) -> Result<crate::FenceValue, crate::DeviceError> { + for &(value, raw) in active.iter() { + unsafe { + if value > max_value && device.get_fence_status(raw)? { + max_value = value; + } + } + } + Ok(max_value) + } + + fn get_latest( + &self, + device: &ash::Device, + extension: Option<&ExtensionFn<khr::TimelineSemaphore>>, + ) -> Result<crate::FenceValue, crate::DeviceError> { + match *self { + Self::TimelineSemaphore(raw) => unsafe { + Ok(match *extension.unwrap() { + ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?, + ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?, + }) + }, + Self::FencePool { + last_completed, + ref active, + free: _, + } => Self::check_active(device, last_completed, active), + } + } + + fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> { + match *self { + Self::TimelineSemaphore(_) => {} + Self::FencePool { + ref mut last_completed, + ref mut active, + ref mut free, + } => { + let latest = Self::check_active(device, *last_completed, active)?; + let base_free = free.len(); + for &(value, raw) in active.iter() { + if value <= latest { + free.push(raw); + } + } + if free.len() != base_free { + active.retain(|&(value, _)| value > latest); + unsafe { + device.reset_fences(&free[base_free..])?; + } + } + *last_completed = latest; + } + } + Ok(()) + } +} + +impl crate::Queue<Api> for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&CommandBuffer], + signal_fence: Option<(&mut Fence, crate::FenceValue)>, + ) -> Result<(), crate::DeviceError> { + let vk_cmd_buffers = command_buffers + .iter() + .map(|cmd| cmd.raw) + .collect::<Vec<_>>(); + + let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + + let mut fence_raw = vk::Fence::null(); + let mut vk_timeline_info; + let mut signal_semaphores = [vk::Semaphore::null(), vk::Semaphore::null()]; + let signal_values; + + if let Some((fence, value)) = signal_fence { + fence.maintain(&self.device.raw)?; + match *fence { + Fence::TimelineSemaphore(raw) => { + signal_values = [!0, value]; + signal_semaphores[1] = raw; + vk_timeline_info = vk::TimelineSemaphoreSubmitInfo::builder() + .signal_semaphore_values(&signal_values); + vk_info = vk_info.push_next(&mut vk_timeline_info); + } + Fence::FencePool { + ref mut active, + ref mut free, + .. + } => { + fence_raw = match free.pop() { + Some(raw) => raw, + None => unsafe { + self.device + .raw + .create_fence(&vk::FenceCreateInfo::builder(), None)? + }, + }; + active.push((value, fence_raw)); + } + } + } + + let wait_stage_mask = [vk::PipelineStageFlags::TOP_OF_PIPE]; + let sem_index = match self.relay_index { + Some(old_index) => { + vk_info = vk_info + .wait_semaphores(&self.relay_semaphores[old_index..old_index + 1]) + .wait_dst_stage_mask(&wait_stage_mask); + (old_index + 1) % self.relay_semaphores.len() + } + None => 0, + }; + self.relay_index = Some(sem_index); + signal_semaphores[0] = self.relay_semaphores[sem_index]; + + let signal_count = if signal_semaphores[1] == vk::Semaphore::null() { + 1 + } else { + 2 + }; + vk_info = vk_info.signal_semaphores(&signal_semaphores[..signal_count]); + + profiling::scope!("vkQueueSubmit"); + unsafe { + self.device + .raw + .queue_submit(self.raw, &[vk_info.build()], fence_raw)? 
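+            // `fence_raw` is null unless a pool-based `Fence` was supplied; timeline
+            // semaphores are signaled through the submit info instead.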
+ }; + Ok(()) + } + + unsafe fn present( + &mut self, + surface: &mut Surface, + texture: SurfaceTexture, + ) -> Result<(), crate::SurfaceError> { + let ssc = surface.swapchain.as_ref().unwrap(); + + let swapchains = [ssc.raw]; + let image_indices = [texture.index]; + let mut vk_info = vk::PresentInfoKHR::builder() + .swapchains(&swapchains) + .image_indices(&image_indices); + + if let Some(old_index) = self.relay_index.take() { + vk_info = vk_info.wait_semaphores(&self.relay_semaphores[old_index..old_index + 1]); + } + + let suboptimal = { + profiling::scope!("vkQueuePresentKHR"); + unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| { + match error { + vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated, + vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost, + _ => crate::DeviceError::from(error).into(), + } + })? + }; + if suboptimal { + // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. + // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation + // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation). + // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`. + #[cfg(not(target_os = "android"))] + log::warn!("Suboptimal present of frame {}", texture.index); + } + Ok(()) + } + + unsafe fn get_timestamp_period(&self) -> f32 { + self.device.timestamp_period + } +} + +impl From<vk::Result> for crate::DeviceError { + fn from(result: vk::Result) -> Self { + match result { + vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => { + Self::OutOfMemory + } + vk::Result::ERROR_DEVICE_LOST => Self::Lost, + _ => { + log::warn!("Unrecognized device error {:?}", result); + Self::Lost + } + } + } +} |